class TestSearchIntegration(unittest.TestCase):
    """Integration test for BM25 search over CACM, with and without pre-tokenization.

    ``setUp`` downloads the CACM JSONL corpus, produces a copy tokenized with
    ``bert-base-uncased``, indexes both, and builds one score checker per
    index. Each test then asserts that the checker accepts the expected score.
    """

    def setUp(self):
        """Prepare a scratch directory, the two CACM indexes and their checkers."""
        # The current directory depends on if you're running inside an IDE or from command line.
        curdir = os.getcwd()
        if curdir.endswith('sparse'):
            self.pyserini_root = '../..'
        else:
            self.pyserini_root = '.'

        self.tmp = f'{self.pyserini_root}/integrations/tmp{randint(0, 10000)}'
        # Start from an empty directory: drop leftovers of an earlier run that
        # drew the same random suffix, then always (re)create the directory so
        # the download and indexing steps below have somewhere to write.
        if os.path.exists(self.tmp):
            shutil.rmtree(self.tmp)
        os.mkdir(self.tmp)

        # wget cacm jsonl file
        os.system(f'wget https://raw.githubusercontent.com/castorini/anserini-data/master/CACM/corpus/jsonl/cacm.json -P {self.tmp}/cacm_jsonl')

        # pre tokenized jsonl
        os.system(f'python -m pyserini.tokenize_json_collection --input {self.tmp}/cacm_jsonl/ --output {self.tmp}/cacm_bert_jsonl/ --tokenizer bert-base-uncased')

        self.pyserini_index_cmd = 'python -m pyserini.index'
        self.pyserini_search_cmd = 'python -m pyserini.search'

        self.cacm_jsonl_path = os.path.join(self.tmp, 'cacm_jsonl')
        self.cacm_bert_jsonl_path = os.path.join(self.tmp, 'cacm_bert_jsonl')
        self.cacm_index_path = os.path.join(self.tmp, 'cacm_index')
        self.cacm_bert_index_path = os.path.join(self.tmp, 'cacm_bert_index')
        self.cacm_qrels_path = os.path.join(self.pyserini_root, 'tools/topics-and-qrels/qrels.cacm.txt')
        self.cacm_topics_path = os.path.join(self.pyserini_root, 'tools/topics-and-qrels/topics.cacm.txt')

        # Index the raw collection ...
        os.system(f'{self.pyserini_index_cmd} -collection JsonCollection -generator DefaultLuceneDocumentGenerator -threads 9 -input {self.cacm_jsonl_path} -index {self.cacm_index_path} -storePositions -storeDocvectors -storeRaw')
        # ... and the pre-tokenized one (note the extra -pretokenized flag).
        os.system(f'{self.pyserini_index_cmd} -collection JsonCollection -generator DefaultLuceneDocumentGenerator -threads 9 -input {self.cacm_bert_jsonl_path} -index {self.cacm_bert_index_path} -storePositions -storeDocvectors -storeRaw -pretokenized')

        eval_cmd = f'{self.pyserini_root}/tools/eval/trec_eval.9.0.4/trec_eval -m map -m P.30'

        # Both checkers share topics, qrels and the evaluation command; only
        # the index differs.
        self.cacm_checker = SimpleSearcherScoreChecker(
            index=self.cacm_index_path,
            topics=self.cacm_topics_path,
            pyserini_topics=self.cacm_topics_path,
            qrels=self.cacm_qrels_path,
            eval=eval_cmd)

        self.cacm_bert_checker = SimpleSearcherScoreChecker(
            index=self.cacm_bert_index_path,
            topics=self.cacm_topics_path,
            pyserini_topics=self.cacm_topics_path,
            qrels=self.cacm_qrels_path,
            eval=eval_cmd)

    def test_without_pretokenized(self):
        """BM25 over the raw CACM index must be accepted with expected score 0.3114."""
        self.assertTrue(self.cacm_checker.run('cacm', '--bm25', 0.3114))

    def test_with_pretokenized(self):
        """BM25 over the pre-tokenized index must be accepted with expected score 0.2750."""
        self.assertTrue(self.cacm_bert_checker.run('cacm_bert', '--bm25', 0.2750, 'bert-base-uncased'))

    def tearDown(self):
        """Remove the scratch directory created by ``setUp``."""
        shutil.rmtree(self.tmp)
def setUp(self):
    """Resolve repository roots, create a scratch directory and build score checkers.

    Sets the pyserini/anserini root paths from the current working
    directory, creates an empty ``integrations/tmp<N>`` scratch directory,
    records index and qrels paths for Core17, Core18, Robust04 and Robust05,
    and builds one ``SimpleSearcherScoreChecker`` per collection.
    """
    # The current directory depends on if you're running inside an IDE or from command line.
    curdir = os.getcwd()
    if curdir.endswith('clprf'):
        self.pyserini_root = '../..'
        self.anserini_root = '../../../anserini'
    else:
        self.pyserini_root = '.'
        self.anserini_root = '../anserini'

    self.tmp = f'{self.pyserini_root}/integrations/tmp{randint(0, 10000)}'
    # Drop leftovers from an earlier run that drew the same random suffix,
    # then always (re)create the directory so run files can be written to it.
    if os.path.exists(self.tmp):
        shutil.rmtree(self.tmp)
    os.mkdir(self.tmp)

    self.pyserini_search_cmd = 'python -m pyserini.search'
    self.pyserini_fusion_cmd = 'python -m pyserini.fusion'

    self.core17_index_path = os.path.join(self.anserini_root, 'indexes/lucene-index.nyt')
    self.core17_qrels_path = os.path.join(self.pyserini_root, 'tools/topics-and-qrels/qrels.core17.txt')

    self.core18_index_path = os.path.join(self.anserini_root, 'indexes/lucene-index.wapo.v2')
    self.core18_qrels_path = os.path.join(self.pyserini_root, 'tools/topics-and-qrels/qrels.core18.txt')

    self.robust04_index_path = os.path.join(self.anserini_root, 'indexes/lucene-index.disk45')
    self.robust04_qrels_path = os.path.join(self.pyserini_root, 'tools/topics-and-qrels/qrels.robust04.txt')

    self.robust05_index_path = os.path.join(self.anserini_root, 'indexes/lucene-index.robust05')
    self.robust05_qrels_path = os.path.join(self.pyserini_root, 'tools/topics-and-qrels/qrels.robust05.txt')

    # Every checker is evaluated with the same trec_eval invocation.
    eval_cmd = f'{self.pyserini_root}/tools/eval/trec_eval.9.0.4/trec_eval -m map -m P.30'

    self.core17_checker = SimpleSearcherScoreChecker(
        index=self.core17_index_path,
        topics=os.path.join(self.pyserini_root, 'tools/topics-and-qrels/topics.core17.txt'),
        pyserini_topics='core17',
        qrels=self.core17_qrels_path,
        eval=eval_cmd)

    self.core18_checker = SimpleSearcherScoreChecker(
        index=self.core18_index_path,
        topics=os.path.join(self.pyserini_root, 'tools/topics-and-qrels/topics.core18.txt'),
        pyserini_topics='core18',
        qrels=self.core18_qrels_path,
        eval=eval_cmd)

    self.robust04_checker = SimpleSearcherScoreChecker(
        index=self.robust04_index_path,
        topics=os.path.join(self.pyserini_root, 'tools/topics-and-qrels/topics.robust04.txt'),
        pyserini_topics='robust04',
        qrels=self.robust04_qrels_path,
        eval=eval_cmd)

    self.robust05_checker = SimpleSearcherScoreChecker(
        index=self.robust05_index_path,
        topics=os.path.join(self.pyserini_root, 'tools/topics-and-qrels/topics.robust05.txt'),
        pyserini_topics='robust05',
        qrels=self.robust05_qrels_path,
        eval=eval_cmd)
class TestSearchIntegration(unittest.TestCase):
    """Integration tests for ``pyserini.search`` runs that use the ``--prcl`` options.

    Each test runs one or more searches into a per-test scratch directory,
    scores the resulting run file with trec_eval and compares the MAP value
    against a recorded expectation (tolerance 0.0001).
    """

    def setUp(self):
        """Resolve repository roots, create a scratch directory and build score checkers."""
        # The current directory depends on if you're running inside an IDE or from command line.
        curdir = os.getcwd()
        if curdir.endswith('clprf'):
            self.pyserini_root = '../..'
            self.anserini_root = '../../../anserini'
        else:
            self.pyserini_root = '.'
            self.anserini_root = '../anserini'

        self.tmp = f'{self.pyserini_root}/integrations/tmp{randint(0, 10000)}'
        # Drop leftovers from an earlier run that drew the same random suffix,
        # then always (re)create the directory so run files can be written to it.
        if os.path.exists(self.tmp):
            shutil.rmtree(self.tmp)
        os.mkdir(self.tmp)

        self.pyserini_search_cmd = 'python -m pyserini.search'
        self.pyserini_fusion_cmd = 'python -m pyserini.fusion'

        self.core17_index_path = os.path.join(
            self.anserini_root, 'indexes/lucene-index.core17.pos+docvectors+raw')
        self.core17_qrels_path = os.path.join(
            self.pyserini_root, 'tools/topics-and-qrels/qrels.core17.txt')

        self.core18_index_path = os.path.join(
            self.anserini_root, 'indexes/lucene-index.core18.pos+docvectors+raw')
        self.core18_qrels_path = os.path.join(
            self.pyserini_root, 'tools/topics-and-qrels/qrels.core18.txt')

        self.robust04_index_path = os.path.join(
            self.anserini_root, 'indexes/lucene-index.robust04.pos+docvectors+raw')
        self.robust04_qrels_path = os.path.join(
            self.pyserini_root, 'tools/topics-and-qrels/qrels.robust04.txt')

        self.robust05_index_path = os.path.join(
            self.anserini_root, 'indexes/lucene-index.robust05.pos+docvectors+raw')
        self.robust05_qrels_path = os.path.join(
            self.pyserini_root, 'tools/topics-and-qrels/qrels.robust05.txt')

        # Every checker is evaluated with the same trec_eval invocation.
        eval_cmd = f'{self.pyserini_root}/tools/eval/trec_eval.9.0.4/trec_eval -m map -m P.30'

        self.core17_checker = SimpleSearcherScoreChecker(
            index=self.core17_index_path,
            topics=os.path.join(self.pyserini_root, 'tools/topics-and-qrels/topics.core17.txt'),
            pyserini_topics='core17',
            qrels=self.core17_qrels_path,
            eval=eval_cmd)

        self.core18_checker = SimpleSearcherScoreChecker(
            index=self.core18_index_path,
            topics=os.path.join(self.pyserini_root, 'tools/topics-and-qrels/topics.core18.txt'),
            pyserini_topics='core18',
            qrels=self.core18_qrels_path,
            eval=eval_cmd)

        self.robust04_checker = SimpleSearcherScoreChecker(
            index=self.robust04_index_path,
            topics=os.path.join(self.pyserini_root, 'tools/topics-and-qrels/topics.robust04.txt'),
            pyserini_topics='robust04',
            qrels=self.robust04_qrels_path,
            eval=eval_cmd)

        self.robust05_checker = SimpleSearcherScoreChecker(
            index=self.robust05_index_path,
            topics=os.path.join(self.pyserini_root, 'tools/topics-and-qrels/topics.robust05.txt'),
            pyserini_topics='robust05',
            qrels=self.robust05_qrels_path,
            eval=eval_cmd)

    # ------------------------------------------------------------------
    # Private helpers shared by the tests below.
    # ------------------------------------------------------------------

    def _prcl_search(self, index_path, topics, output, classifiers, alpha, rm3=False):
        """Run one ``--prcl`` search into ``output`` and assert it exited with status 0.

        ``classifiers`` is the text placed after ``--prcl`` (``'lr'``, ``'svm'``
        or ``'lr svm'``); ``alpha`` is formatted into ``--prcl.alpha``;
        ``rm3`` appends ``--rm3`` to the command.
        """
        cmd = (f'{self.pyserini_search_cmd} --index {index_path} '
               f'--topics {topics} --output {output} '
               f'--prcl {classifiers} --prcl.vectorizer TfidfVectorizer --prcl.alpha {alpha}')
        if rm3:
            cmd += ' --rm3'
        status = os.system(cmd)
        self.assertEqual(status, 0)

    def _fuse(self, run_names, fused_name):
        """Fuse the named run files in the scratch directory with ``pyserini.fusion``."""
        runs = ' '.join(f'{self.tmp}/{name}' for name in run_names)
        cmd = (f'{self.pyserini_fusion_cmd} '
               f'--runs {runs} '
               f'--output {self.tmp}/{fused_name} --resort')
        status = os.system(cmd)
        self.assertEqual(status, 0)

    def _trec_eval_map(self, collection, run_name):
        """Score a run file in the scratch directory and return its parsed MAP.

        Asserts that trec_eval exits with status 0 and writes nothing to stderr.
        """
        score_cmd = (f'{self.anserini_root}/tools/eval/trec_eval.9.0.4/trec_eval -m map -m P.30 '
                     f'{self.anserini_root}/src/main/resources/topics-and-qrels/qrels.{collection}.txt '
                     f'{self.tmp}/{run_name}')
        # The command is run twice on purpose: os.system yields the exit
        # status, run_command yields the captured stdout/stderr.
        status = os.system(score_cmd)
        stdout, stderr = run_command(score_cmd)
        score = parse_score(stdout, "map")
        self.assertEqual(status, 0)
        self.assertEqual(stderr, '')
        return score

    def _check_prcl(self, collection, variant, classifiers, alpha, expected_map, rm3=False):
        """Run a single ``--prcl`` search on ``collection`` and compare its MAP.

        The run file is named ``<collection>_<variant>[_rm3].txt``; the topics
        name passed to the search equals ``collection``.
        """
        suffix = '_rm3' if rm3 else ''
        run_name = f'{collection}_{variant}{suffix}.txt'
        index_path = getattr(self, f'{collection}_index_path')
        self._prcl_search(index_path, collection, f'{self.tmp}/{run_name}',
                          classifiers, alpha, rm3=rm3)
        score = self._trec_eval_map(collection, run_name)
        self.assertAlmostEqual(score, expected_map, delta=0.0001)

    def _check_rrf(self, collection, alpha, expected_map, rm3=False):
        """Run lr and svm searches, fuse the two runs and compare the fused MAP."""
        suffix = '_rm3' if rm3 else ''
        index_path = getattr(self, f'{collection}_index_path')
        lr_run = f'{collection}_lr{suffix}.txt'
        svm_run = f'{collection}_svm{suffix}.txt'
        fused_run = f'{collection}_rrf{suffix}.txt'
        self._prcl_search(index_path, collection, f'{self.tmp}/{lr_run}', 'lr', alpha, rm3=rm3)
        self._prcl_search(index_path, collection, f'{self.tmp}/{svm_run}', 'svm', alpha, rm3=rm3)
        self._fuse([lr_run, svm_run], fused_run)
        score = self._trec_eval_map(collection, fused_run)
        self.assertAlmostEqual(score, expected_map, delta=0.0001)

    # ------------------------------------------------------------------
    # Cross validation
    # ------------------------------------------------------------------

    def test_cross_validation(self):
        """Produce lr runs for alpha 0.0..1.0, cross-validate them and check MAP."""
        os.mkdir(f'{self.tmp}/core17')
        for alpha in [x / 10.0 for x in range(0, 11)]:
            self._prcl_search(self.core17_index_path, 'core17',
                              f'{self.tmp}/core17/core17_lr_A{alpha}_bm25.txt',
                              'lr', alpha)
        # Exit status of the cross-validation script is not checked here
        # (unchanged from the original test).
        os.system(
            f'python {self.pyserini_root}/scripts/classifier_prf/cross_validate.py '
            f'--anserini {self.anserini_root} --run_file {self.tmp} --pyserini {self.pyserini_root} '
            f'--collection core17 --output {self.tmp}/core17_lr.txt --classifier lr ')
        score = self._trec_eval_map('core17', 'core17_lr.txt')
        self.assertAlmostEqual(score, 0.2462, delta=0.0001)

    # ------------------------------------------------------------------
    # Core17
    # ------------------------------------------------------------------

    def test_core17(self):
        self.assertTrue(
            self.core17_checker.run('core17_bm25', '--bm25', 0.2087))

    def test_core17_rm3(self):
        self.assertTrue(
            self.core17_checker.run('core17_bm25', '--bm25 --rm3', 0.2823))

    def test_core17_lr(self):
        self._check_prcl('core17', 'lr', 'lr', 0.7, 0.2473)

    def test_core17_lr_rm3(self):
        self._check_prcl('core17', 'lr', 'lr', 0.4, 0.2940, rm3=True)

    def test_core17_svm(self):
        self._check_prcl('core17', 'svm', 'svm', 0.7, 0.2385)

    def test_core17_svm_rm3(self):
        self._check_prcl('core17', 'svm', 'svm', 0.4, 0.2970, rm3=True)

    def test_core17_avg(self):
        self._check_prcl('core17', 'avg', 'lr svm', 0.6, 0.2442)

    def test_core17_avg_rm3(self):
        self._check_prcl('core17', 'avg', 'lr svm', 0.5, 0.2967, rm3=True)

    def test_core17_rrf(self):
        self._check_rrf('core17', 0.7, 0.2446)

    def test_core17_rrf_rm3(self):
        self._check_rrf('core17', 0.4, 0.2965, rm3=True)

    # ------------------------------------------------------------------
    # Core18
    # ------------------------------------------------------------------

    def test_core18(self):
        self.assertTrue(
            self.core18_checker.run('core18_bm25', '--bm25', 0.2495))

    def test_core18_rm3(self):
        self.assertTrue(
            self.core18_checker.run('core18_bm25', '--bm25 --rm3', 0.3135))

    def test_core18_lr(self):
        self._check_prcl('core18', 'lr', 'lr', 0.6, 0.2837)

    def test_core18_lr_rm3(self):
        self._check_prcl('core18', 'lr', 'lr', 0.5, 0.3222, rm3=True)

    def test_core18_svm(self):
        self._check_prcl('core18', 'svm', 'svm', 0.6, 0.2840)

    def test_core18_svm_rm3(self):
        self._check_prcl('core18', 'svm', 'svm', 0.5, 0.3219, rm3=True)

    def test_core18_avg(self):
        self._check_prcl('core18', 'avg', 'lr svm', 0.4, 0.2860)

    def test_core18_avg_rm3(self):
        self._check_prcl('core18', 'avg', 'lr svm', 0.4, 0.3227, rm3=True)

def test_core18_rrf(self): pyserini_topics = 'core18' lr_cmd = f'{self.pyserini_search_cmd} --index {self.core18_index_path} ' \ + f'--topics {pyserini_topics} --output {self.tmp}/core18_lr.txt ' \ + f'--prcl lr --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.6' status = os.system(lr_cmd) self.assertEqual(status, 0) svm_cmd = f'{self.pyserini_search_cmd} --index {self.core18_index_path} ' \ + f'--topics {pyserini_topics} --output {self.tmp}/core18_svm.txt ' \ + f'--prcl svm --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.6' status = os.system(svm_cmd) self.assertEqual(status, 0) rrf_cmd = f'{self.pyserini_fusion_cmd} ' \ + f'--runs {self.tmp}/core18_lr.txt {self.tmp}/core18_svm.txt ' \ + f'--output {self.tmp}/core18_rrf.txt --resort' status = os.system(rrf_cmd) self.assertEqual(status, 0) score_cmd = f'{self.anserini_root}/tools/eval/trec_eval.9.0.4/trec_eval -m map -m P.30 \ {self.anserini_root}/src/main/resources/topics-and-qrels/qrels.core18.txt \ {self.tmp}/core18_rrf.txt' status = os.system(score_cmd) stdout, stderr = run_command(score_cmd) score = parse_score(stdout, "map") self.assertEqual(status, 0) self.assertEqual(stderr, '') self.assertAlmostEqual(score, 0.2881, delta=0.0001) def test_core18_rrf_rm3(self): pyserini_topics = 'core18' lr_cmd = f'{self.pyserini_search_cmd} --index {self.core18_index_path} ' \ + f'--topics {pyserini_topics} --output {self.tmp}/core18_lr_rm3.txt ' \ + f'--prcl lr --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.5 --rm3' status = os.system(lr_cmd) self.assertEqual(status, 0) svm_cmd = f'{self.pyserini_search_cmd} --index {self.core18_index_path} ' \ + f'--topics {pyserini_topics} --output {self.tmp}/core18_svm_rm3.txt ' \ + f'--prcl svm --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.5 --rm3' status = os.system(svm_cmd) self.assertEqual(status, 0) rrf_cmd = f'{self.pyserini_fusion_cmd} ' \ + f'--runs {self.tmp}/core18_lr_rm3.txt {self.tmp}/core18_svm_rm3.txt ' \ + f'--output {self.tmp}/core18_rrf_rm3.txt --resort' status = 
os.system(rrf_cmd) self.assertEqual(status, 0) score_cmd = f'{self.anserini_root}/tools/eval/trec_eval.9.0.4/trec_eval -m map -m P.30 \ {self.anserini_root}/src/main/resources/topics-and-qrels/qrels.core18.txt \ {self.tmp}/core18_rrf_rm3.txt' status = os.system(score_cmd) stdout, stderr = run_command(score_cmd) score = parse_score(stdout, "map") self.assertEqual(status, 0) self.assertEqual(stderr, '') self.assertAlmostEqual(score, 0.3214, delta=0.0001) def test_robust04(self): self.assertTrue( self.robust04_checker.run('robust04_bm25', '--bm25', 0.2531)) def test_robust04_rm3(self): self.assertTrue( self.robust04_checker.run('robust04_bm25_rm3', '--bm25 --rm3', 0.2903)) def test_robust04_lr(self): pyserini_topics = 'robust04' run_file_cmd = f'{self.pyserini_search_cmd} --index {self.robust04_index_path} ' \ + f'--topics {pyserini_topics} --output {self.tmp}/robust04_lr.txt ' \ + f'--prcl lr --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.5' status = os.system(run_file_cmd) self.assertEqual(status, 0) score_cmd = f'{self.anserini_root}/tools/eval/trec_eval.9.0.4/trec_eval -m map -m P.30 \ {self.anserini_root}/src/main/resources/topics-and-qrels/qrels.robust04.txt \ {self.tmp}/robust04_lr.txt' status = os.system(score_cmd) stdout, stderr = run_command(score_cmd) score = parse_score(stdout, "map") self.assertEqual(status, 0) self.assertEqual(stderr, '') self.assertAlmostEqual(score, 0.2747, delta=0.0001) def test_robust04_lr_rm3(self): pyserini_topics = 'robust04' run_file_cmd = f'{self.pyserini_search_cmd} --index {self.robust04_index_path} ' \ + f'--topics {pyserini_topics} --output {self.tmp}/robust04_lr_rm3.txt ' \ + f'--prcl lr --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.3 --rm3' status = os.system(run_file_cmd) self.assertEqual(status, 0) score_cmd = f'{self.anserini_root}/tools/eval/trec_eval.9.0.4/trec_eval -m map -m P.30 \ {self.anserini_root}/src/main/resources/topics-and-qrels/qrels.robust04.txt \ {self.tmp}/robust04_lr_rm3.txt' status = 
os.system(score_cmd) stdout, stderr = run_command(score_cmd) score = parse_score(stdout, "map") self.assertEqual(status, 0) self.assertEqual(stderr, '') self.assertAlmostEqual(score, 0.2971, delta=0.0001) def test_robust04_svm(self): pyserini_topics = 'robust04' run_file_cmd = f'{self.pyserini_search_cmd} --index {self.robust04_index_path} ' \ + f'--topics {pyserini_topics} --output {self.tmp}/robust04_svm.txt ' \ + f'--prcl svm --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.5' status = os.system(run_file_cmd) self.assertEqual(status, 0) score_cmd = f'{self.anserini_root}/tools/eval/trec_eval.9.0.4/trec_eval -m map -m P.30 \ {self.anserini_root}/src/main/resources/topics-and-qrels/qrels.robust04.txt \ {self.tmp}/robust04_svm.txt' status = os.system(score_cmd) stdout, stderr = run_command(score_cmd) score = parse_score(stdout, "map") self.assertEqual(status, 0) self.assertEqual(stderr, '') self.assertAlmostEqual(score, 0.2726, delta=0.0001) def test_robust04_svm_rm3(self): pyserini_topics = 'robust04' run_file_cmd = f'{self.pyserini_search_cmd} --index {self.robust04_index_path} ' \ + f'--topics {pyserini_topics} --output {self.tmp}/robust04_svm_rm3.txt ' \ + f'--prcl svm --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.3 --rm3' status = os.system(run_file_cmd) self.assertEqual(status, 0) score_cmd = f'{self.anserini_root}/tools/eval/trec_eval.9.0.4/trec_eval -m map -m P.30 \ {self.anserini_root}/src/main/resources/topics-and-qrels/qrels.robust04.txt \ {self.tmp}/robust04_svm_rm3.txt' status = os.system(score_cmd) stdout, stderr = run_command(score_cmd) score = parse_score(stdout, "map") self.assertEqual(status, 0) self.assertEqual(stderr, '') self.assertAlmostEqual(score, 0.2967, delta=0.0001) def test_robust04_avg(self): pyserini_topics = 'robust04' run_file_cmd = f'{self.pyserini_search_cmd} --index {self.robust04_index_path} ' \ + f'--topics {pyserini_topics} --output {self.tmp}/robust04_avg.txt ' \ + f'--prcl lr svm --prcl.vectorizer TfidfVectorizer 
--prcl.alpha 0.5' status = os.system(run_file_cmd) self.assertEqual(status, 0) score_cmd = f'{self.anserini_root}/tools/eval/trec_eval.9.0.4/trec_eval -m map -m P.30 \ {self.anserini_root}/src/main/resources/topics-and-qrels/qrels.robust04.txt \ {self.tmp}/robust04_avg.txt' status = os.system(score_cmd) stdout, stderr = run_command(score_cmd) score = parse_score(stdout, "map") self.assertEqual(status, 0) self.assertEqual(stderr, '') self.assertAlmostEqual(score, 0.276, delta=0.0001) def test_robust04_avg_rm3(self): pyserini_topics = 'robust04' run_file_cmd = f'{self.pyserini_search_cmd} --index {self.robust04_index_path} ' \ + f'--topics {pyserini_topics} --output {self.tmp}/robust04_avg_rm3.txt ' \ + f'--prcl lr svm --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.3 --rm3' status = os.system(run_file_cmd) self.assertEqual(status, 0) score_cmd = f'{self.anserini_root}/tools/eval/trec_eval.9.0.4/trec_eval -m map -m P.30 \ {self.anserini_root}/src/main/resources/topics-and-qrels/qrels.robust04.txt \ {self.tmp}/robust04_avg_rm3.txt' status = os.system(score_cmd) stdout, stderr = run_command(score_cmd) score = parse_score(stdout, "map") self.assertEqual(status, 0) self.assertEqual(stderr, '') self.assertAlmostEqual(score, 0.2980, delta=0.0001) def test_robust04_rrf(self): pyserini_topics = 'robust04' lr_cmd = f'{self.pyserini_search_cmd} --index {self.robust04_index_path} ' \ + f'--topics {pyserini_topics} --output {self.tmp}/robust04_lr.txt ' \ + f'--prcl lr --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.5' status = os.system(lr_cmd) self.assertEqual(status, 0) svm_cmd = f'{self.pyserini_search_cmd} --index {self.robust04_index_path} ' \ + f'--topics {pyserini_topics} --output {self.tmp}/robust04_svm.txt ' \ + f'--prcl svm --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.5' status = os.system(svm_cmd) self.assertEqual(status, 0) rrf_cmd = f'{self.pyserini_fusion_cmd} ' \ + f'--runs {self.tmp}/robust04_lr.txt {self.tmp}/robust04_svm.txt ' \ + f'--output 
{self.tmp}/robust04_rrf.txt --resort' status = os.system(rrf_cmd) self.assertEqual(status, 0) score_cmd = f'{self.anserini_root}/tools/eval/trec_eval.9.0.4/trec_eval -m map -m P.30 \ {self.anserini_root}/src/main/resources/topics-and-qrels/qrels.robust04.txt \ {self.tmp}/robust04_rrf.txt' status = os.system(score_cmd) stdout, stderr = run_command(score_cmd) score = parse_score(stdout, "map") self.assertEqual(status, 0) self.assertEqual(stderr, '') self.assertAlmostEqual(score, 0.275, delta=0.0001) def test_robust04_rrf_rm3(self): pyserini_topics = 'robust04' lr_cmd = f'{self.pyserini_search_cmd} --index {self.robust04_index_path} ' \ + f'--topics {pyserini_topics} --output {self.tmp}/robust04_lr_rm3.txt ' \ + f'--prcl lr --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.3 --rm3' status = os.system(lr_cmd) self.assertEqual(status, 0) svm_cmd = f'{self.pyserini_search_cmd} --index {self.robust04_index_path} ' \ + f'--topics {pyserini_topics} --output {self.tmp}/robust04_svm_rm3.txt ' \ + f'--prcl svm --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.3 --rm3' status = os.system(svm_cmd) self.assertEqual(status, 0) rrf_cmd = f'{self.pyserini_fusion_cmd} ' \ + f'--runs {self.tmp}/robust04_lr_rm3.txt {self.tmp}/robust04_svm_rm3.txt ' \ + f'--output {self.tmp}/robust04_rrf_rm3.txt --resort' status = os.system(rrf_cmd) self.assertEqual(status, 0) score_cmd = f'{self.anserini_root}/tools/eval/trec_eval.9.0.4/trec_eval -m map -m P.30 \ {self.anserini_root}/src/main/resources/topics-and-qrels/qrels.robust04.txt \ {self.tmp}/robust04_rrf_rm3.txt' status = os.system(score_cmd) stdout, stderr = run_command(score_cmd) score = parse_score(stdout, "map") self.assertEqual(status, 0) self.assertEqual(stderr, '') self.assertAlmostEqual(score, 0.2975, delta=0.0001) def test_robust05(self): self.assertTrue( self.robust05_checker.run('robust05_bm25', '--bm25', 0.2032)) def test_robust05_rm3(self): self.assertTrue( self.robust05_checker.run('robust05_bm25_rm3', '--bm25 --rm3', 0.2602)) def 
test_robust05_lr(self): pyserini_topics = 'robust05' run_file_cmd = f'{self.pyserini_search_cmd} --index {self.robust05_index_path} ' \ + f'--topics {pyserini_topics} --output {self.tmp}/robust05_lr.txt ' \ + f'--prcl lr --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.8' status = os.system(run_file_cmd) self.assertEqual(status, 0) score_cmd = f'{self.anserini_root}/tools/eval/trec_eval.9.0.4/trec_eval -m map -m P.30 \ {self.anserini_root}/src/main/resources/topics-and-qrels/qrels.robust05.txt \ {self.tmp}/robust05_lr.txt' status = os.system(score_cmd) stdout, stderr = run_command(score_cmd) score = parse_score(stdout, "map") self.assertEqual(status, 0) self.assertEqual(stderr, '') self.assertAlmostEqual(score, 0.2476, delta=0.0001) def test_robust05_lr_rm3(self): pyserini_topics = 'robust05' run_file_cmd = f'{self.pyserini_search_cmd} --index {self.robust05_index_path} ' \ + f'--topics {pyserini_topics} --output {self.tmp}/robust05_lr_rm3.txt ' \ + f'--prcl lr --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.6 --rm3' status = os.system(run_file_cmd) self.assertEqual(status, 0) score_cmd = f'{self.anserini_root}/tools/eval/trec_eval.9.0.4/trec_eval -m map -m P.30 \ {self.anserini_root}/src/main/resources/topics-and-qrels/qrels.robust05.txt \ {self.tmp}/robust05_lr_rm3.txt' status = os.system(score_cmd) stdout, stderr = run_command(score_cmd) score = parse_score(stdout, "map") self.assertEqual(status, 0) self.assertEqual(stderr, '') self.assertAlmostEqual(score, 0.2854, delta=0.0001) def test_robust05_svm(self): pyserini_topics = 'robust05' run_file_cmd = f'{self.pyserini_search_cmd} --index {self.robust05_index_path} ' \ + f'--topics {pyserini_topics} --output {self.tmp}/robust05_svm.txt ' \ + f'--prcl svm --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.8' status = os.system(run_file_cmd) self.assertEqual(status, 0) score_cmd = f'{self.anserini_root}/tools/eval/trec_eval.9.0.4/trec_eval -m map -m P.30 \ 
{self.anserini_root}/src/main/resources/topics-and-qrels/qrels.robust05.txt \ {self.tmp}/robust05_svm.txt' status = os.system(score_cmd) stdout, stderr = run_command(score_cmd) score = parse_score(stdout, "map") self.assertEqual(status, 0) self.assertEqual(stderr, '') self.assertAlmostEqual(score, 0.2486, delta=0.0001) def test_robust05_svm_rm3(self): pyserini_topics = 'robust05' run_file_cmd = f'{self.pyserini_search_cmd} --index {self.robust05_index_path} ' \ + f'--topics {pyserini_topics} --output {self.tmp}/robust05_svm_rm3.txt ' \ + f'--prcl svm --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.6 --rm3' status = os.system(run_file_cmd) self.assertEqual(status, 0) score_cmd = f'{self.anserini_root}/tools/eval/trec_eval.9.0.4/trec_eval -m map -m P.30 \ {self.anserini_root}/src/main/resources/topics-and-qrels/qrels.robust05.txt \ {self.tmp}/robust05_svm_rm3.txt' status = os.system(score_cmd) stdout, stderr = run_command(score_cmd) score = parse_score(stdout, "map") self.assertEqual(status, 0) self.assertEqual(stderr, '') self.assertAlmostEqual(score, 0.2855, delta=0.0001) def test_robust05_avg(self): pyserini_topics = 'robust05' run_file_cmd = f'{self.pyserini_search_cmd} --index {self.robust05_index_path} ' \ + f'--topics {pyserini_topics} --output {self.tmp}/robust05_avg.txt ' \ + f'--prcl lr svm --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.8' status = os.system(run_file_cmd) self.assertEqual(status, 0) score_cmd = f'{self.anserini_root}/tools/eval/trec_eval.9.0.4/trec_eval -m map -m P.30 \ {self.anserini_root}/src/main/resources/topics-and-qrels/qrels.robust05.txt \ {self.tmp}/robust05_avg.txt' status = os.system(score_cmd) stdout, stderr = run_command(score_cmd) score = parse_score(stdout, "map") self.assertEqual(status, 0) self.assertEqual(stderr, '') self.assertAlmostEqual(score, 0.2485, delta=0.0001) def test_robust05_avg_rm3(self): pyserini_topics = 'robust05' run_file_cmd = f'{self.pyserini_search_cmd} --index {self.robust05_index_path} ' \ + 
f'--topics {pyserini_topics} --output {self.tmp}/robust05_avg_rm3.txt ' \ + f'--prcl lr svm --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.6 --rm3' status = os.system(run_file_cmd) self.assertEqual(status, 0) score_cmd = f'{self.anserini_root}/tools/eval/trec_eval.9.0.4/trec_eval -m map -m P.30 \ {self.anserini_root}/src/main/resources/topics-and-qrels/qrels.robust05.txt \ {self.tmp}/robust05_avg_rm3.txt' status = os.system(score_cmd) stdout, stderr = run_command(score_cmd) score = parse_score(stdout, "map") self.assertEqual(status, 0) self.assertEqual(stderr, '') self.assertAlmostEqual(score, 0.2865, delta=0.0001) def test_robust05_rrf(self): pyserini_topics = 'robust05' lr_cmd = f'{self.pyserini_search_cmd} --index {self.robust05_index_path} ' \ + f'--topics {pyserini_topics} --output {self.tmp}/robust05_lr.txt ' \ + f'--prcl lr --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.5' status = os.system(lr_cmd) self.assertEqual(status, 0) svm_cmd = f'{self.pyserini_search_cmd} --index {self.robust05_index_path} ' \ + f'--topics {pyserini_topics} --output {self.tmp}/robust05_svm.txt ' \ + f'--prcl svm --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.5' status = os.system(svm_cmd) self.assertEqual(status, 0) rrf_cmd = f'{self.pyserini_fusion_cmd} ' \ + f'--runs {self.tmp}/robust05_lr.txt {self.tmp}/robust05_svm.txt ' \ + f'--output {self.tmp}/robust05_rrf.txt --resort' status = os.system(rrf_cmd) self.assertEqual(status, 0) score_cmd = f'{self.anserini_root}/tools/eval/trec_eval.9.0.4/trec_eval -m map -m P.30 \ {self.anserini_root}/src/main/resources/topics-and-qrels/qrels.robust05.txt \ {self.tmp}/robust05_rrf.txt' status = os.system(score_cmd) stdout, stderr = run_command(score_cmd) score = parse_score(stdout, "map") self.assertEqual(status, 0) self.assertEqual(stderr, '') self.assertAlmostEqual(score, 0.2401, delta=0.0001) def test_robust05_rrf_rm3(self): pyserini_topics = 'robust05' lr_cmd = f'{self.pyserini_search_cmd} --index {self.robust05_index_path} ' \ + 
f'--topics {pyserini_topics} --output {self.tmp}/robust05_lr_rm3.txt ' \ + f'--prcl lr --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.3 --rm3' status = os.system(lr_cmd) self.assertEqual(status, 0) svm_cmd = f'{self.pyserini_search_cmd} --index {self.robust05_index_path} ' \ + f'--topics {pyserini_topics} --output {self.tmp}/robust05_svm_rm3.txt ' \ + f'--prcl svm --prcl.vectorizer TfidfVectorizer --prcl.alpha 0.3 --rm3' status = os.system(svm_cmd) self.assertEqual(status, 0) rrf_cmd = f'{self.pyserini_fusion_cmd} ' \ + f'--runs {self.tmp}/robust05_lr_rm3.txt {self.tmp}/robust05_svm_rm3.txt ' \ + f'--output {self.tmp}/robust05_rrf_rm3.txt --resort' status = os.system(rrf_cmd) self.assertEqual(status, 0) score_cmd = f'{self.anserini_root}/tools/eval/trec_eval.9.0.4/trec_eval -m map -m P.30 \ {self.anserini_root}/src/main/resources/topics-and-qrels/qrels.robust05.txt \ {self.tmp}/robust05_rrf_rm3.txt' status = os.system(score_cmd) stdout, stderr = run_command(score_cmd) score = parse_score(stdout, "map") self.assertEqual(status, 0) self.assertEqual(stderr, '') self.assertAlmostEqual(score, 0.2788, delta=0.0001) def tearDown(self): shutil.rmtree(f'{self.tmp}')
def setUp(self):
    """Prepare a scratch directory, the score checkers and the per-collection archives.

    Sets ``self.pyserini_root`` / ``self.anserini_root`` (relative paths chosen
    from the current working directory), ``self.tmp`` (a scratch directory
    under ``integrations/``) and one ``self.<collection>_checker`` for each of
    core17, core18, robust04 and robust05.  For every collection the archive
    ``integrations/<collection>.tar.gz`` is extracted into ``self.tmp``.

    Raises:
        Whatever the archive extraction raises; the scratch directory is
        removed before the exception propagates.
    """
    # All paths are relative, so they depend on where the runner was started.
    if os.getcwd().endswith('integrations'):
        self.pyserini_root = '..'
        self.anserini_root = '../../anserini'
    else:
        self.pyserini_root = '.'
        self.anserini_root = '../anserini'

    self.tmp = f'{self.pyserini_root}/integrations/tmp{randint(0, 10000)}'

    # Always start from an empty directory.  Previously a stale directory was
    # removed but not recreated, leaving self.tmp missing.
    if os.path.exists(self.tmp):
        shutil.rmtree(self.tmp)
    os.mkdir(self.tmp)

    trec_eval = f'{self.pyserini_root}/tools/eval/trec_eval.9.0.4/trec_eval -m map -m P.30'

    for collection in ('core17', 'core18', 'robust04', 'robust05'):
        checker = SimpleSearcherScoreChecker(
            index=os.path.join(
                self.anserini_root,
                f'indexes/lucene-index.{collection}.pos+docvectors+raw'),
            topics=os.path.join(
                self.pyserini_root,
                f'tools/topics-and-qrels/topics.{collection}.txt'),
            pyserini_topics=collection,
            qrels=os.path.join(
                self.pyserini_root,
                f'tools/topics-and-qrels/qrels.{collection}.txt'),
            eval=trec_eval)
        setattr(self, f'{collection}_checker', checker)

        archive = f'{self.pyserini_root}/integrations/{collection}.tar.gz'
        try:
            if not os.path.exists(f'{self.tmp}/{collection}'):
                # The context manager closes the archive even if extraction fails.
                with tarfile.open(archive, 'r:gz') as tar:
                    tar.extractall(path=self.tmp)
        except BaseException:
            # Clean-up-and-re-raise: tearDown does not run when setUp fails, so
            # the scratch directory has to be removed here (also on interrupts).
            shutil.rmtree(self.tmp)
            print(
                f'{collection}.tar.gz is not saved in {self.pyserini_root}/integrations'
            )
            raise
class TestSearchIntegration(unittest.TestCase):
    """Regression tests that compare MAP scores against fixed expected values.

    BM25 / BM25+RM3 baselines are checked through ``SimpleSearcherScoreChecker``.
    The classifier tests run ``scripts/classifier_prf/cross_validate.py`` to
    produce a run file and then score it with ``trec_eval``.
    """

    COLLECTIONS = ('core17', 'core18', 'robust04', 'robust05')

    def setUp(self):
        """Prepare a scratch directory, the score checkers and the per-collection archives.

        Sets ``self.pyserini_root`` / ``self.anserini_root`` (relative paths
        chosen from the current working directory), ``self.tmp`` and one
        ``self.<collection>_checker`` per collection.  For every collection the
        archive ``integrations/<collection>.tar.gz`` is extracted into
        ``self.tmp``.
        """
        # All paths are relative, so they depend on where the runner was started.
        if os.getcwd().endswith('integrations'):
            self.pyserini_root = '..'
            self.anserini_root = '../../anserini'
        else:
            self.pyserini_root = '.'
            self.anserini_root = '../anserini'

        self.tmp = f'{self.pyserini_root}/integrations/tmp{randint(0, 10000)}'

        # Always start from an empty directory.  Previously a stale directory
        # was removed but not recreated, leaving self.tmp missing.
        if os.path.exists(self.tmp):
            shutil.rmtree(self.tmp)
        os.mkdir(self.tmp)

        trec_eval = f'{self.pyserini_root}/tools/eval/trec_eval.9.0.4/trec_eval -m map -m P.30'

        for collection in self.COLLECTIONS:
            checker = SimpleSearcherScoreChecker(
                index=os.path.join(
                    self.anserini_root,
                    f'indexes/lucene-index.{collection}.pos+docvectors+raw'),
                topics=os.path.join(
                    self.pyserini_root,
                    f'tools/topics-and-qrels/topics.{collection}.txt'),
                pyserini_topics=collection,
                qrels=os.path.join(
                    self.pyserini_root,
                    f'tools/topics-and-qrels/qrels.{collection}.txt'),
                eval=trec_eval)
            setattr(self, f'{collection}_checker', checker)

            archive = f'{self.pyserini_root}/integrations/{collection}.tar.gz'
            try:
                if not os.path.exists(f'{self.tmp}/{collection}'):
                    # The context manager closes the archive even on failure.
                    with tarfile.open(archive, 'r:gz') as tar:
                        tar.extractall(path=self.tmp)
            except BaseException:
                # Clean-up-and-re-raise: tearDown does not run when setUp
                # fails, so the scratch directory has to be removed here.
                shutil.rmtree(self.tmp)
                print(
                    f'{collection}.tar.gz is not saved in {self.pyserini_root}/integrations'
                )
                raise

    def _check_classifier_prf(self, collection, classifier, expected_map, rm3=False):
        """Cross-validate one classifier configuration and assert its MAP.

        Args:
            collection: collection name passed to ``--collection`` and used to
                pick the qrels file.
            classifier: value passed to ``--classifier`` (``lr``, ``svm``,
                ``lr+svm`` or ``rrf`` in the tests below).
            expected_map: MAP value the run must reach within ``0.0001``.
            rm3: when true, ``-rm3`` is appended to the cross-validation
                command and ``_rm3`` to the run file name.
        """
        suffix = '_rm3' if rm3 else ''
        run_path = f'{self.tmp}/{collection}_{classifier}{suffix}.txt'

        cv_cmd = (
            f'python {self.pyserini_root}/scripts/classifier_prf/cross_validate.py '
            f'--anserini {self.anserini_root} --run_file {self.tmp} '
            f'--pyserini {self.pyserini_root} '
            f'--collection {collection} --output {run_path} --classifier {classifier}'
        )
        if rm3:
            cv_cmd += ' -rm3'
        # Fail here with a clear message instead of letting trec_eval trip
        # over a missing or partial run file.
        self.assertEqual(os.system(cv_cmd), 0,
                         f'cross_validate.py failed: {cv_cmd}')

        eval_cmd = (
            f'{self.anserini_root}/tools/eval/trec_eval.9.0.4/trec_eval -m map -m P.30 '
            f'{self.anserini_root}/src/main/resources/topics-and-qrels/qrels.{collection}.txt '
            f'{run_path}'
        )
        # os.system provides the exit status; run_command is used for the
        # captured stdout/stderr (it is not known to expose the status).
        status = os.system(eval_cmd)
        stdout, stderr = run_command(eval_cmd)
        score = parse_score(stdout, 'map')

        self.assertEqual(status, 0)
        self.assertEqual(stderr, '')
        self.assertAlmostEqual(score, expected_map, delta=0.0001)

    # ----------------------------------------------------------------- core17

    def test_core17(self):
        self.assertTrue(
            self.core17_checker.run('core17_bm25', '--bm25', 0.2087))

    def test_core17_rm3(self):
        self.assertTrue(
            self.core17_checker.run('core17_bm25', '--bm25 --rm3', 0.2823))

    def test_core17_lr(self):
        self._check_classifier_prf('core17', 'lr', 0.2462)

    def test_core17_lr_rm3(self):
        self._check_classifier_prf('core17', 'lr', 0.2906, rm3=True)

    def test_core17_svm(self):
        self._check_classifier_prf('core17', 'svm', 0.2367)

    def test_core17_svm_rm3(self):
        self._check_classifier_prf('core17', 'svm', 0.2940, rm3=True)

    def test_core17_avg(self):
        self._check_classifier_prf('core17', 'lr+svm', 0.2426)

    def test_core17_avg_rm3(self):
        self._check_classifier_prf('core17', 'lr+svm', 0.2952, rm3=True)

    def test_core17_rrf(self):
        self._check_classifier_prf('core17', 'rrf', 0.2433)

    def test_core17_rrf_rm3(self):
        self._check_classifier_prf('core17', 'rrf', 0.2951, rm3=True)

    # ----------------------------------------------------------------- core18

    def test_core18(self):
        self.assertTrue(
            self.core18_checker.run('core18_bm25', '--bm25', 0.2495))

    def test_core18_rm3(self):
        self.assertTrue(
            self.core18_checker.run('core18_bm25', '--bm25 --rm3', 0.3135))

    def test_core18_lr(self):
        self._check_classifier_prf('core18', 'lr', 0.2837)

    def test_core18_lr_rm3(self):
        self._check_classifier_prf('core18', 'lr', 0.3195, rm3=True)

    def test_core18_svm(self):
        self._check_classifier_prf('core18', 'svm', 0.2786)

    def test_core18_svm_rm3(self):
        self._check_classifier_prf('core18', 'svm', 0.3220, rm3=True)

    def test_core18_avg(self):
        self._check_classifier_prf('core18', 'lr+svm', 0.2821)

    def test_core18_avg_rm3(self):
        self._check_classifier_prf('core18', 'lr+svm', 0.3200, rm3=True)

    def test_core18_rrf(self):
        self._check_classifier_prf('core18', 'rrf', 0.2871)

    def test_core18_rrf_rm3(self):
        self._check_classifier_prf('core18', 'rrf', 0.3204, rm3=True)

    # --------------------------------------------------------------- robust04

    def test_robust04(self):
        self.assertTrue(
            self.robust04_checker.run('robust04_bm25', '--bm25', 0.2531))

    def test_robust04_rm3(self):
        self.assertTrue(
            self.robust04_checker.run('robust04_bm25_rm3', '--bm25 --rm3',
                                      0.2903))

    def test_robust04_lr(self):
        self._check_classifier_prf('robust04', 'lr', 0.2721)

    def test_robust04_lr_rm3(self):
        self._check_classifier_prf('robust04', 'lr', 0.2961, rm3=True)

    def test_robust04_svm(self):
        self._check_classifier_prf('robust04', 'svm', 0.2716)

    def test_robust04_svm_rm3(self):
        self._check_classifier_prf('robust04', 'svm', 0.2963, rm3=True)

    def test_robust04_avg(self):
        self._check_classifier_prf('robust04', 'lr+svm', 0.2745)

    def test_robust04_avg_rm3(self):
        self._check_classifier_prf('robust04', 'lr+svm', 0.2980, rm3=True)

    def test_robust04_rrf(self):
        self._check_classifier_prf('robust04', 'rrf', 0.2740)

    def test_robust04_rrf_rm3(self):
        self._check_classifier_prf('robust04', 'rrf', 0.2975, rm3=True)

    # --------------------------------------------------------------- robust05

    def test_robust05(self):
        self.assertTrue(
            self.robust05_checker.run('robust05_bm25', '--bm25', 0.2032))

    def test_robust05_rm3(self):
        self.assertTrue(
            self.robust05_checker.run('robust05_bm25_rm3', '--bm25 --rm3',
                                      0.2602))

    def test_robust05_lr(self):
        self._check_classifier_prf('robust05', 'lr', 0.2476)

    def test_robust05_lr_rm3(self):
        self._check_classifier_prf('robust05', 'lr', 0.2845, rm3=True)

    def test_robust05_svm(self):
        self._check_classifier_prf('robust05', 'svm', 0.2454)

    def test_robust05_svm_rm3(self):
        self._check_classifier_prf('robust05', 'svm', 0.2850, rm3=True)

    def test_robust05_avg(self):
        self._check_classifier_prf('robust05', 'lr+svm', 0.2485)

    def test_robust05_avg_rm3(self):
        self._check_classifier_prf('robust05', 'lr+svm', 0.2860, rm3=True)

    def test_robust05_rrf(self):
        self._check_classifier_prf('robust05', 'rrf', 0.2490)

    def test_robust05_rrf_rm3(self):
        self._check_classifier_prf('robust05', 'rrf', 0.2870, rm3=True)

    def tearDown(self):
        """Remove the scratch directory created by setUp."""
        shutil.rmtree(self.tmp)
def setUp(self):
    """Set up root paths, a scratch directory and one score checker per collection.

    Sets ``pyserini_root`` / ``anserini_root`` relative to the current
    working directory, creates an empty ``self.tmp`` directory under
    ``integrations/``, records index and qrels paths for core17, core18,
    robust04 and robust05, and builds a ``SimpleSearcherScoreChecker``
    for each of them.
    """
    # Relative roots differ depending on whether the tests are launched
    # from inside the integrations directory or from somewhere else.
    if os.getcwd().endswith('integrations'):
        self.pyserini_root = '..'
        self.anserini_root = '../../anserini'
    else:
        self.pyserini_root = '.'
        self.anserini_root = '../anserini'

    self.tmp = f'{self.pyserini_root}/integrations/tmp{randint(0, 10000)}'
    # Always end up with a fresh, existing directory: a leftover directory
    # with the same random suffix is removed and then recreated.
    if os.path.exists(self.tmp):
        shutil.rmtree(self.tmp)
    os.mkdir(self.tmp)

    self.pyserini_search_cmd = 'python -m pyserini.search'

    def index_for(collection):
        # Index location for a collection, under the Anserini root.
        return os.path.join(
            self.anserini_root,
            f'indexes/lucene-index.{collection}.pos+docvectors+raw')

    def resource(file_name):
        # Topics/qrels file under the Pyserini root.
        return os.path.join(self.pyserini_root,
                            f'tools/topics-and-qrels/{file_name}')

    self.core17_index_path = index_for('core17')
    self.core17_qrels_path = resource('qrels.core17.txt')
    self.core18_index_path = index_for('core18')
    self.core18_qrels_path = resource('qrels.core18.txt')
    self.robust04_index_path = index_for('robust04')
    self.robust04_qrels_path = resource('qrels.robust04.txt')
    self.robust05_index_path = index_for('robust05')
    self.robust05_qrels_path = resource('qrels.robust05.txt')

    eval_cmd = f'{self.pyserini_root}/tools/eval/trec_eval.9.0.4/trec_eval -m map -m P.30'

    def make_checker(collection, index_path, qrels_path):
        # Every checker uses the same evaluation command and differs only
        # in its collection-specific index, topics and qrels.
        return SimpleSearcherScoreChecker(
            index=index_path,
            topics=resource(f'topics.{collection}.txt'),
            pyserini_topics=collection,
            qrels=qrels_path,
            eval=eval_cmd)

    self.core17_checker = make_checker(
        'core17', self.core17_index_path, self.core17_qrels_path)
    self.core18_checker = make_checker(
        'core18', self.core18_index_path, self.core18_qrels_path)
    self.robust04_checker = make_checker(
        'robust04', self.robust04_index_path, self.robust04_qrels_path)
    self.robust05_checker = make_checker(
        'robust05', self.robust05_index_path, self.robust05_qrels_path)