def from_prebuilt_index(cls, prebuilt_index_name: str, query_encoder: QueryEncoder):
    """Create a searcher backed by a pre-built index, downloading it if necessary.

    Progress messages are written to standard output. If
    ``download_prebuilt_index`` raises ``ValueError``, the error text is
    printed and ``None`` is returned instead of propagating the exception.

    Parameters
    ----------
    prebuilt_index_name : str
        Prebuilt index name.
    query_encoder : QueryEncoder
        The query encoder, which has an `encode` method that converts query
        text to an embedding. It is handed to the constructor unchanged.

    Returns
    -------
    SimpleDenseSearcher or None
        Searcher built from the prebuilt faiss index, or ``None`` when the
        download step raised ``ValueError``.
    """
    print(
        f'Attempting to initialize pre-built index {prebuilt_index_name}.')
    try:
        index_location = download_prebuilt_index(prebuilt_index_name)
    except ValueError as err:
        # Report the problem and signal failure with None rather than raising.
        print(err)
        return None
    else:
        # Construction happens outside the try body, so a ValueError raised
        # by the constructor itself is not swallowed.
        print(f'Initializing {prebuilt_index_name}...')
        return cls(index_location, query_encoder)
def from_prebuilt_index(cls, prebuilt_index_name: str, query_encoder: Union[QueryEncoder, str], min_idf=0):
    """Create a searcher backed by a pre-built index, downloading it if necessary.

    Progress messages are written to standard output. If
    ``download_prebuilt_index`` raises ``ValueError``, the error text is
    printed and ``None`` is returned instead of propagating the exception.

    Parameters
    ----------
    prebuilt_index_name : str
        Prebuilt index name.
    query_encoder : QueryEncoder or str
        Query encoder (or, per the annotation, a string standing in for
        one); handed to the constructor unchanged.
    min_idf : optional
        Forwarded to the constructor as its third positional argument.
        Defaults to 0.

    Returns
    -------
    LuceneSearcher or None
        Searcher built from the prebuilt index, or ``None`` when the download
        step raised ``ValueError``.
    """
    print(
        f'Attempting to initialize pre-built index {prebuilt_index_name}.')
    try:
        index_location = download_prebuilt_index(prebuilt_index_name)
    except ValueError as err:
        # Report the problem and signal failure with None rather than raising.
        print(err)
        return None
    else:
        # Construction happens outside the try body, so a ValueError raised
        # by the constructor itself is not swallowed.
        print(f'Initializing {prebuilt_index_name}...')
        return cls(index_location, query_encoder, min_idf)
def from_prebuilt_index(cls, prebuilt_index_name: str):
    """Create an index reader from a prebuilt index, downloading it if necessary.

    No exception handling is done here: anything raised by
    ``download_prebuilt_index`` or by the constructor propagates to the
    caller.

    Parameters
    ----------
    prebuilt_index_name : str
        Prebuilt index name.

    Returns
    -------
    IndexReader
        Index reader built from the prebuilt index.
    """
    return cls(download_prebuilt_index(prebuilt_index_name))
def from_prebuilt_index(cls, prebuilt_index_name: str):
    """Create a searcher from a prebuilt index, downloading it if necessary.

    No exception handling is done here: anything raised by
    ``download_prebuilt_index`` or by the constructor propagates to the
    caller.

    Parameters
    ----------
    prebuilt_index_name : str
        Prebuilt index name.

    Returns
    -------
    SimpleSearcher
        Searcher built from the prebuilt index.
    """
    return cls(download_prebuilt_index(prebuilt_index_name))
def test_bm25(self):
    # Runs the classifier-PRF re-ranking script over the round-3 covidex run
    # and checks the MAP / nDCG values the script writes to output.json.
    #
    # A comment is used instead of a docstring so that unittest keeps
    # reporting this test by its method name.
    import subprocess  # local import keeps this method self-contained

    tmp_folder_name = self.tmp.split('/')[-1]
    prebuilt_index_path = download_prebuilt_index('trec-covid-r3-abstract')
    qrels_dir = f'{self.pyserini_root}/tools/topics-and-qrels'
    output_path = f'{self.tmp}/output.json'

    # The command is passed as an argument list rather than a shell string:
    # paths containing spaces or shell metacharacters are then delivered
    # intact, and no shell is involved in parsing them.
    command = [
        'python',
        f'{self.pyserini_root}/scripts/classifier_prf/rank_trec_covid.py',
        '-alpha', '0.5',
        '-clf', 'lr',
        '-vectorizer', 'tfidf',
        '-new_qrels', f'{qrels_dir}/qrels.covid-round3.txt',
        '-base', f'{self.tmp}/runs/covidex.t5.final.txt',
        '-tmp_base', tmp_folder_name,
        '-qrels', f'{qrels_dir}/qrels.covid-round2-cumulative.txt',
        '-index', str(prebuilt_index_path),
        '-tag', 'covidex.r3.t5.lr',
        '-output', output_path,
    ]
    # check=True makes a failing script surface as CalledProcessError here,
    # instead of as a less informative error when output.json is read below.
    subprocess.run(command, check=True)

    with open(output_path) as json_file:
        data = json.load(json_file)

    # The expected strings end with a literal backslash, "n" and apostrophe.
    # NOTE(review): presumably an artifact of how the script captures the
    # evaluation output -- confirm before "cleaning up" these values.
    self.assertEqual("0.3311\\n'", data['map'])
    self.assertEqual("0.6866\\n'", data['ndcg'])
def from_prebuilt_index(cls, prebuilt_index_name: str):
    """Create a searcher backed by a pre-built index, downloading it if necessary.

    Progress messages are written to standard output. If
    ``download_prebuilt_index`` raises ``ValueError``, the error text is
    printed and ``None`` is returned instead of propagating the exception.

    Parameters
    ----------
    prebuilt_index_name : str
        Prebuilt index name.

    Returns
    -------
    SimpleSearcher or None
        Searcher built from the prebuilt index, or ``None`` when the download
        step raised ``ValueError``.
    """
    print(f'Attempting to initialize pre-built index {prebuilt_index_name}.')
    try:
        index_location = download_prebuilt_index(prebuilt_index_name)
    except ValueError as err:
        # Report the problem and signal failure with None rather than raising.
        print(err)
        return None
    else:
        # Construction happens outside the try body, so a ValueError raised
        # by the constructor itself is not swallowed.
        print(f'Initializing {prebuilt_index_name}...')
        return cls(index_location)
def test_round5(self):
    # Runs the classifier-PRF re-ranking script over four round-5 covidex
    # base runs and checks the MAP / nDCG values written to output.json
    # after each run.
    #
    # A comment is used instead of a docstring so that unittest keeps
    # reporting this test by its method name.
    import subprocess  # local import keeps this method self-contained

    tmp_folder_name = self.tmp.split('/')[-1]
    prebuilt_index_path = download_prebuilt_index('trec-covid-r5-abstract')
    script_path = f'{self.pyserini_root}/scripts/classifier_prf/rank_trec_covid.py'
    qrels_dir = f'{self.pyserini_root}/tools/topics-and-qrels'
    output_path = f'{self.tmp}/output.json'

    # (base run file name, which is also used as the tag, expected MAP,
    # expected nDCG). Order matches the original sequence of invocations.
    expected_by_run = [
        ('covidex.r5.d2q.1s', "0.3859", "0.8221"),
        ('covidex.r5.d2q.2s', "0.3875", "0.8304"),
        ('covidex.r5.1s', "0.3885", "0.8135"),
        ('covidex.r5.2s', "0.3922", "0.8311"),
    ]

    for run_name, expected_map, expected_ndcg in expected_by_run:
        # The command is passed as an argument list rather than a shell
        # string: paths containing spaces or shell metacharacters are then
        # delivered intact, and no shell is involved in parsing them.
        command = [
            'python', script_path,
            '-alpha', '0.6',
            '-clf', 'lr',
            '-vectorizer', 'tfidf',
            '-new_qrels', f'{qrels_dir}/qrels.covid-round5.txt',
            '-base', f'{self.tmp}/runs/{run_name}',
            '-tmp_base', tmp_folder_name,
            '-qrels', f'{qrels_dir}/qrels.covid-round4-cumulative.txt',
            '-index', str(prebuilt_index_path),
            '-tag', run_name,
            '-output', output_path,
        ]
        # Every run writes to the same output.json. check=True stops the test
        # at a failed invocation, so a stale file left by the previous run is
        # never compared against this run's expected values.
        subprocess.run(command, check=True)

        with open(output_path) as json_file:
            data = json.load(json_file)

        # The run name is attached so a mismatch identifies the failing run.
        self.assertEqual(expected_map, data['map'], run_name)
        self.assertEqual(expected_ndcg, data['ndcg'], run_name)
def from_prebuilt_index(cls, prebuilt_index_name: str):
    """Create an instance of ``cls`` from a prebuilt index.

    The index directory is obtained from ``download_prebuilt_index`` and
    passed to the constructor as its only argument. No exception handling is
    done here: anything raised by either call propagates to the caller.

    Parameters
    ----------
    prebuilt_index_name : str
        Prebuilt index name.

    Returns
    -------
    cls
        Instance constructed from the resolved index directory.
    """
    return cls(download_prebuilt_index(prebuilt_index_name))