Example #1
0
    def from_prebuilt_index(cls, prebuilt_index_name: str,
                            query_encoder: QueryEncoder):
        """Create a searcher backed by a named pre-built index.

        The name is resolved to an index directory via
        ``download_prebuilt_index`` (which downloads the index if necessary).
        Progress messages are printed to standard output.

        Parameters
        ----------
        prebuilt_index_name : str
            Prebuilt index name.
        query_encoder : QueryEncoder
            The query encoder, whose `encode` method converts query text to
            an embedding.

        Returns
        -------
        SimpleDenseSearcher or None
            Searcher built from the prebuilt faiss index, or ``None`` when
            ``download_prebuilt_index`` raises ``ValueError`` (the error
            message is printed in that case).
        """
        attempt_message = (
            f'Attempting to initialize pre-built index {prebuilt_index_name}.')
        print(attempt_message)

        try:
            local_index_path = download_prebuilt_index(prebuilt_index_name)
        except ValueError as err:
            # Report the problem and signal failure with None instead of
            # letting the exception propagate.
            print(str(err))
            return None
        else:
            print(f'Initializing {prebuilt_index_name}...')
            return cls(local_index_path, query_encoder)
Example #2
0
    def from_prebuilt_index(cls,
                            prebuilt_index_name: str,
                            query_encoder: Union[QueryEncoder, str],
                            min_idf=0):
        """Create a searcher backed by a named pre-built index.

        The name is resolved to an index directory via
        ``download_prebuilt_index`` (which downloads the index if necessary).
        Progress messages are printed to standard output.

        Parameters
        ----------
        prebuilt_index_name : str
            Prebuilt index name.
        query_encoder : Union[QueryEncoder, str]
            Forwarded unchanged as the second constructor argument.
        min_idf
            Forwarded unchanged as the third constructor argument;
            defaults to 0.

        Returns
        -------
        LuceneSearcher or None
            Searcher built from the prebuilt index, or ``None`` when
            ``download_prebuilt_index`` raises ``ValueError`` (the error
            message is printed in that case).
        """
        attempt_message = (
            f'Attempting to initialize pre-built index {prebuilt_index_name}.')
        print(attempt_message)

        try:
            local_index_path = download_prebuilt_index(prebuilt_index_name)
        except ValueError as err:
            # Report the problem and signal failure with None instead of
            # letting the exception propagate.
            print(str(err))
            return None
        else:
            print(f'Initializing {prebuilt_index_name}...')
            return cls(local_index_path, query_encoder, min_idf)
Example #3
0
    def from_prebuilt_index(cls, prebuilt_index_name: str):
        """Create an index reader for a named prebuilt index.

        The name is resolved to an index directory via
        ``download_prebuilt_index`` (which downloads the index if necessary).
        Any exception raised by that call propagates to the caller.

        Parameters
        ----------
        prebuilt_index_name : str
            Prebuilt index name.

        Returns
        -------
        IndexReader
            Index reader built from the prebuilt index.
        """
        return cls(download_prebuilt_index(prebuilt_index_name))
Example #4
0
    def from_prebuilt_index(cls, prebuilt_index_name: str):
        """Create a searcher for a named prebuilt index.

        The name is resolved to an index directory via
        ``download_prebuilt_index`` (which downloads the index if necessary).
        Any exception raised by that call propagates to the caller.

        Parameters
        ----------
        prebuilt_index_name : str
            Prebuilt index name.

        Returns
        -------
        SimpleSearcher
            Searcher built from the prebuilt index.
        """
        return cls(download_prebuilt_index(prebuilt_index_name))
Example #5
0
 def test_bm25(self):
     """Run rank_trec_covid.py on the round-3 abstract index and check its scores.

     The script is launched through the shell; it is expected to write
     ``{self.tmp}/output.json``, whose 'map' and 'ndcg' entries are then
     compared against fixed reference strings.
     """
     tmp_base = self.tmp.split('/')[-1]
     index_path = download_prebuilt_index('trec-covid-r3-abstract')

     # The backslash continuations live inside the string literal, so the
     # leading whitespace of each continued line is part of the command.
     command = f'python {self.pyserini_root}/scripts/classifier_prf/rank_trec_covid.py \
                 -alpha 0.5 \
                 -clf lr \
                 -vectorizer tfidf \
                 -new_qrels {self.pyserini_root}/tools/topics-and-qrels/qrels.covid-round3.txt \
                 -base {self.tmp}/runs/covidex.t5.final.txt \
                 -tmp_base {tmp_base} \
                 -qrels {self.pyserini_root}/tools/topics-and-qrels/qrels.covid-round2-cumulative.txt \
                 -index {index_path} \
                 -tag covidex.r3.t5.lr \
                 -output {self.tmp}/output.json'
     os.system(command)

     with open(f'{self.tmp}/output.json') as fh:
         metrics = json.load(fh)

     # The reference values end with a literal backslash, 'n' and apostrophe;
     # presumably an artifact of how the script serializes scores - confirm.
     self.assertEqual("0.3311\\n'", metrics['map'])
     self.assertEqual("0.6866\\n'", metrics['ndcg'])
Example #6
0
    def from_prebuilt_index(cls, prebuilt_index_name: str):
        """Create a searcher backed by a named pre-built index.

        The name is resolved to an index directory via
        ``download_prebuilt_index`` (which downloads the index if necessary).
        Progress messages are printed to standard output.

        Parameters
        ----------
        prebuilt_index_name : str
            Prebuilt index name.

        Returns
        -------
        SimpleSearcher or None
            Searcher built from the prebuilt index, or ``None`` when
            ``download_prebuilt_index`` raises ``ValueError`` (the error
            message is printed in that case).
        """
        attempt_message = f'Attempting to initialize pre-built index {prebuilt_index_name}.'
        print(attempt_message)

        try:
            local_index_path = download_prebuilt_index(prebuilt_index_name)
        except ValueError as err:
            # Report the problem and signal failure with None instead of
            # letting the exception propagate.
            print(str(err))
            return None
        else:
            print(f'Initializing {prebuilt_index_name}...')
            return cls(local_index_path)
Example #7
0
 def test_round5(self):
     """Run rank_trec_covid.py for four round-5 base runs and check the scores.

     Each run is launched through the shell with identical settings except
     for the base run file / tag. Every run writes to the same
     ``{self.tmp}/output.json``, which is read back and its 'map' and 'ndcg'
     entries compared against fixed reference strings before the next run.
     """
     tmp_base = self.tmp.split('/')[-1]
     index_path = download_prebuilt_index('trec-covid-r5-abstract')

     # (base run file name, also used as the tag; expected 'map'; expected 'ndcg')
     cases = (
         ('covidex.r5.d2q.1s', "0.3859", "0.8221"),
         ('covidex.r5.d2q.2s', "0.3875", "0.8304"),
         ('covidex.r5.1s', "0.3885", "0.8135"),
         ('covidex.r5.2s', "0.3922", "0.8311"),
     )

     for run_name, expected_map, expected_ndcg in cases:
         # The backslash continuations live inside the string literal, so the
         # leading whitespace of each continued line is part of the command.
         os.system(f'python {self.pyserini_root}/scripts/classifier_prf/rank_trec_covid.py \
                 -alpha 0.6 \
                 -clf lr \
                 -vectorizer tfidf \
                 -new_qrels {self.pyserini_root}/tools/topics-and-qrels/qrels.covid-round5.txt \
                 -base {self.tmp}/runs/{run_name} \
                 -tmp_base {tmp_base} \
                 -qrels {self.pyserini_root}/tools/topics-and-qrels/qrels.covid-round4-cumulative.txt \
                 -index {index_path} \
                 -tag {run_name} \
                 -output {self.tmp}/output.json')
         with open(f'{self.tmp}/output.json') as fh:
             metrics = json.load(fh)
             self.assertEqual(expected_map, metrics['map'])
             self.assertEqual(expected_ndcg, metrics['ndcg'])
Example #8
0
 def from_prebuilt_index(cls, prebuilt_index_name: str):
     """Construct an instance from a named prebuilt index.

     The name is passed to ``download_prebuilt_index`` and the value it
     returns is handed to the class constructor as its only argument. Any
     exception raised along the way propagates to the caller.

     Parameters
     ----------
     prebuilt_index_name : str
         Prebuilt index name.
     """
     return cls(download_prebuilt_index(prebuilt_index_name))