Beispiel #1
0
    def download_if_missing(self):
        """Create the tokenized topic file unless it already exists.

        Delegates to the parent implementation first, then reads the
        ``"title"`` topics from the parent's topic file, passes each topic
        through ``self.tokenizer.tokenize`` and writes the result of
        ``topic_to_trectxt`` to ``self.topic_file``.

        Raises:
            AssertionError: if the parent's topic file does not exist after
                the parent's ``download_if_missing`` has run.
        """
        super().download_if_missing()
        full_topic_file = super().topic_file
        assert full_topic_file.exists()
        if self.topic_file.exists():
            # Already generated on an earlier call; nothing to do.
            return

        titles = load_trec_topics(full_topic_file)["title"]
        with open(self.topic_file, "w") as f:
            # ``titles`` is a mapping (query id -> topic text, presumably);
            # iterating the mapping itself would yield only its keys, so the
            # (key, value) pairs have to come from ``.items()``.
            for qid, full_topic in titles.items():
                kw_topic = self.tokenizer.tokenize(full_topic)
                f.write(topic_to_trectxt(qid, kw_topic))
Beispiel #2
0
def test_searcher_query(tmpdir_as_cache, tmpdir, dummy_index, searcher_name):
    """Run one title query through a searcher and check the number of hits.

    The searcher is created with ``hits`` set to 1 and the dummy index
    provided. For every searcher except "SPL" the result must contain exactly
    that many entries (per query when the result values are dicts).
    """
    expected_hits = 1
    title_topics = load_trec_topics(DummyBenchmark.topic_file)["title"]
    first_query = list(title_topics.values())[0]

    searcher = Searcher.create(
        searcher_name,
        config={"hits": expected_hits},
        provide={"index": dummy_index},
    )
    results = searcher.query(first_query)

    # For "SPL" only the query call itself is exercised; its output is not checked.
    if searcher_name == "SPL":
        return

    print(results.values())
    per_query = list(results.values())
    if not isinstance(per_query[0], dict):
        # Flat result: one entry per returned hit.
        assert len(results) == expected_hits
    else:
        # Nested result: each value holds the hits for one query.
        assert all(len(hits) == expected_hits for hits in per_query)
Beispiel #3
0
 def topics(self):
     """Return the topics, loading them on first access.

     The topics are read with ``load_trec_topics`` from the path stored under
     ``self.config["topics"]["path"]`` and cached on the instance as
     ``_topics``; later calls return the cached object.
     """
     try:
         return self._topics
     except AttributeError:
         # Not loaded yet; fall through and populate the cache.
         pass
     self._topics = load_trec_topics(self.config["topics"]["path"])
     return self._topics
Beispiel #4
0
 def set_topics(self, path):
     """Load topics from ``path`` with ``load_trec_topics`` and store them as ``self._topics``."""
     loaded_topics = load_trec_topics(path)
     self._topics = loaded_topics
Beispiel #5
0
 def topics(self):
     """Return the topics read from ``self.topic_file``, caching them on the instance.

     The first call stores the result of ``load_trec_topics`` in
     ``self._topics``; subsequent calls return that stored value.
     """
     if hasattr(self, "_topics"):
         return self._topics

     # First access: read the topic file and remember the result.
     self._topics = load_trec_topics(self.topic_file)
     return self._topics