Example #1
0
 def from_topics(cls, topics_path: str):
     """Create an instance from a topics file on disk or a topic-set identifier.

     When ``topics_path`` exists on disk, the parser is picked from the path:

     * ``.json``  -- loaded with :func:`json.load`
     * ``.tsv``   -- read through Anserini's ``TsvIntTopicReader``
     * ``.trec``  -- read through Anserini's ``TrecTopicReader``
     * any other path containing ``cacm`` -- read through ``CacmTopicReader``

     When the path does not exist, the value is handed to ``get_topics``
     (presumably as the name of a predefined topic set -- confirm against
     ``get_topics``).

     Args:
         topics_path: Path to a topics file, or a value understood by
             ``get_topics`` when no such file exists.

     Returns:
         ``cls(topics, order)``, where ``order`` is obtained from
         ``QueryIterator.get_predefined_order(topics_path)``.

     Raises:
         NotImplementedError: The path exists but matches none of the
             recognised formats.
         FileNotFoundError: Loading produced no topics (an empty or
             otherwise falsy result).
     """
     if os.path.exists(topics_path):
         if topics_path.endswith('.json'):
             # JSON text is UTF-8 by specification; decode it explicitly
             # instead of relying on the platform's locale encoding.
             with open(topics_path, 'r', encoding='utf-8') as f:
                 topics = json.load(f)
         elif topics_path.endswith('.tsv'):
             topics = get_topics_with_reader(
                 'io.anserini.search.topicreader.TsvIntTopicReader',
                 topics_path)
         elif topics_path.endswith('.trec'):
             topics = get_topics_with_reader(
                 'io.anserini.search.topicreader.TrecTopicReader',
                 topics_path)
         elif 'cacm' in topics_path:
             # Checked last: this is a substring match on the whole path,
             # not an extension test.
             topics = get_topics_with_reader(
                 'io.anserini.search.topicreader.CacmTopicReader',
                 topics_path)
         else:
             raise NotImplementedError(
                 f"Not sure how to parse {topics_path}. Please specify the file extension."
             )
     else:
         # Not a file on disk: let get_topics resolve the value.
         topics = get_topics(topics_path)
     # An empty result is reported as "not found", whichever branch produced it.
     if not topics:
         raise FileNotFoundError(f'Topic {topics_path} Not Found')
     order = QueryIterator.get_predefined_order(topics_path)
     return cls(topics, order)
Example #2
0
    def test_trec_topicreader(self):
        """Check that ``TrecTopicReader`` loads the Robust04 topics file.

        The topics read from the file are expected to contain 250 entries, to
        be keyed by ``int``, and to equal ``search.get_topics('robust04')``.
        """
        # Running from command-line, we're in root of repo, but running in IDE, we're in tests/
        path = 'tools/topics-and-qrels/topics.robust04.txt'
        if not os.path.exists(path):
            path = f'../{path}'

        # Name the missing path so a wrong working directory is easy to diagnose.
        self.assertTrue(os.path.exists(path), f'Topics file not found: {path}')
        topics = search.get_topics_with_reader('io.anserini.search.topicreader.TrecTopicReader', path)
        self.assertEqual(len(topics), 250)
        # Only the first key is sampled; assertIsInstance reports the actual
        # type on failure instead of a bare "False is not true".
        self.assertIsInstance(next(iter(topics.keys())), int)

        self.assertEqual(search.get_topics('robust04'), topics)
Example #3
0
    def test_tsv_int_topicreader(self):
        """Check that ``TsvIntTopicReader`` loads the MS MARCO doc dev topics file.

        The topics read from the file are expected to contain 5193 entries, to
        be keyed by ``int``, and to equal ``search.get_topics('msmarco_doc_dev')``.
        """
        # Running from command-line, we're in root of repo, but running in IDE, we're in tests/
        path = 'tools/topics-and-qrels/topics.msmarco-doc.dev.txt'
        if not os.path.exists(path):
            path = f'../{path}'

        # Name the missing path so a wrong working directory is easy to diagnose.
        self.assertTrue(os.path.exists(path), f'Topics file not found: {path}')
        topics = search.get_topics_with_reader('io.anserini.search.topicreader.TsvIntTopicReader', path)
        self.assertEqual(len(topics), 5193)
        # Only the first key is sampled; assertIsInstance reports the actual
        # type on failure instead of a bare "False is not true".
        self.assertIsInstance(next(iter(topics.keys())), int)

        self.assertEqual(search.get_topics('msmarco_doc_dev'), topics)