def search(self, search_term, room=None, max_results=10, order_by_recent=False):
    """Search for events in the index.

    Args:
        search_term (str): The query string. It is parsed by a tantivy
            query parser that uses the body, name, topic and room fields
            as default fields.
        room (str, optional): If given, the query is additionally
            restricted to this room. Only a single room is supported.
        max_results (int): Maximum number of hits handed to the collector.
        order_by_recent (bool): If true, the collector is told to order
            hits by the timestamp field instead of its default ordering.

    Returns:
        list: One ``(score, column)`` tuple per hit. ``score`` is the first
        element of each result yielded by the searcher and ``column`` is
        the first value of the column field of the matching document.

    Raises:
        InvalidQueryError: If the query string can't be parsed.
    """
    queryparser = tantivy.QueryParser.for_index(
        self._index,
        [self.body_field, self.name_field, self.topic_field, self.room_field],
    )

    # This currently supports only a single room since the query parser
    # doesn't seem to work with multiple room fields here.
    if room:
        # Parenthesize the user supplied term: AND binds tighter than OR in
        # the query grammar, so without the grouping a term like "a OR b"
        # would only apply the room restriction to "b".
        query_string = "({}) AND room:{}".format(
            search_term, sanitize_room_id(room)
        )
    else:
        query_string = search_term

    try:
        query = queryparser.parse_query(query_string)
    except ValueError as err:
        # Keep the parser's error as the cause so it shows up in tracebacks.
        raise InvalidQueryError(f"Invalid search term: {search_term}") from err

    if order_by_recent:
        collector = tantivy.TopDocs(
            max_results, order_by_field=self.timestamp_field
        )
    else:
        collector = tantivy.TopDocs(max_results)

    searcher = self._searcher

    # Resolve every hit's document address to the stored column value.
    return [
        (score, searcher.doc(doc_address).get_first(self.column_field))
        for score, doc_address in searcher.search(query, collector)
    ]
def test_simple_search_in_dir(self, dir_index):
    """Search the index supplied by the ``dir_index`` fixture.

    The query "sea whale" over the title and body fields is expected to
    produce exactly one hit.
    """
    _index_dir, index = dir_index

    collector = tantivy.TopDocs(10)
    parsed_query = index.parse_query("sea whale", ["title", "body"])

    hits = index.searcher().search(parsed_query, collector)
    assert len(hits) == 1
def test_simple_search_after_reuse(self, dir_index):
    """Search through a new ``Index`` object built on the fixture's directory.

    The index object from the fixture is ignored; a fresh one is created
    from the schema and the directory path, and the query "sea whale" is
    expected to produce exactly one hit on it.
    """
    index_dir, _fixture_index = dir_index
    reopened = Index(schema(), str(index_dir))

    collector = tantivy.TopDocs(10)
    parsed_query = reopened.parse_query("sea whale", ["title", "body"])

    hits = reopened.searcher().search(parsed_query, collector)
    assert len(hits) == 1
def test_simple_search_in_ram(self, ram_index):
    """Search the ``ram_index`` fixture and check the single matching document.

    The query "sea whale" is expected to produce one hit whose title field
    holds "The Old Man and the Sea".
    """
    collector = tantivy.TopDocs(10)
    parsed_query = ram_index.parse_query("sea whale", ["title", "body"])

    hits = ram_index.searcher().search(parsed_query, collector)
    assert len(hits) == 1

    # Each hit is a (score, address) pair; only the address is needed to
    # load the stored document.
    _score, address = hits[0]
    found = ram_index.searcher().doc(address)
    assert found["title"] == ["The Old Man and the Sea"]
def test_and_query(self, ram_index):
    """Check that ``AND`` only matches documents satisfying both clauses."""
    collector = tantivy.TopDocs(10)
    searcher = ram_index.searcher()

    # Intersection with a body term that no document contains: no hits.
    no_match_query = ram_index.parse_query(
        "title:men AND body:summer", default_field_names=["title", "body"]
    )
    hits = searcher.search(no_match_query, collector)
    assert len(hits) == 0

    # Both clauses are satisfied by exactly one document.
    match_query = ram_index.parse_query(
        "title:men AND body:winter", ["title", "body"]
    )
    hits = searcher.search(match_query, collector)
    assert len(hits) == 1
def test_delete_update(self, ram_index):
    """Delete a document by term and verify it is gone after commit and reload."""
    index = ram_index
    parsed_query = index.parse_query("Frankenstein", ["title"])
    collector = tantivy.TopDocs(10)

    # Before the deletion the document is found.
    hits_before = index.searcher().search(parsed_query, collector)
    assert len(hits_before) == 1

    writer = index.writer()

    # Deleting through the field name "fake_field" is expected to be rejected.
    with pytest.raises(ValueError):
        writer.delete_documents("fake_field", "frankenstein")

    # A bytes value for the title field is expected to be rejected as well.
    with pytest.raises(ValueError):
        writer.delete_documents("title", b"frankenstein")

    writer.delete_documents("title", "frankenstein")
    writer.commit()
    index.reload()

    # The same query, run on a searcher obtained after the reload, finds nothing.
    hits_after = index.searcher().search(parsed_query, collector)
    assert len(hits_after) == 0