Exemple #1
0
        def search(self,
                   search_term,
                   room=None,
                   max_results=10,
                   order_by_recent=False):
            """Search for events in the index.

            Args:
                search_term (str): Query string handed to the tantivy query
                    parser. Terms without an explicit field are matched
                    against the body, name, topic and room fields.
                room (str, optional): When truthy, the query is restricted to
                    this room. The id is passed through ``sanitize_room_id``
                    before being appended to the query string.
                max_results (int): Upper bound handed to the ``TopDocs``
                    collector.
                order_by_recent (bool): When true, the collector is ordered
                    by the timestamp field instead of its default ordering.

            Returns:
                list: One ``(score, column_id)`` tuple per hit, where
                ``column_id`` is the first value of the column field of the
                matching document.
                NOTE(review): with ``order_by_recent`` the first element is
                whatever the searcher yields for a field-ordered collector;
                confirm whether that is still a relevance score.

            Raises:
                InvalidQueryError: If the query parser rejects the query
                    string with a ``ValueError``.
            """
            queryparser = tantivy.QueryParser.for_index(
                self._index,
                [
                    self.body_field, self.name_field, self.topic_field,
                    self.room_field
                ],
            )

            # This currently supports only a single room since the query parser
            # doesn't seem to work with multiple room fields here.
            # NOTE(review): the search term is spliced in verbatim, so a term
            # that itself contains OR may not be fully covered by the room
            # filter, depending on the parser's operator precedence. Confirm.
            if room:
                query_string = "{} AND room:{}".format(search_term,
                                                       sanitize_room_id(room))
            else:
                query_string = search_term

            try:
                query = queryparser.parse_query(query_string)
            except ValueError as err:
                # Chain explicitly so the parser's own message stays attached
                # to the error we surface to callers.
                raise InvalidQueryError(
                    f"Invalid search term: {search_term}") from err

            if order_by_recent:
                collector = tantivy.TopDocs(
                    max_results, order_by_field=self.timestamp_field)
            else:
                collector = tantivy.TopDocs(max_results)

            hits = self._searcher.search(query, collector)

            # Resolve every hit to its stored document and keep only the
            # column id next to the value reported by the searcher.
            return [
                (score,
                 self._searcher.doc(doc_address).get_first(self.column_field))
                for score, doc_address in hits
            ]
Exemple #2
0
    def test_simple_search_in_dir(self, dir_index):
        """Query the index from the ``dir_index`` fixture and expect one hit.

        The fixture yields a pair; only its second element (the index) is
        used here.
        """
        _unused_dir, fixture_index = dir_index
        parsed_query = fixture_index.parse_query(
            "sea whale",
            ["title", "body"],
        )

        collector = tantivy.TopDocs(10)
        searcher = fixture_index.searcher()

        hits = searcher.search(parsed_query, collector)
        assert len(hits) == 1
Exemple #3
0
    def test_simple_search_after_reuse(self, dir_index):
        """Build a fresh ``Index`` over the fixture's directory and search it.

        Only the first element of the ``dir_index`` fixture (the directory)
        is used; a new ``Index`` is constructed from it with ``schema()``
        and the same query is expected to return exactly one hit.
        """
        directory, _unused_index = dir_index
        reopened = Index(schema(), str(directory))
        parsed_query = reopened.parse_query(
            "sea whale",
            ["title", "body"],
        )

        collector = tantivy.TopDocs(10)
        searcher = reopened.searcher()

        hits = searcher.search(parsed_query, collector)
        assert len(hits) == 1
Exemple #4
0
    def test_simple_search_in_ram(self, ram_index):
        """Search the ``ram_index`` fixture and check the document found.

        Expects a single hit whose ``title`` field equals
        ``["The Old Man and the Sea"]``.
        """
        parsed_query = ram_index.parse_query(
            "sea whale",
            ["title", "body"],
        )

        collector = tantivy.TopDocs(10)

        hits = ram_index.searcher().search(parsed_query, collector)
        assert len(hits) == 1

        # Each hit is a (score, address) pair; only the address is needed
        # to load the stored document.
        _score, address = hits[0]
        found = ram_index.searcher().doc(address)
        assert found["title"] == ["The Old Man and the Sea"]
Exemple #5
0
    def test_and_query(self, ram_index):
        """Check that ``AND`` only returns documents matching both clauses."""
        collector_fields = ["title", "body"]

        no_match_query = ram_index.parse_query(
            "title:men AND body:summer",
            default_field_names=collector_fields,
        )
        # Both searches below share one collector and one searcher.
        collector = tantivy.TopDocs(10)
        searcher = ram_index.searcher()
        hits = searcher.search(no_match_query, collector)

        # "summer" isn't present, so the intersection is empty.
        assert len(hits) == 0

        match_query = ram_index.parse_query(
            "title:men AND body:winter",
            ["title", "body"],
        )
        hits = searcher.search(match_query, collector)

        assert len(hits) == 1
Exemple #6
0
    def test_delete_update(self, ram_index):
        """Delete a document by term and verify it no longer shows up.

        Also checks that ``delete_documents`` raises ``ValueError`` for a
        field name of ``"fake_field"`` and for a ``bytes`` value.
        """
        parsed_query = ram_index.parse_query("Frankenstein", ["title"])
        collector = tantivy.TopDocs(10)

        hits_before = ram_index.searcher().search(parsed_query, collector)
        assert len(hits_before) == 1

        index_writer = ram_index.writer()

        # Both argument pairs are expected to be rejected.
        rejected_calls = (
            ("fake_field", "frankenstein"),
            ("title", b"frankenstein"),
        )
        for field_name, field_value in rejected_calls:
            with pytest.raises(ValueError):
                index_writer.delete_documents(field_name, field_value)

        index_writer.delete_documents("title", "frankenstein")
        index_writer.commit()
        # A new searcher is only requested after the reload.
        ram_index.reload()

        hits_after = ram_index.searcher().search(parsed_query, collector)
        assert len(hits_after) == 0