def test_create_readers(self):
    """Check when committed documents become visible to new searchers.

    With the reader configured as ``"Manual"``, a commit is expected to stay
    invisible until ``reload()`` is called. After switching the reader to
    ``"OnCommit"``, the next commit is expected to become visible without an
    explicit reload.

    Raises:
        AssertionError: if a visibility expectation fails, including the
            automatic reload not being observed within the polling window.
    """
    import time

    idx = Index(schema())
    idx.config_reader("Manual", 4)
    assert idx.searcher().num_docs == 0

    writer = idx.writer(30000000, 1)
    writer.add_document(Document(title="mytitle", body="mybody"))
    writer.commit()

    # Manual mode: changes are reflected only once the index is manually
    # reloaded, so the committed document must not be visible yet.
    assert idx.searcher().num_docs == 0
    idx.reload()
    assert idx.searcher().num_docs == 1

    idx.config_reader("OnCommit", 4)
    writer.add_document(Document(title="mytitle2", body="mybody2"))
    writer.commit()

    # The index should be reloaded automatically. Poll for at most ~5s
    # (50 attempts, 0.1s apart) for that to happen.
    for _ in range(50):
        time.sleep(0.1)
        if idx.searcher().num_docs == 2:
            return

    # Raise explicitly: a bare ``assert False`` is removed under ``-O`` and
    # would let the test pass silently on timeout.
    raise AssertionError(
        "index was not automatically reloaded within 5s of the commit"
    )
def test_order_by_search(self):
    """Search with ``order_by_field`` and check hit count and hit order.

    Three documents with ``order`` values 0, 2 and 1 are indexed. The test
    expects ``offset=2`` to leave a single hit, and expects the unpaged
    search to return the titles with the highest ``order`` value first.
    """
    builder = SchemaBuilder()
    builder = builder.add_unsigned_field("order", fast="single")
    builder = builder.add_text_field("title", stored=True)
    index = Index(builder.build())

    writer = index.writer()
    fixtures = (
        (0, "Test title"),
        (2, "Final test title"),
        (1, "Another test title"),
    )
    for order_value, title in fixtures:
        entry = Document()
        entry.add_unsigned("order", order_value)
        entry.add_text("title", title)
        writer.add_document(entry)
    writer.commit()
    index.reload()

    query = index.parse_query("test")
    searcher = index.searcher()

    paged = searcher.search(query, 10, offset=2, order_by_field="order")
    assert len(paged.hits) == 1

    ranked = searcher.search(query, 10, order_by_field="order")
    assert len(ranked.hits) == 3

    expected_titles = (
        "Final test title",
        "Another test title",
        "Test title",
    )
    for position, expected in enumerate(expected_titles):
        _, doc_address = ranked.hits[position]
        # Each lookup goes through a freshly obtained searcher.
        found = index.searcher().doc(doc_address)
        assert found["title"] == [expected]
def test_simple_search_after_reuse(self, dir_index):
    """Build an ``Index`` over the ``dir_index`` directory and query it.

    Expects exactly one hit for ``"sea whale"`` searched over the ``title``
    and ``body`` fields, using an integer result limit.
    """
    # NOTE(review): this file contains another definition with the same
    # name; if both live in the same class, the later one shadows the
    # earlier one -- confirm which variant is meant to run.
    index_dir, _unused = dir_index
    reopened = Index(schema(), str(index_dir))

    fields = ["title", "body"]
    query = reopened.parse_query("sea whale", fields)

    searcher = reopened.searcher()
    outcome = searcher.search(query, 10)
    assert len(outcome.hits) == 1
def test_simple_search_after_reuse(self, dir_index):
    """Build an ``Index`` over the ``dir_index`` directory and query it.

    Expects exactly one result for ``"sea whale"`` searched over the
    ``title`` and ``body`` fields, collected through ``tantivy.TopDocs(10)``.
    """
    # NOTE(review): this file contains another definition with the same
    # name; if both live in the same class, the later one shadows the
    # earlier one -- confirm which variant is meant to run.
    index_dir, _unused = dir_index
    reopened = Index(schema(), str(index_dir))

    fields = ["title", "body"]
    query = reopened.parse_query("sea whale", fields)
    collector = tantivy.TopDocs(10)

    searcher = reopened.searcher()
    matches = searcher.search(query, collector)
    assert len(matches) == 1
def test_order_by_search_without_fast_field(self):
    """Search ordered by an ``order`` field declared without ``fast``.

    The schema declares ``order`` via ``add_unsigned_field("order")`` with no
    ``fast`` argument; the ordered search is expected to return zero hits.
    """
    builder = SchemaBuilder()
    builder = builder.add_unsigned_field("order")
    builder = builder.add_text_field("title", stored=True)
    index = Index(builder.build())
    writer = index.writer()

    # NOTE(review): this document is built but never handed to
    # ``writer.add_document`` and nothing is committed, so this test adds
    # no documents before searching -- confirm that this is intended.
    entry = Document()
    entry.add_unsigned("order", 0)
    entry.add_text("title", "Test title")

    query = index.parse_query("test")
    ordered = index.searcher().search(query, 10, order_by_field="order")
    assert len(ordered.hits) == 0
def test_opens_from_dir(self):
    """Open the index at ``PATH_TO_INDEX`` with ``reuse=True``.

    Expects a searcher on that index to report three documents.
    """
    # NOTE(review): this file contains another definition with the same
    # name; if both live in the same class, the later one shadows the
    # earlier one -- confirm which variant is meant to run.
    reopened = Index(schema(), PATH_TO_INDEX, reuse=True)
    doc_count = reopened.searcher().num_docs
    assert doc_count == 3
def test_opens_from_dir(self, dir_index):
    """Open the index in the ``dir_index`` directory with ``reuse=True``.

    Expects a searcher on that index to report three documents.
    """
    # NOTE(review): this file contains another definition with the same
    # name; if both live in the same class, the later one shadows the
    # earlier one -- confirm which variant is meant to run.
    index_dir, _unused = dir_index
    reopened = Index(schema(), str(index_dir), reuse=True)
    doc_count = reopened.searcher().num_docs
    assert doc_count == 3