def test_create_readers(self):
    """Check that the reader reload policy decides when commits become visible.

    Under the "Manual" policy a committed document is only seen by new
    searchers after an explicit ``idx.reload()``. Under "OnCommit" the index
    is expected to pick up the commit by itself within a few seconds.
    """
    import time

    idx = Index(schema())
    idx.config_reader("Manual", 4)
    assert idx.searcher().num_docs == 0

    # NOTE(review): the writer arguments are presumably the heap size in
    # bytes and the number of indexing threads -- confirm against the API.
    writer = idx.writer(30000000, 1)
    writer.add_document(Document(title="mytitle", body="mybody"))
    writer.commit()

    # "Manual" policy: the commit stays invisible to new searchers until
    # the index is reloaded by hand.
    assert idx.searcher().num_docs == 0
    idx.reload()
    assert idx.searcher().num_docs == 1

    idx.config_reader("OnCommit", 4)
    writer.add_document(Document(title="mytitle2", body="mybody2"))
    writer.commit()

    # The index should now reload automatically. Poll every 0.1s, for at
    # most 50 attempts (about 5s), and succeed as soon as the second
    # document is visible.
    for _ in range(50):
        time.sleep(0.1)
        if idx.searcher().num_docs == 2:
            return

    assert False, (
        "index was not reloaded automatically within 5s of the commit "
        "under the OnCommit policy"
    )
def test_order_by_search(self):
    """Search with ``order_by_field`` and check hit count, offset and ordering.

    Three documents matching "test" are indexed with ``order`` values 0, 2
    and 1. The hits are expected back with the highest ``order`` first, and
    ``offset=2`` is expected to leave a single hit.
    """
    with_order_field = SchemaBuilder().add_unsigned_field("order", fast="single")
    with_title_field = with_order_field.add_text_field("title", stored=True)
    order_schema = with_title_field.build()

    index = Index(order_schema)
    writer = index.writer()

    # (order value, title) pairs, in insertion order.
    fixtures = (
        (0, "Test title"),
        (2, "Final test title"),
        (1, "Another test title"),
    )
    for order_value, title in fixtures:
        entry = Document()
        entry.add_unsigned("order", order_value)
        entry.add_text("title", title)
        writer.add_document(entry)

    writer.commit()
    index.reload()

    query = index.parse_query("test")
    searcher = index.searcher()

    # Skipping the first two of three matches leaves exactly one hit.
    paged = searcher.search(query, 10, offset=2, order_by_field="order")
    assert len(paged.hits) == 1

    result = searcher.search(query, 10, order_by_field="order")
    assert len(result.hits) == 3

    # Titles in the order the hits must come back (order values 2, 1, 0).
    expected_titles = ("Final test title", "Another test title", "Test title")
    for position, expected_title in enumerate(expected_titles):
        _, doc_address = result.hits[position]
        searched_doc = index.searcher().doc(doc_address)
        assert searched_doc["title"] == [expected_title]
def create_index(dir=None):
    """Build the shared three-document test index.

    All tests are assumed to use the same documents for now; individual
    tests may still set up their own function-local indexes.

    Args:
        dir: Forwarded unchanged as the second argument of ``Index``.
            NOTE(review): presumably a directory path for an on-disk index,
            with ``None`` meaning an in-memory one -- confirm against the
            ``Index`` constructor.

    Returns:
        A ``(index, schema)`` tuple: the committed and reloaded index, and
        the schema object it was built from.
    """
    schema_ = schema()
    index = Index(schema_, dir)
    writer = index.writer()

    # Way 1: create an empty document and add field-value pairs to it.
    doc = Document()
    doc.add_text("title", "The Old Man and the Sea")
    doc.add_text(
        "body",
        # The trailing space after "skiff in" is required: without it the
        # adjacent literals join into the bogus token "inthe".
        ("He was an old man who fished alone in a skiff in "
         "the Gulf Stream and he had gone eighty-four days "
         "now without taking a fish."),
    )
    writer.add_document(doc)

    # Way 2: build the document from a dict; keys need to coincide with
    # field names.
    doc = Document.from_dict({
        "title": "Of Mice and Men",
        "body": ("A few miles south of Soledad, the Salinas River drops "
                 "in close to the hillside bank and runs deep and "
                 "green. The water is warm too, for it has slipped "
                 "twinkling over the yellow sands in the sunlight "
                 "before reaching the narrow pool. On one side of the "
                 "river the golden foothill slopes curve up to the "
                 "strong and rocky Gabilan Mountains, but on the valley "
                 "side the water is lined with trees—willows fresh and "
                 "green with every spring, carrying in their lower leaf "
                 "junctures the debris of the winter’s flooding; and "
                 "sycamores with mottled, white, recumbent limbs and "
                 "branches that arch over the pool"),
    })
    writer.add_document(doc)

    # A list value gives the document two values for the "title" field.
    doc = Document.from_dict({
        "title": ["Frankenstein", "The Modern Prometheus"],
        "body": ("You will rejoice to hear that no disaster has accompanied "
                 "the commencement of an enterprise which you have regarded "
                 "with such evil forebodings. I arrived here yesterday, and "
                 "my first task is to assure my dear sister of my welfare "
                 "and increasing confidence in the success of my "
                 "undertaking. "),
    })
    writer.add_document(doc)

    writer.commit()
    # Reload after the commit so searchers created from the returned index
    # see the documents.
    index.reload()
    return index, schema_