Esempio n. 1
0
    def test_simple_search_after_reuse(self, dir_index):
        """Open an index on the fixture directory and expect one "sea whale" hit."""
        directory, _ = dir_index
        reopened = Index(schema(), str(directory))

        # The query is parsed against both the "title" and "body" fields.
        parsed = reopened.parse_query("sea whale", ["title", "body"])
        outcome = reopened.searcher().search(parsed, 10)

        assert len(outcome.hits) == 1
Esempio n. 2
0
    def test_simple_search_after_reuse(self, dir_index):
        """Search the fixture index through a ``TopDocs`` collector; expect one result."""
        directory, _ = dir_index
        reopened = Index(schema(), str(directory))
        parsed = reopened.parse_query("sea whale", ["title", "body"])

        # The collector is built with a limit of 10 before the search runs.
        collector = tantivy.TopDocs(10)
        matches = reopened.searcher().search(parsed, collector)

        assert len(matches) == 1
Esempio n. 3
0
    def test_order_by_search(self):
        """Check ``order_by_field`` search results, with and without an offset.

        Three documents are indexed with "order" values 0, 2 and 1. The
        assertions expect the hits back in the sequence 2, 1, 0, and expect a
        single hit when the same search is run with ``offset=2``.
        """
        builder = SchemaBuilder()
        builder = builder.add_unsigned_field("order", fast="single")
        builder = builder.add_text_field("title", stored=True)
        order_schema = builder.build()

        index = Index(order_schema)
        writer = index.writer()

        # (order value, title) pairs, in insertion sequence.
        fixtures = (
            (0, "Test title"),
            (2, "Final test title"),
            (1, "Another test title"),
        )
        for order_value, title in fixtures:
            entry = Document()
            entry.add_unsigned("order", order_value)
            entry.add_text("title", title)
            writer.add_document(entry)

        writer.commit()
        index.reload()

        parsed = index.parse_query("test")
        searcher = index.searcher()

        # Skipping the first two of three matches leaves exactly one hit.
        paged = searcher.search(parsed, 10, offset=2, order_by_field="order")
        assert len(paged.hits) == 1

        ranked = searcher.search(parsed, 10, order_by_field="order")
        assert len(ranked.hits) == 3

        # Titles in the sequence the ordered search is expected to return.
        expected_titles = (
            "Final test title",
            "Another test title",
            "Test title",
        )
        for rank, expected in enumerate(expected_titles):
            _, address = ranked.hits[rank]
            fetched = index.searcher().doc(address)
            assert fetched["title"] == [expected]
Esempio n. 4
0
def create_index(dir=None):
    """Build an index holding three sample documents.

    All tests are assumed to share these documents for now; individual
    tests may still set up their own function-local indexes.

    Args:
        dir: Forwarded unchanged as the second argument to ``Index``.
            NOTE(review): presumably ``None`` selects an in-memory index;
            confirm against the ``Index`` constructor documentation.

    Returns:
        A ``(index, schema)`` tuple: the committed and reloaded index and
        the schema object it was created with.
    """
    schema_ = schema()
    index = Index(schema_, dir)
    writer = index.writer()

    # Two ways of adding documents are exercised below.
    # 1. Create a Document instance and add field-value pairs to it.
    doc = Document()
    doc.add_text("title", "The Old Man and the Sea")
    doc.add_text(
        "body",
        # Each fragment ends with a space so that the implicit string
        # concatenation does not glue neighbouring words together.
        ("He was an old man who fished alone in a skiff in "
         "the Gulf Stream and he had gone eighty-four days "
         "now without taking a fish."),
    )
    writer.add_document(doc)

    # 2. Build the document from a dict; keys need to coincide with the
    #    schema's field names.
    doc = Document.from_dict({
        "title":
        "Of Mice and Men",
        "body": ("A few miles south of Soledad, the Salinas River drops "
                 "in close to the hillside bank and runs deep and "
                 "green. The water is warm too, for it has slipped "
                 "twinkling over the yellow sands in the sunlight "
                 "before reaching the narrow pool. On one side of the "
                 "river the golden foothill slopes curve up to the "
                 "strong and rocky Gabilan Mountains, but on the valley "
                 "side the water is lined with trees—willows fresh and "
                 "green with every spring, carrying in their lower leaf "
                 "junctures the debris of the winter’s flooding; and "
                 "sycamores with mottled, white, recumbent limbs and "
                 "branches that arch over the pool"),
    })
    writer.add_document(doc)

    # This document passes a list of two values for the "title" field.
    doc = Document.from_dict({
        "title": ["Frankenstein", "The Modern Prometheus"],
        "body":
        ("You will rejoice to hear that no disaster has accompanied the commencement of an enterprise which you have regarded with such evil forebodings.  I arrived here yesterday, and my first task is to assure my dear sister of my welfare and increasing confidence in the success of my undertaking. "
         ),
    })
    writer.add_document(doc)
    writer.commit()

    # Reload before handing the index back to the caller.
    index.reload()
    return index, schema_
Esempio n. 5
0
    def test_order_by_search_without_fast_field(self):
        """Search ordered by an "order" field declared without ``fast``; expect no hits.

        NOTE(review): the document built below is never handed to the writer
        and nothing is committed, so the index is empty when it is searched.
        Confirm whether that is intended.
        """
        builder = SchemaBuilder().add_unsigned_field("order")
        plain_schema = builder.add_text_field("title", stored=True).build()

        idx = Index(plain_schema)
        # The writer is opened but not used anywhere below.
        writer = idx.writer()

        pending = Document()
        pending.add_unsigned("order", 0)
        pending.add_text("title", "Test title")

        parsed = idx.parse_query("test")
        outcome = idx.searcher().search(parsed, 10, order_by_field="order")

        assert len(outcome.hits) == 0
Esempio n. 6
0
    def test_create_readers(self):
        """Exercise the "Manual" and "OnCommit" reader configurations.

        Under "Manual" a committed document is expected to become visible
        only after an explicit ``reload()``. Under "OnCommit" the second
        document is expected to show up without a reload, within 5 seconds.

        Raises:
            AssertionError: if any document count is wrong, or if the
                second document is still not visible after the wait.
        """
        import time

        idx = Index(schema())
        # NOTE(review): the second argument is presumably the number of
        # searchers; confirm against the config_reader documentation.
        idx.config_reader("Manual", 4)
        assert idx.searcher().num_docs == 0

        writer = idx.writer(30000000, 1)
        writer.add_document(Document(title="mytitle", body="mybody"))
        writer.commit()
        # The reader was configured as "Manual" above, so the commit must
        # not be reflected until the index is manually reloaded.
        assert idx.searcher().num_docs == 0
        idx.reload()
        assert idx.searcher().num_docs == 1

        idx.config_reader("OnCommit", 4)
        writer.add_document(Document(title="mytitle2", body="mybody2"))
        writer.commit()

        # The index should be automatically reloaded.
        # Poll for at most 5 s (50 attempts, 0.1 s apart) for it to happen.
        for _ in range(50):
            time.sleep(0.1)
            if idx.searcher().num_docs == 2:
                return
        # Raised explicitly rather than via `assert False`, which is
        # stripped under `python -O` and carries no failure message.
        raise AssertionError(
            "index was not automatically reloaded within 5 seconds of the commit"
        )
Esempio n. 7
0
 def test_opens_from_dir(self):
     """Open the index at PATH_TO_INDEX with ``reuse=True`` and expect three documents."""
     opened = Index(schema(), PATH_TO_INDEX, reuse=True)
     doc_count = opened.searcher().num_docs
     assert doc_count == 3
Esempio n. 8
0
 def test_exists(self):
     """Check that ``Index.exists`` reports an index at PATH_TO_INDEX."""
     # Deliberately a separate test, in case anyone deletes the index.
     # The tests are run from the root directory.
     index_present = Index.exists(PATH_TO_INDEX)
     assert index_present
Esempio n. 9
0
    def test_opens_from_dir(self, dir_index):
        """Open an index on the fixture directory with ``reuse=True``; expect three documents."""
        directory, _ = dir_index
        opened = Index(schema(), str(directory), reuse=True)

        doc_count = opened.searcher().num_docs
        assert doc_count == 3
Esempio n. 10
0
 def test_opens_from_dir_invalid_schema(self):
     """Expect ``ValueError`` when opening PATH_TO_INDEX with ``schema()``.

     NOTE(review): going by the test name, ``schema()`` presumably does not
     match the schema stored at PATH_TO_INDEX; confirm against the fixture.
     """
     with pytest.raises(ValueError):
         # Only the raised exception matters, so the result is not bound.
         Index(schema(), PATH_TO_INDEX, reuse=True)