def test_intersection():
    """Check And-query intersections over documents added in two commits.

    Five small documents are written to an in-memory index using two
    separate writers. Two ``And`` queries over the "value" field are then
    run, and the keys that ``_keys`` reports for the matching document IDs
    are compared with the expected lists.
    """
    # One inner list per writer/commit: (key, value) pairs.
    batches = [
        [
            (u("a"), u("alpha bravo charlie delta")),
            (u("b"), u("echo foxtrot alpha bravo")),
            (u("c"), u("charlie delta golf hotel")),
        ],
        [
            (u("d"), u("india alpha bravo charlie")),
            (u("e"), u("delta bravo india bravo")),
        ],
    ]
    # (terms to intersect, keys expected back from _keys)
    cases = [
        ((u("bravo"), u("delta")), ["a", "e"]),
        ((u("bravo"), u("alpha")), ["a", "b", "d"]),
    ]

    schema = fields.Schema(key=fields.ID(stored=True),
                           value=fields.TEXT(stored=True))
    index = RamStorage().create_index(schema)

    for batch in batches:
        writer = index.writer()
        for key, text in batch:
            writer.add_document(key=key, value=text)
        writer.commit()

    with index.searcher() as searcher:
        for terms, expected in cases:
            query = And([Term("value", term) for term in terms])
            matcher = query.matcher(searcher)
            assert _keys(searcher, matcher.all_ids()) == expected
# Example no. 2
# 0
def test_regular_and():
    """Check the spans reported while stepping through an And matcher.

    For every document matched by ``bravo AND alfa`` on the "text" field,
    each span's ``start`` is used to index the document's stored "text"
    value, and the item found there must be "bravo" or "alfa".
    """
    allowed = ("bravo", "alfa")
    index = get_index()
    with index.searcher() as searcher:
        query = And([Term("text", "bravo"), Term("text", "alfa")])
        matcher = query.matcher(searcher)
        while matcher.is_active():
            # NOTE(review): presumably the stored value is a sequence of
            # words indexed by position -- confirm against get_index().
            stored_text = searcher.stored_fields(matcher.id())["text"]
            for span in matcher.spans():
                assert stored_text[span.start] in allowed
            matcher.next()
def test_random_intersections():
    """Compare And-matcher results with a brute-force intersection.

    Documents made of random words from a small vocabulary are written in
    several commits. For a number of random word sets the test then checks
    that:

    * ``all_ids()`` and stepping with ``id()``/``next()`` produce the same
      list of document IDs, and
    * ``_keys`` for those IDs equals the sorted list of documents that
      contain every word of the set.

    The brute-force check compares whole words (set containment) rather
    than substrings, so it would stay correct even if one vocabulary word
    were a prefix or substring of another.
    """
    domain = [
        u("alpha"),
        u("bravo"),
        u("charlie"),
        u("delta"),
        u("echo"),
        u("foxtrot"),
        u("golf"),
        u("hotel"),
        u("india"),
        u("juliet"),
        u("kilo"),
        u("lima"),
        u("mike"),
    ]
    segments = 5
    docsperseg = 50
    fieldlimits = (3, 10)
    # (docnum, frozenset of the document's words) pairs for the sanity check
    documents = []

    schema = fields.Schema(key=fields.STORED, value=fields.TEXT(stored=True))
    st = RamStorage()
    ix = st.create_index(schema)

    # Create docsperseg * segments documents containing random words from
    # the domain list. Add the documents to the index, but also remember
    # each document's word set in "documents".
    for i in xrange(segments):
        writer = ix.writer()
        for j in xrange(docsperseg):
            docnum = i * docsperseg + j
            # Pick a random number of random words
            doc_words = [choice(domain)
                         for _ in xrange(randint(*fieldlimits))]
            # Index the words as a single space-separated string
            writer.add_document(key=docnum, value=u(" ").join(doc_words))
            # Keep the word set so each query check is a cheap subset test
            documents.append((docnum, frozenset(doc_words)))
        writer.commit()
    # The index must not have collapsed into a single segment
    assert_not_equal(len(ix._segments()), 1)

    testcount = 20
    testlimits = (2, 5)

    with ix.searcher() as s:
        # Every document must have a stored "key" value
        for i in xrange(s.doc_count_all()):
            assert_not_equal(s.stored_fields(i).get("key"), None)

        for _ in xrange(testcount):
            # Create a random list of words and manually do an intersection
            # of items in "documents" that contain all of them ("target").
            words = sample(domain, randint(*testlimits))
            wanted = frozenset(words)
            target = sorted(docnum for docnum, tokens in documents
                            if wanted <= tokens)

            # Create a query from the list of words and get two matchers
            # from it.
            q = And([Term("value", word) for word in words])
            m1 = q.matcher(s)
            m2 = q.matcher(s)

            # Try getting the list of IDs from all_ids()
            ids1 = list(m1.all_ids())

            # Try getting the list of IDs using id()/next()
            ids2 = []
            while m2.is_active():
                ids2.append(m2.id())
                m2.next()

            # Check that the two methods return the same list
            assert_equal(ids1, ids2)

            # Check that the IDs match the ones we manually calculated
            assert_equal(_keys(s, ids1), target)
def test_random_intersections():
    """Check And-matcher output against a manually computed intersection.

    Random documents (space-separated words from ``domain``) are indexed
    over several commits and also kept in a local list. For each of a
    number of random word lists, the IDs produced by ``all_ids()`` must
    equal those produced by stepping with ``id()``/``next()``, and the keys
    for those IDs must equal the documents that contain every word.

    The manual check splits each document into words and tests set
    containment, instead of searching for substrings, so it does not depend
    on the vocabulary words never overlapping one another.
    """
    domain = [u("alpha"), u("bravo"), u("charlie"), u("delta"), u("echo"),
              u("foxtrot"), u("golf"), u("hotel"), u("india"), u("juliet"),
              u("kilo"), u("lima"), u("mike")]
    segments = 5
    docsperseg = 50
    fieldlimits = (3, 10)
    documents = []

    schema = fields.Schema(key=fields.STORED, value=fields.TEXT(stored=True))
    st = RamStorage()
    ix = st.create_index(schema)

    # Create docsperseg * segments documents containing random words from
    # the domain list. Add the documents to the index, but also keep them
    # in the "documents" list for the sanity check
    for i in xrange(segments):
        writer = ix.writer()
        for j in xrange(docsperseg):
            docnum = i * docsperseg + j
            # Create a string of random words
            doc = u(" ").join(choice(domain)
                            for _ in xrange(randint(*fieldlimits)))
            # Add the string to the index
            writer.add_document(key=docnum, value=doc)
            # Add a (docnum, string) tuple to the documents list
            documents.append((docnum, doc))
        writer.commit()
    # The commits above must not have produced exactly one segment
    assert len(ix._segments()) != 1

    testcount = 20
    testlimits = (2, 5)

    with ix.searcher() as s:
        # Every document must have a stored "key" value
        for i in xrange(s.doc_count_all()):
            assert s.stored_fields(i).get("key") is not None

        for _ in xrange(testcount):
            # Create a random list of words and manually do an intersection of
            # items in "documents" that contain the words ("target").
            words = sample(domain, randint(*testlimits))
            required = set(words)
            target = []
            for docnum, doc in documents:
                # Whole-word containment: every required word must be one
                # of the document's space-separated words
                if required.issubset(doc.split()):
                    target.append(docnum)
            target.sort()

            # Create a query from the list of words and get two matchers from
            # it.
            q = And([Term("value", word) for word in words])
            m1 = q.matcher(s)
            m2 = q.matcher(s)

            # Try getting the list of IDs from all_ids()
            ids1 = list(m1.all_ids())

            # Try getting the list of IDs using id()/next()
            ids2 = []
            while m2.is_active():
                ids2.append(m2.id())
                m2.next()

            # Check that the two methods return the same list
            assert ids1 == ids2

            # Check that the IDs match the ones we manually calculated
            assert _keys(s, ids1) == target