def test_span_term():
    """Check Term matcher document ids and spans against the stored text.

    For every word in ``domain``, walk the matcher of a ``Term`` query on
    the "text" field and verify that:

    * each matched document's stored text contains the word;
    * the first reported span starts and ends at the position of the
      word's first occurrence in the stored text;
    * the set of matched ids is exactly the set of positions (in
      ``all_stored_fields()`` order) whose stored text contains the word.
    """
    ix = get_index()
    with ix.searcher() as searcher:
        stored_texts = [doc["text"] for doc in searcher.all_stored_fields()]

        for word in domain:
            matcher = Term("text", word).matcher(searcher)

            matched = set()
            while matcher.is_active():
                docnum = matcher.id()
                spans = matcher.spans()
                matched.add(docnum)

                tokens = list(searcher.stored_fields(docnum)["text"])
                assert word in tokens

                # "bravo" is exempt from the single-span check --
                # presumably it can occur more than once per document;
                # confirm against the definition of ``domain``.
                if word != "bravo":
                    assert len(spans) == 1

                first_pos = tokens.index(word)
                assert first_pos == spans[0].start
                assert first_pos == spans[0].end
                matcher.next()

            # The matcher must have visited every document containing the
            # word, and no document that lacks it.
            for index, text in enumerate(stored_texts):
                if word not in text:
                    assert index not in matched
                else:
                    assert index in matched
def test_excludematcher():
    """Check Term spans still line up with stored content after deletions.

    Indexes every permutation of a four-word domain three times (each
    batch committed with ``merge=False``), deletes documents 5, 10 and 28,
    then walks the matcher for the term "bravo" and asserts that every
    span's start position points at "bravo" in the document's stored
    content.
    """
    schema = fields.Schema(content=fields.TEXT(stored=True))
    ix = RamStorage().create_index(schema)

    domain = ("alfa", "bravo", "charlie", "delta")

    # Three separately committed batches of the same documents.
    for _ in xrange(3):
        writer = ix.writer()
        for words in permutations(domain):
            writer.add_document(content=u(" ").join(words))
        writer.commit(merge=False)

    # Remove a few documents in a further commit.
    writer = ix.writer()
    for docnum in (5, 10, 28):
        writer.delete_document(docnum)
    writer.commit(merge=False)

    with ix.searcher() as searcher:
        matcher = Term("content", "bravo").matcher(searcher)
        while matcher.is_active():
            tokens = searcher.stored_fields(matcher.id())["content"].split()
            for span in matcher.spans():
                assert tokens[span.start] == "bravo"
            matcher.next()