# Imports assumed by these tests. The section starts mid-file, so this
# header is reconstructed; all names below come from the Whoosh 2.x-era
# test suite's usual dependencies.
from itertools import permutations

from nose.tools import assert_equal, assert_raises

from whoosh import analysis, fields, formats, query
from whoosh.compat import text_type, u, xrange
from whoosh.filedb.filestore import RamStorage
from whoosh.query import Or, Ordered, Phrase, Term
from whoosh.support.testing import TempIndex


def test_compatibility():
    from whoosh.scoring import Weighting

    # This is the old way of doing a custom weighting model, check that
    # it's still supported...
    class LegacyWeighting(Weighting):
        use_final = True

        def score(self, searcher, fieldname, text, docnum, weight):
            return weight + 0.5

        def final(self, searcher, docnum, score):
            return score * 1.5

    schema = fields.Schema(text=fields.TEXT)
    ix = RamStorage().create_index(schema)
    w = ix.writer()
    domain = "alfa bravo charlie delta".split()
    for ls in permutations(domain, 3):
        w.add_document(text=u(" ").join(ls))
    w.commit()

    s = ix.searcher(weighting=LegacyWeighting())
    r = s.search(query.Term("text", u("bravo")))
    assert_equal(r.score(0), 2.25)

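# For contrast with the legacy API tested above: a minimal sketch of the
# modern way to express the same per-document score, using
# scoring.FunctionWeighting. This is only illustrative (the test does not
# use it), and the helper/function names here are made up; note that
# FunctionWeighting has no equivalent of the legacy final() hook.
def _modern_weighting_sketch():
    from whoosh import scoring

    def simple_score_fn(searcher, fieldname, text, matcher):
        # Same per-posting score as LegacyWeighting.score() above.
        return matcher.weight() + 0.5

    # FunctionWeighting wraps a plain scoring function into a weighting
    # model usable as ix.searcher(weighting=...).
    return scoring.FunctionWeighting(simple_score_fn)
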
def test_deleteall():
    schema = fields.Schema(text=fields.TEXT)
    with TempIndex(schema, "deleteall") as ix:
        w = ix.writer()
        domain = u("alfa bravo charlie delta echo").split()
        for i, ls in enumerate(permutations(domain)):
            w.add_document(text=u(" ").join(ls))
            if not i % 10:
                w.commit()
                w = ix.writer()
        w.commit()

        # This is just a test, don't use this method to delete all docs IRL!
        doccount = ix.doc_count_all()
        w = ix.writer()
        for docnum in xrange(doccount):
            w.delete_document(docnum)
        w.commit()

        with ix.searcher() as s:
            r = s.search(query.Or([query.Term("text", u("alfa")),
                                   query.Term("text", u("bravo"))]))
            assert_equal(len(r), 0)

        ix.optimize()
        assert_equal(ix.doc_count_all(), 0)

        with ix.reader() as r:
            assert_equal(list(r), [])

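# A hedged sketch of a more idiomatic way to empty an index than the
# delete-every-docnum loop exercised above. The helper name is
# illustrative; delete_by_query() and query.Every() are real Whoosh APIs.
def _deleteall_sketch(ix):
    from whoosh import query

    # Delete everything matching an Every() query in one pass.
    w = ix.writer()
    w.delete_by_query(query.Every())
    w.commit()
    # Alternatively (not shown): recreate the index over the same storage
    # with storage.create_index(schema), discarding all existing segments.
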
def test_excludematcher():
    schema = fields.Schema(content=fields.TEXT(stored=True))
    ix = RamStorage().create_index(schema)
    domain = ("alfa", "bravo", "charlie", "delta")

    # Build three identical segments (merge=False keeps them separate),
    # then delete documents so the deletions fall across segments.
    for _ in xrange(3):
        w = ix.writer()
        for ls in permutations(domain):
            w.add_document(content=u(" ").join(ls))
        w.commit(merge=False)

    w = ix.writer()
    w.delete_document(5)
    w.delete_document(10)
    w.delete_document(28)
    w.commit(merge=False)

    q = Term("content", "bravo")
    with ix.searcher() as s:
        m = q.matcher(s)
        while m.is_active():
            content = s.stored_fields(m.id())["content"].split()
            spans = m.spans()
            for span in spans:
                assert_equal(content[span.start], "bravo")
            m.next()

def test_boost_phrase():
    schema = fields.Schema(title=fields.TEXT(field_boost=5.0, stored=True),
                           text=fields.TEXT)
    ix = RamStorage().create_index(schema)
    domain = u("alfa bravo charlie delta").split()
    w = ix.writer()
    for ls in permutations(domain):
        t = u(" ").join(ls)
        w.add_document(title=t, text=t)
    w.commit()

    q = Or([Term("title", u("alfa")),
            Term("title", u("bravo")),
            Phrase("text", [u("bravo"), u("charlie"), u("delta")])])

    def boost_phrases(q):
        if isinstance(q, Phrase):
            q.boost *= 1000.0
            return q
        else:
            return q.apply(boost_phrases)
    q = boost_phrases(q)

    with ix.searcher() as s:
        r = s.search(q, limit=None)
        for hit in r:
            if "bravo charlie delta" in hit["title"]:
                assert hit.score > 100.0

def test_spelling_field_order():
    ana = analysis.StemmingAnalyzer()
    schema = fields.Schema(a=fields.TEXT, b=fields.TEXT(analyzer=ana),
                           c=fields.TEXT, d=fields.TEXT(analyzer=ana),
                           e=fields.TEXT(analyzer=ana), f=fields.TEXT)
    ix = RamStorage().create_index(schema)

    domain = u("alfa bravo charlie delta").split()
    w = ix.writer()
    for ls in permutations(domain):
        value = " ".join(ls)
        w.add_document(a=value, b=value, c=value, d=value, e=value, f=value)
    w.commit()

def test_current_terms():
    domain = u("alfa bravo charlie delta").split()
    schema = fields.Schema(text=fields.TEXT(stored=True))
    ix = RamStorage().create_index(schema)
    w = ix.writer()
    for ls in permutations(domain, 3):
        w.add_document(text=" ".join(ls), _stored_text=ls)
    w.commit()

    with ix.searcher() as s:
        q = query.And([query.Term("text", "alfa"),
                       query.Term("text", "charlie")])
        m = q.matcher(s)
        while m.is_active():
            assert_equal(sorted(m.matching_terms()),
                         [("text", "alfa"), ("text", "charlie")])
            m.next()

def get_index():
    # "_ix" and "domain" are module-level globals defined elsewhere in the
    # original file; the index is built once here and cached across tests.
    global _ix
    if _ix is not None:
        return _ix

    charfield = fields.FieldType(formats.Characters(),
                                 analysis.SimpleAnalyzer(),
                                 scorable=True, stored=True)
    schema = fields.Schema(text=charfield)
    st = RamStorage()
    _ix = st.create_index(schema)

    w = _ix.writer()
    for ls in permutations(domain, 4):
        w.add_document(text=u(" ").join(ls), _stored_text=ls)
    w.commit()

    return _ix

def test_stability():
    schema = fields.Schema(text=fields.TEXT)
    ix = RamStorage().create_index(schema)
    domain = u("alfa bravo charlie delta").split()
    w = ix.writer()
    for ls in permutations(domain, 3):
        w.add_document(text=u(" ").join(ls))
    w.commit()

    with ix.searcher() as s:
        q = query.Term("text", u("bravo"))
        last = []
        for i in xrange(s.doc_frequency("text", u("bravo"))):
            # Only un-optimized results are stable
            r = s.search(q, limit=i + 1, optimize=False)
            docnums = [hit.docnum for hit in r]
            assert_equal(docnums[:-1], last)
            last = docnums

def test_phrase_order():
    tfield = fields.TEXT(stored=True, analyzer=analysis.SimpleAnalyzer())
    schema = fields.Schema(text=tfield)
    storage = RamStorage()
    ix = storage.create_index(schema)

    writer = ix.writer()
    for ls in permutations(["ape", "bay", "can", "day"], 4):
        writer.add_document(text=u(" ").join(ls))
    writer.commit()

    with ix.searcher() as s:
        def result(q):
            r = s.search(q, limit=None, sortedby=None)
            return sorted([d['text'] for d in r])

        q = Phrase("text", ["bay", "can", "day"])
        assert_equal(result(q), [u('ape bay can day'), u('bay can day ape')])

def test_phrase_multi():
    schema = fields.Schema(id=fields.STORED, text=fields.TEXT)
    ix = RamStorage().create_index(schema)

    domain = u("alfa bravo charlie delta echo").split()
    w = None
    for i, ls in enumerate(permutations(domain)):
        if w is None:
            w = ix.writer()
        w.add_document(id=i, text=u(" ").join(ls))
        if not i % 30:
            w.commit()
            w = None
    if w is not None:
        w.commit()

    with ix.searcher() as s:
        q = Phrase("text", ["alfa", "bravo"])
        _ = s.search(q)

def test_lengths2():
    schema = fields.Schema(text=fields.TEXT(stored=True))
    ix = RamStorage().create_index(schema)
    count = 0
    for _ in xrange(3):
        w = ix.writer()
        for ls in permutations(u("alfa bravo charlie").split()):
            if "bravo" in ls and "charlie" in ls:
                count += 1
            w.add_document(text=u(" ").join(ls))
        w.commit(merge=False)

    with ix.searcher() as s:
        q = query.Or([query.Term("text", u("bravo")),
                      query.Term("text", u("charlie"))])
        r = s.search(q, limit=None)
        assert_equal(len(r), count)

        r = s.search(q, limit=3)
        assert_equal(len(r), count)

def test_ordered():
    domain = u("alfa bravo charlie delta echo foxtrot").split(" ")
    schema = fields.Schema(f=fields.TEXT(stored=True))
    ix = RamStorage().create_index(schema)
    writer = ix.writer()
    for ls in permutations(domain):
        writer.add_document(f=u(" ").join(ls))
    writer.commit()

    with ix.searcher() as s:
        q = Ordered([Term("f", u("alfa")), Term("f", u("charlie")),
                     Term("f", u("echo"))])
        r = s.search(q)
        for hit in r:
            ls = hit["f"].split()
            assert "alfa" in ls
            assert "charlie" in ls
            assert "echo" in ls
            a = ls.index("alfa")
            c = ls.index("charlie")
            e = ls.index("echo")
            assert a < c and c < e, repr(ls)

def test_resultspage():
    schema = fields.Schema(id=fields.STORED, content=fields.TEXT)
    ix = RamStorage().create_index(schema)
    domain = ("alfa", "bravo", "bravo", "charlie", "delta")
    w = ix.writer()
    for i, lst in enumerate(permutations(domain, 3)):
        w.add_document(id=text_type(i), content=u(" ").join(lst))
    w.commit()

    with ix.searcher() as s:
        q = query.Term("content", u("bravo"))
        r = s.search(q, limit=10)
        tops = list(r)

        rp = s.search_page(q, 1, pagelen=5)
        assert_equal(rp.scored_length(), 5)
        assert_equal(list(rp), tops[0:5])
        assert_equal(rp[10:], [])

        rp = s.search_page(q, 2, pagelen=5)
        assert_equal(list(rp), tops[5:10])

        rp = s.search_page(q, 1, pagelen=10)
        assert_equal(len(rp), 54)
        assert_equal(rp.pagecount, 6)
        rp = s.search_page(q, 6, pagelen=10)
        assert_equal(len(list(rp)), 4)
        assert rp.is_last_page()

        assert_raises(ValueError, s.search_page, q, 0)
        assert_raises(ValueError, s.search_page, q, 7)

        rp = s.search_page(query.Term("content", "glonk"), 1)
        assert_equal(len(rp), 0)
        assert rp.is_last_page()