def test_multifilter():
    """Check that a MultiFilter applies the sub-filter named by ``mode``.

    The analyzer is called twice on the same upper-case text: once with
    ``mode="a"`` (mapped to a LowercaseFilter) and once with ``mode="b"``
    (mapped to a PassFilter). The first call is expected to yield
    lower-cased tokens, the second the tokens exactly as tokenized.
    """
    # Keyword names given to MultiFilter are the mode names used below.
    mode_filters = analysis.MultiFilter(
        a=analysis.LowercaseFilter(),
        b=analysis.PassFilter(),
    )
    analyzer = analysis.RegexTokenizer(r"\S+") | mode_filters
    sample = u("ALFA BRAVO CHARLIE")

    lowered = [token.text for token in analyzer(sample, mode="a")]
    assert lowered == ["alfa", "bravo", "charlie"]

    untouched = [token.text for token in analyzer(sample, mode="b")]
    assert untouched == ["ALFA", "BRAVO", "CHARLIE"]
def test_multifilter():
    """Check highlighting when index and query use different IntraWordFilters.

    A MultiFilter maps the ``index`` mode to an IntraWordFilter built with
    ``mergewords=True`` and the ``query`` mode to one built with
    ``mergewords=False`` (``mergenums=False`` for both). One stored document
    is indexed through that analyzer, the term ``"5000"`` is searched for,
    and the highlighted text of the first hit must mark only the ``5000``
    part of ``BabbleTron5000``.
    """
    # NOTE(review): another function named ``test_multifilter`` is visible
    # earlier in this source. If both live in the same module, this
    # definition shadows the earlier one and that test never runs -- confirm
    # and rename one of them if so.
    iwf_for_index = analysis.IntraWordFilter(mergewords=True, mergenums=False)
    iwf_for_query = analysis.IntraWordFilter(mergewords=False, mergenums=False)
    mf = analysis.MultiFilter(index=iwf_for_index, query=iwf_for_query)
    ana = analysis.RegexTokenizer() | mf | analysis.LowercaseFilter()

    # The field is stored so the hit can produce highlights from its text.
    schema = fields.Schema(text=fields.TEXT(analyzer=ana, stored=True))
    ix = RamStorage().create_index(schema)

    w = ix.writer()
    w.add_document(text=u("Our BabbleTron5000 is great"))
    w.commit()

    with ix.searcher() as s:
        hit = s.search(query.Term("text", "5000"))[0]
        expected = 'Our BabbleTron<b class="match term0">5000</b> is great'
        # Plain ``assert`` matches the style of the sibling test in this
        # source and avoids relying on an external ``assert_equal`` helper.
        assert hit.highlights("text") == expected