def test_copyfield():
    """Check the query text produced when CopyFieldPlugin is installed.

    Each case parses a query string with a parser that has one
    CopyFieldPlugin added and compares the text form of the parsed query
    against the expected string.
    """

    def parse_with_copy(querystring, *plugin_args, **plugin_kwargs):
        # Fresh schema-less parser (default field "a") for every case, so
        # plugins from one case never affect the next.
        parser = qparser.QueryParser("a", None)
        parser.add_plugin(
            plugins.CopyFieldPlugin(*plugin_args, **plugin_kwargs))
        return text_type(parser.parse(querystring))

    # Group argument given as None.
    assert_equal(parse_with_copy("hello b:matt", {"b": "c"}, None),
                 "(a:hello AND b:matt AND c:matt)")

    # Explicit group classes.
    assert_equal(
        parse_with_copy("hello b:matt", {"b": "c"}, syntax.AndMaybeGroup),
        "(a:hello AND (b:matt ANDMAYBE c:matt))")
    assert_equal(
        parse_with_copy("hello (there OR b:matt)", {"b": "c"},
                        syntax.RequireGroup),
        "(a:hello AND (a:there OR (b:matt REQUIRE c:matt)))")

    # Copying from the parser's default field.
    assert_equal(
        parse_with_copy("hello there", {"a": "c"}, syntax.OrGroup),
        "((a:hello OR c:hello) AND (a:there OR c:there))")

    # mirror=True.
    assert_equal(
        parse_with_copy("hello c:matt", {"b": "c"}, mirror=True),
        "(a:hello AND (c:matt OR b:matt))")
    assert_equal(
        parse_with_copy("hello c:matt", {"c": "a"}, mirror=True),
        "((a:hello OR c:hello) AND (c:matt OR a:matt))")

    # Schema-backed parser: the copy target field has its own analyzer
    # (regex tokenizer piped into a double-metaphone filter) and
    # multitoken_query="or".
    phonetic = analysis.RegexAnalyzer(r"\w+") | analysis.DoubleMetaphoneFilter()
    freq_format = formats.Frequency()
    schema = fields.Schema(
        name=fields.KEYWORD,
        name_phone=fields.FieldType(freq_format, phonetic,
                                    multitoken_query="or"))
    schema_parser = qparser.QueryParser("name", schema)
    schema_parser.add_plugin(plugins.CopyFieldPlugin({"name": "name_phone"}))
    assert_equal(
        text_type(schema_parser.parse(u("spruce view"))),
        "((name:spruce OR name_phone:SPRS) AND (name:view OR name_phone:F OR name_phone:FF))")
def test_vector_phrase(self):
    """Run phrase queries against a small in-memory index.

    The ``value`` field is built from a Frequency format plus a Positions
    format (presumably the term vector, given that the scorer is expected
    to be a ``VectorPhraseScorer`` -- confirm against FieldType).
    """
    analyzer = analysis.StandardAnalyzer()
    value_type = fields.FieldType(formats.Frequency(analyzer),
                                  formats.Positions(analyzer),
                                  scorable=True)
    schema = fields.Schema(name=fields.ID(stored=True), value=value_type)
    index = RamStorage().create_index(schema)

    documents = [
        (u"A", u"Little Miss Muffet sat on a tuffet"),
        (u"B", u"Miss Little Muffet tuffet"),
        (u"C", u"Miss Little Muffet tuffet sat"),
        (u"D", u"Gibberish blonk falunk miss muffet sat tuffet garbonzo"),
        (u"E", u"Blah blah blah pancakes"),
    ]
    writer = index.writer()
    for docname, text in documents:
        writer.add_document(name=docname, value=text)
    writer.commit()

    searcher = index.searcher()

    def names(results):
        # Sorted stored names of the hits, so result order doesn't matter.
        return sorted([hit['name'] for hit in results])

    def matching(words, **phrase_kwargs):
        # Names of the documents matched by a phrase query on "value".
        phrase = query.Phrase("value", words, **phrase_kwargs)
        return names(searcher.search(phrase))

    # The first query also checks which scorer class gets picked.
    first = query.Phrase("value",
                         [u"little", u"miss", u"muffet", u"sat", u"tuffet"])
    scorer = first.scorer(searcher)
    self.assertEqual(scorer.__class__.__name__, "VectorPhraseScorer")
    self.assertEqual(names(searcher.search(first)), ["A"])

    self.assertEqual(matching([u"miss", u"muffet", u"sat", u"tuffet"]),
                     ["A", "D"])

    # Words in the wrong order must not match...
    self.assertEqual(matching([u"falunk", u"gibberish"]), [])

    # ...while the right order matches once slop is allowed.
    self.assertEqual(matching([u"gibberish", u"falunk"], slop=2), ["D"])

    # NOTE: the original test carries a commented-out check that a phrase
    # of four "blah" words ([u"blah"] * 4) matches nothing; it is still
    # disabled here.

    self.assertEqual(matching([u"blah"] * 3), ["E"])
def get_index():
    """Return the shared in-memory index, building it on first use.

    The index has a single stored, scorable ``text`` field using the
    Characters format and SimpleAnalyzer. It holds one document for every
    4-item permutation of the module-level ``domain``: the indexed text is
    the items joined by single spaces, and the stored value is the
    permutation tuple itself.

    The result is cached in the module-level ``_ix``. The cache is only
    filled in after the writer has committed, so a failure part-way through
    the build cannot leave a half-populated index behind for later callers.
    """
    global _ix

    if _ix is not None:
        return _ix

    charfield = fields.FieldType(formats.Characters(),
                                 analysis.SimpleAnalyzer(),
                                 scorable=True, stored=True)
    schema = fields.Schema(text=charfield)
    index = RamStorage().create_index(schema)

    writer = index.writer()
    for words in permutations(domain, 4):
        # _stored_text overrides what gets stored for the "text" field.
        writer.add_document(text=u(" ").join(words), _stored_text=words)
    writer.commit()

    # Publish to the cache only once the index is complete.
    _ix = index
    return _ix
def _roundtrip(content, format_, astype, ana=None):
    """Write ``content`` through the default codec and read it back.

    ``content`` is indexed by a FieldType built from ``format_`` and the
    analyzer (``ana``, or a StandardAnalyzer when none is given). The
    resulting terms are written to a field named "f1" in temporary
    storage, then read back with the codec's terms reader.

    Returns a list of ``(term, value)`` tuples, one per term read back,
    where ``term`` is the field's decoding of the stored term bytes and
    ``value`` is the matcher's value converted with ``value_as(astype)``.
    """
    with TempStorage("roundtrip") as storage:
        codec = default_codec()
        segment = codec.new_segment(storage, "")
        if not ana:
            ana = analysis.StandardAnalyzer()
        fieldobj = fields.FieldType(format=format_, analyzer=ana)

        # --- write phase ---
        writer = codec.field_writer(storage, segment)
        writer.start_field("f1", fieldobj)
        for termtext, _freq, weight, vbytes in sorted(fieldobj.index(content)):
            writer.start_term(termtext)
            # Every posting is added with 0 as the first argument
            # (presumably the document number) and no length.
            writer.add(0, weight, vbytes, None)
            writer.finish_term()
        writer.finish_field()
        writer.close()

        # --- read phase ---
        reader = codec.terms_reader(storage, segment)
        pairs = []
        for fieldname, termbytes in reader.terms():
            matcher = reader.matcher(fieldname, termbytes, format_)
            decoded = fieldobj.from_bytes(termbytes)
            pairs.append((decoded, matcher.value_as(astype)))
        reader.close()
        return pairs