Ejemplo n.º 1
0
def test_copyfield():
    qp = qparser.QueryParser("a", None)
    qp.add_plugin(plugins.CopyFieldPlugin({"b": "c"}, None))
    assert_equal(text_type(qp.parse("hello b:matt")), "(a:hello AND b:matt AND c:matt)")

    qp = qparser.QueryParser("a", None)
    qp.add_plugin(plugins.CopyFieldPlugin({"b": "c"}, syntax.AndMaybeGroup))
    assert_equal(text_type(qp.parse("hello b:matt")), "(a:hello AND (b:matt ANDMAYBE c:matt))")

    qp = qparser.QueryParser("a", None)
    qp.add_plugin(plugins.CopyFieldPlugin({"b": "c"}, syntax.RequireGroup))
    assert_equal(text_type(qp.parse("hello (there OR b:matt)")), "(a:hello AND (a:there OR (b:matt REQUIRE c:matt)))")

    qp = qparser.QueryParser("a", None)
    qp.add_plugin(plugins.CopyFieldPlugin({"a": "c"}, syntax.OrGroup))
    assert_equal(text_type(qp.parse("hello there")), "((a:hello OR c:hello) AND (a:there OR c:there))")

    qp = qparser.QueryParser("a", None)
    qp.add_plugin(plugins.CopyFieldPlugin({"b": "c"}, mirror=True))
    assert_equal(text_type(qp.parse("hello c:matt")), "(a:hello AND (c:matt OR b:matt))")

    qp = qparser.QueryParser("a", None)
    qp.add_plugin(plugins.CopyFieldPlugin({"c": "a"}, mirror=True))
    assert_equal(text_type(qp.parse("hello c:matt")), "((a:hello OR c:hello) AND (c:matt OR a:matt))")

    ana = analysis.RegexAnalyzer(r"\w+") | analysis.DoubleMetaphoneFilter()
    fmt = formats.Frequency()
    schema = fields.Schema(name=fields.KEYWORD, name_phone=fields.FieldType(fmt, ana, multitoken_query="or"))
    qp = qparser.QueryParser("name", schema)
    qp.add_plugin(plugins.CopyFieldPlugin({"name": "name_phone"}))
    assert_equal(text_type(qp.parse(u("spruce view"))), "((name:spruce OR name_phone:SPRS) AND (name:view OR name_phone:F OR name_phone:FF))")
Ejemplo n.º 2
0
    def test_vector_phrase(self):
        ana = analysis.StandardAnalyzer()
        ftype = fields.FieldType(formats.Frequency(ana),
                                 formats.Positions(ana),
                                 scorable=True)
        schema = fields.Schema(name=fields.ID(stored=True), value=ftype)
        storage = RamStorage()
        ix = storage.create_index(schema)
        writer = ix.writer()
        writer.add_document(name=u"A",
                            value=u"Little Miss Muffet sat on a tuffet")
        writer.add_document(name=u"B", value=u"Miss Little Muffet tuffet")
        writer.add_document(name=u"C", value=u"Miss Little Muffet tuffet sat")
        writer.add_document(
            name=u"D",
            value=u"Gibberish blonk falunk miss muffet sat tuffet garbonzo")
        writer.add_document(name=u"E", value=u"Blah blah blah pancakes")
        writer.commit()

        searcher = ix.searcher()

        def names(results):
            return sorted([fields['name'] for fields in results])

        q = query.Phrase("value",
                         [u"little", u"miss", u"muffet", u"sat", u"tuffet"])
        sc = q.scorer(searcher)
        self.assertEqual(sc.__class__.__name__, "VectorPhraseScorer")

        self.assertEqual(names(searcher.search(q)), ["A"])

        q = query.Phrase("value", [u"miss", u"muffet", u"sat", u"tuffet"])
        self.assertEqual(names(searcher.search(q)), ["A", "D"])

        q = query.Phrase("value", [u"falunk", u"gibberish"])
        self.assertEqual(names(searcher.search(q)), [])

        q = query.Phrase("value", [u"gibberish", u"falunk"], slop=2)
        self.assertEqual(names(searcher.search(q)), ["D"])

        #q = query.Phrase("value", [u"blah"] * 4)
        #self.assertEqual(names(searcher.search(q)), []) # blah blah blah blah

        q = query.Phrase("value", [u"blah"] * 3)
        self.assertEqual(names(searcher.search(q)), ["E"])
Ejemplo n.º 3
0
def get_index():
    global _ix

    if _ix is not None:
        return _ix

    charfield = fields.FieldType(formats.Characters(),
                                 analysis.SimpleAnalyzer(),
                                 scorable=True, stored=True)
    schema = fields.Schema(text=charfield)
    st = RamStorage()
    _ix = st.create_index(schema)

    w = _ix.writer()
    for ls in permutations(domain, 4):
        w.add_document(text=u(" ").join(ls), _stored_text=ls)
    w.commit()

    return _ix
Ejemplo n.º 4
0
def _roundtrip(content, format_, astype, ana=None):
    with TempStorage("roundtrip") as st:
        codec = default_codec()
        seg = codec.new_segment(st, "")
        ana = ana or analysis.StandardAnalyzer()
        field = fields.FieldType(format=format_, analyzer=ana)

        fw = codec.field_writer(st, seg)
        fw.start_field("f1", field)
        for text, _, weight, valuestring in sorted(field.index(content)):
            fw.start_term(text)
            fw.add(0, weight, valuestring, None)
            fw.finish_term()
        fw.finish_field()
        fw.close()

        tr = codec.terms_reader(st, seg)
        ps = []
        for fieldname, btext in tr.terms():
            m = tr.matcher(fieldname, btext, format_)
            ps.append((field.from_bytes(btext), m.value_as(astype)))
        tr.close()
        return ps