Esempio n. 1
0
def test_add_spelling():
    """Exercise ``add_spelling`` on an index whose fields start without word graphs.

    After the initial commit the reader must report no word graph for either
    field; after ``add_spelling`` runs it must report one for both. The
    suggestions produced by ``ReaderCorrector`` are then checked per field.
    """
    fieldnames = ("text1", "text2")
    rows = [
        (u("render zorro kaori postal"), u("alfa")),
        (u("reader zebra koala pastry"), u("alpa")),
        (u("leader libra ooala paster"), u("alpha")),
        (u("feeder lorry zoala baster"), u("olfo")),
    ]

    schema = fields.Schema(text1=fields.TEXT, text2=fields.TEXT)
    ix = RamStorage().create_index(schema)
    writer = ix.writer()
    for first, second in rows:
        writer.add_document(text1=first, text2=second)
    writer.commit()

    # Before add_spelling: neither field has a word graph.
    with ix.reader() as reader:
        for name in fieldnames:
            assert not reader.has_word_graph(name)

    from whoosh.writing import add_spelling
    add_spelling(ix, list(fieldnames))

    # After add_spelling: both fields have one, and correctors work on them.
    with ix.reader() as reader:
        for name in fieldnames:
            assert reader.has_word_graph(name)

        corrector = spelling.ReaderCorrector(reader, "text1")
        within_one = corrector.suggest(u("kaola"), maxdist=1)
        assert within_one == [u('koala')]
        within_two = corrector.suggest(u("kaola"), maxdist=2)
        assert within_two == [u('koala'), u('kaori'), u('ooala'), u('zoala')]

        corrector = spelling.ReaderCorrector(reader, "text2")
        found = corrector.suggest(u("alfo"), maxdist=1)
        assert found == [u("alfa"), u("olfo")]
Esempio n. 2
0
def test_reader_corrector_nograph():
    """Check ``ReaderCorrector`` suggestions on a plain ``fields.TEXT`` field.

    The schema passes ``fields.TEXT`` without any spelling option; the test
    indexes four documents and compares the suggestions for ``"kaola"`` at
    edit distances 1 and 2 against fixed lists.
    """
    documents = [
        u("render zorro kaori postal"),
        u("reader zebra koala pastry"),
        u("leader libra ooala paster"),
        u("feeder lorry zoala baster"),
    ]

    schema = fields.Schema(text=fields.TEXT)
    ix = RamStorage().create_index(schema)
    writer = ix.writer()
    for body in documents:
        writer.add_document(text=body)
    writer.commit()

    with ix.reader() as reader:
        corrector = spelling.ReaderCorrector(reader, "text")

        within_one = corrector.suggest(u("kaola"), maxdist=1)
        assert within_one == ['koala']

        within_two = corrector.suggest(u("kaola"), maxdist=2)
        assert within_two == ['koala', 'kaori', 'ooala', 'zoala']
Esempio n. 3
0
def test_reader_corrector():
    """Check ``ReaderCorrector`` suggestions against a ``TempIndex``.

    Four documents are written to a temporary index, then a corrector built
    from the reader, the field name and the schema's field object is queried
    for ``"koala"`` (maxdist=1) and ``"kaola"`` (maxdist=2).
    """
    documents = (
        u"render zorro kaori postal",
        u"reader zebra koala pastry",
        u"leader libra oola paster",
        u"feeder lorry zoala baster",
    )
    schema = fields.Schema(text=fields.TEXT())

    with TempIndex(schema) as ix:
        # The writer is used as a context manager here, as in the original.
        with ix.writer() as writer:
            for body in documents:
                writer.add_document(text=body)

        with ix.reader() as reader:
            corrector = spelling.ReaderCorrector(reader, "text", schema["text"])

            near_matches = corrector.suggest(u"koala", maxdist=1)
            assert near_matches == [u'koala', u"zoala"]

            expected = [u'kaori', u'koala', u'oola']
            suggestions = corrector.suggest(u"kaola", maxdist=2)
            assert suggestions == expected
Esempio n. 4
0
def test_unicode_spelling():
    """Check that ``ReaderCorrector`` suggests a non-ASCII term from an ID field.

    Three non-ASCII words are indexed in a ``TempIndex``; a three-character
    query is expected to yield exactly the second indexed word.
    """
    words = [
        u"\u0924\u092a\u093e\u0907\u0939\u0930\u0941",
        u"\u65e5\u672c",
        u"\uc774\uc124\ud76c",
    ]
    schema = fields.Schema(text=fields.ID())

    with TempIndex(schema) as ix:
        with ix.writer() as writer:
            for term in words:
                writer.add_document(text=term)

        with ix.reader() as reader:
            corrector = spelling.ReaderCorrector(reader, "text", schema["text"])
            suggestions = corrector.suggest(u"\u65e5\u672e\u672c")
            assert suggestions == [u"\u65e5\u672c"]
Esempio n. 5
0
def test_reader_corrector():
    """Check ``ReaderCorrector`` on a TEXT field declared with ``spelling=True``.

    The reader must report a word graph for the field, and the suggestions
    for ``"kaola"`` at edit distances 1 and 2 must equal the fixed lists.
    """
    documents = [
        u("render zorro kaori postal"),
        u("reader zebra koala pastry"),
        u("leader libra ooala paster"),
        u("feeder lorry zoala baster"),
    ]

    schema = fields.Schema(text=fields.TEXT(spelling=True))
    ix = RamStorage().create_index(schema)
    writer = ix.writer()
    for body in documents:
        writer.add_document(text=body)
    writer.commit()

    with ix.reader() as reader:
        assert reader.has_word_graph("text")
        corrector = spelling.ReaderCorrector(reader, "text")

        within_one = corrector.suggest(u("kaola"), maxdist=1)
        assert_equal(within_one, [u('koala')])

        within_two = corrector.suggest(u("kaola"), maxdist=2)
        assert_equal(
            within_two,
            [u('koala'), u('kaori'), u('ooala'), u('zoala')],
        )
Esempio n. 6
0
def test_unicode_spelling():
    """Check word-graph contents and suggestions for non-ASCII ID terms.

    Three non-ASCII words are indexed in a field declared with
    ``spelling=True``. The strings flattened from the word graph (via the
    reader's graph cursor and via ``word_graph``) must equal the indexed
    words in order, and a three-character query must yield the second word.
    """
    words = [
        u("\u0924\u092a\u093e\u0907\u0939\u0930\u0941"),
        u("\u65e5\u672c"),
        u("\uc774\uc124\ud76c"),
    ]
    schema = fields.Schema(text=fields.ID(spelling=True))

    ix = RamStorage().create_index(schema)
    with ix.writer() as writer:
        for term in words:
            writer.add_document(text=term)

    with ix.reader() as reader:
        assert reader.has_word_graph("text")

        # Read the graph both through the private cursor and the public API.
        cursor = reader._get_graph().cursor("text")
        from_cursor = list(cursor.flatten_strings())
        assert from_cursor == words
        from_graph = list(reader.word_graph("text").flatten_strings())
        assert from_graph == words

        corrector = spelling.ReaderCorrector(reader, "text")
        suggestions = corrector.suggest(u("\u65e5\u672e\u672c"))
        assert suggestions == [u("\u65e5\u672c")]