Example #1
0
def test_find_self():
    """Check that a word stored in the graph is not its own first suggestion."""
    words = u("book bake bike bone").split()
    words.sort()
    storage = RamStorage()
    graph_file = storage.create_file("test")
    spelling.wordlist_to_graph_file(words, graph_file)

    reader = dawg.GraphReader(storage.open_file("test"))
    corrector = spelling.GraphCorrector(reader)
    # Query every word that was written to the graph, one at a time.
    for word in ("book", "bake", "bike", "bone"):
        assert_not_equal(corrector.suggest(word)[0], word)
Example #2
0
def test_insert_bytes():
    """Check that byte-string keys written to a graph are read back unchanged."""
    # This test is only meaningful on Python 3
    domain = [b("alfa"), b("bravo"), b("charlie")]

    storage = RamStorage()
    writer = dawg.GraphWriter(storage.create_file("test"))
    writer.start_field("test")
    for item in domain:
        writer.insert(item)
    writer.close()

    reader = dawg.GraphReader(storage.open_file("test"))
    cursor = reader.cursor()
    flattened = list(cursor.flatten())
    assert_equal(flattened, domain)
Example #3
0
def _fst_roundtrip(domain, t):
    """Write ``domain`` to a graph file and assert it reads back unchanged.

    :param domain: ``(key, value)`` pairs, inserted in the order given. The
        pairs read back are compared for equality against this same object,
        so it should be a list.
    :param t: passed as ``vtype`` to both the ``GraphWriter`` and the
        ``GraphReader``.
    """
    with TempStorage() as st:
        gw = dawg.GraphWriter(st.create_file("test"), vtype=t)
        gw.start_field("_")
        for key, value in domain:
            gw.insert(key, value)
        gw.finish_field()
        gw.close()

        f = st.open_file("test")
        # Close the file even when the assertion (or reader construction)
        # fails, so it is not still open when the TempStorage context exits.
        try:
            gr = dawg.GraphReader(f, vtype=t)
            cur = dawg.Cursor(gr)
            assert_equal(list(cur.flatten_v()), domain)
        finally:
            f.close()
Example #4
0
def test_insert_unicode():
    """Check that non-ASCII unicode keys survive a write/read round trip."""
    first = u("\u280b\u2817\u2801\u281d\u2809\u2811")
    second = u("\u65e5\u672c")
    third = u("\uc774\uc124\ud76c")
    domain = [first, second, third]

    storage = RamStorage()
    writer = dawg.GraphWriter(storage.create_file("test"))
    writer.start_field("test")
    for item in domain:
        writer.insert(item)
    writer.close()

    reader = dawg.GraphReader(storage.open_file("test"))
    cursor = reader.cursor()
    found = list(cursor.flatten_strings())
    assert_equal(found, domain)
Example #5
0
def test_within_unicode():
    """Check that ``dawg.within`` finds a unicode key from a near-miss query.

    The query differs from the third stored key only in its middle character.
    """
    target = u("\uc774\uc124\ud76c")
    domain = [
        u("\u280b\u2817\u2801\u281d\u2809\u2811"),
        u("\u65e5\u672c"),
        target,
    ]

    storage = RamStorage()
    writer = dawg.GraphWriter(storage.create_file("test"))
    writer.start_field("test")
    for item in domain:
        writer.insert(item)
    writer.close()

    reader = dawg.GraphReader(storage.open_file("test"))
    query = u("\uc774.\ud76c")
    matches = list(dawg.within(reader, query))
    assert_equal(matches, [u("\uc774\uc124\ud76c")])
Example #6
0
def test_fields():
    """Check that two fields in one graph file are read back separately."""
    with TempStorage() as st:
        gw = dawg.GraphWriter(st.create_file("test"))
        gw.start_field("f1")
        gw.insert("a")
        gw.insert("aa")
        gw.insert("ab")
        gw.finish_field()
        gw.start_field("f2")
        gw.insert("ba")
        gw.insert("baa")
        gw.insert("bab")
        # "f2" is not finished explicitly; the assertions below depend on
        # close() completing the field that is still open.
        gw.close()

        gr = dawg.GraphReader(st.open_file("test"))
        # Close the reader even when an assertion fails, so it is not left
        # open when the TempStorage context exits.
        try:
            cur1 = dawg.Cursor(gr, gr.root("f1"))
            cur2 = dawg.Cursor(gr, gr.root("f2"))
            assert_equal(list(cur1.flatten_strings()), ["a", "aa", "ab"])
            assert_equal(list(cur2.flatten_strings()), ["ba", "baa", "bab"])
        finally:
            gr.close()
Example #7
0
def greader(st):
    """Return a ``dawg.GraphReader`` over the file named "test" in ``st``."""
    graph_file = st.open_file("test")
    return dawg.GraphReader(graph_file)
Example #8
0
def words_to_corrector(words):
    """Build a ``spelling.GraphCorrector`` from ``words`` using in-memory storage.

    The words are written to a graph file named "test" in a fresh
    ``RamStorage``, which is then reopened for reading.
    """
    storage = RamStorage()
    out_file = storage.create_file("test")
    spelling.wordlist_to_graph_file(words, out_file)

    reader = dawg.GraphReader(storage.open_file("test"))
    return spelling.GraphCorrector(reader)