def test_find_self():
    """Check that the first suggestion for a stored word is not that word.

    Builds a graph file from four words in a ``RamStorage`` and asks a
    ``GraphCorrector`` for suggestions for each of them.
    """
    words = sorted(u("book bake bike bone").split())
    storage = RamStorage()
    graph_file = storage.create_file("test")
    spelling.wordlist_to_graph_file(words, graph_file)

    reader = dawg.GraphReader(storage.open_file("test"))
    corrector = spelling.GraphCorrector(reader)
    # Same order and same literals as the word list source string.
    for word in ("book", "bake", "bike", "bone"):
        top_suggestion = corrector.suggest(word)[0]
        assert_not_equal(top_suggestion, word)
def test_insert_bytes():
    """Insert byte-string keys and check ``flatten()`` yields them back."""
    # This test is only meaningful on Python 3
    keys = [b("alfa"), b("bravo"), b("charlie")]

    storage = RamStorage()
    writer = dawg.GraphWriter(storage.create_file("test"))
    writer.start_field("test")
    for item in keys:
        writer.insert(item)
    writer.close()

    reader = dawg.GraphReader(storage.open_file("test"))
    cursor = reader.cursor()
    flattened = list(cursor.flatten())
    assert_equal(flattened, keys)
def _fst_roundtrip(domain, t):
    """Write ``domain`` to a graph file and assert it reads back unchanged.

    :param domain: iterable of ``(key, value)`` pairs; it is iterated once
        for writing and then compared with ``assert_equal`` against
        ``list(cursor.flatten_v())``.
    :param t: passed as ``vtype`` to both ``GraphWriter`` and
        ``GraphReader``.
    """
    with TempStorage() as st:
        f = st.create_file("test")
        gw = dawg.GraphWriter(f, vtype=t)
        gw.start_field("_")
        for key, value in domain:
            gw.insert(key, value)
        gw.finish_field()
        gw.close()

        f = st.open_file("test")
        try:
            gr = dawg.GraphReader(f, vtype=t)
            cur = dawg.Cursor(gr)
            assert_equal(list(cur.flatten_v()), domain)
        finally:
            # Close the file even when the comparison fails, so it is not
            # still open when the TempStorage context exits.
            f.close()
def test_insert_unicode():
    """Insert non-ASCII text keys and check ``flatten_strings()`` returns them."""
    keys = [
        u("\u280b\u2817\u2801\u281d\u2809\u2811"),
        u("\u65e5\u672c"),
        u("\uc774\uc124\ud76c"),
    ]

    storage = RamStorage()
    writer = dawg.GraphWriter(storage.create_file("test"))
    writer.start_field("test")
    for item in keys:
        writer.insert(item)
    writer.close()

    reader = dawg.GraphReader(storage.open_file("test"))
    cursor = reader.cursor()
    strings = list(cursor.flatten_strings())
    assert_equal(strings, keys)
def test_within_unicode():
    """Check ``dawg.within`` on a non-ASCII query yields only the third key.

    Three non-ASCII keys are written to a graph; the query shares its first
    and last characters with the third key.
    """
    keys = [
        u("\u280b\u2817\u2801\u281d\u2809\u2811"),
        u("\u65e5\u672c"),
        u("\uc774\uc124\ud76c"),
    ]

    storage = RamStorage()
    writer = dawg.GraphWriter(storage.create_file("test"))
    writer.start_field("test")
    for item in keys:
        writer.insert(item)
    writer.close()

    reader = dawg.GraphReader(storage.open_file("test"))
    query = u("\uc774.\ud76c")
    expected = [u("\uc774\uc124\ud76c")]
    matches = list(dawg.within(reader, query))
    assert_equal(matches, expected)
def test_fields():
    """Write two fields to one graph file and read each back from its root.

    Field ``f1`` holds ``a``, ``aa``, ``ab`` and field ``f2`` holds ``ba``,
    ``baa``, ``bab``. A separate cursor is opened on each field's root and
    must yield exactly that field's keys.
    """
    with TempStorage() as st:
        f = st.create_file("test")
        gw = dawg.GraphWriter(f)
        gw.start_field("f1")
        gw.insert("a")
        gw.insert("aa")
        gw.insert("ab")
        gw.finish_field()
        gw.start_field("f2")
        gw.insert("ba")
        gw.insert("baa")
        gw.insert("bab")
        # No explicit finish_field() for "f2": close() is called directly.
        # NOTE(review): presumably close() finalizes the open field - confirm.
        gw.close()

        gr = dawg.GraphReader(st.open_file("test"))
        try:
            cur1 = dawg.Cursor(gr, gr.root("f1"))
            cur2 = dawg.Cursor(gr, gr.root("f2"))
            assert_equal(list(cur1.flatten_strings()), ["a", "aa", "ab"])
            assert_equal(list(cur2.flatten_strings()), ["ba", "baa", "bab"])
        finally:
            # Close the reader even when an assertion fails, so it is not
            # still open when the TempStorage context exits.
            gr.close()
def greader(st):
    """Return a ``dawg.GraphReader`` over the file named "test" in ``st``."""
    graph_file = st.open_file("test")
    return dawg.GraphReader(graph_file)
def words_to_corrector(words):
    """Build a ``spelling.GraphCorrector`` from ``words`` using a ``RamStorage``.

    ``words`` is written to a file named "test" with
    ``spelling.wordlist_to_graph_file``; that file is then reopened and
    wrapped in a ``dawg.GraphReader`` for the corrector.
    """
    storage = RamStorage()
    out_file = storage.create_file("test")
    spelling.wordlist_to_graph_file(words, out_file)

    in_file = storage.open_file("test")
    reader = dawg.GraphReader(in_file)
    return spelling.GraphCorrector(reader)