def test_shared_suffix():
    """Check that "blo..." and "glo..." lead to the same graph node.

    Two independent cursors over one reader follow the prefixes ``blo`` and
    ``glo``; the arcs on top of their stacks must have equal ``target``
    values (presumably because the common "wing" suffix is stored once).
    """
    reader = greader(gwrite(enlist("blowing blue glowing")))

    left = dawg.Cursor(reader)
    right = dawg.Cursor(reader)

    left.find_path(b("blo"))
    right.find_path(b("glo"))

    left_target = left.stack[-1].target
    right_target = right.stack[-1].target
    assert_equal(left_target, right_target)
def test_words():
    """Write a word list to temporary storage and read it back unchanged.

    The strings yielded by ``Cursor.flatten_strings()`` must equal the
    original list, in the same order.
    """
    words = enlist("alfa alpaca amtrak bellow fellow fiona zebulon")

    with TempStorage() as st:
        gwrite(words, st)
        gr = greader(st)
        try:
            cur = dawg.Cursor(gr)
            assert_equal(list(cur.flatten_strings()), words)
        finally:
            # Close the reader even when the assertion fails, so the open
            # file does not outlive the TempStorage context.
            gr.close()
def test_fields():
    """Store two separately rooted fields in one graph file.

    Field ``f1`` and field ``f2`` each receive three keys.  A cursor started
    at ``gr.root("f1")`` must yield only the ``f1`` keys, and likewise for
    ``f2``.
    """
    with TempStorage() as st:
        f = st.create_file("test")
        gw = dawg.GraphWriter(f)

        gw.start_field("f1")
        gw.insert("a")
        gw.insert("aa")
        gw.insert("ab")
        gw.finish_field()

        gw.start_field("f2")
        gw.insert("ba")
        gw.insert("baa")
        gw.insert("bab")
        # NOTE(review): "f2" is closed without an explicit finish_field();
        # presumably close() finishes the open field -- confirm against
        # GraphWriter before changing this.
        gw.close()

        gr = dawg.GraphReader(st.open_file("test"))
        try:
            cur1 = dawg.Cursor(gr, gr.root("f1"))
            cur2 = dawg.Cursor(gr, gr.root("f2"))
            assert_equal(list(cur1.flatten_strings()), ["a", "aa", "ab"])
            assert_equal(list(cur2.flatten_strings()), ["ba", "baa", "bab"])
        finally:
            # Close the reader even when an assertion fails, so the open
            # file does not outlive the TempStorage context.
            gr.close()
def _fst_roundtrip(domain, t):
    """Write ``(key, value)`` pairs to a graph and check they read back equal.

    :param domain: list of ``(key, value)`` pairs.  It is iterated once for
        writing and then compared directly against ``list(cur.flatten_v())``,
        so it must be a list (not a one-shot iterator) already in the order
        the cursor is expected to yield.
    :param t: value type, passed as ``vtype`` to both ``GraphWriter`` and
        ``GraphReader``.
    """
    with TempStorage() as st:
        f = st.create_file("test")
        gw = dawg.GraphWriter(f, vtype=t)
        gw.start_field("_")
        for key, value in domain:
            gw.insert(key, value)
        gw.finish_field()
        gw.close()

        f = st.open_file("test")
        try:
            gr = dawg.GraphReader(f, vtype=t)
            cur = dawg.Cursor(gr)
            assert_equal(list(cur.flatten_v()), domain)
        finally:
            # Close the file even when the assertion fails, so the open
            # handle does not outlive the TempStorage context.
            f.close()
def test_inactive_raise():
    """Check that a spent cursor raises InactiveCursor from its operations.

    The cursor is advanced with ``next_arc()`` until ``is_active()`` is
    false; each operation below must then raise ``dawg.InactiveCursor``.
    """
    storage = gwrite(enlist("alfa bravo charlie"))
    cursor = dawg.Cursor(greader(storage))

    # Drain the cursor.
    while cursor.is_active():
        cursor.next_arc()

    def expect_inactive(func, *args):
        # Shorthand: calling func(*args) must raise InactiveCursor.
        assert_raises(dawg.InactiveCursor, func, *args)

    expect_inactive(cursor.label)
    expect_inactive(cursor.prefix)
    expect_inactive(cursor.prefix_bytes)
    # For the list(...) checks the cursor method itself is called right
    # here, outside assert_raises; only consuming its result is guarded.
    expect_inactive(list, cursor.peek_key())
    expect_inactive(cursor.peek_key_bytes)
    expect_inactive(cursor.stopped)
    expect_inactive(cursor.value)
    expect_inactive(cursor.accept)
    expect_inactive(cursor.at_last_arc)
    expect_inactive(cursor.next_arc)
    expect_inactive(cursor.follow)
    expect_inactive(cursor.switch_to, b("a"))
    expect_inactive(cursor.skip_to, b("a"))
    expect_inactive(list, cursor.flatten())
    expect_inactive(list, cursor.flatten_v())
    expect_inactive(list, cursor.flatten_strings())
    expect_inactive(cursor.find_path, b("a"))
def test_random():
    """Round-trip 1000 random byte-string keys through a graph.

    First checks that ``Cursor.flatten()`` yields exactly the sorted, unique
    keys.  Then looks every key up in shuffled order with ``find_path()``
    and checks that ``prefix_bytes()`` returns the key.

    NOTE: the random generator is not seeded here, so a failing key set is
    not reproducible from the test alone.
    """

    def randstring():
        # 1 to 10 random bytes, each anywhere in 0-255.
        length = random.randint(1, 10)
        a = array("B", (random.randint(0, 255) for _ in xrange(length)))
        return array_tobytes(a)

    keys = sorted(randstring() for _ in xrange(1000))

    with TempStorage() as st:
        gwrite(keys, st)
        gr = greader(st)
        try:
            cur = dawg.Cursor(gr)
            found = list(cur.flatten())
            expected = sorted(set(keys))

            # Compare key by key first so a mismatch reports its position.
            for i, (k1, k2) in enumerate(zip(found, expected)):
                assert k1 == k2, "%s: %r != %r" % (i, k1, k2)
            # zip() stops at the shorter input, so on its own the loop
            # above would pass even if the cursor yielded too few (or too
            # many) keys.  Check the counts explicitly.
            assert_equal(len(found), len(expected))

            sample = list(keys)
            random.shuffle(sample)
            for key in sample:
                cur.reset()
                cur.find_path(key)
                assert_equal(cur.prefix_bytes(), key)
        finally:
            # Close the reader even when an assertion fails, so the open
            # file does not outlive the TempStorage context.
            gr.close()
def test_duplicate_keys():
    """Check that a key inserted several times is read back only once.

    "bravo" is written three times in a row; flattening the graph must
    yield it a single time between "alfa" and "charlie".
    """
    words = enlist("alfa bravo bravo bravo charlie")
    reader = greader(gwrite(words))
    cursor = dawg.Cursor(reader)

    found = list(cursor.flatten_strings())
    assert_equal(found, ["alfa", "bravo", "charlie"])