def test_alternation():
    """Check alternation() on the automata built by make_nfa1() and make_nfa2().

    The combined automaton must accept "x" and "aab" and must reject "xaab".
    """
    left_nfa, left_q0, left_f = nfa_to_triple(make_nfa1())
    right_nfa, right_q0, right_f = nfa_to_triple(make_nfa2())
    nfa, _, _ = alternation(
        left_nfa, left_q0, left_f,
        right_nfa, right_q0, right_f,
    )
    # (word, expected result of accepts), checked in this order.
    cases = (
        ("xaab", False),
        ("x", True),
        ("aab", True),
    )
    for word, expected in cases:
        assert accepts(word, nfa) == expected
def test_one_or_more():
    """Check one_or_more() applied to the automaton built by make_nfa1().

    The resulting automaton must accept "x" and "xx" and must reject
    the empty word and "a".
    """
    base_nfa, base_q0, base_f = nfa_to_triple(make_nfa1())
    nfa, _, _ = one_or_more(base_nfa, base_q0, base_f)
    # (word, expected result of accepts), checked in this order.
    cases = (
        ("", False),
        ("x", True),
        ("xx", True),
        ("a", False),
    )
    for word, expected in cases:
        assert accepts(word, nfa) == expected
def test_thompson_compile_nfa_alternation():
    """Check the automaton compiled from the regular expression "a*|b".

    "bbbbb" must be rejected; "b" and "aaaaaa" must be accepted.
    """
    nfa, _, _ = thompson_compile_nfa("a*|b")
    if in_ipynb():
        ipynb_display_graph(nfa)
    assert not accepts("bbbbb", nfa)
    for word in ("b", "aaaaaa"):
        assert accepts(word, nfa)
def test_thompson_compile_nfa_one_or_more():
    """Check the automaton compiled from the regular expression "(ab+)+".

    The empty word and "b" must be rejected; "abbbbb" and its threefold
    repetition must be accepted.
    """
    nfa, _, _ = thompson_compile_nfa("(ab+)+")
    if in_ipynb():
        ipynb_display_graph(nfa)
    rejected = ("", "b")
    accepted = ("abbbbb", "abbbbbabbbbbabbbbb")
    for word in rejected:
        assert not accepts(word, nfa)
    for word in accepted:
        assert accepts(word, nfa)
def test_parse_bracket_custom():
    """Check parse_bracket() and bracket() on the expression "[X-Z03a-e]".

    Each symbol of "XYZ03abcde" must be accepted and each symbol of
    "ABC12456789fghi" must be rejected.
    """
    nfa, _, _ = bracket(parse_bracket("[X-Z03a-e]"))
    # (symbols to try one by one, exact boolean that accepts must return)
    groups = (
        ("XYZ03abcde", True),
        ("ABC12456789fghi", False),
    )
    for symbols, expected in groups:
        for symbol in symbols:
            assert accepts(symbol, nfa) is expected
def test_thompson_compile_nfa_concatenation():
    """Check the automaton compiled from the regular expression "a+b+".

    "abab", "aaa" and "bbb" must be rejected; "ab" and "aaaaaabbbbbb"
    must be accepted.
    """
    nfa, _, _ = thompson_compile_nfa("a+b+")
    if in_ipynb():
        ipynb_display_graph(nfa)
    for word in ("abab", "aaa", "bbb"):
        assert not accepts(word, nfa)
    for word in ("ab", "aaaaaabbbbbb"):
        assert accepts(word, nfa)
def test_thompson_compile_nfa_bracket_repetitions():
    """Check repetition braces on "[x-z]{1,3}" and on "x{3}".

    For "[x-z]{1,3}", a list of one- to three-symbol words over x, y, z
    must be accepted, while "", "xxxx" and "aaa" must be rejected.
    For "x{3}", the word "xxx" must be accepted.
    """
    nfa, _, _ = thompson_compile_nfa("[x-z]{1,3}")
    if in_ipynb():
        ipynb_display_graph(nfa)

    accepted = ["x", "y", "xx", "xy", "zy", "xxx", "yyy", "zzz", "xyz", "zyx"]
    rejected = ["", "xxxx", "aaa"]
    # accepts must return exactly True / False, hence the identity checks.
    for word in accepted:
        assert accepts(word, nfa) is True
    for word in rejected:
        assert accepts(word, nfa) is False

    exact_nfa, _, _ = thompson_compile_nfa("x{3}")
    assert accepts("xxx", exact_nfa)
def test_thompson_compile_nfa_repetition():
    """Check the automaton compiled from the regular expression "((ab){3})*".

    For i in 0..6, the word made of i copies of "ab" must be accepted
    exactly when i is a multiple of 3.
    """
    nfa, _, _ = thompson_compile_nfa("((ab){3})*")
    if in_ipynb():
        ipynb_display_graph(nfa)
    for i in range(7):
        expected = i % 3 == 0
        obtained = accepts("ab" * i, nfa)
        assert obtained == expected, f"w = {'ab' * i}, i = {i}"
def test_repetition():
    """Check repetition() with m = 4 on the automaton built by make_nfa1().

    Among the words made of 0 to 9 copies of "x", only the one with
    exactly m copies must be accepted.
    """
    m = 4
    base_nfa, base_q0, base_f = nfa_to_triple(make_nfa1())
    nfa, _, _ = repetition(base_nfa, base_q0, base_f, m)
    # Exactly m repetitions: every other count must be rejected.
    for count in range(10):
        word = "x" * count
        assert accepts(word, nfa) == (count == m)
def test_repetition_range():
    """Check repetition_range() on literal("a") for several (m, n) bounds.

    For each bound pair and each i in 0..9, the word made of i copies of
    "a" must be accepted exactly when m <= i and, unless n is None,
    i <= n.
    """
    a = "a"
    bounds = [(3, 5), (0, 3), (3, 3), (3, None)]
    for (m, n) in bounds:
        base_nfa, base_q0, base_f = literal(a)
        nfa, _, _ = repetition_range(base_nfa, base_q0, base_f, m, n)
        for i in range(10):
            w = a * i
            # n is None means that only the lower bound is checked.
            within_upper = n is None or i <= n
            expected = m <= i and within_upper
            obtained = accepts(w, nfa)
            assert obtained == expected, f"(m, n) = {(m, n)} i = {i} w = {w}"
def test_escaped_operator():
    """Check that a backslash-escaped operator is matched as a plain character.

    Three checks are made:

    - "a\\?b" (escaped "?") accepts "a?b" and rejects "ab" and "b", whereas
      "a?b" (unescaped "?") rejects "a?b" and accepts "ab" and "b";
    - each escaped operator on its own accepts the bare operator character;
    - the concatenation of all escaped operators accepts the concatenation
      of the bare operator characters.
    """
    # Escaped "?": the question mark is an ordinary symbol.
    nfa, _, _ = thompson_compile_nfa("a\\?b")
    for word, expected in (("a?b", True), ("ab", False), ("b", False)):
        assert accepts(word, nfa) == expected

    # Unescaped "?": the question mark is an operator applied to "a".
    nfa, _, _ = thompson_compile_nfa("a?b")
    for word, expected in (("a?b", False), ("ab", True), ("b", True)):
        assert accepts(word, nfa) == expected

    # Every escaped operator, taken alone, matches its bare character.
    escaped_operators = (
        r"\|", r"\.", r"\*", r"\+", r"\(", r"\)", r"\{", r"\}", r"\[", r"\]"
    )
    for regexp in escaped_operators:
        nfa, _, _ = thompson_compile_nfa(regexp)
        assert accepts(regexp.replace("\\", ""), nfa)

    # All escaped operators at once. The result must be asserted,
    # otherwise this check can never fail.
    regexp = r"\|\.\*\+\(\)\{\}\[\]"
    nfa, _, _ = thompson_compile_nfa(regexp)
    assert accepts(regexp.replace("\\", ""), nfa)
def test_escaped_classes():
    r"""Check the escaped classes \d, \w, \s and their upper-case versions.

    For a lower-case class, the expected symbols are those returned by
    parse_escaped(); for an upper-case class, they are the complement
    (within DEFAULT_ALPHABET) of the symbols of the lower-case class.
    Every symbol of the alphabet is then tested against the automaton
    compiled from the class.
    """
    whole_alphabet = DEFAULT_ALPHABET
    escaped_classes = [r"\d", r"\w", r"\s", r"\D", r"\W", r"\S"]
    map_escape_allowed = {}
    for r in escaped_classes:
        map_escape_allowed[r] = set(parse_escaped(r, whole_alphabet))

    for regexp in escaped_classes:
        lowered = regexp.lower()
        allowed = map_escape_allowed[lowered]
        if lowered != regexp:
            # Upper-case class: complement of the lower-case class.
            allowed = set(whole_alphabet) - allowed
        nfa, _, _ = thompson_compile_nfa(regexp, whole_alphabet)
        for a in whole_alphabet:
            assert accepts(a, nfa) == (a in allowed), \
                f"regexp = {regexp} a = '{a}' ({ord(a)}) allowed = '{allowed}' obtained = {accepts(a, nfa)} expected = {a in allowed}"
def test_thompson_compile_nfa_zero_or_one():
    """Check the automaton compiled from the regular expression "(ab?)*".

    "", "a", "ab" and "aba" must be accepted; "b" and "abb" must be
    rejected.
    """
    nfa, _, _ = thompson_compile_nfa("(ab?)*")
    if in_ipynb():
        ipynb_display_graph(nfa)
    # (word, whether it must be accepted), checked in this order.
    cases = (
        ("", True),
        ("b", False),
        ("a", True),
        ("ab", True),
        ("aba", True),
        ("abb", False),
    )
    for word, should_match in cases:
        matched = accepts(word, nfa)
        assert matched if should_match else not matched
def test_thompson_compile_nfa_escaped_operators():
    r"""Check a regular expression made of escaped operators followed by "aa".

    The automaton compiled from r"\|\.\*\+\(\)\{\}\[\]aa" must accept the
    same string with every backslash removed.
    """
    regexp = r"\|\.\*\+\(\)\{\}\[\]aa"
    nfa, _, _ = thompson_compile_nfa(regexp)
    expected_word = regexp.replace("\\", "")
    # The result must be asserted, otherwise this test checks nothing
    # beyond "compilation does not raise".
    assert accepts(expected_word, nfa)
    if in_ipynb():
        ipynb_display_graph(nfa)
def test_thompson_compile_nfa():
    """Check that "(a?b)*?c+d" accepts the word "babbbababcccccd"."""
    regexp = "(a?b)*?c+d"
    word = "babbbababcccccd"
    nfa, _, _ = thompson_compile_nfa(regexp)
    if in_ipynb():
        ipynb_display_graph(nfa)
    assert accepts(word, nfa)