def test_pattern_reduce_basic():
    """Reducing a parsed pattern equals the same regex parsed by a narrower class."""
    # Each row: (regex source, class whose parse() builds the expected value).
    cases = (
        ("ab|cd", pattern),  # pattern -> pattern: (ab|cd) -> (ab|cd)
        ("a{2}b{2}", conc),  # pattern -> conc
        ("a{2}", mult),  # pattern -> mult
        ("a", charclass),  # pattern -> charclass
    )
    for source, expected_cls in cases:
        assert pattern.parse(source).reduce() == expected_cls.parse(source)
def test_main_bug():
    """Check reduce() on alternations of multiplied 'a' against expected forms."""
    # Each row: (regex source, class used to parse the expectation, expected regex).
    cases = (
        ("a*", mult, "a*"),
        ("a|a*", mult, "a*"),
        ("a{1,2}|a{3,4}|bc", pattern, "a{1,4}|bc"),
        ("a{1,2}|bc|a{3,4}", pattern, "a{1,4}|bc"),
        ("a{1,2}|a{3,4}|a{5,6}|bc", pattern, "a{1,6}|bc"),
        ("a{1,2}|a{3}|a{5,6}", conc, "a{1,2}(a?|a{4})"),
        ("a{1,2}|a{3}|a{5,6}|bc", pattern, "a{1,3}|a{5,6}|bc"),
        ("a{1,2}|a{4}|a{5,6}", conc, "a{1,2}(a{3,4})?"),
        ("a{1,2}|a{4}|a{5,6}|bc", pattern, "a{1,2}|a{4,6}|bc"),
    )
    for source, expected_cls, expected_source in cases:
        assert parse(source).reduce() == expected_cls.parse(expected_source)

    # Same reduction when the alternation is built with the | operator.
    union = parse("a") | parse("a*")
    assert union.reduce() == mult.parse("a*")
def test_cardinality():
    """Check cardinality() and len() on small expressions, and overflow on '.*'."""
    # Each row: (parsing class, regex source, expected cardinality).
    counted = (
        (charclass, "[]", 0),
        (mult, "[]?", 1),
        (mult, "[]{0,6}", 1),
        (mult, "[ab]{3}", 8),
        (mult, "[ab]{2,3}", 12),
    )
    for parser_cls, source, expected_count in counted:
        assert parser_cls.parse(source).cardinality() == expected_count

    assert len(pattern.parse("abc|def(ghi|jkl)")) == 3

    # len() of ".*" is expected to raise OverflowError.
    try:
        len(pattern.parse(".*"))
    except OverflowError:
        pass
    else:
        assert False
def test_empty():
    """Check empty() on directly constructed and parsed expressions."""
    assert nothing.empty()
    assert charclass().empty()
    assert not dot.empty()

    # Each row: (parsing class, regex source, whether empty() should be truthy).
    parsed_cases = (
        (mult, "a{0}", False),
        (mult, "[]", True),
        (mult, "[]?", False),
        (conc, "a[]", True),
        (conc, "a[]?", False),
    )
    for parser_cls, source, should_be_empty in parsed_cases:
        assert bool(parser_cls.parse(source).empty()) is should_be_empty

    assert pattern().empty()
    for source in ("a{0}", "[]?"):
        assert not pattern.parse(source).empty()
def test_pattern_parsing():
    """Parsing 'abc|def(ghi|jkl)' yields the equivalent hand-built pattern tree."""

    def literal(chars):
        # One mult(charclass(ch), one) per character, in order.
        return [mult(charclass(ch), one) for ch in chars]

    parsed = pattern.parse("abc|def(ghi|jkl)")
    expected = pattern(
        conc(*literal("abc")),
        conc(
            *literal("def"),
            mult(
                pattern(
                    conc(*literal("ghi")),
                    conc(*literal("jkl")),
                ),
                one,
            ),
        ),
    )
    assert parsed == expected
def test_empty_conc_suppression():
    """A concatenation containing an unmatchable element reduces to '[]'."""
    reduced_from_text = pattern.parse("[]0\\d").reduce()
    assert reduced_from_text == charclass.parse("[]")

    unmatchable = conc(
        mult(pattern(), one),  # this mult can never actually match anything
        mult(charclass("0"), one),
        mult(charclass("0123456789"), one),
    )  # so neither can this conc
    assert pattern(unmatchable).reduce() == charclass.parse("[]")
def test_empty_conc_suppression():
    """A concatenation containing an unmatchable element reduces to '[]'.

    NOTE(review): this function name is defined more than once in this file;
    the later definition replaces the earlier one at import time.
    """
    # The backslash is doubled so the regex text is exactly ``[]0\d``. A bare
    # "\d" in a non-raw literal is an invalid escape sequence, which newer
    # Python versions warn about.
    assert pattern.parse("[]0\\d").reduce() == charclass.parse("[]")

    assert pattern(
        conc(
            mult(pattern(), one),  # this mult can never actually match anything
            mult(charclass("0"), one),
            mult(charclass("0123456789"), one),
        )  # so neither can this conc
    ).reduce() == charclass.parse("[]")
def test_pattern_commonconc():
    """Check the conc returned by pattern._commonconc() for several alternations."""
    # Each row: (alternation source, expected common conc source).
    cases = (
        ("aa|aa", "aa"),
        ("abc|aa", "a"),
        ("a|bc", ""),
        ("cf{1,2}|cf", "cf"),
        ("ZA|ZB|ZC", "Z"),
        ("Z+A|ZB|ZZC", "Z"),
        ("a{2}b|a+c", "a"),
    )
    for source, expected_common in cases:
        assert pattern.parse(source)._commonconc() == conc.parse(expected_common)
def test_pattern_beheading():
    """Check pattern.behead() with a leading conc for a few alternations."""
    # Each row: (alternation source, conc to behead, expected remainder).
    cases = (
        ("aa", "a", "a"),  # "aa".behead("a") = "a"
        ("abc|aa", "a", "a|bc"),  # "abc|aa".behead("a") = "a|bc"
        # "cf{1,2}|cf".behead("c") = "f{1,2}|f" (no simplification)
        ("cf{1,2}|cf", "c", "f{1,2}|f"),
    )
    for source, head, remainder in cases:
        beheaded = pattern.parse(source).behead(conc.parse(head))
        assert beheaded == pattern.parse(remainder)
def test_pattern_generator():
    """Check the order of strings produced by strings() on parsed expressions."""
    produced = pattern.parse("[ab]|[cde]").strings()
    for expected in ("a", "b", "c", "d", "e"):
        assert next(produced) == expected

    # After the five strings the generator must be exhausted.
    try:
        next(produced)
    except StopIteration:
        pass
    else:
        assert False

    # more complex
    produced = pattern.parse("abc|def(ghi|jkl)").strings()
    for expected in ("abc", "defghi", "defjkl"):
        assert next(produced) == expected

    produced = mult.parse("[0-9a-fA-F]{3,10}").strings()
    for expected in ("000", "001", "002"):
        assert next(produced) == expected
def test_pattern_fsm():
    """Check which inputs the FSMs built by pattern.to_fsm() accept."""

    def check(machine, cases):
        # Each case is (input, whether accepts() should be truthy), tested in order.
        for candidate, should_accept in cases:
            assert bool(machine.accepts(candidate)) is should_accept

    # "a[^a]"
    anota = pattern.parse("a[^a]").to_fsm()
    assert len(anota.states) == 3
    check(anota, (
        ("a", False),
        (["a"], False),
        ("b", False),
        (["b"], False),
        ("aa", False),
        (["a", "a"], False),
        ("ab", True),
        (["a", "b"], True),
        (["a", fsm.anything_else], True),
        ("ba", False),
        ("bb", False),
    ))

    # "0\\d"
    zero_digit = pattern.parse("0\\d").to_fsm(d.chars)
    check(zero_digit, (
        ("01", True),
        ("10", False),
    ))

    # "\\d{2}"
    two_digits = pattern.parse("\\d{2}").to_fsm(d.chars)
    check(two_digits, (
        ("", False),
        ("1", False),
        ("11", True),
        ("111", False),
    ))

    # abc|def(ghi|jkl)
    conventional = pattern.parse("abc|def(ghi|jkl)").to_fsm(w.chars)
    check(conventional, (
        ("a", False),
        ("ab", False),
        ("abc", True),
        ("abcj", False),
        ("defghi", True),
        ("defjkl", True),
    ))
def test_pattern_parsing():
    """Check parsing of a nested alternation and of group syntax variants."""

    def literal(chars):
        # One mult(charclass(ch), one) per character, in order.
        return [mult(charclass(ch), one) for ch in chars]

    parsed = pattern.parse("abc|def(ghi|jkl)")
    expected = pattern(
        conc(*literal("abc")),
        conc(
            *literal("def"),
            mult(
                pattern(
                    conc(*literal("ghi")),
                    conc(*literal("jkl")),
                ),
                one,
            ),
        ),
    )
    assert parsed == expected

    # Accept the "non-capturing group" syntax, "(?: ... )" but give it no
    # special significance
    for non_capturing, plain in (("(?:)", "()"), ("(?:abc|def)", "(abc|def)")):
        assert parse(non_capturing) == parse(plain)
    parse("(:abc)")  # should give no problems

    # Named groups
    named_group = pattern.parse("(?P<ng1>abc)")
    assert named_group == parse("(abc)")
def test_concatenation():
    """Check that + between every pairing of element types equals the parsed conc."""
    # Each row: (left class, left source, right class, right source, expected conc).
    cases = (
        (charclass, "a", charclass, "b", "ab"),
        (charclass, "a", mult, "b{0,8}", "ab{0,8}"),
        (charclass, "a", conc, "bc", "abc"),
        (charclass, "a", pattern, "b|cd", "a(b|cd)"),
        (mult, "b{0,8}", charclass, "c", "b{0,8}c"),
        (mult, "a{3,4}", mult, "b?", "a{3,4}b?"),
        (mult, "a{2}", conc, "bc", "a{2}bc"),
        (mult, "a{2,3}", pattern, "b|cd", "a{2,3}(b|cd)"),
        (conc, "ab", charclass, "c", "abc"),
        (conc, "ab", mult, "c*", "abc*"),
        (conc, "", conc, "", ""),
        (conc, "ab", conc, "cd", "abcd"),
        (conc, "za{2,3}", pattern, "b|cd", "za{2,3}(b|cd)"),
        (pattern, "a|bd", charclass, "c", "(a|bd)c"),
        (pattern, "b|cd", mult, "a{2,3}", "(b|cd)a{2,3}"),
        (pattern, "b|cd", conc, "za{2,3}", "(b|cd)za{2,3}"),
        (pattern, "a|bc", pattern, "c|de", "(a|bc)(c|de)"),
    )
    for left_cls, left_src, right_cls, right_src, expected_src in cases:
        joined = left_cls.parse(left_src) + right_cls.parse(right_src)
        assert joined == conc.parse(expected_src)
def test_recursive_pattern_reduction():
    """Reducing '0|(0|[1-9]|a{5,7})' equals the pattern parsed from '0|(\\d|a{5,7})'."""
    reduced = pattern.parse("0|(0|[1-9]|a{5,7})").reduce()
    # The backslash is doubled: a bare "\d" in a non-raw literal is an invalid
    # escape sequence that newer Python versions warn about. The regex text
    # passed to parse() is unchanged.
    assert reduced == pattern.parse("0|(\\d|a{5,7})")
def test_derive():
    """Check derive() of parsed expressions with respect to a leading string."""
    # Each row: (regex source, string passed to derive, expected class, expected regex).
    cases = (
        ("a+", "a", mult, "a*"),
        ("a+|b+", "a", mult, "a*"),
        ("abc|ade", "a", pattern, "bc|de"),
        ("abc|ade", "ab", charclass, "c"),
    )
    for source, prefix, expected_cls, expected_src in cases:
        derived = parse(source).derive(prefix)
        assert derived == expected_cls.parse(expected_src)
def test_common_prefix_pattern_reduction():
    """Reducing 'a{2}b|a+c' equals the conc parsed from 'a(ab|a*c)'."""
    reduced = pattern.parse("a{2}b|a+c").reduce()
    expected = conc.parse("a(ab|a*c)")
    assert reduced == expected
def test_epsilon_reduction():
    """Check reduce() on alternations that include an empty branch."""
    # Each row: (regex source, expected class, expected regex).
    exact_cases = (
        ("|(ab)*|def", pattern, "(ab)*|def"),
        ("|(ab)+|def", pattern, "(ab)*|def"),
        ("|.+", mult, ".*"),
    )
    for source, expected_cls, expected_src in exact_cases:
        assert parse(source).reduce() == expected_cls.parse(expected_src)

    # Either of the two forms is accepted for this input.
    reduced = parse("|a+|b+").reduce()
    acceptable = {pattern.parse("a+|b*"), pattern.parse("a*|b+")}
    assert reduced in acceptable
def test_pattern_commonconc_suffix():
    """Check _commonconc(suffix=True) and subtraction of that suffix."""
    # Each row: (alternation source, expected common suffix, expected remainder
    # after subtracting the suffix).
    cases = (
        ("a|bc", "", "a|bc"),  # a | bc -> emptystring; (a|bc) - () = (a|bc)
        ("aa|bca", "a", "a|bc"),  # (aa|bca) -> a; (aa|bca) - a = (a|bc)
        # xyza | abca | a -> a; (xyza|abca|a) - a = (xyz|abc|)
        ("xyza|abca|a", "a", "xyz|abc|"),
        # f{2,3}c, fc -> fc; (f{2,3}c|fc) - fc = (f{1,2}|)
        ("f{2,3}c|fc", "fc", "f{1,2}|"),
        ("aa", "aa", ""),  # (aa) -> aa; (aa) - aa = ()
    )
    for source, suffix, remainder in cases:
        assert pattern.parse(source)._commonconc(suffix=True) == conc.parse(suffix)
        assert pattern.parse(source) - conc.parse(suffix) == pattern.parse(remainder)
def test_bad_reduction_bug():
    """Guard against merging '0{2}|1{2}' into '[01]{2}'; check digit merging."""
    # DEFECT: "0{2}|1{2}" was erroneously reduced() to "[01]{2}"
    wrongly_merged = parse("[01]{2}")
    assert parse("0{2}|1{2}").reduce() != wrongly_merged

    # Each row: (regex source, expected reduced pattern).
    cases = (
        ("0|[1-9]|ab", "\\d|ab"),
        ("0|[1-9]|a{5,7}", "\\d|a{5,7}"),
        ("0|(0|[1-9]|a{5,7})", "0|(\\d|a{5,7})"),
    )
    for source, expected_src in cases:
        assert parse(source).reduce() == pattern.parse(expected_src)
def test_pattern_multiplication():
    """A pattern times a multiplier equals the mult parsed from the grouped regex."""
    repeated = pattern.parse("ab?|ba?") * multiplier.parse("{2,3}")
    expected = mult.parse("(ab?|ba?){2,3}")
    assert repeated == expected
def test_pattern_reduce_basic():
    """reduce() on a parsed pattern equals the same regex parsed by each listed class."""
    # Maps regex source -> class whose parse() builds the expected reduced value.
    expected_class_by_source = {
        "ab|cd": pattern,
        "a{2}b{2}": conc,
        "a{2}": mult,
        "a": charclass,
    }
    for regex, reduced_cls in expected_class_by_source.items():
        reduced = pattern.parse(regex).reduce()
        assert reduced == reduced_cls.parse(regex)
def test_concatenation():
    """Check + between element types, with and without a following reduce().

    Each case adds two parsed operands. Cases flagged ``True`` call reduce()
    on the sum before comparing it with the expected parsed value.
    """
    # Row layout: (left class, left source, right class, right source,
    #              reduce the sum first?, expected class, expected source).
    cases = (
        # empty conc + empty conc
        (conc, "", conc, "", False, conc, ""),

        # charclass + charclass
        (charclass, "a", charclass, "b", False, conc, "ab"),  # a + b = ab
        (charclass, "a", charclass, "a", True, mult, "a{2}"),  # a + a = a{2}

        # charclass + mult
        (charclass, "a", mult, "a", True, mult, "a{2}"),  # a + a = a{2}
        (charclass, "a", mult, "a{2,}", True, mult, "a{3,}"),  # a + a{2,} = a{3,}
        (charclass, "a", mult, "a{0,8}", True, mult, "a{1,9}"),  # a + a{,8} = a{1,9}
        (charclass, "a", mult, "b{0,8}", False, conc, "ab{0,8}"),  # a + b{,8} = ab{,8}

        # mult + charclass
        (mult, "b", charclass, "b", True, mult, "b{2}"),  # b + b = b{2}
        (mult, "b*", charclass, "b", True, mult, "b+"),  # b* + b = b+
        (mult, "b{0,8}", charclass, "b", True, mult, "b{1,9}"),  # b{,8} + b = b{1,9}
        (mult, "b{0,8}", charclass, "c", False, conc, "b{0,8}c"),  # b{,8} + c = b{,8}c

        # charclass + conc
        (charclass, "a", conc, "", True, charclass, "a"),  # a + nothing = a
        (charclass, "a", conc, "bc", False, conc, "abc"),  # a + bc = abc
        (charclass, "a", conc, "ab", True, conc, "a{2}b"),  # a + ab = a{2}b

        # conc + charclass
        (conc, "", charclass, "a", True, charclass, "a"),  # nothing + a = a
        (conc, "ab", charclass, "c", False, conc, "abc"),  # ab + c = abc
        (conc, "ab", charclass, "b", True, conc, "ab{2}"),  # ab + b = ab{2}

        # pattern + charclass
        (pattern, "a|bd", charclass, "c", False, conc, "(a|bd)c"),  # (a|bd) + c = (a|bd)c
        # (ac{2}|bc+) + c = (ac|bc*)c{2}
        (pattern, "ac{2}|bc+", charclass, "c", True, conc, "(ac|bc*)c{2}"),

        # charclass + pattern
        (charclass, "a", pattern, "b|cd", False, conc, "a(b|cd)"),  # a + (b|cd) = a(b|cd)
        # a + (a{2}b|a+c) = a{2}(ab|a*c)
        (charclass, "a", pattern, "(a{2}b|a+c)", True, conc, "a{2}(ab|a*c)"),

        # mult + mult
        (mult, "a{3,4}", mult, "b?", False, conc, "a{3,4}b?"),  # a{3,4} + b? = a{3,4}b?
        (mult, "a*", mult, "a{2}", True, mult, "a{2,}"),  # a* + a{2} = a{2,}

        # mult + conc
        (mult, "a{2}", conc, "bc", False, conc, "a{2}bc"),  # a{2} + bc = a{2}bc
        (mult, "a?", conc, "ab", True, conc, "a{1,2}b"),  # a? + ab = a{1,2}b

        # conc + mult
        (conc, "ab", mult, "c*", False, conc, "abc*"),  # ab + c* = abc*
        (conc, "ab", mult, "b*", True, conc, "ab+"),  # ab + b* = ab+

        # mult + pattern
        # a{2,3} + (b|cd) = a{2,3}(b|cd)
        (mult, "a{2,3}", pattern, "b|cd", False, conc, "a{2,3}(b|cd)"),
        # a{2,3} + (a{2}b|a+c) = a{3,4}(ab|a*c)
        (mult, "a{2,3}", pattern, "a{2}b|a+c", True, conc, "a{3,4}(ab|a*c)"),

        # pattern + mult
        # (b|cd) + a{2,3} = (b|cd)a{2,3}
        (pattern, "b|cd", mult, "a{2,3}", False, conc, "(b|cd)a{2,3}"),
        # (ba{2}|ca+) + a{2,3} = (ba|ca*)a{3,4}
        (pattern, "ba{2}|ca+", mult, "a{2,3}", True, conc, "(ba|ca*)a{3,4}"),

        # conc + conc
        (conc, "ab", conc, "cd", False, conc, "abcd"),  # ab + cd = abcd
        (conc, "ab", conc, "bc", True, conc, "ab{2}c"),  # ab + bc = ab{2}c

        # conc + pattern
        # za{2,3} + (b|cd) = za{2,3}(b|cd)
        (conc, "za{2,3}", pattern, "b|cd", False, conc, "za{2,3}(b|cd)"),
        # za{2,3} + (a{2}b|a+c) = za{3,4}(ab|a*c)
        (conc, "za{2,3}", pattern, "a{2}b|a+c", True, conc, "za{3,4}(ab|a*c)"),

        # pattern + conc
        # (b|cd) + za{2,3} = (b|cd)za{2,3}
        (pattern, "b|cd", conc, "za{2,3}", False, conc, "(b|cd)za{2,3}"),
        # (ba{2}|ca+) + a{2,3}z = (ba|ca*)a{3,4}z
        (pattern, "ba{2}|ca+", conc, "a{2,3}z", True, conc, "(ba|ca*)a{3,4}z"),

        # pattern + pattern
        # (a|bc) + (c|de) = (a|bc)(c|de)
        (pattern, "a|bc", pattern, "c|de", False, conc, "(a|bc)(c|de)"),
        # (a|bc) + (a|bc) = (a|bc){2}
        (pattern, "a|bc", pattern, "a|bc", True, mult, "(a|bc){2}"),
    )
    for left_cls, left_src, right_cls, right_src, reduce_first, want_cls, want_src in cases:
        total = left_cls.parse(left_src) + right_cls.parse(right_src)
        if reduce_first:
            total = total.reduce()
        assert total == want_cls.parse(want_src)
def test_copy():
    """A copy of a parsed pattern compares equal to the original."""
    original = pattern.parse("abc|def(ghi|jkl)")
    duplicate = original.copy()
    assert duplicate == original
def test_bad_reduction_bug():
    """Guard against merging '0{2}|1{2}' into '[01]{2}'; check digit merging.

    The expected regexes spell the digit class with a doubled backslash
    ("\\d"). A bare "\d" in a non-raw literal is an invalid escape sequence
    that newer Python versions warn about. The text handed to parse() is the
    same either way.
    """
    # DEFECT: "0{2}|1{2}" was erroneously reduced() to "[01]{2}"
    assert parse("0{2}|1{2}").reduce() != parse("[01]{2}")

    assert parse("0|[1-9]|ab").reduce() == pattern.parse("\\d|ab")
    assert parse("0|[1-9]|a{5,7}").reduce() == pattern.parse("\\d|a{5,7}")
    assert parse("0|(0|[1-9]|a{5,7})").reduce() == pattern.parse("0|(\\d|a{5,7})")
def test_pattern_commonconc_suffix():
    """Check the conc returned by pattern._commonconc(suffix=True)."""
    # Each row: (alternation source, expected common suffix).
    cases = (
        ("a|bc", ""),
        ("aa|bca", "a"),
        ("xyza|abca|a", "a"),
        ("f{2,3}c|fc", "fc"),
        ("aa", "aa"),
    )
    for source, expected_suffix in cases:
        found = pattern.parse(source)._commonconc(suffix=True)
        assert found == conc.parse(expected_suffix)
def test_pattern_beheading():
    """Check pattern.behead() with a leading conc across several alternations."""
    # Each row: (alternation source, conc to behead, expected remainder).
    cases = (
        ("aa", "a", "a"),
        ("abc|aa", "a", "a|bc"),
        ("cf{1,2}|cf", "c", "f{1,2}|f"),
        ("aa|aa", "aa", ""),
        ("abc|aa", "a", "a|bc"),
        ("a|bc", "", "a|bc"),
        ("cf{1,2}|cf", "cf", "f?|"),
        ("ZA|ZB|ZC", "Z", "A|B|C"),
        ("Z+A|ZB|ZZC", "Z", "Z*A|B|ZC"),
        ("a{2}b|a+c", "a", "ab|a*c"),
    )
    for source, head, remainder in cases:
        beheaded = pattern.parse(source).behead(conc.parse(head))
        assert beheaded == pattern.parse(remainder)
def test_special_pattern_reduction():
    """Reducing '0|[1-9]|a{5,7}' equals the pattern parsed from '[0-9]|a{5,7}'."""
    # 0|[1-9]|a{5,7} -> [0-9]|a{5,7}
    reduced = pattern.parse("0|[1-9]|a{5,7}").reduce()
    expected = pattern.parse("[0-9]|a{5,7}")
    assert reduced == expected
def test_pattern_commonconc():
    """Check _commonconc() and beheading by that common conc."""
    # Each row: (alternation source, expected common conc, expected remainder
    # after beheading by it).
    cases = (
        ("aa|aa", "aa", ""),  # aa, aa -> aa; (aa|aa).behead(aa) = ()
        ("abc|aa", "a", "a|bc"),  # abc, aa -> a; (abc|aa).behead(a) = (a|bc)
        # a, bc -> emptystring; (a|bc).behead(emptystring) = (a|bc)
        ("a|bc", "", "a|bc"),
        # cf{1,2}, cf -> cf; (cf{1,2}|cf).behead(cf) = (f?|)
        ("cf{1,2}|cf", "cf", "f?|"),
        ("ZA|ZB|ZC", "Z", "A|B|C"),  # ZA|ZB|ZC -> Z; behead(Z) = A|B|C
        # Z+A|ZB|ZZC -> Z; behead(Z) = Z*A|B|ZC
        ("Z+A|ZB|ZZC", "Z", "Z*A|B|ZC"),
        ("a{2}b|a+c", "a", "ab|a*c"),  # a{2}b|a+c -> a; behead(a) = (ab|a*c)
    )
    for source, common, remainder in cases:
        assert pattern.parse(source)._commonconc() == conc.parse(common)
        assert pattern.parse(source).behead(conc.parse(common)) == pattern.parse(remainder)
def test_pattern_dock():
    """Check pattern.dock() with a trailing conc for several alternations."""
    # Each row: (alternation source, conc to dock, expected remainder).
    cases = (
        ("a|bc", "", "a|bc"),
        ("aa|bca", "a", "a|bc"),
        ("xyza|abca|a", "a", "xyz|abc|"),
        ("f{2,3}c|fc", "fc", "f{1,2}|"),
        ("aa", "aa", ""),
    )
    for source, tail, remainder in cases:
        docked = pattern.parse(source).dock(conc.parse(tail))
        assert docked == pattern.parse(remainder)