def test_empty_mult_suppression():
    """Check that a conc with an unmatchable element reduces to "[]".

    The same expectation is checked twice: once for a conc obtained by
    parsing text, once for a conc assembled directly from mult objects.
    """
    from_text = conc.parse("[]0\\d")
    assert from_text.reduce() == charclass.parse("[]")

    assembled = conc(
        # this mult can never actually match anything
        mult(pattern(), one),
        mult(charclass("0"), one),
        mult(charclass("0123456789"), one),
    )
    assert assembled.reduce() == charclass.parse("[]")
def test_empty_mult_suppression():
    """Check that a conc with an unmatchable element reduces to "[]".

    Covers both a parsed conc and one built by hand from mult objects.
    """
    # The backslash is doubled so the literal holds a backslash followed by
    # "d" without relying on Python passing an unrecognised escape through
    # (which emits a warning on current interpreters).
    assert conc.parse("[]0\\d").reduce() == charclass.parse("[]")
    assert conc(
        mult(pattern(), one),  # this mult can never actually match anything
        mult(charclass("0"), one),
        mult(charclass("0123456789"), one),
    ).reduce() == charclass.parse("[]")
def test_new_reduce():
    """Check that empty groups "()" are dropped when a conc is reduced."""
    # The @reduce_after decorator has been removed from many methods since it
    # takes unnecessary time which the user may not wish to spend.
    # This alters the behaviour of several methods and also exposes a new
    # opportunity for conc.reduce()
    for source in ("a()", "a()()"):
        assert conc.parse(source).reduce() == charclass.parse("a")

    reduced = conc.parse("a.b()()").reduce()
    assert reduced == conc.parse("a.b")
def test_conc_reduction_basic():
    """Check the expected result of conc.reduce() for four simple inputs."""
    # A conc containing "[]" is expected to collapse to "[]".
    with_empty_class = conc.parse("a[]b").reduce()
    assert with_empty_class == charclass.parse("[]")

    # conc -> conc
    unchanged = conc.parse("ab").reduce()
    assert unchanged == conc.parse("ab")

    # conc -> mult
    as_mult = conc.parse("a{3,4}").reduce()
    assert as_mult == mult.parse("a{3,4}")

    # conc -> charclass
    as_charclass = conc.parse("a").reduce()
    assert as_charclass == charclass.parse("a")
def test_mult_intersection():
    """Check `mult & mult` against a table of expected results."""
    # Each row is (left source, right source, expected value of left & right).
    cases = [
        ("a", "b?", charclass()),
        ("a", "b?", nothing),
        ("a", "a?", charclass.parse("a")),
        ("a{2}", "a{2,}", mult.parse("a{2}")),
        ("a", "b", charclass.parse("[]")),
        ("a", "a", charclass.parse("a")),
        ("a*", "a", charclass.parse("a")),
        ("a*", "b*", conc.parse("")),
        ("a*", "a+", mult.parse("a+")),
        ("a{2}", "a{4}", charclass.parse("[]")),
        ("a{3,}", "a{3,}", mult.parse("a{3,}")),
    ]
    for left, right, expected in cases:
        assert mult.parse(left) & mult.parse(right) == expected
def test_pattern_reduce_basic():
    """Check that pattern.reduce() yields the expected object per input."""
    # Each row is (pattern source, expected result of reduce()).
    cases = [
        # pattern -> pattern: (ab|cd) -> (ab|cd)
        ("ab|cd", pattern.parse("ab|cd")),
        # pattern -> conc
        ("a{2}b{2}", conc.parse("a{2}b{2}")),
        # pattern -> mult
        ("a{2}", mult.parse("a{2}")),
        # pattern -> charclass
        ("a", charclass.parse("a")),
    ]
    for source, expected in cases:
        assert pattern.parse(source).reduce() == expected
def test_cardinality():
    """Check cardinality() / len() results, including the overflow case."""
    assert charclass.parse("[]").cardinality() == 0

    # Each row is (mult source, expected cardinality).
    mult_counts = (
        ("[]?", 1),
        ("[]{0,6}", 1),
        ("[ab]{3}", 8),
        ("[ab]{2,3}", 12),
    )
    for source, count in mult_counts:
        assert mult.parse(source).cardinality() == count

    assert len(pattern.parse("abc|def(ghi|jkl)")) == 3

    # len() of ".*" is expected to raise OverflowError; returning normally
    # is a test failure.
    try:
        len(pattern.parse(".*"))
    except OverflowError:
        pass
    else:
        assert False
def test_mult_reduction_easy():
    """Check mult.reduce() against a table of simple expectations."""
    # Each row is (mult under test, expected result of reduce()).
    cases = [
        (mult.parse("a"), charclass.parse("a")),
        (mult.parse("a"), charclass("a")),
        (mult.parse("a?"), mult(charclass("a"), qm)),
        (mult.parse("a{0}"), emptystring),
        (mult.parse("[]"), nothing),
        (mult.parse("[]?"), emptystring),
        (mult.parse("[]{0}"), emptystring),
        (mult.parse("[]{0,5}"), emptystring),
        (mult(pattern(), one), nothing),
        (mult(pattern(), qm), emptystring),
        (mult(pattern(), zero), emptystring),
        (mult(pattern(), multiplier.parse("{0,5}")), emptystring),
    ]
    for subject, expected in cases:
        assert subject.reduce() == expected
def test_charclass_parsing():
    """Check charclass.match()/parse() results and their failure modes."""
    # Each row is (text, start index, expected return value of match()).
    match_cases = [
        ("a", 0, (charclass("a"), 1)),
        ("aa", 1, (charclass("a"), 2)),
        ("a$", 1, (charclass("$"), 2)),
        (".", 0, (dot, 1)),
    ]
    for text, start, expected in match_cases:
        assert charclass.match(text, start) == expected

    assert charclass.parse("a") == charclass("a")

    # An unterminated "[" is expected to raise IndexError.
    try:
        charclass.match("[", 0)
    except IndexError:
        pass
    else:
        assert False

    # Matching at index 1 of the one-character string "a" is expected to
    # raise nomatch.
    try:
        charclass.match("a", 1)
    except nomatch:
        pass
    else:
        assert False
def test_concatenation():
    """Check that `+` on each pairing of operand types equals the parsed conc."""
    # Each row is (left operand, right operand, source of the expected conc).
    cases = [
        # charclass on the left
        (charclass.parse("a"), charclass.parse("b"), "ab"),
        (charclass.parse("a"), mult.parse("b{0,8}"), "ab{0,8}"),
        (charclass.parse("a"), conc.parse("bc"), "abc"),
        (charclass.parse("a"), pattern.parse("b|cd"), "a(b|cd)"),
        # mult on the left
        (mult.parse("b{0,8}"), charclass.parse("c"), "b{0,8}c"),
        (mult.parse("a{3,4}"), mult.parse("b?"), "a{3,4}b?"),
        (mult.parse("a{2}"), conc.parse("bc"), "a{2}bc"),
        (mult.parse("a{2,3}"), pattern.parse("b|cd"), "a{2,3}(b|cd)"),
        # conc on the left
        (conc.parse("ab"), charclass.parse("c"), "abc"),
        (conc.parse("ab"), mult.parse("c*"), "abc*"),
        (conc.parse(""), conc.parse(""), ""),
        (conc.parse("ab"), conc.parse("cd"), "abcd"),
        (conc.parse("za{2,3}"), pattern.parse("b|cd"), "za{2,3}(b|cd)"),
        # pattern on the left
        (pattern.parse("a|bd"), charclass.parse("c"), "(a|bd)c"),
        (pattern.parse("b|cd"), mult.parse("a{2,3}"), "(b|cd)a{2,3}"),
        (pattern.parse("b|cd"), conc.parse("za{2,3}"), "(b|cd)za{2,3}"),
        (pattern.parse("a|bc"), pattern.parse("c|de"), "(a|bc)(c|de)"),
    ]
    for left, right, expected_source in cases:
        assert left + right == conc.parse(expected_source)
def test_mult_intersection():
    """Check `mult & mult` and `mult.common(mult)` for a range of operands."""
    m = mult.parse

    # a & b? is expected to equal both charclass() and `nothing`
    lone_vs_optional = m("a") & m("b?")
    assert lone_vs_optional == charclass()
    assert m("a") & m("b?") == nothing

    # "a" reduces to the charclass a, and a & a? is expected to be a
    assert m("a").reduce() == charclass.parse("a")
    assert m("a") & m("a?") == charclass.parse("a")

    # a{2} & a{2,} = a{2}
    assert m("a{2}") & m("a{2,}") == m("a{2}")

    # a & b -> no intersection
    assert m("a") & m("b") == charclass.parse("[]")

    # a & a -> a
    assert m("a") & m("a") == charclass.parse("a")

    # a* & a -> a
    assert m("a*") & m("a") == charclass.parse("a")

    # a* & b* -> the empty conc
    assert m("a*") & m("b*") == conc.parse("")

    # a* & a+ -> a+
    assert m("a*") & m("a+") == m("a+")

    # a{2} & a{4} -> []
    assert m("a{2}") & m("a{4}") == charclass.parse("[]")

    # The next three use common() rather than `&`.
    # common(a{3,4}, a{2,5}) -> a{2,3}
    assert m("a{3,4}").common(m("a{2,5}")) == m("a{2,3}")
    # common(a{2,}, a{1,5}) -> a{1,5}
    assert m("a{2,}").common(m("a{1,5}")) == m("a{1,5}")
    # common(a{3,}, a{2,}) -> a{2,} (with a, epsilon left over)
    assert m("a{3,}").common(m("a{2,}")) == m("a{2,}")

    # a{3,} & a{3,} -> a{3,} (with inf, inf left over)
    assert m("a{3,}") & m("a{3,}") == m("a{3,}")
def test_derive():
    """Check derive() against a table of (regex, argument, expected) rows."""
    cases = [
        ("a+", "a", mult.parse("a*")),
        ("a+|b+", "a", mult.parse("a*")),
        ("abc|ade", "a", pattern.parse("bc|de")),
        ("abc|ade", "ab", charclass.parse("c")),
    ]
    for source, consumed, expected in cases:
        assert parse(source).derive(consumed) == expected
def test_charclass_intersection_2():
    """Check that [A-z] & [^g], once reduced, equals [A-fh-z]."""
    ranged = parse("[A-z]")
    all_but_g = parse("[^g]")
    overlap = (ranged & all_but_g).reduce()
    assert overlap == charclass.parse("[A-fh-z]")
def test_concatenation():
    """Check `+` for every pairing of charclass, mult, conc and pattern.

    Most pairings are checked twice: once comparing the raw sum with a parsed
    conc, and once comparing the reduced sum with the expected reduced form.
    """

    def reduced_sum(left, right):
        # Add the operands, then reduce the result.
        return (left + right).reduce()

    cc = charclass.parse
    mu = mult.parse
    co = conc.parse
    pa = pattern.parse

    # --- empty conc + empty conc
    assert co("") + co("") == co("")

    # --- charclass + charclass
    # a + b = ab
    assert cc("a") + cc("b") == co("ab")
    # a + a = a{2}
    assert reduced_sum(cc("a"), cc("a")) == mu("a{2}")

    # --- charclass + mult
    # a + a = a{2}
    assert reduced_sum(cc("a"), mu("a")) == mu("a{2}")
    # a + a{2,} = a{3,}
    assert reduced_sum(cc("a"), mu("a{2,}")) == mu("a{3,}")
    # a + a{,8} = a{1,9}
    assert reduced_sum(cc("a"), mu("a{0,8}")) == mu("a{1,9}")
    # a + b{,8} = ab{,8}
    assert cc("a") + mu("b{0,8}") == co("ab{0,8}")

    # --- mult + charclass
    # b + b = b{2}
    assert reduced_sum(mu("b"), cc("b")) == mu("b{2}")
    # b* + b = b+
    assert reduced_sum(mu("b*"), cc("b")) == mu("b+")
    # b{,8} + b = b{1,9}
    assert reduced_sum(mu("b{0,8}"), cc("b")) == mu("b{1,9}")
    # b{,8} + c = b{,8}c
    assert mu("b{0,8}") + cc("c") == co("b{0,8}c")

    # --- charclass + conc
    # a + (empty conc) = a
    assert reduced_sum(cc("a"), co("")) == cc("a")
    # a + bc = abc
    assert cc("a") + co("bc") == co("abc")
    # a + ab = a{2}b
    assert reduced_sum(cc("a"), co("ab")) == co("a{2}b")

    # --- conc + charclass
    # (empty conc) + a = a
    assert reduced_sum(co(""), cc("a")) == cc("a")
    # ab + c = abc
    assert co("ab") + cc("c") == co("abc")
    # ab + b = ab{2}
    assert reduced_sum(co("ab"), cc("b")) == co("ab{2}")

    # --- pattern + charclass
    # (a|bd) + c = (a|bd)c
    assert pa("a|bd") + cc("c") == co("(a|bd)c")
    # (ac{2}|bc+) + c = (ac|bc*)c{2}
    assert reduced_sum(pa("ac{2}|bc+"), cc("c")) == co("(ac|bc*)c{2}")

    # --- charclass + pattern
    # a + (b|cd) = a(b|cd)
    assert cc("a") + pa("b|cd") == co("a(b|cd)")
    # a + (a{2}b|a+c) = a{2}(ab|a*c)
    assert reduced_sum(cc("a"), pa("(a{2}b|a+c)")) == co("a{2}(ab|a*c)")

    # --- mult + mult
    # a{3,4} + b? = a{3,4}b?
    assert mu("a{3,4}") + mu("b?") == co("a{3,4}b?")
    # a* + a{2} = a{2,}
    assert reduced_sum(mu("a*"), mu("a{2}")) == mu("a{2,}")

    # --- mult + conc
    # a{2} + bc = a{2}bc
    assert mu("a{2}") + co("bc") == co("a{2}bc")
    # a? + ab = a{1,2}b
    assert reduced_sum(mu("a?"), co("ab")) == co("a{1,2}b")

    # --- conc + mult
    # ab + c* = abc*
    assert co("ab") + mu("c*") == co("abc*")
    # ab + b* = ab+
    assert reduced_sum(co("ab"), mu("b*")) == co("ab+")

    # --- mult + pattern
    # a{2,3} + (b|cd) = a{2,3}(b|cd)
    assert mu("a{2,3}") + pa("b|cd") == co("a{2,3}(b|cd)")
    # a{2,3} + (a{2}b|a+c) = a{3,4}(ab|a*c)
    assert reduced_sum(mu("a{2,3}"), pa("a{2}b|a+c")) == co("a{3,4}(ab|a*c)")

    # --- pattern + mult
    # (b|cd) + a{2,3} = (b|cd)a{2,3}
    assert pa("b|cd") + mu("a{2,3}") == co("(b|cd)a{2,3}")
    # (ba{2}|ca+) + a{2,3} = (ba|ca*)a{3,4}
    assert reduced_sum(pa("ba{2}|ca+"), mu("a{2,3}")) == co("(ba|ca*)a{3,4}")

    # --- conc + conc
    # ab + cd = abcd
    assert co("ab") + co("cd") == co("abcd")
    # ab + bc = ab{2}c
    assert reduced_sum(co("ab"), co("bc")) == co("ab{2}c")

    # --- conc + pattern
    # za{2,3} + (b|cd) = za{2,3}(b|cd)
    assert co("za{2,3}") + pa("b|cd") == co("za{2,3}(b|cd)")
    # za{2,3} + (a{2}b|a+c) = za{3,4}(ab|a*c)
    assert reduced_sum(co("za{2,3}"), pa("a{2}b|a+c")) == co("za{3,4}(ab|a*c)")

    # --- pattern + conc
    # (b|cd) + za{2,3} = (b|cd)za{2,3}
    assert pa("b|cd") + co("za{2,3}") == co("(b|cd)za{2,3}")
    # (ba{2}|ca+) + a{2,3}z = (ba|ca*)a{3,4}z
    assert reduced_sum(pa("ba{2}|ca+"), co("a{2,3}z")) == co("(ba|ca*)a{3,4}z")

    # --- pattern + pattern
    # (a|bc) + (c|de) = (a|bc)(c|de)
    assert pa("a|bc") + pa("c|de") == co("(a|bc)(c|de)")
    # (a|bc) + (a|bc) = (a|bc){2}
    assert reduced_sum(pa("a|bc"), pa("a|bc")) == mu("(a|bc){2}")
def test_pattern_reduce_basic():
    """Check the expected result of pattern.reduce() for four simple inputs."""
    # An alternation is expected to stay a pattern.
    alternation = pattern.parse("ab|cd").reduce()
    assert alternation == pattern.parse("ab|cd")

    # A single sequence is expected to equal the parsed conc.
    sequence = pattern.parse("a{2}b{2}").reduce()
    assert sequence == conc.parse("a{2}b{2}")

    # A single repeated element is expected to equal the parsed mult.
    repeated = pattern.parse("a{2}").reduce()
    assert repeated == mult.parse("a{2}")

    # A single character is expected to equal the parsed charclass.
    single = pattern.parse("a").reduce()
    assert single == charclass.parse("a")
def test_conc_reduction_basic():
    """Check conc.reduce() against a table of expected results."""
    # Each row is (conc source, expected result of reduce()).
    cases = [
        ("a", charclass.parse("a")),
        ("a{3,4}", mult.parse("a{3,4}")),
        ("ab", conc.parse("ab")),
        ("a[]b", charclass.parse("[]")),
    ]
    for source, expected in cases:
        assert conc.parse(source).reduce() == expected
def test_set_ops():
    """Check the `-` and `^` operators on parsed character classes."""
    # [abcd] - a = [bcd]
    difference = parse("[abcd]") - parse("a")
    assert difference == charclass.parse("[bcd]")

    # [abcd] ^ [cdef] = [abef]
    symmetric_difference = parse("[abcd]") ^ parse("[cdef]")
    assert symmetric_difference == charclass.parse("[abef]")
def test_w_d_s():
    """Check that the w, d and s shorthands parse to explicit charclasses."""
    # Allow "\w", "\d" and "\s" in charclasses.
    # Each row is (shorthand source, equivalent explicit source).
    equivalents = (
        ("\\w", "[0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz]"),
        ("[\\w~]", "[0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~]"),
        ("[\\da]", "[0123456789a]"),
        ("[\\s]", "[\t\n\r\f\v ]"),
    )
    for shorthand, explicit in equivalents:
        assert charclass.parse(shorthand) == charclass.parse(explicit)
def test_w_d_s():
    """Check that the w, d and s shorthands parse to explicit charclasses."""
    # Allow "\w", "\d" and "\s" in charclasses.
    # Backslashes in the shorthand literals are doubled: a single backslash
    # before w/d/s is an unrecognised Python escape, which still yields the
    # same characters but emits a warning on current interpreters.
    assert charclass.parse("\\w") == charclass.parse(
        "[0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz]"
    )
    assert charclass.parse("[\\w~]") == charclass.parse(
        "[0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~]"
    )
    assert charclass.parse("[\\da]") == charclass.parse("[0123456789a]")
    # The expected literal deliberately uses real Python escapes: it contains
    # the actual tab, newline, carriage return, form feed and vertical tab.
    assert charclass.parse("[\\s]") == charclass.parse("[\t\n\r\f\v ]")