Example #1
0
def test_empty_mult_suppression():
	"""Check that a conc holding an unmatchable mult reduces to `[]`.

	The expectation is asserted for a conc parsed from text and for a conc
	assembled by hand from three mults.
	"""
	parsed = conc.parse("[]0\\d")
	assert parsed.reduce() == charclass.parse("[]")

	assembled = conc(
		# this mult can never actually match anything
		mult(pattern(), one),
		mult(charclass("0"), one),
		mult(charclass("0123456789"), one),
	)
	assert assembled.reduce() == charclass.parse("[]")
Example #2
0
def test_empty_mult_suppression():
	"""Check that a conc holding an unmatchable mult reduces to `[]`.

	The expectation is asserted for a conc parsed from text and for a conc
	assembled by hand from three mults.
	"""
	# The backslash is doubled so the literal contains backslash + "d"
	# without relying on Python passing an unrecognised escape through
	# (which newer interpreters warn about).
	assert conc.parse("[]0\\d").reduce() == charclass.parse("[]")
	assert conc(
		mult(pattern(), one), # this mult can never actually match anything
		mult(charclass("0"), one),
		mult(charclass("0123456789"), one),
	).reduce() == charclass.parse("[]")
Example #3
0
def test_new_reduce():
	"""Check that trailing `()` groups disappear when a conc is reduced.

	Background: the @reduce_after decorator has been removed from many
	methods since it takes unnecessary time which the user may not wish to
	spend. This alters the behaviour of several methods and also exposes a
	new opportunity for conc.reduce().
	"""
	expectations = (
		("a()", charclass.parse("a")),
		("a()()", charclass.parse("a")),
		("a.b()()", conc.parse("a.b")),
	)
	for text, reduced in expectations:
		assert conc.parse(text).reduce() == reduced
Example #4
0
def test_new_reduce():
	"""Check that trailing `()` groups disappear when a conc is reduced.

	Background: the @reduce_after decorator has been removed from many
	methods since it takes unnecessary time which the user may not wish to
	spend. This alters the behaviour of several methods and also exposes a
	new opportunity for conc.reduce().
	"""
	one_group = conc.parse("a()").reduce()
	assert one_group == charclass.parse("a")

	two_groups = conc.parse("a()()").reduce()
	assert two_groups == charclass.parse("a")

	longer = conc.parse("a.b()()").reduce()
	assert longer == conc.parse("a.b")
def test_conc_reduction_basic():
	"""Check what conc.reduce() returns for a handful of simple inputs."""
	expectations = (
		("a[]b", charclass.parse("[]")),
		("ab", conc.parse("ab")),  # conc -> conc
		("a{3,4}", mult.parse("a{3,4}")),  # conc -> mult
		("a", charclass.parse("a")),  # conc -> charclass
	)
	for text, reduced in expectations:
		assert conc.parse(text).reduce() == reduced
Example #6
0
def test_mult_intersection():
	"""Check the result of `&` between pairs of parsed mults."""
	# Each row: left operand text, right operand text, expected intersection.
	expectations = (
		("a", "b?", charclass()),
		("a", "b?", nothing),
		("a", "a?", charclass.parse("a")),
		("a{2}", "a{2,}", mult.parse("a{2}")),
		("a", "b", charclass.parse("[]")),
		("a", "a", charclass.parse("a")),
		("a*", "a", charclass.parse("a")),
		("a*", "b*", conc.parse("")),
		("a*", "a+", mult.parse("a+")),
		("a{2}", "a{4}", charclass.parse("[]")),
		("a{3,}", "a{3,}", mult.parse("a{3,}")),
	)
	for left, right, expected in expectations:
		assert mult.parse(left) & mult.parse(right) == expected
Example #7
0
def test_mult_intersection():
	"""Check the result of `&` between pairs of parsed mults."""
	# Each row: left operand text, right operand text, expected intersection.
	expectations = (
		("a", "b?", charclass()),
		("a", "b?", nothing),
		("a", "a?", charclass.parse("a")),
		("a{2}", "a{2,}", mult.parse("a{2}")),
		("a", "b", charclass.parse("[]")),
		("a", "a", charclass.parse("a")),
		("a*", "a", charclass.parse("a")),
		("a*", "b*", conc.parse("")),
		("a*", "a+", mult.parse("a+")),
		("a{2}", "a{4}", charclass.parse("[]")),
		("a{3,}", "a{3,}", mult.parse("a{3,}")),
	)
	for left, right, expected in expectations:
		assert mult.parse(left) & mult.parse(right) == expected
def test_pattern_reduce_basic():
	"""Check what pattern.reduce() returns for a handful of simple inputs."""
	expectations = (
		# pattern -> pattern: (ab|cd) stays (ab|cd)
		("ab|cd", pattern.parse("ab|cd")),
		# pattern -> conc
		("a{2}b{2}", conc.parse("a{2}b{2}")),
		# pattern -> mult
		("a{2}", mult.parse("a{2}")),
		# pattern -> charclass
		("a", charclass.parse("a")),
	)
	for text, reduced in expectations:
		assert pattern.parse(text).reduce() == reduced
Example #9
0
def test_cardinality():
	"""Check cardinality() and len() on a few parsed expressions.

	Also checks that len() of `.*` raises OverflowError.
	"""
	expectations = (
		(charclass.parse("[]"), 0),
		(mult.parse("[]?"), 1),
		(mult.parse("[]{0,6}"), 1),
		(mult.parse("[ab]{3}"), 8),
		(mult.parse("[ab]{2,3}"), 12),
	)
	for item, count in expectations:
		assert item.cardinality() == count

	assert len(pattern.parse("abc|def(ghi|jkl)")) == 3

	try:
		len(pattern.parse(".*"))
	except OverflowError:
		pass
	else:
		# len() returned normally, which the test treats as a failure
		assert False
Example #10
0
def test_cardinality():
	"""Check cardinality() and len() on a few parsed expressions.

	Also checks that len() of `.*` raises OverflowError.
	"""
	expectations = (
		(charclass.parse("[]"), 0),
		(mult.parse("[]?"), 1),
		(mult.parse("[]{0,6}"), 1),
		(mult.parse("[ab]{3}"), 8),
		(mult.parse("[ab]{2,3}"), 12),
	)
	for item, count in expectations:
		assert item.cardinality() == count

	assert len(pattern.parse("abc|def(ghi|jkl)")) == 3

	try:
		len(pattern.parse(".*"))
	except OverflowError:
		pass
	else:
		# len() returned normally, which the test treats as a failure
		assert False
Example #11
0
def test_mult_reduction_easy():
	"""Check mult.reduce() on simple parsed and hand-built mults."""
	# Mults obtained by parsing text.
	parsed_expectations = (
		("a", charclass.parse("a")),
		("a", charclass("a")),
		("a?", mult(charclass("a"), qm)),
		("a{0}", emptystring),
		("[]", nothing),
		("[]?", emptystring),
		("[]{0}", emptystring),
		("[]{0,5}", emptystring),
	)
	for text, reduced in parsed_expectations:
		assert mult.parse(text).reduce() == reduced

	# Mults built around an argument-less pattern() with various multipliers.
	built_expectations = (
		(one, nothing),
		(qm, emptystring),
		(zero, emptystring),
		(multiplier.parse("{0,5}"), emptystring),
	)
	for times, reduced in built_expectations:
		assert mult(pattern(), times).reduce() == reduced
Example #12
0
def test_mult_reduction_easy():
	"""Check mult.reduce() on simple parsed and hand-built mults."""
	# Mults obtained by parsing text.
	parsed_expectations = (
		("a", charclass.parse("a")),
		("a", charclass("a")),
		("a?", mult(charclass("a"), qm)),
		("a{0}", emptystring),
		("[]", nothing),
		("[]?", emptystring),
		("[]{0}", emptystring),
		("[]{0,5}", emptystring),
	)
	for text, reduced in parsed_expectations:
		assert mult.parse(text).reduce() == reduced

	# Mults built around an argument-less pattern() with various multipliers.
	built_expectations = (
		(one, nothing),
		(qm, emptystring),
		(zero, emptystring),
		(multiplier.parse("{0,5}"), emptystring),
	)
	for times, reduced in built_expectations:
		assert mult(pattern(), times).reduce() == reduced
Example #13
0
def test_charclass_parsing():
	"""Check charclass.match()/parse() results and the errors they raise."""
	# match(text, i) yields a (charclass, index) pair.
	first = charclass.match("a", 0)
	assert first == (charclass("a"), 1)
	assert charclass.parse("a") == charclass("a")
	second = charclass.match("aa", 1)
	assert second == (charclass("a"), 2)
	dollar = charclass.match("a$", 1)
	assert dollar == (charclass("$"), 2)
	anything = charclass.match(".", 0)
	assert anything == (dot, 1)

	# A lone "[" is expected to raise IndexError.
	try:
		charclass.match("[", 0)
	except IndexError:
		pass
	else:
		assert False

	# Matching at index 1 of a one-character string is expected to raise nomatch.
	try:
		charclass.match("a", 1)
	except nomatch:
		pass
	else:
		assert False
Example #14
0
def test_charclass_parsing():
	"""Check charclass.match()/parse() results and the errors they raise."""
	# match(text, i) yields a (charclass, index) pair.
	first = charclass.match("a", 0)
	assert first == (charclass("a"), 1)
	assert charclass.parse("a") == charclass("a")
	second = charclass.match("aa", 1)
	assert second == (charclass("a"), 2)
	dollar = charclass.match("a$", 1)
	assert dollar == (charclass("$"), 2)
	anything = charclass.match(".", 0)
	assert anything == (dot, 1)

	# A lone "[" is expected to raise IndexError.
	try:
		charclass.match("[", 0)
	except IndexError:
		pass
	else:
		assert False

	# Matching at index 1 of a one-character string is expected to raise nomatch.
	try:
		charclass.match("a", 1)
	except nomatch:
		pass
	else:
		assert False
Example #15
0
def test_concatenation():
	"""Check `+` between every pairing of charclass, mult, conc and pattern.

	Each result is compared against a conc parsed from the expected text.
	"""
	# Each row: left operand, right operand, text of the expected conc.
	expectations = (
		(charclass.parse("a"), charclass.parse("b"), "ab"),
		(charclass.parse("a"), mult.parse("b{0,8}"), "ab{0,8}"),
		(charclass.parse("a"), conc.parse("bc"), "abc"),
		(charclass.parse("a"), pattern.parse("b|cd"), "a(b|cd)"),
		(mult.parse("b{0,8}"), charclass.parse("c"), "b{0,8}c"),
		(mult.parse("a{3,4}"), mult.parse("b?"), "a{3,4}b?"),
		(mult.parse("a{2}"), conc.parse("bc"), "a{2}bc"),
		(mult.parse("a{2,3}"), pattern.parse("b|cd"), "a{2,3}(b|cd)"),
		(conc.parse("ab"), charclass.parse("c"), "abc"),
		(conc.parse("ab"), mult.parse("c*"), "abc*"),
		(conc.parse(""), conc.parse(""), ""),
		(conc.parse("ab"), conc.parse("cd"), "abcd"),
		(conc.parse("za{2,3}"), pattern.parse("b|cd"), "za{2,3}(b|cd)"),
		(pattern.parse("a|bd"), charclass.parse("c"), "(a|bd)c"),
		(pattern.parse("b|cd"), mult.parse("a{2,3}"), "(b|cd)a{2,3}"),
		(pattern.parse("b|cd"), conc.parse("za{2,3}"), "(b|cd)za{2,3}"),
		(pattern.parse("a|bc"), pattern.parse("c|de"), "(a|bc)(c|de)"),
	)
	for left, right, joined in expectations:
		assert left + right == conc.parse(joined)
Example #16
0
def test_concatenation():
	"""Check `+` between every pairing of charclass, mult, conc and pattern.

	Each result is compared against a conc parsed from the expected text.
	"""
	# Each row: left operand, right operand, text of the expected conc.
	expectations = (
		(charclass.parse("a"), charclass.parse("b"), "ab"),
		(charclass.parse("a"), mult.parse("b{0,8}"), "ab{0,8}"),
		(charclass.parse("a"), conc.parse("bc"), "abc"),
		(charclass.parse("a"), pattern.parse("b|cd"), "a(b|cd)"),
		(mult.parse("b{0,8}"), charclass.parse("c"), "b{0,8}c"),
		(mult.parse("a{3,4}"), mult.parse("b?"), "a{3,4}b?"),
		(mult.parse("a{2}"), conc.parse("bc"), "a{2}bc"),
		(mult.parse("a{2,3}"), pattern.parse("b|cd"), "a{2,3}(b|cd)"),
		(conc.parse("ab"), charclass.parse("c"), "abc"),
		(conc.parse("ab"), mult.parse("c*"), "abc*"),
		(conc.parse(""), conc.parse(""), ""),
		(conc.parse("ab"), conc.parse("cd"), "abcd"),
		(conc.parse("za{2,3}"), pattern.parse("b|cd"), "za{2,3}(b|cd)"),
		(pattern.parse("a|bd"), charclass.parse("c"), "(a|bd)c"),
		(pattern.parse("b|cd"), mult.parse("a{2,3}"), "(b|cd)a{2,3}"),
		(pattern.parse("b|cd"), conc.parse("za{2,3}"), "(b|cd)za{2,3}"),
		(pattern.parse("a|bc"), pattern.parse("c|de"), "(a|bc)(c|de)"),
	)
	for left, right, joined in expectations:
		assert left + right == conc.parse(joined)
Example #17
0
def test_mult_intersection():
	"""Check `&` and `.common()` between pairs of parsed mults."""
	m = mult.parse  # local shorthand; every operand below is a parsed mult

	# a & b? -> nothing, compared against both charclass() and `nothing`
	assert m("a") & m("b?") == charclass()
	assert m("a") & m("b?") == nothing

	# a reduces to the charclass a, and a & a? -> a
	assert m("a").reduce() == charclass.parse("a")
	assert m("a") & m("a?") == charclass.parse("a")

	# a{2} & a{2,} -> a{2}
	assert m("a{2}") & m("a{2,}") == m("a{2}")

	# a & b -> []
	assert m("a") & m("b") == charclass.parse("[]")

	# a & a -> a
	assert m("a") & m("a") == charclass.parse("a")

	# a* & a -> a
	assert m("a*") & m("a") == charclass.parse("a")

	# a* & b* -> the empty conc
	assert m("a*") & m("b*") == conc.parse("")

	# a* & a+ -> a+
	assert m("a*") & m("a+") == m("a+")

	# a{2} & a{4} -> []
	assert m("a{2}") & m("a{4}") == charclass.parse("[]")

	# The next three use .common() rather than `&`.
	# a{3,4}.common(a{2,5}) -> a{2,3}
	assert m("a{3,4}").common(m("a{2,5}")) == m("a{2,3}")

	# a{2,}.common(a{1,5}) -> a{1,5}
	assert m("a{2,}").common(m("a{1,5}")) == m("a{1,5}")

	# a{3,}.common(a{2,}) -> a{2,}
	assert m("a{3,}").common(m("a{2,}")) == m("a{2,}")

	# a{3,} & a{3,} -> a{3,}
	assert m("a{3,}") & m("a{3,}") == m("a{3,}")
Example #18
0
def test_derive():
	"""Check the result of derive() on a few parsed expressions."""
	# Each row: expression text, argument passed to derive(), expected result.
	expectations = (
		("a+", "a", mult.parse("a*")),
		("a+|b+", "a", mult.parse("a*")),
		("abc|ade", "a", pattern.parse("bc|de")),
		("abc|ade", "ab", charclass.parse("c")),
	)
	for text, consumed, remainder in expectations:
		assert parse(text).derive(consumed) == remainder
Example #19
0
def test_charclass_intersection_2():
	"""Check that [A-z] & [^g], once reduced, equals [A-fh-z]."""
	overlap = parse("[A-z]") & parse("[^g]")
	assert overlap.reduce() == charclass.parse("[A-fh-z]")
Example #20
0
def test_concatenation():
	"""Check `+` for every pairing of charclass, mult, conc and pattern.

	Most pairings get two assertions: one comparing the raw sum against a
	parsed conc, and one comparing the reduced sum against the expected
	simplified form.
	"""

	# empty conc + empty conc
	assert conc.parse("") + conc.parse("") == conc.parse("")

	# charclass + charclass
	# a + b = ab
	assert charclass.parse("a") + charclass.parse("b") == conc.parse("ab")
	# a + a = a{2}
	assert (charclass.parse("a") + charclass.parse("a")).reduce() == mult.parse("a{2}")

	# charclass + mult
	# a + a = a{2}
	assert (charclass.parse("a") + mult.parse("a")).reduce() == mult.parse("a{2}")
	# a + a{2,} = a{3,}
	assert (charclass.parse("a") + mult.parse("a{2,}")).reduce() == mult.parse("a{3,}")
	# a + a{0,8} = a{1,9}
	assert (charclass.parse("a") + mult.parse("a{0,8}")).reduce() == mult.parse("a{1,9}")
	# a + b{0,8} = ab{0,8}
	assert charclass.parse("a") + mult.parse("b{0,8}") == conc.parse("ab{0,8}")

	# mult + charclass
	# b + b = b{2}
	assert (mult.parse("b") + charclass.parse("b")).reduce() == mult.parse("b{2}")
	# b* + b = b+
	assert (mult.parse("b*") + charclass.parse("b")).reduce() == mult.parse("b+")
	# b{0,8} + b = b{1,9}
	assert (mult.parse("b{0,8}") + charclass.parse("b")).reduce() == mult.parse("b{1,9}")
	# b{0,8} + c = b{0,8}c
	assert mult.parse("b{0,8}") + charclass.parse("c") == conc.parse("b{0,8}c")

	# charclass + conc
	# a + emptystring = a
	assert (charclass.parse("a") + conc.parse("")).reduce() == charclass.parse("a")
	# a + bc = abc
	assert charclass.parse("a") + conc.parse("bc") == conc.parse("abc")
	# a + ab = a{2}b
	assert (charclass.parse("a") + conc.parse("ab")).reduce() == conc.parse("a{2}b")

	# conc + charclass
	# emptystring + a = a
	assert (conc.parse("") + charclass.parse("a")).reduce() == charclass.parse("a")
	# ab + c = abc
	assert conc.parse("ab") + charclass.parse("c") == conc.parse("abc")
	# ab + b = ab{2}
	assert (conc.parse("ab") + charclass.parse("b")).reduce() == conc.parse("ab{2}")

	# pattern + charclass
	# (a|bd) + c = (a|bd)c
	assert pattern.parse("a|bd") + charclass.parse("c") == conc.parse("(a|bd)c")
	# (ac{2}|bc+) + c = (ac|bc*)c{2}
	assert (pattern.parse("ac{2}|bc+") + charclass.parse("c")).reduce() == conc.parse("(ac|bc*)c{2}")

	# charclass + pattern
	# a + (b|cd) = a(b|cd)
	assert charclass.parse("a") + pattern.parse("b|cd") == conc.parse("a(b|cd)")
	# a + (a{2}b|a+c) = a{2}(ab|a*c)
	assert (charclass.parse("a") + pattern.parse("(a{2}b|a+c)")).reduce() == conc.parse("a{2}(ab|a*c)")

	# mult + mult
	# a{3,4} + b? = a{3,4}b?
	assert mult.parse("a{3,4}") + mult.parse("b?") == conc.parse("a{3,4}b?")
	# a* + a{2} = a{2,}
	assert (mult.parse("a*") + mult.parse("a{2}")).reduce() == mult.parse("a{2,}")

	# mult + conc
	# a{2} + bc = a{2}bc
	assert mult.parse("a{2}") + conc.parse("bc") == conc.parse("a{2}bc")
	# a? + ab = a{1,2}b
	assert (mult.parse("a?") + conc.parse("ab")).reduce() == conc.parse("a{1,2}b")

	# conc + mult
	# ab + c* = abc*
	assert conc.parse("ab") + mult.parse("c*") == conc.parse("abc*")
	# ab + b* = ab+
	assert (conc.parse("ab") + mult.parse("b*")).reduce() == conc.parse("ab+")

	# mult + pattern
	# a{2,3} + (b|cd) = a{2,3}(b|cd)
	assert mult.parse("a{2,3}") + pattern.parse("b|cd") == conc.parse("a{2,3}(b|cd)")
	# a{2,3} + (a{2}b|a+c) = a{3,4}(ab|a*c)
	assert (mult.parse("a{2,3}") + pattern.parse("a{2}b|a+c")).reduce() == conc.parse("a{3,4}(ab|a*c)")

	# pattern + mult
	# (b|cd) + a{2,3} = (b|cd)a{2,3}
	assert pattern.parse("b|cd") + mult.parse("a{2,3}") == conc.parse("(b|cd)a{2,3}")
	# (ba{2}|ca+) + a{2,3} = (ba|ca*)a{3,4}
	assert (pattern.parse("ba{2}|ca+") + mult.parse("a{2,3}")).reduce() == conc.parse("(ba|ca*)a{3,4}")

	# conc + conc
	# ab + cd = abcd
	assert conc.parse("ab") + conc.parse("cd") == conc.parse("abcd")
	# ab + bc = ab{2}c
	assert (conc.parse("ab") + conc.parse("bc")).reduce() == conc.parse("ab{2}c")

	# conc + pattern
	# za{2,3} + (b|cd) = za{2,3}(b|cd)
	assert conc.parse("za{2,3}") + pattern.parse("b|cd") == conc.parse("za{2,3}(b|cd)")
	# za{2,3} + (a{2}b|a+c) = za{3,4}(ab|a*c)
	assert (conc.parse("za{2,3}") + pattern.parse("a{2}b|a+c")).reduce() == conc.parse("za{3,4}(ab|a*c)")

	# pattern + conc
	# (b|cd) + za{2,3} = (b|cd)za{2,3}
	assert pattern.parse("b|cd") + conc.parse("za{2,3}") == conc.parse("(b|cd)za{2,3}")
	# (ba{2}|ca+) + a{2,3}z = (ba|ca*)a{3,4}z
	assert (pattern.parse("ba{2}|ca+") + conc.parse("a{2,3}z")).reduce() == conc.parse("(ba|ca*)a{3,4}z")

	# pattern + pattern
	# (a|bc) + (c|de) = (a|bc)(c|de)
	assert pattern.parse("a|bc") + pattern.parse("c|de") == conc.parse("(a|bc)(c|de)")
	# (a|bc) + (a|bc) = (a|bc){2}
	assert (pattern.parse("a|bc") + pattern.parse("a|bc")).reduce() == mult.parse("(a|bc){2}")
Example #21
0
def test_pattern_reduce_basic():
	"""Check what pattern.reduce() returns for a handful of simple inputs."""
	expectations = (
		("ab|cd", pattern.parse("ab|cd")),
		("a{2}b{2}", conc.parse("a{2}b{2}")),
		("a{2}", mult.parse("a{2}")),
		("a", charclass.parse("a")),
	)
	for text, reduced in expectations:
		assert pattern.parse(text).reduce() == reduced
Example #22
0
def test_conc_reduction_basic():
	"""Check what conc.reduce() returns for a handful of simple inputs."""
	expectations = (
		("a", charclass.parse("a")),
		("a{3,4}", mult.parse("a{3,4}")),
		("ab", conc.parse("ab")),
		("a[]b", charclass.parse("[]")),
	)
	for text, reduced in expectations:
		assert conc.parse(text).reduce() == reduced
Example #23
0
def test_pattern_reduce_basic():
	"""Check what pattern.reduce() returns for a handful of simple inputs."""
	expectations = (
		("ab|cd", pattern.parse("ab|cd")),
		("a{2}b{2}", conc.parse("a{2}b{2}")),
		("a{2}", mult.parse("a{2}")),
		("a", charclass.parse("a")),
	)
	for text, reduced in expectations:
		assert pattern.parse(text).reduce() == reduced
Example #24
0
def test_charclass_intersection_2():
	"""Check that [A-z] & [^g], once reduced, equals [A-fh-z]."""
	overlap = parse("[A-z]") & parse("[^g]")
	assert overlap.reduce() == charclass.parse("[A-fh-z]")
Example #25
0
def test_set_ops():
	"""Check the `-` and `^` operators on parsed character classes."""
	# [abcd] - a leaves [bcd]
	remaining = parse("[abcd]") - parse("a")
	assert remaining == charclass.parse("[bcd]")

	# [abcd] ^ [cdef] keeps the characters found in exactly one operand
	exclusive = parse("[abcd]") ^ parse("[cdef]")
	assert exclusive == charclass.parse("[abef]")
Example #26
0
def test_w_d_s():
	"""Check that the w, d and s shorthand escapes parse inside charclasses."""
	# Each row: a charclass using "\w", "\d" or "\s", then the same class
	# written out in full.
	expectations = (
		("\\w", "[0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz]"),
		("[\\w~]", "[0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~]"),
		("[\\da]", "[0123456789a]"),
		("[\\s]", "[\t\n\r\f\v ]"),
	)
	for shorthand, spelled_out in expectations:
		assert charclass.parse(shorthand) == charclass.parse(spelled_out)
Example #27
0
def test_w_d_s():
	"""Check that the w, d and s shorthand escapes parse inside charclasses.

	Each assertion compares a charclass written with a shorthand escape to
	the same class written out character by character.
	"""
	# Allow "\w", "\d" and "\s" in charclasses.
	# Backslashes are doubled on the left-hand side so each literal holds a
	# real backslash without relying on Python passing an unrecognised escape
	# through. The right-hand "\t\n\r\f\v" are genuine escapes: that string
	# is meant to contain the whitespace characters themselves.
	assert charclass.parse("\\w") == charclass.parse("[0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz]")
	assert charclass.parse("[\\w~]") == charclass.parse("[0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~]")
	assert charclass.parse("[\\da]") == charclass.parse("[0123456789a]")
	assert charclass.parse("[\\s]") == charclass.parse("[\t\n\r\f\v ]")
Example #28
0
def test_set_ops():
	"""Check the `-` and `^` operators on parsed character classes."""
	# [abcd] - a leaves [bcd]
	remaining = parse("[abcd]") - parse("a")
	assert remaining == charclass.parse("[bcd]")

	# [abcd] ^ [cdef] keeps the characters found in exactly one operand
	exclusive = parse("[abcd]") ^ parse("[cdef]")
	assert exclusive == charclass.parse("[abef]")
def test_derive():
	"""Check the result of derive() on a few parsed expressions."""
	# Each row: expression text, argument passed to derive(), expected result.
	expectations = (
		("a+", "a", mult.parse("a*")),
		("a+|b+", "a", mult.parse("a*")),
		("abc|ade", "a", pattern.parse("bc|de")),
		("abc|ade", "ab", charclass.parse("c")),
	)
	for text, consumed, remainder in expectations:
		assert parse(text).derive(consumed) == remainder