Exemple #1
def test_empty_mult_suppression():
	"""A conc containing a mult that can never match must reduce to the empty charclass."""
	reduced_from_text = conc.parse("[]0\\d").reduce()
	assert reduced_from_text == charclass.parse("[]")

	hand_built = conc(
		mult(pattern(), one), # this mult can never actually match anything
		mult(charclass("0"), one),
		mult(charclass("0123456789"), one),
	)
	assert hand_built.reduce() == charclass.parse("[]")
Exemple #2
def test_empty_mult_suppression():
	"""A conc containing a mult that can never match must reduce to the empty charclass."""
	# The backslash is escaped explicitly: a bare "\d" in a non-raw string is
	# an invalid escape sequence, which newer Pythons warn about.
	reduced_from_text = conc.parse("[]0\\d").reduce()
	assert reduced_from_text == charclass.parse("[]")

	hand_built = conc(
		mult(pattern(), one), # this mult can never actually match anything
		mult(charclass("0"), one),
		mult(charclass("0123456789"), one),
	)
	assert hand_built.reduce() == charclass.parse("[]")
def test_charclass_multiplication():
	"""Check `charclass * multiplier` against the equivalent parsed mult."""
	# a * 1 = a
	unchanged = charclass("a") * one
	assert unchanged == charclass("a")

	# a * {1,3} = a{1,3}
	bounded = charclass("a") * multiplier.parse("{1,3}")
	assert bounded == mult.parse("a{1,3}")

	# a * {4,} = a{4,}
	open_ended = charclass("a") * multiplier.parse("{4,}")
	assert open_ended == mult.parse("a{4,}")
Exemple #4
def test_odd_bug():
	"""Regression check for an odd bug seen with ([bc]*c)?[ab]*."""
	# [bc]* accepts the empty input, given either as a list or as a string
	bc_star = mult(charclass("bc"), star).to_fsm({"a", "b", "c", fsm.anything_else})
	for empty_input in ([], ""):
		assert bc_star.accepts(empty_input)

	# c accepts exactly "c", given either as a string or as a list
	single_c = mult(charclass("c"), one).to_fsm({"a", "b", "c", fsm.anything_else})
	for c_input in ("c", ["c"]):
		assert single_c.accepts(c_input)

	# Joining the two machines with `+` must still accept "c"
	joined = bc_star + single_c
	for c_input in ("c", ["c"]):
		assert joined.accepts(c_input)
Exemple #5
def test_odd_bug():
	"""Regression check for an odd bug seen with ([bc]*c)?[ab]*."""
	# [bc]* accepts the empty input, given either as a list or as a string
	bc_star = mult(charclass("bc"), star).to_fsm({"a", "b", "c", fsm.anything_else})
	for empty_input in ([], ""):
		assert bc_star.accepts(empty_input)

	# c accepts exactly "c", given either as a string or as a list
	single_c = mult(charclass("c"), one).to_fsm({"a", "b", "c", fsm.anything_else})
	for c_input in ("c", ["c"]):
		assert single_c.accepts(c_input)

	# Joining the two machines with `+` must still accept "c"
	joined = bc_star + single_c
	for c_input in ("c", ["c"]):
		assert joined.accepts(c_input)
Exemple #6
def test_mult_reduction_easy():
	"""Check reduce() on simple mults, both parsed from text and built by hand."""
	# Each entry pairs a mult with the value its reduce() must equal.
	cases = [
		(mult.parse("a"), charclass.parse("a")),
		(mult.parse("a"), charclass("a")),
		(mult.parse("a?"), mult(charclass("a"), qm)),
		(mult.parse("a{0}"), emptystring),
		(mult.parse("[]"), nothing),
		(mult.parse("[]?"), emptystring),
		(mult.parse("[]{0}"), emptystring),
		(mult.parse("[]{0,5}"), emptystring),
		(mult(pattern(), one), nothing),
		(mult(pattern(), qm), emptystring),
		(mult(pattern(), zero), emptystring),
		(mult(pattern(), multiplier.parse("{0,5}")), emptystring),
	]
	for subject, expected in cases:
		assert subject.reduce() == expected
Exemple #7
def test_mult_reduction_easy():
	"""Check reduce() on simple mults, both parsed from text and built by hand."""
	# Each entry pairs a mult with the value its reduce() must equal.
	cases = [
		(mult.parse("a"), charclass.parse("a")),
		(mult.parse("a"), charclass("a")),
		(mult.parse("a?"), mult(charclass("a"), qm)),
		(mult.parse("a{0}"), emptystring),
		(mult.parse("[]"), nothing),
		(mult.parse("[]?"), emptystring),
		(mult.parse("[]{0}"), emptystring),
		(mult.parse("[]{0,5}"), emptystring),
		(mult(pattern(), one), nothing),
		(mult(pattern(), qm), emptystring),
		(mult(pattern(), zero), emptystring),
		(mult(pattern(), multiplier.parse("{0,5}")), emptystring),
	]
	for subject, expected in cases:
		assert subject.reduce() == expected
def test_mult_reduction_easy():
	"""Check reduce() on hand-built mults over a charclass, `nothing` and an empty pattern."""
	zero_to_five = multiplier(bound(0), bound(5))
	# Results are either another mult (mult -> mult) or something simpler
	# such as a charclass (mult -> charclass).
	cases = [
		(mult(charclass("a"), one), charclass("a")),
		(mult(charclass("a"), qm), mult(charclass("a"), qm)),
		(mult(charclass("a"), zero), emptystring),
		(mult(nothing, one), nothing),
		(mult(nothing, qm), emptystring),
		(mult(nothing, zero), emptystring),
		(mult(nothing, zero_to_five), emptystring),
		(mult(pattern(), one), nothing),
		(mult(pattern(), qm), emptystring),
		(mult(pattern(), zero), emptystring),
		(mult(pattern(), multiplier(bound(0), bound(5))), emptystring),
	]
	for subject, expected in cases:
		assert subject.reduce() == expected
Exemple #9
def test_charclass_parsing():
	"""Check charclass.match() and charclass.parse() on good and bad input.

	match(string, i) is compared against (parsed object, index after it).
	"""
	assert charclass.match("a", 0) == (charclass("a"), 1)
	assert charclass.parse("a") == charclass("a")
	assert charclass.match("aa", 1) == (charclass("a"), 2)
	assert charclass.match("a$", 1) == (charclass("$"), 2)
	assert charclass.match(".", 0) == (dot, 1)

	# A lone "[" must raise IndexError; reaching `assert False` means it did not
	try:
		charclass.match("[", 0)
		assert False
	except IndexError:
		pass

	# Matching at index 1 of "a" must raise nomatch
	try:
		charclass.match("a", 1)
		assert False
	except nomatch:
		pass
Exemple #10
def test_charclass_fsm():
	"""Check the alphabet and accepted inputs of the FSM built from a negated charclass."""
	# "[^a]"
	negated = ~charclass("a")
	machine = negated.to_fsm()
	assert machine.alphabet == {"a", fsm.anything_else}
	for accepted in ("b", ["b"], [fsm.anything_else]):
		assert machine.accepts(accepted)
Exemple #11
def test_charclass_parsing():
	"""Check charclass.match() and charclass.parse() on good and bad input.

	match(string, i) is compared against (parsed object, index after it).
	"""
	assert charclass.match("a", 0) == (charclass("a"), 1)
	assert charclass.parse("a") == charclass("a")
	assert charclass.match("aa", 1) == (charclass("a"), 2)
	assert charclass.match("a$", 1) == (charclass("$"), 2)
	assert charclass.match(".", 0) == (dot, 1)

	# A lone "[" must raise IndexError; reaching `assert False` means it did not
	try:
		charclass.match("[", 0)
		assert False
	except IndexError:
		pass

	# Matching at index 1 of "a" must raise nomatch
	try:
		charclass.match("a", 1)
		assert False
	except nomatch:
		pass
Exemple #12
def test_charclass_fsm():
	"""Check the alphabet and accepted inputs of the FSM built from a negated charclass."""
	# "[^a]"
	negated = ~charclass("a")
	machine = negated.to_fsm()
	assert machine.alphabet == {"a", fsm.anything_else}
	for accepted in ("b", ["b"], [fsm.anything_else]):
		assert machine.accepts(accepted)
def assert_non_overlapping(fsa1, fsa2):
    """Assert that the intersection of two lego finite state automata is the
    empty FSA.

    The check only runs when ``--no-skip`` is present in ``sys.argv``;
    otherwise ``SkipTest`` is raised before the intersection is computed.
    """
    if '--no-skip' not in sys.argv:
        raise SkipTest
    # Compute the intersection once and reuse it in the failure message.
    overlap = fsa1 & fsa2
    assert overlap == lego.charclass(), ("Overlapping regex: "
                                         "{}".format(overlap))
Exemple #14
def assert_non_overlapping(fsa1, fsa2):
    """Assert that the intersection of two lego finite state automata is the
    empty FSA.

    The check only runs when ``--no-skip`` is present in ``sys.argv``;
    otherwise ``SkipTest`` is raised before the intersection is computed.
    """
    if '--no-skip' not in sys.argv:
        raise SkipTest
    # Compute the intersection once and reuse it in the failure message.
    overlap = fsa1 & fsa2
    assert overlap == lego.charclass(), ("Overlapping regex: "
                                         "{}".format(overlap))
Exemple #15
def test_conc_str():
	"""Check that str() of a hand-built conc renders the expected regex text."""
	# The five leading literals a..e, each matched exactly once
	pieces = [mult(charclass(letter), one) for letter in "abcde"]
	pieces.append(mult(~charclass("fg"), star))
	pieces.append(mult(charclass("h"), multiplier(bound(5), bound(5))))
	pieces.append(mult(charclass("abcdefghijklmnopqrstuvwxyz"), plus))

	rendered = str(conc(*pieces))
	assert rendered == "abcde[^fg]*h{5}[a-z]+"
Exemple #16
def test_conc_str():
	"""Check that str() of a hand-built conc renders the expected regex text."""
	# The five leading literals a..e, each matched exactly once
	pieces = [mult(charclass(letter), one) for letter in "abcde"]
	pieces.append(mult(~charclass("fg"), star))
	pieces.append(mult(charclass("h"), multiplier(bound(5), bound(5))))
	pieces.append(mult(charclass("abcdefghijklmnopqrstuvwxyz"), plus))

	rendered = str(conc(*pieces))
	assert rendered == "abcde[^fg]*h{5}[a-z]+"
Exemple #17
def test_charclass_gen():
	"""Check that charclass.strings() yields each character once, then stops."""
	gen = charclass("xyz").strings()
	assert next(gen) == "x"
	assert next(gen) == "y"
	assert next(gen) == "z"

	# A fourth next() must raise StopIteration; reaching `assert False`
	# means the generator produced an extra value.
	try:
		next(gen)
		assert False
	except StopIteration:
		assert True
Exemple #18
def test_charclass_gen():
	"""Check that charclass.strings() yields each character once, then stops."""
	gen = charclass("xyz").strings()
	assert next(gen) == "x"
	assert next(gen) == "y"
	assert next(gen) == "z"

	# A fourth next() must raise StopIteration; reaching `assert False`
	# means the generator produced an extra value.
	try:
		next(gen)
		assert False
	except StopIteration:
		assert True
Exemple #19
def test_mult_intersection():
	"""Check `&` between pairs of parsed mults against the expected intersection."""
	# (left regex, right regex, expected value of left & right)
	cases = [
		("a", "b?", charclass()),
		("a", "b?", nothing),
		("a", "a?", charclass.parse("a")),
		("a{2}", "a{2,}", mult.parse("a{2}")),
		("a", "b", charclass.parse("[]")),
		("a", "a", charclass.parse("a")),
		("a*", "a", charclass.parse("a")),
		("a*", "b*", conc.parse("")),
		("a*", "a+", mult.parse("a+")),
		("a{2}", "a{4}", charclass.parse("[]")),
		("a{3,}", "a{3,}", mult.parse("a{3,}")),
	]
	for left, right, expected in cases:
		assert mult.parse(left) & mult.parse(right) == expected
Exemple #20
def test_mult_intersection():
	"""Check `&` between pairs of parsed mults against the expected intersection."""
	# (left regex, right regex, expected value of left & right)
	cases = [
		("a", "b?", charclass()),
		("a", "b?", nothing),
		("a", "a?", charclass.parse("a")),
		("a{2}", "a{2,}", mult.parse("a{2}")),
		("a", "b", charclass.parse("[]")),
		("a", "a", charclass.parse("a")),
		("a*", "a", charclass.parse("a")),
		("a*", "b*", conc.parse("")),
		("a*", "a+", mult.parse("a+")),
		("a{2}", "a{4}", charclass.parse("[]")),
		("a{3,}", "a{3,}", mult.parse("a{3,}")),
	]
	for left, right, expected in cases:
		assert mult.parse(left) & mult.parse(right) == expected
Exemple #21
def test_empty():
	"""Check empty() on objects that can match nothing and on objects that can match something."""
	matches_nothing = [
		nothing,
		charclass(),
		mult.parse("[]"),
		conc.parse("a[]"),
		pattern(),
	]
	for item in matches_nothing:
		assert item.empty()

	matches_something = [
		dot,
		mult.parse("a{0}"),
		mult.parse("[]?"),
		conc.parse("a[]?"),
		pattern.parse("a{0}"),
		pattern.parse("[]?"),
	]
	for item in matches_something:
		assert not item.empty()
Exemple #22
def test_empty():
	"""Check empty() on objects that can match nothing and on objects that can match something."""
	matches_nothing = [
		nothing,
		charclass(),
		mult.parse("[]"),
		conc.parse("a[]"),
		pattern(),
	]
	for item in matches_nothing:
		assert item.empty()

	matches_something = [
		dot,
		mult.parse("a{0}"),
		mult.parse("[]?"),
		conc.parse("a[]?"),
		pattern.parse("a{0}"),
		pattern.parse("[]?"),
	]
	for item in matches_something:
		assert not item.empty()
Exemple #23
def test_pattern_equality():
	"""Check that pattern equality ignores the order and repetition of its concs."""
	# Same two alternatives, listed in opposite order
	assert pattern(
		conc(mult(charclass("a"), one)),
		conc(mult(charclass("b"), one)),
	) == pattern(
		conc(mult(charclass("b"), one)),
		conc(mult(charclass("a"), one)),
	)
	# A repeated alternative compares equal to a single copy of it
	assert pattern(
		conc(mult(charclass("a"), one)),
		conc(mult(charclass("a"), one)),
	) == pattern(
		conc(mult(charclass("a"), one)),
	)
Exemple #24
def test_pattern_equality():
	"""Check that pattern equality ignores the order and repetition of its concs."""
	# Same two alternatives, listed in opposite order
	assert pattern(
		conc(mult(charclass("a"), one)),
		conc(mult(charclass("b"), one)),
	) == pattern(
		conc(mult(charclass("b"), one)),
		conc(mult(charclass("a"), one)),
	)
	# A repeated alternative compares equal to a single copy of it
	assert pattern(
		conc(mult(charclass("a"), one)),
		conc(mult(charclass("a"), one)),
	) == pattern(
		conc(mult(charclass("a"), one)),
	)
def test_mult_str():
	"""Check str() of mults, bounds and multipliers; short fixed repeats are spelled out."""
	letter = charclass("a")
	# (object, expected str(object))
	cases = [
		(mult(letter, one), "a"),
		(mult(letter, multiplier(bound(2), bound(2))), "aa"),
		(mult(letter, multiplier(bound(3), bound(3))), "aaa"),
		(mult(letter, multiplier(bound(4), bound(4))), "aaaa"),
		(mult(letter, multiplier(bound(5), bound(5))), "a{5}"),
		(mult(letter, qm), "a?"),
		(mult(letter, star), "a*"),
		(mult(letter, plus), "a+"),
		(mult(letter, multiplier(bound(2), bound(5))), "a{2,5}"),
		(bound(2), "2"),
		(inf, ""),
		(multiplier(bound(2), inf), "{2,}"),
		(mult(letter, multiplier(bound(2), inf)), "a{2,}"),
		(mult(d, one), "\\d"),
		(mult(d, multiplier(bound(2), bound(2))), "\\d\\d"),
		(mult(d, multiplier(bound(3), bound(3))), "\\d{3}"),
	]
	for subject, expected in cases:
		assert str(subject) == expected
Exemple #26
def test_mult_str():
	"""Check str() of bounds, multipliers and mults; fixed repeats use the {n} form."""
	# (object, expected str(object))
	building_blocks = [
		(bound(2), "2"),
		(inf, ""),
		(multiplier(bound(2), inf), "{2,}"),
	]
	for subject, expected in building_blocks:
		assert str(subject) == expected

	letter = charclass("a")
	mult_cases = [
		(mult(letter, one), "a"),
		(mult(letter, multiplier(bound(2), bound(2))), "a{2}"),
		(mult(letter, multiplier(bound(3), bound(3))), "a{3}"),
		(mult(letter, multiplier(bound(4), bound(4))), "a{4}"),
		(mult(letter, multiplier(bound(5), bound(5))), "a{5}"),
		(mult(letter, qm), "a?"),
		(mult(letter, star), "a*"),
		(mult(letter, plus), "a+"),
		(mult(letter, multiplier(bound(2), bound(5))), "a{2,5}"),
		(mult(letter, multiplier(bound(2), inf)), "a{2,}"),
		(mult(d, one), "\\d"),
		(mult(d, multiplier(bound(2), bound(2))), "\\d{2}"),
		(mult(d, multiplier(bound(3), bound(3))), "\\d{3}"),
	]
	for subject, expected in mult_cases:
		assert str(subject) == expected
def test_mult_intersection():
	"""Check `&` between parsed mults, plus a few mult.common() cases."""
	# (left regex, right regex, expected value of left & right)
	intersections = [
		# a & b? = nothing
		("a", "b?", charclass()),
		("a", "b?", nothing),
		# a & a? = a
		("a", "a?", charclass.parse("a")),
		# a{2} & a{2,} = a{2}
		("a{2}", "a{2,}", mult.parse("a{2}")),
		# a & b -> no intersection.
		("a", "b", charclass.parse("[]")),
		# a & a -> a
		("a", "a", charclass.parse("a")),
		# a* & a -> a
		("a*", "a", charclass.parse("a")),
		# a* & b* -> emptystring
		("a*", "b*", conc.parse("")),
		# a* & a+ -> a+
		("a*", "a+", mult.parse("a+")),
		# aa & aaaa -> []
		("a{2}", "a{4}", charclass.parse("[]")),
		# a{3,}, a{3,} -> a{3,} (with inf, inf left over)
		("a{3,}", "a{3,}", mult.parse("a{3,}")),
	]
	for left, right, expected in intersections:
		assert mult.parse(left) & mult.parse(right) == expected

	# A lone "a" reduces to a bare charclass
	assert mult.parse("a").reduce() == charclass.parse("a")

	# (left regex, right regex, regex of the expected left.common(right))
	shared_parts = [
		# common(a{3,4}, a{2,5}) -> a{2,3}
		("a{3,4}", "a{2,5}", "a{2,3}"),
		# common(a{2,}, a{1,5}) -> a{1,5}
		("a{2,}", "a{1,5}", "a{1,5}"),
		# common(a{3,}, a{2,}) -> a{2,} (with a, epsilon left over)
		("a{3,}", "a{2,}", "a{2,}"),
	]
	for left, right, expected in shared_parts:
		assert mult.parse(left).common(mult.parse(right)) == mult.parse(expected)
Exemple #28
def test_mult_parsing():
	"""Check mult.parse() on whole strings and mult.match() at offsets in a longer regex.

	Each match() result is compared against (parsed mult, index just past it).
	"""
	assert mult.parse("[a-g]+") == mult(charclass("abcdefg"), plus)
	assert mult.parse("[a-g0-8$%]+") == mult(charclass("abcdefg012345678$%"), plus)
	assert mult.parse("[a-g0-8$%\\^]+") == mult(charclass("abcdefg012345678$%^"), plus)

	# "[^fg]*" occupies indices 5..10
	assert mult.match("abcde[^fg]*", 5) == (
		mult(~charclass("fg"), star),
		11
	)
	# "h{5}" occupies indices 11..14
	assert mult.match("abcde[^fg]*h{5}[a-z]+", 11) == (
		mult(charclass("h"), multiplier(bound(5), bound(5))),
		15
	)
	# "[a-z]+" occupies indices 15..20
	assert mult.match("abcde[^fg]*h{5}[a-z]+T{1,}", 15) == (
		mult(charclass("abcdefghijklmnopqrstuvwxyz"), plus),
		21
	)
	# "T{2,}" occupies indices 21..25
	assert mult.match("abcde[^fg]*h{5}[a-z]+T{2,}", 21) == (
		mult(charclass("T"), multiplier(bound(2), inf)),
		26
	)
Exemple #29
def test_mult_parsing():
	"""Check mult.parse() on whole strings and mult.match() at offsets in a longer regex.

	Each match() result is compared against (parsed mult, index just past it).
	"""
	assert mult.parse("[a-g]+") == mult(charclass("abcdefg"), plus)
	assert mult.parse("[a-g0-8$%]+") == mult(charclass("abcdefg012345678$%"), plus)
	assert mult.parse("[a-g0-8$%\\^]+") == mult(charclass("abcdefg012345678$%^"), plus)

	# "[^fg]*" occupies indices 5..10
	assert mult.match("abcde[^fg]*", 5) == (
		mult(~charclass("fg"), star),
		11
	)
	# "h{5}" occupies indices 11..14
	assert mult.match("abcde[^fg]*h{5}[a-z]+", 11) == (
		mult(charclass("h"), multiplier(bound(5), bound(5))),
		15
	)
	# "[a-z]+" occupies indices 15..20
	assert mult.match("abcde[^fg]*h{5}[a-z]+T{1,}", 15) == (
		mult(charclass("abcdefghijklmnopqrstuvwxyz"), plus),
		21
	)
	# "T{2,}" occupies indices 21..25
	assert mult.match("abcde[^fg]*h{5}[a-z]+T{2,}", 21) == (
		mult(charclass("T"), multiplier(bound(2), inf)),
		26
	)
Exemple #30
def test_pattern_str():
	"""Check str() of patterns: alternation, duplicate alternatives and a nested group."""
	assert str(pattern(
		conc(mult(charclass("a"), one)),
		conc(mult(charclass("b"), one)),
	)) == "a|b"
	# Two identical alternatives render as one
	assert str(pattern(
		conc(mult(charclass("a"), one)),
		conc(mult(charclass("a"), one)),
	)) == "a"
	# abc | def followed by the nested group (ghi|jkl)
	assert str(pattern(
		conc(
			mult(charclass("a"), one),
			mult(charclass("b"), one),
			mult(charclass("c"), one),
		),
		conc(
			mult(charclass("d"), one),
			mult(charclass("e"), one),
			mult(charclass("f"), one),
			mult(
				pattern(
					conc(
						mult(charclass("g"), one),
						mult(charclass("h"), one),
						mult(charclass("i"), one),
					),
					conc(
						mult(charclass("j"), one),
						mult(charclass("k"), one),
						mult(charclass("l"), one),
					),
				), one
			),
		),
	)) == "abc|def(ghi|jkl)"
Exemple #31
def test_pattern_parsing():
	"""Check pattern.parse() on a nested alternation and on special group syntaxes."""
	# abc | def followed by the nested group (ghi|jkl)
	assert pattern.parse("abc|def(ghi|jkl)") == pattern(
		conc(
			mult(charclass("a"), one),
			mult(charclass("b"), one),
			mult(charclass("c"), one),
		),
		conc(
			mult(charclass("d"), one),
			mult(charclass("e"), one),
			mult(charclass("f"), one),
			mult(
				pattern(
					conc(
						mult(charclass("g"), one),
						mult(charclass("h"), one),
						mult(charclass("i"), one),
					),
					conc(
						mult(charclass("j"), one),
						mult(charclass("k"), one),
						mult(charclass("l"), one),
					),
				), one
			),
		),
	)

	# Accept the "non-capturing group" syntax, "(?: ... )" but give it no
	# special significance
	assert parse("(?:)") == parse("()")
	assert parse("(?:abc|def)") == parse("(abc|def)")
	parse("(:abc)") # should give no problems

	# Named groups
	assert pattern.parse("(?P<ng1>abc)") == parse("(abc)")
Exemple #32
def test_charclass_multiplication():
	"""Check `charclass * multiplier` against the equivalent parsed mult."""
	# Multiplying by `one` leaves the charclass unchanged
	unchanged = charclass("a") * one
	assert unchanged == charclass("a")

	# (multiplier text, regex text of the expected product)
	for multiplier_text, product_text in (("{1,3}", "a{1,3}"), ("{4,}", "a{4,}")):
		product = charclass("a") * multiplier.parse(multiplier_text)
		assert product == mult.parse(product_text)
Exemple #33
def test_charclass_intersection():
	"""Check `&` between plain and negated charclasses."""
	# [ab] n [bc] = [b]
	plain_plain = charclass("ab") & charclass("bc")
	assert plain_plain == charclass("b")

	# [ab] n [^bc] = [a]
	plain_negated = charclass("ab") & ~charclass("bc")
	assert plain_negated == charclass("a")

	# [^ab] n [bc] = [c]
	negated_plain = ~charclass("ab") & charclass("bc")
	assert negated_plain == charclass("c")

	# [^ab] n [^bc] = [^abc]
	negated_negated = ~charclass("ab") & ~charclass("bc")
	assert negated_negated == ~charclass("abc")
Exemple #34
def test_charclass_negation():
	"""Negating a charclass twice must compare equal to the original, from either side of ==."""
	twice_negated = ~~charclass("a")
	assert twice_negated == charclass("a")

	plain = charclass("a")
	assert plain == ~~charclass("a")
Exemple #35
def test_conc_equality():
	"""Check == and != between single-mult concs, and against emptystring."""
	def single(chars, count):
		# Build a conc holding exactly one mult.
		return conc(mult(charclass(chars), count))

	assert single("a", one) == single("a", one)

	different = [
		single("b", one),
		single("a", qm),
		single("a", multiplier(bound(1), bound(2))),
		emptystring,
	]
	for other in different:
		assert single("a", one) != other
Exemple #36
def test_repr():
	"""Check repr() of a negated charclass."""
	negated = ~charclass("a")
	assert repr(negated) == "~charclass('a')"
Exemple #37
def test_mult_equality():
	"""Check == and != between mults, and between a mult and a bare charclass."""
	first = mult(charclass("a"), one)
	second = mult(charclass("a"), one)
	assert first == second

	different = [
		mult(charclass("b"), one),
		mult(charclass("a"), qm),
		mult(charclass("a"), multiplier(bound(1), bound(2))),
		charclass("a"),
	]
	for other in different:
		assert mult(charclass("a"), one) != other
Exemple #38
    def lego(self):
        """
        This is the big kahuna of this module. Turn the present FSM into a regular
        expression object, as imported from the lego module. This is accomplished
        using the Brzozowski algebraic method.

        Reads self.states, self.alphabet, self.map, self.initial and
        self.finals; returns the expression built up for the initial state.
        """
        from greenery.lego import nothing, charclass, emptystring, star, otherchars

        # We need a new state not already used; guess first beyond current len
        outside = len(self.states)
        while outside in self.states:
            outside += 1

        # The set of strings that would be accepted by this FSM if you started
        # at state i is represented by the regex R_i.
        # If state i has a sole transition "a" to state j, then we know R_i = a R_j.
        # If state i is final, then the empty string is also accepted by this regex.
        # And so on...

        # From this we can build a set of simultaneous equations in len(self.states)
        # variables. This system is easily solved for all variables, but we only
        # need one: R_a, where a is the starting state.

        # The first thing we need to do is organise the states into order of depth,
        # so that when we perform our back-substitutions, we can start with the
        # last (deepest) state and therefore finish with R_a.
        states = [self.initial]
        i = 0
        while i < len(states):
            current = states[i]
            for symbol in sorted(self.alphabet, key=str):
                target = self.map[current][symbol]
                # Queue each newly discovered state exactly once
                if target not in states:
                    states.append(target)
            i += 1

        # Our system of equations is represented like so:
        brz = {}
        for a in self.states:
            brz[a] = {}
            for b in self.states | set([outside]):
                brz[a][b] = nothing

        # Populate it with some initial data.
        for a in self.map:
            for symbol in self.map[a]:
                b = self.map[a][symbol]
                if symbol == otherchars:
                    # "Anything else" means every character not named in the alphabet
                    brz[a][b] |= ~charclass(self.alphabet - set([otherchars]))
                else:
                    brz[a][b] |= charclass(set([symbol]))
            if a in self.finals:
                brz[a][outside] |= emptystring

        # Now perform our back-substitution
        for i in reversed(range(len(states))):
            a = states[i]

            # Before the equation for R_a can be substituted into the other
            # equations, we need to resolve the self-transition (if any).
            # e.g.    R_a = 0 R_a |   1 R_b |   2 R_c
            # becomes R_a =         0*1 R_b | 0*2 R_c
            loop = brz[a][a] * star  # i.e. "0*"
            del brz[a][a]

            for right in brz[a]:
                brz[a][right] = loop + brz[a][right]

            # Note: even if we're down to our final equation, the above step still
            # needs to be performed before anything is returned.

            # Now we can substitute this equation into all of the previous ones.
            for j in range(i):
                b = states[j]

                # e.g. substituting R_a =  0*1 R_b |      0*2 R_c
                # into              R_b =    3 R_a |        4 R_c | 5 R_d
                # yields            R_b = 30*1 R_b | (30*2|4) R_c | 5 R_d
                univ = brz[b][a]  # i.e. "3"
                del brz[b][a]

                for right in brz[a]:
                    brz[b][right] |= univ + brz[a][right]

        return brz[self.initial][outside]
Exemple #39
def test_empty_pattern_reduction():
	"""A pattern with no alternatives must reduce to an empty charclass."""
	reduced = pattern().reduce()
	assert reduced == charclass()
Exemple #40
def test_conc_parsing():
	"""Check conc.parse() against hand-built concs for several regex strings."""
	assert conc.parse("abcde[^fg]*h{5}[a-z]+") == conc(
		mult(charclass("a"), one),
		mult(charclass("b"), one),
		mult(charclass("c"), one),
		mult(charclass("d"), one),
		mult(charclass("e"), one),
		mult(~charclass("fg"), star),
		mult(charclass("h"), multiplier(bound(5), bound(5))),
		mult(charclass("abcdefghijklmnopqrstuvwxyz"), plus),
	)
	assert conc.parse("[bc]*[ab]*") == conc(
		mult(charclass("bc"), star),
		mult(charclass("ab"), star),
	)
	assert conc.parse("abc...") == conc(
		mult(charclass("a"), one),
		mult(charclass("b"), one),
		mult(charclass("c"), one),
		mult(dot, one),
		mult(dot, one),
		mult(dot, one),
	)
	# A date-shaped regex: four digits, a dash, two digits, a dash, two digits
	assert conc.parse("\\d{4}-\\d{2}-\\d{2}") == conc(
		mult(charclass("0123456789"), multiplier(bound(4), bound(4))),
		mult(charclass("-"), one),
		mult(charclass("0123456789"), multiplier(bound(2), bound(2))),
		mult(charclass("-"), one),
		mult(charclass("0123456789"), multiplier(bound(2), bound(2))),
	)
Exemple #41
def test_charclass_union():
	"""Check `|` between plain and negated charclasses."""
	# [ab] u [bc] = [abc]
	plain_plain = charclass("ab") | charclass("bc")
	assert plain_plain == charclass("abc")

	# [ab] u [^bc] = [^c]
	plain_negated = charclass("ab") | ~charclass("bc")
	assert plain_negated == ~charclass("c")

	# [^ab] u [bc] = [^a]
	negated_plain = ~charclass("ab") | charclass("bc")
	assert negated_plain == ~charclass("a")

	# [^ab] u [^bc] = [^b]
	negated_negated = ~charclass("ab") | ~charclass("bc")
	assert negated_negated == ~charclass("b")
Exemple #42
def test_conc_equality():
	"""Check == and != between single-mult concs, and against emptystring."""
	def single(chars, count):
		# Build a conc holding exactly one mult.
		return conc(mult(charclass(chars), count))

	assert single("a", one) == single("a", one)

	different = [
		single("b", one),
		single("a", qm),
		single("a", multiplier(bound(1), bound(2))),
		emptystring,
	]
	for other in different:
		assert single("a", one) != other
Exemple #43
def test_charclass_equality():
	"""Check == and != between plain and negated charclasses, ignoring character order."""
	plain = charclass("a")
	assert plain == charclass("a")

	negated = ~charclass("a")
	assert negated == ~charclass("a")
	assert negated != charclass("a")

	# The order in which the characters are supplied is irrelevant
	forward, backward = charclass("ab"), charclass("ba")
	assert forward == backward
Exemple #44
def test_charclass_equality():
	"""Check == and != between plain and negated charclasses, ignoring character order."""
	plain = charclass("a")
	assert plain == charclass("a")

	negated = ~charclass("a")
	assert negated == ~charclass("a")
	assert negated != charclass("a")

	# The order in which the characters are supplied is irrelevant
	forward, backward = charclass("ab"), charclass("ba")
	assert forward == backward
Exemple #45
def test_repr():
	"""Check repr() of a negated charclass."""
	negated = ~charclass("a")
	assert repr(negated) == "~charclass('a')"
Exemple #46
def test_charclass_multiplication():
	"""Check `charclass * multiplier` against the equivalent parsed mult."""
	# Multiplying by `one` leaves the charclass unchanged
	unchanged = charclass("a") * one
	assert unchanged == charclass("a")

	# (multiplier text, regex text of the expected product)
	for multiplier_text, product_text in (("{1,3}", "a{1,3}"), ("{4,}", "a{4,}")):
		product = charclass("a") * multiplier.parse(multiplier_text)
		assert product == mult.parse(product_text)
Exemple #47
def test_charclass_str():
	"""Check str() of charclasses: shorthands, escaping, ranges, negation and parsed input."""
	# (charclass, expected str(charclass))
	rendered = [
		(w, "\\w"),
		(d, "\\d"),
		(s, "\\s"),
		(charclass("a"), "a"),
		(charclass("{"), "\\{"),
		(charclass("\t"), "\\t"),
		(charclass("ab"), "[ab]"),
		(charclass("a{"), "[a{]"),
		(charclass("a\t"), "[\\ta]"),
		(charclass("a-"), "[\\-a]"),
		(charclass("a["), "[\\[a]"),
		(charclass("a]"), "[\\]a]"),
		(charclass("ab"), "[ab]"),
		(charclass("abc"), "[abc]"),
		(charclass("abcd"), "[a-d]"),
		(charclass("abcdfghi"), "[a-df-i]"),
		(charclass("^"), "^"),
		(charclass("\\"), "\\\\"),
		(charclass("a^"), "[\\^a]"),
		(charclass("0123456789a"), "[0-9a]"),
		(charclass("\t\v\r A"), "[\\t\\v\\r A]"),
		(charclass("\n\f A"), "[\\n\\f A]"),
		(charclass("\t\n\v\f\r A"), "[\\t-\\r A]"),
		(charclass("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz|"), "[0-9A-Z_a-z|]"),
		(W, "\\W"),
		(D, "\\D"),
		(S, "\\S"),
		(dot, "."),
		(~charclass(""), "."),
		(~charclass("a"), "[^a]"),
		(~charclass("{"), "[^{]"),
		(~charclass("\t"), "[^\\t]"),
		(~charclass("^"), "[^\\^]"),
	]
	for subject, expected in rendered:
		assert str(subject) == expected

	# (regex text handed to parse(), expected str() of the result)
	round_trips = [
		# Arbitrary ranges
		("[\\w:;<=>?@\\[\\\\\\]\\^`]", "[0-z]"),
		# TODO: what if \d is a proper subset of `chars`?

		# escape sequences are not preserved
		("\\x09", "\\t"),

		# Printing ASCII control characters? You should get hex escapes
		("\\x00", "\\x00"),
	]
	for source, expected in round_trips:
		assert str(parse(source)) == expected
Exemple #48
def test_pattern_str():
	"""Check str() of patterns: alternation, duplicate alternatives and a nested group."""
	assert str(pattern(
		conc(mult(charclass("a"), one)),
		conc(mult(charclass("b"), one)),
	)) == "a|b"
	# Two identical alternatives render as one
	assert str(pattern(
		conc(mult(charclass("a"), one)),
		conc(mult(charclass("a"), one)),
	)) == "a"
	# abc | def followed by the nested group (ghi|jkl)
	assert str(pattern(
		conc(
			mult(charclass("a"), one),
			mult(charclass("b"), one),
			mult(charclass("c"), one),
		),
		conc(
			mult(charclass("d"), one),
			mult(charclass("e"), one),
			mult(charclass("f"), one),
			mult(
				pattern(
					conc(
						mult(charclass("g"), one),
						mult(charclass("h"), one),
						mult(charclass("i"), one),
					),
					conc(
						mult(charclass("j"), one),
						mult(charclass("k"), one),
						mult(charclass("l"), one),
					),
				), one
			),
		),
	)) == "abc|def(ghi|jkl)"
Exemple #49
def test_mult_equality():
	"""Check == and != between mults, and between a mult and a bare charclass."""
	first = mult(charclass("a"), one)
	second = mult(charclass("a"), one)
	assert first == second

	different = [
		mult(charclass("b"), one),
		mult(charclass("a"), qm),
		mult(charclass("a"), multiplier(bound(1), bound(2))),
		charclass("a"),
	]
	for other in different:
		assert mult(charclass("a"), one) != other
Exemple #50
def test_charclass_union():
	"""Check `|` between plain and negated charclasses."""
	# [ab] u [bc] = [abc]
	plain_plain = charclass("ab") | charclass("bc")
	assert plain_plain == charclass("abc")

	# [ab] u [^bc] = [^c]
	plain_negated = charclass("ab") | ~charclass("bc")
	assert plain_negated == ~charclass("c")

	# [^ab] u [bc] = [^a]
	negated_plain = ~charclass("ab") | charclass("bc")
	assert negated_plain == ~charclass("a")

	# [^ab] u [^bc] = [^b]
	negated_negated = ~charclass("ab") | ~charclass("bc")
	assert negated_negated == ~charclass("b")
Exemple #51
def test_charclass_negation():
	"""Negating a charclass twice must compare equal to the original, from either side of ==."""
	twice_negated = ~~charclass("a")
	assert twice_negated == charclass("a")

	plain = charclass("a")
	assert plain == ~~charclass("a")
Exemple #52
def test_empty_pattern_reduction():
	"""A pattern with no alternatives must reduce to an empty charclass."""
	reduced = pattern().reduce()
	assert reduced == charclass()
Exemple #53
def test_charclass_intersection():
	"""Check `&` between plain and negated charclasses."""
	# [ab] n [bc] = [b]
	plain_plain = charclass("ab") & charclass("bc")
	assert plain_plain == charclass("b")

	# [ab] n [^bc] = [a]
	plain_negated = charclass("ab") & ~charclass("bc")
	assert plain_negated == charclass("a")

	# [^ab] n [bc] = [c]
	negated_plain = ~charclass("ab") & charclass("bc")
	assert negated_plain == charclass("c")

	# [^ab] n [^bc] = [^abc]
	negated_negated = ~charclass("ab") & ~charclass("bc")
	assert negated_negated == ~charclass("abc")
Exemple #54
	def lego(self):
		"""
		This is the big kahuna of this module. Turn the present FSM into a regular
		expression object, as imported from the lego module. This is accomplished
		using the Brzozowski algebraic method.

		Reads self.states, self.alphabet, self.map, self.initial and
		self.finals; returns the expression built up for the initial state.
		"""
		from greenery.lego import nothing, charclass, emptystring, star, otherchars

		# We need a new state not already used; guess first beyond current len
		outside = len(self.states)
		while outside in self.states:
			outside += 1

		# The set of strings that would be accepted by this FSM if you started
		# at state i is represented by the regex R_i.
		# If state i has a sole transition "a" to state j, then we know R_i = a R_j.
		# If state i is final, then the empty string is also accepted by this regex.
		# And so on...

		# From this we can build a set of simultaneous equations in len(self.states)
		# variables. This system is easily solved for all variables, but we only
		# need one: R_a, where a is the starting state.

		# The first thing we need to do is organise the states into order of depth,
		# so that when we perform our back-substitutions, we can start with the
		# last (deepest) state and therefore finish with R_a.
		states = [self.initial]
		i = 0
		while i < len(states):
			current = states[i]
			for symbol in sorted(self.alphabet, key=str):
				target = self.map[current][symbol]
				# Queue each newly discovered state exactly once
				if target not in states:
					states.append(target)
			i += 1

		# Our system of equations is represented like so:
		brz = {}
		for a in self.states:
			brz[a] = {}
			for b in self.states | {outside}:
				brz[a][b] = nothing

		# Populate it with some initial data.
		for a in self.map:
			for symbol in self.map[a]:
				b = self.map[a][symbol]
				if symbol == otherchars:
					# "Anything else" means every character not named in the alphabet
					brz[a][b] |= ~charclass(self.alphabet - {otherchars})
				else:
					brz[a][b] |= charclass({symbol})
			if a in self.finals:
				brz[a][outside] |= emptystring

		# Now perform our back-substitution
		for i in reversed(range(len(states))):
			a = states[i]

			# Before the equation for R_a can be substituted into the other
			# equations, we need to resolve the self-transition (if any).
			# e.g.    R_a = 0 R_a |   1 R_b |   2 R_c
			# becomes R_a =         0*1 R_b | 0*2 R_c
			loop = brz[a][a] * star # i.e. "0*"
			del brz[a][a]

			for right in brz[a]:
				brz[a][right] = loop + brz[a][right]

			# Note: even if we're down to our final equation, the above step still
			# needs to be performed before anything is returned.

			# Now we can substitute this equation into all of the previous ones.
			for j in range(i):
				b = states[j]

				# e.g. substituting R_a =  0*1 R_b |      0*2 R_c
				# into              R_b =    3 R_a |        4 R_c | 5 R_d
				# yields            R_b = 30*1 R_b | (30*2|4) R_c | 5 R_d
				univ = brz[b][a] # i.e. "3"
				del brz[b][a]

				for right in brz[a]:
					brz[b][right] |= univ + brz[a][right]

		return brz[self.initial][outside]
Exemple #55
def test_conc_parsing():
	"""Check conc.parse() against hand-built concs for several regex strings."""
	assert conc.parse("abcde[^fg]*h{5}[a-z]+") == conc(
		mult(charclass("a"), one),
		mult(charclass("b"), one),
		mult(charclass("c"), one),
		mult(charclass("d"), one),
		mult(charclass("e"), one),
		mult(~charclass("fg"), star),
		mult(charclass("h"), multiplier(bound(5), bound(5))),
		mult(charclass("abcdefghijklmnopqrstuvwxyz"), plus),
	)
	assert conc.parse("[bc]*[ab]*") == conc(
		mult(charclass("bc"), star),
		mult(charclass("ab"), star),
	)
	assert conc.parse("abc...") == conc(
		mult(charclass("a"), one),
		mult(charclass("b"), one),
		mult(charclass("c"), one),
		mult(dot, one),
		mult(dot, one),
		mult(dot, one),
	)
	# A date-shaped regex: four digits, a dash, two digits, a dash, two digits
	assert conc.parse("\\d{4}-\\d{2}-\\d{2}") == conc(
		mult(charclass("0123456789"), multiplier(bound(4), bound(4))),
		mult(charclass("-"), one),
		mult(charclass("0123456789"), multiplier(bound(2), bound(2))),
		mult(charclass("-"), one),
		mult(charclass("0123456789"), multiplier(bound(2), bound(2))),
	)
Exemple #56
def test_pattern_parsing():
	"""Check pattern.parse() on a nested alternation and on special group syntaxes."""
	# abc | def followed by the nested group (ghi|jkl)
	assert pattern.parse("abc|def(ghi|jkl)") == pattern(
		conc(
			mult(charclass("a"), one),
			mult(charclass("b"), one),
			mult(charclass("c"), one),
		),
		conc(
			mult(charclass("d"), one),
			mult(charclass("e"), one),
			mult(charclass("f"), one),
			mult(
				pattern(
					conc(
						mult(charclass("g"), one),
						mult(charclass("h"), one),
						mult(charclass("i"), one),
					),
					conc(
						mult(charclass("j"), one),
						mult(charclass("k"), one),
						mult(charclass("l"), one),
					),
				), one
			),
		),
	)

	# Accept the "non-capturing group" syntax, "(?: ... )" but give it no
	# special significance
	assert parse("(?:)") == parse("()")
	assert parse("(?:abc|def)") == parse("(abc|def)")
	parse("(:abc)") # should give no problems

	# Named groups
	assert pattern.parse("(?P<ng1>abc)") == parse("(abc)")
Exemple #57
def test_charclass_str():
	"""Check str() of charclasses: shorthands, escaping, ranges, negation and parsed input."""
	# (charclass, expected str(charclass))
	rendered = [
		(w, "\\w"),
		(d, "\\d"),
		(s, "\\s"),
		(charclass("a"), "a"),
		(charclass("{"), "\\{"),
		(charclass("\t"), "\\t"),
		(charclass("ab"), "[ab]"),
		(charclass("a{"), "[a{]"),
		(charclass("a\t"), "[\\ta]"),
		(charclass("a-"), "[\\-a]"),
		(charclass("a["), "[\\[a]"),
		(charclass("a]"), "[\\]a]"),
		(charclass("ab"), "[ab]"),
		(charclass("abc"), "[abc]"),
		(charclass("abcd"), "[a-d]"),
		(charclass("abcdfghi"), "[a-df-i]"),
		(charclass("^"), "^"),
		(charclass("\\"), "\\\\"),
		(charclass("a^"), "[\\^a]"),
		(charclass("0123456789a"), "[0-9a]"),
		(charclass("\t\v\r A"), "[\\t\\v\\r A]"),
		(charclass("\n\f A"), "[\\n\\f A]"),
		(charclass("\t\n\v\f\r A"), "[\\t-\\r A]"),
		(charclass("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz|"), "[0-9A-Z_a-z|]"),
		(W, "\\W"),
		(D, "\\D"),
		(S, "\\S"),
		(dot, "."),
		(~charclass(""), "."),
		(~charclass("a"), "[^a]"),
		(~charclass("{"), "[^{]"),
		(~charclass("\t"), "[^\\t]"),
		(~charclass("^"), "[^\\^]"),
	]
	for subject, expected in rendered:
		assert str(subject) == expected

	# (regex text handed to parse(), expected str() of the result)
	round_trips = [
		# Arbitrary ranges. Every backslash is doubled: the bare "\w" and "\]"
		# previously used here are invalid escape sequences in a non-raw
		# string, although they produced the same characters.
		("[\\w:;<=>?@\\[\\\\\\]\\^`]", "[0-z]"),
		# TODO: what if \d is a proper subset of `chars`?

		# escape sequences are not preserved
		("\\x09", "\\t"),

		# Printing ASCII control characters? You should get hex escapes
		("\\x00", "\\x00"),
	]
	for source, expected in round_trips:
		assert str(parse(source)) == expected