def test_Sequence():
    """Sequence() builds an Op.SEQ definition and applies simple rewrites."""
    built = Sequence(Literal('a'), Dot())
    raw = Def(Op.SEQ, ([Literal('a'), Dot()], ))
    assert built == raw

    # a plain string item is equivalent to an explicit Literal
    from_strings = Sequence('foo', 'bar')
    from_literals = Sequence(Literal('foo'), Literal('bar'))
    assert from_strings == from_literals

    # simple optimizations: a one-item sequence is just the item ...
    single = Sequence(Dot())
    assert single == Dot()

    # ... and a nested sequence is flattened into its parent
    nested = Sequence(Sequence('a', 'b'), 'c')
    flat = Sequence('a', 'b', 'c')
    assert nested == flat
def test_Choice():
    """Choice() builds an Op.CHC definition and applies simple rewrites."""
    built = Choice(Literal('a'), Dot())
    raw = Def(Op.CHC, ([Literal('a'), Dot()], ))
    assert built == raw

    # a plain string alternative is equivalent to an explicit Literal
    from_strings = Choice('foo', 'bar')
    from_literals = Choice(Literal('foo'), Literal('bar'))
    assert from_strings == from_literals

    # simple optimizations: a one-alternative choice is just the item ...
    single = Choice(Dot())
    assert single == Dot()

    # ... and a nested choice is flattened into its parent
    nested = Choice(Choice('a', 'b'), 'c')
    flat = Choice('a', 'b', 'c')
    assert nested == flat
def test_loads_def():
    """loads() returns the start symbol together with the definitions."""
    only_a = ('A', {'A': Literal('a')})

    # one definition, with and without a trailing comment
    result = loads('A <- "a"')
    assert result == only_a
    result = loads('A <- "a" # comment')
    assert result == only_a

    # adjacent primaries form a sequence
    result = loads('A <- "a" "b"')
    assert result == ('A', {'A': Sequence('a', 'b')})

    # two definitions in one source string
    result = loads('A <- "a" B <- "b"')
    assert result == ('A', {
        'A': Literal('a'),
        'B': Literal('b')
    })

    # a definition referring to another one by name
    result = loads(''' A <- "a" Bee Bee <- "b" ''')
    assert result == ('A', {
        'A': Sequence('a', Nonterminal('Bee')),
        'Bee': Literal('b')
    })
def _common(defn):
    """Return *defn* with the common simplifications applied, bottom-up.

    Sub-definitions are processed first and the node is rebuilt through
    the constructor registered for its operator in ``_op_map`` (when
    there is one).  Afterwards:

    * a non-negated class made of one single character (not a range)
      becomes a literal;
    * the items of a sequence are rewritten in place by
      ``_common_sequence`` and those of a choice by ``_common_choice``;
    * a sequence or choice left with exactly one item is replaced by
      that item.
    """
    op = defn.op

    # descend first
    make_op = _op_map.get(op)
    if op in (SEQ, CHC):
        defn = make_op(*(_common(d) for d in defn.args[0]))
        if defn.op != op:
            # NOTE(review): assumes the SEQ/CHC constructors in _op_map
            # may unwrap a one-item sequence/choice to the item itself.
            # In that case the result is an already-processed
            # sub-definition whose args no longer have the item-list
            # layout, so the SEQ/CHC passes below must not run on it
            # with the stale operator.
            return defn
    elif make_op:
        defn = make_op(_common(defn.args[0]), *defn.args[1:])

    # [.] -> "."  (only 1-char class, not a range, not negated)
    if op == CLS:
        ranges = defn.args[0]
        negated = defn.args[1]
        if len(ranges) == 1 and ranges[0][1] is None and not negated:
            defn = Literal(ranges[0][0])
    elif op == SEQ:
        # mutates the item list of the rebuilt sequence in place
        _common_sequence(defn.args[0])
    elif op == CHC:
        # mutates the alternative list of the rebuilt choice in place
        _common_choice(defn.args[0])

    # Sequence(x) -> x  OR  Choice(x) -> x
    if op in (SEQ, CHC) and len(defn.args[0]) == 1:
        defn = defn.args[0][0]

    return defn
def _common_sequence(subdefs):
    """Simplify the items of a sequence, modifying *subdefs* in place.

    Two rewrites are applied while scanning left to right:

    * a negative lookahead on a class, or on a one-character literal,
      directly followed by a dot becomes a single class with the
      negation flipped (class) or set (literal);
    * a run of adjacent literals becomes one literal holding the
      concatenated text.
    """
    pos = 0
    while pos < len(subdefs) - 1:
        current = subdefs[pos]

        if current.op == NOT and subdefs[pos + 1].op == DOT:
            # ![...] .  ->  [^...]
            # !"." .    ->  [^.]
            inner = current.args[0]
            merged = None
            if inner.op == CLS:
                flipped = not inner.args[1]
                merged = Class(inner.args[0], negate=flipped)
            elif inner.op == LIT and len(inner.args[0]) == 1:
                merged = Class(inner.args[0], negate=True)
            if merged is not None:
                subdefs[pos:pos + 2] = [merged]

        elif current.op == LIT:
            # "." "."  ->  ".."
            stop = pos + 1
            while stop < len(subdefs) and subdefs[stop].op == LIT:
                stop += 1
            if stop - pos > 1:
                text = ''.join(lit.args[0] for lit in subdefs[pos:stop])
                subdefs[pos:stop] = [Literal(text)]

        pos += 1
def test_common():
    """Compare grammars loaded with the common optimizations (cload)
    against plainly loaded (gload) or hand-built (grm) grammars."""
    # nothing to simplify: both loaders agree
    optimized = cload(r'A <- "a"')
    assert optimized == gload(r'A <- "a"')
    optimized = cload(r'A <- !"a"')
    assert optimized == gload(r'A <- !"a"')
    optimized = cload(r'A <- !"a"')
    assert optimized == gload(r'A <- !"a"')

    # single-char classes to literals
    optimized = cload(r'A <- [a]')
    assert optimized == gload(r'A <- "a"')
    # but not single-range
    optimized = cload(r'A <- [a-c]')
    assert optimized == gload(r'A <- [a-c]')

    # add "b" to avoid dropping the sequence
    via_literal = cload(r'A <- !"a" . "b"')
    via_class = cload(r'A <- ![a] . "b"')
    assert via_literal == via_class == grm(
        {'A': Sequence(Class('a', negate=True), Literal('b'))})

    # now show the dropped sequence
    via_literal = cload(r'A <- !"a" .')
    via_class = cload(r'A <- ![a] .')
    assert via_literal == via_class == grm(
        {'A': Class('a', negate=True)})

    # sequence of literals to literal
    optimized = cload(r'A <- "a" "bc" "d"')
    assert optimized == gload(r'A <- "abcd"')
    # but not sequence with classes
    optimized = cload(r'A <- "a" [bc] "d"')
    assert optimized == gload(r'A <- "a" [bc] "d"')

    # choice of classes or single-char literals
    optimized = cload(r'A <- [ab] / "m" / [yz]')
    assert optimized == gload(r'A <- [abmyz]')
    # not negated classes though
    optimized = cload(r'A <- (![ab] .) / "m" / [yz]')
    assert optimized == grm(
        {'A': Choice(Class('ab', negate=True), Class('myz'))})
from pe import Match
from pe.operators import Literal, Sequence, Capture, Bind, Rule
from pe.actions import Pack

# Expression fixtures shared by the Match tests.
One = Literal('1')
CaptureOne = Capture(Literal('1'))
OneTwo = Sequence(Literal('1'), Literal('2'))
OneCaptureTwo = Sequence(Literal('1'), Capture(Literal('2')))
OneBindTwo = Sequence(Literal('1'), Bind(Literal('2'), name='x'))
OneBindCaptureTwo = Sequence(
    Literal('1'),
    Bind(Capture(Literal('2')), name='x'),
)
OneTwoRule = Rule(
    Sequence(Capture(Literal('1')), Capture(Literal('2'))),
    action=Pack(list),
)


def test_Match_atom():
    """A match on a plain literal has a span but no groups or value."""
    match = Match('123', 0, 1, One, (), {})

    # positional information
    assert match.string == '123'
    assert match.start() == 0
    assert match.end() == 1
    assert match.span() == (0, 1)

    # the expression is stored by identity
    assert match.pe is One

    # group 0 is the matched text; nothing was captured or bound
    assert match.group(0) == '1'
    assert match.groups() == ()
    assert match.groupdict() == {}
    assert match.value() is None


def test_Match_capture_atom():
    """A match on a captured literal keeps the original input string."""
    match = Match('123', 0, 1, CaptureOne, ('1', ), {})
    assert match.string == '123'
def test_Bind():
    """Bind() builds an Op.BND definition carrying the bound name."""
    bound_dot = Bind(Dot(), name='x')
    assert bound_dot == Def(Op.BND, (Dot(), 'x'))

    # a plain string operand is equivalent to an explicit Literal
    from_string = Bind('foo', name='bar')
    from_literal = Bind(Literal('foo'), name='bar')
    assert from_string == from_literal
def test_Capture():
    """Capture() builds an Op.CAP definition around its operand."""
    captured_dot = Capture(Dot())
    assert captured_dot == Def(Op.CAP, (Dot(), ))

    # a plain string operand is equivalent to an explicit Literal
    from_string = Capture('foo')
    from_literal = Capture(Literal('foo'))
    assert from_string == from_literal
def test_Not():
    """Not() builds an Op.NOT definition around its operand."""
    negated_dot = Not(Dot())
    assert negated_dot == Def(Op.NOT, (Dot(), ))

    # a plain string operand is equivalent to an explicit Literal
    from_string = Not('foo')
    from_literal = Not(Literal('foo'))
    assert from_string == from_literal
def test_And():
    """And() builds an Op.AND definition around its operand."""
    lookahead_dot = And(Dot())
    assert lookahead_dot == Def(Op.AND, (Dot(), ))

    # a plain string operand is equivalent to an explicit Literal
    from_string = And('foo')
    from_literal = And(Literal('foo'))
    assert from_string == from_literal
def _make_literal(s):
    """Return a Literal for the token text *s*.

    The first and last characters of *s* are dropped (presumably the
    surrounding quote marks) and the remainder is run through
    ``pe.unescape`` before being wrapped in a Literal.
    """
    inner = s[1:-1]
    text = pe.unescape(inner)
    return Literal(text)
def test_Star():
    """Star() builds an Op.STR definition around its operand."""
    starred_dot = Star(Dot())
    assert starred_dot == Def(Op.STR, (Dot(), ))

    # a plain string operand is equivalent to an explicit Literal
    from_string = Star('foo')
    from_literal = Star(Literal('foo'))
    assert from_string == from_literal
def test_Optional():
    """Optional() builds an Op.OPT definition around its operand."""
    optional_dot = Optional(Dot())
    assert optional_dot == Def(Op.OPT, (Dot(), ))

    # a plain string operand is equivalent to an explicit Literal
    from_string = Optional('foo')
    from_literal = Optional(Literal('foo'))
    assert from_string == from_literal
def test_Literal():
    """Literal() builds an Op.LIT definition holding the given string."""
    built = Literal('foo')
    raw = Def(Op.LIT, ('foo', ))
    assert built == raw
def test_loads_literal():
    """eloads() parses quoted literals, comments and escape sequences."""
    # (grammar source, expected literal text), checked in this order
    cases = [
        # plain text, with and without a trailing comment
        ('"foo"', 'foo'),
        ('"foo" # comment', 'foo'),
        # single-character escapes
        ('"\\t"', '\t'),
        ('"\\n"', '\n'),
        ('"\\v"', '\v'),
        ('"\\f"', '\f'),
        ('"\\r"', '\r'),
        ('"\\""', '"'),
        ("'\\''", "'"),
        ("'\\['", "["),
        ("'\\\\'", "\\"),
        ("'\\]'", "]"),
        # numeric escapes: octal, then 2-, 4- and 8-digit hexadecimal
        ("'\\123'", 'S'),
        ("'\\x61'", 'a'),
        ("'\\u0061'", 'a'),
        ("'\\U00000061'", 'a'),
    ]
    for source, text in cases:
        assert eloads(source) == Literal(text)
def test_Plus():
    """Plus() builds an Op.PLS definition around its operand."""
    repeated_dot = Plus(Dot())
    assert repeated_dot == Def(Op.PLS, (Dot(), ))

    # a plain string operand is equivalent to an explicit Literal
    from_string = Plus('foo')
    from_literal = Plus(Literal('foo'))
    assert from_string == from_literal
V.Spacing)

# Non-recursive patterns

# V.Operator = Choice(V.LEFTARROW)

# Characters accepted directly after a backslash in an escape.
V.Special = Class('tnvfr"\'[]\\\\')
# Single octal / hexadecimal digit.
V.Oct = Class('0-7')
V.Hex = Class('0-9a-fA-F')
# Numeric escape bodies: one to three octal digits, or a marker
# letter followed by a fixed number of hex digits (x: 2, u: 4, U: 8).
V.Octal = Sequence(V.Oct, Optional(V.Oct), Optional(V.Oct))
V.UTF8 = Sequence('x', *([V.Hex] * 2))
V.UTF16 = Sequence('u', *([V.Hex] * 4))
V.UTF32 = Sequence('U', *([V.Hex] * 8))
# One character: a backslash followed by one of the escape bodies
# above, or any single character that is not a backslash.
V.Char = Choice(
    Sequence('\\', Choice(V.Special, V.Octal, V.UTF8, V.UTF16, V.UTF32)),
    Sequence(Not('\\'), Dot()))
# A literal ']' accepted as the end of a range.
# NOTE(review): the name suggests this case should produce a warning;
# nothing here does so -- confirm where it is handled.
V.RangeEndWarn = Literal(']')
# Either "char - end" (end being ']' or another char) or a lone char.
V.Range = Choice(Sequence(V.Char, '-', Choice(V.RangeEndWarn, V.Char)),
                 V.Char)
# Identifiers: a start character then any number of continuation
# characters; only the name is captured, trailing spacing is not.
V.IdentStart = Class('a-zA-Z_')
V.IdentCont = Class('a-zA-Z_0-9')
V.Identifier = Sequence(Capture(Sequence(V.IdentStart, Star(V.IdentCont))),
                        V.Spacing)

# Tokens
# Each token is its punctuation followed by V.Spacing.
V.LEFTARROW = Sequence('<-', V.Spacing)
V.SLASH = Sequence('/', V.Spacing)
V.AND = Sequence('&', V.Spacing)
V.NOT = Sequence('!', V.Spacing)
V.TILDE = Sequence('~', V.Spacing)
V.QUESTION = Sequence('?', V.Spacing)
V.STAR = Sequence('*', V.Spacing)