def compile():
    """Build an INI-format parser and return it as a plain callable.

    The callable accepts an INI document as a string and returns a dict
    mapping each ``[section]`` title to a dict of its key/value pairs
    (values default to ``None`` when no ``=`` is present).
    """
    # PEG grammar for the INI dialect; pe's notation ignores layout
    # between rules, so each rule sits on its own line for readability.
    grammar = pe.compile(
        r'''
        Start    <- (Comment* Section)* Comment* EOF
        Section  <- Header Body
        Header   <- Space* Title Space* (EOL / EOF)
        Title    <- '[' ~(![\]=\n\r] .)* ']'
        Body     <- (Comment* Pair)*
        Pair     <- Space* Key ('=' val:Value)?
        Key      <- !Title (![=\n\r] .)+
        Value    <- ('\\' EOL / !EOL .)*
        Comment  <- (Space* ';' (!EOL .)* / Space+) (EOL / EOF) / EOL
        Space    <- [\t ]
        EOL      <- '\r\n' / [\n\r]
        EOF      <- !.
        ''',
        actions={
            # Fold the stream of (title, body) pairs into one dict.
            'Start': Pack(dict),
            'Section': Pack(tuple),
            'Body': Pack(dict),
            # 'val' arrives as a keyword argument only when '=' matched.
            'Pair': lambda key, val=None: (key, val),
            'Key': Capture(str.strip),
            'Value': Capture(_normalize_multiline),
        },
        parser='machine',
        flags=pe.OPTIMIZE,
    )

    def parse(text):
        return grammar.match(text).value()

    return parse
def compile():
    """Build a strict JSON parser and return it as a plain callable.

    The callable accepts a JSON document as a string and returns the
    corresponding Python value (dict, list, str, float, True/False/None).
    Numbers are always produced as ``float``.
    """
    # PEG grammar for JSON; '#' comments require the rule-per-line layout.
    parser = pe.compile(
        r'''
        # Syntactic rules
        Start    <- Spacing Value EOF
        Value    <- Object / Array / String / Number / Constant
        Object   <- LBRACE (Member (COMMA Member)*)? RBRACE
        Member   <- String COLON Value
        Array    <- LBRACK (Value (COMMA Value)*)? RBRACK
        String   <- ["] CHAR* ( ESC CHAR* )* ["]
        Number   <- INTEGER FRACTION? EXPONENT?
        Constant <- TRUE / FALSE / NULL

        # Lexical rules
        CHAR     <- [ !#-\[\]-\U0010ffff]
        ESC      <- '\\' ( ["\\/bfnrt] / 'u' HEX HEX HEX HEX )
        HEX      <- [0-9a-fA-F]
        INTEGER  <- "-"? ("0" / [1-9] [0-9]*)
        FRACTION <- "." [0-9]+
        EXPONENT <- [eE] [-+]? [0-9]+
        TRUE     <- "true"
        FALSE    <- "false"
        NULL     <- "null"
        LBRACE   <- "{" Spacing
        RBRACE   <- Spacing "}"
        LBRACK   <- "[" Spacing
        RBRACK   <- Spacing "]"
        COMMA    <- Spacing "," Spacing
        COLON    <- Spacing ":" Spacing
        Spacing  <- [\t\n\r ]*
        EOF      <- Spacing !.
        ''',
        actions={
            'Object': Pack(dict),
            'Member': Pack(tuple),
            'Array': Pack(list),
            # Unescape \uXXXX and backslash sequences in one pass.
            'String': Capture(json_unescape),
            'Number': Capture(float),
            'TRUE': Constant(True),
            'FALSE': Constant(False),
            'NULL': Constant(None),
        },
        parser='machine',
        flags=pe.OPTIMIZE,
    )

    def parse(text):
        # STRICT makes match failures raise instead of returning None.
        return parser.match(text, flags=pe.STRICT).value()

    return parse
('Cap6', Chc(Seq(Cap(abc), Cap(xyz)), Seq(Cap(abc), Cap(abc))), 'aa', 0, 2, (('a', 'a'), {}, 'a')), # Capture suppresses inner values ('Cap7', Cap(Cap(abc)), 'abc', 0, 1, (('a', ), {}, 'a')), ('Cap8', Cap(Bnd(Cap(abc), name='x')), 'abc', 0, 1, (('a', ), {}, 'a')), ('Cap9', Cap(Rul(Cap(abc), lambda x: int(x, 16), name='A')), 'abc', 0, 1, (('a', ), {}, 'a')), ('Rul0', Rul(abc, None), 'a', 0, 1, _blank), ('Rul1', Rul(Cap(abc), None), 'a', 0, 1, (('a', ), {}, 'a')), ('Rul2', Rul(abc, None), 'd', 0, FAIL, None), ('Rul3', Rul(Cap(abc), lambda x: int(x, 16), name='A'), 'a', 0, 1, ((10, ), {}, 10)), ('Rul4', Rul(Cap(abc), lambda x: int(x, 16), name='A'), 'd', 0, FAIL, None), ('Rul5', Rul(Seq(Cap('a'), Cap('b')), action=Pack(list)), 'ab', 0, 2, ((['a', 'b'], ), {}, ['a', 'b'])), # Regression tests for Machine Parser ('Rgr0', Cap(Sym('abc')), 'a', 0, 1, (('a', ), {}, 'a')), ('Rgr1', Cap(Sym('abcs')), 'aaa', 0, 3, (('aaa', ), {}, 'aaa')), ('Rgr2', Seq(abc, Not(Dot())), 'a', 0, 1, _blank), ] @pytest.mark.parametrize( 'parser,dfn,input,pos,end,match', [(parser, ) + row[1:] for parser in [PackratParser, PyMachineParser, CyMachineParser] for row in data], ids=[
FALSE <- "false" NULL <- "null" LBRACE <- "{" Spacing RBRACE <- Spacing "}" LBRACK <- "[" Spacing RBRACK <- Spacing "]" COMMA <- Spacing "," Spacing COLON <- Spacing ":" Spacing Spacing <- [\t\n\f\r ]* EOF <- !. BADVALUE <- ![}\]] . BADCOMMA <- ',' &(RBRACE / RBRACK) ''', actions={ 'Object': Pair(dict), 'Array': Pack(list), 'Integer': Capture(int), 'Float': Capture(float), 'TRUE': Constant(True), 'FALSE': Constant(False), 'NULL': Constant(None), 'BADVALUE': Fail('unexpected JSON value'), 'BADCOMMA': Fail('trailing commas are not allowed'), }) def _match(s): return Json.match(s, flags=pe.STRICT | pe.MEMOIZE).value() def test_numbers():
from pe import Match from pe.operators import Literal, Sequence, Capture, Bind, Rule from pe.actions import Pack One = Literal('1') CaptureOne = Capture(Literal('1')) OneTwo = Sequence(Literal('1'), Literal('2')) OneCaptureTwo = Sequence(Literal('1'), Capture(Literal('2'))) OneBindTwo = Sequence(Literal('1'), Bind(Literal('2'), name='x')) OneBindCaptureTwo = Sequence(Literal('1'), Bind(Capture(Literal('2')), name='x')) OneTwoRule = Rule(Sequence(Capture(Literal('1')), Capture(Literal('2'))), action=Pack(list)) def test_Match_atom(): m = Match('123', 0, 1, One, (), {}) assert m.string == '123' assert m.start() == 0 assert m.end() == 1 assert m.span() == (0, 1) assert m.pe is One assert m.group(0) == '1' assert m.groups() == () assert m.groupdict() == {} assert m.value() is None def test_Match_capture_atom(): m = Match('123', 0, 1, CaptureOne, ('1', ), {}) assert m.string == '123'
def toml_time_offset(s): if s in 'Zz': return datetime.timezone(datetime.timedelta(0)) else: hour, minutes = s.split(':') return datetime.timezone( datetime.timedelta(hours=int(hour), minutes=int(minutes))) def toml_sec_frac(s): return int(float(s) * 1000) actions = { 'toml': Pack(toml_reduce), 'keyval': Pack(tuple), 'key': Pack(tuple), 'basic_string': toml_unescape, 'ml_basic_string': Join(toml_unescape), 'dec_int': int, 'hex_int': lambda x: int(x, 16), 'oct_int': lambda x: int(x, 8), 'bin_int': lambda x: int(x, 2), 'float': Capture(float), 'true': Constant(True), 'false': Constant(False), 'flexible_date_time': datetime.datetime, 'local_time': datetime.time, 'time_offset': toml_time_offset, 'sec_frac': toml_sec_frac,
V.CLOSE = Sequence(')', V.Spacing) V.DOT = Sequence('.', V.Spacing) # Whitespace and comments V.Spacing = Star(Choice(V.Space, V.Comment)) V.Space = Choice(Class(' \t'), V.EOL) V.Comment = Sequence('#', Star(Sequence(Not(V.EOL), Dot())), Optional(V.EOL)) V.EOF = Not(Dot()) V.EOL = Choice('\r\n', '\n', '\r') PEG = Grammar( definitions=V, actions={ 'Grammar': Pack(tuple), 'Definition': Pack(tuple), 'Expression': Pack(_make_prioritized), 'Sequence': Pack(_make_sequential), 'Valued': _make_valued, 'AND': Constant(And), 'NOT': Constant(Not), 'TILDE': Constant(Capture), 'Binding':