def test_regex_values():
    assert pe.compile(
        'A <- "a" "b"', flags=pe.NONE).match('ab').value() is None
    assert pe.compile(
        'A <- "a" "b"', flags=pe.REGEX).match('ab').value() is None
    assert pe.compile(
        'A <- "a" ~"b" "c"', flags=pe.NONE).match('abc').value() == 'b'
    assert pe.compile(
        'A <- "a" ~"b" "c"', flags=pe.REGEX).match('abc').value() == 'b'
def compile():
    INI = pe.compile(r'''
        Start    <- (Comment* Section)* Comment* EOF
        Section  <- Header Body
        Header   <- Space* Title Space* (EOL / EOF)
        Title    <- '[' ~(![\]=\n\r] .)* ']'
        Body     <- (Comment* Pair)*
        Pair     <- Space* Key ('=' val:Value)?
        Key      <- !Title (![=\n\r] .)+
        Value    <- ('\\' EOL / !EOL .)*
        Comment  <- (Space* ';' (!EOL .)* / Space+) (EOL / EOF) / EOL
        Space    <- [\t ]
        EOL      <- '\r\n' / [\n\r]
        EOF      <- !.
        ''',
        actions={
            'Start': Pack(dict),
            'Section': Pack(tuple),
            'Body': Pack(dict),
            'Pair': lambda key, val=None: (key, val),
            'Key': Capture(str.strip),
            'Value': Capture(_normalize_multiline),
        },
        parser='machine',
        flags=pe.OPTIMIZE)
    return lambda s: INI.match(s).value()
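
# Illustrative usage of the INI example above (added here; _demo_ini is not
# part of the original module). The exact treatment of whitespace around
# values depends on _normalize_multiline, so the result shown is only
# approximate.
def _demo_ini():
    parse = compile()
    config = parse('[server]\nhost=localhost\nport=8080\n')
    # roughly {'server': {'host': 'localhost', 'port': '8080'}}
    return config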
def test_inline():
    assert (iload(r'A <- "a"') == gload(r'A <- "a"'))
    assert (iload(r'A <- B B <- "a"') == gload(r'A <- "a" B <- "a"'))
    assert (iload(r'A <- B B <- C C <- "a"') == gload(
        r'A <- "a" B <- "a" C <- "a"'))
    assert (iload(r'A <- "a" A') == gload(r'A <- "a" A'))
    assert (iload(r'A <- "a" B B <- A') == gload(r'A <- "a" A B <- "a" B'))
    assert (iload(r'A <- "a" B B <- "b" A') == gload(
        r'A <- "a" "b" A B <- "b" "a" B'))
    assert pe.compile('A <- "a" B B <- "b"',
                      flags=pe.NONE).match('ab').value() is None
    assert pe.compile('A <- "a" B B <- "b"',
                      flags=pe.INLINE).match('ab').value() is None
    assert pe.compile('A <- "a" B B <- ~"b"',
                      flags=pe.NONE).match('ab').value() == 'b'
    assert pe.compile('A <- "a" B B <- ~"b"',
                      flags=pe.INLINE).match('ab').value() == 'b'
def test_multi_range_charclass(parser):
    p = pe.compile(
        r'''
        Start <- ["] CHAR* ["]
        CHAR  <- [ !#-\[\]-\U0010ffff]
        ''',
        parser=parser,
    )
    assert p.match('""') is not None
    assert p.match('"a"') is not None
    assert p.match('"ab"') is not None
    assert p.match('"1a"') is not None
    assert p.match('"1"') is not None
    assert p.match('"\U0010ffff"') is not None
    assert p.match('"a1"') is not None
def compile():
    Json = pe.compile(r'''
        # Syntactic rules
        Start    <- Spacing Value EOF
        Value    <- Object / Array / String / Number / Constant
        Object   <- LBRACE (Member (COMMA Member)*)? RBRACE
        Member   <- String COLON Value
        Array    <- LBRACK (Value (COMMA Value)*)? RBRACK
        String   <- ["] CHAR* ( ESC CHAR* )* ["]
        Number   <- INTEGER FRACTION? EXPONENT?
        Constant <- TRUE / FALSE / NULL

        # Lexical rules
        CHAR     <- [ !#-\[\]-\U0010ffff]
        ESC      <- '\\' ( ["\\/bfnrt] / 'u' HEX HEX HEX HEX )
        HEX      <- [0-9a-fA-F]
        INTEGER  <- "-"? ("0" / [1-9] [0-9]*)
        FRACTION <- "." [0-9]+
        EXPONENT <- [eE] [-+]? [0-9]+
        TRUE     <- "true"
        FALSE    <- "false"
        NULL     <- "null"
        LBRACE   <- "{" Spacing
        RBRACE   <- Spacing "}"
        LBRACK   <- "[" Spacing
        RBRACK   <- Spacing "]"
        COMMA    <- Spacing "," Spacing
        COLON    <- Spacing ":" Spacing
        Spacing  <- [\t\n\r ]*
        EOF      <- Spacing !.
        ''',
        actions={
            'Object': Pack(dict),
            'Member': Pack(tuple),
            'Array': Pack(list),
            'String': Capture(json_unescape),
            'Number': Capture(float),
            'TRUE': Constant(True),
            'FALSE': Constant(False),
            'NULL': Constant(None),
        },
        parser='machine',
        flags=pe.OPTIMIZE)
    return lambda s: Json.match(s, flags=pe.STRICT).value()
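
# Illustrative usage (added; _demo_json is not part of the original module).
# It assumes the compile() function above; note that the Number action is
# Capture(float), so integers come back as floats, and json_unescape is
# assumed to strip the quotes from captured strings.
def _demo_json():
    parse = compile()
    return parse('{"nums": [1, 2.5, -3], "ok": true, "nothing": null}')
    # roughly {'nums': [1.0, 2.5, -3.0], 'ok': True, 'nothing': None}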
def compile():
    parser = pe.compile(
        r'''
        Start    <- Spacing Expr EOL? EOF
        Expr     <- Term ((PLUS / MINUS) Term)*
        Term     <- Factor ((TIMES / DIVIDE) Factor)*
        Factor   <- Sign* (LPAR Expr RPAR / INTEGER)
        Sign     <- NEG / POS
        INTEGER  <- ~( '0' / [1-9] [0-9]* ) Spacing
        PLUS     <- '+' Spacing
        MINUS    <- '-' Spacing
        TIMES    <- '*' Spacing
        DIVIDE   <- '/' Spacing
        LPAR     <- '(' Spacing
        RPAR     <- ')' Spacing
        NEG      <- '-' Spacing
        POS      <- '+' Spacing
        Spacing  <- [ \t\n\f\v\r]*
        EOL      <- '\r\n' / [\n\r]
        EOF      <- !.
        ''',
        actions={
            'Expr': reduce_infix,
            'Term': reduce_infix,
            'Factor': reduce_prefix,
            'INTEGER': int,
            'PLUS': Constant(add),
            'MINUS': Constant(sub),
            'TIMES': Constant(mul),
            'DIVIDE': Constant(div),
            'NEG': Constant(neg),
        },
        flags=pe.OPTIMIZE
    )
    return lambda s: parser.match(s).value()
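
# Illustrative usage (added; _demo_calc is not part of the original module).
# Assuming reduce_infix and reduce_prefix fold their operands left to right
# with the operator Constants bound above, the expression below evaluates
# with the usual precedence: parentheses, then unary sign, then * and /.
def _demo_calc():
    evaluate = compile()
    return evaluate('3 + 4 * (2 - -1)')  # expected: 15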
Json = pe.compile(r'''
    Start    <- Spacing Value Spacing EOF
    Value    <- Object / Array / String / Number / Constant / BADVALUE
    Object   <- LBRACE (Member (COMMA Member)*)? BADCOMMA? RBRACE
    Member   <- String COLON Value
    Array    <- LBRACK (Value (COMMA Value)*)? BADCOMMA? RBRACK
    String   <- ["] ~( (!["\\] .)* ('\\' . / (!["\\] .)+)* ) ["]
    Number   <- Integer / Float
    Constant <- TRUE / FALSE / NULL
    Integer  <- INTEGER ![.eE]
    Float    <- INTEGER FRACTION? EXPONENT?
    INTEGER  <- "-"? ("0" / [1-9] [0-9]*)
    FRACTION <- "." [0-9]+
    EXPONENT <- [eE] [-+]? [0-9]+
    TRUE     <- "true"
    FALSE    <- "false"
    NULL     <- "null"
    LBRACE   <- "{" Spacing
    RBRACE   <- Spacing "}"
    LBRACK   <- "[" Spacing
    RBRACK   <- Spacing "]"
    COMMA    <- Spacing "," Spacing
    COLON    <- Spacing ":" Spacing
    Spacing  <- [\t\n\f\r ]*
    EOF      <- !.
    BADVALUE <- ![}\]] .
    BADCOMMA <- ',' &(RBRACE / RBRACK)
    ''',
    actions={
        'Object': Pair(dict),
        'Array': Pack(list),
        'Integer': Capture(int),
        'Float': Capture(float),
        'TRUE': Constant(True),
        'FALSE': Constant(False),
        'NULL': Constant(None),
        'BADVALUE': Fail('unexpected JSON value'),
        'BADCOMMA': Fail('trailing commas are not allowed'),
    })
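
# Sketch of how the Fail actions above surface to the caller (added for
# illustration; the exception type is assumed here to be pe.ParseError).
# BADCOMMA only matches a comma immediately before a closing brace or
# bracket, so a trailing comma produces a targeted error message.
def _demo_bad_json(s='[1, 2, 3,]'):
    try:
        return Json.match(s, flags=pe.STRICT).value()
    except pe.ParseError as exc:
        print(exc)  # e.g. 'trailing commas are not allowed'
        return None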
import pe

X = pe.compile(r'''
    Start    <- Expr (EOL Expr)* EOF
    Expr     <- Term PLUS Expr / (Sign Sign)+ Term MINUS Expr / Term
    Sign     <- [-+] Spacing
    Term     <- Factor TIMES Term / Factor DIVIDE Term / Factor
    Factor   <- LPAREN Expr RPAREN / Atom
    Atom     <- NAME / NUMBER
    NAME     <- [a-zA-Z_] [a-zA-Z0-9_]* Spacing
    NUMBER   <- ('0' / [1-9] [0-9]*) Spacing
    PLUS     <- '+' Spacing
    MINUS    <- '-' Spacing
    TIMES    <- '*' Spacing
    DIVIDE   <- '/' Spacing
    LPAREN   <- '(' Spacing
    RPAREN   <- ')' Spacing
    EOL      <- '\r\n' / [\n\r]
    EOF      <- [ \t\n\v\f\r]* !.
    Spacing  <- ' '*
''')


def _match(s):
    return X.match(s, flags=pe.STRICT | pe.MEMOIZE)
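
# Illustrative usage (added; _demo_match is not part of the original module).
# _match wraps X.match with pe.STRICT, which is expected to raise
# pe.ParseError on input the grammar rejects instead of returning None, and
# pe.MEMOIZE, which enables packrat memoization for the backtracking
# alternatives in Expr and Term.
def _demo_match():
    assert _match('a + 2') is not None
    try:
        _match('a +')  # incomplete expression, should not parse
    except pe.ParseError:
        pass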
peg_acceptor = pe.compile(r'''
    # Hierarchical syntax
    Grammar    <- Spacing Definition+ EndOfFile
    Definition <- Identifier LEFTARROW Expression
    Expression <- Sequence (SLASH Sequence)*
    Sequence   <- Prefix*
    Prefix     <- (AND / NOT)? Suffix
    Suffix     <- Primary (QUESTION / STAR / PLUS)?
    Primary    <- Identifier !LEFTARROW
                / OPEN Expression CLOSE
                / Literal
                / Class
                / DOT

    # Lexical syntax
    Identifier <- IdentStart IdentCont* Spacing
    IdentStart <- [a-zA-Z_]
    IdentCont  <- IdentStart / [0-9]
    Literal    <- ['] (!['] Char)* ['] Spacing
                / ["] (!["] Char)* ["] Spacing
    Class      <- '[' (!']' Range)* ']' Spacing
    Range      <- Char '-' Char / Char
    Char       <- '\\' [nrt'"\[\]\\]
                / '\\' [0-2] [0-7] [0-7]
                / '\\' [0-7] [0-7]?
                / !'\\' .
    LEFTARROW  <- '<-' Spacing
    SLASH      <- '/' Spacing
    AND        <- '&' Spacing
    NOT        <- '!' Spacing
    QUESTION   <- '?' Spacing
    STAR       <- '*' Spacing
    PLUS       <- '+' Spacing
    OPEN       <- '(' Spacing
    CLOSE      <- ')' Spacing
    DOT        <- '.' Spacing
    Spacing    <- (Space / Comment)*
    Comment    <- '#' (!EndOfLine .)* EndOfLine
    Space      <- ' ' / '\t' / EndOfLine
    EndOfLine  <- '\r\n' / '\n' / '\r'
    EndOfFile  <- !.
    ''')
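
# Illustrative check (added; not part of the original): the acceptor should
# return a Match for any syntactically well-formed PEG, such as the two-rule
# grammar below, and None for malformed input. No actions are attached, so
# only success or failure of the match is meaningful.
def _demo_peg_acceptor():
    good = 'Sum <- Num ("+" Num)*\nNum <- [0-9]+\n'
    assert peg_acceptor.match(good) is not None
    assert peg_acceptor.match('<- broken') is None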
    'key': Pack(tuple),
    'basic_string': toml_unescape,
    'ml_basic_string': Join(toml_unescape),
    'dec_int': int,
    'hex_int': lambda x: int(x, 16),
    'oct_int': lambda x: int(x, 8),
    'bin_int': lambda x: int(x, 2),
    'float': Capture(float),
    'true': Constant(True),
    'false': Constant(False),
    'flexible_date_time': datetime.datetime,
    'local_time': datetime.time,
    'time_offset': toml_time_offset,
    'sec_frac': toml_sec_frac,
    'DIGIT2': Capture(int),
    'DIGIT4': Capture(int),
    'array': Pack(list),
    'std_table': Table,
    'inline_table': Pack(dict),
    'array_table': ArrayTable,
}

TOML = pe.compile(grammar, actions=actions, flags=pe.NONE)


if __name__ == '__main__':
    import sys
    with open(sys.argv[1]) as fh:
        m = TOML.match(fh.read())
        if m:
            print(m.value())