def parse_imp(input):
    """Parse a string into an element of the abstract representation.

    Grammar::

        <expr> ::= <integer>
                   true
                   false
                   <identifier>
                   ( if <expr> <expr> <expr> )
                   ( function ( <name> ... ) <expr> )
                   ( <expr> <expr> ... )

        <decl> ::= var name = expr ;

        <stmt> ::= if <expr> <stmt> else <stmt>
                   if <expr> <stmt>
                   while <expr> <stmt>
                   name <- <expr> ;
                   print <expr> ;
                   <block>

        <block> ::= { <decl> ... <stmt> ... }

        <toplevel> ::= #quit
                       #abs <stmt>
                       <decl>
                       <stmt>

    Returns the first token of the parse, a dict tagged by its "result"
    key: "quit", "abstract" (with "stmt"), "declaration" (with "decl") or
    "statement" (with "stmt").
    """
    # Identifier alphabet; digits are only allowed after the first character.
    idChars = alphas + "_+*-?!=<>"
    idRest = idChars + "0123456789"

    # NOTE THE DIFFERENCE: an identifier in expression position is wrapped
    # in a call to oper_deref instead of yielding a bare EId.
    pIDENTIFIER = Word(idChars, idRest).setParseAction(
        lambda toks: EPrimCall(oper_deref, [EId(toks[0])]))

    # A name is spelled like an identifier but stays a plain string.
    pNAME = Word(idChars, idRest)
    # Returning [toks] hands all the names over as one single token.
    pNAMES = ZeroOrMore(pNAME).setParseAction(lambda toks: [toks])

    pINTEGER = Word("0123456789").setParseAction(
        lambda toks: EValue(VInteger(int(toks[0]))))

    pBOOLEAN = (Keyword("true") | Keyword("false")).setParseAction(
        lambda toks: EValue(VBoolean(toks[0] == "true")))

    # Expressions are recursive, so the rule is declared first and filled
    # in once all of its alternatives exist.
    pEXPR = Forward()
    pEXPRS = ZeroOrMore(pEXPR).setParseAction(lambda toks: [toks])

    pIF = ("(" + Keyword("if") + pEXPR + pEXPR + pEXPR + ")").setParseAction(
        lambda toks: EIf(toks[2], toks[3], toks[4]))

    def mkFunBody(params, body):
        # Rebind every parameter to ERefCell(EId(param)) around the body.
        return ELet([(p, ERefCell(EId(p))) for p in params], body)

    pFUN = ("(" + Keyword("function") + "(" + pNAMES + ")" + pEXPR
            + ")").setParseAction(
        lambda toks: EFunction(toks[3], mkFunBody(toks[3], toks[5])))

    pCALL = ("(" + pEXPR + pEXPRS + ")").setParseAction(
        lambda toks: ECall(toks[1], toks[2]))

    pEXPR << (pINTEGER | pBOOLEAN | pIDENTIFIER | pIF | pFUN | pCALL)

    # A declaration yields a (name, initializer-expression) pair.
    pDECL_VAR = ("var" + pNAME + "=" + pEXPR + ";").setParseAction(
        lambda toks: (toks[1], toks[3]))

    # hack to get pDECL to match only pDECL_VAR (but still leave room
    # to add to pDECL later)
    pDECL = pDECL_VAR | NoMatch()
    pDECLS = ZeroOrMore(pDECL).setParseAction(lambda toks: [toks])

    pSTMT = Forward()

    pSTMT_IF_1 = ("if" + pEXPR + pSTMT + "else" + pSTMT).setParseAction(
        lambda toks: EIf(toks[1], toks[2], toks[4]))

    # Without an else branch the alternative is the constant true.
    pSTMT_IF_2 = ("if" + pEXPR + pSTMT).setParseAction(
        lambda toks: EIf(toks[1], toks[2], EValue(VBoolean(True))))

    pSTMT_WHILE = ("while" + pEXPR + pSTMT).setParseAction(
        lambda toks: EWhile(toks[1], toks[2]))

    pSTMT_PRINT = ("print" + pEXPR + ";").setParseAction(
        lambda toks: EPrimCall(oper_print, [toks[1]]))

    # The assignment target is passed as a bare EId (no oper_deref wrapper).
    pSTMT_UPDATE = (pNAME + "<-" + pEXPR + ";").setParseAction(
        lambda toks: EPrimCall(oper_update, [EId(toks[0]), toks[2]]))

    pSTMTS = ZeroOrMore(pSTMT).setParseAction(lambda toks: [toks])

    def mkBlock(decls, stmts):
        # Each declared name is bound to ERefCell(initializer); the
        # statements are sequenced with EDo inside that ELet.
        return ELet([(n, ERefCell(expr)) for (n, expr) in decls],
                    EDo(stmts))

    pSTMT_BLOCK = ("{" + pDECLS + pSTMTS + "}").setParseAction(
        lambda toks: mkBlock(toks[1], toks[2]))

    pSTMT << (pSTMT_IF_1 | pSTMT_IF_2 | pSTMT_WHILE | pSTMT_PRINT
              | pSTMT_UPDATE | pSTMT_BLOCK)

    # can't attach a parse action to pSTMT because of recursion, so let's
    # duplicate the parser
    pTOP_STMT = pSTMT.copy().setParseAction(
        lambda toks: {"result":"statement", "stmt":toks[0]})

    pTOP_DECL = pDECL.copy().setParseAction(
        lambda toks: {"result":"declaration", "decl":toks[0]})

    pABSTRACT = ("#abs" + pSTMT).setParseAction(
        lambda toks: {"result":"abstract", "stmt":toks[1]})

    pQUIT = Keyword("#quit").setParseAction(
        lambda toks: {"result":"quit"})

    pTOP = pQUIT | pABSTRACT | pTOP_DECL | pTOP_STMT

    # the first element of the parse result is the tagged dict
    return pTOP.parseString(input)[0]
def parse(input):
    """Parse a string into an element of the abstract representation.

    Grammar::

        <expr> ::= <integer>
                   true
                   false
                   <identifier>
                   ( if <expr> <expr> <expr> )
                   ( let ( ( <name> <expr> ) ... ) <expr> )
                   ( function ( <name> ... ) <expr> )
                   ( function <name> ( <name> ... ) <expr> )
                   ( class ( <name> ... )
                           ( ( <name> <expr> ) ... )
                           ( ( <name> <expr> ) ... ) )
                   ( new <expr> <expr> ... )
                   ( with <expr> <expr> )
                   ( do <expr> ... )
                   ( while <expr> <expr> )
                   ( <expr> <expr> ... )

        <toplevel> ::= ( defun <name> ( <name> ... ) <expr> )
                       ( define <name> <expr> )
                       #quit
                       #abs <expr>
                       <expr>

    There is no dedicated rule for ``( ref <expr> )``; such input is
    handled by the generic call rule.

    Returns the first token of the parse, a dict tagged by its "result"
    key: "function", "value", "quit", "abstract" or "expression".
    """
    # Identifier alphabet; digits are only allowed after the first character.
    idChars = alphas + "_+*-?!=<>"
    idRest = idChars + "0123456789"

    pIDENTIFIER = Word(idChars, idRest).setParseAction(
        lambda toks: EId(toks[0]))

    # A name is like an identifier but it does not return an EId...
    pNAME = Word(idChars, idRest)
    # Returning [toks] hands all the names over as one single token.
    pNAMES = ZeroOrMore(pNAME).setParseAction(lambda toks: [toks])

    pINTEGER = Word("0123456789").setParseAction(
        lambda toks: EValue(VInteger(int(toks[0]))))

    pBOOLEAN = (Keyword("true") | Keyword("false")).setParseAction(
        lambda toks: EValue(VBoolean(toks[0] == "true")))

    # Expressions are recursive, so the rule is declared first and filled
    # in once all of its alternatives exist.
    pEXPR = Forward()
    pEXPRS = ZeroOrMore(pEXPR).setParseAction(lambda toks: [toks])

    pIF = ("(" + Keyword("if") + pEXPR + pEXPR + pEXPR + ")").setParseAction(
        lambda toks: EIf(toks[2], toks[3], toks[4]))

    # A binding yields a (name, expression) pair.
    pBINDING = ("(" + pNAME + pEXPR + ")").setParseAction(
        lambda toks: (toks[1], toks[2]))
    pBINDINGS = ZeroOrMore(pBINDING).setParseAction(lambda toks: [toks])

    pLET = ("(" + Keyword("let") + "(" + pBINDINGS + ")" + pEXPR
            + ")").setParseAction(
        lambda toks: ELet(toks[3], toks[5]))

    pCALL = ("(" + pEXPR + pEXPRS + ")").setParseAction(
        lambda toks: ECall(toks[1], toks[2]))

    pFUN = ("(" + Keyword("function") + "(" + pNAMES + ")" + pEXPR
            + ")").setParseAction(
        lambda toks: EFunction(toks[3], toks[5]))

    # Named variant: the extra <name> is forwarded as EFunction's `name`.
    pFUNrec = ("(" + Keyword("function") + pNAME + "(" + pNAMES + ")"
               + pEXPR + ")").setParseAction(
        lambda toks: EFunction(toks[4], toks[6], name=toks[2]))

    # NOTE(review): the parentheses around the two binding lists are
    # matched with Keyword(...) rather than plain string literals --
    # confirm that is intentional.
    pCLASS = ("(" + Keyword("class") + "(" + pNAMES + ")"
              + Keyword("(") + pBINDINGS + ")"
              + Keyword("(") + pBINDINGS + Keyword(")")
              + ")").setParseAction(
        lambda toks: EClass(toks[3], toks[6], toks[9]))

    pNEW = ("(" + Keyword("new") + pEXPR + pEXPRS + ")").setParseAction(
        lambda toks: ENew(toks[2], toks[3]))

    pWITH = ("(" + Keyword("with") + pEXPR + pEXPR + ")").setParseAction(
        lambda toks: EWithObj(toks[2], toks[3]))

    pDO = ("(" + Keyword("do") + pEXPRS + ")").setParseAction(
        lambda toks: EDo(toks[2]))

    pWHILE = ("(" + Keyword("while") + pEXPR + pEXPR + ")").setParseAction(
        lambda toks: EWhile(toks[2], toks[3]))

    # pCALL is tried last so the keyword-led forms get the first chance.
    pEXPR << (pINTEGER | pBOOLEAN | pIDENTIFIER | pIF | pLET | pFUN
              | pFUNrec | pCLASS | pNEW | pWITH | pDO | pWHILE | pCALL)

    # can't attach a parse action to pEXPR because of recursion, so let's
    # duplicate the parser
    pTOPEXPR = pEXPR.copy().setParseAction(
        lambda toks: {"result":"expression","expr":toks[0]})

    pDEFINE = ("(" + Keyword("define") + pNAME + pEXPR + ")").setParseAction(
        lambda toks: {"result":"value", "name":toks[2], "expr":toks[3]})

    pDEFUN = ("(" + Keyword("defun") + pNAME + "(" + pNAMES + ")" + pEXPR
              + ")").setParseAction(
        lambda toks: {"result":"function", "name":toks[2],
                      "params":toks[4], "body":toks[6]})

    pABSTRACT = ("#abs" + pEXPR).setParseAction(
        lambda toks: {"result":"abstract", "expr":toks[1]})

    pQUIT = Keyword("#quit").setParseAction(
        lambda toks: {"result":"quit"})

    pTOP = pDEFUN | pDEFINE | pQUIT | pABSTRACT | pTOPEXPR

    # the first element of the parse result is the tagged dict
    return pTOP.parseString(input)[0]
def parse_imp(input):
    """Parse a string into an element of the abstract representation.

    Grammar::

        <expr> ::= <integer>
                   <string>
                   true
                   false
                   <identifier>
                   ( new-array <expr> )
                   ( with <expr> <expr> )
                   ( if <expr> <expr> <expr> )
                   ( let ( ( <name> <expr> ) ... ) <expr> )
                   ( function ( <name> ... ) <expr> )
                   ( <expr> <expr> ... )

        <decl> ::= var name = expr ;
                   procedure name ( <name> ... ) <stmt>
                   ;

        <stmt> ::= if <expr> <stmt> else <stmt>
                   if <expr> <stmt>
                   ( with <expr> <expr> )
                   while <expr> <stmt>
                   for <decl> ... <expr> ; <stmt> <stmt>
                   print <expr> ;
                   name [ <integer> ] <- <expr> ;
                   name <- <expr> ;
                   <expr> ( <expr> ... ) ;
                   <block>

        <block> ::= { <decl> ... <stmt> ... }

        <toplevel> ::= #quit
                       #abs <stmt>
                       <decl>
                       <stmt>

    Returns the first token of the parse, a dict tagged by its "result"
    key: "quit", "abstract" (with "stmt"), "declaration" (with "decl") or
    "statement" (with "stmt").
    """
    idChars = alphas + "_+*-?!=<>"

    pIDENTIFIER = Word(idChars, idChars + "0123456789")
    #### NOTE THE DIFFERENCE: an identifier in expression position is
    #### wrapped in a call to oper_deref instead of yielding a bare EId.
    pIDENTIFIER.setParseAction(
        lambda result: EPrimCall(oper_deref, [EId(result[0])]))

    # A name is like an identifier but it does not return an EId...
    pNAME = Word(idChars, idChars + "0123456789")

    # Returning [result] hands all the names over as one single token.
    pNAMES = ZeroOrMore(pNAME)
    pNAMES.setParseAction(lambda result: [result])

    pINTEGER = Word("0123456789")
    pINTEGER.setParseAction(lambda result: EValue(VInteger(int(result[0]))))

    # Inside a string literal, &" and &' are accepted as two-character
    # pieces and kept verbatim in the resulting text.
    pESCAPED_QUOTE = Literal("&\"") | Literal("&\'")
    pSTRING = Literal('"') + ZeroOrMore(
        Combine(Word(idChars + "0123456789'" + " ") | pESCAPED_QUOTE)
    ) + Literal('"')
    # result[1:-1] is a plain list of the pieces between the two quotes.
    # Join them; calling str() on the list would store its repr
    # ("['abc']", or "[]" for an empty literal) instead of the text.
    pSTRING.setParseAction(
        lambda result: EValue(VString("".join(result[1:-1]))))

    pBOOLEAN = Keyword("true") | Keyword("false")
    pBOOLEAN.setParseAction(
        lambda result: EValue(VBoolean(result[0] == "true")))

    # Expressions are recursive, so the rule is declared first and filled
    # in once all of its alternatives exist.
    pEXPR = Forward()

    pEXPRS = ZeroOrMore(pEXPR)
    pEXPRS.setParseAction(lambda result: [result])

    pIF = "(" + Keyword("if") + pEXPR + pEXPR + pEXPR + ")"
    pIF.setParseAction(lambda result: EIf(result[2], result[3], result[4]))

    def mkFunBody(params, body):
        # Rebind every parameter to ERefCell(EId(param)) around the body.
        bindings = [(p, ERefCell(EId(p))) for p in params]
        return ELet(bindings, body)

    def letToFun(result):
        # Rewrite (let ((n e) ...) body) as an immediate call of a
        # function whose parameters are the bound names.
        # NOTE(review): unlike pFUN, the body is not passed through
        # mkFunBody, so the parameters are not wrapped in ERefCell here --
        # confirm this is intended.
        func = result[5]
        binds = result[3]
        params = [p for p, _ in binds]
        vals = [v for _, v in binds]
        return ECall(EFunction(params, func), vals)

    pFUN = "(" + Keyword("function") + "(" + pNAMES + ")" + pEXPR + ")"
    pFUN.setParseAction(
        lambda result: EFunction(result[3], mkFunBody(result[3], result[5])))

    # A binding yields a (name, expression) pair.
    pBINDING = "(" + pNAME + pEXPR + ")"
    pBINDING.setParseAction(lambda result: (result[1], result[2]))

    pBINDINGS = OneOrMore(pBINDING)
    pBINDINGS.setParseAction(lambda result: [result])

    pLET = "(" + Keyword("let") + "(" + pBINDINGS + ")" + pEXPR + ")"
    pLET.setParseAction(letToFun)

    pCALL = "(" + pEXPR + pEXPRS + ")"
    pCALL.setParseAction(lambda result: ECall(result[1], result[2]))

    pARRAY = "(" + Keyword("new-array") + pEXPR + ")"
    pARRAY.setParseAction(lambda result: EArray(result[2]))

    pWITH = "(" + Keyword("with") + pEXPR + pEXPR + ")"
    pWITH.setParseAction(lambda result: EWithObj(result[2], result[3]))

    # pCALL is tried last so the keyword-led forms get the first chance.
    pEXPR << (pINTEGER | pARRAY | pSTRING | pWITH | pBOOLEAN | pIDENTIFIER
              | pIF | pLET | pFUN | pCALL)

    # A declaration yields a (name, initializer-expression) pair.
    pDECL_VAR = "var" + pNAME + "=" + pEXPR + ";"
    pDECL_VAR.setParseAction(lambda result: (result[1], result[3]))

    # Statements are recursive too; pDECL_PROCEDURE already needs the rule.
    pSTMT = Forward()

    pDECL_PROCEDURE = "procedure" + pNAME + "(" + pNAMES + ")" + pSTMT
    pDECL_PROCEDURE.setParseAction(lambda result: (
        result[1], EProcedure(result[3], mkFunBody(result[3], result[5]))))

    # NoMatch() is a placeholder alternative that never matches.
    # NOTE(review): the bare ";" alternative produces a plain string, which
    # mkBlock below cannot unpack as a (name, expr) pair -- presumably it
    # is only meant for the declaration slot of `for`; confirm.
    pDECL = (pDECL_VAR | pDECL_PROCEDURE | NoMatch() | ";")

    pDECLS = ZeroOrMore(pDECL)
    pDECLS.setParseAction(lambda result: [result])

    pSTMT_IF_1 = "if" + pEXPR + pSTMT + "else" + pSTMT
    pSTMT_IF_1.setParseAction(
        lambda result: EIf(result[1], result[2], result[4]))

    # Without an else branch the alternative is the constant true.
    pSTMT_IF_2 = "if" + pEXPR + pSTMT
    pSTMT_IF_2.setParseAction(
        lambda result: EIf(result[1], result[2], EValue(VBoolean(True))))

    pSTMT_WHILE = "while" + pEXPR + pSTMT
    pSTMT_WHILE.setParseAction(lambda result: EWhile(result[1], result[2]))

    # for <decls> <condition> ; <stmt> <stmt>
    pSTMT_FOR = "for" + pDECLS + pEXPR + ";" + pSTMT + pSTMT
    pSTMT_FOR.setParseAction(
        lambda result: EFor(result[1], result[2], result[4], result[5]))

    pSTMT_PRINT = "print" + pEXPR + ";"
    pSTMT_PRINT.setParseAction(
        lambda result: EPrimCall(oper_print, [result[1]]))

    # Tokens: name "[" index "]" "<-" value ";"
    pSTMT_UPDATE_ARR = pNAME + "[" + pINTEGER + "]" + "<-" + pEXPR + ";"
    pSTMT_UPDATE_ARR.setParseAction(lambda result: EPrimCall(
        oper_update_arr, [EId(result[0]), result[2], result[5]]))

    # The assignment target is passed as a bare EId (no oper_deref wrapper).
    pSTMT_UPDATE = pNAME + "<-" + pEXPR + ";"
    pSTMT_UPDATE.setParseAction(
        lambda result: EPrimCall(oper_update, [EId(result[0]), result[2]]))

    # Procedure call statement: <expr> ( <expr> ... ) ;
    pSTMT_PROCEDURE = pEXPR + "(" + pEXPRS + ")" + ";"
    pSTMT_PROCEDURE.setParseAction(
        lambda result: ECall(result[0], result[2]))

    pSTMTS = ZeroOrMore(pSTMT)
    pSTMTS.setParseAction(lambda result: [result])

    def mkBlock(decls, stmts):
        # Each declared name is bound to ERefCell(initializer); the
        # statements are sequenced with EDo inside that ELet.
        bindings = [(n, ERefCell(expr)) for (n, expr) in decls]
        return ELet(bindings, EDo(stmts))

    pSTMT_BLOCK = "{" + pDECLS + pSTMTS + "}"
    pSTMT_BLOCK.setParseAction(lambda result: mkBlock(result[1], result[2]))

    pSTMT << (pSTMT_IF_1 | pSTMT_IF_2 | pWITH | pSTMT_WHILE | pSTMT_FOR
              | pSTMT_PRINT | pSTMT_UPDATE_ARR | pSTMT_UPDATE
              | pSTMT_PROCEDURE | pSTMT_BLOCK)

    # can't attach a parse action to pSTMT because of recursion, so let's
    # duplicate the parser
    pTOP_STMT = pSTMT.copy()
    pTOP_STMT.setParseAction(lambda result: {
        "result": "statement",
        "stmt": result[0]
    })

    pTOP_DECL = pDECL.copy()
    pTOP_DECL.setParseAction(lambda result: {
        "result": "declaration",
        "decl": result[0]
    })

    pABSTRACT = "#abs" + pSTMT
    pABSTRACT.setParseAction(lambda result: {
        "result": "abstract",
        "stmt": result[1]
    })

    pQUIT = Keyword("#quit")
    pQUIT.setParseAction(lambda result: {"result": "quit"})

    pTOP = (pQUIT | pABSTRACT | pTOP_DECL | pTOP_STMT)

    result = pTOP.parseString(input)[0]
    return result  # the first element of the result is the tagged dict
# An r-value operand is any one of `number`, `int_variables_ref` or
# `misc_variables_ref`.
operand = (number | int_variables_ref | misc_variables_ref).setName('r-value')

# Each row is (symbol, arity, associativity, parse action); this appears to
# follow the row layout of pyparsing's operatorPrecedence -- verify against
# myOperatorPrecedence.
rvalue << myOperatorPrecedence(operand, [
    ('-', 1, opAssoc.RIGHT, Unary.parse_action),
    ('*', 2, opAssoc.LEFT, Binary.parse_action),
    ('-', 2, opAssoc.LEFT, Binary.parse_action),
    ('+', 2, opAssoc.LEFT, Binary.parse_action),
])

# Author's intent for the registrations below:
#  - BindVariable should have precedence over EqualTo(VariableRef),
# but also:
#  - arithmetic should have precedence w.r.t. BindVariable.
# Original note: "last is variables".
# NOTE(review): presumably the order of the add_contract calls decides the
# matching priority -- confirm against add_contract before reordering.
add_contract(misc_variables_contract)
add_contract(int_variables_contract)
add_contract(rvalue.copy().setParseAction(EqualTo.parse_action))

# All contracts registered so far, tried in sequence.
hardwired = MatchFirst(ParsingTmp.contract_types).setName(
    'Predefined contract expression')

simple_contract << (hardwired | identifier_contract)
simple_contract.setName('simple contract expression')

any_contract = (composite_contract | simple_contract).setName(
    'Any simple or composite contract')

contract_expression << (any_contract)  # Parentheses before << !!
def parse_imp(input):
    """Parse a string into an element of the abstract representation.

    Grammar::

        <expr> ::= <integer>
                   true
                   false
                   <identifier>
                   ( if <expr> <expr> <expr> )
                   ( function ( <name> ... ) <expr> )
                   ( <expr> <expr> ... )

        <decl> ::= var name = expr ;

        <stmt> ::= if <expr> <stmt> else <stmt>
                   if <expr> <stmt>
                   while <expr> <stmt>
                   name <- <expr> ;
                   print <expr> ;
                   <expr> ;
                   <block>

        <block> ::= { <decl> ... <stmt> ... }

        <toplevel> ::= #quit
                       #abs <stmt>
                       <decl>
                       <stmt>

    Returns the first token of the parse, a dict tagged by its "result"
    key: "quit", "abstract" (with "stmt"), "declaration" (with "decl") or
    "statement" (with "stmt").
    """
    # Identifier alphabet; digits are only allowed after the first character.
    idChars = alphas + "_+*-?!=<>"
    idRest = idChars + "0123456789"

    # NOTE THE DIFFERENCE: an identifier in expression position is wrapped
    # in a call to oper_deref instead of yielding a bare EId.
    pIDENTIFIER = Word(idChars, idRest).setParseAction(
        lambda toks: EPrimCall(oper_deref, [EId(toks[0])]))

    # A name is spelled like an identifier but stays a plain string.
    pNAME = Word(idChars, idRest)
    # Returning [toks] hands all the names over as one single token.
    pNAMES = ZeroOrMore(pNAME).setParseAction(lambda toks: [toks])

    pINTEGER = Word("0123456789").setParseAction(
        lambda toks: EValue(VInteger(int(toks[0]))))

    pBOOLEAN = (Keyword("true") | Keyword("false")).setParseAction(
        lambda toks: EValue(VBoolean(toks[0] == "true")))

    # Expressions are recursive, so the rule is declared first and filled
    # in once all of its alternatives exist.
    pEXPR = Forward()
    pEXPRS = ZeroOrMore(pEXPR).setParseAction(lambda toks: [toks])

    pIF = ("(" + Keyword("if") + pEXPR + pEXPR + pEXPR + ")").setParseAction(
        lambda toks: EIf(toks[2], toks[3], toks[4]))

    def mkFunBody(params, body):
        # Rebind every parameter to ERefCell(EId(param)) around the body.
        return ELet([(p, ERefCell(EId(p))) for p in params], body)

    pFUN = ("(" + Keyword("function") + "(" + pNAMES + ")" + pEXPR
            + ")").setParseAction(
        lambda toks: EFunction(toks[3], mkFunBody(toks[3], toks[5])))

    pCALL = ("(" + pEXPR + pEXPRS + ")").setParseAction(
        lambda toks: ECall(toks[1], toks[2]))

    pEXPR << (pINTEGER | pBOOLEAN | pIDENTIFIER | pIF | pFUN | pCALL)

    # A declaration yields a (name, initializer-expression) pair.
    pDECL_VAR = ("var" + pNAME + "=" + pEXPR + ";").setParseAction(
        lambda toks: (toks[1], toks[3]))

    # hack to get pDECL to match only pDECL_VAR (but still leave room
    # to add to pDECL later)
    pDECL = pDECL_VAR | NoMatch()
    pDECLS = ZeroOrMore(pDECL).setParseAction(lambda toks: [toks])

    pSTMT = Forward()

    pSTMT_IF_1 = ("if" + pEXPR + pSTMT + "else" + pSTMT).setParseAction(
        lambda toks: EIf(toks[1], toks[2], toks[4]))

    # Without an else branch the alternative is the constant true.
    pSTMT_IF_2 = ("if" + pEXPR + pSTMT).setParseAction(
        lambda toks: EIf(toks[1], toks[2], EValue(VBoolean(True))))

    pSTMT_WHILE = ("while" + pEXPR + pSTMT).setParseAction(
        lambda toks: EWhile(toks[1], toks[2]))

    pSTMT_PRINT = ("print" + pEXPR + ";").setParseAction(
        lambda toks: EPrimCall(oper_print, [toks[1]]))

    # The assignment target is passed as a bare EId (no oper_deref wrapper).
    pSTMT_UPDATE = (pNAME + "<-" + pEXPR + ";").setParseAction(
        lambda toks: EPrimCall(oper_update, [EId(toks[0]), toks[2]]))

    # An expression followed by ";" is a statement; the expression itself
    # is the result.
    pSTMT_EXPR = (pEXPR + ";").setParseAction(lambda toks: toks[0])

    pSTMTS = ZeroOrMore(pSTMT).setParseAction(lambda toks: [toks])

    def mkBlock(decls, stmts):
        # Each declared name is bound to ERefCell(initializer); the
        # statements are sequenced with EDo inside that ELet.
        return ELet([(n, ERefCell(expr)) for (n, expr) in decls],
                    EDo(stmts))

    pSTMT_BLOCK = ("{" + pDECLS + pSTMTS + "}").setParseAction(
        lambda toks: mkBlock(toks[1], toks[2]))

    pSTMT << (pSTMT_IF_1 | pSTMT_IF_2 | pSTMT_WHILE | pSTMT_PRINT
              | pSTMT_UPDATE | pSTMT_EXPR | pSTMT_BLOCK)

    # can't attach a parse action to pSTMT because of recursion, so let's
    # duplicate the parser
    pTOP_STMT = pSTMT.copy().setParseAction(
        lambda toks: {"result": "statement", "stmt": toks[0]})

    pTOP_DECL = pDECL.copy().setParseAction(
        lambda toks: {"result": "declaration", "decl": toks[0]})

    pABSTRACT = ("#abs" + pSTMT).setParseAction(
        lambda toks: {"result": "abstract", "stmt": toks[1]})

    pQUIT = Keyword("#quit").setParseAction(
        lambda toks: {"result": "quit"})

    pTOP = pQUIT | pABSTRACT | pTOP_DECL | pTOP_STMT

    # the first element of the parse result is the tagged dict
    return pTOP.parseString(input)[0]
def _get_parser(cls): if cls._parser is not None: return cls._parser ParserElement.enablePackrat() LPAR, RPAR, COMMA, LBRACKET, RBRACKET, LT, GT = map(Literal, "(),[]<>") ungrouped_select_stmt = Forward().setName("select statement") # keywords ( UNION, ALL, AND, INTERSECT, EXCEPT, COLLATE, ASC, DESC, ON, USING, NATURAL, INNER, CROSS, LEFT, RIGHT, OUTER, FULL, JOIN, AS, INDEXED, NOT, SELECT, DISTINCT, FROM, WHERE, GROUP, BY, HAVING, ORDER, BY, LIMIT, OFFSET, OR, CAST, ISNULL, NOTNULL, NULL, IS, BETWEEN, ELSE, END, CASE, WHEN, THEN, EXISTS, COLLATE, IN, LIKE, GLOB, REGEXP, MATCH, ESCAPE, CURRENT_TIME, CURRENT_DATE, CURRENT_TIMESTAMP, WITH, EXTRACT, PARTITION, ROWS, RANGE, UNBOUNDED, PRECEDING, CURRENT, ROW, FOLLOWING, OVER, INTERVAL, DATE_ADD, DATE_SUB, ADDDATE, SUBDATE, REGEXP_EXTRACT, SPLIT, ORDINAL, FIRST_VALUE, LAST_VALUE, NTH_VALUE, LEAD, LAG, PERCENTILE_CONT, PRECENTILE_DISC, RANK, DENSE_RANK, PERCENT_RANK, CUME_DIST, NTILE, ROW_NUMBER, DATE, TIME, DATETIME, TIMESTAMP, UNNEST, INT64, NUMERIC, FLOAT64, BOOL, BYTES, GEOGRAPHY, ARRAY, STRUCT, SAFE_CAST, ANY_VALUE, ARRAY_AGG, ARRAY_CONCAT_AGG, AVG, BIT_AND, BIT_OR, BIT_XOR, COUNT, COUNTIF, LOGICAL_AND, LOGICAL_OR, MAX, MIN, STRING_AGG, SUM, CORR, COVAR_POP, COVAR_SAMP, STDDEV_POP, STDDEV_SAMP, STDDEV, VAR_POP, VAR_SAMP, VARIANCE, TIMESTAMP_ADD, TIMESTAMP_SUB, GENERATE_ARRAY, GENERATE_DATE_ARRAY, GENERATE_TIMESTAMP_ARRAY, FOR, SYSTEMTIME, AS, OF, WINDOW, RESPECT, IGNORE, NULLS, ) = map( CaselessKeyword, """ UNION, ALL, AND, INTERSECT, EXCEPT, COLLATE, ASC, DESC, ON, USING, NATURAL, INNER, CROSS, LEFT, RIGHT, OUTER, FULL, JOIN, AS, INDEXED, NOT, SELECT, DISTINCT, FROM, WHERE, GROUP, BY, HAVING, ORDER, BY, LIMIT, OFFSET, OR, CAST, ISNULL, NOTNULL, NULL, IS, BETWEEN, ELSE, END, CASE, WHEN, THEN, EXISTS, COLLATE, IN, LIKE, GLOB, REGEXP, MATCH, ESCAPE, CURRENT_TIME, CURRENT_DATE, CURRENT_TIMESTAMP, WITH, EXTRACT, PARTITION, ROWS, RANGE, UNBOUNDED, PRECEDING, CURRENT, ROW, FOLLOWING, OVER, INTERVAL, DATE_ADD, DATE_SUB, 
ADDDATE, SUBDATE, REGEXP_EXTRACT, SPLIT, ORDINAL, FIRST_VALUE, LAST_VALUE, NTH_VALUE, LEAD, LAG, PERCENTILE_CONT, PRECENTILE_DISC, RANK, DENSE_RANK, PERCENT_RANK, CUME_DIST, NTILE, ROW_NUMBER, DATE, TIME, DATETIME, TIMESTAMP, UNNEST, INT64, NUMERIC, FLOAT64, BOOL, BYTES, GEOGRAPHY, ARRAY, STRUCT, SAFE_CAST, ANY_VALUE, ARRAY_AGG, ARRAY_CONCAT_AGG, AVG, BIT_AND, BIT_OR, BIT_XOR, COUNT, COUNTIF, LOGICAL_AND, LOGICAL_OR, MAX, MIN, STRING_AGG, SUM, CORR, COVAR_POP, COVAR_SAMP, STDDEV_POP, STDDEV_SAMP, STDDEV, VAR_POP, VAR_SAMP, VARIANCE, TIMESTAMP_ADD, TIMESTAMP_SUB, GENERATE_ARRAY, GENERATE_DATE_ARRAY, GENERATE_TIMESTAMP_ARRAY, FOR, SYSTEMTIME, AS, OF, WINDOW, RESPECT, IGNORE, NULLS """.replace(",", "").split(), ) keyword_nonfunctions = MatchFirst(( UNION, ALL, INTERSECT, EXCEPT, COLLATE, ASC, DESC, ON, USING, NATURAL, INNER, CROSS, LEFT, RIGHT, OUTER, FULL, JOIN, AS, INDEXED, NOT, SELECT, DISTINCT, FROM, WHERE, GROUP, BY, HAVING, ORDER, BY, LIMIT, OFFSET, CAST, ISNULL, NOTNULL, NULL, IS, BETWEEN, ELSE, END, CASE, WHEN, THEN, EXISTS, COLLATE, IN, LIKE, GLOB, REGEXP, MATCH, STRUCT, WINDOW, )) keyword = keyword_nonfunctions | MatchFirst(( ESCAPE, CURRENT_TIME, CURRENT_DATE, CURRENT_TIMESTAMP, DATE_ADD, DATE_SUB, ADDDATE, SUBDATE, INTERVAL, STRING_AGG, REGEXP_EXTRACT, SPLIT, ORDINAL, UNNEST, SAFE_CAST, PARTITION, TIMESTAMP_ADD, TIMESTAMP_SUB, ARRAY, GENERATE_ARRAY, GENERATE_DATE_ARRAY, GENERATE_TIMESTAMP_ARRAY, )) identifier_word = Word(alphas + "_@#", alphanums + "@$#_") identifier = ~keyword + identifier_word.copy() collation_name = identifier.copy() # NOTE: Column names can be keywords. Doc says they cannot, but in practice it seems to work. column_name = identifier.copy() cast_to = identifier.copy() qualified_column_name = Group( delimitedList(column_name, delim=".") + Optional( Suppress("::") + delimitedList(cast_to("cast"), delim="::"))) # NOTE: As with column names, column aliases can be keywords, e.g. functions like `current_time`. Other # keywords, e.g. 
`from` make parsing pretty difficult (e.g. "SELECT a from from b" is confusing.) column_alias = ~keyword_nonfunctions + column_name.copy() table_name = identifier.copy() table_alias = identifier.copy() index_name = identifier.copy() function_name = identifier.copy() parameter_name = identifier.copy() # NOTE: The expression in a CASE statement can be an integer. E.g. this is valid SQL: # select CASE 1 WHEN 1 THEN -1 ELSE -2 END from test_table unquoted_case_identifier = ~keyword + Word(alphanums + "$_") quoted_case_identifier = ~keyword + (QuotedString('"') ^ Suppress("`") + CharsNotIn("`") + Suppress("`")) case_identifier = quoted_case_identifier | unquoted_case_identifier case_expr = (Optional(case_identifier + Suppress(".")) + Optional(case_identifier + Suppress(".")) + case_identifier) # expression expr = Forward().setName("expression") integer = Regex(r"[+-]?\d+") numeric_literal = Regex(r"[+-]?\d*\.?\d+([eE][+-]?\d+)?") string_literal = QuotedString("'") | QuotedString('"') | QuotedString( "`") regex_literal = "r" + string_literal blob_literal = Regex(r"[xX]'[0-9A-Fa-f]+'") date_or_time_literal = (DATE | TIME | DATETIME | TIMESTAMP) + string_literal literal_value = ( numeric_literal | string_literal | regex_literal | blob_literal | date_or_time_literal | NULL | CURRENT_TIME + Optional(LPAR + Optional(string_literal) + RPAR) | CURRENT_DATE + Optional(LPAR + Optional(string_literal) + RPAR) | CURRENT_TIMESTAMP + Optional(LPAR + Optional(string_literal) + RPAR)) bind_parameter = Word("?", nums) | Combine(oneOf(": @ $") + parameter_name) type_name = oneOf( """TEXT REAL INTEGER BLOB NULL TIMESTAMP STRING DATE INT64 NUMERIC FLOAT64 BOOL BYTES DATETIME GEOGRAPHY TIME ARRAY STRUCT""", caseless=True, ) date_part = oneOf( """DAY DAY_HOUR DAY_MICROSECOND DAY_MINUTE DAY_SECOND HOUR HOUR_MICROSECOND HOUR_MINUTE HOUR_SECOND MICROSECOND MINUTE MINUTE_MICROSECOND MINUTE_SECOND MONTH QUARTER SECOND SECOND_MICROSECOND WEEK YEAR YEAR_MONTH""", caseless=True, ) datetime_operators 
= (DATE_ADD | DATE_SUB | ADDDATE | SUBDATE | TIMESTAMP_ADD | TIMESTAMP_SUB) def invalid_date_add(s, loc, tokens): prev_newline = s[:loc].rfind('\n') prev_prev_newline = s[:prev_newline].rfind('\n') if '--ignore' in s[prev_prev_newline:prev_newline]: pass else: raise RuntimeError( "{} is not valid, did you mean 'date_add'".format( tokens[0])) #bad_datetime_operators = ( # CaselessKeyword('dateadd').setParseAction(invalid_date_add) #) grouping_term = expr.copy() ordering_term = Group( expr("order_key") + Optional(COLLATE + collation_name("collate")) + Optional(ASC | DESC)("direction"))("ordering_term") function_arg = expr.copy()("function_arg") function_args = Optional( "*" | Optional(DISTINCT) + delimitedList(function_arg) + Optional((RESPECT | IGNORE) + NULLS))("function_args") function_call = ((function_name | keyword)("function_name") + LPAR + Group(function_args)("function_args_group") + RPAR)('function') navigation_function_name = (FIRST_VALUE | LAST_VALUE | NTH_VALUE | LEAD | LAG | PERCENTILE_CONT | PRECENTILE_DISC) aggregate_function_name = (ANY_VALUE | ARRAY_AGG | ARRAY_CONCAT_AGG | AVG | BIT_AND | BIT_OR | BIT_XOR | COUNT | COUNTIF | LOGICAL_AND | LOGICAL_OR | MAX | MIN | STRING_AGG | SUM) statistical_aggregate_function_name = (CORR | COVAR_POP | COVAR_SAMP | STDDEV_POP | STDDEV_SAMP | STDDEV | VAR_POP | VAR_SAMP | VARIANCE) numbering_function_name = (RANK | DENSE_RANK | PERCENT_RANK | CUME_DIST | NTILE | ROW_NUMBER) analytic_function_name = ( navigation_function_name | aggregate_function_name | statistical_aggregate_function_name | numbering_function_name)("analytic_function_name") partition_expression_list = delimitedList(grouping_term)( "partition_expression_list") window_frame_boundary_start = (UNBOUNDED + PRECEDING | numeric_literal + (PRECEDING | FOLLOWING) | CURRENT + ROW) window_frame_boundary_end = (UNBOUNDED + FOLLOWING | numeric_literal + (PRECEDING | FOLLOWING) | CURRENT + ROW) window_frame_clause = (ROWS | RANGE) + ( ((UNBOUNDED + PRECEDING) | 
(numeric_literal + PRECEDING) | (CURRENT + ROW)) | (BETWEEN + window_frame_boundary_start + AND + window_frame_boundary_end)) window_name = identifier.copy()("window_name") window_specification = ( Optional(window_name) + Optional(PARTITION + BY + partition_expression_list) + Optional(ORDER + BY + delimitedList(ordering_term)) + Optional(window_frame_clause)("window_specification")) analytic_function = ( analytic_function_name + LPAR + function_args.setParseAction(debug) + RPAR + OVER + (window_name | LPAR + Optional(window_specification) ('window') + RPAR))("analytic_function") string_agg_term = (STRING_AGG + LPAR + Optional(DISTINCT)('has_distinct') + expr('string_agg_expr') + Optional(COMMA + string_literal('delimiter')) + Optional(ORDER + BY + expr + Optional(ASC | DESC) + Optional(LIMIT + integer)) + RPAR)("string_agg") array_literal = ( Optional(ARRAY + Optional(LT + delimitedList(type_name) + GT)) + LBRACKET + delimitedList(expr) + RBRACKET) interval = INTERVAL + expr + date_part array_generator = (GENERATE_ARRAY + LPAR + numeric_literal + COMMA + numeric_literal + COMMA + numeric_literal + RPAR) date_array_generator = ( (GENERATE_DATE_ARRAY | GENERATE_TIMESTAMP_ARRAY) + LPAR + expr("start_date") + COMMA + expr("end_date") + Optional(COMMA + interval) + RPAR) explicit_struct = ( STRUCT + Optional(LT + delimitedList(type_name) + GT) + LPAR + Optional(delimitedList(expr + Optional(AS + identifier))) + RPAR) case_when = WHEN + expr.copy()("when") case_then = THEN + expr.copy()("then") case_clauses = Group(ZeroOrMore(case_when + case_then)) case_else = ELSE + expr.copy()("_else") case_stmt = (CASE + Optional(case_expr.copy()) + case_clauses("case_clauses") + Optional(case_else) + END)("case") class SelectStatement(SemanticToken): def __init__(self, tokens): self.tokens = tokens def getName(self): return 'select' @classmethod def parse(cls, tokens): return SelectStatement(tokens) class Function(SemanticToken): def __init__(self, func, tokens): self.func = func 
self.tokens = tokens def getName(self): return 'function' @classmethod def parse(cls, tokens): method = tokens[0] args = tokens[2:-1] return Function(method, args) def __repr__(self): return "func:{}({})".format(self.func, self.tokens) class WindowFunction(Function): def __init__(self, func, tokens, func_args, partition_args, order_args, window_args): self.func = func self.tokens = tokens self.func_args = func_args self.partition_args = partition_args self.order_args = order_args self.window_args = window_args def getName(self): return 'window function' @classmethod def parse(cls, tokens): return WindowFunction(tokens.analytic_function_name, tokens, tokens.function_args, tokens.partition_expression_list, tokens.ordering_term, tokens.window_specification) def __repr__(self): return "window:{}({})over({}, {}, {})".format( self.func, self.func_args, self.partition_args, self.order_args, self.window_args) class CaseStatement(SemanticToken): def __init__(self, tokens, whens, _else): self.tokens = tokens self.whens = whens self._else = _else def getName(self): return 'case' @classmethod def parse_whens(self, tokens): whens = [] while len(tokens) > 0: _, when, _, then, *tokens = tokens whens.append({"when": when, "then": then}) return whens @classmethod def parse(cls, tokens): whens = tokens[1] _else = tokens[3] return CaseStatement(tokens, cls.parse_whens(whens), _else) def __repr__(self): return "<case statement ({}, {})>".format( len(self.whens), self._else) expr_term = ( (analytic_function)("analytic_function").setParseAction( WindowFunction.parse) | (CAST + LPAR + expr + AS + type_name + RPAR)("cast") | (SAFE_CAST + LPAR + expr + AS + type_name + RPAR)("safe_cast") | (Optional(EXISTS) + LPAR + ungrouped_select_stmt + RPAR)("subselect") | (literal_value)("literal") | (bind_parameter)("bind_parameter") | (EXTRACT + LPAR + expr + FROM + expr + RPAR)("extract") | case_stmt.setParseAction(CaseStatement.parse) | (datetime_operators + LPAR + expr + COMMA + interval + 
RPAR)("date_operation") #| (bad_datetime_operators + LPAR + expr + COMMA + interval + RPAR) | string_agg_term("string_agg_term") | array_literal("array_literal") | array_generator("array_generator") | date_array_generator("date_array_generator") | explicit_struct("explicit_struct") | function_call("function_call").setParseAction(Function.parse) | qualified_column_name("column").setParseAction( lambda x: ".".join([str(i) for i in x[0]])) ).setParseAction(debug) + Optional(LBRACKET + (OFFSET | ORDINAL) + LPAR + expr + RPAR + RBRACKET)("offset_ordinal") struct_term = (LPAR + delimitedList(expr_term) + RPAR) KNOWN_OPS = [(BETWEEN, AND), Literal("||").setName("concat"), Literal("*").setName("mul"), Literal("/").setName("div"), Literal("+").setName("add"), Literal("-").setName("sub"), Literal("<>").setName("neq"), Literal(">").setName("gt"), Literal("<").setName("lt"), Literal(">=").setName("gte"), Literal("<=").setName("lte"), Literal("=").setName("eq"), Literal("==").setName("eq"), Literal("!=").setName("neq"), IN.setName("in"), IS.setName("is"), LIKE.setName("like"), OR.setName("or"), AND.setName("and"), NOT.setName('not')] class Operator(SemanticToken): def __init__(self, op, assoc, name, tokens): self.op = op self.assoc = assoc self.name = name self.tokens = tokens def getName(self): return 'operator' @classmethod def parse(cls, tokens): # ARRANGE INTO {op: params} FORMAT toks = tokens[0] if toks[1] in KNOWN_OPS: op = KNOWN_OPS[KNOWN_OPS.index(toks[1])] if toks.subselect: import ipdb ipdb.set_trace() return Operator(op, 'binary', op.name, [toks[0], toks[2:]]) else: import ipdb ipdb.set_trace() return tokens @classmethod def parse_unary(cls, tokens): toks = tokens[0] if toks[0] in KNOWN_OPS: op = KNOWN_OPS[KNOWN_OPS.index(toks[0])] else: import ipdb ipdb.set_trace() return Operator(op, 'unary', op.name, [toks[1:]]) @classmethod def parse_ternary(cls, tokens): import ipdb ipdb.set_trace() def __repr__(self): return "<operator({}, {}, {})>".format(self.op, self.assoc, 
self.tokens) UNARY, BINARY, TERNARY = 1, 2, 3 expr << infixNotation( (expr_term | struct_term), [ (oneOf("- + ~") | NOT, UNARY, opAssoc.RIGHT, Operator.parse_unary), (ISNULL | NOTNULL | NOT + NULL, UNARY, opAssoc.LEFT, Operator.parse_unary), ("||", BINARY, opAssoc.LEFT, Operator.parse), (oneOf("* / %"), BINARY, opAssoc.LEFT, Operator.parse), (oneOf("+ -"), BINARY, opAssoc.LEFT, Operator.parse), (oneOf("<< >> & |"), BINARY, opAssoc.LEFT, Operator.parse), (oneOf("= > < >= <= <> != !< !>"), BINARY, opAssoc.LEFT, Operator.parse), (IS + Optional(NOT) | Optional(NOT) + IN | Optional(NOT) + LIKE | GLOB | MATCH | REGEXP, BINARY, opAssoc.LEFT, Operator.parse), ((BETWEEN, AND), TERNARY, opAssoc.LEFT, Operator.parse_ternary), (Optional(NOT) + IN + LPAR + Group(ungrouped_select_stmt | delimitedList(expr)) + RPAR, UNARY, opAssoc.LEFT, Operator.parse_unary), (AND, BINARY, opAssoc.LEFT, Operator.parse), (OR, BINARY, opAssoc.LEFT, Operator.parse), ], lpar=Literal('('), rpar=Literal(')'), ) quoted_expr = (expr ^ Suppress('"') + expr + Suppress('"') ^ Suppress("'") + expr + Suppress("'") ^ Suppress("`") + expr + Suppress("`"))("quoted_expr") compound_operator = (UNION + Optional(ALL | DISTINCT) | INTERSECT + DISTINCT | EXCEPT + DISTINCT | INTERSECT | EXCEPT)("compound_operator") join_constraint = Group( Optional(ON + expr | USING + LPAR + Group(delimitedList(qualified_column_name)) + RPAR))("join_constraint") join_op = (COMMA | Group( Optional(NATURAL) + Optional(INNER | CROSS | LEFT + OUTER | LEFT | RIGHT + OUTER | RIGHT | FULL + OUTER | OUTER | FULL) + JOIN))("join_op") join_source = Forward() # We support three kinds of table identifiers. # # First, dot delimited info like project.dataset.table, where # each component follows the rules described in the BigQuery # docs, namely: # Contain letters (upper or lower case), numbers, and underscores # # Second, a dot delimited quoted string. Since it's quoted, we'll be # liberal w.r.t. what characters we allow. 
E.g.: # `project.dataset.name-with-dashes` # # Third, a series of quoted strings, delimited by dots, e.g.: # `project`.`dataset`.`name-with-dashes` # # We won't attempt to support combinations, like: # project.dataset.`name-with-dashes` # `project`.`dataset.name-with-dashes` def record_table_identifier(t): identifier_list = t.asList() padded_list = [None] * (3 - len(identifier_list)) + identifier_list cls._table_identifiers.add(tuple(padded_list)) standard_table_part = ~keyword + Word(alphanums + "_") standard_table_identifier = ( Optional(standard_table_part("project") + Suppress(".")) + Optional(standard_table_part("dataset") + Suppress(".")) + standard_table_part("table") ).setParseAction(lambda t: record_table_identifier(t)) quoted_project_part = ( Suppress('"') + CharsNotIn('"') + Suppress('"') | Suppress("'") + CharsNotIn("'") + Suppress("'") | Suppress("`") + CharsNotIn("`") + Suppress("`")) quoted_table_part = (Suppress('"') + CharsNotIn('".') + Suppress('"') | Suppress("'") + CharsNotIn("'.") + Suppress("'") | Suppress("`") + CharsNotIn("`.") + Suppress("`")) quoted_table_parts_identifier = ( Optional(quoted_project_part("project") + Suppress(".")) + Optional(quoted_table_part("dataset") + Suppress(".")) + quoted_table_part("table") ).setParseAction(lambda t: record_table_identifier(t)) def record_quoted_table_identifier(t): identifier_list = t.asList()[0].split(".") first = ".".join(identifier_list[0:-2]) or None second = identifier_list[-2] third = identifier_list[-1] identifier_list = [first, second, third] padded_list = [None] * (3 - len(identifier_list)) + identifier_list cls._table_identifiers.add(tuple(padded_list)) quotable_table_parts_identifier = ( Suppress('"') + CharsNotIn('"') + Suppress('"') | Suppress("'") + CharsNotIn("'") + Suppress("'") | Suppress("`") + CharsNotIn("`") + Suppress("`") ).setParseAction(lambda t: record_quoted_table_identifier(t)) table_identifier = (standard_table_identifier | quoted_table_parts_identifier | 
quotable_table_parts_identifier) def record_ref(t): lol = [t.op] + t.ref_target.asList() cls._with_aliases.add(tuple(lol)) cls._table_identifiers.add(tuple(lol)) ref_target = identifier.copy() single_source = ( # ref + source statements ((Suppress('{{') + (CaselessKeyword('ref') | CaselessKeyword("source"))("op") + LPAR + delimitedList((Suppress("'") | Suppress('"')) + ref_target + (Suppress("'") | Suppress('"')))("ref_target") + RPAR + Suppress("}}")).setParseAction(record_ref) | table_identifier) + Optional(Optional(AS) + table_alias("table_alias*")) + Optional(FOR + SYSTEMTIME + AS + OF + string_literal) + Optional(INDEXED + BY + index_name("name") | NOT + INDEXED) ("index") | (LPAR + ungrouped_select_stmt + RPAR + Optional(Optional(AS) + table_alias))('subquery') | (LPAR + join_source + RPAR) | (UNNEST + LPAR + expr + RPAR) + Optional(Optional(AS) + column_alias)) join_source << (Group(single_source + OneOrMore( Group(join_op + single_source + join_constraint)('joins*'))) | single_source)('sources*') over_partition = ( PARTITION + BY + delimitedList(partition_expression_list))("over_partition") over_order = ORDER + BY + delimitedList(ordering_term) over_unsigned_value_specification = expr over_window_frame_preceding = ( UNBOUNDED + PRECEDING | over_unsigned_value_specification + PRECEDING | CURRENT + ROW) over_window_frame_following = ( UNBOUNDED + FOLLOWING | over_unsigned_value_specification + FOLLOWING | CURRENT + ROW) over_window_frame_bound = (over_window_frame_preceding | over_window_frame_following) over_window_frame_between = (BETWEEN + over_window_frame_bound + AND + over_window_frame_bound) over_window_frame_extent = (over_window_frame_preceding | over_window_frame_between) over_row_or_range = (ROWS | RANGE) + over_window_frame_extent over = (OVER + LPAR + Optional(over_partition) + Optional(over_order) + Optional(over_row_or_range) + RPAR)("over") result_column = ( Optional(table_name + ".") + "*" + Optional(EXCEPT + LPAR + delimitedList(column_name) 
+ RPAR) | Group(quoted_expr + Optional(over) + Optional(Optional(AS) + column_alias('alias')))) window_select_clause = (WINDOW + identifier + AS + LPAR + window_specification + RPAR) select_core = ( SELECT + Optional(DISTINCT | ALL) + Group(delimitedList(result_column))("columns") + Optional(FROM - join_source("from*")) + Optional(WHERE + expr('where')) + Optional(GROUP + BY + Group(delimitedList(grouping_term))("group_by_terms")) + Optional(HAVING + expr("having_expr")) + Optional(ORDER + BY + Group(delimitedList(ordering_term))("order_by_terms")) + Optional(delimitedList(window_select_clause))) grouped_select_core = select_core | (LPAR + select_core + RPAR) ungrouped_select_stmt << ( grouped_select_core + ZeroOrMore(compound_operator + grouped_select_core) + Optional(LIMIT + (Group(expr + OFFSET + expr) | Group(expr + COMMA + expr) | expr)("limit")))("select") select_stmt = ungrouped_select_stmt | (LPAR + ungrouped_select_stmt + RPAR) # define comment format, and ignore them sql_comment = oneOf("-- #") + restOfLine | cStyleComment select_stmt.ignore(sql_comment) def record_with_alias(t): identifier_list = t.asList() padded_list = [None] * (3 - len(identifier_list)) + identifier_list cls._with_aliases.add(tuple(padded_list)) with_stmt = Forward().setName("with statement") with_clause = Group( identifier.setParseAction(lambda t: record_with_alias(t)) ('cte_name') - AS - LPAR + (select_stmt | with_stmt) - RPAR) with_core = WITH + delimitedList(with_clause)('ctes') with_stmt << (with_core - ~Literal(',') + ungrouped_select_stmt) with_stmt.ignore(sql_comment) select_or_with = select_stmt | with_stmt select_or_with_parens = LPAR + select_or_with - RPAR cls._parser = select_or_with | select_or_with_parens return cls._parser
def parse (input):
    """Parse one top-level form of the functional surface syntax.

    Grammar accepted by the parsers built below:

        <expr> ::= <integer>
                   true
                   false
                   <identifier>
                   ( if <expr> <expr> <expr> )
                   ( let ( ( <name> <expr> ) ... ) <expr> )
                   ( function ( <name> ... ) <expr> )
                   ( function <name> ( <name> ... ) <expr> )
                   ( do <expr> ... )
                   ( while <expr> <expr> )
                   ( call/cc <expr> )
                   ( <expr> <expr> ... )

        <toplevel> ::= ( defun <name> ( <name> ... ) <expr> )
                       ( define <name> <expr> )
                       #quit
                       #abs <expr>
                       <expr>

    Returns the first token of the top-level match: a dict whose "result"
    entry is "function", "value", "quit", "abstract" or "expression".
    """
    idChars = alphas+"_+*-?!=<>"
    idCharsAndDigits = idChars+"0123456789"

    def group (tokens):
        # hand the matched sequence back as ONE element of the enclosing result
        return [tokens]

    def makeLet (bindings,body):
        # (let ((x e) ...) body)  ==>  ((function (x ...) body) e ...)
        params = []
        args = []
        for (param,exp) in bindings:
            params.append(param)
            args.append(exp)
        return ECall(EFunction(params,body),args)

    def makeDo (exprs):
        # (do e1 ... en) becomes nested lets that bind every ei but the last
        # to a throwaway name; space is not an allowed identifier in the
        # syntax, so " " cannot clash with a user name.
        # NOTE: an empty (do) raises IndexError on the next line.
        chained = exprs[-1]
        for position in range(len(exprs)-2,-1,-1):
            chained = makeLet([(" ",exprs[position])],chained)
        return chained

    def makeWhile (cond,body):
        # (while c b) becomes a recursive zero-argument function named
        # " while" (again not a legal user identifier) that is called once.
        loopName = " while"
        again = ECall(EId(loopName),[])
        step = makeLet([(" ",body)],again)
        loop = EFunction([],EIf(cond,step,EValue(VNone())),name=loopName)
        return makeLet([(loopName,loop)],ECall(EId(loopName),[]))

    pIDENTIFIER = Word(idChars,idCharsAndDigits).setParseAction(
        lambda toks: EId(toks[0]))

    # A name has the same lexical shape as an identifier but stays a string
    pNAME = Word(idChars,idCharsAndDigits)
    pNAMES = ZeroOrMore(pNAME).setParseAction(group)

    pINTEGER = Word("0123456789").setParseAction(
        lambda toks: EValue(VInteger(int(toks[0]))))

    pBOOLEAN = (Keyword("true") | Keyword("false")).setParseAction(
        lambda toks: EValue(VBoolean(toks[0]=="true")))

    pEXPR = Forward()
    pEXPRS = ZeroOrMore(pEXPR).setParseAction(group)

    pIF = ("(" + Keyword("if") + pEXPR + pEXPR + pEXPR + ")").setParseAction(
        lambda toks: EIf(toks[2],toks[3],toks[4]))

    pBINDING = ("(" + pNAME + pEXPR + ")").setParseAction(
        lambda toks: (toks[1],toks[2]))
    pBINDINGS = ZeroOrMore(pBINDING).setParseAction(group)

    pLET = ("(" + Keyword("let") + "(" + pBINDINGS + ")" + pEXPR + ")").setParseAction(
        lambda toks: makeLet(toks[3],toks[5]))

    pCALL = ("(" + pEXPR + pEXPRS + ")").setParseAction(
        lambda toks: ECall(toks[1],toks[2]))

    pFUN = ("(" + Keyword("function") + "(" + pNAMES + ")" + pEXPR + ")").setParseAction(
        lambda toks: EFunction(toks[3],toks[5]))

    # same as pFUN, with a name in front of the parameter list
    pFUNrec = ("(" + Keyword("function") + pNAME + "(" + pNAMES + ")" + pEXPR + ")").setParseAction(
        lambda toks: EFunction(toks[4],toks[6],name=toks[2]))

    pDO = ("(" + Keyword("do") + pEXPRS + ")").setParseAction(
        lambda toks: makeDo(toks[2]))

    pWHILE = ("(" + Keyword("while") + pEXPR + pEXPR + ")").setParseAction(
        lambda toks: makeWhile(toks[2],toks[3]))

    pCALLCC = ("(" + Keyword("call/cc") + pEXPR + ")").setParseAction(
        lambda toks: ECallCC(toks[2]))

    # order matters: the keyword forms are tried before the generic call
    pEXPR << (pINTEGER | pBOOLEAN | pIDENTIFIER | pIF | pLET | pFUN | pFUNrec| pDO | pWHILE | pCALLCC | pCALL)

    # can't attach a parse action to pEXPR because of recursion, so the
    # top-level expression parser is a copy carrying its own action
    pTOPEXPR = pEXPR.copy().setParseAction(
        lambda toks: {"result":"expression","expr":toks[0]})

    pDEFINE = ("(" + Keyword("define") + pNAME + pEXPR + ")").setParseAction(
        lambda toks: {"result":"value",
                      "name":toks[2],
                      "expr":toks[3]})

    pDEFUN = ("(" + Keyword("defun") + pNAME + "(" + pNAMES + ")" + pEXPR + ")").setParseAction(
        lambda toks: {"result":"function",
                      "name":toks[2],
                      "params":toks[4],
                      "body":toks[6]})

    pABSTRACT = ("#abs" + pEXPR).setParseAction(
        lambda toks: {"result":"abstract",
                      "expr":toks[1]})

    pQUIT = Keyword("#quit").setParseAction(
        lambda toks: {"result":"quit"})

    pTOP = (pDEFUN | pDEFINE | pQUIT | pABSTRACT | pTOPEXPR)

    # the first element of the parse result is the top-level dict
    return pTOP.parseString(input)[0]
def parse_imp(input):
    """Parse one top-level form of the imperative language.

    Grammar accepted by the parsers built below:

        <expr> ::= <integer>
                   true
                   false
                   <string>
                   <identifier>
                   ( if <expr> <expr> <expr> )
                   ( function ( <name> ... ) <expr> )
                   ( <expr> <expr> ... )

        <decl> ::= var <name> = <expr> ;
                   var <name> <- ( new-array <expr> ) ;

        <stmt> ::= if <expr> <stmt> else <stmt>
                   if <expr> <stmt>
                   while <expr> <stmt>
                   print <expr> ;
                   for <name> <- <expr> ; <expr> ; <name> <- <expr> ; <stmt>
                   <name> <- <expr> ;
                   <name> [ <expr> ] <- <expr> ;
                   { <decl> ... <stmt> ... }
                   <identifier> ( <expr> ... ) ;
                   ( with <identifier> <identifier> ( <expr> ... ) )

        <method> ::= ( <name> ( <name> ... ) <stmt> )
                     ( <name> ( <name> ... ) <> )

        <toplevel> ::= #quit
                       #abs <stmt>
                       <decl>
                       <stmt>
                       procedure <name> ( <name> ... ) <stmt>
                       class ( <name> <identifier> ( <name> ... )
                               ( <identifier> ... ) ( <method> ... ) )
                       absclass ( ...same shape as class... )
                       obj <identifier> <name> = new <identifier> ( <expr> ... )
                       #multi

    Returns the first token of the top-level match: a dict whose "result"
    entry is one of "quit", "abstract", "declaration", "statement",
    "procedure", "template", "objectassignment" or "multi".
    """
    idChars = alphas + "_+*-/?!=<>"

    # NOTE THE DIFFERENCE with the functional parsers: an identifier used as
    # an expression is wrapped in a dereference of the named cell.
    pIDENTIFIER = Word(idChars, idChars + "0123456789")
    pIDENTIFIER.setParseAction(
        lambda result: EPrimCall(oper_deref, [EId(result[0])]))

    pIDENTIFIERS = ZeroOrMore(pIDENTIFIER)
    pIDENTIFIERS.setParseAction(lambda result: [result])

    # A name is like an identifier but it stays a plain string
    pNAME = Word(idChars, idChars + "0123456789")

    pNAMES = ZeroOrMore(pNAME)
    pNAMES.setParseAction(lambda result: [result])

    pINTEGER = Word("0123456789")
    pINTEGER.setParseAction(lambda result: EValue(VInteger(int(result[0]))))

    pBOOLEAN = Keyword("true") | Keyword("false")
    pBOOLEAN.setParseAction(
        lambda result: EValue(VBoolean(result[0] == "true")))

    def escapeString(inStr):
        # Drop the surrounding quote characters, then replace each
        # backslash-backslash or backslash-doublequote pair by its second
        # character.  Any other backslash is kept as is.
        #
        # The loop covers the whole body (the previous version stopped one
        # character early and then appended inStr[-1] unconditionally, which
        # crashed on the empty literal and duplicated the last character
        # whenever it had already been consumed as part of an escape).
        body = inStr[1:-1]
        pieces = []
        i = 0
        while i < len(body):
            if (body[i] == '\\' and i + 1 < len(body)
                    and body[i + 1] in ('\\', '"')):
                i += 1  # skip the backslash, keep the escaped character
            pieces.append(body[i])
            i += 1
        return "".join(pieces)

    pSTRING = quotedString.copy()
    pSTRING.setParseAction(
        lambda result: EValue(VString(escapeString(result[0]))))

    pEXPR = Forward()

    pEXPRS = ZeroOrMore(pEXPR)
    pEXPRS.setParseAction(lambda result: [result])

    pIF = "(" + Keyword("if") + pEXPR + pEXPR + pEXPR + ")"
    pIF.setParseAction(lambda result: EIf(result[2], result[3], result[4]))

    def mkFunBody(params, body):
        # rebind every parameter to a fresh reference cell holding its value
        bindings = [(p, ERefCell(EId(p))) for p in params]
        return ELet(bindings, body)

    pFUN = "(" + Keyword("function") + "(" + pNAMES + ")" + pEXPR + ")"
    pFUN.setParseAction(
        lambda result: EFunction(result[3], mkFunBody(result[3], result[5])))

    pCALL = "(" + pEXPR + pEXPRS + ")"
    pCALL.setParseAction(lambda result: ECall(result[1], result[2]))

    pEXPR << (pINTEGER | pBOOLEAN | pSTRING | pIDENTIFIER | pIF | pFUN
              | pCALL)

    pDECL_VAR = "var" + pNAME + "=" + pEXPR + ";"
    pDECL_VAR.setParseAction(lambda result: (result[1], result[3]))

    pDECL_ARRAY = ("var" + pNAME + "<-" + "(" + "new-array" + pEXPR + ")"
                   + ";")
    pDECL_ARRAY.setParseAction(lambda result: (result[1], EArray(result[5])))

    # NoMatch() keeps the alternation open for further declaration forms
    pDECL = (pDECL_VAR | pDECL_ARRAY | NoMatch())

    pDECLS = ZeroOrMore(pDECL)
    pDECLS.setParseAction(lambda result: [result])

    pSTMT = Forward()

    pSTMT_IF_1 = "if" + pEXPR + pSTMT + "else" + pSTMT
    pSTMT_IF_1.setParseAction(
        lambda result: EIf(result[1], result[2], result[4]))

    # one-armed if: the missing else branch is the value true
    pSTMT_IF_2 = "if" + pEXPR + pSTMT
    pSTMT_IF_2.setParseAction(
        lambda result: EIf(result[1], result[2], EValue(VBoolean(True))))

    pSTMT_WHILE = "while" + pEXPR + pSTMT
    pSTMT_WHILE.setParseAction(lambda result: EWhile(result[1], result[2]))

    pSTMT_PRINT = "print" + pEXPR + ";"
    pSTMT_PRINT.setParseAction(
        lambda result: EPrimCall(oper_print, [result[1]]))

    pSTMT_UPDATE = pNAME + "<-" + pEXPR + ";"
    pSTMT_UPDATE.setParseAction(
        lambda result: EPrimCall(oper_update, [EId(result[0]), result[2]]))

    # name [ index ] <- value ;   (arguments passed as: cell, value, index)
    pSTMTARR_UPDATE = pNAME + "[" + pEXPR + "]" + "<-" + pEXPR + ";"
    pSTMTARR_UPDATE.setParseAction(lambda result: EPrimCall(
        oper_update_arr, [EId(result[0]), result[5], result[2]]))

    # for <init-update> <cond> ; <step-update> <body>
    # desugars to: init; while cond { body; step }
    pSTMT_FOR = "for" + pSTMT_UPDATE + pEXPR + ";" + pSTMT_UPDATE + pSTMT
    pSTMT_FOR.setParseAction(lambda result: EDo(
        [result[1], EWhile(result[2], EDo([result[5], result[4]]))]))

    pSTMTS = ZeroOrMore(pSTMT)
    pSTMTS.setParseAction(lambda result: [result])

    def mkBlock(decls, stmts):
        # every declared name is bound to a reference cell for the block body
        bindings = [(n, ERefCell(expr)) for (n, expr) in decls]
        return ELet(bindings, EDo(stmts))

    pSTMT_BLOCK = "{" + pDECLS + pSTMTS + "}"
    pSTMT_BLOCK.setParseAction(lambda result: mkBlock(result[1], result[2]))

    pDEFPROC = "procedure" + pNAME + "(" + pNAMES + ")" + pSTMT
    pDEFPROC.setParseAction(
        lambda result: {
            "result": "procedure",
            "proc": (result[1],
                     EProcedure(result[3], mkFunBody(result[3], result[5])))
        })

    pSTMT_PROC = pIDENTIFIER + "(" + pEXPRS + ")" + ";"
    pSTMT_PROC.setParseAction(lambda result: EProcCall(result[0], result[2]))

    pWITH = ("(" + Keyword("with") + pIDENTIFIER + pIDENTIFIER
             + "(" + pEXPRS + ")" + ")")
    pWITH.setParseAction(lambda result: EWith(result[2], result[3], result[5]))

    # "<>" stands for a method body that is not implemented
    pNOTIMPLEMENTED = Keyword("<>")
    pNOTIMPLEMENTED.setParseAction(lambda result: ENotImplemented())

    pSTMT << (pSTMT_IF_1 | pSTMT_IF_2 | pSTMT_WHILE | pSTMT_PRINT | pSTMT_FOR
              | pSTMT_UPDATE | pSTMTARR_UPDATE | pSTMT_BLOCK | pSTMT_PROC
              | pWITH)

    # can't attach a parse action to pSTMT because of recursion, so the
    # top-level parsers are copies carrying their own actions
    pTOP_STMT = pSTMT.copy()
    pTOP_STMT.setParseAction(lambda result: {
        "result": "statement",
        "stmt": result[0]
    })

    pTOP_DECL = pDECL.copy()
    pTOP_DECL.setParseAction(lambda result: {
        "result": "declaration",
        "decl": result[0]
    })

    pABSTRACT = "#abs" + pSTMT
    pABSTRACT.setParseAction(lambda result: {
        "result": "abstract",
        "stmt": result[1]
    })

    pQUIT = Keyword("#quit")
    pQUIT.setParseAction(lambda result: {"result": "quit"})

    # method definitions inside a class template
    pDEFFUN = "(" + pNAME + "(" + pNAMES + ")" + pSTMT + ")"
    pDEFFUN.setParseAction(lambda result: (
        result[1], EProcedure(result[3], mkFunBody(result[3], result[5]))))

    pDEFABSFUN = "(" + pNAME + "(" + pNAMES + ")" + pNOTIMPLEMENTED + ")"
    pDEFABSFUN.setParseAction(lambda result: (result[1], ENotImplemented()))

    pDEFFUNS = ZeroOrMore((pDEFFUN | pDEFABSFUN))
    pDEFFUNS.setParseAction(lambda result: [result])

    # token positions: 2 = class name, 3 = second identifier,
    # 5 = names, 8 = identifiers, 11 = method definitions
    pTEMPLATE = (Keyword("class") + "(" + pNAME + pIDENTIFIER
                 + "(" + pNAMES + ")" + "(" + pIDENTIFIERS + ")"
                 + "(" + pDEFFUNS + ")" + ")")
    pTEMPLATE.setParseAction(
        lambda result: {
            "result": "template",
            "temp": (result[2],
                     ETemplate(False, result[2], result[3], result[5],
                               result[8], result[11]))
        })

    # identical shape to pTEMPLATE; only the first ETemplate argument differs
    pABSTEMPLATE = (Keyword("absclass") + "(" + pNAME + pIDENTIFIER
                    + "(" + pNAMES + ")" + "(" + pIDENTIFIERS + ")"
                    + "(" + pDEFFUNS + ")" + ")")
    pABSTEMPLATE.setParseAction(
        lambda result: {
            "result": "template",
            "temp": (result[2],
                     ETemplate(True, result[2], result[3], result[5],
                               result[8], result[11]))
        })

    pNEWOBJ = Keyword("new") + pIDENTIFIER + "(" + pEXPRS + ")"
    pNEWOBJ.setParseAction(lambda result: EObject(result[1], result[3]))

    pOBJASS = Keyword("obj") + pIDENTIFIER + pNAME + "=" + pNEWOBJ
    pOBJASS.setParseAction(
        lambda result: {
            "result": "objectassignment",
            "assignment": (result[2], EObjectBinding(result[1], result[4]))
        })

    pMULTI = Keyword("#multi")
    pMULTI.setParseAction(lambda result: {"result": "multi"})

    pTOP = (pQUIT | pABSTRACT | pTOP_DECL | pTOP_STMT | pDEFPROC | pTEMPLATE
            | pOBJASS | pMULTI | pABSTEMPLATE)

    # the first element of the parse result is the top-level dict
    result = pTOP.parseString(input)[0]
    return result
def parse (input):
    """Parse one top-level form of the first-order language with n-ary + and *.

    Grammar accepted by the parsers built below:

        <expr> ::= <integer>
                   true
                   false
                   <identifier>
                   ( if <expr> <expr> <expr> )
                   ( let ( ( <name> <expr> ) ... ) <expr> )
                   ( + <expr> ... )
                   ( * <expr> ... )
                   ( <name> <expr> ... )

        <toplevel> ::= ( defun <name> ( <name> ... ) <expr> )
                       <expr>

    Returns the first token of the top-level match: a dict whose "result"
    entry is "function" or "expression".
    """
    idChars = alphas+"_+*-?!=<>"

    pIDENTIFIER = Word(idChars, idChars+"0123456789")
    pIDENTIFIER.setParseAction(lambda result: EId(result[0]))

    # A name is like an identifier but it stays a plain string
    pNAME = Word(idChars,idChars+"0123456789")

    pNAMES = ZeroOrMore(pNAME)
    pNAMES.setParseAction(lambda result: [result])

    # optional leading minus sign, then digits
    pINTEGER = Word("-0123456789","0123456789")
    pINTEGER.setParseAction(lambda result: EInteger(int(result[0])))

    pBOOLEAN = Keyword("true") | Keyword("false")
    pBOOLEAN.setParseAction(lambda result: EBoolean(result[0]=="true"))

    pEXPR = Forward()

    pIF = "(" + Keyword("if") + pEXPR + pEXPR + pEXPR + ")"
    pIF.setParseAction(lambda result: EIf(result[2],result[3],result[4]))

    pBINDING = "(" + pNAME + pEXPR + ")"
    pBINDING.setParseAction(lambda result: (result[1],result[2]))

    pBINDINGS = OneOrMore(pBINDING)
    pBINDINGS.setParseAction(lambda result: [ result ])

    pLET = "(" + Keyword("let") + "(" + pBINDINGS + ")" + pEXPR + ")"
    pLET.setParseAction(lambda result: ELet(result[3],result[5]))

    pEXPRS = ZeroOrMore(pEXPR)
    pEXPRS.setParseAction(lambda result: [result])

    def mkOper (op,unit,es):
        # Fold the operands from the right into nested binary calls that end
        # in the operator's unit:  (+ a b)  ==>  (+ a (+ b 0)).
        # With no operands the result is just the unit.
        # (A leftover debug print that ran on every call was removed here.)
        result = EInteger(unit)
        for operand in reversed(list(es)):
            result = ECall(op,[operand,result])
        return result

    pPLUS = "(" + Keyword("+") + pEXPRS + ")"
    pPLUS.setParseAction(lambda result: mkOper("+",0,result[2]))

    pTIMES = "(" + Keyword("*") + pEXPRS + ")"
    pTIMES.setParseAction(lambda result: mkOper("*",1,result[2]))

    pCALL = "(" + pNAME + pEXPRS + ")"
    pCALL.setParseAction(lambda result: ECall(result[1],result[2]))

    # pPLUS / pTIMES come before pCALL so that + and * get the n-ary folding
    pEXPR << (pINTEGER | pBOOLEAN | pIDENTIFIER | pIF | pLET | pPLUS | pTIMES | pCALL)

    # can't attach a parse action to pEXPR because of recursion, so let's duplicate the parser
    pTOPEXPR = pEXPR.copy()
    pTOPEXPR.setParseAction(lambda result: {"result":"expression","expr":result[0]})

    pDEFUN = "(" + Keyword("defun") + pNAME + "(" + pNAMES + ")" + pEXPR + ")"
    pDEFUN.setParseAction(lambda result: {"result":"function",
                                          "name":result[2],
                                          "params":result[4],
                                          "body":result[6]})

    pTOP = (pDEFUN | pTOPEXPR)

    # the first element of the parse result is the top-level dict
    result = pTOP.parseString(input)[0]
    return result
^ QuotedString('{', multiline=True, endQuoteChar='}').setParseAction( lambda t: t[0].strip())).setResultsName('value') VarDefinitions = Group(NameDefinitions + ValDefinitions).setParseAction( expression_type_detection) NestedVar = Forward().setParseAction(expression_type_detection_in_nestedvalues) _NestedContent = (VarDefinitions + CharsNotIn('{' + '}' + ParserElement.DEFAULT_WHITE_CHARS ).setParseAction(lambda t: t[0].strip())) NestedVar << (opener.suppress() + OneOrMore(NestedVar | _NestedContent) + closer.suppress()) OptionsDefinitions = Group( Keyword('options').setResultsName('node_type') + NestedVar.copy().setResultsName('value')).setResultsName('option-node') ZoneDefinitions = Group( Keyword('zone').setResultsName('node_type') + QUOTED_WORDS.setResultsName('name') + NestedVar.copy().setResultsName('value')).setResultsName('zone-node') KeyDefinitions = Group( Keyword('key').setResultsName('node_type') + QUOTED_WORDS.setResultsName('name') + NestedVar.copy().setResultsName('value')).setResultsName('key-node') AclDefinitions = Group( Keyword('acl').setResultsName('node_type') + QUOTED_WORDS.copy().setResultsName('name') + WORD_LIST.copy().setResultsName('value')).setResultsName('acl-node')
def parse_curry(input):
    """Parse one top-level form of the curried functional language.

    Grammar accepted by the parsers built below:

        <expr> ::= <integer>
                   true
                   false
                   <identifier>
                   ( if <expr> <expr> <expr> )
                   ( let ( ( <name> <expr> ) ... ) <expr> )
                   ( function ( <name> ... ) <expr> )
                   ( <expr> <expr> ... )

        <definition> ::= ( defun <name> ( <name> ... ) <expr> )

    The let, call, function and defun forms hand their raw token list to
    parsePLet, parsePCallCurry, parsePFuncCurry and parsePDefunCurry, which
    are not defined in this function.

    Returns the first token of the top-level match.
    """
    nameStart = alphas + "_+*-~/?!=<>"
    nameRest = nameStart + "0123456789"

    pIDENTIFIER = Word(nameStart, nameRest).setParseAction(
        lambda toks: EId(toks[0]))

    # A name has the same lexical shape as an identifier but stays a string
    pNAME = Word(nameStart, nameRest)

    pINTEGER = Word("0123456789").setParseAction(
        lambda toks: EValue(VInteger(int(toks[0]))))

    pBOOLEAN = (Keyword("true") | Keyword("false")).setParseAction(
        lambda toks: EValue(VBoolean(toks[0] == "true")))

    pEXPR = Forward()

    pIF = ("(" + Keyword("if") + pEXPR + pEXPR + pEXPR + ")").setParseAction(
        lambda toks: EIf(toks[2], toks[3], toks[4]))

    pBINDING = ("(" + pNAME + pEXPR + ")").setParseAction(
        lambda toks: (toks[1], toks[2]))

    # the bindings are grouped so that they occupy one slot of the let tokens
    pBINDINGS = OneOrMore(pBINDING).setParseAction(lambda toks: [toks])

    pLET = ("(" + Keyword("let") + "(" + pBINDINGS + ")" + pEXPR
            + ")").setParseAction(lambda toks: parsePLet(toks))

    # operands and parameters are NOT grouped here: the helpers receive the
    # flat token list, parentheses included
    pCALL = ("(" + pEXPR + OneOrMore(pEXPR) + ")").setParseAction(
        lambda toks: parsePCallCurry(toks))

    pFUN = ("(" + Keyword("function") + "(" + OneOrMore(pNAME) + ")" + pEXPR
            + ")").setParseAction(lambda toks: parsePFuncCurry(toks))

    pEXPR << (pINTEGER | pBOOLEAN | pIDENTIFIER | pIF | pLET | pFUN | pCALL)

    # can't attach a parse action to pEXPR because of recursion, so the
    # top-level expression parser is a copy carrying its own action
    pTOPEXPR = pEXPR.copy().setParseAction(lambda toks: {
        "result": "expression",
        "expr": toks[0]
    })

    pDEFUN = ("(" + Keyword("defun") + pNAME + "(" + OneOrMore(pNAME) + ")"
              + pEXPR + ")").setParseAction(
                  lambda toks: parsePDefunCurry(toks))

    pTOP = (pDEFUN | pTOPEXPR)

    # the first element of the parse result is the top-level value
    return pTOP.parseString(input)[0]
def parse_curry(input):
    """Parse one top-level form of the curried functional language.

    Grammar accepted by the parsers built below:

        <expr> ::= <integer>
                   true
                   false
                   <identifier>
                   ( if <expr> <expr> <expr> )
                   ( let ( ( <name> <expr> ) ... ) <expr> )
                   ( function ( <name> ... ) <expr> )
                   ( <expr> <expr> ... )

        <definition> ::= ( defun <name> ( <name> ... ) <expr> )

    Multi-parameter functions and multi-argument calls are curried: a
    function of n parameters becomes n nested one-parameter functions, and a
    call with n arguments becomes n nested one-argument calls.

    Returns the first token of the top-level match: a dict whose "result"
    entry is "function" or "expression".
    """

    def letToFun(result):
        # tokens: "(" "let" "(" <bindings> ")" <body> ")"
        # (let ((x e) ...) body)  ==>  ((function (x ...) body) e ...)
        func = result[5]
        binds = result[3]
        params = []
        vals = []
        for p, v in binds:
            params.append(p)
            vals.append(v)
        return ECall(EFunction(params, func), vals)

    def eCallHelper(first, rest):
        # (f a b c)  ==>  (((f a) b) c)
        if len(rest) == 1:
            return ECall(first, rest)
        else:
            return ECall(eCallHelper(first, rest[:-1]), [rest[-1]])

    def eCall(result):
        # tokens: "(" <function> <arg> ... ")"
        first = result[1]
        rest = result[2:-1]
        return eCallHelper(first, rest)

    def eFunHelper(variables, expression):
        # (function (x y z) e)  ==>  one nested EFunction per parameter;
        # requires at least one variable
        if len(variables) == 1:
            return EFunction(variables[0], expression)
        else:
            return EFunction(variables[0],
                             eFunHelper(variables[1:], expression))

    def eFun(result):
        # tokens: "(" "function" "(" <name> ... ")" <body> ")"
        variables = result[3:-3]
        expression = result[-2]
        return eFunHelper(variables, expression)

    def eDeFun(result):
        # tokens: "(" "defun" <name> "(" <param> ... ")" <body> ")"
        # The first parameter is reported as "params"; every further
        # parameter is curried into the body.
        body = result[-2]
        remaining = list(result[5:-3])
        if remaining:
            body = eFunHelper(remaining, body)
        # else: a one-parameter defun has nothing left to curry.  The
        # previous version still built a function over the empty parameter
        # list, which failed inside eFunHelper on variables[0].
        return {
            "result": "function",
            "name": result[2],
            "params": result[4],
            "body": body
        }

    idChars = alphas + "_+*-~/?!=<>"

    pIDENTIFIER = Word(idChars, idChars + "0123456789")
    pIDENTIFIER.setParseAction(lambda result: EId(result[0]))

    # A name is like an identifier but it stays a plain string
    pNAME = Word(idChars, idChars + "0123456789")

    pINTEGER = Word("0123456789")
    pINTEGER.setParseAction(lambda result: EValue(VInteger(int(result[0]))))

    pBOOLEAN = Keyword("true") | Keyword("false")
    pBOOLEAN.setParseAction(
        lambda result: EValue(VBoolean(result[0] == "true")))

    pEXPR = Forward()

    pIF = "(" + Keyword("if") + pEXPR + pEXPR + pEXPR + ")"
    pIF.setParseAction(lambda result: EIf(result[2], result[3], result[4]))

    pBINDING = "(" + pNAME + pEXPR + ")"
    pBINDING.setParseAction(lambda result: (result[1], result[2]))

    pBINDINGS = OneOrMore(pBINDING)
    pBINDINGS.setParseAction(lambda result: [result])

    pLET = "(" + Keyword("let") + "(" + pBINDINGS + ")" + pEXPR + ")"
    pLET.setParseAction(letToFun)

    # arguments / parameters are left ungrouped: the helpers slice them out
    # of the flat token list
    pCALL = "(" + pEXPR + OneOrMore(pEXPR) + ")"
    pCALL.setParseAction(eCall)

    pFUN = "(" + Keyword("function") + "(" + OneOrMore(pNAME) + ")" + pEXPR + ")"
    pFUN.setParseAction(eFun)

    pEXPR << (pINTEGER | pBOOLEAN | pIDENTIFIER | pIF | pLET | pFUN | pCALL)

    # can't attach a parse action to pEXPR because of recursion, so let's duplicate the parser
    pTOPEXPR = pEXPR.copy()
    pTOPEXPR.setParseAction(lambda result: {
        "result": "expression",
        "expr": result[0]
    })

    pDEFUN = ("(" + Keyword("defun") + pNAME + "(" + OneOrMore(pNAME) + ")"
              + pEXPR + ")")
    pDEFUN.setParseAction(eDeFun)

    pTOP = (pDEFUN | pTOPEXPR)

    # the first element of the parse result is the top-level dict
    result = pTOP.parseString(input)[0]
    return result
def parse(input):
    """Parse one top-level form of the functional language.

    Grammar accepted by the parsers built below:

        <expr> ::= <integer>
                   true
                   false
                   <identifier>
                   ( if <expr> <expr> <expr> )
                   ( let ( ( <name> <expr> ) ... ) <expr> )
                   ( function ( <name> ... ) <expr> )
                   ( function <name> ( <name> ... ) <expr> )
                   ( <expr> <expr> ... )

        <definition> ::= ( defun <name> ( <name> ... ) <expr> )

    Returns the first token of the top-level match: a dict whose "result"
    entry is "function" or "expression".
    """
    nameStart = alphas + "_+*-~/?!=<>"
    nameRest = nameStart + "0123456789"

    def letToFun(toks):
        # tokens: "(" "let" "(" <bindings> ")" <body> ")"
        # (let ((x e) ...) body)  ==>  ((function (x ...) body) e ...)
        body = toks[5]
        params = []
        vals = []
        for (name, value) in toks[3]:
            params.append(name)
            vals.append(value)
        return ECall(EFunction(params, body), vals)

    def eFunName(toks):
        # tokens: "(" "function" <name> "(" <param> ... ")" <body> ")"
        # the function's own name is passed as third positional argument
        return EFunction(toks[4:-3], toks[-2], toks[2])

    pIDENTIFIER = Word(nameStart, nameRest).setParseAction(
        lambda toks: EId(toks[0]))

    # A name has the same lexical shape as an identifier but stays a string
    pNAME = Word(nameStart, nameRest)

    pINTEGER = Word("0123456789").setParseAction(
        lambda toks: EValue(VInteger(int(toks[0]))))

    pBOOLEAN = (Keyword("true") | Keyword("false")).setParseAction(
        lambda toks: EValue(VBoolean(toks[0] == "true")))

    pEXPR = Forward()

    pIF = ("(" + Keyword("if") + pEXPR + pEXPR + pEXPR + ")").setParseAction(
        lambda toks: EIf(toks[2], toks[3], toks[4]))

    pBINDING = ("(" + pNAME + pEXPR + ")").setParseAction(
        lambda toks: (toks[1], toks[2]))

    # grouped so the bindings occupy one slot of the let tokens
    pBINDINGS = OneOrMore(pBINDING).setParseAction(lambda toks: [toks])

    pLET = ("(" + Keyword("let") + "(" + pBINDINGS + ")" + pEXPR
            + ")").setParseAction(letToFun)

    # arguments / parameters are left ungrouped and sliced out of the flat
    # token list: everything between the fixed leading and trailing tokens
    pCALL = ("(" + pEXPR + OneOrMore(pEXPR) + ")").setParseAction(
        lambda toks: ECall(toks[1], toks[2:-1]))

    pFUN = ("(" + Keyword("function") + "(" + OneOrMore(pNAME) + ")" + pEXPR
            + ")").setParseAction(
                lambda toks: EFunction(toks[3:-3], toks[-2]))

    pFUNNAME = ("(" + Keyword("function") + pNAME + "(" + OneOrMore(pNAME)
                + ")" + pEXPR + ")").setParseAction(eFunName)

    pEXPR << (pINTEGER | pBOOLEAN | pIDENTIFIER | pIF | pLET | pFUN |
              pFUNNAME | pCALL)

    # can't attach a parse action to pEXPR because of recursion, so the
    # top-level expression parser is a copy carrying its own action
    pTOPEXPR = pEXPR.copy().setParseAction(lambda toks: {
        "result": "expression",
        "expr": toks[0]
    })

    pDEFUN = ("(" + Keyword("defun") + pNAME + "(" + OneOrMore(pNAME) + ")"
              + pEXPR + ")").setParseAction(
                  lambda toks: {
                      "result": "function",
                      "name": toks[2],
                      "params": toks[4:-3],
                      "body": toks[-2]
                  })

    pTOP = (pDEFUN | pTOPEXPR)

    # the first element of the parse result is the top-level dict
    return pTOP.parseString(input)[0]
def parse(input): # parse a string into an element of the abstract representation # Grammar: # # <expr> ::= <integer> # true # false # <identifier> # ( if <expr> <expr> <expr> ) # ( let ( ( <name> <expr> ) ) <expr> ) # ( <name> <expr> ... ) # def and_helper(result): if len(result) == 3: return EBoolean(True) elif len(result) == 4: return result[2] elif len(result) == 5: return EIf(result[2], result[3], EBoolean(False)) else: first = result[:-3] last = EIf(result[-3], result[-2], EBoolean(False)) first.append(last) first.append(")") return and_helper(first) def or_helper(result): if len(result) == 3: return EBoolean(True) elif len(result) == 4: return result[2] elif len(result) == 5: return EIf(result[2], EBoolean(True), result[3]) else: first = result[:-3] last = EIf(result[-3], EBoolean(True), result[-2]) first.append(last) first.append(")") return or_helper(first) def condition_helper(result): if len(result) == 0: return EBoolean(False) if len(result) == 1: return EIf(result[0][0], result[0][1], EBoolean(False)) else: return EIf(result[0][0], result[0][1], condition_helper(result[1:])) def letstar_helper(result): if len(result) == 7: return ELet([result[3]], result[5]) else: return ELet([result[3]], letstar_helper(result[:3] + result[4:])) idChars = alphas + "_+*-?!=<>" pIDENTIFIER = Word(idChars, idChars + "0123456789") pIDENTIFIER.setParseAction(lambda result: EId(result[0])) # A name is like an identifier but it does not return an EId... 
pNAME = Word(idChars, idChars + "0123456789") pNAMES = ZeroOrMore(pNAME) pNAMES.setParseAction(lambda result: [result]) pINTEGER = Word("-0123456789", "0123456789") pINTEGER.setParseAction(lambda result: EInteger(int(result[0]))) pBOOLEAN = Keyword("true") | Keyword("false") pBOOLEAN.setParseAction(lambda result: EBoolean(result[0] == "true")) pEXPR = Forward() pIF = "(" + Keyword("if") + pEXPR + pEXPR + pEXPR + ")" pIF.setParseAction(lambda result: EIf(result[2], result[3], result[4])) pAND = "(" + Keyword("and") + ZeroOrMore(pEXPR) + ")" pAND.setParseAction(and_helper) pOR = "(" + Keyword("or") + ZeroOrMore(pEXPR) + ")" pOR.setParseAction(or_helper) pBINDING = "(" + pNAME + pEXPR + ")" pBINDING.setParseAction(lambda result: (result[1], result[2])) pBINDINGS = OneOrMore(pBINDING) pBINDINGS.setParseAction(lambda result: [result]) pLET = "(" + Keyword("let") + "(" + pBINDINGS + ")" + pEXPR + ")" pLET.setParseAction(lambda result: ELet(result[3], result[5])) pLETSTAR = "(" + Keyword("let*") + "(" + OneOrMore( pBINDING) + ")" + pEXPR + ")" pLETSTAR.setParseAction(lambda result: letstar_helper(result)) pCONDITION = "(" + pEXPR + pINTEGER + ")" pCONDITION.setParseAction(lambda result: (result[1], result[2])) pCONDITIONS = ZeroOrMore(pCONDITION) pCONDITIONS.setParseAction(lambda result: [result]) pCOND = "(" + Keyword("cond") + pCONDITIONS + ")" pCOND.setParseAction(lambda result: condition_helper(result[2])) pEXPRS = ZeroOrMore(pEXPR) pEXPRS.setParseAction(lambda result: [result]) pCALL = "(" + pNAME + pEXPRS + ")" pCALL.setParseAction(lambda result: ECall(result[1], result[2])) pEXPR << (pINTEGER | pBOOLEAN | pIDENTIFIER | pIF | pAND | pOR | pLET | pLETSTAR | pCOND | pCALL) # can't attach a parse action to pEXPR because of recursion, so let's duplicate the parser pTOPEXPR = pEXPR.copy() pTOPEXPR.setParseAction(lambda result: { "result": "expression", "expr": result[0] }) pDEFUN = "(" + Keyword("defun") + pNAME + "(" + pNAMES + ")" + pEXPR + ")" pDEFUN.setParseAction( 
lambda result: { "result": "function", "name": result[2], "params": result[4], "body": result[6] }) pTOP = (pDEFUN | pTOPEXPR) result = pTOP.parseString(input)[0] return result # the first element of the result is the expression
def parse (input):
    """Parse one top-level form of the basic first-order language.

    Grammar accepted by the parsers built below:

        <expr> ::= <integer>
                   true
                   false
                   <identifier>
                   ( if <expr> <expr> <expr> )
                   ( let ( ( <name> <expr> ) ... ) <expr> )
                   ( <name> <expr> ... )

        <toplevel> ::= ( defun <name> ( <name> ... ) <expr> )
                       <expr>

    Returns the first token of the top-level match: a dict whose "result"
    entry is "function" or "expression".
    """
    nameStart = alphas+"_+*-?!=<>"
    nameRest = nameStart+"0123456789"

    def group (tokens):
        # hand the matched sequence back as ONE element of the enclosing result
        return [tokens]

    pIDENTIFIER = Word(nameStart, nameRest).setParseAction(
        lambda toks: EId(toks[0]))

    # A name has the same lexical shape as an identifier but stays a string
    pNAME = Word(nameStart, nameRest)
    pNAMES = ZeroOrMore(pNAME).setParseAction(group)

    # optional leading minus sign, then digits
    pINTEGER = Word("-0123456789","0123456789").setParseAction(
        lambda toks: EInteger(int(toks[0])))

    pBOOLEAN = (Keyword("true") | Keyword("false")).setParseAction(
        lambda toks: EBoolean(toks[0]=="true"))

    pEXPR = Forward()

    pIF = ("(" + Keyword("if") + pEXPR + pEXPR + pEXPR + ")").setParseAction(
        lambda toks: EIf(toks[2],toks[3],toks[4]))

    pBINDING = ("(" + pNAME + pEXPR + ")").setParseAction(
        lambda toks: (toks[1],toks[2]))
    pBINDINGS = OneOrMore(pBINDING).setParseAction(group)

    pLET = ("(" + Keyword("let") + "(" + pBINDINGS + ")" + pEXPR + ")").setParseAction(
        lambda toks: ELet(toks[3],toks[5]))

    pEXPRS = ZeroOrMore(pEXPR).setParseAction(group)

    # the callee is a plain name here, not an arbitrary expression
    pCALL = ("(" + pNAME + pEXPRS + ")").setParseAction(
        lambda toks: ECall(toks[1],toks[2]))

    pEXPR << (pINTEGER | pBOOLEAN | pIDENTIFIER | pIF | pLET | pCALL)

    # can't attach a parse action to pEXPR because of recursion, so the
    # top-level expression parser is a copy carrying its own action
    pTOPEXPR = pEXPR.copy().setParseAction(
        lambda toks: {"result":"expression","expr":toks[0]})

    pDEFUN = ("(" + Keyword("defun") + pNAME + "(" + pNAMES + ")" + pEXPR + ")").setParseAction(
        lambda toks: {"result":"function",
                      "name":toks[2],
                      "params":toks[4],
                      "body":toks[6]})

    pTOP = (pDEFUN | pTOPEXPR)

    # the first element of the parse result is the top-level dict
    return pTOP.parseString(input)[0]
def parse_natural(input):
    """Parse one top-level form written in the infix ("natural") syntax.

    Grammar accepted by the elements built below:

        <core>   ::= <integer> | true | false
                     <name> ( <expr> , ... )
                     <identifier>
                     ( <expr> )
        <factor> ::= <core> * <factor> | <core>
        <term>   ::= <factor> { (+|-) <factor> }
        <expr>   ::= <term> ? <expr> : <expr>
                     let ( <name> = <expr> , ... ) <expr>
                     <term>
        <toplevel> ::= function <name> ( <name> , ... ) <expr>
                       <expr>

    Parameter lists, argument lists and binding lists need at least one
    entry.  Returns a dict whose "result" key is "function" (with "name",
    "params" and "body") or "expression" (with "expr").
    """
    idChars = alphas+"_+*-?!=<>"
    digits = "0123456789"

    pIDENTIFIER = Word(idChars, idChars+digits)
    pIDENTIFIER.setParseAction(lambda result: EId(result[0]))

    # A name is like an identifier but it does not return an EId...
    pNAME = Word(idChars, idChars+digits)

    pNAME_comma = "," + pNAME
    pNAME_comma.setParseAction(lambda result: result[1])

    pNAMES = pNAME + ZeroOrMore(pNAME_comma)
    pNAMES.setParseAction(lambda result: [result])

    # A leading "-" only counts as part of an integer when at least one
    # digit follows.  Previously a lone "-" matched here and int("-")
    # raised ValueError instead of a parse failure.
    pINTEGER = Word(digits) | Word("-", digits, min=2)
    pINTEGER.setParseAction(lambda result: EInteger(int(result[0])))

    pBOOLEAN = Keyword("true") | Keyword("false")
    pBOOLEAN.setParseAction(lambda result: EBoolean(result[0]=="true"))

    pEXPR = Forward()

    pPAREN = "(" + pEXPR + ")"
    pPAREN.setParseAction(lambda result: result[1])

    pEXPR_comma = "," + pEXPR
    pEXPR_comma.setParseAction(lambda result: result[1])

    pEXPRS = pEXPR + ZeroOrMore(pEXPR_comma)
    pEXPRS.setParseAction(lambda result: [result])

    pCALL = pNAME + "(" + pEXPRS + ")"
    pCALL.setParseAction(lambda result: ECall(result[0],result[2]))

    # pCALL must be tried before pIDENTIFIER, which matches the same prefix.
    pCORE = ( pINTEGER | pBOOLEAN | pCALL | pIDENTIFIER | pPAREN )

    pFACTOR = Forward()
    pTIMES = (pCORE + "*" + pFACTOR)
    pTIMES.setParseAction(lambda result: ECall("*",[result[0],result[2]]))
    pFACTOR << (pTIMES | pCORE)

    # Additive operators are folded LEFT to right so that "1 - 2 - 3" means
    # (1 - 2) - 3.  The earlier right-recursive rule produced 1 - (2 - 3).
    pPLUS_TAIL = "+" + pFACTOR
    pPLUS_TAIL.setParseAction(lambda result: ("+", result[1]))

    pMINUS_TAIL = "-" + pFACTOR
    pMINUS_TAIL.setParseAction(lambda result: ("-", result[1]))

    def mkLeftAssoc(result):
        # result is [first_operand, (op, operand), (op, operand), ...]
        tree = result[0]
        for (op, operand) in result[1:]:
            tree = ECall(op, [tree, operand])
        return tree

    pTERM = pFACTOR + ZeroOrMore(pPLUS_TAIL | pMINUS_TAIL)
    pTERM.setParseAction(mkLeftAssoc)

    pIF = pTERM + "?" + pEXPR + ":" + pEXPR
    pIF.setParseAction(lambda result: EIf(result[0],result[2],result[4]))

    # Each binding becomes a (name, expression) tuple.
    pBINDING = pNAME + "=" + pEXPR
    pBINDING.setParseAction(lambda result: (result[0],result[2]))

    pBINDING_comma = "," + pBINDING
    pBINDING_comma.setParseAction(lambda result: result[1])

    pBINDINGS = pBINDING + ZeroOrMore(pBINDING_comma)
    pBINDINGS.setParseAction(lambda result: [ result ])

    pLET = Keyword("let") + "(" + pBINDINGS + ")" + pEXPR
    pLET.setParseAction(lambda result: ELet(result[2],result[4]))

    # pIF first: it starts with a pTERM, so pTERM alone is the fallback.
    pEXPR << ( pIF | pLET | pTERM )

    # can't attach a parse action to pEXPR because of recursion, so let's
    # duplicate the parser
    pTOPEXPR = pEXPR.copy()
    pTOPEXPR.setParseAction(lambda result: {"result":"expression","expr":result[0]})

    pDEFUN = Keyword("function") + pNAME + "(" + pNAMES + ")" + pEXPR
    pDEFUN.setParseAction(lambda result: {"result":"function",
                                          "name":result[1],
                                          "params":result[3],
                                          "body":result[5]})

    pTOP = (pDEFUN | pTOPEXPR)

    result = pTOP.parseString(input)[0]
    return result    # the first element of the result is the expression
>>> print join(['a', 'long', 'long', 'road']) a~long long~road >>> print join(['very', 'long', 'phrase']) very long~phrase """ if len(words) <= 2: return tie.join(words) else: return (words[0] + tie_or_space(words[0], tie, space) + space.join(words[1:-1]) + tie + words[-1]) def format(name, format): return NameFormat(format).format(name) lbrace = Literal('{') rbrace = Literal('}') format_chars = Word(alphas) braced_string = Forward() braced_string << Combine(lbrace + ZeroOrMore(CharsNotIn('{}')| braced_string) + rbrace) verbatim = Combine(ZeroOrMore(CharsNotIn(alphas + '{}') | braced_string)) delimiter = braced_string.copy().setParseAction(removeQuotes) group = Group(Suppress(lbrace) + verbatim + format_chars + Optional(delimiter, None) + verbatim + Suppress(rbrace)) group.setParseAction(lambda toks: NamePart(toks)) toplevel_text = CharsNotIn('{}').setParseAction(lambda toks: Text(toks)) name_format_grammar = ZeroOrMore(toplevel_text | group) + StringEnd() name_format_grammar.leaveWhitespace()
NestedVar = Forward().setParseAction(expression_type_detection_in_nestedvalues) _NestedContent = ( VarDefinitions + CharsNotIn('{' + '}' + ParserElement.DEFAULT_WHITE_CHARS).setParseAction(lambda t: t[0].strip()) ) NestedVar << ( opener.suppress() + OneOrMore(NestedVar | _NestedContent) + closer.suppress() ) OptionsDefinitions = Group( Keyword('options').setResultsName('node_type') + NestedVar.copy().setResultsName('value') ).setResultsName('option-node') ZoneDefinitions = Group( Keyword('zone').setResultsName('node_type') + QUOTED_WORDS.setResultsName('name') + NestedVar.copy().setResultsName('value') ).setResultsName('zone-node') KeyDefinitions = Group( Keyword('key').setResultsName('node_type') + QUOTED_WORDS.setResultsName('name') + NestedVar.copy().setResultsName('value') ).setResultsName('key-node') AclDefinitions = Group(
def parse(input):
    """Parse one top-level form of the S-expression language.

    Grammar:

        <expr> ::= <integer>
                   true
                   false
                   <identifier>
                   ( if <expr> <expr> <expr> )
                   ( let ( ( <name> <expr> ) ... ) <expr> )
                   ( function ( <name> ... ) <expr> )
                   ( function <name> ( <name> ... ) <expr> )
                   ( do <expr> ... )
                   ( while <expr> <expr> )
                   ( <expr> <expr> ... )

        <toplevel> ::= ( defun <name> ( <name> ... ) <expr> )
                       ( define <name> <expr> )
                       #quit
                       #abs <expr>
                       <expr>

    `let`, `do` and `while` are not AST nodes of their own: they are
    rewritten here into function calls (see makeLet / makeDo / makeWhile).

    Returns a dict whose "result" key is one of "function", "value",
    "quit", "abstract" or "expression".
    """
    idChars = alphas + "_+*-?!=<>"

    pIDENTIFIER = Word(idChars, idChars + "0123456789")
    pIDENTIFIER.setParseAction(lambda result: EId(result[0]))

    # A name is like an identifier but it does not return an EId...
    pNAME = Word(idChars, idChars + "0123456789")

    # Wrapping in a list makes all the names arrive as ONE token.
    pNAMES = ZeroOrMore(pNAME)
    pNAMES.setParseAction(lambda result: [result])

    pINTEGER = Word("0123456789")
    pINTEGER.setParseAction(lambda result: EValue(VInteger(int(result[0]))))

    pBOOLEAN = Keyword("true") | Keyword("false")
    pBOOLEAN.setParseAction(
        lambda result: EValue(VBoolean(result[0] == "true")))

    pEXPR = Forward()

    pEXPRS = ZeroOrMore(pEXPR)
    pEXPRS.setParseAction(lambda result: [result])

    pIF = "(" + Keyword("if") + pEXPR + pEXPR + pEXPR + ")"
    pIF.setParseAction(lambda result: EIf(result[2], result[3], result[4]))

    # Each binding becomes a (name, expression) tuple.
    pBINDING = "(" + pNAME + pEXPR + ")"
    pBINDING.setParseAction(lambda result: (result[1], result[2]))

    pBINDINGS = ZeroOrMore(pBINDING)
    pBINDINGS.setParseAction(lambda result: [result])

    def makeLet(bindings, body):
        # (let ((x e) ...) body)  ==>  ((function (x ...) body) e ...)
        params = [param for (param, exp) in bindings]
        args = [exp for (param, exp) in bindings]
        return ECall(EFunction(params, body), args)

    pLET = "(" + Keyword("let") + "(" + pBINDINGS + ")" + pEXPR + ")"
    pLET.setParseAction(lambda result: makeLet(result[3], result[5]))

    pCALL = "(" + pEXPR + pEXPRS + ")"
    pCALL.setParseAction(lambda result: ECall(result[1], result[2]))

    pFUN = "(" + Keyword("function") + "(" + pNAMES + ")" + pEXPR + ")"
    pFUN.setParseAction(lambda result: EFunction(result[3], result[5]))

    # Named variant, so the body can refer to the function itself.
    pFUNrec = "(" + Keyword(
        "function") + pNAME + "(" + pNAMES + ")" + pEXPR + ")"
    pFUNrec.setParseAction(
        lambda result: EFunction(result[4], result[6], name=result[2]))

    def makeDo(exprs):
        # pEXPRS allows zero expressions; "(do)" used to raise IndexError
        # on exprs[-1].  Give it the same "no value" result makeWhile uses.
        if len(exprs) == 0:
            return EValue(VNone())
        # Sequence e1 ... en as nested lets whose bound name is discarded;
        # the value of the whole form is the value of en.
        result = exprs[-1]
        for e in reversed(exprs[:-1]):
            # space is not an allowed identifier in the syntax!
            result = makeLet([(" ", e)], result)
        return result

    pDO = "(" + Keyword("do") + pEXPRS + ")"
    pDO.setParseAction(lambda result: makeDo(result[2]))

    def makeWhile(cond, body):
        # Bind a recursive zero-argument function under the name " while"
        # (unreachable from source text because of the space) that runs
        # body and calls itself again while cond holds, then call it once.
        return makeLet(
            [(" while",
              EFunction([],
                        EIf(cond,
                            makeLet([(" ", body)],
                                    ECall(EId(" while"), [])),
                            EValue(VNone())),
                        name=" while"))],
            ECall(EId(" while"), []))

    pWHILE = "(" + Keyword("while") + pEXPR + pEXPR + ")"
    pWHILE.setParseAction(lambda result: makeWhile(result[2], result[3]))

    # pCALL goes last: it would otherwise also accept the keyword forms.
    pEXPR << (pINTEGER | pBOOLEAN | pIDENTIFIER | pIF | pLET | pFUN | pFUNrec
              | pDO | pWHILE | pCALL)

    # can't attach a parse action to pEXPR because of recursion, so let's
    # duplicate the parser
    pTOPEXPR = pEXPR.copy()
    pTOPEXPR.setParseAction(lambda result: {
        "result": "expression",
        "expr": result[0]
    })

    pDEFINE = "(" + Keyword("define") + pNAME + pEXPR + ")"
    pDEFINE.setParseAction(lambda result: {
        "result": "value",
        "name": result[2],
        "expr": result[3]
    })

    pDEFUN = "(" + Keyword("defun") + pNAME + "(" + pNAMES + ")" + pEXPR + ")"
    pDEFUN.setParseAction(
        lambda result: {
            "result": "function",
            "name": result[2],
            "params": result[4],
            "body": result[6]
        })

    pABSTRACT = "#abs" + pEXPR
    pABSTRACT.setParseAction(lambda result: {
        "result": "abstract",
        "expr": result[1]
    })

    pQUIT = Keyword("#quit")
    pQUIT.setParseAction(lambda result: {"result": "quit"})

    pTOP = (pDEFUN | pDEFINE | pQUIT | pABSTRACT | pTOPEXPR)

    result = pTOP.parseString(input)[0]
    return result  # the first element of the result is the expression
def parse_imp(input):
    """Parse a program in the imperative surface syntax.

    The grammar built below extends the basic one with strings, arrays,
    dictionaries, infix operator calls, `fun`/`def` definitions and `for`
    loops:

        <decl> ::= var name ;
                   var name = expr ;
                   def name ( name , ... ) <stmt>
                   ;

        <stmt> ::= if ( <expr> ) <stmt> else <stmt>
                   if ( <expr> ) <stmt>
                   while ( <expr> ) <stmt>
                   for ( name in <expr> ) <stmt>
                   print <expr> , ... ;
                   name [ <expr> ] = <expr> ;
                   name = <expr> ;
                   { <decl> ... <stmt> ... }
                   <expr> ;
                   <expr>

        <toplevel> ::= <decl> ... <stmt> ...

    Returns the pyparsing result of matching <toplevel>: a sequence of
    dicts, {"result": "declaration", "decl": ...} for each declaration
    followed by {"result": "statement", "stmt": ...} for each statement.
    """
    idChars = alphas+"_+*-?!=<>"
    digits = "0123456789"

    pIDENTIFIER = Word(idChars, idChars+digits)
    #### NOTE THE DIFFERENCE
    # An identifier used as an expression is dereferenced immediately.
    pIDENTIFIER.setParseAction(lambda result: EPrimCall(oper_deref,[EId(result[0])]))

    # A name is like an identifier but it does not return an EId...
    pNAME = Word(idChars, idChars+digits)

    pNAMECON = "," + pNAME
    pNAMECON.setParseAction(lambda result: result[1])

    # Names may be comma-separated or just whitespace-separated.
    pNAMES = pNAME + ZeroOrMore(pNAMECON) | ZeroOrMore(pNAME)
    pNAMES.setParseAction(lambda result: [result])

    pINTEGER = Word(digits)
    pINTEGER.setParseAction(lambda result: EValue(VInteger(int(result[0]))))

    # &" and &' are the quote escapes allowed inside a string literal.
    QUOTE = Literal("&\"") | Literal("&\'")
    pSTRINGSTART = Literal('"') + ZeroOrMore(Word(" ")).leaveWhitespace()
    pSTRINGSTART.setParseAction(lambda result: result[1:])

    pSTRING = pSTRINGSTART + ZeroOrMore(Combine( Word(idChars+"0123456789'"+" ") | QUOTE)) + Literal('"')
    # NOTE(review): str() is applied to the slice of tokens, not to their
    # concatenation -- confirm this is what VString expects.
    pSTRING.setParseAction(lambda result: EValue(VString(str(result[:-1]))))

    pBOOLEAN = Keyword("true") | Keyword("false")
    pBOOLEAN.setParseAction(lambda result: EValue(VBoolean(result[0]=="true")))

    # pEXPR holds the atomic expressions, pEXPR2 the compound ones.
    pEXPR = Forward()
    pEXPR2 = Forward()
    pSTMT = Forward()

    pEXPRS = ZeroOrMore(pEXPR2)
    pEXPRS.setParseAction(lambda result: [result])

    pIF = pEXPR + Keyword("?") + pEXPR + Keyword(':') + pEXPR
    pIF.setParseAction(lambda result: EIf(result[0], result[2], result[4]))

    def mkFunBody (params,body):
        # Rebind every parameter to a fresh reference cell holding its value.
        bindings = [ (p,ERefCell(EId(p))) for p in params ]
        return ELet(bindings,body)

    def mkLetBody (bindings,body):
        # Same idea for let: each bound expression lives in a reference cell.
        bindings = [ (p[0],ERefCell(p[1])) for p in bindings ]
        return ELet(bindings,body)

    def multiCall(result):
        # result is [first_operand, [(op, operand), (op, operand), ...]].
        # Fold every pair, left to right.  The earlier recursive helper
        # dropped the value of its recursive call, so pairs after the
        # second were silently ignored.
        pairs = result[1]
        (op, operand) = pairs[0]
        tree = ECall(op, [result[0], operand])
        for (op, operand) in pairs[1:]:
            tree = ECall(op, [tree, operand])
        return tree

    pFUN = Keyword("fun") + "(" + pNAMES + ")" + pSTMT
    pFUN.setParseAction(lambda result: EFunction(result[2],mkFunBody(result[2],result[4])))

    # Named variant: the name is passed as EFunction's third argument.
    pFUNR = Keyword("fun") + pNAME + "(" + pNAMES + ")" + pSTMT
    pFUNR.setParseAction(lambda result: EFunction(result[3],mkFunBody(result[3],result[5]), result[1]))

    pEXPR2CAR = "," + pEXPR2
    pEXPR2CAR.setParseAction(lambda result: result[1])

    pEXPR2MULTIALL = pEXPR2 + ZeroOrMore(pEXPR2CAR) | ZeroOrMore(pEXPR2)
    pEXPR2MULTIALL.setParseAction(lambda result: [result])

    pFUNCALL = pEXPR + "(" + pEXPR2MULTIALL + ")"
    pFUNCALL.setParseAction(lambda result: ECall(result[0], result[2]))

    pBINDINGCAR = "," + pNAME + "=" + pEXPR2
    pBINDINGCAR.setParseAction(lambda result: (result[1], result[3]))

    pBINDINGCON = pNAME + "=" + pEXPR2
    pBINDINGCON.setParseAction(lambda result: (result[0], result[2]))

    pBINDINGS = pBINDINGCON + ZeroOrMore(pBINDINGCAR)
    pBINDINGS.setParseAction(lambda result: [result])

    pLET = Keyword("let") + "(" + pBINDINGS + ")" + pEXPR2
    pLET.setParseAction(lambda result: mkLetBody(result[2], result[4]))

    # Infix calls: <expr> <op> <expr> [<op> <expr> ...]
    pCALLG = pIDENTIFIER + pEXPR2
    pCALLG.setParseAction(lambda result: (result[0], result[1]))

    pCALL1S = OneOrMore(pCALLG)
    pCALL1S.setParseAction(lambda result: [ result ])

    pCALL = pEXPR + pCALL1S
    pCALL.setParseAction(multiCall)

    pCALL1 = pIDENTIFIER + pEXPR2
    pCALL1.setParseAction(lambda result: ECall(result[0], [result[1]]))

    pNOT = "not" + pEXPR2
    pNOT.setParseAction(lambda result: EPrimCall(oper_not, [result[1]]))

    pARRAYITEM = "," + pEXPR2
    pARRAYITEM.setParseAction(lambda result: (result[1]))

    pARRAYITEMS = ZeroOrMore(pARRAYITEM)
    pARRAYITEMS.setParseAction(lambda result: [result])

    # NOTE(review): with no leading element (e.g. "[]") result[1] and
    # result[2] shift by one position -- confirm EArray copes with that.
    pARRAY = "[" + ZeroOrMore(pEXPR2) + pARRAYITEMS + "]"
    pARRAY.setParseAction(lambda result: EArray(result[1],result[2]))

    pDICTPAIR = pNAME + ":" + pEXPR
    pDICTPAIR.setParseAction(lambda result: (result[0],result[2]))

    pDICTPAIRWITHCOMMA = "," + pNAME + ":" + pEXPR
    pDICTPAIRWITHCOMMA.setParseAction(lambda result: (result[1],result[3]))

    pDICTS = ZeroOrMore(pDICTPAIRWITHCOMMA)
    pDICTS.setParseAction(lambda result: [ result ])

    pDICT = "{" + pDICTPAIR + pDICTS + "}"
    pDICT.setParseAction(lambda result:EDict(result[1],result[2]))

    pEXPR2P = "(" + pEXPR2 + ")"
    pEXPR2P.setParseAction(lambda result: result[1])

    pACCESS = pNAME + "[" + pEXPR + "]"
    pACCESS.setParseAction(lambda result: EPrimCall(oper_access_arr,[EId(result[0]),result[2]]))

    pLEN = Keyword("len") + "(" + pNAME + ")"
    pLEN.setParseAction(lambda result: EPrimCall(oper_len,[EId(result[2])]))

    # NOTE(review): alternatives are tried in order and pIDENTIFIER comes
    # before pCALL1 and pLEN, which start with text pIDENTIFIER also
    # accepts -- check whether those two are ever reached.
    pEXPR << ( pEXPR2P | pINTEGER | pNOT | pARRAY | pACCESS | pDICT | pSTRING | pBOOLEAN | pIDENTIFIER | pCALL1 | pLEN )

    pEXPR2 << ( pLET | pFUN | pFUNR | pFUNCALL | pIF | pCALL | pEXPR )

    # "var x;" declares x with the none value.  VNone is instantiated like
    # every other V* value here; previously the class itself was passed.
    pDECL_VAR_E = "var" + pNAME + ";"
    pDECL_VAR_E.setParseAction(lambda result: (result[1], EValue(VNone())))

    pDECL_VAR = "var" + pNAME + "=" + pEXPR2 + ";"
    pDECL_VAR.setParseAction(lambda result: (result[1],result[3]))

    pDECL_PROCEDURE = "def" + pNAME + "(" + pNAMES + ")" + pSTMT
    pDECL_PROCEDURE.setParseAction(lambda result: (result[1], EProcedure(result[3], mkFunBody(result[3], result[5]))))

    # hack to get pDECL to match only PDECL_VAR (but still leave room
    # to add to pDECL later)
    pDECL = ( pDECL_VAR_E | pDECL_VAR | pDECL_PROCEDURE | NoMatch() | ";" )

    pDECLS = ZeroOrMore(pDECL)
    pDECLS.setParseAction(lambda result: [result])

    # The "if (" / "while (" / "for (" literals include exactly one space.
    pSTMT_IF_1 = "if (" + pEXPR2 + ")" + pSTMT + "else" + pSTMT
    pSTMT_IF_1.setParseAction(lambda result: EIf(result[1],result[3],result[5]))

    # Without an else branch the statement evaluates to true.
    pSTMT_IF_2 = "if (" + pEXPR2 + ")" + pSTMT
    pSTMT_IF_2.setParseAction(lambda result: EIf(result[1],result[3],EValue(VBoolean(True))))

    pSTMT_WHILE = "while (" + pEXPR2 + ")" + pSTMT
    pSTMT_WHILE.setParseAction(lambda result: EWhile(result[1],result[3]))

    pSTMT_FOR = "for (" + pNAME + "in" + pEXPR2 + ")" + pSTMT
    pSTMT_FOR.setParseAction(lambda result: EFor(result[1], result[3], result[5]))

    pSTMT_PRINT_STMS = "," + pEXPR2
    pSTMT_PRINT_STMS.setParseAction(lambda result: [ result[1] ])

    pSTMT_PRINT_ZERO = ZeroOrMore(pSTMT_PRINT_STMS)
    pSTMT_PRINT_ZERO.setParseAction(lambda result: [ result ])

    def printStmEval(result):
        # result is ["print", first, [rest ...], ";"]: print all of them.
        newArray = [result[1]]
        newArray.extend(result[2])
        return EPrimCall(oper_print,newArray)

    pSTMT_PRINT = "print" + pEXPR2 + pSTMT_PRINT_ZERO + ";"
    pSTMT_PRINT.setParseAction(printStmEval)

    pSTMT_UPDATE_ARR = pNAME + "[" + pEXPR +"]" + "=" + pEXPR + ";"
    pSTMT_UPDATE_ARR.setParseAction(lambda result: EPrimCall(oper_update_arr,[EId(result[0]),result[2],result[5]]))

    pSTMT_UPDATE = pNAME + "=" + pEXPR2 + ";"
    pSTMT_UPDATE.setParseAction(lambda result: EPrimCall(oper_update,[EId(result[0]),result[2]]))

    pSTMTS = ZeroOrMore(pSTMT)
    pSTMTS.setParseAction(lambda result: [result])

    def mkBlock (decls,stmts):
        # A block is a let over its declarations (each held in a reference
        # cell) whose body runs the statements in order.
        bindings = [ (n,ERefCell(expr)) for (n,expr) in decls ]
        return ELet(bindings,EDo(stmts))

    pSTMT_BLOCK = "{" + pDECLS + pSTMTS + "}"
    pSTMT_BLOCK.setParseAction(lambda result: mkBlock(result[1],result[2]))

    pSTMT_pEXPR2 = pEXPR2 + ";"
    pSTMT_pEXPR2.setParseAction(lambda result: result[0])

    pSTMT << ( pSTMT_IF_1 | pSTMT_IF_2 | pSTMT_WHILE | pSTMT_FOR | pSTMT_PRINT | pSTMT_UPDATE_ARR | pSTMT_UPDATE | pSTMT_BLOCK | pSTMT_pEXPR2 | pEXPR2 )

    # can't attach a parse action to pSTMT because of recursion, so let's
    # duplicate the parser
    pTOP_STMT = pSTMT.copy()
    pTOP_STMT.setParseAction(lambda result: {"result":"statement",
                                             "stmt":result[0]})

    pTOP_DECL = pDECL.copy()
    pTOP_DECL.setParseAction(lambda result: {"result":"declaration",
                                             "decl":result[0]})

    # pABSTRACT and pQUIT are built but not part of pTOP below.
    pABSTRACT = "#abs" + pSTMT
    pABSTRACT.setParseAction(lambda result: {"result":"abstract",
                                             "stmt":result[1]})

    pQUIT = Keyword("#quit")
    pQUIT.setParseAction(lambda result: {"result":"quit"})

    pTOP = ZeroOrMore(pTOP_DECL) + ZeroOrMore(pTOP_STMT)

    return pTOP.parseString(input)
def parse(input):
    """Turn one top-level form of the object language into its AST.

    Grammar:

        <expr> ::= <integer>
                   true
                   false
                   <identifier>
                   ( if <expr> <expr> <expr> )
                   ( let ( ( <name> <expr> ) ... ) <expr> )
                   ( function ( <name> ... ) <expr> )
                   ( function <name> ( <name> ... ) <expr> )
                   ( class ( <name> ... ) ( <binding> ... ) ( <binding> ... ) )
                   ( new <expr> <expr> ... )
                   ( with <expr> <expr> )
                   ( do <expr> ... )
                   ( while <expr> <expr> )
                   ( <expr> <expr> ... )

    The value returned is a dict tagged by its "result" key: "function",
    "value", "quit", "abstract" or "expression".
    """
    digits = "0123456789"
    ident_chars = alphas + "_+*-?!=<>"

    def as_single_token(tokens):
        # Hand the whole match back as one list-valued token.
        return [tokens]

    identifier = Word(ident_chars, ident_chars + digits).setParseAction(
        lambda toks: EId(toks[0]))

    # Same lexical shape as an identifier, but left as a plain string.
    name = Word(ident_chars, ident_chars + digits)
    names = ZeroOrMore(name).setParseAction(as_single_token)

    integer = Word(digits).setParseAction(
        lambda toks: EValue(VInteger(int(toks[0]))))

    boolean = (Keyword("true") | Keyword("false")).setParseAction(
        lambda toks: EValue(VBoolean(toks[0] == "true")))

    expr = Forward()
    exprs = ZeroOrMore(expr).setParseAction(as_single_token)

    if_form = ("(" + Keyword("if") + expr + expr + expr + ")").setParseAction(
        lambda toks: EIf(toks[2], toks[3], toks[4]))

    # A binding is reduced to a (name, expression) pair.
    binding = ("(" + name + expr + ")").setParseAction(
        lambda toks: (toks[1], toks[2]))
    bindings = ZeroOrMore(binding).setParseAction(as_single_token)

    let_form = ("(" + Keyword("let") + "(" + bindings + ")" + expr
                + ")").setParseAction(lambda toks: ELet(toks[3], toks[5]))

    call_form = ("(" + expr + exprs + ")").setParseAction(
        lambda toks: ECall(toks[1], toks[2]))

    fun_form = ("(" + Keyword("function") + "(" + names + ")" + expr
                + ")").setParseAction(lambda toks: EFunction(toks[3], toks[5]))

    # The named variant forwards the name as a keyword argument.
    rec_fun_form = ("(" + Keyword("function") + name + "(" + names + ")"
                    + expr + ")").setParseAction(
        lambda toks: EFunction(toks[4], toks[6], name=toks[2]))

    # Tokens 3, 6 and 9 are the name list and the two binding lists.
    class_form = ("(" + Keyword("class") + "(" + names + ")" + Keyword("(")
                  + bindings + ")" + Keyword("(") + bindings + Keyword(")")
                  + ")").setParseAction(
        lambda toks: EClass(toks[3], toks[6], toks[9]))

    new_form = ("(" + Keyword("new") + expr + exprs + ")").setParseAction(
        lambda toks: ENew(toks[2], toks[3]))

    with_form = ("(" + Keyword("with") + expr + expr + ")").setParseAction(
        lambda toks: EWithObj(toks[2], toks[3]))

    do_form = ("(" + Keyword("do") + exprs + ")").setParseAction(
        lambda toks: EDo(toks[2]))

    while_form = ("(" + Keyword("while") + expr + expr + ")").setParseAction(
        lambda toks: EWhile(toks[2], toks[3]))

    # The generic call is the last alternative so the keyword forms win.
    expr << (integer | boolean | identifier | if_form | let_form | fun_form
             | rec_fun_form | class_form | new_form | with_form | do_form
             | while_form | call_form)

    # A parse action cannot go on the recursive element itself, so the
    # top-level expression is a copy carrying its own action.
    top_expr = expr.copy().setParseAction(
        lambda toks: {"result": "expression", "expr": toks[0]})

    define_form = ("(" + Keyword("define") + name + expr + ")").setParseAction(
        lambda toks: {"result": "value", "name": toks[2], "expr": toks[3]})

    defun_form = ("(" + Keyword("defun") + name + "(" + names + ")" + expr
                  + ")").setParseAction(
        lambda toks: {"result": "function",
                      "name": toks[2],
                      "params": toks[4],
                      "body": toks[6]})

    abstract_form = ("#abs" + expr).setParseAction(
        lambda toks: {"result": "abstract", "expr": toks[1]})

    quit_form = Keyword("#quit").setParseAction(
        lambda toks: {"result": "quit"})

    top = defun_form | define_form | quit_form | abstract_form | top_expr

    # The first token of the match is the tagged dict.
    return top.parseString(input)[0]
def parse(input):
    """Parse one top-level form of the S-expression language.

    Grammar:

        <expr> ::= <integer>
                   true
                   false
                   <identifier>
                   ( if <expr> <expr> <expr> )
                   ( let ( ( <name> <expr> ) ... ) <expr> )
                   ( and <expr> ... )
                   ( or <expr> ... )
                   ( let* ( ( <name> <expr> ) ... ) <expr> )
                   ( cond ( <expr> <expr> ) ... )
                   ( <name> <expr> ... )

        <toplevel> ::= ( defun <name> ( <name> ... ) <expr> )
                       <expr>

    `and`, `or`, `let*` and `cond` are handed to the helpers unpackLogic,
    unpackLetS and unpackCond, which are defined outside this function.

    Returns a dict whose "result" key is "function" (with "name", "params"
    and "body") or "expression" (with "expr").
    """
    idChars = alphas+"_+*-?!=<>"
    digits = "0123456789"

    pIDENTIFIER = Word(idChars, idChars+digits)
    pIDENTIFIER.setParseAction(lambda result: EId(result[0]))

    # A name is like an identifier but it does not return an EId...
    pNAME = Word(idChars, idChars+digits)

    # Wrapping in a list makes all the names arrive as ONE token.
    pNAMES = ZeroOrMore(pNAME)
    pNAMES.setParseAction(lambda result: [result])

    # A leading "-" only counts as part of an integer when at least one
    # digit follows.  Previously a lone "-" matched here and int("-")
    # raised ValueError, so "-" and "-x" could never be read as identifiers.
    pINTEGER = Word(digits) | Word("-", digits, min=2)
    pINTEGER.setParseAction(lambda result: EInteger(int(result[0])))

    pBOOLEAN = Keyword("true") | Keyword("false")
    pBOOLEAN.setParseAction(lambda result: EBoolean(result[0]=="true"))

    pEXPR = Forward()

    pIF = "(" + Keyword("if") + pEXPR + pEXPR + pEXPR + ")"
    pIF.setParseAction(lambda result: EIf(result[2],result[3],result[4]))

    # Each binding becomes a (name, expression) tuple.
    pBINDING = "(" + pNAME + pEXPR + ")"
    pBINDING.setParseAction(lambda result: (result[1],result[2]))

    pBINDINGS = OneOrMore(pBINDING)
    pBINDINGS.setParseAction(lambda result: [ result ])

    pLET = "(" + Keyword("let") + "(" + pBINDINGS + ")" + pEXPR + ")"
    pLET.setParseAction(lambda result: ELet(result[3],result[5]))

    pEXPRS = ZeroOrMore(pEXPR)
    pEXPRS.setParseAction(lambda result: [result])

    # The callee is a bare name (a string), not an expression.
    pCALL = "(" + pNAME + pEXPRS + ")"
    pCALL.setParseAction(lambda result: ECall(result[1],result[2]))

    # The helper receives every token of the form, parentheses included.
    pAND = "(" + Keyword("and") + ZeroOrMore(pEXPR) + ")"
    pAND.setParseAction(lambda result: unpackLogic(result, True))

    pOR = "(" + Keyword("or") + ZeroOrMore(pEXPR) + ")"
    pOR.setParseAction(lambda result: unpackLogic(result, False))

    # result[3:] starts at the first binding and still contains the
    # closing ")" tokens and the body.
    pLetS = "(" + Keyword("let*") + "(" + OneOrMore(pBINDING) + ")" + pEXPR + ")"
    pLetS.setParseAction(lambda result: unpackLetS(result[3:]))

    # result[3:] skips "(", "cond" and the "(" opening the first clause.
    pCOND = "(" + Keyword("cond") + ZeroOrMore("(" + pEXPR + pEXPR + ")") + ")"
    pCOND.setParseAction(lambda result: unpackCond(result[3:]))

    # pCALL goes last: it would otherwise also accept the keyword forms.
    pEXPR << (pINTEGER | pBOOLEAN | pIDENTIFIER | pIF | pLET | pAND | pOR | pLetS | pCOND | pCALL)

    # can't attach a parse action to pEXPR because of recursion, so let's
    # duplicate the parser
    pTOPEXPR = pEXPR.copy()
    pTOPEXPR.setParseAction(lambda result: {"result":"expression","expr":result[0]})

    pDEFUN = "(" + Keyword("defun") + pNAME + "(" + pNAMES + ")" + pEXPR + ")"
    pDEFUN.setParseAction(lambda result: {"result":"function",
                                          "name":result[2],
                                          "params":result[4],
                                          "body":result[6]})

    pTOP = (pDEFUN | pTOPEXPR)

    result = pTOP.parseString(input)[0]
    return result    # the first element of the result is the expression
op = operatorPrecedence # op = myOperatorPrecedence rvalue << op(operand, [ ('-', 1, opAssoc.RIGHT, Unary.parse_action), ('*', 2, opAssoc.LEFT, Binary.parse_action), ('-', 2, opAssoc.LEFT, Binary.parse_action), ('+', 2, opAssoc.LEFT, Binary.parse_action), ('^', 2, opAssoc.LEFT, Binary.parse_action), ]) # I want # - BindVariable to have precedence to EqualTo(VariableRef) # but I also want: # - Arithmetic to have precedence w.r.t BindVariable # last is variables add_contract(misc_variables_contract) add_contract(int_variables_contract) add_contract(rvalue.copy().setParseAction(EqualTo.parse_action)) hardwired = MatchFirst(ParsingTmp.contract_types) hardwired.setName('Predefined contract expression') simple_contract << (hardwired | identifier_contract) simple_contract.setName('simple contract expression') any_contract = composite_contract | simple_contract any_contract.setName('Any simple or composite contract') contract_expression << (any_contract) # Parentheses before << !!
# stat ::= varlist `=' explist
# The parse action takes pyparsing's (string, location, tokens) arguments;
# the two tokens are the variable list and the expression list.  Written
# with explicit indexing because tuple parameters in a lambda signature
# are a syntax error on Python 3.
stat = (varlist + Suppress("=") + explist).setParseAction(
    lambda s, loc, toks: ast.Assignment(list(toks[0]), list(toks[1]))
) | functioncall | semicolon

# retstat ::= return [explist] [`;']
retstat = (Keyword("return") + Optional(explist) + semicolon)

# block ::= {stat} [retstat]
block = (Optional(ZeroOrMore(stat | retstat | comment),
                  default=[])).setParseAction(from_parse_result(ast.Block))

# field ::= `[' exp `]' `=' exp | Name `=' exp | exp
# field = Group("[" + exp + "]" + Literal("=") + exp) ...
# Only the named and the positional form are handled here.  exp is copied
# so that the UnnamedField action is not attached to exp itself.
field = ((name + Suppress('=') + exp).setParseAction(
    from_parse_result(ast.NamedField)) |
    exp.copy().setParseAction(from_parse_result(ast.UnnamedField)))

# fieldsep ::= `,' | `;'
fieldsep = Literal(",") | Literal(";")

# fieldlist ::= field {fieldsep field} [fieldsep]
# The optional trailing separator is suppressed like the inner ones, so a
# stray "," or ";" token no longer reaches the Table parse action.
fieldlist = (field + ZeroOrMore(Suppress(fieldsep) + field)
             + Optional(Suppress(fieldsep)))

# tableconstructor ::= `{' [fieldlist] `}'
tableconstructor << (Suppress("{") + Optional(fieldlist)
                     + Suppress("}")).setParseAction(
    from_parse_result(ast.Table))


def parse(s):
    """Parse the source text `s` as a block and return its first token.

    The whole input must match (parseAll=True); otherwise pyparsing raises
    its ParseException.
    """
    return block.parseString(s, parseAll=True)[0]