def __parseNestedList(value, dtype=float):
    from pyparsing import Word, Group, Forward, OneOrMore, Optional, alphanums, Suppress
    number = Word(alphanums + ".e-").setParseAction(lambda s, l, t: dtype(t[0]))
    arr = Forward()
    element = number | arr
    arr << Group(Suppress('[') + OneOrMore(element + Optional(Suppress(","))) + Suppress(']'))
    return arr.parseString(value, parseAll=True).asList()[0]
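# Hedged usage sketch: the helper above uses a double-underscore (module-private)
# name, so call it from within its own module. With the default dtype=float:
#   __parseNestedList("[1, [2, 3], 4.5]")  # -> [1.0, [2.0, 3.0], 4.5]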
def parse_constraint_group(constraint_group):
    global valgrind_operations, size_by_var, offset_by_var, realsize_by_var, shift_by_var
    init_global_vars()
    lparen = Literal("(")
    rparen = Literal(")")
    func = Word(alphanums, alphanums + ":_")
    integer = Word(nums)
    expression = Forward()
    arg = expression | func | integer
    args = arg + ZeroOrMore("," + arg)
    expression << func + lparen + args + rparen
    expression.setParseAction(parse_function)
    valgrind_operations_group = []
    for constraint in constraint_group:
        valgrind_operations = []
        expression.parseString(constraint)
        resize_operands()
        valgrind_operations_group.append(valgrind_operations)
    return (valgrind_operations_group, size_by_var, offset_by_var,
            realsize_by_var, shift_by_var)
def parseQuery(queryString):
    try:
        parser = Forward()
        # parser << (Word(alphas).setResultsName("first") + '*' +
        #            Word(alphas).setResultsName("second"))
        # selectSpecialStmt = Forward().setResultsName("selectSpecialStmt")
        # selectSpecialStmt << (selectSpecialToken + "(" + table_columns + ")" +
        #                       fromToken + table.setResultsName("table"))
        selectStmt = Forward().setResultsName("selectStmt")
        selectStmt << (selectToken
                       + ('*' | func_table_column | table_columns).setResultsName("columns")
                       + fromToken + tables.setResultsName("tables")
                       + Optional(whereToken + whereExpression.setResultsName("conds")))
        deleteStmt = Forward().setResultsName("deleteStmt")
        deleteStmt << (deleteToken + table.setResultsName("table")
                       + whereToken + whereExpression.setResultsName("conds"))
        insertStmt = Forward().setResultsName("insertStmt")
        insertStmt << (insertToken + table.setResultsName("table") + valuesToken
                       + "(" + intNums.setResultsName("intValues") + ")")
        createStmt = Forward().setResultsName("createStmt")
        createStmt << (createToken + table.setResultsName("table") + "("
                       + Group(delimitedList(column + intToken)).setResultsName("fields") + ")")
        truncateStmt = Forward().setResultsName("truncateStmt")
        truncateStmt << (truncateToken + table.setResultsName("table"))
        dropStmt = Forward().setResultsName("dropStmt")
        dropStmt << (dropToken + table.setResultsName("table"))
        parser = (selectStmt | insertStmt | deleteStmt | createStmt
                  | truncateStmt | dropStmt | exitToken)
        tokens = parser.parseString(queryString)
        # import pdb; pdb.set_trace()
        return tokens
    except Exception as e:
        # print(e)
        print("Error in format.")
        return []
def build_parser(root_directory, path, fake_root=os.getcwd(), file_reader=None):
    from pyparsing import nestedExpr
    from pyparsing import QuotedString
    from pyparsing import Group
    from pyparsing import restOfLine
    from pyparsing import Word
    from pyparsing import alphanums
    from pyparsing import cStyleComment
    from pyparsing import OneOrMore
    from pyparsing import ZeroOrMore
    from pyparsing import Optional
    from pyparsing import Forward
    from pyparsing import Literal
    from pyparsing import Keyword

    root = Forward()
    include_handler = IncludeHandler(
        root_directory,
        path,
        root,
        fake_root=fake_root,
        file_reader=file_reader)

    # relaxed grammar
    identifier = Word(alphanums + "-_.:/")
    comment = ("//" + restOfLine).suppress() \
        | ("#" + restOfLine).suppress() \
        | cStyleComment
    endstmt = Literal(";").suppress()
    argument = QuotedString('"') \
        | identifier
    arguments = ZeroOrMore(argument)
    statements = Forward()
    section = nestedExpr("{", "}", statements)
    include = Keyword("include").suppress() + QuotedString('"')
    regular = identifier + Group(arguments) + Optional(section, default=[])
    statement = include.setParseAction(include_handler.pyparsing_call) \
        | regular.setParseAction(include_handler.pyparsing_mark)
    statements << OneOrMore(statement + endstmt)
    root << Optional(statements)
    root.ignore(comment)
    setattr(
        root, 'parse_file',
        lambda f, root=root: root.parseFile(f, parseAll=True))
    return root
def get_enclosed(self, raw):
    # Word ::= Ascii - Tokens
    non_token = "!#$%&\'*+,-./:;=?@\\^_`|~"
    word = Word(alphanums + non_token)
    # word = Word(printables)
    # Tokens ::= {}[]()<>
    tokens = "{}[]()<>"
    o_curly, c_curly, o_brack, c_brack, o_paren, c_paren, o_mayor, c_mayor = map(Suppress, tokens)
    enclosed_data = Forward()
    # Enclosed groups
    curly_enclosed = OneOrMore(o_curly + enclosed_data + c_curly)
    brack_enclosed = OneOrMore(o_brack + enclosed_data + c_brack)
    paren_enclosed = OneOrMore(o_paren + enclosed_data + c_paren)
    mayor_enclosed = OneOrMore(o_mayor + enclosed_data + c_mayor)
    enclosed = (Optional(curly_enclosed) & Optional(brack_enclosed)
                & Optional(paren_enclosed) & Optional(mayor_enclosed))
    enclosed_data << ((OneOrMore(word) & enclosed) ^ enclosed)
    return enclosed_data.parseString(raw)
def parse_sexp(data):
    '''parse sexp/S-expression format and return a python list'''
    # define punctuation literals
    LPAR, RPAR, LBRK, RBRK, LBRC, RBRC, VBAR = map(Suppress, "()[]{}|")
    decimal = Word("123456789", nums).setParseAction(lambda t: int(t[0]))
    bytes = Word(printables)
    raw = Group(decimal.setResultsName("len") + Suppress(":") + bytes
                ).setParseAction(OtrPrivateKeys.verifyLen)
    token = Word(alphanums + "-./_:*+=")
    base64_ = Group(Optional(decimal, default=None).setResultsName("len") + VBAR
                    + OneOrMore(Word(alphanums + "+/=")
                                ).setParseAction(lambda t: b64decode("".join(t)))
                    + VBAR).setParseAction(OtrPrivateKeys.verifyLen)
    hexadecimal = ("#" + OneOrMore(Word(hexnums)) + "#")\
        .setParseAction(lambda t: int("".join(t[1:-1]), 16))
    qString = Group(Optional(decimal, default=None).setResultsName("len")
                    + dblQuotedString.setParseAction(removeQuotes)
                    ).setParseAction(OtrPrivateKeys.verifyLen)
    simpleString = raw | token | base64_ | hexadecimal | qString
    display = LBRK + simpleString + RBRK
    string_ = Optional(display) + simpleString
    sexp = Forward()
    sexpList = Group(LPAR + ZeroOrMore(sexp) + RPAR)
    sexp << (string_ | sexpList)
    try:
        sexpr = sexp.parseString(data)
        return sexpr.asList()[0][1:]
    except ParseFatalException as pfe:
        print("Error:", pfe.msg)
        print(pfe.loc)
        print(pfe.markInputline())
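# Hedged usage sketch: the grammar targets libotr-style key S-expressions and
# returns the children after the leading tag, e.g.
#   parse_sexp("(dsa (p #A3#) (q #07#))")  # -> roughly [['p', 163], ['q', 7]]
# (length-prefixed and base64 atoms additionally go through OtrPrivateKeys.verifyLen).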
def parse(string):
    '''
    returns either [atomic], [monoop, [f]] or [binop, [f1], [f2]]
    this method is static (no need for a CTL instance)
    '''
    lparen = Literal('(').suppress()
    rparen = Literal(')').suppress()
    wildcard = Literal('_')
    atom = Combine(Word(alphas) + Optional(Word('.0123456789')) ^ 'true' ^ 'false' ^ wildcard)
    term = Forward()
    term << (atom + Optional(lparen + Group(term)
                             + ZeroOrMore(Literal(',').suppress() + Group(term)) + rparen))
    A = Optional('<-') + 'A'
    E = Optional('<-') + 'E'
    G, Gi, F, X, U = map(Literal, ('G', 'Gi', 'F', 'X', 'U'))
    UnOp = wildcard ^ '!' ^ Combine(A + (G ^ F ^ X)) ^ Combine(E + (G ^ Gi ^ F ^ X))
    BinOp = wildcard ^ Literal('or') ^ Literal('and') ^ Combine(A + U) ^ Combine(E + U)
    formula = Forward()
    formula << (Group(term)
                ^ (lparen + formula + rparen)
                ^ Group(UnOp + formula)
                ^ Group(BinOp + formula + formula))
    # 0 because we expect only one formula in the string
    return formula.parseString(string).asList()[0]
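# Hedged examples of the prefix syntax this grammar accepts:
#   parse("AG p")     # -> ['AG', ['p']]
#   parse("EU p q")   # -> ['EU', ['p'], ['q']]  (binary operators are prefix)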
def compute(input_string):
    # Debugging flag can be set to either "debug_flag=True" or "debug_flag=False"
    debug_flag = False
    explain_list = []
    variables = {}

    # define grammar
    point = Literal(".")
    e = CaselessLiteral("E")
    plusorminus = Literal("+") | Literal("-")
    number = Word(nums)
    integer = Combine(Optional(plusorminus) + number)
    floatnumber = Combine(integer + Optional(point + Optional(number)) + Optional(e + integer))
    ident = Word(alphas, alphanums + "_")
    plus = Literal("+")
    minus = Literal("-")
    mult = Literal("*")
    div = Literal("/")
    lpar = Literal("(").suppress()
    rpar = Literal(")").suppress()
    addop = plus | minus
    multop = mult | div
    expop = Literal("^")
    assign = Literal("=")

    expr = Forward()
    atom = ((e | floatnumber | integer | ident).setParseAction(pushFirst)
            | (lpar + expr.suppress() + rpar))
    factor = Forward()
    factor << atom + ZeroOrMore((expop + factor).setParseAction(pushFirst))
    term = factor + ZeroOrMore((multop + factor).setParseAction(pushFirst))
    expr << term + ZeroOrMore((addop + term).setParseAction(pushFirst))
    bnf = Optional((ident + assign).setParseAction(assignVar)) + expr
    pattern = bnf + StringEnd()

    if input_string != "":
        try:
            L = pattern.parseString(input_string)
        except ParseException as err:
            raise ComputationException("Error while parsing")
        print(exprStack)
        if len(exprStack) <= 1:
            return None
        result = evaluateStack(exprStack, explain_list)
        if len(str(result)) > 12:
            ret = "%e" % result
        else:
            ret = str(result)
        ret = ret.replace("e", " x 10^")
        ret = ret.replace("+", "")
        if len(explain_list):
            return "%s (%s)" % (ret, ", ".join(explain_list))
        else:
            return "%s" % ret
def parseATL(spec):
    """Parse the spec and return the list of possible ASTs."""
    global __atl
    if __atl is None:
        true = Literal("True")
        true.setParseAction(lambda tokens: TrueExp())
        false = Literal("False")
        false.setParseAction(lambda tokens: FalseExp())
        atom = "'" + SkipTo("'") + "'"
        atom.setParseAction(lambda tokens: Atom(tokens[1]))

        agent = atom
        group = Group(ZeroOrMore(agent + Suppress(",")) + agent)

        proposition = true | false | atom

        __atl = Forward()

        notproposition = "~" + proposition
        notproposition.setParseAction(lambda tokens: Not(tokens[1]))
        formula = (proposition | notproposition
                   | Suppress("(") + __atl + Suppress(")"))

        logical = Forward()

        cax = Literal("[") + group + "]" + "X" + logical
        cax.setParseAction(lambda tokens: CAX(tokens[1], tokens[4]))
        cex = Literal("<") + group + ">" + "X" + logical
        cex.setParseAction(lambda tokens: CEX(tokens[1], tokens[4]))
        caf = Literal("[") + group + "]" + "F" + logical
        caf.setParseAction(lambda tokens: CAF(tokens[1], tokens[4]))
        cef = Literal("<") + group + ">" + "F" + logical
        cef.setParseAction(lambda tokens: CEF(tokens[1], tokens[4]))
        cag = Literal("[") + group + "]" + "G" + logical
        cag.setParseAction(lambda tokens: CAG(tokens[1], tokens[4]))
        ceg = Literal("<") + group + ">" + "G" + logical
        ceg.setParseAction(lambda tokens: CEG(tokens[1], tokens[4]))
        cau = Literal("[") + group + "]" + "[" + __atl + "U" + __atl + "]"
        cau.setParseAction(lambda tokens: CAU(tokens[1], tokens[4], tokens[6]))
        ceu = Literal("<") + group + ">" + "[" + __atl + "U" + __atl + "]"
        ceu.setParseAction(lambda tokens: CEU(tokens[1], tokens[4], tokens[6]))
        caw = Literal("[") + group + "]" + "[" + __atl + "W" + __atl + "]"
        caw.setParseAction(lambda tokens: CAW(tokens[1], tokens[4], tokens[6]))
        cew = Literal("<") + group + ">" + "[" + __atl + "W" + __atl + "]"
        cew.setParseAction(lambda tokens: CEW(tokens[1], tokens[4], tokens[6]))
        strategic = (cax | cex | caf | cef | cag | ceg | cau | ceu | caw | cew)

        logical <<= (formula | strategic)

        __atl <<= (_logicals_(logical))

    return __atl.parseString(spec, parseAll=True)
def bnf(self):
    '''
    The BNF grammar is defined below.

    expop   :: '^'
    multop  :: '*' | '/'
    addop   :: '+' | '-'
    integer :: ['+' | '-'] '0'..'9'+
    atom    :: PI | E | real | fn '(' expr ')' | '(' expr ')'
    factor  :: atom [ expop factor ]*
    term    :: factor [ multop factor ]*
    expr    :: term [ addop term ]*
    '''
    if not self._bnf:
        point = Literal(".")
        e = CaselessLiteral("E")
        fnumber = Combine(
            Word("+-" + nums, nums) +
            Optional(point + Optional(Word(nums))) +
            Optional(e + Word("+-" + nums, nums)))
        ident = Word(alphas, alphas + nums + "_$")
        minus = Literal("-")
        plus = Literal("+")
        div = Literal("/")
        mult = Literal("*")
        rpar = Literal(")").suppress()
        lpar = Literal("(").suppress()
        addop = plus | minus
        multop = mult | div
        expop = Literal("^")
        pi = CaselessLiteral("PI")
        expr = Forward()
        atom = (
            Optional("-") +
            (pi | e | fnumber | ident + lpar + delimitedList(expr) + rpar
             ).setParseAction(self.push_first)
            | (lpar + expr.suppress() + rpar)
        ).setParseAction(self.push_minus)
        # The right way to define exponentiation is -> 2^3^2 = 2^(3^2),
        # not (2^3)^2.
        factor = Forward()
        factor << atom + ZeroOrMore(
            (expop + factor).setParseAction(self.push_first))
        term = factor + ZeroOrMore(
            (multop + factor).setParseAction(self.push_first))
        expr << term + ZeroOrMore(
            (addop + term).setParseAction(self.push_first))
        self._bnf = expr
    return self._bnf
def parseArctl(spec):
    """Parse the spec and return its AST."""
    global __arctl
    if __arctl is None:
        true = Literal("True")
        true.setParseAction(lambda tokens: TrueExp())
        false = Literal("False")
        false.setParseAction(lambda tokens: FalseExp())
        atom = "'" + SkipTo("'") + "'"
        atom.setParseAction(lambda tokens: Atom(tokens[1]))

        action = _logicals_(atom)

        __arctl = Forward()

        proposition = true | false | atom

        notproposition = "~" + proposition
        notproposition.setParseAction(lambda tokens: Not(tokens[1]))
        formula = proposition | notproposition | Suppress("(") + __arctl + Suppress(")")

        temporal = Forward()

        e = Literal("E") + "<" + action + ">"
        a = Literal("A") + "<" + action + ">"

        eax = e + "X" + temporal
        eax.setParseAction(lambda tokens: EaX(tokens[2], tokens[5]))
        aax = a + "X" + temporal
        aax.setParseAction(lambda tokens: AaX(tokens[2], tokens[5]))
        eaf = e + "F" + temporal
        eaf.setParseAction(lambda tokens: EaF(tokens[2], tokens[5]))
        aaf = a + "F" + temporal
        aaf.setParseAction(lambda tokens: AaF(tokens[2], tokens[5]))
        eag = e + "G" + temporal
        eag.setParseAction(lambda tokens: EaG(tokens[2], tokens[5]))
        aag = a + "G" + temporal
        aag.setParseAction(lambda tokens: AaG(tokens[2], tokens[5]))
        eau = e + "[" + __arctl + "U" + __arctl + "]"
        eau.setParseAction(lambda tokens: EaU(tokens[2], tokens[5], tokens[7]))
        aau = a + "[" + __arctl + "U" + __arctl + "]"
        aau.setParseAction(lambda tokens: AaU(tokens[2], tokens[5], tokens[7]))
        eaw = e + "[" + __arctl + "W" + __arctl + "]"
        eaw.setParseAction(lambda tokens: EaW(tokens[2], tokens[5], tokens[7]))
        aaw = a + "[" + __arctl + "W" + __arctl + "]"
        aaw.setParseAction(lambda tokens: AaW(tokens[2], tokens[5], tokens[7]))

        temporal <<= (formula | eax | aax | eaf | aaf | eag | aag
                      | eau | aau | eaw | aaw)

        logical = _logicals_(temporal)

        __arctl <<= logical

    return __arctl.parseString(spec, parseAll=True)
def _string_to_ast(self, input_string): """ Parse a smart search string and return it in an AST like form """ # simple words # we need to use a regex to match on words because the regular # Word(alphanums) will only match on American ASCII alphanums and since # we try to be Unicode / internationally friendly we need to match much # much more. Trying to expand a word class to catch it all seems futile # so we match on everything *except* a few things, like our operators comp_word = Regex("[^*\s=><~!]+") word = Regex("[^*\s=><~!]+").setResultsName('word') # numbers comp_number = Word(nums) number = Word(nums).setResultsName('number') # IPv4 address ipv4_oct = Regex("((2(5[0-5]|[0-4][0-9])|[01]?[0-9][0-9]?))") comp_ipv4_address = Combine(ipv4_oct + ('.' + ipv4_oct*3)) ipv4_address = Combine(ipv4_oct + ('.' + ipv4_oct*3)).setResultsName('ipv4_address') # IPv6 address ipv6_address = Regex("((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:)))(%.+)?").setResultsName('ipv6_address') ipv6_prefix = Combine(ipv6_address + Regex("/(12[0-8]|1[01][0-9]|[0-9][0-9]?)")).setResultsName('ipv6_prefix') # VRF RTs of the form number:number vrf_rt = Combine((comp_ipv4_address | comp_number) + Literal(':') + comp_number).setResultsName('vrf_rt') # tags tags = Combine( Literal('#') + comp_word).setResultsName('tag') # operators for matching match_op = oneOf(' '.join(self.match_operators)).setResultsName('operator') boolean_op = oneOf(' '.join(self.boolean_operators)).setResultsName('boolean') # quoted string d_quoted_string = QuotedString('"', unquoteResults=True, escChar='\\') s_quoted_string = QuotedString('\'', unquoteResults=True, escChar='\\') quoted_string = (s_quoted_string | d_quoted_string).setResultsName('quoted_string') # expression to match a certain value for an attribute expression = Group(word + match_op + (quoted_string | vrf_rt | word | number)).setResultsName('expression') # we work on atoms, which are single quoted strings, match expressions, # tags, VRF RT or simple words. # NOTE: Place them in order of most exact match first! atom = Group(ipv6_prefix | ipv6_address | quoted_string | expression | tags | vrf_rt | boolean_op | word) enclosed = Forward() parens = nestedExpr('(', ')', content=enclosed) enclosed << ( parens | atom ).setResultsName('nested') content = Forward() content << ( ZeroOrMore(enclosed) ) res = content.parseString(input_string) return res
def _BNF(self):
    base16 = Literal("$")
    hex = Combine(base16 + Word(hexnums + "_"))
    base4 = Literal("%%")
    quaternary = Combine(base4 + Word("0123_"))
    base2 = Literal("%")
    binary = Combine(base2 + Word("01_"))
    plusminus = Literal("+") | Literal("-")
    integer = Combine(Optional(plusminus) + Word(nums + "_"))
    name_token = Combine(Optional(Literal(":") | Literal("@"))
                         + Word("_" + alphas, "_" + alphanums))
    name_token.setParseAction(self._mark_name_token)

    lparens = Literal("(").suppress()
    rparens = Literal(")").suppress()

    # op0 = Literal("@")
    op1 = (Literal("^^") | Literal("||") | Literal("|<")
           | Literal(">|") | Literal("!")).setParseAction(self._mark_unary)
    op2 = (Literal("->") | Literal("<-") | Literal(">>")
           | Literal("<<") | Literal("~>") | Literal("><"))
    op3 = Literal("&")
    op4 = Literal("|") | Literal("^")
    op5 = Literal("**") | Literal("*") | Literal("//") | Literal("/")
    op6 = Literal("+") | Literal("-")
    op7 = Literal("#>") | Literal("<#")
    op8 = (Literal("<") | Literal(">") | Literal("<>")
           | Literal("==") | Literal("=<") | Literal("=>"))
    op9 = Literal("NOT").setParseAction(self._mark_unary)
    op10 = Literal("AND")
    op11 = Literal("OR")
    op12 = Literal(",")

    expr = Forward()
    atom = name_token | hex | quaternary | binary | integer | quotedString
    atom.setParseAction(self._push)
    atom = atom | (lparens + expr.suppress() + rparens)

    # term0 = atom + ZeroOrMore((op0 + atom).setParseAction(self._push))
    # term1 = term0 + ZeroOrMore((op1 + term0).setParseAction(self._push))
    term1 = atom + ZeroOrMore((op1 + atom).setParseAction(self._push))
    term2 = term1 + ZeroOrMore((op2 + term1).setParseAction(self._push))
    term3 = term2 + ZeroOrMore((op3 + term2).setParseAction(self._push))
    term4 = term3 + ZeroOrMore((op4 + term3).setParseAction(self._push))
    term5 = term4 + ZeroOrMore((op5 + term4).setParseAction(self._push))
    term6 = term5 + ZeroOrMore((op6 + term5).setParseAction(self._push))
    term7 = term6 + ZeroOrMore((op7 + term6).setParseAction(self._push))
    term8 = term7 + ZeroOrMore((op8 + term7).setParseAction(self._push))
    term9 = term8 + ZeroOrMore((op9 + term8).setParseAction(self._push))
    term10 = term9 + ZeroOrMore((op10 + term9).setParseAction(self._push))
    term11 = term10 + ZeroOrMore((op11 + term10).setParseAction(self._push))
    expr << term11 + ZeroOrMore((op12 + term11).setParseAction(self._push))
    return expr
def parser():
    rule = Forward()
    body = OneOrMore(CharsNotIn('{};') + ';')
    sel = CharsNotIn('{};')
    rule <<= sel + Group('{' + ZeroOrMore(rule | body) + '}')
    rule.setParseAction(make_action(Rule))
    stylesheet = ZeroOrMore(rule)
    stylesheet.ignore(cStyleComment)
    return stylesheet
def main(s):
    lpar = Literal('(').suppress()
    rpar = Literal(')').suppress()
    integer = Word(nums)
    element = Word(alphas, exact=1)
    formula = Forward()
    term = Group((element | Group(lpar + formula + rpar)('subgroup'))
                 + Optional(integer, default=1)('mult'))
    formula << OneOrMore(term)
    integer.setParseAction(process_integer)
    term.setParseAction(process_term)
    formula.setParseAction(process_formula)
    return formula.parseString(s)[0]
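# Hedged usage sketch: element symbols are single letters here (Word(alphas,
# exact=1)), so inputs like "H2O" or "(CO2)3" fit the grammar; the final value
# depends on what the process_integer/process_term/process_formula callbacks
# compute (e.g. an atom count or a molecular weight).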
def _BNF(self):
    """
    expop   :: '^'
    multop  :: '*' | '/'
    addop   :: '+' | '-'
    integer :: ['+' | '-'] '0'..'9'+
    atom    :: PI | E | real | fn '(' expr ')' | '(' expr ')'
    factor  :: atom [ expop factor ]*
    term    :: factor [ multop factor ]*
    expr    :: term [ addop term ]*
    """
    if not self.bnf:
        point = Literal(".")
        e = CaselessLiteral("E")
        fnumber = Combine(Word("+-" + nums, nums) +
                          Optional(point + Optional(Word(nums))) +
                          Optional(e + Word("+-" + nums, nums)))
        ident = Word(alphas, alphas + nums + "_$")
        plus = Literal("+")
        minus = Literal("-")
        mult = Literal("*")
        div = Literal("/")
        lpar = Literal("(").suppress()
        rpar = Literal(")").suppress()
        # comma = Literal(",").suppress()
        comma = Literal(",")
        addop = plus | minus
        multop = mult | div
        expop = Literal("^")
        pi = CaselessLiteral("PI")
        var_list = [Literal(i) for i in self.var_names]

        expr = Forward()
        arg_func = Forward()
        or_vars = MatchFirst(var_list)
        # atom = (Optional("-") + (pi | e | fnumber |
        #         ident + lpar + delimitedList(Group(expr)) + rpar |
        #         or_vars).setParseAction(self._pushFirst) |
        #         (lpar + delimitedList(Group(expr)).suppress() + rpar)
        #         ).setParseAction(self._pushUMinus)
        atom = ((Optional("-") + (pi | e | fnumber |
                                  ident + lpar + arg_func + rpar |
                                  or_vars).setParseAction(self._pushFirst)) |
                (Optional("-") + (lpar + arg_func.suppress() + rpar))
                ).setParseAction(self._pushUMinus)
        # expr + ZeroOrMore("," + expr)

        # by defining exponentiation as "atom [ ^ factor ]..." instead of
        # "atom [ ^ atom ]...", we get right-to-left exponents instead of
        # left-to-right; that is, 2^3^2 = 2^(3^2), not (2^3)^2.
        factor = Forward()
        factor << atom + ZeroOrMore((expop + factor).setParseAction(self._pushFirst))
        term = factor + ZeroOrMore((multop + factor).setParseAction(self._pushFirst))
        expr << term + ZeroOrMore((addop + term).setParseAction(self._pushFirst))
        arg_func << expr + ZeroOrMore((comma + expr).setParseAction(self._pushFirst))
        self.bnf = expr
    return self.bnf
def parse(input):
    # parse a string into an element of the abstract representation

    # Grammar:
    #
    # <expr> ::= <integer>
    #            true
    #            false
    #            <identifier>
    #            ( if <expr> <expr> <expr> )
    #            ( let ( ( <name> <expr> ) ) <expr> )
    #            ( + <expr> <expr> )
    #            ( * <expr> <expr> )
    #

    idChars = alphas + "_+*-?!=<>"

    pIDENTIFIER = Word(idChars, idChars + "0123456789")
    pIDENTIFIER.setParseAction(lambda result: EId(result[0]))

    # A name is like an identifier but it does not return an EId...
    pNAME = Word(idChars, idChars + "0123456789")

    pINTEGER = Word("-0123456789", "0123456789")
    pINTEGER.setParseAction(lambda result: EInteger(int(result[0])))

    pBOOLEAN = Keyword("true") | Keyword("false")
    pBOOLEAN.setParseAction(lambda result: EBoolean(result[0] == "true"))

    pEXPR = Forward()

    pIF = "(" + Keyword("if") + pEXPR + pEXPR + pEXPR + ")"
    pIF.setParseAction(lambda result: EIf(result[2], result[3], result[4]))

    pBINDING = "(" + pNAME + pEXPR + ")"
    pBINDING.setParseAction(lambda result: (result[1], result[2]))

    pLET = "(" + Keyword("let") + "(" + pBINDING + ")" + pEXPR + ")"
    pLET.setParseAction(lambda result: ELet([result[3]], result[5]))

    pPLUS = "(" + Keyword("+") + pEXPR + pEXPR + ")"
    pPLUS.setParseAction(lambda result: ECall("+", [result[2], result[3]]))

    pTIMES = "(" + Keyword("*") + pEXPR + pEXPR + ")"
    pTIMES.setParseAction(lambda result: ECall("*", [result[2], result[3]]))

    pEXPR << (pINTEGER | pBOOLEAN | pIDENTIFIER | pIF | pLET | pPLUS | pTIMES)

    result = pEXPR.parseString(input)[0]
    return result    # the first element of the result is the expression
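# Hedged usage sketch, assuming the EInteger/ECall/... constructors from the
# surrounding module:
#   parse("(+ 1 (* 2 3))")
#   # -> ECall("+", [EInteger(1), ECall("*", [EInteger(2), EInteger(3)])])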
def parser(text):
    """
    str := \w+
    str := '\w+'
    exp := Var=str
    exp := exp & exp
    exp := exp ^ exp
    """
    # grammar
    # g_string = "'"+Word(alphas)+"'" | Word(alphas)
    g_quote = Literal("'").suppress()
    g_text = Regex("[\w\s\:\#\.]+").setResultsName("text")
    g_string = Optional(g_quote) + g_text + Optional(g_quote)
    g_equ = Literal("!=").setResultsName("connector") | Literal("=").setResultsName("connector")
    g_amp = Literal("&").setResultsName("connector")
    g_hat = Literal("^").setResultsName("connector")
    g_or = Literal("|").suppress()
    g_seq = Literal("->").setResultsName("connector")
    g_hash = Literal("#").setResultsName("hash")
    g_left_brack = Literal("[").suppress()
    g_right_brack = Literal("]").suppress()

    g_vals = Forward()
    g_vals << g_string + ZeroOrMore(Group(g_or + g_vals).setResultsName("or_group"))

    # working
    """
    exp_basic = Group(Optional(g_hash) + g_string).setResultsName("left") + g_equ + \
        Group(g_vals).setResultsName("right")
    exp = Group(exp_basic)
    exp = exp.setResultsName("left") + g_amp + exp.setResultsName("right") | \
        g_left_brack + exp.setResultsName("left") + g_hat + exp.setResultsName("right") + g_right_brack | \
        g_left_brack + exp.setResultsName("left") + g_seq + exp.setResultsName("right") + g_right_brack | \
        exp_basic
    """

    # recursion
    simpleq = Forward()
    complexq = Forward()

    exp = (simpleq | complexq).setResultsName("exp")

    exp_basic = Group(Group(Optional(g_hash) + g_string).setResultsName("left") + g_equ +
                      Group(g_vals).setResultsName("right"))
    simpleq << (Group(exp_basic.setResultsName("left") + g_amp +
                      simpleq.setResultsName("right")) | exp_basic)
    complexq << (
        Group(g_left_brack + exp.setResultsName("left") + g_hat +
              exp.setResultsName("right") + g_right_brack) |
        Group(g_left_brack + exp.setResultsName("left") + g_seq +
              exp.setResultsName("right") + g_right_brack)
    )

    return exp.parseString(text)
def _dice_grammar(exprStack, varStack):
    def pushFirst(str, loc, toks):
        exprStack.append(toks[0])

    def assignVar(str, loc, toks):
        varStack.append(toks[0])

    point = Literal('.')
    e = CaselessLiteral('E')
    plusorminus = Literal('+') | Literal('-')
    singledie = Literal('d')
    number = Word(nums)
    integer = Combine(Optional(plusorminus) + number)
    singleroll = Combine(singledie + number)
    floatnumber = Combine(
        integer + Optional(point + Optional(number)) + Optional(e + integer))
    ident = Word(alphas, alphanums + '_')

    plus = Literal("+")
    minus = Literal("-")
    mult = Literal("*")
    div = Literal("/")
    lpar = Literal("(").suppress()
    rpar = Literal(")").suppress()
    addop = plus | minus
    multop = mult | div
    expop = Literal("^")
    dieop = Literal("d")
    assign = Literal("=")

    expr = Forward()
    atom = (
        (e | floatnumber | integer | ident | singleroll).setParseAction(pushFirst)
        | (lpar + expr.suppress() + rpar))
    roll = Forward()
    roll << atom + ZeroOrMore((dieop + roll).setParseAction(pushFirst))
    factor = Forward()
    factor << roll + ZeroOrMore((expop + factor).setParseAction(pushFirst))
    term = factor + ZeroOrMore((multop + factor).setParseAction(pushFirst))
    expr << term + ZeroOrMore((addop + term).setParseAction(pushFirst))
    bnf = Optional((ident + assign).setParseAction(assignVar)) + expr
    return bnf + StringEnd()
def sandbox():
    """Based on http://stackoverflow.com/a/4802004/623735"""
    loose_grammar = Forward()
    nestedParens = nestedExpr('(', ')', content=loose_grammar)
    loose_grammar << (
        OneOrMore(Optional(':').suppress() + Word(alphanums + '-_'))
        | OneOrMore(Optional('?').suppress() + Word(alphanums + '-_'))
        | init
        | goal
        | ','
        | nestedParens)
    examples = [
        # definitely not PDDL-compliant, but parser does OK anyway (not strict)
        '(some global things (:a (nested list of three varibles (?list0 ?list1 ?list2))))',
        # this is a valid line of STRIPS (subset of PDDL grammar?)
        '(:requirements :strips)',
        # another valid line of STRIPS (subset of PDDL grammar?)
        '(define (domain random-domain))',
        # a complete (if simple) STRIPS problem definition from coursera AI Planning class, HW wk2
        r'''
        (define (problem random-pbl1)
          (:domain random-domain)
          (:init (S B B) (S C B) (S A C) (R B B) (R C B))
          (:goal (and (S A A))))
        ''',
        # a complete STRIPS domain definition from coursera AI Planning class, HW wk2
        r'''
        (define (domain random-domain)
          (:requirements :strips)
          (:action op1
            :parameters (?x1 ?x2 ?x3)
            :precondition (and (S ?x1 ?x2) (R ?x3 ?x1))
            :effect (and (S ?x2 ?x1) (S ?x1 ?x3) (not (R ?x3 ?x1))))
          (:action op2
            :parameters (?x1 ?x2 ?x3)
            :precondition (and (S ?x3 ?x1) (R ?x2 ?x2))
            :effect (and (S ?x1 ?x3) (not (S ?x3 ?x1)))))
        ''',
    ]
    ans = []
    for ex in examples:
        try:
            ans += [loose_grammar.parseString(ex).asList()]
            print(ans[-1])
        except:
            print_exc()
    return ans
def __init__(self, query):
    self._methods = {
        'and': self.evaluate_and,
        'or': self.evaluate_or,
        'not': self.evaluate_not,
        'parenthesis': self.evaluate_parenthesis,
        'quotes': self.evaluate_quotes,
        'word': self.evaluate_word,
    }

    self.line = ''
    self.query = query.lower() if query else ''

    if self.query:
        operator_or = Forward()
        operator_word = Group(Word(alphanums)).setResultsName('word')

        operator_quotes_content = Forward()
        operator_quotes_content << (
            (operator_word + operator_quotes_content) | operator_word
        )

        operator_quotes = Group(
            Suppress('"') + operator_quotes_content + Suppress('"')
        ).setResultsName("quotes") | operator_word

        operator_parenthesis = Group(
            (Suppress("(") + operator_or + Suppress(")"))
        ).setResultsName("parenthesis") | operator_quotes

        operator_not = Forward()
        operator_not << (Group(
            Suppress(Keyword("not", caseless=True)) + operator_not
        ).setResultsName("not") | operator_parenthesis)

        operator_and = Forward()
        operator_and << (Group(
            operator_not + Suppress(Keyword("and", caseless=True)) + operator_and
        ).setResultsName("and") | Group(
            operator_not + OneOrMore(~oneOf("and or") + operator_and)
        ).setResultsName("and") | operator_not)

        operator_or << (Group(
            operator_and + Suppress(Keyword("or", caseless=True)) + operator_or
        ).setResultsName("or") | operator_and)

        self._parser = operator_or.parseString(self.query)[0]
    else:
        self._parser = False
def parser():
    global _parser
    if _parser is None:
        ParserElement.setDefaultWhitespaceChars("")
        lbrack = Literal("[")
        rbrack = Literal("]")
        lbrace = Literal("{")
        rbrace = Literal("}")
        lparen = Literal("(")
        rparen = Literal(")")

        reMacro = Suppress("\\") + oneOf(list("dwsZ"))
        escapedChar = ~reMacro + Combine("\\" + oneOf(list(printables)))
        reLiteralChar = "".join(c for c in string.printable
                                if c not in r"\[]{}().*?+|")

        reRange = Combine(lbrack.suppress() + SkipTo(rbrack, ignore=escapedChar) + rbrack.suppress())
        reLiteral = (escapedChar | oneOf(list(reLiteralChar)))
        reDot = Literal(".")
        repetition = (
            (lbrace + Word(nums).setResultsName("count") + rbrace) |
            (lbrace + Word(nums).setResultsName("minCount") + "," +
             Word(nums).setResultsName("maxCount") + rbrace) |
            oneOf(list("*+?")))

        reExpr = Forward()
        reGroup = (lparen.suppress() +
                   Optional(Literal("?").suppress() + oneOf(list(":P"))).setResultsName("option") +
                   reExpr.setResultsName("expr") +
                   rparen.suppress())

        reTerm = (reLiteral | reRange | reMacro | reDot | reGroup)
        reExpr << operatorPrecedence(reTerm, [
            (repetition, 1, opAssoc.LEFT, create(Repetition)),
            (None, 2, opAssoc.LEFT, create(Sequence)),
            (Suppress('|'), 2, opAssoc.LEFT, create(Alternation)),
        ])

        reGroup.setParseAction(create(Group))
        reRange.setParseAction(create(Range))
        reLiteral.setParseAction(create(Character))
        reMacro.setParseAction(create(Macro))
        reDot.setParseAction(create(Dot))

        _parser = reExpr

    return _parser
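# Hedged note: the element returned above parses a regular expression into an
# AST built from the surrounding module's node classes, e.g. something like
#   parser().parseString(r"(foo|bar)\d{2}")
# yields nested Alternation/Sequence/Repetition/... objects via the create(...) factories.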
def BNF():
    """
    expop   :: '^'
    multop  :: '*' | '/'
    addop   :: '+' | '-'
    integer :: ['+' | '-'] '0'..'9'+
    atom    :: PI | E | real | fn '(' expr ')' | '(' expr ')'
    factor  :: atom [ expop factor ]*
    term    :: factor [ multop factor ]*
    expr    :: term [ addop term ]*
    """
    global bnf
    if not bnf:
        point = Literal(".")
        e = CaselessLiteral("E")
        fnumber = Combine(Word("+-" + nums, nums) +
                          Optional(point + Optional(Word(nums))) +
                          Optional(e + Word("+-" + nums, nums)))
        ident = Word(alphas, alphas + nums + "_$")
        plus = Literal("+")
        minus = Literal("-")
        mult = Literal("*")
        div = Literal("/")
        lpar = Literal("(").suppress()
        rpar = Literal(")").suppress()
        addop = plus | minus
        multop = mult | div
        expop = Literal("^")
        pi = CaselessLiteral("PI")
        expr = Forward()
        atom = ((Optional("-") +
                 (pi | e | fnumber | ident + lpar + expr + rpar).setParseAction(pushFirst) |
                 (lpar + expr.suppress() + rpar)).setParseAction(pushUMinus))
        # by defining exponentiation as "atom [ ^ factor ]..." instead of
        # "atom [ ^ atom ]...", we get right-to-left exponents, instead of
        # left-to-right
        # that is, 2^3^2 = 2^(3^2), not (2^3)^2.
        factor = Forward()
        factor << atom + ZeroOrMore((expop + factor).setParseAction(pushFirst))
        term = factor + ZeroOrMore((multop + factor).setParseAction(pushFirst))
        expr << term + ZeroOrMore((addop + term).setParseAction(pushFirst))
        bnf = expr
    return bnf
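# Hedged usage sketch (pushFirst/pushUMinus and the module-level exprStack come
# from the classic pyparsing calculator example this is based on):
#   BNF().parseString("2^3^2 + 1", parseAll=True)
# leaves the expression on exprStack in postfix order, with '^' grouped
# right-to-left as the comment above describes.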
def __init__(self, db):
    """Initialize a new RDF parser with pre-defined grammar.
    Takes database db as an argument."""
    self.__db = db

    # grammar definition
    # literals
    self.__word = self.__prefix = self.__suffix = Word(alphanums)
    self.__colon = Literal(':')
    self.__a = Literal('a')
    self.__quoted_string = dblQuotedString.setParseAction(removeQuotes)
    self.__l_paren = Suppress('(')
    self.__r_paren = Suppress(')')
    self.__dot = Suppress('.')
    self.__comma = Suppress(',')
    self.__semicolon = Suppress(';')

    # composites
    self.__get_suffix = Suppress(self.__prefix + self.__colon) + self.__suffix
    self.__get_object = (Optional(self.__l_paren) +
                         OneOrMore((self.__get_suffix | self.__quoted_string) +
                                   Optional(self.__comma)) +
                         Optional(self.__r_paren))
    self.__is_a = (self.__get_suffix('subject') | self.__word) + self.__a('relation') + \
        self.__get_suffix('object') + self.__dot
    self.__has_x = self.__get_suffix('subject') + self.__get_suffix('relation') + \
        Group(self.__get_object)('object') + self.__dot

    # search term
    self.__search = Forward()
    self.__search << (self.__is_a | self.__has_x)
def grammar(self):
    if not self.bnf:
        point = Literal(".")
        fnumber = Combine(Word(nums) + Optional(point + Optional(Word(nums))))
        ident = Word(alphas.lower() + "_", alphanums + "_")
        plus = Literal("+")
        minus = Literal("-")
        mult = Literal("*")
        # passiverate = Word('infty') | Word('T')
        div = Literal("/")
        lpar = Literal("(").suppress()
        rpar = Literal(")").suppress()
        addop = plus | minus
        multop = mult | div
        assign = Literal('=')
        expop = Literal("^")

        expr = Forward()
        atom = (Optional("-") +
                (fnumber | ident + lpar + expr + rpar | ident).setParseAction(self._pushFirst) |
                lpar + expr.suppress() + rpar)
        factor = Forward()
        factor << atom + ZeroOrMore((expop + factor).setParseAction(self._pushFirst))
        term = factor + ZeroOrMore((multop + factor).setParseAction(self._pushFirst))
        expr << term + ZeroOrMore((addop + term).setParseAction(self._pushFirst))
        bnf = (ident + assign).setParseAction(self._assignVar) + expr
        self.bnf = bnf
    return self.bnf
class RigorousClassicalPropositionalLogicParser:

    def __init__(self):
        self.left_parenthesis = Suppress("(")
        self.right_parenthesis = Suppress(")")
        self.implies = Literal("->")
        self.or_ = Literal("|")
        self.and_ = Literal("&")
        self.not_ = Literal("!") | Literal("~")
        self.boolean = Keyword("false") | Keyword("true")
        self.symbol = Word(alphas, alphanums)
        self.formula = Forward()
        self.operand = self.boolean | self.symbol
        self.binaryConnective = self.or_ | self.and_ | self.implies
        self.unaryFormula = Group(self.not_ + self.formula)
        self.binaryFormula = Group(self.left_parenthesis + self.formula +
                                   self.binaryConnective + self.formula +
                                   self.right_parenthesis)
        self.formula << (self.unaryFormula | self.binaryFormula | self.operand)

    ## Should return a ParserResult object
    def parse(self, text):
        try:
            result = self.formula.parseString(text, parseAll=True)
            assert len(result) == 1
            return result
        except (ParseException, ParseSyntaxException) as err:
            # print("Syntax error:\n{0.line}\n{1}^".format(err, " " * (err.column - 1)))
            return ""
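# Hedged usage sketch: binary formulas must be fully parenthesised by this
# grammar, and negation is prefix.
#   p = RigorousClassicalPropositionalLogicParser()
#   p.parse("(p -> (q & r))")   # parses; returns the single-element result
#   p.parse("!(p | false)")     # parses
#   p.parse("p -> q")           # no surrounding parens -> returns ""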
def parse_block(self, block_text):
    """Parses sql block into tokens"""
    # Valid grammar looks like this:
    # {sqlbarchart: title='Some string' | other params as yet unknown...}

    # make a grammar
    block_start = Literal("{")
    sql_start = Keyword(self.TAGNAME, caseless=True)
    colon = Literal(":")
    sql_end = Literal("}")
    separator = Literal("|")
    block_end = Keyword("{" + self.TAGNAME + "}", caseless=True)

    # params
    field_name = Word(alphanums)
    equal_sign = Suppress(Literal("="))
    # whatever value
    field_value = (CharsNotIn("|}"))
    # param name and value
    param_group = Group(field_name + equal_sign + field_value)
    # list of all params
    param_list = delimitedList(param_group, '|')
    # helper
    param_dict = Dict(param_list)
    # sql text
    sql_text = SkipTo(block_end)

    sqldecl = Forward()
    sqldecl << (block_start +
                sql_start +
                Optional(colon) +
                Optional(param_dict) +
                sql_end +
                sql_text.setResultsName('sqltext') +
                block_end)

    block_str = "".join(block_text)
    tokens = sqldecl.parseString(block_str)
    return tokens
def _string_to_ast(self, input_string):
    """ Parse a smart search string and return it in an AST like form
    """
    # simple words
    comp_word = Word(alphanums + "-./_")
    word = Word(alphanums + "-./_").setResultsName('word')
    # numbers
    comp_number = Word(nums)
    number = Word(nums).setResultsName('number')

    # IPv4 address
    ipv4_oct = Regex("((2(5[0-5]|[0-4][0-9])|[01]?[0-9][0-9]?))")
    comp_ipv4_address = Combine(ipv4_oct + ('.' + ipv4_oct*3))
    ipv4_address = Combine(ipv4_oct + ('.' + ipv4_oct*3)).setResultsName('ipv4_address')

    # VRF RTs of the form number:number
    vrf_rt = Combine((comp_ipv4_address | comp_number) + Literal(':') + comp_number).setResultsName('vrf_rt')

    # tags
    tags = Combine(Literal('#') + comp_word).setResultsName('tag')

    # operators for matching
    match_op = oneOf(' '.join(self.match_operators)).setResultsName('operator')
    boolean_op = oneOf(' '.join(self.boolean_operators)).setResultsName('boolean')

    # quoted string
    quoted_string = QuotedString('"', unquoteResults=True, escChar='\\').setResultsName('quoted_string')

    # expression to match a certain value for an attribute
    expression = Group(word + match_op + (quoted_string | vrf_rt | word | number)).setResultsName('expression')

    # we work on atoms, which are single quoted strings, match expressions,
    # tags, VRF RT or simple words.
    # NOTE: Place them in order of most exact match first!
    atom = Group(quoted_string | expression | tags | vrf_rt | boolean_op | word)

    enclosed = Forward()
    parens = nestedExpr('(', ')', content=enclosed)
    enclosed << (parens | atom).setResultsName('nested')

    content = Forward()
    content << (ZeroOrMore(enclosed))

    res = content.parseString(input_string)
    return res
def __init__(self):
    """
    Setup the Backus Normal Form (BNF) parser logic.
    """
    # Set an empty formula attribute
    self.formula = None

    # Instantiate blank parser for BNF construction
    self.bnf = Forward()

    # Expression for parenthesis, which are suppressed in the atoms
    # after matching.
    lpar = Literal(const.LPAR).suppress()
    rpar = Literal(const.RPAR).suppress()

    # Expression for mathematical constants: Euler number and Pi
    e = Keyword(const.EULER)
    pi = Keyword(const.PI)
    null = Keyword(const.NULL)
    _true = Keyword(const.TRUE)
    _false = Keyword(const.FALSE)

    # Prepare operator expressions
    addop = oneOf(const.ADDOP)
    multop = oneOf(const.MULTOP)
    powop = oneOf(const.POWOP)
    unary = reduce(operator.add, (Optional(x) for x in const.UNOP))

    # Expression for floating point numbers, allowing for scientific notation.
    number = Regex(const.NUMBER)

    # Variables are alphanumeric strings that represent keys in the input
    # data dictionary.
    variable = delimitedList(Word(alphanums), delim=const.VARIABLE_NAME_SEPARATOR, combine=True)

    # Functional calls
    function = Word(alphanums) + lpar + self.bnf + rpar

    # Atom core - a single element is either a math constant,
    # a function or a variable.
    atom_core = function | pi | e | null | _true | _false | number | variable

    # Atom subelement between parenthesis
    atom_subelement = lpar + self.bnf.suppress() + rpar

    # In atoms, pi and e need to be before the letters for it to be found
    atom = (
        unary + atom_core.setParseAction(self.push_first) | atom_subelement
    ).setParseAction(self.push_unary_operator)

    # By defining exponentiation as "atom [ ^ factor ]..." instead of
    # "atom [ ^ atom ]...", we get right-to-left exponents, instead of
    # left-to-right that is, 2^3^2 = 2^(3^2), not (2^3)^2.
    factor = Forward()
    factor << atom + ZeroOrMore((powop + factor).setParseAction(self.push_first))

    term = factor + ZeroOrMore((multop + factor).setParseAction(self.push_first))
    self.bnf << term + ZeroOrMore((addop + term).setParseAction(self.push_first))
def __setup_parser():
    # reserved single-character tokens
    LSQUARE, RSQUARE, LCURLY, RCURLY, EQ, PIPE, SEMI = map(Suppress, '[]{}=|;')

    # non-iterable literals
    integer = simple(Word('-' + nums, nums), 'int', int)
    string = simple(QuotedString("'") | QuotedString('"'), 'str', str)
    regex = simple(QuotedString('/'), 'rgx', re.compile)

    # list/range literals
    emptylist = named(LSQUARE + RSQUARE, 'emptylist')
    rstart = LSQUARE + integer + Optional(Suppress(',') + integer)
    irange = named(rstart + Suppress('..]'), 'irange')
    brange = named(rstart + Suppress('..') + integer + RSQUARE, 'brange')
    intlist = named(LSQUARE + delimitedList(integer) + RSQUARE, 'intlist')
    strlist = named(LSQUARE + delimitedList(string) + RSQUARE, 'strlist')
    rgxlist = named(LSQUARE + delimitedList(regex) + RSQUARE, 'rgxlist')
    list_lit = Forward()
    lstlist = named(LSQUARE + delimitedList(list_lit) + RSQUARE, 'lstlist')
    list_lit << (emptylist | irange | brange | intlist | strlist | rgxlist | lstlist)

    # special-syntax functions
    slurp = special(QuotedString('<', endQuoteChar='>'), 'slurp')
    shell = special(QuotedString('`'), 'shell')

    # functions and arguments
    name = simple(Word(alphas, alphanums + '_'), 'name', str)
    subpipe = Forward()
    function = Forward()
    argument = string | list_lit | regex | integer | subpipe | slurp | shell | function
    function << name + named(ZeroOrMore(argument), 'arguments')
    function.setParseAction(lambda parse: ('function', dict(parse.asList())))

    # an atom is anything that can fit between pipes on its own
    atom = (function | slurp | shell | list_lit)

    # an expression/subpipe is multiple atoms piped together
    expression = named(atom + ZeroOrMore(PIPE + atom), 'pipe')
    subpipe << LCURLY + expression + RCURLY

    # statements and lines are pretty standard
    statement = Optional(name + EQ, default=('name', '')) + expression
    statement.setParseAction(lambda parse: dict(parse.asList()))
    line = (statement | empty).ignore(pythonStyleComment)

    return line.parseString
LCURLY, RCURLY, LPAREN, RPAREN, QUOTE, COMMA, AT, EQUALS, HASH = map(
    Suppress, '{}()",@=#')


def bracketed(expr):
    """ Return matcher for `expr` between curly brackets or parentheses """
    return (LPAREN + expr + RPAREN) | (LCURLY + expr + RCURLY)


# Define parser components for strings (the hard bit)
chars_no_curly = Regex(r"[^{}]+")
chars_no_curly.leaveWhitespace()
chars_no_quotecurly = Regex(r'[^"{}]+')
chars_no_quotecurly.leaveWhitespace()
# Curly string is some stuff without curlies, or nested curly sequences
curly_string = Forward()
curly_item = Group(curly_string) | chars_no_curly
curly_string << LCURLY + ZeroOrMore(curly_item) + RCURLY
# quoted string is either just stuff within quotes, or stuff within quotes,
# within which there is nested curliness
quoted_item = Group(curly_string) | chars_no_quotecurly
quoted_string = QUOTE + ZeroOrMore(quoted_item) + QUOTE

# Numbers can just be numbers. Only integers though.
number = Regex('[0-9]+')

# Basis characters (by exclusion) for variable / field names. The following
# list of characters is from the btparse documentation
any_name = Regex('[^\\s"#%\'(),={}]+')

# btparse says, and the test bibs show by experiment, that macro and field names
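# Hedged sketch of how the nested-brace string grammar behaves: the outer
# braces are suppressed and nested brace groups become sublists, e.g.
#   curly_string.parseString('{The {BibTeX} format}')
#   # -> roughly ['The ', ['BibTeX'], ' format']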
import pyparsing
from pyparsing import Optional, Word, Literal, Forward, alphas, nums, \
    Group, OneOrMore, ZeroOrMore, oneOf, delimitedList, restOfLine, \
    QuotedString, Regex

from pprint import pprint

L = Literal
Ls = lambda expr: Literal(expr).suppress()

table = Forward()
array = Forward()

identifier = Word(alphas, alphas + nums + '_')
boolean = oneOf("true false")
integer = Word(nums)
double = Regex(r'[+-]?\d+\.\d*([eE][+-]?\d+)?').setParseAction(
    lambda t: float(t[0]))
# try the float pattern first so that "1.5" is not split into "1" and ".5"
number = double | integer
string = QuotedString(quoteChar='"', escChar='\\')
value = number | boolean | string | table | array
key = integer | identifier
pair = Group(key + Ls(':') + value)

table << Group(
    Ls('{') +
    Optional(pair + ZeroOrMore(Ls(',') + pair) + Optional(Ls(','))) +
    Ls('}'))
array << Group(
    Ls('[') +
    Optional(value + ZeroOrMore(Ls(',') + value) + Optional(Ls(','))) +
    Ls(']'))

# statement = oneOf("return print read break continue")
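# Hedged usage sketch of the grammar above:
#   table.parseString('{a: 1, b: "x", c: [1, 2.5]}')
#   # -> roughly [[['a', '1'], ['b', 'x'], ['c', ['1', 2.5]]]]
# (integers stay strings because only `double` has a conversion parse action).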
def _create_config_parser(): """ Creates a parser using pyparsing that works with bibfield rule definitions BNF like grammar: rule ::= ([persitent_identifier] json_id ["[0]" | "[n]"] "," aliases":" INDENT body UNDENT) | include | python_comment include ::= "include(" PATH ")" body ::= [inherit_from] (creator | derived | calculated) [checker] [documentation] [producer] aliases ::= json_id ["[0]" | "[n]"] ["," aliases] creator ::= "creator:" INDENT creator_body+ UNDENT creator_body ::= [decorators] source_format "," source_tag "," python_allowed_expr source_format ::= MASTER_FORMATS source_tag ::= QUOTED_STRING derived ::= "derived" INDENT derived_calculated_body UNDENT calculated ::= "calculated:" INDENT derived_calculated_body UNDENT derived_calculated_body ::= [decorators] "," python_allowed_exp decorators ::= (peristent_identfier | legacy | do_not_cache | parse_first | depends_on | only_if | only_if_master_value)* peristent_identfier ::= @persitent_identifier( level ) legacy ::= "@legacy(" correspondences+ ")" correspondences ::= "(" source_tag [ "," tag_name ] "," json_id ")" parse_first ::= "@parse_first(" jsonid+ ")" depends_on ::= "@depends_on(" json_id+ ")" only_if ::= "@only_if(" python_condition+ ")" only_if_master_value ::= "@only_if_master_value(" python_condition+ ")" inherit_from ::= "@inherit_from()" do_not_cache ::= "@do_not_cache" python_allowed_exp ::= ident | list_def | dict_def | list_access | dict_access | function_call checker ::= "checker:" INDENT checker_function+ UNDENT documentation ::= INDENT doc_string subfield* UNDENT doc_string ::= QUOTED_STRING subfield ::= "@subfield" json_id["."json_id*] ":" docstring producer ::= "producer:" INDENT producer_body UNDENT producer_body ::= producer_code "," python_dictionary producer_code ::= ident """ indent_stack = [1] def check_sub_indent(str, location, tokens): cur_col = col(location, str) if cur_col > indent_stack[-1]: indent_stack.append(cur_col) else: raise ParseException(str, location, "not a subentry") def check_unindent(str, location, tokens): if location >= len(str): return cur_col = col(location, str) if not(cur_col < indent_stack[-1] and cur_col <= indent_stack[-2]): raise ParseException(str, location, "not an unindent") def do_unindent(): indent_stack.pop() INDENT = lineEnd.suppress() + empty + empty.copy().setParseAction(check_sub_indent) UNDENT = FollowedBy(empty).setParseAction(check_unindent) UNDENT.setParseAction(do_unindent) json_id = (Word(alphas + "_", alphanums + "_") + Optional(oneOf("[0] [n]")))\ .setResultsName("json_id", listAllMatches=True)\ .setParseAction(lambda tokens: "".join(tokens)) aliases = delimitedList((Word(alphanums + "_") + Optional(oneOf("[0] [n]"))) .setParseAction(lambda tokens: "".join(tokens)))\ .setResultsName("aliases") python_allowed_expr = Forward() ident = Word(alphas + "_", alphanums + "_") dict_def = originalTextFor(nestedExpr('{', '}')) list_def = originalTextFor(nestedExpr('[', ']')) dict_access = list_access = originalTextFor(ident + nestedExpr('[', ']')) function_call = originalTextFor(ZeroOrMore(ident + ".") + ident + nestedExpr('(', ')')) python_allowed_expr << (ident ^ dict_def ^ list_def ^ dict_access ^ list_access ^ function_call ^ restOfLine)\ .setResultsName("value", listAllMatches=True) persistent_identifier = (Suppress("@persistent_identifier") + nestedExpr("(", ")"))\ .setResultsName("persistent_identifier") legacy = (Suppress("@legacy") + originalTextFor(nestedExpr("(", ")")))\ .setResultsName("legacy", listAllMatches=True) only_if = (Suppress("@only_if") + 
originalTextFor(nestedExpr("(", ")")))\ .setResultsName("only_if") only_if_master_value = (Suppress("@only_if_value") + originalTextFor(nestedExpr("(", ")")))\ .setResultsName("only_if_master_value") depends_on = (Suppress("@depends_on") + originalTextFor(nestedExpr("(", ")")))\ .setResultsName("depends_on") parse_first = (Suppress("@parse_first") + originalTextFor(nestedExpr("(", ")")))\ .setResultsName("parse_first") do_not_cache = (Suppress("@") + "do_not_cache")\ .setResultsName("do_not_cache") field_decorator = parse_first ^ depends_on ^ only_if ^ only_if_master_value ^ do_not_cache ^ legacy #Independent decorators inherit_from = (Suppress("@inherit_from") + originalTextFor(nestedExpr("(", ")")))\ .setResultsName("inherit_from") master_format = (Suppress("@master_format") + originalTextFor(nestedExpr("(", ")")))\ .setResultsName("master_format") derived_calculated_body = ZeroOrMore(field_decorator) + python_allowed_expr derived = "derived" + Suppress(":") + INDENT + derived_calculated_body + UNDENT calculated = "calculated" + Suppress(":") + INDENT + derived_calculated_body + UNDENT source_tag = quotedString\ .setParseAction(removeQuotes)\ .setResultsName("source_tag", listAllMatches=True) source_format = oneOf(CFG_BIBFIELD_MASTER_FORMATS)\ .setResultsName("source_format", listAllMatches=True) creator_body = (ZeroOrMore(field_decorator) + source_format + Suppress(",") + source_tag + Suppress(",") + python_allowed_expr)\ .setResultsName("creator_def", listAllMatches=True) creator = "creator" + Suppress(":") + INDENT + OneOrMore(creator_body) + UNDENT checker_function = (Optional(master_format) + ZeroOrMore(ident + ".") + ident + originalTextFor(nestedExpr('(', ')')))\ .setResultsName("checker_function", listAllMatches=True) checker = ("checker" + Suppress(":") + INDENT + OneOrMore(checker_function) + UNDENT) doc_string = QuotedString(quoteChar='"""', multiline=True) | quotedString.setParseAction(removeQuotes) subfield = (Suppress("@subfield") + Word(alphanums + "_" + '.') + Suppress(":") + Optional(doc_string))\ .setResultsName("subfields", listAllMatches=True) documentation = ("documentation" + Suppress(":") + INDENT + Optional(doc_string).setResultsName("main_doc") + ZeroOrMore(subfield) + UNDENT)\ .setResultsName("documentation") producer_code = Word(alphas + "_", alphanums + "_")\ .setResultsName("producer_code", listAllMatches=True) producer_body = (producer_code + Suppress(",") + python_allowed_expr)\ .setResultsName("producer_def", listAllMatches=True) producer = "producer" + Suppress(":") + INDENT + OneOrMore(producer_body) + UNDENT field_def = (creator | derived | calculated)\ .setResultsName("type_field", listAllMatches=True) body = Optional(inherit_from) + Optional(field_def) + Optional(checker) + Optional(documentation) + Optional(producer) comment = Literal("#") + restOfLine + LineEnd() include = (Suppress("include") + quotedString)\ .setResultsName("includes", listAllMatches=True) rule = (Optional(persistent_identifier) + json_id + Optional(Suppress(",") + aliases) + Suppress(":") + INDENT + body + UNDENT)\ .setResultsName("rules", listAllMatches=True) return OneOrMore(rule | include | comment.suppress())
from pyparsing import (Word, alphas, alphanums, Regex, Suppress, Forward,
                       Group, oneOf, ZeroOrMore, Optional, delimitedList,
                       Keyword, restOfLine, quotedString, Dict)

ident = Word(alphas + "_", alphanums + "_").setName("identifier")
integer = Regex(r"[+-]?\d+")

LBRACE, RBRACE, LBRACK, RBRACK, LPAR, RPAR, EQ, SEMI = map(
    Suppress, "{}[]()=;")

kwds = """message required optional repeated enum extensions extends extend
          to package service rpc returns true false option import"""
for kw in kwds.split():
    exec("%s_ = Keyword('%s')" % (kw.upper(), kw))

messageBody = Forward()

messageDefn = MESSAGE_ - ident("messageId") + LBRACE + messageBody("body") + RBRACE

typespec = oneOf("""double float int32 int64 uint32 uint64 sint32 sint64
                    fixed32 fixed64 sfixed32 sfixed64 bool string bytes""") | ident
rvalue = integer | TRUE_ | FALSE_ | ident
fieldDirective = LBRACK + Group(ident + EQ + rvalue) + RBRACK
fieldDefn = ((REQUIRED_ | OPTIONAL_ | REPEATED_)("fieldQualifier") -
             typespec("typespec") + ident("ident") + EQ + integer("fieldint") +
             ZeroOrMore(fieldDirective) + SEMI)

# enumDefn ::= 'enum' ident '{' { ident '=' integer ';' }* '}'
enumDefn = ENUM_("typespec") - ident('name') + LBRACE + Dict(
def parse(cls, content, basedir=None, resolve=True, unresolved_value=DEFAULT_SUBSTITUTION): """parse a HOCON content :param content: HOCON content to parse :type content: basestring :param resolve: if true, resolve substitutions :type resolve: boolean :param unresolved_value: assigned value to unresolved substitution. If overriden with a default value, it will replace all unresolved values by the default value. If it is set to pyhocon.STR_SUBSTITUTION then it will replace the value by its substitution expression (e.g., ${x}) :type unresolved_value: boolean :return: a ConfigTree or a list """ unescape_pattern = re.compile(r'\\.') def replace_escape_sequence(match): value = match.group(0) return cls.REPLACEMENTS.get(value, value) def norm_string(value): return unescape_pattern.sub(replace_escape_sequence, value) def unescape_string(tokens): return ConfigUnquotedString(norm_string(tokens[0])) def parse_multi_string(tokens): # remove the first and last 3 " return tokens[0][3:-3] def convert_number(tokens): n = tokens[0] try: return int(n, 10) except ValueError: return float(n) def convert_period(tokens): period_value = int(tokens.value) period_identifier = tokens.unit period_unit = next((single_unit for single_unit, values in cls.get_supported_period_type_map().items() if period_identifier in values)) return period(period_value, period_unit) # ${path} or ${?path} for optional substitution SUBSTITUTION_PATTERN = r"\$\{(?P<optional>\?)?(?P<variable>[^}]+)\}(?P<ws>[ \t]*)" def create_substitution(instring, loc, token): # remove the ${ and } match = re.match(SUBSTITUTION_PATTERN, token[0]) variable = match.group('variable') ws = match.group('ws') optional = match.group('optional') == '?' substitution = ConfigSubstitution(variable, optional, ws, instring, loc) return substitution # ${path} or ${?path} for optional substitution STRING_PATTERN = '"(?P<value>(?:[^"\\\\]|\\\\.)*)"(?P<ws>[ \t]*)' def create_quoted_string(instring, loc, token): # remove the ${ and } match = re.match(STRING_PATTERN, token[0]) value = norm_string(match.group('value')) ws = match.group('ws') return ConfigQuotedString(value, ws, instring, loc) def include_config(instring, loc, token): url = None file = None required = False if token[0] == 'required': required = True final_tokens = token[1:] else: final_tokens = token if len(final_tokens) == 1: # include "test" value = final_tokens[0].value if isinstance( final_tokens[0], ConfigQuotedString) else final_tokens[0] if value.startswith("http://") or value.startswith( "https://") or value.startswith("file://"): url = value else: file = value elif len(final_tokens) == 2: # include url("test") or file("test") value = final_tokens[1].value if isinstance( final_tokens[1], ConfigQuotedString) else final_tokens[1] if final_tokens[0] == 'url': url = value elif final_tokens[0] == 'package': file = cls.resolve_package_path(value) else: file = value if url is not None: logger.debug('Loading config from url %s', url) obj = ConfigFactory.parse_URL(url, resolve=False, required=required, unresolved_value=NO_SUBSTITUTION) elif file is not None: path = file if basedir is None else os.path.join(basedir, file) def _make_prefix(path): return ('<root>' if path is None else '[%s]' % path).ljust(55).replace('\\', '/') _prefix = _make_prefix(path) def _load(path): _prefix = _make_prefix(path) logger.debug('%s Loading config from file %r', _prefix, path) obj = ConfigFactory.parse_file( path, resolve=False, required=required, unresolved_value=NO_SUBSTITUTION) logger.debug('%s Result: %s', _prefix, obj) 
return obj if '*' in path or '?' in path: paths = glob(path, recursive=True) obj = None def _merge(a, b): if a is None or b is None: return a or b elif isinstance(a, ConfigTree) and isinstance( b, ConfigTree): return ConfigTree.merge_configs(a, b) elif isinstance(a, list) and isinstance(b, list): return a + b else: raise ConfigException( 'Unable to make such include (merging unexpected types: {a} and {b}', a=type(a), b=type(b)) logger.debug('%s Loading following configs: %s', _prefix, paths) for p in paths: obj = _merge(obj, _load(p)) logger.debug('%s Result: %s', _prefix, obj) else: logger.debug('%s Loading single config: %s', _prefix, path) obj = _load(path) else: raise ConfigException( 'No file or URL specified at: {loc}: {instring}', loc=loc, instring=instring) return ConfigInclude(obj if isinstance(obj, list) else obj.items()) @contextlib.contextmanager def set_default_white_spaces(): default = ParserElement.DEFAULT_WHITE_CHARS ParserElement.setDefaultWhitespaceChars(' \t') yield ParserElement.setDefaultWhitespaceChars(default) with set_default_white_spaces(): assign_expr = Forward() true_expr = Keyword("true", caseless=True).setParseAction( replaceWith(True)) false_expr = Keyword("false", caseless=True).setParseAction( replaceWith(False)) null_expr = Keyword("null", caseless=True).setParseAction( replaceWith(NoneValue())) key = QuotedString( '"', escChar='\\', unquoteResults=False) | Word(alphanums + alphas8bit + '._- /') eol = Word('\n\r').suppress() eol_comma = Word('\n\r,').suppress() comment = (Literal('#') | Literal('//')) - SkipTo(eol | StringEnd()) comment_eol = Suppress(Optional(eol_comma) + comment) comment_no_comma_eol = (comment | eol).suppress() number_expr = Regex( r'[+-]?(\d*\.\d+|\d+(\.\d+)?)([eE][+\-]?\d+)?(?=$|[ \t]*([\$\}\],#\n\r]|//))', re.DOTALL).setParseAction(convert_number) # Must be sorted from longest to shortest otherwise 'weeks' will match 'w' and 'eeks' # will be parsed as a general string. 
period_types = sorted(itertools.chain.from_iterable( cls.get_supported_period_type_map().values()), key=lambda x: len(x), reverse=True) period_expr = Regex( r'(?P<value>\d+)\s*(?P<unit>' + '|'.join(period_types) + ')$', flags=re.MULTILINE, ).setParseAction(convert_period) # multi line string using """ # Using fix described in http://pyparsing.wikispaces.com/share/view/3778969 multiline_string = Regex( '""".*?"*"""', re.DOTALL | re.UNICODE).setParseAction(parse_multi_string) # single quoted line string quoted_string = Regex( r'"(?:[^"\\\n]|\\.)*"[ \t]*', re.UNICODE).setParseAction(create_quoted_string) # unquoted string that takes the rest of the line until an optional comment # we support .properties multiline support which is like this: # line1 \ # line2 \ # so a backslash precedes the \n unquoted_string = Regex( r'(?:[^^`+?!@*&"\[\{\s\]\}#,=\$\\]|\\.)+[ \t]*', re.UNICODE).setParseAction(unescape_string) substitution_expr = Regex(r'[ \t]*\$\{[^\}]+\}[ \t]*' ).setParseAction(create_substitution) string_expr = multiline_string | quoted_string | unquoted_string value_expr = period_expr | number_expr | true_expr | false_expr | null_expr | string_expr include_content = (quoted_string | ( (Keyword('url') | Keyword('file') | Keyword('package')) - Literal('(').suppress() - quoted_string - Literal(')').suppress())) include_expr = (Keyword("include", caseless=True).suppress() + (include_content | (Keyword("required") - Literal('(').suppress() - include_content - Literal(')').suppress())) ).setParseAction(include_config) root_dict_expr = Forward() dict_expr = Forward() list_expr = Forward() multi_value_expr = ZeroOrMore(comment_eol | include_expr | substitution_expr | dict_expr | list_expr | value_expr | (Literal('\\') - eol).suppress()) # for a dictionary : or = is optional # last zeroOrMore is because we can have t = {a:4} {b: 6} {c: 7} which is dictionary concatenation inside_dict_expr = ConfigTreeParser( ZeroOrMore(comment_eol | include_expr | assign_expr | eol_comma)) inside_root_dict_expr = ConfigTreeParser( ZeroOrMore(comment_eol | include_expr | assign_expr | eol_comma), root=True) dict_expr << Suppress('{') - inside_dict_expr - Suppress('}') root_dict_expr << Suppress('{') - inside_root_dict_expr - Suppress( '}') list_entry = ConcatenatedValueParser(multi_value_expr) list_expr << Suppress('[') - ListParser(list_entry - ZeroOrMore( eol_comma - list_entry)) - Suppress(']') # special case when we have a value assignment where the string can potentially be the remainder of the line assign_expr << Group(key - ZeroOrMore(comment_no_comma_eol) - ( dict_expr | (Literal('=') | Literal(':') | Literal('+=')) - ZeroOrMore(comment_no_comma_eol) - ConcatenatedValueParser(multi_value_expr))) # the file can be { ... } where {} can be omitted or [] config_expr = ZeroOrMore(comment_eol | eol) + ( list_expr | root_dict_expr | inside_root_dict_expr) + ZeroOrMore(comment_eol | eol_comma) config = config_expr.parseString(content, parseAll=True)[0] if resolve: allow_unresolved = resolve and unresolved_value is not DEFAULT_SUBSTITUTION and unresolved_value is not MANDATORY_SUBSTITUTION has_unresolved = cls.resolve_substitutions( config, allow_unresolved) if has_unresolved and unresolved_value is MANDATORY_SUBSTITUTION: raise ConfigSubstitutionException( 'resolve cannot be set to True and unresolved_value to MANDATORY_SUBSTITUTION' ) if unresolved_value is not NO_SUBSTITUTION and unresolved_value is not DEFAULT_SUBSTITUTION: cls.unresolve_substitutions_to_value(config, unresolved_value) return config
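# Standalone sketch (not part of pyhocon) of the substitution-token technique used
# above: a Regex with named groups plus a parse action turns "${path}" / "${?path}"
# text into a structured value. The names below are made up for the demo.
import re
from pyparsing import Regex

SUBST = r"\$\{(?P<optional>\?)?(?P<variable>[^}]+)\}"

def make_subst(instring, loc, tokens):
    m = re.match(SUBST, tokens[0])
    return {"variable": m.group("variable"), "optional": m.group("optional") == "?"}

substitution = Regex(SUBST).setParseAction(make_subst)

print(substitution.parseString("${a.b.c}")[0])   # {'variable': 'a.b.c', 'optional': False}
print(substitution.parseString("${?maybe}")[0])  # {'variable': 'maybe', 'optional': True}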
) atom = expr + (Literal("<=") | Literal("<") | Literal("=")) + expr atom.setParseAction(mk_atom) formula = infixNotation( atom, [ ("not", 1, opAssoc.RIGHT, mk_not), ("and", 2, opAssoc.LEFT, mk_and), ("or", 2, opAssoc.LEFT, mk_or), ], ) block = Forward() assign_stmt = varname + ":=" + expr if_stmt = Keyword("if") + formula + block + Keyword("else").suppress() + block while_stmt = Keyword("while") + formula + block panic_stmt = Literal("panic") + Literal('(').suppress() + Literal(')').suppress() print_stmt = ( Literal("print") ) + ( Literal('(').suppress() ) + ( expr ) + ( Literal(')').suppress() ) stmt = if_stmt ^ while_stmt ^ print_stmt ^ assign_stmt ^ panic_stmt block << (
LANGLE = Literal("<").suppress() LBRACE = Literal("[").suppress() LPAREN = Literal("(").suppress() PERIOD = Literal(".").suppress() RANGLE = Literal(">").suppress() RBRACE = Literal("]").suppress() RPAREN = Literal(")").suppress() CATEGORIES = CaselessLiteral("categories").suppress() END = CaselessLiteral("end").suppress() FONT = CaselessLiteral("font").suppress() HINT = CaselessLiteral("hint").suppress() ITEM = CaselessLiteral("item").suppress() OBJECT = CaselessLiteral("object").suppress() attribute_value_pair = Forward() # this is recursed in item_list_entry simple_identifier = Word(alphas, alphanums + "_") identifier = Combine(simple_identifier + ZeroOrMore(Literal(".") + simple_identifier)) object_name = identifier object_type = identifier # Integer and floating point values are converted to Python longs and floats, respectively. int_value = Combine(Optional("-") + Word(nums)).setParseAction(lambda s, l, t: [int(t[0])]) float_value = Combine(Optional("-") + Optional(Word(nums)) + "." + Word(nums)).setParseAction(lambda s, l, t: [float(t[0])]) number_value = float_value | int_value # Base16 constants are left in string form, including the surrounding braces.
def sql2table_list(tables, show_columns=True): def field_act(s, loc, tok): return " ".join(tok).replace('\n', '\\n') def field_list_act(s, loc, tok): return tok def create_table_act(s, loc, tok): table = Table(tok["tableName"], None, {}, {}) for t in tok["fields"]: if str(t).startswith("FK:"): l = t[3:].split(":") if len(l) > 2: table.fkeys[l[0]] = {"ftable": l[1], "fcoloumn": l[2]} else: table.fkeys[l[0]] = {"ftable": l[1]} elif str(t).startswith("PK:"): table.pk = t[3:] elif str(t).startswith("KEY:"): pass else: l = t.split(" ") table.columns[l[0]] = " ".join(l[1:]) tables.append(table) def add_fkey_act(s, loc, tok): return '{tableName}:{keyName}:{fkTable}:{fkCol}'.format(**tok) def fkey_act(s, loc, tok): return 'FK:{keyName}:{fkTable}:{fkCol}'.format(**tok) def fkey_nocols_act(s, loc, tok): return 'FK:{keyName}:{fkTable}'.format(**tok) # def fkey_list_act(s, loc, tok): # return "\n ".join(tok) def other_statement_act(s, loc, tok): pass def join_string_act(s, loc, tok): return "".join(tok).replace('\n', '\\n') def quoted_default_value_act(s, loc, tok): return tok[0] + " " + "".join(tok[1::]) def pk_act(s, loc, tok): return 'PK:{primary_key}'.format(**tok) def k_act(s, loc, tok): pass def no_act(s, loc, tok): pass string = Regex('[a-zA-Z0-9=_]+') ws = OneOrMore(White()).suppress() lp = Regex('[(]').suppress() rp = Regex('[)]').suppress() c = Regex('[,]').suppress() q = Regex("[`]").suppress() parenthesis = Forward() parenthesis <<= "(" + ZeroOrMore(CharsNotIn("()") | parenthesis) + ")" parenthesis.setParseAction(join_string_act) quoted_string = "'" + ZeroOrMore(CharsNotIn("'")) + "'" quoted_string.setParseAction(join_string_act) quoted_default_value = "DEFAULT" + quoted_string + OneOrMore( CharsNotIn(", \n\t")) quoted_default_value.setParseAction(quoted_default_value_act) column_comment = CaselessKeyword("COMMENT") + quoted_string primary_key = CaselessKeyword('PRIMARY').suppress() + CaselessKeyword( "KEY").suppress() + lp + string.setResultsName('primary_key') + rp primary_key.ignore("`") primary_key.setParseAction(pk_act) key_def = Optional(CaselessKeyword('UNIQUE').suppress()) + CaselessKeyword( 'KEY').suppress() + Word(alphanums + "_") + lp + delimitedList( string.setResultsName('key'), delim=",") + rp key_def.ignore("`") key_def.setParseAction(k_act) fkey_def = CaselessKeyword("CONSTRAINT") + Word( alphanums + "_" ) + CaselessKeyword("FOREIGN") + CaselessKeyword("KEY") + lp + Word( alphanums + "_" ).setResultsName("keyName") + rp + CaselessKeyword("REFERENCES") + Word( alphanums + "._").setResultsName("fkTable") + lp + Word( alphanums + "_").setResultsName("fkCol") + rp + Optional( CaselessKeyword("DEFERRABLE") ) + Optional( CaselessKeyword("ON") + (CaselessKeyword("DELETE") | CaselessKeyword("UPDATE")) + (CaselessKeyword("CASCADE") | CaselessKeyword("RESTRICT") | CaselessKeyword("NO ACTION") | CaselessKeyword("SET NULL")) ) + Optional( CaselessKeyword("ON") + (CaselessKeyword("DELETE") | CaselessKeyword("UPDATE")) + (CaselessKeyword("CASCADE") | CaselessKeyword("RESTRICT") | CaselessKeyword("NO ACTION") | CaselessKeyword("SET NULL"))) fkey_def.ignore("`") if show_columns: fkey_def.setParseAction(fkey_act) else: fkey_def.setParseAction(fkey_nocols_act) #fkey_list_def = ZeroOrMore(Suppress(",") + fkey_def) #fkey_list_def.setParseAction(fkey_list_act) field_def = Word(alphanums + "_\"':-/[].") + Word( alphanums + "_\"':-/[].") + Optional( CaselessKeyword("NOT NULL") | CaselessKeyword("DEFAULT") + Word(alphanums + "_\"':-/[].")) + Optional( OneOrMore(quoted_default_value | 
column_comment | Word(alphanums + "_\"'`:-/[].") | parenthesis)) field_def.ignore("`") # if columns: field_def.setParseAction(field_act) # else: # field_def.setParseAction(no_act) field_list_def = delimitedList(\ (primary_key.suppress() | \ key_def.suppress() | \ fkey_def | \ field_def \ ), delim=","\ ) #if columns else field_def.suppress() field_list_def.setParseAction(field_list_act) tablename_def = (Word(alphanums + "_.") | QuotedString("\"")) tablename_def.ignore("`") create_table_def = CaselessKeyword("CREATE").suppress() + CaselessKeyword( "TABLE").suppress() + tablename_def.setResultsName( "tableName") + lp + field_list_def.setResultsName( "fields") + rp + ZeroOrMore( Word(alphanums + "_\"'`:-/[].=")) + Word(";").suppress() create_table_def.setParseAction(create_table_act) add_fkey_def = CaselessKeyword( "ALTER") + "TABLE" + "ONLY" + tablename_def.setResultsName( "tableName") + "ADD" + "CONSTRAINT" + Word( alphanums + "_" ) + "FOREIGN" + "KEY" + "(" + Word(alphanums + "_").setResultsName( "keyName") + ")" + "REFERENCES" + Word( alphanums + "._").setResultsName("fkTable") + "(" + Word( alphanums + "_" ).setResultsName("fkCol") + ")" + Optional( Literal("DEFERRABLE")) + Optional( Literal("ON") + "DELETE" + (Literal("CASCADE") | Literal("RESTRICT"))) + ";" add_fkey_def.setParseAction(add_fkey_act) other_statement_def = OneOrMore(CharsNotIn(";")) + ";" other_statement_def.setParseAction(other_statement_act) comment_def = "--" + ZeroOrMore(CharsNotIn("\n")) comment_def.setParseAction(other_statement_act) return OneOrMore(comment_def | create_table_def | add_fkey_def | other_statement_def)
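# Standalone miniature (illustrative only, not the full field/fkey grammar above)
# of the CREATE TABLE handling: CaselessKeyword tokens plus setResultsName make the
# table name and column definitions addressable by name after parsing.
from pyparsing import CaselessKeyword, Group, Suppress, Word, alphanums, delimitedList

ident = Word(alphanums + "_")
column = Group(ident("name") + ident("ctype"))
create = (CaselessKeyword("CREATE").suppress() + CaselessKeyword("TABLE").suppress()
          + ident("tableName") + Suppress("(")
          + Group(delimitedList(column))("fields") + Suppress(")"))

tok = create.parseString("CREATE TABLE users (id int, name varchar)")
print(tok.tableName)                            # users
print([(c.name, c.ctype) for c in tok.fields])  # [('id', 'int'), ('name', 'varchar')]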
exprStack.append(toks[0]) # the following statements define the grammar for the parser. point = Literal(".") e = CaselessLiteral("E") plusorminus = Literal("+") | Literal("-") number = Word(nums) integer = Combine(Optional(plusorminus) + number) floatnumber = Combine(integer + Optional(point + Optional(number)) + Optional(e + integer)) lbracket = Literal("[") rbracket = Literal("]") ident = Forward() ## The definition below treats array accesses as identifiers. This means your expressions ## can include references to array elements, rows and columns, e.g., a = b[i] + 5. ## Expressions within []'s are not presently supported, so a = b[i+1] will raise ## a ParseException. ident = Combine( Word(alphas + "-", alphanums + "_") + ZeroOrMore(lbracket + (Word(alphas + "-", alphanums + "_") | integer) + rbracket)) plus = Literal("+") minus = Literal("-") mult = Literal("*") div = Literal("/") solveop = Literal("\\") outer = Literal("@")
def parse_col_desc(self, data_type):
    '''
    Returns a parsed output based on the type describe output.

    data_type is a string that should look like this:
        "bigint"
    or like this:
        "array<struct< field_51:int, field_52:bigint, field_53:int, field_54:boolean >>"

    In the first case, this method would return: 'bigint'
    In the second case, it would return:
        ['array', ['struct', ['field_51', 'int'], ['field_52', 'bigint'],
                   ['field_53', 'int'], ['field_54', 'boolean']]]

    This output is used to create the appropriate columns by self.create_column().
    '''
    COMMA, LPAR, RPAR, COLON, LBRA, RBRA = map(Suppress, ",<>:()")
    t_bigint = Literal('bigint')
    t_int = Literal('int')
    t_integer = Literal('integer')
    t_smallint = Literal('smallint')
    t_tinyint = Literal('tinyint')
    t_boolean = Literal('boolean')
    t_string = Literal('string')
    t_timestamp = Literal('timestamp')
    t_timestamp_without_time_zone = Literal('timestamp without time zone')
    t_float = Literal('float')
    t_double = Literal('double')
    t_real = Literal('real')
    t_double_precision = Literal('double precision')
    t_decimal = Group(
        Literal('decimal') + LBRA + Word(nums) + COMMA + Word(nums) + RBRA)
    t_numeric = Group(
        Literal('numeric') + LBRA + Word(nums) + COMMA + Word(nums) + RBRA)
    t_char = Group(Literal('char') + LBRA + Word(nums) + RBRA)
    t_character = Group(Literal('character') + LBRA + Word(nums) + RBRA)
    t_varchar = (Group(Literal('varchar') + LBRA + Word(nums) + RBRA) |
                 Literal('varchar'))
    t_character_varying = Group(
        Literal('character varying') + LBRA + Word(nums) + RBRA)
    t_struct = Forward()
    t_array = Forward()
    t_map = Forward()
    complex_type = (t_struct | t_array | t_map)
    any_type = (complex_type | t_bigint | t_int | t_integer | t_smallint |
                t_tinyint | t_boolean | t_string | t_timestamp |
                t_timestamp_without_time_zone | t_float | t_double | t_real |
                t_double_precision | t_decimal | t_numeric | t_char |
                t_character | t_character_varying | t_varchar)
    struct_field_name = Word(alphanums + '_')
    struct_field_pair = Group(struct_field_name + COLON + any_type)
    t_struct << Group(
        Literal('struct') + LPAR + delimitedList(struct_field_pair) + RPAR)
    t_array << Group(Literal('array') + LPAR + any_type + RPAR)
    t_map << Group(
        Literal('map') + LPAR + any_type + COMMA + any_type + RPAR)
    return any_type.parseString(data_type)[0]
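# Standalone sketch of the recursive pattern in parse_col_desc: a Forward lets
# array<...> / struct<...> types nest to any depth. Only a few scalar types are
# included here to keep the demo short.
from pyparsing import Forward, Group, Literal, Suppress, Word, alphanums, delimitedList, oneOf

LT, GT, COLON, COMMA = map(Suppress, "<>:,")
scalar = oneOf("int bigint boolean string")
any_type = Forward()
struct_field = Group(Word(alphanums + "_") + COLON + any_type)
t_struct = Group(Literal("struct") + LT + delimitedList(struct_field) + GT)
t_array = Group(Literal("array") + LT + any_type + GT)
any_type <<= (t_struct | t_array | scalar)

print(any_type.parseString("array<struct<a:int,b:boolean>>").asList()[0])
# ['array', ['struct', ['a', 'int'], ['b', 'boolean']]]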
""" from pyparsing import Suppress,Word,nums,alphas,alphanums,Combine,oneOf,\ Optional,QuotedString,Forward,Group,ZeroOrMore,printables,srange MARK, UNMARK, AT, COLON, QUOTE = map(Suppress, "[]@:'") NUMBER = Word(nums) NUMBER.setParseAction(lambda t: int(t[0])) FLOAT = Combine(oneOf("+ -") + Word(nums) + "." + Optional(Word(nums))) FLOAT.setParseAction(lambda t: float(t[0])) STRING = QuotedString('"', multiline=True) WORD = Word(alphas, alphanums + "_:") ATTRIBUTE = Combine(AT + WORD) strBody = Forward() def setBodyLength(tokens): strBody << Word(srange(r'[\0x00-\0xffff]'), exact=int(tokens[0])) return "" BLOB = Combine(QUOTE + Word(nums).setParseAction(setBodyLength) + COLON + strBody + QUOTE) item = Forward() def assignUsing(s): def assignPA(tokens):
def checkQuotedColon(s, loc, toks): if ':' in toks[0]: raise InvalidSQL("identifier with colon : must be in double quotes.") def checkDoubleQuotes(s, loc, toks): # TODO really? if toks[0][0] == "'": raise InvalidSQL("quoted strings must use double quotes.") ident = Word(alphas, alphanums + "_:").setParseAction(checkQuotedColon) columnName = (ident | quotedString().setParseAction(checkDoubleQuotes))("columnName") whereExpression = Forward() and_ = Keyword("and", caseless=True)('and') or_ = Keyword("or", caseless=True)('or') in_ = Keyword("in", caseless=True)("in") isnotnull = Keyword("is not null", caseless=True)('notnull') binop = oneOf("= != < > >= <=", caseless=True)('binop') intNum = Word(nums) columnRval = (intNum | quotedString)('rval*') whereCondition = Group((columnName + isnotnull) | (columnName + binop + columnRval) | (columnName + in_ + "(" + delimitedList(columnRval) + ")") | ("(" + whereExpression + ")"))('condition') whereExpression << Group(whereCondition + ZeroOrMore( (and_ | or_) + whereExpression))('expression')
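# Standalone miniature of the recursive WHERE grammar above: whereCondition and
# whereExpression refer to each other through a Forward, so parenthesised and
# and/or-chained conditions nest naturally.
from pyparsing import (Forward, Group, Keyword, Word, ZeroOrMore, alphanums, alphas,
                       nums, oneOf, quotedString)

ident = Word(alphas, alphanums + "_")
binop = oneOf("= != < > >= <=")
rval = Word(nums) | quotedString
where_expr = Forward()
condition = Group((ident + binop + rval) | ("(" + where_expr + ")"))
where_expr <<= condition + ZeroOrMore((Keyword("and") | Keyword("or")) + where_expr)

print(where_expr.parseString("a = 1 and (b != 'x' or c > 10)").asList())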
Optional(e + integer)) numarg = (real | integer) identifier = Word(alphas + "_", alphanums + "_") dotidentifier = Word(alphas, alphanums + "_" + ".") bracketidentifier = identifier + lbracket + Word(alphas) + rbracket statement = Group(identifier + equal + (quotedString | restOfLine)) #math mathElements = (numarg | ',' | '+' | '-' | '*' | '/' | '^' | '&' | '>' | '<' | '=' | '|' | identifier) nestedMathDefinition = nestedExpr('(', ')', content=mathElements) mathDefinition = OneOrMore(mathElements) section_enclosure2_ = nestedExpr('{', '}') section_enclosure_ = Forward() nestedBrackets = nestedExpr('[', ']', content=section_enclosure_) nestedCurlies = nestedExpr('{', '}', content=section_enclosure_) section_enclosure_ << ( statement | Group(identifier + ZeroOrMore(identifier) + nestedCurlies) | Group(identifier + '@' + restOfLine) | Word(alphas, alphanums + "_[]") | identifier | Suppress(',') | '@' | real) function_entry_ = Suppress(dbquotes) + Group( identifier.setResultsName('functionName') + Suppress(lparen) + delimitedList(Group( identifier.setResultsName('key') + Suppress(equal) + (identifier | numarg).setResultsName('value')), delim=',').setResultsName('parameters') + Suppress(rparen)) + Suppress(dbquotes)
from pyparsing import ( Forward, Combine, Optional, Word, Literal, CaselessKeyword, CaselessLiteral, Group, FollowedBy, LineEnd, OneOrMore, ZeroOrMore, alphas, alphanums, printables, delimitedList, quotedString, Regex, __version__, ) grammar = Forward() expression = Forward() # Literals intNumber = Regex(r'-?\d+')('integer') floatNumber = Regex(r'-?\d+\.\d+')('float') sciNumber = Combine( (floatNumber | intNumber) + CaselessLiteral('e') + intNumber )('scientific') aString = quotedString('string') # Use lookahead to match only numbers in a list (can't remember why this is necessary) afterNumber = FollowedBy(",") ^ FollowedBy(")") ^ FollowedBy(LineEnd()) number = Group( (sciNumber + afterNumber) | (floatNumber + afterNumber) | (intNumber + afterNumber) )('number')
def SPICE_BNF(): global bnf if not bnf: # punctuation colon = Literal(":").suppress() lbrace = Literal("{").suppress() rbrace = Literal("}").suppress() lbrack = Literal("[").suppress() rbrack = Literal("]").suppress() lparen = Literal("(").suppress() rparen = Literal(")").suppress() equals = Literal("=").suppress() comma = Literal(",").suppress() semi = Literal(";").suppress() # primitive types int8_ = Keyword("int8").setParseAction(replaceWith(ptypes.int8)) uint8_ = Keyword("uint8").setParseAction(replaceWith(ptypes.uint8)) int16_ = Keyword("int16").setParseAction(replaceWith(ptypes.int16)) uint16_ = Keyword("uint16").setParseAction(replaceWith(ptypes.uint16)) int32_ = Keyword("int32").setParseAction(replaceWith(ptypes.int32)) uint32_ = Keyword("uint32").setParseAction(replaceWith(ptypes.uint32)) int64_ = Keyword("int64").setParseAction(replaceWith(ptypes.int64)) uint64_ = Keyword("uint64").setParseAction(replaceWith(ptypes.uint64)) # keywords channel_ = Keyword("channel") enum32_ = Keyword("enum32").setParseAction(replaceWith(32)) enum16_ = Keyword("enum16").setParseAction(replaceWith(16)) enum8_ = Keyword("enum8").setParseAction(replaceWith(8)) flags32_ = Keyword("flags32").setParseAction(replaceWith(32)) flags16_ = Keyword("flags16").setParseAction(replaceWith(16)) flags8_ = Keyword("flags8").setParseAction(replaceWith(8)) channel_ = Keyword("channel") server_ = Keyword("server") client_ = Keyword("client") protocol_ = Keyword("protocol") typedef_ = Keyword("typedef") struct_ = Keyword("struct") message_ = Keyword("message") image_size_ = Keyword("image_size") bytes_ = Keyword("bytes") cstring_ = Keyword("cstring") switch_ = Keyword("switch") default_ = Keyword("default") case_ = Keyword("case") identifier = Word(alphas, alphanums + "_") enumname = Word(alphanums + "_") integer = ( Combine(CaselessLiteral("0x") + Word(nums + "abcdefABCDEF")) | Word(nums + "+-", nums)).setName("int").setParseAction(cvtInt) typename = identifier.copy().setParseAction( lambda toks: ptypes.TypeRef(str(toks[0]))) # This is just normal "types", i.e. 
not channels or messages typeSpec = Forward() attributeValue = integer ^ identifier attribute = Group( Combine("@" + identifier) + Optional(lparen + delimitedList(attributeValue) + rparen)) attributes = Group(ZeroOrMore(attribute)) arraySizeSpecImage = Group(image_size_ + lparen + integer + comma + identifier + comma + identifier + rparen) arraySizeSpecBytes = Group(bytes_ + lparen + identifier + comma + identifier + rparen) arraySizeSpecCString = Group(cstring_ + lparen + rparen) arraySizeSpec = lbrack + Optional( identifier ^ integer ^ arraySizeSpecImage ^ arraySizeSpecBytes ^ arraySizeSpecCString, default="") + rbrack variableDef = Group(typeSpec + Optional("*", default=None) + identifier + Optional(arraySizeSpec, default=None) + attributes - semi) \ .setParseAction(parseVariableDef) switchCase = Group(Group(OneOrMore(default_.setParseAction(replaceWith(None)) + colon | Group(case_.suppress() + Optional("!", default="") + identifier) + colon)) + variableDef) \ .setParseAction(lambda toks: ptypes.SwitchCase(toks[0][0], toks[0][1])) switchBody = Group(switch_ + lparen + delimitedList(identifier,delim='.', combine=True) + rparen + lbrace + Group(OneOrMore(switchCase)) + rbrace + identifier + attributes - semi) \ .setParseAction(lambda toks: ptypes.Switch(toks[0][1], toks[0][2], toks[0][3], toks[0][4])) messageBody = structBody = Group(lbrace + ZeroOrMore(variableDef | switchBody) + rbrace) structSpec = Group(struct_ + identifier + structBody + attributes).setParseAction( lambda toks: ptypes.StructType( toks[0][1], toks[0][2], toks[0][3])) # have to use longest match for type, in case a user-defined type name starts with a keyword type, like "channel_type" typeSpec << (structSpec ^ int8_ ^ uint8_ ^ int16_ ^ uint16_ ^ int32_ ^ uint32_ ^ int64_ ^ uint64_ ^ typename).setName("type") flagsBody = enumBody = Group( lbrace + delimitedList(Group(enumname + Optional(equals + integer))) + Optional(comma) + rbrace) messageSpec = Group(message_ + messageBody + attributes ).setParseAction(lambda toks: ptypes.MessageType( None, toks[0][1], toks[0][2])) | typename channelParent = Optional(colon + typename, default=None) channelMessage = Group(messageSpec + identifier + Optional(equals + integer, default=None) + semi) \ .setParseAction(lambda toks: ptypes.ChannelMember(toks[0][1], toks[0][0], toks[0][2])) channelBody = channelParent + Group(lbrace + ZeroOrMore( server_ + colon | client_ + colon | channelMessage) + rbrace) enum_ = (enum32_ | enum16_ | enum8_) flags_ = (flags32_ | flags16_ | flags8_) enumDef = Group(enum_ + identifier + enumBody + attributes - semi).setParseAction(lambda toks: ptypes.EnumType( toks[0][0], toks[0][1], toks[0][2], toks[0][3])) flagsDef = Group(flags_ + identifier + flagsBody + attributes - semi).setParseAction(lambda toks: ptypes.FlagsType( toks[0][0], toks[0][1], toks[0][2], toks[0][3])) messageDef = Group(message_ + identifier + messageBody + attributes - semi).setParseAction( lambda toks: ptypes.MessageType( toks[0][1], toks[0][2], toks[0][3])) channelDef = Group( channel_ + identifier + channelBody + attributes - semi).setParseAction(lambda toks: ptypes.ChannelType( toks[0][1], toks[0][2], toks[0][3], toks[0][4])) structDef = Group(struct_ + identifier + structBody + attributes - semi).setParseAction(lambda toks: ptypes.StructType( toks[0][1], toks[0][2], toks[0][3])) typedefDef = Group(typedef_ + identifier + typeSpec + attributes - semi).setParseAction(lambda toks: ptypes.TypeAlias( toks[0][1], toks[0][2], toks[0][3])) definitions = typedefDef | structDef | enumDef | 
flagsDef | messageDef | channelDef protocolChannel = Group(typename + identifier + Optional(equals + integer, default=None) + semi) \ .setParseAction(lambda toks: ptypes.ProtocolMember(toks[0][1], toks[0][0], toks[0][2])) protocolDef = Group(protocol_ + identifier + Group(lbrace + ZeroOrMore(protocolChannel) + rbrace) + semi) \ .setParseAction(lambda toks: ptypes.ProtocolType(toks[0][1], toks[0][2])) bnf = ZeroOrMore(definitions) + protocolDef + StringEnd() singleLineComment = "//" + restOfLine bnf.ignore(singleLineComment) bnf.ignore(cStyleComment) return bnf
def parse_algebra(self): """ Parse an algebraic expression into a tree. Store a `pyparsing.ParseResult` in `self.tree` with proper groupings to reflect parenthesis and order of operations. Leave all operators in the tree and do not parse any strings of numbers into their float versions. Adding the groups and result names makes the `repr()` of the result really gross. For debugging, use something like print OBJ.tree.asXML() """ # 0.33 or 7 or .34 or 16. number_part = Word(nums) inner_number = (number_part + Optional("." + Optional(number_part))) | ("." + number_part) # pyparsing allows spaces between tokens--`Combine` prevents that. inner_number = Combine(inner_number) # SI suffixes and percent. number_suffix = MatchFirst(Literal(k) for k in SUFFIXES.keys()) # 0.33k or 17 plus_minus = Literal('+') | Literal('-') number = Group( Optional(plus_minus) + inner_number + Optional( CaselessLiteral("E") + Optional(plus_minus) + number_part) + Optional(number_suffix)) number = number("number") # Predefine recursive variables. expr = Forward() # Handle variables passed in. They must start with a letter # and may contain numbers and underscores afterward. inner_varname = Combine( Word(alphas, alphanums + "_") + ZeroOrMore("'")) # Alternative variable name in tensor format # Tensor name must start with a letter, continue with alphanums # Indices may be alphanumeric # e.g., U_{ijk}^{123} upper_indices = Literal("^{") + Word(alphanums) + Literal("}") lower_indices = Literal("_{") + Word(alphanums) + Literal("}") tensor_lower = Combine( Word(alphas, alphanums) + lower_indices + ZeroOrMore("'")) tensor_mixed = Combine( Word(alphas, alphanums) + Optional(lower_indices) + upper_indices + ZeroOrMore("'")) # Test for mixed tensor first, then lower tensor alone, then generic variable name varname = Group(tensor_mixed | tensor_lower | inner_varname)("variable") varname.setParseAction(self.variable_parse_action) # Same thing for functions. function = Group(inner_varname + Suppress("(") + expr + Suppress(")"))("function") function.setParseAction(self.function_parse_action) atom = number | function | varname | "(" + expr + ")" atom = Group(atom)("atom") # Do the following in the correct order to preserve order of operation. pow_term = atom + ZeroOrMore("^" + atom) pow_term = Group(pow_term)("power") par_term = pow_term + ZeroOrMore('||' + pow_term) # 5k || 4k par_term = Group(par_term)("parallel") prod_term = par_term + ZeroOrMore( (Literal('*') | Literal('/')) + par_term) # 7 * 5 / 4 prod_term = Group(prod_term)("product") sum_term = Optional(plus_minus) + prod_term + ZeroOrMore( plus_minus + prod_term) # -5 + 4 - 3 sum_term = Group(sum_term)("sum") # Finish the recursion. expr << sum_term # pylint: disable=pointless-statement self.tree = (expr + stringEnd).parseString(self.math_expr)[0]
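# Standalone miniature of the tiered grammar in parse_algebra: each precedence
# level is a named Group, and the Forward for `expr` is closed at the
# lowest-precedence level. The reduced grammar and names are illustrative only.
from pyparsing import (Combine, Forward, Group, Literal, Optional, Suppress, Word,
                       ZeroOrMore, nums, stringEnd)

expr = Forward()
number = Group(Combine(Word(nums) + Optional("." + Optional(Word(nums)))))("number")
atom = Group(number | Suppress("(") + expr + Suppress(")"))("atom")
pow_term = Group(atom + ZeroOrMore("^" + atom))("power")
prod_term = Group(pow_term + ZeroOrMore((Literal("*") | Literal("/")) + pow_term))("product")
sum_term = Group(prod_term + ZeroOrMore((Literal("+") | Literal("-")) + prod_term))("sum")
expr <<= sum_term

tree = (expr + stringEnd).parseString("2*(3+4)^2")[0]
print(tree.asList())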
def Verilog_BNF(): global verilogbnf if verilogbnf is None: # compiler directives compilerDirective = Combine( "`" + \ oneOf("define undef ifdef else endif default_nettype " "include resetall timescale unconnected_drive " "nounconnected_drive celldefine endcelldefine") + \ restOfLine ).setName("compilerDirective") # primitives semi = Literal(";") lpar = Literal("(") rpar = Literal(")") equals = Literal("=") identLead = alphas+"$_" identBody = alphanums+"$_" identifier1 = Regex( r"\.?["+identLead+"]["+identBody+"]*(\.["+identLead+"]["+identBody+"]*)*" ).setName("baseIdent") identifier2 = Regex(r"\\\S+").setParseAction(lambda t:t[0][1:]).setName("escapedIdent") identifier = identifier1 | identifier2 hexnums = nums + "abcdefABCDEF" + "_?" base = Regex("'[bBoOdDhH]").setName("base") basedNumber = Combine( Optional( Word(nums + "_") ) + base + Word(hexnums+"xXzZ"), joinString=" ", adjacent=False ).setName("basedNumber") #~ number = ( basedNumber | Combine( Word( "+-"+spacedNums, spacedNums ) + #~ Optional( "." + Optional( Word( spacedNums ) ) ) + #~ Optional( e + Word( "+-"+spacedNums, spacedNums ) ) ).setName("numeric") ) number = ( basedNumber | \ Regex(r"[+-]?[0-9_]+(\.[0-9_]*)?([Ee][+-]?[0-9_]+)?") \ ).setName("numeric") #~ decnums = nums + "_" #~ octnums = "01234567" + "_" expr = Forward().setName("expr") concat = Group( "{" + delimitedList( expr ) + "}" ) multiConcat = Group("{" + expr + concat + "}").setName("multiConcat") funcCall = Group(identifier + "(" + Optional( delimitedList( expr ) ) + ")").setName("funcCall") subscrRef = Group("[" + delimitedList( expr, ":" ) + "]") subscrIdentifier = Group( identifier + Optional( subscrRef ) ) #~ scalarConst = "0" | (( FollowedBy('1') + oneOf("1'b0 1'b1 1'bx 1'bX 1'B0 1'B1 1'Bx 1'BX 1") )) scalarConst = Regex("0|1('[Bb][01xX])?") mintypmaxExpr = Group( expr + ":" + expr + ":" + expr ).setName("mintypmax") primary = ( number | ("(" + mintypmaxExpr + ")" ) | ( "(" + Group(expr) + ")" ).setName("nestedExpr") | #.setDebug() | multiConcat | concat | dblQuotedString | funcCall | subscrIdentifier ) unop = oneOf( "+ - ! ~ & ~& | ^| ^ ~^" ).setName("unop") binop = oneOf( "+ - * / % == != === !== && " "|| < <= > >= & | ^ ^~ >> << ** <<< >>>" ).setName("binop") expr << ( ( unop + expr ) | # must be first! ( primary + "?" 
+ expr + ":" + expr ) | ( primary + Optional( binop + expr ) ) ) lvalue = subscrIdentifier | concat # keywords if_ = Keyword("if") else_ = Keyword("else") edge = Keyword("edge") posedge = Keyword("posedge") negedge = Keyword("negedge") specify = Keyword("specify") endspecify = Keyword("endspecify") fork = Keyword("fork") join = Keyword("join") begin = Keyword("begin") end = Keyword("end") default = Keyword("default") forever = Keyword("forever") repeat = Keyword("repeat") while_ = Keyword("while") for_ = Keyword("for") case = oneOf( "case casez casex" ) endcase = Keyword("endcase") wait = Keyword("wait") disable = Keyword("disable") deassign = Keyword("deassign") force = Keyword("force") release = Keyword("release") assign = Keyword("assign") eventExpr = Forward() eventTerm = ( posedge + expr ) | ( negedge + expr ) | expr | ( "(" + eventExpr + ")" ) eventExpr << ( Group( delimitedList( eventTerm, "or" ) ) ) eventControl = Group( "@" + ( ( "(" + eventExpr + ")" ) | identifier | "*" ) ).setName("eventCtrl") delayArg = ( number | Word(alphanums+"$_") | #identifier | ( "(" + Group( delimitedList( mintypmaxExpr | expr ) ) + ")" ) ).setName("delayArg")#.setDebug() delay = Group( "#" + delayArg ).setName("delay")#.setDebug() delayOrEventControl = delay | eventControl assgnmt = Group( lvalue + "=" + Optional( delayOrEventControl ) + expr ).setName( "assgnmt" ) nbAssgnmt = Group(( lvalue + "<=" + Optional( delay ) + expr ) | ( lvalue + "<=" + Optional( eventControl ) + expr )).setName( "nbassgnmt" ) range = "[" + expr + ":" + expr + "]" paramAssgnmt = Group( identifier + "=" + expr ).setName("paramAssgnmt") parameterDecl = Group( "parameter" + Optional( range ) + delimitedList( paramAssgnmt ) + semi).setName("paramDecl") inputDecl = Group( "input" + Optional( range ) + delimitedList( identifier ) + semi ).setParseAction(parseInput) outputDecl = Group( "output" + Optional( range ) + delimitedList( identifier ) + semi ).setParseAction(parseOutput) inoutDecl = Group( "inout" + Optional( range ) + delimitedList( identifier ) + semi ) regIdentifier = Group( identifier + Optional( "[" + expr + ":" + expr + "]" ) ) regDecl = Group( "reg" + Optional("signed") + Optional( range ) + delimitedList( regIdentifier ) + semi ).setName("regDecl") timeDecl = Group( "time" + delimitedList( regIdentifier ) + semi ) integerDecl = Group( "integer" + delimitedList( regIdentifier ) + semi ) strength0 = oneOf("supply0 strong0 pull0 weak0 highz0") strength1 = oneOf("supply1 strong1 pull1 weak1 highz1") driveStrength = Group( "(" + ( ( strength0 + "," + strength1 ) | ( strength1 + "," + strength0 ) ) + ")" ).setName("driveStrength") nettype = oneOf("wire tri tri1 supply0 wand triand tri0 supply1 wor trior trireg") expandRange = Optional( oneOf("scalared vectored") ) + range realDecl = Group( "real" + delimitedList( identifier ) + semi ) eventDecl = Group( "event" + delimitedList( identifier ) + semi ) blockDecl = ( parameterDecl | regDecl | integerDecl | realDecl | timeDecl | eventDecl ) stmt = Forward().setName("stmt")#.setDebug() stmtOrNull = stmt | semi caseItem = ( delimitedList( expr ) + ":" + stmtOrNull ) | \ ( default + Optional(":") + stmtOrNull ) stmt << Group( ( begin + Group( ZeroOrMore( stmt ) ) + end ).setName("begin-end") | ( if_ + Group("(" + expr + ")") + stmtOrNull + Optional( else_ + stmtOrNull ) ).setName("if") | ( delayOrEventControl + stmtOrNull ) | ( case + "(" + expr + ")" + OneOrMore( caseItem ) + endcase ) | ( forever + stmt ) | ( repeat + "(" + expr + ")" + stmt ) | ( while_ + "(" + expr + ")" + 
stmt ) | ( for_ + "(" + assgnmt + semi + Group( expr ) + semi + assgnmt + ")" + stmt ) | ( fork + ZeroOrMore( stmt ) + join ) | ( fork + ":" + identifier + ZeroOrMore( blockDecl ) + ZeroOrMore( stmt ) + end ) | ( wait + "(" + expr + ")" + stmtOrNull ) | ( "->" + identifier + semi ) | ( disable + identifier + semi ) | ( assign + assgnmt + semi ) | ( deassign + lvalue + semi ) | ( force + assgnmt + semi ) | ( release + lvalue + semi ) | ( begin + ":" + identifier + ZeroOrMore( blockDecl ) + ZeroOrMore( stmt ) + end ).setName("begin:label-end") | # these *have* to go at the end of the list!!! ( assgnmt + semi ) | ( nbAssgnmt + semi ) | ( Combine( Optional("$") + identifier ) + Optional( "(" + delimitedList(expr|empty) + ")" ) + semi ) ).setName("stmtBody") """ x::=<blocking_assignment> ; x||= <non_blocking_assignment> ; x||= if ( <expression> ) <statement_or_null> x||= if ( <expression> ) <statement_or_null> else <statement_or_null> x||= case ( <expression> ) <case_item>+ endcase x||= casez ( <expression> ) <case_item>+ endcase x||= casex ( <expression> ) <case_item>+ endcase x||= forever <statement> x||= repeat ( <expression> ) <statement> x||= while ( <expression> ) <statement> x||= for ( <assignment> ; <expression> ; <assignment> ) <statement> x||= <delay_or_event_control> <statement_or_null> x||= wait ( <expression> ) <statement_or_null> x||= -> <name_of_event> ; x||= <seq_block> x||= <par_block> x||= <task_enable> x||= <system_task_enable> x||= disable <name_of_task> ; x||= disable <name_of_block> ; x||= assign <assignment> ; x||= deassign <lvalue> ; x||= force <assignment> ; x||= release <lvalue> ; """ alwaysStmt = Group( "always" + Optional(eventControl) + stmt ).setName("alwaysStmt") initialStmt = Group( "initial" + stmt ).setName("initialStmt") chargeStrength = Group( "(" + oneOf( "small medium large" ) + ")" ).setName("chargeStrength") continuousAssign = Group( assign + Optional( driveStrength ) + Optional( delay ) + delimitedList( assgnmt ) + semi ).setName("continuousAssign") tfDecl = ( parameterDecl | inputDecl | outputDecl | inoutDecl | regDecl | timeDecl | integerDecl | realDecl ) functionDecl = Group( "function" + Optional( range | "integer" | "real" ) + identifier + semi + Group( OneOrMore( tfDecl ) ) + Group( ZeroOrMore( stmt ) ) + "endfunction" ) inputOutput = oneOf("input output") netDecl1Arg = ( nettype + Optional( expandRange ) + Optional( delay ) + Group( delimitedList( identifier ) ) ) #Group( delimitedList( ~inputOutput + identifier ) ) ) netDecl2Arg = ( "trireg" + Optional( chargeStrength ) + Optional( expandRange ) + Optional( delay ) + Group( delimitedList( identifier ) ) ) # Group( delimitedList( ~inputOutput + identifier ) ) ) netDecl3Arg = ( nettype + Optional( driveStrength ) + Optional( expandRange ) + Optional( delay ) + Group( delimitedList( assgnmt ) ) ) netDecl1 = Group(netDecl1Arg + semi) netDecl2 = Group(netDecl2Arg + semi) netDecl3 = Group(netDecl3Arg + semi) gateType = oneOf("and nand or nor xor xnor buf bufif0 bufif1 " "not notif0 notif1 pulldown pullup nmos rnmos " "pmos rpmos cmos rcmos tran rtran tranif0 " "rtranif0 tranif1 rtranif1" ) gateInstance = Optional( Group( identifier + Optional( range ) ) ) + \ "(" + Group( delimitedList( expr ) ) + ")" gateDecl = Group( gateType + Optional( driveStrength ) + Optional( delay ) + delimitedList( gateInstance) + semi ) udpInstance = Group( Group( identifier + Optional(range | subscrRef) ) + "(" + Group( delimitedList( expr ) ) + ")" ) udpInstantiation = Group( identifier - Optional( driveStrength ) + 
Optional( delay ) + delimitedList( udpInstance ) + semi ).setName("udpInstantiation")#.setParseAction(dumpTokens).setDebug() parameterValueAssignment = Group( Literal("#") + "(" + Group( delimitedList( expr ) ) + ")" ) namedPortConnection = Group( "." + identifier + "(" + expr + ")" ) modulePortConnection = expr | empty #~ moduleInstance = Group( Group ( identifier + Optional(range) ) + #~ ( delimitedList( modulePortConnection ) | #~ delimitedList( namedPortConnection ) ) ) inst_args = Group( "(" + (delimitedList( modulePortConnection ) | delimitedList( namedPortConnection )) + ")").setName("inst_args")#.setDebug() moduleInstance = Group( Group ( identifier + Optional(range) ) + inst_args ) moduleInstantiation = Group( identifier + Optional( parameterValueAssignment ) + delimitedList( moduleInstance ).setName("moduleInstanceList") + semi ).setName("moduleInstantiation").setParseAction(parseSubmod) parameterOverride = Group( "defparam" + delimitedList( paramAssgnmt ) + semi ) task = Group( "task" + identifier + semi + ZeroOrMore( tfDecl ) + stmtOrNull + "endtask" ) specparamDecl = Group( "specparam" + delimitedList( paramAssgnmt ) + semi ) pathDescr1 = Group( "(" + subscrIdentifier + "=>" + subscrIdentifier + ")" ) pathDescr2 = Group( "(" + Group( delimitedList( subscrIdentifier ) ) + "*>" + Group( delimitedList( subscrIdentifier ) ) + ")" ) pathDescr3 = Group( "(" + Group( delimitedList( subscrIdentifier ) ) + "=>" + Group( delimitedList( subscrIdentifier ) ) + ")" ) pathDelayValue = Group( ( "(" + Group( delimitedList( mintypmaxExpr | expr ) ) + ")" ) | mintypmaxExpr | expr ) pathDecl = Group( ( pathDescr1 | pathDescr2 | pathDescr3 ) + "=" + pathDelayValue + semi ).setName("pathDecl") portConditionExpr = Forward() portConditionTerm = Optional(unop) + subscrIdentifier portConditionExpr << portConditionTerm + Optional( binop + portConditionExpr ) polarityOp = oneOf("+ -") levelSensitivePathDecl1 = Group( if_ + Group("(" + portConditionExpr + ")") + subscrIdentifier + Optional( polarityOp ) + "=>" + subscrIdentifier + "=" + pathDelayValue + semi ) levelSensitivePathDecl2 = Group( if_ + Group("(" + portConditionExpr + ")") + lpar + Group( delimitedList( subscrIdentifier ) ) + Optional( polarityOp ) + "*>" + Group( delimitedList( subscrIdentifier ) ) + rpar + "=" + pathDelayValue + semi ) levelSensitivePathDecl = levelSensitivePathDecl1 | levelSensitivePathDecl2 edgeIdentifier = posedge | negedge edgeSensitivePathDecl1 = Group( Optional( if_ + Group("(" + expr + ")") ) + lpar + Optional( edgeIdentifier ) + subscrIdentifier + "=>" + lpar + subscrIdentifier + Optional( polarityOp ) + ":" + expr + rpar + rpar + "=" + pathDelayValue + semi ) edgeSensitivePathDecl2 = Group( Optional( if_ + Group("(" + expr + ")") ) + lpar + Optional( edgeIdentifier ) + subscrIdentifier + "*>" + lpar + delimitedList( subscrIdentifier ) + Optional( polarityOp ) + ":" + expr + rpar + rpar + "=" + pathDelayValue + semi ) edgeSensitivePathDecl = edgeSensitivePathDecl1 | edgeSensitivePathDecl2 edgeDescr = oneOf("01 10 0x x1 1x x0").setName("edgeDescr") timCheckEventControl = Group( posedge | negedge | (edge + "[" + delimitedList( edgeDescr ) + "]" )) timCheckCond = Forward() timCondBinop = oneOf("== === != !==") timCheckCondTerm = ( expr + timCondBinop + scalarConst ) | ( Optional("~") + expr ) timCheckCond << ( ( "(" + timCheckCond + ")" ) | timCheckCondTerm ) timCheckEvent = Group( Optional( timCheckEventControl ) + subscrIdentifier + Optional( "&&&" + timCheckCond ) ) timCheckLimit = expr controlledTimingCheckEvent = 
Group( timCheckEventControl + subscrIdentifier + Optional( "&&&" + timCheckCond ) ) notifyRegister = identifier systemTimingCheck1 = Group( "$setup" + lpar + timCheckEvent + "," + timCheckEvent + "," + timCheckLimit + Optional( "," + notifyRegister ) + rpar + semi ) systemTimingCheck2 = Group( "$hold" + lpar + timCheckEvent + "," + timCheckEvent + "," + timCheckLimit + Optional( "," + notifyRegister ) + rpar + semi ) systemTimingCheck3 = Group( "$period" + lpar + controlledTimingCheckEvent + "," + timCheckLimit + Optional( "," + notifyRegister ) + rpar + semi ) systemTimingCheck4 = Group( "$width" + lpar + controlledTimingCheckEvent + "," + timCheckLimit + Optional( "," + expr + "," + notifyRegister ) + rpar + semi ) systemTimingCheck5 = Group( "$skew" + lpar + timCheckEvent + "," + timCheckEvent + "," + timCheckLimit + Optional( "," + notifyRegister ) + rpar + semi ) systemTimingCheck6 = Group( "$recovery" + lpar + controlledTimingCheckEvent + "," + timCheckEvent + "," + timCheckLimit + Optional( "," + notifyRegister ) + rpar + semi ) systemTimingCheck7 = Group( "$setuphold" + lpar + timCheckEvent + "," + timCheckEvent + "," + timCheckLimit + "," + timCheckLimit + Optional( "," + notifyRegister ) + rpar + semi ) systemTimingCheck = (FollowedBy('$') + ( systemTimingCheck1 | systemTimingCheck2 | systemTimingCheck3 | systemTimingCheck4 | systemTimingCheck5 | systemTimingCheck6 | systemTimingCheck7 )).setName("systemTimingCheck") sdpd = if_ + Group("(" + expr + ")") + \ ( pathDescr1 | pathDescr2 ) + "=" + pathDelayValue + semi specifyItem = ~Keyword("endspecify") +( specparamDecl | pathDecl | levelSensitivePathDecl | edgeSensitivePathDecl | systemTimingCheck | sdpd ) """ x::= <specparam_declaration> x||= <path_declaration> x||= <level_sensitive_path_declaration> x||= <edge_sensitive_path_declaration> x||= <system_timing_check> x||= <sdpd> """ specifyBlock = Group( "specify" + ZeroOrMore( specifyItem ) + "endspecify" ) moduleItem = ~Keyword("endmodule") + ( parameterDecl | inputDecl | outputDecl | inoutDecl | regDecl | netDecl3 | netDecl1 | netDecl2 | timeDecl | integerDecl | realDecl | eventDecl | gateDecl | parameterOverride | continuousAssign | specifyBlock | initialStmt | alwaysStmt | task | functionDecl | # these have to be at the end - they start with identifiers moduleInstantiation | udpInstantiation ) """ All possible moduleItems, from Verilog grammar spec x::= <parameter_declaration> x||= <input_declaration> x||= <output_declaration> x||= <inout_declaration> ?||= <net_declaration> (spec does not seem consistent for this item) x||= <reg_declaration> x||= <time_declaration> x||= <integer_declaration> x||= <real_declaration> x||= <event_declaration> x||= <gate_declaration> x||= <UDP_instantiation> x||= <module_instantiation> x||= <parameter_override> x||= <continuous_assign> x||= <specify_block> x||= <initial_statement> x||= <always_statement> x||= <task> x||= <function> """ portRef = subscrIdentifier portExpr = portRef | Group( "{" + delimitedList( portRef ) + "}" ) port = portExpr | Group( ( "." 
+ identifier + "(" + portExpr + ")" ) ) moduleHdr = Group ( oneOf("module macromodule") + identifier("moduleName").setParseAction(parseModule) + Optional( "(" + Group( Optional( delimitedList( Group(oneOf("input output") + (netDecl1Arg | netDecl2Arg | netDecl3Arg) ) | port ) ) ) + ")" ) + semi ).setName("moduleHdr") module = Group( moduleHdr + Group( ZeroOrMore( moduleItem ) ) + "endmodule" ).setName("module")#.setDebug() udpDecl = outputDecl | inputDecl | regDecl #~ udpInitVal = oneOf("1'b0 1'b1 1'bx 1'bX 1'B0 1'B1 1'Bx 1'BX 1 0 x X") udpInitVal = (Regex("1'[bB][01xX]") | Regex("[01xX]")).setName("udpInitVal") udpInitialStmt = Group( "initial" + identifier + "=" + udpInitVal + semi ).setName("udpInitialStmt") levelSymbol = oneOf("0 1 x X ? b B") levelInputList = Group( OneOrMore( levelSymbol ).setName("levelInpList") ) outputSymbol = oneOf("0 1 x X") combEntry = Group( levelInputList + ":" + outputSymbol + semi ) edgeSymbol = oneOf("r R f F p P n N *") edge = Group( "(" + levelSymbol + levelSymbol + ")" ) | \ Group( edgeSymbol ) edgeInputList = Group( ZeroOrMore( levelSymbol ) + edge + ZeroOrMore( levelSymbol ) ) inputList = levelInputList | edgeInputList seqEntry = Group( inputList + ":" + levelSymbol + ":" + ( outputSymbol | "-" ) + semi ).setName("seqEntry") udpTableDefn = Group( "table" + OneOrMore( combEntry | seqEntry ) + "endtable" ).setName("table") """ <UDP> ::= primitive <name_of_UDP> ( <name_of_variable> <,<name_of_variable>>* ) ; <UDP_declaration>+ <UDP_initial_statement>? <table_definition> endprimitive """ udp = Group( "primitive" + identifier + "(" + Group( delimitedList( identifier ) ) + ")" + semi + OneOrMore( udpDecl ) + Optional( udpInitialStmt ) + udpTableDefn + "endprimitive" ) verilogbnf = OneOrMore( module | udp ) + StringEnd() verilogbnf.ignore( cppStyleComment ) verilogbnf.ignore( compilerDirective ) return verilogbnf
def graph_definition(): global graphparser if not graphparser: # punctuation colon = Literal(":") lbrace = Literal("{") rbrace = Literal("}") lbrack = Literal("[") rbrack = Literal("]") lparen = Literal("(") rparen = Literal(")") equals = Literal("=") comma = Literal(",") dot = Literal(".") slash = Literal("/") bslash = Literal("\\") star = Literal("*") semi = Literal(";") at = Literal("@") minus = Literal("-") # keywords strict_ = CaselessLiteral("strict") graph_ = CaselessLiteral("graph") digraph_ = CaselessLiteral("digraph") subgraph_ = CaselessLiteral("subgraph") node_ = CaselessLiteral("node") edge_ = CaselessLiteral("edge") # token definitions identifier = Word(alphanums + "_.").setName("identifier") double_quoted_string = QuotedString('"', multiline=True, unquoteResults=False, escChar='\\') # dblQuotedString noncomma = "".join([c for c in printables if c != ","]) alphastring_ = OneOrMore(CharsNotIn(noncomma + ' ')) def parse_html(s, loc, toks): return '<%s>' % ''.join(toks[0]) opener = '<' closer = '>' html_text = nestedExpr( opener, closer, (CharsNotIn(opener + closer))).setParseAction(parse_html).leaveWhitespace() ID = ( identifier | html_text | double_quoted_string | #.setParseAction(strip_quotes) | alphastring_).setName("ID") float_number = Combine(Optional(minus) + OneOrMore(Word(nums + "."))).setName("float_number") righthand_id = (float_number | ID).setName("righthand_id") port_angle = (at + ID).setName("port_angle") port_location = (OneOrMore(Group(colon + ID)) | Group(colon + lparen + ID + comma + ID + rparen)).setName("port_location") port = (Group(port_location + Optional(port_angle)) | Group(port_angle + Optional(port_location))).setName("port") node_id = (ID + Optional(port)) a_list = OneOrMore(ID + Optional(equals + righthand_id) + Optional(comma.suppress())).setName("a_list") attr_list = OneOrMore(lbrack.suppress() + Optional(a_list) + rbrack.suppress()).setName("attr_list") attr_stmt = (Group(graph_ | node_ | edge_) + attr_list).setName("attr_stmt") edgeop = (Literal("--") | Literal("->")).setName("edgeop") stmt_list = Forward() graph_stmt = Group(lbrace.suppress() + Optional(stmt_list) + rbrace.suppress() + Optional(semi.suppress())).setName("graph_stmt") edge_point = Forward() edgeRHS = OneOrMore(edgeop + edge_point) edge_stmt = edge_point + edgeRHS + Optional(attr_list) subgraph = Group(subgraph_ + Optional(ID) + graph_stmt).setName("subgraph") edge_point << Group(subgraph | graph_stmt | node_id).setName('edge_point') node_stmt = (node_id + Optional(attr_list) + Optional(semi.suppress())).setName("node_stmt") assignment = (ID + equals + righthand_id).setName("assignment") stmt = (assignment | edge_stmt | attr_stmt | subgraph | graph_stmt | node_stmt).setName("stmt") stmt_list << OneOrMore(stmt + Optional(semi.suppress())) graphparser = OneOrMore((Optional(strict_) + Group( (graph_ | digraph_)) + Optional(ID) + graph_stmt).setResultsName("graph")) singleLineComment = Group("//" + restOfLine) | Group("#" + restOfLine) # actions graphparser.ignore(singleLineComment) graphparser.ignore(cStyleComment) assignment.setParseAction(push_attr_list) a_list.setParseAction(push_attr_list) edge_stmt.setParseAction(push_edge_stmt) node_stmt.setParseAction(push_node_stmt) attr_stmt.setParseAction(push_default_stmt) subgraph.setParseAction(push_subgraph_stmt) graph_stmt.setParseAction(push_graph_stmt) graphparser.setParseAction(push_top_graph_stmt) return graphparser
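# Standalone miniature of the stmt_list/graph_stmt recursion above: a Forward for
# the statement list lets brace-delimited subgraphs nest. This toy grammar is
# illustrative only and is nowhere near the full DOT grammar.
from pyparsing import (Forward, Group, Keyword, Literal, OneOrMore, Optional,
                       Suppress, Word, alphanums)

ID = Word(alphanums + "_")
edgeop = Literal("--") | Literal("->")
stmt_list = Forward()
block = Group(Suppress("{") + Optional(stmt_list) + Suppress("}"))
edge_stmt = Group(ID + edgeop + ID)
subgraph = Group(Suppress(Keyword("subgraph")) + Optional(ID) + block)
stmt = subgraph | edge_stmt
stmt_list <<= OneOrMore(stmt + Optional(Suppress(";")))
graph = Suppress(Keyword("digraph")) + Optional(ID) + block

print(graph.parseString("digraph g { a -> b; subgraph s { c -> d } }").asList())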
def parse_math_str(input_string,variables={}): # Uncomment the line below for readline support on interactive terminal # import readline import re from pyparsing import Word, alphas, ParseException, Literal, CaselessLiteral, Combine, Optional, nums, Or, Forward, ZeroOrMore, StringEnd, alphanums import math # Debugging flag can be set to either "debug_flag=True" or "debug_flag=False" debug_flag=False exprStack = [] varStack = [] def pushFirst( str, loc, toks ): exprStack.append( toks[0] ) def assignVar( str, loc, toks ): varStack.append( toks[0] ) # define grammar point = Literal('.') e = CaselessLiteral('E') plusorminus = Literal('+') | Literal('-') number = Word(nums) integer = Combine( Optional(plusorminus) + number ) floatnumber = Combine( integer + Optional( point + Optional(number) ) + Optional( e + integer ) ) ident = Word(alphas,alphanums + '_') plus = Literal( "+" ) minus = Literal( "-" ) mult = Literal( "*" ) div = Literal( "/" ) lpar = Literal( "(" ).suppress() rpar = Literal( ")" ).suppress() addop = plus | minus multop = mult | div expop = Literal( "^" ) assign = Literal( "=" ) expr = Forward() atom = ( ( e | floatnumber | integer | ident ).setParseAction(pushFirst) | ( lpar + expr.suppress() + rpar ) ) factor = Forward() factor << atom + ZeroOrMore( ( expop + factor ).setParseAction( pushFirst ) ) term = factor + ZeroOrMore( ( multop + factor ).setParseAction( pushFirst ) ) expr << term + ZeroOrMore( ( addop + term ).setParseAction( pushFirst ) ) bnf = Optional((ident + assign).setParseAction(assignVar)) + expr pattern = bnf + StringEnd() # map operator symbols to corresponding arithmetic operations opn = { "+" : ( lambda a,b: a + b ), "-" : ( lambda a,b: a - b ), "*" : ( lambda a,b: a * b ), "/" : ( lambda a,b: a / b ), "^" : ( lambda a,b: a ** b ) } # Recursive function that evaluates the stack def evaluateStack( s ): op = s.pop() if op in "+-*/^": op2 = evaluateStack( s ) op1 = evaluateStack( s ) return opn[op]( op1, op2 ) elif op == "PI": return math.pi elif op == "E": return math.e elif re.search('^[a-zA-Z][a-zA-Z0-9_]*$',op): if op in variables: return variables[op] else: return 0 elif re.search('^[-+]?[0-9]+$',op): return int( op ) else: return float( op ) # Start with a blank exprStack and a blank varStack exprStack = [] varStack = [] if input_string != '': # try parsing the input string try: L=pattern.parseString( input_string ) except ParseException as err: L=['Parse Failure',input_string] # show result of parsing the input string if debug_flag: print(input_string, "->", L) if len(L)==0 or L[0] != 'Parse Failure': if debug_flag: print("exprStack=", exprStack) # calculate result , store a copy in ans , display the result to user result=evaluateStack(exprStack) variables['ans']=result #print result return result # Assign result to a variable if required if debug_flag: print("var=",varStack) if len(varStack)==1: variables[varStack.pop()]=result if debug_flag: print("variables=",variables) else: print('Parse Failure') print(err.line) print(" "*(err.column-1) + "^") print(err)
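# Example calls, assuming the parse_math_str function above is in scope: the
# expression is pushed onto exprStack during parsing and then reduced by
# evaluateStack.
print(parse_math_str("2+3*4"))          # 14
print(parse_math_str("x^2", {"x": 5}))  # 25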
def __init__(self): """ Please use any of the following symbols: expop :: '^' multop :: '*' | '/' addop :: '+' | '-' integer :: ['+' | '-'] '0'..'9'+ """ point = Literal(".") e = CaselessLiteral("E") fnumber = Combine( Word("+-" + nums, nums) + Optional(point + Optional(Word(nums))) + Optional(e + Word("+-" + nums, nums))) ident = Word(alphas, alphas + nums + "_$") plus = Literal("+") minus = Literal("-") mult = Literal("*") div = Literal("/") lpar = Literal("(").suppress() rpar = Literal(")").suppress() addop = plus | minus multop = mult | div expop = Literal("^") pi = CaselessLiteral("PI") expr = Forward() atom = ((Optional(oneOf("- +")) + (pi | e | fnumber | ident + lpar + expr + rpar).setParseAction(self.pushFirst)) | Optional(oneOf("- +")) + Group(lpar + expr + rpar)).setParseAction(self.pushUMinus) # by defining exponentiation as "atom [ ^ factor ]..." instead of # "atom [ ^ atom ]...", we get right-to-left exponents, instead of left-to-right # that is, 2^3^2 = 2^(3^2), not (2^3)^2. factor = Forward() factor << atom + ZeroOrMore( (expop + factor).setParseAction(self.pushFirst)) term = factor + ZeroOrMore( (multop + factor).setParseAction(self.pushFirst)) expr << term + ZeroOrMore( (addop + term).setParseAction(self.pushFirst)) # addop_term = ( addop + term ).setParseAction( self.pushFirst ) # general_term = term + ZeroOrMore( addop_term ) | OneOrMore( addop_term) # expr << general_term self.bnf = expr # this will map operator symbols to their corresponding arithmetic operations epsilon = 1e-12 self.opn = { "+": operator.add, "-": operator.sub, "*": operator.mul, "/": operator.truediv, "^": operator.pow } self.fn = { "sin": math.sin, "cos": math.cos, "tan": math.tan, "abs": abs, "trunc": lambda a: int(a), "round": round, "sgn": lambda a: abs(a) > epsilon and cmp(a, 0) or 0 }
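# Independent cross-check of the exponentiation note above ("atom [ ^ factor ]..."
# gives right-to-left association): pyparsing's infixNotation with opAssoc.RIGHT
# groups 2^3^2 as 2^(3^2). This uses a different facility than the class above and
# is only meant to illustrate the associativity.
from pyparsing import Word, infixNotation, nums, opAssoc

num = Word(nums)
expo = infixNotation(num, [("^", 2, opAssoc.RIGHT)])
print(expo.parseString("2^3^2").asList())  # [['2', '^', ['3', '^', '2']]]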
m_or = K('or') m_or.setName('OR') m_not = K('not') m_not.setName('NOT') test('m_and', 'and') test('m_or', 'or') m_logical_operator = m_and ^ m_or test('m_logical_operator', ''' and or ''') m_expression = Forward() m_expression.setName('EXPR') m_infix_operator = m_logical_operator m_prefix_operator = m_not m_subexpression = nestedExpr(content=m_expression) m_term = m_literal ^ m_identifier ^ m_subexpression m_infix_expression = ((m_term + m_infix_operator + m_expression) #^ #(m_expression + m_infix_operator + m_term) ^ (m_term + m_infix_operator + m_term)) m_prefix_expression = m_prefix_operator + m_expression m_expression << (m_term ^ m_prefix_expression
def parseCTLK(spec): """Parse the spec and return the list of possible ASTs.""" global __ctlk if __ctlk is None: true = Literal("True") true.setParseAction(lambda tokens: TrueExp()) false = Literal("False") false.setParseAction(lambda tokens: FalseExp()) init = Literal("Init") init.setParseAction(lambda tokens: Init()) reachable = Literal("Reachable") reachable.setParseAction(lambda tokens: Reachable()) atom = "'" + SkipTo("'") + "'" atom.setParseAction(lambda tokens: Atom(tokens[1])) agent = atom group = Group(ZeroOrMore(agent + Suppress(",")) + agent) proposition = true | false | init | reachable | atom __ctlk = Forward() notproposition = "~" + proposition notproposition.setParseAction(lambda tokens: Not(tokens[1])) formula = (proposition | notproposition | Suppress("(") + __ctlk + Suppress(")")) logical = Forward() ex = Literal("E") + "X" + logical ex.setParseAction(lambda tokens: EX(tokens[2])) ax = Literal("A") + "X" + logical ax.setParseAction(lambda tokens: AX(tokens[2])) ef = Literal("E") + "F" + logical ef.setParseAction(lambda tokens: EF(tokens[2])) af = Literal("A") + "F" + logical af.setParseAction(lambda tokens: AF(tokens[2])) eg = Literal("E") + "G" + logical eg.setParseAction(lambda tokens: EG(tokens[2])) ag = Literal("A") + "G" + logical ag.setParseAction(lambda tokens: AG(tokens[2])) eu = Literal("E") + "[" + __ctlk + "U" + __ctlk + "]" eu.setParseAction(lambda tokens: EU(tokens[2], tokens[4])) au = Literal("A") + "[" + __ctlk + "U" + __ctlk + "]" au.setParseAction(lambda tokens: AU(tokens[2], tokens[4])) ew = Literal("E") + "[" + __ctlk + "W" + __ctlk + "]" ew.setParseAction(lambda tokens: EW(tokens[2], tokens[4])) aw = Literal("A") + "[" + __ctlk + "W" + __ctlk + "]" aw.setParseAction(lambda tokens: AW(tokens[2], tokens[4])) temporal = (ex | ax | ef | af | eg | ag | eu | au | ew | aw) nk = Literal("nK") + "<" + agent + ">" + logical nk.setParseAction(lambda tokens: nK(tokens[2], tokens[4])) k = Literal("K") + "<" + agent + ">" + logical k.setParseAction(lambda tokens: K(tokens[2], tokens[4])) ne = Literal("nE") + "<" + group + ">" + logical ne.setParseAction(lambda tokens: nE(list(tokens[2]), tokens[4])) e = Literal("E") + "<" + group + ">" + logical e.setParseAction(lambda tokens: E(list(tokens[2]), tokens[4])) nd = Literal("nD") + "<" + group + ">" + logical nd.setParseAction(lambda tokens: nD(list(tokens[2]), tokens[4])) d = Literal("D") + "<" + group + ">" + logical d.setParseAction(lambda tokens: D(list(tokens[2]), tokens[4])) nc = Literal("nC") + "<" + group + ">" + logical nc.setParseAction(lambda tokens: nC(list(tokens[2]), tokens[4])) c = Literal("C") + "<" + group + ">" + logical c.setParseAction(lambda tokens: C(list(tokens[2]), tokens[4])) epistemic = (nk | k | ne | e | nd | d | nc | c) logical <<= (formula | epistemic | temporal) __ctlk <<= (_logicals_(logical)) return __ctlk.parseString(spec, parseAll=True)
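# Standalone sketch of the parse-action style used by parseCTLK: each rule builds
# an AST node directly. The node classes here are throwaway stand-ins, not the real
# TrueExp/Not/EX classes used above.
from pyparsing import Forward, SkipTo, Suppress

class DemoAtom:
    def __init__(self, name): self.name = name
    def __repr__(self): return "Atom(%r)" % self.name

class DemoNot:
    def __init__(self, child): self.child = child
    def __repr__(self): return "Not(%r)" % (self.child,)

formula = Forward()
atom = Suppress("'") + SkipTo("'") + Suppress("'")
atom.setParseAction(lambda tokens: DemoAtom(tokens[0]))
negation = Suppress("~") + formula
negation.setParseAction(lambda tokens: DemoNot(tokens[0]))
formula <<= negation | atom

print(formula.parseString("~~'p'", parseAll=True)[0])  # Not(Not(Atom('p')))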
def __init__(self, query): self._methods = { 'and': self.evaluate_and, 'or': self.evaluate_or, 'not': self.evaluate_not, 'parenthesis': self.evaluate_parenthesis, 'quotes': self.evaluate_quotes, 'word': self.evaluate_word, } self.line = '' self.query = query.lower() if query else '' if self.query: # TODO: Cleanup operator_or = Forward() operator_word = Group(Word(alphanums)).setResultsName('word') operator_quotes_content = Forward() operator_quotes_content << ( (operator_word + operator_quotes_content) | operator_word) operator_quotes = ( Group(Suppress('"') + operator_quotes_content + Suppress('"')).setResultsName('quotes') | operator_word) operator_parenthesis = (Group( (Suppress('(') + operator_or + Suppress(")"))).setResultsName('parenthesis') | operator_quotes) operator_not = Forward() operator_not << ( Group(Suppress(Keyword('no', caseless=True)) + operator_not).setResultsName('not') | operator_parenthesis) operator_and = Forward() operator_and << ( Group(operator_not + Suppress(Keyword('and', caseless=True)) + operator_and).setResultsName('and') | Group(operator_not + OneOrMore( ~oneOf('and or') + operator_and)).setResultsName('and') | operator_not) operator_or << ( Group(operator_and + Suppress(Keyword('or', caseless=True)) + operator_or).setResultsName('or') | operator_and) self._query_parser = operator_or.parseString(self.query)[0] else: self._query_parser = False time_cmpnt = Word(nums).setParseAction(lambda t: t[0].zfill(2)) date = Combine((time_cmpnt + '-' + time_cmpnt + '-' + time_cmpnt) + ' ' + time_cmpnt + ':' + time_cmpnt + Optional(':' + time_cmpnt)) word = Word(printables) self._log_parser = ( date.setResultsName('timestamp') + word.setResultsName('log_level') + word.setResultsName('plugin') + (White(min=16).setParseAction( lambda s, l, t: [t[0].strip()]).setResultsName('task') | (White(min=1).suppress() & word.setResultsName('task'))) + restOfLine.setResultsName('message'))
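# Standalone miniature of the query grammar above: "and" is also implied between
# adjacent words, which is what the `~oneOf('and or') + operator_and` branch does.
# Both inputs below parse to the same nested shape.
from pyparsing import Forward, Group, Keyword, OneOrMore, Suppress, Word, alphanums, oneOf

term = Group(Word(alphanums))("word")
and_expr = Forward()
and_expr <<= (Group(term + Suppress(Keyword("and")) + and_expr)("and")
              | Group(term + OneOrMore(~oneOf("and or") + and_expr))("and")
              | term)

print(and_expr.parseString("foo and bar").asList())  # [[['foo'], ['bar']]]
print(and_expr.parseString("foo bar").asList())      # [[['foo'], ['bar']]]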
class RawNginxParser(object):
    # pylint: disable=expression-not-assigned
    """A class that parses nginx configuration with pyparsing."""

    # constants
    space = Optional(White())
    nonspace = Regex(r"\S+")
    left_bracket = Literal("{").suppress()
    right_bracket = space.leaveWhitespace() + Literal("}").suppress()
    semicolon = Literal(";").suppress()
    key = Word(alphanums + "_/+-.")
    dollar_var = Combine(Literal('$') + Regex(r"[^\{\};,\s]+"))
    condition = Regex(r"\(.+\)")
    # Matches anything that is not a special character, and ${SHELL_VARS}, AND
    # any chars in single or double quotes
    # All of these COULD be upgraded to something like
    # https://stackoverflow.com/a/16130746
    dquoted = Regex(r'(\".*\")')
    squoted = Regex(r"(\'.*\')")
    nonspecial = Regex(r"[^\{\};,]")
    varsub = Regex(r"(\$\{\w+\})")
    # nonspecial nibbles one character at a time, but the other objects take
    # precedence. We use ZeroOrMore to allow entries like "break ;" to be
    # parsed as assignments
    value = Combine(ZeroOrMore(dquoted | squoted | varsub | nonspecial))

    location = CharsNotIn("{};," + string.whitespace)
    # modifier for location uri [ = | ~ | ~* | ^~ ]
    modifier = Literal("=") | Literal("~*") | Literal("~") | Literal("^~")

    # rules
    comment = space + Literal('#') + restOfLine

    assignment = space + key + Optional(space + value, default=None) + semicolon
    location_statement = space + Optional(modifier) + Optional(space + location + space)
    if_statement = space + Literal("if") + space + condition + space
    charset_map_statement = space + Literal("charset_map") + space + value + space + value

    map_statement = space + Literal("map") + space + nonspace + space + dollar_var + space
    # This is NOT an accurate way to parse nginx map entries; it's almost
    # certainly too permissive and may be wrong in other ways, but it should
    # preserve things correctly in most or all cases.
    #
    #  - I can neither prove nor disprove that it is correct with respect to
    #    all escaped-semicolon situations
    # Addresses https://github.com/fatiherikli/nginxparser/issues/19
    map_pattern = Regex(r'".*"') | Regex(r"'.*'") | nonspace
    map_entry = space + map_pattern + space + value + space + semicolon
    map_block = Group(
        Group(map_statement).leaveWhitespace() +
        left_bracket +
        Group(ZeroOrMore(Group(comment | map_entry)) + space).leaveWhitespace() +
        right_bracket)

    block = Forward()

    # key could for instance be "server" or "http", or "location" (in which case
    # location_statement needs to have a non-empty location)

    block_begin = (Group(space + key + location_statement) ^
                   Group(if_statement) ^
                   Group(charset_map_statement)).leaveWhitespace()

    block_innards = Group(ZeroOrMore(Group(comment | assignment) | block | map_block) + space).leaveWhitespace()

    block << Group(block_begin + left_bracket + block_innards + right_bracket)

    script = OneOrMore(Group(comment | assignment) ^ block ^ map_block) + space + stringEnd
    script.parseWithTabs().leaveWhitespace()

    def __init__(self, source):
        self.source = source

    def parse(self):
        """Returns the parsed tree."""
        return self.script.parseString(self.source)

    def as_list(self):
        """Returns the parsed tree as a list."""
        return self.parse().asList()
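# Hedged usage sketch for the class above: parse a tiny nginx config and dump
# the nested-list form. It assumes the pyparsing imports used by the class are
# already in scope; the exact nesting of the output depends on the Group
# structure defined in the grammar.
config = (
    "server {\n"
    "    listen 80;\n"
    "    location / {\n"
    "        return 404;\n"
    "    }\n"
    "}\n"
)
print(RawNginxParser(config).as_list())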
# Fragment of a SimpleCalc-style grammar; the definitions of e, floatnumber,
# integer and of the pushFirst/assignVar parse actions precede this excerpt
# but were cut off.
ident = Word(alphas, alphanums + '_')

plus = Literal( "+" )
minus = Literal( "-" )
mult = Literal( "*" )
div = Literal( "/" )
lpar = Literal( "(" ).suppress()
rpar = Literal( ")" ).suppress()
addop = plus | minus
multop = mult | div
expop = Literal( "^" )
assign = Literal( "=" )

expr = Forward()
atom = ( ( e | floatnumber | integer | ident ).setParseAction(pushFirst) |
         ( lpar + expr.suppress() + rpar ) )
factor = Forward()
factor << atom + ZeroOrMore( ( expop + factor ).setParseAction( pushFirst ) )
term = factor + ZeroOrMore( ( multop + factor ).setParseAction( pushFirst ) )
expr << term + ZeroOrMore( ( addop + term ).setParseAction( pushFirst ) )
bnf = Optional((ident + assign).setParseAction(assignVar)) + expr
pattern = bnf + StringEnd()

# map operator symbols to corresponding arithmetic operations
# (only the "+" entry survived the truncation; the rest is an assumed
# completion matching the operators defined above)
opn = { "+" : ( lambda a, b: a + b ),
        "-" : ( lambda a, b: a - b ),
        "*" : ( lambda a, b: a * b ),
        "/" : ( lambda a, b: a / b ),
        "^" : ( lambda a, b: a ** b ) }
# [65] DataBlockValue ::= iri | RDFLiteral | NumericLiteral | BooleanLiteral | 'UNDEF'
DataBlockValue = iri | RDFLiteral | NumericLiteral | BooleanLiteral | Keyword('UNDEF')

# [78] Verb ::= VarOrIri | A
Verb = VarOrIri | A

# [85] VerbSimple ::= Var
VerbSimple = Var

# [97] Integer ::= INTEGER
Integer = INTEGER

TriplesNode = Forward()
TriplesNodePath = Forward()

# [104] GraphNode ::= VarOrTerm | TriplesNode
GraphNode = VarOrTerm | TriplesNode

# [105] GraphNodePath ::= VarOrTerm | TriplesNodePath
GraphNodePath = VarOrTerm | TriplesNodePath

# [93] PathMod ::= '?' | '*' | '+'
PathMod = Literal('?') | '*' | '+'

# [96] PathOneInPropertySet ::= iri | A | '^' ( iri | A )
PathOneInPropertySet = iri | A | Comp('InversePath', '^' + (iri | A))
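# Side note on the PathMod rule above: pyparsing promotes the plain '*' and
# '+' strings to Literal instances when they appear in an alternation, which
# is why the rule accepts any of the three path modifiers. Standalone check:
from pyparsing import Literal

path_mod = Literal('?') | '*' | '+'
print(path_mod.parseString('*', parseAll=True)[0])  # prints '*'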
def __init__(self):
    """
    expop   :: '^'
    multop  :: '*' | '/'
    addop   :: '+' | '-'
    integer :: ['+' | '-'] '0'..'9'+
    atom    :: PI | E | real | fn '(' expr ')' | '(' expr ')'
    factor  :: atom [ expop factor ]*
    term    :: factor [ multop factor ]*
    expr    :: term [ addop term ]*
    """
    point = Literal(".")
    e = CaselessLiteral("E")
    fnumber = Combine(Word("+-" + nums, nums) +
                      Optional(point + Optional(Word(nums))) +
                      Optional(e + Word("+-" + nums, nums)))
    ident = Word(alphas, alphas + nums + "_$")
    plus = Literal("+")
    minus = Literal("-")
    mult = Literal("*")
    div = Literal("/")
    lpar = Literal("(").suppress()
    rpar = Literal(")").suppress()
    addop = plus | minus
    multop = mult | div
    expop = Literal("^")
    pi = CaselessLiteral("PI")
    phy = CaselessLiteral("PH")
    expr = Forward()
    atom = ((Optional(oneOf("- +")) +
             (ident + lpar + expr + rpar | pi | phy | e | fnumber).setParseAction(self.pushFirst))
            | Optional(oneOf("- +")) + Group(lpar + expr + rpar)
            ).setParseAction(self.pushUMinus)
    # by defining exponentiation as "atom [ ^ factor ]..." instead of
    # "atom [ ^ atom ]...", we get right-to-left exponents, instead of left-to-right
    # that is, 2^3^2 = 2^(3^2), not (2^3)^2.
    factor = Forward()
    factor << atom + \
        ZeroOrMore((expop + factor).setParseAction(self.pushFirst))
    term = factor + \
        ZeroOrMore((multop + factor).setParseAction(self.pushFirst))
    expr << term + \
        ZeroOrMore((addop + term).setParseAction(self.pushFirst))
    # addop_term = ( addop + term ).setParseAction( self.pushFirst )
    # general_term = term + ZeroOrMore( addop_term ) | OneOrMore( addop_term)
    # expr << general_term
    self.bnf = expr
    # map operator symbols to corresponding arithmetic operations
    epsilon = 1e-12
    self.opn = {"+": mp.mp.fadd,
                "-": mp.mp.fsub,
                "*": mp.mp.fmul,
                "/": mp.mp.fdiv,
                "^": mp.power}
    # note: the builtin cmp() used by "sgn" only exists on Python 2
    self.fn = {"sqrt": mp.sqrt,
               "sin": mp.sin,
               "cos": mp.cos,
               "tan": mp.tan,
               "exp": mp.exp,
               "abs": abs,
               "trunc": lambda a: int(a),
               "round": round,
               "sgn": lambda a: abs(a) > epsilon and cmp(a, 0) or 0}
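# Standalone check of the right-associativity trick described in the comment
# above ("atom [ ^ factor ]..."): with the recursive factor rule, the postfix
# stack for 2^3^2 comes out as 2 3 2 ^ ^, i.e. 2^(3^2) rather than (2^3)^2.
from pyparsing import Forward, Literal, Word, ZeroOrMore, nums

stack = []
push = lambda t: stack.append(t[0])

number = Word(nums).setParseAction(push)
power = Literal("^")
factor = Forward()
factor <<= number + ZeroOrMore((power + factor).setParseAction(push))

factor.parseString("2^3^2", parseAll=True)
print(stack)  # expected: ['2', '3', '2', '^', '^']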
MARKER_OP.setParseAction(lambda s, l, t: Op(t[0]))

MARKER_VALUE = QuotedString("'") | QuotedString('"')
MARKER_VALUE.setParseAction(lambda s, l, t: Value(t[0]))

BOOLOP = L("and") | L("or")

MARKER_VAR = VARIABLE | MARKER_VALUE

MARKER_ITEM = Group(MARKER_VAR + MARKER_OP + MARKER_VAR)
MARKER_ITEM.setParseAction(lambda s, l, t: tuple(t[0]))

LPAREN = L("(").suppress()
RPAREN = L(")").suppress()

MARKER_EXPR = Forward()
MARKER_ATOM = MARKER_ITEM | Group(LPAREN + MARKER_EXPR + RPAREN)
MARKER_EXPR << MARKER_ATOM + ZeroOrMore(BOOLOP + MARKER_EXPR)

MARKER = stringStart + MARKER_EXPR + stringEnd


def _coerce_parse_result(results):
    # type: (Union[ParseResults, List[Any]]) -> List[Any]
    if isinstance(results, ParseResults):
        return [_coerce_parse_result(i) for i in results]
    else:
        return results


def _format_marker(marker, first=True):
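# This fragment looks like the marker grammar from (a vendored copy of)
# packaging.markers. If that guess is right, the grammar is normally reached
# through the library's public Marker class rather than used directly; the
# call below uses the packaging API, which is an assumption about where this
# snippet came from.
from packaging.markers import Marker

marker = Marker('python_version >= "3.8" and os_name == "posix"')
print(marker.evaluate())  # True or False, depending on the running interpreter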
import itertools
import random
import re
import sys
import types

'''
Parser for type annotations

type = [B]ool
     | [I]nt
     | [C]har
     | [S]tring
     | [L]ist(type)
     | t[R]ee(type)
     | [T]uple(name0:type, name1:type, ...)
'''

from pyparsing import alphanums, delimitedList, Forward, Group, Keyword, nums, Suppress, Word

any_type = Forward()

LPAR, RPAR = map(Suppress, '()')

BT, IT, CT, ST, LT, RT, TT = TYPES = 'BICSLRT'
BKW, IKW, CKW, SKW, LKW, RKW, TKW = map(Keyword, TYPES)

bool_type = Group(BKW)
int_type = Group(IKW)
char_type = Group(CKW)
str_type = Group(SKW)
comp_type = Group(Word(nums))

atom_types = comp_type | bool_type | int_type | char_type | str_type
ATOM_TYPES = [BT, IT, CT, ST]

list_type = Group(LKW + LPAR + Group(any_type) + RPAR)
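# Illustration only: the original module presumably completes any_type with
# the tree and tuple rules as well; closing the Forward with just the pieces
# shown here is an assumption made so the list rule can be exercised.
any_type <<= list_type | atom_types
print(any_type.parseString('L(I)', parseAll=True).asList())
# expected something like: [['L', [['I']]]]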
# BNF for Lucene query syntax
#
#  Query  ::= ( Clause )*
#  Clause ::= ["+", "-"] [<TERM> ":"] (<TERM> | "(" Query ")" )

LBRACK, RBRACK, LBRACE, RBRACE, TILDE, CARAT = map(Literal, '[]{}~^')
LPAR, RPAR, COLON, DOT = map(Suppress, '():.')
AND = Keyword('AND') | Literal('&&')
OR = Keyword('OR') | Literal('||')
NOT = Keyword('NOT') | Literal('!')
TO = Keyword('TO')

query_expr = Forward()

required_modifier = Literal('+')('required')
prohibit_modifier = Literal('-')('prohibit')

special_characters = '=><(){}[]^"~*?:\\/.&|'
valid_word = Word(printables, excludeChars=special_characters).setName('word')
valid_word.setParseAction(lambda t: t[0].replace('\\\\', chr(127)).replace(
    '\\', '').replace(chr(127), '\\'))

clause = Forward()
field_name = (Optional(valid_word()('attr') + DOT)) + valid_word()('fieldname')
single_term = valid_word()('singleterm')
phrase = QuotedString('"', unquoteResults=True)('phrase')
wildcard = Regex(r'[a-z0-9]*[\?\*][a-z0-9]*')('wildcard')
wildcard.setParseAction(lambda t: t[0].replace('?', '.?').replace('*', '.*'))
regex = QuotedString('/', unquoteResults=True)('regex')
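# Standalone check of the wildcard-to-regex rewrite above: assuming only the
# wildcard definition from the snippet, a '*' in the term becomes '.*'.
print(wildcard.parseString('foo*', parseAll=True)[0])  # expected: foo.*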