def get_formula_parser(chemical_tokens):
    """Build a pyparsing grammar for chemical formulas that use square brackets for grouping.

    chemical_tokens: whitespace-separated element symbols (any form accepted by oneOf()).
    The returned parser yields (element, count) pairs: bracketed subgroup counts are
    multiplied through by the group's multiplier, and duplicate elements are summed.
    """
    LPAR, RPAR = map(Suppress, "[]")
    # multiplier: integer or decimal, converted to float
    index = Regex(r"\d+(\.\d*)?").setParseAction(lambda t: float(t[0]))
    element = oneOf(chemical_tokens)
    chemical_formula = Forward()
    # term: an element or a bracketed sub-formula, plus an optional multiplier (default 1)
    term = Group((element | Group(LPAR + chemical_formula + RPAR)("subgroup")) + Optional(index, default=1)("mult"))
    chemical_formula << OneOrMore(term)

    def multiplyContents(tokens):
        # Distribute a subgroup's multiplier over each contained (element, count) pair.
        t = tokens[0]
        if t.subgroup:
            mult = t.mult
            for term in t.subgroup:
                term[1] *= mult
            return t.subgroup
        # plain element terms: implicit None leaves the original tokens unchanged

    term.setParseAction(multiplyContents)

    def sumByElement(tokens):
        # Merge repeated elements by summing their counts; when all elements are
        # distinct the implicit None return keeps the tokens as-is.
        elementsList = [t[0] for t in tokens]
        duplicates = len(elementsList) > len(set(elementsList))
        if duplicates:
            ctr = defaultdict(int)
            for t in tokens:
                ctr[t[0]] += t[1]
            return ParseResults([ParseResults([k, v]) for k, v in ctr.items()])

    chemical_formula.setParseAction(sumByElement)
    return chemical_formula
def grammar():
    """Build a pyparsing grammar for an SQL dump: CREATE TABLE statements,
    ALTER TABLE ... ADD CONSTRAINT ... FOREIGN KEY statements, comments, and
    a catch-all for any other semicolon-terminated statement.

    The *_act callables (defined elsewhere in this module) are attached as
    parse actions to collect the parsed schema information.
    """
    # balanced-parenthesis blob, recursing on nested parens
    parenthesis = Forward()
    parenthesis <<= "(" + ZeroOrMore(CharsNotIn("()") | parenthesis) + ")"
    parenthesis.setParseAction(join_string_act)
    # single-quoted string (must be non-empty between the quotes)
    quoted_string = "'" + OneOrMore(CharsNotIn("'")) + "'"
    quoted_string.setParseAction(join_string_act)
    # DEFAULT 'value'::type style defaults
    quoted_default_value = "DEFAULT" + quoted_string + OneOrMore(CharsNotIn(", \n\t"))
    quoted_default_value.setParseAction(quoted_default_value_act)
    # one column/constraint definition inside CREATE TABLE (...)
    field_def = OneOrMore(quoted_default_value | Word(alphanums + "_\"'`:-/[]") | parenthesis)
    field_def.setParseAction(field_act)
    # bare or double-quoted table name
    tablename_def = (Word(alphas + "`_.") | QuotedString("\""))
    field_list_def = field_def + ZeroOrMore(Suppress(",") + field_def)
    field_list_def.setParseAction(field_list_act)
    create_table_def = Literal("CREATE") + "TABLE" + tablename_def.setResultsName("tableName") + "(" + field_list_def.setResultsName("fields") + ")" + ";"
    create_table_def.setParseAction(create_table_act)
    # ALTER TABLE ONLY <t> ADD CONSTRAINT <name> FOREIGN KEY (<col>) REFERENCES <t2> (<col2>) ...
    add_fkey_def = Literal("ALTER") + "TABLE" + "ONLY" + tablename_def.setResultsName("tableName") + "ADD" + "CONSTRAINT" + Word(alphanums + "_") + "FOREIGN" + "KEY" + "(" + Word(alphanums + "_").setResultsName("keyName") + ")" + "REFERENCES" + Word(alphanums + "._").setResultsName("fkTable") + "(" + Word(alphanums + "_").setResultsName("fkCol") + ")" + Optional(Literal("DEFERRABLE")) + Optional(Literal("ON") + "DELETE" + (Literal("CASCADE") | Literal("RESTRICT"))) + ";"
    add_fkey_def.setParseAction(add_fkey_act)
    # anything else up to a semicolon is consumed and handed to other_statement_act
    other_statement_def = OneOrMore(CharsNotIn(";")) + ";"
    other_statement_def.setParseAction(other_statement_act)
    # SQL line comment; routed to the same catch-all action
    comment_def = "--" + ZeroOrMore(CharsNotIn("\n"))
    comment_def.setParseAction(other_statement_act)
    return OneOrMore(comment_def | create_table_def | add_fkey_def | other_statement_def)
def parse_constraint_group(constraint_group):
    """Parse a group of constraint strings of the form func(arg, ...), with
    nested calls allowed, accumulating results into module-level globals.

    parse_function (attached as a parse action) is expected to append to the
    global valgrind_operations list; one list is collected per constraint.
    Returns a tuple: (operations per constraint, plus the four per-variable
    global maps populated during parsing).
    """
    global valgrind_operations, size_by_var, offset_by_var, realsize_by_var, shift_by_var
    init_global_vars()
    lparen = Literal("(")
    rparen = Literal(")")
    # function names may contain alphanumerics plus ':' and '_'
    func = Word(alphanums, alphanums + ":_")
    integer = Word(nums)
    expression = Forward()
    # an argument is a nested call, a bare name, or an integer literal
    arg = expression | func | integer
    args = arg + ZeroOrMore("," + arg)
    expression << func + lparen + args + rparen
    expression.setParseAction(parse_function)
    valgrind_operations_group = []
    for constraint in constraint_group:
        # reset the per-constraint accumulator the parse action writes into
        valgrind_operations = []
        expression.parseString(constraint)
        resize_operands()
        valgrind_operations_group.append(valgrind_operations)
    return (valgrind_operations_group, size_by_var, offset_by_var, realsize_by_var, shift_by_var)
def parser():
    """Build a stylesheet parser: selectors followed by brace-delimited,
    possibly nested rule bodies; C-style comments are ignored."""
    selector = CharsNotIn('{};')
    declarations = OneOrMore(CharsNotIn('{};') + ';')
    css_rule = Forward()
    css_rule <<= selector + Group('{' + ZeroOrMore(css_rule | declarations) + '}')
    css_rule.setParseAction(make_action(Rule))
    sheet = ZeroOrMore(css_rule)
    sheet.ignore(cStyleComment)
    return sheet
def main(s):
    """Parse the chemical-formula string s and return the processed top-level result."""
    open_paren = Literal('(').suppress()
    close_paren = Literal(')').suppress()
    count = Word(nums)
    symbol = Word(alphas, exact=1)
    formula = Forward()
    # a unit is a single-letter element or a parenthesised sub-formula,
    # optionally followed by a repeat count (default 1)
    unit = Group(
        (symbol | Group(open_paren + formula + close_paren)('subgroup'))
        + Optional(count, default=1)('mult')
    )
    formula << OneOrMore(unit)
    # processing hooks are defined elsewhere in this module
    count.setParseAction(process_integer)
    unit.setParseAction(process_term)
    formula.setParseAction(process_formula)
    return formula.parseString(s)[0]
def grammar(self): """ Define the parser grammar. """ # Ignore TeX commands between delimiters $$, \(, \) tex_eq = (Literal(r'\(') | Literal(r'$$') | Literal(r'\[')) + ... + ( Literal(r'\)') | Literal(r'$$') | Literal(r'\]')) # Define elemtary stuff leftAc = Literal('{').suppress() rightAc = Literal('}').suppress() lpar = Literal('(') rpar = Literal(')') integer = Word(nums) # simple unsigned integer real = Regex(r"[+-]?\d+(:?\.\d*)?(:?[eE][+-]?\d+)?") real.setParseAction(self.real_hook) number = real | integer # Define function fnname = Word(alphas, alphanums + "_")('name') # Require expr to finalize the def function = Forward() function.setParseAction(self.function_hook) # What are the namming rule for the jocker? Need to start by a letter, # may contain almost everything variable = Combine(leftAc + Word(alphas, alphanums + "_") + rightAc)('name') variable.setParseAction(self.variable_hook) variable.ignore(tex_eq) # arithmetic operators minus = Literal('-') arithOp = oneOf("+ * /") | minus equal = Literal('{=').suppress() # Require atom to finalize the def expr = Forward() # Define atom atom = number | (0, None) * minus + (Group(lpar + expr + rpar) | variable | function) atom.setParseAction(self.atom_hook) # Finalize postponed elements... expr << atom + ZeroOrMore(arithOp + atom) # Need to group arguments for swapping them function << fnname + Group(lpar + Group(ZeroOrMore(expr)) + Optional(Literal(',') + Group(expr)) + rpar) # Define equation equation = equal + expr + rightAc equation.setParseAction(self.equation_hook) return equation, variable
def rdata_grammar():
    """Grammar for R dump-style data: list(name = value, ...) where values are
    constants, c(...) vectors (as numpy arrays), or structure(...) arrays."""
    key_values = Forward()
    csv_constants = constant + ZeroOrMore(Literal(",").suppress() + constant)
    # c(1, 2, ...) -> numpy array of the inner constants
    vector = Literal("c(") + csv_constants + Literal(")").setName(') to close c')
    vector.setParseAction(lambda s, l, t: [np.array(t[1:-1])])
    # structure(name = value, ...) -> array built from the key/value dict
    array = Literal("structure(") + key_values + Literal(")").setName(') to close structure')
    array.setParseAction(lambda s, l, t: [_build_array(t[1])])
    value = constant | vector | array
    pair = variable + Literal("=").suppress() + value
    pair.setParseAction(lambda s, l, t: [t[:]])
    key_values << (pair + ZeroOrMore(Literal(",").suppress() + pair))
    key_values.setParseAction(lambda s, l, t: [dict(t[:])])
    # top level: list(name = value, ...) -> the dict of pairs
    data = Literal("list(") + key_values + Literal(")").setName(') to close list')
    data.setParseAction(lambda s, l, t: [t[1]])
    return data
def __init__(self):
    """Parser for instruction.
    Example:
    {{<a>},{<a>},{<a>},{<a>}}
    {{<!>},{<!>},{<!>},{<a>}}
    <{o"i!a,<{i<a>
    """
    debug = False
    # running total of garbage characters seen (after removing '!x' escapes)
    self.garbo_count = 0
    LBRACK, RBRACK, LBRACE, RBRACE, BANG = map(Suppress, "<>{}!")
    nonspecial = CharsNotIn('<>{}!')
    # '!' plus the escaped character (exactly two chars)
    ignored = Word('!', printables, exact=2)
    # everything up to the closing '>', skipping over '!x' escapes
    enclosed_garbo = SkipTo(Literal('>'), ignore=ignored)
    val_str = Forward()
    garbo_str = Forward()
    item = Forward()  # NOTE(review): immediately rebound below; this Forward is unused
    # a parsed item
    item = (ignored | garbo_str | val_str | nonspecial).setDebug(debug)
    # stuff in {}s
    val_str << nestedExpr('{', '}', content=item, ignoreExpr=None).setDebug(debug)
    # stuff in <>s (suppressed)
    garbo_str << (LBRACK + Optional(enclosed_garbo) + RBRACK).setDebug(debug)

    def cvt_list(toks):
        # materialize nested ParseResults as plain lists
        return toks.asList()
    val_str.setParseAction(cvt_list)

    def take_garbo(s, loc, toks):
        # strip '!x' escape pairs, count what remains, and emit a marker token
        m = toks[0]
        ig_str = re.sub(r'!.', '', m)
        ln = len(ig_str)
        self.garbo_count += ln
        return f"<GARBO: {ln}>"
    enclosed_garbo.setParseAction(take_garbo)
    ignored.setParseAction(lambda: '!IGNORED')
    # pattern build
    self._pattern = item
def generate_ideal_bnf(variables):
    """
    expop   :: '^'
    multop  :: '*' | '/'
    addop   :: '+' | '-'
    integer :: ['+' | '-'] '0'..'9'+
    atom    :: PI | E | real | fn '(' expr ')' | '(' expr ')'
    factor  :: atom [ expop factor ]*
    term    :: factor [ multop factor ]*
    expr    :: term [ addop term ]*

    Builds a parser for a semicolon-terminated, comma-separated list of
    polynomials over the given variable names. set_variable / add_term /
    add_poly (defined elsewhere) collect the parsed structure.
    """
    point = Literal( "." )
    e = CaselessLiteral( "E" )
    exponent = Word(nums)
    # NOTE(review): fnumber, lpar and rpar are built but never used below
    fnumber = Combine( Word( "+-"+nums, nums ) + Optional( point + Optional( Word( nums ) ) ) + Optional( e + Word( "+-"+nums, nums ) ) )
    plus = Literal( "+" )
    minus = Literal( "-" )
    mult = Literal( "*" )
    div = Literal( "/" )
    seperator = Literal(",")
    lpar = Literal( "(" ).suppress()
    rpar = Literal( ")" ).suppress()
    addop = plus | minus
    multop = mult | div
    expop = Literal( "^" )
    one = Literal("1")
    poly = Forward()
    # Or() takes the longest variable-name match
    variable = Or([Literal(v) for v in variables]).setParseAction(set_variable)
    factor = Forward()
    # a factor is the literal '1' or a variable with optional '^'-exponents
    factor << Or([one, variable + ZeroOrMore( ( expop + exponent))])
    term = Forward()
    term << factor + ZeroOrMore( ( multop + factor ))
    term.setParseAction(add_term)
    poly << term + ZeroOrMore( ( addop + term ) ).setParseAction(add_poly)
    # ideal: comma-separated polys, terminated with ';'
    ideal = Forward()
    ideal << poly + ZeroOrMore( ( seperator + poly ) ) + Literal(";")
    return ideal
def __setup_parser():
    """Build the pipeline-language line parser and return its parseString method.

    simple/named/special are helper combinator wrappers defined elsewhere in
    this module; a line is an optional 'name =' binding followed by atoms
    joined with '|'.
    """
    # reserved single-character tokens
    LSQUARE, RSQUARE, LCURLY, RCURLY, EQ, PIPE, SEMI = map(Suppress, '[]{}=|;')
    # non-iterable literals
    integer = simple(Word('-'+nums, nums), 'int', int)
    string = simple(QuotedString("'") | QuotedString('"'), 'str', str)
    regex = simple(QuotedString('/'), 'rgx', re.compile)
    # list/range literals
    emptylist = named(LSQUARE + RSQUARE, 'emptylist')
    rstart = LSQUARE + integer + Optional(Suppress(',') + integer)
    irange = named(rstart + Suppress('..]'), 'irange')                 # unbounded range [a, b ..]
    brange = named(rstart + Suppress('..') + integer + RSQUARE, 'brange')  # bounded range
    intlist = named(LSQUARE + delimitedList(integer) + RSQUARE, 'intlist')
    strlist = named(LSQUARE + delimitedList(string) + RSQUARE, 'strlist')
    rgxlist = named(LSQUARE + delimitedList(regex) + RSQUARE, 'rgxlist')
    list_lit = Forward()
    lstlist = named(LSQUARE + delimitedList(list_lit) + RSQUARE, 'lstlist')  # nested lists
    list_lit << (emptylist | irange | brange | intlist | strlist | rgxlist | lstlist)
    # special-syntax functions
    slurp = special(QuotedString('<', endQuoteChar='>'), 'slurp')
    shell = special(QuotedString('`'), 'shell')
    # functions and arguments
    name = simple(Word(alphas, alphanums+'_'), 'name', str)
    subpipe = Forward()
    function = Forward()
    argument = string | list_lit | regex | integer | subpipe | slurp | shell | function
    function << name + named(ZeroOrMore(argument), 'arguments')
    function.setParseAction(lambda parse: ('function', dict(parse.asList())))
    # an atom is anything that can fit between pipes on its own
    atom = (function | slurp | shell | list_lit)
    # an expression/subpipe is multiple atoms piped together
    expression = named(atom + ZeroOrMore(PIPE + atom), 'pipe')
    subpipe << LCURLY + expression + RCURLY
    # statements and lines are pretty standard
    statement = Optional(name + EQ, default=('name', '')) + expression
    statement.setParseAction(lambda parse: dict(parse.asList()))
    line = (statement | empty).ignore(pythonStyleComment)
    return line.parseString
def _build_grammar(self):
    """Build the expression grammar: literals, tag references, parenthesised
    binary arithmetic, assignment, and the '?' print-tags command.

    The self.to_* factories convert matched tokens into evaluator objects.
    Returns the top-level expr element.
    """
    expr = Forward()
    float_lit = Combine(Word(nums) + '.' + Word(nums))
    float_lit.setName('float')
    float_lit.setParseAction(lambda x: self.to_literal(float(x[0])))
    int_lit = Word(nums)
    int_lit.setName('int')
    int_lit.setParseAction(lambda x: self.to_literal(int(x[0])))
    # float must be tried before int so '1.5' is not split
    num = (float_lit | int_lit)
    num.setParseAction(lambda x: x[0])
    tag_name = Word(alphas + "_", alphanums + "_")
    tag_name.setName('tag_name')
    tag_name.setParseAction(lambda t: tag_reference.TagReference(t[0]))
    quoted_string = QuotedString("'")
    quoted_string.setParseAction(lambda s: self.to_literal(s[0]))
    oper = oneOf('+ * / -')
    oper.setParseAction(lambda o: o[0])
    lpar = Literal("(").suppress()
    rpar = Literal(")").suppress()
    # arithmetic is always fully parenthesised: (expr op expr)
    arith = Group(lpar + expr + oper + expr + rpar)
    arith.setParseAction(lambda t: self.to_arith(t[0][0], t[0][1], t[0][2]))
    assign = tag_name + '=' + expr
    assign.setName('assign')
    assign.setParseAction(lambda x: self.to_assign(x[0], x[2]))
    # bare '?' prints all tags
    print_tags = Literal('?')
    print_tags.setParseAction(lambda x: self.to_print_tags())
    expr << (arith | assign | tag_name | num | quoted_string | print_tags)
    expr.setParseAction(lambda x: x[0])
    return expr
def grammar(self): """ Define the parser grammar for FP syntaxe. Modified from base class. """ # Define elemtary stuff leftAc = Literal('{').suppress() rightAc = Literal('}').suppress() lpar = Literal('(') rpar = Literal(')') integer = Word(nums) # simple unsigned integer real = Regex(r"[+-]?\d+(:?\.\d*)?(:?[eE][+-]?\d+)?") # add extra input parameter to the parseaction to keep in mind all variable real.setParseAction(self.real_hook) number = real | integer # Define function fnname = Word(alphas, alphanums + "_")('name') # Require expr to finalize the def function = Forward() function.setParseAction(self.function_hook) # Normally, no variable excepted random*, pi ... # may contain almost everything variable = Word(alphas, alphanums + "_")('name') variable.setParseAction( lambda t: self.variable_hook(self.wildcards, t)) # arithmetic operators minus = Literal('-') arithOp = oneOf("+ * /") | minus # Require atom to finalize the def expr = Forward() # Define atom atom = number | (0, None) * minus + (Group(lpar + expr + rpar) | function | variable) atom.setParseAction(self.atom_hook) # Finalize postponed elements... expr << atom + ZeroOrMore(arithOp + atom) function << fnname + Group(lpar + Group(ZeroOrMore(expr)) + Optional(Literal(',') + Group(expr)) + rpar) # Define equation equal = Literal('fp{').suppress() equation = equal + expr + rightAc equation.setParseAction(self.equation_hook) return equation, variable
def ParseExpression(cls, source):
    """Parse a Scheme s-expression string and wrap the result in cls.

    Atoms (integers, #t/#f booleans, symbols, multiline strings), proper
    lists and #(...) vectors are converted through the SchemeExpression
    factory helpers.
    """
    # atoms
    boolean = Keyword('#f') | Keyword('#t')
    boolean.setParseAction(lambda s, l, t: SchemeExpression.make_boolean(t[0] == '#t').expression)
    symbol = Word(alphanums + '-_')
    symbol.setParseAction(
        lambda s, l, t: SchemeExpression.make_symbol(t[0]).expression)
    integer = Word(nums)
    integer.setParseAction(
        lambda s, l, t: SchemeExpression.make_integer(t[0]).expression)
    string = QuotedString('"', multiline=True)
    string.setParseAction(
        lambda s, l, t: SchemeExpression.make_string(t[0]).expression)
    # integer first so digit runs are not taken as symbols
    element = integer | boolean | symbol | string
    # lists
    lexpr = Forward()
    vexpr = Forward()
    lparen = Literal('(').suppress()
    rparen = Literal(')').suppress()
    hashsym = Literal('#').suppress()
    # vectors
    lexpr << Group(lparen + ZeroOrMore(element ^ lexpr ^ vexpr) + rparen)
    lexpr.setParseAction(lambda s, l, t: SchemeExpression.make_list(t[0]))
    vexpr << Group(hashsym + lparen + ZeroOrMore(element ^ lexpr ^ vexpr) + rparen)
    vexpr.setParseAction(
        lambda s, l, t: SchemeExpression.make_vector(t[0]))
    # final...
    sexpr = element | vexpr | lexpr
    sexpr.keepTabs = True  # this seems to be necessary to fix a problem with pyparsing
    result = sexpr.parseString(source)[0]
    return cls(SchemeExpression._flatten(result))
def ParseExpression(cls, source):
    """Parse a Scheme s-expression string and wrap the result in cls.

    Atoms (integers, #t/#f booleans, symbols, double-quoted strings), proper
    lists and #(...) vectors are converted through the SchemeExpression
    factory helpers.
    """
    # atoms
    boolean = Keyword('#f') | Keyword('#t')
    boolean.setParseAction(lambda s, l, t: SchemeExpression.make_boolean(t[0] == '#t').expression)
    symbol = Word(alphanums + '-_')
    symbol.setParseAction(
        lambda s, l, t: SchemeExpression.make_symbol(t[0]).expression)
    integer = Word(nums)
    integer.setParseAction(
        lambda s, l, t: SchemeExpression.make_integer(t[0]).expression)
    # BUGFIX: copy() so the parse action does not mutate the shared
    # module-level pyparsing.dblQuotedString singleton, which would leak
    # Scheme conversion behavior into every other grammar in the process.
    string = dblQuotedString.copy()
    string.setParseAction(lambda s, l, t: SchemeExpression.make_string(
        t[0][1:-1]).expression)  # strip the surrounding quotes
    element = integer | boolean | symbol | string
    # lists
    lexpr = Forward()
    vexpr = Forward()
    lparen = Literal('(').suppress()
    rparen = Literal(')').suppress()
    hashsym = Literal('#').suppress()
    # vectors
    lexpr << Group(lparen + ZeroOrMore(element ^ lexpr ^ vexpr) + rparen)
    lexpr.setParseAction(lambda s, l, t: SchemeExpression.make_list(*t[0]))
    vexpr << Group(hashsym + lparen + ZeroOrMore(element ^ lexpr ^ vexpr) + rparen)
    vexpr.setParseAction(
        lambda s, l, t: SchemeExpression.make_vector(*t[0]))
    # final...
    sexpr = element | vexpr | lexpr
    result = sexpr.parseString(source)[0]
    return cls(SchemeExpression._flatten(result))
def create_bnf():
    """Build a parser for Python-literal-like values: ints, reals, quoted
    strings, bare words, tuples, lists and dicts (mutually nestable).

    NOTE(review): returns dict_inner (the brace-less key:value list), not a
    full value grammar — presumably the caller parses bare 'k: v, ...' text;
    confirm against call sites.
    """
    cvt_int = lambda toks: int(toks[0])
    cvt_real = lambda toks: float(toks[0])
    cvt_tuple = lambda toks: tuple(toks.asList())
    cvt_dict = lambda toks: dict(toks.asList())
    # define punctuation as suppressed literals
    (lparen, rparen, lbrack, rbrack, lbrace, rbrace, colon) = map(Suppress, "()[]{}:")
    integer = Combine(Optional(oneOf("+ -")) + Word(nums)).setName("integer")
    integer.setParseAction(cvt_int)
    real = Combine(Optional(oneOf("+ -")) + Word(nums) + "." + Optional(Word(nums)) + Optional("e" + Optional(oneOf("+ -")) + Word(nums))).setName("real")
    real.setParseAction(cvt_real)
    tuple_str = Forward()
    list_str = Forward()
    dict_str = Forward()
    # real before integer so the '.' is not left behind
    list_item = (real | integer | Group(list_str) | tuple_str | dict_str | quotedString.setParseAction(removeQuotes) | Word(alphas8bit + alphas, alphas8bit + alphanums + "_"))
    # dict values may be empty -> None
    list_item2 = list_item | Empty().setParseAction(lambda: [None])
    tuple_str << (Suppress("(") + Optional(delimitedList(list_item)) + Optional(Suppress(",")) + Suppress(")"))
    tuple_str.setParseAction(cvt_tuple)
    list_str << (lbrack + Optional(delimitedList(list_item) + Optional(Suppress(","))) + rbrack)
    dict_entry = Group(list_item + colon + list_item2)
    dict_inner = delimitedList(dict_entry) + Optional(Suppress(","))
    dict_inner.setParseAction(cvt_dict)
    dict_str << (lbrace + Optional(dict_inner) + rbrace)
    return dict_inner
def __init__(self):
    """Build the layer-pattern parser: 'label arg... key=value...', where
    args may be nested parenthesised sub-patterns; stores it on self.pattern."""
    FALSE = Keyword("false")
    NULL = Keyword("null")
    TRUE = Keyword("true")
    # JSON-style keywords become their Python values
    FALSE.setParseAction(replaceWith(False))
    NULL.setParseAction(replaceWith(None))
    TRUE.setParseAction(replaceWith(True))
    pattern = Forward()
    label = Word(alphas, alphanums + "_").setResultsName("layer_name")
    # a whole pattern nested in parentheses may appear as an argument
    configurable_param = nestedExpr(content=pattern)
    # ~Word("=") keeps 'key=value' options from being eaten as bare args
    arg = (NULL ^ FALSE ^ TRUE ^ pyparsing_common.number ^ (Word(alphanums + "*_") + ~Word("=")) ^ configurable_param)
    args = arg[...].setResultsName("args")
    args.setParseAction(self.convert_list)
    options = Dict(Group(Word(alphanums + "_") + Suppress("=") + arg))[...].setResultsName("options")
    options.setParseAction(self.convert_dict)
    pattern <<= label + args + options
    pattern.setParseAction(Pattern)
    self.pattern = pattern
def patt(cs_list):
    '''
    Match a control sequence from cs_list together with its brace-delimited
    argument, recursing on nested curly brackets. The control-sequence name
    and the braces are suppressed; the argument text is returned as a single
    space-joined string.

    Cleanup: removed dead locals from the original (an unused "parens" Word,
    and an "inside" expression that was first overwritten and then never used
    in the returned pattern) plus a commented-out alternative definition.
    '''
    cs_lit_list = oneOf(cs_list).suppress()
    bslash = Literal('\\').suppress()
    lbrace = Literal('{').suppress()
    rbrace = Literal('}').suppress()
    # any run of printable characters except braces
    allchars = Word(printables, excludeChars="{}")
    content = Forward()
    content << OneOrMore(allchars | (lbrace + ZeroOrMore(content) + rbrace))
    # collapse the matched words into one space-joined string
    content.setParseAction(lambda tok: " ".join(tok))
    return bslash + cs_lit_list + lbrace + content + rbrace
class CaseExpressionParser(TernaryExpressionParser):
    """Takes care of parsing (nested) SQL CASE statements with variables."""

    def __init__(self):  # noqa
        """Extend the parent parser so CASE expressions are valid operands,
        and build the full CASE WHEN ... THEN ... [ELSE ...] END grammar.

        Relies on self.OPERAND and self.MATH_OPERATORS from the parent
        __init__; Condition, NegativeTerm, AndTerm, OrTerm and CaseStatement
        (defined elsewhere) build the AST nodes.
        """
        super(CaseExpressionParser, self).__init__()
        CASE = CaselessKeyword('case').suppress()
        WHEN = CaselessKeyword('when').suppress()
        THEN = CaselessKeyword('then').suppress()
        ELSE = CaselessKeyword('else').suppress()
        END = CaselessKeyword('end').suppress()
        NOT = CaselessLiteral('not')
        AND = CaselessLiteral('and')
        OR = CaselessLiteral('or')
        RELATIONAL_OPERATOR = oneOf(">= <= != > < =")
        self.CASE_EXPR = Forward()
        # Case statements can be operands
        self.OPERAND = self.CASE_EXPR | self.OPERAND
        ARITH_EXPR = infixNotation(self.OPERAND, self.MATH_OPERATORS)
        CONDITION = (ARITH_EXPR + RELATIONAL_OPERATOR + ARITH_EXPR).setParseAction(Condition)
        # boolean combination of conditions, standard precedence: NOT > AND > OR
        WHEN_TERM = infixNotation(CONDITION, [(NOT, 1, opAssoc.RIGHT, NegativeTerm),
                                              (AND, 2, opAssoc.LEFT, AndTerm),
                                              (OR, 2, opAssoc.LEFT, OrTerm)])
        WHEN_EXPR = WHEN + WHEN_TERM.setResultsName('when_expr', listAllMatches=True)
        THEN_EXPR = THEN + ARITH_EXPR.setResultsName('then_expr', listAllMatches=True)
        ELSE_EXPR = ELSE + ARITH_EXPR.setResultsName('else_expr')
        self.CASE_EXPR <<= (CASE + OneOrMore(WHEN_EXPR + THEN_EXPR) + Optional(ELSE_EXPR) + END)
        self.CASE_EXPR.setParseAction(CaseStatement)
        # whole input must be a single CASE expression
        self.pattern = self.CASE_EXPR + StringEnd()
def get_sort_expression():
    """Build a parser for sort:[field(option:value, ...), -field, ...] syntax.

    parse_sort_field_option / parse_sort_field_expression /
    parse_sort_expression (defined elsewhere) convert the token lists into
    sort descriptors.

    NOTE(review): Word('"'), Word('sort:') and Word('[') match runs of
    characters *from that set*, not the literal string — e.g. Word('sort:')
    accepts any mix of s/o/r/t/':'. Presumably inputs are well-formed so this
    is tolerated; Literal would be stricter. Confirm before tightening.
    """
    value_expression = Word(srange("[a-zA-Z0-9_.*]"))
    value_expression.setParseAction(lambda tokens: tokens[0])
    # value wrapped in double quotes (quotes suppressed)
    quoted_value_expression = Word('"').suppress() +\
        value_expression + Word('"').suppress()
    option_value = value_expression | quoted_value_expression
    option_value.setParseAction(lambda tokens: tokens[0])
    # key:value option
    simple_option = Word(srange("[a-zA-Z0-9_.*]")) +\
        Word(':').suppress() + option_value
    simple_option.setParseAction(lambda tokens: (tokens[0], tokens[1]))
    # options may nest: key:(inner options)
    option = Forward()
    option << (simple_option | (Word(srange("[a-zA-Z0-9_.*]")) + Word(':').suppress() + nestedExpr(content=option)))
    option.setParseAction(
        lambda tokens: parse_sort_field_option(tokens.asList())
    )
    exp = option + ZeroOrMore(Word(',').suppress() + option)
    # optional '-' prefix marks descending sort
    field_expression = Optional('-') + Word(
        srange("[a-zA-Z0-9_.*]")
    ) + Optional(nestedExpr(content=exp))
    field_expression.setParseAction(parse_sort_field_expression)
    fields_expression = field_expression + ZeroOrMore(
        Word(',').suppress() + field_expression)
    fields_expression.setParseAction(parse_sort_expression)
    sort_expression = Word('sort:').suppress() \
        + Word('[').suppress() \
        + fields_expression + Word(']').suppress()
    return sort_expression
def getToken(self):
    """Assemble the full token expression: begin-marker, skipped body, end-marker.

    The body skips up to the end marker, fails on forbidden content, and
    transparently skips no-format regions and nested tokens. The conversion
    parse action is attached and the result is labelled with self.name.
    """
    start_expr = self._getBeginToken()
    stop_expr = self._getEndToken().suppress()
    fail_expr = self._getForbiddenToken()
    skip_over = NoFormatFactory.make(self.parser)
    token = Forward()
    body = SkipTo(stop_expr, failOn=fail_expr, ignore=skip_over | token).leaveWhitespace()
    token << (start_expr + body + stop_expr)
    token.setParseAction(self.conversionParseAction)
    return token(self.name)
def __init__(self):
    """Build the arithmetic-expression grammar and store it on self.bnf.

    Precedence (tightest first): ^  then  %  then  * /  then  + -.
    Tuples '(a, b, ...)' double as function-call argument lists; the
    *_action callables (defined elsewhere) evaluate each level.
    """
    plus, minus, mult, div, mod = map(Literal, '+-*/%')
    lpar = Literal('(')
    rpar = Literal(')')
    comma = Literal(',')
    powop = Literal('^')
    productop = mult | div
    modop = Literal('%')
    sumop = plus | minus
    tupl = Forward()
    # signed int/float with optional exponent
    number = Regex(r'[+-]?\d+(?:\.\d*)?(?:[eE][+-]?\d+)?')
    number.setParseAction(number_action)
    ident = Word(alphas, alphanums + '_')
    ident.setParseAction(ident_action)
    # function call: identifier followed by a tuple of arguments
    funccall = ident + tupl
    funccall.setParseAction(funccall_action)
    atom = funccall | ident | number
    atom.setParseAction(atom_action)
    patom = lpar + atom + rpar
    patom.setParseAction(patom_action)
    # '^' is right-recursive
    powexpr = Forward()
    powexpr << Group(atom + ZeroOrMore((powop + powexpr)))
    powexpr.setParseAction(powexpr_action)
    modexpr = Forward()
    modexpr << Group(powexpr + ZeroOrMore((modop + modexpr)))
    modexpr.setParseAction(modexpr_action)
    product = Group(modexpr + ZeroOrMore((productop + modexpr)))
    product.setParseAction(product_action)
    sumexpr = Group(product + Group(ZeroOrMore(sumop + product)))
    sumexpr.setParseAction(sum_action)
    # possibly-empty comma-separated expression list in parentheses
    tupl << lpar + Optional(sumexpr + ZeroOrMore(comma + sumexpr)) + rpar
    tupl.setParseAction(tupl_action)
    expr = sumexpr | tupl
    expr.setParseAction(expr_action)
    self.bnf = expr
def parse_pabl(self, raw_pabl):
    """Parse PABL source into a parse tree.

    The grammar is indentation-sensitive: the self.check_* handlers track
    the indent stack around @item and @permissions blocks; field lists are
    comma-separated and semicolon-terminated.
    """
    INDENT = lineEnd.suppress() + empty + empty.copy().setParseAction(
        self.check_sub_indent)
    # NOTE(review): the second setParseAction *replaces* the first, so
    # check_unindent never runs — possibly addParseAction was intended;
    # left as-is pending confirmation.
    UNDENT = FollowedBy(empty).setParseAction(self.check_unindent)
    UNDENT.setParseAction(self.unindent)
    terminator = Literal(';').suppress()
    comment = Literal('#') + restOfLine
    item_name = Word(alphas, alphanums + '_')
    variable = Word(alphas, alphanums + '_.')
    # 'variable as alias' renaming
    variable_as = (variable + 'as' + item_name)
    stmt = Forward()
    # a suite is one or more statements at the same indent level
    suite = Group(
        OneOrMore(empty + stmt.setParseAction(self.check_peer_indent)))
    suite.ignore(comment)
    item_start = Literal('@item').suppress()
    item_end = Literal(':').suppress()
    permission_start = Literal('@permissions')
    item_decl = (item_start + item_name.setResultsName('item') + item_end)
    item_defn = Group(item_decl + INDENT + suite + UNDENT)
    permission_decl = (permission_start + Group(
        delimitedList(item_name).setResultsName('permissions')) + item_end)
    permission_defn = Group(permission_decl + INDENT + suite + UNDENT)
    fieldList = delimitedList(
        Group(variable_as) | variable
    ).setResultsName('fields') + terminator
    stmt << (item_defn | fieldList | Group(permission_defn))
    parseTree = suite.parseString(raw_pabl)
    return parseTree
def make_amr_parser():
    """
    Pyparsing parser for AMRs. This will return an abstract syntax tree that
    needs to be converted into an AMR using ast_to_amr.
    """
    def debug(s, loc, tok):
        # flatten a value list into [first, rest...] form
        if len(tok) > 1:
            flat = [tok[0]] + tok[1:]
        else:
            flat = tok
        return flat

    def parse_concept_expr(s, loc, tok):
        # (node_name [/ concept_name] role*) -> (node, concept-or-None, roles)
        node_name = tok[0]
        concept_name = None
        roles = []
        if len(tok) > 1:
            if type(tok[1]) is tuple:
                # no concept name; everything after the node is roles
                roles = tok[1:]
            else:
                concept_name = tok[1]
                if len(tok) > 2:
                    roles = tok[2:]
        return (node_name, concept_name, roles)

    ParserElement.enablePackrat()  # Hopefully no bug in here...

    def parse_role(s, loc, tok):
        # ":role value..." -> (role, children); node alignments written as
        # value~N are stripped off the value and moved onto the edge label
        if len(tok) >= 2:
            r, ch = tok[0], []
            for v in tok[1:]:
                if isinstance(v, StrLiteral):
                    # Parse the node alignment and move it to the edge
                    parts = v.replace(" ", "").rsplit("~", 1)
                    if len(parts) >= 2:
                        v, align = parts
                        v = StrLiteral(v)
                        r = "%s~%s" % (r.strip(), align.strip())
                elif isinstance(v, SpecialValue):
                    parts = v.replace(" ", "").rsplit("~", 1)
                    if len(parts) >= 2:
                        v, align = parts
                        v = StrLiteral(v)
                        r = "%s~%s" % (r.strip(), align.strip())
                ch.append(v)
            return r, ch
        else:
            return tok[0]

    # Number are all mapped to the same node in the graph because of interning
    parse_quantity = lambda s, loc, tok: StrLiteral(" ".join(tok))
    parse_string_literal = lambda s, loc, tok: StrLiteral(" ".join(tok))
    parse_special_value = lambda s, loc, tok: SpecialValue(" ".join(tok))

    lpar = Literal("(").suppress()
    rpar = Literal(")").suppress()

    quantity = Word(nums + ".,").setParseAction(parse_quantity)
    # node labels may contain a wide range of punctuation
    node_name = Word(alphas + nums + """@-_.~$/<>%&!+\*?^`"'""")
    lit_string = Literal('"').suppress() + CharsNotIn('"') + Literal('"').suppress()
    concept_name = lit_string | Word(alphas + nums + """-_.,`~$/<>%&!+\*?^"'""")
    # role: ordinary name, or '#label' -> NonterminalLabel
    role_name = Word(alphas + nums + """-_.,~$/<>%&!+\*:?^`"'""") | Literal("#").suppress() + Word(alphas + nums + "[]-$_").setParseAction(lambda s, loc, tok: NonterminalLabel(tok[0]))
    # closed class of modal/polarity attributes, optionally with ~alignment
    special_attr = (Literal("-") | Literal("interrogative") | Literal("SHOULD") | Literal("MUST") | Literal("HAVE-TO") | Literal("WOULD") | Literal("CAN") | Literal("DARE-TO") | Literal("BE-TO") | Literal("MAY") | Literal("GOING-TO") | Literal("MIGHT") | Literal("USED-TO")) + Optional(Literal("~") + Word(alphas + nums + "."))
    expr = Forward()
    value = expr |\
        quantity.setParseAction(parse_quantity) |\
        special_attr.setParseAction(parse_special_value) | \
        node_name |\
        (lit_string + Optional(Literal("~") + Word(alphas + nums + "."))).setParseAction(parse_string_literal)
    # comma-separated list of values (right-recursive)
    valuelist = Forward()
    valuelist << (value + Literal(",").suppress() + valuelist | value).setParseAction(debug)
    role = (Literal(":").suppress() + role_name + valuelist).setParseAction(parse_role)

    expr.setParseAction(parse_concept_expr)
    expr << (lpar + node_name + Optional(Literal("/").suppress() + concept_name) + ZeroOrMore(role) + rpar)
    return expr
def evaluator(variables, functions, string, cs=False):
    '''
    Evaluate an expression. Variables are passed as a dictionary
    from string to value. Unary functions are passed as a dictionary
    from string to function. Variables must be floats.
    cs: Case sensitive

    TODO: Fix it so we can pass integers and complex numbers in variables dict

    Python-3 fixes applied: dict.keys() views are combined with set union
    instead of '+', and lazy map() objects are replaced with lists where the
    result is indexed, measured with len(), or returned from a parse action.
    '''
    from functools import reduce  # py3: reduce is no longer a builtin

    def lower_dict(d):
        # lower-case all keys for case-insensitive mode
        return dict([(k.lower(), d[k]) for k in d])

    all_variables = copy.copy(default_variables)
    all_functions = copy.copy(default_functions)

    if not cs:
        all_variables = lower_dict(all_variables)
        all_functions = lower_dict(all_functions)

    all_variables.update(variables)
    all_functions.update(functions)

    if not cs:
        string_cs = string.lower()
        all_functions = lower_dict(all_functions)
        all_variables = lower_dict(all_variables)
        CasedLiteral = CaselessLiteral
    else:
        string_cs = string
        CasedLiteral = Literal

    # BUGFIX (py3): keys() views cannot be concatenated with '+'
    check_variables(string_cs, set(all_variables) | set(all_functions))

    if string.strip() == "":
        return float('nan')

    ops = {"^": operator.pow,
           "*": operator.mul,
           "/": operator.truediv,
           "+": operator.add,
           "-": operator.sub,
           }
    # We eliminated extreme ones, since they're rarely used, and potentially
    # confusing. They may also conflict with variables if we ever allow e.g.
    # 5R instead of 5*R
    suffixes = {'%': 0.01, 'k': 1e3, 'M': 1e6, 'G': 1e9, 'T': 1e12,
                # 'P':1e15,'E':1e18,'Z':1e21,'Y':1e24,
                'c': 1e-2, 'm': 1e-3, 'u': 1e-6, 'n': 1e-9, 'p': 1e-12}
    # ,'f':1e-15,'a':1e-18,'z':1e-21,'y':1e-24}

    def super_float(text):
        ''' Like float, but with si extensions. 1k goes to 1000 '''
        if text[-1] in suffixes:
            return float(text[:-1]) * suffixes[text[-1]]
        else:
            return float(text)

    def number_parse_action(x):  # [ '7' ] -> [ 7 ]
        return [super_float("".join(x))]

    def exp_parse_action(x):  # [ 2 ^ 3 ^ 2 ] -> 512
        x = [e for e in x if isinstance(e, numbers.Number)]  # Ignore ^
        x.reverse()  # exponentiation is right-associative
        x = reduce(lambda a, b: b ** a, x)
        return x

    def parallel(x):  # Parallel resistors [ 1 2 ] => 2/3
        # convert from pyparsing.ParseResults, which doesn't support '0 in x'
        x = list(x)
        if len(x) == 1:
            return x[0]
        if 0 in x:
            return float('nan')
        x = [1. / e for e in x if isinstance(e, numbers.Number)]  # Ignore ||
        return 1. / sum(x)

    def sum_parse_action(x):  # [ 1 + 2 - 3 ] -> 0
        total = 0.0
        op = ops['+']
        for e in x:
            if e in set('+-'):
                op = ops[e]
            else:
                total = op(total, e)
        return total

    def prod_parse_action(x):  # [ 1 * 2 / 3 ] => 0.66
        prod = 1.0
        op = ops['*']
        for e in x:
            if e in set('*/'):
                op = ops[e]
            else:
                prod = op(prod, e)
        return prod

    def func_parse_action(x):
        return [all_functions[x[0]](x[1])]

    # SI suffixes and percent
    number_suffix = reduce(lambda a, b: a | b,
                           [Literal(k) for k in suffixes.keys()], NoMatch())
    (dot, minus, plus, times, div, lpar, rpar, exp) = map(Literal, ".-+*/()^")

    number_part = Word(nums)
    # 0.33 or 7 or .34 or 16.
    inner_number = (number_part + Optional(
        "." + Optional(number_part))) | ("." + number_part)
    # 0.33k or -17
    number = (Optional(minus | plus) + inner_number
              + Optional(CaselessLiteral("E") + Optional(
                  (plus | minus)) + number_part)
              + Optional(number_suffix))
    number = number.setParseAction(number_parse_action)  # Convert to number

    # Predefine recursive variables
    expr = Forward()
    factor = Forward()

    def sreduce(f, l):
        ''' Same as reduce, but handle len 1 and len 0 lists sensibly '''
        if len(l) == 0:
            return NoMatch()
        if len(l) == 1:
            return l[0]
        return reduce(f, l)

    # Handle variables passed in. E.g. if we have {'R':0.5}, we make the substitution.
    # Special case for no variables because of how we understand PyParsing is
    # put together
    if len(all_variables) > 0:
        # We sort the list so that var names (like "e2") match before
        # mathematical constants (like "e"). This is kind of a hack.
        all_variables_keys = sorted(
            all_variables.keys(), key=len, reverse=True)
        # BUGFIX (py3): sreduce needs a real list (len/indexing), not a map object
        varnames = sreduce(lambda x, y: x | y,
                           [CasedLiteral(x) for x in all_variables_keys])
        # BUGFIX (py3): parse actions must return concrete lists, not lazy maps
        varnames.setParseAction(lambda x: [all_variables[y] for y in x])
    else:
        varnames = NoMatch()

    # Same thing for functions.
    if len(all_functions) > 0:
        funcnames = sreduce(lambda x, y: x | y,
                            [CasedLiteral(x) for x in all_functions.keys()])
        function = funcnames + lpar.suppress() + expr + rpar.suppress()
        function.setParseAction(func_parse_action)
    else:
        function = NoMatch()

    atom = number | function | varnames | lpar + expr + rpar
    factor << (atom + ZeroOrMore(
        exp + atom)).setParseAction(exp_parse_action)  # 7^6
    paritem = factor + ZeroOrMore(Literal('||') + factor)  # 5k || 4k
    paritem = paritem.setParseAction(parallel)
    term = paritem + ZeroOrMore((times | div) + paritem)  # 7 * 5 / 4 - 3
    term = term.setParseAction(prod_parse_action)
    expr << Optional((plus | minus)) + term + ZeroOrMore(
        (plus | minus) + term)  # -5 + 4 - 3
    expr = expr.setParseAction(sum_parse_action)
    return (expr + stringEnd).parseString(string)[0]
number = Regex(r'\d+(\.\d+)?').setParseAction(lambda t:float(t[0])) fuzzy_modifier = TILDE + Optional(number, default=0.5)("fuzzy") term = Forward() field_name = valid_word.copy().setName("fieldname") incl_range_search = Group(LBRACK + term("lower") + to_ + term("upper") + RBRACK) excl_range_search = Group(LBRACE + term("lower") + to_ + term("upper") + RBRACE) range_search = incl_range_search("incl_range") | excl_range_search("excl_range") boost = (CARAT + number("boost")) string_expr = Group(string + proximity_modifier) | string word_expr = Group(valid_word + fuzzy_modifier) | valid_word term << (Optional(field_name("field") + COLON) + (word_expr | string_expr | range_search | Group(LPAR + expression + RPAR)) + Optional(boost)) term.setParseAction(lambda t:[t] if 'field' in t or 'boost' in t else None) expression << operatorPrecedence(term, [ (required_modifier | prohibit_modifier, 1, opAssoc.RIGHT), ((not_ | '!').setParseAction(lambda:"NOT"), 1, opAssoc.RIGHT), ((and_ | '&&').setParseAction(lambda:"AND"), 2, opAssoc.LEFT), (Optional(or_ | '||').setParseAction(lambda:"OR"), 2, opAssoc.LEFT), ]) # test strings taken from grammar description doc, and TestQueryParser.java tests = r""" a and b a and not b a and !b a && !b
# Builds a pyparsing grammar for an XDR / ONC-RPC-style interface definition
# language: enum/struct/union bodies, const definitions, typedefs, and
# program/version/procedure declarations.  Each construct is wired to the
# matching self.parse_* callback via setParseAction, and the returned
# `specification` parser ignores '#'-to-end-of-line comments.
# NOTE(review): source lines are collapsed (many statements per physical
# line); the code below is kept byte-identical — reformatting is a separate task.
def get_parser(self): declaration = Forward() keyword = (Keyword("enum") | Keyword("case") | Keyword("struct") | Keyword("default") | Keyword("switch") | Keyword("union") | Keyword("const") | Keyword("unsigned") | Keyword("int") | Keyword("hyper") | Keyword("float") | Keyword("double") | Keyword("bool") | Keyword("typedef") | Keyword("opaque") | Keyword("string") | Keyword("void") | Keyword("program") | Keyword("version")) identifier = NotAny(keyword) + Word( alphas + alphas.upper(), alphanums + alphanums.upper() + "_", asKeyword=True) constant = Combine(Optional("-") + Word(nums)) constant.setParseAction(lambda s, l, t: [int(t[0])]) value = constant | identifier enum_body = Literal("{").suppress() + identifier + Literal( "=").suppress() + value + ZeroOrMore( Literal(",").suppress() + identifier + Literal("=").suppress() + value) + Literal("}").suppress() enum_type_spec = Literal("enum").suppress() + enum_body enum_body.setParseAction(self.parse_enum) struct_body = Literal("{").suppress() + OneOrMore( declaration + Literal(";").suppress()) + Literal("}").suppress() struct_type_spec = Literal("struct").suppress() + struct_body struct_body.setParseAction(self.parse_struct) case_stmt = Literal("case").suppress() + value + Literal( ":").suppress() + declaration + Literal(";").suppress() default_stmt = Literal("default") + Literal( ":").suppress() + declaration + Literal(";").suppress() union_body = Literal("switch").suppress() + Literal("(").suppress( ) + declaration + Literal(")").suppress() + Literal("{").suppress( ) + Group(OneOrMore(Group(case_stmt)) + Optional(Group(default_stmt))) + Literal("}").suppress() union_type_spec = Literal("union").suppress() + union_body union_body.setParseAction(self.parse_union) constant_def = Literal("const").suppress() + identifier + Literal( "=").suppress() + constant + Literal(";").suppress() constant_def.setParseAction(self.parse_const) type_spec = ((Optional(Literal("unsigned")) + 
Literal("int")).setParseAction(self.parse_builtin) | (Optional(Literal("unsigned")) + Literal("hyper")).setParseAction(self.parse_builtin) | Literal("float").setParseAction(self.parse_builtin) | Literal("double").setParseAction(self.parse_builtin) | Literal("bool").setParseAction(self.parse_builtin) | enum_type_spec | struct_type_spec | union_type_spec | identifier) proc_return = Literal("void") | type_spec procedure_def = proc_return + identifier + Literal("(").suppress() + ( Literal("void") | type_spec) + ZeroOrMore( Literal(",").suppress() + type_spec) + Literal(")").suppress() + Literal( "=").suppress() + constant + Literal(";").suppress() procedure_def.setParseAction(self.parse_procedure_def) version_def = Literal("version").suppress() + identifier + Literal( "{").suppress() + OneOrMore(procedure_def) + Literal("}").suppress( ) + Literal("=").suppress() + constant + Literal(";").suppress() version_def.setParseAction(self.parse_version_def) program_body = Literal("{").suppress() + Group( OneOrMore(version_def)) + Literal("}").suppress() type_def = ( (Literal("typedef") + declaration + Literal(";")) | (Literal("enum") + identifier + enum_body + Literal(";")) | (Literal("struct") + identifier + struct_body + Literal(";")) | (Literal("union") + identifier + union_body + Literal(";")) | (Literal("program") + identifier + program_body + Literal("=").suppress() + constant + Literal(";"))) type_def.setParseAction(self.parse_type_def) declaration << ( (type_spec + identifier + Literal("[") + value + Literal("]")) | (type_spec + identifier + Literal("<") + value + Literal(">")) | (type_spec + identifier) | (Literal("opaque") + identifier + Literal("[") + value + Literal("]")) | (Literal("opaque") + identifier + Literal("<") + value + Literal(">")) | (Literal("string") + identifier + Literal("<") + value + Literal(">")) | (type_spec + Literal("*") + identifier) | Literal("void")) declaration.setParseAction(self.parse_decl) definition = type_def | constant_def 
# Top level: zero-or-more definitions, with '#' line comments skipped.
specification = ZeroOrMore(definition) comment = (Literal("#") + restOfLine).suppress() specification.ignore(comment) return specification
# Ni%1Rh%1((SiO2)%10Al2O3) # Ni%1Rh%1(Al2O3) # (Si0.8Er0.2O2)%10Al2O3 dopeMass = matrixMass * wt / (100. - dopewtPercentsSum) for e in dopes: if e[1] == dg: e[0][1] = e[0][1].real * dopeMass / m else: elementsList = [t[0] for t in tokens] duplicates = len(elementsList) > len(set(elementsList)) if duplicates: dd = defaultdict(int) for t in tokens: dd[t[0]] += t[1] nt = ParseResults([ParseResults([k, v]) for k, v in dd.items()]) formula.setParseAction(sum_by_element) def round_to_n(x, n=3): res = x try: res = round(x, -int(floor(log10(x))) + n-1) if \ isinstance(x, (float, np.float32, np.float64)) else x # res = round(x, -int(floor(log10(x))) + n-1) except (ValueError, OverflowError): pass return res def reconstruct(parsed): outStr = ''
# Constructs a pyparsing grammar for ASN.1 module definitions (X.680 subset):
# module header, imports/exports, type and value assignments, the builtin
# types (SEQUENCE/SET/CHOICE/ENUMERATED/BIT STRING/strings/INTEGER/...), tags,
# and simple size/value-range constraints.  Parse results are wrapped in
# AnnotatedToken tags (via annotate()) so a later pass can build an AST.
# Returns `OneOrMore(module_definition)` as the start symbol.
# NOTE(review): the physical lines below are collapsed (many statements per
# line) and contain embedded '#' comments that truncate them — the code is
# reproduced byte-identical; restoring line structure is a separate task.
def _build_asn1_grammar(): def build_identifier(prefix_pattern): identifier_suffix = Optional(Word(srange('[-0-9a-zA-Z]'))) identifier = Combine(Word(srange(prefix_pattern), exact=1) + identifier_suffix) # todo: more rigorous? trailing hyphens and -- forbidden return identifier def braced_list(element_rule): return Suppress('{') + Group(delimitedList(element_rule)) + Suppress('}') def annotate(name): def annotation(t): return AnnotatedToken(name, t.asList()) return annotation # Reserved words DEFINITIONS = Keyword('DEFINITIONS') BEGIN = Keyword('BEGIN') END = Keyword('END') OPTIONAL = Keyword('OPTIONAL') DEFAULT = Keyword('DEFAULT') TRUE = Keyword('TRUE') FALSE = Keyword('FALSE') UNIVERSAL = Keyword('UNIVERSAL') APPLICATION = Keyword('APPLICATION') PRIVATE = Keyword('PRIVATE') MIN = Keyword('MIN') MAX = Keyword('MAX') IMPLICIT = Keyword('IMPLICIT') EXPLICIT = Keyword('EXPLICIT') EXPLICIT_TAGS = Keyword('EXPLICIT TAGS') IMPLICIT_TAGS = Keyword('IMPLICIT TAGS') AUTOMATIC_TAGS = Keyword('AUTOMATIC TAGS') EXTENSIBILITY_IMPLIED = Keyword('EXTENSIBILITY IMPLIED') COMPONENTS_OF = Keyword('COMPONENTS OF') ELLIPSIS = Keyword('...') SIZE = Keyword('SIZE') OF = Keyword('OF') IMPORTS = Keyword('IMPORTS') EXPORTS = Keyword('EXPORTS') FROM = Keyword('FROM') # Built-in types SEQUENCE = Keyword('SEQUENCE') SET = Keyword('SET') CHOICE = Keyword('CHOICE') ENUMERATED = Keyword('ENUMERATED') BIT_STRING = Keyword('BIT STRING') BOOLEAN = Keyword('BOOLEAN') REAL = Keyword('REAL') OCTET_STRING = Keyword('OCTET STRING') CHARACTER_STRING = Keyword('CHARACTER STRING') NULL = Keyword('NULL') INTEGER = Keyword('INTEGER') OBJECT_IDENTIFIER = Keyword('OBJECT IDENTIFIER') # Restricted string types BMPString = Keyword('BMPString') GeneralString = Keyword('GeneralString') GraphicString = Keyword('GraphicString') IA5String = Keyword('IA5String') ISO646String = Keyword('ISO646String') NumericString = Keyword('NumericString') PrintableString = Keyword('PrintableString') TeletexString = 
Keyword('TeletexString') T61String = Keyword('T61String') UniversalString = Keyword('UniversalString') UTF8String = Keyword('UTF8String') VideotexString = Keyword('VideotexString') VisibleString = Keyword('VisibleString') # Useful types GeneralizedTime = Keyword('GeneralizedTime') UTCTime = Keyword('UTCTime') ObjectDescriptor = Keyword('ObjectDescriptor') # Literals number = Word(nums) signed_number = Combine(Optional('-') + number) # todo: consider defined values from 18.1 bstring = Suppress('\'') + StringOf('01') + Suppress('\'B') hstring = Suppress('\'') + StringOf('0123456789ABCDEF') + Suppress('\'H') # Comments hyphen_comment = Regex(r"--[\s\S]*?(--|$)", flags=re.MULTILINE) comment = hyphen_comment | cStyleComment # identifier identifier = build_identifier('[a-z]') # references # these are duplicated to force unique token annotations valuereference = build_identifier('[a-z]') typereference = build_identifier('[A-Z]') module_reference = build_identifier('[A-Z]') reference = valuereference | typereference # TODO: consider object references from 12.1 # values # BUG: These are badly specified and cause the grammar to break if used generally. 
# Value forms (boolean/bitstring/integer/null/cstring) and OBJECT IDENTIFIER
# component lists, plus the "definitive identifier" used in module headers.
# todo: consider more literals from 16.9 real_value = Regex(r'-?\d+(\.\d*)?') # todo: this doesn't really follow the spec boolean_value = TRUE | FALSE bitstring_value = bstring | hstring # todo: consider more forms from 21.9 integer_value = signed_number null_value = NULL cstring_value = dblQuotedString builtin_value = boolean_value | bitstring_value | real_value | integer_value | null_value | cstring_value defined_value = valuereference # todo: more options from 13.1 # object identifier value name_form = Unique(identifier) number_form = Unique(number) name_and_number_form = name_form + Suppress('(') + number_form + Suppress(')') objid_components = name_and_number_form | name_form | number_form | defined_value objid_components_list = OneOrMore(objid_components) object_identifier_value = Suppress('{') + \ (objid_components_list | (defined_value + objid_components_list)) + \ Suppress('}') value = builtin_value | defined_value | object_identifier_value # definitive identifier value definitive_number_form = Unique(number) definitive_name_and_number_form = name_form + Suppress('(') + definitive_number_form + Suppress(')') definitive_objid_component = definitive_name_and_number_form | name_form | definitive_number_form definitive_objid_component_list = OneOrMore(definitive_objid_component) definitive_identifier = Optional(Suppress('{') + definitive_objid_component_list + Suppress('}')) # tags class_ = UNIVERSAL | APPLICATION | PRIVATE class_number = Unique(number) # todo: consider defined values from 30.1 tag = Suppress('[') + Optional(class_) + class_number + Suppress(']') tag_default = EXPLICIT_TAGS | IMPLICIT_TAGS | AUTOMATIC_TAGS | empty # extensions extension_default = EXTENSIBILITY_IMPLIED | empty # types defined_type = Unique(typereference) # todo: consider other defined types from 13.1 referenced_type = Unique(defined_type) # todo: consider other ref:d types from 16.3 # Forward-declare these, they can only be fully defined once # we have all types defined. 
There are some circular dependencies. named_type = Forward() type_ = Forward() # constraints # todo: consider the full subtype and general constraint syntax described in 45.* # but for now, just implement a simple integer value range. value_range_constraint = (signed_number | valuereference | MIN) + Suppress('..') + (signed_number | valuereference | MAX) size_constraint = Optional(Suppress('(')) + Suppress(SIZE) + Suppress('(') + (value_range_constraint | signed_number) + Suppress(')') + Optional(Suppress(')')) constraint = Suppress('(') + value_range_constraint + Suppress(')') # TODO: consider exception syntax from 24.1 extension_marker = Unique(ELLIPSIS) component_type_optional = named_type + Suppress(OPTIONAL) component_type_default = named_type + Suppress(DEFAULT) + value component_type_components_of = Suppress(COMPONENTS_OF) + type_ component_type = component_type_components_of | component_type_optional | component_type_default | named_type tagged_type = tag + Optional(IMPLICIT | EXPLICIT) + type_ named_number_value = Suppress('(') + signed_number + Suppress(')') named_number = identifier + named_number_value enumeration = named_number | identifier set_type = SET + braced_list(component_type | extension_marker) sequence_type = SEQUENCE + braced_list(component_type | extension_marker) sequenceof_type = Suppress(SEQUENCE) + Optional(size_constraint) + Suppress(OF) + (type_ | named_type) setof_type = Suppress(SET) + Optional(size_constraint) + Suppress(OF) + (type_ | named_type) choice_type = CHOICE + braced_list(named_type | extension_marker) enumerated_type = ENUMERATED + braced_list(enumeration | extension_marker) bitstring_type = BIT_STRING + Optional(braced_list(named_number)) plain_integer_type = INTEGER restricted_integer_type = INTEGER + braced_list(named_number) boolean_type = BOOLEAN real_type = REAL null_type = NULL object_identifier_type = OBJECT_IDENTIFIER octetstring_type = OCTET_STRING + Optional(size_constraint) unrestricted_characterstring_type = 
CHARACTER_STRING restricted_characterstring_type = BMPString | GeneralString | \ GraphicString | IA5String | \ ISO646String | NumericString | \ PrintableString | TeletexString | \ T61String | UniversalString | \ UTF8String | VideotexString | VisibleString characterstring_type = restricted_characterstring_type | unrestricted_characterstring_type useful_type = GeneralizedTime | UTCTime | ObjectDescriptor # todo: consider other builtins from 16.2 simple_type = (boolean_type | null_type | octetstring_type | characterstring_type | real_type | plain_integer_type | object_identifier_type | useful_type) + Optional(constraint) constructed_type = choice_type | sequence_type | set_type value_list_type = restricted_integer_type | enumerated_type builtin_type = value_list_type | tagged_type | simple_type | constructed_type | sequenceof_type | setof_type | bitstring_type type_ << (builtin_type | referenced_type) # EXT: identifier should not be Optional here, but # our other ASN.1 code generator supports unnamed members, # and we use them. named_type << (Optional(identifier) + type_) type_assignment = typereference + '::=' + type_ value_assignment = valuereference + type_ + '::=' + value assignment = type_assignment | value_assignment assignment_list = ZeroOrMore(assignment) assigned_identifier = Optional(object_identifier_value | defined_value) global_module_reference = module_reference + assigned_identifier symbol = Unique(reference) # TODO: parameterized reference? 
# Module structure: EXPORTS/IMPORTS clauses, the module body, and the
# ModuleDefinition wrapper; ASN.1 '--' comments are ignored everywhere.
symbol_list = Group(delimitedList(symbol)) symbols_from_module = symbol_list + Suppress(FROM) + global_module_reference symbols_from_module_list = OneOrMore(symbols_from_module) symbols_imported = Optional(symbols_from_module_list) exports = Optional(Suppress(EXPORTS) + symbol_list + Suppress(';')) imports = Optional(Suppress(IMPORTS) + symbols_imported + Suppress(';')) module_body = (exports + imports + assignment_list) module_defaults = Suppress(tag_default + extension_default) # we don't want these in the AST module_identifier = module_reference + definitive_identifier module_definition = module_identifier + DEFINITIONS + module_defaults + '::=' + BEGIN + module_body + END module_definition.ignore(comment) # Mark up the parse results with token tags identifier.setParseAction(annotate('Identifier')) named_number_value.setParseAction(annotate('Value')) tag.setParseAction(annotate('Tag')) class_.setParseAction(annotate('TagClass')) class_number.setParseAction(annotate('TagClassNumber')) type_.setParseAction(annotate('Type')) simple_type.setParseAction(annotate('SimpleType')) choice_type.setParseAction(annotate('ChoiceType')) sequence_type.setParseAction(annotate('SequenceType')) set_type.setParseAction(annotate('SetType')) value_list_type.setParseAction(annotate('ValueListType')) bitstring_type.setParseAction(annotate('BitStringType')) referenced_type.setParseAction(annotate('ReferencedType')) sequenceof_type.setParseAction(annotate('SequenceOfType')) setof_type.setParseAction(annotate('SetOfType')) named_number.setParseAction(annotate('NamedValue')) constraint.setParseAction(annotate('Constraint')) size_constraint.setParseAction(annotate('SizeConstraint')) component_type.setParseAction(annotate('ComponentType')) component_type_optional.setParseAction(annotate('ComponentTypeOptional')) component_type_default.setParseAction(annotate('ComponentTypeDefault')) component_type_components_of.setParseAction(annotate('ComponentTypeComponentsOf')) 
# Remaining annotations, then the start symbol: one or more modules per file.
tagged_type.setParseAction(annotate('TaggedType')) named_type.setParseAction(annotate('NamedType')) type_assignment.setParseAction(annotate('TypeAssignment')) value_assignment.setParseAction(annotate('ValueAssignment')) valuereference.setParseAction(annotate('ValueReference')) module_reference.setParseAction(annotate('ModuleReference')) module_body.setParseAction(annotate('ModuleBody')) module_definition.setParseAction(annotate('ModuleDefinition')) extension_marker.setParseAction(annotate('ExtensionMarker')) name_form.setParseAction(annotate('NameForm')) number_form.setParseAction(annotate('NumberForm')) name_and_number_form.setParseAction(annotate('NameAndNumberForm')) object_identifier_value.setParseAction(annotate('ObjectIdentifierValue')) definitive_identifier.setParseAction(annotate('DefinitiveIdentifier')) definitive_number_form.setParseAction(annotate('DefinitiveNumberForm')) definitive_name_and_number_form.setParseAction(annotate('DefinitiveNameAndNumberForm')) imports.setParseAction(annotate('Imports')) exports.setParseAction(annotate('Exports')) assignment_list.setParseAction(annotate('AssignmentList')) bstring.setParseAction(annotate('BinaryStringValue')) hstring.setParseAction(annotate('HexStringValue')) start = OneOrMore(module_definition) return start
# Lucene-style query grammar: a `term` is an optional `field:` prefix (with an
# optional second ':' captured as "is_contains"), then a fuzzy word, a quoted
# phrase with optional ~proximity, a [..]/{..} range (brackets recorded as
# incl/excl named results), or a parenthesized subquery — plus an optional
# ^boost.  operatorPrecedence layers +/- modifiers, NOT, implicit AND, and OR
# on top; the final parser is exported as `LuceneParser`.
# NOTE(review): depends on tokens defined elsewhere in the file (TILDE, CARAT,
# valid_word, string, LPAR/RPAR, to_, not_/and_/or_, expression, ...).
# The statements below are kept byte-identical on their collapsed line.
integer = Regex(r"\d+").setParseAction(lambda t:int(t[0])) proximity_modifier = Group(TILDE + integer("proximity")) number = Regex(r'\d+(\.\d+)?').setParseAction(lambda t:float(t[0])) fuzzy_modifier = TILDE + Optional(number, default=0.5)("fuzzy") term = Forward() field_name = valid_word.copy().setName("fieldname") range_search = Group((LBRACK('incl_lower') | LBRACE('excl_lower')) + SkipTo(to_)('lower') + to_ + SkipTo(RBRACK | RBRACE)("upper") + (RBRACK('incl_upper') | RBRACE('excl_upper'))) boost = (CARAT + number("boost")) string_expr = Group(string + proximity_modifier) | string word_expr = (Group(valid_word + fuzzy_modifier) | valid_word) term << (Optional(field_name("field") + COLON + Optional(COLON("is_contains"))) + (word_expr("query") | string_expr("phrase") | range_search("range") | Group(LPAR + expression + RPAR)("subquery")) + Optional(boost)) term.setParseAction(lambda t:[t] if 'field' in t or 'query' in t or 'boost' in t else t) expression << operatorPrecedence(term, [ (required_modifier | prohibit_modifier, 1, opAssoc.RIGHT), ((not_ | '!')("not_").setParseAction(lambda:"NOT"), 1, opAssoc.RIGHT), (Optional(and_ | '&&')("and_").setParseAction(lambda:"AND"), 2, opAssoc.LEFT), ((or_ | '||')("or_").setParseAction(lambda:"OR"), 2, opAssoc.LEFT), ]) LuceneParser = expression
# Chemical-formula parser: elements with optional [isotope], {ion charge} and
# counts; implicit "count elements" groups and explicit "(composite)count"
# groups; mixtures by %wt/%mass, %vol, layer thickness, and absolute
# mass/volume; and an optional trailing '@density' (with 'n' selecting
# natural density).  Parse actions fold tokens into Formula objects via the
# module helpers (_immutable, _mix_by_weight_pairs, _mix_by_volume_pairs).
# NOTE(review): physical lines are collapsed and the embedded '#' comments
# truncate them — code reproduced byte-identical; reflowing is a separate task.
def formula_grammar(table): """ Construct a parser for molecular formulas. :Parameters: *table* = None : PeriodicTable If table is specified, then elements and their associated fields will be chosen from that periodic table rather than the default. :Returns: *parser* : pyparsing.ParserElement. The ``parser.parseString()`` method returns a list of pairs (*count, fragment*), where fragment is an *isotope*, an *element* or a list of pairs (*count, fragment*). """ # Recursive composite = Forward() mixture = Forward() # whitespace and separators space = Optional(White().suppress()) separator = space + Literal('+').suppress() + space # Lookup the element in the element table symbol = Regex("[A-Z][a-z]*") symbol = symbol.setParseAction(lambda s, l, t: table.symbol(t[0])) # Translate isotope openiso = Literal('[').suppress() closeiso = Literal(']').suppress() isotope = Optional(~White() + openiso + Regex("[1-9][0-9]*") + closeiso, default='0') isotope = isotope.setParseAction(lambda s, l, t: int(t[0]) if t[0] else 0) # Translate ion openion = Literal('{').suppress() closeion = Literal('}').suppress() ion = Optional(~White() + openion + Regex("([1-9][0-9]*)?[+-]") + closeion, default='0+') ion = ion.setParseAction( lambda s, l, t: int(t[0][-1] + (t[0][:-1] if len(t[0]) > 1 else '1'))) # Translate counts fract = Regex("(0|[1-9][0-9]*|)([.][0-9]*)") fract = fract.setParseAction(lambda s, l, t: float(t[0]) if t[0] else 1) whole = Regex("[1-9][0-9]*") whole = whole.setParseAction(lambda s, l, t: int(t[0]) if t[0] else 1) count = Optional(~White() + (fract | whole), default=1) # Convert symbol, isotope, ion, count to (count, isotope) element = symbol + isotope + ion + count def convert_element(string, location, tokens): """interpret string as element""" #print "convert_element received", tokens symbol, isotope, ion, count = tokens[0:4] if isotope != 0: symbol = symbol[isotope] if ion != 0: symbol = symbol.ion[ion] return (count, symbol) element = 
element.setParseAction(convert_element) # Convert "count elements" to a pair implicit_group = count + OneOrMore(element) def convert_implicit(string, location, tokens): """convert count followed by fragment""" #print "implicit", tokens count = tokens[0] fragment = tokens[1:] return fragment if count == 1 else (count, fragment) implicit_group = implicit_group.setParseAction(convert_implicit) # Convert "(composite) count" to a pair opengrp = space + Literal('(').suppress() + space closegrp = space + Literal(')').suppress() + space explicit_group = opengrp + composite + closegrp + count def convert_explicit(string, location, tokens): """convert (fragment)count""" #print "explicit", tokens count = tokens[-1] fragment = tokens[:-1] return fragment if count == 1 else (count, fragment) explicit_group = explicit_group.setParseAction(convert_explicit) # Build composite from a set of groups group = implicit_group | explicit_group implicit_separator = separator | space composite << group + ZeroOrMore(implicit_separator + group) density = Literal('@').suppress() + count + Optional(Regex("[ni]"), default='i') compound = composite + Optional(density, default=None) def convert_compound(string, location, tokens): """convert material @ density""" #print "compound", tokens if tokens[-1] is None: return Formula(structure=_immutable(tokens[:-1])) elif tokens[-1] == 'n': return Formula(structure=_immutable(tokens[:-2]), natural_density=tokens[-2]) else: return Formula(structure=_immutable(tokens[:-2]), density=tokens[-2]) compound = compound.setParseAction(convert_compound) partsep = space + Literal('//').suppress() + space percent = Literal('%').suppress() weight_percent = Regex("%(w((eigh)?t)?|m(ass)?)").suppress() + space by_weight = (count + weight_percent + mixture + ZeroOrMore(partsep + count + (weight_percent | percent) + mixture) + partsep + mixture) def convert_by_weight(string, location, tokens): """convert mixture by %wt or %mass""" #print "by weight", tokens piece = 
tokens[1:-1:2] + [tokens[-1]] fract = [float(v) for v in tokens[:-1:2]] fract.append(100 - sum(fract)) #print piece, fract if len(piece) != len(fract): raise ValueError("Missing base component of mixture") if fract[-1] < 0: raise ValueError("Formula percentages must sum to less than 100%") return _mix_by_weight_pairs(zip(piece, fract)) mixture_by_weight = by_weight.setParseAction(convert_by_weight) volume_percent = Regex("%v(ol(ume)?)?").suppress() + space by_volume = (count + volume_percent + mixture + ZeroOrMore(partsep + count + (volume_percent | percent) + mixture) + partsep + mixture) def convert_by_volume(string, location, tokens): """convert mixture by %vol""" #print "by volume", tokens piece = tokens[1:-1:2] + [tokens[-1]] fract = [float(v) for v in tokens[:-1:2]] fract.append(100 - sum(fract)) #print piece, fract if len(piece) != len(fract): raise ValueError("Missing base component of mixture " + string) if fract[-1] < 0: raise ValueError("Formula percentages must sum to less than 100%") return _mix_by_volume_pairs(zip(piece, fract)) mixture_by_volume = by_volume.setParseAction(convert_by_volume) mixture_by_layer = Forward() layer_thick = Group(count + Regex(LENGTH_RE) + space) layer_part = (layer_thick + mixture) | (opengrp + mixture_by_layer + closegrp + count) mixture_by_layer << layer_part + ZeroOrMore(partsep + layer_part) def convert_by_layer(string, location, tokens): """convert layer thickness '# nm material'""" if len(tokens) < 2: return tokens piece = [] fract = [] for p1, p2 in zip(tokens[0::2], tokens[1::2]): if isinstance(p1, Formula): f = p1.absthick * float(p2) p = p1 else: f = float(p1[0]) * LENGTH_UNITS[p1[1]] p = p2 piece.append(p) fract.append(f) total = sum(fract) vfract = [(v / total) * 100 for v in fract] result = _mix_by_volume_pairs(zip(piece, vfract)) result.thickness = total return result mixture_by_layer = mixture_by_layer.setParseAction(convert_by_layer) mixture_by_absmass = Forward() absmass_mass = Group(count + 
Regex(MASS_VOLUME_RE) + space) absmass_part = (absmass_mass + mixture) | (opengrp + mixture_by_absmass + closegrp + count) mixture_by_absmass << absmass_part + ZeroOrMore(partsep + absmass_part) def convert_by_absmass(string, location, tokens): """convert mass '# mg material'""" if len(tokens) < 2: return tokens piece = [] fract = [] for p1, p2 in zip(tokens[0::2], tokens[1::2]): if isinstance(p1, Formula): p = p1 f = p1.total_mass * float(p2) else: p = p2 value = float(p1[0]) if p1[1] in VOLUME_UNITS: # convert to volume in liters to mass in grams before mixing if p.density is None: raise ValueError("Need the mass density of " + str(p)) f = value * VOLUME_UNITS[p1[1]] * 1000. * p.density else: f = value * MASS_UNITS[p1[1]] piece.append(p) fract.append(f) total = sum(fract) mfract = [(m / total) * 100 for m in fract] result = _mix_by_weight_pairs(zip(piece, mfract)) result.total_mass = total return result mixture_by_absmass = mixture_by_absmass.setParseAction(convert_by_absmass) ungrouped_mixture = (mixture_by_weight | mixture_by_volume | mixture_by_layer | mixture_by_absmass) grouped_mixture = opengrp + ungrouped_mixture + closegrp + Optional( density, default=None) def convert_mixture(string, location, tokens): """convert (mixture) @ density""" formula = tokens[0] if tokens[-1] == 'n': formula.natural_density = tokens[-2] elif tokens[-1] == 'i': formula.density = tokens[-2] # elif tokens[-1] is None return formula grouped_mixture = grouped_mixture.setParseAction(convert_mixture) mixture << (compound | grouped_mixture) formula = (compound | ungrouped_mixture | grouped_mixture) grammar = Optional(formula, default=Formula()) + StringEnd() grammar.setName('Chemical Formula') return grammar
# TeX/LaTeX tokenizer: a `texcmd` is either an escaped single character
# (\[, \], \{, \}, \\, \&, ...) or a backslash followed by an alphanumeric
# command name, with zero-or-more [optional-args] and {params}; $...$ math is
# captured whole via QuotedString, and `filler` soaks up plain text.  Matched
# commands are wrapped in TexCmd objects.  `document` is the full parser.
# NOTE(review): `print s` / `print '...'` below are Python-2 statements inside
# a disabled `if 0:` debug block — this chunk appears to target Python 2;
# confirm before running under Python 3.  Depends on argfun/paramfun/
# ZeroOrMoreAsList/TexCmd/backslash defined elsewhere in the file.
# Statements kept byte-identical on their collapsed line.
texcmd = Forward() filler = CharsNotIn(backslash + '$') filler2 = CharsNotIn(backslash + '$' + '{}') arg = '[' + CharsNotIn("]") + ']' arg.setParseAction(argfun) dollarmath = QuotedString('$', multiline=True, unquoteResults=False) param = Suppress(Literal('{')) + ZeroOrMoreAsList(dollarmath | filler2 | QuotedString('{', endQuoteChar='}', unquoteResults=False) | texcmd) + Suppress(Literal('}')) param.setParseAction(paramfun) def bs(c): return Literal("\\" + c) singles = bs("[") | bs("]") | bs("{") | bs("}") | bs("\\") | bs("&") | bs("_") | bs(",") | bs("#") | bs("\n") | bs(";") | bs("|") | bs("%") | bs("*") | bs("~") | bs("^") texcmd << (singles | Word("\\", "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789", min = 2)) + ZeroOrMoreAsList(arg) + ZeroOrMoreAsList(param) def texcmdfun(s, loc, toks): return TexCmd(s, loc, toks) texcmd.setParseAction(texcmdfun) #legal = "".join([chr(x) for x in set(range(32, 127)) - set(backslash)]) #filler = Word(legal) document = ZeroOrMore(dollarmath | texcmd | filler) + StringEnd().suppress() if 0: s = "This is \\\\ test" print s for t in document.parseString(s): if isinstance(t, TexCmd): print '====> cmd=[%s]' % t.cmd, t else: print '====>', t sys.exit(-1)
# Text-format protobuf parsing: `top_level_proto_definition` matches
# `identifier: value` pairs, `nested_proto` matches `identifier { ... }`
# blocks recursively, and `proto_parser` accepts any sequence of the two.
# Each rule forwards to an externally-defined *_fn parse action.  The tail of
# the line sets up generic field / array_ref / integer / string tokens used
# by the rest of the file.  Statements kept byte-identical on their collapsed
# line; *_fn callbacks and `proto_integer`/`identifier` are defined elsewhere.
LBRACE = Suppress('{') RBRACE = Suppress('}') COLON = Suppress(':') proto_string = copy.copy(dblQuotedString) proto_string.setParseAction(proto_string_fn) proto_data = proto_integer | proto_string proto_data.setParseAction(proto_data_fn) top_level_proto_definition = identifier + COLON + proto_data top_level_proto_definition.setParseAction(top_level_proto_definition_fn) nested_proto = Forward() nested_proto.setParseAction(nested_proto_fn) proto_parser = ZeroOrMore(top_level_proto_definition | nested_proto) proto_parser.setParseAction(proto_parser_fn) nested_proto << identifier + LBRACE + proto_parser + RBRACE # End proto-specific parsing field = delimitedList(identifier, '.') field.setParseAction(field_fn) array_ref = field + Suppress('[') + Word(nums) + Suppress(']') array_ref.setParseAction(array_ref_fn) integer = Word(nums) integer.setParseAction(integer_fn) string = copy.copy(dblQuotedString) string.setParseAction(string_fn)
return query_ast.Function(x[0], x[2]) code_single.setParseAction(lambda x: query_ast.Code(x[2])) code_list.setParseAction(lambda x: query_ast.Code(*x[3::2])) serial_single.setParseAction(lambda x: query_ast.Serial(x[2])) serial_list.setParseAction(lambda x: query_ast.Serial(*x[3::2])) type_single.setParseAction(lambda x: query_ast.Type(x[2])) type_list.setParseAction(lambda x: query_ast.Type(*x[3::2])) cond_single.setParseAction(lambda x: query_ast.Condition(x[2])) cond_list.setParseAction(lambda x: query_ast.Condition(*x[3::2])) path_single.setParseAction(lambda x: query_ast.Path(x[2])) path_list.setParseAction(lambda x: query_ast.Path(*x[3::2])) tristate_expr.setParseAction(lambda x: query_ast.TriState(*x[0::2])) label_expr.setParseAction(lambda x: query_ast.Labelled(x[2])) assy_expr.setParseAction(lambda x: query_ast.Assy(x[2])) or_expr.setParseAction(_pa_or_expr) and_expr.setParseAction(_pa_and_expr) func_expr.setParseAction(_pa_func_expr) not_expr.setParseAction(lambda x: query_ast.Not(x[1])) paren_expr.setParseAction(lambda x: x[1]) def search_tree(query): """ Create a search tree for a query string. :param str query: The query string to generate the search tree for. :returns: A new search tree. """ return root.parseString(query)[0]
integer )('val') # dictionaries dict_key = good_name | quoted dictionary << (S("{") + O( delimitedList( Group(dict_key('key') + S(':') + value('value')) ) )('content') + S("}")) dictionary.setParseAction(eval_dictionary) array << Group(S("[") + O(delimitedList(value)('elements')) + S("]")) array.setParseAction(eval_array) def parse_value(string, filename=None): ''' This is useful for debugging ''' try: ret_value = value.parseString(string, parseAll=True) return ret_value['val'] except ParseException as e: where = Where(filename, string, line=e.lineno, column=e.col)
term = Forward() field_name = valid_word.copy().setName("fieldname") incl_range_search = Group(LBRACK + term("lower") + to_ + term("upper") + RBRACK) excl_range_search = Group(LBRACE + term("lower") + to_ + term("upper") + RBRACE) range_search = incl_range_search("incl_range") | excl_range_search("excl_range") boost = CARAT + number("boost") string_expr = Group(string + proximity_modifier) | string word_expr = Group(valid_word + fuzzy_modifier) | valid_word term << ( Optional(field_name("field") + COLON) + (word_expr | string_expr | range_search | Group(LPAR + expression + RPAR)) + Optional(boost) ) term.setParseAction(lambda t: [t] if "field" in t or "boost" in t else None) expression << infixNotation( term, [ (required_modifier | prohibit_modifier, 1, opAssoc.RIGHT), ((not_ | "!").setParseAction(lambda: "NOT"), 1, opAssoc.RIGHT), ((and_ | "&&").setParseAction(lambda: "AND"), 2, opAssoc.LEFT), (Optional(or_ | "||").setParseAction(lambda: "OR"), 2, opAssoc.LEFT), ], ) # test strings taken from grammar description doc, and TestQueryParser.java tests = r""" a and b a and not b
# Parses `text` as a JSON-like dict literal (numbers, quoted strings with
# Cyrillic support, arrays, nested dicts) and stores the result on
# self.parsed.  Each rule's parse action rewrites tokens into tagged dicts
# like {'type': 'string', 'value': ...}; string/number actions are wrapped in
# the externally-defined debug_lambda for tracing.
# NOTE(review): this line is collapsed and contains an embedded '# string|'
# comment that truncates it — code kept byte-identical; reflowing is a
# separate task.  Presumably part of a parser class; confirm enclosing scope.
def __init__(self,text): rus_alphas = 'їійцукенгшщзхъфывапролджэячсмитьбюІЇЙЦУКЕНГШЩЗХЪФЫВАПРОЛДЖЭЯЧСМИТЬБЮ' comma = ',' string = OneOrMore(Word(alphas+rus_alphas+alphanums+'.')) string.setParseAction(lambda t:t) quoted_string = ( Suppress('"') + Optional(string) + Suppress('"')| Suppress("'") + Optional(string) + Suppress("'")| string ) ('string') def string_handler(t): asList = t.asList() if len(t) == 0: return { 'type':'string', 'value':'', } else: return { 'type':'string', 'value':asList[0] } quoted_string.setParseAction(debug_lambda( string_handler, comment="parsed string" ) ) number = OneOrMore(Word(nums+'.')) ('number') number.setParseAction(debug_lambda( lambda t:{ 'type':'number', 'value':t.asList()[0] }, comment="parsing number" ), ) value = Forward() member = Forward() array = Forward() dict_ = Forward() elements = delimitedList(value) ('elements') members = delimitedList(member) ('members') member << ( value+Suppress(':')+Optional(ZeroOrMore(' '))+value ) ('member') member.setParseAction(lambda t:{ 'type':'member', 'key':t.asList()[0], 'value':t.asList()[1] }) value << ( number| # string| quoted_string| array| dict_ ) ('value') array << (Suppress("[") + Optional(elements) + Suppress("]")) ('array') array.setParseAction(lambda t:{ 'type':'array', 'elements':t.asList() }) dict_ << (Suppress("{") + Optional(members) + Suppress("}")) ('dict') dict_.setParseAction(lambda t: { 'type': 'dict', 'members':t.asList() }) self.parsed = dict_.parseString(text)
def formula_grammar(table):
    """
    Construct a parser for molecular formulas.

    :Parameters:
        *table* = None : PeriodicTable
            If table is specified, then elements and their associated fields
            will be chosen from that periodic table rather than the default.

    :Returns:
        *parser* : pyparsing.ParserElement.
            The ``parser.parseString()`` method returns a list of
            pairs (*count,fragment*), where fragment is an *isotope*,
            an *element* or a list of pairs (*count,fragment*).
    """
    # Recursive rules, defined forward and filled in at the end.
    composite = Forward()
    mixture = Forward()

    # whitespace and separators
    space = Optional(White().suppress())
    separator = space + Literal('+').suppress() + space

    # Lookup the element in the element table
    symbol = Regex("[A-Z][a-z]*")
    symbol = symbol.setParseAction(lambda s, l, t: table.symbol(t[0]))

    # Translate isotope, e.g. "[2]" after a symbol; default '0' means
    # "no isotope given".  ~White() forbids a space before the bracket.
    openiso = Literal('[').suppress()
    closeiso = Literal(']').suppress()
    isotope = Optional(~White() + openiso + Regex("[1-9][0-9]*") + closeiso,
                       default='0')
    isotope = isotope.setParseAction(lambda s, l, t: int(t[0]) if t[0] else 0)

    # Translate ion, e.g. "{2+}"; an omitted magnitude means 1, so "{+}" -> +1.
    openion = Literal('{').suppress()
    closeion = Literal('}').suppress()
    ion = Optional(~White()
                   + openion
                   + Regex("([1-9][0-9]*)?[+-]")
                   + closeion,
                   default='0+')
    # Reorder "<magnitude><sign>" into "<sign><magnitude>" and parse as int.
    ion = ion.setParseAction(lambda s, l, t: int(t[0][-1] + (t[0][:-1] if len(t[0]) > 1 else '1')))

    # Translate counts: fractional (requires a decimal point) or whole.
    fract = Regex("(0|[1-9][0-9]*|)([.][0-9]*)")
    fract = fract.setParseAction(lambda s, l, t: float(t[0]) if t[0] else 1)
    whole = Regex("[1-9][0-9]*")
    whole = whole.setParseAction(lambda s, l, t: int(t[0]) if t[0] else 1)
    count = Optional(~White() + (fract | whole), default=1)

    # Convert symbol,isotope,ion,count to (count,isotope)
    element = symbol + isotope + ion + count

    def convert_element(string, location, tokens):
        #print "convert_element received",tokens
        symbol, isotope, ion, count = tokens[0:4]
        # Indexing a symbol by isotope / ion selects the specific species.
        if isotope != 0:
            symbol = symbol[isotope]
        if ion != 0:
            symbol = symbol.ion[ion]
        return (count, symbol)
    element = element.setParseAction(convert_element)

    # Convert "count elements" to a pair
    implicit_group = count + OneOrMore(element)

    def convert_implicit(string, location, tokens):
        #print "implicit",tokens
        count = tokens[0]
        fragment = tokens[1:]
        # A count of 1 is elided to keep the structure flat.
        return fragment if count == 1 else (count, fragment)
    implicit_group = implicit_group.setParseAction(convert_implicit)

    # Convert "(composite) count" to a pair
    opengrp = space + Literal('(').suppress() + space
    closegrp = space + Literal(')').suppress() + space
    explicit_group = opengrp + composite + closegrp + count

    def convert_explicit(string, location, tokens):
        #print "explicit",tokens
        count = tokens[-1]
        fragment = tokens[:-1]
        return fragment if count == 1 else (count, fragment)
    explicit_group = explicit_group.setParseAction(convert_explicit)

    # Build composite from a set of groups
    group = implicit_group | explicit_group
    implicit_separator = separator | space
    composite << group + ZeroOrMore(implicit_separator + group)

    # Optional trailing density: "@<count>" plus 'n' (natural) or 'i' flag.
    density = Literal('@').suppress() + count + Optional(Regex("[ni]"), default='i')
    compound = composite + Optional(density, default=None)

    def convert_compound(string, location, tokens):
        #print "compound",tokens
        if tokens[-1] is None:
            return Formula(structure=_immutable(tokens[:-1]))
        elif tokens[-1] == 'n':
            return Formula(structure=_immutable(tokens[:-2]),
                           natural_density=tokens[-2])
        else:
            return Formula(structure=_immutable(tokens[:-2]),
                           density=tokens[-2])
    compound = compound.setParseAction(convert_compound)

    # Mixture part separator and percentage markers.
    partsep = space + Literal('//').suppress() + space
    percent = Literal('%').suppress()

    # Mixture by weight: "<pct> %wt <part> // ... // <base part>"; the final
    # (unnumbered) part receives the remainder up to 100%.
    weight_percent = Regex("%(w((eigh)?t)?|m(ass)?)").suppress() + space
    by_weight = count + weight_percent + mixture + ZeroOrMore(partsep + count + (weight_percent | percent) + mixture) + partsep + mixture

    def convert_by_weight(string, location, tokens):
        #print "by weight",tokens
        piece = tokens[1:-1:2] + [tokens[-1]]
        fract = [float(v) for v in tokens[:-1:2]]
        fract.append(100 - sum(fract))
        #print piece, fract
        if len(piece) != len(fract):
            raise ValueError("Missing base component of mixture")
        if fract[-1] < 0:
            raise ValueError("Formula percentages must sum to less than 100%")
        return _mix_by_weight_pairs(zip(piece, fract))
    mixture_by_weight = by_weight.setParseAction(convert_by_weight)

    # Mixture by volume, same shape as by-weight above.
    volume_percent = Regex("%v(ol(ume)?)?").suppress() + space
    by_volume = count + volume_percent + mixture + ZeroOrMore(partsep + count + (volume_percent | percent) + mixture) + partsep + mixture

    def convert_by_volume(string, location, tokens):
        #print "by volume",tokens
        piece = tokens[1:-1:2] + [tokens[-1]]
        fract = [float(v) for v in tokens[:-1:2]]
        fract.append(100 - sum(fract))
        #print piece, fract
        if len(piece) != len(fract):
            raise ValueError("Missing base component of mixture " + string)
        if fract[-1] < 0:
            raise ValueError("Formula percentages must sum to less than 100%")
        return _mix_by_volume_pairs(zip(piece, fract))
    mixture_by_volume = by_volume.setParseAction(convert_by_volume)

    # Mixture by layer thickness, e.g. "10 nm Si // 5 nm Fe".
    mixture_by_layer = Forward()
    layer_thick = Group(count + Regex("(nm|um|mm)") + space)
    layer_part = (layer_thick + mixture) | (opengrp + mixture_by_layer + closegrp + count)
    mixture_by_layer << layer_part + ZeroOrMore(partsep + layer_part)

    def convert_by_layer(string, location, tokens):
        # Convert layer thicknesses to volume fractions of the total stack.
        units = {'nm': 1e-9,
                 'um': 1e-6,
                 'mm': 1e-3,
                 }
        if len (tokens) < 2:
            return tokens
        piece = []
        fract = []
        for p1, p2 in zip(tokens[0::2], tokens[1::2]):
            if isinstance(p1, Formula):
                # Nested layer group: scale its absolute thickness by count.
                f = p1.absthick * float(p2)
                p = p1
            else:
                f = float(p1[0]) * units[p1[1]]
                p = p2
            piece.append(p)
            fract.append(f)
        total = sum(fract)
        vfract = [(v / total) * 100 for v in fract]
        result = _mix_by_volume_pairs(zip(piece, vfract))
        result.absthick = total
        return result
    mixture_by_layer = mixture_by_layer.setParseAction(convert_by_layer)

    # Mixture by absolute mass, e.g. "2 g NaCl // 98 g H2O".
    mixture_by_absmass = Forward()
    absmass_mass = Group(count + Regex("(ng|ug|mg|g|kg)") + space)
    absmass_part = (absmass_mass + mixture) | (opengrp + mixture_by_absmass + closegrp + count)
    mixture_by_absmass << absmass_part + ZeroOrMore(partsep + absmass_part)

    def convert_by_absmass(string, location, tokens):
        units = {'ng': 1e-9,
                 'ug': 1e-6,
                 'mg': 1e-3,
                 'g': 1e+0,
                 'kg': 1e+3,
                 }
        if len (tokens) < 2:
            return tokens
        piece = []
        fract = []
        for p1, p2 in zip(tokens[0::2], tokens[1::2]):
            if isinstance(p1, Formula):
                f = p1.absmass * float(p2)
                p = p1
            else:
                f = float(p1[0]) * units[p1[1]]
                p = p2
            piece.append(p)
            fract.append(f)
        total = sum(fract)
        mfract = [(m / total) * 100 for m in fract]
        result = _mix_by_weight_pairs(zip(piece, mfract))
        result.absmass = total
        return result
    mixture_by_absmass = mixture_by_absmass.setParseAction(convert_by_absmass)

    # Mixture by absolute volume, e.g. "2 ml X // 98 ml Y".
    mixture_by_absvolume = Forward()
    absvolume_vol = Group(count + Regex("(nl|ul|ml|l)") + space)
    absvolume_part = (absvolume_vol + mixture) | (opengrp + mixture_by_absvolume + closegrp + count)
    mixture_by_absvolume << absvolume_part + ZeroOrMore(partsep + absvolume_part)

    def convert_by_absvolume(string, location, tokens):
        units = {'nl': 1e-9,
                 'ul': 1e-6,
                 'ml': 1e-3,
                 'l': 1e+0,
                 }
        if len (tokens) < 2:
            return tokens
        piece = []
        fract = []
        for p1, p2 in zip(tokens[0::2], tokens[1::2]):
            if isinstance(p1, Formula):
                f = p1.absvolume * float(p2)
                p = p1
            else:
                f = float(p1[0]) * units[p1[1]]
                p = p2
            piece.append(p)
            fract.append(f)
        total = sum(fract)
        vfract = [(v / total) * 100 for v in fract]
        # NOTE(review): these two checks mirror convert_by_weight but appear
        # unreachable here (piece/fract grow in lockstep) — confirm intent.
        if len(piece) != len(fract):
            raise ValueError("Missing base component of mixture " + string)
        if fract[-1] < 0:
            raise ValueError("Formula percentages must sum to less than 100%")
        result = _mix_by_volume_pairs(zip(piece, vfract))
        result.absvolume = total
        return result
    mixture_by_absvolume = mixture_by_absvolume.setParseAction(convert_by_absvolume)

    # A mixture component is a compound or a parenthesized sub-mixture.
    mixture << (compound | (opengrp + (mixture_by_weight | mixture_by_volume) + closegrp))
    formula = compound | mixture_by_weight | mixture_by_volume | mixture_by_layer | mixture_by_absmass | mixture_by_absvolume
    # An empty string parses as the empty Formula.
    grammar = Optional(formula, default=Formula()) + StringEnd()

    grammar.setName('Chemical Formula')
    return grammar
def gen_parser():
    """Build the pyparsing grammar for a SQL-like SELECT statement.

    Returns the top-level ParserElement for a single SELECT with optional
    INTO / WHERE / GROUP BY / WINDOW clauses.  Oracle-style ``--`` comments
    are ignored.  Token names and the ``label``/``replace`` parse-action
    helpers come from the surrounding module (QueryTokens etc.).
    """
    # define SQL tokens
    selectStmt = Forward()
    selectToken = Keyword(QueryTokens.SELECT, caseless=True)
    fromToken = Keyword(QueryTokens.FROM, caseless=True)
    intoToken = Keyword(QueryTokens.INTO, caseless=True)
    groupByToken = Keyword(QueryTokens.GROUPBY, caseless=True)
    windowToken = Keyword(QueryTokens.WINDOW, caseless=True)
    asToken = Keyword(QueryTokens.AS, caseless=True).setParseAction(upcaseTokens)
    # NOTE(review): NULL is the only case-sensitive keyword here — confirm.
    nullToken = Keyword(QueryTokens.NULL, caseless=False).setParseAction(replace(QueryTokens.NULL_TOKEN))

    # Math operators
    E = CaselessLiteral("E")
    binop = oneOf("= != < > >= <= == eq ne lt le gt ge %s" % (QueryTokens.CONTAINS), caseless=True).setParseAction(upcaseTokens)
    arithSign = Word("+-", exact=1)
    # Real numbers require a decimal point; both may carry an exponent.
    realNum = Combine(Optional(arithSign) +
                      (Word(nums) + "." + Optional(Word(nums)) |
                       ("." + Word(nums))) +
                      Optional(E + Optional(arithSign) + Word(nums)))
    intNum = Combine(Optional(arithSign) + Word(nums) +
                     Optional(E + Optional("+") + Word(nums)))

    # Identifiers and (possibly dotted) column names.
    ident = Word(alphas, alphanums + "_$").setName("identifier")
    columnName = delimitedList(ident, ".", combine=True)
    columnName.setParseAction(label(QueryTokens.COLUMN_NAME))
    aliasName = delimitedList(ident, ".", combine=True)

    # Literals, each labelled with its token type.
    stringLiteral = Forward()
    stringLiteral << quotedString
    stringLiteral.setParseAction(label(QueryTokens.STRING_LITERAL))
    intLiteral = Forward()
    intLiteral << intNum
    intLiteral.setParseAction(label(QueryTokens.INTEGER_LITERAL))
    floatLiteral = Forward()
    floatLiteral << realNum
    floatLiteral.setParseAction(label(QueryTokens.FLOAT_LITERAL))

    # Column expressions: function calls (possibly nested) or plain columns,
    # optionally aliased with AS.
    columnExpression = Forward()
    columnFunction = Word(alphas, alphanums) + "(" + Optional(delimitedList(Group(floatLiteral) | Group(stringLiteral) | Group(intLiteral) | columnExpression)) + ")"
    columnFunction.setParseAction(label(QueryTokens.FUNCTION_OR_AGGREGATE))
    columnExpression << Group((columnFunction | columnName) + Optional(asToken + aliasName))
    columnExpressionList = Group(delimitedList(columnExpression))
    tableName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    tableNameList = Group(delimitedList(tableName))

    # WINDOW clause time span, e.g. "5 minutes".
    timeExpression = Word(nums) + oneOf("seconds minutes hours days", caseless=True).setParseAction(downcaseTokens)

    # INTO destinations: STDOUT, a named table, or a named stream.
    stdoutToken = Keyword(QueryTokens.STDOUT, caseless=True).setParseAction(upcaseTokens)
    tableToken = Keyword(QueryTokens.TABLE, caseless=True).setParseAction(upcaseTokens)
    streamToken = Keyword(QueryTokens.STREAM, caseless=True).setParseAction(upcaseTokens)
    intoLocation = stdoutToken | (tableToken + ident) | (streamToken + ident)

    # WHERE clause: comparisons, IN-lists, IN-subqueries, and parenthesized
    # sub-expressions combined with AND/OR.
    whereExpression = Forward()
    and_ = Keyword(QueryTokens.AND, caseless=True).setParseAction(upcaseTokens)
    or_ = Keyword(QueryTokens.OR, caseless=True).setParseAction(upcaseTokens)
    in_ = Keyword(QueryTokens.IN, caseless=True).setParseAction(upcaseTokens)

    columnRval = realNum | intNum | nullToken | columnExpression | quotedString.setParseAction(removeQuotes)
    whereCondition = Group(
        (columnExpression + binop + columnRval).setParseAction(label(QueryTokens.WHERE_CONDITION)) |
        (columnExpression + in_ + "(" + delimitedList(columnRval) + ")").setParseAction(label(QueryTokens.WHERE_CONDITION)) |
        (columnExpression + in_ + "(" + selectStmt + ")").setParseAction(label(QueryTokens.WHERE_CONDITION)) |
        ("(" + whereExpression + ")")
    )
    whereExpression << whereCondition + ZeroOrMore((and_ | or_) + whereExpression)

    # define the grammar
    selectStmt << (Group(selectToken + columnExpressionList).setResultsName("select") +
                   fromToken +
                   tableNameList.setResultsName("sources") +
                   Optional(intoToken + intoLocation, "").setResultsName("into") +
                   Optional(Group(CaselessLiteral(QueryTokens.WHERE) + whereExpression), "").setResultsName("where") +
                   Optional(groupByToken + columnExpressionList, "").setResultsName("groupby") +
                   Optional(windowToken + timeExpression, "").setResultsName("window"))

    parser = selectStmt

    # define Oracle comment format, and ignore them
    oracleSqlComment = "--" + restOfLine
    parser.ignore(oracleSqlComment)
    return parser
# Boolean-expression grammar built by precedence climbing: an atom is an
# operand or a parenthesized sub-expression; each pass of the loop wraps the
# previous pattern in the operators of the next precedence level.
expr = Forward()
atom = operand | lpar + expr + rpar
prev_pattern = atom
atom.setParseAction(toExpression)
# NOTE(review): mid-module import, and Optional appears unused in this
# fragment — confirm it is needed further down the file.
from pyparsing import Optional
for opers in PRECEDENCE:
    pattern_list = []
    # only accepts unary and binary operation
    # as there are no ternary in boolean
    for op in opers:
        if op in UNARY:
            # Unary: operator may stack (e.g. "not not x"), hence the
            # self-referencing Forward.
            alternative = Forward()
            alternative << (oper_literals[op] + (prev_pattern | alternative))
            alternative.setParseAction(toExpression)
            unary_pattern = prev_pattern | alternative
            pattern_list.append(unary_pattern)
        elif op in BINARY:
            # Binary, left-associative: operand followed by zero or more
            # "<op> operand" tails.
            rest = oper_literals[op] + prev_pattern
            rest.setParseAction(toExpression)
            binary_pattern = prev_pattern + ZeroOrMore(rest)
            pattern_list.append(binary_pattern)
        else:
            raise Exception(op)
    # Operators on the same level compete via Or (longest match).
    if len(pattern_list) == 1:
        prev_pattern = pattern_list[0]
    else:
        prev_pattern = Or(pattern_list)
# protobuf message field: "<qualifier> <type> <name> = <number>;"
# The '-' after qualifier makes the rest of the rule mandatory once the
# qualifier matched (pyparsing And with error stop).
field = qualifier - type_("type_") + identifier("identifier") + EQ + integer(
    "field_number") + SEMI
field.setParseAction(field_fn)
# "oneof <name> { <type> <name> = <number>; ... }"
oneof_definition = ONEOF - identifier + LBRACE + ZeroOrMore(Group(
    type_("type_") + identifier("identifier") + EQ + integer("field_number") +
    SEMI)) + RBRACE
oneof_definition.setParseAction(oneof_definition_fn)
# A message body line is a field, enum, oneof, or nested message.
message_line = (field | enum_definition | oneof_definition |
                message_definition)("message_line")
message_line.setParseAction(message_line_fn)
message_body << Group(ZeroOrMore(message_line))("message_body")
message_body.setParseAction(message_body_fn)
# "rpc <Method> (<Request>) returns (<Response>)"
method_definition = ((RPC - identifier("method") + LPAR +
                      Optional(identifier("request")) + RPAR + RETURNS +
                      LPAR + Optional(identifier("response")) + RPAR))("method_definition")
method_definition.setParseAction(method_definition_fn)
# "service <Name> { rpc ...; }"
service_definition = (SERVICE - identifier("service") + LBRACE +
                      ZeroOrMore(Group(method_definition)) + RBRACE)("service_definition")
service_definition.setParseAction(service_definition_fn)
# "package a.b.c;" — the dotted name is kept combined.
package_directive = (Group(PACKAGE - delimitedList(identifier, '.', combine=True) + SEMI))("package_directive")
# Function call: name(arg, ...) with a possibly empty argument list.
function = identifier.setResultsName('name') + lpar.suppress() + Group(
    Optional(delimitedList(atom))).setResultsName("args") + rpar.suppress()
# Close the recursive atom rule now that 'function' exists.
atom << (listing | number | string | variable | true | false | none | function)
_false = Const(False)
_true = Const(True)
# Fold literal tokens into AST node objects.
number.setParseAction(lambda t: Const(_number(t[0])))
variable.setParseAction(lambda t: Variable(t[0].strip("$")))
string.setParseAction(lambda t: Const(_str(t[0])))
none.setParseAction(lambda t: _false)  # 'none' is treated as false here
false.setParseAction(lambda t: _false)
true.setParseAction(lambda t: _true)
dellist.setParseAction(lambda s, l, t: List(t[:]))
function.setParseAction(_make_func)
atom.setParseAction(lambda s, l, t: t[0])


class Parser(object):
    """Parser class for python expression."""

    # Shared lock; presumably guards non-reentrant pyparsing globals —
    # TODO(review): confirm where it is acquired.
    lock = Lock()

    def __init__(self):
        """Initialize the parser; no per-instance state is needed."""
        pass

    def parse(self, expr):
        """Returns the BNF-Tree of the given expression

        :expr: String of the expression
# Arithmetic precedence layers: each level groups one-or-more repetitions of
# the previous level joined by that level's operator, all left-associative.
signless_mult_expr.setParseAction(operators.InfixLeftSymbol.process)
signless_mult_term = (signless_mult_expr | sign_term)
# Multiplication.
mult_expr = Group(signless_mult_term + OneOrMore(multop + signless_mult_term))
mult_expr.setParseAction(operators.InfixLeftSymbol.process)
mult_term = (mult_expr | signless_mult_term)
# Addition.
add_expr = Group(mult_term + OneOrMore(addop + mult_term))
add_expr.setParseAction(operators.InfixLeftSymbol.process)
add_term = (add_expr | mult_term)
# Complete expression.
expr <<= add_term
expr.setParseAction(operators.Expression.process)
# A unit expression, containing only units. ############################
unit_expr = Forward()
single_unit = identifier.copy()
single_unit.setParseAction(operators.Unit.process)
literal_one = Literal("1")
literal_one.setParseAction(operators.Unit.process_dimensionless)
# The one allows for example for 1/h = h^-1.
unit_term = single_unit | literal_one | (lpar + unit_expr + rpar)
# Exponent
unit_exp_term = Forward()
# Lucene-style query grammar: fielded terms, inclusive/exclusive ranges,
# boosts, and boolean operators layered on with infixNotation.
term = Forward()
field_name = valid_word.copy().setName("fieldname")
# "[a TO b]" includes the endpoints; "{a TO b}" excludes them.
incl_range_search = Group(LBRACK + term("lower") + to_ + term("upper") + RBRACK)
excl_range_search = Group(LBRACE + term("lower") + to_ + term("upper") + RBRACE)
range_search = incl_range_search("incl_range") | excl_range_search(
    "excl_range")
boost = (CARAT + number("boost"))
string_expr = Group(string + proximity_modifier) | string
word_expr = Group(valid_word + fuzzy_modifier) | valid_word
term << (Optional(field_name("field") + COLON) +
         (word_expr | string_expr | range_search |
          Group(LPAR + expression + RPAR)) +
         Optional(boost))
# Keep the group only when a field or boost was present; returning None
# leaves the tokens unchanged.
term.setParseAction(lambda t: [t] if 'field' in t or 'boost' in t else None)

expression << infixNotation(term, [
    (required_modifier | prohibit_modifier, 1, opAssoc.RIGHT),
    ((not_ | '!').setParseAction(lambda: "NOT"), 1, opAssoc.RIGHT),
    ((and_ | '&&').setParseAction(lambda: "AND"), 2, opAssoc.LEFT),
    # Bare adjacency is implicit OR (the operator itself is Optional).
    (Optional(or_ | '||').setParseAction(lambda: "OR"), 2, opAssoc.LEFT),
])


def flatten(lis):
    """Given a list, possibly nested to any level, return it flattened."""
    new_lis = []
    for item in lis:
        if type(item) == type([]):
            new_lis.extend(flatten(item))
def _build_asn1_grammar():
    """Build the pyparsing grammar for ASN.1 module definitions.

    Returns the start rule (one or more module definitions).  Parse results
    are wrapped in AnnotatedToken nodes via the ``annotate`` helper; section
    numbers in the todo comments refer to the X.680 specification.
    """
    def build_identifier(prefix_pattern):
        # An identifier is one char from prefix_pattern followed by an
        # optional run of letters/digits/hyphens.
        identifier_suffix = Optional(Word(srange('[-0-9a-zA-Z]')))
        # todo: more rigorous? trailing hyphens and -- forbidden
        return Combine(Word(srange(prefix_pattern), exact=1) + identifier_suffix)

    def braced_list(element_rule):
        # "{ a, b, c }" with a possibly empty element list, grouped.
        elements_rule = Optional(delimitedList(element_rule))
        return Suppress('{') + Group(elements_rule) + Suppress('}')

    def annotate(name):
        # Parse-action factory: wrap the token list in an AnnotatedToken.
        def annotation(t):
            return AnnotatedToken(name, t.asList())
        return annotation

    # Reserved words
    ANY = Keyword('ANY')
    DEFINED_BY = Keyword('DEFINED BY')
    DEFINITIONS = Keyword('DEFINITIONS')
    BEGIN = Keyword('BEGIN')
    END = Keyword('END')
    OPTIONAL = Keyword('OPTIONAL')
    DEFAULT = Keyword('DEFAULT')
    TRUE = Keyword('TRUE')
    FALSE = Keyword('FALSE')
    UNIVERSAL = Keyword('UNIVERSAL')
    APPLICATION = Keyword('APPLICATION')
    PRIVATE = Keyword('PRIVATE')
    MIN = Keyword('MIN')
    MAX = Keyword('MAX')
    IMPLICIT = Keyword('IMPLICIT')
    EXPLICIT = Keyword('EXPLICIT')
    EXPLICIT_TAGS = Keyword('EXPLICIT TAGS')
    IMPLICIT_TAGS = Keyword('IMPLICIT TAGS')
    AUTOMATIC_TAGS = Keyword('AUTOMATIC TAGS')
    EXTENSIBILITY_IMPLIED = Keyword('EXTENSIBILITY IMPLIED')
    COMPONENTS_OF = Keyword('COMPONENTS OF')
    ELLIPSIS = Keyword('...')
    SIZE = Keyword('SIZE')
    OF = Keyword('OF')
    IMPORTS = Keyword('IMPORTS')
    EXPORTS = Keyword('EXPORTS')
    FROM = Keyword('FROM')

    # Built-in types
    SEQUENCE = Keyword('SEQUENCE')
    SET = Keyword('SET')
    CHOICE = Keyword('CHOICE')
    ENUMERATED = Keyword('ENUMERATED')
    BIT_STRING = Keyword('BIT STRING')
    BOOLEAN = Keyword('BOOLEAN')
    REAL = Keyword('REAL')
    OCTET_STRING = Keyword('OCTET STRING')
    CHARACTER_STRING = Keyword('CHARACTER STRING')
    NULL = Keyword('NULL')
    INTEGER = Keyword('INTEGER')
    OBJECT_IDENTIFIER = Keyword('OBJECT IDENTIFIER')

    # Restricted string types
    BMPString = Keyword('BMPString')
    GeneralString = Keyword('GeneralString')
    GraphicString = Keyword('GraphicString')
    IA5String = Keyword('IA5String')
    ISO646String = Keyword('ISO646String')
    NumericString = Keyword('NumericString')
    PrintableString = Keyword('PrintableString')
    TeletexString = Keyword('TeletexString')
    T61String = Keyword('T61String')
    UniversalString = Keyword('UniversalString')
    UTF8String = Keyword('UTF8String')
    VideotexString = Keyword('VideotexString')
    VisibleString = Keyword('VisibleString')

    # Useful types
    GeneralizedTime = Keyword('GeneralizedTime')
    UTCTime = Keyword('UTCTime')
    ObjectDescriptor = Keyword('ObjectDescriptor')

    # Literals
    number = Word(nums)
    signed_number = Combine(Optional('-') + number)  # todo: consider defined values from 18.1
    bstring = Suppress('\'') + StringOf('01') + Suppress('\'B')
    hstring = Suppress('\'') + StringOf('0123456789ABCDEF') + Suppress('\'H')

    # Comments: "--" to the next "--" or end of line, plus C-style.
    hyphen_comment = Regex(r"--[\s\S]*?(--|$)", flags=re.MULTILINE)
    comment = hyphen_comment | cStyleComment

    # identifier
    identifier = build_identifier('[a-z]')

    # references
    # these are duplicated to force unique token annotations
    valuereference = build_identifier('[a-z]')
    typereference = build_identifier('[A-Z]')
    module_reference = build_identifier('[A-Z]')
    reference = valuereference | typereference  # TODO: consider object references from 12.1

    # values
    # todo: consider more literals from 16.9
    boolean_value = TRUE | FALSE
    bitstring_value = bstring | hstring  # todo: consider more forms from 21.9
    integer_value = signed_number
    null_value = NULL
    cstring_value = dblQuotedString

    exponent = CaselessLiteral('e') + signed_number
    real_value = Combine(signed_number + Optional(Literal('.') + Optional(number)) + Optional(exponent))

    # In value range constraints, decimal points must be followed by number, or
    # the grammar becomes ambiguous: ([1.].100) vs ([1]..[100])
    constraint_real_value = Combine(signed_number + Optional(Literal('.') + number) + Optional(exponent))

    builtin_value = boolean_value | bitstring_value | real_value | integer_value | null_value | cstring_value
    external_value_reference = module_reference + Suppress('.') + valuereference
    defined_value = external_value_reference | valuereference  # todo: more options from 13.1
    referenced_value = Unique(defined_value)  # todo: more options from 16.11

    # object identifier value
    name_form = Unique(identifier)
    number_form = Unique(number)
    name_and_number_form = name_form + Suppress('(') + number_form + Suppress(')')
    objid_components = name_and_number_form | name_form | number_form | defined_value
    objid_components_list = OneOrMore(objid_components)
    object_identifier_value = Suppress('{') + \
        (objid_components_list | (defined_value + objid_components_list)) + \
        Suppress('}')

    value = builtin_value | referenced_value | object_identifier_value

    # definitive identifier value
    definitive_number_form = Unique(number)
    definitive_name_and_number_form = name_form + Suppress('(') + definitive_number_form + Suppress(')')
    definitive_objid_component = definitive_name_and_number_form | name_form | definitive_number_form
    definitive_objid_component_list = OneOrMore(definitive_objid_component)
    definitive_identifier = Optional(Suppress('{') + definitive_objid_component_list + Suppress('}'))

    # tags
    class_ = UNIVERSAL | APPLICATION | PRIVATE
    class_number = Unique(number)  # todo: consider defined values from 30.1
    tag = Suppress('[') + Optional(class_) + class_number + Suppress(']')
    tag_default = EXPLICIT_TAGS | IMPLICIT_TAGS | AUTOMATIC_TAGS

    # extensions
    extension_default = Unique(EXTENSIBILITY_IMPLIED)

    # values
    # Forward-declare these, they can only be fully defined once
    # we have all types defined. There are some circular dependencies.
    named_type = Forward()
    type_ = Forward()

    # constraints
    # todo: consider the full subtype and general constraint syntax described in 45.*
    lower_bound = (constraint_real_value | signed_number | referenced_value | MIN)
    upper_bound = (constraint_real_value | signed_number | referenced_value | MAX)
    single_value_constraint = Suppress('(') + value + Suppress(')')
    value_range_constraint = Suppress('(') + lower_bound + Suppress('..') + upper_bound + Suppress(')')
    # TODO: Include contained subtype constraint here if we ever implement it.
    size_constraint = Optional(Suppress('(')) + Suppress(SIZE) + (single_value_constraint | value_range_constraint) + Optional(Suppress(')'))

    # types
    # todo: consider other defined types from 13.1
    defined_type = Optional(module_reference + Suppress('.'), default=None) + typereference + Optional(size_constraint, default=None)

    # TODO: consider exception syntax from 24.1
    extension_marker = Unique(ELLIPSIS)

    component_type_optional = named_type + Suppress(OPTIONAL)
    component_type_default = named_type + Suppress(DEFAULT) + value
    component_type_components_of = Suppress(COMPONENTS_OF) + type_
    component_type = component_type_components_of | component_type_optional | component_type_default | named_type

    tagged_type = tag + Optional(IMPLICIT | EXPLICIT, default=None) + type_

    named_number_value = Suppress('(') + signed_number + Suppress(')')
    named_number = identifier + named_number_value
    named_nonumber = Unique(identifier)
    enumeration = named_number | named_nonumber

    set_type = SET + braced_list(component_type | extension_marker)
    sequence_type = SEQUENCE + braced_list(component_type | extension_marker)
    sequenceof_type = Suppress(SEQUENCE) + Optional(size_constraint, default=None) + Suppress(OF) + (type_ | named_type)
    setof_type = Suppress(SET) + Optional(size_constraint, default=None) + Suppress(OF) + (type_ | named_type)
    choice_type = CHOICE + braced_list(named_type | extension_marker)
    selection_type = identifier + Suppress('<') + type_
    enumerated_type = ENUMERATED + braced_list(enumeration | extension_marker)
    bitstring_type = BIT_STRING + Optional(braced_list(named_number), default=[]) + Optional(single_value_constraint | size_constraint, default=None)
    plain_integer_type = INTEGER
    restricted_integer_type = INTEGER + braced_list(named_number) + Optional(single_value_constraint, default=None)
    boolean_type = BOOLEAN
    real_type = REAL
    null_type = NULL
    object_identifier_type = OBJECT_IDENTIFIER
    octetstring_type = OCTET_STRING + Optional(size_constraint)
    unrestricted_characterstring_type = CHARACTER_STRING
    restricted_characterstring_type = BMPString | GeneralString | \
        GraphicString | IA5String | \
        ISO646String | NumericString | \
        PrintableString | TeletexString | \
        T61String | UniversalString | \
        UTF8String | VideotexString | \
        VisibleString
    characterstring_type = (restricted_characterstring_type | unrestricted_characterstring_type) + Optional(size_constraint)
    useful_type = GeneralizedTime | UTCTime | ObjectDescriptor

    # ANY type
    any_type = ANY + Optional(Suppress(DEFINED_BY + identifier))

    # todo: consider other builtins from 16.2
    simple_type = (any_type | boolean_type | null_type | octetstring_type | characterstring_type | real_type | plain_integer_type | object_identifier_type | useful_type) + Optional(value_range_constraint | single_value_constraint)
    constructed_type = choice_type | sequence_type | set_type
    value_list_type = restricted_integer_type | enumerated_type
    builtin_type = value_list_type | tagged_type | simple_type | constructed_type | sequenceof_type | setof_type | bitstring_type
    referenced_type = defined_type | selection_type  # todo: consider other ref:d types from 16.3

    # Close the forward declarations now that all alternatives exist.
    type_ << (builtin_type | referenced_type)
    named_type << (identifier + type_)

    type_assignment = typereference + '::=' + type_
    value_assignment = valuereference + type_ + '::=' + value
    assignment = type_assignment | value_assignment
    assignment_list = ZeroOrMore(assignment)

    # TODO: Maybe handle full assigned-identifier syntax with defined values
    # described in 12.1, but I haven't been able to find examples of it, and I
    # can't say for sure what acceptable syntax is.
    global_module_reference = module_reference + Optional(object_identifier_value, default=None)

    symbol = Unique(reference)  # TODO: parameterized reference?
    symbol_list = delimitedList(symbol, delim=',')
    symbols_from_module = Group(Group(symbol_list) + Suppress(FROM) + global_module_reference)
    symbols_from_module_list = OneOrMore(symbols_from_module)
    symbols_imported = Unique(symbols_from_module_list)
    exports = Suppress(EXPORTS) + Optional(symbol_list) + Suppress(';')
    imports = Suppress(IMPORTS) + Optional(symbols_imported) + Suppress(';')

    module_body = Optional(exports, default=None) + Optional(imports, default=None) + assignment_list
    module_identifier = module_reference + definitive_identifier
    module_definition = module_identifier + Suppress(DEFINITIONS) + Optional(tag_default, default=None) + \
        Optional(extension_default, default=None) + Suppress('::=') + \
        Suppress(BEGIN) + module_body + Suppress(END)

    module_definition.ignore(comment)

    # Mark up the parse results with token tags
    identifier.setParseAction(annotate('Identifier'))
    named_number_value.setParseAction(annotate('Value'))
    tag.setParseAction(annotate('Tag'))
    class_.setParseAction(annotate('TagClass'))
    class_number.setParseAction(annotate('TagClassNumber'))
    type_.setParseAction(annotate('Type'))
    simple_type.setParseAction(annotate('SimpleType'))
    choice_type.setParseAction(annotate('ChoiceType'))
    sequence_type.setParseAction(annotate('SequenceType'))
    set_type.setParseAction(annotate('SetType'))
    value_list_type.setParseAction(annotate('ValueListType'))
    bitstring_type.setParseAction(annotate('BitStringType'))
    sequenceof_type.setParseAction(annotate('SequenceOfType'))
    setof_type.setParseAction(annotate('SetOfType'))
    named_number.setParseAction(annotate('NamedValue'))
    named_nonumber.setParseAction(annotate('NamedValue'))
    single_value_constraint.setParseAction(annotate('SingleValueConstraint'))
    size_constraint.setParseAction(annotate('SizeConstraint'))
    value_range_constraint.setParseAction(annotate('ValueRangeConstraint'))
    component_type.setParseAction(annotate('ComponentType'))
    component_type_optional.setParseAction(annotate('ComponentTypeOptional'))
    component_type_default.setParseAction(annotate('ComponentTypeDefault'))
    component_type_components_of.setParseAction(annotate('ComponentTypeComponentsOf'))
    tagged_type.setParseAction(annotate('TaggedType'))
    named_type.setParseAction(annotate('NamedType'))
    type_assignment.setParseAction(annotate('TypeAssignment'))
    value_assignment.setParseAction(annotate('ValueAssignment'))
    module_reference.setParseAction(annotate('ModuleReference'))
    global_module_reference.setParseAction(annotate('GlobalModuleReference'))
    module_body.setParseAction(annotate('ModuleBody'))
    module_definition.setParseAction(annotate('ModuleDefinition'))
    extension_marker.setParseAction(annotate('ExtensionMarker'))
    name_form.setParseAction(annotate('NameForm'))
    number_form.setParseAction(annotate('NumberForm'))
    name_and_number_form.setParseAction(annotate('NameAndNumberForm'))
    object_identifier_value.setParseAction(annotate('ObjectIdentifierValue'))
    definitive_identifier.setParseAction(annotate('DefinitiveIdentifier'))
    definitive_number_form.setParseAction(annotate('DefinitiveNumberForm'))
    definitive_name_and_number_form.setParseAction(annotate('DefinitiveNameAndNumberForm'))
    exports.setParseAction(annotate('Exports'))
    imports.setParseAction(annotate('Imports'))
    assignment_list.setParseAction(annotate('AssignmentList'))
    bstring.setParseAction(annotate('BinaryStringValue'))
    hstring.setParseAction(annotate('HexStringValue'))
    defined_type.setParseAction(annotate('DefinedType'))
    selection_type.setParseAction(annotate('SelectionType'))
    referenced_value.setParseAction(annotate('ReferencedValue'))

    start = OneOrMore(module_definition)
    return start
    return Identifier(name)  # tail of a parse action defined above this chunk


def process_function(toks):
    # Wrap a parsed brace-group as a function literal node.
    return FunctionLiteral(toks[0])


# BibTeX .bst grammar: '#'-prefixed ints, quoted strings, identifiers, and
# recursive brace-delimited token lists.
comment = '%' + restOfLine
lbrace = Suppress('{')
rbrace = Suppress('}')
intLiteral = Regex(r'#-?\d+').setParseAction(process_int_literal)
stringLiteral = QuotedString('"').setParseAction(process_string_literal)
# Identifiers may not start with a digit and exclude bst special chars.
restrictedPrintables = ''.join(c for c in printables if not c in '#%^&{}~\\')
nonnums = ''.join(c for c in restrictedPrintables if not c.isdigit())
identifier = Word(nonnums, restrictedPrintables).setParseAction(process_identifier)
token = stringLiteral | intLiteral | identifier
tokenList = Forward()
tokenList.setParseAction(process_function)
tokenList << Group(lbrace + ZeroOrMore(token | tokenList) + rbrace)
commandName = Word(alphas).setParseAction(downcaseTokens)
arg = Group(lbrace + ZeroOrMore(token | tokenList) + rbrace)
command = commandName + ZeroOrMore(arg)
bstGrammar = OneOrMore(command) + StringEnd()
# sloooooow
# bstGrammar.ignore(comment)
# somewhat faster: comments are stripped by hand (see strip_comment below)


def strip_comment(line):
    """Strip the commented part of the line."

    >>> print strip_comment('a normal line')
    a normal line
    def __init__(self):
        """Build the pyparsing grammar for xarray-based calculator expressions.

        No evaluation happens during parsing: each matched sub-expression
        pushes an opcode/operand onto ``self.expr_stack`` via the ``push_*``
        parse-action callbacks, and the stack is evaluated afterwards.
        """
        self.ae = False              # NOTE(review): flag purpose not visible in this chunk
        self.local_dict = None       # extra variable bindings, set by the caller later
        self.f = None
        self.user_functions = None
        self.expr_stack = []         # postfix token stack filled by parse actions
        self.texpr_stack = []        # auxiliary stack (used for ternary handling)

        # Define constants
        self.constants = {}

        # Define Operators (binary ops plus "!", which maps to bitwise invert —
        # presumably intended as logical not on boolean arrays; verify against use)
        self.opn = {"+": operator.add, "-": operator.sub,
                    "*": operator.mul, "/": operator.truediv,
                    ">": operator.gt, ">=": operator.ge,
                    "<": operator.lt, "<=": operator.le,
                    "==": operator.eq, "!=": operator.ne,
                    "|": operator.or_, "&": operator.and_,
                    "!": operator.inv}

        # Define xarray DataArray operators with 1 input parameter
        self.xfn1 = {"angle": xr.ufuncs.angle, "arccos": xr.ufuncs.arccos,
                     "arccosh": xr.ufuncs.arccosh, "arcsin": xr.ufuncs.arcsin,
                     "arcsinh": xr.ufuncs.arcsinh, "arctan": xr.ufuncs.arctan,
                     "arctanh": xr.ufuncs.arctanh, "ceil": xr.ufuncs.ceil,
                     "conj": xr.ufuncs.conj, "cos": xr.ufuncs.cos,
                     "cosh": xr.ufuncs.cosh, "deg2rad": xr.ufuncs.deg2rad,
                     "degrees": xr.ufuncs.degrees, "exp": xr.ufuncs.exp,
                     "expm1": xr.ufuncs.expm1, "fabs": xr.ufuncs.fabs,
                     "fix": xr.ufuncs.fix, "floor": xr.ufuncs.floor,
                     "frexp": xr.ufuncs.frexp, "imag": xr.ufuncs.imag,
                     "iscomplex": xr.ufuncs.iscomplex, "isfinite": xr.ufuncs.isfinite,
                     "isinf": xr.ufuncs.isinf, "isnan": xr.ufuncs.isnan,
                     "isreal": xr.ufuncs.isreal, "log": xr.ufuncs.log,
                     "log10": xr.ufuncs.log10, "log1p": xr.ufuncs.log1p,
                     "log2": xr.ufuncs.log2, "rad2deg": xr.ufuncs.rad2deg,
                     "radians": xr.ufuncs.radians, "real": xr.ufuncs.real,
                     "rint": xr.ufuncs.rint, "sign": xr.ufuncs.sign,
                     "signbit": xr.ufuncs.signbit, "sin": xr.ufuncs.sin,
                     "sinh": xr.ufuncs.sinh, "sqrt": xr.ufuncs.sqrt,
                     "square": xr.ufuncs.square, "tan": xr.ufuncs.tan,
                     "tanh": xr.ufuncs.tanh, "trunc": xr.ufuncs.trunc}

        # Define xarray DataArray operators with 2 input parameters
        # NOTE(review): "logicalnot" is unary but is grouped here — confirm
        # how the evaluator dispatches it.
        self.xfn2 = {"arctan2": xr.ufuncs.arctan2, "copysign": xr.ufuncs.copysign,
                     "fmax": xr.ufuncs.fmax, "fmin": xr.ufuncs.fmin,
                     "fmod": xr.ufuncs.fmod, "hypot": xr.ufuncs.hypot,
                     "ldexp": xr.ufuncs.ldexp, "logaddexp": xr.ufuncs.logaddexp,
                     "logaddexp2": xr.ufuncs.logaddexp2,
                     "logicaland": xr.ufuncs.logical_and,
                     "logicalnot": xr.ufuncs.logical_not,
                     "logicalor": xr.ufuncs.logical_or,
                     "logicalxor": xr.ufuncs.logical_xor,
                     "maximum": xr.ufuncs.maximum, "minimum": xr.ufuncs.minimum,
                     "nextafter": xr.ufuncs.nextafter}

        # Define non-xarray DataArray operators with 2 input parameters
        self.fn2 = {"percentile": np.percentile}

        # Define xarray DataArray reduction operators
        self.xrfn = {"all": xr.DataArray.all, "any": xr.DataArray.any,
                     "argmax": xr.DataArray.argmax, "argmin": xr.DataArray.argmin,
                     "max": xr.DataArray.max, "mean": xr.DataArray.mean,
                     "median": xr.DataArray.median, "min": xr.DataArray.min,
                     "prod": xr.DataArray.prod, "sum": xr.DataArray.sum,
                     "std": xr.DataArray.std, "var": xr.DataArray.var}

        # Conditional operators table.
        # NOTE(review): maps "<" to np.percentile, which looks like a
        # copy-paste placeholder — confirm against the evaluator.
        self.xcond = {"<": np.percentile}

        # Define Grammar
        point = Literal(".")
        e = CaselessLiteral("E")
        # Signed decimal number with optional fraction and exponent.
        fnumber = Combine(Word("+-"+nums, nums) +
                          Optional(point + Optional(Word(nums))) +
                          Optional(e + Word("+-"+nums, nums)))
        variable = Word(alphas, alphas+nums+"_$")
        seq = Literal("=")
        b_not = Literal("~")
        plus = Literal("+")
        minus = Literal("-")
        mult = Literal("*")
        div = Literal("/")
        gt = Literal(">")
        gte = Literal(">=")
        lt = Literal("<")
        lte = Literal("<=")
        eq = Literal("==")
        neq = Literal("!=")
        b_or = Literal("|")
        b_and = Literal("&")
        l_not = Literal("!")
        lpar = Literal("(").suppress()
        rpar = Literal(")").suppress()
        comma = Literal(",")
        colon = Literal(":")
        lbrac = Literal("[")
        rbrac = Literal("]")
        lcurl = Literal("{")
        rcurl = Literal("}")
        qmark = Literal("?")
        scolon = Literal(";")
        addop = plus | minus
        multop = mult | div
        sliceop = colon
        # Two-character comparators first so ">=" is not consumed as ">".
        compop = gte | lte | gt | lt
        eqop = eq | neq
        bitcompop = b_or | b_and
        bitnotop = b_not
        logicalnotop = l_not
        assignop = seq
        expop = Literal("^")

        expr = Forward()
        indexexpr = Forward()

        # Atom: the alternatives are tried in order (MatchFirst), so the
        # ordering below is semantically significant — e.g. assignment before
        # plain variable, 4-arg call before 3-arg before 2-arg before 1-arg.
        atom = (Optional("-") +
                (variable + seq + expr).setParseAction(self.push_assign) |
                indexexpr.setParseAction(self.push_index) |
                (lpar + expr + qmark.setParseAction(self.push_ternary1) + expr +
                 scolon.setParseAction(self.push_ternary2) + expr +
                 rpar).setParseAction(self.push_ternary) |
                (lpar + expr + qmark + expr + scolon + expr +
                 rpar).setParseAction(self.push_ternary) |
                (logicalnotop + expr).setParseAction(self.push_ulnot) |
                (bitnotop + expr).setParseAction(self.push_unot) |
                (minus + expr).setParseAction(self.push_uminus) |
                (variable + lcurl + expr + rcurl).setParseAction(self.push_mask) |
                (variable + lpar + expr + (comma + expr)*3 +
                 rpar).setParseAction(self.push_expr4) |
                (variable + lpar + expr + (comma + expr)*2 +
                 rpar).setParseAction(self.push_expr3) |
                (variable + lpar + expr + comma + expr +
                 rpar).setParseAction(self.push_expr2) |
                (variable + lpar + expr + rpar |
                 variable).setParseAction(self.push_expr1) |
                fnumber.setParseAction(self.push_expr) |
                (lpar + expr +
                 ZeroOrMore(comma + expr).setParseAction(self.get_tuple) +
                 rpar).setParseAction(self.push_tuple) |
                (lpar + expr.suppress() + rpar).setParseAction(self.push_uminus))

        # Define order of operations for operators: exponent binds tightest,
        # then multiplicative, additive, slice, comparison, equality,
        # bitwise, and finally assignment.
        factor = Forward()
        factor << atom + ZeroOrMore((expop + factor).setParseAction(self.push_op))
        term = factor + ZeroOrMore((multop + factor).setParseAction(self.push_op))
        term2 = term + ZeroOrMore((addop + term).setParseAction(self.push_op))
        term3 = term2 + ZeroOrMore((sliceop + term2).setParseAction(self.push_op))
        term4 = term3 + ZeroOrMore((compop + term3).setParseAction(self.push_op))
        term5 = term4 + ZeroOrMore((eqop + term4).setParseAction(self.push_op))
        term6 = term5 + ZeroOrMore((bitcompop + term5).setParseAction(self.push_op))
        expr << term6 + ZeroOrMore((assignop + term6).setParseAction(self.push_op))

        # Define index operators: a bare ":" (before "," or "]") selects a
        # whole axis; otherwise an index slot is an expression or a ":" range.
        colon_expr = (colon + FollowedBy(comma) ^
                      colon + FollowedBy(rbrac)).setParseAction(self.push_colon)
        range_expr = colon_expr | expr | colon
        indexexpr << (variable + lbrac + delimitedList(range_expr, delim=',') +
                      rbrac).setParseAction(self.push_expr)

        self.parser = expr
_in = _union + ZeroOrMore("in" + _union) _in.setParseAction(lambda s, l, t: reduce(lambda e, n: In(e, n), t[0:1] + t[2::2])) _comparison = _in + ZeroOrMore(oneOf("= != < > <= >=") + _in) _comparison.setParseAction(lambda s, l, t: _reduce_list_to_expr(t)) _and = _comparison + ZeroOrMore("&" + _comparison) _and.setParseAction(lambda s, l, t: reduce(lambda e, n: And(e, n), t[0:1] + t[2::2])) _or = _and + ZeroOrMore(oneOf("| xor xnor") + _and) _or.setParseAction(lambda s, l, t: _reduce_list_to_expr(t)) _ite = Forward() _ite <<= _or + Optional("?" + _ite + ":" + _ite) _ite.setParseAction(lambda s, l, t: t[0] if len(t) <= 1 else Ite(t[0], t[2], t[4])) _iff = _ite + ZeroOrMore("<->" + _ite) _iff.setParseAction(lambda s, l, t: reduce(lambda e, n: Iff(e, n), t[0:1] + t[2::2])) _implies = Forward() _implies <<= _iff + ZeroOrMore("->" + _implies) _implies.setParseAction(lambda s, l, t: reduce(lambda e, n: Implies(n, e), t[0:1] + t[2::2])) _basic_expr <<= _implies simple_expression <<= _basic_expr next_expression = _basic_expr # Type specifier _simple_type_specifier = Forward()
def parse(input_string):
    """Parse a QL ``form`` source string into its AST.

    Builds the full pyparsing grammar on every call, parses *input_string*
    and returns the resulting ``ast.Form`` node (the ``form`` named result).
    Raises pyparsing's ParseException on malformed input.
    """
    def flatten_binary_operators(position, source, flattened_tokens):
        # Fold [lhs, OpClass, rhs, OpClass, rhs, ...] left-associatively
        # into nested binary AST nodes.
        while len(flattened_tokens) >= 3:
            lhs, type_call, rhs = flattened_tokens[:3]
            flattened_tokens = [type_call(position, source, lhs, rhs)] + flattened_tokens[3:]
        return flattened_tokens[0]

    def flatten_unary_operators(position, source, flattened_tokens):
        # [OpClass, operand] -> OpClass(position, source, operand)
        type_call = flattened_tokens[0]
        return type_call(position, source, flattened_tokens[1])

    # Packrat memoisation: infixNotation grammars backtrack heavily.
    ParserElement.enablePackrat()

    # Punctuation / keywords that carry no AST payload.
    lit_form = Suppress("form")
    lit_if = Suppress("if")
    lit_else = Suppress("else")
    lit_l_curly = Suppress("{")
    lit_r_curly = Suppress("}")
    lit_l_paren = Suppress("(")
    lit_r_paren = Suppress(")")
    lit_colon = Suppress(":")
    lit_assign_op = Suppress("=")

    # Operator tokens parse to the AST *class* itself; the flatten helpers
    # above instantiate them.
    lit_op_multiplication = Literal("*").setParseAction(lambda _: ast.Multiplication)
    lit_op_division = Literal("/").setParseAction(lambda _: ast.Division)
    lit_op_subtract = Literal("-").setParseAction(lambda _: ast.Subtraction)
    lit_op_addition = Literal("+").setParseAction(lambda _: ast.Addition)
    lit_op_positive = Literal("+").setParseAction(lambda _: ast.Positive)
    lit_op_negative = Literal("-").setParseAction(lambda _: ast.Negative)
    lit_op_not = Literal("!").setParseAction(lambda _: ast.Negation)
    lit_op_lower_exclusive = Literal("<").setParseAction(lambda _: ast.LowerExclusive)
    lit_op_lower_inclusive = Literal("<=").setParseAction(lambda _: ast.LowerInclusive)
    lit_op_greater_inclusive = Literal(">=").setParseAction(lambda _: ast.GreaterInclusive)
    lit_op_greater_exclusive = Literal(">").setParseAction(lambda _: ast.GreaterExclusive)
    lit_op_equality = Literal("==").setParseAction(lambda _: ast.Equality)
    lit_op_inequality = Literal("!=").setParseAction(lambda _: ast.Inequality)
    lit_op_and = Literal("&&").setParseAction(lambda _: ast.And)
    lit_op_or = Literal("||").setParseAction(lambda _: ast.Or)

    # NOTE(review): pyparsing invokes parse actions as (string, loc, tokens);
    # these parameter names label them (source, position, tokens) and the AST
    # nodes are built consistently as Node(position, source, ...) throughout.
    type_money = Literal("money").setParseAction(
        lambda source, position, _: ast.Money(position, source))
    type_integer = Literal("integer").setParseAction(
        lambda source, position, _: ast.Integer(position, source))
    type_boolean = Literal("boolean").setParseAction(
        lambda source, position, _: ast.Boolean(position, source))
    type_string = Literal("string").setParseAction(
        lambda source, position, _: ast.String(position, source))
    data_types = (type_money | type_integer | type_boolean | type_string)

    true = Literal("true").setParseAction(
        lambda source, position, _: ast.Boolean(position, source, True))
    false = Literal("false").setParseAction(
        lambda source, position, _: ast.Boolean(position, source, False))
    boolean = (true | false)

    integer = Word(nums).setParseAction(
        lambda source, position, parsed_tokens:
        ast.Integer(position, source, int(parsed_tokens[0])))
    # money must be tried before integer so "12.50" is not split at the dot
    money = Combine(Word(nums) + Literal(".") + Word(nums)).setParseAction(
        lambda source, position, parsed_tokens:
        ast.Money(position, source, float(parsed_tokens[0])))
    number = (money | integer)

    string = QuotedString("'", unquoteResults=True)\
        .setParseAction(
            lambda source, position, parsed_tokens:
            ast.String(position, source, str(parsed_tokens[0])))

    # Identifiers must not collide with keywords or literal forms.
    reserved_words = (lit_form | lit_if | lit_else | boolean | number | data_types)
    name = ~reserved_words + Word(alphas, alphanums + '_').setResultsName(
        'identifier').setParseAction(
        lambda source, position, parsed_tokens:
        ast.Identifier(position, source, parsed_tokens[0]))

    operand_arith = (number | boolean | name | string)
    # Precedence levels, tightest first: unary, mul/div, add/sub, then the
    # boolean levels below.
    operand_list_arith = [
        (lit_op_positive | lit_op_negative | lit_op_not, 1, opAssoc.RIGHT,
         lambda source, position, flattened_tokens:
         flatten_unary_operators(position, source, *flattened_tokens)),
        (lit_op_multiplication | lit_op_division, 2, opAssoc.LEFT,
         lambda source, position, flattened_tokens:
         flatten_binary_operators(position, source, *flattened_tokens)),
        (lit_op_addition | lit_op_subtract, 2, opAssoc.LEFT,
         lambda source, position, flattened_tokens:
         flatten_binary_operators(position, source, *flattened_tokens)),
    ]
    operand_list_bool = [
        (lit_op_lower_inclusive | lit_op_greater_inclusive |
         lit_op_greater_exclusive | lit_op_lower_exclusive, 2, opAssoc.LEFT,
         lambda source, position, flattened_tokens:
         flatten_binary_operators(position, source, *flattened_tokens)),
        (lit_op_equality | lit_op_inequality, 2, opAssoc.LEFT,
         lambda source, position, flattened_tokens:
         flatten_binary_operators(position, source, *flattened_tokens)),
        (lit_op_and, 2, opAssoc.LEFT,
         lambda source, position, flattened_tokens:
         flatten_binary_operators(position, source, *flattened_tokens)),
        (lit_op_or, 2, opAssoc.LEFT,
         lambda source, position, flattened_tokens:
         flatten_binary_operators(position, source, *flattened_tokens)),
    ]
    literal_precedence = infixNotation(
        operand_arith,
        (operand_list_arith + operand_list_bool)
    )

    expression = \
        OneOrMore(
            literal_precedence |
            (lit_l_paren + literal_precedence + lit_r_paren)
        )

    # Field declarations: "title" identifier : type [= expression]
    field = Forward()
    field_assignment = Forward()
    field_statement = (
        QuotedString('"', unquoteResults=True).setResultsName("title") +
        name.setResultsName("identifier") +
        lit_colon +
        data_types.setResultsName("data_type")
    )
    field <<= field_statement
    field.setParseAction(lambda source, position, parsed_tokens:
                         ast.Field(position, source, *parsed_tokens))
    field_assignment <<= field_statement + lit_assign_op + expression
    field_assignment.setParseAction(
        lambda source, position, parsed_tokens:
        ast.Assignment(position, source, *parsed_tokens))
    # assignment first: a bare field is a prefix of an assignment
    field_order = field_assignment | field

    # Conditionals and statement bodies (mutually recursive).
    conditional_if = Forward()
    conditional_if_else = Forward()
    statement = Forward()
    body = Forward()
    if_statement = lit_if + lit_l_paren + expression + lit_r_paren + body
    conditional_if <<= if_statement
    conditional_if.setParseAction(ast.If)
    conditional_if_else <<= (
        if_statement +
        Optional(lit_else + body).setResultsName('else_statement')
    )
    conditional_if_else.setParseAction(ast.IfElse)
    conditional = conditional_if_else | conditional_if
    statement <<= (field_order | conditional)
    body <<= lit_l_curly + OneOrMore(statement) + lit_r_curly
    # Collapse a body's statements into a single list token.
    body.addParseAction(lambda parsed_tokens: [parsed_tokens.asList()])
    body.setResultsName('statement_list')

    form = (lit_form + name + body)\
        .addParseAction(lambda parsed_tokens: ast.Form(*parsed_tokens))\
        .setResultsName('form')\
        .parseWithTabs()

    return form.parseString(input_string).form
def braces_parser(text, opener=BLOB_OPENER, closer=BLOB_CLOSER):
    """Parse a braces-delimited configuration blob into Python structures.

    Dicts are written as ``{ key value ... }`` (one entry per line) and become
    GlobDict instances; sets as ``{ a b c }`` become tuples; bare words are
    coerced to bool/int/float/None where they look like literals, otherwise
    kept as strings.  Python-style ``#`` comments are ignored.

    :param text: the blob source to parse.
    :param opener: NOTE(review): *opener*/*closer* are accepted for interface
        compatibility but are not referenced in this body — the literal
        ``{``/``}`` characters are used; confirm against callers.
    :param closer: see *opener*.
    :returns: the first parsed top-level object (normally a GlobDict).
    :raises pyparsing.ParseException: on malformed input.
    """
    # Converters used as parse actions.
    cvtTuple = lambda toks: tuple(toks.asList())  # @IgnorePep8
    cvtRaw = lambda toks: RawString(' '.join(map(str, toks.asList())))  # @IgnorePep8
    cvtDict = lambda toks: GlobDict(toks.asList())  # @IgnorePep8
    # Recover the exact source text of a nested { ... } expression.
    extractText = lambda s, l, t: RawString(s[t._original_start:t._original_end])  # @IgnorePep8

    def pythonize(toks):
        """Coerce a bare word to bool/int/float/None when it looks like one."""
        s = toks[0]
        if s == 'true':
            return True
        elif s == 'false':
            return False
        elif s == 'none':
            # Wrap in a list so pyparsing keeps the None token instead of
            # treating it as "no result".
            return [None]
        elif s.isdigit():
            return int(s)
        # Raw string: \d and \. are regex escapes, not string escapes.
        # (The original non-raw literal relied on deprecated invalid escape
        # sequences — a SyntaxError in future CPython releases.)
        elif re.match(r'(?i)^-?(\d+\.?e\d+|\d+\.\d*|\.\d+)$', s):
            return float(s)
        return toks[0]

    def noneDefault(s, loc, t):
        """Entries with no value get the RawEOL sentinel."""
        return t if len(t) else [RawEOL]

    # define punctuation as suppressed literals
    lbrace, rbrace = map(Suppress, "{}")

    identifier = Word(printables, excludeChars='{}"\'')
    quotedStr = QuotedString('"', escChar='\\', multiline=True) | \
        QuotedString('\'', escChar='\\', multiline=True)
    quotedIdentifier = QuotedString('"', escChar='\\', unquoteResults=False) | \
        QuotedString('\'', escChar='\\', unquoteResults=False)

    dictStr = Forward()
    setStr = Forward()
    objStr = Forward()

    # e.g. a bare word immediately followed by a quoted word.
    oddIdentifier = identifier + quotedIdentifier
    dictKey = quotedIdentifier | \
        Combine(oddIdentifier).setParseAction(cvtRaw)
    dictKey.setParseAction(cvtRaw)

    dictValue = quotedStr | dictStr | setStr | \
        Combine(oddIdentifier).setParseAction(cvtRaw)

    if OLD_STYLE_KEYS:
        # Legacy mode: multi-word keys are allowed; only the last word on the
        # line is the value.
        dictKey |= Combine(identifier + ZeroOrMore(
            White(' ') + (identifier + ~FollowedBy(
                Optional(White(' ')) + LineEnd()))))
        dictValue |= identifier.setParseAction(pythonize)
    else:
        dictKey |= identifier
        dictValue |= Or([
            delimitedList(identifier | quotedIdentifier,
                          delim=White(' '), combine=True),
            Combine(delimitedList(identifier | quotedIdentifier,
                                  delim=White(' '), combine=True) +
                    Optional(White(' ') +
                             originalTextFor(nestedExpr('{', '}')).
                             setParseAction(extractText))).setParseAction(cvtRaw)
        ])

    # Dict entries are line-oriented: newlines are significant here.
    ParserElement.setDefaultWhitespaceChars(' \t')
    dictEntry = Group(dictKey +
                      Optional(White(' ').suppress() +
                               dictValue).setParseAction(noneDefault) +
                      Optional(White(' ').suppress()) +
                      LineEnd().suppress())
    dictStr << (lbrace + ZeroOrMore(dictEntry) + rbrace)
    dictStr.setParseAction(cvtDict)

    # Sets may span lines: restore newline-skipping whitespace.
    ParserElement.setDefaultWhitespaceChars(' \t\r\n')
    setEntry = identifier.setParseAction(pythonize) | \
        quotedString.setParseAction(removeQuotes) | dictStr
    setStr << (lbrace + delimitedList(setEntry, delim=White()) + rbrace)
    setStr.setParseAction(cvtTuple)

    objEntry = dictStr.ignore(pythonStyleComment)
    objStr << delimitedList(objEntry, delim=LineEnd())
    return objStr.parseString(text)[0]
signless_mult_expr.setParseAction(operators.InfixLeftSymbol.process) signless_mult_term = ( signless_mult_expr | sign_term ) # Multiplication. mult_expr = Group( signless_mult_term + OneOrMore(multop + signless_mult_term) ) mult_expr.setParseAction(operators.InfixLeftSymbol.process) mult_term = ( mult_expr | signless_mult_term ) # Addition. add_expr = Group( mult_term + OneOrMore( addop + mult_term ) ) add_expr.setParseAction(operators.InfixLeftSymbol.process) add_term = ( add_expr | mult_term ) # Complete expression. expr <<= add_term expr.setParseAction(operators.Expression.process) # A unit expression, containing only units. ############################ unit_expr = Forward() single_unit = identifier.copy() single_unit.setParseAction(operators.Unit.process) literal_one = Literal("1") literal_one.setParseAction(operators.Unit.process_dimensionless) # The one allows for example for 1/h = h^-1. unit_term = single_unit | literal_one | ( lpar + unit_expr + rpar ) # Exponent
# --- Attach AST-building parse actions to the query grammar ---------------
# The grammar expressions referenced here (code_single, serial_list, root,
# ...) are defined earlier in this scope, above this chunk.  For the
# "*_single" forms the payload token sits at index 2; for the "*_list"
# forms the payload tokens start at index 3 and alternate with separators,
# hence the x[3::2] slices.
code_single.setParseAction(lambda x: query_ast.Code(x[2]))
code_list.setParseAction(lambda x: query_ast.Code(*x[3::2]))
serial_single.setParseAction(lambda x: query_ast.Serial(x[2]))
serial_list.setParseAction(lambda x: query_ast.Serial(*x[3::2]))
type_single.setParseAction(lambda x: query_ast.Type(x[2]))
type_list.setParseAction(lambda x: query_ast.Type(*x[3::2]))
cond_single.setParseAction(lambda x: query_ast.Condition(x[2]))
cond_list.setParseAction(lambda x: query_ast.Condition(*x[3::2]))
path_single.setParseAction(lambda x: query_ast.Path(x[2]))
path_list.setParseAction(lambda x: query_ast.Path(*x[3::2]))
# TriState takes the even-indexed tokens (operands, skipping separators).
tristate_expr.setParseAction(lambda x: query_ast.TriState(*x[0::2]))
label_expr.setParseAction(lambda x: query_ast.Labelled(x[2]))
assy_expr.setParseAction(lambda x: query_ast.Assy(x[2]))
or_expr.setParseAction(_pa_or_expr)
and_expr.setParseAction(_pa_and_expr)
func_expr.setParseAction(_pa_func_expr)
not_expr.setParseAction(lambda x: query_ast.Not(x[1]))
# Parentheses contribute no node: pass the inner expression through.
paren_expr.setParseAction(lambda x: x[1])


def search_tree(query):
    """
    Create a search tree for a query string.

    :param str query: The query string to generate the search tree for.
    :returns: A new search tree.
    """
    # `root` is the grammar's start symbol, defined above this chunk.
    return root.parseString(query)[0]
def _build_asn1_grammar():
    """Construct the pyparsing grammar for ASN.1 module definitions.

    Returns the start rule (``OneOrMore(module_definition)``).  Every
    significant production gets an ``annotate(...)`` parse action at the end,
    so parsing yields a tree of AnnotatedToken instances tagged with the
    production name.  Section references in the todos refer to the ASN.1
    specification (X.680).
    """
    def build_identifier(prefix_pattern):
        """An identifier: one char from *prefix_pattern*, then [-0-9a-zA-Z]*."""
        identifier_suffix = Optional(Word(srange('[-0-9a-zA-Z]')))
        # todo: more rigorous? trailing hyphens and -- forbidden
        return Combine(Word(srange(prefix_pattern), exact=1) + identifier_suffix)

    def braced_list(element_rule):
        """A possibly-empty comma-separated list inside { }, as one Group."""
        elements_rule = Optional(delimitedList(element_rule))
        return Suppress('{') + Group(elements_rule) + Suppress('}')

    def annotate(name):
        """Parse-action factory: wrap the tokens in AnnotatedToken(name, ...)."""
        def annotation(t):
            return AnnotatedToken(name, t.asList())
        return annotation

    # Reserved words
    ANY = Keyword('ANY')
    DEFINED_BY = Keyword('DEFINED BY')
    DEFINITIONS = Keyword('DEFINITIONS')
    BEGIN = Keyword('BEGIN')
    END = Keyword('END')
    OPTIONAL = Keyword('OPTIONAL')
    DEFAULT = Keyword('DEFAULT')
    TRUE = Keyword('TRUE')
    FALSE = Keyword('FALSE')
    UNIVERSAL = Keyword('UNIVERSAL')
    APPLICATION = Keyword('APPLICATION')
    PRIVATE = Keyword('PRIVATE')
    MIN = Keyword('MIN')
    MAX = Keyword('MAX')
    IMPLICIT = Keyword('IMPLICIT')
    EXPLICIT = Keyword('EXPLICIT')
    EXPLICIT_TAGS = Keyword('EXPLICIT TAGS')
    IMPLICIT_TAGS = Keyword('IMPLICIT TAGS')
    AUTOMATIC_TAGS = Keyword('AUTOMATIC TAGS')
    EXTENSIBILITY_IMPLIED = Keyword('EXTENSIBILITY IMPLIED')
    COMPONENTS_OF = Keyword('COMPONENTS OF')
    ELLIPSIS = Keyword('...')
    SIZE = Keyword('SIZE')
    OF = Keyword('OF')
    IMPORTS = Keyword('IMPORTS')
    EXPORTS = Keyword('EXPORTS')
    FROM = Keyword('FROM')

    # Built-in types
    SEQUENCE = Keyword('SEQUENCE')
    SET = Keyword('SET')
    CHOICE = Keyword('CHOICE')
    ENUMERATED = Keyword('ENUMERATED')
    BIT_STRING = Keyword('BIT STRING')
    BOOLEAN = Keyword('BOOLEAN')
    REAL = Keyword('REAL')
    OCTET_STRING = Keyword('OCTET STRING')
    CHARACTER_STRING = Keyword('CHARACTER STRING')
    NULL = Keyword('NULL')
    INTEGER = Keyword('INTEGER')
    OBJECT_IDENTIFIER = Keyword('OBJECT IDENTIFIER')

    # Restricted string types
    BMPString = Keyword('BMPString')
    GeneralString = Keyword('GeneralString')
    GraphicString = Keyword('GraphicString')
    IA5String = Keyword('IA5String')
    ISO646String = Keyword('ISO646String')
    NumericString = Keyword('NumericString')
    PrintableString = Keyword('PrintableString')
    TeletexString = Keyword('TeletexString')
    T61String = Keyword('T61String')
    UniversalString = Keyword('UniversalString')
    UTF8String = Keyword('UTF8String')
    VideotexString = Keyword('VideotexString')
    VisibleString = Keyword('VisibleString')

    # Useful types
    GeneralizedTime = Keyword('GeneralizedTime')
    UTCTime = Keyword('UTCTime')
    ObjectDescriptor = Keyword('ObjectDescriptor')

    # Literals
    number = Word(nums)
    signed_number = Combine(Optional('-') + number)  # todo: consider defined values from 18.1
    # 'xxxx'B binary strings and 'xxxx'H hex strings.
    bstring = Suppress('\'') + StringOf('01') + Suppress('\'B')
    hstring = Suppress('\'') + StringOf('0123456789ABCDEF') + Suppress('\'H')

    # Comments: "--" to matching "--" or end of input, plus C-style /* */.
    hyphen_comment = Regex(r"--[\s\S]*?(--|$)", flags=re.MULTILINE)
    comment = hyphen_comment | cStyleComment

    # identifier
    identifier = build_identifier('[a-z]')

    # references
    # these are duplicated to force unique token annotations
    valuereference = build_identifier('[a-z]')
    typereference = build_identifier('[A-Z]')
    module_reference = build_identifier('[A-Z]')
    reference = valuereference | typereference  # TODO: consider object references from 12.1

    # values
    # todo: consider more literals from 16.9
    boolean_value = TRUE | FALSE
    bitstring_value = bstring | hstring  # todo: consider more forms from 21.9
    integer_value = signed_number
    null_value = NULL
    cstring_value = dblQuotedString

    exponent = CaselessLiteral('e') + signed_number
    real_value = Combine(signed_number +
                         Optional(Literal('.') + Optional(number)) +
                         Optional(exponent))

    # In value range constraints, decimal points must be followed by number, or
    # the grammar becomes ambiguous: ([1.].100) vs ([1]..[100])
    constraint_real_value = Combine(signed_number +
                                    Optional(Literal('.') + number) +
                                    Optional(exponent))

    builtin_value = boolean_value | bitstring_value | real_value | integer_value | null_value | cstring_value
    external_value_reference = module_reference + Suppress('.') + valuereference
    defined_value = external_value_reference | valuereference  # todo: more options from 13.1
    referenced_value = Unique(defined_value)  # todo: more options from 16.11

    # object identifier value
    name_form = Unique(identifier)
    number_form = Unique(number)
    name_and_number_form = name_form + Suppress('(') + number_form + Suppress(')')
    objid_components = name_and_number_form | name_form | number_form | defined_value
    objid_components_list = OneOrMore(objid_components)
    object_identifier_value = Suppress('{') + \
        (objid_components_list | (defined_value + objid_components_list)) + \
        Suppress('}')

    value = builtin_value | referenced_value | object_identifier_value

    # definitive identifier value
    definitive_number_form = Unique(number)
    definitive_name_and_number_form = name_form + Suppress('(') + \
        definitive_number_form + Suppress(')')
    definitive_objid_component = definitive_name_and_number_form | name_form | definitive_number_form
    definitive_objid_component_list = OneOrMore(definitive_objid_component)
    definitive_identifier = Optional(
        Suppress('{') + definitive_objid_component_list + Suppress('}'))

    # tags
    class_ = UNIVERSAL | APPLICATION | PRIVATE
    class_number = Unique(number)  # todo: consider defined values from 30.1
    tag = Suppress('[') + Optional(class_) + class_number + Suppress(']')
    tag_default = EXPLICIT_TAGS | IMPLICIT_TAGS | AUTOMATIC_TAGS

    # extensions
    extension_default = Unique(EXTENSIBILITY_IMPLIED)

    # values
    # Forward-declare these, they can only be fully defined once
    # we have all types defined. There are some circular dependencies.
    named_type = Forward()
    type_ = Forward()

    # constraints
    # todo: consider the full subtype and general constraint syntax described in 45.*
    lower_bound = (constraint_real_value | signed_number | referenced_value | MIN)
    upper_bound = (constraint_real_value | signed_number | referenced_value | MAX)
    single_value_constraint = Suppress('(') + value + Suppress(')')
    value_range_constraint = Suppress('(') + lower_bound + Suppress('..') + \
        upper_bound + Suppress(')')
    # TODO: Include contained subtype constraint here if we ever implement it.
    size_constraint = Optional(Suppress('(')) + Suppress(SIZE) + \
        (single_value_constraint | value_range_constraint) + \
        Optional(Suppress(')'))

    # types
    # todo: consider other defined types from 13.1
    defined_type = Optional(module_reference + Suppress('.'), default=None) + \
        typereference + Optional(size_constraint, default=None)

    # TODO: consider exception syntax from 24.1
    extension_marker = Unique(ELLIPSIS)

    component_type_optional = named_type + Suppress(OPTIONAL)
    component_type_default = named_type + Suppress(DEFAULT) + value
    component_type_components_of = Suppress(COMPONENTS_OF) + type_
    # Order matters: COMPONENTS OF / OPTIONAL / DEFAULT variants must be
    # tried before the plain named_type they all start with.
    component_type = component_type_components_of | component_type_optional | component_type_default | named_type

    tagged_type = tag + Optional(IMPLICIT | EXPLICIT, default=None) + type_

    named_number_value = Suppress('(') + signed_number + Suppress(')')
    named_number = identifier + named_number_value
    named_nonumber = Unique(identifier)
    enumeration = named_number | named_nonumber

    set_type = SET + braced_list(component_type | extension_marker)
    sequence_type = SEQUENCE + braced_list(component_type | extension_marker)
    sequenceof_type = Suppress(SEQUENCE) + Optional(size_constraint, default=None) + \
        Suppress(OF) + (type_ | named_type)
    setof_type = Suppress(SET) + Optional(size_constraint, default=None) + \
        Suppress(OF) + (type_ | named_type)
    choice_type = CHOICE + braced_list(named_type | extension_marker)
    selection_type = identifier + Suppress('<') + type_
    enumerated_type = ENUMERATED + braced_list(enumeration | extension_marker)
    bitstring_type = BIT_STRING + \
        Optional(braced_list(named_number), default=[]) + \
        Optional(single_value_constraint | size_constraint, default=None)
    plain_integer_type = INTEGER
    restricted_integer_type = INTEGER + braced_list(named_number) + \
        Optional(single_value_constraint, default=None)
    boolean_type = BOOLEAN
    real_type = REAL
    null_type = NULL
    object_identifier_type = OBJECT_IDENTIFIER
    octetstring_type = OCTET_STRING + Optional(size_constraint)
    unrestricted_characterstring_type = CHARACTER_STRING
    restricted_characterstring_type = BMPString | GeneralString | \
        GraphicString | IA5String | \
        ISO646String | NumericString | \
        PrintableString | TeletexString | \
        T61String | UniversalString | \
        UTF8String | VideotexString | \
        VisibleString
    characterstring_type = (restricted_characterstring_type |
                            unrestricted_characterstring_type) + Optional(size_constraint)
    useful_type = GeneralizedTime | UTCTime | ObjectDescriptor

    # ANY type
    any_type = ANY + Optional(Suppress(DEFINED_BY + identifier))

    # todo: consider other builtins from 16.2
    simple_type = (any_type | boolean_type | null_type | octetstring_type |
                   characterstring_type | real_type | plain_integer_type |
                   object_identifier_type | useful_type) + \
        Optional(value_range_constraint | single_value_constraint)
    constructed_type = choice_type | sequence_type | set_type
    value_list_type = restricted_integer_type | enumerated_type
    builtin_type = value_list_type | tagged_type | simple_type | constructed_type | \
        sequenceof_type | setof_type | bitstring_type
    referenced_type = defined_type | selection_type  # todo: consider other ref:d types from 16.3

    type_ << (builtin_type | referenced_type)
    named_type << (identifier + type_)

    type_assignment = typereference + '::=' + type_
    value_assignment = valuereference + type_ + '::=' + value
    assignment = type_assignment | value_assignment
    assignment_list = ZeroOrMore(assignment)

    # TODO: Maybe handle full assigned-identifier syntax with defined values
    # described in 12.1, but I haven't been able to find examples of it, and I
    # can't say for sure what acceptable syntax is.
    global_module_reference = module_reference + \
        Optional(object_identifier_value, default=None)

    symbol = Unique(reference)  # TODO: parameterized reference?
    symbol_list = delimitedList(symbol, delim=',')
    symbols_from_module = Group(Group(symbol_list) + Suppress(FROM) +
                                global_module_reference)
    symbols_from_module_list = OneOrMore(symbols_from_module)
    symbols_imported = Unique(symbols_from_module_list)
    exports = Suppress(EXPORTS) + Optional(symbol_list) + Suppress(';')
    imports = Suppress(IMPORTS) + Optional(symbols_imported) + Suppress(';')

    module_body = Optional(exports, default=None) + \
        Optional(imports, default=None) + assignment_list
    module_identifier = module_reference + definitive_identifier
    module_definition = module_identifier + Suppress(DEFINITIONS) + \
        Optional(tag_default, default=None) + \
        Optional(extension_default, default=None) + Suppress('::=') + \
        Suppress(BEGIN) + module_body + Suppress(END)

    module_definition.ignore(comment)

    # Mark up the parse results with token tags
    identifier.setParseAction(annotate('Identifier'))
    named_number_value.setParseAction(annotate('Value'))
    tag.setParseAction(annotate('Tag'))
    class_.setParseAction(annotate('TagClass'))
    class_number.setParseAction(annotate('TagClassNumber'))
    type_.setParseAction(annotate('Type'))
    simple_type.setParseAction(annotate('SimpleType'))
    choice_type.setParseAction(annotate('ChoiceType'))
    sequence_type.setParseAction(annotate('SequenceType'))
    set_type.setParseAction(annotate('SetType'))
    value_list_type.setParseAction(annotate('ValueListType'))
    bitstring_type.setParseAction(annotate('BitStringType'))
    sequenceof_type.setParseAction(annotate('SequenceOfType'))
    setof_type.setParseAction(annotate('SetOfType'))
    named_number.setParseAction(annotate('NamedValue'))
    named_nonumber.setParseAction(annotate('NamedValue'))
    single_value_constraint.setParseAction(annotate('SingleValueConstraint'))
    size_constraint.setParseAction(annotate('SizeConstraint'))
    value_range_constraint.setParseAction(annotate('ValueRangeConstraint'))
    component_type.setParseAction(annotate('ComponentType'))
    component_type_optional.setParseAction(annotate('ComponentTypeOptional'))
    component_type_default.setParseAction(annotate('ComponentTypeDefault'))
    component_type_components_of.setParseAction(
        annotate('ComponentTypeComponentsOf'))
    tagged_type.setParseAction(annotate('TaggedType'))
    named_type.setParseAction(annotate('NamedType'))
    type_assignment.setParseAction(annotate('TypeAssignment'))
    value_assignment.setParseAction(annotate('ValueAssignment'))
    module_reference.setParseAction(annotate('ModuleReference'))
    global_module_reference.setParseAction(annotate('GlobalModuleReference'))
    module_body.setParseAction(annotate('ModuleBody'))
    module_definition.setParseAction(annotate('ModuleDefinition'))
    extension_marker.setParseAction(annotate('ExtensionMarker'))
    name_form.setParseAction(annotate('NameForm'))
    number_form.setParseAction(annotate('NumberForm'))
    name_and_number_form.setParseAction(annotate('NameAndNumberForm'))
    object_identifier_value.setParseAction(annotate('ObjectIdentifierValue'))
    definitive_identifier.setParseAction(annotate('DefinitiveIdentifier'))
    definitive_number_form.setParseAction(annotate('DefinitiveNumberForm'))
    definitive_name_and_number_form.setParseAction(
        annotate('DefinitiveNameAndNumberForm'))
    exports.setParseAction(annotate('Exports'))
    imports.setParseAction(annotate('Imports'))
    assignment_list.setParseAction(annotate('AssignmentList'))
    bstring.setParseAction(annotate('BinaryStringValue'))
    hstring.setParseAction(annotate('HexStringValue'))
    defined_type.setParseAction(annotate('DefinedType'))
    selection_type.setParseAction(annotate('SelectionType'))
    referenced_value.setParseAction(annotate('ReferencedValue'))

    # Entry point: one or more complete module definitions.
    start = OneOrMore(module_definition)
    return start
def ruleParser():
    """Build and return the pyparsing grammar for protocol rules.

    Reads the module-level flag ``typedversion``: when true, variables are
    wrapped in explicit type information (``typeinfo(var)``).  Parse actions
    build ``Term.*`` / ``If.*`` objects (classes from this project).

    :returns: the top-level ``Rule`` ParserElement.
    """
    global typedversion

    # ------------------------------------------------------
    # Atomic
    # ------------------------------------------------------

    # Tokens
    lbr = Literal("(")
    rbr = Literal(")")
    comma = Literal(",")
    # NOTE(review): hash, equ and eol are never used below; 'hash' also
    # shadows the builtin of the same name.
    hash = Literal("#")
    equ = Literal("=")
    implies = Literal("=>")
    dot = Literal(".")
    eol = Literal("\n").suppress()

    # Basic constructors
    Alfabet = alphas + nums + "_$"
    Number = Word(nums)
    Number.setParseAction(lambda s, l, t: ["number", Term.TermConstant(t[0])])

    # Typeinfo/Constant
    TypeInfo = oneOf("mr nonce pk sk fu table")
    TypeInfo.setParseAction(
        lambda s, l, t: ["typeinfo", Term.TermConstant(t[0])])
    Constant = Word(alphas, Alfabet)
    Constant.setParseAction(lambda s, l, t: [Term.TermConstant(t[0])])

    # Time: a plain number, the symbolic "xTime", or a successor term s(n).
    nTime = Number
    xTime = Literal("xTime")
    sTime = Literal("s").suppress() + lbr + Group(Number) + rbr
    Time = Or([nTime, xTime, sTime])

    # Const: a constant name, a timed constant c(name,time), or the
    # fixed literal c(ni,ni).  The joined token text becomes an If.Constant.
    Const = Forward()
    ConstC = Literal("c") + lbr + Constant + comma + Time + rbr
    ConstF = Literal("c(ni,ni)")
    Const << Or([Constant, ConstC, ConstF])
    Const.setParseAction(lambda s, l, t: [If.Constant("".join(t))])

    # Two versions: untyped variables start with "x"; the typed version
    # additionally wraps them as typeinfo(variable).
    Variable = Word("x", Alfabet)
    Variable.setParseAction(
        lambda s, l, t: [Term.TermVariable(t[0] + "V", None)])
    if typedversion:
        Variable = TypeInfo + lbr + Variable + rbr

    # Optional prime
    optprime = Optional(Literal("'"))

    # Atomic
    ## DEVIANT : below there is an optprime after the atom. This
    ## is not in the BNF.
    Atomic = Or([TypeInfo + lbr + Const + rbr, Variable]) + optprime

    ### TEST
    #print Time.parseString("s(25)")
    #print Variable.parseString("xCas")
    #print Atomic.parseString("nonce(Koen)")

    # ------------------------------------------------------
    # Messages
    # ------------------------------------------------------

    # Base forward declaration (messages are recursive)
    Message = Forward()

    # Agents etc
    Agent = Or([Literal("mr") + lbr + Const + rbr, Variable])
    KeyTable = Or([Literal("table") + lbr + Const + rbr, Variable])
    KeyTableApp = Literal(
        "tb") + lbr + KeyTable + comma + Agent + rbr + optprime

    # Crypto
    pkterm = Literal("pk") + lbr + Const + rbr + optprime
    varterm = Variable + optprime
    Invertible = Or([pkterm, KeyTableApp, varterm])
    PublicCypher = Literal("crypt") + lbr + Invertible + comma + Message + rbr
    # crypt(key, msg) -> TermEncrypt(msg, key): t[1] is the key, t[2] the body
    PublicCypher.setParseAction(lambda s, l, t: [Term.TermEncrypt(t[2], t[1])])
    XOR = Literal("rcrypt") + lbr + Message + comma + Message + rbr
    SymmetricCypher = Literal("scrypt") + lbr + Message + comma + Message + rbr
    futerm = Or([Literal("fu") + lbr + Const + rbr, Variable])
    Function = Literal("funct") + lbr + futerm + comma + Message + rbr

    # Message composition
    Concatenation = Literal("c") + lbr + Message + comma + Message + rbr
    Concatenation.setParseAction(lambda s, l, t: [Term.TermTuple(t[1], t[2])])
    Composed = Or([
        Concatenation, SymmetricCypher, XOR, PublicCypher, Function, KeyTable,
        KeyTableApp
    ])
    Message << Or([Composed, Atomic])

    ### TEST
    #print Message.parseString("nonce(c(Na,xTime))")

    # ------------------------------------------------------
    # Model of honest agents
    # ------------------------------------------------------

    Boolean = Or([Literal("true"), Literal("false"), Variable])
    # Sessions are Peano-style: s(s(...(n)...)), a number, or a variable.
    Session = Forward()
    Session << Or([Literal("s") + lbr + Session + rbr, Number, Variable])

    # Message lists: "etc" terminator, a variable, or a cons cell c(m, rest).
    MsgEtc = Literal("etc")
    MsgList = Forward()
    MsgComp = Literal("c") + lbr + Message + comma + MsgList + rbr
    MsgList << Or([MsgEtc, Variable, MsgComp])
    Step = Or([Number, Variable])

    ### TEST
    #print Message.parseString("xKb")
    #print MsgList.parseString("etc")
    #print MsgList.parseString("c(xKb,etc)")
    #print MsgList.parseString("c(xA,c(xB,c(xKa,c(xKa',c(xKb,etc)))))")

    # Principal fact: w(step, agent, agent, msglist, msglist, bool, session)
    Principal = Literal(
        "w"
    ) + lbr + Step + comma + Agent + comma + Agent + comma + MsgList + comma + MsgList + comma + Boolean + comma + Session + rbr
    Principal.setParseAction(lambda s, l, t: ["Principal", t])

    # Message fact: m(step, agent, agent, agent, message, session)
    MessageFact = Literal(
        "m"
    ) + lbr + Step + comma + Agent + comma + Agent + comma + Agent + comma + Message + comma + Session + rbr

    # Goal fact
    Correspondence = Principal + dot + Principal
    # NOTE(review): Secret and Give have no comma token between Message and
    # the "f" literal — confirm this matches the intended fact syntax.
    Secret = Literal("secret") + lbr + Message + Literal(
        "f") + lbr + Session + rbr + rbr
    Secrecy = Literal("secret") + lbr + Literal("xsecret") + comma + Literal(
        "f") + lbr + Session + rbr + rbr + dot + Literal("i") + lbr + Literal(
            "xsecret") + rbr
    Give = Literal("give") + lbr + Message + Literal(
        "f") + lbr + Session + rbr + rbr
    STSecrecy = Literal("give(xsecret,f(xc)).secret(xsecret,f(xc))"
                        ) + implies + Literal("i(xsecret)")
    Witness = Literal(
        "witness"
    ) + lbr + Agent + comma + Agent + comma + Constant + comma + Message + rbr
    Request = Literal(
        "request"
    ) + lbr + Agent + comma + Agent + comma + Constant + comma + Message + rbr
    # NOTE(review): Authenticate is byte-for-byte identical to Request
    # (both match the "request" keyword) — looks like a copy/paste;
    # confirm the intended keyword.
    Authenticate = Literal(
        "request"
    ) + lbr + Agent + comma + Agent + comma + Constant + comma + Message + rbr
    GoalState = Or([Correspondence, Secrecy, STSecrecy, Authenticate])
    GoalFact = Or([Secret, Give, Witness, Request])

    # TimeFact: h(message)
    TimeFact = Literal("h") + lbr + Message + rbr

    # Intruder knowledge: i(message)
    IntruderKnowledge = Literal("i") + lbr + Message + rbr

    # Facts and states
    Fact = Or([
        Principal, MessageFact, IntruderKnowledge, TimeFact, Secret, Give,
        Witness, Request
    ])
    State = Group(delimitedList(
        Fact, "."))  ## From initial part of document, not in detailed BNF

    # Rules
    MFPrincipal = Or([MessageFact + dot + Principal, Principal])
    # Message rule: lhs => rhs, where each side pairs a time fact with an
    # (optional message fact plus) principal fact.
    mr1 = Literal("h") + lbr + Literal("s") + lbr + Literal(
        "xTime") + rbr + rbr + dot + MFPrincipal
    mr2 = implies
    mr3 = Literal("h") + lbr + Literal(
        "xTime") + rbr + dot + MFPrincipal + Optional(
            dot + delimitedList(GoalFact, "."))
    MessageRule = Group(mr1) + mr2 + Group(
        mr3)  ## DEVIANT : BNF requires newlines
    InitialState = Literal("h") + lbr + Literal(
        "xTime") + rbr + dot + State  ## DEVIANT : BNF requires newlines

    # Intruder (placeholder — "nogniet" is Dutch for "not yet")
    IntruderRule = Literal("nogniet")

    # Simplification
    f_simplif = Literal("f") + lbr + Literal("s") + lbr + Literal(
        "xc") + rbr + rbr + implies + Literal("f") + lbr + Literal(
            "xc") + rbr  ## DEVIANT : EOL removed
    matching_request = Witness + dot + Request + implies
    no_auth_intruder = Request + implies
    SimplificationRule = Or([f_simplif, matching_request, no_auth_intruder])

    # Compose all rules
    Rule = Or([
        InitialState, MessageRule, IntruderRule, GoalState, SimplificationRule
    ])
    return Rule
# Indentation-based grammar (pyparsing indented-block example style).
# Relies on names defined elsewhere in the file: the shared `indentStack`
# list, the `checkSubIndent` / `checkPeerIndent` parse actions, and the
# sample input string `data`.

def checkUnindent(s, l, t):
    # Parse action: succeed only at an unindent, i.e. the current column is
    # strictly left of the current level and at or left of the enclosing
    # level.  End-of-input is always accepted as an unindent.
    if l >= len(s):
        return
    curCol = col(l, s)
    if not (curCol < indentStack[-1] and curCol <= indentStack[-2]):
        raise ParseException(s, l, "not an unindent")


def doUnindent():
    # Pop one indentation level off the shared stack.
    indentStack.pop()


INDENT = lineEnd.suppress() + empty + empty.copy().setParseAction(checkSubIndent)
UNDENT = FollowedBy(empty).setParseAction(checkUnindent)
# NOTE(review): this second setParseAction REPLACES checkUnindent rather than
# chaining after it, so the column check above never runs; addParseAction was
# probably intended — confirm.
UNDENT.setParseAction(doUnindent)

stmt = Forward()
# A suite is one or more statements at the same (peer) indentation level.
suite = Group(OneOrMore(empty + stmt.setParseAction(checkPeerIndent)))

identifier = Word(alphas, alphanums)
# def name ( [arg, ...] ) :
funcDecl = ("def" + identifier + Group("(" + Optional(delimitedList(identifier)) + ")") + ":")
funcDef = Group(funcDecl + INDENT + suite + UNDENT)

rvalue = Forward()
funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")")
rvalue << (funcCall | identifier | Word(nums))
assignment = Group(identifier + "=" + rvalue)
stmt << (funcDef | assignment | identifier)

# Demo driver: echo and parse the sample program held in `data`.
print(data)
parseTree = suite.parseString(data)

import pprint
# --- Protocol Buffers grammar fragment (proto2-style, given the
# required/optional/repeated qualifiers).  The ALL-CAPS names (DOUBLE,
# UINT32, ONEOF, RPC, ...) are keyword/punctuation parser elements, and the
# *_fn callables are parse actions; both are defined elsewhere in this file.

# A field type: one of the built-in scalar types or a user-defined name.
type_ = (DOUBLE | UINT32 | BOOL | STRING | identifier)
type_.setParseAction(type_fn)

# Field qualifier keyword.
qualifier = (REQUIRED | OPTIONAL | REPEATED)("qualifier")
qualifier.setParseAction(qualifier_fn)

# field: qualifier type name = number ;
# ('-' commits the match after the qualifier, for better error reporting)
field = qualifier - type_("type_") + identifier("identifier") + EQ + integer("field_number") + SEMI
field.setParseAction(field_fn)

# oneof name { type name = number ; ... }
oneof_definition = ONEOF - identifier + LBRACE + ZeroOrMore(Group(type_("type_") + identifier("identifier") + EQ + integer("field_number") + SEMI)) + RBRACE
oneof_definition.setParseAction(oneof_definition_fn)

# Any single entry inside a message body.
message_line = (field | enum_definition | oneof_definition | message_definition)("message_line")
message_line.setParseAction(message_line_fn)

# message_body is a Forward declared elsewhere; fill it in here.
message_body << Group(ZeroOrMore(message_line))("message_body")
message_body.setParseAction(message_body_fn)

# rpc Method ( [Request] ) returns ( [Response] )
method_definition = ((RPC - identifier("method") + LPAR + Optional(identifier("request")) + RPAR + RETURNS + LPAR + Optional(identifier("response")) + RPAR))("method_definition")
method_definition.setParseAction(method_definition_fn)

# service Name { rpc ... }
service_definition = (SERVICE - identifier("service") + LBRACE + ZeroOrMore(Group(method_definition)) + RBRACE)("service_definition")
service_definition.setParseAction(service_definition_fn)

# package a.b.c ;  (dotted name kept as one combined token)
package_directive = (Group(PACKAGE - delimitedList(identifier, '.', combine=True) + SEMI))("package_directive")
package_directive.setParseAction(package_directive_fn)

# import "file.proto" ;
import_directive = IMPORT - quotedString("import") + SEMI
import_directive.setParseAction(import_directive_fn)
def get_exp_parser():
    """
    Build and return the pyparsing ParserElement for JSGF rule expansions.

    Enforces the operator precedence rules of JSGF Spec section 4.7,
    tightest binding first:

    1. Angle-bracket rule names, and quoted or unquoted tokens.
    2. ``()`` grouping and ``[]`` optional grouping.
    3. Unary operators (``+``, ``*``, tag attachment), which bind to the
       nearest preceding expansion; use grouping to apply them to a
       sequence or to alternatives.
    4. Sequences of rule expansions.
    5. ``|``-separated alternatives.

    :returns: Forward
    """
    # Forward declaration so the grammar can refer to itself (recursive
    # grammars need this).
    expansion = Forward().setName("expansion")

    # Punctuation consumed during parsing but absent from the output.
    open_paren, close_paren, open_brac, close_brac, weight_sep = \
        map(Suppress, "()[]/")
    # Punctuation that survives into the output.
    kleene, repeat, alt_sep, tag_open, tag_close = map(PPLiteral, "*+|{}")

    # One or more words collapse into a single Literal expansion.
    word_literal = words.copy()\
        .setParseAction(lambda toks: Literal(" ".join(toks)))

    # <qualified.rule.name> style rule references.
    reference = (langle + optionally_qualified_name + rangle)\
        .setName("rule reference").setParseAction(_ref_action)

    # Optional /w/ weight prefix used on alternatives.
    weight = Optional(weight_sep + pyparsing_common.number + weight_sep)\
        .setName("alternative weight")

    # Parenthesised and bracketed sub-expansions are atomic, as are
    # literals and rule references.
    grouped = (open_paren + expansion + close_paren)\
        .setName("required grouping")\
        .setParseAction(lambda toks: RequiredGrouping(toks[0]))
    optional_group = (open_brac + expansion + close_brac)\
        .setName("optional")\
        .setParseAction(lambda toks: OptionalGrouping(toks[0]))
    atom = (weight + (word_literal | reference | grouped | optional_group))\
        .setParseAction(_atom_action)

    # Tag text is one or more regex-defined words; escaped brace
    # characters ('\{' or '\}') are permitted inside tags.
    tag_word = Regex(r"([\w\-\\']|\\{|\\})+", re.UNICODE).setName("tag text")
    tag = tag_open + OneOrMore(tag_word) + tag_close

    # The root expansion: an atom followed by any number of tags, unary
    # operators, further expansions (sequences) or weighted alternatives.
    root = (atom + ZeroOrMore(tag | repeat | kleene | expansion |
                              alt_sep + weight + expansion)
            ).setParseAction(_transform_tokens)

    # Tie the recursion together, attach post-processing and return.
    expansion <<= root
    expansion.setParseAction(_post_process)
    return expansion