def __init__(self, syslogtag):
    """Build the pyparsing pattern for one syslog line and store it.

    The pattern saved in ``self._pattern`` matches, in order: a
    ``Mon DD HH:MM:SS`` timestamp (results name ``source_datetime``), a
    hostname (``source_hostname``), the literal ``syslogtag`` (suppressed
    from the results) and the remainder of the line
    (``serialized_event_dict``).

    :param syslogtag: literal tag text expected between hostname and payload.
    """
    ints = Word(nums)

    # timestamp
    month = Word(string.ascii_uppercase, string.ascii_lowercase, exact=3)
    day = ints
    hour = Combine(ints + ":" + ints + ":" + ints)
    timestamp = month + day + hour

    def to_datetime(tokens):
        # The timestamp carries no year, so the current one is assumed.
        # Look it up at parse time rather than once at construction:
        # a long-lived parser would otherwise keep stamping events with
        # the year in which it was created.
        year = str(datetime.now().year)
        return datetime.strptime(year + ' ' + ' '.join(tokens),
                                 '%Y %b %d %H:%M:%S')

    # Convert timestamp to datetime
    timestamp.setParseAction(to_datetime)

    # hostname
    hostname = Word(alphas + nums + "_-.")

    # syslogtag: must be present, but is dropped from the parse results
    tag = Suppress(syslogtag)

    # message: everything up to the end of the line, as a single string
    payload = Regex(".*")
    payload.setParseAction(lambda t: "".join(t))  # json parsing happens in Event class

    self._pattern = (timestamp("source_datetime")
                     + hostname("source_hostname")
                     + tag
                     + payload("serialized_event_dict"))
def __init__(self, name=None, status_dic=None):
    """Store the name/status mapping and build the expression grammar.

    :param name: stored as-is on ``self._name``.
    :param status_dic: copied into ``self._status_dic`` when it is a dict;
        any other value results in an empty dict.

    The recursive grammar is stored on ``self.expr``.
    """
    self._name = name
    if isinstance(status_dic, dict):
        self._status_dic = dict(status_dic)
    else:
        self._status_dic = {}

    open_paren = Literal('(').suppress()
    close_paren = Literal(')').suppress()

    # Operators are the keys of the operator map; each match is handed to
    # self._get_operator_func.
    operator = oneOf(' '.join(self._operator_map.keys()))
    operator.setParseAction(self._get_operator_func)

    def to_floats(tokens):
        return [float(tok) for tok in tokens]

    # digits
    number = Word(nums + '.')
    number.setParseAction(to_floats)

    # something like 3m, 50s, see
    # https://humanfriendly.readthedocs.io/en/latest/#humanfriendly.parse_timespan
    timespan = Regex(r'[\d]+[a-zA-Z]+')
    timespan.setParseAction(self._parse_timedeltas)

    # something like cpu_usage_rate, vnbe0.inbytes
    metric_name = Word(alphas, alphanums + '._')

    # Order matters: alternatives are tried left to right.
    value = timespan | metric_name | number

    self.expr = Forward()
    operand = value | Group(open_paren + self.expr + close_paren)
    self.expr << operand + ZeroOrMore(operator + self.expr)
def _make_grammar(self):
    """Build and return the pyparsing grammar for a query string.

    The grammar is a sequence of arguments optionally separated by
    whitespace. An argument is an ``or``/``and`` keyword (case-insensitive),
    an exclusion (a term prefixed with ``-`` or ``^``) or an inclusion.
    A term is a quoted string, a ``name:value`` command or a bare word.
    The returned expression itself carries no parse action.
    """
    from pyparsing import (QuotedString, ZeroOrMore, Combine, Literal,
                           Optional, OneOrMore, Regex, CaselessKeyword)

    # Backslash sequences with a dedicated replacement; any other escaped
    # character is replaced by the character itself.
    replacements = {
        '\\\\': "\\",
        '\\\'': "'",
        '\\"': '"',
        '\\f': "\f",
        '\\n': "\n",
        '\\r': "\r",
        '\\t': "\t",
        '\\ ': " ",
    }

    def escape_handler(s, loc, toks):
        sequence = toks[0]
        if sequence in replacements:
            return replacements[sequence]
        return sequence[1:]

    escape = Combine(Regex(r'\\.')).setParseAction(escape_handler)
    word = Combine(OneOrMore(escape | Regex(r'[^\s\\]+')))
    whitespace = Regex(r'\s+').suppress()

    double_quoted = QuotedString('"', escChar='\\')
    single_quoted = QuotedString("'", escChar='\\')
    quotedstring = Combine(OneOrMore(double_quoted | single_quoted))

    command = Regex(r'[^\s:]+') + Literal(":").suppress() + (quotedstring | word)
    command.setParseAction(CommandExpr)

    include = quotedstring | command | word
    include.setParseAction(IncludeExpr)

    exclusion_prefix = (Literal("-") | Literal("^")).suppress()
    exclude = exclusion_prefix + (quotedstring | command | word)
    exclude.setParseAction(ExcludeExpr)

    or_keyword = CaselessKeyword("or").setParseAction(OrKeywordExpr)
    and_keyword = CaselessKeyword("and").setParseAction(AndKeywordExpr)
    keyword = or_keyword | and_keyword

    # Keywords are tried first, then exclusions, then inclusions.
    argument = (keyword | exclude | include)
    return ZeroOrMore(Optional(whitespace) + argument)
def getToken(self):
    """Return the parser element for a table, with results name "table".

    A table is a header line starting with ``||`` (with optional
    parameters captured as ``params``) followed by one or more rows; each
    row starts with ``||`` and contains one or more cells terminated by
    ``||``. Cells, rows and the table are passed to the corresponding
    ``__convert*`` methods.
    """
    cell = Regex(r"(?P<text>(.|(\\\n))*?)\|\|")
    cell.setParseAction(self.__convertTableCell)

    row_content = (Literal("||")
                   + OneOrMore(cell.leaveWhitespace())
                   + Optional(LineEnd()))
    row = AtLineStart(row_content)
    row.setParseAction(self.__convertTableRow)

    header = Regex(r"\|\| *(?P<params>.+)?")
    table = AtLineStart(header + LineEnd() + OneOrMore(row))
    return table.setParseAction(self.__convertTable)("table")
def _make_grammar(self):
    """Build and return the pyparsing grammar for a query string.

    The grammar is a sequence of arguments optionally separated by
    whitespace. An argument is an ``or``/``and`` keyword (case-insensitive),
    an exclusion (a term prefixed with ``-`` or ``^``) or an inclusion.
    A term is a quoted string, a ``name:value`` command or a bare word.
    The returned expression itself carries no parse action.
    """
    from pyparsing import (QuotedString, ZeroOrMore, Combine, Literal,
                           Optional, OneOrMore, Regex, CaselessKeyword)

    # Backslash sequences with a dedicated replacement; any other escaped
    # character is replaced by the character itself.
    replacements = {
        '\\\\': "\\",
        '\\\'': "'",
        '\\"': '"',
        '\\f': "\f",
        '\\n': "\n",
        '\\r': "\r",
        '\\t': "\t",
        '\\ ': " ",
    }

    def escape_handler(s, loc, toks):
        sequence = toks[0]
        if sequence in replacements:
            return replacements[sequence]
        return sequence[1:]

    escape = Combine(Regex(r'\\.')).setParseAction(escape_handler)
    word = Combine(OneOrMore(escape | Regex(r'[^\s\\]+')))
    whitespace = Regex(r'\s+').suppress()

    double_quoted = QuotedString('"', escChar='\\')
    single_quoted = QuotedString("'", escChar='\\')
    quotedstring = Combine(OneOrMore(double_quoted | single_quoted))

    command = Regex(r'[^\s:]+') + Literal(":").suppress() + (quotedstring | word)
    command.setParseAction(CommandExpr)

    include = quotedstring | command | word
    include.setParseAction(IncludeExpr)

    exclusion_prefix = (Literal("-") | Literal("^")).suppress()
    exclude = exclusion_prefix + (quotedstring | command | word)
    exclude.setParseAction(ExcludeExpr)

    or_keyword = CaselessKeyword("or").setParseAction(OrKeywordExpr)
    and_keyword = CaselessKeyword("and").setParseAction(AndKeywordExpr)
    keyword = or_keyword | and_keyword

    # Keywords are tried first, then exclusions, then inclusions.
    argument = (keyword | exclude | include)
    return ZeroOrMore(Optional(whitespace) + argument)
def __init__(self, EvaluateVariableChild=None, EvaluateNumberChild=None):
    """Build the boolean-expression grammar and store it on ``self.boolExpression``.

    :param EvaluateVariableChild: parse action applied to variables;
        falls back to ``EvaluateVariable`` when falsy.
    :param EvaluateNumberChild: parse action applied to float numbers;
        falls back to ``EvaluateNumber`` when falsy.
    """
    if not EvaluateVariableChild:
        EvaluateVariableChild = EvaluateVariable
    if not EvaluateNumberChild:
        EvaluateNumberChild = EvaluateNumber

    # A float number: optional minus, digits, optional fraction/exponent.
    floatNumber = Regex(r'[-]?\d+(\.\d*)?([eE][-+]?\d+)?')
    # A variable is any run of letters, digits and underscores.
    variable = Word(alphanums + "_")
    # A sign is plus or minus (defined here but not referenced by the
    # grammar below).
    signOp = oneOf('+ -')

    # An operand is whichever of number/variable gives the longest match.
    operand = floatNumber ^ variable

    # Matched numbers and variables are handed to the evaluator classes.
    floatNumber.setParseAction(EvaluateNumberChild)
    variable.setParseAction(EvaluateVariableChild)

    # Comparisons include lt, le, gt, ge, eq, ne; negation is "!".
    comparisonOp = oneOf("< <= > >= == !=")
    notOp = oneOf("!")

    # An expression is either a comparison or a NOT operation
    # (where NOT a is essentially (a == False)).
    comparison_levels = [
        (comparisonOp, 2, opAssoc.LEFT, EvaluateComparison),
        (notOp, 1, opAssoc.RIGHT, EvaluateNot),
    ]
    comparisonExpression = operatorPrecedence(operand, comparison_levels)

    # Boolean logic of AND or OR.
    boolOp = oneOf("& |")

    # NOTE(review): this Forward is never given an expression in this
    # method, so only the comparisonExpression alternative appears to be
    # able to match -- confirm whether that is intended.
    boolExpression = Forward()
    boolPossible = boolExpression | comparisonExpression

    bool_levels = [
        (boolOp, 2, opAssoc.RIGHT, EvaluateOrAnd),
    ]
    self.boolExpression = operatorPrecedence(boolPossible, bool_levels)
def __init__(self):
    """Assemble the arithmetic grammar and store its entry point on ``self.bnf``.

    Levels, from tightest to loosest binding: atom, ``^``, ``%``,
    ``*`` and ``/``, ``+`` and ``-``. A parenthesised, comma-separated
    list of sums forms a tuple, which also serves as a function-call
    argument list. Each level is passed to its ``*_action`` parse action.
    """
    plus, minus, mult, div, mod = [Literal(symbol) for symbol in '+-*/%']
    lpar, rpar, comma = Literal('('), Literal(')'), Literal(',')

    powop = Literal('^')
    modop = Literal('%')
    productop = mult | div
    sumop = plus | minus

    # Declared up front because function calls refer to it before it is
    # defined.
    tupl = Forward()

    number = Regex(r'[+-]?\d+(?:\.\d*)?(?:[eE][+-]?\d+)?')
    number.setParseAction(number_action)

    ident = Word(alphas, alphanums + '_')
    ident.setParseAction(ident_action)

    funccall = (ident + tupl).setParseAction(funccall_action)

    # A function call must be tried before a bare identifier.
    atom = (funccall | ident | number).setParseAction(atom_action)

    # Parenthesised atom; built here but not referenced by the levels below.
    patom = (lpar + atom + rpar).setParseAction(patom_action)

    powexpr = Forward()
    powexpr << Group(atom + ZeroOrMore(powop + powexpr))
    powexpr.setParseAction(powexpr_action)

    modexpr = Forward()
    modexpr << Group(powexpr + ZeroOrMore(modop + modexpr))
    modexpr.setParseAction(modexpr_action)

    product = Group(modexpr + ZeroOrMore(productop + modexpr))
    product.setParseAction(product_action)

    sumexpr = Group(product + Group(ZeroOrMore(sumop + product)))
    sumexpr.setParseAction(sum_action)

    tupl << lpar + Optional(sumexpr + ZeroOrMore(comma + sumexpr)) + rpar
    tupl.setParseAction(tupl_action)

    expr = (sumexpr | tupl).setParseAction(expr_action)
    self.bnf = expr
def grammar(self):
    """Define the parser grammar.

    Returns a tuple ``(equation, variable)`` of pyparsing elements:

    * ``equation`` matches ``{=`` *expr* ``}`` and is passed to
      ``self.equation_hook``;
    * ``variable`` matches ``{name}`` and is passed to
      ``self.variable_hook``; TeX spans between ``\\(``/``$$``/``\\[``
      and ``\\)``/``$$``/``\\]`` delimiters are ignored by it.
    """
    # Ignore TeX commands between delimiters $$, \(, \)
    tex_eq = (Literal(r'\(') | Literal(r'$$') | Literal(r'\[')) + ... + (
        Literal(r'\)') | Literal(r'$$') | Literal(r'\]'))

    # Define elementary stuff
    leftAc = Literal('{').suppress()
    rightAc = Literal('}').suppress()
    lpar = Literal('(')
    rpar = Literal(')')
    integer = Word(nums)  # simple unsigned integer
    # The fraction and exponent are optional NON-CAPTURING groups "(?:...)".
    # They were previously spelled "(:?...)", i.e. a capturing group that
    # accepted a stray literal colon, so "1:.5" was lexed as one number.
    real = Regex(r"[+-]?\d+(?:\.\d*)?(?:[eE][+-]?\d+)?")
    real.setParseAction(self.real_hook)
    number = real | integer

    # Define function
    fnname = Word(alphas, alphanums + "_")('name')
    # Require expr to finalize the def
    function = Forward()
    function.setParseAction(self.function_hook)

    # What are the naming rules for the wildcard? It needs to start with a
    # letter and may contain letters, digits and underscores.
    variable = Combine(leftAc + Word(alphas, alphanums + "_") + rightAc)('name')
    variable.setParseAction(self.variable_hook)
    variable.ignore(tex_eq)

    # arithmetic operators
    minus = Literal('-')
    arithOp = oneOf("+ * /") | minus
    equal = Literal('{=').suppress()

    # Require atom to finalize the def
    expr = Forward()

    # Define atom: a number, or any number of leading minus signs followed
    # by a parenthesised expression, a variable or a function call.
    atom = number | (0, None) * minus + (Group(lpar + expr + rpar) | variable | function)
    atom.setParseAction(self.atom_hook)

    # Finalize postponed elements...
    expr << atom + ZeroOrMore(arithOp + atom)
    # Need to group arguments for swapping them
    function << fnname + Group(lpar + Group(ZeroOrMore(expr))
                               + Optional(Literal(',') + Group(expr)) + rpar)

    # Define equation
    equation = equal + expr + rightAc
    equation.setParseAction(self.equation_hook)
    return equation, variable
def _create_pyparser(self):
    """Build the four-function arithmetic parser and store it on ``self.pyparser``.

    While parsing, operands and operators are appended to
    ``self._expr_stack``; each leading minus sign of an atom is recorded
    as ``'unary -'``.
    """
    # Copied from
    # https://github.com/pyparsing/pyparsing/blob/master/examples/fourFn.py.
    from pyparsing import (Literal, Group, Forward, Regex, ZeroOrMore,
                           Suppress)

    def push_first(strg, loc, toks):
        self._expr_stack.append(toks[0])

    def push_unary_minus(strg, loc, toks):
        # Only the run of '-' tokens at the very start counts.
        for tok in toks:
            if tok == '-':
                self._expr_stack.append('unary -')
                continue
            break

    number = Regex(r"[+-]?\d+(?:\.\d*)?(?:[eE][+-]?\d+)?")
    plus = Literal('+')
    minus = Literal('-')
    mult = Literal('*')
    div = Literal('/')
    lpar = Suppress('(')
    rpar = Suppress(')')
    addop = plus | minus
    multop = mult | div

    expr = Forward()

    operand = number.setParseAction(push_first) | Group(lpar + expr + rpar)
    atom = ((0, None) * minus + operand).setParseAction(push_unary_minus)

    factor = Forward()
    factor << atom

    mul_tail = (multop + factor).setParseAction(push_first)
    term = factor + ZeroOrMore(mul_tail)

    add_tail = (addop + term).setParseAction(push_first)
    expr << term + ZeroOrMore(add_tail)

    self.pyparser = expr
def translate(self, text, filename):
    """Translate template ``text`` into module source and return it.

    The translation state on ``self`` is reset, the template is parsed
    (the ``compile_*`` parse actions run during parsing), and the
    declaration, block and body lines are joined with newlines. When
    ``self.super`` is set after parsing, its ``module_source`` is
    prepended.

    :param text: template source to translate.
    :param filename: accepted but not used by this method.
    """
    # Reset per-translation state.
    self.source = text
    self.super = None
    self.inheritance = 0
    self.declaration_lines = ['inheritance = 0']
    self.block_lines = []
    self.body_lines = ['def body():']
    self.target_lines = self.body_lines
    self.indent = 1

    close_tag = Literal('%>')
    spaces = White()

    def tag(opener):
        # opener, then everything up to the closing "%>", then the close.
        return opener + SkipTo(close_tag) + close_tag

    attribute = (Word(alphanums + '_') + Literal('=') + QuotedString('"')
                 + Optional(spaces))
    directive = ("<%@" + Optional(spaces) + Word(alphanums + '_') + spaces
                 + ZeroOrMore(attribute) + close_tag)
    declaration = tag("<%!")
    expression = tag("<%=")
    scriptlet = tag('<%')

    # The more specific openers must be tried before the plain "<%".
    template_text = directive | declaration | expression | scriptlet
    plain_text = Regex(r'((?!<%).|\s)+', re.MULTILINE)
    document = OneOrMore(template_text | plain_text)

    directive.setParseAction(self.compile_directive)
    declaration.setParseAction(self.compile_declaration)
    expression.setParseAction(self.compile_expression)
    scriptlet.setParseAction(self.compile_scriptlet)
    plain_text.setParseAction(self.compile_plain_text)

    document.leaveWhitespace()
    document.parseString(self.source)

    lines = list(self.declaration_lines)
    lines.append('\n')
    lines.extend(self.block_lines)
    lines.append('\n')
    lines.extend(self.body_lines)
    translated = '\n' + '\n'.join(lines)

    if self.super:
        translated = self.super.module_source + translated
    return translated
def grammar(self):
    """Define the parser grammar for FP syntax.

    Modified from base class.

    Returns a tuple ``(equation, variable)`` of pyparsing elements:

    * ``equation`` matches ``fp{`` *expr* ``}`` and is passed to
      ``self.equation_hook``;
    * ``variable`` matches a bare identifier and is passed, together with
      ``self.wildcards``, to ``self.variable_hook``.
    """
    # Define elementary stuff
    leftAc = Literal('{').suppress()
    rightAc = Literal('}').suppress()
    lpar = Literal('(')
    rpar = Literal(')')
    integer = Word(nums)  # simple unsigned integer
    # The fraction and exponent are optional NON-CAPTURING groups "(?:...)".
    # They were previously spelled "(:?...)", i.e. a capturing group that
    # accepted a stray literal colon, so "1:.5" was lexed as one number.
    real = Regex(r"[+-]?\d+(?:\.\d*)?(?:[eE][+-]?\d+)?")
    real.setParseAction(self.real_hook)
    number = real | integer

    # Define function
    fnname = Word(alphas, alphanums + "_")('name')
    # Require expr to finalize the def
    function = Forward()
    function.setParseAction(self.function_hook)

    # Normally, no variables except random*, pi ...
    # The hook receives self.wildcards as an extra argument, looked up at
    # parse time.
    variable = Word(alphas, alphanums + "_")('name')
    variable.setParseAction(
        lambda t: self.variable_hook(self.wildcards, t))

    # arithmetic operators
    minus = Literal('-')
    arithOp = oneOf("+ * /") | minus

    # Require atom to finalize the def
    expr = Forward()

    # Define atom: a number, or any number of leading minus signs followed
    # by a parenthesised expression, a function call or a variable.
    # function is tried before variable because both start with an
    # identifier.
    atom = number | (0, None) * minus + (Group(lpar + expr + rpar) | function | variable)
    atom.setParseAction(self.atom_hook)

    # Finalize postponed elements...
    expr << atom + ZeroOrMore(arithOp + atom)
    function << fnname + Group(lpar + Group(ZeroOrMore(expr))
                               + Optional(Literal(',') + Group(expr)) + rpar)

    # Define equation
    equal = Literal('fp{').suppress()
    equation = equal + expr + rightAc
    equation.setParseAction(self.equation_hook)
    return equation, variable
def ListParser():
    """
    Return a parser for list columns, where each list is composed of pairs
    of values.

    The returned function takes a string such as ``[(1, 2), (3.5, 4e2)]``
    and returns a list of 2-tuples of floats. A string that does not parse
    raises ``ValueError``.
    """
    number = Regex(r'[-+]?[0-9]+(?:\.[0-9]*)?(?:e[-+]?[0-9]+)?', IGNORECASE)
    number.setParseAction(lambda toks: float(toks[0]))

    open_pair, close_pair = Suppress('('), Suppress(')')
    pair = open_pair + number + Suppress(',') + number + close_pair
    pair.setParseAction(tuple)

    pairs = Suppress('[') + delimitedList(pair) + Suppress(']')
    pairs.setParseAction(list)

    def parse(s):
        try:
            parsed = pairs.parseString(s)
        except ParseBaseException as e:
            # Callers only need to handle ValueError.
            raise ValueError(e)
        return parsed.asList()

    return parse
def translate(self, text, filename):
    """Translate template ``text`` into module source and return it.

    The translation state on ``self`` is reset, the template is parsed
    (the ``compile_*`` parse actions run during parsing), and the
    declaration, block and body lines are joined with newlines. When
    ``self.super`` is set after parsing, its ``module_source`` is
    prepended.

    :param text: template source to translate.
    :param filename: accepted but not used by this method.
    """
    # Reset per-translation state.
    self.source = text
    self.super = None
    self.inheritance = 0
    self.declaration_lines = ['inheritance = 0']
    self.block_lines = []
    self.body_lines = ['def body():']
    self.target_lines = self.body_lines
    self.indent = 1

    close_tag = Literal('%>')
    spaces = White()

    def tag(opener):
        # opener, then everything up to the closing "%>", then the close.
        return opener + SkipTo(close_tag) + close_tag

    attribute = (Word(alphanums + '_') + Literal('=') + QuotedString('"')
                 + Optional(spaces))
    directive = ("<%@" + Optional(spaces) + Word(alphanums + '_') + spaces
                 + ZeroOrMore(attribute) + close_tag)
    declaration = tag("<%!")
    expression = tag("<%=")
    scriptlet = tag('<%')

    # The more specific openers must be tried before the plain "<%".
    template_text = directive | declaration | expression | scriptlet
    plain_text = Regex(r'((?!<%).|\s)+', re.MULTILINE)
    document = OneOrMore(template_text | plain_text)

    directive.setParseAction(self.compile_directive)
    declaration.setParseAction(self.compile_declaration)
    expression.setParseAction(self.compile_expression)
    scriptlet.setParseAction(self.compile_scriptlet)
    plain_text.setParseAction(self.compile_plain_text)

    document.leaveWhitespace()
    document.parseString(self.source)

    lines = list(self.declaration_lines)
    lines.append('\n')
    lines.extend(self.block_lines)
    lines.append('\n')
    lines.extend(self.body_lines)
    translated = '\n' + '\n'.join(lines)

    if self.super:
        translated = self.super.module_source + translated
    return translated
def __init__(self):
    """Build the arithmetic parser and store it on ``self.parser``.

    While parsing, ``(token, kind)`` pairs are appended to
    ``self.exprStack``; ``kind`` is ``'var'`` for ``v<digit>`` variables,
    ``'num'`` for numbers of up to ten digits and ``'op'`` for binary
    operators.
    """
    # Every parse action below appends to self.exprStack, so the stack has
    # to live on the instance; a plain local would never be seen by them.
    self.exprStack = []

    fnumber = Regex('\\d{1,10}').setName('number')
    variable = Regex('v\\d').setName('variable')
    plus, minus, mult, div = map(Literal, '+-*/')
    lpar, rpar = map(Suppress, '()')
    addop = plus | minus
    multop = mult | div
    addop.setName('op')
    multop.setName('op')

    expr = Forward()
    # Any number of leading sign operators, then a variable, a number or a
    # parenthesised sub-expression. The variable is tried first.
    atom = addop[...] + (
        variable.setParseAction(
            lambda x: self.exprStack.append((x[0], 'var')))
        | fnumber.setParseAction(
            lambda x: self.exprStack.append((x[0], 'num')))
        | Group(lpar + expr + rpar))

    factor = Forward()
    factor <<= atom

    # For each "operator operand" tail the operator (first token) is pushed
    # after its right-hand operand has been parsed.
    term = factor + (multop + factor).setParseAction(
        lambda x: self.exprStack.append((x[0], 'op')))[...]
    expr <<= term + (addop + term).setParseAction(
        lambda x: self.exprStack.append((x[0], 'op')))[...]

    bnf = expr
    self.parser = bnf
def getToken(self):
    """Return the parser element for a thumbnail token, named "thumbnail".

    The token is a ``%...%`` parameter section (``thumb`` and/or one of
    ``width=``, ``height=``, ``maxsize=`` with an optional ``px``) followed
    by ``Attach:<image file>`` and a closing ``%%``. Matching is
    case-insensitive. Matches are passed to ``__convertThumb``.
    """
    # Verbose-mode pattern: whitespace inside it is not significant.
    pattern = r"""%\s*? (?: (?:thumb\s+)? (?:width\s*?=\s*?(?P<width>\d+) |height\s*?=\s*?(?P<height>\d+) |maxsize\s*?=\s*?(?P<maxsize>\d+))\s*? (?:px)? |thumb\s*? )\s*? %\s*? Attach:(?P<fname>.*?\.(?:jpe?g|bmp|gif|tiff?|png|webp))\s*?%%"""
    flags = re.IGNORECASE | re.VERBOSE

    token = Regex(pattern, flags)
    token = token.setParseAction(self.__convertThumb)
    return token("thumbnail")
def make_sexp_parser():
    """
    Return a simple parser for nested lists of real numbers.

    Round parens () delimit lists, as is customary in lisps. The returned
    callable takes a string and returns the first parsed item: a float
    for a bare number, or a (possibly nested) group for a list. Numbers
    must contain a decimal point.
    """
    # Punctuation literals (note round parens); dropped from the results.
    open_paren = Suppress("(")
    close_paren = Suppress(")")

    # Real numbers, converted to float as they are matched.
    number = Regex(r"[+-]?\d+\.\d*([eE][+-]?\d+)?")
    number = number.setParseAction(lambda tokens: float(tokens[0]))

    # An s-expression is a number or a parenthesised run of s-expressions;
    # the Forward lets the list definition refer to itself.
    sexp = Forward()
    nested = Group(open_paren + ZeroOrMore(sexp) + close_paren)
    sexp << (number | nested)

    return lambda s: sexp.parseString(s)[0]
def getkw_bnf(self):
    """Build and return the input grammar, wired to this object's handlers.

    The grammar accepts any number of entries followed by end of input.
    An entry is a section (``name {`` ... ``}``, optionally tagged as
    ``name<tag> {``), a data block (``$name`` ... ``$end``), a vector
    (``name = [a, b, ...]``) or a keyword (``name = value``). Python-style
    ``#`` comments are ignored.

    Matches are reported through ``store_key``, ``store_vector``,
    ``store_data``, ``add_sect`` and ``pop_sect``; integer, float and
    logical literals are first converted by ``conv_ival``, ``conv_dval``
    and ``conv_lval``.
    """
    sect_begin = Literal("{").suppress()
    sect_end = Literal("}").suppress()
    array_begin = Literal("[").suppress()
    array_end = Literal("]").suppress()
    tag_begin = Literal("<").suppress()
    tag_end = Literal(">").suppress()
    eql = Literal("=").suppress()
    dmark = Literal('$').suppress()
    end_data = Literal('$end').suppress()
    prtable = alphanums + r'!$%&*+-./<>?@^_|~'

    # Raw strings: "\d" and "\." are not valid escapes in a normal string
    # literal and trigger a warning on current Python versions.
    ival = Regex(r'[-]?\d+')
    dval = Regex(r'-?\d+\.\d*([eE]?[+-]?\d+)?')
    lval = Regex('([Yy]es|[Nn]o|[Tt]rue|[Ff]alse|[Oo]n|[Oo]ff)')

    # Helper definitions
    # "^" picks the longest matching alternative.
    kstr = quotedString.setParseAction(removeQuotes) ^ \
        dval ^ ival ^ lval ^ Word(prtable)
    name = Word(alphas + "_", alphanums + "_")
    vec = array_begin + delimitedList(
        dval ^ ival ^ lval ^ Word(prtable) ^
        Literal("\n").suppress() ^
        quotedString.setParseAction(removeQuotes)) + array_end
    sect = name + sect_begin
    tag_sect = name + Group(tag_begin + name + tag_end) + sect_begin

    # Grammar
    keyword = name + eql + kstr
    vector = name + eql + vec
    data = Combine(dmark + name) + SkipTo(end_data) + end_data
    section = Forward()
    sect_def = (sect | tag_sect)  # | vec_sect)
    # Named "entry" rather than "input" to avoid shadowing the builtin.
    entry = section | data | vector | keyword
    section << sect_def + ZeroOrMore(entry) + sect_end

    # Parsing actions
    ival.setParseAction(self.conv_ival)
    dval.setParseAction(self.conv_dval)
    lval.setParseAction(self.conv_lval)
    keyword.setParseAction(self.store_key)
    vector.setParseAction(self.store_vector)
    data.setParseAction(self.store_data)
    sect.setParseAction(self.add_sect)
    tag_sect.setParseAction(self.add_sect)
    sect_end.setParseAction(self.pop_sect)

    bnf = ZeroOrMore(entry) + StringEnd().setFailAction(parse_error)
    bnf.ignore(pythonStyleComment)
    return bnf
__all__ = ['tapOutputParser', 'TAPTest', 'TAPSummary']

# Newlines are significant in this grammar, so only spaces and tabs are
# skipped as whitespace. This must run before any element is created.
ParserElement.setDefaultWhitespaceChars(" \t")
NL = LineEnd().suppress()

integer = Word(nums)

# Plan line, e.g. "1..N"; the upper bound is available as "ubound".
plan = '1..' + integer("ubound")

OK = Literal('ok')
NOT_OK = Literal('not ok')
testStatus = (OK | NOT_OK)

# Description: everything up to a '#' or the end of the line, with leading
# dashes and spaces stripped.
description = Regex("[^#\n]+")
description.setParseAction(lambda t: t[0].lstrip('- '))

TODO = CaselessLiteral('TODO')
SKIP = CaselessLiteral('SKIP')

# Directive: "# TODO <rest>" or "# SKIP<rest>". For SKIP the keyword is only
# looked ahead for, and the whole rest of the line is reported after a
# normalised 'SKIP' token.
directive = Group(
    Suppress('#')
    + (TODO + restOfLine
       | FollowedBy(SKIP)
       + restOfLine.copy().setParseAction(lambda t: ['SKIP', t[0]])))

commentLine = Suppress("#") + empty + restOfLine

# Test line: optional leading comment lines, the status, then an optional
# test number, description and directive.
testLine = Group(
    Optional(OneOrMore(commentLine + NL))("comments")
    + testStatus("passed")
    + Optional(integer)("testNumber")
    + Optional(description)("description")
    + Optional(directive)("directive"))

bailLine = Group(
    Literal("Bail out!")("BAIL")
    + empty
    + Optional(restOfLine)("reason"))
import pyparsing
from pyparsing import Keyword, Word, alphas, nums, alphanums, Regex, Literal, OneOrMore, Suppress, Group, Dict, Optional, White

# Punctuation. Everything wrapped in Suppress is matched but dropped from
# the parse results.
NEWLINE = Literal("\n") | Literal("\r\n")
EQUAL = Suppress(Literal('='))
QUOTE = Suppress(Literal('"'))
SEMI = Suppress(Literal(";"))
BPAREN = Suppress(Literal("("))
EPAREN = Suppress(Literal(")"))
COMMA = Suppress(Literal(","))

YES = Literal("YES")
NO = Literal("NO")

# Numbers, converted to Python values as they are matched.
INTEGER = Word(nums + '-', nums)
INTEGER.setParseAction(lambda x: int(x[0]))
# Raw string: "\d" and "\." are not valid escapes in a normal string literal
# and trigger a warning on current Python versions.
FLOAT = Regex(r'-?[\d\.]+')
FLOAT.setParseAction(lambda x: float(x[0]))

NONQUOTE = Regex('[^"]+')
QSTRING = QUOTE + NONQUOTE + QUOTE  # quoted string
MLQSTRING = OneOrMore(QSTRING | NEWLINE)  # multiline quoted string
DATE = QUOTE + Word(nums + ': ', exact=14) + QUOTE

# Frequently used delimiter combinations.
BQ = EQUAL + QUOTE
EQ = QUOTE + SEMI
BPQ = BPAREN + QUOTE
EPQ = QUOTE + EPAREN

# TLIST(<letter>, "from"-"to")  or  TLIST(<letter>) followed by quoted
# strings and commas.
tlist = Keyword("TLIST") + Group(
    BPAREN + Word("AHQMW1")
    + ((COMMA + QSTRING + Literal("-") + QSTRING + EPAREN)
       | (EPAREN + OneOrMore(QSTRING | COMMA))))
axis_version = Keyword("AXIS-VERSION") + BQ + Word(nums) + EQ
charset = Keyword("CHARSET") + BQ + Literal("ANSI") + EQ
codes = (Keyword("CODES") + BPAREN + QSTRING + EPAREN + EQUAL
         + Group(OneOrMore(QSTRING | NEWLINE | COMMA)) + SEMI)
def define_dot_parser(self):
    """Define dot grammar

    Based on the grammar http://www.graphviz.org/doc/info/lang.html

    Returns the top-level graph parser (results name ``graph``). Line
    comments (``//`` and ``#``) and C-style comments are ignored. Parsed
    statements are passed to this object's ``_proc_*`` methods and the
    whole graph to ``_main_graph_stmt``.
    """
    # punctuation
    colon = Literal(":")
    lbrace = Suppress("{")
    rbrace = Suppress("}")
    lbrack = Suppress("[")
    rbrack = Suppress("]")
    lparen = Literal("(")
    rparen = Literal(")")
    equals = Suppress("=")
    comma = Literal(",")
    semi = Suppress(";")
    at = Literal("@")
    minus = Literal("-")
    pluss = Suppress("+")

    # keywords
    strict_ = CaselessLiteral("strict")
    graph_ = CaselessLiteral("graph")
    digraph_ = CaselessLiteral("digraph")
    subgraph_ = CaselessLiteral("subgraph")
    node_ = CaselessLiteral("node")
    edge_ = CaselessLiteral("edge")

    # Every punctuation character except '_', plus all whitespace.
    punctuation_ = "".join(
        [c for c in string.punctuation if c not in '_']) + string.whitespace

    # token definitions
    identifier = Word(alphanums + "_").setName("identifier")

    # double_quoted_string = QuotedString('"', multiline=True,escChar='\\',
    #    unquoteResults=True) # dblQuotedString
    double_quoted_string = Regex(r'\"(?:\\\"|\\\\|[^"])*\"', re.MULTILINE)
    double_quoted_string.setParseAction(removeQuotes)
    # Quoted strings may be concatenated with '+'.
    quoted_string = Combine(
        double_quoted_string
        + Optional(OneOrMore(pluss + double_quoted_string)),
        adjacent=False)
    alphastring_ = OneOrMore(CharsNotIn(punctuation_))

    def parse_html(s, loc, toks):
        return '<<%s>>' % ''.join(toks[0])

    opener = '<'
    closer = '>'
    try:
        html_text = pyparsing.nestedExpr(
            opener, closer,
            ((CharsNotIn(opener + closer).setParseAction(lambda t: t[0]))
             )).setParseAction(parse_html)
    except Exception:
        # Best-effort fallback for pyparsing versions without a usable
        # nestedExpr. Exception (not a bare except) so that
        # KeyboardInterrupt and SystemExit are not swallowed here.
        log.debug('nestedExpr not available.')
        log.warning('Old version of pyparsing detected. Version 1.4.8 or '
                    'later is recommended. Parsing of html labels may not '
                    'work properly.')
        html_text = Combine(Literal("<<") + OneOrMore(CharsNotIn(",]")))

    ID = (
        alphastring_
        | html_text
        | quoted_string  # .setParseAction(strip_quotes)
        | identifier).setName("ID")

    float_number = Combine(
        Optional(minus) + OneOrMore(Word(nums + "."))).setName("float_number")

    righthand_id = (float_number | ID).setName("righthand_id")

    port_angle = (at + ID).setName("port_angle")

    port_location = (
        (OneOrMore(Group(colon + ID))
         | Group(colon + lparen + ID + comma + ID + rparen))
    ).setName("port_location")

    port = Combine(
        (Group(port_location + Optional(port_angle))
         | Group(port_angle + Optional(port_location)))).setName("port")

    node_id = (ID + Optional(port))
    a_list = OneOrMore(
        ID + Optional(equals + righthand_id)
        + Optional(comma.suppress())).setName("a_list")

    attr_list = OneOrMore(
        lbrack + Optional(a_list) + rbrack
    ).setName("attr_list").setResultsName('attrlist')

    attr_stmt = ((graph_ | node_ | edge_) + attr_list).setName("attr_stmt")

    edgeop = (Literal("--") | Literal("->")).setName("edgeop")

    # Statement lists and edge end points are recursive, hence Forward.
    stmt_list = Forward()
    graph_stmt = (
        lbrace + Optional(stmt_list) + rbrace + Optional(semi)
    ).setName("graph_stmt")

    edge_point = Forward()

    edgeRHS = OneOrMore(edgeop + edge_point)
    edge_stmt = edge_point + edgeRHS + Optional(attr_list)

    subgraph = (
        Optional(subgraph_, '') + Optional(ID, '') + Group(graph_stmt)
    ).setName("subgraph").setResultsName('ssubgraph')

    edge_point <<= (subgraph | graph_stmt | node_id)

    node_stmt = (
        node_id + Optional(attr_list) + Optional(semi)).setName("node_stmt")

    assignment = (ID + equals + righthand_id).setName("assignment")
    # Alternatives are tried in this order.
    stmt = (
        assignment
        | edge_stmt
        | attr_stmt
        | subgraph
        | graph_stmt
        | node_stmt).setName("stmt")
    stmt_list <<= OneOrMore(stmt + Optional(semi))

    graphparser = (
        (Optional(strict_, 'notstrict')
         + ((graph_ | digraph_))
         + Optional(ID, '')
         + lbrace + Group(Optional(stmt_list)) + rbrace
         ).setResultsName("graph"))

    singleLineComment = Group("//" + restOfLine) | Group("#" + restOfLine)

    # actions
    graphparser.ignore(singleLineComment)
    graphparser.ignore(cStyleComment)
    node_id.setParseAction(self._proc_node_id)
    assignment.setParseAction(self._proc_attr_assignment)
    a_list.setParseAction(self._proc_attr_list)
    edge_stmt.setParseAction(self._proc_edge_stmt)
    node_stmt.setParseAction(self._proc_node_stmt)
    attr_stmt.setParseAction(self._proc_default_attr_stmt)
    attr_list.setParseAction(self._proc_attr_list_combine)
    subgraph.setParseAction(self._proc_subgraph_stmt)
    # graph_stmt.setParseAction(self._proc_graph_stmt)
    graphparser.setParseAction(self._main_graph_stmt)
    return graphparser
# Operands integer = Word(nums).setParseAction(operators.process_int) float_ = Regex(r'''[0-9]+ # integer part (?: (?: # optional decimal part followed by e-part (?: \.[0-9]* )? [eE] [-\u2212+]? # U+2212 is unicode minus [0-9]+ ) | (?: \.[0-9]* ) # mandatory decimal part without e-part )''', re.VERBOSE) float_.setParseAction(operators.process_float) unicode_fraction = oneOf("½ ⅓ ¼ ⅕ ⅙ ⅐ ⅛ ⅑ ⅒ ⅔ ¾ ⅖ ⅗ ⅘ ⅚ ⅜ ⅝ ⅞") unicode_fraction.setParseAction(operators.process_unicode_fraction) hexint = Group( Literal("0x") + Regex(r'[0-9a-fA-F]+') ) hexint.setParseAction(operators.process_intbase) octint = Group( Literal("0o") + Regex(r'[0-7]+') ) octint.setParseAction(operators.process_intbase) binint = Group( Literal("0b") + Regex(r'[01]+') ) binint.setParseAction(operators.process_intbase) romanint = Group( Literal("0r") + FollowedBy( # do not accept empty string!
# Operands integer = Word(nums).setParseAction(operators.process_int) float_ = Regex( r'''[0-9]+ # integer part (?: (?: # optional decimal part followed by e-part (?: \.[0-9]* )? [eE] [-\u2212+]? # U+2212 is unicode minus [0-9]+ ) | (?: \.[0-9]* ) # mandatory decimal part without e-part )''', re.VERBOSE) float_.setParseAction(operators.process_float) unicode_fraction = oneOf("½ ⅓ ¼ ⅕ ⅙ ⅐ ⅛ ⅑ ⅒ ⅔ ¾ ⅖ ⅗ ⅘ ⅚ ⅜ ⅝ ⅞") unicode_fraction.setParseAction(operators.process_unicode_fraction) hexint = Group(Literal("0x") + Regex(r'[0-9a-fA-F]+')) hexint.setParseAction(operators.process_intbase) octint = Group(Literal("0o") + Regex(r'[0-7]+')) octint.setParseAction(operators.process_intbase) binint = Group(Literal("0b") + Regex(r'[01]+')) binint.setParseAction(operators.process_intbase) romanint = Group( Literal("0r") + FollowedBy( # do not accept empty string! oneOf("I V X L C D M", caseless=True)) +
def formula_grammar(table):
    """
    Construct a parser for molecular formulas.

    :Parameters:

        *table* : PeriodicTable
            Periodic table used to resolve element symbols.  Every symbol
            matched in the input is looked up with ``table.symbol()``.

    :Returns:
        *parser* : pyparsing.ParserElement.
            The grammar tries, in this order: a compound with an optional
            ``@density`` suffix, a mixture by weight percent, a mixture by
            volume percent, and mixtures given by layer thickness
            (nm, um, mm), absolute mass (ng, ug, mg, g, kg) or absolute
            volume (nl, ul, ml, l).  Mixture parts are separated by ``//``.
            A compound is converted to a ``Formula`` whose structure is
            built from (*count, fragment*) pairs, where fragment is an
            *isotope*, an *element* or a nested list of pairs.  Mixtures are
            converted to whatever ``_mix_by_weight_pairs`` and
            ``_mix_by_volume_pairs`` return.  Empty input yields
            ``Formula()``.
    """
    # Recursive
    composite = Forward()
    mixture = Forward()

    # whitespace and separators
    space = Optional(White().suppress())
    separator = space + Literal('+').suppress() + space

    # Lookup the element in the element table
    symbol = Regex("[A-Z][a-z]*")
    symbol = symbol.setParseAction(lambda s, l, t: table.symbol(t[0]))

    # Translate isotope: "[n]" directly after the symbol, 0 when absent
    openiso = Literal('[').suppress()
    closeiso = Literal(']').suppress()
    isotope = Optional(~White() + openiso + Regex("[1-9][0-9]*") + closeiso,
                       default='0')
    isotope = isotope.setParseAction(lambda s, l, t: int(t[0]) if t[0] else 0)

    # Translate ion: "{n+}" or "{n-}"; the sign comes last in the text and
    # a missing magnitude means 1.  The default '0+' evaluates to 0.
    openion = Literal('{').suppress()
    closeion = Literal('}').suppress()
    ion = Optional(~White() + openion + Regex("([1-9][0-9]*)?[+-]") + closeion,
                   default='0+')
    ion = ion.setParseAction(
        lambda s, l, t: int(t[0][-1] + (t[0][:-1] if len(t[0]) > 1 else '1')))

    # Translate counts
    # NOTE(review): fract also matches a bare "." which float() rejects with
    # ValueError; left unchanged here because it alters which inputs parse.
    fract = Regex("(0|[1-9][0-9]*|)([.][0-9]*)")
    fract = fract.setParseAction(lambda s, l, t: float(t[0]) if t[0] else 1)
    whole = Regex("[1-9][0-9]*")
    whole = whole.setParseAction(lambda s, l, t: int(t[0]) if t[0] else 1)
    count = Optional(~White() + (fract | whole), default=1)

    # Convert symbol,isotope,ion,count to (count,isotope)
    element = symbol + isotope + ion + count

    def convert_element(string, location, tokens):
        """Return (count, element) with isotope and ion selected if given."""
        symbol, isotope, ion, count = tokens[0:4]
        if isotope != 0:
            symbol = symbol[isotope]
        if ion != 0:
            symbol = symbol.ion[ion]
        return (count, symbol)
    element = element.setParseAction(convert_element)

    # Convert "count elements" to a pair
    implicit_group = count + OneOrMore(element)

    def convert_implicit(string, location, tokens):
        """Return the fragment, wrapped as (count, fragment) unless count is 1."""
        count = tokens[0]
        fragment = tokens[1:]
        return fragment if count == 1 else (count, fragment)
    implicit_group = implicit_group.setParseAction(convert_implicit)

    # Convert "(composite) count" to a pair
    opengrp = space + Literal('(').suppress() + space
    closegrp = space + Literal(')').suppress() + space
    explicit_group = opengrp + composite + closegrp + count

    def convert_explicit(string, location, tokens):
        """Same as convert_implicit, but the count trails the group."""
        count = tokens[-1]
        fragment = tokens[:-1]
        return fragment if count == 1 else (count, fragment)
    explicit_group = explicit_group.setParseAction(convert_explicit)

    # Build composite from a set of groups
    group = implicit_group | explicit_group
    implicit_separator = separator | space
    composite << group + ZeroOrMore(implicit_separator + group)

    # "@value" optionally followed by 'n' (natural density) or 'i' (default)
    density = (Literal('@').suppress() + count
               + Optional(Regex("[ni]"), default='i'))
    compound = composite + Optional(density, default=None)

    def convert_compound(string, location, tokens):
        """Build a Formula from the structure and the optional density."""
        if tokens[-1] is None:
            return Formula(structure=_immutable(tokens[:-1]))
        elif tokens[-1] == 'n':
            return Formula(structure=_immutable(tokens[:-2]),
                           natural_density=tokens[-2])
        else:
            return Formula(structure=_immutable(tokens[:-2]),
                           density=tokens[-2])
    compound = compound.setParseAction(convert_compound)

    partsep = space + Literal('//').suppress() + space
    percent = Literal('%').suppress()

    def percent_mixture(tokens, mix, missing_message):
        """
        Combine "count% part // ... // base" tokens with *mix*.

        Tokens alternate percentage, part, ..., and end with the base part,
        which receives whatever is left of 100%.
        """
        piece = tokens[1:-1:2] + [tokens[-1]]
        fract = [float(v) for v in tokens[:-1:2]]
        fract.append(100 - sum(fract))
        if len(piece) != len(fract):
            raise ValueError(missing_message)
        if fract[-1] < 0:
            raise ValueError("Formula percentages must sum to less than 100%")
        return mix(zip(piece, fract))

    weight_percent = Regex("%(w((eigh)?t)?|m(ass)?)").suppress() + space
    by_weight = (count + weight_percent + mixture
                 + ZeroOrMore(partsep + count + (weight_percent | percent)
                              + mixture)
                 + partsep + mixture)

    def convert_by_weight(string, location, tokens):
        """Convert a mixture given in percent by weight/mass."""
        return percent_mixture(tokens, _mix_by_weight_pairs,
                               "Missing base component of mixture")
    mixture_by_weight = by_weight.setParseAction(convert_by_weight)

    volume_percent = Regex("%v(ol(ume)?)?").suppress() + space
    by_volume = (count + volume_percent + mixture
                 + ZeroOrMore(partsep + count + (volume_percent | percent)
                              + mixture)
                 + partsep + mixture)

    def convert_by_volume(string, location, tokens):
        """Convert a mixture given in percent by volume."""
        return percent_mixture(tokens, _mix_by_volume_pairs,
                               "Missing base component of mixture " + string)
    mixture_by_volume = by_volume.setParseAction(convert_by_volume)

    def absolute_mixture(tokens, units, attr, mix):
        """
        Combine parts given as absolute quantities.

        Tokens come in pairs: either ([value, unit], part) or, for a
        parenthesised sub-mixture, (sub_mixture, multiplier).  Each quantity
        is scaled by *units*, turned into a percentage of the total and
        handed to *mix*; the total is stored on the result as *attr*.
        """
        if len(tokens) < 2:
            return tokens
        piece = []
        amount = []
        for p1, p2 in zip(tokens[0::2], tokens[1::2]):
            if isinstance(p1, Formula):
                # "(mixture) count": scale the total already on the mixture
                amount.append(getattr(p1, attr) * float(p2))
                piece.append(p1)
            else:
                amount.append(float(p1[0]) * units[p1[1]])
                piece.append(p2)
        total = sum(amount)
        share = [(v / total) * 100 for v in amount]
        result = mix(zip(piece, share))
        setattr(result, attr, total)
        return result

    mixture_by_layer = Forward()
    layer_thick = Group(count + Regex("(nm|um|mm)") + space)
    layer_part = ((layer_thick + mixture)
                  | (opengrp + mixture_by_layer + closegrp + count))
    mixture_by_layer << layer_part + ZeroOrMore(partsep + layer_part)

    def convert_by_layer(string, location, tokens):
        """Convert layers given by thickness into a volume mixture."""
        units = {'nm': 1e-9, 'um': 1e-6, 'mm': 1e-3}
        return absolute_mixture(tokens, units, 'absthick',
                                _mix_by_volume_pairs)
    mixture_by_layer = mixture_by_layer.setParseAction(convert_by_layer)

    mixture_by_absmass = Forward()
    absmass_mass = Group(count + Regex("(ng|ug|mg|g|kg)") + space)
    absmass_part = ((absmass_mass + mixture)
                    | (opengrp + mixture_by_absmass + closegrp + count))
    mixture_by_absmass << absmass_part + ZeroOrMore(partsep + absmass_part)

    def convert_by_absmass(string, location, tokens):
        """Convert parts given by absolute mass into a weight mixture."""
        units = {'ng': 1e-9, 'ug': 1e-6, 'mg': 1e-3, 'g': 1e+0, 'kg': 1e+3}
        return absolute_mixture(tokens, units, 'absmass',
                                _mix_by_weight_pairs)
    mixture_by_absmass = mixture_by_absmass.setParseAction(convert_by_absmass)

    mixture_by_absvolume = Forward()
    absvolume_vol = Group(count + Regex("(nl|ul|ml|l)") + space)
    absvolume_part = ((absvolume_vol + mixture)
                      | (opengrp + mixture_by_absvolume + closegrp + count))
    mixture_by_absvolume << absvolume_part + ZeroOrMore(partsep
                                                        + absvolume_part)

    def convert_by_absvolume(string, location, tokens):
        """Convert parts given by absolute volume into a volume mixture."""
        units = {'nl': 1e-9, 'ul': 1e-6, 'ml': 1e-3, 'l': 1e+0}
        return absolute_mixture(tokens, units, 'absvolume',
                                _mix_by_volume_pairs)
    mixture_by_absvolume = mixture_by_absvolume.setParseAction(
        convert_by_absvolume)

    # A mixture part is a compound or a parenthesised percent mixture
    mixture << (compound
                | (opengrp + (mixture_by_weight | mixture_by_volume)
                   + closegrp))
    formula = (compound | mixture_by_weight | mixture_by_volume
               | mixture_by_layer | mixture_by_absmass | mixture_by_absvolume)
    grammar = Optional(formula, default=Formula()) + StringEnd()

    grammar.setName('Chemical Formula')
    return grammar
# -*- coding: utf-8 -*-
from pyparsing import (Word, Literal, Regex, Keyword, CaselessKeyword,
                       Forward, Group, OneOrMore, ZeroOrMore, alphas, nums,
                       Suppress, delimitedList, CharsNotIn, Empty, Optional,
                       Or, restOfLine)
from fractions import Fraction
from orderedattrdict import AttrDict
from .base import FPLOFileType, loads


def _attrdict_as_dict_str(self):
    """Render an AttrDict the way a plain dict is rendered."""
    return str(dict(self))


AttrDict.__repr__ = AttrDict.__str__ = _attrdict_as_dict_str


def _to_int(s, l, t):
    """Parse action: matched text -> int."""
    return int(t[0])


def _to_float(s, l, t):
    """Parse action: matched text -> float."""
    return float(t[0])


def _to_flag(s, l, t):
    """Parse action: 'name(+)' / 'name(-)' -> (name, bool)."""
    return (t.key, t.val == '+')


def _to_fraction(s, l, t):
    """Parse action: numerator '/' denominator -> Fraction."""
    return Fraction(t[0], t[2])


def _to_bool(s, l, t):
    """Parse action: 't' / 'f' (any case) -> bool."""
    return t[0].lower() == "t"


IDENTIFIER = Word(alphas + "_", alphas + nums + "_")

# Optionally signed integer without leading zeros (or a run of zeros).
INT_DECIMAL = Regex(r'([+-]?(([1-9][0-9]*)|0+))')
# setParseAction returns the element itself, so INTEGER is INT_DECIMAL.
INTEGER = INT_DECIMAL.setParseAction(_to_int)

# Decimal or exponent notation, or the literal INF.
FLOAT = Regex(r'[+-]?((((\d+\.\d*)|(\d*\.\d+))'
              r'([eE][-+]?\d+)?)|(\d*[eE][+-]?\d+)|INF)')
FLOAT.setParseAction(_to_float)

FLAG = Regex(r'(?P<key>[a-zA-Z_]+)\((?P<val>[+-])\)')
FLAG.setParseAction(_to_flag)

# Punctuation that is matched but dropped from the results.
(LPAREN, RPAREN, LBRACK, RBRACK, LBRACE, RBRACE, SEMI, COMMA, EQUAL,
 DQUOTE) = map(Suppress, "()[]{};,=\"")

SIZE = (delimitedList(INTEGER | IDENTIFIER)
        | INTEGER
        | Literal('*')
        | IDENTIFIER)

STRING = DQUOTE + ZeroOrMore(CharsNotIn('"')) + DQUOTE

FRACTION = INTEGER + Literal('/') + INTEGER
FRACTION = FRACTION.setParseAction(_to_fraction)

BOOLEAN = CaselessKeyword("t") | CaselessKeyword("f")
BOOLEAN = BOOLEAN.setParseAction(_to_bool)
string = QuotedString('"', escChar="\\") | QuotedString('\'', escChar="\\") operand = model_reference | real | integer | constant | string | variable plusop = oneOf('+ -') multop = oneOf('* / // %') groupop = Literal(',') expr = Forward() modifier = Combine(Word(alphas + nums) + ':') integer.setParseAction(EvalInteger) real.setParseAction(EvalReal) string.setParseAction(EvalString) constant.setParseAction(EvalConstant) variable.setParseAction(EvalVariable) model_reference.setParseAction(EvalModelReference) comparisonop = (oneOf("< <= > >= != == ~= ^= $=") | (Literal('not in') + WordEnd()) | (oneOf("in lt lte gt gte matches contains icontains like") + WordEnd())) logicopOR = Literal('or') + WordEnd() logicopAND = Literal('and') + WordEnd() expr << operatorPrecedence(operand, [ (modifier, 1, opAssoc.RIGHT, EvalModifierOp), (multop, 2, opAssoc.LEFT, EvalMultOp), (plusop, 2, opAssoc.LEFT, EvalAddOp), (comparisonop, 2, opAssoc.LEFT, EvalComparisonOp),
4 294 967 295,000 # GB-English 4,294,967,295.000 # US-English 4,294,967,295.000 # Thai 4,294,967,295.000 """ from pyparsing import Regex comma_decimal = Regex(r"\d{1,2}(([ .])\d\d\d(\2\d\d\d)*)?,\d*") comma_decimal.setParseAction( lambda t: float(t[0].replace(" ", "").replace(".", "").replace(",", ".")) ) dot_decimal = Regex(r"\d{1,2}(([ ,])\d\d\d(\2\d\d\d)*)?\.\d*") dot_decimal.setParseAction(lambda t: float(t[0].replace(" ", "").replace(",", ""))) decimal = comma_decimal ^ dot_decimal decimal.runTests(tests, parseAll=True) grouped_integer = Regex(r"\d{1,2}(([ .,])\d\d\d(\2\d\d\d)*)?") grouped_integer.setParseAction( lambda t: int(t[0].replace(" ", "").replace(",", "").replace(".", "")) ) grouped_integer.runTests(tests, parseAll=False)
Optional(oneOf("E e") + Word(nums)) ) + Optional(quote).suppress() ).setName("real") # TODO: Positive real number between zero and one. decimal = real # String ---------------------------------------------------------------------- q_string = (sglQuotedString | dblQuotedString).setName("q_string") #double_quoted_string = QuotedString('"', multiline=True,escChar="\\", # unquoteResults=True) # dblQuotedString double_quoted_string = Regex(r'\"(?:\\\"|\\\\|[^"])*\"', re.MULTILINE) double_quoted_string.setParseAction(removeQuotes) quoted_string = Combine( double_quoted_string+ Optional(OneOrMore(pluss+double_quoted_string)), adjacent=False ) word = quoted_string.setName("word") # Word(alphanums) # Graph attributes ------------------------------------------------------------ hex_color = Word(hexnums, exact=2) #TODO: Optional whitespace rgb = Literal("#").suppress() + hex_color.setResultsName("red") + \ hex_color.setResultsName("green") + hex_color.setResultsName("blue") rgba = rgb + hex_color.setResultsName("alpha") hsv = decimal.setResultsName("hue") + decimal.setResultsName("saturation") + \ decimal.setResultsName("value") color_name = double_quoted_string | Word(alphas)
# Swedish 4 294 967 295,000 # GB-English 4,294,967,295.000 # US-English 4,294,967,295.000 # Thai 4,294,967,295.000 """ from pyparsing import Regex comma_decimal = Regex(r'\d{1,2}(([ .])\d\d\d(\2\d\d\d)*)?,\d*') comma_decimal.setParseAction( lambda t: float(t[0].replace(' ', '').replace('.', '').replace(',', '.'))) dot_decimal = Regex(r'\d{1,2}(([ ,])\d\d\d(\2\d\d\d)*)?\.\d*') dot_decimal.setParseAction( lambda t: float(t[0].replace(' ', '').replace(',', ''))) decimal = comma_decimal ^ dot_decimal decimal.runTests(tests, parseAll=True) grouped_integer = Regex(r'\d{1,2}(([ .,])\d\d\d(\2\d\d\d)*)?') grouped_integer.setParseAction( lambda t: int(t[0].replace(' ', '').replace(',', '').replace('.', ''))) grouped_integer.runTests(tests, parseAll=False)
Literal, quotedString, Keyword, empty, Suppress, Combine, NotAny, Regex def eachMostOnce(*args, or_=operator.ior, and_=operator.add): return reduce( or_, (reduce(or_, map(lambda x: reduce(and_, x), permutations(args, i))) for i in range(len(args), 0, -1))) NAME = Word(alphas, alphanums + '_') INTEGER = Word(nums).setName('integer') INTEGER_K = Combine(INTEGER + Optional('_' + (INTEGER | NAME))) EOL = p.LineEnd() FortranComment = Regex(r'!.*$') FortranComment.setParseAction(lambda s, loc, toks: [' ' + toks[0]]) EOLL = Optional(FortranComment) + EOL precision = Combine('.' + INTEGER) exponent = Combine(oneOf('d e D E') + Optional(oneOf('+ -')) + INTEGER) REAL = Combine(INTEGER + ((precision + exponent) | precision | exponent)) STRING = quotedString comp_op = Forward() user_op = NotAny(comp_op | oneOf('.not. .and. .or. .eqv. .neqv. ** // % .true. .false.')) \ + Combine('.' + NAME + '.') atom = Forward() calllist = Forward() array_sub = '(' + Optional(atom) + ':' + Optional(atom) + Optional(':' + atom) + ')' type_sub = '%' + NAME
Optional(quote).suppress() + Combine( Optional(sign) + (Word(nums) + Optional(decimal_sep + Word(nums))) | (decimal_sep + Word(nums)) + Optional(oneOf("E e") + Word(nums))) + Optional(quote).suppress()).setName("real") # TODO: Positive real number between zero and one. decimal = real # String ---------------------------------------------------------------------- q_string = (sglQuotedString | dblQuotedString).setName("q_string") #double_quoted_string = QuotedString('"', multiline=True,escChar="\\", # unquoteResults=True) # dblQuotedString double_quoted_string = Regex(r'\"(?:\\\"|\\\\|[^"])*\"', re.MULTILINE) double_quoted_string.setParseAction(removeQuotes) quoted_string = Combine(double_quoted_string + Optional(OneOrMore(pluss + double_quoted_string)), adjacent=False) word = quoted_string.setName("word") # Word(alphanums) # Graph attributes ------------------------------------------------------------ hex_color = Word(hexnums, exact=2) #TODO: Optional whitespace rgb = Literal("#").suppress() + hex_color.setResultsName("red") + \ hex_color.setResultsName("green") + hex_color.setResultsName("blue") rgba = rgb + hex_color.setResultsName("alpha") hsv = decimal.setResultsName("hue") + decimal.setResultsName("saturation") + \ decimal.setResultsName("value") color_name = double_quoted_string | Word(alphas) colour = rgba | rgb | hsv | color_name
required_modifier = Literal('+')('required') prohibit_modifier = Literal('-')('prohibit') special_characters = '=><(){}[]^"~*?:\\/' valid_word = Word(printables, excludeChars=special_characters).setName('word') valid_word.setParseAction( lambda t: t[0].replace('\\\\', chr(127)).replace('\\', '').replace(chr(127), '\\') ) clause = Forward() field_name = valid_word()('fieldname') single_term = valid_word()('singleterm') phrase = QuotedString('"', unquoteResults=True)('phrase') wildcard = Regex('[a-z0-9]*[\?\*][a-z0-9]*')('wildcard') wildcard.setParseAction( lambda t: t[0].replace('?', '.?').replace('*', '.*') ) regex = QuotedString('/', unquoteResults=True)('regex') _all = Literal('*') lower_range = Group((LBRACK('inclusive') | LBRACE('exclusive')) + (valid_word | _all)('lowerbound')) upper_range = Group((valid_word | _all)('upperbound') + (RBRACK('inclusive') | RBRACE('esclusive'))) _range = (lower_range + to_ + upper_range)('range') GT = Literal('>') GTE = Literal('>=') LT = Literal('<') LTE = Literal('<=') mongo_op = (GTE | GT | LTE | LT) mongo_op.setParseAction(
PLX_re = '(%s|%s)'%(PN_LOCAL_ESC_re,PERCENT_re) #PLX = PERCENT | PN_LOCAL_ESC # regex'd # [169] PN_LOCAL ::= (PN_CHARS_U | ':' | [0-9] | PLX ) ((PN_CHARS | '.' | ':' | PLX)* (PN_CHARS | ':' | PLX) )? PN_LOCAL = Regex(u"""([%(PN_CHARS_U)s:0-9]|%(PLX)s) (([%(PN_CHARS)s\\.:]|%(PLX)s)* ([%(PN_CHARS)s:]|%(PLX)s) )?"""%dict(PN_CHARS_U=PN_CHARS_U_re, PN_CHARS=PN_CHARS_re, PLX=PLX_re), flags=re.X|re.UNICODE) def _hexExpand(match): return unichr(int(match.group(0)[1:], 16)) PN_LOCAL.setParseAction(lambda x: re.sub("(%s)"%PERCENT_re, _hexExpand, x[0])) # [141] PNAME_LN ::= PNAME_NS PN_LOCAL PNAME_LN = PNAME_NS + Param('localname', PN_LOCAL.leaveWhitespace()) # [142] BLANK_NODE_LABEL ::= '_:' ( PN_CHARS_U | [0-9] ) ((PN_CHARS|'.')* PN_CHARS)? BLANK_NODE_LABEL = Regex(u'_:[0-9%s](?:[\\.%s]*[%s])?' % ( PN_CHARS_U_re, PN_CHARS_re, PN_CHARS_re), flags=re.U) BLANK_NODE_LABEL.setParseAction(lambda x: rdflib.BNode(x[0])) # [166] VARNAME ::= ( PN_CHARS_U | [0-9] ) ( PN_CHARS_U | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040] )* VARNAME = Regex(u'[%s0-9][%s0-9\u00B7\u0300-\u036F\u203F-\u2040]*' % (
(PN_CHARS_U_re, PN_CHARS_U_re), flags=re.U) # [143] VAR1 ::= '?' VARNAME VAR1 = Combine(Suppress('?') + VARNAME) # [144] VAR2 ::= '$' VARNAME VAR2 = Combine(Suppress('$') + VARNAME) # [145] LANGTAG ::= '@' [a-zA-Z]+ ('-' [a-zA-Z0-9]+)* LANGTAG = Combine(Suppress('@') + Regex('[a-zA-Z]+(?:-[a-zA-Z0-9]+)*')) # [146] INTEGER ::= [0-9]+ INTEGER = Regex(r"[0-9]+") # INTEGER.setResultsName('integer') INTEGER.setParseAction( lambda x: rdflib.Literal(x[0], datatype=rdflib.XSD.integer)) # [155] EXPONENT ::= [eE] [+-]? [0-9]+ EXPONENT_re = '[eE][+-]?[0-9]+' # [147] DECIMAL ::= [0-9]* '.' [0-9]+ DECIMAL = Regex(r'[0-9]*\.[0-9]+') # (?![eE]) # DECIMAL.setResultsName('decimal') DECIMAL.setParseAction( lambda x: rdflib.Literal(x[0], datatype=rdflib.XSD.decimal)) # [148] DOUBLE ::= [0-9]+ '.' [0-9]* EXPONENT | '.' ([0-9])+ EXPONENT | ([0-9])+ EXPONENT DOUBLE = Regex(r'[0-9]+\.[0-9]*%(e)s|\.([0-9])+%(e)s|[0-9]+%(e)s' % {'e': EXPONENT_re}) # DOUBLE.setResultsName('double') DOUBLE.setParseAction(
# CREATE KEYSPACE create_keyspace = (CaselessLiteral("CREATE") + CaselessLiteral("KEYSPACE") + ifnotexists + identifier + CaselessLiteral("WITH") + identifier + '=' + literal_map + ';') create_keyspace.setParseAction(ParseActionSimple('CREATE KEYSPACE')) # Comments # It would be nice to remove comments in the lexing stage, except that # they are not part of the language that cassandra understands: they are # part of cqlsh. Instead mark them as special chunks and skip them in # the executor. This means that comments may only appear between # statements. comment = Regex('(--|//)[^\n]*') + lineEnd comment.setParseAction(ParseActionSimple('COMMENT')) cql = OneOrMore(ctable | alter | update | insert | create_keyspace | comment) cql.enablePackrat() cql.parseWithTabs() class CqlChunk(object): def __init__(self, src, chunk_type, info, start, end): self.src = src self.chunk_type = chunk_type self.info = info self.start = start self.end = end def body(self):
# Spanish 4.294.967.295,000 # Swedish 4 294 967 295,000 # GB-English 4,294,967,295.000 # US-English 4,294,967,295.000 # Thai 4,294,967,295.000 """ from pyparsing import Regex comma_decimal = Regex(r'\d{1,2}(([ .])\d\d\d(\2\d\d\d)*)?,\d*') comma_decimal.setParseAction(lambda t: float(t[0].replace(' ','').replace('.','').replace(',','.'))) dot_decimal = Regex(r'\d{1,2}(([ ,])\d\d\d(\2\d\d\d)*)?\.\d*') dot_decimal.setParseAction(lambda t: float(t[0].replace(' ','').replace(',',''))) decimal = comma_decimal ^ dot_decimal decimal.runTests(tests, parseAll=True) grouped_integer = Regex(r'\d{1,2}(([ .,])\d\d\d(\2\d\d\d)*)?') grouped_integer.setParseAction(lambda t: int(t[0].replace(' ','').replace(',','').replace('.',''))) grouped_integer.runTests(tests, parseAll=False)
alphanums_extended = alphanums + "-_"

# JSON-style number definition
number_ext = Combine(
    Optional('-') +
    ('0' | Word('123456789', nums)) +
    Optional('.' + Word(nums)) +
    Optional(Word('eE', exact=1) + Word(nums + '+-', nums))
).setName("number")

number = Regex(r"\-?\d+(\.\d+)?")


def numberParseAction(s, p, t):
    """Parse action for ``number``; leaves the matched token unchanged.

    The previous body was a leftover ``ipdb.set_trace()`` breakpoint, which
    stopped the program (and required ipdb) every time a number was parsed.
    It returned None, so the tokens were never modified; that result is
    kept here without the debugger.
    """
    return None


number.setParseAction(numberParseAction)

# placeholders, defined later in the code
expression = Forward()
function = Forward()
entity = Forward()
lambda_expression = Forward()

# parenthesised expressions
enclosed_expression = Group(Suppress(lparen) + expression + Suppress(rparen))

lambda_name = Literal("lambda")
var_name = Word(alphanums_extended)
lambda_vars = delimitedList(var_name)
# "name" or "name.attribute"; the dot itself is dropped from the results
var_access = Group(var_name
                   + Optional(Suppress(dot) + Word(alphanums_extended)))
) ParserElement.enablePackrat() COLON, LBRACK, RBRACK, LBRACE, RBRACE, TILDE, CARAT = map(Literal, ":[]{}~^") LPAR, RPAR = map(Suppress, "()") and_ = CaselessKeyword("AND") or_ = CaselessKeyword("OR") not_ = CaselessKeyword("NOT") to_ = CaselessKeyword("TO") keyword = and_ | or_ | not_ expression = Forward() valid_word = Regex(r'([a-zA-Z0-9*_+.-]|\\[!(){}\[\]^"~*?\\:])+').setName("word") valid_word.setParseAction(lambda t: t[0].replace("\\\\", chr(127)).replace("\\", "").replace(chr(127), "\\")) string = QuotedString('"') required_modifier = Literal("+")("required") prohibit_modifier = Literal("-")("prohibit") integer = Regex(r"\d+").setParseAction(lambda t: int(t[0])) proximity_modifier = Group(TILDE + integer("proximity")) number = Regex(r"\d+(\.\d+)?").setParseAction(lambda t: float(t[0])) fuzzy_modifier = TILDE + Optional(number, default=0.5)("fuzzy") term = Forward() field_name = valid_word.copy().setName("fieldname") incl_range_search = Group(LBRACK + term("lower") + to_ + term("upper") + RBRACK) excl_range_search = Group(LBRACE + term("lower") + to_ + term("upper") + RBRACE) range_search = incl_range_search("incl_range") | excl_range_search("excl_range")
def parse(expression, equation=False, subs=None, main=None, returnVars=False):
    """Parse an arithmetic expression (or equation) given as a string.

    Parameters:
        expression: text to parse.  Anything that is not a ``str`` is
            returned unchanged.
        equation: when true, parse ``lhs = rhs`` and build ``Eq(lhs, rhs)``;
            otherwise parse a single expression.
        subs: optional mapping from a variable name found in the text to
            the name passed to ``symbols()`` instead.  Defaults to no
            substitutions.
        main: optional object providing ``fromDotRef``; when given,
            ``name.attr`` references are resolved through
            ``hypergraph.treeCompute`` and ``solve`` instead of being
            turned into plain symbols.
        returnVars: when true, return the set of variable names that were
            encountered (as written, before substitution) instead of the
            parsed value.

    Returns:
        The first token of the parse result, or the set of variable names
        when *returnVars* is true.

    Only the leading part of the text that matches the grammar is consumed;
    trailing text is ignored because ``parseString`` is called without
    ``parseAll``.
    """
    if not isinstance(expression, str):
        return expression
    if subs is None:
        subs = {}

    varSet = set()
    lparen = Literal("(").suppress()
    rparen = Literal(")").suppress()
    equal = Literal("=").suppress()

    # Named constants, written with a leading capital letter.
    spec = {"E": exp(1), "Pi": pi}

    def getSymbol(s):
        """Record the name and return its (possibly substituted) symbol."""
        varSet.add(s)
        if s in subs:
            s = subs[s]
        return symbols(s)

    def getFunction(s):
        """Build the set function named by s[0] applied to s[1]."""
        if s[0] == "len":
            return SetLength(s[1])
        elif s[0] == "sum":
            return SetSummation(s[1])
        else:
            # NOTE(review): if Error() does not raise, this returns None.
            Error('Unknown slng function ' + s[0])

    integer = Word(nums).setParseAction(lambda t: [int(t[0])])
    # One or more fractional digits; the old pattern accepted exactly one,
    # so "3.14" was read as 3.1 and the rest silently dropped.
    decimal = Regex(r"[0-9]+\.[0-9]+").setParseAction(
        lambda t: [float(t[0])])
    special = Regex(r"[A-Z][a-zA-Z]*").setParseAction(
        lambda t: [spec[t[0]]])
    var = Regex(r"[a-z][a-zA-Z]*").setParseAction(
        lambda t: [getSymbol(t[0])])
    lowerName = Regex(r"[a-z][a-zA-Z]*").setParseAction(lambda t: [t[0]])
    prop = Regex(r"[a-z][a-zA-Z]*\.[a-z][a-zA-Z]*").setParseAction(
        lambda t: [getSymbol(t[0])])
    ref = Regex(r"\{[0-9]+\}").setParseAction(lambda t: [getSymbol(t[0])])
    string = Regex(r'"[-0-9a-zA-Z: ]*"').setParseAction(
        lambda t: [t[0][1:-1]])

    opn = {
        "+": (lambda a, b: a + b),
        "-": (lambda a, b: a - b),
        "*": (lambda a, b: a * b),
        "/": (lambda a, b: a / b),
        "^": (lambda a, b: a ** b),
    }

    def opClean(t):
        """Fold [a, op, b, op, c, ...] left to right into a single value."""
        if len(t) == 1:
            return t
        acc = t[0]
        for i in range(1, len(t) - 1, 2):
            acc = opn[t[i]](acc, t[i + 1])
        return [acc]

    if main is not None:
        def treeCompute(p):
            """Solve the tree computation of the node referenced by p."""
            try:
                node = main.fromDotRef(p)
                comp = hypergraph.treeCompute(node)
                res = solve(comp, symbols(p))
                return res[0]
            except Exception as e:
                logging.exception(e)
                Error("Error with tree Compute: ")

        prop = prop.setParseAction(lambda t: [treeCompute(t[0])])

    expr = Forward()
    paren = (lparen + expr + rparen).setParseAction(lambda s, l, t: t)
    function = (lowerName + lparen + (prop | var) + rparen).setParseAction(
        lambda t: getFunction(t))
    # Order matters: more specific alternatives are tried first.
    atom = (function | string | paren | decimal | integer | ref | prop
            | special | var)
    multExpr = (atom + ZeroOrMore(Word("*/") + atom)).setParseAction(
        lambda s, l, t: opClean(t))
    expr << (multExpr + ZeroOrMore(Word("+-") + multExpr)).setParseAction(
        lambda s, l, t: opClean(t))
    # '=' is suppressed, so the two sides arrive as tokens 0 and 1.
    equality = (expr + equal + expr).setParseAction(
        lambda s, l, t: Eq(t[0], t[1]))

    if equation:
        res = equality.parseString(expression)[0]
    else:
        res = expr.parseString(expression)[0]

    if returnVars:
        return varSet
    else:
        return res
def define_dot_parser(self):
    """Define dot grammar

    Based on the grammar http://www.graphviz.org/doc/info/lang.html

    Builds the pyparsing grammar for a whole graph, attaches this object's
    ``_proc_*`` / ``_main_graph_stmt`` callbacks as parse actions, makes
    the parser ignore ``//``, ``#`` and C-style comments, and returns it.
    """
    # punctuation
    colon = Literal(":")
    lbrace = Suppress("{")
    rbrace = Suppress("}")
    lbrack = Suppress("[")
    rbrack = Suppress("]")
    lparen = Literal("(")
    rparen = Literal(")")
    equals = Suppress("=")
    comma = Literal(",")
    semi = Suppress(";")
    at = Literal("@")
    minus = Literal("-")
    pluss = Suppress("+")

    # keywords
    strict_ = CaselessLiteral("strict")
    graph_ = CaselessLiteral("graph")
    digraph_ = CaselessLiteral("digraph")
    subgraph_ = CaselessLiteral("subgraph")
    node_ = CaselessLiteral("node")
    edge_ = CaselessLiteral("edge")

    # every punctuation character except '_' plus all whitespace
    punctuation_ = "".join(
        [c for c in string.punctuation if c not in '_']) + string.whitespace

    # token definitions
    identifier = Word(alphanums + "_").setName("identifier")

    double_quoted_string = Regex(r'\"(?:\\\"|\\\\|[^"])*\"', re.MULTILINE)
    double_quoted_string.setParseAction(removeQuotes)
    # "a" + "b" style concatenation of quoted strings
    quoted_string = Combine(
        double_quoted_string
        + Optional(OneOrMore(pluss + double_quoted_string)),
        adjacent=False)
    alphastring_ = OneOrMore(CharsNotIn(punctuation_))

    def parse_html(s, loc, toks):
        """Re-wrap the content of a nested <...> label in '<<' and '>>'."""
        return '<<%s>>' % ''.join(toks[0])

    opener = '<'
    closer = '>'
    try:
        html_text = pyparsing.nestedExpr(
            opener, closer,
            ((CharsNotIn(opener + closer).setParseAction(lambda t: t[0])))
        ).setParseAction(parse_html)
    except Exception:
        # Fall back for pyparsing versions without nestedExpr.  Catching
        # Exception rather than using a bare except keeps KeyboardInterrupt
        # and SystemExit from being swallowed.
        log.debug('nestedExpr not available.')
        log.warning('Old version of pyparsing detected. Version 1.4.8 or '
                    'later is recommended. Parsing of html labels may not '
                    'work properly.')
        html_text = Combine(Literal("<<") + OneOrMore(CharsNotIn(",]")))

    ID = (alphastring_ | html_text | quoted_string | identifier).setName("ID")

    float_number = Combine(
        Optional(minus) + OneOrMore(Word(nums + "."))).setName("float_number")

    righthand_id = (float_number | ID).setName("righthand_id")

    port_angle = (at + ID).setName("port_angle")

    port_location = (
        (OneOrMore(Group(colon + ID))
         | Group(colon + lparen + ID + comma + ID + rparen))
    ).setName("port_location")

    port = Combine(
        (Group(port_location + Optional(port_angle))
         | Group(port_angle + Optional(port_location)))
    ).setName("port")

    node_id = (ID + Optional(port))
    a_list = OneOrMore(
        ID + Optional(equals + righthand_id) + Optional(comma.suppress())
    ).setName("a_list")

    attr_list = OneOrMore(
        lbrack + Optional(a_list) + rbrack
    ).setName("attr_list").setResultsName('attrlist')

    attr_stmt = ((graph_ | node_ | edge_) + attr_list).setName("attr_stmt")

    edgeop = (Literal("--") | Literal("->")).setName("edgeop")

    stmt_list = Forward()
    graph_stmt = (
        lbrace + Optional(stmt_list) + rbrace + Optional(semi)
    ).setName("graph_stmt")

    edge_point = Forward()

    edgeRHS = OneOrMore(edgeop + edge_point)
    edge_stmt = edge_point + edgeRHS + Optional(attr_list)

    subgraph = (
        Optional(subgraph_, '') + Optional(ID, '') + Group(graph_stmt)
    ).setName("subgraph").setResultsName('ssubgraph')

    edge_point << (subgraph | graph_stmt | node_id)

    node_stmt = (
        node_id + Optional(attr_list) + Optional(semi)).setName("node_stmt")

    assignment = (ID + equals + righthand_id).setName("assignment")
    # Alternatives are tried in this order.
    stmt = (assignment | edge_stmt | attr_stmt | subgraph | graph_stmt
            | node_stmt).setName("stmt")
    stmt_list << OneOrMore(stmt + Optional(semi))

    graphparser = (
        (Optional(strict_, 'notstrict') + ((graph_ | digraph_))
         + Optional(ID, '') + lbrace + Group(Optional(stmt_list))
         + rbrace).setResultsName("graph"))

    singleLineComment = Group("//" + restOfLine) | Group("#" + restOfLine)

    # actions
    graphparser.ignore(singleLineComment)
    graphparser.ignore(cStyleComment)
    node_id.setParseAction(self._proc_node_id)
    assignment.setParseAction(self._proc_attr_assignment)
    a_list.setParseAction(self._proc_attr_list)
    edge_stmt.setParseAction(self._proc_edge_stmt)
    node_stmt.setParseAction(self._proc_node_stmt)
    attr_stmt.setParseAction(self._proc_default_attr_stmt)
    attr_list.setParseAction(self._proc_attr_list_combine)
    subgraph.setParseAction(self._proc_subgraph_stmt)
    graphparser.setParseAction(self._main_graph_stmt)
    return graphparser
# [169] PN_LOCAL ::= (PN_CHARS_U | ':' | [0-9] | PLX ) ((PN_CHARS | '.' | ':' | PLX)* (PN_CHARS | ':' | PLX) )?
# Written as one verbose-mode regex; the fragments are substituted by name.
PN_LOCAL = Regex(
    u"""([%(PN_CHARS_U)s:0-9]|%(PLX)s) (([%(PN_CHARS)s\\.:]|%(PLX)s)* ([%(PN_CHARS)s:]|%(PLX)s) )?""" % {
        'PN_CHARS_U': PN_CHARS_U_re,
        'PN_CHARS': PN_CHARS_re,
        'PLX': PLX_re,
    },
    flags=re.X | re.UNICODE)


def _hexExpand(match):
    """Return the character whose hex code follows the first char of the match."""
    hex_digits = match.group(0)[1:]
    return unichr(int(hex_digits, 16))


def _expand_percent_escapes(tokens):
    """Parse action: replace every PERCENT_re match in the local name."""
    return re.sub("(%s)" % PERCENT_re, _hexExpand, tokens[0])


PN_LOCAL.setParseAction(_expand_percent_escapes)

# [141] PNAME_LN ::= PNAME_NS PN_LOCAL
PNAME_LN = PNAME_NS + Param('localname', PN_LOCAL.leaveWhitespace())

# [142] BLANK_NODE_LABEL ::= '_:' ( PN_CHARS_U | [0-9] ) ((PN_CHARS|'.')* PN_CHARS)?
_blank_node_pattern = u'_:[0-9%s](?:[\\.%s]*[%s])?' % (
    PN_CHARS_U_re, PN_CHARS_re, PN_CHARS_re)
BLANK_NODE_LABEL = Regex(_blank_node_pattern, flags=re.U)
BLANK_NODE_LABEL.setParseAction(lambda x: rdflib.BNode(x[0]))

# [166] VARNAME ::= ( PN_CHARS_U | [0-9] ) ( PN_CHARS_U | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040] )*
_varname_pattern = u'[%s0-9][%s0-9\u00B7\u0300-\u036F\u203F-\u2040]*' % (
    PN_CHARS_U_re, PN_CHARS_U_re)
VARNAME = Regex(_varname_pattern, flags=re.U)
re_operator = re.compile(r'''^(?P<operator>.*?\()(?P<rest>object:.*)''') COLON, LBRACK, RBRACK, LBRACE, RBRACE, TILDE, CARAT = map(Literal, ":[]{}~^") LPAR, RPAR = map(Suppress, "()") and_ = CaselessKeyword("AND") or_ = CaselessKeyword("OR") not_ = CaselessKeyword("NOT") to_ = CaselessKeyword("TO") keyword = and_ | or_ | not_ expression = Forward() valid_word = Regex(r'([a-zA-Z0-9*_+.-]|\\[!(){}\[\]^"~*?\\:])+').setName( "word") valid_word.setParseAction(lambda t: t[0].replace('\\\\', chr(127)).replace( '\\', '').replace(chr(127), '\\')) string = QuotedString('"') required_modifier = Literal("+")("required") prohibit_modifier = Literal("-")("prohibit") integer = Regex(r"\d+").setParseAction(lambda t: int(t[0])) proximity_modifier = Group(TILDE + integer("proximity")) number = Regex(r'\d+(\.\d+)?').setParseAction(lambda t: float(t[0])) fuzzy_modifier = TILDE + Optional(number, default=0.5)("fuzzy") term = Forward() field_name = valid_word.copy().setName("fieldname") incl_range_search = Group(LBRACK + term("lower") + to_ + term("upper") + RBRACK) excl_range_search = Group(LBRACE + term("lower") + to_ + term("upper") +
def formula_grammar(table):
    """
    Build the pyparsing grammar for molecular formulas.

    :Parameters:

        *table* : PeriodicTable
            Table whose ``symbol()`` method resolves each element symbol
            found in the input.

    :Returns:
        *parser* : pyparsing.ParserElement.
            ``parser.parseString()`` produces (*count, fragment*) pairs,
            where fragment is an *isotope*, an *element* or a nested group
            of such pairs.  A group whose count is 1 is returned without
            the enclosing pair.
    """
    # The formula refers to itself inside parentheses.
    formula = Forward()

    # Element symbol, resolved through the table
    def lookup_symbol(string, location, tokens):
        return table.symbol(tokens[0])

    symbol = Regex("[A-Z][a-z]*")
    symbol.setParseAction(lookup_symbol)

    # Isotope number "[n]" right after the symbol; 0 when absent
    def isotope_number(string, location, tokens):
        if tokens[0]:
            return int(tokens[0])
        return 0

    openiso = Literal('[').suppress()
    closeiso = Literal(']').suppress()
    isotope = Optional(~White() + openiso + Regex("[1-9][0-9]*") + closeiso,
                       default='0')
    isotope.setParseAction(isotope_number)

    # Counts: fractional is tried before whole; 1 when absent
    def fractional_count(string, location, tokens):
        if tokens[0]:
            return float(tokens[0])
        return 1

    def whole_count(string, location, tokens):
        if tokens[0]:
            return int(tokens[0])
        return 1

    # NOTE(review): this pattern also matches a bare ".", which float()
    # rejects with ValueError.
    fract = Regex("(0|[1-9][0-9]*|)([.][0-9]*)")
    fract.setParseAction(fractional_count)
    whole = Regex("[1-9][0-9]*")
    whole.setParseAction(whole_count)
    count = Optional(~White() + (fract | whole), default=1)

    # symbol, isotope, count -> (count, element or isotope)
    def pair_element(string, location, tokens):
        elem, iso, number = tokens[0:3]
        if iso != 0:
            elem = elem[iso]
        return (number, elem)

    element = symbol + isotope + count
    element.setParseAction(pair_element)

    # "count elements" -> pair, with the count leading
    def pair_leading_count(string, location, tokens):
        number = tokens[0]
        fragment = tokens[1:]
        if number == 1:
            return fragment
        return (number, fragment)

    implicit_group = count + OneOrMore(element)
    implicit_group.setParseAction(pair_leading_count)

    # "(formula) count" -> pair, with the count trailing
    def pair_trailing_count(string, location, tokens):
        number = tokens[-1]
        fragment = tokens[:-1]
        if number == 1:
            return fragment
        return (number, fragment)

    opengrp = Literal('(').suppress()
    closegrp = Literal(')').suppress()
    explicit_group = opengrp + formula + closegrp + count
    explicit_group.setParseAction(pair_trailing_count)

    # Groups may be joined by an optional '+' and optional whitespace
    group = implicit_group | explicit_group
    separator = (Optional(Literal('+').suppress())
                 + Optional(White().suppress()))
    formula << (group
                + ZeroOrMore(Optional(White().suppress()) + separator + group))

    grammar = Optional(formula) + StringEnd()
    grammar.setName('Chemical Formula')
    return grammar
Optional, Group, FollowedBy, operatorPrecedence, opAssoc, ParseException, ParserElement) ParserElement.enablePackrat() COLON,LBRACK,RBRACK,LBRACE,RBRACE,TILDE,CARAT = map(Literal,":[]{}~^") LPAR,RPAR = map(Suppress,"()") and_ = CaselessKeyword("AND") or_ = CaselessKeyword("OR") not_ = CaselessKeyword("NOT") to_ = CaselessKeyword("TO") keyword = and_ | or_ | not_ expression = Forward() valid_word = Regex(r'([a-zA-Z0-9*_+.-]|\\[!(){}\[\]^"~*?\\:])+').setName("word") valid_word.setParseAction( lambda t : t[0].replace('\\\\',chr(127)).replace('\\','').replace(chr(127),'\\') ) string = QuotedString('"') required_modifier = Literal("+")("required") prohibit_modifier = Literal("-")("prohibit") integer = Regex(r"\d+").setParseAction(lambda t:int(t[0])) proximity_modifier = Group(TILDE + integer("proximity")) number = Regex(r'\d+(\.\d+)?').setParseAction(lambda t:float(t[0])) fuzzy_modifier = TILDE + Optional(number, default=0.5)("fuzzy") term = Forward() field_name = valid_word.copy().setName("fieldname") incl_range_search = Group(LBRACK + term("lower") + to_ + term("upper") + RBRACK) excl_range_search = Group(LBRACE + term("lower") + to_ + term("upper") + RBRACE)
# [167] PN_CHARS ::= PN_CHARS_U | '-' | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040]
# Kept as a character-class fragment for use inside larger patterns.
PN_CHARS_re = '\\-0-9\u00B7\u0300-\u036F\u203F-\u2040' + PN_CHARS_U_re

# [168] PN_PREFIX ::= PN_CHARS_BASE ((PN_CHARS|'.')* PN_CHARS)?
_pn_prefix_pattern = r'[%s](?:[%s\.]*[%s])?' % (
    PN_CHARS_BASE_re, PN_CHARS_re, PN_CHARS_re)
PN_PREFIX = Regex(_pn_prefix_pattern, flags=re.U)

# [140] PNAME_NS ::= PN_PREFIX? ':'
PNAME_NS = (Optional(Param('prefix', PN_PREFIX))
            + Suppress(':').leaveWhitespace())


# [173] PN_LOCAL_ESC ::= '\' ( '_' | '~' | '.' | '-' | '!' | '$' | '&' | "'" | '(' | ')' | '*' | '+' | ',' | ';' | '=' | '/' | '?' | '#' | '@' | '%' )
def _drop_leading_backslash(tokens):
    """Parse action: keep the escaped character, drop the backslash."""
    return tokens[0][1:]


PN_LOCAL_ESC = Regex('\\\\[_~\\.\\-!$&"\'()*+,;=/?#@%]')
PN_LOCAL_ESC.setParseAction(_drop_leading_backslash)


# [172] HEX ::= [0-9] | [A-F] | [a-f]
# (not needed as a separate token)

# [171] PERCENT ::= '%' HEX HEX
def _percent_to_char(tokens):
    """Parse action: '%XX' -> the character with hex code XX."""
    return chr(int(tokens[0][1:], 16))


PERCENT = Regex('%[0-9a-fA-F]{2}')
PERCENT.setParseAction(_percent_to_char)

# [170] PLX ::= PERCENT | PN_LOCAL_ESC
PLX = PERCENT | PN_LOCAL_ESC

# [169] PN_LOCAL ::= (PN_CHARS_U | ':' | [0-9] | PLX ) ((PN_CHARS | '.' | ':' | PLX)* (PN_CHARS | ':' | PLX) )?
_local_first = Regex('[%s0-9:]' % PN_CHARS_U_re, flags=re.U) | PLX
_local_inner = Regex('[%s\\.:]' % PN_CHARS_re, flags=re.U) | PLX
_local_last = Regex('[%s:]' % PN_CHARS_re, flags=re.U) | PLX
PN_LOCAL = Combine(
    _local_first + ZeroOrMore(_local_inner + Optional(_local_last)))
query_expr = Forward()

# '+term' / '-term' prefixes, tagged with a results name each.
required_modifier = Literal('+')('required')
prohibit_modifier = Literal('-')('prohibit')

# Characters that may not appear inside a bare word.
special_characters = '=><(){}[]^"~*?:\\/.&|'
valid_word = Word(printables, excludeChars=special_characters).setName('word')
# Unescape: a doubled backslash becomes one backslash and any other
# backslash is dropped; chr(127) is a temporary stand-in while replacing.
valid_word.setParseAction(lambda t: t[0].replace('\\\\', chr(127)).replace(
    '\\', '').replace(chr(127), '\\'))

clause = Forward()
# Optional "attr." prefix followed by the field name.
field_name = (Optional(valid_word()('attr') + DOT)) + valid_word()('fieldname')
single_term = valid_word()('singleterm')
phrase = QuotedString('"', unquoteResults=True)('phrase')
# Wildcards are rewritten as regex fragments: '?' -> '.?' and '*' -> '.*'.
wildcard = Regex(r'[a-z0-9]*[\?\*][a-z0-9]*')('wildcard')
wildcard.setParseAction(lambda t: t[0].replace('?', '.?').replace('*', '.*'))
regex = QuotedString('/', unquoteResults=True)('regex')

_all = Literal('*')
# '[' / ']' mark an inclusive bound, '{' / '}' an exclusive one.
lower_range = Group((LBRACK('inclusive') | LBRACE('exclusive')) +
                    (valid_word | _all)('lowerbound'))
# The closing brace used to be tagged 'esclusive' (typo), so an exclusive
# upper bound was never reported under the name used for the lower bound.
# NOTE(review): confirm no consumer reads the misspelt name.
upper_range = Group((valid_word | _all)('upperbound') +
                    (RBRACK('inclusive') | RBRACE('exclusive')))
_range = (lower_range + TO + upper_range)('range')

GT = Literal('>')
GTE = Literal('>=')
LT = Literal('<')
LTE = Literal('<=')
# Two-character operators are tried before their one-character prefixes.
one_sided_range = Group((GTE | GT | LTE | LT)('op') +
                        valid_word('bound'))('onesidedrange')
def formula_grammar(table):
    """
    Construct a parser for molecular formulas.

    :Parameters:

        *table* : PeriodicTable
             Table used to resolve element symbols; each matched symbol is
             looked up with ``table.symbol(...)``.  The argument is required
             (the signature has no default).

    :Returns:
        *parser* : pyparsing.ParserElement.
            The grammar is ``Optional(formula, default=Formula()) + StringEnd()``;
            the parse actions attached below build ``Formula`` objects whose
            structure is made of pairs (*count, fragment*), where fragment is
            an *isotope*, an *element* or a list of pairs (*count, fragment*).

    """
    # Recursive
    composite = Forward()
    mixture = Forward()

    # whitespace and separators
    space = Optional(White().suppress())
    separator = space + Literal('+').suppress() + space

    # Lookup the element in the element table
    symbol = Regex("[A-Z][a-z]*")
    symbol = symbol.setParseAction(lambda s, l, t: table.symbol(t[0]))

    # Translate isotope: "[n]" directly after the symbol (~White() forbids
    # leading whitespace); a missing isotope is reported as 0.
    openiso = Literal('[').suppress()
    closeiso = Literal(']').suppress()
    isotope = Optional(~White() + openiso + Regex("[1-9][0-9]*") + closeiso,
                       default='0')
    isotope = isotope.setParseAction(lambda s, l, t: int(t[0]) if t[0] else 0)

    # Translate ion: "{n+}" / "{n-}" / "{+}" / "{-}".  The sign is the last
    # character, so it is moved to the front before int(); a bare sign means a
    # charge of 1 and the default '0+' yields 0.
    openion = Literal('{').suppress()
    closeion = Literal('}').suppress()
    ion = Optional(~White() + openion + Regex("([1-9][0-9]*)?[+-]") + closeion,
                   default='0+')
    ion = ion.setParseAction(
        lambda s, l, t: int(t[0][-1] + (t[0][:-1] if len(t[0]) > 1 else '1')))

    # Translate counts: fractional numbers are tried before whole numbers, and
    # a missing count defaults to 1.
    fract = Regex("(0|[1-9][0-9]*|)([.][0-9]*)")
    fract = fract.setParseAction(lambda s, l, t: float(t[0]) if t[0] else 1)
    whole = Regex("[1-9][0-9]*")
    whole = whole.setParseAction(lambda s, l, t: int(t[0]) if t[0] else 1)
    count = Optional(~White() + (fract | whole), default=1)

    # Convert symbol, isotope, ion, count to (count, isotope)
    element = symbol + isotope + ion + count
    def convert_element(string, location, tokens):
        """interpret string as element"""
        symbol, isotope, ion, count = tokens[0:4]
        if isotope != 0:
            # select the isotope by indexing the element
            symbol = symbol[isotope]
        if ion != 0:
            # select the ion through the ``ion`` attribute
            symbol = symbol.ion[ion]
        return (count, symbol)
    element = element.setParseAction(convert_element)

    # Convert "count elements" to a pair
    implicit_group = count + OneOrMore(element)
    def convert_implicit(string, location, tokens):
        """convert count followed by fragment"""
        count = tokens[0]
        fragment = tokens[1:]
        # a count of 1 leaves the fragment unwrapped
        return fragment if count == 1 else (count, fragment)
    implicit_group = implicit_group.setParseAction(convert_implicit)

    # Convert "(composite) count" to a pair
    opengrp = space + Literal('(').suppress() + space
    closegrp = space + Literal(')').suppress() + space
    explicit_group = opengrp + composite + closegrp + count
    def convert_explicit(string, location, tokens):
        """convert (fragment)count"""
        count = tokens[-1]
        fragment = tokens[:-1]
        # a count of 1 leaves the fragment unwrapped
        return fragment if count == 1 else (count, fragment)
    explicit_group = explicit_group.setParseAction(convert_explicit)

    # Build composite from a set of groups
    group = implicit_group | explicit_group
    implicit_separator = separator | space
    composite << group + ZeroOrMore(implicit_separator + group)

    # "@ value" with an optional trailing flag 'n' or 'i' (default 'i')
    density = Literal('@').suppress() + count + Optional(Regex("[ni]"), default='i')
    compound = composite + Optional(density, default=None)
    def convert_compound(string, location, tokens):
        """convert material @ density"""
        if tokens[-1] is None:
            # no density clause was given
            return Formula(structure=_immutable(tokens[:-1]))
        elif tokens[-1] == 'n':
            # flag 'n': the value is passed as natural_density
            return Formula(structure=_immutable(tokens[:-2]),
                           natural_density=tokens[-2])
        else:
            # any other flag (default 'i'): the value is passed as density
            return Formula(structure=_immutable(tokens[:-2]),
                           density=tokens[-2])
    compound = compound.setParseAction(convert_compound)

    partsep = space + Literal('//').suppress() + space
    percent = Literal('%').suppress()

    # Mixture by weight: "N%wt A // M% B // C"; the final part has no
    # percentage of its own.
    weight_percent = Regex("%(w((eigh)?t)?|m(ass)?)").suppress() + space
    by_weight = (count + weight_percent + mixture
                 + ZeroOrMore(partsep + count + (weight_percent | percent) + mixture)
                 + partsep + mixture)
    def convert_by_weight(string, location, tokens):
        """convert mixture by %wt or %mass"""
        # tokens alternate percentage, part, ..., ending with the base part
        piece = tokens[1:-1:2] + [tokens[-1]]
        fract = [float(v) for v in tokens[:-1:2]]
        # the base part receives whatever remains of 100%
        fract.append(100 - sum(fract))
        if len(piece) != len(fract):
            raise ValueError("Missing base component of mixture")
        if fract[-1] < 0:
            raise ValueError("Formula percentages must sum to less than 100%")
        return _mix_by_weight_pairs(zip(piece, fract))
    mixture_by_weight = by_weight.setParseAction(convert_by_weight)

    # Mixture by volume: same layout as by weight, using "%v", "%vol" or
    # "%volume".
    volume_percent = Regex("%v(ol(ume)?)?").suppress() + space
    by_volume = (count + volume_percent + mixture
                 + ZeroOrMore(partsep + count + (volume_percent | percent) + mixture)
                 + partsep + mixture)
    def convert_by_volume(string, location, tokens):
        """convert mixture by %vol"""
        # tokens alternate percentage, part, ..., ending with the base part
        piece = tokens[1:-1:2] + [tokens[-1]]
        fract = [float(v) for v in tokens[:-1:2]]
        # the base part receives whatever remains of 100%
        fract.append(100 - sum(fract))
        if len(piece) != len(fract):
            raise ValueError("Missing base component of mixture " + string)
        if fract[-1] < 0:
            raise ValueError("Formula percentages must sum to less than 100%")
        return _mix_by_volume_pairs(zip(piece, fract))
    mixture_by_volume = by_volume.setParseAction(convert_by_volume)

    # Mixture by layer thickness: "<count><length unit> material // ..." or a
    # parenthesised layer mixture followed by a count.
    mixture_by_layer = Forward()
    layer_thick = Group(count + Regex(LENGTH_RE) + space)
    layer_part = (layer_thick + mixture) | (opengrp + mixture_by_layer + closegrp + count)
    mixture_by_layer << layer_part + ZeroOrMore(partsep + layer_part)
    def convert_by_layer(string, location, tokens):
        """convert layer thickness '# nm material'"""
        if len(tokens) < 2:
            return tokens
        piece = []
        fract = []
        # tokens come in pairs: (thickness group, material) or, for a nested
        # group, (Formula, count)
        for p1, p2 in zip(tokens[0::2], tokens[1::2]):
            if isinstance(p1, Formula):
                # NOTE(review): reads ``absthick`` here while the result below
                # is stored as ``thickness`` -- confirm the attribute name.
                f = p1.absthick * float(p2)
                p = p1
            else:
                f = float(p1[0]) * LENGTH_UNITS[p1[1]]
                p = p2
            piece.append(p)
            fract.append(f)
        total = sum(fract)
        # express each thickness as a percentage of the total
        vfract = [(v / total) * 100 for v in fract]
        result = _mix_by_volume_pairs(zip(piece, vfract))
        result.thickness = total
        return result
    mixture_by_layer = mixture_by_layer.setParseAction(convert_by_layer)

    # Mixture by absolute mass or volume: "<count><unit> material // ..." or a
    # parenthesised mixture followed by a count.
    mixture_by_absmass = Forward()
    absmass_mass = Group(count + Regex(MASS_VOLUME_RE) + space)
    absmass_part = (absmass_mass + mixture) | (opengrp + mixture_by_absmass + closegrp + count)
    mixture_by_absmass << absmass_part + ZeroOrMore(partsep + absmass_part)
    def convert_by_absmass(string, location, tokens):
        """convert mass '# mg material'"""
        if len(tokens) < 2:
            return tokens
        piece = []
        fract = []
        # tokens come in pairs: (amount group, material) or, for a nested
        # group, (Formula, count)
        for p1, p2 in zip(tokens[0::2], tokens[1::2]):
            if isinstance(p1, Formula):
                p = p1
                f = p1.total_mass * float(p2)
            else:
                p = p2
                value = float(p1[0])
                if p1[1] in VOLUME_UNITS:
                    # convert to volume in liters to mass in grams before mixing
                    if p.density is None:
                        raise ValueError("Need the mass density of " + str(p))
                    f = value * VOLUME_UNITS[p1[1]] * 1000. * p.density
                else:
                    f = value * MASS_UNITS[p1[1]]
            piece.append(p)
            fract.append(f)

        total = sum(fract)
        # express each mass as a percentage of the total
        mfract = [(m / total) * 100 for m in fract]
        result = _mix_by_weight_pairs(zip(piece, mfract))
        result.total_mass = total
        return result
    mixture_by_absmass = mixture_by_absmass.setParseAction(convert_by_absmass)

    ungrouped_mixture = (mixture_by_weight | mixture_by_volume
                         | mixture_by_layer | mixture_by_absmass)
    grouped_mixture = opengrp + ungrouped_mixture + closegrp + Optional(
        density, default=None)
    def convert_mixture(string, location, tokens):
        """convert (mixture) @ density"""
        formula = tokens[0]
        # the density clause, when present, overrides the mixture's own value
        if tokens[-1] == 'n':
            formula.natural_density = tokens[-2]
        elif tokens[-1] == 'i':
            formula.density = tokens[-2]
        # elif tokens[-1] is None
        return formula
    grouped_mixture = grouped_mixture.setParseAction(convert_mixture)

    # A mixture part is a plain compound or a parenthesised mixture.
    mixture << (compound | grouped_mixture)
    formula = (compound | ungrouped_mixture | grouped_mixture)
    # Empty input parses to the default Formula(); StringEnd() rejects
    # trailing unparsed text.
    grammar = Optional(formula, default=Formula()) + StringEnd()

    grammar.setName('Chemical Formula')
    return grammar
__all__ = ['tapOutputParser', 'TAPTest', 'TAPSummary']

# Line ends carry meaning in this grammar, so only spaces and tabs are
# skipped as whitespace.  This must run before any element below is built.
ParserElement.setDefaultWhitespaceChars(" \t")
NL = LineEnd().suppress()

integer = Word(nums)
plan = '1..' + integer("ubound")

OK = Literal('ok')
NOT_OK = Literal('not ok')
testStatus = (OK | NOT_OK)


def _strip_description_prefix(t):
    """Remove leading dashes and spaces from the matched description."""
    return t[0].lstrip('- ')


def _tag_skip(t):
    """Prefix the rest-of-line text with the literal tag 'SKIP'."""
    return ['SKIP', t[0]]


description = Regex("[^#\n]+").setParseAction(_strip_description_prefix)

TODO = CaselessLiteral('TODO')
SKIP = CaselessLiteral('SKIP')
# A TODO directive consumes the keyword and then the rest of the line; a SKIP
# directive only looks ahead for the keyword and keeps the whole line as text.
_todo_directive = TODO + restOfLine
_skip_directive = FollowedBy(SKIP) + restOfLine.copy().setParseAction(_tag_skip)
directive = Group(Suppress('#') + (_todo_directive | _skip_directive))

commentLine = Suppress("#") + empty + restOfLine

# One test line: optional preceding comment lines, the status, then an
# optional number, description and directive.
_comments = Optional(OneOrMore(commentLine + NL))("comments")
_passed = testStatus("passed")
_test_number = Optional(integer)("testNumber")
_description = Optional(description)("description")
_directive = Optional(directive)("directive")
testLine = Group(
    _comments + _passed + _test_number + _description + _directive
)
Literal, quotedString, Keyword, empty, Suppress, Combine, NotAny, Regex


def eachMostOnce(*args, or_=operator.ior, and_=operator.add):
    """Combine *args* in every order, using each argument at most once.

    For each size from ``len(args)`` down to 1, every permutation of that size
    is folded with *and_*; the permutations of one size are folded with *or_*,
    and the per-size results are folded with *or_* again, longest first.

    Raises ``TypeError`` (from ``reduce``) when called without arguments.
    """
    def alternatives_of_size(size):
        # every ordering of `size` arguments, each joined with and_
        sequences = (reduce(and_, ordering)
                     for ordering in permutations(args, size))
        return reduce(or_, sequences)

    sizes = range(len(args), 0, -1)
    return reduce(or_, (alternatives_of_size(size) for size in sizes))


def _pad_comment(s, loc, toks):
    """Return the matched comment with one space prepended."""
    return [' ' + toks[0]]


# Basic Fortran lexical elements.
NAME = Word(alphas, alphanums + '_')
INTEGER = Word(nums).setName('integer')
# integer with an optional "_kind" suffix
INTEGER_K = Combine(INTEGER + Optional('_' + (INTEGER | NAME)))
EOL = p.LineEnd()
FortranComment = Regex(r'!.*$').setParseAction(_pad_comment)
EOLL = Optional(FortranComment) + EOL

precision = Combine('.' + INTEGER)
exponent = Combine(oneOf('d e D E') + Optional(oneOf('+ -')) + INTEGER)
REAL = Combine(INTEGER + ((precision + exponent) | precision | exponent))
STRING = quotedString

comp_op = Forward()
# A user-defined operator is ".NAME." that is neither a comparison operator
# nor one of the listed built-in dotted tokens.
user_op = (
    NotAny(comp_op | oneOf('.not. .and. .or. .eqv. .neqv. ** // % .true. .false.'))
    + Combine('.' + NAME + '.')
)

atom = Forward()
calllist = Forward()
array_sub = '(' + Optional(atom) + ':' + Optional(atom) + Optional(':' + atom) + ')'
type_sub = '%' + NAME
trailer = p.Or((calllist, array_sub, type_sub))
class Variable(object):
    """A variable reference; the name is the first parsed token."""

    def __init__(self, tokens):
        self.name = tokens[0]

    def __repr__(self):
        return "".join(["<variable ", str(self.name), ">"])


def _make_variable(tokens):
    """Parse action: wrap the matched tokens in a Variable."""
    return Variable(tokens)


def _make_function(tokens):
    """Parse action: wrap the matched tokens in a Function (looked up at call time)."""
    return Function(tokens)


# Suppressed punctuation shared by the expressions below.
_lpar = Literal("(").suppress()
_rpar = Literal(")").suppress()

number = Regex(r"[\+\-]?(([0-9]+(\.[0-9]+)?)|(\.[0-9]+))")
comma = Literal(",")
name = Regex("[a-z][a-z0-9_]*")
var_name = Regex("[a-z][a-z0-9_]*").setParseAction(_make_variable)

element = Forward()
equation = Forward()

# One or more comma-separated equations, each kept in its own group.
arguments = Group(equation) + ZeroOrMore(comma.suppress() + Group(equation))

# A function call is tried before a plain element.
_function_call = (name + _lpar + Group(arguments) + _rpar).setParseAction(
    _make_function)
function_or_element = _function_call | element

_parenthesised = _lpar + Group(equation) + _rpar
element << (var_name | number | _parenthesised)
equation << (function_or_element + ZeroOrMore(infix + function_or_element))
# JSON-style number definition
number_ext = Combine(
    Optional('-') + ('0' | Word('123456789', nums)) +
    Optional('.' + Word(nums)) +
    Optional(Word('eE', exact=1) + Word(nums + '+-', nums))).setName("number")

number = Regex(r"\-?\d+(\.\d+)?")


def numberParseAction(s, p, t):
    """Parse action for ``number``; leaves the matched token unchanged.

    A leftover ``ipdb.set_trace()`` used to live here.  It dropped into the
    debugger on every number parsed and raised ImportError where ipdb is not
    installed.  Returning None tells pyparsing to keep the tokens as they are,
    which is what the old body did once the debugger was continued.

    :param s: the string being parsed
    :param p: location of the match in ``s``
    :param t: the matched tokens
    """
    return None


number.setParseAction(numberParseAction)

# left to be defined later in the code
expression = Forward()
function = Forward()
entity = Forward()
lambda_expression = Forward()

# parenthesized expressions
enclosed_expression = Group(Suppress(lparen) + expression + Suppress(rparen))

lambda_name = Literal("lambda")
var_name = Word(alphanums_extended)
lambda_vars = delimitedList(var_name)
# a variable, optionally followed by ".member"
var_access = Group(var_name + Optional(Suppress(dot) + Word(alphanums_extended)))
import re

from pyparsing import (
    Word, Keyword, NotAny, alphanums, nums, alphas, OneOrMore, srange,
    ZeroOrMore, Regex
)

from whispy_lispy import ast

# An integer is a run of digits that is not followed by a dot.
int_literal = Word(nums) + NotAny('.')
int_literal.setParseAction(ast.Int.from_parsed_result)

# A float is digits, exactly one dot, digits.  The dot used to be Word('.'),
# which matches a run of dots, so input such as "1..2" was accepted as a
# float.  A plain '.' is turned into a literal by pyparsing and still yields
# three tokens, so the parse action sees the same shape.
float_literal = Word(nums) + '.' + Word(nums)
float_literal.setParseAction(ast.Float.from_parsed_result)

bool_literal = Keyword('#t') | Keyword('#f')
bool_literal.setParseAction(ast.Bool.from_parsed_result)

# Double-quoted string; the closing quote is the first one not preceded by a
# backslash.  DOTALL lets the string span lines.
string_literal = Regex(r'\".*?(?<!\\)\"', re.DOTALL)
# NOTE(review): the other literals use ``from_parsed_result``; confirm that
# ``ast.String`` really names this hook ``from_parse_result``.
string_literal.setParseAction(ast.String.from_parse_result)

# float is tried before int so "1.5" is not split at the dot
grammar = OneOrMore(float_literal | int_literal | bool_literal | string_literal)
delimitedList, operatorPrecedence, opAssoc, ParseException, ) # Variables variable = Regex(r"(?P<table>[ai|di|sv]{2})\.(?P<tag>[\w\d]+)\.(?P<attr>\w+)") def var_parse_action(text, index, context): return context[0] variable.setParseAction(var_parse_action) # Numbers numeric_literal = Regex(r"\-?\d+(\.\d+)?") def number_prase_action(text, index, data): number = data[0] if "." in number: return float(number) else: return int(number) numeric_literal.setParseAction(number_prase_action)
PN_CHARS_U_re = "_" + PN_CHARS_BASE_re # [167] PN_CHARS ::= PN_CHARS_U | '-' | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040] PN_CHARS_re = u"\\-0-9\u00B7\u0300-\u036F\u203F-\u2040" + PN_CHARS_U_re # PN_CHARS = Regex(u'[%s]'%PN_CHARS_re, flags=re.U) # [168] PN_PREFIX ::= PN_CHARS_BASE ((PN_CHARS|'.')* PN_CHARS)? PN_PREFIX = Regex(ur"[%s](?:[%s\.]*[%s])?" % (PN_CHARS_BASE_re, PN_CHARS_re, PN_CHARS_re), flags=re.U) # [140] PNAME_NS ::= PN_PREFIX? ':' PNAME_NS = Optional(Param("prefix", PN_PREFIX)) + Suppress(":").leaveWhitespace() # [173] PN_LOCAL_ESC ::= '\' ( '_' | '~' | '.' | '-' | '!' | '$' | '&' | "'" | '(' | ')' | '*' | '+' | ',' | ';' | '=' | '/' | '?' | '#' | '@' | '%' ) PN_LOCAL_ESC = Regex("\\\\[_~\\.\\-!$&\"'()*+,;=/?#@%]") PN_LOCAL_ESC.setParseAction(lambda x: x[0][1:]) # [172] HEX ::= [0-9] | [A-F] | [a-f] # HEX = Regex('[0-9A-Fa-f]') # not needed # [171] PERCENT ::= '%' HEX HEX PERCENT = Regex("%[0-9a-fA-F]{2}") PERCENT.setParseAction(lambda x: unichr(int(x[0][1:], 16))) # [170] PLX ::= PERCENT | PN_LOCAL_ESC PLX = PERCENT | PN_LOCAL_ESC # [169] PN_LOCAL ::= (PN_CHARS_U | ':' | [0-9] | PLX ) ((PN_CHARS | '.' | ':' | PLX)* (PN_CHARS | ':' | PLX) )? PN_LOCAL = Combine( (Regex(u"[%s0-9:]" % PN_CHARS_U_re, flags=re.U) | PLX) + ZeroOrMore(
return ServiceDefintion(t[0]) def top_level_statement_fn(s, l, t): return TopLevelStatement(t[0]) def parser_fn(s, l, t): return Parser(t[0]) identifier = Word(alphas + "_", alphanums + "_").setName("identifier") identifier.setParseAction(identifier_fn) integer = Regex(r"[+-]?\d+") integer.setParseAction(integer_fn) LBRACE = Suppress('{') RBRACE = Suppress('}') LBRACK = Suppress('[') RBRACK = Suppress(']') LPAR = Suppress('(') RPAR = Suppress(')') EQ = Suppress('=') SEMI = Suppress(';') SYNTAX = Keyword('syntax') IMPORT = Keyword('import') PACKAGE = Keyword('package') MESSAGE = Keyword('message') RPC = Keyword('rpc')
def import_directive_fn(s, l, t):
    """Parse action: wrap the first matched token in an ImportDirective."""
    target = t[0]
    return ImportDirective(target)


def field_fn(s, l, t):
    """Parse action: build a Field from all matched tokens, in order."""
    parts = t
    return Field(*parts)


def service_definition_fn(s, l, t):
    """Parse action: wrap the first matched token in a ServiceDefintion."""
    definition = t[0]
    return ServiceDefintion(definition)


def top_level_statement_fn(s, l, t):
    """Parse action: wrap the first matched token in a TopLevelStatement."""
    statement = t[0]
    return TopLevelStatement(statement)


def parser_fn(s, l, t):
    """Parse action: wrap the first matched token in a Parser."""
    root = t[0]
    return Parser(root)


# Identifiers start with a letter or underscore; integers may carry a sign.
identifier = Word(alphas + "_", alphanums + "_").setName(
    "identifier").setParseAction(identifier_fn)
integer = Regex(r"[+-]?\d+").setParseAction(integer_fn)

# Punctuation is matched but dropped from the results.
LBRACE, RBRACE, LBRACK, RBRACK, LPAR, RPAR, EQ, SEMI = (
    Suppress(mark) for mark in '{}[]()=;')

# Reserved words.
SYNTAX, IMPORT, PACKAGE, MESSAGE, RPC = (
    Keyword(word)
    for word in ('syntax', 'import', 'package', 'message', 'rpc'))
def proto_integer_fn(s, l, t):
    """Parse action: convert the matched text to int and wrap it in ProtoInteger."""
    value = int(t[0])
    return ProtoInteger(value)


def proto_string_fn(s, l, t):
    """Parse action: wrap the first matched token in a String."""
    text = t[0]
    return String(text)


def proto_data_fn(s, l, t):
    """Parse action: wrap the first matched token in a ProtoData."""
    payload = t[0]
    return ProtoData(payload)


def top_level_proto_definition_fn(s, l, t):
    """Parse action: build a TopLevelProtoDefinition from the first two tokens."""
    first, second = t[0], t[1]
    return TopLevelProtoDefinition(first, second)


def nested_proto_fn(s, l, t):
    """Parse action: build a NestedProto from the first two tokens."""
    first, second = t[0], t[1]
    return NestedProto(first, second)


def proto_parser_fn(s, l, t):
    """Parse action: wrap the whole token list in a ProtoParser."""
    tokens = t
    return ProtoParser(tokens)


# Optionally signed integer.
proto_integer = Regex(r"[+-]?\d+").setParseAction(proto_integer_fn)

# Punctuation is matched but dropped from the results.
LBRACE, RBRACE, COLON = (Suppress(mark) for mark in '{}:')

# Work on a copy so the shared dblQuotedString is not given this parse action.
proto_string = copy.copy(dblQuotedString).setParseAction(proto_string_fn)

# A data value is an integer or a quoted string.
proto_data = (proto_integer | proto_string).setParseAction(proto_data_fn)

# "identifier : data"
top_level_proto_definition = (identifier + COLON + proto_data).setParseAction(
    top_level_proto_definition_fn)

nested_proto = Forward()