def make_enewick_parser():
    """Build a parser for extended-Newick (eNewick) tree strings.

    Returns the bound ``parseString`` method of the assembled pyparsing
    grammar, so callers invoke it as ``make_enewick_parser()(text)``.
    Result fields: "label", "type", "tag" (hybrid nodes marked with '#'),
    "length" (branch length after ':'), and nested "subtree" groups.
    """
    # atoms
    lparen = Literal("(").suppress()
    rparen = Literal(")").suppress()
    colon = Literal(":").suppress()
    # semicolon = Literal(";").suppress()
    comma = Literal(",").suppress()
    point = Literal(".")
    e = CaselessLiteral("E")
    sharp = Literal("#").suppress()

    # terminal: node names may contain a wide set of punctuation characters
    name = Word(alphanums + alphas8bit + "_" + "-" + "." + "+" + "&" + "/" +
                "~" + "{" + "}" + "*" + "'" + '"' + "\\" + "?")
    string = Word(alphas)
    # Signed real with optional fraction and exponent; cvtReal converts to float.
    fnumber = Combine(
        Word("+-" + nums, nums) +
        Optional(point + Optional(Word(nums))) +
        Optional(e + Word("+-" + nums, nums))
    ).setParseAction(cvtReal)
    number = Combine(Word(nums)).setParseAction(cvtInt)

    # Node label: optional name, optional hybrid marker "#<type><tag>",
    # optional ":<branch length>".
    label = (
        Optional(name).setResultsName("label") +
        Optional(sharp + Optional(string).setResultsName("type") +
                 number.setResultsName("tag")) +
        Optional(colon + fnumber).setResultsName("length")
    )

    # Recursive tree structure: a subtree is either "(list)" or a bare label,
    # optionally followed by its own label (internal-node annotation).
    subtree = Forward()
    subtreelist = Forward()
    subtree << Group(((lparen + subtreelist + rparen).setResultsName("subtree") |
                      label) + Optional(label))
    subtreelist << subtree + Optional(comma + subtreelist)
    tree = subtree + Word(";").suppress()
    return tree.parseString
def define_number(self): """ Return the syntax definition for a number in Arabic Numerals. Override this method to support numeral systems other than Arabic Numerals (0-9). Do not override this method just to change the character used to separate thousands and decimals: Use :attr:`T_THOUSANDS_SEPARATOR` and :attr:`T_DECIMAL_SEPARATOR`, respectively. """ # Defining the basic tokens: to_dot = lambda t: "." to_plus = lambda t: "+" to_minus = lambda t: "-" positive_sign = Literal(self._grammar.get_token("positive_sign")) positive_sign.setParseAction(to_plus) negative_sign = Literal(self._grammar.get_token("negative_sign")) negative_sign.setParseAction(to_minus) decimal_sep = Literal(self._grammar.get_token("decimal_separator")) decimal_sep.setParseAction(to_dot) thousands_sep = Suppress(self._grammar.get_token("thousands_separator")) digits = Word(nums) # Building the integers and decimals: sign = positive_sign | negative_sign thousands = Word(nums, max=3) + \ OneOrMore(thousands_sep + Word(nums, exact=3)) integers = thousands | digits decimals = decimal_sep + digits number = Combine(Optional(sign) + integers + Optional(decimals)) number.setParseAction(self.make_number) number.setName("number") return number
def define_identifier(self): """ Return the syntax definition for an identifier. """ # --- Defining the individual identifiers: # Getting all the Unicode numbers in a single string: unicode_numbers = "".join([unichr(n) for n in xrange(0x10000) if unichr(n).isdigit()]) unicode_number_expr = Regex("[%s]" % unicode_numbers, re.UNICODE) space_char = re.escape(self._grammar.get_token("identifier_spacing")) identifier0 = Regex("[\w%s]+" % space_char, re.UNICODE) # Identifiers cannot start with a number: identifier0 = Combine(~unicode_number_expr + identifier0) identifier0.setName("individual_identifier") # --- Defining the namespaces: namespace_sep = Suppress(self._grammar.get_token("namespace_separator")) namespace = Group(ZeroOrMore(identifier0 + namespace_sep)) namespace.setName("namespace") # --- The full identifier, which could have a namespace: identifier = Combine(namespace.setResultsName("namespace_parts") + identifier0.setResultsName("identifier")) identifier.setName("full_identifier") return identifier
def func_tokens(dictionary, parse_action):
    """Return the tuple (func_tok, arglist, rx_tok) of parser elements.

    ``func_tok`` matches ``$name(...)`` calls and applies *parse_action*;
    ``rx_tok`` matches numeric placeholders like ``$1`` and substitutes the
    matching value from *dictionary*; ``arglist`` is an optional
    comma-separated list of quoted strings, placeholders (only when
    *dictionary* is non-empty) and stripped plain-text chunks.
    """
    func_name = Word(alphas+'_', alphanums+'_')
    func_ident = Combine('$' + func_name.copy()('funcname'))
    # originalTextFor keeps the raw "(...)" argument text for the action.
    func_tok = func_ident + originalTextFor(nestedExpr())('args')
    func_tok.leaveWhitespace()
    func_tok.setParseAction(parse_action)
    # NOTE(review): enablePackrat() acts globally on ParserElement, not just
    # on this expression — confirm the global memoization is intended.
    func_tok.enablePackrat()

    rx_tok = Combine(Literal('$').suppress() + Word(nums)('num'))

    def replace_token(tokens):
        # Unknown indices silently become the empty string.
        index = int(tokens.num)
        return dictionary.get(index, u'')

    rx_tok.setParseAction(replace_token)

    strip = lambda s, l, tok: tok[0].strip()
    text_tok = CharsNotIn(u',').setParseAction(strip)
    quote_tok = QuotedString('"')

    if dictionary:
        arglist = Optional(delimitedList(quote_tok | rx_tok | text_tok))
    else:
        arglist = Optional(delimitedList(quote_tok | text_tok))

    return func_tok, arglist, rx_tok
def expression(self):
    """Build the grammar for a PAML/codeml ancestral-reconstruction report.

    Extracts, in order: the reconstructed tree ("tree"), per-site
    probability records ("sites"), extant + reconstructed sequences
    ("sequences") and the trailing per-site probability list
    ("probability").  Everything between the sections is skipped.
    """
    from pyparsing import Suppress,Combine,Optional,oneOf,OneOrMore,Word,nums,Group,alphas,alphanums,Literal,SkipTo,empty,lineEnd

    cvtInt = lambda toks: int(toks[0])
    cvtReal = lambda toks: float(toks[0])
    cvtTuple = lambda toks : tuple(toks.asList())
    # Join a grouped name, dropping '#' (e.g. ["node", "#4"] -> "node4").
    nameJoin = lambda toks : "".join([tok.replace("#","") for tok in toks[0]])
    #lambda toks: " ".join([str(t) for t in toks[0]])

    # define punctuation as suppressed literals
    lparen,rparen,lbrack,rbrack,lbrace,rbrace,colon = map(Suppress,"()[]{}:")

    integer = Combine(Optional(oneOf("+ -")) + Word(nums))\
        .setName("integer")\
        .setParseAction( cvtInt )
    real = Combine(Optional(oneOf("+ -")) + Word(nums) + "." +
                   Optional(Word(nums)) +
                   Optional(oneOf("e E") + Optional(oneOf("+ -")) +
                            Word(nums))).setName("real").setParseAction( cvtReal )

    # TREE DEFINITION
    #   ((seq2: 0.537243, seq1: 0.000004): 0.255741, seq3: 0.281503);
    # Tokens are re-joined with single spaces and the ';' re-appended.
    tree_w_branches = (
        OneOrMore(Word("():,." + alphas + nums)) + Literal(";")
    ).setParseAction(lambda tokens: " ".join(tokens[:-1]) + ";")

    # SITE PROBABILITIES, e.g.
    #   site Freq Data:
    #   1 1 AAA: A(0.978) A(1.000)
    site_prob = (
        integer.setResultsName("site", listAllMatches=True) +
        integer.setResultsName("freq", listAllMatches=True) +
        Word(alphas + "-").setResultsName("extant", listAllMatches=True) +
        colon +
        Group(OneOrMore(Group(Word(alphas, exact=1) + lparen + real + rparen))
              ).setResultsName("probability", listAllMatches=True) +
        lineEnd
    )

    # ANCESTRAL SEQUENCES, e.g.
    #   seq1      ACC
    #   node #4   ACC      <- optional "#<num>" joined into a single name
    sequence = (
        Group(Word(alphanums) +
              Optional(Combine(Literal("#") + Word(nums)))
              ).setParseAction(nameJoin).setResultsName("name", listAllMatches=True) +
        Word(alphas + "- ").setResultsName("sequence", listAllMatches=True) +
        lineEnd
    )

    # Stitch the sections together, skipping the fixed headers between them.
    return (SkipTo(Literal("Ancestral reconstruction by AAML."), include=True).suppress() +
            tree_w_branches.setResultsName("tree") +
            SkipTo(Literal("site") + Literal("Freq") + Literal("Data:"),
                   include=True,).suppress() +
            Group(OneOrMore(site_prob)).setResultsName("sites") +
            SkipTo(Literal("List of extant and reconstructed sequences") +
                   Word(nums) + Word(nums), include=True).suppress() +
            Group(OneOrMore(sequence)).setResultsName("sequences") +
            SkipTo(Literal("for a site."), include=True).suppress() +
            Group(OneOrMore(real)).setResultsName("probability") +
            empty )
def is_ipv4_addr(inputstr):
    """Return True if *inputstr* is a valid dotted-quad IPv4 address.

    Fixes over the previous pyparsing-based version:
      - the whole string must match (the old ``parseString`` call accepted
        trailing garbage such as "1.2.3.4xyz"),
      - octets are range-checked (0-255),
      - no bare ``except:`` swallowing unrelated errors,
      - no third-party dependency.
    """
    parts = inputstr.split('.')
    if len(parts) != 4:
        return False
    for part in parts:
        # isdigit() rejects signs, spaces and empty strings.
        if not part or not part.isdigit():
            return False
        try:
            value = int(part)
        except ValueError:
            # isdigit() can accept characters (e.g. superscripts) that
            # int() rejects; treat those as invalid octets.
            return False
        if value > 255:
            return False
    return True
def _BNF(self):
    """Return the operand-expression grammar (Propeller/Spin-style operators).

    The tiers term1..term11 encode operator precedence (op1 binds tightest,
    "OR" and "," loosest).  Every operand and operator is pushed onto an
    internal stack via self._push for later RPN-style evaluation; unary
    operators are tagged through self._mark_unary.
    """
    # Numeric literals in various bases: $hex, %%quaternary, %binary,
    # decimal; '_' is allowed as a digit separator throughout.
    base16 = Literal("$")
    hex = Combine(base16 + Word(hexnums + "_"))
    base4 = Literal("%%")
    quaternary = Combine(base4 + Word("0123_"))
    base2 = Literal("%")
    binary = Combine(base2 + Word("01_"))
    plusminus = Literal("+") | Literal("-")
    integer = Combine(Optional(plusminus) + Word(nums + "_"))
    # Symbol names may carry a ':' (local label) or '@' (address) prefix.
    name_token = Combine(Optional(Literal(":") | Literal("@")) +
                         Word("_" + alphas, "_" + alphanums))
    name_token.setParseAction(self._mark_name_token)
    lparens = Literal("(").suppress()
    rparens = Literal(")").suppress()
    # op0 = Literal("@")
    op1 = (Literal("^^") | Literal("||") | Literal("|<") | Literal(">|") |
           Literal("!")).setParseAction(self._mark_unary)
    op2 = Literal("->") | Literal("<-") | Literal(">>") | Literal("<<") | Literal("~>") | Literal("><")
    op3 = Literal("&")
    op4 = Literal("|") | Literal("^")
    op5 = Literal("**") | Literal("*") | Literal("//") | Literal("/")
    op6 = Literal("+") | Literal("-")
    op7 = Literal("#>") | Literal("<#")
    op8 = Literal("<") | Literal(">") | Literal("<>") | Literal("==") | Literal("=<") | Literal("=>")
    op9 = Literal("NOT").setParseAction(self._mark_unary)
    op10 = Literal("AND")
    op11 = Literal("OR")
    op12 = Literal(",")
    expr = Forward()
    atom = name_token | hex | quaternary | binary | integer | quotedString
    atom.setParseAction(self._push)
    # NOTE(review): expr.suppress() discards the parenthesized sub-result
    # from the token stream; evaluation appears to rely on the _push side
    # effects alone — confirm this is intended.
    atom = atom | (lparens + expr.suppress() + rparens)
    # term0 = atom + ZeroOrMore((op0 + atom) .setParseAction(self._push))
    # term1 = term0 + ZeroOrMore((op1 + term0) .setParseAction(self._push))
    term1 = atom + ZeroOrMore((op1 + atom) .setParseAction(self._push))
    term2 = term1 + ZeroOrMore((op2 + term1) .setParseAction(self._push))
    term3 = term2 + ZeroOrMore((op3 + term2) .setParseAction(self._push))
    term4 = term3 + ZeroOrMore((op4 + term3) .setParseAction(self._push))
    term5 = term4 + ZeroOrMore((op5 + term4) .setParseAction(self._push))
    term6 = term5 + ZeroOrMore((op6 + term5) .setParseAction(self._push))
    term7 = term6 + ZeroOrMore((op7 + term6) .setParseAction(self._push))
    term8 = term7 + ZeroOrMore((op8 + term7) .setParseAction(self._push))
    term9 = term8 + ZeroOrMore((op9 + term8) .setParseAction(self._push))
    term10 = term9 + ZeroOrMore((op10 + term9) .setParseAction(self._push))
    term11 = term10 + ZeroOrMore((op11 + term10).setParseAction(self._push))
    expr << term11 + ZeroOrMore((op12 + term11).setParseAction(self._push))
    return expr
def getkw_bnf(self):
    """Build the getkw input-file grammar (typed variant).

    Recognizes sections ``name { ... }``, tagged sections ``name<tag> {}``,
    ``keyword = value`` pairs, arrays ``name = [ ... ]`` and raw data blocks
    ``$name ... $end``.  Values are typed via regexes (int, float, logical)
    with parse actions converting them; section/keyword actions maintain
    parser state on self.  Python-style comments are ignored.
    """
    sect_begin = Literal("{").suppress()
    sect_end = Literal("}").suppress()
    array_begin = Literal("[").suppress()
    array_end = Literal("]").suppress()
    tag_begin = Literal("<").suppress()
    tag_end = Literal(">").suppress()
    eql = Literal("=").suppress()
    dmark = Literal('$').suppress()
    end_data = Literal('$end').suppress()
    # Printable characters allowed in unquoted string values.
    prtable = alphanums+r'!$%&*+-./<>?@^_|~'
    ival = Regex('[-]?\d+')
    dval = Regex('-?\d+\.\d*([eE]?[+-]?\d+)?')
    lval = Regex('([Yy]es|[Nn]o|[Tt]rue|[Ff]alse|[Oo]n|[Oo]ff)')
    # Helper definitions.  '^' (Or) picks the longest match, so the more
    # specific typed values win over a bare word.
    kstr = quotedString.setParseAction(removeQuotes) ^ \
        dval ^ ival ^ lval ^ Word(prtable)
    name = Word(alphas+"_", alphanums+"_")
    vec = array_begin + delimitedList(dval ^ ival ^ lval ^ Word(prtable) ^ \
        Literal("\n").suppress() ^ \
        quotedString.setParseAction(removeQuotes)) + array_end
    sect = name + sect_begin
    tag_sect = name + Group(tag_begin + name + tag_end) + sect_begin
    # Grammar
    keyword = name + eql + kstr
    vector = name + eql + vec
    data = Combine(dmark + name) + SkipTo(end_data) + end_data
    section = Forward()
    sect_def = (sect | tag_sect ) #| vec_sect)
    input = section | data | vector | keyword
    section << sect_def + ZeroOrMore(input) + sect_end
    # Parsing actions
    ival.setParseAction(self.conv_ival)
    dval.setParseAction(self.conv_dval)
    lval.setParseAction(self.conv_lval)
    keyword.setParseAction(self.store_key)
    vector.setParseAction(self.store_vector)
    data.setParseAction(self.store_data)
    sect.setParseAction(self.add_sect)
    tag_sect.setParseAction(self.add_sect)
    sect_end.setParseAction(self.pop_sect)
    bnf = ZeroOrMore(input) + StringEnd().setFailAction(parse_error)
    bnf.ignore(pythonStyleComment)
    return bnf
def receiver_input_rule():
    """Grammar for one receiver rule:

        [always] when <conditions> then <actions>

    where a condition is ``<input> is|was <operator> <value>`` and
    conditions are joined by the module-level ``_and`` separator.

    Fix: the temporal keyword was matched with ``Word("is")`` /
    ``Word("was")``, which accepts any run of those characters (e.g.
    "si", "ss", "wasw"); ``oneOf`` matches only the two literal words.
    """
    # Dotted path such as "sensor.kitchen.temp".
    path = Combine(ZeroOrMore(word + ".") + word)
    input = path.setResultsName("input")
    operator = oneOf(operators.keys()).setResultsName("operator")
    value = path.setResultsName("value")
    comparison = operator + value
    # "is" -> condition on the current state, "was" -> on a past state.
    is_or_was = oneOf("is was")
    condition = Group(input + is_or_was.setResultsName("temporal") + comparison)
    res = ZeroOrMore(condition + _and) + condition
    conditions = Group(res).setResultsName("conditions")
    return Optional("always").setResultsName("always_fire_rule") + when + conditions + then + actions
def parser():
    """Build (once) and return the regex grammar that produces AST nodes.

    The result is cached in the module global ``_parser``; node classes
    (Repetition, Sequence, Alternation, Group, Range, Character, Macro,
    Dot) are instantiated through the module-level ``create`` factory.
    """
    global _parser
    if _parser is None:
        # Regexes are whitespace-significant, so disable token skipping.
        ParserElement.setDefaultWhitespaceChars("")
        lbrack = Literal("[")
        rbrack = Literal("]")
        lbrace = Literal("{")
        rbrace = Literal("}")
        lparen = Literal("(")
        rparen = Literal(")")
        reMacro = Suppress("\\") + oneOf(list("dwsZ"))
        escapedChar = ~reMacro + Combine("\\" + oneOf(list(printables)))
        # Any printable char that is not a regex metacharacter.
        reLiteralChar = "".join(c for c in string.printable if c not in r"\[]{}().*?+|")
        reRange = Combine(lbrack.suppress() + SkipTo(rbrack, ignore=escapedChar) + rbrack.suppress())
        reLiteral = ( escapedChar | oneOf(list(reLiteralChar)) )
        reDot = Literal(".")
        # {n}, {m,n} or one of * + ?
        repetition = (
            ( lbrace + Word(nums).setResultsName("count") + rbrace ) |
            ( lbrace + Word(nums).setResultsName("minCount") + "," +
              Word(nums).setResultsName("maxCount") + rbrace ) |
            oneOf(list("*+?"))
        )
        reExpr = Forward()
        # "(...)", "(?:...)" or "(?P...)" — the option marker is kept in "option".
        reGroup = (lparen.suppress() +
                   Optional(Literal("?").suppress() + oneOf(list(":P"))).setResultsName("option") +
                   reExpr.setResultsName("expr") +
                   rparen.suppress())
        reTerm = ( reLiteral | reRange | reMacro | reDot | reGroup )
        reExpr << operatorPrecedence( reTerm, [
            (repetition, 1, opAssoc.LEFT, create(Repetition)),
            (None, 2, opAssoc.LEFT, create(Sequence)),
            (Suppress('|'), 2, opAssoc.LEFT, create(Alternation)),
        ] )
        reGroup.setParseAction(create(Group))
        reRange.setParseAction(create(Range))
        reLiteral.setParseAction(create(Character))
        reMacro.setParseAction(create(Macro))
        reDot.setParseAction(create(Dot))
        _parser = reExpr
    return _parser
def getkw_bnf(self):
    """Build the getkw grammar variant with parenthesized section arguments.

    In addition to plain sections ``name { ... }``, this variant accepts
    ``name(arg) { ... }`` and ``name([vec...]) { ... }``, plus
    ``keyword = value`` pairs, arrays and raw ``$name ... $end`` data
    blocks.  Parse actions maintain the section stack / storage on self;
    Python-style comments are ignored.
    """
    sect_begin = Literal("{").suppress()
    sect_end = Literal("}").suppress()
    array_begin = Literal("[").suppress()
    array_end = Literal("]").suppress()
    arg_begin = Literal("(").suppress()
    arg_end = Literal(")").suppress()
    eql = Literal("=").suppress()
    dmark = Literal('$').suppress()
    end_data = Literal('$end').suppress()
    # Printable characters allowed in unquoted values.
    prtable = alphanums+r'!$%&*+-./<>?@^_|~'
    # Helper definitions ('^' picks the longest match).
    kstr = Word(prtable) ^ quotedString.setParseAction(removeQuotes)
    name = Word(alphas+"_", alphanums+"_")
    vec = array_begin + delimitedList(Word(prtable) ^ \
        Literal("\n").suppress() ^ \
        quotedString.setParseAction(removeQuotes)) + array_end
    sect = name + sect_begin
    key_sect = name + Group(arg_begin + kstr + arg_end) + sect_begin
    vec_sect = name + Group(arg_begin + vec + arg_end) + sect_begin
    # Grammar
    keyword = name + eql + kstr
    vector = name + eql + vec
    data = Combine(dmark + name) + SkipTo(end_data) + end_data
    section = Forward()
    sect_def = (sect | key_sect | vec_sect)
    input = section | data | vector | keyword
    section << sect_def + ZeroOrMore(input) + sect_end
    # Parsing actions
    keyword.setParseAction(self.store_key)
    vector.setParseAction(self.store_vector)
    data.setParseAction(self.store_data)
    sect.setParseAction(self.add_sect)
    key_sect.setParseAction(self.add_sect)
    vec_sect.setParseAction(self.add_vecsect)
    sect_end.setParseAction(self.pop_sect)
    bnf = ZeroOrMore(input) + StringEnd().setFailAction(parse_error)
    bnf.ignore(pythonStyleComment)
    return bnf
def _get_handbrake_title_pattern(self):
    """Pattern for one title entry in HandBrake scan output.

    Captures the title number ("title"), its duration as hh:mm:ss
    ("duration") and the list of subtitle tracks with their iso639-2
    language codes ("subtitles").
    """
    digits = Word("0123456789")
    timestamp = Combine(digits + ":" + digits + ":" + digits)

    # Fixed markers emitted by HandBrake's scan listing.
    title_marker = Literal("+ title").suppress()
    duration_marker = Literal("+ duration:").suppress()
    subtitle_marker = Literal("+ subtitle tracks:")
    iso_code = Literal('(iso639-2:').suppress() + Word(alphas)

    # One subtitle line: "+ <num> ... (iso639-2:<lang>) ..."
    track = (Literal("+").suppress() +
             Group(digits + SkipTo(iso_code).suppress() + iso_code) +
             restOfLine.suppress())
    all_tracks = Group(ZeroOrMore(track)).setResultsName("subtitles")

    return (title_marker + digits.setResultsName("title") +
            SkipTo(duration_marker).suppress() +
            duration_marker + timestamp.setResultsName("duration") +
            SkipTo(subtitle_marker).suppress() + subtitle_marker.suppress() +
            all_tracks)
def _build_grammar(self):
    """Build the expression grammar for the tag mini-language.

    Alternatives: parenthesized arithmetic, assignment ``tag = expr``,
    bare tag references, numeric/string literals and the '?' print-tags
    command.  Each alternative is mapped onto the corresponding
    ``self.to_*`` constructor via a parse action.
    """
    expr = Forward()
    float_lit = Combine(Word(nums) + '.' + Word(nums))
    float_lit.setName('float')
    float_lit.setParseAction(lambda x: \
        self.to_literal(float(x[0])))
    int_lit = Word(nums)
    int_lit.setName('int')
    int_lit.setParseAction(lambda x: \
        self.to_literal(int(x[0])))
    # float before int so "1.5" is not consumed as int "1".
    num = (float_lit | int_lit)
    num.setParseAction(lambda x: x[0])
    tag_name = Word(alphas + "_", alphanums + "_")
    tag_name.setName('tag_name')
    tag_name.setParseAction(lambda t: tag_reference.TagReference(t[0]))
    quoted_string = QuotedString("'")
    quoted_string.setParseAction(lambda s: self.to_literal(s[0]))
    oper = oneOf('+ * / -')
    oper.setParseAction(lambda o: o[0])
    lpar = Literal("(").suppress()
    rpar = Literal(")").suppress()
    # Arithmetic must be fully parenthesized: "(a <op> b)".
    arith = Group(lpar + expr + oper + expr + rpar)
    arith.setParseAction(lambda t: \
        self.to_arith(t[0][0], t[0][1], t[0][2]))
    assign = tag_name + '=' + expr
    assign.setName('assign')
    assign.setParseAction(lambda x: self.to_assign(x[0], x[2]))
    print_tags = Literal('?')
    print_tags.setParseAction(lambda x: self.to_print_tags())
    # Order matters: assignment must be tried before a bare tag_name.
    expr << (arith | assign | tag_name | num | quoted_string | print_tags)
    expr.setParseAction(lambda x: x[0])
    return expr
def instance():
    """Build the grammar for a DGF (Dune Grid Format) style mesh file.

    The file starts with a DGF header, then the VERTEX, SIMPLEX and
    BOUNDARYSEGMENTS sections in any order (Each), each terminated by a
    line containing '#'.  Indices are validated via index_check(0);
    '%'-comments are ignored at line ends.
    """
    lit_e = CaselessLiteral('E')
    plusorminus = Literal('+') | Literal('-')
    number = Word(nums)
    integer = Combine(Optional(plusorminus) + number).setParseAction(lambda t: int(t[0]))
    # index_check(0) is applied on top of the int conversion.
    index = integer.copy().addParseAction(index_check(0))
    floatnumber = Combine(integer +
                          Optional(Literal('.') + Optional(number)) +
                          Optional(lit_e + integer)
                          ).setParseAction(lambda t: float(t[0]))
    #comment = Suppress("%") + Word(alphanums + " ")
    comment = Regex(r"%.*").setName("comment").suppress()
    # A line ends with an optional comment or a bare newline.
    linend = Or([comment, LineEnd()]).suppress()
    section_end = (Literal('#') + LineEnd()).suppress()

    # One vertex per line: whitespace-separated float coordinates.
    vertex = (Group(OneOrMore(floatnumber('point') +
                              OneOrMore(White()).suppress())) + linend)('vertex')
    vertex_header = (Keyword('VERTEX') + linend).suppress()
    vertex_section = (vertex_header + Group(OneOrMore(vertex))('vertices') + section_end)

    # One simplex per line: whitespace-separated vertex indices.
    simplex = (Group(OneOrMore(index('index') +
                               OneOrMore(White()).suppress())) + linend)('simplex')
    simplex_header = (Keyword('SIMPLEX') + linend).suppress()
    simplex_section = (simplex_header + Group(OneOrMore(simplex))('simplices') + section_end)

    # Boundary segment: leading segment id, then its vertex indices.
    boundarysegment = (Group(index('id') +
                             OneOrMore(index('index') +
                                       OneOrMore(White()).suppress())) + linend)('boundarysegment')
    boundarysegment_header = (Keyword('BOUNDARYSEGMENTS') + linend).suppress()
    boundarysegment_section = (boundarysegment_header +
                               Dict(OneOrMore(boundarysegment))('boundarysegments') +
                               section_end)

    sections = Each([vertex_section, simplex_section, boundarysegment_section])
    dgf_header = (Keyword('DGF') + linend).suppress()
    dgf = (dgf_header + Dict(sections) + OneOrMore(section_end))('dgf')
    return dgf
def pattern():
    """pyparsing pattern for an "open file" command.

    Matches either an explicit ``f <path>`` form, a bare path of at least
    two characters, or a slash-led path, optionally followed by a line
    number.  The match is handed to CommandOpen.create.
    """
    def attachLocation(s, loc, tocs):
        """pyparsing callback. Saves path position in the original string"""
        return [(loc, tocs[0])]

    path = CharsNotIn(" \t")("path")
    path.setParseAction(attachLocation)
    # min=2 so a single stray character is not mistaken for a path.
    longPath = CharsNotIn(" \t", min=2)("path")
    longPath.setParseAction(attachLocation)
    slashPath = Combine(Literal('/') + Optional(CharsNotIn(" \t")))("path")
    slashPath.setParseAction(attachLocation)
    # '^' (Or) picks the longest of the three alternatives.
    pat = ((Literal('f ') + Optional(White()) + Optional(path)) ^ longPath ^ slashPath) + \
        Optional(White() + Word(nums)("line"))
    pat.leaveWhitespace()
    pat.setParseAction(CommandOpen.create)
    return pat
def parser():
    """Build (once) and return the regex grammar (handle_* callback version).

    The result is cached in the module global ``_parser``; the handle_*
    functions are module-level parse actions that build the result tree.
    """
    global _parser
    if _parser is None:
        # Regexes are whitespace-significant, so disable token skipping.
        ParserElement.setDefaultWhitespaceChars("")
        lbrack, rbrack, lbrace, rbrace, lparen, rparen = map(Literal, "[]{}()")
        reMacro = Combine("\\" + oneOf(list("dws")))
        escapedChar = ~ reMacro + Combine("\\" + oneOf(list(printables)))
        # Literal chars: any printable non-metacharacter, plus space and tab.
        reLiteralChar = "".join(c for c in printables if c not in r"\[]{}().*?+|") + " \t"
        reRange = Combine(lbrack + SkipTo(rbrack, ignore=escapedChar) + rbrack)
        reLiteral = (escapedChar | oneOf(list(reLiteralChar)))
        reDot = Literal(".")
        # {n}, {m,n} or one of * + ?
        repetition = (
            (lbrace + Word(nums).setResultsName("count") + rbrace) |
            (lbrace + Word(nums).setResultsName("minCount") + "," +
             Word(nums).setResultsName("maxCount") + rbrace) |
            oneOf(list("*+?"))
        )
        reRange.setParseAction(handle_range)
        reLiteral.setParseAction(handle_literal)
        reMacro.setParseAction(handle_macro)
        reDot.setParseAction(handle_dot)
        reTerm = (reLiteral | reRange | reMacro | reDot)
        reExpr = operatorPrecedence(reTerm, [
            (repetition, 1, opAssoc.LEFT, handle_repetition),
            (None, 2, opAssoc.LEFT, handle_sequence),
            (Suppress('|'), 2, opAssoc.LEFT, handle_alternative),
        ])
        _parser = reExpr
    return _parser
def getLogLineBNF():
    """Build (once) the grammar for an Apache combined-log-format line.

    The result is cached in the module global ``logLineBNF``.  Result
    names: ipAddr, auth, timestamp, cmd, statusCode, numBytesSent,
    referrer, clientSfw.

    NOTE(review): uses string.uppercase / string.lowercase, which exist
    only on Python 2 — confirm before porting.
    """
    global logLineBNF
    if logLineBNF is None:
        integer = Word( nums )
        ipAddress = delimitedList( integer, ".", combine=True )
        timeZoneOffset = Word("+-", nums)
        # Month abbreviation: one uppercase letter then exactly two lowercase.
        month = Word(string.uppercase, string.lowercase, exact=3)
        # "[dd/Mon/yyyy:hh:mm:ss +zzzz]" with the brackets stripped.
        serverDateTime = Combine( Suppress("[") +
                                  Combine( integer + "/" + month + "/" + integer +
                                           ":" + integer + ":" + integer + ":" + integer +
                                           ' ' + timeZoneOffset ) +
                                  Suppress("]") )
        logLineBNF = ( ipAddress.setResultsName("ipAddr") +
                       Suppress("-") +
                       ("-" | Word( alphas+nums+"@._" )).setResultsName("auth") +
                       serverDateTime.setResultsName("timestamp") +
                       dblQuotedString.setResultsName("cmd").setParseAction(getCmdFields) +
                       (integer | "-").setResultsName("statusCode") +
                       (integer | "-").setResultsName("numBytesSent") +
                       dblQuotedString.setResultsName("referrer").setParseAction(removeQuotes) +
                       dblQuotedString.setResultsName("clientSfw").setParseAction(removeQuotes) )
    return logLineBNF
def urlsplit(url, scheme='', allow_fragments=1):
    """Parse a URL into 5 components:
    <scheme>://<netloc>/<path>?<query>#<fragment>
    Return a 5-tuple: (scheme, netloc, path, query, fragment).
    Note that we don't break the components up in smaller bits
    (e.g. netloc is a single string) and we don't expand % escapes.

    Results are memoized in the module-level _parse_cache keyed on
    (url, scheme, allow_fragments); the pyparsing grammar is built once
    and cached in the module global _urlBNF.

    Fix: the local result variable was named ``tuple``, shadowing the
    builtin; renamed to ``result``.
    """
    global _urlBNF
    key = url, scheme, allow_fragments
    cached = _parse_cache.get(key, None)
    if cached:
        return cached
    if len(_parse_cache) >= MAX_CACHE_SIZE:  # avoid runaway growth
        clear_cache()
    if _urlBNF is None:
        # Build the grammar lazily, once per process.
        scheme_chars = alphanums + "+-."
        urlscheme = Word(scheme_chars)
        netloc_chars = "".join([c for c in printables if c not in "/."])
        netloc = Combine(delimitedList(Word(netloc_chars), ".", combine=True))
        path_chars = "".join([c for c in printables if c not in "?"])
        path = Word(path_chars)
        query_chars = "".join([c for c in printables if c not in "#"])
        query = Word(query_chars)
        fragment = Word(printables + " ")
        # NOTE(review): allow_fragments participates in the cache key but is
        # not honored by the grammar (fragments are always split) — confirm.
        _urlBNF = Combine(Optional(urlscheme.setResultsName("scheme") + ":") +
                          Optional(Literal("//").suppress() + netloc,
                                   default="").setResultsName("netloc") +
                          Optional(path.setResultsName("path"), default="") +
                          Optional(Literal("?").suppress() + query,
                                   default="").setResultsName("query") +
                          Optional(Literal("#").suppress() + fragment,
                                   default="").setResultsName("fragment"))
    tokens = _urlBNF.parseString(url)
    result = ((tokens.scheme or scheme), tokens.netloc[0], tokens.path,
              tokens.query[0], tokens.fragment[0])
    _parse_cache[key] = result
    return result
def _define_grammar(self):
    '''Define the block-file grammar and attach parse actions.

    The format is named blocks of data lines:

        begin <name> [# comment]
        <data lines...>
        end <name> [# comment]

    Anything before/between blocks is skipped as junk.  The built grammar
    is stored on self._grammar; begin/end/data callbacks drive the actual
    processing as a side effect of parsing.
    '''
    self._define_actions()
    eol = LineEnd().suppress()
    white = Optional(White()).suppress()
    begin = Keyword('begin').suppress()
    end = Keyword('end').suppress()
    comment = (Literal('#') + restOfLine).suppress()
    # A data value is everything up to a comment marker or end of line.
    data_value = Combine(OneOrMore(CharsNotIn('#\n\r')))
    data_line = (LineStart() + white + Optional(data_value) +
                 Optional(comment) + eol)
    block_name = Word(alphas, alphanums + '_')
    begin_block = (LineStart() + begin + block_name +
                   Optional(comment) + eol)
    end_block = LineStart() + end + block_name + Optional(comment) + eol
    # Skip any line that does not start a block.
    junk = ZeroOrMore(LineStart() + white + NotAny(begin) +
                      restOfLine + eol).suppress()
    data = Group(ZeroOrMore(NotAny(end) + data_line))
    block_def = begin_block + data + end_block
    block_defs = junk + OneOrMore(block_def + junk)
    self._grammar = block_defs
    begin_block.addParseAction(self._begin_block_action)
    end_block.addParseAction(self._end_block_action)
    data_value.addParseAction(self._data_value_action)
def grammar(self):
    """Arithmetic grammar over numeric literals and table identifiers.

    Literals and table names are routed through their respective parse
    actions; precedence (tightest first): unary sign, * /, + -.
    """
    numeric = Combine(Word(nums) + Optional("." + OneOrMore(Word(nums))))
    numeric.setParseAction(self._number_parse_action)

    # Table names look like "word_more", i.e. an alpha head followed by
    # one or more underscore-led alphanumeric chunks.
    table_ref = Combine(Word(alphas) + OneOrMore(Word("_"+alphanums)))
    table_ref.setParseAction(self._table_parse_action)

    operand = numeric | table_ref

    sign_op = oneOf('+ -')
    mul_op = oneOf('* /')
    add_op = oneOf('+ -')
    precedence_levels = [
        (sign_op, 1, opAssoc.RIGHT),
        (mul_op, 2, opAssoc.LEFT),
        (add_op, 2, opAssoc.LEFT),
    ]
    return operatorPrecedence(operand, precedence_levels)
def create_bnf():
    """Grammar for Python-literal-like values: ints, floats, quoted
    strings, bare words, tuples, lists and dicts (mutually nestable).

    NOTE(review): this returns ``dict_inner`` — a brace-less,
    comma-separated ``key: value`` list converted to a dict — rather than
    the fully-braced ``dict_str``.  That looks deliberate (parsing a dict
    body without surrounding braces) but confirm against the callers.
    """
    cvt_int = lambda toks: int(toks[0])
    cvt_real = lambda toks: float(toks[0])
    cvt_tuple = lambda toks : tuple(toks.asList())
    cvt_dict = lambda toks: dict(toks.asList())
    # define punctuation as suppressed literals
    (lparen, rparen, lbrack, rbrack,
     lbrace, rbrace, colon) = map(Suppress, "()[]{}:")
    integer = Combine(Optional(oneOf("+ -")) + Word(nums)).setName("integer")
    integer.setParseAction(cvt_int)
    # Note: exponent marker is lowercase 'e' only.
    real = Combine(Optional(oneOf("+ -")) + Word(nums) + "." +
                   Optional(Word(nums)) +
                   Optional("e" + Optional(oneOf("+ -")) + Word(nums))).setName("real")
    real.setParseAction(cvt_real)
    tuple_str = Forward()
    list_str = Forward()
    dict_str = Forward()
    # Order matters: real before integer so "1.5" is not split.
    list_item = (real | integer | Group(list_str) | tuple_str | dict_str |
                 quotedString.setParseAction(removeQuotes) |
                 Word(alphas8bit + alphas, alphas8bit + alphanums + "_"))
    # Dict values may be empty, in which case they map to None.
    list_item2 = list_item | Empty().setParseAction(lambda: [None])
    tuple_str << (Suppress("(") + Optional(delimitedList(list_item)) +
                  Optional(Suppress(",")) + Suppress(")"))
    tuple_str.setParseAction(cvt_tuple)
    list_str << (lbrack + Optional(delimitedList(list_item) +
                 Optional(Suppress(","))) + rbrack)
    dict_entry = Group(list_item + colon + list_item2)
    dict_inner = delimitedList(dict_entry) + Optional(Suppress(","))
    dict_inner.setParseAction(cvt_dict)
    dict_str << (lbrace + Optional(dict_inner) + rbrace)
    return dict_inner
def parser():
    """Build (once) the regex grammar (infixNotation variant) that also
    tolerates non-capturing group markers "?:"; cached in the module
    global ``_parser``.  The handle* functions are module-level parse
    actions that build the result tree.
    """
    global _parser
    if _parser is None:
        # Regexes are whitespace-significant, so disable token skipping.
        ParserElement.setDefaultWhitespaceChars("")
        lbrack, rbrack, lbrace, rbrace, lparen, rparen, colon, qmark = map(Literal, "[]{}():?")
        reMacro = Combine("\\" + oneOf(list("dws")))
        escapedChar = ~reMacro + Combine("\\" + oneOf(list(printables)))
        # Literal chars: any printable non-metacharacter, plus space and tab.
        reLiteralChar = "".join(c for c in printables if c not in r"\[]{}().*?+|") + " \t"
        reRange = Combine(lbrack + SkipTo(rbrack, ignore=escapedChar) + rbrack)
        reLiteral = ( escapedChar | oneOf(list(reLiteralChar)) )
        # "?:" markers are consumed and dropped.
        reNonCaptureGroup = Suppress("?:")
        reDot = Literal(".")
        # {n}, {m,n} or one of * + ?
        repetition = (
            ( lbrace + Word(nums)("count") + rbrace ) |
            ( lbrace + Word(nums)("minCount") + "," +
              Word(nums)("maxCount") + rbrace ) |
            oneOf(list("*+?"))
        )
        reRange.setParseAction(handleRange)
        reLiteral.setParseAction(handleLiteral)
        reMacro.setParseAction(handleMacro)
        reDot.setParseAction(handleDot)
        reTerm = ( reLiteral | reRange | reMacro | reDot | reNonCaptureGroup)
        reExpr = infixNotation( reTerm, [
            (repetition, 1, opAssoc.LEFT, handleRepetition),
            (None, 2, opAssoc.LEFT, handleSequence),
            (Suppress('|'), 2, opAssoc.LEFT, handleAlternative),
        ] )
        _parser = reExpr
    return _parser
See http://pyparsing.wikispaces.com/message/view/home/620225 and the discussion of the "-" operator in the docs. ''' ParsingTmp.keywords.append(x) W = Where O = Optional S = Suppress number = Word(nums) point = Literal('.') e = CaselessLiteral('E') plusorminus = Literal('+') | Literal('-') integer = Combine(O(plusorminus) + number) floatnumber = Combine(integer + (point + O(number)) ^ (e + integer)) integer.setParseAction(lambda tokens: SimpleRValue(int(tokens[0]))) floatnumber.setParseAction(lambda tokens: SimpleRValue(float(tokens[0]))) pi = Keyword('pi').setParseAction(lambda tokens: SimpleRValue(math.pi, 'pi')) #@UnusedVariable isnumber = lambda x: isinstance(x, Number) rvalue = Forward() rvalue.setName('rvalue') contract_expression = Forward() contract_expression.setName('contract') simple_contract = Forward() simple_contract.setName('simple_contract') # Import all expressions -- they will call add_contract() from .library import (EqualTo, Unary, Binary, composite_contract,
def get_grammar(self):
    """
    Defines our grammar for mathematical expressions.

    Possibly helpful:
        - BNF form of context-free grammar https://en.wikipedia.org/wiki/Backus%E2%80%93Naur_form
        - Some pyparsing docs http://infohost.nmt.edu/~shipman/soft/pyparsing/web/index.html
    """
    # Define + and -
    plus = Literal("+")
    minus = Literal("-")
    plus_minus = plus | minus

    # 1 or 1.0 or .1
    number_part = Word(nums)
    inner_number = Combine((number_part + Optional("." + Optional(number_part))) |
                           ("." + number_part))
    # Combine() joins the matching parts together in a single token,
    # and requires that the matching parts be contiguous (no spaces)

    # Define our suffixes
    suffix = Word(alphas + '%')
    suffix.setParseAction(self.suffix_parse_action)

    # Construct number as a group consisting of a text string ("num") and an
    # optional suffix.  num can include a decimal number and numerical
    # exponent, and can be converted to a number using float(); suffix may
    # contain alphas or %.  Spaces are ignored inside numbers.  Group wraps
    # everything up into its own ParseResults object when parsing.
    number = Group(
        Combine(
            inner_number +
            Optional(CaselessLiteral("E") + Optional(plus_minus) + number_part),
        )("num") +
        Optional(suffix)("suffix"))("number")
    # Note that calling ("name") on the end of a parser is equivalent to calling
    # parser.setResultsName, which is used to pull that result out of a parsed
    # expression like a dictionary.

    # Construct variable and function names
    front = Word(alphas, alphanums)  # must start with alpha
    subscripts = Word(alphanums + '_') + ~FollowedBy('{')  # ~ = not
    lower_indices = Literal("_{") + Optional("-") + Word(alphanums) + Literal("}")
    upper_indices = Literal("^{") + Optional("-") + Word(alphanums) + Literal("}")
    # Construct an object name in either of two forms:
    #   1. front + subscripts + tail
    #   2. front + lower_indices + upper_indices + tail
    # where:
    #   front (required): starts with alpha, followed by alphanumeric
    #   subscripts (optional): any combination of alphanumerics and underscores
    #   lower_indices (optional): of form "_{(-)<alphanumeric>}"
    #   upper_indices (optional): of form "^{(-)<alphanumeric>}"
    #   tail (optional): any number of primes
    name = Combine(front +
                   Optional(subscripts |
                            (Optional(lower_indices) + Optional(upper_indices))) +
                   ZeroOrMore("'"))
    # Define a variable as a pyparsing result that contains one object name
    variable = Group(name("varname"))("variable")
    variable.setParseAction(self.variable_parse_action)

    # initialize recursive grammar
    expression = Forward()

    # Construct functions as consisting of funcname and arguments as
    #   funcname(arguments)
    # where arguments is a comma-separated list of arguments, returned as a
    # list.  Must have at least 1 argument.
    function = Group(name("funcname") +
                     Suppress("(") +
                     Group(delimitedList(expression))("arguments") +
                     Suppress(")"))("function")
    function.setParseAction(self.function_parse_action)

    # Define parentheses
    parentheses = Group(Suppress("(") + expression + Suppress(")"))('parentheses')

    # Define arrays
    array = Group(Suppress("[") + delimitedList(expression) + Suppress("]"))("array")

    # atomic units evaluate directly to number or array without binary operations
    atom = number | function | variable | parentheses | array

    # Define operations in order of precedence.
    # Define exponentiation, possibly including negative powers.
    power = atom + ZeroOrMore(Suppress("^") + Optional(minus)("op") + atom)
    power.addParseAction(self.group_if_multiple('power'))

    # Define negation (e.g., in 5*-3 --> we need to evaluate the -3 first).
    # Negation in powers is handled separately.
    # This has been arbitrarily assigned a higher precedence than parallel.
    negation = Optional(minus)("op") + power
    negation.addParseAction(self.group_if_multiple('negation'))

    # Define the parallel operator: 1 || 5 == 1/(1/1 + 1/5)
    pipes = Literal('|') + Literal('|')
    parallel = negation + ZeroOrMore(Suppress(pipes) + negation)
    parallel.addParseAction(self.group_if_multiple('parallel'))

    # Define multiplication and division
    product = parallel + ZeroOrMore((Literal('*') | Literal('/'))("op") + parallel)
    product.addParseAction(self.group_if_multiple('product'))

    # Define sums and differences.
    # Note that leading - signs are treated by negation.
    sumdiff = Optional(plus) + product + ZeroOrMore(
        plus_minus("op") + product)
    sumdiff.addParseAction(self.group_if_multiple('sum'))

    # Close the recursion
    expression << sumdiff

    return expression + stringEnd
PERIOD = Literal(".").suppress() RANGLE = Literal(">").suppress() RBRACE = Literal("]").suppress() RPAREN = Literal(")").suppress() CATEGORIES = CaselessLiteral("categories").suppress() END = CaselessLiteral("end").suppress() FONT = CaselessLiteral("font").suppress() HINT = CaselessLiteral("hint").suppress() ITEM = CaselessLiteral("item").suppress() OBJECT = CaselessLiteral("object").suppress() attribute_value_pair = Forward() # this is recursed in item_list_entry simple_identifier = Word(alphas, alphanums + "_") identifier = Combine( simple_identifier + ZeroOrMore( Literal(".") + simple_identifier )) object_name = identifier object_type = identifier # Integer and floating point values are converted to Python longs and floats, respectively. int_value = Combine(Optional("-") + Word(nums)).setParseAction(lambda s,l,t: [ int(t[0]) ] ) float_value = Combine(Optional("-") + Optional(Word(nums)) + "." + Word(nums)).setParseAction(lambda s,l,t: [ float(t[0]) ] ) number_value = float_value | int_value # Base16 constants are left in string form, including the surrounding braces. base16_value = Combine(Literal("{") + OneOrMore(Word("0123456789ABCDEFabcdef")) + Literal("}"), adjacent=False) # This is the first part of a hack to convert the various delphi partial sglQuotedStrings # into a single sglQuotedString equivalent. The gist of it is to combine # all sglQuotedStrings (with their surrounding quotes removed (suppressed)) # with sequences of #xyz character constants, with "strings" concatenated
def graph_definition():
    """Build (once) and return the pyparsing grammar for the DOT graph language.

    The grammar is memoized in the module-level ``graphparser`` global: it is
    constructed on the first call and the same parser object is returned on
    every subsequent call.  Parse actions (``push_*`` callbacks defined
    elsewhere in this module) build the graph data structure as a side effect
    of parsing.
    """
    global graphparser

    if not graphparser:
        # punctuation
        colon = Literal(":")
        lbrace = Literal("{")
        rbrace = Literal("}")
        lbrack = Literal("[")
        rbrack = Literal("]")
        lparen = Literal("(")
        rparen = Literal(")")
        equals = Literal("=")
        comma = Literal(",")
        dot = Literal(".")
        slash = Literal("/")
        bslash = Literal("\\")
        star = Literal("*")
        semi = Literal(";")
        at = Literal("@")
        minus = Literal("-")

        # keywords (case-sensitive here; note the newer variant of this
        # function elsewhere in the file uses CaselessLiteral instead)
        strict_ = Literal("strict")
        graph_ = Literal("graph")
        digraph_ = Literal("digraph")
        subgraph_ = Literal("subgraph")
        node_ = Literal("node")
        edge_ = Literal("edge")

        # token definitions
        identifier = Word(alphanums + "_").setName("identifier")
        double_quoted_string = dblQuotedString

        # _noncomma is a module-level constant (all printables except ",")
        alphastring_ = OneOrMore(CharsNotIn(_noncomma))

        # Alternatives are tried left-to-right (pyparsing MatchFirst).
        ID = (identifier | double_quoted_string.setParseAction(strip_quotes) |
              alphastring_).setName("ID")

        # NOTE(review): this only recognizes "<<...>>" HTML labels and stops at
        # the first "," or "]" -- a simplification of DOT's HTML strings.
        html_text = Combine(Literal("<<") + OneOrMore(CharsNotIn(",]")))

        float_number = Combine(Optional(minus) +
                               OneOrMore(Word(nums + "."))).setName("float_number")

        righthand_id = (float_number | ID | html_text).setName("righthand_id")

        port_angle = (at + ID).setName("port_angle")

        port_location = (Group(colon + ID) |
                         Group(colon + lparen + ID + comma + ID + rparen)).setName("port_location")

        port = (Group(port_location + Optional(port_angle)) |
                Group(port_angle + Optional(port_location))).setName("port")

        node_id = (ID + Optional(port))
        # Attribute list body: "key=value" pairs, '=' and ',' suppressed.
        a_list = OneOrMore(ID + Optional(equals.suppress() + righthand_id) +
                           Optional(comma.suppress())).setName("a_list")

        attr_list = OneOrMore(lbrack.suppress() + Optional(a_list) +
                              rbrack.suppress()).setName("attr_list")

        attr_stmt = (Group(graph_ | node_ | edge_) + attr_list).setName("attr_stmt")

        edgeop = (Literal("--") | Literal("->")).setName("edgeop")

        stmt_list = Forward()
        graph_stmt = Group(lbrace.suppress() + Optional(stmt_list) +
                           rbrace.suppress()).setName("graph_stmt")

        subgraph = (Group(Optional(subgraph_ + Optional(ID)) + graph_stmt) |
                    Group(subgraph_ + ID)).setName("subgraph")

        edgeRHS = OneOrMore(edgeop + Group(node_id | subgraph))
        edge_stmt = Group(node_id | subgraph) + edgeRHS + Optional(attr_list)

        node_stmt = (node_id + Optional(attr_list) + Optional(semi.suppress())).setName("node_stmt")

        assignment = (ID + equals.suppress() + righthand_id).setName("assignment")

        # Order matters: e.g. assignment must be tried before edge_stmt so
        # "a=b" is not misparsed as a node statement.
        stmt = (assignment | edge_stmt | attr_stmt | subgraph | node_stmt).setName("stmt")
        stmt_list << OneOrMore(stmt + Optional(semi.suppress()))

        graphparser = (Optional(strict_) + Group((graph_ | digraph_)) +
                       Optional(ID) + graph_stmt).setResultsName("graph")

        singleLineComment = "//" + restOfLine

        # actions: C-style and //-comments are skipped; parse actions build
        # the graph structure via the push_* callbacks defined elsewhere.
        graphparser.ignore(singleLineComment)
        graphparser.ignore(cStyleComment)

        assignment.setParseAction(push_attr_list)
        a_list.setParseAction(push_attr_list)
        edge_stmt.setParseAction(push_edge_stmt)
        node_stmt.setParseAction(push_node_stmt)
        attr_stmt.setParseAction(push_default_stmt)
        subgraph.setParseAction(push_subgraph_stmt)
        graph_stmt.setParseAction(push_graph_stmt)
        graphparser.setParseAction(push_top_graph_stmt)

    return graphparser
return f"{node.fn}({', '.join(pprint(a) for a in node.args)})" elif isinstance(node, Nary): return "({})".format(f" {node.fn} ".join(pprint(a) for a in node.args)) else: return node EXPR = Forward() BEXPR = Forward() OFFSET = LBRAK + EXPR + RBRAK EXPRATOM = ( ppc.signed_integer | (VARNAME + Optional(OFFSET, default=None) + Keyword("of").suppress() + VARNAME).setParseAction(lambda toks: OfNode(*toks)) | # noqa: E501 (Combine(BUILTIN + LPAR) + Group(delimitedList(EXPR)) + RPAR).setParseAction(lambda toks: BuiltIn(*toks)) # noqa: E501 ) EXPR <<= infixNotation( EXPRATOM, [("%", 2, opAssoc.LEFT, lambda x: BinOp(*x[0])), (oneOf("* /"), 2, opAssoc.LEFT, lambda x: BinOp(*x[0])), (oneOf("+ - "), 2, opAssoc.LEFT, lambda x: BinOp(*x[0])), (oneOf("> < = >= <= !="), 2, opAssoc.LEFT, lambda x: BinOp(*x[0]))]) BEXPR <<= infixNotation( EXPR, [ # Note: "not" is implemented as a BuiltIn (oneOf("and or"), 2, opAssoc.LEFT, lambda x: BinOp(*x[0]))
def convertToFloat(s, loc, toks):
    """pyparsing parse action: convert the matched text to a float.

    :param s: the original string being parsed
    :param loc: match location in ``s``
    :param toks: matched tokens; ``toks[0]`` is the candidate float text
    :raises ParseException: if the loosely-matched text is not a valid float
    """
    try:
        return float(toks[0])
    except ValueError:
        # Fixed two defects here:
        #  - the original bare `except:` also swallowed KeyboardInterrupt etc.;
        #    float() only raises ValueError for bad text input.
        #  - ParseException's positional arguments are (pstr, loc, msg); the
        #    original called ParseException(loc, msg), passing the location
        #    where the input string belongs.
        raise ParseException(s, loc, "invalid float format %s" % toks[0])


exponent = CaselessLiteral("e") + Optional(sign) + Word(nums)

# note that almost all these fields are optional,
# and this can match almost anything. We rely on Pythons built-in
# float() function to clear out invalid values - loosely matching like this
# speeds up parsing quite a lot
floatingPointConstant = Combine(
    Optional(sign) +
    Optional(Word(nums)) +
    Optional(Literal(".") + Optional(Word(nums))) +
    Optional(exponent)
)
floatingPointConstant.setParseAction(convertToFloat)

number = floatingPointConstant

# same as FP constant but don't allow a - sign
nonnegativeNumber = Combine(
    Optional(Word(nums)) +
    Optional(Literal(".") + Optional(Word(nums))) +
    Optional(exponent)
)
nonnegativeNumber.setParseAction(convertToFloat)
(even binary content) inside the structure. This is done by pre- sizing the data with the NUMBER similar to Dan Bernstein's netstrings setup. SPACE White space is basically ignored. This is interesting because since Stackish is serialized consistently this means you can use \n as the separation character and perform reasonable diffs on two structures. """ from pyparsing import Suppress,Word,nums,alphas,alphanums,Combine,oneOf,\ Optional,QuotedString,Forward,Group,ZeroOrMore,srange MARK, UNMARK, AT, COLON, QUOTE = map(Suppress, "[]@:'") NUMBER = Word(nums) NUMBER.setParseAction(lambda t: int(t[0])) FLOAT = Combine(oneOf("+ -") + Word(nums) + "." + Optional(Word(nums))) FLOAT.setParseAction(lambda t: float(t[0])) STRING = QuotedString('"', multiline=True) WORD = Word(alphas, alphanums + "_:") ATTRIBUTE = Combine(AT + WORD) strBody = Forward() def setBodyLength(tokens): strBody << Word(srange(r'[\0x00-\0xffff]'), exact=int(tokens[0])) return "" BLOB = Combine(QUOTE + Word(nums).setParseAction(setBodyLength) + COLON + strBody + QUOTE)
# Grammar for prefix (S-expression style) arithmetic/boolean terms, converted
# to infix via parse actions -- the operator tokens (INTS_MODULUS_TOTAL,
# and/or/not) suggest SMT-LIB input; TODO confirm against callers.
from pyparsing import alphas, alphanums, Combine, delimitedList, Forward, Group, Literal, \
    Keyword, nums, oneOf, Optional, ParserElement, Suppress, White, Word

# Packrat memoization: required for acceptable performance on the
# recursive `expr` definition below.
ParserElement.enablePackrat()

LPAR, RPAR = map(Suppress, '()')
const = Literal('true') | Literal('false')
# Arithmetic operators; INTS_MODULUS_TOTAL is rewritten to '%' on the fly.
AOps = oneOf('INTS_MODULUS_TOTAL * / + -').setParseAction(
    lambda s, l, t: ['%'] if t[0] == 'INTS_MODULUS_TOTAL' else t)
# Boolean operators, rewritten to single-character forms.
BOps = (Keyword('and').setParseAction(lambda s, l, t: ['&']) |
        Keyword('not').setParseAction(lambda s, l, t: ['!']) |
        Keyword('or').setParseAction(lambda s, l, t: ['|']))
ROps = oneOf('< > <= >= =')
val = Combine(Optional('-') + Word(nums))
var = Word(alphas + '_:$', alphanums + '_:$')
term = val | var
let = Forward()
pred = Forward()
stmt = Forward()
expr = Forward()
# A prefix application "(op a b c)" is converted to infix by interleaving the
# operator between operands (joinit is a helper defined elsewhere in this
# file).  Special case: unary minus "(- x)" becomes ['0 -', x].
expr << (term |
         (LPAR + AOps + Group(delimitedList(expr, delim=White(' '))) + RPAR
          ).setParseAction(lambda s, l, t: [list(joinit(t[1], t[0]))] if not (
              t[0] == '-' and len(t[1]) == 1) else [['0 -', t[1][0]]]) |
         (LPAR + expr + RPAR))
See http://pyparsing.wikispaces.com/message/view/home/620225 and the discussion of the "-" operator in the docs. ''' ParsingTmp.keywords.append(x) W = Where O = Optional S = Suppress number = Word(nums) point = Literal('.') e = CaselessLiteral('E') plusorminus = Literal('+') | Literal('-') integer = Combine(O(plusorminus) + number) # warnings.warn('TODO: negative float number') floatnumber = Combine(integer + (point + O(number)) ^ (e + integer)) integer.setParseAction(lambda tokens: SimpleRValue(int(tokens[0]))) floatnumber.setParseAction(lambda tokens: SimpleRValue(float(tokens[0]))) pi = Keyword('pi').setParseAction( lambda tokens: SimpleRValue(math.pi, 'pi')) # @UnusedVariable def isnumber(x): # These are scalar quantities that we can compare (=,>,>=, etc.) if isinstance(x, Number): return True try: # Slow, do it only once (TODO) import numpy
ident = Word(alphas, alphanums + "_$").setName("identifier") columnName = Upcase(delimitedList(ident, ".", combine=True)) columnNameList = Group(delimitedList(columnName)) tableName = Upcase(delimitedList(ident, ".", combine=True)) tableNameList = Group(delimitedList(tableName)) whereExpression = Forward() and_ = Keyword("and", caseless=True) or_ = Keyword("or", caseless=True) in_ = Keyword("in", caseless=True) E = CaselessLiteral("E") binop = oneOf("= != < > >= <= eq ne lt le gt ge", caseless=True) arithSign = Word("+-", exact=1) realNum = Combine( Optional(arithSign) + (Word(nums) + "." + Optional(Word(nums)) | ("." + Word(nums))) + Optional(E + Optional(arithSign) + Word(nums))) intNum = Combine( Optional(arithSign) + Word(nums) + Optional(E + Optional("+") + Word(nums))) columnRval = realNum | intNum | quotedString | columnName # need to add support for alg expressions whereCondition = Group((columnName + binop + columnRval) | (columnName + in_ + "(" + delimitedList(columnRval) + ")") | (columnName + in_ + "(" + selectStmt + ")") | ("(" + whereExpression + ")")) whereExpression << whereCondition + ZeroOrMore((and_ | or_) + whereExpression) # define the grammar selectStmt << (selectToken + ('*' | columnNameList).setResultsName("columns") + fromToken + tableNameList.setResultsName("tables") +
def graph_definition():
    """Build (once) and return the pyparsing grammar for the DOT graph language.

    Memoized in the module-level ``graphparser`` global.  This is the newer
    variant of the pydot grammar: keywords are case-insensitive, HTML labels
    use a proper nested-expression parser, and a file may contain multiple
    graphs (OneOrMore at the top level).  Parse actions (the ``push_*``
    callbacks defined elsewhere) build the graph as a side effect.
    """
    global graphparser

    if not graphparser:
        # punctuation
        colon = Literal(":")
        lbrace = Literal("{")
        rbrace = Literal("}")
        lbrack = Literal("[")
        rbrack = Literal("]")
        lparen = Literal("(")
        rparen = Literal(")")
        equals = Literal("=")
        comma = Literal(",")
        dot = Literal(".")
        slash = Literal("/")
        bslash = Literal("\\")
        star = Literal("*")
        semi = Literal(";")
        at = Literal("@")
        minus = Literal("-")

        # keywords -- DOT keywords are case-insensitive
        strict_ = CaselessLiteral("strict")
        graph_ = CaselessLiteral("graph")
        digraph_ = CaselessLiteral("digraph")
        subgraph_ = CaselessLiteral("subgraph")
        node_ = CaselessLiteral("node")
        edge_ = CaselessLiteral("edge")

        # token definitions
        identifier = Word(alphanums + "_.").setName("identifier")

        # Quotes are kept (unquoteResults=False) so output can round-trip.
        double_quoted_string = QuotedString('"', escChar="\\", multiline=True,
                                            unquoteResults=False)  # dblQuotedString

        _noncomma = "".join([c for c in printables if c != ","])
        alphastring_ = OneOrMore(CharsNotIn(_noncomma + ' '))

        def parse_html(s, loc, toks):
            # Re-wrap the nested-expression match back into "<...>" form.
            return '<%s>' % ''.join(toks[0])

        opener = '<'
        closer = '>'
        # DOT HTML labels: properly balanced <...> via nestedExpr.
        html_text = nestedExpr(opener, closer,
                               (CharsNotIn(opener + closer))
                               ).setParseAction(parse_html).leaveWhitespace()

        # Alternatives are tried left-to-right (pyparsing MatchFirst).
        ID = (identifier | html_text |
              double_quoted_string |  # .setParseAction(strip_quotes) |
              alphastring_).setName("ID")

        float_number = Combine(Optional(minus) +
                               OneOrMore(Word(nums + "."))).setName("float_number")

        righthand_id = (float_number | ID).setName("righthand_id")

        port_angle = (at + ID).setName("port_angle")

        port_location = (OneOrMore(Group(colon + ID)) |
                         Group(colon + lparen + ID + comma + ID + rparen)).setName("port_location")

        port = (Group(port_location + Optional(port_angle)) |
                Group(port_angle + Optional(port_location))).setName("port")

        node_id = (ID + Optional(port))
        # Unlike the older grammar variant, '=' is NOT suppressed here.
        a_list = OneOrMore(ID + Optional(equals + righthand_id) +
                           Optional(comma.suppress())).setName("a_list")

        attr_list = OneOrMore(lbrack.suppress() + Optional(a_list) +
                              rbrack.suppress()).setName("attr_list")

        attr_stmt = (Group(graph_ | node_ | edge_) + attr_list).setName("attr_stmt")

        edgeop = (Literal("--") | Literal("->")).setName("edgeop")

        stmt_list = Forward()
        graph_stmt = Group(lbrace.suppress() + Optional(stmt_list) +
                           rbrace.suppress() +
                           Optional(semi.suppress())).setName("graph_stmt")

        # Edges may start/end at nodes, subgraphs, or anonymous {...} blocks.
        edge_point = Forward()

        edgeRHS = OneOrMore(edgeop + edge_point)
        edge_stmt = edge_point + edgeRHS + Optional(attr_list)

        subgraph = Group(subgraph_ + Optional(ID) + graph_stmt).setName("subgraph")

        edge_point << Group(subgraph | graph_stmt | node_id).setName('edge_point')

        node_stmt = (node_id + Optional(attr_list) +
                     Optional(semi.suppress())).setName("node_stmt")

        assignment = (ID + equals + righthand_id).setName("assignment")

        # Order matters: assignment must precede edge_stmt, etc.
        stmt = (assignment | edge_stmt | attr_stmt | subgraph | graph_stmt |
                node_stmt).setName("stmt")
        stmt_list << OneOrMore(stmt + Optional(semi.suppress()))

        # A file may contain several graphs.
        graphparser = OneOrMore((Optional(strict_) + Group((graph_ | digraph_)) +
                                 Optional(ID) + graph_stmt).setResultsName("graph"))

        singleLineComment = Group("//" + restOfLine) | Group("#" + restOfLine)

        # actions
        graphparser.ignore(singleLineComment)
        graphparser.ignore(cStyleComment)

        assignment.setParseAction(push_attr_list)
        a_list.setParseAction(push_attr_list)
        edge_stmt.setParseAction(push_edge_stmt)
        node_stmt.setParseAction(push_node_stmt)
        attr_stmt.setParseAction(push_default_stmt)
        subgraph.setParseAction(push_subgraph_stmt)
        graph_stmt.setParseAction(push_graph_stmt)
        graphparser.setParseAction(push_top_graph_stmt)

    return graphparser
def pushFirst(s, loc, toks):
    """pyparsing parse action: push the first matched token onto the
    module-level expression stack (``exprStack``, defined elsewhere).

    The first parameter was renamed from ``str`` to ``s``: it shadowed the
    builtin ``str``.  pyparsing invokes parse actions positionally, so the
    rename is caller-compatible.
    """
    exprStack.append(toks[0])


def assignVar(s, loc, toks):
    """pyparsing parse action: record an assignment target on the
    module-level variable stack (``varStack``, defined elsewhere)."""
    varStack.append(toks[0])


# define grammar
point = Literal('.')
e = CaselessLiteral('E')
plusorminus = Literal('+') | Literal('-')
number = Word(nums)
integer = Combine(Optional(plusorminus) + number)
# Optionally signed float with optional fraction and exponent, e.g. "-1.5E+3".
floatnumber = Combine(integer +
                      Optional(point + Optional(number)) +
                      Optional(e + integer))
ident = Word(alphas, alphanums + '_')

plus = Literal("+")
minus = Literal("-")
mult = Literal("*")
div = Literal("/")
lpar = Literal("(").suppress()
rpar = Literal(")").suppress()
addop = plus | minus
multop = mult | div
expop = Literal("^")
assign = Literal("=")
E = CaselessLiteral("E") EQ = '=' LOGICAL_OPS = ['and', 'or'] NE = '!=' NE_OPS = [NE, '!~', '>', '<'] NOT = 'not' PARENTHESES = ['(', ')'] BINOP = \ oneOf( "== %s =? =^ =$ =~ !~ intersects >= > <= < before after" % NE, caseless=True) ARITH_SIGN = Word("+-", exact=1) REAL_NUM = \ Combine( Optional(ARITH_SIGN) + (Word(nums) + "." + Optional(Word(nums)) | ("." + Word(nums))) + Optional(E + Optional(ARITH_SIGN) + Word(nums))) INT_NUM = \ Combine( Optional(ARITH_SIGN) + Word(nums) + Optional(E + Optional("+") + Word(nums))) R_VALUE = REAL_NUM | INT_NUM | quotedString | FIELD_NAME OXTL_EXPRESSION = Forward() OXTL_CLAUSE = Group((FIELD_NAME + BINOP + R_VALUE) | (FIELD_NAME + IN_ + PARENTHESES[0] + delimitedList(R_VALUE) + PARENTHESES[1]) | (FIELD_NAME + IN_ + PARENTHESES[0] + Forward() + PARENTHESES[1]) | (PARENTHESES[0] + OXTL_EXPRESSION + PARENTHESES[1])
val2 = _eval(archive, context, app, exp_context) val1 = or_(val1, val2) return val1 class EvalGroupOp(object): def __init__(self, tokens): self._evals = [t.eval for t in tokens[0][0::2]] def eval(self, archive, context, app, exp_context): val = [eval(archive, context, app, exp_context) for eval in self._evals] return val integer = Word(nums) real = Combine(Word(nums) + '.' + Word(nums)) constant = (Literal('True') | Literal('False') | Literal('None') | Literal('yes') | Literal('no')) + WordEnd() model_reference = Regex(r'([\w\.]*#[\w\.]+)') variable = Regex(r'([a-zA-Z0-9\._]+)') string = QuotedString('"', escChar="\\") | QuotedString('\'', escChar="\\") operand = model_reference | real | integer | constant | string | variable plusop = oneOf('+ -') multop = oneOf('* / // %')
else: query &= t return query NO_BRTS = printables.replace('(', '').replace(')', '') SINGLE = Word(NO_BRTS.replace('*', '')) WILDCARDS = Optional('*') + SINGLE + Optional('*') + WordEnd(wordChars=NO_BRTS) QUOTED = quotedString.setParseAction(removeQuotes) OPER_AND = CaselessLiteral('and') OPER_OR = CaselessLiteral('or') OPER_NOT = '-' TERM = Combine(Optional(Word(alphas).setResultsName('meta') + ':') + (QUOTED.setResultsName('query') | WILDCARDS.setResultsName('query'))) TERM.setParseAction(createQ) EXPRESSION = operatorPrecedence(TERM, [ (OPER_NOT, 1, opAssoc.RIGHT), (OPER_OR, 2, opAssoc.LEFT), (Optional(OPER_AND, default='and'), 2, opAssoc.LEFT)]) EXPRESSION.setParseAction(unionQ) QUERY = OneOrMore(EXPRESSION) + StringEnd() QUERY.setParseAction(unionQ) def advanced_search(pattern): """Parse the grammar of a pattern
#=============================================================================== FREE_TEXT = Word(printables + ' ', excludeChars='()') INTEGER = Word(nums) ID_TEXT = Word(alphanums, alphanums + ':/_-.') ONTOLOGY_SUFFIX = (Keyword('ABI') | Keyword('FM') | Keyword('FMA') | Keyword('ILX') | Keyword('MA') | Keyword('NCBITaxon') | Keyword('UBERON')) ONTOLOGY_ID = Combine(ONTOLOGY_SUFFIX + ':' + ID_TEXT) #=============================================================================== IDENTIFIER = Group(Keyword('id') + Suppress('(') + ID_TEXT + Suppress(')')) MODELS = Group(Keyword('models') + Suppress('(') + ONTOLOGY_ID + Suppress(')')) ZOOM_LEVEL = INTEGER ZOOM = Group( Keyword('zoom') + Suppress('(') + Group(ZOOM_LEVEL + Suppress(',') + ZOOM_LEVEL + Suppress(',') + ZOOM_LEVEL) + Suppress(')')) LAYER_DIRECTIVES = IDENTIFIER | MODELS | ZOOM LAYER_DIRECTIVE = '.' + ZeroOrMore(LAYER_DIRECTIVES)
class NginxParser(object):
    # pylint: disable=expression-not-assigned
    """A class that parses nginx configuration with pyparsing.

    The grammar is built once as class attributes; instances only carry the
    source text.  ``parse()`` returns the pyparsing result, ``as_list()`` the
    plain nested-list form.  Note: '^' below is pyparsing ``Or``
    (longest-match), '|' is ``MatchFirst`` (first-match).
    """

    # constants
    space = Optional(White())
    nonspace = Regex(r"\S+")
    left_bracket = Literal("{").suppress()
    right_bracket = space.leaveWhitespace() + Literal("}").suppress()
    semicolon = Literal(";").suppress()
    key = Word(alphanums + "_/+-.")
    # e.g. "$host" -- '$' followed by anything that is not a delimiter.
    dollar_var = Combine(Literal('$') + Regex(r"[^\{\};,\s]+"))
    condition = Regex(r"\(.+\)")
    # Matches anything that is not a special character, and ${SHELL_VARS}, AND
    # any chars in single or double quotes
    # All of these COULD be upgraded to something like
    # https://stackoverflow.com/a/16130746
    dquoted = Regex(r'(\".*\")')
    squoted = Regex(r"(\'.*\')")
    nonspecial = Regex(r"[^\{\};,]")
    varsub = Regex(r"(\$\{\w+\})")
    # nonspecial nibbles one character at a time, but the other objects take
    # precedence.  We use ZeroOrMore to allow entries like "break ;" to be
    # parsed as assignments
    value = Combine(ZeroOrMore(dquoted | squoted | varsub | nonspecial))

    location = CharsNotIn("{};," + string.whitespace)
    # modifier for location uri [ = | ~ | ~* | ^~ ]
    # ("~*" must be tried before "~" -- MatchFirst would otherwise stop early)
    modifier = Literal("=") | Literal("~*") | Literal("~") | Literal("^~")

    # rules
    comment = space + Literal('#') + restOfLine

    assignment = space + key + Optional(space + value, default=None) + semicolon
    location_statement = space + Optional(modifier) + Optional(space + location + space)
    if_statement = space + Literal("if") + space + condition + space
    charset_map_statement = space + Literal(
        "charset_map") + space + value + space + value

    map_statement = space + Literal(
        "map") + space + nonspace + space + dollar_var + space
    # This is NOT an accurate way to parse nginx map entries; it's almost
    # certainly too permissive and may be wrong in other ways, but it should
    # preserve things correctly in most or all cases.
    #
    #  - I can neither prove nor disprove that it is correct wrt all escaped
    #    semicolon situations
    # Addresses https://github.com/fatiherikli/nginxparser/issues/19
    map_pattern = Regex(r'".*"') | Regex(r"'.*'") | nonspace
    map_entry = space + map_pattern + space + value + space + semicolon
    map_block = Group(
        Group(map_statement).leaveWhitespace() +
        left_bracket +
        Group(ZeroOrMore(Group(comment | map_entry)) + space).leaveWhitespace() +
        right_bracket)

    block = Forward()

    # key could for instance be "server" or "http", or "location" (in which case
    # location_statement needs to have a non-empty location)

    block_begin = (Group(space + key + location_statement) ^
                   Group(if_statement) ^
                   Group(charset_map_statement)).leaveWhitespace()

    # Recursive: a block body may itself contain nested blocks / map blocks.
    block_innards = Group(ZeroOrMore(Group(comment | assignment) |
                                     block | map_block) + space).leaveWhitespace()

    block << Group(block_begin + left_bracket + block_innards + right_bracket)

    script = OneOrMore(Group(comment | assignment) ^ block ^ map_block) + space + stringEnd
    # Tabs are significant in nginx configs; do not expand them.
    script.parseWithTabs().leaveWhitespace()

    def __init__(self, source):
        # source: the raw nginx configuration text to parse.
        self.source = source

    def parse(self):
        """Returns the parsed tree."""
        return self.script.parseString(self.source)

    def as_list(self):
        """Returns the parsed tree as a list."""
        return self.parse().asList()
) attr.leaveWhitespace() attr.setName('attr') hexdigits = Word(string.hexdigits, exact=2) hexdigits.setName('hexdigits') escaped = Suppress(Literal('\\')) + hexdigits escaped.setName('escaped') def _p_escaped(s, l, t): text = t[0] return chr(int(text, 16)) escaped.setParseAction(_p_escaped) value = Combine(OneOrMore(CharsNotIn('*()\\\0') | escaped)) value.setName('value') equal = Literal("=") equal.setParseAction(lambda s, l, t: pureldap.LDAPFilter_equalityMatch) approx = Literal("~=") approx.setParseAction(lambda s, l, t: pureldap.LDAPFilter_approxMatch) greater = Literal(">=") greater.setParseAction(lambda s, l, t: pureldap.LDAPFilter_greaterOrEqual) less = Literal("<=") less.setParseAction(lambda s, l, t: pureldap.LDAPFilter_lessOrEqual) filtertype = equal | approx | greater | less filtertype.setName('filtertype') simple = attr + filtertype + value simple.leaveWhitespace() simple.setName('simple')
class FilterExpression:
    """Filter factory based on filter expressions such as
    "name~John and (age>18 or consent:true)".

    Filters consist of:

    - boolean expressions using and, not, or and parentheses.
    - field expressions, which consist of:
        - a key name (with optional wildcards) or _index_
        - an operator
        - a value (with type appropriate to the operator)
    - operators are one of:
        - numeric comparisons: =, !=, <, <=, >, >=
        - length comparisons: #=, #<, etc
        - string comparisons: =, !=, ~ (regex match), !~
        - containment: >> (contains), !>>
        - existence: KEY:exists (has value that's not NaN or None),
          KEY:true (has true value)

    Use ``make_filter(expression)`` to compile an expression into a callable
    ``f(record_dict, index=None) -> bool``.
    """

    # --- number literal grammar; the "float" results name marks tokens that
    # force float (rather than int) conversion in the parse action below.
    sign = oneOf("+ -")
    integer = Word(nums)
    number_base = (integer + Optional("." + Optional(integer))("float")) \
        | Literal(".")("float") + integer
    number_exponent = CaselessLiteral("E")("float") + Optional(sign) + integer
    number = Combine(
        Optional(sign) + number_base + Optional(number_exponent)
    ).setParseAction(lambda t: float(t[0]) if t.float else int(t[0]))

    # Lift a binary function to compare len(x) against y (the "#" operators).
    onlen = lambda f: (lambda x, y: f(len(x), y))

    num_ops = {
        "<": operator.lt,
        "<=": operator.le,
        "=": operator.eq,
        "!=": operator.ne,
        ">": operator.gt,
        ">=": operator.ge,
        "#<": onlen(operator.lt),
        "#<=": onlen(operator.le),
        "#=": onlen(operator.eq),
        "#!=": onlen(operator.ne),
        "#>": onlen(operator.gt),
        "#>=": onlen(operator.ge),
    }
    str_ops = {
        "=": lambda x, y: x == str(y),
        "!=": lambda x, y: x != str(y),
        "~": lambda x, y: re.search(y, str(x)),
        "!~": lambda x, y: not re.search(y, str(x)),
        ">>": lambda x, y: y in x,
        "!>>": lambda x, y: y not in x,
    }
    # "non" is a helper defined elsewhere -- presumably "is None/NaN";
    # TODO confirm against its definition.
    exist_ops = {
        ":": lambda x, y: {
            "exists": not non(x),
            "true": bool(x)
        }[y.lower()]
    }

    # Map an operator token to its function, wrapped so that any exception
    # during evaluation yields False (ignoring_exceptions is defined
    # elsewhere -- TODO confirm its contract).
    oneOfOpMap = lambda map: oneOf(list(map.keys())).setParseAction(
        lambda t: ignoring_exceptions(map[t[0]], False))
    num_op = oneOfOpMap(num_ops)
    str_op = oneOfOpMap(str_ops)
    exist_op = oneOfOpMap(exist_ops)

    quoted_string = QuotedString('"', "\\") | QuotedString("'", "\\")
    key_value = Word(alphas + alphas8bit + "*?[]_") | quoted_string
    str_value = Word(alphas + alphas8bit + "_") | quoted_string
    exist_value = CaselessLiteral("True") | CaselessLiteral("Exists")
    # KEY OP VALUE, grouped as a single nested list so _eval_parse can
    # recurse structurally.
    base_expr = (key_value + (str_op + str_value
                              | num_op + number
                              | exist_op + exist_value)).setParseAction(lambda t: [t])

    expr = infixNotation(
        base_expr,
        [
            (Literal("not").setParseAction(lambda t: operator.not_), 1, opAssoc.RIGHT),
            (Literal("and").setParseAction(lambda t: operator.and_), 2, opAssoc.LEFT),
            (Literal("or").setParseAction(lambda t: operator.or_), 2, opAssoc.LEFT),
        ],
    )

    @classmethod
    def _eval_parse(cls, parse, d, i):
        # Recursively evaluate the nested-list parse tree against record
        # dict `d` and row index `i`.
        if not isinstance(parse, list):
            return parse
        elif len(parse) == 1:
            return cls._eval_parse(parse[0], d, i)
        elif callable(parse[0]):
            # unary operator ("not")
            return parse[0](cls._eval_parse(parse[1], d, i))
        else:
            x, y = cls._eval_parse(parse[0], d, i), cls._eval_parse(parse[2], d, i)
            if x == "_index_":
                # special key: compare against the row index instead of a field
                return parse[1](i, y)
            elif isinstance(x, str):
                # key may contain fnmatch wildcards; any matching field passes
                return any(parse[1](d[k], y)
                           for k in d.keys() if fnmatch.fnmatch(k, x))
            else:
                return parse[1](x, y)

    @classmethod
    def make_filter(cls, string):
        """Generates a filter function from a filter expression."""
        parse = cls.expr.parseString(string, parseAll=True).asList()
        return lambda d, i=None: cls._eval_parse(parse, d, i)
def __init__(self):
    """Build an arithmetic/comparison expression parser (self.arith_expr).

    Parse actions wrap matched tokens in small Eval* classes; calling
    ``.eval(vars)`` on the parse result evaluates the expression with the
    given variable bindings.
    """
    from pyparsing import Word, nums, alphas, Combine, oneOf, opAssoc, operatorPrecedence

    # Define the parser
    integer = Word(nums).setParseAction(lambda t: int(t[0]))
    # real has no conversion action: it reaches EvalConstant as a string
    # and is converted there.
    real = Combine(Word(nums) + "." + Word(nums))
    variable = Word(alphas, exact=1)
    operand = real | integer | variable

    # Operators (exposed on the instance; note the 'comparision' spelling
    # is part of this public dict and is kept as-is)
    self.operators = {
        'sign': oneOf('+ -'),
        'multiply': oneOf('* /'),
        'plus': oneOf('+ -'),
        'comparision': oneOf('< <= > >= != = <> LT GT LE GE EQ NE'),
    }

    def operator_operands(token_list):
        """generator to extract operators and operands in pairs."""
        it = iter(token_list)
        while True:
            try:
                o1 = next(it)
                o2 = next(it)
                yield (o1, o2)
            except StopIteration:
                break

    class EvalConstant(object):
        """Class to evaluate a parsed constant or variable."""

        def __init__(self, tokens):
            self.value = tokens[0]

        def eval(self, vars):
            # Variable lookup first; otherwise try int, fall back to float.
            # NOTE(review): the bare except also hides unexpected errors, and
            # float() itself will raise for non-numeric text.
            if self.value in vars:
                return vars[self.value]
            else:
                try:
                    return int(self.value)
                except:
                    return float(self.value)

    class EvalAddOp(object):
        """Class to evaluate addition and subtraction expressions."""

        def __init__(self, tokens):
            self.value = tokens[0]

        def eval(self, vars):
            # Left-to-right fold over "operand (op operand)*".
            sum = self.value[0].eval(vars)
            for op, val in operator_operands(self.value[1:]):
                if op == '+':
                    sum += val.eval(vars)
                if op == '-':
                    sum -= val.eval(vars)
            return sum

    class EvalSignOp(object):
        """Class to evaluate expressions with a leading + or - sign."""

        def __init__(self, tokens):
            self.sign, self.value = tokens[0]

        def eval(self, vars_):
            mult = {'+': 1, '-': -1}[self.sign]
            return mult * self.value.eval(vars_)

    class EvalMultOp(object):
        """Class to evaluate multiplication and division expressions."""

        def __init__(self, tokens):
            self.operator_map = {
                '*': lambda a, b: a * b,
                '/': lambda a, b: a / b,
            }
            self.value = tokens[0]

        def eval(self, vars):
            prod = self.value[0].eval(vars)
            for op, val in operator_operands(self.value[1:]):
                fn = self.operator_map[op]
                val2 = val.eval(vars)
                prod = fn(prod, val2)
            return prod

    class EvalComparisonOp(object):
        """Class to evaluate comparison expressions"""

        def __init__(self, tokens):
            self.value = tokens[0]
            self.operator_map = {
                "<": lambda a, b: a < b,
                "<=": lambda a, b: a <= b,
                ">": lambda a, b: a > b,
                ">=": lambda a, b: a >= b,
                "!=": lambda a, b: a != b,
                "=": lambda a, b: a == b,
                "LT": lambda a, b: a < b,
                "LE": lambda a, b: a <= b,
                "GT": lambda a, b: a > b,
                "GE": lambda a, b: a >= b,
                "NE": lambda a, b: a != b,
                "EQ": lambda a, b: a == b,
                "<>": lambda a, b: a != b,
            }

        def eval(self, vars):
            # Chained comparison semantics (like Python's a < b < c):
            # every adjacent pair must hold, otherwise False.
            val1 = self.value[0].eval(vars)
            for op, val in operator_operands(self.value[1:]):
                fn = self.operator_map[op]
                val2 = val.eval(vars)
                if not fn(val1, val2):
                    break
                val1 = val2
            else:
                return True
            return False

    operand.setParseAction(EvalConstant)
    # Precedence: sign > multiply/divide > add/subtract > comparison.
    self.arith_expr = operatorPrecedence(operand, [
        (self.operators['sign'], 1, opAssoc.RIGHT, EvalSignOp),
        (self.operators['multiply'], 2, opAssoc.LEFT, EvalMultOp),
        (self.operators['plus'], 2, opAssoc.LEFT, EvalAddOp),
        (self.operators['comparision'], 2, opAssoc.LEFT, EvalComparisonOp),
    ])
# pyParsing tokens # --------------------------------------------------------------------- # Copyright (C) 2007-2015 The NOC Project # See LICENSE for details # --------------------------------------------------------------------- # Third-party modules from pyparsing import (alphanums, Combine, Group, LineEnd, nums, Suppress, Word, restOfLine) # Match \s+ SPACE = Suppress(Word(" ").leaveWhitespace()) # Match \n\s+ INDENT = Suppress(LineEnd() + SPACE) # Skip whole line LINE = Suppress(restOfLine) # REST = SPACE + restOfLine # Sequence of numbers DIGITS = Word(nums) # Sequence of letters and numbers ALPHANUMS = Word(alphanums) # Number from 0 to 255 OCTET = Word(nums, max=3) # IPv4 address IPv4_ADDRESS = Combine(OCTET + "." + OCTET + "." + OCTET + "." + OCTET) # RD RD = Combine(Word(nums) + Word(":") + Word(nums))
if res: res += [res[-3], rdflib.RDF.rest, b, b, rdflib.RDF.first, x] else: res += [b, rdflib.RDF.first, x] res += [b, rdflib.RDF.rest, rdflib.RDF.nil] if DEBUG: print "CollectionOut", res return [res] # SPARQL Grammar from http://www.w3.org/TR/sparql11-query/#grammar # ------ TERMINALS -------------- # [139] IRIREF ::= '<' ([^<>"{}|^`\]-[#x00-#x20])* '>' IRIREF = Combine( Suppress('<') + Regex(r'[^<>"{}|^`\\%s]*' % ''.join('\\x%02X' % i for i in range(33))) + Suppress('>')) IRIREF.setParseAction(lambda x: rdflib.URIRef(x[0])) # [164] P_CHARS_BASE ::= [A-Z] | [a-z] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x02FF] | [#x0370-#x037D] | [#x037F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF] if sys.maxunicode == 0xffff: # this is narrow python build (default on windows/osx) # this means that unicode code points over 0xffff are stored # as several characters, which in turn means that regex character # ranges with these characters do not work. # See # * http://bugs.python.org/issue12729 # * http://bugs.python.org/issue12749 # * http://bugs.python.org/issue3665 #
def __init__(self, funcname, selected_fields=False, userargs=None,
             default_fields=None, parent=None, example=None, text=None):
    """Build the argument-entry widget for a tag function.

    funcname is the name of the function you want to use (can be either a
    string, or a functions.py function).  selected_fields, when given,
    extends the field choices offered in the field combobox.  userargs is
    the default values you want to fill the controls in the dialog with
    [make sure they don't exceed the number of arguments of funcname].
    example/text are used for the live preview (see showexample).
    """
    QWidget.__init__(self, parent)
    # Grammar for one line of the function's docstring: a comma-separated
    # list of quoted or bare tokens (label, control type, defaults...).
    identifier = QuotedString('"') | Combine(
        Word(alphanums + ' !"#$%&\'()*+-./:;<=>?@[\\]^_`{|}~'))
    tags = delimitedList(identifier)
    self.func = Function(funcname)
    # First docstring line is the description; the rest describe controls.
    docstr = self.func.doc[1:]
    self.vbox = QVBoxLayout()
    self.retval = []
    self._selectedFields = selected_fields

    if selected_fields:
        fields = ['__all'] + sorted(INFOTAGS) + \
            selected_fields + gettaglist()
    else:
        fields = ['__selected', '__all'] + sorted(INFOTAGS) + \
            gettaglist()

    # Editable combobox for choosing which field(s) the function acts on.
    self.tagcombo = QComboBox(self)
    self.tagcombo.setToolTip(FIELDS_TOOLTIP)
    self.tagcombo.setEditable(True)
    self.tagcombo.setCompleter(QCompleter(self.tagcombo))
    self.tagcombo.addItems(fields)
    self.tagcombo.editTextChanged.connect(self.showexample)

    # Hide the field chooser for functions that take no fields.
    if self.func.function not in functions.no_fields:
        label = QLabel(translate('Defaults', "&Fields"))
        self.vbox.addWidget(label)
        self.vbox.addWidget(self.tagcombo)
        label.setBuddy(self.tagcombo)
    else:
        self.tagcombo.setVisible(False)

    self.example = example
    self._text = text

    # Functions with a dedicated dialog bypass the generic control loop.
    if self.func.function in functions_dialogs.dialogs:
        vbox = QVBoxLayout()
        vbox.addWidget(self.tagcombo)
        self.widget = functions_dialogs.dialogs[self.func.function](self)
        vbox.addWidget(self.widget)
        vbox.addStretch()
        self.setLayout(vbox)
        self.setMinimumSize(self.sizeHint())
        self.setArguments(default_fields, userargs)
        return
    else:
        self.widget = None
        self.textcombos = []

    # Loop that creates all the controls, one per docstring line.
    self.controls = []
    for argno, line in enumerate(docstr):
        args = tags.parseString(line)
        label = args[0]
        ctype = args[1]
        default = args[2:]
        control, func, label = self._createControl(label, ctype, default)
        self.retval.append(func)
        self.controls.append(control)
        # Re-render the preview whenever the control's value changes.
        getattr(control, self.signals[ctype]).connect(self.showexample)
        if label:
            self.vbox.addWidget(label)
        self.vbox.addWidget(control)

    self.setArguments(default_fields, userargs)
    self.vbox.addStretch()
    self.setLayout(self.vbox)
    self.setMinimumSize(self.sizeHint())
# The aim of this parser is not to support database application, # but to create automagically a pgn annotated reading the log console file # of a lecture of ICC (Internet Chess Club), saved by Blitzin. # Of course you can modify the Abstract Syntax Tree to your purpose. # # Copyright 2004, by Alberto Santini http://www.albertosantini.it/chess/ # from pyparsing import alphanums, nums, quotedString from pyparsing import Combine, Forward, Group, Literal, oneOf, OneOrMore, Optional, Suppress, ZeroOrMore, White, Word from pyparsing import ParseException # # define pgn grammar # tag = Suppress("[") + Word(alphanums) + Combine(quotedString) + Suppress("]") comment = Suppress("{") + Word(alphanums + " ") + Suppress("}") dot = Literal(".") piece = oneOf("K Q B N R") file_coord = oneOf("a b c d e f g h") rank_coord = oneOf("1 2 3 4 5 6 7 8") capture = oneOf("x :") promote = Literal("=") castle_queenside = oneOf("O-O-O 0-0-0 o-o-o") castle_kingside = oneOf("O-O 0-0 o-o") move_number = Optional(comment) + Word(nums) + dot m1 = file_coord + rank_coord # pawn move e.g. d4 m2 = file_coord + capture + file_coord + rank_coord # pawn capture move e.g. dxe5 m3 = file_coord + "8" + promote + piece # pawn promotion e.g. e8=Q
# Grammar Rules # ============================================================================ # mul = Literal("*") plus = Literal("+") minus = Literal("-") comma = Literal(",") lbracket = Literal("[") rbracket = Literal("]") lbrace = Literal("{") rbrace = Literal("}") hashsign = Literal("#") exclamation = Literal("!") caret = Literal("^") hex_num = Combine(Literal("0x") + Word("0123456789abcdef")) dec_num = Word("0123456789") # Operand Parsing # ============================================================================ # sign = Optional(Or([plus, minus("minus")])) immediate = Group( Optional(Suppress(hashsign)) + (sign + Or([hex_num, dec_num]))("value")) register = Group( Or([ Combine(Literal("r") + Word(nums)("reg_num")), Combine(Literal("d") + Word(nums)("reg_num")), Combine(Literal("c") + Word(nums)("reg_num")), Combine(Literal("p") + Word(nums)("reg_num")),
except ImportError: ecodes = None print('WARNING: evdev is not available') try: from functools import lru_cache except ImportError: # don't do caching on old python lru_cache = lambda: (lambda f: f) EOL = LineEnd().suppress() EMPTYLINE = LineEnd() COMMENTLINE = pythonStyleComment + EOL INTEGER = Word(nums) STRING = QuotedString('"') REAL = Combine((INTEGER + Optional('.' + Optional(INTEGER))) ^ ('.' + INTEGER)) SIGNED_REAL = Combine(Optional(Word('-+')) + REAL) UDEV_TAG = Word(string.ascii_uppercase, alphanums + '_') # Those patterns are used in type-specific matches TYPES = {'mouse': ('usb', 'bluetooth', 'ps2', '*'), 'evdev': ('name', 'atkbd', 'input'), 'id-input': ('modalias'), 'touchpad': ('i8042', 'rmi', 'bluetooth', 'usb'), 'joystick': ('i8042', 'rmi', 'bluetooth', 'usb'), 'keyboard': ('name', ), 'sensor': ('modalias', ), } # Patterns that are used to set general properties on a device GENERAL_MATCHES = {'acpi',
def create_bnf(term_descs):
    """Build the pyparsing grammar for equation descriptions.

    Parameters
    ----------
    term_descs : list
        Output list: each successfully parsed term appends a TermParse
        object (sign, term_name, term_arg_names) via the collect_term
        parse action.  The sign can be a real or complex multiplier.

    Returns
    -------
    The full equation parser element (StringStart ... StringEnd).
    """
    lc = ['+'] # Linear combination context.
    equal = Literal("=").setParseAction(rhs(lc))
    zero = Literal("0").suppress()

    point = Literal(".")
    e = CaselessLiteral("E")
    inumber = Word("+-" + nums, nums)
    # Floating-point literal: digits, optional fraction, optional exponent.
    fnumber = Combine(Word("+-" + nums, nums) +
                      Optional(point + Optional(Word(nums))) +
                      Optional(e + Word("+-" + nums, nums)))
    # Optional trailing 'j' marks an imaginary component (default '' = none).
    number = fnumber + Optional(Literal('j'), default='')
    add_op = oneOf('+ -')
    number_expr = Forward()
    # Parenthesised sums of numbers.
    # NOTE(review): opening and closing parens are matched only by count
    # (ZeroOrMore on each side), not by nesting -- confirm that unbalanced
    # input is rejected elsewhere.
    number_expr << ZeroOrMore('(') + number \
                + ZeroOrMore(add_op + number_expr) \
                + ZeroOrMore(')')

    ident = Word(alphas, alphanums + "_")
    # Integral specification: 'i' + name, the letter 'a', or a plain number.
    integral = Combine((Literal('i') + Word(alphanums)) | Literal('a')
                       | Word(nums))("integral")
    # Optional time-history index, e.g. "[-1]"; default is ''.
    history = Optional('[' + inumber + ']', default='')("history")
    variable = Combine(Word(alphas, alphanums + '._') + history)

    # Generalized variables: d<var>/dt, tr(<var>), or a plain variable.
    derivative = Combine(Literal('d') + variable\
                         + Literal('/') + Literal('dt'))
    trace = Combine(Literal('tr') + '(' + variable + ')')
    generalized_var = derivative | trace | variable
    args = Group(delimitedList(generalized_var))
    flag = Literal('a')

    # A term: optional sign, optional numeric multiplier ("<num> *"), a
    # dotted name with optional integral/region/flag qualifiers, then a
    # parenthesised argument list.
    term = Optional( Literal( '+' ) | Literal( '-' ), default = '+' )( "sign" )\
           + Optional( number_expr + Literal( '*' ).suppress(),
                       default = ['1.0', ''] )( "mul" ) \
           + Combine( ident( "name" )\
                      + Optional( "." + (integral + "." + ident( "region" )
                                         + "." + flag( "flag" )
                                         | integral + "." + ident( "region" )
                                         | ident( "region" )
                                         )))( "term_desc" ) + "("\
           + Optional(args, default=[''])( "args" ) + ")"
    term.setParseAction(collect_term(term_descs, lc))

    rhs1 = equal + OneOrMore(term)
    rhs2 = equal + zero
    equation = StringStart() + OneOrMore( term )\
               + Optional( rhs1 | rhs2 ) + StringEnd()

    ## term.setDebug()

    return equation
# Escaped character: backslash followed by one printable or non-ASCII char.
escaped = (
    Literal("\\").suppress() +
    # chr(20)-chr(126) + chr(128)-unichr(sys.maxunicode)
    Regex("[\u0020-\u007e\u0080-\uffff]", re.IGNORECASE)
)


def convertToUnicode(t):
    """Parse action: turn a hex escape token into the character it encodes."""
    return chr(int(t[0], 16))


# CSS-style unicode escape: backslash, 1-6 hex digits, and an optional single
# terminating whitespace character (consumed so it does not join the output).
hex_unicode = (
    Literal("\\").suppress() +
    Regex("[0-9a-f]{1,6}", re.IGNORECASE) +
    Optional(White(exact=1)).suppress()
).setParseAction(convertToUnicode)


# Hex escapes are tried first so "\41" is decoded rather than read as an
# escaped literal "4".
escape = hex_unicode | escaped

# any unicode literal outside the 0-127 ascii range
nonascii = Regex("[^\u0000-\u007f]")

# single character for starting an identifier.
nmstart = Regex("[A-Z]", re.IGNORECASE) | nonascii | escape

# characters permitted after the first identifier character (adds digits and '-')
nmchar = Regex("[0-9A-Z-]", re.IGNORECASE) | nonascii | escape

identifier = Combine(nmstart + ZeroOrMore(nmchar))
# Space-separated string of all binary-operator literals, for oneOf().
binopstr = " ".join(opcodes)


def parsebinop(opexpr):
    "parse action for binary operators"
    left, opstr, right = opexpr

    # Dispatch on the operator literal to the matching expression class.
    # NOTE(review): returns None implicitly when no opclass claims opstr --
    # presumably binopclasses covers every opcode; confirm.
    for opclass in binopclasses:
        if opstr in opclass.literals:
            return opclass(left, right)


binop = oneOf(binopstr)

arithSign = Word("+-", exact=1)

# Real literal: requires a decimal point ("1.", "1.5", ".5"), with an
# optional signed exponent part.
realNum = Combine(Optional(arithSign) +
                  (Word(nums) + "." + Optional(Word(nums)) | ("." + Word(nums)))  # noqa
                  + Optional(E + Optional(arithSign) + Word(nums)))
realNum.setParseAction(lambda x: expression.NumericLiteral(float(x[0])))

# Integer literal, optionally with a non-negative exponent part.
intNum = Combine(Optional(arithSign) + Word(nums) +
                 Optional(E + Optional("+") + Word(nums)))
intNum.setParseAction(lambda x: expression.NumericLiteral(int(x[0])))

# realNum is tried first so "1.5" is not consumed as the integer "1".
number = realNum | intNum

variable = ident.copy()
variable.setParseAction(lambda x: model.Var(x[0]))

# Strip the surrounding quotes when building the string-literal node.
quotedString.setParseAction(lambda x: expression.StringLiteral(x[0][1:-1]))

literal = quotedString | number
from pyparsing import OneOrMore
from pyparsing import Word
from pyparsing import ZeroOrMore
from pyparsing import alphanums
from pyparsing import nums


def _any_keyword(*names):
    """Match-first alternation of case-insensitive SQL keywords, in order."""
    alternation = CaselessKeyword(names[0])
    for name in names[1:]:
        alternation = alternation | CaselessKeyword(name)
    return alternation


# Intermediate parsers
_varchar_names = _any_keyword('VARCHAR', 'TEXT', 'NVARCHAR')

# Data types
_smallint = _any_keyword('SMALLINT', 'INT2')
_integer = _any_keyword('INTEGER', 'INT', 'INT4')
_bigint = _any_keyword('BIGINT', 'INT8')
# DECIMAL(p, s) / NUMERIC(p, s): precision/scale list captured verbatim.
_decimal = Combine(_any_keyword('DECIMAL', 'NUMERIC') + '(' + Word(nums + ' ,') + ')')  # noqa
_real = _any_keyword('REAL', 'FLOAT4')
_double = _any_keyword('DOUBLE PRECISION', 'FLOAT', 'FLOAT8', 'DOUBLE')  # noqa
_boolean = CaselessKeyword('BOOLEAN')
_char = _any_keyword('CHAR', 'CHARACTER', 'NCHAR', 'BPCHAR')
# VARCHAR(n) and friends: a length in parentheses.
_varchar = Combine(_varchar_names + '(' + Word(alphanums) + ')')
_date = CaselessKeyword('DATE')
_text = CaselessKeyword('TEXT')
_timestamp = CaselessKeyword('TIMESTAMP')

# Create SQL keywords
_create = CaselessKeyword('CREATE')
_table = CaselessKeyword('TABLE')
_view = CaselessKeyword('VIEW')
_temp = CaselessKeyword('TEMP')
def __init__(self):
    """Set up operator dispatch tables and the expression grammar.

    Builds: constant/operator lookup dicts, xarray ufunc tables keyed by
    arity, reduction-method table, and the pyparsing grammar whose parse
    actions push onto self.expr_stack / self.texpr_stack for later
    evaluation.
    """
    self.ae = False
    self.local_dict = None
    self.f = None

    self.user_functions = None

    # Stacks filled by the parse actions during parsing.
    self.expr_stack = []
    self.texpr_stack = []

    # Define constants
    self.constants = {}

    # Define Operators
    # NOTE(review): "!" maps to operator.inv (bitwise invert), while the
    # grammar below treats "!" as logical not -- confirm intended semantics.
    self.opn = {"+": operator.add,
                "-": operator.sub,
                "*": operator.mul,
                "/": operator.truediv,
                ">": operator.gt,
                ">=": operator.ge,
                "<": operator.lt,
                "<=": operator.le,
                "==": operator.eq,
                "!=": operator.ne,
                "|": operator.or_,
                "&": operator.and_,
                "!": operator.inv}

    # Define xarray DataArray operators with 1 input parameter
    # NOTE(review): xr.ufuncs was removed in newer xarray releases; this
    # table requires an older xarray -- confirm the pinned version.
    self.xfn1 = {"angle": xr.ufuncs.angle,
                 "arccos": xr.ufuncs.arccos,
                 "arccosh": xr.ufuncs.arccosh,
                 "arcsin": xr.ufuncs.arcsin,
                 "arcsinh": xr.ufuncs.arcsinh,
                 "arctan": xr.ufuncs.arctan,
                 "arctanh": xr.ufuncs.arctanh,
                 "ceil": xr.ufuncs.ceil,
                 "conj": xr.ufuncs.conj,
                 "cos": xr.ufuncs.cos,
                 "cosh": xr.ufuncs.cosh,
                 "deg2rad": xr.ufuncs.deg2rad,
                 "degrees": xr.ufuncs.degrees,
                 "exp": xr.ufuncs.exp,
                 "expm1": xr.ufuncs.expm1,
                 "fabs": xr.ufuncs.fabs,
                 "fix": xr.ufuncs.fix,
                 "floor": xr.ufuncs.floor,
                 "frexp": xr.ufuncs.frexp,
                 "imag": xr.ufuncs.imag,
                 "iscomplex": xr.ufuncs.iscomplex,
                 "isfinite": xr.ufuncs.isfinite,
                 "isinf": xr.ufuncs.isinf,
                 "isnan": xr.ufuncs.isnan,
                 "isreal": xr.ufuncs.isreal,
                 "log": xr.ufuncs.log,
                 "log10": xr.ufuncs.log10,
                 "log1p": xr.ufuncs.log1p,
                 "log2": xr.ufuncs.log2,
                 "rad2deg": xr.ufuncs.rad2deg,
                 "radians": xr.ufuncs.radians,
                 "real": xr.ufuncs.real,
                 "rint": xr.ufuncs.rint,
                 "sign": xr.ufuncs.sign,
                 "signbit": xr.ufuncs.signbit,
                 "sin": xr.ufuncs.sin,
                 "sinh": xr.ufuncs.sinh,
                 "sqrt": xr.ufuncs.sqrt,
                 "square": xr.ufuncs.square,
                 "tan": xr.ufuncs.tan,
                 "tanh": xr.ufuncs.tanh,
                 "trunc": xr.ufuncs.trunc}

    # Define xarray DataArray operators with 2 input parameter
    self.xfn2 = {"arctan2": xr.ufuncs.arctan2,
                 "copysign": xr.ufuncs.copysign,
                 "fmax": xr.ufuncs.fmax,
                 "fmin": xr.ufuncs.fmin,
                 "fmod": xr.ufuncs.fmod,
                 "hypot": xr.ufuncs.hypot,
                 "ldexp": xr.ufuncs.ldexp,
                 "logaddexp": xr.ufuncs.logaddexp,
                 "logaddexp2": xr.ufuncs.logaddexp2,
                 "logicaland": xr.ufuncs.logical_and,
                 "logicalnot": xr.ufuncs.logical_not,
                 "logicalor": xr.ufuncs.logical_or,
                 "logicalxor": xr.ufuncs.logical_xor,
                 "maximum": xr.ufuncs.maximum,
                 "minimum": xr.ufuncs.minimum,
                 "nextafter": xr.ufuncs.nextafter}

    # Define non-xarray DataArray operators with 2 input parameter
    self.fn2 = {"percentile": np.percentile}

    # Define xarray DataArray reduction operators
    self.xrfn = {"all": xr.DataArray.all,
                 "any": xr.DataArray.any,
                 "argmax": xr.DataArray.argmax,
                 "argmin": xr.DataArray.argmin,
                 "max": xr.DataArray.max,
                 "mean": xr.DataArray.mean,
                 "median": xr.DataArray.median,
                 "min": xr.DataArray.min,
                 "prod": xr.DataArray.prod,
                 "sum": xr.DataArray.sum,
                 "std": xr.DataArray.std,
                 "var": xr.DataArray.var}

    # Conditional-operator table.
    # NOTE(review): np.percentile as the "<" entry looks like a placeholder
    # -- confirm against the evaluator that consumes self.xcond.
    self.xcond = {"<": np.percentile}

    # Define Grammar
    point = Literal(".")
    e = CaselessLiteral("E")
    # Floating-point literal: digits, optional fraction, optional exponent.
    fnumber = Combine(Word("+-"+nums, nums) +
                      Optional(point + Optional(Word(nums))) +
                      Optional(e + Word("+-"+nums, nums)))
    variable = Word(alphas, alphas+nums+"_$")

    # Single-character operator and punctuation tokens.
    seq = Literal("=")
    b_not = Literal("~")
    plus = Literal("+")
    minus = Literal("-")
    mult = Literal("*")
    div = Literal("/")
    gt = Literal(">")
    gte = Literal(">=")
    lt = Literal("<")
    lte = Literal("<=")
    eq = Literal("==")
    neq = Literal("!=")
    b_or = Literal("|")
    b_and = Literal("&")
    l_not = Literal("!")
    lpar = Literal("(").suppress()
    rpar = Literal(")").suppress()
    comma = Literal(",")
    colon = Literal(":")
    lbrac = Literal("[")
    rbrac = Literal("]")
    lcurl = Literal("{")
    rcurl = Literal("}")
    qmark = Literal("?")
    scolon = Literal(";")

    # Operator groups, in the order used by the precedence chain below.
    addop = plus | minus
    multop = mult | div
    sliceop = colon
    compop = gte | lte | gt | lt    # two-char operators tried before one-char
    eqop = eq | neq
    bitcompop = b_or | b_and
    bitnotop = b_not
    logicalnotop = l_not
    assignop = seq
    expop = Literal("^")

    expr = Forward()
    indexexpr = Forward()

    # Atoms, tried in order: assignment, indexing, ternary (two spellings),
    # unary ops, masking, 4/3/2/1-argument function calls or bare variable,
    # numeric literal, tuple, and parenthesised expression.
    atom = (Optional("-") +
            (variable + seq + expr).setParseAction(self.push_assign) |
            indexexpr.setParseAction(self.push_index) |
            (lpar + expr + qmark.setParseAction(self.push_ternary1) + expr +
             scolon.setParseAction(self.push_ternary2) + expr + rpar).setParseAction(self.push_ternary) |
            (lpar + expr + qmark + expr + scolon + expr + rpar).setParseAction(self.push_ternary) |
            (logicalnotop + expr).setParseAction(self.push_ulnot) |
            (bitnotop + expr).setParseAction(self.push_unot) |
            (minus + expr).setParseAction(self.push_uminus) |
            (variable + lcurl + expr + rcurl).setParseAction(self.push_mask) |
            (variable + lpar + expr + (comma + expr)*3 + rpar).setParseAction(self.push_expr4) |
            (variable + lpar + expr + (comma + expr)*2 + rpar).setParseAction(self.push_expr3) |
            (variable + lpar + expr + comma + expr + rpar).setParseAction(self.push_expr2) |
            (variable + lpar + expr + rpar | variable).setParseAction(self.push_expr1) |
            fnumber.setParseAction(self.push_expr) |
            (lpar + expr + ZeroOrMore(comma + expr).setParseAction(self.get_tuple) + rpar).setParseAction(self.push_tuple) |
            (lpar + expr.suppress() + rpar).setParseAction(self.push_uminus))

    # Define order of operations for operators
    factor = Forward()
    factor << atom + ZeroOrMore((expop + factor).setParseAction(self.push_op))
    term = factor + ZeroOrMore((multop + factor).setParseAction(self.push_op))
    term2 = term + ZeroOrMore((addop + term).setParseAction(self.push_op))
    term3 = term2 + ZeroOrMore((sliceop + term2).setParseAction(self.push_op))
    term4 = term3 + ZeroOrMore((compop + term3).setParseAction(self.push_op))
    term5 = term4 + ZeroOrMore((eqop + term4).setParseAction(self.push_op))
    term6 = term5 + ZeroOrMore((bitcompop + term5).setParseAction(self.push_op))
    expr << term6 + ZeroOrMore((assignop + term6).setParseAction(self.push_op))

    # Define index operators
    # A bare ":" directly before "," or "]" selects a whole axis.
    colon_expr = (colon + FollowedBy(comma) ^
                  colon + FollowedBy(rbrac)).setParseAction(self.push_colon)
    range_expr = colon_expr | expr | colon
    indexexpr << (variable + lbrac + delimitedList(range_expr, delim=',') +
                  rbrac).setParseAction(self.push_expr)

    self.parser = expr
def _fanged_dot_tokens():
    """Build the spelling variants of an obfuscated ("fanged") '.'.

    For every bracket pair ((), [], {}) and every stand-in for the dot
    ('.', 'dot', 'punkt', 'punto'), the variants are the stand-in enclosed
    by or adjoining the brackets, e.g. "(dot)", "[.", "punto}".  The two
    upper-case-only words and the dash-wrapped forms are appended last.
    """
    tokens = []
    for open_ch, close_ch in (("(", ")"), ("[", "]"), ("{", "}")):
        for word in (".", "dot", "punkt", "punto"):
            variants = [
                open_ch + word + open_ch,
                open_ch + word + close_ch,
                close_ch + word + open_ch,
                close_ch + word + close_ch,
                open_ch + word,
                word + open_ch,
                close_ch + word,
                word + close_ch,
            ]
            # ")." is deliberately NOT treated as a fanged dot: it occurs
            # legitimately in real text (end of a parenthesised sentence).
            if (open_ch, word) == ("(", "."):
                variants.remove(close_ch + word)
            tokens.extend(CaselessLiteral(v) for v in variants)
    tokens += [Literal("DOT"), Literal("PUNKT"), Literal("PUNTO"),
               CaselessLiteral("-dot-"), CaselessLiteral("-punkt-"),
               CaselessLiteral("-punto-")]
    return tokens


# Any of the variants above, with optional surrounding whitespace, is
# re-fanged back into a plain ".".  Or() is longest-match, so the order of
# the alternatives does not matter.
dot_fanging_patterns = Combine(
    Optional(White()) + Or(_fanged_dot_tokens()) + Optional(White())
).addParseAction(replaceWith("."))
# Arithmetic, boolean and relational operator tokens; parse actions rewrite
# some of them to the target language's spellings.
aop0 = oneOf('* /')
aop1 = oneOf('+ -')
aop2 = oneOf('%').setParseAction(lambda s, l, t: ['mod'])
bop = oneOf('& |').setParseAction(lambda s, l, t: ['and'] if t[0] == '&' else ['or'])
NOT = Literal('!')
rop = oneOf('< > <= >= = !=').setParseAction(lambda s, l, t: ['distinct'] if t[0] == '!=' else t)

# String-builtin keywords.
GET, CAT, HAS, IND, LEN, REP, SUB, EQL = map(
    Literal, '#get #cat #has #ind #len #rep #sub #eql'.split())

# Identifier; the addVar parse action records the variable as a side effect.
var = Word(alphas + '_:$', alphanums + '_:$').setParseAction(addVar)

# Integer literal; a leading '-' is rewritten to prefix form "(- n)".
ival = Combine(Optional('-') + Word(nums)).setParseAction(
    lambda s, l, t: ['(- %s)' % t[0][1:]] if t[0][0] == '-' else t)

# Implicit multiplication: "3x" becomes ['*', '3', 'x'].
ivar = (ival + var).setParseAction(lambda s, l, t: ['*', t[0], t[1]])

term = ivar | ival | var | QuotedString(quoteChar='"', unquoteResults=False)

stmt = Forward()
expr = Forward()
sexpr = Forward()

# String expressions built from the #-keywords above.
# NOTE(review): fragment is truncated below -- the sexpr alternation is cut
# off mid-expression in this chunk.
sexpr << (
    (GET + LPAR + expr + COMMA + expr + RPAR).setParseAction(lambda s, l, t: CharAtAction(t)) |
    (CAT + LPAR + expr + COMMA + expr + RPAR).setParseAction(
        lambda s, l, t: [['Concat', chkString(t[1]), chkString(t[2])]]) |
def define_dot_parser(self):
    """Build and return the pyparsing grammar for the Graphviz dot language.

    Based on the grammar at http://www.graphviz.org/doc/info/lang.html.
    Parse actions dispatch to the self._proc_* handlers; the assembled
    top-level parser is returned.
    """
    # punctuation
    colon = Literal(":")
    lbrace = Suppress("{")
    rbrace = Suppress("}")
    lbrack = Suppress("[")
    rbrack = Suppress("]")
    lparen = Literal("(")
    rparen = Literal(")")
    equals = Suppress("=")
    comma = Literal(",")
    dot = Literal(".")
    slash = Literal("/")
    bslash = Literal("\\")
    star = Literal("*")
    semi = Suppress(";")
    at = Literal("@")
    minus = Literal("-")
    pluss = Suppress("+")

    # keywords
    strict_ = CaselessLiteral("strict")
    graph_ = CaselessLiteral("graph")
    digraph_ = CaselessLiteral("digraph")
    subgraph_ = CaselessLiteral("subgraph")
    node_ = CaselessLiteral("node")
    edge_ = CaselessLiteral("edge")

    # Characters that cannot appear in a bare (unquoted) string: all
    # punctuation except '_', plus whitespace.
    punctuation_ = "".join([c for c in string.punctuation if c not in '_'
                            ]) + string.whitespace

    # token definitions
    identifier = Word(alphanums + "_").setName("identifier")

    #double_quoted_string = QuotedString('"', multiline=True,escChar='\\',
    #    unquoteResults=True) # dblQuotedString
    double_quoted_string = Regex(r'\"(?:\\\"|\\\\|[^"])*\"', re.MULTILINE)
    double_quoted_string.setParseAction(removeQuotes)
    # Quoted strings joined by '+' are concatenated into one value.
    quoted_string = Combine(
        double_quoted_string +
        Optional(OneOrMore(pluss + double_quoted_string)),
        adjacent=False)
    alphastring_ = OneOrMore(CharsNotIn(punctuation_))

    def parse_html(s, loc, toks):
        # Re-wrap parsed HTML-label content in << >> delimiters.
        return '<<%s>>' % ''.join(toks[0])

    opener = '<'
    closer = '>'
    try:
        html_text = pyparsing.nestedExpr(
            opener, closer,
            ((CharsNotIn(opener + closer).setParseAction(lambda t: t[0])))
        ).setParseAction(parse_html)
    except:
        # NOTE(review): bare except -- any failure here is attributed to an
        # old pyparsing; a simpler, non-nesting HTML matcher is substituted.
        log.debug('nestedExpr not available.')
        log.warning('Old version of pyparsing detected. Version 1.4.8 or '
                    'later is recommended. Parsing of html labels may not '
                    'work properly.')
        html_text = Combine(Literal("<<") + OneOrMore(CharsNotIn(",]")))

    # An ID: bare string, HTML label, quoted string, or identifier.
    ID = (
        alphastring_ | html_text | quoted_string | #.setParseAction(strip_quotes) |
        identifier).setName("ID")

    float_number = Combine(Optional(minus) +
                           OneOrMore(Word(nums + "."))).setName("float_number")

    righthand_id = (float_number | ID).setName("righthand_id")

    # Node ports: ":name[.compass]" or "@angle" forms, in either order.
    port_angle = (at + ID).setName("port_angle")

    port_location = ((OneOrMore(Group(colon + ID)) |
                      Group(colon + lparen + ID + comma + ID +
                            rparen))).setName("port_location")

    port = Combine(
        (Group(port_location + Optional(port_angle)) |
         Group(port_angle + Optional(port_location)))).setName("port")

    node_id = (ID + Optional(port))

    # Attribute list contents: "a=b, c, d=e" (commas optional/suppressed).
    a_list = OneOrMore(ID + Optional(equals + righthand_id) +
                       Optional(comma.suppress())).setName("a_list")

    attr_list = OneOrMore(lbrack + Optional(a_list) + rbrack).setName(
        "attr_list").setResultsName('attrlist')

    attr_stmt = ((graph_ | node_ | edge_) + attr_list).setName("attr_stmt")

    edgeop = (Literal("--") | Literal("->")).setName("edgeop")

    stmt_list = Forward()
    graph_stmt = (lbrace + Optional(stmt_list) + rbrace +
                  Optional(semi)).setName("graph_stmt")

    edge_point = Forward()

    edgeRHS = OneOrMore(edgeop + edge_point)
    edge_stmt = edge_point + edgeRHS + Optional(attr_list)

    subgraph = (
        Optional(subgraph_, '') + Optional(ID, '') +
        Group(graph_stmt)).setName("subgraph").setResultsName('ssubgraph')

    # Edges may connect nodes, subgraphs, or anonymous brace groups.
    edge_point << (subgraph | graph_stmt | node_id)

    node_stmt = (node_id + Optional(attr_list) +
                 Optional(semi)).setName("node_stmt")

    assignment = (ID + equals + righthand_id).setName("assignment")
    stmt = (assignment | edge_stmt | attr_stmt | subgraph |
            graph_stmt | node_stmt).setName("stmt")
    stmt_list << OneOrMore(stmt + Optional(semi))

    # Top level: [strict] (graph|digraph) [ID] { stmt_list }.
    graphparser = ((Optional(strict_, 'notstrict') + ((graph_ | digraph_)) +
                    Optional(ID, '') + lbrace + Group(Optional(stmt_list)) +
                    rbrace).setResultsName("graph"))

    singleLineComment = Group("//" + restOfLine) | Group("#" + restOfLine)

    # actions
    graphparser.ignore(singleLineComment)
    graphparser.ignore(cStyleComment)
    node_id.setParseAction(self._proc_node_id)
    assignment.setParseAction(self._proc_attr_assignment)
    a_list.setParseAction(self._proc_attr_list)
    edge_stmt.setParseAction(self._proc_edge_stmt)
    node_stmt.setParseAction(self._proc_node_stmt)
    attr_stmt.setParseAction(self._proc_default_attr_stmt)
    attr_list.setParseAction(self._proc_attr_list_combine)
    subgraph.setParseAction(self._proc_subgraph_stmt)
    #graph_stmt.setParseAction(self._proc_graph_stmt)
    graphparser.setParseAction(self._main_graph_stmt)

    return graphparser
b = rdflib.BNode() if res: res += [res[-3], rdflib.RDF.rest, b, b, rdflib.RDF.first, x] else: res += [b, rdflib.RDF.first, x] res += [b, rdflib.RDF.rest, rdflib.RDF.nil] if DEBUG: print "CollectionOut", res return [res] # SPARQL Grammar from http://www.w3.org/TR/sparql11-query/#grammar # ------ TERMINALS -------------- # [139] IRIREF ::= '<' ([^<>"{}|^`\]-[#x00-#x20])* '>' IRIREF = Combine(Suppress("<") + Regex(r'[^<>"{}|^`\\%s]*' % "".join("\\x%02X" % i for i in range(33))) + Suppress(">")) IRIREF.setParseAction(lambda x: rdflib.URIRef(x[0])) # [164] P_CHARS_BASE ::= [A-Z] | [a-z] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x02FF] | [#x0370-#x037D] | [#x037F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF] if sys.maxunicode == 0xFFFF: # this is narrow python build (default on windows/osx) # this means that unicode code points over 0xffff are stored # as several characters, which in turn means that regex character # ranges with these characters do not work. # See # * http://bugs.python.org/issue12729 # * http://bugs.python.org/issue12749 # * http://bugs.python.org/issue3665 # # Here we simple skip the [#x10000-#xEFFFF] part
def __init__(self):
    """Build the arithmetic-expression grammar and operator tables.

    Grammar (note multiplication uses the letter 'x', not '*'):
        expop   :: '^'
        multop  :: 'x' | '/'
        addop   :: '+' | '-'
        integer :: ['+' | '-'] '0'..'9'+
        atom    :: PI | E | real | fn '(' expr ')' | '(' expr ')'
        factor  :: atom [ expop factor ]*
        term    :: factor [ multop factor ]*
        expr    :: term [ addop term ]*

    Parse actions push tokens via self.push_first / self.push_unary_minus;
    the finished parser is stored in self.bnf, and the operator/function
    dispatch tables in self.opn / self.function.
    """
    point = Literal(".")
    exp = CaselessLiteral("E")
    # Floating-point literal: sign, digits, optional fraction and exponent.
    fnumber = Combine(
        Word("+-" + nums, nums) + Optional(point + Optional(Word(nums))) +
        Optional(exp + Word("+-" + nums, nums)))
    ident = Word(alphas, alphas + nums + "_$")

    plus = Literal("+")
    minus = Literal("-")
    mult = Literal("x")   # multiplication sign is the letter 'x', not '*'
    div = Literal("/")
    lpar = Literal("(").suppress()
    rpar = Literal(")").suppress()
    addop = plus | minus
    multop = mult | div
    powop = Literal("^")
    pi = CaselessLiteral("PI")

    expr = Forward()
    # Atom: optionally signed constant, number, function call, or
    # parenthesised sub-expression.
    atom = (
        (Optional(oneOf("- +")) +
         (pi | exp | fnumber | ident + lpar + expr + rpar).setParseAction(
             self.push_first)) |
        Optional(oneOf("- +")) +
        Group(lpar + expr + rpar)).setParseAction(self.push_unary_minus)

    # by defining exponentiation as "atom [ ^ factor ]..." instead of
    # "atom [ ^ atom ]...", we get right-to-left exponents, instead of
    # left-to-right; that is, 2^3^2 = 2^(3^2), not (2^3)^2.
    factor = Forward()
    factor << atom + ZeroOrMore(
        (powop + factor).setParseAction(self.push_first))

    term = factor + ZeroOrMore(
        (multop + factor).setParseAction(self.push_first))
    expr << term + ZeroOrMore(
        (addop + term).setParseAction(self.push_first))

    self.bnf = expr

    # map operator symbols to corresponding arithmetic operations
    epsilon = 1e-12
    self.opn = {
        "+": operator.add,
        "-": operator.sub,
        "x": operator.mul,
        "/": operator.truediv,
        "^": operator.pow
    }
    self.function = {
        "sin": math.sin,
        "cos": math.cos,
        "tan": math.tan,
        "abs": abs,
        "trunc": lambda a: int(a),
        "round": round,
        # Sign function with a +/-epsilon dead zone around zero.
        # Bug fix: the original used the Python-2-only builtin cmp(),
        # a NameError on Python 3; (a > eps) - (a < -eps) yields the
        # identical -1/0/1 result.
        "sgn": lambda a: (a > epsilon) - (a < -epsilon)
    }
def _grammar():
    """Build and return the pyparsing grammar for dice-roll expressions."""
    from pyparsing import alphas, alphanums, nums
    from pyparsing import oneOf, Suppress, Optional, Group, ZeroOrMore, NotAny
    from pyparsing import Forward, operatorPrecedence, opAssoc, Word, White
    from pyparsing import delimitedList, Combine, Literal, OneOrMore

    expression = Forward()

    LPAR, RPAR, DOT, LBRAC, RBRAC = map(Suppress, "().{}")
    nw = NotAny(White())   # "no whitespace allowed at this point" marker

    identifier = Word(alphas + "_", alphanums + "_")

    # Numeric literals.
    integer = Word(nums)
    integer.setParseAction(IntegerNode)
    fractional = Combine(Word('+' + '-' + nums, nums) + '.' + Word(nums))
    fractional.setParseAction(FloatNode)
    literal = fractional | integer

    arglist = delimitedList(expression)

    # Sequences: either a range {a..b} or an explicit list {a, b, c}.
    seqrange = LBRAC + expression + Suppress('..') + expression + RBRAC
    seqrange.setParseAction(lambda t: SequenceNode(start=t[0], stop=t[1]))
    seqexplicit = LBRAC + Optional(arglist) + RBRAC
    seqexplicit.setParseAction(lambda t: SequenceNode(lst=t))
    sequence = seqrange | seqexplicit

    # Die rolls, e.g. "3d6" or "d{2,4,6}", with optional adjacent
    # modifiers (d k r e x); nw forbids whitespace inside a roll.
    rollmod = nw + Group(oneOf("d k r e x") + Optional(integer))
    numdice = Optional(integer, default=1)
    roll = numdice + nw + Suppress("d") + nw + (integer | sequence)
    roll += Group(ZeroOrMore(rollmod))
    roll.setParseAction(DieRollNode)

    # Function calls: name(args...).
    call = LPAR + Group(Optional(arglist)) + RPAR
    function = identifier + call
    function.setParseAction(FunctionNode)

    # Method chains on rolls/sequences/functions, e.g. 3d6.keep(2).
    seqexpr = ((roll | sequence | function) +
               Group(OneOrMore(DOT + identifier + call)))
    seqexpr.setParseAction(SeqMethodNode)

    # NOTE(review): variable body chars include a space -- presumably to
    # allow multi-word variable names; confirm intended.
    variable = Word(alphas + "_", alphanums + "_ ")
    variable.setParseAction(VariableNode)

    atom = seqexpr | roll | literal | sequence | function | variable

    expoop = Literal('^')
    signop = oneOf("+ -")
    multop = oneOf("* /")
    plusop = oneOf("+ -")

    # Precedence, tightest first: ^, unary sign, * /, + -.
    # noinspection PyUnresolvedReferences
    expression << operatorPrecedence(
        atom,
        [
            (expoop, 2, opAssoc.LEFT, BinaryOpNode),
            (signop, 1, opAssoc.RIGHT, UnaryOpNode),
            (multop, 2, opAssoc.LEFT, BinaryOpNode),
            (plusop, 2, opAssoc.LEFT, BinaryOpNode),
        ]
    )
    return expression
def rc_statement():
    """
    Generate a RC statement parser that can be used to parse a RC file

    :rtype: pyparsing.ParserElement
    """
    one_line_comment = '//' + restOfLine
    comments = cStyleComment ^ one_line_comment
    precompiler = Word('#', alphanums) + restOfLine

    # LANGUAGE LANG_X [, SUBLANG_Y]
    language_definition = "LANGUAGE" + Word(alphas + '_').setResultsName(
        "language") + Optional(',' + Word(alphas + '_').setResultsName("sublanguage"))

    # Blocks are delimited either by braces or by BEGIN/END keywords.
    block_start = (Keyword('{') | Keyword("BEGIN")).setName("block_start")
    block_end = (Keyword('}') | Keyword("END")).setName("block_end")

    reserved_words = block_start | block_end
    # An identifier that is not one of the block delimiters.
    name_id = ~reserved_words + \
        Word(alphas, alphanums + '_').setName("name_id")

    numbers = Word(nums)
    integerconstant = numbers ^ Combine('0x' + numbers)

    # e.g. "NOT WS_VISIBLE"; pieces are re-joined with single spaces.
    constant = Combine(Optional(Keyword("NOT")) + (name_id | integerconstant),
                       adjacent=False, joinString=' ')
    # Style flags OR-ed together with '|'.
    combined_constants = delimitedList(constant, '|')

    # Everything up to CAPTION "..." (if present), then up to the block.
    block_options = Optional(
        SkipTo(Keyword("CAPTION"), failOn=block_start)("pre_caption") +
        Keyword("CAPTION") + quotedString("caption")) + SkipTo(block_start)("post_caption")

    # Any control line we do not model specifically: id + value list.
    undefined_control = Group(
        name_id.setResultsName("id_control") +
        delimitedList(quotedString ^ constant ^ numbers ^
                      Group(combined_constants)).setResultsName("values_"))

    block = block_start + \
        ZeroOrMore(undefined_control)("controls") + block_end

    # DIALOGEX is listed first so plain DIALOG cannot shadow it.
    dialog = name_id("block_id") + (Keyword("DIALOGEX") | Keyword("DIALOG")
                                    )("block_type") + block_options + block

    string_table = Keyword("STRINGTABLE")("block_type") + block_options + block

    menu_item = Keyword("MENUITEM")("block_type") + (
        commaSeparatedList("values_") | Keyword("SEPARATOR"))

    # Menus nest: a POPUP block may contain menu items or further popups.
    popup_block = Forward()
    popup_block <<= Group(
        Keyword("POPUP")("block_type") + Optional(quotedString("caption")) +
        block_start +
        ZeroOrMore(Group(menu_item | popup_block))("elements") + block_end)("popups*")

    menu = name_id("block_id") + \
        Keyword("MENU")("block_type") + block_options + \
        block_start + ZeroOrMore(popup_block) + block_end

    # A statement is any of the recognised top-level constructs.
    statem = comments ^ precompiler ^ language_definition \
        ^ dialog ^ string_table ^ menu

    return statem