def _get_simple_ref_parser():
    """Build a parser for a line containing exactly one embedded reference.

    Grammar: [text] REF_OPEN text REF_CLOSE [text], anchored to both the
    start and the end of the input.  Plain text runs are tagged through
    ``_string`` and the bracketed segment through ``_reference``.
    """
    text = pp.CharsNotIn(_EXCLUDES).setParseAction(_string)
    open_tok = pp.Literal(_REF_OPEN).suppress()
    close_tok = pp.Literal(_REF_CLOSE).suppress()
    ref = (open_tok + pp.Group(text) + close_tok).setParseAction(_reference)
    return (pp.StringStart() + pp.Optional(text) + ref
            + pp.Optional(text) + pp.StringEnd())
def get_guild_role_parser(self, guild):
    """
    Create a role parser for the specified guild.

    Grammar (right-recursive, so the set operators associate to the right):

        expression ::= term UNION expression | term
        term       ::= factor INTERSECT term | factor
        factor     ::= '(' expression ')' | COMPLEMENT factor | role

    A role is a bare alphanumeric word or a single-quoted (escapable)
    string.  Parse actions resolve factors/terms/expressions against the
    given guild.

    :param guild: guild whose roles the factor parse action resolves against
    :return: pyparsing element anchored at both string start and end
    """
    intersect = pp.CaselessKeyword(self.INTERSECT_TOKEN)
    union = pp.CaselessKeyword(self.UNION_TOKEN)
    complement = pp.CaselessKeyword(self.COMPLEMENT_TOKEN)
    left_paren = pp.Literal(self.LEFT_PAREN_TOKEN)
    right_paren = pp.Literal(self.RIGHT_PAREN_TOKEN)
    role = pp.Word(pp.alphanums) | pp.QuotedString("'", escChar="\\")
    expression = pp.Forward()
    term = pp.Forward()
    factor = pp.Forward()
    # FollowedBy(right_paren) pins the failure location on an unbalanced
    # closing paren before it is actually consumed.
    factor <<= left_paren + expression + pp.FollowedBy(
        right_paren) + right_paren | complement + factor | role
    term <<= factor + intersect + term | factor
    expression <<= term + union + expression | term
    factor.setParseAction(self.get_guild_factor_action(guild))
    term.setParseAction(self.get_guild_term_action())
    expression.setParseAction(self.expression_action)
    # Anchor both ends so trailing garbage rejects the whole statement.
    role_statement = pp.StringStart() + expression + pp.StringEnd()
    return role_statement
def handle_report(rep, clus):
    """Route one report into the matching cluster collection on *clus*.

    Dispatches on ``rep.report_type``: "lost-static" and "lost-heap"
    reports are merged into per-key cluster objects (created on first
    sight), "value" reports are folded into InconsistentValue clusters,
    and anything else becomes a standalone FlowReport.
    """
    kind = rep.report_type
    if kind == "lost-static":
        import pyparsing
        grammar = pyparsing.StringStart() + parse.field_with_type
        # target_fact carries a one-character prefix sentinel; skip it.
        field = grammar.parseString(rep.target_fact[1:])[0]
        if field not in clus.static_clusters:
            clus.static_clusters[field] = LostStaticReport(field)
        clus.static_clusters[field].add_lost_flow(
            rep.target_fact, rep.inputs, rep)
    elif kind == "lost-heap":
        heap_key = (rep.container, rep.target_fact)
        if heap_key not in clus.heap_reports:
            clus.heap_reports[heap_key] = LostHeapReport(heap_key)
        clus.heap_reports[heap_key].add_report(rep)
    elif kind == "value":
        value_key = (rep.container, tuple(rep.key[0]), rep.target_fact)
        if value_key not in clus.value_clusters:
            clus.value_clusters[value_key] = InconsistentValue(
                value_key, rep.context_tags)
        if rep.failing is None:
            rep.failing = []
        clus.value_clusters[value_key].add_inconsistent(
            rep.key[2], rep.vals, rep.target_tag, rep.failing,
            rep.blob, rep.dup_contexts)
    else:
        flow_key = (rep.container,) + tuple(rep.key)
        clus.flow_reports[flow_key] = FlowReport(flow_key, rep)
def get_simple_ref_parser(settings):
    """Build a whitespace-preserving parser for one embedded reference.

    All sentinel characters (escape, reference open/close, export
    open/close) are excluded from plain-text runs; text pieces are tagged
    STR and the bracketed piece REF via ``_tag_with``.
    """
    esc = settings.escape_character
    r_open, r_close = settings.reference_sentinels
    i_open, i_close = settings.export_sentinels
    excludes = esc + r_open + r_close + i_open + i_close
    text = pp.CharsNotIn(excludes).setParseAction(_tag_with(tags.STR))
    reference = (
        pp.Literal(r_open).suppress() + pp.Group(text) +
        pp.Literal(r_close).suppress()
    ).setParseAction(_tag_with(tags.REF))
    # NOTE(review): ``s_end`` is assumed to be a module-level end-of-string
    # anchor (e.g. pp.StringEnd()) defined outside this block — confirm.
    parser = (pp.StringStart() + pp.Optional(text) + reference +
              pp.Optional(text) + s_end)
    return parser.leaveWhitespace()
def get_simple_ref_parser(escape_character, reference_sentinels, export_sentinels):
    """Build a parser for a line holding exactly one sentinel-delimited reference.

    Plain text may precede and/or follow the reference; any character used
    as an escape, reference, or export sentinel terminates a text run.
    Text runs go through ``_string`` and the reference through ``_reference``.
    """
    ref_open_tok = reference_sentinels[0]
    ref_close_tok = reference_sentinels[1]
    inv_open_tok = export_sentinels[0]
    inv_close_tok = export_sentinels[1]
    excludes = (escape_character + ref_open_tok + ref_close_tok +
                inv_open_tok + inv_close_tok)
    text = pp.CharsNotIn(excludes).setParseAction(_string)
    reference = (
        pp.Literal(ref_open_tok).suppress() + pp.Group(text) +
        pp.Literal(ref_close_tok).suppress()
    ).setParseAction(_reference)
    return (pp.StringStart() + pp.Optional(text) + reference +
            pp.Optional(text) + pp.StringEnd())
def create_parser(slist, current_section):
    """Build a document parser of ':name:' section headers and free text.

    Each header fires ``set_section(current_section)`` and each text chunk
    fires ``to_list(slist, current_section)``, so parsing populates *slist*
    as a side effect.
    """
    marker = pp.Literal(':')
    header = pp.Combine(
        marker + pp.Word(pp.alphas, pp.alphanums + '_ ') + marker)
    header.setParseAction(set_section(current_section))
    header.setName('section')
    body = pp.SkipTo(header | pp.StringEnd())
    body.setParseAction(to_list(slist, current_section))
    body.setName('text')
    return (pp.StringStart()
            + pp.Optional(body)
            + pp.ZeroOrMore(header + body)
            + pp.StringEnd())
def build_element(self):
    """Assemble the command parser's top-level element.

    Anchors at string start, uses a cheap keyword lookahead so unknown
    commands fail fast, then requires exactly one full command match
    (firing ``on_match``) followed by string end.  The ``-`` operator
    turns each later failure into a hard parse error instead of a
    backtrack point.
    """
    full_commands = [cmd.build_element() for cmd in self.commands.values()]
    # Lookahead on the bare command names only.
    name_lookahead = pp.FollowedBy(
        pp.MatchFirst(pp.Keyword(name) for name in self.commands))
    matched = pp.MatchFirst(full_commands).setParseAction(self.on_match)
    # StringEnd enforces that no extra arguments trail the command.
    return pp.StringStart() - name_lookahead - matched - pp.StringEnd()
def __parse_name(to_parse):
    """Parse *to_parse* as exactly one NAME token and return its text.

    Whitespace (including tabs) is significant and the grammar is anchored
    at both ends, so any surrounding characters cause a parse failure.
    """
    grammar = pp.StringStart() + parse.NAME('result') + pp.StringEnd()
    grammar = grammar.leaveWhitespace().parseWithTabs()
    return grammar.parseString(to_parse)['result']
def format_as_foma_regex(self): return ("\[" if self.negated else "[") + u"|".join(self.letters) + "]" class dot_node(node): def format_as_foma_regex(self): return "?" Letter = pyp.oneOf(list(letters)) Letterset = pyp.Literal("[").suppress() + pyp.Optional( pyp.Literal("^")) + pyp.Group(pyp.OneOrMore( Letter.copy())) + pyp.Literal("]").suppress() Dot = pyp.Literal(".") StartAnchor = pyp.Optional(pyp.StringStart() + pyp.Literal("^")) EndAnchor = pyp.Optional(pyp.Literal("$") + pyp.StringEnd()) Letter.setParseAction(letter_node) Letterset.setParseAction(letterset_node) Dot.setParseAction(dot_node) Atom = Letter | Letterset | Dot Regex0 = pyp.operatorPrecedence( Atom, [(pyp.oneOf("* + ?"), 1, pyp.opAssoc.LEFT, repetition_node), (pyp.Empty(), 2, pyp.opAssoc.LEFT, sequence_node), (pyp.Literal("|").suppress(), 2, pyp.opAssoc.LEFT, alternation_node)]) Regex = StartAnchor.setResultsName("start_of_string") + Regex0.setResultsName( "root") + EndAnchor.setResultsName("end_of_string") def parse(string):
Limitations: - Considers anything that matches `typereference` as `Type` Attributes: value_name (str): name of the assigned value. type_name (str): name of the value's type. value (Any): Python object that holds the value. """ def __init__(self, toks: pp.ParseResults): assert len(toks) == 3 self.value_name = str(toks[0]) self.type_name = str(toks[1]) self.value = toks[2].value # type: Any def __str__(self): return str(self.__dict__) __repr__ = __str__ _raw_syntax = (valuereference.copy() + typereference.copy() + pp.Suppress('::=') + AsnValue.syntax()) for _i in _forwarded_syntaxes: _i.add_forwarded_syntax() # to be exported value_syntax = pp.StringStart() + AsnValue.syntax() value_assignment_syntax = pp.StringStart() + AsnValueAssignment.syntax()
def _create_parser(self):
    """Build the full query-language parser.

    Tokens: bare terms, quoted strings, r-prefixed regexes.  Terms combine
    into field:value pairs, +/- multi-value groups, and comparisons, which
    nest under NOT/AND/OR boolean operators.  Parse actions delegate to the
    ``create_*`` factory methods on ``self`` to build the AST.
    """
    #----------------------------------------------------------------------#
    # TOKENS                                                               #
    #----------------------------------------------------------------------#
    START = pp.StringStart().suppress()
    END = pp.StringEnd().suppress()

    #
    # NUMBER
    #
    #NUMBER = pp.Regex(r"[+-]?\d+(:?\.\d*)?(:?[eE][+-]?\d+)?") .setParseAction( lambda s, loc, toks: [ self.create_NumberLiteral(int(toks[0])) ] )

    #
    # -foo_bar:
    TERM = pp.Word(pp.alphanums, pp.alphanums + '.:-+_/')

    #
    # "..."
    # '...'
    #
    QUOTED = pp.QuotedString('"', escChar='\\') | pp.QuotedString("'", escChar='\\')

    #
    # r"..."
    # r'...'
    #
    REGEXP = pp.Combine(pp.Suppress('r') + QUOTED).setParseAction(
        self.create_RegExLiteral)

    # Order matters: try regex first, then quoted, then bare term.
    STRINGS = (REGEXP
               | QUOTED.setParseAction(self.create_StringLiteral)
               | TERM.setParseAction(self.create_StringLiteral))

    #
    # SYNTAX
    #
    LPAR, RPAR = map(pp.Suppress, "()")
    PLUS = pp.Suppress('+')
    MINUS = pp.Suppress('-')
    COLON = pp.Suppress(':')
    EQ = pp.Suppress('=')
    LT = pp.Literal('<')
    LTE = pp.Literal('<=')
    GT = pp.Literal('>')
    GTE = pp.Literal('>=')
    NOT = pp.Suppress('NOT')
    AND = pp.Suppress('AND')
    OR = pp.Suppress('OR')
    TOKENS = COLON | LPAR | RPAR | NOT | AND | OR | PLUS | MINUS

    #
    # IDENTIFIER (field_names)
    #
    FIELD = pp.Word(pp.alphas, pp.alphanums + ".").setParseAction(
        lambda s, loc, toks: [toks[0]])
    #FIELD = (~(TOKENS)) .setParseAction( lambda s, loc, toks: [ toks[0] ] )

    # A value is any string that is not itself a syntax token.
    basic_value = (~(TOKENS) + STRINGS)

    #----------------------------------------------------------------------#
    # TERMS                                                                #
    #----------------------------------------------------------------------#
    #
    # Simple TERM
    #
    simple_term = (
        # bool_term
        #|
        basic_value.copy()).setParseAction(self.create_SimpleTerm)

    #
    # COMPLEX TERM
    #
    # <field name> ':' <field_value>
    #
    # +value / -value inside a parenthesized multi-value group.
    multi_term_expr = (
        (PLUS + basic_value).setParseAction(self.create_BoolMust)
        | (MINUS + basic_value).setParseAction(self.create_BoolMustNot)
        | basic_value)
    multi_term_sequence = (LPAR + pp.OneOrMore(multi_term_expr).setParseAction(
        self.create_MultiValue) + RPAR)

    # Longest comparison operators first so '<=' wins over '<'.
    compare_term = ((LTE | LT | GTE | GT) + basic_value).setParseAction(
        self.create_CompareValue)
    complex_value = (simple_term | multi_term_sequence | compare_term)
    complex_term = (FIELD + (EQ | COLON) + complex_value).setParseAction(
        self.create_ComplexTerm)

    #-------------------------------------------------------------------
    # EXPRESSION
    #-------------------------------------------------------------------
    query = pp.Forward()

    #
    # <field>:<query>
    # <term>
    # ( <query> )
    #
    base_expr = (
        complex_term
        | simple_term
        | (LPAR + query + RPAR).setParseAction(lambda s, loc, toks: [toks[0]]))

    #-------------------------------------------------------------------
    # BOOLEAN EXPRESSION
    #-------------------------------------------------------------------
    # NOT expr
    # expr
    unary_expr = ((NOT + base_expr).setParseAction(self.create_NotExpr)
                  | (PLUS + base_expr).setParseAction(self.create_BoolMust)
                  | (MINUS + base_expr).setParseAction(self.create_BoolMustNot)
                  | base_expr)
    #simple_expr = unary_expr

    #
    # expr ( AND expr ) *
    #
    and_expr = (unary_expr + pp.ZeroOrMore(AND + unary_expr)).setParseAction(
        self.create_AndExpr)

    #
    # expr ( OR expr ) *
    #
    or_expr = (and_expr + pp.ZeroOrMore(OR + and_expr)).setParseAction(
        self.create_OrExpr)

    boolean_expr = or_expr
    full_expr = boolean_expr

    #
    # clause ::= cond_expr +
    #
    clauses = pp.OneOrMore(full_expr)
    query <<= clauses

    #
    # PARSER
    #
    parser = (START + query.setParseAction(self.create_Query) + END)
    return parser
# commands botname = P.Forward() def setBotName(newname): botname << CL(newname) identifier = P.Word(P.alphas + "_", P.alphanums + "_").setResultsName('identifier') command_leader = L(".") hail = (botname + P.oneOf(": ,")) | (botname + P.White()) command_args = P.restOfLine.setResultsName('command_args') command = ( P.StringStart() + Sup(command_leader | hail) + identifier.setResultsName('command_identifier') + Sup(P.Optional(P.White())) + command_args.setResultsName('command_args')).setResultsName('command') _test_commands = [ (".hello", "['hello', '']"), (".foo bar", "['foo', 'bar']"), (". foo", "['foo', '']"), ("..foo", P.ParseException), ("TestBot:foo", "['foo', '']"), ("tesTBot,foo", "['foo', '']"), ("TestBot foo", "['foo', '']"), ("TestBot: foo", "['foo', '']"), ("tesTBot, foo", "['foo', '']"), ("tesTBotfoo", P.ParseException),
Optional(( (W + CKeyword('OFFSET') + W) | (OW + ',' + OW)) + expression) # N.B. this doesn't account for compound operators (union, intersect...) select << (\ CKeyword('SELECT') + W + Optional((CKeyword('DISTINCT') | CKeyword('ALL')) + W) + \ result_column_list.setResultsName("result_columns") + \ Optional(W + CKeyword('FROM') + W + join_source.setResultsName("join_source")) + \ Optional(W + where_clause).setResultsName("where") + \ Optional(W + group_by_clause).setResultsName("group_by") + \ Optional(W + order_by_clause).setResultsName("order_by") + \ Optional(W + limit_clause).setResultsName("limit") ) SQL_select = pyparsing.StringStart() + OW + select + Optional(';') + pyparsing.StringEnd() if __name__ == '__main__': ''' def dbgStart(s, loc, grammar): print 'Starting', s def dbgSuccess(s, start, end, grammar, match): # print 'Success', s def dbgExcept(s, loc, grammar, e): print 'Except', s SQL_select.setDebugActions(dbgStart, dbgSuccess, dbgExcept) ''' def translate_col_name(loc, toks): print 'column:', toks, loc return '.'.join(['XX']*len([x for x in toks if x != '.']))
def BuildFilterParser():
  """Builds a parser for query filter strings.

  Supports boolean field tests, ==/!=/</<=/>/>= comparisons, "in"/"not in"
  membership, m/.../ regular expressions (=~ / !~) and DNS-name globbing
  (=* / !*), combined with not/and/or.  Parse actions emit nested
  [OP, ...] filter lists.

  Note: the parse actions previously used Python 2-only tuple-parameter
  unpacking (``lambda (a, b): ...``, removed by PEP 3113); they are now
  written index-based so the module also loads on Python 3.

  @rtype: pyparsing.ParserElement

  """
  field_name = pyp.Word(pyp.alphas, pyp.alphanums + "_/.")

  # Integer
  num_sign = pyp.Word("-+", exact=1)
  number = pyp.Combine(pyp.Optional(num_sign) + pyp.Word(pyp.nums))
  number.setParseAction(lambda toks: int(toks[0]))

  quoted_string = pyp.quotedString.copy().setParseAction(pyp.removeQuotes)

  # Right-hand-side value
  rval = (number | quoted_string)

  # Boolean condition
  bool_cond = field_name.copy()
  bool_cond.setParseAction(lambda toks: [[OP_TRUE, toks[0]]])

  # Simple binary conditions
  binopstbl = {
    "==": OP_EQUAL,
    "!=": OP_NOT_EQUAL,
    "<": OP_LT,
    "<=": OP_LE,
    ">": OP_GT,
    ">=": OP_GE,
    }

  binary_cond = (field_name + pyp.oneOf(list(binopstbl)) + rval)
  binary_cond.setParseAction(
    lambda toks: [[binopstbl[toks[1]], toks[0], toks[2]]])

  # "in" condition
  in_cond = (rval + pyp.Suppress("in") + field_name)
  in_cond.setParseAction(lambda toks: [[OP_CONTAINS, toks[1], toks[0]]])

  # "not in" condition
  not_in_cond = (rval + pyp.Suppress("not") + pyp.Suppress("in") + field_name)
  not_in_cond.setParseAction(
    lambda toks: [[OP_NOT, [OP_CONTAINS, toks[1], toks[0]]]])

  # Regular expression, e.g. m/foobar/i
  regexp_val = pyp.Group(
    pyp.Optional("m").suppress() +
    pyp.MatchFirst([pyp.QuotedString(i, escChar="\\")
                    for i in _KNOWN_REGEXP_DELIM]) +
    pyp.Optional(pyp.Word(pyp.alphas), default=""))
  regexp_val.setParseAction(_ConvertRegexpValue)
  regexp_cond = (field_name + pyp.Suppress("=~") + regexp_val)
  regexp_cond.setParseAction(lambda toks: [[OP_REGEXP, toks[0], toks[1]]])

  not_regexp_cond = (field_name + pyp.Suppress("!~") + regexp_val)
  not_regexp_cond.setParseAction(
    lambda toks: [[OP_NOT, [OP_REGEXP, toks[0], toks[1]]]])

  # Globbing, e.g. name =* "*.site"
  glob_cond = (field_name + pyp.Suppress("=*") + quoted_string)
  glob_cond.setParseAction(
    lambda toks: [[OP_REGEXP, toks[0], utils.DnsNameGlobPattern(toks[1])]])

  not_glob_cond = (field_name + pyp.Suppress("!*") + quoted_string)
  not_glob_cond.setParseAction(
    lambda toks:
      [[OP_NOT, [OP_REGEXP, toks[0], utils.DnsNameGlobPattern(toks[1])]]])

  # All possible conditions
  condition = (binary_cond ^ bool_cond ^
               in_cond ^ not_in_cond ^
               regexp_cond ^ not_regexp_cond ^
               glob_cond ^ not_glob_cond)

  # Associativity operators
  filter_expr = pyp.operatorPrecedence(condition, [
    (pyp.Keyword("not").suppress(), 1, pyp.opAssoc.RIGHT,
     lambda toks: [[OP_NOT, toks[0][0]]]),
    (pyp.Keyword("and").suppress(), 2, pyp.opAssoc.LEFT,
     _ConvertLogicOp(OP_AND)),
    (pyp.Keyword("or").suppress(), 2, pyp.opAssoc.LEFT,
     _ConvertLogicOp(OP_OR)),
    ])

  parser = pyp.StringStart() + filter_expr + pyp.StringEnd()
  parser.parseWithTabs()

  # Originally C{parser.validate} was called here, but there seems to be some
  # issue causing it to fail whenever the "not" operator is included above.

  return parser
class Token:
    """Encapsulates transforming mini-language patterns tokens into regex.

    A token string is parsed once at construction time (via the class-level
    pyparsing grammar ``_pp_token``) and converted into a regex fragment,
    optionally wrapped in a named capture group.
    """

    from .patterns import number_patterns as _numpats

    #: Mini-language token string to be parsed
    token = attr.ib()

    #: Whether group capture should be added or not
    do_capture = attr.ib(default=True)

    #: Flag for whether group ID substitution needs to be done
    needs_group_id = attr.ib(default=False, init=False, repr=False)

    # Internal pyparsing parse result and generated regex pattern
    _pr = attr.ib(default=None, init=False, repr=False)
    _pattern = attr.ib(default=None, init=False, repr=False)

    # ##### pyparsing pattern internals #####

    # ## MINOR PATTERN COMPONENTS ##
    group_prefix = "g"
    _s_any_flag = "~"
    _s_capture = "!"

    _pp_space_after = pp.Optional(
        pp.Word("".join(SpaceAfter), exact=1)
    ).setResultsName(TokenField.SpaceAfter)
    _pp_capture = pp.Optional(pp.Literal(_s_capture)).setResultsName(
        TokenField.Capture
    )
    _pp_quantity = pp.Word("".join(Quantity), exact=1).setResultsName(
        TokenField.Quantity
    )

    # ## OPTIONAL LINE TOKEN ##
    _pp_optional_line = pp.Literal(Content.OptionalLine.value).setResultsName(
        TokenField.Type
    )

    # ## ARBITRARY CONTENT TOKEN ##
    # Anything may be matched here, including multiple words.
    _pp_any_flag = (
        pp.Literal(_s_any_flag).setResultsName(TokenField.Type) + _pp_capture
    )

    # ## LITERAL STRING TOKEN ##
    # Marker for the rest of the token to be a literal string
    _pp_str_flag = pp.Literal(Content.String.value).setResultsName(
        TokenField.Type
    )

    # Remainder of the content after the marker, spaces included
    _pp_str_value = pp.Word(pp.printables + " ").setResultsName(TokenField.Str)

    # Composite pattern for a literal string
    _pp_string = (
        _pp_str_flag + _pp_space_after + _pp_capture + _pp_quantity
        + _pp_str_value
    )

    # ## MISC SINGLE VALUE TOKEN ##
    # Initial marker for the 'misc' token
    _pp_misc_flag = pp.Literal(Content.Misc.value).setResultsName(
        TokenField.Type
    )

    # Composite token pattern for the misc match
    _pp_misc = _pp_misc_flag + _pp_space_after + _pp_capture + _pp_quantity

    # ## NUMERICAL VALUE TOKEN ##
    # Initial marker for a numerical value
    _pp_num_flag = pp.Literal(Content.Number.value).setResultsName(
        TokenField.Type
    )

    # Marker for the sign of the value; period indicates either sign
    _pp_num_sign = pp.Word("".join(Sign), exact=1).setResultsName(
        TokenField.Sign
    )

    # Marker for the number type to look for
    _pp_num_type = pp.Word("".join(Number), exact=1).setResultsName(
        TokenField.Number
    )

    # Composite pattern for a number
    _pp_number = (
        _pp_num_flag
        + _pp_space_after
        + _pp_capture
        + _pp_quantity
        + pp.Group(_pp_num_sign + _pp_num_type).setResultsName(
            TokenField.SignNumber
        )
    )

    # ## COMBINED TOKEN PARSER ##
    # Anchored at both ends; '^' requires exactly one alternative to match.
    _pp_token = (
        pp.StringStart()
        + (
            _pp_optional_line
            ^ _pp_any_flag
            ^ _pp_string
            ^ _pp_number
            ^ _pp_misc
        )
        + pp.StringEnd()
    )

    # Informational properties
    @property
    def pattern(self):
        """Return assembled regex pattern from the token, as |str|."""
        return self._pattern

    @property
    def is_any(self):
        """Return flag for whether the token is an "any content" token."""
        return self._pr[TokenField.Type] == Content.Any

    @property
    def is_optional_line(self):
        """Return flag for whether the token flags an optional line."""
        return self._pr[TokenField.Type] == Content.OptionalLine

    @property
    def is_str(self):
        """Return flag for whether the token matches a literal string."""
        return self._pr[TokenField.Type] == Content.String

    @property
    def is_misc(self):
        """Return flag for whether the token is a misc token."""
        return self._pr[TokenField.Type] == Content.Misc

    @property
    def is_num(self):
        """Return flag for whether the token matches a number."""
        return self._pr[TokenField.Type] == Content.Number

    @property
    def match_quantity(self):
        """Return match quantity.

        |None| for :attr:`pent.enums.Content.Any` or
        :attr:`pent.enums.Content.OptionalLine`

        """
        if self.is_any or self.is_optional_line:
            return None
        else:
            return Quantity(self._pr[TokenField.Quantity])

    @property
    def number(self):
        """Return number format; |None| if token doesn't match a number."""
        if self.is_num:
            return Number(self._pr[TokenField.SignNumber][TokenField.Number])
        else:
            return None

    @property
    def sign(self):
        """Return number sign; |None| if token doesn't match a number."""
        if self.is_num:
            return Sign(self._pr[TokenField.SignNumber][TokenField.Sign])
        else:
            return None

    @property
    def space_after(self):
        """Return Enum value for handling of post-match whitespace."""
        if self.is_any:
            return False
        elif TokenField.SpaceAfter in self._pr:
            return SpaceAfter(self._pr[TokenField.SpaceAfter])
        else:
            return SpaceAfter.Required

    @property
    def capture(self):
        """Return flag for whether a regex capture group should be created."""
        return TokenField.Capture in self._pr

    def __attrs_post_init__(self):
        """Parse the token string and assemble the regex pattern."""
        try:
            self._pr = self._pp_token.parseString(self.token)
        except pp.ParseException as e:
            raise TokenError(self.token) from e

        if self.is_any:
            self._pattern, self.needs_group_id = self._selective_group_enclose(
                ".*?"
            )
            return

        # Only single and one-or-more captures implemented for now.
        # Optional and zero-or-more captures may actually not be feasible
        if self.is_str:
            # Always store the string pattern
            self._pattern = self._string_pattern()

            # Modify, depending on the Quantity
            if self.match_quantity is Quantity.OneOrMore:
                self._pattern = "(" + self._pattern + ")+"

        elif self.is_num:
            self._pattern = self._get_number_pattern()

            if self.match_quantity is Quantity.OneOrMore:
                self._pattern += r"([ \t]+{})*".format(self._pattern)

        elif self.is_misc:
            self._pattern = self._get_misc_pattern()

            if self.match_quantity is Quantity.OneOrMore:
                self._pattern += r"([ \t]+{})*".format(self._pattern)

        elif self.is_optional_line:
            pass

        else:  # pragma: no cover
            raise NotImplementedError(
                "Unknown content type somehow specified!"
            )

        self._pattern, self.needs_group_id = self._selective_group_enclose(
            self._pattern
        )

    def _string_pattern(self):
        """Create a literal string pattern from the parse result."""
        pattern = ""

        for c in self._pr[TokenField.Str]:
            if c in r"[\^$.|?*+(){}":
                # Must escape regex special characters
                pattern += "\\" + c
            else:
                pattern += c

        return pattern

    def _get_misc_pattern(self):
        """Return the no-whitespace item pattern.

        Lazy capture is probably the best approach here, for
        optional-space after situations?  That way, it will be as generous
        as possible in not aggressively consuming negative signs on
        following numeric fields, but should still expand to whatever
        extent necessary to cover the whole misc field.

        """
        return r"[^ \t\n]+?"

    def _get_number_pattern(self):
        """Return the correct number pattern given the parse result."""
        num = Number(self._pr[TokenField.SignNumber][TokenField.Number])
        sign = Sign(self._pr[TokenField.SignNumber][TokenField.Sign])

        return self._numpats[num, sign]

    @classmethod
    def _group_open(cls):
        """Create the opening pattern for a named group.

        This leaves a formatting placeholder for the invoking Parser
        to inject the appropriate group ID.

        """
        return r"(?P<{0}{{0}}>".format(cls.group_prefix)

    @staticmethod
    def _group_close():
        """Create the closing pattern for a named group."""
        return ")"

    def _selective_group_enclose(self, pat):
        """Return token pattern enclosed in a named group IF it should be.

        When both ``do_capture`` and the token's own capture flag are set,
        returns ``(grouped_pattern, True)`` where the group name still
        contains the ``{0}`` placeholder for the group ID; otherwise
        returns ``(pat, False)`` unchanged.

        """
        if self.do_capture and self.capture:
            return (self._group_open() + pat + self._group_close(), True)
        else:
            return pat, False
def parser_factory_abc(styler):
    """Build an anchored parser accepting one or more 'a'/'b'/'c' tokens.

    'a' is styled red (#f00) and 'b' blue (#00f) via *styler*; a bare 'c'
    is matched unstyled.
    """
    red_a = styler('#f00', 'a')
    blue_b = styler('#00f', 'b')
    token = red_a | blue_b | 'c'
    return pp.StringStart() + pp.OneOrMore(token)
def create_parser(self): LPAR, RPAR, COMMA = map(pp.Suppress, "(),") DOT, STAR = map(pp.Literal, ".*") #select_stmt = Forward().setName("select statement") #UNION = CaselessKeyword('UNION') #ALL = CaselessKeyword('ALL') AND = pp.CaselessKeyword('AND') #INTERSECT = CaselessKeyword('INTERSECT') #EXCEPT = CaselessKeyword('EXCEPT') #COLLATE = CaselessKeyword('COLLATE') ASC = pp.CaselessKeyword('ASC') DESC = pp.CaselessKeyword('DESC') #ON = CaselessKeyword('ON') #USING = CaselessKeyword('USING') NATURAL = pp.CaselessKeyword('NATURAL') #INNER = CaselessKeyword('INNER') #CROSS = CaselessKeyword('CROSS') #LEFT = CaselessKeyword('LEFT') #OUTER = CaselessKeyword('OUTER') #JOIN = CaselessKeyword('JOIN') AS = pp.CaselessKeyword('AS').suppress() #INDEXED = CaselessKeyword('INDEXED') NOT = pp.CaselessKeyword('NOT') SELECT = pp.CaselessKeyword('SELECT').suppress() TOP = pp.CaselessKeyword('TOP').suppress() #DISTINCT = CaselessKeyword('DISTINCT') FROM = pp.CaselessKeyword('FROM').suppress() WHERE = pp.CaselessKeyword('WHERE').suppress() GROUP = pp.CaselessKeyword('GROUP') BY = pp.CaselessKeyword('BY').suppress() HAVING = pp.CaselessKeyword('HAVING') ORDER = pp.CaselessKeyword('ORDER').suppress() LIMIT = pp.CaselessKeyword('LIMIT').suppress() #OFFSET = CaselessKeyword('OFFSET') OR = pp.CaselessKeyword('OR') #CAST = CaselessKeyword('CAST') ISNULL = pp.CaselessKeyword('ISNULL') NOTNULL = pp.CaselessKeyword('NOTNULL') NULL = pp.CaselessKeyword('NULL') IS = pp.CaselessKeyword('IS') BETWEEN = pp.CaselessKeyword('BETWEEN') #ELSE = CaselessKeyword('ELSE') #END = CaselessKeyword('END') #CASE = CaselessKeyword('CASE') #WHEN = CaselessKeyword('WHEN') #THEN = CaselessKeyword('THEN') #EXISTS = CaselessKeyword('EXISTS') IN = pp.CaselessKeyword('IN') LIKE = pp.CaselessKeyword('LIKE') GLOB = pp.CaselessKeyword('GLOB') REGEXP = pp.CaselessKeyword('REGEXP') MATCH = pp.CaselessKeyword('MATCH') ESCAPE = pp.CaselessKeyword('ESCAPE') QUERYSTRING = pp.CaselessKeyword('QUERYSTRING') #CURRENT_TIME = 
CaselessKeyword('CURRENT_TIME') #CURRENT_DATE = CaselessKeyword('CURRENT_DATE') #CURRENT_TIMESTAMP = CaselessKeyword('CURRENT_TIMESTAMP') TRUE = pp.CaselessKeyword('TRUE') #.setParseAction( lambda s, loc, toks: True ) FALSE = pp.CaselessKeyword('FALSE') #.setParseAction( lambda s, loc, toks: False ) keywords = [ #UNION , #ALL , AND , #INTERSECT , #EXCEPT , #COLLATE , ASC , DESC , #ON , #USING , NATURAL , #INNER , #CROSS , #LEFT , #OUTER , #JOIN , AS , #INDEXED , NOT , SELECT , #DISTINCT , FROM , WHERE , GROUP , BY , HAVING , ORDER , LIMIT , #OFFSET , OR , #CAST , ISNULL , NOTNULL , NULL , IS , BETWEEN , #ELSE , #END , #CASE , #WHEN , #THEN , #EXISTS , IN , LIKE , GLOB , REGEXP , MATCH , ESCAPE , #CURRENT_TIME , #CURRENT_DATE , #CURRENT_TIMESTAMP , QUERYSTRING , TRUE , FALSE , ] keywords = [k.suppress() for k in keywords] any_keyword = pp.MatchFirst(keywords) quoted_identifier = pp.QuotedString('"', escQuote='""') identifier = (~any_keyword + pp.Word(pp.alphas + '@', pp.alphanums + "_")) .setParseAction( pp.pyparsing_common.downcaseTokens) | quoted_identifier #collation_name = identifier.copy() column_name = identifier.copy() .setParseAction( lambda s, loc, toks: self.createStringLiteral(toks[0]) ) column_alias = identifier.copy() .setParseAction( lambda s, loc, toks: self.createStringLiteral(toks[0]) ) table_name = identifier.copy() .setParseAction( lambda s, loc, toks: self.createStringLiteral(toks[0]) ) table_alias = identifier.copy() .setParseAction( lambda s, loc, toks: self.createStringLiteral(toks[0]) ) index_name = identifier.copy() function_name = identifier.copy() parameter_name = identifier.copy() #database_name = identifier.copy() comment = "--" + pp.restOfLine # expression expr = pp.Forward().setName("expression") numeric_literal = pp.pyparsing_common.number string_literal = pp.QuotedString("'", escQuote="''") query_string_literal = pp.QuotedString("`", escQuote='\\`') dotted_identifier = pp.delimitedList(identifier, delim='.', combine=True) 
.setParseAction( lambda s, loc, toks: self.createIdentifier(toks[0]) ) #blob_literal = Regex(r"[xX]'[0-9A-Fa-f]+'") literal_value = ( numeric_literal .setParseAction( lambda s, loc, toks: self.createNumericLiteral(toks[0]) ) | string_literal .setParseAction( lambda s, loc, toks: self.createStringLiteral(toks[0]) ) #| dotted_identifier # | blob_literal | TRUE .setParseAction( lambda s, loc, toks: self.createBoolLiteral(True) ) | FALSE .setParseAction( lambda s, loc, toks: self.createBoolLiteral(False) ) | NULL #| CURRENT_TIME .setParseAction( lambda s, loc, toks: self.createIdentifier(toks[0]) ) #| CURRENT_DATE .setParseAction( lambda s, loc, toks: self.createIdentifier(toks[0]) ) #| CURRENT_TIMESTAMP .setParseAction( lambda s, loc, toks: self.createIdentifier(toks[0]) ) ) #bind_parameter = Word("?", nums) | Combine(oneOf(": @ $") + parameter_name) #type_name = oneOf("TEXT REAL INTEGER BLOB NULL") def _op_function( s, loc, toks ): # 0 1 # <identifier> <p1> <p2> ... toks = list(toks) name = toks.pop(0) params = toks return self.createCall(name, params) def _op_query_string(s, loc, toks): # 0 # QUERY_STRING `....` toks = list(toks) query_string = toks[0] return self.createQueryString(query_string) expr_term = ( #CAST + LPAR + expr + AS + type_name + RPAR #| # EXISTS + LPAR + select_stmt + RPAR #| (function_name.setName("function_name") .setParseAction( lambda s, loc, toks: self.createIdentifier(toks[0]) ) + LPAR + pp.Optional( STAR .setParseAction( lambda s, loc, toks: self.createIdentifier("*") ) | pp.delimitedList(expr) ).setName('params') + RPAR ) .setParseAction( _op_function ) | (pp.Optional(QUERYSTRING).suppress() + query_string_literal) .setParseAction( _op_query_string ) | literal_value #| bind_parameter | #Group( # identifier("col_db") + DOT + identifier("col_tab") + DOT + identifier("col") #) #| #Group(identifier("col_tab") + DOT + identifier("col")) #| #Group(identifier("col")) .setParseAction( lambda s, loc, toks: ASTIdentifier(toks[0]) ) # # 
<identifier>.<identifier>.<identifier> # dotted_identifier ) NOT_NULL = pp.Group(NOT + NULL) NOT_BETWEEN = pp.Group(NOT + BETWEEN) NOT_IN = pp.Group(NOT + IN) NOT_LIKE = pp.Group(NOT + LIKE) NOT_MATCH = pp.Group(NOT + MATCH) NOT_GLOB = pp.Group(NOT + GLOB) NOT_REGEXP = pp.Group(NOT + REGEXP) UNARY, BINARY, TERNARY = 1, 2, 3 def _op_handle_unary(s, loc, toks): toks = list(toks[0]) op = toks.pop(0) e1 = toks.pop(0) expr = self.createASTUnaryExpr(op, e1) return [ expr ] def _op_handle_binary(s, loc, toks): toks = list(toks[0]) # e1 op e2 op e3 op e3 expr1 = toks.pop(0) if not toks: return expr1 while toks: op = toks.pop(0) expr2 = toks.pop(0) expr1 = self.createBinaryExpr(op, expr1, expr2) return [ expr1 ] def _op_handle_ternary(s, loc, toks): toks = list(toks[0]) # 0 1 2 3 4 # e1 between e2 and e3 expr1 = toks[0] expr2 = toks[2] expr3 = toks[4] expr = self.createASTTernaryExpr('between', expr1, expr2, expr3) return [ expr ] expr << pp.infixNotation( expr_term , [ (pp.oneOf("- + ~") | NOT , UNARY , pp.opAssoc.RIGHT , _op_handle_unary ), (ISNULL | NOTNULL | NOT_NULL , UNARY , pp.opAssoc.LEFT , _op_handle_unary ), ("||" , BINARY, pp.opAssoc.LEFT , _op_handle_binary), (pp.oneOf("* / %") , BINARY, pp.opAssoc.LEFT , _op_handle_binary), (pp.oneOf("+ -") , BINARY, pp.opAssoc.LEFT , _op_handle_binary), (pp.oneOf("<< >> & |") , BINARY, pp.opAssoc.LEFT , _op_handle_binary), (pp.oneOf("< <= > >=") , BINARY, pp.opAssoc.LEFT , _op_handle_binary), ( pp.oneOf("= == != <>") | IS | IN | LIKE | GLOB | MATCH | REGEXP | NOT_IN | NOT_LIKE | NOT_GLOB | NOT_MATCH | NOT_REGEXP, BINARY, pp.opAssoc.LEFT , _op_handle_binary ), ((BETWEEN | NOT_BETWEEN, AND), TERNARY, pp.opAssoc.LEFT , _op_handle_ternary), ( (IN | NOT_IN) + LPAR + pp.Group( #select_stmt | pp.delimitedList(expr) ) + RPAR , UNARY, pp.opAssoc.LEFT , _op_handle_unary ), (AND , BINARY, pp.opAssoc.LEFT , _op_handle_binary), (OR , BINARY, pp.opAssoc.LEFT , _op_handle_binary), ], ) #compound_operator = UNION + Optional(ALL) | INTERSECT | 
EXCEPT # # ORDER BY # def _opt_ordering_term(s, loc, toks): toks = list(toks[0]) # expr DESC|ASC direction = None expr = toks.pop(0) if toks: direction = toks.pop(0) return self.createOrderTerm(expr, direction) ordering_term = pp.Group( (expr("order_key")) #+ Optional(COLLATE + collation_name("collate")) + (pp.Optional(ASC | DESC)("direction")) ) .setParseAction( _opt_ordering_term ) #join_constraint = Group( # Optional(ON + expr | USING + LPAR + Group(delimitedList(column_name)) + RPAR) #) #join_op = COMMA | Group( # Optional(NATURAL) + Optional(INNER | CROSS | LEFT + OUTER | LEFT | OUTER) + JOIN #) def _op_single_source(s, loc, toks): toks = list(toks) table_alias = None table_identifier = toks.pop(0)[0] if toks: table_alias = toks.pop(0) return self.createASTTable(table_identifier, table_alias) #join_source = Forward() single_source = ( pp.Group( #database_name("database") + DOT + table_name("table*") #| table_name("table*") ) + pp.Optional(pp.Optional(AS) + table_alias("table_alias*")) # + Optional(INDEXED + BY + index_name("name") | NOT + INDEXED)("index") #| #(LPAR + select_stmt + RPAR + Optional(Optional(AS) + table_alias)) #| (LPAR + join_source + RPAR) ) .setParseAction( _op_single_source ) # #join_source <<= ( # #Group(single_source + OneOrMore(join_op + single_source + join_constraint)) # #| # single_source #) join_source = single_source def _op_col_alias_callback(s, loc, toks): expr = toks.get('expr') alias = toks.get('alias') return self.createASTSelectColumn(expr, alias) # result_column = "*" | table_name + "." 
+ "*" | Group(expr + Optional(Optional(AS) + column_alias)) result_column = ( STAR("expr") | #table_name("col_table") + DOT + STAR("col") .setParseAction( _op_col_alias_callback ) #| ( ( expr("expr") + pp.Optional(pp.Optional(AS) + column_alias("alias") ) ) .setParseAction( _op_col_alias_callback ) ) ) def _op_columns_stmt(s, loc, toks): col_exprs = list(toks) #col_exprs = [ e for e in col_exprs] return self.createASTSelect(col_exprs) #columns_stmt = Optional(DISTINCT | ALL) + Group(delimitedList(result_column))("columns") columns_stmt = pp.delimitedList(result_column) .setParseAction( _op_columns_stmt ) # # FROM # def _op_from_callback(s, loc, toks): tables = list(toks.get("from")) return self.createASTFrom( tables ) #from_stmt = (FROM + join_source("from*")) .setParseAction( _op_from_callback ) from_stmt = (FROM + pp.delimitedList(join_source, delim=',')("from")) .setParseAction( _op_from_callback ) # # TOP # #def _op_top_stmt(s, loc, toks): # expr = toks.get("top_expr") # o = { # 'TOP': expr # } # return o # #top_stmt = (TOP + expr("top_expr")) .setParseAction( _op_top_stmt ) # # WHERE # def _op_where_callback(s, loc, toks): expr = toks.get("where_expr") return self.createASTWhere(expr) where_stmt = (WHERE + expr("where_expr")) .setParseAction( _op_where_callback ) # # GROUP # def _op_group_stmt(s, loc, toks): expr_list = list(toks['group_by_terms']) return self.createASTGroup(expr_list) group_stmt = ( GROUP + BY + #Group(delimitedList(ordering_term))("group_by_terms") pp.Group(pp.delimitedList(dotted_identifier))("group_by_terms") ) .setParseAction( _op_group_stmt ) # # HAVING # def _op_having_stmt(s, loc, toks): #expr = toks['having_expr'] #return self.createASTHaving( expr ) expr_list = list(toks['having_terms']) return self.createASTHaving(expr_list) having_stmt = ( HAVING + pp.Group(pp.delimitedList(expr))("having_terms") ) .setParseAction( _op_having_stmt ) # # ORDER # def _op_order_stmt(s, loc, toks): expr_list = list(toks['ordering_terms']) return 
self.createASTOrder(expr_list) order_stmt = ( ORDER + BY + pp.Group( pp.delimitedList(ordering_term) )('ordering_terms') ) .setParseAction( _op_order_stmt ) # # LIMIT # def _op_limit_stmt(s, loc, toks): expr = toks['limit_value'] return self.createASTLimit(expr) limit_stmt = (LIMIT + #Group(expr + OFFSET + expr) #| #Group(expr + COMMA + expr) #| expr('limit_value') ) .setParseAction( _op_limit_stmt ) # # SELECT ... FROM ... WHERE .... GROUP BY... HAVING ..... # def _op_select_core(s, loc, toks): s = toks.get('COLUMNS') f = toks.get('FROM' ) w = toks.get('WHERE' ) g = toks.get('GROUP' ) h = toks.get('HAVING' ) return self.createSelectBody(s,f, w, g, h) select_core = ( SELECT #+ Optional(top_stmt('TOP')) + columns_stmt('COLUMNS') + pp.Optional(from_stmt ('FROM' )) + pp.Optional(where_stmt('WHERE')) + pp.Optional( group_stmt('GROUP') + pp.Optional(having_stmt('HAVING')) ) ) .setParseAction( _op_select_core ) # # # def _opt_select_stmt(s, loc, toks): s = toks['SELECT_BODY'] o = toks.get('ORDER') l = toks.get('LIMIT') return self.createQuery(s, o, l) #select_stmt << ( # select_core ("select_core") # #+ ZeroOrMore(compound_operator + select_core) # #+ Optional(order_stmt) ("order_stmt") # + Optional(limit_stmt("limit_stmt")) #) .setParseAction( _opt_select_stmt ) select_stmt = ( select_core ("SELECT_BODY") #+ ZeroOrMore(compound_operator + select_core) + (pp.Optional(order_stmt("ORDER"))) + (pp.Optional(limit_stmt("LIMIT"))) ) .setParseAction( _opt_select_stmt ) # # # START = pp.StringStart().suppress() END = pp.StringEnd().suppress() query_stm = START + select_stmt + END query_stm.ignore(comment) return query_stm
def createParser(self):
    """Build and return a parser for the RestrictedXpathQuery grammar.

    Returns the bound ``parseString`` method of the assembled pyparsing
    grammar, so callers invoke the result directly on a query string.
    Parse actions dispatch to the ``self.eval*`` / ``self.remove_list``
    handlers during parsing.
    """
    # xml standard tokens (see: http://www.w3.org/TR/REC-xml)
    # NOTE(review): "\u10000" in a Python string is U+1000 followed by '0',
    # not U+10000 — confirm this last srange covers the intended plane.
    xmlNameStartChar = pp.alphas + ":" + "_" + \
        pp.srange("[\u00C0-\u00D6]") + \
        pp.srange("[\u00D8-\u00F6]") + \
        pp.srange("[\u00F8-\u02FF]") + \
        pp.srange("[\u0370-\u037D]") + \
        pp.srange("[\u037F-\u1FFF]") + \
        pp.srange("[\u200C-\u200D]") + \
        pp.srange("[\u2070-\u218F]") + \
        pp.srange("[\u2C00-\u2FEF]") + \
        pp.srange("[\u3001-\uD7FF]") + \
        pp.srange("[\uF900-\uFDCF]") + \
        pp.srange("[\uFDF0-\uFFFD]") + \
        pp.srange("[\u10000-\uEFFFF]")
    # NOTE(review): unichr() is Python-2-only (chr() in Python 3) —
    # confirm the targeted interpreter before porting.
    xmlNameChar = xmlNameStartChar + "-" + "." + pp.nums + \
        unichr(0xB7) + pp.srange("[\u0300-\u036F]") + \
        pp.srange("[\u203F-\u2040]")
    # custom tokens
    wildcard = pp.Literal(self.WILDCARD)     # node wildcard operator
    sep = pp.Literal(self.SEP)               # path separator
    selfNd = pp.Literal('.').suppress()      # current node
    parentNd = pp.Literal(self.PARENT)       # parent of current node
    lpar = pp.Literal('(').suppress()        # left parenthesis literal
    rpar = pp.Literal(')').suppress()        # right parenthesis literal
    pstart = pp.Literal('[').suppress()      # beginning of predicates
    pend = pp.Literal(']').suppress()        # end of predicates
    # namespace prefix
    ncPrefix = pp.Word(xmlNameStartChar, xmlNameChar) + ':'
    # node name, may contain a namespace prefix and may start with '@' for
    # attribute nodes
    ndName = pp.Combine(pp.Optional('@') + pp.Optional(ncPrefix) + \
                        pp.Word(xmlNameStartChar, xmlNameChar))
    node = wildcard | parentNd | selfNd | ndName  # node
    # literal value delimited by either "" or ''
    literalValue = pp.Literal('"').suppress() + \
                   pp.CharsNotIn('"') + \
                   pp.Literal('"').suppress() \
                   | \
                   pp.Literal("'").suppress() + \
                   pp.CharsNotIn("'") + \
                   pp.Literal("'").suppress()
    # numbers: optional sign, digits, optional fractional part
    numericValue = pp.Combine(pp.Optional('-') + \
                              pp.Word(pp.nums) + \
                              pp.Optional('.' + pp.Word(pp.nums)))
    # keywords
    orderBy = pp.CaselessKeyword('order by')
    asc = pp.CaselessKeyword('asc')
    desc = pp.CaselessKeyword('desc')
    limit = pp.CaselessKeyword('limit')
    offset = pp.CaselessKeyword('offset')
    # operators; '==' is normalized to '=' by the parse action
    eqOp = pp.Literal('==').setParseAction(pp.replaceWith("=")) | \
           pp.Literal('=')
    ltOp = pp.Literal('<')
    gtOp = pp.Literal('>')
    leOp = pp.Literal('<=')
    geOp = pp.Literal('>=')
    ineqOp = pp.Literal('!=')
    orOp = pp.CaselessKeyword('or')
    andOp = pp.CaselessKeyword('and')
    # order matters: '<='/'>=' are tried before '<'/'>'
    relOp = eqOp | ineqOp | leOp | geOp | ltOp | gtOp
    logOp = orOp | andOp
    # functions
    notFunc = pp.CaselessKeyword('not')
    # location step: /package/resourcetype/step/step...
    package_id = (pp.Word(pp.alphanums + "-_") | wildcard).\
        setResultsName('package_id').\
        setParseAction(self.evalPackage_id).suppress()
    resourcetype_id = (pp.Word(pp.alphanums + "-_") | wildcard).\
        setResultsName('resourcetype_id').\
        setParseAction(self.evalResourcetype_id).suppress()
    locationStep = (sep.suppress() + (ndName | wildcard)).\
        setResultsName('locationStep', True)
    location = (sep.suppress() + package_id + \
                sep.suppress() + resourcetype_id + \
                pp.ZeroOrMore(locationStep)).\
        setParseAction(self.evalLocationSteps)
    # predicate expression (recursive via the Forward)
    pexpr = pp.Forward().setParseAction(self.remove_list)
    pathExpr = (pp.Optional(sep) + node + \
                pp.ZeroOrMore(sep.suppress() + node)).\
        setParseAction(self.evalPath)
    valueExpr = literalValue | numericValue
    relExpr = pathExpr + pp.Optional(relOp + (valueExpr | pathExpr))
    parExpr = pp.Group(lpar + pexpr + rpar)
    notExpr = pp.Group(notFunc + parExpr)
    pexpr << (notExpr | pp.Group(relExpr) | parExpr) + \
        pp.Optional(logOp + (pp.Group(pexpr) | parExpr))
    # order by clause; direction defaults to 'asc'
    obItem = (pathExpr + pp.Optional(asc | desc, 'asc')).\
        setResultsName('order_by', listAllMatches=True)
    orderByExpr = orderBy + pp.delimitedList(obItem, ',')
    # limit and offset; 'limit N,M' also sets the offset
    limitExpr = limit + pp.Word(pp.nums).setResultsName('limit') + \
        pp.Optional(',' + \
                    pp.Word(pp.nums).setResultsName('offset'))
    offsetExpr = offset + pp.Word(pp.nums).setResultsName('offset')
    # query: location [predicates] [order by] [limit] [offset]
    predicates = (pstart + pexpr + pend).setResultsName('predicates')
    query = pp.StringStart() + \
        location + \
        pp.Optional(predicates) + \
        pp.Optional(orderByExpr) + \
        pp.Optional(limitExpr) + \
        pp.Optional(offsetExpr) + \
        pp.StringEnd()
    return query.parseString
def detectChemicalType(formula):
    """detectChemicalType: utility routine for detecting chemical type

    Arguments
    ----------
    formula : str
        Text version of the formula (e.g. "H2O", "NaCl", "CH4").

    Returns
    -------
    type : str
        One of "water" | "oxygen" | "element" | "ionic" | "hydrocarbon" |
        "acid" | "covalent" (the fallback).
    """

    def _matches(grammar):
        # True when `formula` parses against `grammar`; replaces the
        # repeated try/parseString/except boilerplate.
        try:
            grammar.parseString(formula)
            return True
        except pp.ParseException:
            return False

    # Hard-coded special cases checked before any grammar matching.
    if formula in ['H2O', 'HOH']:
        return "water"
    if formula == 'O2':
        return "oxygen"

    # Pure element: a known symbol with an optional subscript (e.g. "N2").
    integer = pp.Word(pp.nums)
    element = pp.StringStart() + pp.oneOf(element_symbols) + pp.Optional(
        integer) + pp.StringEnd()
    if _matches(element):
        return "element"

    # A leading metal cation marks the compound as ionic.
    cation = pp.StringStart() + pp.oneOf(ionic_metal_symbols)
    if _matches(cation):
        return "ionic"

    # Hydrocarbons: start with H or C, then only C/H/O and digits —
    # but CO2/CO are covalent and H2CO3 is treated as ionic first.
    integer_or_hco = pp.Word("HC", "CHO1234567890")
    hydrocarbon = pp.StringStart() + integer_or_hco + pp.StringEnd()
    if formula in ["CO2", "CO"]:
        return "covalent"
    if formula in ["H2CO3"]:
        return "ionic"
    if _matches(hydrocarbon):
        return "hydrocarbon"

    # Polyatomic ammonium cation, bare or parenthesized.
    ammonium_formulas = [ammonium_formula, '(' + ammonium_formula + ')']
    polycation = pp.StringStart() + pp.oneOf(ammonium_formulas)
    if _matches(polycation):
        return "ionic"

    # Acids: leading H not followed by e/o/f/g/s (rules out He, Ho, Hf,
    # Hg, Hs element symbols).
    acid = pp.StringStart() + pp.Char('H') + pp.NotAny(pp.oneOf('e o f g s'))
    if _matches(acid):
        return "acid"

    return "covalent"
True ).setResultsName('substituents') ).setParseAction(lambda t: t.asDict()) atom_chain = full_atom + pp.ZeroOrMore(bond_type + full_atom) substituent_num = (pp.Word(pp.nums, exact=1) ^ (pp.Literal('%').suppress() + pp.Word(pp.nums)) ).setParseAction(lambda t: int(t[0])) substituent <<= (pp.Group(pp.Literal('(').suppress() + bond_type + atom_chain + pp.Literal(')').suppress()) ^ substituent_num) smiles = (pp.StringStart() + pp.Group(atom_chain) + pp.ZeroOrMore(pp.Literal('.').suppress() + pp.Group(atom_chain)) + pp.StringEnd()) def construct_substituent(smiles_substituent, numbered_substituents, i=0): cur_bond = smiles_substituent[i] return chemistry.Bond(cur_bond, construct_atom_graph(smiles_substituent, numbered_substituents, i+1, cur_bond)) def construct_atom_graph(smiles_atoms, numbered_substituents, i=0, behind_bond_type=None): cur_atom = smiles_atoms[i] processed_substituents = [] if behind_bond_type: processed_substituents.append(chemistry.Bond(behind_bond_type, 'behind'))
"""Every IRC client needs some /slash commands.""" import pyparsing as P L = P.Literal Sup = P.Suppress commandWord = P.Word(P.alphanums + '_').setResultsName('commandWord') commandLeader = L('/') commandArgs = P.restOfLine.setResultsName('commandArgs') command = (P.StringStart() + Sup(commandLeader) + commandWord + P.Optional(Sup(P.White()) + commandArgs)).setResultsName('command') nonCommand = (P.StringStart() + P.restOfLine).setResultsName('nonCommand') line = (command | nonCommand) tests = [ ('hello', 'nonCommand'), ('/me says hi', 'command'), ('/123', 'command'), ('/abc1_ sas', 'command'), ('hi /abc1_ sas', 'nonCommand'), ('/** sup **/', 'nonCommand'), ('/', 'nonCommand'), ('hello /there', 'nonCommand'), ]
# problems with the interface to {sgl,dbl}QuotedString in pyparsing. OPTIONAL_VAR = VAR_NAME + "=" + ( (NAME | REAL | INTEGER) | pparse.QuotedString("'", unquoteResults=False) | pparse.QuotedString('"', unquoteResults=False)) # lambda creates a temporary function which, in this case, takes three # arguments and creates a NamedArg object. OPTIONAL_VAR.setParseAction(lambda strg, loc, toks: [NamedArg(toks)]) GROUP = LPAR + EXPR + RPAR GROUP.setParseAction(lambda strg, loc, toks: [Grouping(toks)]) # Parser will attempt to match with the expressions in the order they # are specified here. Therefore must list them in order of decreasing # generality OPERAND = (GROUP | OPTIONAL_VAR | VAR_OR_FUNCTION | REAL | INTEGER | LITERAL_ARRAY) # Cause the binary operators to work. OPERATOR = pparse.operatorPrecedence(OPERAND, ( (pparse.Literal("**"), 2, pparse.opAssoc.RIGHT, lambda strg, loc, toks: [BinaryOperator(toks)]), (pparse.Literal("*") | pparse.Literal("/"), 2, pparse.opAssoc.LEFT, lambda strg, loc, toks: [BinaryOperator(toks)]), (pparse.Literal("+") | pparse.Literal("-"), 2, pparse.opAssoc.LEFT, lambda strg, loc, toks: [BinaryOperator(toks)]), )) EXPR << (OPERATOR | OPERAND) FORT_EXPRESSION = pparse.StringStart() + EXPR + pparse.StringEnd()
# Top-level expression rule; a Forward so that TARGET, ARGUMENTS and
# LAMBDA_EXPRESSION below can refer to it recursively.
EXPRESSION = pp.Forward()
# "@name" reads variable metadata rather than the variable's value.
VARIABLE_METADATA_NAME = pp.pyparsing_common.identifier.copy()
VARIABLE_METADATA_ACCESS = '@' + VARIABLE_METADATA_NAME
VARIABLE = pp.pyparsing_common.identifier.copy()
# A target: plain variable, parenthesized expression, or metadata access.
TARGET = VARIABLE | ('(' + EXPRESSION + ')') | VARIABLE_METADATA_ACCESS
MEMBER_NAME = pp.pyparsing_common.identifier.copy()
# Parenthesized, comma-separated (possibly empty) argument list.
ARGUMENTS = '(' + pp.Optional(pp.delimitedList(EXPRESSION, delim=",")) + ')'
# Member chains: a.b, a->b(...), a.@meta, Enum::Member — each member may
# optionally be called with arguments.
METHOD_ATTRIBUTE_OR_ENUM = TARGET + pp.ZeroOrMore(
    pp.oneOf(('.', '->', '.@', '::')) + MEMBER_NAME + pp.Optional(ARGUMENTS))
# Single-quoted string with backslash escapes.
STRING = pp.QuotedString(quoteChar="'", escChar="\\")
# Integer or decimal number literal.
NUMBER = pp.Regex("[0-9]+(\\.[0-9]+)?")
VALUE = METHOD_ATTRIBUTE_OR_ENUM | STRING | NUMBER
# UNARY_OPERATORS / BINARY_OPERATORS are defined elsewhere in this module.
UNARY = pp.ZeroOrMore(pp.oneOf(UNARY_OPERATORS)) + VALUE
BINARY = UNARY + pp.ZeroOrMore(pp.oneOf(BINARY_OPERATORS) + UNARY)
# Lambda literal: one or more ':' parameters, '|', then the body, in [].
LAMBDA_EXPRESSION = '[' + pp.OneOrMore(
    ':' + pp.pyparsing_common.identifier.copy()) + '|' + EXPRESSION + ']'
EXPRESSION << (BINARY | LAMBDA_EXPRESSION)
# Anchored form: the expression must consume the entire input string.
WHOLE_EXPRESSION = pp.StringStart() + EXPRESSION + pp.StringEnd()
import pyparsing as pp from funcy import first from dpath import (select_children, select_all_children, select_all_descendants, compose_selectors, select_text, make_filter) ctx = {"text": select_text} start, stop = pp.StringStart(), pp.StringEnd() sep = pp.Literal("/").suppress() osep = pp.Optional(sep) descendants = pp.Literal("**") children = pp.Literal("*") element = pp.Word(pp.alphanums + "-_") func = pp.Word(pp.alphas, pp.alphanums + "-_") + "()" condition = pp.Forward() # condition and path are mutually recursive segment = (descendants | children | func | element) + condition path = osep + segment + pp.ZeroOrMore(sep + segment) + osep condition << pp.Optional( pp.Literal("[").suppress() + path + pp.Literal("]").suppress()) parser = (start + path + stop) @condition.setParseAction def condition_action(txt, loc, toks):
def __create(self):
    """Build and return the pyparsing grammar for the expression language.

    The grammar covers constants, variables ($id), special data
    identifiers ({name@}/{name#}), SNMP OID values, function calls,
    range subscripts, if/then/else, try/catch and the usual
    arithmetic/relational/logical binary operators.  Parse actions build
    AST nodes through the ``self.f`` factory.

    Returns:
        pyparsing.ParserElement: rule matching one whole expression,
        anchored to both ends of the input.
    """
    START = pp.StringStart().suppress()
    END = pp.StringEnd().suppress()

    #----------------------------------------------------------------------#
    # LANGUAGE TOKENS
    #----------------------------------------------------------------------#
    TRUE = pp.Literal('True').setParseAction(lambda s, loc, toks: toks[0])
    FALSE = pp.Literal('False').setParseAction(
        lambda s, loc, toks: toks[0])
    AND = pp.Literal('and').setParseAction(lambda s, loc, toks: toks[0])
    OR = pp.Literal('or').setParseAction(lambda s, loc, toks: toks[0])
    NOT = pp.Literal('not').setParseAction(lambda s, loc, toks: toks[0])
    #
    # Expression's elements
    #
    LEFT_PAREN = pp.Literal('(')
    RIGHT_PAREN = pp.Literal(')')
    LEFT_SPAREN = pp.Literal('[')
    RIGHT_SPAREN = pp.Literal(']')
    COMMA = pp.Literal(',')
    SEMICOLON = pp.Literal(';')
    # OID's syntax elements
    COLUMN = pp.Literal(':')
    TYPE_NEW = pp.Literal('@')
    TYPE_OLD = pp.Literal('#')
    # Unescaped string prefix (r"...")
    UNESCAPE_STR = pp.Literal('r')
    #
    # Operators
    #
    ASSIGN = pp.Literal('=')
    # OIDs concat operator
    DOT = pp.Literal('.')
    PLUS_PLUS = pp.Literal('++')
    MINUS_MINUS = pp.Literal('--')
    POWER = pp.Literal('**')
    PLUS = pp.Literal('+')
    MINUS = pp.Literal('-')
    MULTI = pp.Literal('*')
    DIV = pp.Literal('/')
    MOD = pp.Literal('%')
    EQ = pp.Literal('eq')
    EQUAL = pp.Literal('==')
    NEQUAL = pp.Literal('!=')
    REGEXPQUAL = pp.Literal('=~')
    GT = pp.Literal('>')
    LT = pp.Literal('<')
    GEQ = pp.Literal('>=')
    LEQ = pp.Literal('<=')
    LOGIC_NOT = pp.Literal('!')
    LOGIC_AND = pp.Literal('&&')
    LOGIC_OR = pp.Literal('||')
    BITAND = pp.Literal('&')
    BITOR = pp.Literal('|')
    BITXOR = pp.Literal('^')
    # One's complement operator
    BITONE = pp.Literal('~')
    IF = pp.Literal('if')
    THEN = pp.Literal('then')
    ELSE = pp.Literal('else')
    TRY = pp.Literal('try')
    CATCH = pp.Literal('catch')

    #----------------------------------------------------------------------#
    # Literals
    #----------------------------------------------------------------------#
    QUOTED = pp.QuotedString('"', escChar='\\') | pp.QuotedString(
        "'", escChar='\\')
    STRING = pp.originalTextFor(QUOTED)
    RSTRING = pp.originalTextFor(UNESCAPE_STR + QUOTED)
    #
    # Variable identifiers ($a, $a1, $_a, $a_a123)
    #
    VAR_ID = pp.Word('$', pp.alphanums + '_', min=2)
    #
    # Function identifiers
    #
    FUNCTION_ID = pp.Word(pp.alphas, pp.alphanums + '_', min=1)
    #
    # Numbers (regexes are raw strings so '\.' is a literal dot, not an
    # invalid string escape)
    #
    HEX = pp.originalTextFor(pp.Regex('[0][xX][0-9a-fA-F]+'))
    DEC = pp.originalTextFor(pp.Word('0') | pp.Regex('[1-9][0-9]*'))
    OCTAL = pp.originalTextFor(pp.Regex('[0][0-7]+'))
    FLOAT1 = pp.Regex(r'[0-9]+[\.][0-9]+([eE][+-]?[0-9]+)*')
    FLOAT2 = pp.Regex(r'[0-9]+[\.]([eE][+-]?[0-9]+)*')
    FLOAT = pp.originalTextFor(FLOAT1 | FLOAT2)
    #
    # Special identifiers { <name> (@|#) }
    #
    DATA_ID = pp.originalTextFor(
        pp.Combine(
            pp.Word('{') + pp.Word(pp.alphas, pp.alphanums + '_-.') +
            pp.Word('@#') + pp.Word('}')))

    #----------------------------------------------------------------------#
    # Constants (True, 1, 0x1f, 1.0, 'c', "foo", ...)
    #----------------------------------------------------------------------#
    OID_SEQUENCE = pp.Regex(r'[0-9]+[\.][0-9]+([\.][0-9]+)+')
    # NOTE(review): several actions below index toks[1] on originalTextFor
    # results; confirm the pinned pyparsing version really puts the matched
    # text at index 1 rather than 0.
    constant = (
        TRUE.setParseAction(lambda s, loc, toks: self.f.createBool(True))
        | FALSE.setParseAction(
            lambda s, loc, toks: self.f.createBool(False))
        | HEX.setParseAction(
            lambda s, loc, toks: self.f.createInteger(int(toks[1], 16)))
        # an OID sequence (n.n.n...) must not be consumed as a float
        | (~(OID_SEQUENCE) + FLOAT).setParseAction(
            lambda s, loc, toks: self.f.createFloat(float(toks[0])))
        | OCTAL.setParseAction(
            lambda s, loc, toks: self.f.createInteger(int(toks[1], 8)))
        | DEC.setParseAction(
            lambda s, loc, toks: self.f.createInteger(int(toks[1], 10)))
        | STRING.setParseAction(
            lambda s, loc, toks: self.f.createString(toks, True))
        | RSTRING.setParseAction(
            lambda s, loc, toks: self.f.createString(toks[1:], True)))

    cond_expr = pp.Forward()

    #----------------------------------------------------------------------#
    # Primary expr: (expr) | $var | {data@} | constant
    #----------------------------------------------------------------------#
    primary_expr = (
        (LEFT_PAREN.suppress() + cond_expr + RIGHT_PAREN.suppress()
         ).setParseAction(lambda s, loc, toks: toks[0])
        | VAR_ID.setParseAction(
            lambda s, loc, toks: self.f.createIdentifier(toks[0]))
        | DATA_ID.setParseAction(
            lambda s, loc, toks: self.f.createDataIdentifier(toks[1]))
        | constant)

    #----------------------------------------------------------------------#
    # POSTFIX EXPRESSION: foo(), foo(a,b,...), $id, $id(), $id(a,b,...)
    #----------------------------------------------------------------------#
    #
    # Named argument: name = value
    #
    named_argument_value = pp.Forward()
    name_argument = (
        FUNCTION_ID + ASSIGN.suppress() + named_argument_value
    ).setParseAction(
        lambda s, loc, toks: self.f.createNamedArgument(toks[0], toks[1]))
    #
    # Simple (positional) argument
    #
    simple_argument_value = pp.Forward()
    #
    # 1, 2, 3, foo=10, bar=10234
    #
    argument = name_argument | simple_argument_value
    argument_expr_list = (argument +
                          pp.ZeroOrMore(COMMA.suppress() + argument))

    #----------------------------------------------------------------------#
    # ( ), (a,b,c,...)
    #----------------------------------------------------------------------#
    def _call_expr_callback(s, loc, toks):
        # Tag the postfix as a CALL; args are absent for "()".
        args = toks.get('args')
        if args is None:
            args = []
        else:
            args = list(args)
        return ('CALL', args)

    call_expr = (
        LEFT_PAREN.suppress() + pp.Optional(argument_expr_list('args')) +
        RIGHT_PAREN.suppress()).setParseAction(_call_expr_callback)

    #----------------------------------------------------------------------#
    # [], [;], [i], [i;], [;j], [i;j]
    #----------------------------------------------------------------------#
    def _range_expr_callback(s, loc, toks):
        # Tag the postfix as a RANGE with [start] or [start, end].
        args = []
        start = toks.get('start')
        args.append(start)
        if 'end' in toks:
            end = toks.get('end')
            args.append(end)
        return ('RANGE', args)

    range_value = pp.Forward()
    range_expr = (
        LEFT_SPAREN.suppress() + pp.Optional(range_value)('start') +
        pp.Optional(SEMICOLON.suppress() + pp.Optional(range_value)('end')) +
        RIGHT_SPAREN.suppress()).setParseAction(_range_expr_callback)

    #----------------------------------------------------------------------#
    call_or_range = range_expr | call_expr

    def _func_callback(s, loc, toks):
        # Fold a chain of CALL/RANGE postfixes left-to-right onto the base.
        if len(toks) == 1:
            return toks[0]
        current_t = toks[0]
        for t in toks[1:]:
            f_type, args = t
            if f_type == 'CALL':
                current_t = self.f.createCallOp(current_t, args)
            elif f_type == 'RANGE':
                current_t = self.f.createRangeOp(current_t, args)
            else:
                raise Exception("ERROR")
        return current_t

    postfix_expr = (
        (FUNCTION_ID +
         pp.OneOrMore(call_or_range)).setParseAction(_func_callback)
        | (primary_expr +
           pp.ZeroOrMore(call_or_range)).setParseAction(_func_callback))

    #----------------------------------------------------------------------#
    # UNARY EXPRESSION: ++e, --e, +e, -e, !e, not e, ~e
    #----------------------------------------------------------------------#
    unary_expr = pp.Forward()
    calc_expr = (
        postfix_expr
        | (PLUS_PLUS.suppress() + unary_expr).setParseAction(
            lambda s, loc, toks: self.f.createAddAddOp(toks[0]))
        | (MINUS_MINUS.suppress() + unary_expr).setParseAction(
            lambda s, loc, toks: self.f.createSubSubOp(toks[0]))
        | (PLUS.suppress() +
           unary_expr).setParseAction(lambda s, loc, toks: toks[0])
        | (MINUS.suppress() + unary_expr).setParseAction(
            lambda s, loc, toks: self.f.createMinusOp(toks[0]))
        | ((LOGIC_NOT | NOT).suppress() + unary_expr
           ).setParseAction(lambda s, loc, toks: self.f.createNotOp(toks[0]))
        | (BITONE.suppress() + unary_expr).setParseAction(
            lambda s, loc, toks: self.f.createBitOneOp(toks[0])))

    #---------------------------------------------------------------------------*/
    # OID Expressions — SNMP OID values:
    #
    #   <oid expression> [':' <community-expr>] '@' [ <host-expr> [':' <port-expr>] ]
    #
    # The DOT ('.') operator converts expressions to strings and
    # concatenates them, so 1.2.3.4 . 5.6 yields 1.2.3.4.5.6.
    #---------------------------------------------------------------------------*/
    def _oid_compositon_callback(s, loc, toks):
        toks = list(toks)
        expr = toks.pop(0)
        while toks:
            expr = self.f.createConcatOID(expr, toks.pop(0))
        return expr

    def _oid_callback(s, loc, toks):
        return self.f.createOID(toks[1])

    oid_compositon = (
        pp.originalTextFor(OID_SEQUENCE).setParseAction(_oid_callback) +
        pp.ZeroOrMore(DOT.suppress() + (
            pp.originalTextFor(OID_SEQUENCE).setParseAction(_oid_callback)
            | postfix_expr))).setParseAction(_oid_compositon_callback)

    def _snmp_single_expr_callback(s, loc, toks):
        oid = toks['oid']
        community = toks['community'] if 'community' in toks else None
        t = toks['type']
        node = toks['node'] if 'node' in toks else None
        port = toks['port'] if 'port' in toks else None
        return self.f.createSnmpValue(oid, community, t, node, port)

    snmp_single_expr = (
        oid_compositon('oid') +
        pp.Optional(COLUMN.suppress() + postfix_expr)('community') +
        pp.originalTextFor(TYPE_OLD | TYPE_NEW)('type') + pp.Optional(
            postfix_expr('node') +
            pp.Optional(COLUMN.suppress() + postfix_expr)('port'))
    ).setParseAction(_snmp_single_expr_callback)

    #----------------------------------------------------------------------#
    # 1.3.6.1.2.1.1@ [ ] — optional range subscript on an SNMP value
    #----------------------------------------------------------------------#
    def _func_callback_x(s, loc, toks):
        toks = list(toks)
        if len(toks) == 1:
            return toks[0]
        expr = toks[0]
        range_args = toks[1][1]
        return self.f.createRangeOp(expr, range_args)

    snmp_value_expr = (
        snmp_single_expr +
        pp.Optional(range_expr)).setParseAction(_func_callback_x)

    #----------------------------------------------------------------------#
    # IF <expr> THEN <expr> ELSE <expr>
    #----------------------------------------------------------------------#
    def _if_callback(s, loc, toks):
        e1 = toks.get('e1')
        e2 = toks.get('e2')
        e3 = toks.get('e3')
        return self.f.createIf(e1, e2, e3)

    if_expr = (IF.suppress() + cond_expr("e1") + THEN.suppress() +
               cond_expr("e2") + ELSE.suppress() +
               cond_expr("e3")).setParseAction(_if_callback)

    #----------------------------------------------------------------------#
    # try <expr> catch [ <id> ] ( <expr> ) [ catch <id> ( <expr> ) ....]
    #----------------------------------------------------------------------#
    def _catch_expr_callback(s, loc, toks):
        ex_name = toks.get('exception')
        expr = toks.get('expr')
        return (ex_name, expr)

    def _try_expr_callback(s, loc, toks):
        body = toks['body']
        catch_list = list(toks['catch_list'])
        return self.f.createTry(body, catch_list)

    #
    # catch [ <id> ] ( <expr> )
    #
    catch_expr_body = pp.Forward()
    catch_expr = (
        pp.Optional(FUNCTION_ID)('exception') + LEFT_PAREN.suppress() +
        pp.Optional(cond_expr)('expr') +
        RIGHT_PAREN.suppress()).setParseAction(_catch_expr_callback)
    #
    # try <expr> [ catch <id> ( <expr> ) .... ]
    #
    catch_list = CATCH.suppress() + pp.OneOrMore(catch_expr)
    try_expr = (
        TRY.suppress() + cond_expr('body') +
        catch_list('catch_list')).setParseAction(_try_expr_callback)

    #----------------------------------------------------------------------#
    # UNARY EXPRESSION (recursion closed)
    #----------------------------------------------------------------------#
    unary_expr <<= (if_expr | try_expr | snmp_value_expr | calc_expr)

    #----------------------------------------------------------------------#
    # OPERATORS: map the matched operator text onto its AST factory.
    #----------------------------------------------------------------------#
    OP_MAP = {
        str(POWER.match): self.f.createPowerOp,
        str(MULTI.match): self.f.createMultiOp,
        str(DIV.match): self.f.createDivOp,
        str(MOD.match): self.f.createModOp,
        str(PLUS.match): self.f.createAddOp,
        str(MINUS.match): self.f.createSubOp,
        str(LT.match): self.f.createLtOp,
        str(GT.match): self.f.createGtOp,
        str(LEQ.match): self.f.createLEqOp,
        str(GEQ.match): self.f.createGEqOp,
        str(EQUAL.match): self.f.createEqOp,
        str(EQ.match): self.f.createEqOp,
        str(NEQUAL.match): self.f.createNotEqOp,
        str(REGEXPQUAL.match): self.f.createRegExpEqOp,
        str(BITAND.match): self.f.createBitAndOp,
        str(BITXOR.match): self.f.createBitXOrOp,
        str(BITOR.match): self.f.createBitOrOp,
        str(AND.match): self.f.createAndOp,
        str(LOGIC_AND.match): self.f.createAndOp,
        str(OR.match): self.f.createOrOp,
        str(LOGIC_OR.match): self.f.createOrOp,
    }

    def _op_callback(s, loc, toks):
        # Left-fold "e op e op e ..." into nested binary AST nodes.
        l = list(toks)
        if len(l) == 1:
            return l
        expr = l.pop(0)
        while l:
            op, expr2 = l.pop(0), l.pop(0)
            op_callback = OP_MAP[op]
            expr = op_callback(expr, expr2)
        return expr

    # Precedence ladder, tightest binding first.
    expr = unary_expr
    #// a ** b
    expr = (expr +
            pp.ZeroOrMore(POWER + expr)).setParseAction(_op_callback)
    #// a * b
    #// a / b
    #// a % b
    expr = (expr + pp.ZeroOrMore(
        (MULTI | DIV | MOD) + expr)).setParseAction(_op_callback)
    #// a + b
    #// a - b
    expr = (
        expr +
        pp.ZeroOrMore((PLUS | MINUS) + expr)).setParseAction(_op_callback)
    #// a < b
    #// a > b
    #// a <= b
    #// a >= b
    # BUG FIX: '<='/'>=' must be tried before '<'/'>'.  With the old
    # (LT | GT | LEQ | GEQ) order, Literal('<') consumed the '<' of '<=',
    # the trailing '= b' failed to parse as an expression, and the whole
    # "a <= b" comparison could never match.
    expr = (expr + pp.ZeroOrMore(
        (LEQ | GEQ | LT | GT) + expr)).setParseAction(_op_callback)
    #// a == b
    #// a != b
    #// a =~ b
    expr = (expr + pp.ZeroOrMore(
        (EQUAL | EQ | NEQUAL | REGEXPQUAL) + expr)).setParseAction(_op_callback)
    #// a & b
    expr = (expr +
            pp.ZeroOrMore(BITAND + expr)).setParseAction(_op_callback)
    #// a ^ b
    expr = (expr +
            pp.ZeroOrMore(BITXOR + expr)).setParseAction(_op_callback)
    #// a | b
    expr = (expr +
            pp.ZeroOrMore(BITOR + expr)).setParseAction(_op_callback)
    #// a && b
    expr = (expr + pp.ZeroOrMore(
        (LOGIC_AND | AND) + expr)).setParseAction(_op_callback)
    #// a || b
    expr = (
        expr + pp.ZeroOrMore(
            (LOGIC_OR | OR) + expr)).setParseAction(_op_callback)

    #----------------------------------------------------------------------#
    # Recursive rules
    #----------------------------------------------------------------------#
    cond_expr <<= expr
    simple_argument_value <<= cond_expr
    named_argument_value <<= cond_expr
    range_value <<= cond_expr

    #----------------------------------------------------------------------#
    # Initial rule
    #----------------------------------------------------------------------#
    lang_expr = (START + cond_expr + END)
    return lang_expr