def leaveWhitespace(self):
    """
    Return a copy of this expression with ``leaveWhitespace`` applied to
    every contained expression.

    Extends the base-class ``leaveWhitespace``: the copy is made, and each
    element of ``self.exprs`` is converted, while an ``Engine("")`` context
    is active. Only the copy's ``exprs`` attribute is assigned.
    """
    with Engine(""):
        clone = self.copy()
        converted = []
        for sub_expr in self.exprs:
            converted.append(sub_expr.leaveWhitespace())
        clone.exprs = converted
        return clone
def leaveWhitespace(self):
    """
    Return a copy of this element made while an ``Engine("")`` context is
    active.

    Used to disable the skipping of whitespace before matching the
    characters in the :class:`ParserElement`'s defined pattern. Normally
    only needed internally by the mo_parsing module, but may be useful in
    whitespace-sensitive grammars.
    """
    with Engine(""):
        return self.copy()
def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
    """
    Configure a matcher for runs of the characters in ``ws``.

    Parameters:
     - ws - characters this element matches (default space, tab, CR, LF)
     - min - minimum run length, stored as ``min_len``
     - max - maximum run length, stored as ``max_len``; any value ``<= 0``
       is replaced by ``MAX_INT``
     - exact - when ``> 0``, used for both ``min_len`` and ``max_len``,
       overriding ``min`` and ``max``
    """
    # The engine locked to this element keeps only the white characters
    # that are NOT in ``ws``.
    remaining_white = "".join(c for c in self.engine.white_chars if c not in ws)
    with Engine(white=remaining_white) as locked:
        super(White, self).__init__()
        self.set_config(lock_engine=locked)

    # De-duplicated, sorted form of ``ws``; also drives the display name.
    white_chars = "".join(sorted(set(ws)))
    self.parser_name = "|".join(White.whiteStrs[c] for c in white_chars)

    if exact > 0:
        shortest = longest = exact
    elif max > 0:
        shortest, longest = min, max
    else:
        shortest, longest = min, MAX_INT
    self.set_config(min_len=shortest, max_len=longest, white_chars=white_chars)
def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
    """
    Configure a matcher for runs of the characters in ``ws``.

    Parameters:
     - ws - characters this element matches, stored as ``matchWhite``
     - min - minimum run length, stored as ``minLen``
     - max - maximum run length, stored as ``maxLen``; any value ``<= 0``
       is replaced by ``_MAX_INT``
     - exact - when ``> 0``, used for both ``minLen`` and ``maxLen``,
       overriding ``min`` and ``max``
    """
    # The engine locked to this element keeps only the white characters
    # that are NOT in ``ws``.
    remaining_white = "".join(c for c in self.engine.white_chars if c not in ws)
    with Engine(white=remaining_white) as locked:
        super(White, self).__init__()
        self.parser_config.lock_engine = locked

    self.matchWhite = ws
    self.parser_name = "".join(White.whiteStrs[c] for c in self.matchWhite)
    self.parser_config.mayReturnEmpty = True

    if exact > 0:
        self.minLen = exact
        self.maxLen = exact
    else:
        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString):
    """
    Build an expression for nested lists enclosed in opening and closing
    delimiters ("(" and ")" are the default).

    Parameters:
     - opener - opening character for a nested list (default= ``"("``);
       can also be a mo_parsing expression
     - closer - closing character for a nested list (default= ``")"``);
       can also be a mo_parsing expression
     - content - expression for items within the nested lists
       (default= ``None``)
     - ignoreExpr - expression for ignoring opening and closing delimiters
       (default= `quotedString`); pass ``None`` to ignore nothing

    When ``content`` is ``None`` a default content expression is built from
    the characters that are neither delimiters nor the current engine's
    white characters, so whitespace-delimited items come back as separate,
    stripped values. In that case both delimiters must be strings.

    Use ``ignoreExpr`` for expressions that may contain delimiter
    characters that should not count as nesting, such as a quoted string
    or a comment. Combine several with an `Or` or `MatchFirst`.

    Raises ``ValueError`` when ``opener == closer``, or when ``content`` is
    ``None`` and either delimiter is not a string.
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        both_are_text = isinstance(opener, text) and isinstance(closer, text)
        if not both_are_text:
            raise ValueError(
                "opening and closing arguments must be strings if no content expression"
                " is given"
            )

        # Captured before switching engines below.
        ignore_chars = engine.CURRENT.white_chars
        with Engine(""):

            def scrub(t):
                return t[0].strip()

            single_char_delims = len(opener) == 1 and len(closer) == 1
            if single_char_delims and ignoreExpr is not None:
                content = Combine(OneOrMore(
                    ~ignoreExpr
                    + CharsNotIn(opener + closer + "".join(ignore_chars), exact=1,)
                )).addParseAction(scrub)
            elif single_char_delims:
                # The parse action is attached to the CharsNotIn element,
                # not to the sum.
                content = Empty + CharsNotIn(
                    opener + closer + "".join(ignore_chars)
                ).addParseAction(scrub)
            elif ignoreExpr is not None:
                content = Combine(OneOrMore(
                    ~ignoreExpr
                    + ~Literal(opener)
                    + ~Literal(closer)
                    + CharsNotIn(ignore_chars, exact=1)
                )).addParseAction(scrub)
            else:
                content = Combine(OneOrMore(
                    ~Literal(opener)
                    + ~Literal(closer)
                    + CharsNotIn(ignore_chars, exact=1)
                )).addParseAction(scrub)

    # Forward declaration lets the group refer to itself for nesting.
    ret = Forward()
    if ignoreExpr is None:
        ret <<= Group(
            Suppress(opener) + ZeroOrMore(ret | content) + Suppress(closer)
        )
    else:
        ret <<= Group(
            Suppress(opener)
            + ZeroOrMore(ignoreExpr | ret | content)
            + Suppress(closer)
        )
    ret.set_parser_name("nested %s%s expression" % (opener, closer))
    return ret
).set_parser_name("common HTML entity") def replaceHTMLEntity(t): """Helper parser action to replace common HTML entities with their special characters""" return _htmlEntityMap.get(t.entity) # it's easy to get these comment structures wrong - they're very common, so may as well make them available cStyleComment = Combine( Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/" ).set_parser_name("C style comment") htmlComment = Regex(r"<!--[\s\S]*?-->").set_parser_name("HTML comment") with Engine("") as engine: restOfLine = Regex(r"[^\n]*").set_parser_name("rest of line") dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").set_parser_name("// comment") cppStyleComment = Combine( Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/" | dblSlashComment ).set_parser_name("C++ style comment") javaStyleComment = cppStyleComment pythonStyleComment = Regex(r"#[^\n]*").set_parser_name("Python style comment") _commasepitem = ( Combine(OneOrMore( Word(printables, exclude=",")
def replaceHTMLEntity(t):
    """Helper parser action to replace common HTML entities with their special characters"""
    # Returns whatever ``_htmlEntityMap.get`` yields for the ``entity``
    # attribute of the tokens.
    return _htmlEntityMap.get(t.entity)


# it's easy to get these comment structures wrong - they're very common, so may as well make them available

# "/*", then any run of characters that are not "*" or are a "*" not
# followed by "/", then the closing "*/".
cStyleComment = Combine(
    Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/"
).set_parser_name("C style comment")
"Comment of the form ``/* ... */``"

# Non-greedy: stops at the first "-->".
htmlComment = Regex(r"<!--[\s\S]*?-->").set_parser_name("HTML comment")
"Comment of the form ``<!-- ... -->``"

# NOTE(review): the source this was recovered from had lost its
# indentation, so the extent of this ``with`` body is an assumption -
# confirm which of the following definitions belong inside it.
with Engine() as engine:
    engine.set_whitespace("")
    restOfLine = Regex(r".*").leaveWhitespace().set_parser_name("rest of line")

# "//" followed by non-newline characters; a backslash-newline pair is
# consumed as part of the comment.
dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").set_parser_name("// comment")
"Comment of the form ``// ... (to end of line)``"

cppStyleComment = Combine(
    Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/" | dblSlashComment
).set_parser_name("C++ style comment")
"Comment of either form :class:`cStyleComment` or :class:`dblSlashComment`"

# Alias: the very same object as cppStyleComment.
javaStyleComment = cppStyleComment
"Same as :class:`cppStyleComment`"

# "#" followed by whatever ``.*`` matches.
pythonStyleComment = Regex(r"#.*").set_parser_name("Python style comment")
OneOrMore(~Literal(opener) + ~Literal(closer) + CharsNotIn(ignore_chars, exact=1)) ).addParseAction(scrub) ret = Forward() if ignoreExpr is not None: ret <<= Group( Suppress(opener) + ZeroOrMore(ignoreExpr | ret | content) + Suppress(closer)) else: ret <<= Group( Suppress(opener) + ZeroOrMore(ret | content) + Suppress(closer)) ret.set_parser_name("nested %s%s expression" % (opener, closer)) return ret with Engine(""): _escapedPunc = Word("\\", r"\[]-*.$+^?()~ ", exact=2).addParseAction(lambda t, l, s: t[0][1]) _escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").addParseAction( lambda t: unichr(int(t[0].lstrip("\\").lstrip("0").lstrip("xX"), 16))) _escapedOctChar = Regex(r"\\0[0-7]+").addParseAction( lambda t, l, s: unichr(int(t[0][1:], 8))) _singleChar = (_escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r"\]", exact=1)) _charRange = Group(_singleChar + Suppress("-") + _singleChar) _reBracketExpr = ("[" + Optional("^").set_token_name("negate") + Group( OneOrMore(_charRange | _singleChar)).set_token_name("body") + "]") def srange(s): r"""Helper to easily define string ranges for use in Word
def __init__(self):
    """
    Initialise the element inside a fresh ``Engine()`` context and record
    that engine as ``parser_config.lock_engine``.
    """
    with Engine() as locked:
        super(StringEnd, self).__init__()
        self.parser_config.lock_engine = locked
def __init__(self, *args, **kwargs):
    """
    Initialise the guard element, forwarding all arguments to the parent
    constructor, and give it the display name ``"-"``.
    """
    # Parent construction happens while an ``Engine("")`` context is active.
    with Engine(""):
        super(And.SyntaxErrorGuard, self).__init__(*args, **kwargs)
        self.parser_name = "-"
+ "]" ).addParseAction(to_bracket) ######################################################################################### # REGEX regex = Forward() line_start = Literal("^").addParseAction(lambda: LineStart()) line_end = Literal("$").addParseAction(lambda: LineEnd()) word_edge = Literal("\\b").addParseAction(lambda: NotAny(any_wordchar)) simple_char = Word( printables, exclude=r".^$*+{}[]\|()" ).addParseAction(lambda t: Literal(t.value())) esc_char = ("\\" + AnyChar()).addParseAction(lambda t: Literal(t.value()[1])) with Engine(): # ALLOW SPACES IN THE RANGE repetition = ( Word(nums)("exact") + "}" | Word(nums)("min") + "," + Word(nums)("max") + "}" | Word(nums)("min") + "," + "}" | "," + Word(nums)("max") + "}" ) repetition = Group( "{" + repetition | (Literal("*?") | Literal("+?") | Char("*+?"))("mode") ) LB = Char("(")
PrecededBy, SkipTo, Suppress, TokenConverter, ZeroOrMore, Many, ) from mo_parsing.exceptions import ( ParseException, ParseException, ParseSyntaxException, RecursiveGrammarException, ) from mo_parsing.expressions import And, MatchAll, MatchFirst, Or, ParseExpression engine.PLAIN_ENGINE = Engine("").use() engine.STANDARD_ENGINE = Engine().use() from mo_parsing.infix import LEFT_ASSOC, RIGHT_ASSOC, infixNotation from mo_parsing.regex import Regex from mo_parsing.results import ParseResults, engine from mo_parsing.tokens import ( CaselessKeyword, CaselessLiteral, Char, CloseMatch, Empty, GoToColumn, LineEnd, LineStart,
def __init__(self, *args, **kwargs):
    """
    Initialise the error-stop element, forwarding all arguments to the
    parent constructor, and give it the display name ``"-"``.

    Construction happens inside a fresh ``Engine()`` whose whitespace has
    been set to the empty string.
    """
    with Engine() as no_white:
        no_white.set_whitespace("")
        super(And._ErrorStop, self).__init__(*args, **kwargs)
        self.parser_name = "-"
# This Source Code Form is subject to the terms of the Mozilla Public # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # # Contact: Kyle Lahnakoski ([email protected]) # from __future__ import absolute_import, division, unicode_literals from mo_parsing.engine import Engine from mo_parsing.helpers import delimitedList, restOfLine from mo_sql_parsing.keywords import * from mo_sql_parsing.utils import * from mo_sql_parsing.windows import sortColumn, window engine = Engine().use() engine.add_ignore(Literal("--") + restOfLine) engine.add_ignore(Literal("#") + restOfLine) # IDENTIFIER literal_string = Regex(r'\"(\"\"|[^"])*\"').addParseAction(unquote) mysql_ident = Regex(r"\`(\`\`|[^`])*\`").addParseAction(unquote) sqlserver_ident = Regex(r"\[(\]\]|[^\]])*\]").addParseAction(unquote) ident = Combine(~RESERVED + (delimitedList( literal_string | mysql_ident | sqlserver_ident | Word(IDENT_CHAR), separator=".", combine=True, ))).set_parser_name("identifier")
def __init__(self):
    """
    Initialise the element inside an ``Engine(" \\t")`` context, lock that
    engine to it, and configure its regex.

    The pattern is an optional carriage return followed by either a
    newline or ``$``.
    """
    with Engine(" \t") as locked:
        super(LineEnd, self).__init__()
        line_end_pattern = regex_compile("\\r?(\\n|$)")
        self.set_config(lock_engine=locked, regex=line_end_pattern)
# # This Source Code Form is subject to the terms of the Mozilla Public # License, v. 2.0. If a copy of the MPL was not distributed with this file, # You can obtain one at http://mozilla.org/MPL/2.0/. # # Contact: Kyle Lahnakoski ([email protected]) # from __future__ import absolute_import, division, unicode_literals from mo_parsing.engine import Engine from moz_sql_parser.keywords import * from moz_sql_parser.utils import * from moz_sql_parser.row_clause import row_clause engine = Engine().use() engine.add_ignore(Literal("--") + restOfLine) engine.add_ignore(Literal("#") + restOfLine) # IDENTIFIER literal_string = Regex(r'\"(\"\"|[^"])*\"').addParseAction(unquote) mysql_ident = Regex(r"\`(\`\`|[^`])*\`").addParseAction(unquote) sqlserver_ident = Regex(r"\[(\]\]|[^\]])*\]").addParseAction(unquote) ident = Combine(~RESERVED + (delimitedList( Literal("*") | literal_string | mysql_ident | sqlserver_ident | Word(IDENT_CHAR), separator=".", combine=True, ))).set_parser_name("identifier") # EXPRESSIONS
def __init__(self):
    """
    Initialise the element inside a fresh ``Engine()`` context and pass
    that engine to ``set_config`` as ``lock_engine``.
    """
    with Engine() as locked:
        super(StringEnd, self).__init__()
        self.set_config(lock_engine=locked)
KNOWN_OPS, RESERVED, binary_ops, NULL, NOCASE, TRUE, FALSE, OVER, PARTITION_BY, CAST, SELECT_DISTINCT, LB, RB, ) engine = Engine().use() engine.set_debug_actions(*debug) IDENT_CHAR = alphanums + "@_$" def scrub_literal(candidate): # IF ALL MEMBERS OF A LIST ARE LITERALS, THEN MAKE THE LIST LITERAL if all(isinstance(r, number_types) for r in candidate): pass elif all( isinstance(r, number_types) or ( is_data(r) and "literal" in r.keys()) for r in candidate): candidate = { "literal": [r["literal"] if is_data(r) else r for r in candidate] }
def leaveWhitespace(self):
    """
    Return a copy of this element whose wrapped ``expr`` has had
    ``leaveWhitespace`` applied.

    Both the copy and the call on ``self.expr`` are made while an
    ``Engine("")`` context is active. Only the copy's ``expr`` attribute is
    assigned.
    """
    with Engine(""):
        clone = self.copy()
        inner = self.expr.leaveWhitespace()
        clone.expr = inner
        return clone
def __init__(self):
    """
    Initialise the element inside an ``Engine(" \\t")`` context and record
    that engine as ``parser_config.lock_engine``.
    """
    with Engine(" \t") as locked:
        super(LineEnd, self).__init__()
        self.parser_config.lock_engine = locked