def leaveWhitespace(self):
     """Return a copy of this expression in which every contained expression
     has been replaced by the result of its own ``leaveWhitespace`` call.

     The copy and all the per-child calls are made while an ``Engine("")``
     context is active.
     """
     with Engine(""):
         clone = self.copy()
         children = []
         for sub_expr in self.exprs:
             children.append(sub_expr.leaveWhitespace())
         clone.exprs = children
         return clone
Beispiel #2
0
 def leaveWhitespace(self):
     """
     Return a copy of this :class:`ParserElement` that does not skip whitespace
     before matching the characters in its defined pattern.

     The copy is made while an ``Engine("")`` context is active.  This is
     normally only used internally by the mo_parsing module, but may be needed
     in some whitespace-sensitive grammars.
     """
     with Engine(""):
         return self.copy()
Beispiel #3
0
    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        """Configure a matcher for the whitespace characters listed in ``ws``.

        :param ws: characters to match; stored sorted and de-duplicated as the
            ``white_chars`` config value
        :param min: stored as ``min_len``
        :param max: stored as ``max_len``; a value ``<= 0`` is replaced by ``MAX_INT``
        :param exact: when ``> 0`` it overrides both ``min`` and ``max``
        """
        # The locked engine keeps only the engine whitespace NOT listed in ``ws``.
        other_white = "".join(c for c in self.engine.white_chars if c not in ws)
        with Engine(white=other_white) as e:
            super(White, self).__init__()
            self.set_config(lock_engine=e)

        white_chars = "".join(sorted(set(ws)))
        self.parser_name = "|".join(White.whiteStrs[ch] for ch in white_chars)

        upper = max if max > 0 else MAX_INT
        lower = min
        if exact > 0:
            lower = upper = exact
        self.set_config(min_len=lower, max_len=upper, white_chars=white_chars)
Beispiel #4
0
    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        """Configure a matcher for the whitespace characters listed in ``ws``.

        :param ws: characters to match; stored as ``matchWhite``
        :param min: stored as ``minLen``
        :param max: stored as ``maxLen``; a value ``<= 0`` is replaced by ``_MAX_INT``
        :param exact: when ``> 0`` it overrides both ``minLen`` and ``maxLen``
        """
        # The locked engine keeps only the engine whitespace NOT listed in ``ws``.
        other_white = "".join(c for c in self.engine.white_chars if c not in ws)
        with Engine(white=other_white) as locked:
            super(White, self).__init__()
            self.parser_config.lock_engine = locked

        self.matchWhite = ws
        self.parser_name = "".join(White.whiteStrs[ch] for ch in self.matchWhite)
        self.parser_config.mayReturnEmpty = True

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # An explicit ``exact`` length wins over both bounds.
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact
Beispiel #5
0
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString):
    """Helper method for defining nested lists enclosed in opening and
    closing delimiters ("(" and ")" are the default).

    Parameters:
     - opener - opening character for a nested list
       (default= ``"("``); can also be a mo_parsing expression
     - closer - closing character for a nested list
       (default= ``")"``); can also be a mo_parsing expression
     - content - expression for items within the nested lists
       (default= ``None``)
     - ignoreExpr - expression for ignoring opening and closing
       delimiters (default= `quotedString`)

    If an expression is not provided for the content argument, the
    nested expression will capture all whitespace-delimited content
    between delimiters as a list of separate values.

    Use the ``ignoreExpr`` argument to define expressions that may
    contain opening or closing characters that should not be treated as
    opening or closing characters for nesting, such as quotedString or
    a comment expression.  Specify multiple expressions using an
    `Or` or `MatchFirst`. The default is
    `quotedString`, but if no expressions are to be ignored, then
    pass ``None`` for this argument.

    Returns:
     a ``Forward`` expression whose parser name is
     ``"nested <opener><closer> expression"``; each nesting level is
     wrapped in a ``Group`` and the delimiters are suppressed.

    Raises:
     ``ValueError`` if ``opener == closer``, or if ``content`` is ``None``
     and either delimiter is not a string.
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")
    if content is None:
        # A default content expression can only be derived from string delimiters.
        if not isinstance(opener, text) or not isinstance(closer, text):
            raise ValueError(
                "opening and closing arguments must be strings if no content expression"
                " is given"
            )

        # Read the current engine's whitespace characters BEFORE switching to the
        # no-whitespace engine below; they are excluded from content explicitly.
        ignore_chars = engine.CURRENT.white_chars
        with Engine(""):

            def scrub(t):
                # parse action: strip surrounding whitespace from the first token
                return t[0].strip()

            if len(opener) == 1 and len(closer) == 1:
                # Single-character delimiters: exclude them (and whitespace) with
                # one CharsNotIn character set.
                if ignoreExpr is not None:
                    content = Combine(OneOrMore(
                        ~ignoreExpr
                        + CharsNotIn(opener + closer + "".join(ignore_chars), exact=1,)
                    )).addParseAction(scrub)
                else:
                    # NOTE(review): unlike the other three branches this one uses
                    # no Combine/OneOrMore and references the ``Empty`` class
                    # itself rather than an instance; also ``scrub`` is attached to
                    # the CharsNotIn only (method call binds before ``+``) —
                    # confirm this is intended.
                    content = Empty + CharsNotIn(
                        opener + closer + "".join(ignore_chars)
                    ).addParseAction(scrub)
            else:
                # Multi-character delimiters: a character set cannot express them,
                # so each step is guarded by negated (``~``) delimiter literals.
                if ignoreExpr is not None:
                    content = Combine(OneOrMore(
                        ~ignoreExpr
                        + ~Literal(opener)
                        + ~Literal(closer)
                        + CharsNotIn(ignore_chars, exact=1)
                    )).addParseAction(scrub)
                else:
                    content = Combine(OneOrMore(
                        ~Literal(opener)
                        + ~Literal(closer)
                        + CharsNotIn(ignore_chars, exact=1)
                    )).addParseAction(scrub)
    # Recursive definition: between the delimiters there may be ignored
    # expressions (when given), further nested groups (``ret``), or content.
    ret = Forward()
    if ignoreExpr is not None:
        ret <<= Group(
            Suppress(opener) + ZeroOrMore(ignoreExpr | ret | content) + Suppress(closer)
        )
    else:
        ret <<= Group(Suppress(opener) + ZeroOrMore(ret | content) + Suppress(closer))
    ret.set_parser_name("nested %s%s expression" % (opener, closer))
    return ret
Beispiel #6
0
).set_parser_name("common HTML entity")


def replaceHTMLEntity(t):
    """Parse action that swaps a common HTML entity for its special character.

    Looks up ``t.entity`` in ``_htmlEntityMap`` and returns whatever
    ``_htmlEntityMap.get`` yields for it.
    """
    entity_name = t.entity
    return _htmlEntityMap.get(entity_name)


# it's easy to get these comment structures wrong - they're very common, so may as well make them available
# Comment of the form ``/* ... */``
cStyleComment = Combine(
    Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/"
).set_parser_name("C style comment")

# Comment of the form ``<!-- ... -->`` (non-greedy, may span lines)
htmlComment = Regex(r"<!--[\s\S]*?-->").set_parser_name("HTML comment")

# The expressions below are all built while a no-whitespace ``Engine("")`` is active.
# NOTE(review): ``as engine`` binds the module-level name ``engine`` to this
# Engine instance, yet nothing inside the block uses it — confirm it does not
# shadow an imported ``engine`` module that other code in this file relies on.
with Engine("") as engine:
    # everything up to, but not including, the next newline
    restOfLine = Regex(r"[^\n]*").set_parser_name("rest of line")

    # ``//`` to end of line; a backslash-newline pair continues the comment
    dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").set_parser_name("// comment")

    # ``+`` binds tighter than ``|``: this is (/* ... */) OR (// ...)
    cppStyleComment = Combine(
        Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/" | dblSlashComment
    ).set_parser_name("C++ style comment")

    # alias: same object as cppStyleComment
    javaStyleComment = cppStyleComment

    # ``#`` to end of line
    pythonStyleComment = Regex(r"#[^\n]*").set_parser_name("Python style comment")

_commasepitem = (
    Combine(OneOrMore(
        Word(printables, exclude=",")
Beispiel #7
0
def replaceHTMLEntity(t):
    """Parse action that swaps a common HTML entity for its special character.

    Looks up ``t.entity`` in ``_htmlEntityMap`` and returns whatever
    ``_htmlEntityMap.get`` yields for it.
    """
    entity_name = t.entity
    return _htmlEntityMap.get(entity_name)


# it's easy to get these comment structures wrong - they're very common, so may as well make them available
# NOTE: the bare strings following each definition are plain expression
# statements used as attribute documentation; they have no runtime effect.
cStyleComment = Combine(
    Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/"
).set_parser_name("C style comment")
"Comment of the form ``/* ... */``"

htmlComment = Regex(r"<!--[\s\S]*?-->").set_parser_name("HTML comment")
"Comment of the form ``<!-- ... -->``"

# Only ``restOfLine`` is built inside this Engine context, after the engine's
# whitespace has been set to the empty string.
# NOTE(review): ``as engine`` binds the module-level name ``engine`` to this
# Engine instance — confirm that does not shadow an imported ``engine`` module.
with Engine() as engine:
    engine.set_whitespace("")
    # presumably stops at the newline (``.`` without DOTALL) — confirm Regex's flags
    restOfLine = Regex(r".*").leaveWhitespace().set_parser_name("rest of line")

# ``//`` to end of line; a backslash-newline pair continues the comment
dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").set_parser_name("// comment")
"Comment of the form ``// ... (to end of line)``"

# ``+`` binds tighter than ``|``: this is (/* ... */) OR (// ...)
cppStyleComment = Combine(
    Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/" | dblSlashComment
).set_parser_name("C++ style comment")
"Comment of either form :class:`cStyleComment` or :class:`dblSlashComment`"

# alias: same object as cppStyleComment
javaStyleComment = cppStyleComment
"Same as :class:`cppStyleComment`"

# ``#`` followed by the rest of the line
pythonStyleComment = Regex(r"#.*").set_parser_name("Python style comment")
Beispiel #8
0
                        OneOrMore(~Literal(opener) + ~Literal(closer) +
                                  CharsNotIn(ignore_chars, exact=1))
                    ).addParseAction(scrub)
    ret = Forward()
    if ignoreExpr is not None:
        ret <<= Group(
            Suppress(opener) + ZeroOrMore(ignoreExpr | ret | content) +
            Suppress(closer))
    else:
        ret <<= Group(
            Suppress(opener) + ZeroOrMore(ret | content) + Suppress(closer))
    ret.set_parser_name("nested %s%s expression" % (opener, closer))
    return ret


# Pieces of a grammar for regex-style bracket expressions such as ``[a-z0-9_]``,
# all built while a no-whitespace ``Engine("")`` is active.
with Engine(""):
    # backslash followed by one of the listed punctuation characters;
    # the action keeps only the second character (the escaped one)
    _escapedPunc = Word("\\", r"\[]-*.$+^?()~ ",
                        exact=2).addParseAction(lambda t, l, s: t[0][1])
    # ``\x41`` / ``\0x41`` style hex escape: strip the backslash, any leading
    # zeros and the x/X marker, then read the remainder as base 16
    _escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").addParseAction(
        lambda t: unichr(int(t[0].lstrip("\\").lstrip("0").lstrip("xX"), 16)))
    # ``\0`` followed by octal digits: drop the backslash and read as base 8
    _escapedOctChar = Regex(r"\\0[0-7]+").addParseAction(
        lambda t, l, s: unichr(int(t[0][1:], 8)))
    # any of the escapes above, else one character that is neither ``\`` nor ``]``
    _singleChar = (_escapedPunc | _escapedHexChar | _escapedOctChar
                   | CharsNotIn(r"\]", exact=1))
    # ``a-b`` range; the ``-`` is suppressed so the group holds the two endpoints
    _charRange = Group(_singleChar + Suppress("-") + _singleChar)
    # ``[`` + optional ``^`` (token name "negate") + grouped items (token name "body") + ``]``
    _reBracketExpr = ("[" + Optional("^").set_token_name("negate") + Group(
        OneOrMore(_charRange | _singleChar)).set_token_name("body") + "]")


def srange(s):
    r"""Helper to easily define string ranges for use in Word
Beispiel #9
0
 def __init__(self):
     """Initialise inside a default ``Engine()`` context and record that
     engine as ``parser_config.lock_engine``."""
     with Engine() as locked:
         super(StringEnd, self).__init__()
         config = self.parser_config
         config.lock_engine = locked
Beispiel #10
0
 def __init__(self, *args, **kwargs):
     """Initialise the guard while a no-whitespace ``Engine("")`` is active and
     name it ``"-"``; all arguments are forwarded to the parent initialiser."""
     with Engine(""):
         super(And.SyntaxErrorGuard, self).__init__(*args, **kwargs)
         # the guard is displayed as the "-" operator
         self.parser_name = "-"
Beispiel #11
0
    + "]"
).addParseAction(to_bracket)

#########################################################################################
# REGEX
# Placeholder for the recursive top-level pattern; presumably assigned later in
# the file (not visible in this excerpt).
regex = Forward()

# Each parse action below replaces the matched regex syntax with a parser element.
# ``^`` -> LineStart()
line_start = Literal("^").addParseAction(lambda: LineStart())
# ``$`` -> LineEnd()
line_end = Literal("$").addParseAction(lambda: LineEnd())
# ``\b`` -> NotAny(any_wordchar)
word_edge = Literal("\\b").addParseAction(lambda: NotAny(any_wordchar))
# run of printable characters that are not regex metacharacters -> Literal of that text
simple_char = Word(
    printables, exclude=r".^$*+{}[]\|()"
).addParseAction(lambda t: Literal(t.value()))
# backslash + any character -> Literal of element [1] of the match
# (presumably the escaped character — confirm what ``t.value()`` returns)
esc_char = ("\\" + AnyChar()).addParseAction(lambda t: Literal(t.value()[1]))

with Engine():
    # ALLOW SPACES IN THE RANGE
    # Body of a ``{...}`` quantifier after the opening brace:
    # ``n}``, ``min,max}``, ``min,}`` or ``,max}``
    repetition = (
        Word(nums)("exact") + "}"
        | Word(nums)("min") + "," + Word(nums)("max") + "}"
        | Word(nums)("min") + "," + "}"
        | "," + Word(nums)("max") + "}"
    )

# Rebinds ``repetition``: ``+`` binds tighter than ``|``, so this is
# ("{" + <brace body above>) OR one of ``*?``, ``+?``, ``*``, ``+``, ``?`` named "mode".
repetition = Group(
    "{" + repetition | (Literal("*?") | Literal("+?") | Char("*+?"))("mode")
)


# single ``(`` character
LB = Char("(")
Beispiel #12
0
    PrecededBy,
    SkipTo,
    Suppress,
    TokenConverter,
    ZeroOrMore,
    Many,
)
from mo_parsing.exceptions import (
    ParseException,
    ParseSyntaxException,
    RecursiveGrammarException,
)
from mo_parsing.expressions import And, MatchAll, MatchFirst, Or, ParseExpression

# Shared engines stored on the ``engine`` object.
# NOTE(review): in the visible lines ``engine`` is only imported further down
# (from mo_parsing.results) — confirm it is already bound before this point.
# Engine created with "" as its whitespace argument.
engine.PLAIN_ENGINE = Engine("").use()
# Default engine; it is created and use()d last.
engine.STANDARD_ENGINE = Engine().use()


from mo_parsing.infix import LEFT_ASSOC, RIGHT_ASSOC, infixNotation
from mo_parsing.regex import Regex
from mo_parsing.results import ParseResults, engine
from mo_parsing.tokens import (
    CaselessKeyword,
    CaselessLiteral,
    Char,
    CloseMatch,
    Empty,
    GoToColumn,
    LineEnd,
    LineStart,
Beispiel #13
0
 def __init__(self, *args, **kwargs):
     """Initialise the error-stop marker inside an engine whose whitespace has
     been set to the empty string, and name it ``"-"``.

     All arguments are forwarded to the parent initialiser.
     """
     with Engine() as no_ws:
         no_ws.set_whitespace("")
         super(And._ErrorStop, self).__init__(*args, **kwargs)
         self.parser_name = "-"
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Contact: Kyle Lahnakoski ([email protected])
#

from __future__ import absolute_import, division, unicode_literals

from mo_parsing.engine import Engine
from mo_parsing.helpers import delimitedList, restOfLine
from mo_sql_parsing.keywords import *
from mo_sql_parsing.utils import *
from mo_sql_parsing.windows import sortColumn, window

# Create a default engine, call use() on it, and register two ignorable
# patterns: ``--`` and ``#`` each followed by the rest of the line.
engine = Engine().use()
engine.add_ignore(Literal("--") + restOfLine)
engine.add_ignore(Literal("#") + restOfLine)

# IDENTIFIER
# double-quoted name; a doubled ``""`` is accepted inside the quotes
literal_string = Regex(r'\"(\"\"|[^"])*\"').addParseAction(unquote)
# backtick-quoted name; a doubled backtick is accepted inside
mysql_ident = Regex(r"\`(\`\`|[^`])*\`").addParseAction(unquote)
# square-bracketed name; a doubled ``]]`` is accepted inside
sqlserver_ident = Regex(r"\[(\]\]|[^\]])*\]").addParseAction(unquote)
# ``.``-separated list of quoted names or IDENT_CHAR words, combined into one
# token and guarded by a negated (``~``) RESERVED
ident = Combine(~RESERVED + (delimitedList(
    literal_string
    | mysql_ident
    | sqlserver_ident
    | Word(IDENT_CHAR),
    separator=".",
    combine=True,
))).set_parser_name("identifier")
Beispiel #15
0
 def __init__(self):
     """Initialise inside an ``Engine(" \\t")`` context (space and tab only) and
     store that engine, together with the compiled end-of-line pattern, via
     ``set_config``."""
     with Engine(" \t") as line_engine:
         super(LineEnd, self).__init__()
         end_of_line = regex_compile("\\r?(\\n|$)")
         self.set_config(lock_engine=line_engine, regex=end_of_line)
Beispiel #16
0
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
#
# Contact: Kyle Lahnakoski ([email protected])
#

from __future__ import absolute_import, division, unicode_literals

from mo_parsing.engine import Engine
from moz_sql_parser.keywords import *
from moz_sql_parser.utils import *
from moz_sql_parser.row_clause import row_clause

# Create a default engine, call use() on it, and register two ignorable
# patterns: ``--`` and ``#`` each followed by the rest of the line.
engine = Engine().use()
engine.add_ignore(Literal("--") + restOfLine)
engine.add_ignore(Literal("#") + restOfLine)

# IDENTIFIER
# double-quoted name; a doubled ``""`` is accepted inside the quotes
literal_string = Regex(r'\"(\"\"|[^"])*\"').addParseAction(unquote)
# backtick-quoted name; a doubled backtick is accepted inside
mysql_ident = Regex(r"\`(\`\`|[^`])*\`").addParseAction(unquote)
# square-bracketed name; a doubled ``]]`` is accepted inside
sqlserver_ident = Regex(r"\[(\]\]|[^\]])*\]").addParseAction(unquote)
# ``.``-separated list of ``*``, quoted names or IDENT_CHAR words, combined
# into one token and guarded by a negated (``~``) RESERVED
ident = Combine(~RESERVED + (delimitedList(
    Literal("*") | literal_string | mysql_ident | sqlserver_ident
    | Word(IDENT_CHAR),
    separator=".",
    combine=True,
))).set_parser_name("identifier")

# EXPRESSIONS
Beispiel #17
0
 def __init__(self):
     """Initialise inside a default ``Engine()`` context and store that engine
     as the ``lock_engine`` config value."""
     with Engine() as locked:
         super(StringEnd, self).__init__()
         self.set_config(lock_engine=locked)
    KNOWN_OPS,
    RESERVED,
    binary_ops,
    NULL,
    NOCASE,
    TRUE,
    FALSE,
    OVER,
    PARTITION_BY,
    CAST,
    SELECT_DISTINCT,
    LB,
    RB,
)

# Create a default engine, call use() on it, and install the debug actions
# unpacked from ``debug`` (defined outside this excerpt).
engine = Engine().use()
engine.set_debug_actions(*debug)

# Characters allowed in an unquoted identifier: alphanumerics plus ``@``, ``_`` and ``$``.
IDENT_CHAR = alphanums + "@_$"


def scrub_literal(candidate):
    # IF ALL MEMBERS OF A LIST ARE LITERALS, THEN MAKE THE LIST LITERAL
    if all(isinstance(r, number_types) for r in candidate):
        pass
    elif all(
            isinstance(r, number_types) or (
                is_data(r) and "literal" in r.keys()) for r in candidate):
        candidate = {
            "literal": [r["literal"] if is_data(r) else r for r in candidate]
        }
Beispiel #19
0
 def leaveWhitespace(self):
     """Return a copy of this element whose wrapped expression is the result of
     calling ``leaveWhitespace`` on the current one.

     Both the copy and the inner call are made while an ``Engine("")`` context
     is active.
     """
     with Engine(""):
         clone = self.copy()
         inner = self.expr.leaveWhitespace()
         clone.expr = inner
         return clone
Beispiel #20
0
 def __init__(self):
     """Initialise inside an ``Engine(" \\t")`` context (space and tab only) and
     record that engine as ``parser_config.lock_engine``."""
     with Engine(" \t") as line_engine:
         super(LineEnd, self).__init__()
         config = self.parser_config
         config.lock_engine = line_engine