Ejemplo n.º 1
0
def repeat(tokens):
    """Turn a parsed operand and its repetition operator into a repeating parser.

    Parameters:

     - tokens - parse results holding either a lone operand (``length()``
       of 1) or an ``(operand, operator)`` pair, where ``operator`` is
       indexable by ``"mode"``, ``"exact"``, ``"min"`` and ``"max"``

    Returns ``tokens.value()`` unchanged when there is no operator;
    otherwise a ``Many``, ``ZeroOrMore``, ``OneOrMore`` or ``Optional``
    wrapping the operand, each built with ``PLAIN_ENGINE``.

    An unpackable ``tokens`` or an unrecognised mode is reported through
    ``Log.error``.
    """
    if tokens.length() == 1:
        # no repetition operator was parsed; hand the operand back as-is
        return tokens.value()

    try:
        operand, operator = tokens
    except Exception as cause:
        # NOTE(review): relies on Log.error raising; otherwise `operand`
        # and `operator` would be unbound below - confirm
        Log.error("not expected", cause=cause)

    mode = operator["mode"]
    if not mode:
        # no symbolic mode: the operator carries explicit counts instead
        if operator["exact"]:
            return Many(operand, PLAIN_ENGINE, exact=int(operator["exact"]))
        else:
            return Many(operand,
                        PLAIN_ENGINE,
                        min_match=int(operator["min"]),
                        max_match=int(operator["max"]))
    # Compare against whole mode strings.  A substring test such as
    # `mode in "*?"` is also true for "?", which sent the optional operator
    # to ZeroOrMore and left the Optional branch unreachable.
    elif mode in ("*", "*?"):
        return ZeroOrMore(operand, PLAIN_ENGINE)
    elif mode in ("+", "+?"):
        return OneOrMore(operand, PLAIN_ENGINE)
    elif mode == "?":
        return Optional(operand, PLAIN_ENGINE)
    else:
        Log.error("not expected")
Ejemplo n.º 2
0
def repeat(tokens):
    """Turn a parsed operand and its repetition operator into a repeating parser.

    Parameters:

     - tokens - parse results holding either a lone operand (``length()``
       of 1) or an ``(operand, operator)`` pair, where ``operator`` is
       indexable by ``"mode"``, ``"exact"``, ``"min"`` and ``"max"``

    Returns ``tokens.value()`` unchanged when there is no operator;
    otherwise a ``Many``, ``ZeroOrMore``, ``OneOrMore`` or ``Optional``
    wrapping the operand.

    An unrecognised mode is reported through ``Log.error``.
    """
    if tokens.length() == 1:
        # no repetition operator was parsed; hand the operand back as-is
        return tokens.value()

    operand, operator = tokens
    mode = operator["mode"]
    if not mode:
        # no symbolic mode: the operator carries explicit counts instead
        if operator["exact"]:
            return Many(operand, exact=int(operator["exact"]))
        else:
            return Many(
                operand, min_match=int(operator["min"]), max_match=int(operator["max"])
            )
    # Compare against whole mode strings.  A substring test such as
    # `mode in "*?"` is also true for "?", which sent the optional operator
    # to ZeroOrMore and left the Optional branch unreachable.
    elif mode in ("*", "*?"):
        return ZeroOrMore(operand)
    elif mode in ("+", "+?"):
        return OneOrMore(operand)
    elif mode == "?":
        return Optional(operand)
    else:
        Log.error("not expected")
Ejemplo n.º 3
0
def dictOf(key, value):
    """Build a dictionary-style parser from a key pattern and a value pattern.

    Each ``key + value`` pair is wrapped in a `Group`, the groups are
    repeated with `OneOrMore` (so at least one entry is required), and the
    whole repetition is wrapped in a `Dict`.

    Parameters:

     - key - pattern for an entry's key; it may include delimiters or
       punctuation as long as they are suppressed, leaving only the
       significant key text
     - value - pattern for an entry's value; it may include named results

    Example::

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_label = label
        attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).addParseAction(' '.join)

        result = dictOf(attr_label, attr_value).parseString(text)
        print(result['shape'])  # SQUARE
        print(result.shape)     # SQUARE - object attribute access works too
    """
    entry = Group(key + value)
    entries = OneOrMore(entry)
    return Dict(entries)
Ejemplo n.º 4
0
def indentedBlock(blockStatementExpr, indent=True):
    """Helper method for defining space-delimited indentation blocks,
    such as those used to define block statements in Python source code.

    Parameters:

     - blockStatementExpr - expression defining syntax of statement that
       is repeated within the indented block
     - indent - boolean indicating whether block must be indented beyond
       the current level; set to False for block of left-most
       statements (default= ``True``)

    The indentation stack is NOT a parameter of this function: the code
    reads and mutates ``_indent_stack``, a name that is not defined in this
    function and must come from an enclosing scope. Its entries are
    ``(column, PEER, DEDENT)`` tuples.

    Returns a ``Group`` expression named "indented block" whose fail action
    pops one entry off ``_indent_stack``.

    A valid block must contain at least one ``blockStatement``.
    """
    # "\\" is a single backslash: registers backslash-followed-by-line-end
    # as ignorable on the statement expression's engine.
    blockStatementExpr.engine.add_ignore("\\" + LineEnd())

    # Placeholders whose checks are (re)bound as indentation levels are
    # entered and left.
    PEER = Forward()
    DEDENT = Forward()

    def _reset_stack(p=None, l=None, s=None, ex=None):
        # Fail action: drop the top level and restore the expressions that
        # were recorded with it. The arguments are unused.
        oldCol, oldPeer, oldDedent = _indent_stack.pop()
        PEER << oldPeer
        DEDENT << oldDedent

    def peer_stack(expectedCol):
        # Build a parse action that requires the current column to equal
        # expectedCol.
        def output(t, l, s):
            if l >= len(s):
                # at or past the end of the input: nothing to check
                return
            curCol = col(l, s)
            if curCol != expectedCol:
                # NOTE(review): the two raises below pass (s, l) and (l, s)
                # in different orders; one of them is presumably wrong -
                # confirm against ParseException's signature.
                if curCol > expectedCol:
                    raise ParseException(t.type, s, l, "illegal nesting")
                raise ParseException(t.type, l, s, "not a peer entry")

        return output

    def dedent_stack(expectedCol):
        # Build a parse action that accepts only a column already recorded
        # on the stack. NOTE(review): expectedCol is never used in output.
        def output(t, l, s):
            if l >= len(s):
                # at or past the end of the input: nothing to check
                return
            curCol = col(l, s)
            if curCol not in (i for i, _, _ in _indent_stack):
                # NOTE(review): unlike the other raises, no t.type is
                # passed here - confirm the intended argument list.
                raise ParseException(s, l, "not an unindent")
            if curCol < _indent_stack[-1][0]:
                # moved left of the innermost level: pop it and restore the
                # expressions recorded with it
                oldCol, oldPeer, oldDedent = _indent_stack.pop()
                PEER << oldPeer
                DEDENT << oldDedent

        return output

    def indent_stack(t, l, s):
        # Parse action for an indented block: the column must be strictly
        # greater than the innermost recorded column.
        curCol = col(l, s)
        if curCol > _indent_stack[-1][0]:
            PEER << Empty().addParseAction(peer_stack(curCol))
            DEDENT << Empty().addParseAction(dedent_stack(curCol))
            _indent_stack.append((curCol, PEER, DEDENT))
        else:
            raise ParseException(t.type, l, s, "not a subentry")

    def nodent_stack(t, l, s):
        # Parse action for a non-indented block: the column must equal the
        # innermost recorded column.
        curCol = col(l, s)
        if curCol == _indent_stack[-1][0]:
            PEER << Empty().addParseAction(peer_stack(curCol))
            DEDENT << Empty().addParseAction(dedent_stack(curCol))
            _indent_stack.append((curCol, PEER, DEDENT))
        else:
            # NOTE(review): argument order (s, l) differs from indent_stack's
            # (l, s) for the same message - confirm which is correct.
            raise ParseException(t.type, s, l, "not a subentry")

    NL = OneOrMore(LineEnd().suppress())
    INDENT = Empty().addParseAction(indent_stack)
    NODENT = Empty().addParseAction(nodent_stack)

    # Both shapes are identical except for the entry check (INDENT vs NODENT).
    if indent:
        smExpr = Group(
            Optional(NL)
            + INDENT
            + OneOrMore(PEER + Group(blockStatementExpr) + Optional(NL))
            + DEDENT
        )
    else:
        smExpr = Group(
            Optional(NL)
            + NODENT
            + OneOrMore(PEER + Group(blockStatementExpr) + Optional(NL))
            + DEDENT
        )
    return smExpr.setFailAction(_reset_stack).set_parser_name("indented block")
Ejemplo n.º 5
0
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString):
    """Build a parser for nested lists wrapped in opening and closing
    delimiters ("(" and ")" unless told otherwise).

    Parameters:
     - opener - opening delimiter of a nested list (default= ``"("``);
       may also be a parsing expression when ``content`` is supplied
     - closer - closing delimiter of a nested list (default= ``")"``);
       may also be a parsing expression when ``content`` is supplied
     - content - expression for the items inside the nested lists
       (default= ``None``)
     - ignoreExpr - expression whose matches may contain delimiter
       characters that must not count as nesting, such as a quoted string
       or a comment (default= `quotedString`); combine several with `Or`
       or `MatchFirst`, or pass ``None`` to ignore nothing

    When ``content`` is omitted, a default item expression is generated
    from the delimiters and the current engine's whitespace characters;
    both delimiters must then be strings.

    Raises ``ValueError`` when ``opener == closer``, or when ``content`` is
    omitted and either delimiter is not a string.
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    skip_ignored = ignoreExpr is not None

    if content is None:
        if not (isinstance(opener, text) and isinstance(closer, text)):
            raise ValueError(
                "opening and closing arguments must be strings if no content expression"
                " is given"
            )

        def strip_first(t):
            # trim surrounding whitespace from the first matched token
            return t[0].strip()

        # capture the whitespace set before switching to the Engine("") context
        whitespace = engine.CURRENT.white_chars
        with Engine(""):
            if len(opener) == 1 and len(closer) == 1:
                # single-character delimiters can be excluded by character set
                excluded = opener + closer + "".join(whitespace)
                if skip_ignored:
                    content = Combine(OneOrMore(
                        ~ignoreExpr + CharsNotIn(excluded, exact=1)
                    )).addParseAction(strip_first)
                else:
                    content = Empty + CharsNotIn(excluded).addParseAction(strip_first)
            elif skip_ignored:
                # multi-character delimiters need explicit negative lookaheads
                content = Combine(OneOrMore(
                    ~ignoreExpr
                    + ~Literal(opener)
                    + ~Literal(closer)
                    + CharsNotIn(whitespace, exact=1)
                )).addParseAction(strip_first)
            else:
                content = Combine(OneOrMore(
                    ~Literal(opener)
                    + ~Literal(closer)
                    + CharsNotIn(whitespace, exact=1)
                )).addParseAction(strip_first)

    nested = Forward()
    if skip_ignored:
        item = ignoreExpr | nested | content
    else:
        item = nested | content
    nested <<= Group(Suppress(opener) + ZeroOrMore(item) + Suppress(closer))
    nested.set_parser_name("nested %s%s expression" % (opener, closer))
    return nested
Ejemplo n.º 6
0
# Comment and rest-of-line parsers, all built while Engine("") is active.
# NOTE(review): presumably Engine("") means "skip no whitespace" - confirm
# against the Engine class. `as engine` binds the name `engine` at this
# scope, which may shadow another object of that name.
with Engine("") as engine:
    # everything up to, but not including, the next newline (may be empty)
    restOfLine = Regex(r"[^\n]*").set_parser_name("rest of line")

    # "//" to end of line; a backslash-newline pair continues the comment
    dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").set_parser_name("// comment")

    # `+` binds tighter than `|`: either ("/*" body followed by "*/") or a
    # "//" comment
    cppStyleComment = Combine(
        Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/" | dblSlashComment
    ).set_parser_name("C++ style comment")

    # same object as cppStyleComment, under a second name
    javaStyleComment = cppStyleComment

    # "#" to end of line
    pythonStyleComment = Regex(r"#[^\n]*").set_parser_name("Python style comment")

# One unquoted list item: runs of printable characters other than ",",
# each optionally followed by spaces/tabs provided that whitespace is not
# followed by a comma or a line end. The combined text is stripped.
_commasepitem = (
    Combine(OneOrMore(
        Word(printables, exclude=",")
        + Optional(Word(" \t") + ~Literal(",") + ~LineEnd())
    ))
    .addParseAction(lambda t: text(t).strip())
    .set_parser_name("commaItem")
)
# Delimited list of quoted strings or unquoted items; a missing item
# yields "" (the Optional default).
commaSeparatedList = delimitedList(Optional(
    quotedString | _commasepitem, default=""
)).set_parser_name("commaSeparatedList")


# Parse actions built by tokenMap from int and float.
convertToInteger = tokenMap(int)
convertToFloat = tokenMap(float)

# A run of `nums` characters, with convertToInteger applied to the match.
integer = Word(nums).set_parser_name("integer").addParseAction(convertToInteger)

hex_integer = (
Ejemplo n.º 7
0
# Hex escape: backslash, optional "0", "x" or "X", then hex digits. The
# prefix is stripped and the digits are converted to a character.
_escapedHexChar = (
    Regex(r"\\0?[xX][0-9a-fA-F]+").addParseAction(lambda t: unichr(int(
        t[0].lstrip('\\').lstrip('0').lstrip('xX'), 16
    )))
)
# Octal escape: backslash, "0", then octal digits. [1:] drops the
# backslash and the rest is converted from base 8 to a character.
_escapedOctChar = Regex(r"\\0[0-7]+").addParseAction(lambda t, l, s: unichr(int(
    t[0][1:], 8
)))
# One character of a bracket expression. r"\]" is the two characters
# backslash and "]". _escapedPunc is defined outside this snippet.
_singleChar = (
    _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r"\]", exact=1)
)
# "a-z" style range; the "-" is suppressed so the group holds the two
# endpoints.
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
# Full "[...]" expression with an optional "^" recorded as "negate" and
# the ranges/characters grouped as "body".
_reBracketExpr = (
    Literal("[")
    + Optional("^").set_token_name("negate")
    + Group(OneOrMore(_charRange | _singleChar)).set_token_name("body")
    + "]"
)


def srange(s):
    r"""Helper to easily define string ranges for use in Word
    construction. Borrows syntax from regexp '[]' string range
    definitions::

        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"

    The input string must be enclosed in []'s, and the returned string
    is the expanded character set joined into a single string. The
Ejemplo n.º 8
0
    ret.set_parser_name("nested %s%s expression" % (opener, closer))
    return ret


# Grammar for regex-style "[...]" bracket expressions, built while
# Engine("") is active.
# NOTE(review): presumably Engine("") means "skip no whitespace" - confirm.
with Engine(""):
    # Backslash followed by one of the listed punctuation characters;
    # the parse action keeps only the escaped character (index 1).
    _escapedPunc = Word("\\", r"\[]-*.$+^?()~ ",
                        exact=2).addParseAction(lambda t, l, s: t[0][1])
    # Hex escape: backslash, optional "0", "x" or "X", then hex digits.
    # The prefix is stripped and the digits are converted to a character.
    _escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").addParseAction(
        lambda t: unichr(int(t[0].lstrip("\\").lstrip("0").lstrip("xX"), 16)))
    # Octal escape: backslash, "0", then octal digits. [1:] drops the
    # backslash and the rest is converted from base 8 to a character.
    _escapedOctChar = Regex(r"\\0[0-7]+").addParseAction(
        lambda t, l, s: unichr(int(t[0][1:], 8)))
    # One character of a bracket expression. r"\]" is the two characters
    # backslash and "]".
    _singleChar = (_escapedPunc | _escapedHexChar | _escapedOctChar
                   | CharsNotIn(r"\]", exact=1))
    # "a-z" style range; the "-" is suppressed so the group holds the two
    # endpoints.
    _charRange = Group(_singleChar + Suppress("-") + _singleChar)
    # Full "[...]" expression with an optional "^" recorded as "negate"
    # and the ranges/characters grouped as "body".
    _reBracketExpr = ("[" + Optional("^").set_token_name("negate") + Group(
        OneOrMore(_charRange | _singleChar)).set_token_name("body") + "]")


def srange(s):
    r"""Helper to easily define string ranges for use in Word
    construction. Borrows syntax from regexp '[]' string range
    definitions::

        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"

    The input string must be enclosed in []'s, and the returned string
    is the expanded character set joined into a single string. The
    values enclosed in the []'s may be:
Ejemplo n.º 9
0
    # everything up to, but not including, the next newline (may be empty)
    restOfLine = Regex(r"[^\n]*").set_parser_name("rest of line")

    # "//" to end of line; a backslash-newline pair continues the comment
    dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").set_parser_name("// comment")

    # `+` binds tighter than `|`: either ("/*" body followed by "*/") or a
    # "//" comment
    cppStyleComment = Combine(
        Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/"
        | dblSlashComment).set_parser_name("C++ style comment")

    # same object as cppStyleComment, under a second name
    javaStyleComment = cppStyleComment

    # "#" to end of line
    pythonStyleComment = Regex(r"#[^\n]*").set_parser_name(
        "Python style comment")

# One unquoted list item: runs of printable characters other than ",",
# each optionally followed by spaces/tabs provided that whitespace is not
# followed by a comma or a line end. The combined text is stripped.
_commasepitem = (Combine(
    OneOrMore(
        Word(printables, exclude=",") +
        Optional(Word(" \t") + ~Literal(",") + ~LineEnd()))).addParseAction(
            lambda t: text(t).strip()).set_parser_name("commaItem"))
# Delimited list of quoted strings or unquoted items; a missing item
# yields "" (the Optional default).
commaSeparatedList = delimitedList(
    Optional(quotedString | _commasepitem,
             default="")).set_parser_name("commaSeparatedList")

# Parse actions built by tokenMap from int and float.
convertToInteger = tokenMap(int)
convertToFloat = tokenMap(float)

# A run of `nums` characters, with convertToInteger applied to the match.
integer = Word(nums).set_parser_name("integer").addParseAction(
    convertToInteger)

# A run of `hexnums` characters, converted with tokenMap(int, 16)
# (presumably int(token, 16) - confirm against tokenMap).
hex_integer = (Word(hexnums).set_parser_name("hex integer").addParseAction(
    tokenMap(int, 16)))
Ejemplo n.º 10
0
    | not_wordchar
    | not_whitechar
    | CR
    | LF
    | any_char
    | bs_char
    | tab_char
)
# Backslash followed by any character, provided `macro` does not match at
# this position (`~` is presumably a negative lookahead - confirm). The
# parse action returns a Literal of the escaped character (index 1).
escapedChar = (
    ~macro + Combine("\\" + AnyChar())
).addParseAction(lambda t: Literal(t.value()[1]))
# Any single character other than backslash or "]" (r"\]" is those two
# characters), returned as a Literal.
plainChar = Char(exclude=r"\]").addParseAction(lambda t: Literal(t.value()))

# Hex escape introduced by "\0x", "\x" or "\X", followed by hex digits;
# conversion is delegated to hex_to_char (defined outside this snippet).
escapedHexChar = Combine(
    (Literal("\\0x") | Literal("\\x") | Literal("\\X"))  # lookup literals is faster
    + OneOrMore(Char(hexnums))
).addParseAction(hex_to_char)

# Octal escape: "\0" followed by octal digits. [2:] drops the two-character
# "\0" prefix; the remaining digits are read in base 8 and returned as a
# Literal of that character.
escapedOctChar = Combine(
    Literal("\\0") + OneOrMore(Char("01234567"))
).addParseAction(lambda t: Literal(unichr(int(t.value()[2:], 8))))

# Alternatives are tried in this order, so hex and octal escapes take
# precedence over the generic backslash escape.
singleChar = escapedHexChar | escapedOctChar | escapedChar | plainChar

# "min-max" range; calling an expression with a string presumably names
# its token (confirm). to_range is defined outside this snippet.
charRange = Group(singleChar("min") + "-" + singleChar("max")).addParseAction(to_range)

brackets = (
    "["
    + Optional("^")("negate")
    + OneOrMore(Group(charRange | singleChar | macro)("body"))
    + "]"
Ejemplo n.º 11
0
         | not_wordchar
         | not_whitechar
         | CR
         | LF
         | any_char
         | bs_char
         | tab_char)
# Backslash followed by any character, provided `macro` does not match at
# this position (`~` is presumably a negative lookahead - confirm). The
# parse action returns a Literal of the escaped character (index 1).
escapedChar = (
    ~macro +
    Combine("\\" + AnyChar())).addParseAction(lambda t: Literal(t.value()[1]))
# Any single character other than backslash or "]" (r"\]" is those two
# characters), returned as a Literal.
plainChar = Char(exclude=r"\]").addParseAction(lambda t: Literal(t.value()))

# Hex escape introduced by "\0x", "\x" or "\X", followed by hex digits;
# conversion is delegated to hex_to_char (defined outside this snippet).
escapedHexChar = Combine((Literal("\\0x") | Literal("\\x")
                          | Literal("\\X"))  # lookup literals is faster
                         +
                         OneOrMore(Char(hexnums))).addParseAction(hex_to_char)

# Octal escape: "\0" followed by octal digits. [2:] drops the two-character
# "\0" prefix; the remaining digits are read in base 8 and returned as a
# Literal of that character.
escapedOctChar = Combine(Literal("\\0") +
                         OneOrMore(Char("01234567"))).addParseAction(
                             lambda t: Literal(unichr(int(t.value()[2:], 8))))

# Alternatives are tried in this order, so hex and octal escapes take
# precedence over the generic backslash escape.
singleChar = escapedHexChar | escapedOctChar | escapedChar | plainChar

# "min-max" range; calling an expression with a string presumably names
# its token (confirm). to_range is defined outside this snippet.
charRange = Group(singleChar("min") + "-" +
                  singleChar("max")).addParseAction(to_range)

# "[...]" expression: optional "^" named "negate", then one or more
# grouped ranges, single characters or macros, each named "body". The
# result is post-processed by to_bracket (defined outside this snippet).
brackets = ("[" + Optional("^")("negate") +
            OneOrMore(Group(charRange | singleChar | macro)("body")) +
            "]").addParseAction(to_bracket)

#########################################################################################
Ejemplo n.º 12
0
         | not_wordchar
         | not_whitechar
         | CR
         | LF
         | any_char
         | bs_char
         | tab_char)
# Backslash followed by any character, provided `macro` does not match at
# this position (`~` is presumably a negative lookahead - confirm). The
# parse action returns a Literal of the escaped character (index 1).
escapedChar = (
    ~macro +
    Combine("\\" + AnyChar())).addParseAction(lambda t: Literal(t.value()[1]))
# Any single character other than backslash or "]" (r"\]" is those two
# characters), returned as a Literal.
plainChar = Char(exclude=r"\]").addParseAction(lambda t: Literal(t.value()))

# Hex escape introduced by "\0x", "\x" or "\X", followed by hex digits;
# conversion is delegated to hex_to_char (defined outside this snippet).
# The repetition is built with PLAIN_ENGINE passed explicitly.
escapedHexChar = Combine(
    (Literal("\\0x") | Literal("\\x")
     | Literal("\\X"))  # lookup literals is faster
    + OneOrMore(Char(hexnums), PLAIN_ENGINE)).addParseAction(hex_to_char)

# Octal escape: "\0" followed by octal digits. [2:] drops the two-character
# "\0" prefix; the remaining digits are read in base 8 and returned as a
# Literal of that character.
escapedOctChar = Combine(
    Literal("\\0") + OneOrMore(Char("01234567"), PLAIN_ENGINE)).addParseAction(
        lambda t: Literal(unichr(int(t.value()[2:], 8))))

# Alternatives are tried in this order, so hex and octal escapes take
# precedence over the generic backslash escape.
singleChar = escapedHexChar | escapedOctChar | escapedChar | plainChar

# "min-max" range; calling an expression with a string presumably names
# its token (confirm). to_range is defined outside this snippet.
charRange = Group(singleChar("min") + "-" +
                  singleChar("max")).addParseAction(to_range)

# "[...]" expression: optional "^" named "negate", then one or more
# grouped ranges, single characters or macros, each named "body". The
# Optional and OneOrMore wrappers receive PLAIN_ENGINE explicitly; the
# result is post-processed by to_bracket (defined outside this snippet).
brackets = (
    "[" + Optional("^", PLAIN_ENGINE)("negate") +
    OneOrMore(Group(charRange | singleChar | macro)("body"), PLAIN_ENGINE) +
    "]").addParseAction(to_bracket)