Ejemplo n.º 1
0
def repeat(tokens):
    if tokens.length() == 1:
        return tokens.value()

    try:
        operand, operator = tokens
    except Exception as cause:
        Log.error("not expected", cause=cause)

    mode = operator["mode"]
    if not mode:
        if operator["exact"]:
            return Many(operand, PLAIN_ENGINE, exact=int(operator["exact"]))
        else:
            return Many(operand,
                        PLAIN_ENGINE,
                        min_match=int(operator["min"]),
                        max_match=int(operator["max"]))
    elif mode in "*?":
        return ZeroOrMore(operand, PLAIN_ENGINE)
    elif mode in "+?":
        return OneOrMore(operand, PLAIN_ENGINE)
    elif mode == "?":
        return Optional(operand, PLAIN_ENGINE)
    else:
        Log.error("not expected")
Ejemplo n.º 2
0
def makeHTMLTags(tagStr, suppress_LT=Suppress("<"), suppress_GT=Suppress(">")):
    """Helper to construct opening and closing tag expressions for HTML,
    given a tag name. Matches tags in either upper or lower case,
    attributes with namespaces and with quoted or unquoted values.
    """
    if isinstance(tagStr, text):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=True)
    else:
        resname = tagStr.parser_name

    tagAttrName = Word(alphas, alphanums + "_-:")
    tagAttrValue = quotedString.addParseAction(removeQuotes) | Word(
        printables, exclude=">"
    )
    simpler_name = "".join(resname.replace(":", " ").title().split())

    openTag = (
        (
            suppress_LT
            + tagStr("tag")
            + OpenDict(ZeroOrMore(Group(
                tagAttrName.addParseAction(downcaseTokens)
                + Optional(Suppress("=") + tagAttrValue)
            )))
            + Optional(
                "/", default=[False]
            )("empty").addParseAction(lambda t, l, s: t[0] == "/")
            + suppress_GT
        )
        .set_token_name("start" + simpler_name)
        .set_parser_name("<%s>" % resname)
    )

    closeTag = (
        Combine(Literal("</") + tagStr + ">")
        .set_token_name("end" + simpler_name)
        .set_parser_name("</%s>" % resname)
    )

    # openTag.tag = resname
    # closeTag.tag = resname
    # openTag.tag_body = SkipTo(closeTag)

    return openTag, closeTag
Ejemplo n.º 3
0
def repeat(tokens):
    if tokens.length() == 1:
        return tokens.value()

    operand, operator = tokens
    mode = operator["mode"]
    if not mode:
        if operator["exact"]:
            return Many(operand, exact=int(operator["exact"]))
        else:
            return Many(
                operand, min_match=int(operator["min"]), max_match=int(operator["max"])
            )
    elif mode in "*?":
        return ZeroOrMore(operand)
    elif mode in "+?":
        return OneOrMore(operand)
    elif mode == "?":
        return Optional(operand)
    else:
        Log.error("not expected")
Ejemplo n.º 4
0
def indentedBlock(blockStatementExpr, indent=True):
    """Helper method for defining space-delimited indentation blocks,
    such as those used to define block statements in Python source code.

    Parameters:

     - blockStatementExpr - expression defining syntax of statement that
       is repeated within the indented block
     - indentStack - list created by caller to manage indentation stack
       (multiple statementWithIndentedBlock expressions within a single
       grammar should share a common indentStack)
     - indent - boolean indicating whether block must be indented beyond
       the current level; set to False for block of left-most
       statements (default= ``True``)

    A valid block must contain at least one ``blockStatement``.
    """
    blockStatementExpr.engine.add_ignore("\\" + LineEnd())

    PEER = Forward()
    DEDENT = Forward()

    def _reset_stack(p=None, l=None, s=None, ex=None):
        oldCol, oldPeer, oldDedent = _indent_stack.pop()
        PEER << oldPeer
        DEDENT << oldDedent

    def peer_stack(expectedCol):
        def output(t, l, s):
            if l >= len(s):
                return
            curCol = col(l, s)
            if curCol != expectedCol:
                if curCol > expectedCol:
                    raise ParseException(t.type, s, l, "illegal nesting")
                raise ParseException(t.type, l, s, "not a peer entry")

        return output

    def dedent_stack(expectedCol):
        def output(t, l, s):
            if l >= len(s):
                return
            curCol = col(l, s)
            if curCol not in (i for i, _, _ in _indent_stack):
                raise ParseException(s, l, "not an unindent")
            if curCol < _indent_stack[-1][0]:
                oldCol, oldPeer, oldDedent = _indent_stack.pop()
                PEER << oldPeer
                DEDENT << oldDedent

        return output

    def indent_stack(t, l, s):
        curCol = col(l, s)
        if curCol > _indent_stack[-1][0]:
            PEER << Empty().addParseAction(peer_stack(curCol))
            DEDENT << Empty().addParseAction(dedent_stack(curCol))
            _indent_stack.append((curCol, PEER, DEDENT))
        else:
            raise ParseException(t.type, l, s, "not a subentry")

    def nodent_stack(t, l, s):
        curCol = col(l, s)
        if curCol == _indent_stack[-1][0]:
            PEER << Empty().addParseAction(peer_stack(curCol))
            DEDENT << Empty().addParseAction(dedent_stack(curCol))
            _indent_stack.append((curCol, PEER, DEDENT))
        else:
            raise ParseException(t.type, s, l, "not a subentry")

    NL = OneOrMore(LineEnd().suppress())
    INDENT = Empty().addParseAction(indent_stack)
    NODENT = Empty().addParseAction(nodent_stack)

    if indent:
        smExpr = Group(
            Optional(NL)
            + INDENT
            + OneOrMore(PEER + Group(blockStatementExpr) + Optional(NL))
            + DEDENT
        )
    else:
        smExpr = Group(
            Optional(NL)
            + NODENT
            + OneOrMore(PEER + Group(blockStatementExpr) + Optional(NL))
            + DEDENT
        )
    return smExpr.setFailAction(_reset_stack).set_parser_name("indented block")
Ejemplo n.º 5
0
    restOfLine = Regex(r"[^\n]*").set_parser_name("rest of line")

    dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").set_parser_name("// comment")

    cppStyleComment = Combine(
        Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/" | dblSlashComment
    ).set_parser_name("C++ style comment")

    javaStyleComment = cppStyleComment

    pythonStyleComment = Regex(r"#[^\n]*").set_parser_name("Python style comment")

_commasepitem = (
    Combine(OneOrMore(
        Word(printables, exclude=",")
        + Optional(Word(" \t") + ~Literal(",") + ~LineEnd())
    ))
    .addParseAction(lambda t: text(t).strip())
    .set_parser_name("commaItem")
)
commaSeparatedList = delimitedList(Optional(
    quotedString | _commasepitem, default=""
)).set_parser_name("commaSeparatedList")


convertToInteger = tokenMap(int)
convertToFloat = tokenMap(float)

integer = Word(nums).set_parser_name("integer").addParseAction(convertToInteger)

hex_integer = (
Ejemplo n.º 6
0
            Suppress(opener) + ZeroOrMore(ret | content) + Suppress(closer))
    ret.set_parser_name("nested %s%s expression" % (opener, closer))
    return ret


with Engine(""):
    _escapedPunc = Word("\\", r"\[]-*.$+^?()~ ",
                        exact=2).addParseAction(lambda t, l, s: t[0][1])
    _escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").addParseAction(
        lambda t: unichr(int(t[0].lstrip("\\").lstrip("0").lstrip("xX"), 16)))
    _escapedOctChar = Regex(r"\\0[0-7]+").addParseAction(
        lambda t, l, s: unichr(int(t[0][1:], 8)))
    _singleChar = (_escapedPunc | _escapedHexChar | _escapedOctChar
                   | CharsNotIn(r"\]", exact=1))
    _charRange = Group(_singleChar + Suppress("-") + _singleChar)
    _reBracketExpr = ("[" + Optional("^").set_token_name("negate") + Group(
        OneOrMore(_charRange | _singleChar)).set_token_name("body") + "]")


def srange(s):
    r"""Helper to easily define string ranges for use in Word
    construction. Borrows syntax from regexp '[]' string range
    definitions::

        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"

    The input string must be enclosed in []'s, and the returned string
    is the expanded character set joined into a single string. The
    values enclosed in the []'s may be:
Ejemplo n.º 7
0
    def __or__(self, other):
        if other is Ellipsis:
            return _PendingSkip(Optional(self))

        return MatchFirst([self, engine.CURRENT.normalize(other)]).streamline()
Ejemplo n.º 8
0
escapedHexChar = Combine(
    (Literal("\\0x") | Literal("\\x") | Literal("\\X"))  # lookup literals is faster
    + OneOrMore(Char(hexnums))
).addParseAction(hex_to_char)

escapedOctChar = Combine(
    Literal("\\0") + OneOrMore(Char("01234567"))
).addParseAction(lambda t: Literal(unichr(int(t.value()[2:], 8))))

singleChar = escapedHexChar | escapedOctChar | escapedChar | plainChar

charRange = Group(singleChar("min") + "-" + singleChar("max")).addParseAction(to_range)

brackets = (
    "["
    + Optional("^")("negate")
    + OneOrMore(Group(charRange | singleChar | macro)("body"))
    + "]"
).addParseAction(to_bracket)

#########################################################################################
# REGEX
regex = Forward()

line_start = Literal("^").addParseAction(lambda: LineStart())
line_end = Literal("$").addParseAction(lambda: LineEnd())
word_edge = Literal("\\b").addParseAction(lambda: NotAny(any_wordchar))
simple_char = Word(
    printables, exclude=r".^$*+{}[]\|()"
).addParseAction(lambda t: Literal(t.value()))
esc_char = ("\\" + AnyChar()).addParseAction(lambda t: Literal(t.value()[1]))
Ejemplo n.º 9
0
escapedHexChar = Combine((Literal("\\0x") | Literal("\\x")
                          | Literal("\\X"))  # lookup literals is faster
                         +
                         OneOrMore(Char(hexnums))).addParseAction(hex_to_char)

escapedOctChar = Combine(Literal("\\0") +
                         OneOrMore(Char("01234567"))).addParseAction(
                             lambda t: Literal(unichr(int(t.value()[2:], 8))))

singleChar = escapedHexChar | escapedOctChar | escapedChar | plainChar

charRange = Group(singleChar("min") + "-" +
                  singleChar("max")).addParseAction(to_range)

brackets = ("[" + Optional("^")("negate") +
            OneOrMore(Group(charRange | singleChar | macro)("body")) +
            "]").addParseAction(to_bracket)

#########################################################################################
# REGEX
regex = Forward()

line_start = Literal("^").addParseAction(lambda: LineStart())
line_end = Literal("$").addParseAction(lambda: LineEnd())
word_edge = Literal("\\b").addParseAction(lambda: NotAny(any_wordchar))
simple_char = Word(
    printables,
    exclude=r".^$*+{}[]\|()").addParseAction(lambda t: Literal(t.value()))
esc_char = ("\\" + AnyChar()).addParseAction(lambda t: Literal(t.value()[1]))
Ejemplo n.º 10
0
escapedHexChar = Combine(
    (Literal("\\0x") | Literal("\\x")
     | Literal("\\X"))  # lookup literals is faster
    + OneOrMore(Char(hexnums), PLAIN_ENGINE)).addParseAction(hex_to_char)

escapedOctChar = Combine(
    Literal("\\0") + OneOrMore(Char("01234567"), PLAIN_ENGINE)).addParseAction(
        lambda t: Literal(unichr(int(t.value()[2:], 8))))

singleChar = escapedHexChar | escapedOctChar | escapedChar | plainChar

charRange = Group(singleChar("min") + "-" +
                  singleChar("max")).addParseAction(to_range)

brackets = (
    "[" + Optional("^", PLAIN_ENGINE)("negate") +
    OneOrMore(Group(charRange | singleChar | macro)("body"), PLAIN_ENGINE) +
    "]").addParseAction(to_bracket)

#########################################################################################
# REGEX
regex = Forward()

line_start = Literal("^").addParseAction(lambda: LineStart())
line_end = Literal("$").addParseAction(lambda: LineEnd())
word_edge = Literal("\\b").addParseAction(lambda: NotAny(any_wordchar))
simple_char = Word(
    printables,
    exclude=r".^$*+{}[]\|()").addParseAction(lambda t: Literal(t.value()))
esc_char = ("\\" + AnyChar()).addParseAction(lambda t: Literal(t.value()[1]))