Example #1
0
def name_token(tokens):
    with PLAIN_ENGINE:
        n = tokens["name"]
        v = tokens["value"]
        if not n:
            n = str(num_captures)
        return Combine(v).set_token_name(n)
Example #2
0
def delimitedList(expr, separator=",", combine=False):
    """
    PARSE DELIMITED LIST OF expr
    Example::

        delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc']
        delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
    """
    if combine:
        return Combine(expr + ZeroOrMore(separator + expr))
    else:
        return expr + ZeroOrMore(Suppress(separator) + expr)
Example #3
0
def makeHTMLTags(tagStr, suppress_LT=Suppress("<"), suppress_GT=Suppress(">")):
    """Helper to construct opening and closing tag expressions for HTML,
    given a tag name. Matches tags in either upper or lower case,
    attributes with namespaces and with quoted or unquoted values.
    """
    if isinstance(tagStr, text):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=True)
    else:
        resname = tagStr.parser_name

    tagAttrName = Word(alphas, alphanums + "_-:")
    tagAttrValue = quotedString.addParseAction(removeQuotes) | Word(
        printables, exclude=">"
    )
    simpler_name = "".join(resname.replace(":", " ").title().split())

    openTag = (
        (
            suppress_LT
            + tagStr("tag")
            + OpenDict(ZeroOrMore(Group(
                tagAttrName.addParseAction(downcaseTokens)
                + Optional(Suppress("=") + tagAttrValue)
            )))
            + Optional(
                "/", default=[False]
            )("empty").addParseAction(lambda t, l, s: t[0] == "/")
            + suppress_GT
        )
        .set_token_name("start" + simpler_name)
        .set_parser_name("<%s>" % resname)
    )

    closeTag = (
        Combine(Literal("</") + tagStr + ">")
        .set_token_name("end" + simpler_name)
        .set_parser_name("</%s>" % resname)
    )

    # openTag.tag = resname
    # closeTag.tag = resname
    # openTag.tag_body = SkipTo(closeTag)

    return openTag, closeTag
Example #4
0
def QuotedString(
    quote_char,
    esc_char=None,
    esc_quote=None,
    multiline=False,
    unquote_results=True,
    end_quote_char="",
    convert_whitespace_escape=True,
):
    r"""
    Token for matching strings that are delimited by quoting characters.

    Defined with the following parameters:

        - quote_char - string of one or more characters defining the
          quote delimiting string
        - esc_char - character to escape quotes, typically backslash
          (default= ``None``)
        - esc_quote - special quote sequence to escape an embedded quote
          string (such as SQL's ``""`` to escape an embedded ``"``)
          (default= ``None``)
        - multiline - boolean indicating whether quotes can span
          multiple lines (default= ``False``)
        - unquoteResults - boolean indicating whether the matched text
          should be unquoted (default= ``True``)
        - end_quote_char - string of one or more characters defining the
          end of the quote delimited string (default= ``None``  => same as
          quote_char)
        - convertWhitespaceEscapes - convert escaped whitespace
          (``'\t'``, ``'\n'``, etc.) to actual whitespace
          (default= ``True``)

    """
    quote_char = quote_char.strip()
    end_quote_char = end_quote_char.strip() or quote_char

    if not quote_char:
        Log.error("quote_char cannot be the empty string")
    if not end_quote_char:
        Log.error("end_quote_char cannot be the empty string")

    excluded = Literal(end_quote_char)

    if multiline:
        anychar = AnyChar()
    else:
        anychar = Char(exclude="\n")
        excluded |= Char("\r\n")

    included = ~Literal(end_quote_char) + anychar

    if esc_quote:
        included = Literal(esc_quote) | included
    if esc_char:
        excluded |= Literal(esc_char)
        included = esc_char + Char(printables) | included
        esc_char_replace_pattern = re.escape(esc_char) + "(.)"

    prec, pattern = (
        Literal(quote_char) + ((~excluded + anychar) | included)[0:]
    ).__regex__()
    # IMPORTANT: THE end_quote_char IS OUTSIDE THE Regex BECAUSE OF PATHOLOGICAL BACKTRACKING
    output = Combine(Regex(pattern) + Literal(end_quote_char))

    def post_parse(tokens):
        ret = tokens[0]
        if unquote_results:
            # strip off quotes
            ret = ret[len(quote_char) : -len(end_quote_char)]

            if isinstance(ret, text):
                # replace escaped whitespace
                if "\\" in ret and convert_whitespace_escape:
                    ws_map = {
                        r"\t": "\t",
                        r"\n": "\n",
                        r"\f": "\f",
                        r"\r": "\r",
                    }
                    for wslit, wschar in ws_map.items():
                        ret = ret.replace(wslit, wschar)

                # replace escaped characters
                if esc_char:
                    ret = re.sub(esc_char_replace_pattern, r"\g<1>", ret)

                # replace escaped quotes
                if esc_quote:
                    ret = ret.replace(esc_quote, end_quote_char)

        return ParseResults(tokens.type, tokens.start, tokens.end, [ret])

    return output.addParseAction(post_parse).streamline()
Example #5
0
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString):
    """Helper method for defining nested lists enclosed in opening and
    closing delimiters ("(" and ")" are the default).

    Parameters:
     - opener - opening character for a nested list
       (default= ``"("``); can also be a mo_parsing expression
     - closer - closing character for a nested list
       (default= ``")"``); can also be a mo_parsing expression
     - content - expression for items within the nested lists
       (default= ``None``)
     - ignoreExpr - expression for ignoring opening and closing
       delimiters (default= `quotedString`)

    If an expression is not provided for the content argument, the
    nested expression will capture all whitespace-delimited content
    between delimiters as a list of separate values.

    Use the ``ignoreExpr`` argument to define expressions that may
    contain opening or closing characters that should not be treated as
    opening or closing characters for nesting, such as quotedString or
    a comment expression.  Specify multiple expressions using an
    `Or` or `MatchFirst`. The default is
    `quotedString`, but if no expressions are to be ignored, then
    pass ``None`` for this argument.

    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")
    if content is None:
        if not isinstance(opener, text) or not isinstance(closer, text):
            raise ValueError(
                "opening and closing arguments must be strings if no content expression"
                " is given"
            )

        ignore_chars = engine.CURRENT.white_chars
        with Engine(""):

            def scrub(t):
                return t[0].strip()

            if len(opener) == 1 and len(closer) == 1:
                if ignoreExpr is not None:
                    content = Combine(OneOrMore(
                        ~ignoreExpr
                        + CharsNotIn(opener + closer + "".join(ignore_chars), exact=1,)
                    )).addParseAction(scrub)
                else:
                    content = Empty + CharsNotIn(
                        opener + closer + "".join(ignore_chars)
                    ).addParseAction(scrub)
            else:
                if ignoreExpr is not None:
                    content = Combine(OneOrMore(
                        ~ignoreExpr
                        + ~Literal(opener)
                        + ~Literal(closer)
                        + CharsNotIn(ignore_chars, exact=1)
                    )).addParseAction(scrub)
                else:
                    content = Combine(OneOrMore(
                        ~Literal(opener)
                        + ~Literal(closer)
                        + CharsNotIn(ignore_chars, exact=1)
                    )).addParseAction(scrub)
    ret = Forward()
    if ignoreExpr is not None:
        ret <<= Group(
            Suppress(opener) + ZeroOrMore(ignoreExpr | ret | content) + Suppress(closer)
        )
    else:
        ret <<= Group(Suppress(opener) + ZeroOrMore(ret | content) + Suppress(closer))
    ret.set_parser_name("nested %s%s expression" % (opener, closer))
    return ret
Example #6
0
                # replace escaped characters
                if esc_char:
                    ret = re.sub(esc_char_replace_pattern, r"\g<1>", ret)

                # replace escaped quotes
                if esc_quote:
                    ret = ret.replace(esc_quote, end_quote_char)

        return ParseResults(tokens.type, tokens.start, tokens.end, [ret])

    return output.addParseAction(post_parse).streamline()


dblQuotedString = Combine(
    #       0         1         2         3         4         5
    #       012345678901234567890123456789012345678901234567890123456789
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')
    + '"'
).set_parser_name("string enclosed in double quotes")
sglQuotedString = Combine(
    Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).set_parser_name("string enclosed in single quotes")
quotedString = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
    | Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).set_parser_name("quotedString using single or double quotes")
unicodeString = Combine(
    Literal("u") + quotedString
).set_parser_name("unicode string literal")


def countedArray(expr, intExpr=None):
Example #7
0
macro = (
    any_whitechar
    | any_wordchar
    | any_digitchar
    | not_digitchar
    | not_wordchar
    | not_whitechar
    | CR
    | LF
    | any_char
    | bs_char
    | tab_char
)
escapedChar = (
    ~macro + Combine("\\" + AnyChar())
).addParseAction(lambda t: Literal(t.value()[1]))
plainChar = Char(exclude=r"\]").addParseAction(lambda t: Literal(t.value()))

escapedHexChar = Combine(
    (Literal("\\0x") | Literal("\\x") | Literal("\\X"))  # lookup literals is faster
    + OneOrMore(Char(hexnums))
).addParseAction(hex_to_char)

escapedOctChar = Combine(
    Literal("\\0") + OneOrMore(Char("01234567"))
).addParseAction(lambda t: Literal(unichr(int(t.value()[2:], 8))))

singleChar = escapedHexChar | escapedOctChar | escapedChar | plainChar

charRange = Group(singleChar("min") + "-" + singleChar("max")).addParseAction(to_range)