Ejemplo n.º 1
0
def get_plaintext(parse_actions=False):
    r"""Get plaintext parser element.

    plaintext = { ( any of a-zA-Z0-9 or '!"$%&()+,-./?@\^_`~',
    any of "[]*#:;='", any Unicode character without "|[]*#:;<>='{}" ) |
    any Unicode character without "|[]*#:;<>='{}" }-;

    :param bool parse_actions: currently has no effect; the branch it
        guards is a no-op placeholder
    :returns: plaintext parser element
    :rtype: ParserElement
    """
    # The backslash is written as "\\": a bare "\^" is an invalid escape
    # sequence that newer Python versions warn about.  The resulting string
    # is unchanged (backslash followed by "^").
    str0 = '!"$%&()+,-./?@\\^_`~'
    str1 = "[]*#:;='"
    str2 = "|[]*#:;<>='{}"
    plaintext = pyparsing.Combine(
        pyparsing.OneOrMore(
            (
                # "safe" character, then a markup character, then one
                # character that is not in str2 ...
                pyparsing.oneOf(" ".join(pyparsing.alphanums+str0))
                + pyparsing.oneOf(" ".join(str1))
                + pyparsing.CharsNotIn(str2, max=1)
            )
            # ... or just a single character that is not in str2; "^" picks
            # the longest of the two alternatives.
            ^ pyparsing.CharsNotIn(str2, max=1)
        )
    )
    plaintext.setName("plaintext")
    plaintext.parseWithTabs()
    if parse_actions:
        pass
    return plaintext
Ejemplo n.º 2
0
class TestTransformStringUsingParseActions(PyparsingExpressionTestCase):
    """transformString case: rewrite ``<symbol>(text)`` markup as HTML tags."""

    # Markup symbol -> name of the HTML tag it is converted to.
    markup_convert_map = {
        '*': 'B',
        '_': 'U',
        '/': 'I',
    }

    def markup_convert(t):
        """Parse action: wrap the matched body in the tag for its symbol."""
        tag_by_symbol = TestTransformStringUsingParseActions.markup_convert_map
        tag = tag_by_symbol[t.markup_symbol]
        return "<{tag}>{body}</{tag}>".format(tag=tag, body=t.body)

    tests = [
        PpTestSpec(
            desc="Use transformString to convert simple markup to HTML",
            # symbol "(" body ")", with the symbol and body captured by name
            # so that the parse action can read them as attributes.
            expr=(
                pp.oneOf(markup_convert_map)('markup_symbol')
                + "("
                + pp.CharsNotIn(")")('body')
                + ")"
            ).addParseAction(markup_convert),
            text="Show in *(bold), _(underscore), or /(italic) type",
            expected_list=[
                'Show in <B>bold</B>, <U>underscore</U>, or <I>italic</I> type'
            ],
            parse_fn='transformString',
        ),
    ]
Ejemplo n.º 3
0
    def _parse(cls, text, function):
        '''Prep `text` into a list of strs and then pass it to `function`.

        The given text should contain one or more {}s in it,
        like "foo {bar} buzz", and this function will convert it
        to a nested list along the lines of ['foo', ['bar'], 'buzz']
        and then pass that to `function`.

        pyparsing's process-wide default whitespace characters are changed
        to newline + tab while this method runs and are restored to their
        previous value before it returns, even if parsing or `function`
        raises.

        Args:
            text (str):
                The text to convert into a list (of lists) of strs.
            function (callable[list[str or list[str]]]):
                The function that takes the parsed `text` as input.

        Returns:
            str: The output of function, minus its first and last characters.

        '''
        # 1. We wrap the entire text in {}s, to make it a nested expression
        text = cls._wrap(text)

        # The default whitespace setting is global to pyparsing, so remember
        # the current value and put it back when we are done instead of
        # leaking the change to every parser created afterwards.
        previous_whitespace = pyparsing.ParserElement.DEFAULT_WHITE_CHARS
        pyparsing.ParserElement.setDefaultWhitespaceChars('\n\t')
        try:
            _content = pyparsing.CharsNotIn('{}')
            _curlys = pyparsing.nestedExpr('{', '}', content=_content)

            # 2. Since we made the expression nested a little while ago,
            #    lets unpack it by getting the 0th index
            #
            parsed_text = _curlys.parseString(text).asList()[0]
            result = function(parsed_text)
        finally:
            pyparsing.ParserElement.setDefaultWhitespaceChars(
                previous_whitespace)

        # 3. The {}s that we added with `_wrap` need to be removed.
        #    So we just remove the first and last characters, [1:-1]
        #    from the final result
        #
        return result[1:-1]
Ejemplo n.º 4
0
def _get_simple_ref_parser():
    """Build a parser for a whole line holding exactly one flat reference.

    The grammar is: start of string, optional plain text, one reference
    (open sentinel, plain text, close sentinel), optional plain text, end
    of string.  Plain text is any run of characters not in ``_EXCLUDES``.
    """
    opener = pp.Literal(_REF_OPEN).suppress()
    closer = pp.Literal(_REF_CLOSE).suppress()
    string = pp.CharsNotIn(_EXCLUDES).setParseAction(_string)

    # The reference body is grouped so it arrives as a single nested token.
    reference = opener + pp.Group(string) + closer
    reference.setParseAction(_reference)

    return (pp.StringStart() + pp.Optional(string) + reference
            + pp.Optional(string) + pp.StringEnd())
Ejemplo n.º 5
0
class Parser(object):
    """Split text into commands using a caller-supplied pyparsing scanner.

    Quoted strings, ``--`` comments and C-style comments are registered as
    ignorable on the scanner, so matches are not looked for inside them.
    """

    # "--" comment running to the end of the line, except when the "--" is
    # immediately followed by "-begin" (matched case-insensitively as a
    # keyword).
    comment_def = "--" + pyparsing.NotAny(
        '-' + pyparsing.CaselessKeyword('begin')) + pyparsing.ZeroOrMore(
            pyparsing.CharsNotIn("\n"))

    def __init__(self, scanner, retainSeparator=True):
        """Store ``scanner`` and configure what it must skip over.

        :param scanner: pyparsing expression used by :meth:`separate`; it
            is modified in place by the ``ignore`` calls below.
        :param retainSeparator: when true, grouped matches are returned
            joined into one string; otherwise only their first token is.
        """
        self.scanner = scanner
        self.scanner.ignore(pyparsing.sglQuotedString)
        self.scanner.ignore(pyparsing.dblQuotedString)
        self.scanner.ignore(self.comment_def)
        self.scanner.ignore(pyparsing.cStyleComment)
        self.retainSeparator = retainSeparator

    def separate(self, txt):
        """Return the list of items the scanner finds in ``txt``.

        For every non-empty match only its first token is considered.  If
        that token is itself a ``ParseResults`` it is either joined into a
        single string or reduced to its first element, depending on
        ``retainSeparator``; otherwise it is kept as is unless it is falsy.
        """
        itms = []
        for sqlcommand, _start, _end in self.scanner.scanString(txt):
            if not sqlcommand:
                continue
            first = sqlcommand[0]
            if isinstance(first, pyparsing.ParseResults):
                if self.retainSeparator:
                    itms.append("".join(first))
                else:
                    itms.append(first[0])
            elif first:
                itms.append(first)
        return itms
Ejemplo n.º 6
0
 def parse_input(self, s):
     """Parse ``s`` as one or more blocks, firing the configured actions.

     A block is ``{block <digits>}`` followed by zero or more function
     calls; a function call is one of ``accepted_functions``, a
     parenthesised argument without ")" in it, and trailing whitespace.
     The parse result itself is discarded; all work happens in
     ``self.function_action`` and ``self.block_action``.
     """
     arguments = p.CharsNotIn(")")
     function = p.oneOf(accepted_functions) + "(" + arguments + ")" + p.White()
     function.setParseAction(self.function_action)

     header = "{block " + p.Word(p.nums) + "}"
     block = header + p.ZeroOrMore(function)
     block.setParseAction(self.block_action)

     p.OneOrMore(block).parseString(s)
Ejemplo n.º 7
0
class TestTransformStringUsingParseActions(PyparsingExpressionTestCase):
    """Checks that transformString can turn simple markup into HTML."""

    # Which HTML tag each markup symbol stands for.
    markup_convert_map = {
        "*": "B",
        "_": "U",
        "/": "I",
    }

    def markup_convert(t):
        """Parse action returning ``<TAG>body</TAG>`` for the matched markup."""
        tag = TestTransformStringUsingParseActions.markup_convert_map[
            t.markup_symbol]
        return "<{0}>{1}</{0}>".format(tag, t.body)

    tests = [
        PpTestSpec(
            desc="Use transformString to convert simple markup to HTML",
            # One markup symbol, then a parenthesised body; both are named
            # so the parse action can look them up on the result.
            expr=(
                pp.oneOf(markup_convert_map)("markup_symbol")
                + "("
                + pp.CharsNotIn(")")("body")
                + ")"
            ).addParseAction(markup_convert),
            text="Show in *(bold), _(underscore), or /(italic) type",
            expected_list=[
                "Show in <B>bold</B>, <U>underscore</U>, or <I>italic</I> type"
            ],
            parse_fn="transformString",
        ),
    ]
Ejemplo n.º 8
0
def get_parser():
    """Construct and return the parser.

    The grammar is a chain of atoms joined by the set operators ``+``,
    ``^``, ``-`` and ``#``.  An atom is a keyword call such as
    ``year(2020)`` or ``no_doi()``, a bare identifier, or a parenthesised
    sub-expression.  Text from ``!`` to the end of the line is ignored.
    """
    left = pp.Suppress("(")
    right = pp.Suppress(")")
    # A call argument: a double-quoted string, or anything up to ")".
    value = pp.QuotedString(quote_char='"', esc_char="\\") | pp.CharsNotIn(")")
    identifier = pp.Word(pp.alphas, pp.alphanums).set_parse_action(_Identifier)

    def with_value(keyword, action):
        """Return the element for ``keyword(value)`` bound to ``action``."""
        call = pp.Keyword(keyword) + left + value + right
        return call.set_parse_action(action)

    def without_value(keyword, action):
        """Return the element for ``keyword()`` bound to ``action``."""
        call = pp.Keyword(keyword) + left + right
        return call.set_parse_action(action)

    # First match wins, so the order of the alternatives is significant.
    function = (with_value("label", _Label)
                | with_value("year", _Year)
                | with_value("author", _Author)
                | with_value("orcid", _Orcid)
                | with_value("issn", _Issn)
                | with_value("published", _Published)
                | with_value("first", _First)
                | with_value("online", _Online)
                | with_value("modified", _Modified)
                | without_value("no_pmid", _NoPmid)
                | without_value("no_doi", _NoDoi)
                | without_value("no_label", _NoLabel))

    if settings["TEMPORAL_LABELS"]:
        # "active" is accepted both without and with an argument.
        function = (function
                    | without_value("active", _Active)
                    | with_value("active", _Active))

    operator = (pp.Literal("+").set_parse_action(_Union)
                | pp.Literal("^").set_parse_action(_Symdifference)
                | pp.Literal("-").set_parse_action(_Difference)
                | pp.Literal("#").set_parse_action(_Intersection))

    expression = pp.Forward()
    atom = function | identifier | pp.Group(left + expression + right)
    expression <<= atom + (operator + atom)[...]
    expression.set_parse_action(_Expression)
    expression.ignore("!" + pp.rest_of_line)
    return expression
Ejemplo n.º 9
0
class constant_def:
	"""Parser elements for C constants; section numbers refer to the C grammar."""
	zero = pp.Char("0")
	nonzero_digit = pp.Char("123456789")
	digit = pp.Char(pp.nums)
	octal_digit = pp.Char("01234567")
	hexadecimal_digit = pp.Char(pp.hexnums)
	# (6.4.4.1) decimal-constant:
	decimal_constant = nonzero_digit + digit.copy()[...]
	# (6.4.4.1) octal-constant:
	octal_constant = zero + octal_digit.copy()[...]
	# (6.4.4.1) hexadecimal-constant:
	# Literal, not Word: Word("0x") treats its argument as a character set
	# and would accept any run of "0"/"x" characters such as "00x0".
	hexadecimal_prefix = pp.Literal("0x") | pp.Literal("0X")
	hexadecimal_constant = hexadecimal_prefix + hexadecimal_digit.copy()[1, ...]
	# (6.4.4.1) integer-suffix:
	unsigned_suffix = pp.Char("uU")
	long_suffix = pp.Char("lL")
	# Literal for the same reason as above: Word("ll") also matches "l".
	longlong_suffix = pp.Literal("ll") | pp.Literal("LL")
	# "|" takes the first alternative that matches, so the two-character
	# suffix has to be tried before the one-character suffix.
	integer_suffix = (unsigned_suffix + pp.Optional(longlong_suffix | long_suffix)) | ((longlong_suffix | long_suffix) + pp.Optional(unsigned_suffix))
	# (6.4.4.1) integer-constant:
	# Hexadecimal goes first: otherwise the leading "0" of "0x1F" is
	# consumed as a complete octal constant.
	integer_constant = (hexadecimal_constant | decimal_constant | octal_constant) + pp.Optional(integer_suffix)
	# (6.4.4.2) floating-constant:
	# omitted
	# (6.4.4.3) enumeration-constant:
	enumeration_constant = identifier
	# (6.4.4.4) character-constant:
	simple_escape_sequence = pp.Char("\\") + pp.Char("\\'\"?abfnrtv")
	octal_escape_sequence = pp.Char("\\") + octal_digit.copy()[1, ...]
	# Literal so that exactly backslash + "x" is required.
	hexadecimal_escape_sequence = pp.Literal("\\x") + hexadecimal_digit.copy()[1, ...]
	escape_sequence = simple_escape_sequence | octal_escape_sequence | hexadecimal_escape_sequence | identifier_def.universal_character_name
	# Any run of characters other than quote, backslash, CR or LF, or an
	# escape sequence.
	c_char = pp.CharsNotIn("'\\\r\n") | escape_sequence
	character_constant = pp.Optional(pp.Char("L")) + pp.Char("'") + c_char[1,...] + pp.Char("'")
Ejemplo n.º 10
0
def get_param(wiki_markup, parse_actions=False):
    """Get parameter parser element.

    param = "{{{", { any Unicode character without "|={}" }-, [ default ],
    "}}}";
    default = "|", { any Unicode character };

    :param ParserElement wiki_markup: forwarded to the value parser
    :param bool parse_actions: forwarded to the value parser; otherwise
        unused here
    :returns: parameter parser element
    :rtype: ParserElement
    """
    # setName() and parseWithTabs() both return the element they are called
    # on, so they can be chained onto the named copy.
    name = (pyparsing.CharsNotIn("|={}")
            .setResultsName("name")
            .setName("name")
            .parseWithTabs())
    value = _get_value(wiki_markup, parse_actions=parse_actions)

    # Parameter name, optionally followed by "|" and an optional default.
    default = pyparsing.Literal("|") + pyparsing.Optional(value)
    body = name + pyparsing.Optional(default)

    param = pyparsing.nestedExpr(
        opener="{{{", closer="}}}", content=body, ignoreExpr=None)
    param.setName("param")
    param.parseWithTabs()
    return param
Ejemplo n.º 11
0
class Tokens(object):
    """Namespace of pyparsing elements for config values and tag lists."""

    # --- shared tokens ---------------------------------------------------
    delim_chars = '[]{},'
    # Lookahead: a value must be followed by a delimiter or end of input.
    pause = pp.FollowedBy(pp.Word(delim_chars) | pp.StringEnd())
    number = (pp.pyparsing_common.number + pause)
    quoted_string = pp.QuotedString('"', escChar='\\')
    # Boolean spellings are converted to real Python booleans.
    true = (
        pp.Regex(r'(True|true|yes|on)') + pause
    ).setParseAction(lambda _: True)
    false = (
        pp.Regex(r'(False|false|no|off)') + pause
    ).setParseAction(lambda _: False)
    # Anything up to the next delimiter, with surrounding blanks stripped.
    unquoted_string = pp.CharsNotIn(
        delim_chars
    ).setParseAction(lambda toks: toks[0].strip())
    # Matches nothing and yields the empty string.
    empty_value = pp.Empty().setParseAction(lambda _: '')

    # --- tokens for configs ------------------------------------------------
    identifier = pp.pyparsing_common.identifier.copy()
    comma = pp.Literal(',').suppress()
    assign = pp.Literal('=').suppress()
    # First match wins: typed values are tried before the string fallbacks.
    config_value = (
        number
        | true
        | false
        | quoted_string
        | unquoted_string
        | empty_value
    )
    key_value_pair = (
        identifier + assign + config_value
    ).setParseAction(lambda toks: (toks[0], toks[1]))
    key_value_pair_list = pp.Optional(
        key_value_pair + pp.ZeroOrMore(comma + key_value_pair))

    # --- tokens for tags ---------------------------------------------------
    tag = quoted_string | unquoted_string | pp.Empty().suppress()
    tag_list = pp.Optional(tag + pp.ZeroOrMore(comma + tag))
Ejemplo n.º 12
0
 def _make_valid_state_name(self, state_name):
     """Transform the input state_name into a valid state in XMLBIF.

     XMLBIF states must start with a letter and only contain letters,
     numbers and underscores.

     ``state_name`` is converted with ``str()``.  Every run of characters
     that is not in ``pp.alphanums`` or an underscore is replaced by a
     single underscore, and ``"state"`` is prepended when the result is
     empty or does not start with a letter.
     """
     s = str(state_name)
     s_fixed = pp.CharsNotIn(pp.alphanums + "_").setParseAction(pp.replaceWith("_")).transformString(s)
     # Test for emptiness first: indexing an empty string would raise
     # IndexError, and an empty name is not a valid state either.
     if not s_fixed or not s_fixed[0].isalpha():
         s_fixed = "state" + s_fixed
     return s_fixed
    def _get_parser(self):
        """Build the parser for one variable assignment line.

        The line is: a variable name, one of the assignment operators
        ``=``, ``?=``, ``:=``, ``::=``, ``+=``, optional whitespace, and a
        value made of plain text and nested ``$(...)`` / ``${...}``
        references.  Results are named ``var``, ``assign`` and ``value``.
        """
        var_name = pp.Word(pp.alphas + '_', pp.alphanums + '_')('var')
        assign = pp.oneOf(['=', '?=', ':=', '::=', '+='])('assign')

        # Value content is recursive: references may contain references.
        enclosed = pp.Forward()
        nested_parents = pp.nestedExpr('$(', ')', content=enclosed)
        nested_brackets = pp.nestedExpr('${', '}', content=enclosed)
        plain_text = pp.CharsNotIn('$(){}\n')
        enclosed <<= (
            nested_parents | nested_brackets | plain_text
        ).leaveWhitespace()

        leading_blanks = pp.ZeroOrMore(pp.White())
        value = pp.ZeroOrMore(enclosed)('value')
        return pp.lineStart + var_name + assign + leading_blanks + value
Ejemplo n.º 14
0
def _make_parser():
    """Build the parser for a query of ``field:value`` terms and bare values.

    A value is a double-quoted string, a single-quoted string (quotes
    removed in both cases) or a run of characters not in ``whitespace``.
    Each name in ``named_fields``, matched case-insensitively and followed
    by ``:``, introduces a value decorated with that field name; a value
    without such a prefix is decorated as ``'any'``.
    """
    non_word_chars = ''.join(whitespace)

    value = pp.MatchFirst([
        pp.dblQuotedString.copy().setParseAction(pp.removeQuotes),
        pp.sglQuotedString.copy().setParseAction(pp.removeQuotes),
        # NOTE(review): the leading Empty() presumably exists to skip
        # whitespace before the CharsNotIn run -- confirm before removing.
        pp.Empty() + pp.CharsNotIn(non_word_chars),
    ])

    # Named fields come first so that "field:" prefixes win over the
    # catch-all alternative appended below.
    expressions = [
        pp.Suppress(pp.CaselessLiteral(field) + ':')
        + value.copy().setParseAction(_decorate_match(field))
        for field in named_fields
    ]
    expressions.append(value.copy().setParseAction(_decorate_match('any')))

    return pp.ZeroOrMore(pp.MatchFirst(expressions))
Ejemplo n.º 15
0
Archivo: parser.py Proyecto: kaydoh/h
def _make_parser():
    """Build the parser for a query of ``field:value`` terms and bare values.

    A value is a double-quoted string, a single-quoted string (quotes
    removed in both cases) or a run of characters not in ``whitespace``.
    Each name in ``named_fields``, matched case-insensitively and followed
    by ``:``, introduces a value decorated with that field name; a value
    without such a prefix is decorated as ``"any"``.
    """
    non_word_chars = "".join(whitespace)

    value = pp.MatchFirst([
        pp.dbl_quoted_string.copy().set_parse_action(pp.remove_quotes),
        pp.sgl_quoted_string.copy().set_parse_action(pp.remove_quotes),
        # NOTE(review): the leading Empty() presumably exists to skip
        # whitespace before the CharsNotIn run -- confirm before removing.
        pp.Empty() + pp.CharsNotIn(non_word_chars),
    ])

    # Named fields come first so that "field:" prefixes win over the
    # catch-all alternative appended below.
    expressions = [
        pp.Suppress(pp.CaselessLiteral(field) + ":")
        + value.copy().set_parse_action(_decorate_match(field))
        for field in named_fields
    ]
    expressions.append(value.copy().set_parse_action(_decorate_match("any")))

    return pp.ZeroOrMore(pp.MatchFirst(expressions))
Ejemplo n.º 16
0
def _make():
    """Build the filter-expression parser.

    Atoms are ``~<code>`` flags (plain, followed by a regex, or followed by
    an integer) and bare regexes, which are treated as URL filters.  Atoms
    combine with prefix ``!``, infix ``&`` and infix ``|``; several
    expressions in a row are wrapped in a single ``FAnd``.
    """
    def flag(cls):
        # "~<code>" that must end at a word boundary.
        return pp.Literal(f"~{cls.code}") + pp.WordEnd()

    # Order is important - multi-char expressions need to come before narrow
    # ones.
    parts = [flag(cls).setParseAction(cls.make) for cls in filter_unary]

    # This is a bit of a hack to simulate Word(pyparsing_unicode.printables),
    # which has a horrible performance with len(pyparsing.pyparsing_unicode.printables) == 1114060
    unicode_words = pp.CharsNotIn("()~'\"" + pp.ParserElement.DEFAULT_WHITE_CHARS)
    unicode_words.skipWhitespace = True
    regex = (
        unicode_words
        | pp.QuotedString('"', escChar='\\')
        | pp.QuotedString("'", escChar='\\')
    )

    parts.extend(
        (flag(cls) + regex.copy()).setParseAction(cls.make)
        for cls in filter_rex
    )
    parts.extend(
        (flag(cls) + pp.Word(pp.nums)).setParseAction(cls.make)
        for cls in filter_int
    )

    # A naked rex is a URL rex:
    parts.append(regex.copy().setParseAction(FUrl.make))

    operators = [
        (pp.Literal("!").suppress(), 1, pp.opAssoc.RIGHT, lambda x: FNot(*x)),
        (pp.Literal("&").suppress(), 2, pp.opAssoc.LEFT, lambda x: FAnd(*x)),
        (pp.Literal("|").suppress(), 2, pp.opAssoc.LEFT, lambda x: FOr(*x)),
    ]
    single = pp.infixNotation(pp.MatchFirst(parts), operators)

    def conjoin(x):
        # A lone expression is passed through; several are AND-ed together.
        if len(x) != 1:
            return FAnd(x)
        return x

    return pp.OneOrMore(single).setParseAction(conjoin)
Ejemplo n.º 17
0
def parser_factory(styler):
    """Builds the S-expression parser.

    ``styler`` is called as ``styler(class_name, expr)`` and its result is
    used as the parser element for that piece of syntax.  When ``styler``
    is truthy, the closing double quote of a string and the closing
    parenthesis of an s-expression are optional.

    Returns the element for a single form.
    """
    def cond_optional(expr):
        if styler:
            return pp.Optional(expr)
        return expr

    LPAR, RPAR, SQUO, DQUO = (pp.Suppress(char) for char in '()\'"')

    # Forward declarations: a form can contain forms.
    form_first = pp.Forward()
    form = pp.Forward()

    nil = pp.CaselessKeyword('nil').addParseAction(pp.replaceWith([]))
    t = pp.CaselessKeyword('t').addParseAction(pp.replaceWith(True))
    constant = styler('class:constant', nil | t)

    number = styler('class:number', ppc.number).setName('number')

    # A symbol is any run of characters that are not control characters,
    # quotes, backquote, ";", ",", brackets of any kind or a space.
    control_chars = ''.join(chr(code) for code in range(0, 32)) + '\x7f'
    symbol = pp.CharsNotIn(control_chars + '\'"`;,()[]{} ')
    symbol = styler('class:symbol', symbol).setName('symbol')
    symbol.addParseAction(lambda t: Symbol(t[0]))
    call = styler('class:call', symbol)

    string_body = pp.Combine(pp.Optional(pp.CharsNotIn('"')))
    string = DQUO + string_body + cond_optional(DQUO)
    string = styler('class:string', string).setName('string')

    forms = (form_first + pp.ZeroOrMore(form)).setName('one or more forms')
    sexp = LPAR + pp.Optional(forms) + cond_optional(RPAR)
    sexp = sexp.setName('s-expression')
    # Wrap the contents in a list so the s-expression stays one token.
    sexp.addParseAction(lambda t: [list(t)])

    quote = (styler('class:quote', SQUO) + form).setName('quoted form')
    quote.addParseAction(lambda t: Quote(t[0]))

    # "^" binds tighter than "|": number and call/symbol compete on longest
    # match, and that pair is one alternative among the others.  The first
    # form of a list uses `call` where later forms use `symbol`.
    form_first <<= constant | (number ^ call) | string | sexp | quote
    form <<= constant | (number ^ symbol) | string | sexp | quote

    return form
Ejemplo n.º 18
0
def get_pagename(parse_actions=False):
    """Get pagename parser element.

    pagename = { any Unicode character without "|[]#<>{}" }-;

    :param bool parse_actions: accepted but has no effect
    :returns: pagename parser element
    :rtype: ParserElement
    """
    # setName() and parseWithTabs() return the element itself, so the whole
    # configuration can be written as one chain on the named copy.
    return (pyparsing.CharsNotIn("|[]#<>{}")
            .setResultsName("pagename")
            .setName("pagename")
            .parseWithTabs())
Ejemplo n.º 19
0
def _get_heading(parse_actions=False):
    """Get heading parser element.

    heading = { any Unicode character without "|[]" }-;

    :param bool parse_actions: accepted but has no effect
    :returns: heading parser element
    :rtype: ParserElement
    """
    # setName() and parseWithTabs() return the element itself, so they can
    # be chained directly onto the constructor.
    return pyparsing.CharsNotIn("|[]").setName("heading").parseWithTabs()
Ejemplo n.º 20
0
def _get_label(parse_actions=False):
    """Get label parser element.

    label = { any Unicode character without "|[]" }-;

    :param bool parse_actions: accepted but has no effect
    :returns: label parser element
    :rtype: ParserElement
    """
    # setName() and parseWithTabs() return the element itself, so the whole
    # configuration can be written as one chain on the named copy.
    return (pyparsing.CharsNotIn("|[]")
            .setResultsName("label")
            .setName("label")
            .parseWithTabs())
Ejemplo n.º 21
0
def _get_anchor(parse_actions=False):
    """Get anchor parser element.

    anchor = { any Unicode character without "[]" }-;

    :param bool parse_actions: accepted but has no effect
    :returns: anchor parser element
    :rtype: ParserElement
    """
    # setName() and parseWithTabs() return the element itself, so the whole
    # configuration can be written as one chain on the named copy.
    return (pyparsing.CharsNotIn("[]")
            .setResultsName("anchor")
            .setName("anchor")
            .parseWithTabs())
Ejemplo n.º 22
0
def _get_parser():
    """Build the full line parser: text, nested references and exports.

    A line is one or more items up to the end of the string, where an item
    is a reference (may nest), an export (inventory query, does not nest)
    or a plain string.  The sentinel, escape and exclusion strings all come
    from module-level constants.
    """
    # A double escape counts only when one of the four sentinels follows
    # (checked by lookahead); it is replaced by a single escape.
    double_escape = pp.Combine(pp.Literal(_DOUBLE_ESCAPE) + pp.MatchFirst([pp.FollowedBy(_REF_OPEN), pp.FollowedBy(_REF_CLOSE),
                               pp.FollowedBy(_INV_OPEN), pp.FollowedBy(_INV_CLOSE)])).setParseAction(pp.replaceWith(_ESCAPE))

    # --- references --------------------------------------------------------
    ref_open = pp.Literal(_REF_OPEN).suppress()
    ref_close = pp.Literal(_REF_CLOSE).suppress()
    # Negative lookaheads: not at a plain, escaped or double-escaped sentinel.
    ref_not_open = ~pp.Literal(_REF_OPEN) + ~pp.Literal(_REF_ESCAPE_OPEN) + ~pp.Literal(_REF_DOUBLE_ESCAPE_OPEN)
    ref_not_close = ~pp.Literal(_REF_CLOSE) + ~pp.Literal(_REF_ESCAPE_CLOSE) + ~pp.Literal(_REF_DOUBLE_ESCAPE_CLOSE)
    # An escaped sentinel is replaced by the literal sentinel text.
    ref_escape_open = pp.Literal(_REF_ESCAPE_OPEN).setParseAction(pp.replaceWith(_REF_OPEN))
    ref_escape_close = pp.Literal(_REF_ESCAPE_CLOSE).setParseAction(pp.replaceWith(_REF_CLOSE))
    # A run of non-excluded characters, else exactly one character that is
    # not in _REF_CLOSE_FIRST.
    ref_text = pp.CharsNotIn(_REF_EXCLUDES) | pp.CharsNotIn(_REF_CLOSE_FIRST, exact=1)
    ref_content = pp.Combine(pp.OneOrMore(ref_not_open + ref_not_close + ref_text))
    ref_string = pp.MatchFirst([double_escape, ref_escape_open, ref_escape_close, ref_content]).setParseAction(_string)
    # References are recursive: an item is a nested reference or a string.
    ref_item = pp.Forward()
    ref_items = pp.OneOrMore(ref_item)
    reference = (ref_open + pp.Group(ref_items) + ref_close).setParseAction(_reference)
    ref_item << (reference | ref_string)

    # --- exports (inventory queries) ---------------------------------------
    inv_open = pp.Literal(_INV_OPEN).suppress()
    inv_close = pp.Literal(_INV_CLOSE).suppress()
    inv_not_open = ~pp.Literal(_INV_OPEN) + ~pp.Literal(_INV_ESCAPE_OPEN) + ~pp.Literal(_INV_DOUBLE_ESCAPE_OPEN)
    inv_not_close = ~pp.Literal(_INV_CLOSE) + ~pp.Literal(_INV_ESCAPE_CLOSE) + ~pp.Literal(_INV_DOUBLE_ESCAPE_CLOSE)
    inv_escape_open = pp.Literal(_INV_ESCAPE_OPEN).setParseAction(pp.replaceWith(_INV_OPEN))
    inv_escape_close = pp.Literal(_INV_ESCAPE_CLOSE).setParseAction(pp.replaceWith(_INV_CLOSE))
    inv_text = pp.CharsNotIn(_INV_CLOSE_FIRST)
    inv_content = pp.Combine(pp.OneOrMore(inv_not_close + inv_text))
    inv_string = pp.MatchFirst([double_escape, inv_escape_open, inv_escape_close, inv_content]).setParseAction(_string)
    # Unlike references, an export holds only strings (no Forward here).
    inv_items = pp.OneOrMore(inv_string)
    export = (inv_open + pp.Group(inv_items) + inv_close).setParseAction(_invquery)

    # --- plain text outside of references and exports ----------------------
    # A run of non-excluded characters, else any single character (the
    # exclusion set of the second alternative is empty).
    text = pp.CharsNotIn(_EXCLUDES) | pp.CharsNotIn('', exact=1)
    content = pp.Combine(pp.OneOrMore(ref_not_open + inv_not_open + text))
    string = pp.MatchFirst([double_escape, ref_escape_open, inv_escape_open, content]).setParseAction(_string)

    item = reference | export | string
    line = pp.OneOrMore(item) + pp.StringEnd()
    return line
Ejemplo n.º 23
0
def get_simple_ref_parser(settings):
    """Build a parser for a whole line holding exactly one flat reference.

    The escape character and the reference/export sentinels are read from
    ``settings``.  The grammar is: start of string, optional plain text,
    one reference (open sentinel, plain text, close sentinel), optional
    plain text, then the module-level ``s_end`` element.  Whitespace
    skipping is disabled on the returned element.
    """
    escape = settings.escape_character
    ref_open_text, ref_close_text = settings.reference_sentinels
    inv_open_text, inv_close_text = settings.export_sentinels
    # Plain text may contain none of the special characters.
    excluded = (escape + ref_open_text + ref_close_text
                + inv_open_text + inv_close_text)

    string = pp.CharsNotIn(excluded).setParseAction(_tag_with(tags.STR))
    ref_open = pp.Literal(ref_open_text).suppress()
    ref_close = pp.Literal(ref_close_text).suppress()

    reference = ref_open + pp.Group(string) + ref_close
    reference = reference.setParseAction(_tag_with(tags.REF))

    line = (pp.StringStart() + pp.Optional(string) + reference
            + pp.Optional(string) + s_end)
    return line.leaveWhitespace()
Ejemplo n.º 24
0
def parser_factory(styler):
    """Builds the repr() parser.

    ``styler`` is called as ``styler(class_name, expr)`` and its result is
    used as the parser element for that piece of syntax.  The returned
    element matches one token (string, address, number, constant, keyword
    argument name, call name or dunder name) and yields its original text.
    """
    squo = styler('class:string', "'")
    dquo = styler('class:string', '"')

    # Escapes: a fixed set of two-character escapes, or \x plus two hex digits.
    esc_single = pp.oneOf(r'\\ \' \" \n \r \t')
    esc_hex = pp.Literal(r'\x') + pp.Word(pp.hexnums, exact=2)
    escs = styler('class:escape', esc_single | esc_hex)

    # Ordinary string characters: no control characters, no backslash and
    # not the quote that delimits the string.
    control_chars = ''.join(chr(code) for code in range(32)) + '\x7f'
    normal_chars_squo = pp.CharsNotIn(control_chars + r"\'")
    chars_squo = styler('class:string', normal_chars_squo) | escs
    normal_chars_dquo = pp.CharsNotIn(control_chars + r'\"')
    chars_dquo = styler('class:string', normal_chars_dquo) | escs

    skip_white = pp.Optional(pp.White())
    bytes_prefix = pp.Optional(styler('class:string_prefix', 'b'))

    def quoted(quote, body_chars):
        # "-" instead of "+": after the opening quote there is no
        # backtracking, so a malformed string is reported as an error.
        return (skip_white + bytes_prefix + quote
                - pp.ZeroOrMore(body_chars) + quote)

    string = quoted(squo, chars_squo) | quoted(dquo, chars_dquo)
    # Whitespace inside a string is content, so it must not be skipped.
    string.leaveWhitespace()

    address = styler('class:address', '0x' + pp.Word(pp.hexnums))
    number = styler('class:number', ppc.number)
    const = styler(
        'class:constant',
        pp.oneOf('True False None NotImplemented Ellipsis ...'))
    kwarg = (styler('class:kwarg', ppc.identifier)
             + styler('class:operator', '='))
    call = styler('class:call', ppc.identifier) + pp.FollowedBy('(')
    magic = styler('class:magic', pp.Regex(r'__[a-zA-Z0-9_]+__'))

    # First match wins, so the order of the alternatives is significant.
    token = string | address | number | const | kwarg | call | magic
    token.parseWithTabs()
    return pp.originalTextFor(token)
Ejemplo n.º 25
0
def parser_factory(styler):
    """Builds the JSON parser.

    ``styler`` is called as ``styler(class_name, expr)`` and its result is
    used as the parser element for that piece of syntax.  The returned
    element matches one JSON value; objects become dicts, arrays become
    lists and ``true``/``false``/``null`` become ``True``/``False``/``None``.
    """
    LBRK, RBRK, LBRC, RBRC, COLON, DQUO = (
        pp.Suppress(char) for char in '[]{}:"')
    DQUO = styler('class:string', DQUO)

    # Ordinary string characters: no control characters, backslash or quote.
    control_chars = ''.join(chr(code) for code in range(32)) + '\x7f'
    normal_chars = pp.CharsNotIn(control_chars + '\\"')

    # Two-character escapes and the character each one stands for, chained
    # with "|" in this order.
    simple_escapes = (
        ('\\"', '"'),
        ('\\/', '/'),
        ('\\\\', '\\'),
        ('\\b', '\b'),
        ('\\f', '\f'),
        ('\\n', '\n'),
        ('\\r', '\r'),
        ('\\t', '\t'),
    )
    escape_seqs = None
    for escaped, plain in simple_escapes:
        alternative = pp.Literal(escaped).addParseAction(
            pp.replaceWith(plain))
        if escape_seqs is None:
            escape_seqs = alternative
        else:
            escape_seqs = escape_seqs | alternative

    # \uXXXX: four hex digits converted to the character with that code.
    s_unicode = pp.Suppress('\\u') + pp.Word(pp.hexnums, exact=4)
    s_unicode.addParseAction(lambda t: chr(int(t[0], 16)))
    escape_seqs = escape_seqs | s_unicode

    chars = styler('class:string', normal_chars) | styler(
        'class:escape', escape_seqs)

    # Leading whitespace is consumed explicitly because whitespace skipping
    # is switched off for the string as a whole.
    skip_white = pp.Optional(pp.Suppress(pp.White()))
    string = skip_white + DQUO - pp.Combine(pp.ZeroOrMore(chars)) + DQUO
    string.leaveWhitespace()
    string.setName('string')

    value = pp.Forward()

    # Each "key: value" pair becomes a tuple ...
    pair = string + COLON + value
    pair.addParseAction(tuple)
    # ... and the object is built from those tuples.  NotAny(',') rejects a
    # trailing comma.
    obj = LBRC - pp.Optional(pp.delimitedList(pair)) + pp.NotAny(',') + RBRC
    obj.addParseAction(lambda t: {k: v for k, v in t})
    obj.setName('object')

    array = LBRK - pp.Optional(pp.delimitedList(value)) + pp.NotAny(',') + RBRK
    # Wrap in a list so the array stays a single token.
    array.addParseAction(lambda t: [list(t)])
    array.setName('array')

    true = pp.Literal('true').addParseAction(pp.replaceWith(True))
    false = pp.Literal('false').addParseAction(pp.replaceWith(False))
    null = pp.Literal('null').addParseAction(pp.replaceWith(None))
    constant = styler('class:constant', true | false | null)

    number = styler('class:number', ppc.number)
    value <<= obj | array | string | number | constant
    value.parseWithTabs()
    value.setName('JSON value')
    return value
    def _get_parser(self):
        """Initialize the pyparsing parser for Makefile.

        One line is: a variable name, optional whitespace, one of the
        assignment operators ``=``, ``?=``, ``:=``, ``::=``, ``+=``,
        optional whitespace, a value made of plain text and nested
        ``$(...)`` / ``${...}`` references, and an optional ``#`` comment.
        Results are named ``var``, ``assign``, ``value`` and ``comment``.
        """
        def blanks():
            # A fresh element per use, as many as are needed below.
            return pp.ZeroOrMore(pp.White())

        var_name = pp.Word(pp.alphas + '_', pp.alphanums + '_')('var')
        assign = pp.oneOf(['=', '?=', ':=', '::=', '+='])('assign')

        # Value content is recursive: references may contain references.
        enclosed = pp.Forward()
        nested_parents = pp.nestedExpr('$(', ')', content=enclosed)
        nested_brackets = pp.nestedExpr('${', '}', content=enclosed)
        # "#" is excluded so that a trailing comment is not part of the value.
        plain_text = pp.CharsNotIn('$(){}#\n')
        enclosed <<= (
            nested_parents | nested_brackets | plain_text
        ).leaveWhitespace()

        value = pp.ZeroOrMore(enclosed)('value')
        comment = pp.Optional(pp.pythonStyleComment)('comment')
        return (pp.lineStart + var_name + blanks() + assign + blanks()
                + value + comment)
Ejemplo n.º 27
0
def get_simple_ref_parser(escape_character, reference_sentinels,
                          export_sentinels):
    """Build a parser for a whole line holding exactly one flat reference.

    ``reference_sentinels`` and ``export_sentinels`` are indexed as
    (open, close) pairs.  The grammar is: start of string, optional plain
    text, one reference (open sentinel, plain text, close sentinel),
    optional plain text, end of string.  Plain text is any run of
    characters that is neither the escape character nor part of a sentinel.
    """
    ref_open_text = reference_sentinels[0]
    ref_close_text = reference_sentinels[1]
    inv_open_text = export_sentinels[0]
    inv_close_text = export_sentinels[1]
    excluded = (escape_character + ref_open_text + ref_close_text
                + inv_open_text + inv_close_text)

    string = pp.CharsNotIn(excluded).setParseAction(_string)
    ref_open = pp.Literal(ref_open_text).suppress()
    ref_close = pp.Literal(ref_close_text).suppress()

    # The reference body is grouped so it arrives as a single nested token.
    reference = ref_open + pp.Group(string) + ref_close
    reference = reference.setParseAction(_reference)

    return (pp.StringStart() + pp.Optional(string) + reference
            + pp.Optional(string) + pp.StringEnd())
Ejemplo n.º 28
0
    def read(self, state):
        """Load settings from ``state`` and build the format-string grammar.

        ``state`` is a mapping (or None, treated as empty) that may hold
        ``formatString``, ``replaceChars``, ``replaceBy`` and
        ``removeChars``; missing keys fall back to the defaults below.
        The method sets those four attributes plus ``self.translation``
        (a ``str.maketrans`` table) and ``self.expression`` (the grammar).

        pyparsing's process-wide default whitespace characters are changed
        to tab + newline while the grammar is built and are restored
        afterwards, even if building it raises.

        Raises:
            ValueError: if ``replaceChars`` and ``replaceBy`` differ in
                length.
        """
        if state is None:
            state = {}
        self.formatString = state.get(
            'formatString', '/tmp/<1.artist>/<1.title>/<#> - <title>')
        self.replaceChars = state.get('replaceChars', '\\:/')
        self.replaceBy = state.get('replaceBy', '_.;')
        self.removeChars = state.get('removeChars', '?*')

        if len(self.replaceChars) != len(self.replaceBy):
            raise ValueError("replaceChars and replaceBy must equal in length")
        self.translation = str.maketrans(self.replaceChars, self.replaceBy,
                                         self.removeChars)

        oldDefaultWhitespaceChars = pyparsing.ParserElement.DEFAULT_WHITE_CHARS
        pyparsing.ParserElement.setDefaultWhitespaceChars("\t\n")
        try:
            #pyparsing.ParserElement.enablePackrat() does not work (buggy)
            lbrace = Literal("<").suppress()
            rbrace = Literal(">").suppress()

            number = Word(nums)

            # Optional "<number>." prefix in front of a tag name.
            levelDef = number + Literal(".").suppress()
            levelDef.setParseAction(self.levelDefAction)

            tagName = Word(alphas + "_", alphanums + "_:()/\\")

            tagDefinition = (Optional(levelDef("levelDef"))
                             + (tagName | "#")("tag"))
            tagDefinition.setParseAction(self.tagDefinitionAction)

            expression = Forward()

            ifExpr = Literal("?").suppress() + expression
            elseExpr = Literal("!").suppress() + expression
            # The two branches may come in either order.  "^" (longest
            # match) is required: with "|" the first alternative always
            # matches, possibly consuming nothing, so the else-before-if
            # order could never be selected.  The if-branch is named "if"
            # in both alternatives.
            branches = (
                (Optional(ifExpr("if")) + Optional(elseExpr("else")))
                ^ (Optional(elseExpr("else")) + Optional(ifExpr("if"))))
            condition = lbrace + tagDefinition("tagDef") + branches + rbrace
            condition.setParseAction(self.conditionAction)

            # Literal text between conditions; "!" and "?" are excluded
            # because they introduce the else/if branches.
            staticText = pyparsing.CharsNotIn("<>!?")
            expression << OneOrMore(condition | staticText)
            self.expression = expression
        finally:
            pyparsing.ParserElement.setDefaultWhitespaceChars(
                oldDefaultWhitespaceChars)
Ejemplo n.º 29
0
    def _ParseTags(cls, string : str) -> dict:
        """Collect ``$$tag=value;...$$`` annotations found in ``string``.

        Tag names are lower-cased.  A comma-separated value becomes a list
        of strings; any other value becomes a single stripped string.
        Later occurrences of a tag overwrite earlier ones.

        Raises:
            Exception: if a value is neither a list nor a single value.
        """
        item_chars = pp.alphanums + "_."
        single_value = pp.CharsNotIn(";$")
        # At least one "item," followed by a final item.
        list_value = pp.OneOrMore(pp.Word(item_chars) + pp.Literal(",")) + pp.Word(item_chars)
        # The list form is tried first; it needs at least one comma.
        any_value = pp.Group(list_value("listVal") | single_value("singleVal"))
        entry = pp.Group(
            pp.Word(pp.alphas)("tag") + "=" + any_value("value") + pp.Optional(";"))
        tags_expr = "$$" + pp.OneOrMore(entry)("tags") + "$$"

        tags = {}
        for match, _start, _end in tags_expr.scanString(string):
            for item in match.get("tags"):
                raw = item.get("value")
                # keys() is called afresh for each test on purpose: it may
                # return a one-shot iterator.
                if "listVal" in raw.keys():
                    # Re-join the tokens (commas included) and split again.
                    parsed = "".join(raw).split(",")
                elif "singleVal" in raw.keys():
                    parsed = raw.get("singleVal").strip()
                else:
                    raise Exception("Illegal Tag Format")
                tags[item.get("tag").lower()] = parsed
        return tags
Ejemplo n.º 30
0
def _get_named_arg(wiki_markup, parse_actions=False):
    """Get named argument parser element.

    :param ParserElement wiki_markup: wiki markup
    :param bool parse_actions: accepted but has no effect here

    named_arg = { any Unicode character without "|=" }, "=", value;

    The name part is optional in the parser, so ``=value`` is accepted too.

    :returns: named argument parser element
    :rtype: ParserElement
    """
    # setName() and parseWithTabs() both return the element they are called
    # on, so they can be chained onto the named copy.
    name = (pyparsing.CharsNotIn("|=")
            .setResultsName("name")
            .setName("name")
            .parseWithTabs())
    # NOTE(review): parse_actions is not forwarded to _get_value here,
    # although get_param does forward it -- confirm whether that is intended.
    value = _get_value(wiki_markup)

    named_arg = pyparsing.Optional(name) + pyparsing.Literal("=") + value
    named_arg.setName("named_arg")
    named_arg.parseWithTabs()
    return named_arg