Beispiel #1
0
def property_grammar():
    """Build the pyparsing grammar for one ``NAME=VALUE`` property line.

    Three families of property names are accepted: a fixed table of known
    names (each with its own value grammar), ``KEYBOARD_KEY_<hex>`` and
    ``EVDEV_ABS_<2 hex digits>``.  The line must be terminated by ``EOL``.

    ``INTEGER``, ``REAL``, ``SIGNED_REAL``, ``STRING`` and ``EOL`` are
    grammar elements defined elsewhere in the module.

    :return: the combined grammar element.
    """
    # Elements created from here on skip only blanks as leading whitespace.
    ParserElement.setDefaultWhitespaceChars(' ')

    # One "[*]<dpi>@<hz>" entry of a MOUSE_DPI value.
    default_flag = Optional('*')('DEFAULT')
    dpi_entry = (default_flag + INTEGER('DPI') + Suppress('@')
                 + INTEGER('HZ'))('SETTINGS*')

    # Three ';'-separated rows, each made of three ','-separated reals.
    matrix_row = SIGNED_REAL + ',' + SIGNED_REAL + ',' + SIGNED_REAL
    matrix = (matrix_row + ';' + matrix_row + ';' + matrix_row)('MOUNT_MATRIX')

    # Known property names paired with the grammar of their value.
    value_grammars = (
        ('MOUSE_DPI', Group(OneOrMore(dpi_entry))),
        ('MOUSE_WHEEL_CLICK_ANGLE', INTEGER),
        ('MOUSE_WHEEL_CLICK_ANGLE_HORIZONTAL', INTEGER),
        ('MOUSE_WHEEL_CLICK_COUNT', INTEGER),
        ('MOUSE_WHEEL_CLICK_COUNT_HORIZONTAL', INTEGER),
        ('ID_INPUT_TRACKBALL', Literal('1')),
        ('POINTINGSTICK_SENSITIVITY', INTEGER),
        ('POINTINGSTICK_CONST_ACCEL', REAL),
        ('ID_INPUT_TOUCHPAD_INTEGRATION', Or(('internal', 'external'))),
        ('XKB_FIXED_LAYOUT', STRING),
        ('XKB_FIXED_VARIANT', STRING),
        ('ACCEL_MOUNT_MATRIX', matrix),
    )

    alternatives = []
    for prop_name, prop_value in value_grammars:
        alternatives.append(
            Literal(prop_name)('NAME') - Suppress('=') - prop_value('VALUE'))

    # KEYBOARD_KEY_xx=<"!" | ["!"]keyname>
    key_value = '!' ^ (Optional('!') - Word(alphanums + '_'))
    alternatives.append(
        Regex(r'KEYBOARD_KEY_[0-9a-f]+')('NAME')
        - Suppress('=')
        - key_value('VALUE'))

    # EVDEV_ABS_xx=<digits and colons>
    alternatives.append(
        Regex(r'EVDEV_ABS_[0-9a-f]{2}')('NAME')
        - Suppress('=')
        - Word(nums + ':')('VALUE'))

    return Or(alternatives) + EOL
Beispiel #2
0
def parse_equation(eq: str) -> dict:
    """Parse an equation/condition string with a layered operator grammar.

    The grammar has three layers, each built with ``infixNotation``:
    arithmetic over numbers and the variables ``x``/``y`` (``uniops`` and
    ``binops``), comparisons of those (``compops``), and logical
    combinations of comparisons (``logicuniops`` and ``logicbinops``).
    Every operator uses ``op_rep`` as its parse action.

    :param eq: text to parse; the entire string has to match.
    :return: the first element of the parse result, or ``None`` after the
        error has been printed to stderr when the text cannot be parsed.
    """
    ParserElement.enablePackrat()

    # Make sure the interpreter recursion limit is at least
    # LOWER_RECURSION_LIMIT before parsing.
    if getrecursionlimit() <= LOWER_RECURSION_LIMIT:
        setrecursionlimit(LOWER_RECURSION_LIMIT)

    def prefix_rules(symbols):
        # Unary, right-associative operator rows for infixNotation.
        return [(Literal(sym), 1, opAssoc.RIGHT, op_rep) for sym in symbols]

    def infix_rules(symbols):
        # Binary, left-associative operator rows for infixNotation.
        return [(Literal(sym), 2, opAssoc.LEFT, op_rep) for sym in symbols]

    # Atoms: a number or one of the single-letter variables.
    atom = pyparsing_common.number | Word(['x', 'y'], exact=True)

    # Production rules, innermost layer first.
    arithmetic = infixNotation(atom,
                               prefix_rules(uniops) + infix_rules(binops))
    comparison = infixNotation(arithmetic, infix_rules(compops))
    condition = infixNotation(
        comparison, prefix_rules(logicuniops) + infix_rules(logicbinops))

    try:
        parsed = condition.parseString(eq, parseAll=True)
    except ParseException as err:
        print('Error while parsing "%s": %s' % (eq, str(err)), file=stderr)
        return None
    return parsed[0]
Beispiel #3
0
def property_grammar():
    """Assemble the grammar recognising a single ``NAME=VALUE`` property.

    Accepted names are a fixed table of known properties plus the
    ``KEYBOARD_KEY_<hex>`` and ``EVDEV_ABS_<2 hex digits>`` patterns.
    ``INTEGER`` and ``REAL`` are grammar elements defined elsewhere in the
    module.

    :return: an ``Or`` over all accepted property forms.
    """
    # Elements created from here on skip only blanks as leading whitespace.
    ParserElement.setDefaultWhitespaceChars(' ')

    def assignment(name_expr, value_expr):
        # <name> '=' <value>; '-' stops backtracking once the name matched.
        return name_expr('NAME') - Suppress('=') - value_expr('VALUE')

    # One "[*]<dpi>@<hz>" entry of a MOUSE_DPI value.
    dpi_entry = (Optional('*')('DEFAULT') + INTEGER('DPI') + Suppress('@')
                 + INTEGER('HZ'))

    known_values = (
        ('MOUSE_DPI', Group(OneOrMore(dpi_entry('SETTINGS*')))),
        ('MOUSE_WHEEL_CLICK_ANGLE', INTEGER),
        ('ID_INPUT_TRACKBALL', Literal('1')),
        ('POINTINGSTICK_SENSITIVITY', INTEGER),
        ('POINTINGSTICK_CONST_ACCEL', REAL),
        ('ID_INPUT_TOUCHPAD_INTEGRATION', Or(('internal', 'external'))),
    )

    rules = [assignment(Literal(key), value) for key, value in known_values]

    # KEYBOARD_KEY_xx=<"!" | ["!"]keyname>
    rules.append(assignment(
        Regex(r'KEYBOARD_KEY_[0-9a-f]+'),
        '!' ^ (Optional('!') - Word(alphanums + '_'))))

    # EVDEV_ABS_xx=<digits and colons>
    rules.append(assignment(
        Regex(r'EVDEV_ABS_[0-9a-f]{2}'),
        Word(nums + ':')))

    return Or(rules)
Beispiel #4
0
    def __init__(self):
        """Build the expression grammar and store it in ``self.expr``.

        Operands are hexadecimal integers written with a trailing ``h``/``H``,
        decimal integers, or identifiers.  Operators are combined through
        ``infixNotation`` and every level is post-processed by
        ``self.nest_operand_pairs``.  The grammar must consume the whole
        input (``StringEnd``).
        """
        ParserElement.enablePackrat()

        # Plain digits -> int.
        dec_literal = Word(nums)
        dec_literal.setName('decimal integer')
        dec_literal.setParseAction(lambda toks: int(''.join(toks)))

        # Leading decimal digit, then hex digits, then the 'h'/'H' suffix;
        # the suffix is dropped before converting from base 16.
        hex_literal = Combine(Word(nums, hexnums) + Word('hH'))
        hex_literal.setName('hexadecimal integer')
        hex_literal.setParseAction(
            lambda toks: int((''.join(toks))[:-1], 16))

        # XXX and maybe dollar sign?
        name = Word(alphas, alphanums + '_@?')
        name.setName('identifier')

        operand = (hex_literal | dec_literal | name)

        # (operator symbols, arity, associativity), in the order they are
        # handed to infixNotation.
        levels = (
            ('+ - ~', 1, opAssoc.RIGHT),
            ('* /', 2, opAssoc.LEFT),
            ('+ -', 2, opAssoc.LEFT),
            ('<< >>', 2, opAssoc.LEFT),
            ('&', 2, opAssoc.LEFT),
            ('^', 2, opAssoc.LEFT),
            ('|', 2, opAssoc.LEFT),
        )
        operator_table = [
            (oneOf(symbols), arity, assoc, self.nest_operand_pairs)
            for symbols, arity, assoc in levels
        ]

        self.expr = infixNotation(operand, operator_table) + StringEnd()
Beispiel #5
0
def _path_contains_grammar(grammar: ParserElement, path: str) -> dict:
    """
    Report the lines of a file on which the grammar matches.

    The file is read as latin-1, re-joined with ``'\\n'`` and scanned with
    ``grammar.scanString``.  Note that ``grammar.parseWithTabs()`` is called
    on the grammar object that was passed in.

    :param grammar: Grammar to be searched for in path.
    :param path: Path to the destination file.
    :return: ``{path: {'lines': ..., 'sha256': ...}}`` when there is at
        least one match, otherwise an empty dict.
    """
    with open(path, encoding='latin-1') as handle:
        content_lines = handle.read().splitlines()

    # Each width includes one extra char for the '\n' re-inserted below.
    line_widths = tuple(len(text) + 1 for text in content_lines)
    joined_text = '\n'.join(content_lines)

    # scanString would otherwise expand tabs and report match offsets
    # relative to the expanded text, while the widths above count a tab as
    # a single character; disabling tab expansion keeps both in sync.
    grammar.parseWithTabs()

    hits = []
    for _tokens, begin, _end in grammar.scanString(joined_text):
        hits.append(_get_line_number(begin, line_widths))

    if not hits:
        return {}

    return {
        path: {
            # "[1, 2, 3]" -> "1, 2, 3"
            'lines': str(hits)[1:-1],
            'sha256': get_sha256(path),
        }
    }
Beispiel #6
0
def property_grammar():
    """Build the pyparsing grammar for one ``NAME=VALUE`` property line.

    Accepted names are a fixed table of known properties (each with its own
    value grammar), ``KEYBOARD_KEY_<hex>`` and ``EVDEV_ABS_<2 hex digits>``.
    The line must be terminated by ``EOL``.

    ``INTEGER``, ``REAL``, ``SIGNED_REAL`` and ``EOL`` are grammar elements
    defined elsewhere in the module.

    :return: the combined grammar element.
    """
    # Elements created from here on skip only blanks as leading whitespace.
    ParserElement.setDefaultWhitespaceChars(' ')

    # One "[*]<dpi>@<hz>" entry of a MOUSE_DPI value.
    dpi_entry = Group(
        Optional('*')('DEFAULT') + INTEGER('DPI') + Suppress('@')
        + INTEGER('HZ')
    )('SETTINGS*')

    # Three ';'-separated rows, each made of three ','-separated reals.
    matrix_row = SIGNED_REAL + ',' + SIGNED_REAL + ',' + SIGNED_REAL
    matrix = Group(
        matrix_row + ';' + matrix_row + ';' + matrix_row)('MOUNT_MATRIX')

    # XKB values may be empty.
    xkb_value = Optional(Word(alphanums + '+-/@._'))

    # Properties whose value is either '0' or '1'.
    binary_flags = (
        'ID_AUTOSUSPEND',
        'ID_INPUT',
        'ID_INPUT_ACCELEROMETER',
        'ID_INPUT_JOYSTICK',
        'ID_INPUT_KEY',
        'ID_INPUT_KEYBOARD',
        'ID_INPUT_MOUSE',
        'ID_INPUT_POINTINGSTICK',
        'ID_INPUT_SWITCH',
        'ID_INPUT_TABLET',
        'ID_INPUT_TABLET_PAD',
        'ID_INPUT_TOUCHPAD',
        'ID_INPUT_TOUCHSCREEN',
        'ID_INPUT_TRACKBALL',
    )

    # The table is assembled in three steps so the overall order of the
    # alternatives stays: mouse settings, 0/1 flags, everything else.
    value_grammars = [
        ('MOUSE_DPI', Group(OneOrMore(dpi_entry))),
        ('MOUSE_WHEEL_CLICK_ANGLE', INTEGER),
        ('MOUSE_WHEEL_CLICK_ANGLE_HORIZONTAL', INTEGER),
        ('MOUSE_WHEEL_CLICK_COUNT', INTEGER),
        ('MOUSE_WHEEL_CLICK_COUNT_HORIZONTAL', INTEGER),
    ]
    value_grammars += [
        (flag, Or((Literal('0'), Literal('1')))) for flag in binary_flags
    ]
    value_grammars += [
        ('POINTINGSTICK_SENSITIVITY', INTEGER),
        ('POINTINGSTICK_CONST_ACCEL', REAL),
        ('ID_INPUT_JOYSTICK_INTEGRATION', Or(('internal', 'external'))),
        ('ID_INPUT_TOUCHPAD_INTEGRATION', Or(('internal', 'external'))),
        ('XKB_FIXED_LAYOUT', xkb_value),
        ('XKB_FIXED_VARIANT', xkb_value),
        ('XKB_FIXED_MODEL', xkb_value),
        ('KEYBOARD_LED_NUMLOCK', Literal('0')),
        ('KEYBOARD_LED_CAPSLOCK', Literal('0')),
        ('ACCEL_MOUNT_MATRIX', matrix),
        ('ACCEL_LOCATION', Or(('display', 'base'))),
        ('PROXIMITY_NEAR_LEVEL', INTEGER),
    ]

    alternatives = []
    for prop_name, prop_value in value_grammars:
        alternatives.append(
            Literal(prop_name)('NAME') - Suppress('=') - prop_value('VALUE'))

    # KEYBOARD_KEY_xx=<"!" | ["!"]keyname>
    key_value = '!' ^ (Optional('!') - Word(alphanums + '_'))
    alternatives.append(
        Regex(r'KEYBOARD_KEY_[0-9a-f]+')('NAME') - Suppress('=')
        - key_value('VALUE'))

    # EVDEV_ABS_xx=<digits and colons>
    alternatives.append(
        Regex(r'EVDEV_ABS_[0-9a-f]{2}')('NAME') - Suppress('=')
        - Word(nums + ':')('VALUE'))

    return Or(alternatives) + EOL
Beispiel #7
0
def _evaluate_expression(expr_list, keyword_list, referred_checks_result):
    """Evaluate a boolean expression over previously computed check results.

    Tokens of ``expr_list`` whose upper-cased form is in ``keyword_list``
    are kept (upper-cased); every other token is treated as a check
    reference and replaced by ``referred_checks_result.get(token)``.  The
    resulting tokens are joined with single spaces and parsed with a
    NOT / AND / OR grammar whose nodes are built by ``BoolOperand``,
    ``BoolNot``, ``BoolAnd`` and ``BoolOr``.

    Example of the intended transformation::

        check1 and not (check2 or (check3 and not check4) )
          --> check1 and not ( check2 or ( check3 and not check4 )  )
              --> True and not ( False or ( True and not False ) )

    :return: the first element of the parse result.
    """
    # Substitute check references by their fetched value; keep keywords.
    substituted = [
        token.upper() if token.upper() in keyword_list
        else referred_checks_result.get(token)
        for token in expr_list
    ]
    expression_text = " ".join(substituted)

    ParserElement.enablePackrat()

    # An operand is the literal True/False or a single letter.
    operand = Keyword("True") | Keyword("False") | Word(alphas, max=1)
    operand.setParseAction(BoolOperand)

    operator_table = [
        ("NOT", 1, opAssoc.RIGHT, BoolNot),
        ("AND", 2, opAssoc.LEFT, BoolAnd),
        ("OR", 2, opAssoc.LEFT, BoolOr),
    ]
    grammar = infixNotation(operand, operator_table)

    parsed = grammar.parseString(expression_text)
    return parsed[0]
def defineParsers():
    """Build the grammar for the line-oriented command language.

    A program is a sequence of lines; each line is either empty or a
    function call.  A function call is a pipeline (``foo | bar | baz``)
    followed by any number of operand words.  A word is a symbol, a quoted
    string, or a parenthesised function call.  ``%`` starts a comment that
    runs to the end of the line.

    The parse actions ``action_symbol``, ``action_q_symbol``,
    ``action_pipeline`` and ``action_func_call`` are defined elsewhere.

    :return: ``(program, func_call)``; the ``func_call`` parser is returned
        as well to make testing easier.
    """
    # Enable a fast parsing mode with caching.
    ParserElement.enablePackrat()
    # End of line terminates statements, so it is not regular whitespace.
    ParserElement.setDefaultWhitespaceChars('\t ')

    # Forward declaration because this is a recursive rule.
    func_call = Forward()

    # The "terminal" rules.
    symbol = Word(alphas+'_-', alphanums+'_-').setParseAction(action_symbol)
    # setParseAction() modifies the element in place and quotedString is a
    # module-level object shared by every user of pyparsing, so attach the
    # action to a private copy instead of the shared instance.
    q_symbol = quotedString.copy().setParseAction(action_q_symbol)
    bracket_term = Literal("(").suppress() - func_call \
                   + Literal(")").suppress()
    word = symbol | q_symbol | bracket_term

    # The function call.
    # Parse: "foo | bar | baz" or "foo"
    pipeline = (word + ZeroOrMore("|" - word)).setParseAction(action_pipeline)
    # Parse "foo|bar op1 op2 op3"
    func_call << (pipeline - ZeroOrMore(word)).setParseAction(action_func_call)

    # High level structure of program.
    line = LineEnd() | func_call - LineEnd()  # empty line or function call
    program = ZeroOrMore(line) + StringEnd()  # multiple lines are a program

    # Define the comments.
    program.ignore('%' + restOfLine)
    # No tab expansion.
    program.parseWithTabs()
    return program, func_call
Beispiel #9
0
def parser():
    """Return the grammar for a regular-expression-like syntax.

    The grammar is built on the first call and cached in the module-level
    ``_parser``; later calls return the cached object.  It understands
    literal and escaped characters, the macros ``\\d``, ``\\w``, ``\\s``,
    bracketed ranges, ``.``, repetition (``{n}``, ``{m,n}``, ``*``, ``+``,
    ``?``), implicit sequencing and ``|`` alternatives.  The ``handle_*``
    parse actions are defined elsewhere.
    """
    global _parser
    if _parser is not None:
        return _parser

    # No character is skipped as whitespace by the elements created below.
    ParserElement.setDefaultWhitespaceChars("")
    open_br, close_br, open_cb, close_cb, open_par, close_par = map(
        Literal, "[]{}()")

    macro = Combine("\\" + oneOf(list("dws")))
    # A backslash followed by any printable, unless it is one of the macros.
    escaped = ~ macro + Combine("\\" + oneOf(list(printables)))
    # Printables that have no special meaning, plus blank and tab.
    plain_chars = "".join(
        ch for ch in printables if ch not in r"\[]{}().*?+|") + " \t"

    char_range = Combine(open_br + SkipTo(close_br, ignore=escaped) + close_br)
    literal = (escaped | oneOf(list(plain_chars)))
    any_char = Literal(".")

    exact_count = open_cb + Word(nums).setResultsName("count") + close_cb
    bounded_count = (open_cb + Word(nums).setResultsName("minCount") + ","
                     + Word(nums).setResultsName("maxCount") + close_cb)
    repetition = (exact_count | bounded_count | oneOf(list("*+?")))

    # Attach the handlers only now, in this order, after all elements exist.
    for element, handler in ((char_range, handle_range),
                             (literal, handle_literal),
                             (macro, handle_macro),
                             (any_char, handle_dot)):
        element.setParseAction(handler)

    term = (literal | char_range | macro | any_char)
    expression = operatorPrecedence(term, [
        (repetition, 1, opAssoc.LEFT, handle_repetition),
        (None, 2, opAssoc.LEFT, handle_sequence),
        (Suppress('|'), 2, opAssoc.LEFT, handle_alternative),
    ])

    _parser = expression
    return _parser
Beispiel #10
0
def hwdb_grammar():
    """Build the grammar for a whole hwdb-style file.

    The file is a sequence of groups.  A group consists of one or more match
    lines (or comment lines), followed by one or more property lines indented
    by exactly one blank (or indented comments), terminated by an empty line
    or the end of the input.  Pure comment groups are accepted and dropped.

    ``TYPES``, ``GENERAL_MATCHES``, ``UDEV_TAG``, ``EOL``, ``EMPTYLINE`` and
    ``COMMENTLINE`` are defined elsewhere in the module.

    :return: the grammar; groups are collected under ``GROUPS``, their match
        lines under ``MATCHES`` and property lines under ``PROPERTIES``.
    """
    # No character is skipped as whitespace by the elements created below.
    ParserElement.setDefaultWhitespaceChars('')

    # "<category>:<connection>:" for every entry of TYPES.
    typed_prefixes = []
    for category, conn in TYPES.items():
        typed_prefixes.append(category + ':' + Or(conn) + ':')
    prefix = Or(typed_prefixes)

    match_chars = printables + ' ' + '®'
    typed_match = Combine(prefix + Word(match_chars))
    general_match = Combine(Or(GENERAL_MATCHES) + ':' + Word(match_chars))
    matchline = (typed_match | general_match) + EOL

    # " TAG=value  # optional comment"
    property_text = Combine(
        UDEV_TAG - '=' - Optional(Word(alphanums + '_=:@*.!-;, "/'))
        - Optional(pythonStyleComment))
    propertyline = White(' ', exact=1).suppress() + property_text + EOL
    propertycomment = White(' ', exact=1) + pythonStyleComment + EOL

    match_section = OneOrMore(matchline('MATCHES*') ^ COMMENTLINE.suppress())
    property_section = OneOrMore(
        propertyline('PROPERTIES*') ^ propertycomment.suppress())
    terminator = (EMPTYLINE ^ stringEnd()).suppress()
    group = match_section - property_section - terminator

    commentgroup = OneOrMore(COMMENTLINE).suppress() - EMPTYLINE.suppress()

    return OneOrMore(Group(group)('GROUPS*') ^ commentgroup) + stringEnd()
Beispiel #11
0
 def set_parse_action_magic(rule_name: str, parser_element: pp.ParserElement) -> None:
     """Attach an AST-building parse action to ``parser_element``.

     All-uppercase rule names are left untouched.  When the element carries
     a name that is a valid identifier, that name replaces ``rule_name``.
     The ``bin_op`` rule gets an action that folds ``operand (op operand)*``
     token lists into nested ``BinOpNode`` objects.  Any other rule is
     mapped to a class called ``<CamelCasedRuleName>Node``; if such a name
     exists and is not abstract, the action instantiates it from the tokens.
     """
     if rule_name.upper() == rule_name:
         return

     element_name = getattr(parser_element, 'name', None)
     if element_name and element_name.isidentifier():
         rule_name = element_name

     if rule_name == 'bin_op':
         def fold_binary(s, loc, tocs):
             def as_node(item):
                 # Nested token groups are folded recursively.
                 if isinstance(item, AstNode):
                     return item
                 return fold_binary(s, loc, item)

             # tocs alternates operand, operator, operand, ...
             node = as_node(tocs[0])
             for idx in range(1, len(tocs) - 1, 2):
                 right = as_node(tocs[idx + 1])
                 node = BinOpNode(BinOp(tocs[idx]), node, right, loc=loc)
             return node

         parser_element.setParseAction(fold_binary)
         return

     # e.g. "func_call" -> "FuncCallNode"
     class_name = ''.join(
         part.capitalize() for part in rule_name.split('_')) + 'Node'
     # NOTE(review): eval() resolves the generated class name; rule names
     # must come from trusted grammar definitions only.
     with suppress(NameError):
         node_cls = eval(class_name)
         if inspect.isabstract(node_cls):
             return

         def build_node(s, loc, tocs):
             if node_cls is FuncNode:
                 # FuncNode takes the tokens between the second and the
                 # last one as a single list argument.
                 return FuncNode(tocs[0], tocs[1], tocs[2:-1], tocs[-1], loc=loc)
             return node_cls(*tocs, loc=loc)

         parser_element.setParseAction(build_node)
def read_sets_java(string):
    """Parse a bracketed ``[...]`` collection of alphanumeric elements.

    Blanks and commas are registered as pyparsing's default whitespace (a
    process-wide setting).  Each element is converted by ``parse_elem_java``
    (defined elsewhere) and the bracketed content is turned into a
    ``frozenset`` by the parse action.

    :param string: the text to parse.
    :return: the first item of the parse result converted with ``asList()``.
    """
    from pyparsing import (
        nestedExpr, alphas, Word, nums, ParserElement, delimitedList)

    ParserElement.setDefaultWhitespaceChars(" ,")

    member = Word(alphas + nums)
    member.setParseAction(parse_elem_java)

    grammar = nestedExpr("[", "]", content=delimitedList(member))
    grammar.setParseAction(lambda toks: frozenset(toks[0]))

    parsed = grammar.parseString(string)
    return parsed.asList()[0]
Beispiel #13
0
 def set_parse_action_magic(rule_name: str, parser: pp.ParserElement)->None:
     """Attach an AST-building parse action to ``parser``.

     All-uppercase rule names are left untouched.  When the element carries
     a name that is a valid identifier, that name replaces ``rule_name``.
     The ``bin_op`` rule gets an action that folds ``operand (op operand)*``
     token lists into nested ``BinOpNode`` objects.  Any other rule is
     mapped to a class called ``<CamelCasedRuleName>Node``; if such a name
     exists and is not abstract, the action instantiates it from the tokens.
     """
     if rule_name.upper() == rule_name:
         return

     element_name = getattr(parser, 'name', None)
     if element_name and element_name.isidentifier():
         rule_name = element_name

     if rule_name == 'bin_op':
         def fold_binary(s, loc, tocs):
             def as_node(item):
                 # Nested token groups are folded recursively.
                 if isinstance(item, AstNode):
                     return item
                 return fold_binary(s, loc, item)

             # tocs alternates operand, operator, operand, ...
             node = as_node(tocs[0])
             for idx in range(1, len(tocs) - 1, 2):
                 right = as_node(tocs[idx + 1])
                 node = BinOpNode(BinOp(tocs[idx]), node, right)
             return node

         parser.setParseAction(fold_binary)
         return

     # Split the rule name at '_', capitalize every piece and append
     # "Node", e.g. "func_call" -> "FuncCallNode".
     class_name = ''.join(
         part.capitalize() for part in rule_name.split('_')) + 'Node'
     # NOTE(review): eval() resolves the generated class name; rule names
     # must come from trusted grammar definitions only.
     with suppress(NameError):
         node_cls = eval(class_name)
         if inspect.isabstract(node_cls):
             return

         def build_node(s, loc, tocs):
             return node_cls(*tocs)

         parser.setParseAction(build_node)
Beispiel #14
0
def hwdb_grammar():
    """Build the grammar for a whole hwdb-style file.

    The file is a sequence of groups.  A group consists of one or more match
    lines (or comment lines), followed by one or more property lines indented
    by exactly one blank (or indented comments), terminated by an empty line
    or the end of the input.  Pure comment groups are accepted and dropped.

    ``TYPES``, ``UDEV_TAG``, ``EOL``, ``EMPTYLINE`` and ``COMMENTLINE`` are
    defined elsewhere in the module.

    :return: the grammar; groups are collected under ``GROUPS``, their match
        lines under ``MATCHES`` and property lines under ``PROPERTIES``.
    """
    # No character is skipped as whitespace by the elements created below.
    ParserElement.setDefaultWhitespaceChars('')

    # "<category>:<connection>:" for every entry of TYPES.
    typed_prefixes = []
    for category, conn in TYPES.items():
        typed_prefixes.append(category + ':' + Or(conn) + ':')
    prefix = Or(typed_prefixes)

    matchline = Combine(prefix + Word(printables + ' ' + '®')) + EOL

    # " TAG=value  # optional comment"
    property_text = Combine(
        UDEV_TAG - '=' - Word(alphanums + '_=:@*.! ')
        - Optional(pythonStyleComment))
    propertyline = White(' ', exact=1).suppress() + property_text + EOL
    propertycomment = White(' ', exact=1) + pythonStyleComment + EOL

    match_section = OneOrMore(matchline('MATCHES*') ^ COMMENTLINE.suppress())
    property_section = OneOrMore(
        propertyline('PROPERTIES*') ^ propertycomment.suppress())
    terminator = (EMPTYLINE ^ stringEnd()).suppress()
    group = match_section - property_section - terminator

    commentgroup = OneOrMore(COMMENTLINE).suppress() - EMPTYLINE.suppress()

    return OneOrMore(group('GROUPS*') ^ commentgroup) + stringEnd()
Beispiel #15
0
def intersperse_parser(parser_list: list,
                       interspersed: pp.ParserElement) -> pp.ParserElement:
    """Chain the given parsers with ``interspersed`` around each of them.

    The result has the shape ``sep p1 sep p2 sep ... pn sep``; with an empty
    ``parser_list`` it is just one copy of ``interspersed``.  Every element
    is copied, so the arguments themselves are not modified.

    :param parser_list: parsers to chain, in order.
    :param interspersed: parser placed before, between and after them.
    :return: the combined parser.
    """
    combined = interspersed.copy()
    for element in parser_list:
        with_element = combined + element.copy()
        combined = with_element + interspersed.copy()
    return combined
Beispiel #16
0
    def __init__(self):
        """Build the pyparsing lexer for shell-like command lines.

        Sets two attributes:

        * ``self.LEXER`` - grammar for one pipeline (commands joined by
          ``&&``, ``||``, ``|`` or ``;``) followed by the end of the input;
          ``#`` comments are ignored.
        * ``self.parseException`` - pyparsing's ``ParseException`` class
          (presumably so callers can catch it without importing pyparsing).
        """
        from pyparsing import (ParserElement, StringEnd, LineEnd, Literal,
                               pythonStyleComment, ZeroOrMore, Suppress,
                               Optional, Combine, OneOrMore, Regex, oneOf,
                               QuotedString, Group, ParseException)

        # Line ends are significant, so only tab and blank are skipped.
        ParserElement.setDefaultWhitespaceChars("\t ")

        EOF = StringEnd()
        EOL = ~EOF + LineEnd() # EOL must not match on EOF

        escape = Literal("\\")
        comment = pythonStyleComment
        junk = ZeroOrMore(comment | EOL).suppress()

        ## word (i.e: single argument string)
        # Either a line continuation (backslash + EOL, dropped), or a run of
        # escaped characters, quoted strings and unquoted text glued together.
        # The character class uses "\\|" (not "\|") because "\|" is an
        # invalid escape sequence in a non-raw string literal; the regex
        # text is unchanged.
        word = Suppress(escape + EOL + Optional(comment)) \
        | Combine(OneOrMore( escape.suppress() + Regex(".") |
                             QuotedString("'", escChar='\\', multiline=True) |
                             QuotedString('"', escChar='\\', multiline=True) |
                             Regex("[^ \t\r\n\f\v\\\\$&<>();\\|\'\"`]+") |
                             Suppress(escape + EOL) ))

        ## redirector (aka bash file redirectors, such as "2>&1" sequences)
        fd_src = Regex("[0-2]").setParseAction(lambda t: int(t[0]))
        fd_dst = Suppress("&") + fd_src
        # "[n]<word" || "[n]<&word" || "[n]<&digit-"
        # (a missing fd defaults to 0 for "<" and to 1 for ">")
        fd_redir = (Optional(fd_src, 0) + Literal("<")
                    |Optional(fd_src, 1) + Literal(">"))\
                   +(word | (fd_dst + Optional("-")))
        # "&>word" || ">&word"
        full_redir = (oneOf("&> >&") + word)\
                     .setParseAction(lambda t:("&" ,">", t[-1]))
        # "<<<word" || "<<[-]word"
        here_doc = Regex("<<(<|-?)") + word
        # "[n]>>word"
        add_to_file = Optional(fd_src | Literal("&"), 1) + \
                      Literal(">>") + word
        # "[n]<>word"
        fd_bind = Optional(fd_src, 0) + Literal("<>") + word

        # Every redirector is reported as a single tuple token.
        redirector = (fd_redir | full_redir | here_doc
                      | add_to_file | fd_bind)\
                     .setParseAction(lambda token: tuple(token))

        ## single command (args/redir list)
        command = Group(OneOrMore(redirector | word))

        ## logical operators (section splits)
        semicolon = Suppress(";") + junk
        connector = (oneOf("&& || |") + junk) | semicolon

        ## pipeline, aka logical block of interconnected commands
        pipeline = junk + Group(command +
                                ZeroOrMore(connector + command) +
                                Optional(semicolon))

        # define object attributes
        self.LEXER = pipeline.ignore(comment) + EOF
        self.parseException = ParseException
Beispiel #17
0
def parser():
    """Return the grammar for a regular-expression-like syntax.

    The grammar is built on the first call and cached in the module-level
    ``_parser``; later calls return the cached object.  It understands
    literal and escaped characters, the macros ``\\d``, ``\\w``, ``\\s``,
    bracketed ranges, ``.``, a suppressed ``?:`` marker, repetition
    (``{n}``, ``{m,n}``, ``*``, ``+``, ``?``), implicit sequencing and ``|``
    alternatives.  The ``handle*`` parse actions are defined elsewhere.
    """
    global _parser
    if _parser is not None:
        return _parser

    # No character is skipped as whitespace by the elements created below.
    ParserElement.setDefaultWhitespaceChars("")
    (open_br, close_br, open_cb, close_cb,
     open_par, close_par, colon, qmark) = map(Literal, "[]{}():?")

    macro = Combine("\\" + oneOf(list("dws")))
    # A backslash followed by any printable, unless it is one of the macros.
    escaped = ~macro + Combine("\\" + oneOf(list(printables)))
    # Printables that have no special meaning, plus blank and tab.
    plain_chars = "".join(
        ch for ch in printables if ch not in r"\[]{}().*?+|") + " \t"

    char_range = Combine(open_br + SkipTo(close_br, ignore=escaped) + close_br)
    literal = (escaped | oneOf(list(plain_chars)))
    non_capture_marker = Suppress("?:")
    any_char = Literal(".")

    exact_count = open_cb + Word(nums).setResultsName("count") + close_cb
    bounded_count = (open_cb + Word(nums).setResultsName("minCount") + ","
                     + Word(nums).setResultsName("maxCount") + close_cb)
    repetition = (exact_count | bounded_count | oneOf(list("*+?")))

    # Attach the handlers only now, in this order, after all elements exist.
    for element, handler in ((char_range, handleRange),
                             (literal, handleLiteral),
                             (macro, handleMacro),
                             (any_char, handleDot)):
        element.setParseAction(handler)

    term = (literal | char_range | macro | any_char | non_capture_marker)
    expression = infixNotation(term, [
        (repetition, 1, opAssoc.LEFT, handleRepetition),
        (None, 2, opAssoc.LEFT, handleSequence),
        (Suppress('|'), 2, opAssoc.LEFT, handleAlternative),
    ])

    _parser = expression
    return _parser
Beispiel #18
0
def _make_grammar():
    """Make a grammar for parsing a sanitized F5 config

    The syntax is Tcl, except for an optional 'Sanitized out =' string at
    the top. Only enough is parsed to find commands and their arguments.

    Returns the grammar. Parsing with it yields a result where 'prog' holds
    the commands; each command has a 'name' and some 'args'. Arguments can
    be further nested lists in case of '{ ... }' and '[ ... ]' blocks.
    """
    # Newlines separate commands, so only blanks are skipped implicitly.
    ParserElement.setDefaultWhitespaceChars(' ')

    # Optional explicit whitespace, dropped from the results.
    skip_ws = Suppress(Optional(White()))
    comment = skip_ws + '#' - restOfLine

    open_brace, close_brace = Suppress('{'), Suppress('}')
    open_bracket, close_bracket = Suppress('['), Suppress(']')

    # Blocks contain commands and commands contain blocks: forward-declare.
    commands = Forward()
    brace_block = Group(
        open_brace - skip_ws - Optional(commands) - skip_ws - close_brace)
    bracket_block = Group(
        open_bracket - skip_ws - Optional(commands) - skip_ws - close_bracket)

    quoted = QuotedString(quoteChar='"', escChar='\\', multiline=True)
    bare = Word(alphanums + '-:()_./<>%*$|!=&?')
    word = quoted | brace_block | bracket_block | bare

    command = Group(word('name') + ZeroOrMore(word)('args'))
    # Commands are separated by newlines and/or semicolons.
    separator = OneOrMore(Literal('\n') | ';')
    commands << (command + ZeroOrMore(Suppress(separator) + command))

    program_end = Suppress(Optional(separator)) + StringEnd()
    program = commands + program_end

    leading_ws = Suppress(Optional(White()))
    sanitized = leading_ws + Optional('Sanitized out =') + program('prog')
    sanitized.ignore(comment)

    return sanitized
Beispiel #19
0
    def __init__(self):
        """Build the expression grammar and store it in ``self.expr``.

        Operands are ``0x``-prefixed hexadecimal integers, decimal integers,
        or identifiers.  Operators are combined through ``infixNotation``
        and every level is post-processed by ``self.nest_operand_pairs``.
        The grammar must consume the whole input (``StringEnd``).
        """
        ParserElement.enablePackrat()

        # The action receives all parse-action arguments; the third one is
        # the token list, whose first item is the combined "0x..." text.
        hex_literal = Combine(CaselessLiteral('0x') + Word(hexnums))
        hex_literal.setName('hexadecimal integer')
        hex_literal.setParseAction(lambda *args: int(args[2][0][2:], 16))

        # Plain digits -> int.
        dec_literal = Word(nums)
        dec_literal.setName('decimal integer')
        dec_literal.setParseAction(lambda toks: int(''.join(toks)))

        name = Word(alphanums + '_$')
        name.setName('identifier')

        operand = (hex_literal | dec_literal | name)

        # (operator symbols, arity, associativity), in the order they are
        # handed to infixNotation.
        levels = (
            ('+ - ~ !', 1, opAssoc.RIGHT),
            ('* /', 2, opAssoc.LEFT),
            ('+ -', 2, opAssoc.LEFT),
            ('<< >>', 2, opAssoc.LEFT),
            ('<= < > >=', 2, opAssoc.LEFT),
            ('== !=', 2, opAssoc.LEFT),
            ('&', 2, opAssoc.LEFT),
            ('^', 2, opAssoc.LEFT),
            ('|', 2, opAssoc.LEFT),
        )
        operator_table = [
            (oneOf(symbols), arity, assoc, self.nest_operand_pairs)
            for symbols, arity, assoc in levels
        ]

        self.expr = infixNotation(operand, operator_table) + StringEnd()
Beispiel #20
0
def property_grammar():
    """Build the grammar for one libinput ``NAME=VALUE`` property.

    Three families are accepted:

    * ``LIBINPUT_MODEL_*`` flags whose value is the literal ``1``;
    * ``LIBINPUT_ATTR_*`` properties with a structured or enumerated value
      (``<x>x<y>`` dimensions, ``<x>:<y>`` ranges, fixed keywords), stored
      under ``VALUE``;
    * ``LIBINPUT_ATTR_*`` thresholds whose integer value is stored under
      ``X`` only.

    ``INTEGER`` is a grammar element defined elsewhere in the module.

    :return: an ``Or`` over all accepted property forms.
    """
    # Elements created from here on skip only blanks as leading whitespace.
    ParserElement.setDefaultWhitespaceChars(' ')

    model_props = [
        Regex(r'LIBINPUT_MODEL_[_0-9A-Z]+')('NAME') -
        Suppress('=') - (Literal('1'))('VALUE')
    ]

    # "<x>x<y>"
    dimension = INTEGER('X') + Suppress('x') + INTEGER('Y')

    # "<x>:<y>"
    crange = INTEGER('X') + Suppress(':') + INTEGER('Y')
    vprops = (
        ('LIBINPUT_ATTR_SIZE_HINT', Group(dimension('SETTINGS*'))),
        ('LIBINPUT_ATTR_RESOLUTION_HINT', Group(dimension('SETTINGS*'))),
        ('LIBINPUT_ATTR_PRESSURE_RANGE', Group(crange('SETTINGS*'))),
        ('LIBINPUT_ATTR_TOUCH_SIZE_RANGE', Group(crange('SETTINGS*'))),
        # One-element tuple: the trailing comma is required, ('below') is
        # just a parenthesised string.
        ('LIBINPUT_ATTR_TPKBCOMBO_LAYOUT', Or(('below',))),
        ('LIBINPUT_ATTR_LID_SWITCH_RELIABILITY',
         Or(('reliable', 'write_open'))),
        ('LIBINPUT_ATTR_KEYBOARD_INTEGRATION', Or(('internal', 'external'))),
    )
    value_props = [Literal(name)('NAME') - Suppress('=') - val('VALUE') for
                   name, val in vprops]

    # These values already carry their own results name ('X'), so no
    # 'VALUE' name is attached below.
    tprops = (
        ('LIBINPUT_ATTR_PALM_PRESSURE_THRESHOLD', INTEGER('X')),
        ('LIBINPUT_ATTR_PALM_SIZE_THRESHOLD', INTEGER('X')),
    )
    typed_props = [Literal(name)('NAME') - Suppress('=') - val
                   for name, val in tprops]

    grammar = Or(model_props + value_props + typed_props)

    return grammar
Beispiel #21
0
def property_grammar():
    """Build the grammar for one libinput ``NAME=VALUE`` property.

    Accepted properties are ``LIBINPUT_MODEL_*`` flags (value ``1``), the
    size and resolution hints (``<x>x<y>``), the lid switch reliability
    (``reliable`` / ``write_open``) and the touchpad/keyboard combo layout
    (``below``).

    ``INTEGER`` is a grammar element defined elsewhere in the module.

    :return: an ``Or`` over all accepted property forms.
    """
    # Elements created from here on skip only blanks as leading whitespace.
    ParserElement.setDefaultWhitespaceChars(' ')

    model_props = [
        Regex(r'LIBINPUT_MODEL_[_0-9A-Z]+')('NAME') - Suppress('=') -
        (Literal('1'))('VALUE')
    ]

    # "<x>x<y>"
    dimension = INTEGER('X') + Suppress('x') + INTEGER('Y')
    sz_props = (
        ('LIBINPUT_ATTR_SIZE_HINT', Group(dimension('SETTINGS*'))),
        ('LIBINPUT_ATTR_RESOLUTION_HINT', Group(dimension('SETTINGS*'))),
    )

    size_props = [
        Literal(name)('NAME') - Suppress('=') - val('VALUE')
        for name, val in sz_props
    ]

    reliability_tags = Or(('reliable', 'write_open'))
    reliability = [
        Literal('LIBINPUT_ATTR_LID_SWITCH_RELIABILITY')('NAME') -
        Suppress('=') - reliability_tags('VALUE')
    ]

    # One-element tuple: the trailing comma is required, ('below') is just
    # a parenthesised string.
    tpkbcombo_tags = Or(('below',))
    tpkbcombo = [
        Literal('LIBINPUT_ATTR_TPKBCOMBO_LAYOUT')('NAME') - Suppress('=') -
        tpkbcombo_tags('VALUE')
    ]

    grammar = Or(model_props + size_props + reliability + tpkbcombo)

    return grammar
Beispiel #22
0
def property_grammar():
    """Assemble the grammar recognising a single ``NAME=VALUE`` property.

    Accepted names are a fixed table of known properties plus the
    ``KEYBOARD_KEY_<hex>`` and ``EVDEV_ABS_<2 hex digits>`` patterns.
    ``INTEGER`` and ``REAL`` are grammar elements defined elsewhere in the
    module.

    :return: an ``Or`` over all accepted property forms.
    """
    # Elements created from here on skip only blanks as leading whitespace.
    ParserElement.setDefaultWhitespaceChars(' ')

    def assignment(name_expr, value_expr):
        # <name> '=' <value>; '-' stops backtracking once the name matched.
        return name_expr('NAME') - Suppress('=') - value_expr('VALUE')

    # One "[*]<dpi>@<hz>" entry of a MOUSE_DPI value.
    dpi_entry = (Optional('*')('DEFAULT') + INTEGER('DPI') + Suppress('@')
                 + INTEGER('HZ'))

    known_values = (
        ('MOUSE_DPI', Group(OneOrMore(dpi_entry('SETTINGS*')))),
        ('MOUSE_WHEEL_CLICK_ANGLE', INTEGER),
        ('MOUSE_WHEEL_CLICK_ANGLE_HORIZONTAL', INTEGER),
        ('ID_INPUT_TRACKBALL', Literal('1')),
        ('POINTINGSTICK_SENSITIVITY', INTEGER),
        ('POINTINGSTICK_CONST_ACCEL', REAL),
        ('ID_INPUT_TOUCHPAD_INTEGRATION', Or(('internal', 'external'))),
    )

    rules = [assignment(Literal(key), value) for key, value in known_values]

    # KEYBOARD_KEY_xx=<"!" | ["!"]keyname>
    rules.append(assignment(
        Regex(r'KEYBOARD_KEY_[0-9a-f]+'),
        '!' ^ (Optional('!') - Word(alphanums + '_'))))

    # EVDEV_ABS_xx=<digits and colons>
    rules.append(assignment(
        Regex(r'EVDEV_ABS_[0-9a-f]{2}'),
        Word(nums + ':')))

    return Or(rules)
    def __init__(self):
        """
        Initialise the instance by enabling pyparsing's packrat parsing.

        Packrat parsing speeds up parsing considerably.  The call is made on
        the ``ParserElement`` class itself, not on a grammar owned by this
        instance.
        """
        ParserElement.enablePackrat()
Beispiel #24
0
    def __init__(self):
        """Build the pyparsing lexer for shell-like command lines.

        Sets two attributes:

        * ``self.LEXER`` - grammar for one pipeline (commands joined by
          ``&&``, ``||``, ``|`` or ``;``) followed by the end of the input;
          ``#`` comments are ignored.
        * ``self.parseException`` - pyparsing's ``ParseException`` class
          (presumably so callers can catch it without importing pyparsing).
        """
        from pyparsing import (ParserElement, StringEnd, LineEnd, Literal,
                               pythonStyleComment, ZeroOrMore, Suppress,
                               Optional, Combine, OneOrMore, Regex, oneOf,
                               QuotedString, Group, ParseException)

        # Line ends are significant, so only tab and blank are skipped.
        ParserElement.setDefaultWhitespaceChars("\t ")

        EOF = StringEnd()
        EOL = ~EOF + LineEnd()  # EOL must not match on EOF

        escape = Literal("\\")
        comment = pythonStyleComment
        junk = ZeroOrMore(comment | EOL).suppress()

        # word (i.e: single argument string)
        # Either a line continuation (backslash + EOL, dropped), or a run of
        # escaped characters, quoted strings and unquoted text glued together.
        # The character class uses "\\|" (not "\|") because "\|" is an
        # invalid escape sequence in a non-raw string literal; the regex
        # text is unchanged.
        word = Suppress(escape + EOL + Optional(comment)) \
            | Combine(OneOrMore(
                escape.suppress() + Regex(".") |
                QuotedString("'", escChar='\\', multiline=True) |
                QuotedString('"', escChar='\\', multiline=True) |
                Regex("[^ \t\r\n\f\v\\\\$&<>();\\|\'\"`]+") |
                Suppress(escape + EOL)))

        # redirector (aka bash file redirectors, such as "2>&1" sequences)
        fd_src = Regex("[0-2]").setParseAction(lambda t: int(t[0]))
        fd_dst = Suppress("&") + fd_src
        # "[n]<word" || "[n]<&word" || "[n]<&digit-"
        # (a missing fd defaults to 0 for "<" and to 1 for ">")
        fd_redir = (Optional(fd_src, 0) + Literal("<")
                    | Optional(fd_src, 1) + Literal(">")) + \
                   (word | (fd_dst + Optional("-")))
        # "&>word" || ">&word"
        obj = (oneOf("&> >&") + word)
        full_redir = obj.setParseAction(lambda t: ("&", ">", t[-1]))
        # "<<<word" || "<<[-]word"
        here_doc = Regex("<<(<|-?)") + word
        # "[n]>>word"
        add_to_file = (Optional(fd_src | Literal("&"), 1) + Literal(">>") +
                       word)
        # "[n]<>word"
        fd_bind = Optional(fd_src, 0) + Literal("<>") + word

        # Every redirector is reported as a single tuple token.
        obj = (fd_redir | full_redir | here_doc | add_to_file | fd_bind)
        redirector = obj.setParseAction(lambda token: tuple(token))

        # single command (args/redir list)
        command = Group(OneOrMore(redirector | word))

        # logical operators (section splits)
        semicolon = Suppress(";") + junk
        connector = (oneOf("&& || |") + junk) | semicolon

        # pipeline, aka logical block of interconnected commands
        pipeline = junk + Group(command + ZeroOrMore(connector + command) +
                                Optional(semicolon))

        # define object attributes
        self.LEXER = pipeline.ignore(comment) + EOF
        self.parseException = ParseException
Beispiel #25
0
    def __init__(self):
        """Build the expression grammar and store it in ``self.parser``.

        Operands are booleans, ``None``, numbers, quoted strings,
        ``/``-delimited regexes, result keywords, dotted identifiers, and
        ``[...]`` / ``{...}`` collections of those.  Each operand kind gets
        an ``Eval*`` parse action that replaces the parsed tokens with an
        object which can later be called into for evaluation.  Operators
        are combined through ``infixNotation``.
        """
        # speed up infixNotation considerably at the price of some cache memory
        ParserElement.enablePackrat()

        left, right = opAssoc.LEFT, opAssoc.RIGHT

        bool_literal = Keyword('True') | Keyword('False')
        none_literal = Keyword('None')
        int_literal = Word(nums)
        real_literal = Combine(Word(nums) + "." + Word(nums))
        str_literal = (QuotedString('"', escChar='\\')
                       | QuotedString("'", escChar='\\'))
        regex_literal = QuotedString('/', escChar='\\')
        name = Word(alphas, alphanums + '_')
        dotted_name = infixNotation(name, [
            (Literal('.'), 2, left, EvalArith),
        ])
        outcome = (Keyword('bad') | Keyword('fail') | Keyword('good')
                   | Keyword('ignore') | Keyword('unknown'))
        scalar = (bool_literal | none_literal | real_literal | int_literal
                  | str_literal | regex_literal | outcome | dotted_name)
        list_literal = Group(
            Suppress('[') + Optional(delimitedList(scalar)) + Suppress(']'))
        set_literal = Group(
            Suppress('{') + Optional(delimitedList(scalar)) + Suppress('}'))
        operand = scalar | list_literal | set_literal

        # Attach the evaluators only after every element has been created,
        # in this order.
        evaluators = (
            (bool_literal, EvalBoolean),
            (none_literal, EvalNone),
            (int_literal, EvalInteger),
            (real_literal, EvalReal),
            (str_literal, EvalString),
            (regex_literal, EvalRegex),
            (name, EvalIdentifier),
            (outcome, EvalResult),
            (list_literal, EvalList),
            (set_literal, EvalSet),
        )
        for element, evaluator in evaluators:
            element.setParseAction(evaluator)

        # "is" (not followed by "not") or "is not" as one token.
        identity_test = Keyword('is') + ~Keyword('not') | Combine(
            Keyword('is') + Keyword('not'), adjacent=False, joinString=' ')
        # "in" or "not in" as one token.
        membership_test = Keyword('in') | Combine(
            Keyword('not') + Keyword('in'), adjacent=False, joinString=' ')
        comparison_op = oneOf('< <= > >= != == isdisjoint')
        comparison = identity_test | membership_test | comparison_op

        operator_table = [
            (Literal('**'), 2, left, EvalPower),
            (oneOf('+ - ~'), 1, right, EvalModifier),
            (oneOf('* / // %'), 2, left, EvalArith),
            (oneOf('+ -'), 2, left, EvalArith),
            (oneOf('<< >>'), 2, left, EvalArith),
            (Literal('&'), 2, left, EvalArith),
            (Literal('^'), 2, left, EvalArith),
            (Literal('|'), 2, left, EvalArith),
            (comparison, 2, left, EvalLogic),
            (Keyword('not'), 1, right, EvalModifier),
            (Keyword('and'), 2, left, EvalLogic),
            (Keyword('or'), 2, left, EvalLogic),
            (Keyword('->'), 2, left, EvalArith),
        ]
        self.parser = infixNotation(operand, operator_table)
Beispiel #26
0
def read_sets_java(string):
    """Parse a ``[...]``-bracketed (possibly nested) collection from ``string``.

    Every alphanumeric word is converted by ``parse_elem_java``; each
    bracketed group is turned into a ``frozenset`` by the parse action.
    Returns the first item of the parse result.

    Note: this changes pyparsing's global default whitespace characters.
    """
    from pyparsing import nestedExpr, alphas, Word, nums, ParserElement, delimitedList

    # Commas are declared skippable whitespace, alongside blanks.
    ParserElement.setDefaultWhitespaceChars(" ,")

    def _to_frozenset(tokens):
        return frozenset(tokens[0])

    atom = Word(alphas + nums)
    atom.setParseAction(parse_elem_java)

    bracketed = nestedExpr("[", "]", content=delimitedList(atom))
    bracketed.setParseAction(_to_frozenset)

    parsed = bracketed.parseString(string)
    return parsed.asList()[0]
Beispiel #27
0
 def __init__(self, alphabet):
     """Set up the parser from the symbols exposed by ``alphabet``.

     alphabet: object providing ``getOperators()``, ``getConstants()`` and
         ``notNeedSpace()``; each result is stored on the instance.
     """
     self.operators = alphabet.getOperators()
     self.constants = alphabet.getConstants()
     self.notNeedSpace = alphabet.notNeedSpace()
     
     self.ffactory = FormulaFactory()
     # Build the grammar (presumably from the attributes set above --
     # TODO confirm against __createGram).
     self.__createGram()
     # Class-level pyparsing switch: turns on packrat memoization for all
     # parsers, not only this instance.
     ParserElement.enablePackrat()
Beispiel #28
0
 def __init__(self,showErrors=True,debug=False):
     '''
     Constructor.

     Args:
         showErrors(bool): True if errors should be shown/printed;
             stored on the instance as ``showError`` (singular)
         debug(bool): True if debugging should be enabled
     '''
     self.showError=showErrors
     self.debug=debug
     # no grammar is built here; the attribute starts out as None
     self.grammar=None
     # pyparsing-wide setting: only blanks and tabs are skipped as
     # whitespace, so line breaks are not skipped implicitly
     ParserElement.setDefaultWhitespaceChars(" \t")
Beispiel #29
0
def PyParsingDefaultWhitespaceChars(whitespace_chars):
    '''Set the given whitespace_chars as pyparsing's default whitespace chars while the context manager is active.

    This is a generator with a single ``yield``; it is meant to be driven as
    a context manager (e.g. through ``contextlib.contextmanager``). The
    previous default is restored when the managed block finishes, including
    when it exits with an exception.

    Since ParserElement.DEFAULT_WHITE_CHARS is a global variable, this method is not thread-safe (but no pyparsing parser construction is thread-safe for the same reason anyway).
    '''
    # A possible solution to this problem:
    # Since the pyparsing code is basically a single big file, we could just copy it (under aspio/vendor or something like that) and have our own "private" version of pyparsing. (TODO: think about this some more and maybe do it)
    previous_whitespace_chars = ParserElement.DEFAULT_WHITE_CHARS
    ParserElement.setDefaultWhitespaceChars(whitespace_chars)
    try:
        yield
    finally:
        # Always undo the global change, even if the managed block raised;
        # otherwise the temporary setting would leak into later parsers.
        ParserElement.setDefaultWhitespaceChars(previous_whitespace_chars)
Beispiel #30
0
 def _parse_items(self, source):
     """Parse ``key: value`` lines from ``source``.

     Each line becomes a group with a ``key`` and an optional ``value``.
     A value may span several lines: a line break followed by a literal
     space continues it. ``#`` comment lines are ignored. The whole of
     ``source`` must match (parseAll is True).

     Note: this changes pyparsing's global default whitespace characters.
     """
     ParserElement.setDefaultWhitespaceChars(' \t\r')

     end_of_line = LineEnd().suppress()
     text = CharsNotIn("\n")

     continuation = ZeroOrMore(end_of_line + Literal(' ') + text)
     value = Optional(Combine(text + continuation))("value")
     key = Word(alphanums + '-')('key')
     entry = Group(key + Literal(':').suppress() + value + end_of_line)

     entries = ZeroOrMore(entry)
     entries.ignore(Literal('#') + Optional( restOfLine ) + end_of_line)
     return entries.parseString(source, True)
Beispiel #31
0
def get_parser():
    """Build the grammar for paths made of ``.attr`` and ``[idx]`` steps.

    A path is a leading segment followed by any number of attribute or
    index steps. The separators are suppressed and the remaining segments
    are joined with ``PERIOD`` by the parse action.
    """
    from pyparsing import CharsNotIn, ParserElement, Suppress, ZeroOrMore

    ParserElement.enablePackrat()

    # A segment is any run of characters that are not separators.
    segment = CharsNotIn(f"{PERIOD}{LBRACK}{RBRACK}")
    dotted_step = Suppress(PERIOD) + segment
    bracketed_step = Suppress(LBRACK) + segment + Suppress(RBRACK)

    path = segment + ZeroOrMore(dotted_step ^ bracketed_step)
    path.setParseAction(PERIOD.join)
    return path
Beispiel #32
0
def build_parser():
    """
    Build a pyparsing parser for our custom topology description language.

    Newlines are significant in this grammar: only blanks and tabs are set
    as skippable whitespace (a global pyparsing setting). A document is one
    or more statements, each being a comment, a link spec, a ports spec, a
    nodes spec, an environment spec or an empty line.

    :return: A pyparsing parser.
    :rtype: pyparsing.OneOrMore
    """
    ParserElement.setDefaultWhitespaceChars(' \t')
    nl = Suppress(LineEnd())
    inumber = Word(nums).setParseAction(lambda s, l, t: int(t[0]))
    # The exponent marker has to be grouped explicitly: ``+`` binds tighter
    # than ``|``, so ``'E' | 'e' + ...`` would accept a bare ``E`` and leave
    # the exponent digits outside the combined float token.
    exponent = (Literal('E') | Literal('e')) + Optional('-') + Word(nums)
    fnumber = (Combine(
        Optional('-') + Word(nums) + '.' + Word(nums) + Optional(exponent))
               ).setParseAction(lambda toks: float(toks[0]))
    boolean = (CaselessLiteral('true') | CaselessLiteral('false')
               ).setParseAction(lambda s, l, t: t[0].casefold() == 'true')
    comment = Literal('#') + restOfLine + nl
    text = QuotedString('"')
    identifier = Word(alphas, alphanums + '_')
    empty_line = LineStart() + LineEnd()
    # Items of a parenthesised list; the separating comma and a trailing
    # line break are both optional.
    item_list = ((text | fnumber | inumber | boolean) +
                 Optional(Suppress(',')) + Optional(nl))
    custom_list = (Suppress('(') + Optional(nl) + Group(OneOrMore(item_list)) +
                   Optional(nl) +
                   Suppress(')')).setParseAction(lambda tok: tok.asList())
    # fnumber is tried before inumber so that "1.5" is not read as "1".
    attribute = Group(
        identifier('key') + Suppress(Literal('=')) +
        (custom_list | text | fnumber | inumber | boolean
         | identifier)('value') + Optional(nl))
    attributes = (Suppress(Literal('[')) + Optional(nl) +
                  OneOrMore(attribute) + Suppress(Literal(']')))

    node = identifier('node')
    port = Group(node + Suppress(Literal(':')) +
                 (identifier | inumber)('port'))
    link = Group(
        port('endpoint_a') + Suppress(Literal('--')) + port('endpoint_b'))

    environment_spec = (attributes + nl).setResultsName('env_spec',
                                                        listAllMatches=True)
    nodes_spec = (Group(
        Optional(attributes)('attributes') + Group(OneOrMore(node))('nodes')) +
                  nl).setResultsName('node_spec', listAllMatches=True)
    ports_spec = (Group(
        Optional(attributes)('attributes') + Group(OneOrMore(port))('ports')) +
                  nl).setResultsName('port_spec', listAllMatches=True)
    link_spec = (Group(Optional(attributes)('attributes') + link('links')) +
                 nl).setResultsName('link_spec', listAllMatches=True)

    statements = OneOrMore(comment | link_spec | ports_spec | nodes_spec
                           | environment_spec | empty_line)
    return statements
Beispiel #33
0
    def pythonVar(self):
        """Return the cached ``$name`` grammar, building it on first use.

        The grammar matches a ``$`` (suppressed) followed by an identifier
        that is not followed by ``.identifier`` or ``(``; matches are
        handed to ``self.onPythonVar``.
        """
        if self._pythonVar:
            return self._pythonVar

        from pyparsing import (ParserElement, Word, alphas, alphanums,
                               Literal, Suppress, FollowedBy)
        # Global pyparsing setting: skip only blanks and tabs.
        ParserElement.setDefaultWhitespaceChars(' \t')

        identifier = Word(alphas+"_", alphanums+"_")
        call_open = Literal("(")
        attribute_access = Literal(".") + identifier
        sigil = Literal("$")

        self._pythonVar = (Suppress(sigil) + identifier
                           + ~FollowedBy(attribute_access | call_open))
        self._pythonVar.setParseAction(self.onPythonVar)
        return self._pythonVar
    def set_delimiters(self, delimiter):
        """Change the delimiter used to identify field boundaries.

        delimiter: str
            A string of characters to be used as delimiters. The documented
            default is ' \t', meaning spaces and tabs are not taken as data
            but mark the boundaries. Characters within quotes are described
            as being recognised as non-delimiters.

        The value is always stored on ``self.delimiter``. Unless it is the
        special value "columns", it is also installed as pyparsing's global
        default whitespace set.
        """
        self.delimiter = delimiter
        if delimiter == "columns":
            # "columns" is not a character set; leave pyparsing untouched.
            return
        ParserElement.setDefaultWhitespaceChars(str(delimiter))
Beispiel #35
0
def setLoggingDebugActionForParserElement(
        parser_element: pyparsing.ParserElement) -> None:
    '''
    Enable debugging on ``parser_element`` and route its debug output
    through our logging-based callbacks instead of `print()`.

    `setDebug(True)` is called first, then the start / success / exception
    debug actions are replaced with the logging versions.
    '''

    parser_element.setDebug(True)

    logging_actions = (
        pyparsingLoggingStartDebugAction,
        pyparsingLoggingSuccessDebugAction,
        pyparsingLoggingExceptionDebugAction,
    )
    parser_element.setDebugActions(*logging_actions)
Beispiel #36
0
    def __init__(self):
        """Generate the kinds and build the block parser.

        The grammar accepts one or more blocks of the form ``{`` name,
        newline, one or more parameter lines, ``}``, optionally followed
        by newlines. A parameter line is a key word followed by zero or
        more values, each either a bare word or a double-quoted word.

        Note: this changes pyparsing's global default whitespace characters.
        """
        self._generate_kinds()

        # Newlines are part of the grammar, so only blanks/tabs are skipped.
        ParserElement.setDefaultWhitespaceChars(' \t')

        bare_value = Word(self._param_value_chrs)
        quoted_value = (Suppress('"') + Word(self._param_value_string)
                        + Suppress('"'))
        parameter_line = Group(
            Word(':' + alphanums + '_' + '-')
            + ZeroOrMore(bare_value ^ quoted_value)
            + Suppress('\n'))

        block = Group(
            Suppress('{') + Word(alphanums) + Suppress('\n')
            + OneOrMore(parameter_line)
            + Suppress('}')
            + ZeroOrMore(Suppress('\n')))

        self._parser = OneOrMore(block)
Beispiel #37
0
 def set_delimiters(self, delimiter):
     """Change the delimiter used to identify field boundaries.

     delimiter: str
         A string of characters to be used as delimiters. The documented
         default is ' \t', meaning spaces and tabs are not taken as data
         but mark the boundaries. Characters within quotes are described
         as being recognised as non-delimiters.

     The value is always remembered on ``self.delimiter``; any value other
     than "columns" is also installed as pyparsing's global default
     whitespace set.
     """
     self.delimiter = delimiter
     is_columns_mode = delimiter == "columns"
     if not is_columns_mode:
         ParserElement.setDefaultWhitespaceChars(str(delimiter))
    def __init__(self):
        """Build the arithmetic-expression grammar.

        Sets ``OPERAND`` (number or identifier), ``MATH_OPERATORS`` (the
        operator table, highest precedence first), ``ARITH_EXPR`` (the
        infix-notation grammar) and ``pattern`` (the expression anchored
        to the end of the input).
        """
        # a class-level static method to enable a memoizing performance
        # enhancement, known as "packrat parsing".
        ParserElement.enablePackrat()

        # Optional fraction and exponent use non-capturing groups "(?:...)".
        # The previous "(:?...)" spelling was a capturing group starting
        # with an optional literal colon, so input such as "1:.5" was
        # accepted as a number.
        NUMBER = Regex(r"[+-]?\d+(?:\.\d*)?(?:[eE][+-]?\d+)?").setParseAction(
            Immediate)
        IDENT = Word(alphas, alphanums + '_').setParseAction(Variable)
        self.OPERAND = (NUMBER | IDENT)
        self.MATH_OPERATORS = [(oneOf('+ -'), 1, opAssoc.RIGHT, SignTerm),
                               ('^', 2, opAssoc.RIGHT, MathTerm),
                               (oneOf('* /'), 2, opAssoc.LEFT, MathTerm),
                               (oneOf('+ -'), 2, opAssoc.LEFT, MathTerm)]
        self.ARITH_EXPR = infixNotation(self.OPERAND, self.MATH_OPERATORS)
        # StringEnd() makes the pattern reject trailing unparsed input.
        self.pattern = self.ARITH_EXPR + StringEnd()
Beispiel #39
0
    def pythonVar(self):
        """Lazily build and return the grammar for ``$name`` references.

        A match is a suppressed ``$`` plus an identifier, provided the
        identifier is not followed by ``.identifier`` or ``(``. Matches
        are passed to ``self.onPythonVar``. The grammar is stored on
        ``self._pythonVar`` and reused on later calls.
        """
        if self._pythonVar:
            return self._pythonVar

        from pyparsing import (ParserElement, Word, alphas, alphanums,
                               Literal, Suppress, FollowedBy)
        # Global pyparsing setting: skip only blanks and tabs.
        ParserElement.setDefaultWhitespaceChars(' \t')

        name = Word(alphas + "_", alphanums + "_")
        open_paren = Literal("(")
        period = Literal(".")
        marker = Literal("$")

        not_member_or_call = ~FollowedBy((period + name) | open_paren)
        self._pythonVar = Suppress(marker) + name + not_member_or_call
        self._pythonVar.setParseAction(self.onPythonVar)
        return self._pythonVar
Beispiel #40
0
def parser():
    """Return the module-wide regex grammar, building it on first call.

    The grammar is cached in the global ``_parser``. Whitespace skipping is
    disabled globally in pyparsing, so every character is significant.
    Terms are literals, ``[...]`` ranges, ``\\d``-style macros, ``.`` and
    parenthesised groups; they are combined by repetition, implicit
    sequence and ``|`` alternation.
    """
    global _parser
    if _parser is not None:
        return _parser

    ParserElement.setDefaultWhitespaceChars("")

    open_bracket = Literal("[")
    close_bracket = Literal("]")
    open_brace = Literal("{")
    close_brace = Literal("}")
    open_paren = Literal("(")
    close_paren = Literal(")")

    macro = Suppress("\\") + oneOf(list("dwsZ"))
    # An escaped character is a backslash pair that is not a macro.
    escaped = ~macro + Combine("\\" + oneOf(list(printables)))
    plain_chars = "".join(
        ch for ch in string.printable if ch not in r"\[]{}().*?+|")

    char_range = Combine(open_bracket.suppress()
                         + SkipTo(close_bracket, ignore=escaped)
                         + close_bracket.suppress())
    literal = escaped | oneOf(list(plain_chars))
    any_char = Literal(".")

    exact_count = (open_brace + Word(nums).setResultsName("count")
                   + close_brace)
    bounded_count = (open_brace + Word(nums).setResultsName("minCount") + ","
                     + Word(nums).setResultsName("maxCount") + close_brace)
    repetition = exact_count | bounded_count | oneOf(list("*+?"))

    expr = Forward()
    group_option = Optional(Literal("?").suppress() + oneOf(list(":P")))
    group = (open_paren.suppress()
             + group_option.setResultsName("option")
             + expr.setResultsName("expr")
             + close_paren.suppress())

    term = literal | char_range | macro | any_char | group
    expr << operatorPrecedence(term, [
        (repetition, 1, opAssoc.LEFT, create(Repetition)),
        (None, 2, opAssoc.LEFT, create(Sequence)),
        (Suppress('|'), 2, opAssoc.LEFT, create(Alternation)),
    ])

    group.setParseAction(create(Group))
    char_range.setParseAction(create(Range))
    literal.setParseAction(create(Character))
    macro.setParseAction(create(Macro))
    any_char.setParseAction(create(Dot))

    _parser = expr
    return _parser
Beispiel #41
0
def parser():
    """Build (once) and return the grammar for regular expressions.

    The result is memoised in the global ``_parser``. pyparsing's default
    whitespace set is cleared globally so that no input character is
    skipped. Parse actions turn groups, ranges, literals, macros and dots
    into the corresponding node objects via ``create``.
    """
    global _parser
    if _parser is not None:
        return _parser

    ParserElement.setDefaultWhitespaceChars("")

    bracket_l = Literal("[")
    bracket_r = Literal("]")
    brace_l = Literal("{")
    brace_r = Literal("}")
    paren_l = Literal("(")
    paren_r = Literal(")")

    re_macro = Suppress("\\") + oneOf(list("dwsZ"))
    re_escaped = ~re_macro + Combine("\\" + oneOf(list(printables)))
    ordinary = "".join(c for c in string.printable if c not in r"\[]{}().*?+|")

    re_range = Combine(
        bracket_l.suppress() + SkipTo(bracket_r, ignore=re_escaped) + bracket_r.suppress())
    re_literal = re_escaped | oneOf(list(ordinary))
    re_dot = Literal(".")

    # {n}, {m,n} or one of the single-character quantifiers
    fixed = brace_l + Word(nums).setResultsName("count") + brace_r
    ranged = (brace_l + Word(nums).setResultsName("minCount") + ","
              + Word(nums).setResultsName("maxCount") + brace_r)
    quantifier = fixed | ranged | oneOf(list("*+?"))

    re_expr = Forward()
    option = Optional(Literal("?").suppress() + oneOf(list(":P")))
    re_group = (paren_l.suppress()
                + option.setResultsName("option")
                + re_expr.setResultsName("expr")
                + paren_r.suppress())

    re_term = re_literal | re_range | re_macro | re_dot | re_group
    precedence_table = [
        (quantifier, 1, opAssoc.LEFT, create(Repetition)),
        (None, 2, opAssoc.LEFT, create(Sequence)),
        (Suppress('|'), 2, opAssoc.LEFT, create(Alternation)),
    ]
    re_expr << operatorPrecedence(re_term, precedence_table)

    re_group.setParseAction(create(Group))
    re_range.setParseAction(create(Range))
    re_literal.setParseAction(create(Character))
    re_macro.setParseAction(create(Macro))
    re_dot.setParseAction(create(Dot))

    _parser = re_expr
    return _parser
Beispiel #42
0
def _worth_extracting(element: pyparsing.ParserElement) -> bool:
    """
    Returns true if this element is worth having its own sub-diagram. Simply, if any of its children
    themselves have children, then its complex enough to extract
    """
    children = element.recurse()
    return any(child.recurse() for child in children)
Beispiel #43
0
def test_succeed(parser: ParserElement,
                 text: str,
                 target: str = None,
                 skip_target: bool = False,
                 show_raw: bool = False,
                 verbose: bool = False) -> None:
    """Check that ``parser`` fully parses ``text`` and yields ``target``.

    Args:
        parser: the grammar under test
        text: input that must parse completely (``parseAll=True``)
        target: expected output text; defaults to ``text`` itself
        skip_target: if true, only parsing is checked, not the output
        show_raw: if true, log the raw parse result
        verbose: if true, log the parse result's ``dump()``

    Raises:
        ParseException: re-raised (after logging/printing) if parsing fails
        ValueError: if the standardized output differs from the
            standardized target
    """
    log.critical("Testing to succeed: " + text)
    expected_text = text if target is None else target

    try:
        parsed = parser.parseString(text, parseAll=True)
        log.debug("Success: {} -> {}", text, text_from_parsed(parsed))
        if show_raw:
            log.debug("... raw: {}", parsed)
        if verbose:
            log.debug("... dump:\n{}", parsed.dump())
    except ParseException as exception:
        log.debug("ParseException on: {}\n... parser: {}", text, parser)
        print(statement_and_failure_marker(text, exception))
        raise

    if skip_target:
        return

    intended = standardize_for_testing(expected_text)
    raw = text_from_parsed(parsed)
    actual = standardize_for_testing(raw)
    if intended == actual:
        return

    raise ValueError(f"Failure on: {text}\n"
                     f"-> Raw output:\n"
                     f"{raw!r}\n"
                     f"-> Standardized output:\n"
                     f"{actual!r}\n"
                     f"... should have been:\n"
                     f"{intended!r}\n"
                     f"... parser: {parser}\n"
                     f"... as list: {parsed.asList()!r}]")
Beispiel #44
0
def pn_from_sis(filename):
    """Loads a PN in SIS format.

    The file is parsed with a line-oriented grammar (newlines are
    significant; only blanks and tabs are skipped, which is set globally
    in pyparsing). Declared signals become transitions. For every arc
    line, a first item that is not a known transition is added as a place
    feeding the listed targets; otherwise the listed targets are added as
    places fed by that transition. Initial marking and capacities are then
    applied and the net is put into its initial marking.

    Returns the populated ``PetriNet``.
    """
    # definition of PN grammar
    ParserElement.setDefaultWhitespaceChars(" \t")
    name = Word(alphanums+"_\"':-")
    count = Word(nums).setParseAction(lambda tokens: int(tokens[0]))
    eols = Suppress(OneOrMore(LineEnd()))

    model_line = ".model" + name("modelName") + eols
    signal_lines = ZeroOrMore(
        Suppress(oneOf(".inputs .outputs .dummy")) + OneOrMore(name) + eols
    )("signals")

    # An arc target carries an optional "(weight)"; the weight defaults to 1.
    weighted_target = Group(
        name + Optional(Suppress("(") + count + Suppress(")"), default=1))
    arc_line = name + ZeroOrMore(weighted_target) + eols
    graph_section = (Literal(".graph") + Suppress(OneOrMore(LineEnd()))
                     + OneOrMore(Group(arc_line))("arcs"))

    capacity_entries = ZeroOrMore(Group(name + Suppress("=") + count))
    capacity_line = ".capacity" + capacity_entries("capacities") + eols

    # A marked place carries an optional "=tokens"; it defaults to 1.
    marking_entries = ZeroOrMore(
        Group(name + Optional(Suppress("=") + count, default=1)))
    marking_line = (".marking" + Suppress("{") + marking_entries("marking")
                    + Suppress("}") + eols)

    grammar = (Optional(eols) + Optional(model_line) + signal_lines
               + graph_section + Optional(capacity_line) + marking_line
               + ".end")
    grammar.ignore(pythonStyleComment)

    net = PetriNet(filename=filename, format='sis')
    parsed = grammar.parseFile( filename )

    for signal in parsed.signals:
        net.add_transition( signal )
    net.set_name(parsed.modelName)

    known_transitions = set(net.get_transitions())
    for arc in parsed.arcs:
        source, targets = arc[0], arc[1:]
        if source in known_transitions:
            # transition -> places
            for place_name, weight in targets:
                place = net.add_place(place_name)
                net.add_edge(source, place, weight)
        else:
            # it's a place -> transitions
            place = net.add_place(source)
            for transition, weight in targets:
                net.add_edge(place, transition, weight)

    for place_name, tokens in parsed.marking:
        net.set_initial_marking(place_name, tokens)
    for place_name, limit in parsed.capacities:
        net.set_capacity(place_name, limit)

    net.to_initial_marking()
    return net
Beispiel #45
0
def rfc2822():
    """Return the shared RFC 2822 ``addr-spec`` grammar, building it once.

    The grammar is cached in the global ``_rfc2822``. pyparsing's default
    whitespace set is cleared globally, because whitespace is handled
    explicitly through the FWS / CFWS productions.
    """
    global _rfc2822
    if _rfc2822 is None:
        ParserElement.setDefaultWhitespaceChars("")

        CRLF = Literal("\r\n")
        # Character classes are raw strings so that the regex engine, not
        # the Python string literal, interprets the \xNN escapes. With
        # plain strings "\x5d" became a literal "]" and closed the QTEXT
        # and CTEXT classes early.
        ATEXT = Regex(r"[a-zA-Z0-9!#$%&'*+\-/=\?^_`{|}~]")
        TEXT = Regex(r"[\x01-\x09\x0b\x0c\x0e-\x7f]")
        # qtext = NO-WS-CTL / %d33 / %d35-91 / %d93-126; NO-WS-CTL excludes
        # CR (%d13), so the control range starts at \x0e, not \x0d.
        QTEXT = Regex(r"[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]")
        LOWASCII = Regex(r"[\x00-\x7f]")
        DTEXT = Regex(r"[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x5e-\x7f]")
        WSP = Regex(r"[\x20\x09]")
        CTEXT = Regex(r"[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x27\x2a-\x5b\x5d-\x7f]")

        # A quoted-pair starts with ONE backslash; "\\" is a single
        # backslash character, whereas r"\\" would be two.
        obsQp = "\\" + LOWASCII
        quotedPair = ("\\" + TEXT) | obsQp

        obsFWS = OneOrMore(WSP) + ZeroOrMore(CRLF + OneOrMore(WSP))
        FWS = (Optional(ZeroOrMore(WSP) + CRLF) + OneOrMore(WSP)) | obsFWS
        # Comments nest, hence the forward declaration.
        comment = Forward()
        ccontent = CTEXT | quotedPair | comment
        comment << "(" + ZeroOrMore(Optional(FWS) + ccontent) + Optional(FWS) + ")"
        CFWS = ZeroOrMore(Optional(FWS) + comment) + ((Optional(FWS) + comment) | FWS)

        atom = Optional(CFWS) + OneOrMore(ATEXT) + Optional(CFWS)
        dotAtomText = OneOrMore(ATEXT) + ZeroOrMore("." + OneOrMore(ATEXT))
        dotAtom = Optional(CFWS) + dotAtomText + Optional(CFWS)

        qcontent = QTEXT | quotedPair
        quotedString = Optional(CFWS) + '"' + ZeroOrMore(Optional(FWS) + qcontent) + Optional(FWS) + '"'

        word = atom | quotedString
        obsLocalPart = word + ZeroOrMore("." + word)

        localPart = dotAtom | quotedString | obsLocalPart

        dcontent = DTEXT | quotedPair
        domainLiteral = Optional(CFWS) + "[" + ZeroOrMore(Optional(FWS) + dcontent) + Optional(FWS) + "]" + Optional(CFWS)

        obsDomain = atom + ZeroOrMore("." + atom)

        domain = dotAtom | domainLiteral | obsDomain

        addrSpec = localPart + "@" + domain
        _rfc2822 = addrSpec
    return _rfc2822
def read_sets(string):
    """
    Parse a textual set (``{...}``) or list of sets (``[...]``).

    Elements are alphanumeric words converted by ``parse_elem``; every
    brace group becomes a ``frozenset``. Note that this changes pyparsing's
    global default whitespace characters (commas are treated as
    whitespace).

    >>> read_sets("{}")
    frozenset([])

    >>> read_sets("{1}")
    frozenset([1])

    >>> read_sets("{{}, {}}")   # invalid, outer set contains two equal sets
    frozenset([frozenset([])])

    >>> read_sets("{{{1}, {2}}, {3}}")
    frozenset([frozenset([frozenset([2]), frozenset([1])]), frozenset([3])])

    >>> read_sets("{1, 2,3}")
    frozenset([1, 2, 3])

    >>> read_sets("{{1, 2}, {3, 4}}")
    frozenset([frozenset([1, 2]), frozenset([3, 4])])

    >>> read_sets("{a,b,c}")
    frozenset(['a', 'c', 'b'])

    >>> read_sets('[{1,2,3},{a,c,b}]')
    [frozenset([1, 2, 3]), frozenset(['a', 'c', 'b'])]

    >>> read_sets('{a}')
    frozenset(['a'])

    >>> read_sets('{{x1,x2},{x3}}')
    frozenset([frozenset(['x2', 'x1']), frozenset(['x3'])])

    >>> read_sets('{{23gat,24gat}}')
    frozenset([frozenset(['23gat', '24gat'])])
    """
    from pyparsing import nestedExpr, alphas, Word, nums, ParserElement, delimitedList
    ParserElement.setDefaultWhitespaceChars(" ,")

    def _to_frozenset(tokens):
        return frozenset(tokens[0])

    atom = Word(alphas + nums)
    atom.setParseAction(parse_elem)

    braced = nestedExpr("{", "}", content=delimitedList(atom))
    braced.setParseAction(_to_frozenset)
    bracketed = nestedExpr("[", "]", content=braced)

    grammar = braced | bracketed
    parsed = grammar.parseString(string)
    return parsed.asList()[0]
Beispiel #47
0
def property_grammar():
    """Build the grammar for a single ``NAME=VALUE`` property.

    Accepted properties are ``LIBINPUT_MODEL_*=1`` and the two
    ``WIDTHxHEIGHT`` hints (size and resolution). The matched name is
    available as ``NAME`` and the value as ``VALUE``.

    Note: only the blank is set as pyparsing's (global) default
    whitespace character.
    """
    ParserElement.setDefaultWhitespaceChars(' ')

    model_flag = (Regex(r'LIBINPUT_MODEL_[_0-9A-Z]+')('NAME')
                  - Suppress('=')
                  - Literal('1')('VALUE'))

    width_by_height = INTEGER('X') + Suppress('x') + INTEGER('Y')
    hint_names = ('LIBINPUT_ATTR_SIZE_HINT', 'LIBINPUT_ATTR_RESOLUTION_HINT')

    alternatives = [model_flag]
    for hint in hint_names:
        hint_value = Group(width_by_height('SETTINGS*'))
        alternatives.append(
            Literal(hint)('NAME') - Suppress('=') - hint_value('VALUE'))

    return Or(alternatives)
Beispiel #48
0
def _int_expression():
    """Build the grammar for integer expressions.

    Factors are hexadecimal or decimal constants, ``${name}`` value
    references, ``len{name}`` length references and parenthesised
    sub-expressions. Each entry of ``_operators`` adds one precedence
    level on top of the previous one; the parse actions ``_half`` and
    ``_collapse`` combine the operands.
    """
    from pyparsing import Word, alphanums, nums, Forward, ZeroOrMore, Combine, CaselessLiteral, srange, ParserElement, Optional
    ParserElement.enablePackrat()

    def _decimal_constant(s, l, t):
        return [Constant(int(t[0]))]

    def _hex_constant(s, l, t):
        # drop the leading "0x" before converting
        return [Constant(int(t[0][2:], 16))]

    name = Word(alphanums + ' _+:.-/')

    decimal = Combine(Optional('-') + Word(nums))
    decimal.addParseAction(_decimal_constant)
    hexadecimal = Combine(CaselessLiteral("0x") + Word(srange("[0-9a-fA-F]")))
    hexadecimal.addParseAction(_hex_constant)

    value_ref = ('${' + name + '}').addParseAction(lambda s,l,t:ValueResult(t[1]))
    length_ref = ('len{' + name + '}').addParseAction(lambda s,l,t:LengthResult(t[1]))

    expression = Forward()
    parenthesised = ('(' + expression + ')').addParseAction(lambda s,l,t:t[1])
    factor = hexadecimal | decimal | value_ref | length_ref | parenthesised

    level = factor
    for ops in _operators:
        tails = [(symbol + level).addParseAction(_half(op))
                 for symbol, op in ops]
        any_tail = reduce(operator.or_, tails)
        level = (level + ZeroOrMore(any_tail)).addParseAction(_collapse)

    expression << level
    return expression
Beispiel #49
0
def ts_from_file(filename):
    """Loads a TS (possibly extended with state frequencies) in SIS format.

    The visible code builds the grammar and parses ``filename``. Newlines
    are significant, so only blanks and tabs are set as pyparsing's
    (global) default whitespace.

    Raises:
        ParseException: printed and then re-raised when the file does not
            match the grammar.
    """
    # definition of TS grammar
    ParserElement.setDefaultWhitespaceChars(" \t")
    # "ident" rather than "id", to avoid shadowing the builtin
    ident = Word(alphanums+"_\"':-")
    number = Word(nums).setParseAction(lambda tokens: int(tokens[0]))
    newlines = Suppress(OneOrMore(LineEnd()))
    modelName = ".model" + ident("modelName") + newlines
    signalNames = ZeroOrMore( Suppress(oneOf(".inputs .outputs .dummy")) + OneOrMore( ident ) + newlines)("signals")
    # every arc line consists of exactly three identifiers
    arc = ident + ident + ident + newlines
    graph = Literal(".state graph") + Suppress(OneOrMore(LineEnd())) + OneOrMore(Group(arc))("arcs")
    frequency_list = ZeroOrMore(Group(ident+number)+newlines)
    frequency = ".frequencies" + Suppress(OneOrMore(LineEnd())) + frequency_list("frequencies")
    marking_list = ZeroOrMore(ident)
    marking = ".marking"+Suppress("{") + marking_list("marking") + Suppress("}") + newlines
    ts_grammar = Optional(newlines) + Optional(modelName) + signalNames + graph + marking + Optional(frequency) + ".end"
    ts_grammar.ignore(pythonStyleComment)
    try:
        ast = ts_grammar.parseFile( filename )
    except ParseException as pe:
        # "except ... as" and print() are valid on Python 2.6+ and 3.
        print(pe)
        # bare raise keeps the original traceback
        raise
Beispiel #50
0
def property_grammar():
    """Build the grammar for a single ``NAME=VALUE`` property.

    Accepted properties: ``LIBINPUT_MODEL_*=1``, the two ``WIDTHxHEIGHT``
    hints, the lid-switch reliability tag, the touchpad/keyboard combo
    layout tag and the ``MIN:MAX`` pressure range. The name is available
    as ``NAME``; all values except the pressure range also as ``VALUE``.

    Note: only the blank is set as pyparsing's (global) default
    whitespace character.
    """
    ParserElement.setDefaultWhitespaceChars(' ')

    def _assignment(name_expr, value_expr):
        # NAME=VALUE with both sides labelled
        return name_expr('NAME') - Suppress('=') - value_expr('VALUE')

    alternatives = [
        _assignment(Regex(r'LIBINPUT_MODEL_[_0-9A-Z]+'), Literal('1')),
    ]

    width_by_height = INTEGER('X') + Suppress('x') + INTEGER('Y')
    for hint in ('LIBINPUT_ATTR_SIZE_HINT', 'LIBINPUT_ATTR_RESOLUTION_HINT'):
        hint_value = Group(width_by_height('SETTINGS*'))
        alternatives.append(_assignment(Literal(hint), hint_value))

    reliability_tags = Or(('reliable', 'write_open'))
    alternatives.append(_assignment(
        Literal('LIBINPUT_ATTR_LID_SWITCH_RELIABILITY'), reliability_tags))

    # ('below') is a plain string, not a one-element tuple
    layout_tags = Or(('below'))
    alternatives.append(_assignment(
        Literal('LIBINPUT_ATTR_TPKBCOMBO_LAYOUT'), layout_tags))

    # The pressure range value carries no 'VALUE' results name.
    low_to_high = INTEGER('X') + Suppress(':') + INTEGER('Y')
    alternatives.append(
        Literal('LIBINPUT_ATTR_PRESSURE_RANGE')('NAME')
        - Suppress('=')
        - Group(low_to_high('SETTINGS*')))

    return Or(alternatives)
Beispiel #51
0
def parser():
    """Return the shared regex grammar, constructing it on the first call.

    The grammar is cached in the global ``_parser``. pyparsing's default
    whitespace set is cleared globally, so blanks and tabs are ordinary
    literal characters. Terms are combined by repetition, implicit
    sequence and ``|`` alternation through ``infixNotation``.
    """
    global _parser
    if _parser is not None:
        return _parser

    ParserElement.setDefaultWhitespaceChars("")
    (open_bracket, close_bracket, open_brace, close_brace,
     open_paren, close_paren, colon, question) = map(Literal, "[]{}():?")

    macro = Combine("\\" + oneOf(list("dws")))
    # An escaped character is a backslash pair that is not a macro.
    escaped = ~macro + Combine("\\" + oneOf(list(printables)))
    plain_chars = "".join(
        ch for ch in printables if ch not in r"\[]{}().*?+|") + " \t"

    char_range = Combine(
        open_bracket + SkipTo(close_bracket, ignore=escaped) + close_bracket)
    literal = escaped | oneOf(list(plain_chars))
    non_capture_marker = Suppress("?:")
    any_char = Literal(".")

    exact_count = open_brace + Word(nums)("count") + close_brace
    bounded_count = (open_brace + Word(nums)("minCount") + ","
                     + Word(nums)("maxCount") + close_brace)
    repetition = exact_count | bounded_count | oneOf(list("*+?"))

    char_range.setParseAction(handleRange)
    literal.setParseAction(handleLiteral)
    macro.setParseAction(handleMacro)
    any_char.setParseAction(handleDot)

    term = literal | char_range | macro | any_char | non_capture_marker
    precedence_table = [
        (repetition, 1, opAssoc.LEFT, handleRepetition),
        (None, 2, opAssoc.LEFT, handleSequence),
        (Suppress('|'), 2, opAssoc.LEFT, handleAlternative),
    ]
    _parser = infixNotation(term, precedence_table)
    return _parser
Beispiel #52
0
# A TAP output line may also indicate that the test suite was aborted, with the line:
#   Bail out!
# optionally followed by a reason for bailing out
#
# Copyright 2008, by Paul McGuire
#

from pyparsing import ParserElement,LineEnd,Optional,Word,nums,Regex,\
    Literal,CaselessLiteral,Group,OneOrMore,Suppress,restOfLine,\
    FollowedBy,empty

# public names exported by this module
__all__ = ['tapOutputParser', 'TAPTest', 'TAPSummary']

# newlines are significant whitespace, so set default skippable
# whitespace to just spaces and tabs
ParserElement.setDefaultWhitespaceChars(" \t")
# end of line, matched but dropped from the results
NL = LineEnd().suppress()

# the plan line "1..N"; N is stored under the results name "ubound"
integer = Word(nums)
plan = '1..' + integer("ubound")

# status keyword of a test line
OK,NOT_OK = map(Literal,['ok','not ok'])
testStatus = (OK | NOT_OK)

# description text: everything up to a '#' or the end of the line; the
# parse action strips leading dashes and spaces
description = Regex("[^#\n]+")
description.setParseAction(lambda t:t[0].lstrip('- '))

# directive: '#' followed by TODO (case-insensitive) and the rest of the
# line, or by a line that starts with SKIP. In the SKIP branch FollowedBy
# only looks ahead, so restOfLine still contains the keyword and the parse
# action prefixes the tokens with a normalized 'SKIP'.
TODO,SKIP = map(CaselessLiteral,'TODO SKIP'.split())
directive = Group(Suppress('#') + (TODO + restOfLine | 
    FollowedBy(SKIP) + 
        restOfLine.copy().setParseAction(lambda t:['SKIP',t[0]]) ))
Beispiel #53
0
continuation_drift = 8    # minimum shift of a continuation line in the js output, relative to the significant indentation;
                          # if the source indentation shift (compared to indent_len) is greater than this, the greater value is used

gr = Word(alphanums)
>>> src = 'a bb ccc'
>>> for match, start, stop in gr.scanString(src):
  print(match, start, stop)

http://stackoverflow.com/questions/1661197/what-characters-are-valid-for-javascript-variable-names
unicodePrintables = u''.join(unichr(c) for c in xrange(65536)
                                        if not unichr(c).isspace())
>>> x=unicode('č')
>>> x.isalnum()

from pyparsing import ParserElement
# No characters are skipped implicitly: all whitespace is significant.
ParserElement.setDefaultWhitespaceChars('')
# a line break: exactly two characters drawn from CR/LF, or a single LF
lineBreak = Word('\r\n', exact=2) | Word('\n', exact=1)
# string literals in double quotes, single quotes or backticks; the quotes
# are kept in the result, and only the backtick form may span lines
quotedString = QuotedString('"', unquoteResults=False) | QuotedString("'", unquoteResults=False) | QuotedString('`', multiline=True, unquoteResults=False)
# one token: a C/C++-style comment, a quoted string, a line break, or a run
# of characters containing no quote, slash or line-break character
parseRules = cppStyleComment() | quotedString | lineBreak | CharsNotIn('"\'`/\r\n')

def javascript(ciderscript):
    """Compile from cjs to js."""
    # Mutable state of the compilation; every entry starts at its "empty"
    # value.
    context = {'js_indent_level': 0,        # current indentation level
               'indent_stack': [],          # stack of source indents; each item is a tuple (chars, scope) where
                                                # chars = number of spaces added at the beginning of this block,
                                                # scope = lineNo+1 for a started function, 0 otherwise
               'indent_len': 0,             # current indent length (length of the joined indent_stack)
               'multiline_comment': False,  # are we inside a multiline comment /*..*/ ?
               'multiline_string': False,   # are we inside a multiline string `..` ?
               }
Beispiel #54
0
 def initialize(self):
     """Build the grammar and parse ``self.stream`` with it.

     The input consists of optional header lines ("Dimensions",
     "Position", "Iterations", "Angle", "Linelength", "Linewidth",
     "Linecolor", "Background", "Axiom"), each handled by
     ``self.setAttribute``, followed by one or more production rules
     ``lhs = rhs`` handled by ``self.addRule``. Lines starting with "#"
     are comments. The grammar is stored on ``self.grammar`` and the
     parameter list expression on ``self.pList``.

     Newlines are significant: only blanks, tabs and carriage returns are
     set as pyparsing's (global) default whitespace. A parse error is not
     propagated; the offending line, a caret marker and the error are
     printed instead.
     """
     ParserElement.setDefaultWhitespaceChars(' \t\r')
 
     integer = Regex(r"[+-]?\d+") \
         .setParseAction(lambda s,l,t: [ int(t[0]) ])
     number = Regex(r"[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?") \
         .setParseAction(lambda s,l,t: [ float(t[0]) ])
     color = Regex(r"#([0-9a-fA-F]{6})")
     # an apostrophe followed by an integer in the range 0..360
     angle = "'" + Regex(r"(360|3[0-5][0-9]|[12][0-9]{2}|[0-9]{1,2})") \
         .setParseAction(lambda s,l,t: [ int(t[0]) ])
     alpha = "'" + Regex(r"(360|3[0-5][0-9]|[12][0-9]{2}|[0-9]{1,2})") \
         .setParseAction(lambda s,l,t: [ int(t[0]) ])
     # a variable is a single letter; each one is registered via addVar
     variable = Word(alphas, exact=1).setParseAction(self.addVar)
     colon = Literal(":").suppress()
     comma = Literal(",")
     lBrace = Literal("(")
     rBrace = Literal(")")
     lBracket = Literal("[")
     rBracket = Literal("]")
     lAngle = Literal("<")
     rAngle = Literal(">")
     plus = Literal("+")
     minus = Literal("-")
     FTerm = Literal("F")
     fTerm = Literal("f")
     ZTerm = Literal("Z")
     zTerm = Literal("z")
     xTerm = Literal("x")
     cTerm = Literal("c")
     
     eol = OneOrMore(LineEnd()).suppress()
     param = ( angle | color | "!" + number | "|" + number )
     self.pList = lBrace + param + ZeroOrMore(comma + param) + rBrace
     literal = ((lBracket + ( variable + Optional(self.pList) 
             | plus + Optional(self.pList) | minus + Optional(self.pList) ) + rBracket)
         | (variable + Optional(self.pList) | plus + Optional(self.pList) 
             | minus + Optional(self.pList)))
     terminal = (ZTerm | zTerm | FTerm | fTerm | xTerm | cTerm
         | plus | minus | lBracket | rBracket)
     # left-hand side of a rule: a variable, optionally with terminals on
     # either side marked off by "<" and ">"
     lprod = ( 
         (OneOrMore(terminal) + lAngle + variable + rAngle + OneOrMore(terminal))
         | (OneOrMore(terminal) + lAngle + variable) 
         | (variable + rAngle + OneOrMore(terminal)) 
         | variable )
     rProd = OneOrMore(literal | terminal)
     comment = Suppress((LineStart() + "#" + SkipTo(eol, include=True)))
     rules = ( 
         (lprod + Literal("=") + rProd + eol).setParseAction(self.addRule) \
         | comment )
     defaults = ( ( ("Dimensions" + colon + integer + comma + integer) 
         | ("Position" + colon + integer + comma + integer)
         | ("Iterations" + colon + integer)
         | ("Angle" + colon + angle)
         | ("Linelength" + colon + number)
         | ("Linewidth" + colon + number)
         | ("Linecolor" + colon + color)
         | ("Background" + colon + color)
         | ("Axiom" + colon + rProd) ) + eol ).setParseAction(self.setAttribute)
     header = ( defaults | comment )
     self.grammar = Suppress(ZeroOrMore(LineEnd())) \
         + ZeroOrMore(header) \
         + OneOrMore(rules)
         
     try:
         # The parse result itself is not needed: the parse actions above
         # record everything on the instance.
         self.grammar.parseString( self.stream )
     except ParseException as err:
         # "except ... as" and print() are valid on Python 2.6+ and 3.
         print(err.line)
         print(" "*(err.column-1) + "^")
         print(err)
Beispiel #55
0
def make_amr_parser():
    """
    Build a pyparsing parser for AMRs.

    The returned expression yields an abstract syntax tree that needs to be
    converted into an AMR using ast_to_amr.

    Calling this function also calls ``ParserElement.enablePackrat()``, which
    is invoked on the ParserElement class itself rather than on the returned
    expression.

    :return: the recursive ``expr`` element; its parse action produces
        ``(node_name, concept_name, roles)`` tuples.
    """
    def debug(s, loc, tok):
        """Parse action attached to ``valuelist``.

        Returns ``[tok[0]] + tok[1:]`` when more than one token matched and
        ``tok`` unchanged otherwise.
        """
        if len(tok) > 1:
            flat = [tok[0]] + tok[1:]
        else: 
            flat =  tok
        return flat

    def parse_concept_expr(s, loc, tok):       
        """Parse action for ``expr``: build ``(node_name, concept_name, roles)``.

        The first token is the node name. If the second token is a tuple
        there is no concept name and every remaining token is a role;
        otherwise the second token is the concept name and the tokens after
        it are the roles.
        """
        node_name = tok[0]
        concept_name = None

        roles = []
        if len(tok) > 1:
           # A tuple in second position means the "/ concept" part was omitted.
           if type(tok[1]) is tuple:
                roles = tok[1:]
           else: 
              concept_name = tok[1]
              if len(tok) > 2:
                roles = tok[2:]
        return (node_name, concept_name, roles)
   
    ParserElement.enablePackrat() # Hopefully no bug in here...


    def parse_role(s,loc,tok):
        """Parse action for ``role``: return ``(role_label, children)``.

        For every child that is a StrLiteral or SpecialValue, spaces are
        removed and a trailing ``~alignment`` suffix (split on the last
        ``~``) is stripped from the child and appended to the role label as
        ``"<role>~<alignment>"``. With fewer than two tokens, the single
        token is returned as is.
        """
        if len(tok) >= 2:
            r, ch = tok[0], []
            for v in tok[1:]:
                if isinstance(v, StrLiteral):
                    # Parse the node alignment and move it to the edge
                    parts = v.replace(" ","").rsplit("~",1)   
                    if len(parts) >= 2:
                        v, align = parts
                        v = StrLiteral(v)
                        r = "%s~%s" % (r.strip(), align.strip())
                elif isinstance(v, SpecialValue):
                    parts = v.replace(" ","").rsplit("~",1)
                    if len(parts) >= 2:                            
                        v, align = parts
                        # NOTE(review): the stripped value is rebuilt as a
                        # StrLiteral, not a SpecialValue -- confirm intended.
                        v = StrLiteral(v)
                        r = "%s~%s" % (r.strip(), align.strip())
                ch.append(v)              
            return  r, ch       
        else:
            return tok[0] 

    # Numbers are all mapped to the same node in the graph because of interning.
    # Quantities are kept as StrLiteral text; an earlier numeric conversion was:
    #   float(tok[0]) if "." in tok[0] else int(tok[0])
    parse_quantity = lambda s, loc, tok: StrLiteral(" ".join(tok))

    # Join the matched tokens with single spaces and wrap them as a StrLiteral.
    parse_string_literal = lambda s, loc, tok: StrLiteral(" ".join(tok)) 

    # Same joining, but wrapped as a SpecialValue.
    parse_special_value = lambda s, loc, tok: SpecialValue(" ".join(tok))

    # Parentheses delimit a concept expression and are dropped from the output.
    lpar  = Literal( "(" ).suppress()
    rpar  = Literal( ")" ).suppress()

    # A run of digits, dots and commas.
    quantity = Word(nums+".,").setParseAction(parse_quantity)

    # NOTE(review): "\*" in the non-raw strings below is not a recognised
    # escape sequence, so the character sets contain a literal backslash as
    # well as "*" -- confirm this is intended.
    # Previous, narrower definition: Word(alphas+nums+"_@.")
    node_name =  Word(alphas+nums+"""@-_.~$/<>%&!+\*?^`"'""")

    # Double-quoted text with the quotes removed from the result.
    lit_string = Literal('"').suppress() + CharsNotIn('"') + Literal('"').suppress() 
    concept_name = lit_string | Word(alphas+nums+"""-_.,`~$/<>%&!+\*?^"'""")     
    # A role is either a plain label or "#label", which becomes a NonterminalLabel.
    role_name = Word(alphas+nums+"""-_.,~$/<>%&!+\*:?^`"'""") | Literal("#").suppress()+Word(alphas+nums+"[]-$_").setParseAction(lambda s, loc, tok: NonterminalLabel(tok[0]))     


    # Fixed attribute keywords, optionally followed by "~" and an alignment token.
    special_attr = (Literal("-") | Literal("interrogative") | Literal("SHOULD") | Literal("MUST") | Literal("HAVE-TO")| Literal("WOULD") | Literal("CAN") |  Literal("DARE-TO")| Literal("BE-TO") | Literal("MAY") | Literal("GOING-TO") | Literal("MIGHT") | Literal("USED-TO")) + Optional(Literal("~")+Word(alphas+nums+"."))
    

    # expr is recursive (a role value can be a nested expression), so it is
    # declared as a Forward and defined at the end.
    expr = Forward()
    # Alternatives are tried in this order: nested expression, quantity,
    # special attribute, bare node name, quoted string with optional alignment.
    value =  expr |\
             quantity.setParseAction(parse_quantity) |\
             special_attr.setParseAction(parse_special_value) | \
             node_name |\
             (lit_string + Optional(Literal("~")+Word(alphas+nums+"."))).setParseAction(parse_string_literal) 
             
  

    # One or more comma-separated values, defined right-recursively.
    valuelist = Forward()
    valuelist << (value + Literal(",").suppress() + valuelist | value).setParseAction(debug)
    # ":" role_name followed by its value list.
    role = (Literal(":").suppress() + role_name + valuelist).setParseAction(parse_role)
 
    expr.setParseAction(parse_concept_expr) 
    # "(" node_name [ "/" concept_name ] role* ")"
    expr << (lpar +  node_name + Optional(Literal("/").suppress() + concept_name) + ZeroOrMore(role) + rpar)
    
    return expr 
Beispiel #56
0
 def set_default_white_spaces():
     """Temporarily make space and tab pyparsing's default whitespace.

     Generator with a single ``yield``: the previous value of
     ``ParserElement.DEFAULT_WHITE_CHARS`` is saved, the default is switched
     to ``' \\t'``, and the saved value is restored once the generator is
     resumed or closed.

     The restore runs in a ``finally`` clause so that an exception thrown
     into the generator at the yield point cannot leave the class-wide
     whitespace setting changed.

     NOTE(review): presumably wrapped by a decorator (context manager or test
     fixture) that is outside this excerpt -- confirm.
     """
     default = ParserElement.DEFAULT_WHITE_CHARS
     ParserElement.setDefaultWhitespaceChars(' \t')
     try:
         yield
     finally:
         ParserElement.setDefaultWhitespaceChars(default)
Beispiel #57
0
                       nums,
                       alphas,
                       Combine,
                       oneOf,
                       opAssoc,
                       operatorPrecedence,
                       QuotedString,
                       Literal,
                       ParserElement,
                       ParseException,
                       Forward,
                       Group,
                       Suppress,
                       Optional,
                       Regex)
# Enable pyparsing's packrat parsing at import time. The call is made on the
# ParserElement class itself, so it is not limited to the grammar defined in
# this module.
ParserElement.enablePackrat()

from sqlalchemy import and_, or_, func
#from sqlalchemy.orm import aliased

import operator
import re


def dbobject(obj):
    """Return the object produced by ``obj.__moyadbobject__()``, if available.

    When *obj* has a ``__moyadbobject__`` attribute it is called with no
    arguments and its result is returned; otherwise *obj* itself is returned
    unchanged.
    """
    try:
        to_db_object = obj.__moyadbobject__
    except AttributeError:
        return obj
    return to_db_object()


@implements_to_string
class DBExpressionError(Exception):
    # Exception subclass declared by this module; the code that raises it is
    # not part of this excerpt.
    # NOTE(review): presumably read by the surrounding framework to hide the
    # Python traceback when this error is reported -- confirm.
    hide_py_traceback = True
Beispiel #58
0
    def parse(content, basedir=None, resolve=True):
        """parse a HOCON content

        :param content: HOCON content to parse
        :type content: basestring
        :param basedir: directory that the file name of an ``include`` is joined to;
            when None the file name is used as given
        :type basedir: basestring
        :param resolve: If true, resolve substitutions
        :type resolve: boolean
        :return: a ConfigTree or a list
        """

        def norm_string(value):
            # Apply every replacement listed in ConfigParser.REPLACEMENTS.
            for k, v in ConfigParser.REPLACEMENTS.items():
                value = value.replace(k, v)
            return value

        def unescape_string(tokens):
            return ConfigUnquotedString(norm_string(tokens[0]))

        def parse_multi_string(tokens):
            # remove the first and last 3 "
            return tokens[0][3: -3]

        def convert_number(tokens):
            # Prefer int; fall back to float for anything int() rejects.
            n = tokens[0]
            try:
                return int(n)
            except ValueError:
                return float(n)

        # ${path} or ${?path} for optional substitution
        # Raw string: the pattern is full of regex escapes (\$, \{, \s, ...) that
        # are not valid Python string escapes.
        SUBSTITUTION = r"\$\{(?P<optional>\?)?(?P<variable>[^}]+)\}(?P<ws>\s*)"

        def create_substitution(instring, loc, token):
            # remove the ${ and }
            match = re.match(SUBSTITUTION, token[0])
            variable = match.group('variable')
            ws = match.group('ws')
            optional = match.group('optional') == '?'
            substitution = ConfigSubstitution(variable, optional, ws, instring, loc)
            return substitution

        def include_config(token):
            # Work out whether the include names a URL or a file, load it
            # unresolved and wrap its content in a ConfigInclude.
            url = None
            file = None
            if len(token) == 1:  # include "test"
                if token[0].startswith("http://") or token[0].startswith("https://") or token[0].startswith("file://"):
                    url = token[0]
                else:
                    file = token[0]
            elif len(token) == 2:  # include url("test") or file("test")
                if token[0] == 'url':
                    url = token[1]
                else:
                    file = token[1]

            if url is not None:
                logger.debug('Loading config from url %s', url)
                obj = ConfigFactory.parse_URL(url, resolve=False)

            if file is not None:
                path = file if basedir is None else os.path.join(basedir, file)
                logger.debug('Loading config from file %s', path)
                obj = ConfigFactory.parse_file(path, required=False, resolve=False)

            return ConfigInclude(obj if isinstance(obj, list) else obj.items())

        # Newlines are significant in this grammar, so only space and tab are
        # skipped implicitly.
        ParserElement.setDefaultWhitespaceChars(' \t')

        assign_expr = Forward()
        true_expr = Keyword("true", caseless=True).setParseAction(replaceWith(True))
        false_expr = Keyword("false", caseless=True).setParseAction(replaceWith(False))
        null_expr = Keyword("null", caseless=True).setParseAction(replaceWith(None))
        key = QuotedString('"', escChar='\\', unquoteResults=False) | Word(alphanums + '._- ')

        eol = Word('\n\r').suppress()
        eol_comma = Word('\n\r,').suppress()
        comment = (Literal('#') | Literal('//')) - SkipTo(eol)
        comment_eol = Suppress(Optional(eol_comma) + comment)
        comment_no_comma_eol = (comment | eol).suppress()
        # The lookahead only accepts a number when it is followed by the end of
        # input or by a delimiter / comment start.
        number_expr = Regex(r'[+-]?(\d*\.\d+|\d+(\.\d+)?)([eE]\d+)?(?=$|[ \t]*([\$\}\],#\n\r]|//))',
                            re.DOTALL).setParseAction(convert_number)

        # multi line string using """
        # Using fix described in http://pyparsing.wikispaces.com/share/view/3778969
        multiline_string = Regex('""".*?"""', re.DOTALL | re.UNICODE).setParseAction(parse_multi_string)
        # single quoted line string
        quoted_string = QuotedString(quoteChar='"', escChar='\\', multiline=True)
        # unquoted string that takes the rest of the line until an optional comment
        # we support .properties multiline support which is like this:
        # line1  \
        # line2 \
        # so a backslash precedes the \n
        unquoted_string = Regex(r'(\\[ \t]*[\r\n]|[^\[\{\n\r\]\}#,=\$])+?(?=($|\$|[ \t]*(//|[\}\],#\n\r])))',
                                re.DOTALL).setParseAction(unescape_string)
        substitution_expr = Regex(r'[ \t]*\$\{[^\}]+\}[ \t]*').setParseAction(create_substitution)
        string_expr = multiline_string | quoted_string | unquoted_string

        value_expr = number_expr | true_expr | false_expr | null_expr | string_expr

        include_expr = (Keyword("include", caseless=True).suppress() - (
            quoted_string | ((Keyword('url') | Keyword('file')) - Literal('(').suppress() - quoted_string - Literal(')').suppress()))) \
            .setParseAction(include_config)

        dict_expr = Forward()
        list_expr = Forward()
        multi_value_expr = ZeroOrMore((Literal(
            '\\') - eol).suppress() | comment_eol | include_expr | substitution_expr | dict_expr | list_expr | value_expr)
        # for a dictionary : or = is optional
        # last zeroOrMore is because we can have t = {a:4} {b: 6} {c: 7} which is dictionary concatenation
        inside_dict_expr = ConfigTreeParser(ZeroOrMore(comment_eol | include_expr | assign_expr | eol_comma))
        dict_expr << Suppress('{') - inside_dict_expr - Suppress('}')
        list_entry = ConcatenatedValueParser(multi_value_expr)
        list_expr << Suppress('[') - ListParser(list_entry - ZeroOrMore(eol_comma - list_entry)) - Suppress(']')

        # special case when we have a value assignment where the string can potentially be the remainder of the line
        assign_expr << Group(
            key -
            ZeroOrMore(comment_no_comma_eol) -
            (dict_expr | Suppress(Literal('=') | Literal(':')) - ZeroOrMore(comment_no_comma_eol) - ConcatenatedValueParser(multi_value_expr))
        )

        # the file can be { ... } where {} can be omitted or []
        config_expr = ZeroOrMore(comment_eol | eol) + (list_expr | dict_expr | inside_dict_expr) + ZeroOrMore(comment_eol | eol_comma)
        config = config_expr.parseString(content, parseAll=True)[0]
        if resolve:
            ConfigParser.resolve_substitutions(config)
        return config
Beispiel #59
0
    def parse(cls, content, basedir=None, resolve=True, unresolved_value=DEFAULT_SUBSTITUTION):
        """parse a HOCON content

        :param content: HOCON content to parse
        :type content: basestring
        :param basedir: directory that the file name of an ``include`` is joined to;
            when None the file name is used as given
        :type basedir: basestring
        :param resolve: if true, resolve substitutions
        :type resolve: boolean
        :param unresolved_value: value assigned to unresolved substitutions.
        If overridden with a default value, every unresolved substitution is replaced by that value.
        If it is set to pyhocon.STR_SUBSTITUTION then the value is replaced by its substitution expression (e.g., ${x})
        :type unresolved_value: one of the *_SUBSTITUTION constants, or the replacement value itself
        :return: a ConfigTree or a list
        :raises ConfigSubstitutionException: when resolve is true, unresolved_value is
            MANDATORY_SUBSTITUTION and some substitution could not be resolved
        """

        # A backslash followed by any single character.
        unescape_pattern = re.compile(r'\\.')

        def replace_escape_sequence(match):
            # Unknown escape sequences are left exactly as written.
            value = match.group(0)
            return cls.REPLACEMENTS.get(value, value)

        def norm_string(value):
            return unescape_pattern.sub(replace_escape_sequence, value)

        def unescape_string(tokens):
            return ConfigUnquotedString(norm_string(tokens[0]))

        def parse_multi_string(tokens):
            # remove the first and last 3 "
            return tokens[0][3: -3]

        def convert_number(tokens):
            # Prefer a base-10 int; fall back to float for anything int() rejects.
            n = tokens[0]
            try:
                return int(n, 10)
            except ValueError:
                return float(n)

        # ${path} or ${?path} for optional substitution
        SUBSTITUTION_PATTERN = r"\$\{(?P<optional>\?)?(?P<variable>[^}]+)\}(?P<ws>[ \t]*)"

        def create_substitution(instring, loc, token):
            # remove the ${ and }
            match = re.match(SUBSTITUTION_PATTERN, token[0])
            variable = match.group('variable')
            ws = match.group('ws')
            optional = match.group('optional') == '?'
            substitution = ConfigSubstitution(variable, optional, ws, instring, loc)
            return substitution

        # "..." with backslash escapes, followed by optional trailing spaces/tabs
        STRING_PATTERN = '"(?P<value>(?:[^"\\\\]|\\\\.)*)"(?P<ws>[ \t]*)'

        def create_quoted_string(instring, loc, token):
            # remove the surrounding quotes and keep the trailing whitespace separately
            match = re.match(STRING_PATTERN, token[0])
            value = norm_string(match.group('value'))
            ws = match.group('ws')
            return ConfigQuotedString(value, ws, instring, loc)

        def include_config(instring, loc, token):
            # Work out whether the include names a URL or a file (optionally
            # wrapped in required(...)), load it unresolved and wrap its
            # content in a ConfigInclude.
            url = None
            file = None
            required = False

            if token[0] == 'required':
                required = True
                final_tokens = token[1:]
            else:
                final_tokens = token

            if len(final_tokens) == 1:  # include "test"
                value = final_tokens[0].value if isinstance(final_tokens[0], ConfigQuotedString) else final_tokens[0]
                if value.startswith("http://") or value.startswith("https://") or value.startswith("file://"):
                    url = value
                else:
                    file = value
            elif len(final_tokens) == 2:  # include url("test") or file("test")
                # Inspect final_tokens, not token: with required(...) the raw
                # token list is shifted by one, so token[1] is the 'url'/'file'
                # keyword and the quoted string would never be unwrapped.
                value = final_tokens[1].value if isinstance(final_tokens[1], ConfigQuotedString) else final_tokens[1]
                if final_tokens[0] == 'url':
                    url = value
                else:
                    file = value

            if url is not None:
                logger.debug('Loading config from url %s', url)
                obj = ConfigFactory.parse_URL(
                    url,
                    resolve=False,
                    required=required,
                    unresolved_value=NO_SUBSTITUTION
                )
            elif file is not None:
                path = file if basedir is None else os.path.join(basedir, file)
                logger.debug('Loading config from file %s', path)
                obj = ConfigFactory.parse_file(
                    path,
                    resolve=False,
                    required=required,
                    unresolved_value=NO_SUBSTITUTION
                )
            else:
                # NOTE(review): the {loc}/{instring} placeholders are only filled in
                # if ConfigException formats its message with these keyword
                # arguments -- confirm against its constructor.
                raise ConfigException('No file or URL specified at: {loc}: {instring}', loc=loc, instring=instring)

            return ConfigInclude(obj if isinstance(obj, list) else obj.items())

        # Newlines are significant in this grammar, so only space and tab are
        # skipped implicitly.
        ParserElement.setDefaultWhitespaceChars(' \t')

        assign_expr = Forward()
        true_expr = Keyword("true", caseless=True).setParseAction(replaceWith(True))
        false_expr = Keyword("false", caseless=True).setParseAction(replaceWith(False))
        null_expr = Keyword("null", caseless=True).setParseAction(replaceWith(NoneValue()))
        key = QuotedString('"', escChar='\\', unquoteResults=False) | Word(alphanums + alphas8bit + '._- /')

        eol = Word('\n\r').suppress()
        eol_comma = Word('\n\r,').suppress()
        comment = (Literal('#') | Literal('//')) - SkipTo(eol | StringEnd())
        comment_eol = Suppress(Optional(eol_comma) + comment)
        comment_no_comma_eol = (comment | eol).suppress()
        # The lookahead only accepts a number when it is followed by the end of
        # input or by a delimiter / comment start.
        number_expr = Regex(r'[+-]?(\d*\.\d+|\d+(\.\d+)?)([eE][+\-]?\d+)?(?=$|[ \t]*([\$\}\],#\n\r]|//))',
                            re.DOTALL).setParseAction(convert_number)

        # multi line string using """
        # Using fix described in http://pyparsing.wikispaces.com/share/view/3778969
        multiline_string = Regex('""".*?"*"""', re.DOTALL | re.UNICODE).setParseAction(parse_multi_string)
        # single quoted line string
        quoted_string = Regex(r'"(?:[^"\\\n]|\\.)*"[ \t]*', re.UNICODE).setParseAction(create_quoted_string)
        # unquoted string that takes the rest of the line until an optional comment
        # we support .properties multiline support which is like this:
        # line1  \
        # line2 \
        # so a backslash precedes the \n
        unquoted_string = Regex(r'(?:[^^`+?!@*&"\[\{\s\]\}#,=\$\\]|\\.)+[ \t]*', re.UNICODE).setParseAction(unescape_string)
        substitution_expr = Regex(r'[ \t]*\$\{[^\}]+\}[ \t]*').setParseAction(create_substitution)
        string_expr = multiline_string | quoted_string | unquoted_string

        value_expr = number_expr | true_expr | false_expr | null_expr | string_expr

        include_content = (quoted_string | ((Keyword('url') | Keyword('file')) - Literal('(').suppress() - quoted_string - Literal(')').suppress()))
        include_expr = (
            Keyword("include", caseless=True).suppress() + (
                include_content | (
                    Keyword("required") - Literal('(').suppress() - include_content - Literal(')').suppress()
                )
            )
        ).setParseAction(include_config)

        root_dict_expr = Forward()
        dict_expr = Forward()
        list_expr = Forward()
        multi_value_expr = ZeroOrMore(comment_eol | include_expr | substitution_expr | dict_expr | list_expr | value_expr | (Literal(
            '\\') - eol).suppress())
        # for a dictionary : or = is optional
        # last zeroOrMore is because we can have t = {a:4} {b: 6} {c: 7} which is dictionary concatenation
        inside_dict_expr = ConfigTreeParser(ZeroOrMore(comment_eol | include_expr | assign_expr | eol_comma))
        inside_root_dict_expr = ConfigTreeParser(ZeroOrMore(comment_eol | include_expr | assign_expr | eol_comma), root=True)
        dict_expr << Suppress('{') - inside_dict_expr - Suppress('}')
        root_dict_expr << Suppress('{') - inside_root_dict_expr - Suppress('}')
        list_entry = ConcatenatedValueParser(multi_value_expr)
        list_expr << Suppress('[') - ListParser(list_entry - ZeroOrMore(eol_comma - list_entry)) - Suppress(']')

        # special case when we have a value assignment where the string can potentially be the remainder of the line
        assign_expr << Group(
            key - ZeroOrMore(comment_no_comma_eol) - (dict_expr | (Literal('=') | Literal(':') | Literal('+=')) - ZeroOrMore(
                comment_no_comma_eol) - ConcatenatedValueParser(multi_value_expr))
        )

        # the file can be { ... } where {} can be omitted or []
        config_expr = ZeroOrMore(comment_eol | eol) + (list_expr | root_dict_expr | inside_root_dict_expr) + ZeroOrMore(
            comment_eol | eol_comma)
        config = config_expr.parseString(content, parseAll=True)[0]

        if resolve:
            # Unresolved substitutions are tolerated only when the caller chose
            # an explicit unresolved_value other than the two strict sentinels.
            allow_unresolved = resolve and unresolved_value is not DEFAULT_SUBSTITUTION and unresolved_value is not MANDATORY_SUBSTITUTION
            has_unresolved = cls.resolve_substitutions(config, allow_unresolved)
            if has_unresolved and unresolved_value is MANDATORY_SUBSTITUTION:
                raise ConfigSubstitutionException('resolve cannot be set to True and unresolved_value to MANDATORY_SUBSTITUTION')

        if unresolved_value is not NO_SUBSTITUTION and unresolved_value is not DEFAULT_SUBSTITUTION:
            cls.unresolve_substitutions_to_value(config, unresolved_value)
        return config
# -*- coding: utf-8 -*-
"""Atomic components; probably shouldn't use these directly"""
import string

from pyparsing import Optional, ParserElement, Regex, Suppress, Word
from six.moves.html_parser import HTMLParser

from regparser.grammar.utils import Marker, SuffixMarker, WordBoundaries


# Set whitespace for all parsing; include unicode whitespace chars.
# HTMLParser.unescape was deprecated in Python 3.4 and removed in 3.9, so
# prefer html.unescape and only fall back to the parser method where the
# html module does not provide it (Python 2).
try:
    from html import unescape as _unescape_entities
except ImportError:
    _unescape_entities = HTMLParser().unescape

ParserElement.setDefaultWhitespaceChars(
    string.whitespace +
    _unescape_entities('&ensp;&emsp;&thinsp;&zwnj;&zwj;&lrm;&rlm;'))


# Parenthesized lowercase marker, captured as "p1": either a single i, v or
# x, or one or two letters from a-z excluding i, v and x. The parentheses
# themselves are suppressed from the results.
lower_p = (
    Suppress("(") +
    Regex(r"[ivx]{1}|[a-hj-uwyz]{1,2}").setResultsName("p1") +
    Suppress(")"))
# Parenthesized run of digits, captured as "p2".
digit_p = (
    Suppress("(") +
    Word(string.digits).setResultsName("p2") +
    Suppress(")"))
# Parenthesized run of the lowercase letters i, v, x, l, c, d, m (the roman
# numeral characters), captured as "p3".
roman_p = (
    Suppress("(") +
    Word("ivxlcdm").setResultsName("p3") +
    Suppress(")"))
upper_p = (
    Suppress("(") +
    Word(string.ascii_uppercase).setResultsName("p4") +