def property_grammar(): ParserElement.setDefaultWhitespaceChars(' ') dpi_setting = (Optional('*')('DEFAULT') + INTEGER('DPI') + Suppress('@') + INTEGER('HZ'))('SETTINGS*') mount_matrix_row = SIGNED_REAL + ',' + SIGNED_REAL + ',' + SIGNED_REAL mount_matrix = (mount_matrix_row + ';' + mount_matrix_row + ';' + mount_matrix_row)('MOUNT_MATRIX') props = (('MOUSE_DPI', Group(OneOrMore(dpi_setting))), ('MOUSE_WHEEL_CLICK_ANGLE', INTEGER), ('MOUSE_WHEEL_CLICK_ANGLE_HORIZONTAL', INTEGER), ('MOUSE_WHEEL_CLICK_COUNT', INTEGER), ('MOUSE_WHEEL_CLICK_COUNT_HORIZONTAL', INTEGER), ('ID_INPUT_TRACKBALL', Literal('1')), ('POINTINGSTICK_SENSITIVITY', INTEGER), ('POINTINGSTICK_CONST_ACCEL', REAL), ('ID_INPUT_TOUCHPAD_INTEGRATION', Or(('internal', 'external'))), ('XKB_FIXED_LAYOUT', STRING), ('XKB_FIXED_VARIANT', STRING), ('ACCEL_MOUNT_MATRIX', mount_matrix), ) fixed_props = [Literal(name)('NAME') - Suppress('=') - val('VALUE') for name, val in props] kbd_props = [Regex(r'KEYBOARD_KEY_[0-9a-f]+')('NAME') - Suppress('=') - ('!' ^ (Optional('!') - Word(alphanums + '_')))('VALUE') ] abs_props = [Regex(r'EVDEV_ABS_[0-9a-f]{2}')('NAME') - Suppress('=') - Word(nums + ':')('VALUE') ] grammar = Or(fixed_props + kbd_props + abs_props) + EOL return grammar
def parse_equation(eq: str) -> dict: ParserElement.enablePackrat() if getrecursionlimit() <= LOWER_RECURSION_LIMIT: setrecursionlimit(LOWER_RECURSION_LIMIT) # Define atoms NUM = pyparsing_common.number VARIABLE = Word(['x', 'y'], exact=True) operand = NUM | VARIABLE # Define production rules expr = infixNotation(operand, [(Literal(op), 1, opAssoc.RIGHT, op_rep) for op in uniops] + [(Literal(op), 2, opAssoc.LEFT, op_rep) for op in binops]) comp = infixNotation(expr, [(Literal(op), 2, opAssoc.LEFT, op_rep) for op in compops]) cond = infixNotation(comp, [(Literal(op), 1, opAssoc.RIGHT, op_rep) for op in logicuniops] + [(Literal(op), 2, opAssoc.LEFT, op_rep) for op in logicbinops]) try: return cond.parseString(eq, parseAll=True)[0] except ParseException as pex: print('Error while parsing "%s": %s' % (eq, str(pex)), file=stderr) return None
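# Illustrative, self-contained sketch of the layered infixNotation pattern used in
# parse_equation above; the operator lists here are placeholders for the module's
# uniops/binops/compops and no result classes are attached, so it runs on its own.
from pyparsing import ParserElement, Word, infixNotation, opAssoc, oneOf, pyparsing_common

ParserElement.enablePackrat()
operand = pyparsing_common.number | Word('xy', exact=1)
arith = infixNotation(operand, [
    (oneOf('+ -'), 1, opAssoc.RIGHT),   # unary sign
    (oneOf('* /'), 2, opAssoc.LEFT),
    (oneOf('+ -'), 2, opAssoc.LEFT),
])
comparison = infixNotation(arith, [(oneOf('< <= > >= =='), 2, opAssoc.LEFT)])
print(comparison.parseString('x + 2 * y >= 10', parseAll=True))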
def property_grammar(): ParserElement.setDefaultWhitespaceChars(' ') setting = Optional('*')('DEFAULT') + INTEGER('DPI') + Suppress( '@') + INTEGER('HZ') props = ( ('MOUSE_DPI', Group(OneOrMore(setting('SETTINGS*')))), ('MOUSE_WHEEL_CLICK_ANGLE', INTEGER), ('ID_INPUT_TRACKBALL', Literal('1')), ('POINTINGSTICK_SENSITIVITY', INTEGER), ('POINTINGSTICK_CONST_ACCEL', REAL), ('ID_INPUT_TOUCHPAD_INTEGRATION', Or(('internal', 'external'))), ) fixed_props = [ Literal(name)('NAME') - Suppress('=') - val('VALUE') for name, val in props ] kbd_props = [ Regex(r'KEYBOARD_KEY_[0-9a-f]+')('NAME') - Suppress('=') - ('!' ^ (Optional('!') - Word(alphanums + '_')))('VALUE') ] abs_props = [ Regex(r'EVDEV_ABS_[0-9a-f]{2}')('NAME') - Suppress('=') - Word(nums + ':')('VALUE') ] grammar = Or(fixed_props + kbd_props + abs_props) return grammar
def __init__(self): ParserElement.enablePackrat() decimal_integer = Word(nums).setName('decimal integer') \ .setParseAction(lambda t: int(''.join(t))) hexadecimal_integer = Combine(Word(nums, hexnums) + Word('hH')) \ .setName('hexadecimal integer') \ .setParseAction(lambda t: int((''.join(t))[:-1], 16)) identifier = Word(alphas, alphanums + '_@?') \ .setName('identifier') # XXX and maybe dollar sign? baseExpr = (hexadecimal_integer | decimal_integer | identifier) operators = [ (oneOf('+ - ~'), 1, opAssoc.RIGHT, self.nest_operand_pairs), (oneOf('* /'), 2, opAssoc.LEFT, self.nest_operand_pairs), (oneOf('+ -'), 2, opAssoc.LEFT, self.nest_operand_pairs), (oneOf('<< >>'), 2, opAssoc.LEFT, self.nest_operand_pairs), (oneOf('&'), 2, opAssoc.LEFT, self.nest_operand_pairs), (oneOf('^'), 2, opAssoc.LEFT, self.nest_operand_pairs), (oneOf('|'), 2, opAssoc.LEFT, self.nest_operand_pairs), ] self.expr = infixNotation(baseExpr, operators) + StringEnd()
def _path_contains_grammar(grammar: ParserElement, path: str) -> dict: """ Return a dict mapping the path to the lines where the grammar matched. :param grammar: Grammar to be searched for in path. :param path: Path to the destination file. """ with open(path, encoding='latin-1') as file_d: lines = file_d.read().splitlines() lines_length = tuple(map(lambda x: len(x) + 1, lines)) file_as_string = '\n'.join(lines) # Given scanString expands tabs to 'n' number of spaces # And we count tabs as '1' char width # And scanString reports the match column relative to the expanded version # When a file contains tabs # Then the line numbers will get an offset # Given we force to parse without expanding tabs grammar.parseWithTabs() # Then the line numbers are reported correctly matched_lines = [ _get_line_number(start, lines_length) for _, start, _ in grammar.scanString(file_as_string) ] if matched_lines: return { path: { 'lines': str(matched_lines)[1:-1], 'sha256': get_sha256(path), } } return {}
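# Minimal, self-contained sketch of the scanString/line-number technique used above, with a
# toy grammar and input; the helpers _get_line_number/get_sha256 are not reproduced here, and
# pyparsing's own lineno() stands in for the line lookup.
from pyparsing import Word, nums, lineno

toy_grammar = Word(nums)
toy_grammar.parseWithTabs()          # keep tabs intact so reported offsets stay correct
text = "alpha 12\nbeta\ngamma\t7\n"
matched = [lineno(start, text) for _, start, _ in toy_grammar.scanString(text)]
print(matched)                       # -> [1, 3]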
def property_grammar(): ParserElement.setDefaultWhitespaceChars(' ') dpi_setting = Group( Optional('*')('DEFAULT') + INTEGER('DPI') + Suppress('@') + INTEGER('HZ'))('SETTINGS*') mount_matrix_row = SIGNED_REAL + ',' + SIGNED_REAL + ',' + SIGNED_REAL mount_matrix = Group(mount_matrix_row + ';' + mount_matrix_row + ';' + mount_matrix_row)('MOUNT_MATRIX') xkb_setting = Optional(Word(alphanums + '+-/@._')) props = ( ('MOUSE_DPI', Group(OneOrMore(dpi_setting))), ('MOUSE_WHEEL_CLICK_ANGLE', INTEGER), ('MOUSE_WHEEL_CLICK_ANGLE_HORIZONTAL', INTEGER), ('MOUSE_WHEEL_CLICK_COUNT', INTEGER), ('MOUSE_WHEEL_CLICK_COUNT_HORIZONTAL', INTEGER), ('ID_AUTOSUSPEND', Or((Literal('0'), Literal('1')))), ('ID_INPUT', Or((Literal('0'), Literal('1')))), ('ID_INPUT_ACCELEROMETER', Or((Literal('0'), Literal('1')))), ('ID_INPUT_JOYSTICK', Or((Literal('0'), Literal('1')))), ('ID_INPUT_KEY', Or((Literal('0'), Literal('1')))), ('ID_INPUT_KEYBOARD', Or((Literal('0'), Literal('1')))), ('ID_INPUT_MOUSE', Or((Literal('0'), Literal('1')))), ('ID_INPUT_POINTINGSTICK', Or((Literal('0'), Literal('1')))), ('ID_INPUT_SWITCH', Or((Literal('0'), Literal('1')))), ('ID_INPUT_TABLET', Or((Literal('0'), Literal('1')))), ('ID_INPUT_TABLET_PAD', Or((Literal('0'), Literal('1')))), ('ID_INPUT_TOUCHPAD', Or((Literal('0'), Literal('1')))), ('ID_INPUT_TOUCHSCREEN', Or((Literal('0'), Literal('1')))), ('ID_INPUT_TRACKBALL', Or((Literal('0'), Literal('1')))), ('POINTINGSTICK_SENSITIVITY', INTEGER), ('POINTINGSTICK_CONST_ACCEL', REAL), ('ID_INPUT_JOYSTICK_INTEGRATION', Or(('internal', 'external'))), ('ID_INPUT_TOUCHPAD_INTEGRATION', Or(('internal', 'external'))), ('XKB_FIXED_LAYOUT', xkb_setting), ('XKB_FIXED_VARIANT', xkb_setting), ('XKB_FIXED_MODEL', xkb_setting), ('KEYBOARD_LED_NUMLOCK', Literal('0')), ('KEYBOARD_LED_CAPSLOCK', Literal('0')), ('ACCEL_MOUNT_MATRIX', mount_matrix), ('ACCEL_LOCATION', Or(('display', 'base'))), ('PROXIMITY_NEAR_LEVEL', INTEGER), ) fixed_props = [ Literal(name)('NAME') - Suppress('=') - val('VALUE') for name, val in props ] kbd_props = [ Regex(r'KEYBOARD_KEY_[0-9a-f]+')('NAME') - Suppress('=') - ('!' ^ (Optional('!') - Word(alphanums + '_')))('VALUE') ] abs_props = [ Regex(r'EVDEV_ABS_[0-9a-f]{2}')('NAME') - Suppress('=') - Word(nums + ':')('VALUE') ] grammar = Or(fixed_props + kbd_props + abs_props) + EOL return grammar
def _evaluate_expression(expr_list, keyword_list, referred_checks_result): # Convert the expression now in the format to be parsed by pyparsing module parsed_list = [] for expr in expr_list: if expr.upper() not in keyword_list: # Check reference is passed. Pass the fetched value instead of original check id parsed_list.append(referred_checks_result.get(expr)) else: parsed_list.append(expr.upper()) parsed_expr = " ".join(parsed_list) # Logic to use boolean expression parser using pyparsing library # We are passing the boolean expression in the following form: # check1 and not (check2 or (check3 and not check4) ) # --> check1 and not ( check2 or ( check3 and not check4 ) ) # --> True and not ( False or ( True and not False ) ) ParserElement.enablePackrat() TRUE = Keyword("True") FALSE = Keyword("False") boolOperand = TRUE | FALSE | Word(alphas, max=1) boolOperand.setParseAction(BoolOperand) boolExpr = infixNotation( boolOperand, [ ("NOT", 1, opAssoc.RIGHT, BoolNot), ("AND", 2, opAssoc.LEFT, BoolAnd), ("OR", 2, opAssoc.LEFT, BoolOr), ], ) return boolExpr.parseString(parsed_expr)[0]
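# Self-contained sketch of the same boolean infixNotation recipe, with inline lambdas
# standing in for the BoolOperand/BoolNot/BoolAnd/BoolOr classes referenced above.
from pyparsing import Keyword, infixNotation, opAssoc

TRUE = Keyword("True").setParseAction(lambda toks: True)
FALSE = Keyword("False").setParseAction(lambda toks: False)
bool_expr = infixNotation(TRUE | FALSE, [
    ("NOT", 1, opAssoc.RIGHT, lambda t: not t[0][1]),
    ("AND", 2, opAssoc.LEFT, lambda t: all(t[0][0::2])),
    ("OR", 2, opAssoc.LEFT, lambda t: any(t[0][0::2])),
])
print(bool_expr.parseString("True AND NOT ( False OR False )")[0])   # -> True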
def defineParsers(): #Enable a fast parsing mode with caching. ParserElement.enablePackrat() #end of line terminates statements, so it is not regular whitespace ParserElement.setDefaultWhitespaceChars('\t ') func_call = Forward() #forward declaration because this is a recursive rule #The "terminal" rules symbol = Word(alphas+'_-', alphanums+'_-') .setParseAction(action_symbol) q_symbol = quotedString .setParseAction(action_q_symbol) bracket_term = Literal("(").suppress() - func_call \ + Literal(")").suppress() word = symbol | q_symbol | bracket_term #The function call #Parse: "foo | bar | baz" or "foo" pipeline = (word + ZeroOrMore("|" - word)) .setParseAction(action_pipeline) #Parse "foo|bar op1 op2 op3" func_call << (pipeline - ZeroOrMore(word)) .setParseAction(action_func_call) #High level structure of program line = LineEnd() | func_call - LineEnd() #empty line or function call program = ZeroOrMore(line) + StringEnd() #multiple lines are a program #define the comments program.ignore('%' + restOfLine) #no tab expansion program.parseWithTabs() #return additional func_call parser to make testing more easy return program, func_call
def parser(): global _parser if _parser is None: ParserElement.setDefaultWhitespaceChars("") lbrack, rbrack, lbrace, rbrace, lparen, rparen = map(Literal, "[]{}()") reMacro = Combine("\\" + oneOf(list("dws"))) escapedChar = ~ reMacro + Combine("\\" + oneOf(list(printables))) reLiteralChar = "".join(c for c in printables if c not in r"\[]{}().*?+|") + " \t" reRange = Combine(lbrack + SkipTo(rbrack, ignore=escapedChar) + rbrack) reLiteral = (escapedChar | oneOf(list(reLiteralChar))) reDot = Literal(".") repetition = ( (lbrace + Word(nums).setResultsName("count") + rbrace) | (lbrace + Word(nums).setResultsName("minCount") + "," + Word(nums).setResultsName("maxCount") + rbrace) | oneOf(list("*+?")) ) reRange.setParseAction(handle_range) reLiteral.setParseAction(handle_literal) reMacro.setParseAction(handle_macro) reDot.setParseAction(handle_dot) reTerm = (reLiteral | reRange | reMacro | reDot) reExpr = operatorPrecedence(reTerm, [ (repetition, 1, opAssoc.LEFT, handle_repetition), (None, 2, opAssoc.LEFT, handle_sequence), (Suppress('|'), 2, opAssoc.LEFT, handle_alternative), ]) _parser = reExpr return _parser
def hwdb_grammar(): ParserElement.setDefaultWhitespaceChars('') prefix = Or(category + ':' + Or(conn) + ':' for category, conn in TYPES.items()) matchline_typed = Combine(prefix + Word(printables + ' ' + '®')) matchline_general = Combine( Or(GENERAL_MATCHES) + ':' + Word(printables + ' ' + '®')) matchline = (matchline_typed | matchline_general) + EOL propertyline = ( White(' ', exact=1).suppress() + Combine(UDEV_TAG - '=' - Optional(Word(alphanums + '_=:@*.!-;, "/')) - Optional(pythonStyleComment)) + EOL) propertycomment = White(' ', exact=1) + pythonStyleComment + EOL group = ( OneOrMore(matchline('MATCHES*') ^ COMMENTLINE.suppress()) - OneOrMore(propertyline('PROPERTIES*') ^ propertycomment.suppress()) - (EMPTYLINE ^ stringEnd()).suppress()) commentgroup = OneOrMore(COMMENTLINE).suppress() - EMPTYLINE.suppress() grammar = OneOrMore(Group(group)('GROUPS*') ^ commentgroup) + stringEnd() return grammar
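# Rough, self-contained miniature of the match-line/property-line structure above; the real
# EOL, UDEV_TAG, TYPES, COMMENTLINE and EMPTYLINE come from the surrounding module, so tiny
# stand-ins are defined here just to show the shape of one hwdb group.
from pyparsing import (ParserElement, Combine, Literal, LineEnd, OneOrMore, Word, White,
                       alphanums, printables)

ParserElement.setDefaultWhitespaceChars('')
EOL = LineEnd().suppress()
matchline = Combine(Literal('evdev:') + Word(printables + ' ')) + EOL
propertyline = (White(' ', exact=1).suppress()
                + Combine(Word(alphanums + '_') - '=' - Word(alphanums + '_:')) + EOL)
group = OneOrMore(matchline('MATCHES*')) - OneOrMore(propertyline('PROPERTIES*'))
print(group.parseString('evdev:input:b0003*\n ID_INPUT_TOUCHPAD=1\n', parseAll=True).dump())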
def set_parse_action_magic(rule_name: str, parser_element: pp.ParserElement) -> None: if rule_name == rule_name.upper(): return if getattr(parser_element, 'name', None) and parser_element.name.isidentifier(): rule_name = parser_element.name if rule_name in ('bin_op', ): def bin_op_parse_action(s, loc, tocs): node = tocs[0] if not isinstance(node, AstNode): node = bin_op_parse_action(s, loc, node) for i in range(1, len(tocs) - 1, 2): second_node = tocs[i + 1] if not isinstance(second_node, AstNode): second_node = bin_op_parse_action(s, loc, second_node) node = BinOpNode(BinOp(tocs[i]), node, second_node, loc=loc) return node parser_element.setParseAction(bin_op_parse_action) else: cls = ''.join(x.capitalize() for x in rule_name.split('_')) + 'Node' with suppress(NameError): cls = eval(cls) if not inspect.isabstract(cls): def parse_action(s, loc, tocs): if cls is FuncNode: return FuncNode(tocs[0], tocs[1], tocs[2:-1], tocs[-1], loc=loc) else: return cls(*tocs, loc=loc) parser_element.setParseAction(parse_action)
def read_sets_java(string): from pyparsing import nestedExpr, alphas, Word, nums, ParserElement, delimitedList ParserElement.setDefaultWhitespaceChars(" ,") element = Word(alphas + nums).setParseAction(parse_elem_java) elements = delimitedList(element) setofsets = nestedExpr("[", "]", content=elements).setParseAction(lambda x: frozenset(x[0])) return setofsets.parseString(string).asList()[0]
def set_parse_action_magic(rule_name: str, parser: pp.ParserElement)->None: if rule_name == rule_name.upper(): return if getattr(parser, 'name', None) and parser.name.isidentifier(): rule_name = parser.name if rule_name in ('bin_op', ): def bin_op_parse_action(s, loc, tocs): node = tocs[0] if not isinstance(node, AstNode): node = bin_op_parse_action(s, loc, node) for i in range(1, len(tocs) - 1, 2): secondNode = tocs[i + 1] if not isinstance(secondNode, AstNode): secondNode = bin_op_parse_action(s, loc, secondNode) node = BinOpNode(BinOp(tocs[i]), node, secondNode) return node parser.setParseAction(bin_op_parse_action) else: cls = ''.join(x.capitalize() for x in rule_name.split('_')) + 'Node' #разбитие названия переменной на куски по _, создание заглавной первой буквы и прибавление Node with suppress(NameError): cls = eval(cls) if not inspect.isabstract(cls): def parse_action(s, loc, tocs): return cls(*tocs) parser.setParseAction(parse_action)
def hwdb_grammar(): ParserElement.setDefaultWhitespaceChars('') prefix = Or(category + ':' + Or(conn) + ':' for category, conn in TYPES.items()) matchline = Combine(prefix + Word(printables + ' ' + '®')) + EOL propertyline = ( White(' ', exact=1).suppress() + Combine(UDEV_TAG - '=' - Word(alphanums + '_=:@*.! ') - Optional(pythonStyleComment)) + EOL ) propertycomment = White(' ', exact=1) + pythonStyleComment + EOL group = ( OneOrMore(matchline('MATCHES*') ^ COMMENTLINE.suppress()) - OneOrMore(propertyline('PROPERTIES*') ^ propertycomment.suppress()) - (EMPTYLINE ^ stringEnd()).suppress() ) commentgroup = OneOrMore(COMMENTLINE).suppress() - EMPTYLINE.suppress() grammar = OneOrMore(group('GROUPS*') ^ commentgroup) + stringEnd() return grammar
def intersperse_parser(parser_list: list, interspersed: pp.ParserElement) -> pp.ParserElement: ret = interspersed.copy() for i in parser_list: ret = ret + i.copy() + interspersed.copy() return ret
def __init__(self): from pyparsing import (ParserElement, StringEnd, LineEnd, Literal, pythonStyleComment, ZeroOrMore, Suppress, Optional, Combine, OneOrMore, Regex, oneOf, QuotedString, Group, ParseException) ParserElement.setDefaultWhitespaceChars("\t ") EOF = StringEnd() EOL = ~EOF + LineEnd() # EOL must not match on EOF escape = Literal("\\") comment = pythonStyleComment junk = ZeroOrMore(comment | EOL).suppress() ## word (i.e: single argument string) word = Suppress(escape + EOL + Optional(comment)) \ | Combine(OneOrMore( escape.suppress() + Regex(".") | QuotedString("'", escChar='\\', multiline=True) | QuotedString('"', escChar='\\', multiline=True) | Regex("[^ \t\r\n\f\v\\\\$&<>();\|\'\"`]+") | Suppress(escape + EOL) )) ## redirector (aka bash file redirectors, such as "2>&1" sequences) fd_src = Regex("[0-2]").setParseAction(lambda t: int(t[0])) fd_dst = Suppress("&") + fd_src # "[n]<word" || "[n]<&word" || "[n]<&digit-" fd_redir = (Optional(fd_src, 0) + Literal("<") |Optional(fd_src, 1) + Literal(">"))\ +(word | (fd_dst + Optional("-"))) # "&>word" || ">&word" full_redir = (oneOf("&> >&") + word)\ .setParseAction(lambda t:("&" ,">", t[-1])) # "<<<word" || "<<[-]word" here_doc = Regex("<<(<|-?)") + word # "[n]>>word" add_to_file = Optional(fd_src | Literal("&"), 1) + \ Literal(">>") + word # "[n]<>word" fd_bind = Optional(fd_src, 0) + Literal("<>") + word redirector = (fd_redir | full_redir | here_doc | add_to_file | fd_bind)\ .setParseAction(lambda token: tuple(token)) ## single command (args/redir list) command = Group(OneOrMore(redirector | word)) ## logical operators (section splits) semicolon = Suppress(";") + junk connector = (oneOf("&& || |") + junk) | semicolon ## pipeline, aka logical block of interconnected commands pipeline = junk + Group(command + ZeroOrMore(connector + command) + Optional(semicolon)) # define object attributes self.LEXER = pipeline.ignore(comment) + EOF self.parseException = ParseException
def parser(): global _parser if _parser is None: ParserElement.setDefaultWhitespaceChars("") lbrack, rbrack, lbrace, rbrace, lparen, rparen, colon, qmark = map( Literal, "[]{}():?") reMacro = Combine("\\" + oneOf(list("dws"))) escapedChar = ~reMacro + Combine("\\" + oneOf(list(printables))) reLiteralChar = "".join( c for c in printables if c not in r"\[]{}().*?+|") + " \t" reRange = Combine(lbrack + SkipTo(rbrack, ignore=escapedChar) + rbrack) reLiteral = (escapedChar | oneOf(list(reLiteralChar))) reNonCaptureGroup = Suppress("?:") reDot = Literal(".") repetition = ((lbrace + Word(nums).setResultsName("count") + rbrace) | (lbrace + Word(nums).setResultsName("minCount") + "," + Word(nums).setResultsName("maxCount") + rbrace) | oneOf(list("*+?"))) reRange.setParseAction(handleRange) reLiteral.setParseAction(handleLiteral) reMacro.setParseAction(handleMacro) reDot.setParseAction(handleDot) reTerm = (reLiteral | reRange | reMacro | reDot | reNonCaptureGroup) reExpr = infixNotation(reTerm, [ (repetition, 1, opAssoc.LEFT, handleRepetition), (None, 2, opAssoc.LEFT, handleSequence), (Suppress('|'), 2, opAssoc.LEFT, handleAlternative), ]) _parser = reExpr return _parser
def _make_grammar(): """Make a grammar for parsing a sanitized F5 config The syntax is Tcl, except for a 'Sanitized out =' string at the top. We only parse enough to find commands and their arguments. Return a ParseResult where 'prog' is a list of commands. Each command has a name and some arguments. These arguments can be further nested lists in case of '{ ... }' and '[ ... ]' blocks. """ ParserElement.setDefaultWhitespaceChars(' ') white = Suppress(Optional(White())) comment = white + '#' - restOfLine lbrace, rbrace = Suppress('{'), Suppress('}') lbracket, rbracket = Suppress('['), Suppress(']') cmds = Forward() braces = Group(lbrace - white - Optional(cmds) - white - rbrace) brackets = Group(lbracket - white - Optional(cmds) - white - rbracket) string = QuotedString(quoteChar='"', escChar='\\', multiline=True) word = string | braces | brackets | Word(alphanums + '-:()_./<>%*$|!=&?') cmd = Group(word('name') + ZeroOrMore(word)('args')) cmd_sep = OneOrMore(Literal('\n') | ';') cmds << (cmd + ZeroOrMore(Suppress(cmd_sep) + cmd)) prog_end = Suppress(Optional(cmd_sep)) + StringEnd() prog = cmds + prog_end sanitized_begin = Suppress(Optional(White())) sanitized = sanitized_begin + Optional('Sanitized out =') + prog('prog') sanitized.ignore(comment) return sanitized
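# Hypothetical one-line usage of the grammar above (a made-up, F5-looking command).
tree = _make_grammar().parseString('ltm pool mypool { members { 10.0.0.1:80 } }')
print(tree.asList())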
def __init__(self): ParserElement.enablePackrat() hexadecimal_integer = Combine(CaselessLiteral('0x') + Word(hexnums)) \ .setName('hexadecimal integer') \ .setParseAction(lambda *t: int(t[2][0][2:], 16)) decimal_integer = Word(nums) \ .setName('decimal integer') \ .setParseAction(lambda t: int(''.join(t))) identifier = Word(alphanums + '_$') \ .setName('identifier') baseExpr = (hexadecimal_integer | decimal_integer | identifier) operators = [ (oneOf('+ - ~ !'), 1, opAssoc.RIGHT, self.nest_operand_pairs), (oneOf('* /'), 2, opAssoc.LEFT, self.nest_operand_pairs), (oneOf('+ -'), 2, opAssoc.LEFT, self.nest_operand_pairs), (oneOf('<< >>'), 2, opAssoc.LEFT, self.nest_operand_pairs), (oneOf('<= < > >='), 2, opAssoc.LEFT, self.nest_operand_pairs), (oneOf('== !='), 2, opAssoc.LEFT, self.nest_operand_pairs), (oneOf('&'), 2, opAssoc.LEFT, self.nest_operand_pairs), (oneOf('^'), 2, opAssoc.LEFT, self.nest_operand_pairs), (oneOf('|'), 2, opAssoc.LEFT, self.nest_operand_pairs), ] self.expr = infixNotation(baseExpr, operators) + StringEnd()
def property_grammar(): ParserElement.setDefaultWhitespaceChars(' ') model_props = [ Regex(r'LIBINPUT_MODEL_[_0-9A-Z]+')('NAME') - Suppress('=') - (Literal('1'))('VALUE') ] dimension = INTEGER('X') + Suppress('x') + INTEGER('Y') crange = INTEGER('X') + Suppress(':') + INTEGER('Y') vprops = ( ('LIBINPUT_ATTR_SIZE_HINT', Group(dimension('SETTINGS*'))), ('LIBINPUT_ATTR_RESOLUTION_HINT', Group(dimension('SETTINGS*'))), ('LIBINPUT_ATTR_PRESSURE_RANGE', Group(crange('SETTINGS*'))), ('LIBINPUT_ATTR_TOUCH_SIZE_RANGE', Group(crange('SETTINGS*'))), ('LIBINPUT_ATTR_TPKBCOMBO_LAYOUT', Or(('below'))), ('LIBINPUT_ATTR_LID_SWITCH_RELIABILITY', Or(('reliable', 'write_open'))), ('LIBINPUT_ATTR_KEYBOARD_INTEGRATION', Or(('internal', 'external'))), ) value_props = [Literal(name)('NAME') - Suppress('=') - val('VALUE') for name, val in vprops] tprops = ( ('LIBINPUT_ATTR_PALM_PRESSURE_THRESHOLD', INTEGER('X')), ('LIBINPUT_ATTR_PALM_SIZE_THRESHOLD', INTEGER('X')), ) typed_props = [Literal(name)('NAME') - Suppress('=') - val for name, val in tprops] grammar = Or(model_props + value_props + typed_props) return grammar
def property_grammar(): ParserElement.setDefaultWhitespaceChars(' ') model_props = [ Regex(r'LIBINPUT_MODEL_[_0-9A-Z]+')('NAME') - Suppress('=') - (Literal('1'))('VALUE') ] dimension = INTEGER('X') + Suppress('x') + INTEGER('Y') sz_props = ( ('LIBINPUT_ATTR_SIZE_HINT', Group(dimension('SETTINGS*'))), ('LIBINPUT_ATTR_RESOLUTION_HINT', Group(dimension('SETTINGS*'))), ) size_props = [ Literal(name)('NAME') - Suppress('=') - val('VALUE') for name, val in sz_props ] reliability_tags = Or(('reliable', 'write_open')) reliability = [ Literal('LIBINPUT_ATTR_LID_SWITCH_RELIABILITY')('NAME') - Suppress('=') - reliability_tags('VALUE') ] tpkbcombo_tags = Or(('below')) tpkbcombo = [ Literal('LIBINPUT_ATTR_TPKBCOMBO_LAYOUT')('NAME') - Suppress('=') - tpkbcombo_tags('VALUE') ] grammar = Or(model_props + size_props + reliability + tpkbcombo) return grammar
def property_grammar(): ParserElement.setDefaultWhitespaceChars(' ') setting = Optional('*')('DEFAULT') + INTEGER('DPI') + Suppress('@') + INTEGER('HZ') props = (('MOUSE_DPI', Group(OneOrMore(setting('SETTINGS*')))), ('MOUSE_WHEEL_CLICK_ANGLE', INTEGER), ('MOUSE_WHEEL_CLICK_ANGLE_HORIZONTAL', INTEGER), ('ID_INPUT_TRACKBALL', Literal('1')), ('POINTINGSTICK_SENSITIVITY', INTEGER), ('POINTINGSTICK_CONST_ACCEL', REAL), ('ID_INPUT_TOUCHPAD_INTEGRATION', Or(('internal', 'external'))), ) fixed_props = [Literal(name)('NAME') - Suppress('=') - val('VALUE') for name, val in props] kbd_props = [Regex(r'KEYBOARD_KEY_[0-9a-f]+')('NAME') - Suppress('=') - ('!' ^ (Optional('!') - Word(alphanums + '_')))('VALUE') ] abs_props = [Regex(r'EVDEV_ABS_[0-9a-f]{2}')('NAME') - Suppress('=') - Word(nums + ':')('VALUE') ] grammar = Or(fixed_props + kbd_props + abs_props) return grammar
def __init__(self): """ Initialise the class and enable packrat parsing. Packrat speeds up parsing considerably. """ ParserElement.enablePackrat()
def __init__(self): from pyparsing import (ParserElement, StringEnd, LineEnd, Literal, pythonStyleComment, ZeroOrMore, Suppress, Optional, Combine, OneOrMore, Regex, oneOf, QuotedString, Group, ParseException) ParserElement.setDefaultWhitespaceChars("\t ") EOF = StringEnd() EOL = ~EOF + LineEnd() # EOL must not match on EOF escape = Literal("\\") comment = pythonStyleComment junk = ZeroOrMore(comment | EOL).suppress() # word (i.e: single argument string) word = Suppress(escape + EOL + Optional(comment)) \ | Combine(OneOrMore( escape.suppress() + Regex(".") | QuotedString("'", escChar='\\', multiline=True) | QuotedString('"', escChar='\\', multiline=True) | Regex("[^ \t\r\n\f\v\\\\$&<>();\|\'\"`]+") | Suppress(escape + EOL))) # redirector (aka bash file redirectors, such as "2>&1" sequences) fd_src = Regex("[0-2]").setParseAction(lambda t: int(t[0])) fd_dst = Suppress("&") + fd_src # "[n]<word" || "[n]<&word" || "[n]<&digit-" fd_redir = (Optional(fd_src, 0) + Literal("<") | Optional(fd_src, 1) + Literal(">")) + \ (word | (fd_dst + Optional("-"))) # "&>word" || ">&word" obj = (oneOf("&> >&") + word) full_redir = obj.setParseAction(lambda t: ("&", ">", t[-1])) # "<<<word" || "<<[-]word" here_doc = Regex("<<(<|-?)") + word # "[n]>>word" add_to_file = (Optional(fd_src | Literal("&"), 1) + Literal(">>") + word) # "[n]<>word" fd_bind = Optional(fd_src, 0) + Literal("<>") + word obj = (fd_redir | full_redir | here_doc | add_to_file | fd_bind) redirector = obj.setParseAction(lambda token: tuple(token)) # single command (args/redir list) command = Group(OneOrMore(redirector | word)) # logical operators (section splits) semicolon = Suppress(";") + junk connector = (oneOf("&& || |") + junk) | semicolon # pipeline, aka logical block of interconnected commands pipeline = junk + Group(command + ZeroOrMore(connector + command) + Optional(semicolon)) # define object attributes self.LEXER = pipeline.ignore(comment) + EOF self.parseException = ParseException
def __init__(self): # speed up infixNotation considerably at the price of some cache memory ParserElement.enablePackrat() boolean = Keyword('True') | Keyword('False') none = Keyword('None') integer = Word(nums) real = Combine(Word(nums) + "." + Word(nums)) string = (QuotedString('"', escChar='\\') | QuotedString("'", escChar='\\')) regex = QuotedString('/', escChar='\\') identifier = Word(alphas, alphanums + '_') dereference = infixNotation(identifier, [ (Literal('.'), 2, opAssoc.LEFT, EvalArith), ]) result = (Keyword('bad') | Keyword('fail') | Keyword('good') | Keyword('ignore') | Keyword('unknown')) rval = boolean | none | real | integer | string | regex | result | dereference rvallist = Group( Suppress('[') + Optional(delimitedList(rval)) + Suppress(']')) rvalset = Group( Suppress('{') + Optional(delimitedList(rval)) + Suppress('}')) operand = rval | rvallist | rvalset # parse actions replace the parsed tokens with an instantiated object # which we can later call into for evaluation of its content boolean.setParseAction(EvalBoolean) none.setParseAction(EvalNone) integer.setParseAction(EvalInteger) real.setParseAction(EvalReal) string.setParseAction(EvalString) regex.setParseAction(EvalRegex) identifier.setParseAction(EvalIdentifier) result.setParseAction(EvalResult) rvallist.setParseAction(EvalList) rvalset.setParseAction(EvalSet) identity_test = Keyword('is') + ~Keyword('not') | Combine( Keyword('is') + Keyword('not'), adjacent=False, joinString=' ') membership_test = Keyword('in') | Combine( Keyword('not') + Keyword('in'), adjacent=False, joinString=' ') comparison_op = oneOf('< <= > >= != == isdisjoint') comparison = identity_test | membership_test | comparison_op self.parser = infixNotation(operand, [ (Literal('**'), 2, opAssoc.LEFT, EvalPower), (oneOf('+ - ~'), 1, opAssoc.RIGHT, EvalModifier), (oneOf('* / // %'), 2, opAssoc.LEFT, EvalArith), (oneOf('+ -'), 2, opAssoc.LEFT, EvalArith), (oneOf('<< >>'), 2, opAssoc.LEFT, EvalArith), (Literal('&'), 2, opAssoc.LEFT, EvalArith), (Literal('^'), 2, opAssoc.LEFT, EvalArith), (Literal('|'), 2, opAssoc.LEFT, EvalArith), (comparison, 2, opAssoc.LEFT, EvalLogic), (Keyword('not'), 1, opAssoc.RIGHT, EvalModifier), (Keyword('and'), 2, opAssoc.LEFT, EvalLogic), (Keyword('or'), 2, opAssoc.LEFT, EvalLogic), (Keyword('->'), 2, opAssoc.LEFT, EvalArith), ])
def read_sets_java(string): from pyparsing import nestedExpr, alphas, Word, nums, ParserElement, delimitedList ParserElement.setDefaultWhitespaceChars(" ,") element = Word(alphas + nums).setParseAction(parse_elem_java) elements = delimitedList(element) setofsets = nestedExpr( "[", "]", content=elements).setParseAction(lambda x: frozenset(x[0])) return setofsets.parseString(string).asList()[0]
def __init__(self, alphabet): self.operators = alphabet.getOperators() self.constants = alphabet.getConstants() self.notNeedSpace = alphabet.notNeedSpace() self.ffactory = FormulaFactory() self.__createGram() ParserElement.enablePackrat()
def __init__(self,showErrors=True,debug=False): ''' Constructor Args: showErrors(bool): True if errors should be shown/printed debug(bool): True if debugging should be enabled ''' self.showError=showErrors self.debug=debug self.grammar=None ParserElement.setDefaultWhitespaceChars(" \t")
def PyParsingDefaultWhitespaceChars(whitespace_chars): '''Set the given whitespace_chars as pyparsing's default whitespace chars while the context manager is active. Since ParserElement.DEFAULT_WHITE_CHARS is a global variable, this method is not thread-safe (but no pyparsing parser construction is thread-safe for the same reason anyway). ''' # A possible solution to this problem: # Since the pyparsing code is basically a single big file, we could just copy it (under aspio/vendor or something like that) and have our own "private" version of pyparsing. (TODO: think about this some more and maybe do it) previous_whitespace_chars = ParserElement.DEFAULT_WHITE_CHARS ParserElement.setDefaultWhitespaceChars(whitespace_chars) yield ParserElement.setDefaultWhitespaceChars(previous_whitespace_chars)
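# Hypothetical use of the context manager above; it assumes the function is decorated with
# @contextlib.contextmanager in its module, which the yield-based body implies.
from pyparsing import Word, alphas, nums

with PyParsingDefaultWhitespaceChars(' \t'):
    # elements built inside the block treat newlines as significant, not skippable
    key_value = Word(alphas) + Word(nums)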
def _parse_items(self, source): ParserElement.setDefaultWhitespaceChars(' \t\r') EOL = LineEnd().suppress() comment = Literal('#') + Optional( restOfLine ) + EOL string = CharsNotIn("\n") line = Group( Word(alphanums + '-')('key') + Literal(':').suppress() + Optional(Combine(string + ZeroOrMore(EOL + Literal(' ') + string)))("value") + EOL ) group = ZeroOrMore(line) group.ignore(comment) return group.parseString(source, True)
def get_parser(): from pyparsing import CharsNotIn, ParserElement, Suppress, ZeroOrMore ParserElement.enablePackrat() word = CharsNotIn(f"{PERIOD}{LBRACK}{RBRACK}") idx = Suppress(LBRACK) + word + Suppress(RBRACK) attr = Suppress(PERIOD) + word parser = word + ZeroOrMore(attr ^ idx) parser.setParseAction(PERIOD.join) return parser
def build_parser(): """ Build a pyparsing parser for our custom topology description language. :return: A pyparsing parser. :rtype: pyparsing.MatchFirst """ ParserElement.setDefaultWhitespaceChars(' \t') nl = Suppress(LineEnd()) inumber = Word(nums).setParseAction(lambda l, s, t: int(t[0])) fnumber = (Combine( Optional('-') + Word(nums) + '.' + Word(nums) + Optional('E' | 'e' + Optional('-') + Word(nums))) ).setParseAction(lambda toks: float(toks[0])) boolean = (CaselessLiteral('true') | CaselessLiteral('false') ).setParseAction(lambda l, s, t: t[0].casefold() == 'true') comment = Literal('#') + restOfLine + nl text = QuotedString('"') identifier = Word(alphas, alphanums + '_') empty_line = LineStart() + LineEnd() item_list = ((text | fnumber | inumber | boolean) + Optional(Suppress(',')) + Optional(nl)) custom_list = (Suppress('(') + Optional(nl) + Group(OneOrMore(item_list)) + Optional(nl) + Suppress(')')).setParseAction(lambda tok: tok.asList()) attribute = Group( identifier('key') + Suppress(Literal('=')) + (custom_list | text | fnumber | inumber | boolean | identifier)('value') + Optional(nl)) attributes = (Suppress(Literal('[')) + Optional(nl) + OneOrMore(attribute) + Suppress(Literal(']'))) node = identifier('node') port = Group(node + Suppress(Literal(':')) + (identifier | inumber)('port')) link = Group( port('endpoint_a') + Suppress(Literal('--')) + port('endpoint_b')) environment_spec = (attributes + nl).setResultsName('env_spec', listAllMatches=True) nodes_spec = (Group( Optional(attributes)('attributes') + Group(OneOrMore(node))('nodes')) + nl).setResultsName('node_spec', listAllMatches=True) ports_spec = (Group( Optional(attributes)('attributes') + Group(OneOrMore(port))('ports')) + nl).setResultsName('port_spec', listAllMatches=True) link_spec = (Group(Optional(attributes)('attributes') + link('links')) + nl).setResultsName('link_spec', listAllMatches=True) statements = OneOrMore(comment | link_spec | ports_spec | nodes_spec | environment_spec | empty_line) return statements
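# Hypothetical topology snippet exercising the parser above: a comment, one attributed
# nodes line, and one link line.
topology = (
    '# two nodes wired together\n'
    '[shell=vtysh] sw1 sw2\n'
    'sw1:1 -- sw2:1\n'
)
spec = build_parser().parseString(topology, parseAll=True)
print(spec.dump())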
def pythonVar(self): if not self._pythonVar: from pyparsing import (ParserElement, Word, alphas, alphanums, Literal, Suppress, FollowedBy) _ws = ' \t' ParserElement.setDefaultWhitespaceChars(_ws) ident = Word(alphas+"_", alphanums+"_") lparen = Literal("(") dot = Literal(".") dollar = Literal("$") self._pythonVar = Suppress(dollar) + ident + ~FollowedBy((dot+ident) | lparen) self._pythonVar.setParseAction(self.onPythonVar) return self._pythonVar
def set_delimiters(self, delimiter): """Lets you change the delimiter that is used to identify field boundaries. delimiter: str A string containing characters to be used as delimiters. The default value is ' \t'. which means that spaces and tabs are not taken as data but instead mark the boundaries. Note that the parser is smart enough to recognize characters within quotes as non-delimiters.""" self.delimiter = delimiter if delimiter != "columns": ParserElement.setDefaultWhitespaceChars(str(delimiter))
def setLoggingDebugActionForParserElement( parser_element: pyparsing.ParserElement) -> None: ''' helper function to set up the custom debug actions for a ParserElement with our own functions that use the logging framework rather than `print()` this also calls setDebug(True) for the parser element as well ''' parser_element.setDebug(True) parser_element.setDebugActions(pyparsingLoggingStartDebugAction, pyparsingLoggingSuccessDebugAction, pyparsingLoggingExceptionDebugAction)
def __init__(self): self._generate_kinds() ParserElement.setDefaultWhitespaceChars(' \t') self._parser = OneOrMore( Group( Suppress('{') + Word(alphanums) + Suppress('\n') + OneOrMore( Group( Word(':' + alphanums + '_' + '-') + ZeroOrMore( Word(self._param_value_chrs) ^ Suppress('"') + Word(self._param_value_string) + Suppress('"')) + Suppress('\n'))) + Suppress('}') + ZeroOrMore(Suppress('\n'))))
def __init__(self): # a class-level static method to enable a memoizing performance # enhancement, known as "packrat parsing". ParserElement.enablePackrat() NUMBER = Regex(r"[+-]?\d+(:?\.\d*)?(:?[eE][+-]?\d+)?").setParseAction( Immediate) IDENT = Word(alphas, alphanums + '_').setParseAction(Variable) self.OPERAND = (NUMBER | IDENT) self.MATH_OPERATORS = [(oneOf('+ -'), 1, opAssoc.RIGHT, SignTerm), ('^', 2, opAssoc.RIGHT, MathTerm), (oneOf('* /'), 2, opAssoc.LEFT, MathTerm), (oneOf('+ -'), 2, opAssoc.LEFT, MathTerm)] self.ARITH_EXPR = infixNotation(self.OPERAND, self.MATH_OPERATORS) self.pattern = self.ARITH_EXPR + StringEnd()
def pythonVar(self): if not self._pythonVar: from pyparsing import (ParserElement, Word, alphas, alphanums, Literal, Suppress, FollowedBy) _ws = ' \t' ParserElement.setDefaultWhitespaceChars(_ws) ident = Word(alphas + "_", alphanums + "_") lparen = Literal("(") dot = Literal(".") dollar = Literal("$") self._pythonVar = Suppress(dollar) + ident + ~FollowedBy( (dot + ident) | lparen) self._pythonVar.setParseAction(self.onPythonVar) return self._pythonVar
def parser(): global _parser if _parser is None: ParserElement.setDefaultWhitespaceChars("") lbrack = Literal("[") rbrack = Literal("]") lbrace = Literal("{") rbrace = Literal("}") lparen = Literal("(") rparen = Literal(")") reMacro = Suppress("\\") + oneOf(list("dwsZ")) escapedChar = ~reMacro + Combine("\\" + oneOf(list(printables))) reLiteralChar = "".join(c for c in string.printable if c not in r"\[]{}().*?+|") reRange = Combine(lbrack.suppress() + SkipTo(rbrack, ignore=escapedChar) + rbrack.suppress()) reLiteral = (escapedChar | oneOf(list(reLiteralChar))) reDot = Literal(".") repetition = ((lbrace + Word(nums).setResultsName("count") + rbrace) | (lbrace + Word(nums).setResultsName("minCount") + "," + Word(nums).setResultsName("maxCount") + rbrace) | oneOf(list("*+?"))) reExpr = Forward() reGroup = (lparen.suppress() + Optional(Literal("?").suppress() + oneOf(list(":P"))).setResultsName("option") + reExpr.setResultsName("expr") + rparen.suppress()) reTerm = (reLiteral | reRange | reMacro | reDot | reGroup) reExpr << operatorPrecedence(reTerm, [ (repetition, 1, opAssoc.LEFT, create(Repetition)), (None, 2, opAssoc.LEFT, create(Sequence)), (Suppress('|'), 2, opAssoc.LEFT, create(Alternation)), ]) reGroup.setParseAction(create(Group)) reRange.setParseAction(create(Range)) reLiteral.setParseAction(create(Character)) reMacro.setParseAction(create(Macro)) reDot.setParseAction(create(Dot)) _parser = reExpr return _parser
def parser(): global _parser if _parser is None: ParserElement.setDefaultWhitespaceChars("") lbrack = Literal("[") rbrack = Literal("]") lbrace = Literal("{") rbrace = Literal("}") lparen = Literal("(") rparen = Literal(")") reMacro = Suppress("\\") + oneOf(list("dwsZ")) escapedChar = ~reMacro + Combine("\\" + oneOf(list(printables))) reLiteralChar = "".join(c for c in string.printable if c not in r"\[]{}().*?+|") reRange = Combine(lbrack.suppress() + SkipTo(rbrack,ignore=escapedChar) + rbrack.suppress()) reLiteral = ( escapedChar | oneOf(list(reLiteralChar)) ) reDot = Literal(".") repetition = ( ( lbrace + Word(nums).setResultsName("count") + rbrace ) | ( lbrace + Word(nums).setResultsName("minCount")+","+ Word(nums).setResultsName("maxCount") + rbrace ) | oneOf(list("*+?")) ) reExpr = Forward() reGroup = (lparen.suppress() + Optional(Literal("?").suppress() + oneOf(list(":P"))).setResultsName("option") + reExpr.setResultsName("expr") + rparen.suppress()) reTerm = ( reLiteral | reRange | reMacro | reDot | reGroup ) reExpr << operatorPrecedence( reTerm, [ (repetition, 1, opAssoc.LEFT, create(Repetition)), (None, 2, opAssoc.LEFT, create(Sequence)), (Suppress('|'), 2, opAssoc.LEFT, create(Alternation)), ] ) reGroup.setParseAction(create(Group)) reRange.setParseAction(create(Range)) reLiteral.setParseAction(create(Character)) reMacro.setParseAction(create(Macro)) reDot.setParseAction(create(Dot)) _parser = reExpr return _parser
def _worth_extracting(element: pyparsing.ParserElement) -> bool: """ Returns true if this element is worth having its own sub-diagram. Simply, if any of its children themselves have children, then its complex enough to extract """ children = element.recurse() return any(child.recurse() for child in children)
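# Quick self-check of the predicate above (pyparsing 3.x, where recurse() exists):
# a bare Word has no grandchildren, a Group around an And does.
from pyparsing import Word, alphas, Group

print(_worth_extracting(Word(alphas)))                         # False
print(_worth_extracting(Group(Word(alphas) + Word(alphas))))   # True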
def test_succeed(parser: ParserElement, text: str, target: str = None, skip_target: bool = False, show_raw: bool = False, verbose: bool = False) -> None: log.critical("Testing to succeed: " + text) if target is None: target = text try: p = parser.parseString(text, parseAll=True) log.debug("Success: {} -> {}", text, text_from_parsed(p)) if show_raw: log.debug("... raw: {}", p) if verbose: log.debug("... dump:\n{}", p.dump()) except ParseException as exception: log.debug("ParseException on: {}\n... parser: {}", text, parser) print(statement_and_failure_marker(text, exception)) raise if not skip_target: intended = standardize_for_testing(target) raw = text_from_parsed(p) actual = standardize_for_testing(raw) if intended != actual: raise ValueError(f"Failure on: {text}\n" f"-> Raw output:\n" f"{raw!r}\n" f"-> Standardized output:\n" f"{actual!r}\n" f"... should have been:\n" f"{intended!r}\n" f"... parser: {parser}\n" f"... as list: {p.asList()!r}]")
def pn_from_sis(filename):
    """Loads a PN in SIS format."""
    # definition of PN grammar
    ParserElement.setDefaultWhitespaceChars(" \t")
    id = Word(alphanums + "_\"':-")
    #place = Literal("p") + Word(nums)
    number = Word(nums).setParseAction(lambda tokens: int(tokens[0]))
    newlines = Suppress(OneOrMore(LineEnd()))
    modelName = ".model" + id("modelName") + newlines
    signalNames = ZeroOrMore(Suppress(oneOf(".inputs .outputs .dummy")) + OneOrMore(id) + newlines)("signals")
    arc = id + ZeroOrMore(Group(id + Optional(Suppress("(") + number + Suppress(")"), default=1))) + newlines
    graph = Literal(".graph") + Suppress(OneOrMore(LineEnd())) + OneOrMore(Group(arc))("arcs")
    capacity_list = ZeroOrMore(Group(id + Suppress("=") + number))
    capacity = ".capacity" + capacity_list("capacities") + newlines
    marking_list = ZeroOrMore(Group(id + Optional(Suppress("=") + number, default=1)))
    marking = ".marking" + Suppress("{") + marking_list("marking") + Suppress("}") + newlines
    pn = Optional(newlines) + Optional(modelName) + signalNames + graph + Optional(capacity) + marking + ".end"
    pn.ignore(pythonStyleComment)

    net = PetriNet(filename=filename, format='sis')
    ast = pn.parseFile(filename)
    for t in ast.signals:
        net.add_transition(t)
    #net.name = ast.modelName
    net.set_name(ast.modelName)
    #net.signals.update( ast.signals )
    # tuplelist = [ (m[0],m[1]) for m in ast.capacities ]
    # net.capacities = dict( tuplelist )
    # net.initial_marking = dict( [ (m[0],m[1]) for m in ast.marking ] )
    #print ast.arcs
    transitions = set(net.get_transitions())
    for a in ast.arcs:
        #print a[0]
        if a[0] not in transitions:
            # it's a place
            p = net.add_place(a[0])
            for t in a[1:]:
                net.add_edge(p, t[0], t[1])
        else:
            for t in a[1:]:
                p = net.add_place(t[0])
                net.add_edge(a[0], p, t[1])
    for m in ast.marking:
        net.set_initial_marking(m[0], m[1])
    for m in ast.capacities:
        net.set_capacity(m[0], m[1])
    net.to_initial_marking()
    return net
def rfc2822(): global _rfc2822 if _rfc2822 is None: ParserElement.setDefaultWhitespaceChars("") CRLF = Literal("\r\n") ATEXT = Regex("[a-zA-Z0-9!#$%&'*+\-/=\?^_`{|}~]") TEXT = Regex("[\x01-\x09\x0b\x0c\x0e-\x7f]") QTEXT = Regex("[\x01-\x08\x0b\x0c\x0d-\x1f\x21\x23-\x5b\x5d-\x7f]") LOWASCII = Regex("[\x00-\x7f]") DTEXT = Regex("[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x5e-\x7f]") WSP = Regex("[\x20\x09]") CTEXT = Regex("[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x27\x2a-\x5b\x5d-\x7f]") obsQp = r"\\" + LOWASCII quotedPair = (r"\\" + TEXT) | obsQp obsFWS = OneOrMore(WSP) + ZeroOrMore(CRLF + OneOrMore(WSP)) FWS = (Optional(ZeroOrMore(WSP) + CRLF) + OneOrMore(WSP)) | obsFWS comment = Forward() ccontent = CTEXT | quotedPair | comment comment << "(" + ZeroOrMore(Optional(FWS) + ccontent) + Optional(FWS) + ")" CFWS = ZeroOrMore(Optional(FWS) + comment) + ((Optional(FWS) + comment) | FWS) atom = Optional(CFWS) + OneOrMore(ATEXT) + Optional(CFWS) dotAtomText = OneOrMore(ATEXT) + ZeroOrMore("." + OneOrMore(ATEXT)) dotAtom = Optional(CFWS) + dotAtomText + Optional(CFWS) qcontent = QTEXT | quotedPair quotedString = Optional(CFWS) + '"' + ZeroOrMore(Optional(FWS) + qcontent) + Optional(FWS) + '"' word = atom | quotedString obsLocalPart = word + ZeroOrMore("." + word) localPart = dotAtom | quotedString | obsLocalPart dcontent = DTEXT | quotedPair domainLiteral = Optional(CFWS) + "[" + ZeroOrMore(Optional(FWS) + dcontent) + Optional(FWS) + "]" + Optional(CFWS) obsDomain = atom + ZeroOrMore("." + atom) domain = dotAtom | domainLiteral | obsDomain addrSpec = localPart + "@" + domain _rfc2822 = addrSpec return _rfc2822
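# Hypothetical check of the addr-spec grammar above (assumes the module-level
# `_rfc2822 = None` cache that the global statement refers to).
print(rfc2822().parseString("user.name@example.org", parseAll=True))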
def read_sets(string): """ >>> read_sets("{}") frozenset([]) >>> read_sets("{1}") frozenset([1]) >>> read_sets("{{}, {}}") # invalid, outer set contains two equal sets frozenset([frozenset([])]) >>> read_sets("{{{1}, {2}}, {3}}") frozenset([frozenset([frozenset([2]), frozenset([1])]), frozenset([3])]) >>> read_sets("{1, 2,3}") frozenset([1, 2, 3]) >>> read_sets("{{1, 2}, {3, 4}}") frozenset([frozenset([1, 2]), frozenset([3, 4])]) >>> read_sets("{a,b,c}") frozenset(['a', 'c', 'b']) >>> read_sets('[{1,2,3},{a,c,b}]') [frozenset([1, 2, 3]), frozenset(['a', 'c', 'b'])] >>> read_sets('{a}') frozenset(['a']) >>> read_sets('{{x1,x2},{x3}}') frozenset([frozenset(['x2', 'x1']), frozenset(['x3'])]) >>> read_sets('{{23gat,24gat}}') frozenset([frozenset(['23gat', '24gat'])]) """ from pyparsing import nestedExpr, alphas, Word, nums, ParserElement, delimitedList ParserElement.setDefaultWhitespaceChars(" ,") element = Word(alphas + nums).setParseAction(parse_elem) elements = delimitedList(element) setofsets = nestedExpr("{", "}", content=elements).setParseAction(lambda x: frozenset(x[0])) listofsets = nestedExpr("[", "]", content=setofsets) expr = setofsets | listofsets return expr.parseString(string).asList()[0]
def property_grammar(): ParserElement.setDefaultWhitespaceChars(' ') model_props = [Regex(r'LIBINPUT_MODEL_[_0-9A-Z]+')('NAME') - Suppress('=') - (Literal('1'))('VALUE') ] dimension = INTEGER('X') + Suppress('x') + INTEGER('Y') sz_props = ( ('LIBINPUT_ATTR_SIZE_HINT', Group(dimension('SETTINGS*'))), ('LIBINPUT_ATTR_RESOLUTION_HINT', Group(dimension('SETTINGS*'))), ) size_props = [Literal(name)('NAME') - Suppress('=') - val('VALUE') for name, val in sz_props] grammar = Or(model_props + size_props); return grammar
def _int_expression(): from pyparsing import Word, alphanums, nums, Forward, ZeroOrMore, Combine, CaselessLiteral, srange, ParserElement, Optional ParserElement.enablePackrat() entry_name = Word(alphanums + ' _+:.-/') integer = Combine(Optional('-') + Word(nums)).addParseAction(lambda s,l,t: [Constant(int(t[0]))]) hex = Combine(CaselessLiteral("0x") + Word(srange("[0-9a-fA-F]"))).addParseAction(lambda s,l,t:[Constant(int(t[0][2:], 16))]) named_reference = ('${' + entry_name + '}').addParseAction(lambda s,l,t:ValueResult(t[1])) length_reference = ('len{' + entry_name + '}').addParseAction(lambda s,l,t:LengthResult(t[1])) expression = Forward() factor = hex | integer | named_reference | length_reference | ('(' + expression + ')').addParseAction(lambda s,l,t:t[1]) entry = factor for ops in _operators: op_parse = reduce(operator.or_, [(character + entry).addParseAction(_half(op)) for character, op in ops]) entry = (entry + ZeroOrMore(op_parse)).addParseAction(_collapse) expression << entry return expression
def ts_from_file(filename):
    """Loads a TS (possibly extended with state frequencies) in SIS format."""
    # definition of TS grammar
    ParserElement.setDefaultWhitespaceChars(" \t")
    id = Word(alphanums + "_\"':-")
    #place = Literal("p") + Word(nums)
    number = Word(nums).setParseAction(lambda tokens: int(tokens[0]))
    newlines = Suppress(OneOrMore(LineEnd()))
    modelName = ".model" + id("modelName") + newlines
    signalNames = ZeroOrMore(Suppress(oneOf(".inputs .outputs .dummy")) + OneOrMore(id) + newlines)("signals")
    arc = id + id + id + newlines
    graph = Literal(".state graph") + Suppress(OneOrMore(LineEnd())) + OneOrMore(Group(arc))("arcs")
    frequency_list = ZeroOrMore(Group(id + number) + newlines)
    frequency = ".frequencies" + Suppress(OneOrMore(LineEnd())) + frequency_list("frequencies")
    marking_list = ZeroOrMore(id)
    marking = ".marking" + Suppress("{") + marking_list("marking") + Suppress("}") + newlines
    ts_grammar = Optional(newlines) + Optional(modelName) + signalNames + graph + marking + Optional(frequency) + ".end"
    ts_grammar.ignore(pythonStyleComment)
    try:
        ast = ts_grammar.parseFile(filename)
    except ParseException as pe:
        print(pe)
        raise pe
def property_grammar(): ParserElement.setDefaultWhitespaceChars(' ') model_props = [Regex(r'LIBINPUT_MODEL_[_0-9A-Z]+')('NAME') - Suppress('=') - (Literal('1'))('VALUE') ] dimension = INTEGER('X') + Suppress('x') + INTEGER('Y') sz_props = ( ('LIBINPUT_ATTR_SIZE_HINT', Group(dimension('SETTINGS*'))), ('LIBINPUT_ATTR_RESOLUTION_HINT', Group(dimension('SETTINGS*'))), ) size_props = [Literal(name)('NAME') - Suppress('=') - val('VALUE') for name, val in sz_props] reliability_tags = Or(('reliable', 'write_open')) reliability = [Literal('LIBINPUT_ATTR_LID_SWITCH_RELIABILITY')('NAME') - Suppress('=') - reliability_tags('VALUE')] tpkbcombo_tags = Or(('below')) tpkbcombo = [Literal('LIBINPUT_ATTR_TPKBCOMBO_LAYOUT')('NAME') - Suppress('=') - tpkbcombo_tags('VALUE')] pressure_range = INTEGER('X') + Suppress(':') + INTEGER('Y') pressure_prop = [ Literal('LIBINPUT_ATTR_PRESSURE_RANGE')('NAME') - Suppress('=') - Group(pressure_range('SETTINGS*')) ] grammar = Or(model_props + size_props + reliability + tpkbcombo + pressure_prop) return grammar
def parser(): global _parser if _parser is None: ParserElement.setDefaultWhitespaceChars("") lbrack,rbrack,lbrace,rbrace,lparen,rparen,colon,qmark = map(Literal,"[]{}():?") reMacro = Combine("\\" + oneOf(list("dws"))) escapedChar = ~reMacro + Combine("\\" + oneOf(list(printables))) reLiteralChar = "".join(c for c in printables if c not in r"\[]{}().*?+|") + " \t" reRange = Combine(lbrack + SkipTo(rbrack,ignore=escapedChar) + rbrack) reLiteral = ( escapedChar | oneOf(list(reLiteralChar)) ) reNonCaptureGroup = Suppress("?:") reDot = Literal(".") repetition = ( ( lbrace + Word(nums)("count") + rbrace ) | ( lbrace + Word(nums)("minCount")+","+ Word(nums)("maxCount") + rbrace ) | oneOf(list("*+?")) ) reRange.setParseAction(handleRange) reLiteral.setParseAction(handleLiteral) reMacro.setParseAction(handleMacro) reDot.setParseAction(handleDot) reTerm = ( reLiteral | reRange | reMacro | reDot | reNonCaptureGroup) reExpr = infixNotation( reTerm, [ (repetition, 1, opAssoc.LEFT, handleRepetition), (None, 2, opAssoc.LEFT, handleSequence), (Suppress('|'), 2, opAssoc.LEFT, handleAlternative), ] ) _parser = reExpr return _parser
# A TAP output line may also indicate abort of the test suite with the line:
#   Bail out!
# optionally followed by a reason for bailing
#
# Copyright 2008, by Paul McGuire
#
from pyparsing import ParserElement, LineEnd, Optional, Word, nums, Regex, \
    Literal, CaselessLiteral, Group, OneOrMore, Suppress, restOfLine, \
    FollowedBy, empty

__all__ = ['tapOutputParser', 'TAPTest', 'TAPSummary']

# newlines are significant whitespace, so set default skippable
# whitespace to just spaces and tabs
ParserElement.setDefaultWhitespaceChars(" \t")
NL = LineEnd().suppress()

integer = Word(nums)
plan = '1..' + integer("ubound")

OK, NOT_OK = map(Literal, ['ok', 'not ok'])
testStatus = (OK | NOT_OK)

description = Regex("[^#\n]+")
description.setParseAction(lambda t: t[0].lstrip('- '))

TODO, SKIP = map(CaselessLiteral, 'TODO SKIP'.split())
directive = Group(Suppress('#') + (TODO + restOfLine |
                                   FollowedBy(SKIP) +
                                   restOfLine.copy().setParseAction(lambda t: ['SKIP', t[0]])))
continuation_drift = 8  # minimum shift of the continuation line in js relative to the significant indentation
                        # if source indentation shift (compared to indent_len) is greater than this, greater value is used

# gr = Word(alphanums)
# >>> src = 'a bb ccc'
# >>> for match, start, stop in gr.scanString(src):
# ...     print(match, start, stop)
#
# http://stackoverflow.com/questions/1661197/what-characters-are-valid-for-javascript-variable-names
# unicodePrintables = u''.join(unichr(c) for c in xrange(65536) if not unichr(c).isspace())
# >>> x = unicode('č')
# >>> x.isalnum()

from pyparsing import ParserElement
ParserElement.setDefaultWhitespaceChars('')

lineBreak = Word('\r\n', exact=2) | Word('\n', exact=1)
quotedString = (QuotedString('"', unquoteResults=False)
                | QuotedString("'", unquoteResults=False)
                | QuotedString('`', multiline=True, unquoteResults=False))
parseRules = cppStyleComment() | quotedString | lineBreak | CharsNotIn('"\'`/\r\n')


def javascript(ciderscript):
    """compiles from cjs to js"""
    context = {'js_indent_level': 0,        # current indentation level
               'indent_stack': [],          # stack of source indents, which is tuple (chars, scope) where
                                            # chars .. number of added spaces in this block begin,
                                            # scope .. =lineNo+1 for started function, 0 otherwise
               'indent_len': 0,             # current indent length (length of joined indent_stack)
               'multiline_comment': False,  # are we inside the multiline comment /*..*/ ?
               'multiline_string': False,   # are we inside the multiline string `..` ?
               }
def initialize(self):
    ParserElement.setDefaultWhitespaceChars(' \t\r')

    integer = Regex(r"[+-]?\d+") \
        .setParseAction(lambda s, l, t: [int(t[0])])
    number = Regex(r"[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?") \
        .setParseAction(lambda s, l, t: [float(t[0])])
    color = Regex(r"#([0-9a-fA-F]{6})")
    angle = "'" + Regex(r"(360|3[0-5][0-9]|[12][0-9]{2}|[0-9]{1,2})") \
        .setParseAction(lambda s, l, t: [int(t[0])])
    alpha = "'" + Regex(r"(360|3[0-5][0-9]|[12][0-9]{2}|[0-9]{1,2})") \
        .setParseAction(lambda s, l, t: [int(t[0])])
    variable = Word(alphas, exact=1).setParseAction(self.addVar)
    colon = Literal(":").suppress()
    comma = Literal(",")
    lBrace = Literal("(")
    rBrace = Literal(")")
    lBracket = Literal("[")
    rBracket = Literal("]")
    lAngle = Literal("<")
    rAngle = Literal(">")
    plus = Literal("+")
    minus = Literal("-")
    FTerm = Literal("F")
    fTerm = Literal("f")
    ZTerm = Literal("Z")
    zTerm = Literal("z")
    xTerm = Literal("x")
    cTerm = Literal("c")
    eol = OneOrMore(LineEnd()).suppress()

    param = (angle | color | "!" + number | "|" + number)
    self.pList = lBrace + param + ZeroOrMore(comma + param) + rBrace
    literal = ((lBracket + (variable + Optional(self.pList)
                            | plus + Optional(self.pList)
                            | minus + Optional(self.pList)) + rBracket)
               | (variable + Optional(self.pList)
                  | plus + Optional(self.pList)
                  | minus + Optional(self.pList)))
    terminal = (ZTerm | zTerm | FTerm | fTerm | xTerm | cTerm
                | plus | minus | lBracket | rBracket)
    lprod = ((OneOrMore(terminal) + lAngle + variable + rAngle + OneOrMore(terminal))
             | (OneOrMore(terminal) + lAngle + variable)
             | (variable + rAngle + OneOrMore(terminal))
             | variable)
    rProd = OneOrMore(literal | terminal)
    comment = Suppress((LineStart() + "#" + SkipTo(eol, include=True)))
    rules = ((lprod + Literal("=") + rProd + eol).setParseAction(self.addRule)
             | comment)
    defaults = ((("Dimensions" + colon + integer + comma + integer)
                 | ("Position" + colon + integer + comma + integer)
                 | ("Iterations" + colon + integer)
                 | ("Angle" + colon + angle)
                 | ("Linelength" + colon + number)
                 | ("Linewidth" + colon + number)
                 | ("Linecolor" + colon + color)
                 | ("Background" + colon + color)
                 | ("Axiom" + colon + rProd)) + eol).setParseAction(self.setAttribute)
    header = (defaults | comment)

    self.grammar = Suppress(ZeroOrMore(LineEnd())) \
        + ZeroOrMore(header) \
        + OneOrMore(rules)

    try:
        L = self.grammar.parseString(self.stream)
    except ParseException as err:
        print(err.line)
        print(" " * (err.column - 1) + "^")
        print(err)
def make_amr_parser(): """ Pyparsing parser for AMRs. This will return an abstract syntax tree that needs to be converted into an AMR using ast_to_amr. """ def debug(s, loc, tok): if len(tok) > 1: flat = [tok[0]] + tok[1:] else: flat = tok return flat def parse_concept_expr(s, loc, tok): node_name = tok[0] concept_name = None roles = [] if len(tok) > 1: if type(tok[1]) is tuple: roles = tok[1:] else: concept_name = tok[1] if len(tok) > 2: roles = tok[2:] return (node_name, concept_name, roles) ParserElement.enablePackrat() # Hopefully no bug in here... def parse_role(s,loc,tok): if len(tok) >= 2: r, ch = tok[0], [] for v in tok[1:]: if isinstance(v, StrLiteral): # Parse the node alignment and move it to the edge parts = v.replace(" ","").rsplit("~",1) if len(parts) >= 2: v, align = parts v = StrLiteral(v) r = "%s~%s" % (r.strip(), align.strip()) elif isinstance(v, SpecialValue): parts = v.replace(" ","").rsplit("~",1) if len(parts) >= 2: v, align = parts v = StrLiteral(v) r = "%s~%s" % (r.strip(), align.strip()) ch.append(v) return r, ch else: return tok[0] # Number are all mapped to the same node in the graph because of interning parse_quantity = lambda s, loc, tok: StrLiteral(" ".join(tok)) #float(tok[0]) if "." in tok[0] else int(tok[0]) parse_string_literal = lambda s, loc, tok: StrLiteral(" ".join(tok)) parse_special_value = lambda s, loc, tok: SpecialValue(" ".join(tok)) lpar = Literal( "(" ).suppress() rpar = Literal( ")" ).suppress() quantity = Word(nums+".,").setParseAction(parse_quantity) node_name = Word(alphas+nums+"""@-_.~$/<>%&!+\*?^`"'""") #Word(alphas+nums+"_@.") lit_string = Literal('"').suppress() + CharsNotIn('"') + Literal('"').suppress() concept_name = lit_string | Word(alphas+nums+"""-_.,`~$/<>%&!+\*?^"'""") role_name = Word(alphas+nums+"""-_.,~$/<>%&!+\*:?^`"'""") | Literal("#").suppress()+Word(alphas+nums+"[]-$_").setParseAction(lambda s, loc, tok: NonterminalLabel(tok[0])) special_attr = (Literal("-") | Literal("interrogative") | Literal("SHOULD") | Literal("MUST") | Literal("HAVE-TO")| Literal("WOULD") | Literal("CAN") | Literal("DARE-TO")| Literal("BE-TO") | Literal("MAY") | Literal("GOING-TO") | Literal("MIGHT") | Literal("USED-TO")) + Optional(Literal("~")+Word(alphas+nums+".")) expr = Forward() value = expr |\ quantity.setParseAction(parse_quantity) |\ special_attr.setParseAction(parse_special_value) | \ node_name |\ (lit_string + Optional(Literal("~")+Word(alphas+nums+"."))).setParseAction(parse_string_literal) valuelist = Forward() valuelist << (value + Literal(",").suppress() + valuelist | value).setParseAction(debug) role = (Literal(":").suppress() + role_name + valuelist).setParseAction(parse_role) expr.setParseAction(parse_concept_expr) expr << (lpar + node_name + Optional(Literal("/").suppress() + concept_name) + ZeroOrMore(role) + rpar) return expr
def set_default_white_spaces(): default = ParserElement.DEFAULT_WHITE_CHARS ParserElement.setDefaultWhitespaceChars(' \t') yield ParserElement.setDefaultWhitespaceChars(default)
from pyparsing import (nums, alphas, Combine, oneOf, opAssoc, operatorPrecedence,
                       QuotedString, Literal, ParserElement, ParseException, Forward,
                       Group, Suppress, Optional, Regex)
ParserElement.enablePackrat()

from sqlalchemy import and_, or_, func
#from sqlalchemy.orm import aliased

import operator
import re


def dbobject(obj):
    return getattr(obj, '__moyadbobject__', lambda: obj)()


@implements_to_string
class DBExpressionError(Exception):
    hide_py_traceback = True
def parse(content, basedir=None, resolve=True):
    """parse a HOCON content

    :param content: HOCON content to parse
    :type content: basestring
    :param resolve: if true, resolve substitutions
    :type resolve: boolean
    :return: a ConfigTree or a list
    """

    def norm_string(value):
        for k, v in ConfigParser.REPLACEMENTS.items():
            value = value.replace(k, v)
        return value

    def unescape_string(tokens):
        return ConfigUnquotedString(norm_string(tokens[0]))

    def parse_multi_string(tokens):
        # remove the first and last 3 "
        return tokens[0][3: -3]

    def convert_number(tokens):
        n = tokens[0]
        try:
            return int(n)
        except ValueError:
            return float(n)

    # ${path} or ${?path} for optional substitution
    SUBSTITUTION = r"\$\{(?P<optional>\?)?(?P<variable>[^}]+)\}(?P<ws>\s*)"

    def create_substitution(instring, loc, token):
        # remove the ${ and }
        match = re.match(SUBSTITUTION, token[0])
        variable = match.group('variable')
        ws = match.group('ws')
        optional = match.group('optional') == '?'
        substitution = ConfigSubstitution(variable, optional, ws, instring, loc)
        return substitution

    def include_config(token):
        url = None
        file = None
        if len(token) == 1:  # include "test"
            if token[0].startswith("http://") or token[0].startswith("https://") or token[0].startswith("file://"):
                url = token[0]
            else:
                file = token[0]
        elif len(token) == 2:  # include url("test") or file("test")
            if token[0] == 'url':
                url = token[1]
            else:
                file = token[1]

        if url is not None:
            logger.debug('Loading config from url %s', url)
            obj = ConfigFactory.parse_URL(url, resolve=False)

        if file is not None:
            path = file if basedir is None else os.path.join(basedir, file)
            logger.debug('Loading config from file %s', path)
            obj = ConfigFactory.parse_file(path, required=False, resolve=False)

        return ConfigInclude(obj if isinstance(obj, list) else obj.items())

    ParserElement.setDefaultWhitespaceChars(' \t')

    assign_expr = Forward()
    true_expr = Keyword("true", caseless=True).setParseAction(replaceWith(True))
    false_expr = Keyword("false", caseless=True).setParseAction(replaceWith(False))
    null_expr = Keyword("null", caseless=True).setParseAction(replaceWith(None))
    key = QuotedString('"', escChar='\\', unquoteResults=False) | Word(alphanums + '._- ')

    eol = Word('\n\r').suppress()
    eol_comma = Word('\n\r,').suppress()
    comment = (Literal('#') | Literal('//')) - SkipTo(eol)
    comment_eol = Suppress(Optional(eol_comma) + comment)
    comment_no_comma_eol = (comment | eol).suppress()
    number_expr = Regex(r'[+-]?(\d*\.\d+|\d+(\.\d+)?)([eE]\d+)?(?=$|[ \t]*([\$\}\],#\n\r]|//))',
                        re.DOTALL).setParseAction(convert_number)

    # multi line string using """
    # Using fix described in http://pyparsing.wikispaces.com/share/view/3778969
    multiline_string = Regex('""".*?"""', re.DOTALL | re.UNICODE).setParseAction(parse_multi_string)
    # single quoted line string
    quoted_string = QuotedString(quoteChar='"', escChar='\\', multiline=True)
    # unquoted string that takes the rest of the line until an optional comment
    # we support .properties multiline support which is like this:
    # line1 \
    # line2 \
    # so a backslash precedes the \n
    unquoted_string = Regex(r'(\\[ \t]*[\r\n]|[^\[\{\n\r\]\}#,=\$])+?(?=($|\$|[ \t]*(//|[\}\],#\n\r])))',
                            re.DOTALL).setParseAction(unescape_string)
    substitution_expr = Regex(r'[ \t]*\$\{[^\}]+\}[ \t]*').setParseAction(create_substitution)
    string_expr = multiline_string | quoted_string | unquoted_string

    value_expr = number_expr | true_expr | false_expr | null_expr | string_expr

    include_expr = (Keyword("include", caseless=True).suppress() - (
        quoted_string | ((Keyword('url') | Keyword('file')) - Literal('(').suppress() -
                         quoted_string - Literal(')').suppress()))) \
        .setParseAction(include_config)

    dict_expr = Forward()
    list_expr = Forward()
    multi_value_expr = ZeroOrMore((Literal('\\') - eol).suppress() | comment_eol | include_expr |
                                  substitution_expr | dict_expr | list_expr | value_expr)
    # for a dictionary : or = is optional
    # last zeroOrMore is because we can have t = {a:4} {b: 6} {c: 7} which is dictionary concatenation
    inside_dict_expr = ConfigTreeParser(ZeroOrMore(comment_eol | include_expr | assign_expr | eol_comma))
    dict_expr << Suppress('{') - inside_dict_expr - Suppress('}')
    list_entry = ConcatenatedValueParser(multi_value_expr)
    list_expr << Suppress('[') - ListParser(list_entry - ZeroOrMore(eol_comma - list_entry)) - Suppress(']')

    # special case when we have a value assignment where the string can potentially be the remainder of the line
    assign_expr << Group(
        key -
        ZeroOrMore(comment_no_comma_eol) -
        (dict_expr | Suppress(Literal('=') | Literal(':')) -
         ZeroOrMore(comment_no_comma_eol) - ConcatenatedValueParser(multi_value_expr))
    )

    # the file can be { ... } where {} can be omitted or []
    config_expr = ZeroOrMore(comment_eol | eol) + (list_expr | dict_expr | inside_dict_expr) + \
        ZeroOrMore(comment_eol | eol_comma)
    config = config_expr.parseString(content, parseAll=True)[0]
    if resolve:
        ConfigParser.resolve_substitutions(config)
    return config
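# A minimal usage sketch (not from the original source): in pyhocon this grammar
# sits behind ConfigFactory.parse_string, which is the usual public entry point.
from pyhocon import ConfigFactory

conf = ConfigFactory.parse_string("""
    databases {
      mysql {
        host = "abc.com"   // comments and unquoted strings are handled by the grammar above
        port = 3306
      }
    }
    mysql_port = ${databases.mysql.port}
""")
print(conf.get("databases.mysql.host"))  # -> 'abc.com'
print(conf.get("mysql_port"))            # -> 3306 (substitution resolved)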
def parse(cls, content, basedir=None, resolve=True, unresolved_value=DEFAULT_SUBSTITUTION):
    """parse a HOCON content

    :param content: HOCON content to parse
    :type content: basestring
    :param resolve: if true, resolve substitutions
    :type resolve: boolean
    :param unresolved_value: assigned value for unresolved substitutions.
        If overridden with a default value, it will replace all unresolved values with the default value.
        If it is set to pyhocon.STR_SUBSTITUTION then it will replace the value by its substitution expression (e.g., ${x})
    :type unresolved_value: boolean
    :return: a ConfigTree or a list
    """

    unescape_pattern = re.compile(r'\\.')

    def replace_escape_sequence(match):
        value = match.group(0)
        return cls.REPLACEMENTS.get(value, value)

    def norm_string(value):
        return unescape_pattern.sub(replace_escape_sequence, value)

    def unescape_string(tokens):
        return ConfigUnquotedString(norm_string(tokens[0]))

    def parse_multi_string(tokens):
        # remove the first and last 3 "
        return tokens[0][3: -3]

    def convert_number(tokens):
        n = tokens[0]
        try:
            return int(n, 10)
        except ValueError:
            return float(n)

    # ${path} or ${?path} for optional substitution
    SUBSTITUTION_PATTERN = r"\$\{(?P<optional>\?)?(?P<variable>[^}]+)\}(?P<ws>[ \t]*)"

    def create_substitution(instring, loc, token):
        # remove the ${ and }
        match = re.match(SUBSTITUTION_PATTERN, token[0])
        variable = match.group('variable')
        ws = match.group('ws')
        optional = match.group('optional') == '?'
        substitution = ConfigSubstitution(variable, optional, ws, instring, loc)
        return substitution

    # "value" with optional trailing whitespace
    STRING_PATTERN = '"(?P<value>(?:[^"\\\\]|\\\\.)*)"(?P<ws>[ \t]*)'

    def create_quoted_string(instring, loc, token):
        # remove the quotes and unescape the value
        match = re.match(STRING_PATTERN, token[0])
        value = norm_string(match.group('value'))
        ws = match.group('ws')
        return ConfigQuotedString(value, ws, instring, loc)

    def include_config(instring, loc, token):
        url = None
        file = None
        required = False

        if token[0] == 'required':
            required = True
            final_tokens = token[1:]
        else:
            final_tokens = token

        if len(final_tokens) == 1:  # include "test"
            value = final_tokens[0].value if isinstance(final_tokens[0], ConfigQuotedString) else final_tokens[0]
            if value.startswith("http://") or value.startswith("https://") or value.startswith("file://"):
                url = value
            else:
                file = value
        elif len(final_tokens) == 2:  # include url("test") or file("test")
            value = final_tokens[1].value if isinstance(token[1], ConfigQuotedString) else final_tokens[1]
            if final_tokens[0] == 'url':
                url = value
            else:
                file = value

        if url is not None:
            logger.debug('Loading config from url %s', url)
            obj = ConfigFactory.parse_URL(
                url,
                resolve=False,
                required=required,
                unresolved_value=NO_SUBSTITUTION
            )
        elif file is not None:
            path = file if basedir is None else os.path.join(basedir, file)
            logger.debug('Loading config from file %s', path)
            obj = ConfigFactory.parse_file(
                path,
                resolve=False,
                required=required,
                unresolved_value=NO_SUBSTITUTION
            )
        else:
            raise ConfigException('No file or URL specified at: {loc}: {instring}', loc=loc, instring=instring)

        return ConfigInclude(obj if isinstance(obj, list) else obj.items())

    ParserElement.setDefaultWhitespaceChars(' \t')

    assign_expr = Forward()
    true_expr = Keyword("true", caseless=True).setParseAction(replaceWith(True))
    false_expr = Keyword("false", caseless=True).setParseAction(replaceWith(False))
    null_expr = Keyword("null", caseless=True).setParseAction(replaceWith(NoneValue()))

    key = QuotedString('"', escChar='\\', unquoteResults=False) | Word(alphanums + alphas8bit + '._- /')

    eol = Word('\n\r').suppress()
    eol_comma = Word('\n\r,').suppress()
    comment = (Literal('#') | Literal('//')) - SkipTo(eol | StringEnd())
    comment_eol = Suppress(Optional(eol_comma) + comment)
    comment_no_comma_eol = (comment | eol).suppress()
    number_expr = Regex(r'[+-]?(\d*\.\d+|\d+(\.\d+)?)([eE][+\-]?\d+)?(?=$|[ \t]*([\$\}\],#\n\r]|//))',
                        re.DOTALL).setParseAction(convert_number)

    # multi line string using """
    # Using fix described in http://pyparsing.wikispaces.com/share/view/3778969
    multiline_string = Regex('""".*?"*"""', re.DOTALL | re.UNICODE).setParseAction(parse_multi_string)
    # single quoted line string
    quoted_string = Regex(r'"(?:[^"\\\n]|\\.)*"[ \t]*', re.UNICODE).setParseAction(create_quoted_string)
    # unquoted string that takes the rest of the line until an optional comment
    # we support .properties multiline support which is like this:
    # line1 \
    # line2 \
    # so a backslash precedes the \n
    unquoted_string = Regex(r'(?:[^^`+?!@*&"\[\{\s\]\}#,=\$\\]|\\.)+[ \t]*',
                            re.UNICODE).setParseAction(unescape_string)
    substitution_expr = Regex(r'[ \t]*\$\{[^\}]+\}[ \t]*').setParseAction(create_substitution)
    string_expr = multiline_string | quoted_string | unquoted_string

    value_expr = number_expr | true_expr | false_expr | null_expr | string_expr

    include_content = (quoted_string | ((Keyword('url') | Keyword('file')) -
                                        Literal('(').suppress() - quoted_string - Literal(')').suppress()))
    include_expr = (
        Keyword("include", caseless=True).suppress() + (
            include_content | (
                Keyword("required") - Literal('(').suppress() - include_content - Literal(')').suppress()
            )
        )
    ).setParseAction(include_config)

    root_dict_expr = Forward()
    dict_expr = Forward()
    list_expr = Forward()
    multi_value_expr = ZeroOrMore(comment_eol | include_expr | substitution_expr | dict_expr | list_expr |
                                  value_expr | (Literal('\\') - eol).suppress())
    # for a dictionary : or = is optional
    # last zeroOrMore is because we can have t = {a:4} {b: 6} {c: 7} which is dictionary concatenation
    inside_dict_expr = ConfigTreeParser(ZeroOrMore(comment_eol | include_expr | assign_expr | eol_comma))
    inside_root_dict_expr = ConfigTreeParser(ZeroOrMore(comment_eol | include_expr | assign_expr | eol_comma),
                                             root=True)
    dict_expr << Suppress('{') - inside_dict_expr - Suppress('}')
    root_dict_expr << Suppress('{') - inside_root_dict_expr - Suppress('}')
    list_entry = ConcatenatedValueParser(multi_value_expr)
    list_expr << Suppress('[') - ListParser(list_entry - ZeroOrMore(eol_comma - list_entry)) - Suppress(']')

    # special case when we have a value assignment where the string can potentially be the remainder of the line
    assign_expr << Group(
        key - ZeroOrMore(comment_no_comma_eol) -
        (dict_expr | (Literal('=') | Literal(':') | Literal('+=')) - ZeroOrMore(comment_no_comma_eol) -
         ConcatenatedValueParser(multi_value_expr))
    )

    # the file can be { ... } where {} can be omitted or []
    config_expr = ZeroOrMore(comment_eol | eol) + (list_expr | root_dict_expr | inside_root_dict_expr) + \
        ZeroOrMore(comment_eol | eol_comma)
    config = config_expr.parseString(content, parseAll=True)[0]

    if resolve:
        allow_unresolved = resolve and unresolved_value is not DEFAULT_SUBSTITUTION and \
            unresolved_value is not MANDATORY_SUBSTITUTION
        has_unresolved = cls.resolve_substitutions(config, allow_unresolved)
        if has_unresolved and unresolved_value is MANDATORY_SUBSTITUTION:
            raise ConfigSubstitutionException(
                'resolve cannot be set to True and unresolved_value to MANDATORY_SUBSTITUTION')

    if unresolved_value is not NO_SUBSTITUTION and unresolved_value is not DEFAULT_SUBSTITUTION:
        cls.unresolve_substitutions_to_value(config, unresolved_value)
    return config
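# Sketch (not from the original source): two things this newer grammar accepts that
# the older parse() above does not are the '+=' self-referential append and required
# includes (include required("file")). A small '+=' example via the public API:
from pyhocon import ConfigFactory

conf = ConfigFactory.parse_string("""
    queue.workers = [1, 2]
    queue.workers += 3   // shorthand for queue.workers = ${?queue.workers} [3]
""")
print(conf.get("queue.workers"))  # -> [1, 2, 3]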
# -*- coding: utf-8 -*-
"""Atomic components; probably shouldn't use these directly"""
import string

from pyparsing import Optional, ParserElement, Regex, Suppress, Word
from six.moves.html_parser import HTMLParser

from regparser.grammar.utils import Marker, SuffixMarker, WordBoundaries

# Set whitespace for all parsing; include unicode whitespace chars
ParserElement.setDefaultWhitespaceChars(
    string.whitespace + HTMLParser().unescape('   ‌‍‎‏'))

lower_p = (
    Suppress("(")
    + Regex(r"[ivx]{1}|[a-hj-uwyz]{1,2}").setResultsName("p1")
    + Suppress(")"))
digit_p = (
    Suppress("(")
    + Word(string.digits).setResultsName("p2")
    + Suppress(")"))
roman_p = (
    Suppress("(")
    + Word("ivxlcdm").setResultsName("p3")
    + Suppress(")"))
upper_p = (
    Suppress("(")
    + Word(string.ascii_uppercase).setResultsName("p4") +