def hwdb_grammar():
    ParserElement.setDefaultWhitespaceChars('')

    prefix = Or(category + ':' + Or(conn) + ':'
                for category, conn in TYPES.items())

    matchline_typed = Combine(prefix + Word(printables + ' ' + '®'))
    matchline_general = Combine(Or(GENERAL_MATCHES) + ':' + Word(printables + ' ' + '®'))
    matchline = (matchline_typed | matchline_general) + EOL

    propertyline = (White(' ', exact=1).suppress() +
                    Combine(UDEV_TAG - '=' - Optional(Word(alphanums + '_=:@*.!-;, "/')) - Optional(pythonStyleComment)) +
                    EOL)
    propertycomment = White(' ', exact=1) + pythonStyleComment + EOL

    group = (OneOrMore(matchline('MATCHES*') ^ COMMENTLINE.suppress()) -
             OneOrMore(propertyline('PROPERTIES*') ^ propertycomment.suppress()) -
             (EMPTYLINE ^ stringEnd()).suppress())
    commentgroup = OneOrMore(COMMENTLINE).suppress() - EMPTYLINE.suppress()

    grammar = OneOrMore(Group(group)('GROUPS*') ^ commentgroup) + stringEnd()

    return grammar
def read_sets_java(string):
    from pyparsing import nestedExpr, alphas, Word, nums, ParserElement, delimitedList
    ParserElement.setDefaultWhitespaceChars(" ,")
    element = Word(alphas + nums).setParseAction(parse_elem_java)
    elements = delimitedList(element)
    setofsets = nestedExpr("[", "]", content=elements).setParseAction(lambda x: frozenset(x[0]))
    return setofsets.parseString(string).asList()[0]
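# Usage sketch for the grammar built in read_sets_java above. It is standalone
# rather than a direct call, because parse_elem_java is defined elsewhere in
# the real module; the lambda below is an illustrative stand-in that converts
# digit strings to int.
from pyparsing import ParserElement, Word, alphas, nums, delimitedList, nestedExpr

ParserElement.setDefaultWhitespaceChars(" ,")   # commas behave like whitespace
element = Word(alphas + nums).setParseAction(lambda t: int(t[0]) if t[0].isdigit() else t[0])
elements = delimitedList(element)
setofsets = nestedExpr("[", "]", content=elements).setParseAction(lambda x: frozenset(x[0]))

print(sorted(setofsets.parseString("[1, 2, 3]").asList()[0]))   # [1, 2, 3]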
def parser():
    global _parser
    if _parser is None:
        ParserElement.setDefaultWhitespaceChars("")
        lbrack, rbrack, lbrace, rbrace, lparen, rparen, colon, qmark = map(
            Literal, "[]{}():?")

        reMacro = Combine("\\" + oneOf(list("dws")))
        escapedChar = ~reMacro + Combine("\\" + oneOf(list(printables)))
        reLiteralChar = "".join(
            c for c in printables if c not in r"\[]{}().*?+|") + " \t"

        reRange = Combine(lbrack + SkipTo(rbrack, ignore=escapedChar) + rbrack)
        reLiteral = (escapedChar | oneOf(list(reLiteralChar)))
        reNonCaptureGroup = Suppress("?:")
        reDot = Literal(".")
        repetition = ((lbrace + Word(nums).setResultsName("count") + rbrace) |
                      (lbrace + Word(nums).setResultsName("minCount") + "," +
                       Word(nums).setResultsName("maxCount") + rbrace) |
                      oneOf(list("*+?")))

        reRange.setParseAction(handleRange)
        reLiteral.setParseAction(handleLiteral)
        reMacro.setParseAction(handleMacro)
        reDot.setParseAction(handleDot)

        reTerm = (reLiteral | reRange | reMacro | reDot | reNonCaptureGroup)
        reExpr = infixNotation(reTerm, [
            (repetition, 1, opAssoc.LEFT, handleRepetition),
            (None, 2, opAssoc.LEFT, handleSequence),
            (Suppress('|'), 2, opAssoc.LEFT, handleAlternative),
        ])
        _parser = reExpr

    return _parser
def parser():
    global _parser
    if _parser is None:
        ParserElement.setDefaultWhitespaceChars("")
        lbrack, rbrack, lbrace, rbrace, lparen, rparen = map(Literal, "[]{}()")

        reMacro = Combine("\\" + oneOf(list("dws")))
        escapedChar = ~reMacro + Combine("\\" + oneOf(list(printables)))
        reLiteralChar = "".join(c for c in printables if c not in r"\[]{}().*?+|") + " \t"

        reRange = Combine(lbrack + SkipTo(rbrack, ignore=escapedChar) + rbrack)
        reLiteral = (escapedChar | oneOf(list(reLiteralChar)))
        reDot = Literal(".")
        repetition = (
            (lbrace + Word(nums).setResultsName("count") + rbrace) |
            (lbrace + Word(nums).setResultsName("minCount") + "," +
             Word(nums).setResultsName("maxCount") + rbrace) |
            oneOf(list("*+?"))
        )

        reRange.setParseAction(handle_range)
        reLiteral.setParseAction(handle_literal)
        reMacro.setParseAction(handle_macro)
        reDot.setParseAction(handle_dot)

        reTerm = (reLiteral | reRange | reMacro | reDot)
        reExpr = operatorPrecedence(reTerm, [
            (repetition, 1, opAssoc.LEFT, handle_repetition),
            (None, 2, opAssoc.LEFT, handle_sequence),
            (Suppress('|'), 2, opAssoc.LEFT, handle_alternative),
        ])
        _parser = reExpr

    return _parser
def hwdb_grammar():
    ParserElement.setDefaultWhitespaceChars('')

    prefix = Or(category + ':' + Or(conn) + ':'
                for category, conn in TYPES.items())
    matchline = Combine(prefix + Word(printables + ' ' + '®')) + EOL
    propertyline = (
        White(' ', exact=1).suppress() +
        Combine(UDEV_TAG - '=' - Word(alphanums + '_=:@*.! ') - Optional(pythonStyleComment)) +
        EOL
    )
    propertycomment = White(' ', exact=1) + pythonStyleComment + EOL

    group = (
        OneOrMore(matchline('MATCHES*') ^ COMMENTLINE.suppress()) -
        OneOrMore(propertyline('PROPERTIES*') ^ propertycomment.suppress()) -
        (EMPTYLINE ^ stringEnd()).suppress()
    )
    commentgroup = OneOrMore(COMMENTLINE).suppress() - EMPTYLINE.suppress()

    grammar = OneOrMore(group('GROUPS*') ^ commentgroup) + stringEnd()

    return grammar
def property_grammar():
    ParserElement.setDefaultWhitespaceChars(' ')

    setting = Optional('*')('DEFAULT') + INTEGER('DPI') + Suppress('@') + INTEGER('HZ')
    props = (('MOUSE_DPI', Group(OneOrMore(setting('SETTINGS*')))),
             ('MOUSE_WHEEL_CLICK_ANGLE', INTEGER),
             ('MOUSE_WHEEL_CLICK_ANGLE_HORIZONTAL', INTEGER),
             ('ID_INPUT_TRACKBALL', Literal('1')),
             ('POINTINGSTICK_SENSITIVITY', INTEGER),
             ('POINTINGSTICK_CONST_ACCEL', REAL),
             ('ID_INPUT_TOUCHPAD_INTEGRATION', Or(('internal', 'external'))),
            )
    fixed_props = [Literal(name)('NAME') - Suppress('=') - val('VALUE')
                   for name, val in props]
    kbd_props = [Regex(r'KEYBOARD_KEY_[0-9a-f]+')('NAME')
                 - Suppress('=') -
                 ('!' ^ (Optional('!') - Word(alphanums + '_')))('VALUE')
                ]
    abs_props = [Regex(r'EVDEV_ABS_[0-9a-f]{2}')('NAME')
                 - Suppress('=') -
                 Word(nums + ':')('VALUE')
                ]

    grammar = Or(fixed_props + kbd_props + abs_props)

    return grammar
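# Standalone sketch of the MOUSE_DPI pattern used above. INTEGER is a stand-in
# for the token the real file defines at module level, so the results layout
# shown here is only illustrative.
from pyparsing import ParserElement, Word, nums, Literal, Suppress, Optional, Group, OneOrMore

ParserElement.setDefaultWhitespaceChars(' ')
INTEGER = Word(nums)
setting = Optional('*')('DEFAULT') + INTEGER('DPI') + Suppress('@') + INTEGER('HZ')
prop = Literal('MOUSE_DPI')('NAME') - Suppress('=') - Group(OneOrMore(setting('SETTINGS*')))('VALUE')

result = prop.parseString('MOUSE_DPI=*1000@125 800@125', parseAll=True)
print(result.NAME)              # MOUSE_DPI
print(result.VALUE.asList())    # ['*', '1000', '125', '800', '125']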
def _make_grammar():
    """Make a grammar for parsing a sanitized F5 config

    The syntax is Tcl, except for a 'Sanitized out =' string at the top.
    We only parse enough to find commands and their arguments.

    Return a ParseResult where 'prog' is a list of commands. Each command
    has a name and some arguments. These arguments can be further nested
    lists in case of '{ ... }' and '[ ... ]' blocks.
    """
    ParserElement.setDefaultWhitespaceChars(' ')

    white = Suppress(Optional(White()))
    comment = white + '#' - restOfLine
    lbrace, rbrace = Suppress('{'), Suppress('}')
    lbracket, rbracket = Suppress('['), Suppress(']')

    cmds = Forward()
    braces = Group(lbrace - white - Optional(cmds) - white - rbrace)
    brackets = Group(lbracket - white - Optional(cmds) - white - rbracket)
    string = QuotedString(quoteChar='"', escChar='\\', multiline=True)
    word = string | braces | brackets | Word(alphanums + '-:()_./<>%*$|!=&?')
    cmd = Group(word('name') + ZeroOrMore(word)('args'))
    cmd_sep = OneOrMore(Literal('\n') | ';')
    cmds << (cmd + ZeroOrMore(Suppress(cmd_sep) + cmd))
    prog_end = Suppress(Optional(cmd_sep)) + StringEnd()
    prog = cmds + prog_end

    sanitized_begin = Suppress(Optional(White()))
    sanitized = sanitized_begin + Optional('Sanitized out =') + prog('prog')
    sanitized.ignore(comment)
    return sanitized
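# Usage sketch for _make_grammar above; the input text is an invented,
# minimal F5-style blob, not a real sanitized dump.
sample = 'ltm node /Common/n1 {\n    address 10.0.0.1\n}\n'
result = _make_grammar().parseString(sample)
for cmd in result.prog:
    print(cmd.name, cmd.args.asList())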
def property_grammar():
    ParserElement.setDefaultWhitespaceChars(' ')

    dpi_setting = (Optional('*')('DEFAULT') + INTEGER('DPI') + Suppress('@') + INTEGER('HZ'))('SETTINGS*')
    mount_matrix_row = SIGNED_REAL + ',' + SIGNED_REAL + ',' + SIGNED_REAL
    mount_matrix = (mount_matrix_row + ';' + mount_matrix_row + ';' + mount_matrix_row)('MOUNT_MATRIX')

    props = (('MOUSE_DPI', Group(OneOrMore(dpi_setting))),
             ('MOUSE_WHEEL_CLICK_ANGLE', INTEGER),
             ('MOUSE_WHEEL_CLICK_ANGLE_HORIZONTAL', INTEGER),
             ('MOUSE_WHEEL_CLICK_COUNT', INTEGER),
             ('MOUSE_WHEEL_CLICK_COUNT_HORIZONTAL', INTEGER),
             ('ID_INPUT_TRACKBALL', Literal('1')),
             ('POINTINGSTICK_SENSITIVITY', INTEGER),
             ('POINTINGSTICK_CONST_ACCEL', REAL),
             ('ID_INPUT_TOUCHPAD_INTEGRATION', Or(('internal', 'external'))),
             ('XKB_FIXED_LAYOUT', STRING),
             ('XKB_FIXED_VARIANT', STRING),
             ('ACCEL_MOUNT_MATRIX', mount_matrix),
            )
    fixed_props = [Literal(name)('NAME') - Suppress('=') - val('VALUE')
                   for name, val in props]
    kbd_props = [Regex(r'KEYBOARD_KEY_[0-9a-f]+')('NAME')
                 - Suppress('=') -
                 ('!' ^ (Optional('!') - Word(alphanums + '_')))('VALUE')
                ]
    abs_props = [Regex(r'EVDEV_ABS_[0-9a-f]{2}')('NAME')
                 - Suppress('=') -
                 Word(nums + ':')('VALUE')
                ]

    grammar = Or(fixed_props + kbd_props + abs_props) + EOL

    return grammar
def __init__(self):
    from pyparsing import (ParserElement, StringEnd, LineEnd, Literal,
                           pythonStyleComment, ZeroOrMore, Suppress, Optional,
                           Combine, OneOrMore, Regex, oneOf, QuotedString,
                           Group, ParseException)

    ParserElement.setDefaultWhitespaceChars("\t ")

    EOF = StringEnd()
    EOL = ~EOF + LineEnd()  # EOL must not match on EOF

    escape = Literal("\\")
    comment = pythonStyleComment
    junk = ZeroOrMore(comment | EOL).suppress()

    ## word (i.e: single argument string)
    word = Suppress(escape + EOL + Optional(comment)) \
        | Combine(OneOrMore(
            escape.suppress() + Regex(".") |
            QuotedString("'", escChar='\\', multiline=True) |
            QuotedString('"', escChar='\\', multiline=True) |
            Regex("[^ \t\r\n\f\v\\\\$&<>();\|\'\"`]+") |
            Suppress(escape + EOL)
        ))

    ## redirector (aka bash file redirectors, such as "2>&1" sequences)
    fd_src = Regex("[0-2]").setParseAction(lambda t: int(t[0]))
    fd_dst = Suppress("&") + fd_src
    # "[n]<word" || "[n]<&word" || "[n]<&digit-"
    fd_redir = (Optional(fd_src, 0) + Literal("<")
                | Optional(fd_src, 1) + Literal(">")) \
        + (word | (fd_dst + Optional("-")))
    # "&>word" || ">&word"
    full_redir = (oneOf("&> >&") + word) \
        .setParseAction(lambda t: ("&", ">", t[-1]))
    # "<<<word" || "<<[-]word"
    here_doc = Regex("<<(<|-?)") + word
    # "[n]>>word"
    add_to_file = Optional(fd_src | Literal("&"), 1) + \
        Literal(">>") + word
    # "[n]<>word"
    fd_bind = Optional(fd_src, 0) + Literal("<>") + word

    redirector = (fd_redir | full_redir | here_doc | add_to_file | fd_bind) \
        .setParseAction(lambda token: tuple(token))

    ## single command (args/redir list)
    command = Group(OneOrMore(redirector | word))

    ## logical operators (section splits)
    semicolon = Suppress(";") + junk
    connector = (oneOf("&& || |") + junk) | semicolon

    ## pipeline, aka logical block of interconnected commands
    pipeline = junk + Group(command +
                            ZeroOrMore(connector + command) +
                            Optional(semicolon))

    # define object attributes
    self.LEXER = pipeline.ignore(comment) + EOF
    self.parseException = ParseException
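# Standalone sketch of the fd_redir piece above, to show what the redirector
# tokens look like; the simplified word expression here is illustrative only.
from pyparsing import ParserElement, Regex, Suppress, Optional, Literal, Word, printables

ParserElement.setDefaultWhitespaceChars("\t ")
fd_src = Regex("[0-2]").setParseAction(lambda t: int(t[0]))
fd_dst = Suppress("&") + fd_src
word = Word(printables, excludeChars='&<>;|')
fd_redir = (Optional(fd_src, 0) + Literal("<") |
            Optional(fd_src, 1) + Literal(">")) + (word | (fd_dst + Optional("-")))

print(fd_redir.parseString("2>&1").asList())         # [2, '>', 1]
print(fd_redir.parseString("< input.txt").asList())  # [0, '<', 'input.txt']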
def property_grammar():
    ParserElement.setDefaultWhitespaceChars(' ')

    setting = Optional('*')('DEFAULT') + INTEGER('DPI') + Suppress('@') + INTEGER('HZ')
    props = (
        ('MOUSE_DPI', Group(OneOrMore(setting('SETTINGS*')))),
        ('MOUSE_WHEEL_CLICK_ANGLE', INTEGER),
        ('ID_INPUT_TRACKBALL', Literal('1')),
        ('POINTINGSTICK_SENSITIVITY', INTEGER),
        ('POINTINGSTICK_CONST_ACCEL', REAL),
        ('ID_INPUT_TOUCHPAD_INTEGRATION', Or(('internal', 'external'))),
    )
    fixed_props = [
        Literal(name)('NAME') - Suppress('=') - val('VALUE')
        for name, val in props
    ]
    kbd_props = [
        Regex(r'KEYBOARD_KEY_[0-9a-f]+')('NAME') - Suppress('=') -
        ('!' ^ (Optional('!') - Word(alphanums + '_')))('VALUE')
    ]
    abs_props = [
        Regex(r'EVDEV_ABS_[0-9a-f]{2}')('NAME') - Suppress('=') -
        Word(nums + ':')('VALUE')
    ]

    grammar = Or(fixed_props + kbd_props + abs_props)

    return grammar
def defineParsers():
    #Enable a fast parsing mode with caching.
    ParserElement.enablePackrat()
    #end of line terminates statements, so it is not regular whitespace
    ParserElement.setDefaultWhitespaceChars('\t ')

    func_call = Forward()  #forward declaration because this is a recursive rule

    #The "terminal" rules
    symbol = Word(alphas+'_-', alphanums+'_-') .setParseAction(action_symbol)
    q_symbol = quotedString .setParseAction(action_q_symbol)
    bracket_term = Literal("(").suppress() - func_call \
                   + Literal(")").suppress()
    word = symbol | q_symbol | bracket_term

    #The function call
    #Parse: "foo | bar | baz" or "foo"
    pipeline = (word + ZeroOrMore("|" - word)) .setParseAction(action_pipeline)
    #Parse "foo|bar op1 op2 op3"
    func_call << (pipeline - ZeroOrMore(word)) .setParseAction(action_func_call)

    #High level structure of program
    line = LineEnd() | func_call - LineEnd()  #empty line or function call
    program = ZeroOrMore(line) + StringEnd()  #multiple lines are a program

    #define the comments
    program.ignore('%' + restOfLine)
    #no tab expansion
    program.parseWithTabs()
    #return additional func_call parser to make testing more easy
    return program, func_call
def property_grammar():
    ParserElement.setDefaultWhitespaceChars(' ')

    model_props = [
        Regex(r'LIBINPUT_MODEL_[_0-9A-Z]+')('NAME') - Suppress('=') -
        (Literal('1'))('VALUE')
    ]

    dimension = INTEGER('X') + Suppress('x') + INTEGER('Y')
    sz_props = (
        ('LIBINPUT_ATTR_SIZE_HINT', Group(dimension('SETTINGS*'))),
        ('LIBINPUT_ATTR_RESOLUTION_HINT', Group(dimension('SETTINGS*'))),
    )
    size_props = [
        Literal(name)('NAME') - Suppress('=') - val('VALUE')
        for name, val in sz_props
    ]

    reliability_tags = Or(('reliable', 'write_open'))
    reliability = [
        Literal('LIBINPUT_ATTR_LID_SWITCH_RELIABILITY')('NAME') -
        Suppress('=') - reliability_tags('VALUE')
    ]

    tpkbcombo_tags = Or(('below'))
    tpkbcombo = [
        Literal('LIBINPUT_ATTR_TPKBCOMBO_LAYOUT')('NAME') - Suppress('=') -
        tpkbcombo_tags('VALUE')
    ]

    grammar = Or(model_props + size_props + reliability + tpkbcombo)

    return grammar
def property_grammar():
    ParserElement.setDefaultWhitespaceChars(' ')

    model_props = [Regex(r'LIBINPUT_MODEL_[_0-9A-Z]+')('NAME')
                   - Suppress('=') -
                   (Literal('1'))('VALUE')
                   ]

    dimension = INTEGER('X') + Suppress('x') + INTEGER('Y')
    crange = INTEGER('X') + Suppress(':') + INTEGER('Y')
    vprops = (
        ('LIBINPUT_ATTR_SIZE_HINT', Group(dimension('SETTINGS*'))),
        ('LIBINPUT_ATTR_RESOLUTION_HINT', Group(dimension('SETTINGS*'))),
        ('LIBINPUT_ATTR_PRESSURE_RANGE', Group(crange('SETTINGS*'))),
        ('LIBINPUT_ATTR_TOUCH_SIZE_RANGE', Group(crange('SETTINGS*'))),
        ('LIBINPUT_ATTR_TPKBCOMBO_LAYOUT', Or(('below'))),
        ('LIBINPUT_ATTR_LID_SWITCH_RELIABILITY', Or(('reliable', 'write_open'))),
        ('LIBINPUT_ATTR_KEYBOARD_INTEGRATION', Or(('internal', 'external'))),
    )
    value_props = [Literal(name)('NAME') - Suppress('=') - val('VALUE')
                   for name, val in vprops]

    tprops = (
        ('LIBINPUT_ATTR_PALM_PRESSURE_THRESHOLD', INTEGER('X')),
        ('LIBINPUT_ATTR_PALM_SIZE_THRESHOLD', INTEGER('X')),
    )
    typed_props = [Literal(name)('NAME') - Suppress('=') - val
                   for name, val in tprops]

    grammar = Or(model_props + value_props + typed_props)

    return grammar
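# Standalone sketch of the dimension/range patterns above; INTEGER again
# stands in for the module-level token of the real file.
from pyparsing import ParserElement, Word, nums, Literal, Suppress, Group

ParserElement.setDefaultWhitespaceChars(' ')
INTEGER = Word(nums)
dimension = INTEGER('X') + Suppress('x') + INTEGER('Y')
size_hint = Literal('LIBINPUT_ATTR_SIZE_HINT')('NAME') - Suppress('=') - Group(dimension)('VALUE')

result = size_hint.parseString('LIBINPUT_ATTR_SIZE_HINT=32x20', parseAll=True)
print(result.NAME, result.VALUE.asList())   # LIBINPUT_ATTR_SIZE_HINT ['32', '20']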
def property_grammar():
    ParserElement.setDefaultWhitespaceChars(' ')

    dpi_setting = Group(Optional('*')('DEFAULT') + INTEGER('DPI') + Suppress('@') + INTEGER('HZ'))('SETTINGS*')
    mount_matrix_row = SIGNED_REAL + ',' + SIGNED_REAL + ',' + SIGNED_REAL
    mount_matrix = Group(mount_matrix_row + ';' + mount_matrix_row + ';' + mount_matrix_row)('MOUNT_MATRIX')
    xkb_setting = Optional(Word(alphanums + '+-/@._'))

    props = (
        ('MOUSE_DPI', Group(OneOrMore(dpi_setting))),
        ('MOUSE_WHEEL_CLICK_ANGLE', INTEGER),
        ('MOUSE_WHEEL_CLICK_ANGLE_HORIZONTAL', INTEGER),
        ('MOUSE_WHEEL_CLICK_COUNT', INTEGER),
        ('MOUSE_WHEEL_CLICK_COUNT_HORIZONTAL', INTEGER),
        ('ID_AUTOSUSPEND', Or((Literal('0'), Literal('1')))),
        ('ID_INPUT', Or((Literal('0'), Literal('1')))),
        ('ID_INPUT_ACCELEROMETER', Or((Literal('0'), Literal('1')))),
        ('ID_INPUT_JOYSTICK', Or((Literal('0'), Literal('1')))),
        ('ID_INPUT_KEY', Or((Literal('0'), Literal('1')))),
        ('ID_INPUT_KEYBOARD', Or((Literal('0'), Literal('1')))),
        ('ID_INPUT_MOUSE', Or((Literal('0'), Literal('1')))),
        ('ID_INPUT_POINTINGSTICK', Or((Literal('0'), Literal('1')))),
        ('ID_INPUT_SWITCH', Or((Literal('0'), Literal('1')))),
        ('ID_INPUT_TABLET', Or((Literal('0'), Literal('1')))),
        ('ID_INPUT_TABLET_PAD', Or((Literal('0'), Literal('1')))),
        ('ID_INPUT_TOUCHPAD', Or((Literal('0'), Literal('1')))),
        ('ID_INPUT_TOUCHSCREEN', Or((Literal('0'), Literal('1')))),
        ('ID_INPUT_TRACKBALL', Or((Literal('0'), Literal('1')))),
        ('POINTINGSTICK_SENSITIVITY', INTEGER),
        ('POINTINGSTICK_CONST_ACCEL', REAL),
        ('ID_INPUT_JOYSTICK_INTEGRATION', Or(('internal', 'external'))),
        ('ID_INPUT_TOUCHPAD_INTEGRATION', Or(('internal', 'external'))),
        ('XKB_FIXED_LAYOUT', xkb_setting),
        ('XKB_FIXED_VARIANT', xkb_setting),
        ('XKB_FIXED_MODEL', xkb_setting),
        ('KEYBOARD_LED_NUMLOCK', Literal('0')),
        ('KEYBOARD_LED_CAPSLOCK', Literal('0')),
        ('ACCEL_MOUNT_MATRIX', mount_matrix),
        ('ACCEL_LOCATION', Or(('display', 'base'))),
        ('PROXIMITY_NEAR_LEVEL', INTEGER),
    )
    fixed_props = [
        Literal(name)('NAME') - Suppress('=') - val('VALUE')
        for name, val in props
    ]
    kbd_props = [
        Regex(r'KEYBOARD_KEY_[0-9a-f]+')('NAME') - Suppress('=') -
        ('!' ^ (Optional('!') - Word(alphanums + '_')))('VALUE')
    ]
    abs_props = [
        Regex(r'EVDEV_ABS_[0-9a-f]{2}')('NAME') - Suppress('=') -
        Word(nums + ':')('VALUE')
    ]

    grammar = Or(fixed_props + kbd_props + abs_props) + EOL

    return grammar
def __init__(self):
    from pyparsing import (ParserElement, StringEnd, LineEnd, Literal,
                           pythonStyleComment, ZeroOrMore, Suppress,
                           Optional, Combine, OneOrMore, Regex, oneOf,
                           QuotedString, Group, ParseException)

    ParserElement.setDefaultWhitespaceChars("\t ")

    EOF = StringEnd()
    EOL = ~EOF + LineEnd()  # EOL must not match on EOF

    escape = Literal("\\")
    comment = pythonStyleComment
    junk = ZeroOrMore(comment | EOL).suppress()

    # word (i.e: single argument string)
    word = Suppress(escape + EOL + Optional(comment)) \
        | Combine(OneOrMore(
            escape.suppress() + Regex(".") |
            QuotedString("'", escChar='\\', multiline=True) |
            QuotedString('"', escChar='\\', multiline=True) |
            Regex("[^ \t\r\n\f\v\\\\$&<>();\|\'\"`]+") |
            Suppress(escape + EOL)))

    # redirector (aka bash file redirectors, such as "2>&1" sequences)
    fd_src = Regex("[0-2]").setParseAction(lambda t: int(t[0]))
    fd_dst = Suppress("&") + fd_src
    # "[n]<word" || "[n]<&word" || "[n]<&digit-"
    fd_redir = (Optional(fd_src, 0) + Literal("<") |
                Optional(fd_src, 1) + Literal(">")) + \
        (word | (fd_dst + Optional("-")))
    # "&>word" || ">&word"
    obj = (oneOf("&> >&") + word)
    full_redir = obj.setParseAction(lambda t: ("&", ">", t[-1]))
    # "<<<word" || "<<[-]word"
    here_doc = Regex("<<(<|-?)") + word
    # "[n]>>word"
    add_to_file = (Optional(fd_src | Literal("&"), 1) +
                   Literal(">>") + word)
    # "[n]<>word"
    fd_bind = Optional(fd_src, 0) + Literal("<>") + word

    obj = (fd_redir | full_redir | here_doc | add_to_file | fd_bind)
    redirector = obj.setParseAction(lambda token: tuple(token))

    # single command (args/redir list)
    command = Group(OneOrMore(redirector | word))

    # logical operators (section splits)
    semicolon = Suppress(";") + junk
    connector = (oneOf("&& || |") + junk) | semicolon

    # pipeline, aka logical block of interconnected commands
    pipeline = junk + Group(command +
                            ZeroOrMore(connector + command) +
                            Optional(semicolon))

    # define object attributes
    self.LEXER = pipeline.ignore(comment) + EOF
    self.parseException = ParseException
def read_sets_java(string):
    from pyparsing import nestedExpr, alphas, Word, nums, ParserElement, delimitedList
    ParserElement.setDefaultWhitespaceChars(" ,")
    element = Word(alphas + nums).setParseAction(parse_elem_java)
    elements = delimitedList(element)
    setofsets = nestedExpr(
        "[", "]", content=elements).setParseAction(lambda x: frozenset(x[0]))
    return setofsets.parseString(string).asList()[0]
def __init__(self, showErrors=True, debug=False):
    '''
    Constructor

    Args:
        showErrors(bool): True if errors should be shown/printed
        debug(bool): True if debugging should be enabled
    '''
    self.showError = showErrors
    self.debug = debug
    self.grammar = None
    ParserElement.setDefaultWhitespaceChars(" \t")
def _parse_items(self, source):
    ParserElement.setDefaultWhitespaceChars(' \t\r')
    EOL = LineEnd().suppress()
    comment = Literal('#') + Optional(restOfLine) + EOL
    string = CharsNotIn("\n")
    line = Group(
        Word(alphanums + '-')('key') +
        Literal(':').suppress() +
        Optional(Combine(string + ZeroOrMore(EOL + Literal(' ') + string)))("value") +
        EOL
    )
    group = ZeroOrMore(line)
    group.ignore(comment)

    return group.parseString(source, True)
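# Standalone sketch of the "Key: value" line grammar used above (the method
# itself needs an enclosing instance, so the same expressions are rebuilt
# here on a small invented input).
from pyparsing import (ParserElement, LineEnd, Literal, Optional, restOfLine,
                       CharsNotIn, Group, Word, alphanums, Combine, ZeroOrMore)

ParserElement.setDefaultWhitespaceChars(' \t\r')
EOL = LineEnd().suppress()
comment = Literal('#') + Optional(restOfLine) + EOL
string = CharsNotIn("\n")
line = Group(Word(alphanums + '-')('key') + Literal(':').suppress() +
             Optional(Combine(string + ZeroOrMore(EOL + Literal(' ') + string)))("value") + EOL)
group = ZeroOrMore(line)
group.ignore(comment)

sample = "Package: demo\nVersion: 1.0\n"
for item in group.parseString(sample, True):
    print(item['key'], item['value'].strip())   # Package demo, then Version 1.0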
def PyParsingDefaultWhitespaceChars(whitespace_chars):
    '''Set the given whitespace_chars as pyparsing's default whitespace chars
    while the context manager is active.

    Since ParserElement.DEFAULT_WHITE_CHARS is a global variable, this method
    is not thread-safe (but no pyparsing parser construction is thread-safe
    for the same reason anyway).
    '''
    # A possible solution to this problem:
    # Since the pyparsing code is basically a single big file, we could just
    # copy it (under aspio/vendor or something like that) and have our own
    # "private" version of pyparsing.
    # (TODO: think about this some more and maybe do it)
    previous_whitespace_chars = ParserElement.DEFAULT_WHITE_CHARS
    ParserElement.setDefaultWhitespaceChars(whitespace_chars)
    yield
    ParserElement.setDefaultWhitespaceChars(previous_whitespace_chars)
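# Usage sketch, assuming the generator above is exposed as a context manager
# via contextlib.contextmanager in the original module (the decorator is not
# shown in this snippet).
from pyparsing import Word, alphas

with PyParsingDefaultWhitespaceChars(' \t'):
    # parsers built here treat only spaces and tabs as whitespace,
    # so newlines stay significant
    token = Word(alphas)
# the previous default whitespace characters are restored afterwards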
def build_parser():
    """
    Build a pyparsing parser for our custom topology description language.

    :return: A pyparsing parser.
    :rtype: pyparsing.MatchFirst
    """
    ParserElement.setDefaultWhitespaceChars(' \t')
    nl = Suppress(LineEnd())
    inumber = Word(nums).setParseAction(lambda l, s, t: int(t[0]))
    fnumber = (Combine(
        Optional('-') + Word(nums) + '.' + Word(nums) +
        Optional('E' | 'e' + Optional('-') + Word(nums))
    )).setParseAction(lambda toks: float(toks[0]))
    boolean = (CaselessLiteral('true') | CaselessLiteral('false')
               ).setParseAction(lambda l, s, t: t[0].casefold() == 'true')
    comment = Literal('#') + restOfLine + nl
    text = QuotedString('"')
    identifier = Word(alphas, alphanums + '_')
    empty_line = LineStart() + LineEnd()

    item_list = ((text | fnumber | inumber | boolean) +
                 Optional(Suppress(',')) + Optional(nl))
    custom_list = (Suppress('(') + Optional(nl) + Group(OneOrMore(item_list)) +
                   Optional(nl) + Suppress(')')).setParseAction(lambda tok: tok.asList())

    attribute = Group(
        identifier('key') +
        Suppress(Literal('=')) +
        (custom_list | text | fnumber | inumber | boolean | identifier)('value') +
        Optional(nl)
    )
    attributes = (Suppress(Literal('[')) + Optional(nl) +
                  OneOrMore(attribute) +
                  Suppress(Literal(']')))

    node = identifier('node')
    port = Group(node + Suppress(Literal(':')) + (identifier | inumber)('port'))
    link = Group(port('endpoint_a') + Suppress(Literal('--')) + port('endpoint_b'))

    environment_spec = (attributes + nl).setResultsName('env_spec', listAllMatches=True)
    nodes_spec = (Group(
        Optional(attributes)('attributes') +
        Group(OneOrMore(node))('nodes')
    ) + nl).setResultsName('node_spec', listAllMatches=True)
    ports_spec = (Group(
        Optional(attributes)('attributes') +
        Group(OneOrMore(port))('ports')
    ) + nl).setResultsName('port_spec', listAllMatches=True)
    link_spec = (Group(
        Optional(attributes)('attributes') + link('links')
    ) + nl).setResultsName('link_spec', listAllMatches=True)

    statements = OneOrMore(
        comment |
        link_spec |
        ports_spec |
        nodes_spec |
        environment_spec |
        empty_line
    )
    return statements
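# Usage sketch for build_parser above; the two-line topology below is a guess
# at the intended syntax (an attribute block with a node list, then one link).
topology = '[type=switch] sw1 sw2\nsw1:1 -- sw2:1\n'
parsed = build_parser().parseString(topology, parseAll=True)
print(parsed.node_spec[0].nodes.asList())   # e.g. ['sw1', 'sw2']
print(parsed.link_spec[0].links.asList())   # e.g. [['sw1', 1], ['sw2', 1]]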
def set_delimiters(self, delimiter):
    """Lets you change the delimiter that is used to identify field
    boundaries.

    delimiter: str
        A string containing characters to be used as delimiters. The
        default value is ' \t', which means that spaces and tabs are not
        taken as data but instead mark the boundaries. Note that the
        parser is smart enough to recognize characters within quotes as
        non-delimiters."""
    self.delimiter = delimiter
    if delimiter != "columns":
        ParserElement.setDefaultWhitespaceChars(str(delimiter))
def __init__(self):
    self._generate_kinds()
    ParserElement.setDefaultWhitespaceChars(' \t')
    self._parser = OneOrMore(
        Group(
            Suppress('{') + Word(alphanums) + Suppress('\n') +
            OneOrMore(
                Group(
                    Word(':' + alphanums + '_' + '-') +
                    ZeroOrMore(
                        Word(self._param_value_chrs) ^
                        Suppress('"') + Word(self._param_value_string) + Suppress('"')) +
                    Suppress('\n'))) +
            Suppress('}') + ZeroOrMore(Suppress('\n'))))
def pythonVar(self):
    if not self._pythonVar:
        from pyparsing import (ParserElement, Word, alphas, alphanums,
                               Literal, Suppress, FollowedBy)
        _ws = ' \t'
        ParserElement.setDefaultWhitespaceChars(_ws)
        ident = Word(alphas+"_", alphanums+"_")
        lparen = Literal("(")
        dot = Literal(".")
        dollar = Literal("$")
        self._pythonVar = Suppress(dollar) + ident + ~FollowedBy((dot+ident) | lparen)
        self._pythonVar.setParseAction(self.onPythonVar)
    return self._pythonVar
def pythonVar(self):
    if not self._pythonVar:
        from pyparsing import (ParserElement, Word, alphas, alphanums,
                               Literal, Suppress, FollowedBy)
        _ws = ' \t'
        ParserElement.setDefaultWhitespaceChars(_ws)
        ident = Word(alphas + "_", alphanums + "_")
        lparen = Literal("(")
        dot = Literal(".")
        dollar = Literal("$")
        self._pythonVar = Suppress(dollar) + ident + ~FollowedBy(
            (dot + ident) | lparen)
        self._pythonVar.setParseAction(self.onPythonVar)
    return self._pythonVar
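# Standalone sketch of the "$name not followed by '.' or '('" rule built above
# (the method itself caches the expression and attaches an instance callback).
from pyparsing import ParserElement, Word, alphas, alphanums, Literal, Suppress, FollowedBy

ParserElement.setDefaultWhitespaceChars(' \t')
ident = Word(alphas + "_", alphanums + "_")
pythonVar = Suppress("$") + ident + ~FollowedBy((Literal(".") + ident) | Literal("("))

print([t[0] for t, s, e in pythonVar.scanString("$x + $obj.attr + $fn(1)")])   # ['x']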
def parser():
    global _parser
    if _parser is None:
        ParserElement.setDefaultWhitespaceChars("")
        lbrack = Literal("[")
        rbrack = Literal("]")
        lbrace = Literal("{")
        rbrace = Literal("}")
        lparen = Literal("(")
        rparen = Literal(")")

        reMacro = Suppress("\\") + oneOf(list("dwsZ"))
        escapedChar = ~reMacro + Combine("\\" + oneOf(list(printables)))
        reLiteralChar = "".join(c for c in string.printable if c not in r"\[]{}().*?+|")

        reRange = Combine(lbrack.suppress() + SkipTo(rbrack, ignore=escapedChar) + rbrack.suppress())
        reLiteral = (escapedChar | oneOf(list(reLiteralChar)))
        reDot = Literal(".")
        repetition = ((lbrace + Word(nums).setResultsName("count") + rbrace) |
                      (lbrace + Word(nums).setResultsName("minCount") + "," +
                       Word(nums).setResultsName("maxCount") + rbrace) |
                      oneOf(list("*+?")))

        reExpr = Forward()
        reGroup = (lparen.suppress() +
                   Optional(Literal("?").suppress() + oneOf(list(":P"))).setResultsName("option") +
                   reExpr.setResultsName("expr") +
                   rparen.suppress())

        reTerm = (reLiteral | reRange | reMacro | reDot | reGroup)
        reExpr << operatorPrecedence(reTerm, [
            (repetition, 1, opAssoc.LEFT, create(Repetition)),
            (None, 2, opAssoc.LEFT, create(Sequence)),
            (Suppress('|'), 2, opAssoc.LEFT, create(Alternation)),
        ])

        reGroup.setParseAction(create(Group))
        reRange.setParseAction(create(Range))
        reLiteral.setParseAction(create(Character))
        reMacro.setParseAction(create(Macro))
        reDot.setParseAction(create(Dot))

        _parser = reExpr

    return _parser
def parser():
    global _parser
    if _parser is None:
        ParserElement.setDefaultWhitespaceChars("")
        lbrack = Literal("[")
        rbrack = Literal("]")
        lbrace = Literal("{")
        rbrace = Literal("}")
        lparen = Literal("(")
        rparen = Literal(")")

        reMacro = Suppress("\\") + oneOf(list("dwsZ"))
        escapedChar = ~reMacro + Combine("\\" + oneOf(list(printables)))
        reLiteralChar = "".join(c for c in string.printable if c not in r"\[]{}().*?+|")

        reRange = Combine(lbrack.suppress() + SkipTo(rbrack, ignore=escapedChar) + rbrack.suppress())
        reLiteral = (escapedChar | oneOf(list(reLiteralChar)))
        reDot = Literal(".")
        repetition = (
            (lbrace + Word(nums).setResultsName("count") + rbrace) |
            (lbrace + Word(nums).setResultsName("minCount") + "," +
             Word(nums).setResultsName("maxCount") + rbrace) |
            oneOf(list("*+?"))
        )

        reExpr = Forward()
        reGroup = (lparen.suppress() +
                   Optional(Literal("?").suppress() + oneOf(list(":P"))).setResultsName("option") +
                   reExpr.setResultsName("expr") +
                   rparen.suppress())

        reTerm = (reLiteral | reRange | reMacro | reDot | reGroup)
        reExpr << operatorPrecedence(reTerm, [
            (repetition, 1, opAssoc.LEFT, create(Repetition)),
            (None, 2, opAssoc.LEFT, create(Sequence)),
            (Suppress('|'), 2, opAssoc.LEFT, create(Alternation)),
        ])

        reGroup.setParseAction(create(Group))
        reRange.setParseAction(create(Range))
        reLiteral.setParseAction(create(Character))
        reMacro.setParseAction(create(Macro))
        reDot.setParseAction(create(Dot))

        _parser = reExpr

    return _parser
def pn_from_sis(filename):
    """Loads a PN in SIS format."""
    # definition of PN grammar
    ParserElement.setDefaultWhitespaceChars(" \t")
    id = Word(alphanums+"_\"':-")
    #place = Literal("p") + Word(nums)
    number = Word(nums).setParseAction(lambda tokens: int(tokens[0]))
    newlines = Suppress(OneOrMore(LineEnd()))
    modelName = ".model" + id("modelName") + newlines
    signalNames = ZeroOrMore(Suppress(oneOf(".inputs .outputs .dummy")) + OneOrMore(id) + newlines)("signals")
    arc = id + ZeroOrMore(Group(id + Optional(Suppress("(") + number + Suppress(")"), default=1))) + newlines
    graph = Literal(".graph") + Suppress(OneOrMore(LineEnd())) + OneOrMore(Group(arc))("arcs")
    capacity_list = ZeroOrMore(Group(id + Suppress("=") + number))
    capacity = ".capacity" + capacity_list("capacities") + newlines
    marking_list = ZeroOrMore(Group(id + Optional(Suppress("=") + number, default=1)))
    marking = ".marking" + Suppress("{") + marking_list("marking") + Suppress("}") + newlines
    pn = Optional(newlines) + Optional(modelName) + signalNames + graph + Optional(capacity) + marking + ".end"
    pn.ignore(pythonStyleComment)

    net = PetriNet(filename=filename, format='sis')
    ast = pn.parseFile(filename)
    for t in ast.signals:
        net.add_transition(t)
    #net.name = ast.modelName
    net.set_name(ast.modelName)
    #net.signals.update( ast.signals )
#    tuplelist = [ (m[0],m[1]) for m in ast.capacities ]
#    net.capacities = dict( tuplelist )
#    net.initial_marking = dict( [ (m[0],m[1]) for m in ast.marking ] )
    #print ast.arcs
    transitions = set(net.get_transitions())
    for a in ast.arcs:
        #print a[0]
        if a[0] not in transitions:
            # it's a place
            p = net.add_place(a[0])
            for t in a[1:]:
                net.add_edge(p, t[0], t[1])
        else:
            for t in a[1:]:
                p = net.add_place(t[0])
                net.add_edge(a[0], p, t[1])
    for m in ast.marking:
        net.set_initial_marking(m[0], m[1])
    for m in ast.capacities:
        net.set_capacity(m[0], m[1])
    net.to_initial_marking()
    return net
def rfc2822():
    global _rfc2822
    if _rfc2822 is None:
        ParserElement.setDefaultWhitespaceChars("")
        CRLF = Literal("\r\n")
        ATEXT = Regex("[a-zA-Z0-9!#$%&'*+\-/=\?^_`{|}~]")
        TEXT = Regex("[\x01-\x09\x0b\x0c\x0e-\x7f]")
        QTEXT = Regex("[\x01-\x08\x0b\x0c\x0d-\x1f\x21\x23-\x5b\x5d-\x7f]")
        LOWASCII = Regex("[\x00-\x7f]")
        DTEXT = Regex("[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x5e-\x7f]")
        WSP = Regex("[\x20\x09]")
        CTEXT = Regex("[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x27\x2a-\x5b\x5d-\x7f]")

        obsQp = r"\\" + LOWASCII
        quotedPair = (r"\\" + TEXT) | obsQp
        obsFWS = OneOrMore(WSP) + ZeroOrMore(CRLF + OneOrMore(WSP))
        FWS = (Optional(ZeroOrMore(WSP) + CRLF) + OneOrMore(WSP)) | obsFWS
        comment = Forward()
        ccontent = CTEXT | quotedPair | comment
        comment << "(" + ZeroOrMore(Optional(FWS) + ccontent) + Optional(FWS) + ")"
        CFWS = ZeroOrMore(Optional(FWS) + comment) + ((Optional(FWS) + comment) | FWS)

        atom = Optional(CFWS) + OneOrMore(ATEXT) + Optional(CFWS)
        dotAtomText = OneOrMore(ATEXT) + ZeroOrMore("." + OneOrMore(ATEXT))
        dotAtom = Optional(CFWS) + dotAtomText + Optional(CFWS)
        qcontent = QTEXT | quotedPair
        quotedString = Optional(CFWS) + '"' + ZeroOrMore(Optional(FWS) + qcontent) + Optional(FWS) + '"'
        word = atom | quotedString
        obsLocalPart = word + ZeroOrMore("." + word)
        localPart = dotAtom | quotedString | obsLocalPart
        dcontent = DTEXT | quotedPair
        domainLiteral = Optional(CFWS) + "[" + ZeroOrMore(Optional(FWS) + dcontent) + Optional(FWS) + "]" + Optional(CFWS)
        obsDomain = atom + ZeroOrMore("." + atom)
        domain = dotAtom | domainLiteral | obsDomain
        addrSpec = localPart + "@" + domain
        _rfc2822 = addrSpec

    return _rfc2822
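# Usage sketch for rfc2822 above, assuming the module initialises the cache
# with _rfc2822 = None at import time (not shown in this snippet). The grammar
# yields one token per matched character, so the tokens are joined back up.
addr_spec = rfc2822()
tokens = addr_spec.parseString("user@example.com", parseAll=True)
print("".join(tokens))   # user@example.com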
def read_sets(string):
    """
    >>> read_sets("{}")
    frozenset([])
    >>> read_sets("{1}")
    frozenset([1])
    >>> read_sets("{{}, {}}") # invalid, outer set contains two equal sets
    frozenset([frozenset([])])
    >>> read_sets("{{{1}, {2}}, {3}}")
    frozenset([frozenset([frozenset([2]), frozenset([1])]), frozenset([3])])
    >>> read_sets("{1, 2,3}")
    frozenset([1, 2, 3])
    >>> read_sets("{{1, 2}, {3, 4}}")
    frozenset([frozenset([1, 2]), frozenset([3, 4])])
    >>> read_sets("{a,b,c}")
    frozenset(['a', 'c', 'b'])
    >>> read_sets('[{1,2,3},{a,c,b}]')
    [frozenset([1, 2, 3]), frozenset(['a', 'c', 'b'])]
    >>> read_sets('{a}')
    frozenset(['a'])
    >>> read_sets('{{x1,x2},{x3}}')
    frozenset([frozenset(['x2', 'x1']), frozenset(['x3'])])
    >>> read_sets('{{23gat,24gat}}')
    frozenset([frozenset(['23gat', '24gat'])])
    """
    from pyparsing import nestedExpr, alphas, Word, nums, ParserElement, delimitedList
    ParserElement.setDefaultWhitespaceChars(" ,")
    element = Word(alphas + nums).setParseAction(parse_elem)
    elements = delimitedList(element)
    setofsets = nestedExpr(
        "{", "}", content=elements).setParseAction(lambda x: frozenset(x[0]))
    listofsets = nestedExpr("[", "]", content=setofsets)
    expr = setofsets | listofsets
    return expr.parseString(string).asList()[0]
def property_grammar():
    ParserElement.setDefaultWhitespaceChars(' ')

    dpi_setting = (Optional('*')('DEFAULT') + INTEGER('DPI') + Suppress('@') + INTEGER('HZ'))('SETTINGS*')
    mount_matrix_row = SIGNED_REAL + ',' + SIGNED_REAL + ',' + SIGNED_REAL
    mount_matrix = (mount_matrix_row + ';' + mount_matrix_row + ';' + mount_matrix_row)('MOUNT_MATRIX')

    props = (
        ('MOUSE_DPI', Group(OneOrMore(dpi_setting))),
        ('MOUSE_WHEEL_CLICK_ANGLE', INTEGER),
        ('MOUSE_WHEEL_CLICK_ANGLE_HORIZONTAL', INTEGER),
        ('MOUSE_WHEEL_CLICK_COUNT', INTEGER),
        ('MOUSE_WHEEL_CLICK_COUNT_HORIZONTAL', INTEGER),
        ('ID_INPUT_TRACKBALL', Literal('1')),
        ('MOUSE_WHEEL_TILT_HORIZONTAL', Literal('1')),
        ('MOUSE_WHEEL_TILT_VERTICAL', Literal('1')),
        ('POINTINGSTICK_SENSITIVITY', INTEGER),
        ('POINTINGSTICK_CONST_ACCEL', REAL),
        ('ID_INPUT_TOUCHPAD_INTEGRATION', Or(('internal', 'external'))),
        ('XKB_FIXED_LAYOUT', STRING),
        ('XKB_FIXED_VARIANT', STRING),
        ('KEYBOARD_LED_NUMLOCK', Literal('0')),
        ('KEYBOARD_LED_CAPSLOCK', Literal('0')),
        ('ACCEL_MOUNT_MATRIX', mount_matrix),
    )
    fixed_props = [
        Literal(name)('NAME') - Suppress('=') - val('VALUE')
        for name, val in props
    ]
    kbd_props = [
        Regex(r'KEYBOARD_KEY_[0-9a-f]+')('NAME') - Suppress('=') -
        ('!' ^ (Optional('!') - Word(alphanums + '_')))('VALUE')
    ]
    abs_props = [
        Regex(r'EVDEV_ABS_[0-9a-f]{2}')('NAME') - Suppress('=') -
        Word(nums + ':')('VALUE')
    ]

    grammar = Or(fixed_props + kbd_props + abs_props) + EOL

    return grammar
def property_grammar():
    ParserElement.setDefaultWhitespaceChars(' ')

    model_props = [Regex(r'LIBINPUT_MODEL_[_0-9A-Z]+')('NAME')
                   - Suppress('=') -
                   (Literal('1'))('VALUE')
                   ]

    dimension = INTEGER('X') + Suppress('x') + INTEGER('Y')
    sz_props = (
        ('LIBINPUT_ATTR_SIZE_HINT', Group(dimension('SETTINGS*'))),
        ('LIBINPUT_ATTR_RESOLUTION_HINT', Group(dimension('SETTINGS*'))),
    )
    size_props = [Literal(name)('NAME') - Suppress('=') - val('VALUE')
                  for name, val in sz_props]

    grammar = Or(model_props + size_props)

    return grammar
def read_sets(string):
    """
    >>> read_sets("{}")
    frozenset([])
    >>> read_sets("{1}")
    frozenset([1])
    >>> read_sets("{{}, {}}") # invalid, outer set contains two equal sets
    frozenset([frozenset([])])
    >>> read_sets("{{{1}, {2}}, {3}}")
    frozenset([frozenset([frozenset([2]), frozenset([1])]), frozenset([3])])
    >>> read_sets("{1, 2,3}")
    frozenset([1, 2, 3])
    >>> read_sets("{{1, 2}, {3, 4}}")
    frozenset([frozenset([1, 2]), frozenset([3, 4])])
    >>> read_sets("{a,b,c}")
    frozenset(['a', 'c', 'b'])
    >>> read_sets('[{1,2,3},{a,c,b}]')
    [frozenset([1, 2, 3]), frozenset(['a', 'c', 'b'])]
    >>> read_sets('{a}')
    frozenset(['a'])
    >>> read_sets('{{x1,x2},{x3}}')
    frozenset([frozenset(['x2', 'x1']), frozenset(['x3'])])
    >>> read_sets('{{23gat,24gat}}')
    frozenset([frozenset(['23gat', '24gat'])])
    """
    from pyparsing import nestedExpr, alphas, Word, nums, ParserElement, delimitedList
    ParserElement.setDefaultWhitespaceChars(" ,")
    element = Word(alphas + nums).setParseAction(parse_elem)
    elements = delimitedList(element)
    setofsets = nestedExpr("{", "}", content=elements).setParseAction(lambda x: frozenset(x[0]))
    listofsets = nestedExpr("[", "]", content=setofsets)
    expr = setofsets | listofsets
    return expr.parseString(string).asList()[0]
def property_grammar():
    ParserElement.setDefaultWhitespaceChars(' ')

    model_props = [
        Regex(r'LIBINPUT_MODEL_[_0-9A-Z]+')('NAME') - Suppress('=') -
        (Literal('1'))('VALUE')
    ]

    dimension = INTEGER('X') + Suppress('x') + INTEGER('Y')
    sz_props = (
        ('LIBINPUT_ATTR_SIZE_HINT', Group(dimension('SETTINGS*'))),
        ('LIBINPUT_ATTR_RESOLUTION_HINT', Group(dimension('SETTINGS*'))),
    )
    size_props = [
        Literal(name)('NAME') - Suppress('=') - val('VALUE')
        for name, val in sz_props
    ]

    grammar = Or(model_props + size_props)

    return grammar
def property_grammar():
    ParserElement.setDefaultWhitespaceChars(' ')

    model_props = [
        Regex(r'LIBINPUT_MODEL_[_0-9A-Z]+')('NAME') - Suppress('=') -
        (Literal('1'))('VALUE')
    ]

    dimension = INTEGER('X') + Suppress('x') + INTEGER('Y')
    crange = INTEGER('X') + Suppress(':') + INTEGER('Y')
    vprops = (
        ('LIBINPUT_ATTR_SIZE_HINT', Group(dimension('SETTINGS*'))),
        ('LIBINPUT_ATTR_RESOLUTION_HINT', Group(dimension('SETTINGS*'))),
        ('LIBINPUT_ATTR_PRESSURE_RANGE', Group(crange('SETTINGS*'))),
        ('LIBINPUT_ATTR_TOUCH_SIZE_RANGE', Group(crange('SETTINGS*'))),
        ('LIBINPUT_ATTR_TPKBCOMBO_LAYOUT', Or(('below'))),
        ('LIBINPUT_ATTR_LID_SWITCH_RELIABILITY', Or(('reliable', 'write_open'))),
        ('LIBINPUT_ATTR_KEYBOARD_INTEGRATION', Or(('internal', 'external'))),
        ('LIBINPUT_ATTR_TRACKPOINT_RANGE', INTEGER('Y')),
        ('LIBINPUT_ATTR_THUMB_PRESSURE_THRESHOLD', INTEGER('Y')),
    )
    value_props = [
        Literal(name)('NAME') - Suppress('=') - val('VALUE')
        for name, val in vprops
    ]

    tprops = (
        ('LIBINPUT_ATTR_PALM_PRESSURE_THRESHOLD', INTEGER('X')),
        ('LIBINPUT_ATTR_PALM_SIZE_THRESHOLD', INTEGER('X')),
    )
    typed_props = [
        Literal(name)('NAME') - Suppress('=') - val
        for name, val in tprops
    ]

    grammar = Or(model_props + value_props + typed_props)

    return grammar
def ts_from_file(filename):
    """Loads a TS (possibly extended with state frequencies) in SIS format."""
    # definition of TS grammar
    ParserElement.setDefaultWhitespaceChars(" \t")
    id = Word(alphanums+"_\"':-")
    #place = Literal("p") + Word(nums)
    number = Word(nums).setParseAction(lambda tokens: int(tokens[0]))
    newlines = Suppress(OneOrMore(LineEnd()))
    modelName = ".model" + id("modelName") + newlines
    signalNames = ZeroOrMore(Suppress(oneOf(".inputs .outputs .dummy")) + OneOrMore(id) + newlines)("signals")
    arc = id + id + id + newlines
    graph = Literal(".state graph") + Suppress(OneOrMore(LineEnd())) + OneOrMore(Group(arc))("arcs")
    frequency_list = ZeroOrMore(Group(id + number) + newlines)
    frequency = ".frequencies" + Suppress(OneOrMore(LineEnd())) + frequency_list("frequencies")
    marking_list = ZeroOrMore(id)
    marking = ".marking" + Suppress("{") + marking_list("marking") + Suppress("}") + newlines
    ts_grammar = Optional(newlines) + Optional(modelName) + signalNames + graph + marking + Optional(frequency) + ".end"
    ts_grammar.ignore(pythonStyleComment)

    try:
        ast = ts_grammar.parseFile(filename)
    except ParseException as pe:
        print(pe)
        raise pe
def parser():
    global _parser
    if _parser is None:
        ParserElement.setDefaultWhitespaceChars("")
        lbrack, rbrack, lbrace, rbrace, lparen, rparen, colon, qmark = map(Literal, "[]{}():?")

        reMacro = Combine("\\" + oneOf(list("dws")))
        escapedChar = ~reMacro + Combine("\\" + oneOf(list(printables)))
        reLiteralChar = "".join(c for c in printables if c not in r"\[]{}().*?+|") + " \t"

        reRange = Combine(lbrack + SkipTo(rbrack, ignore=escapedChar) + rbrack)
        reLiteral = (escapedChar | oneOf(list(reLiteralChar)))
        reNonCaptureGroup = Suppress("?:")
        reDot = Literal(".")
        repetition = (
            (lbrace + Word(nums)("count") + rbrace) |
            (lbrace + Word(nums)("minCount") + "," + Word(nums)("maxCount") + rbrace) |
            oneOf(list("*+?"))
        )

        reRange.setParseAction(handleRange)
        reLiteral.setParseAction(handleLiteral)
        reMacro.setParseAction(handleMacro)
        reDot.setParseAction(handleDot)

        reTerm = (reLiteral | reRange | reMacro | reDot | reNonCaptureGroup)
        reExpr = infixNotation(
            reTerm,
            [
                (repetition, 1, opAssoc.LEFT, handleRepetition),
                (None, 2, opAssoc.LEFT, handleSequence),
                (Suppress('|'), 2, opAssoc.LEFT, handleAlternative),
            ]
        )
        _parser = reExpr

    return _parser
def property_grammar():
    ParserElement.setDefaultWhitespaceChars(' ')

    model_props = [Regex(r'LIBINPUT_MODEL_[_0-9A-Z]+')('NAME')
                   - Suppress('=') -
                   (Literal('1'))('VALUE')
                   ]

    dimension = INTEGER('X') + Suppress('x') + INTEGER('Y')
    sz_props = (
        ('LIBINPUT_ATTR_SIZE_HINT', Group(dimension('SETTINGS*'))),
        ('LIBINPUT_ATTR_RESOLUTION_HINT', Group(dimension('SETTINGS*'))),
    )
    size_props = [Literal(name)('NAME') - Suppress('=') - val('VALUE')
                  for name, val in sz_props]

    reliability_tags = Or(('reliable', 'write_open'))
    reliability = [Literal('LIBINPUT_ATTR_LID_SWITCH_RELIABILITY')('NAME')
                   - Suppress('=') -
                   reliability_tags('VALUE')]

    tpkbcombo_tags = Or(('below'))
    tpkbcombo = [Literal('LIBINPUT_ATTR_TPKBCOMBO_LAYOUT')('NAME')
                 - Suppress('=') -
                 tpkbcombo_tags('VALUE')]

    pressure_range = INTEGER('X') + Suppress(':') + INTEGER('Y')
    pressure_prop = [Literal('LIBINPUT_ATTR_PRESSURE_RANGE')('NAME')
                     - Suppress('=') -
                     Group(pressure_range('SETTINGS*'))]

    grammar = Or(model_props + size_props + reliability + tpkbcombo +
                 pressure_prop)

    return grammar
def __init__(self):
    # By default pyparsing collapses whitespace characters.
    # Markdown cares about whitespace containing double newlines, so we
    # can't collapse newlines.
    ParserElement.setDefaultWhitespaceChars("")
    grammar = (self._parser_piece_is_image() +
               self._parser_piece_text() +
               self._parser_piece_destination_and_title())

    def parse_action(_s, _l, toks):
        token = toks
        is_image = token.is_image
        title = token.title
        text = token.text
        destination = token.destination
        # Use self.scan_string not grammar.scan_string
        # so that parse actions attached to LinkParser fire for the nested
        # links, which seems desirable.
        text_links = tuple(self.scan_string(text))
        link = MarkdownLink(
            text=text,
            destination=destination,
            title=title,
            is_image=is_image,
            text_links=text_links,
        )
        return ParseResults.from_dict({"link": link})

    grammar.setParseAction(parse_action)
    restore_initial_default_whitespace_chars()
    super().__init__(grammar)
def set_default_white_spaces():
    default = ParserElement.DEFAULT_WHITE_CHARS
    ParserElement.setDefaultWhitespaceChars(' \t')
    yield
    ParserElement.setDefaultWhitespaceChars(default)
def initialize(self):
    ParserElement.setDefaultWhitespaceChars(' \t\r')

    integer = Regex(r"[+-]?\d+") \
        .setParseAction(lambda s, l, t: [int(t[0])])
    number = Regex(r"[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?") \
        .setParseAction(lambda s, l, t: [float(t[0])])
    color = Regex(r"#([0-9a-fA-F]{6})")
    angle = "'" + Regex(r"(360|3[0-5][0-9]|[12][0-9]{2}|[0-9]{1,2})") \
        .setParseAction(lambda s, l, t: [int(t[0])])
    alpha = "'" + Regex(r"(360|3[0-5][0-9]|[12][0-9]{2}|[0-9]{1,2})") \
        .setParseAction(lambda s, l, t: [int(t[0])])
    variable = Word(alphas, exact=1).setParseAction(self.addVar)
    colon = Literal(":").suppress()
    comma = Literal(",")
    lBrace = Literal("(")
    rBrace = Literal(")")
    lBracket = Literal("[")
    rBracket = Literal("]")
    lAngle = Literal("<")
    rAngle = Literal(">")
    plus = Literal("+")
    minus = Literal("-")
    FTerm = Literal("F")
    fTerm = Literal("f")
    ZTerm = Literal("Z")
    zTerm = Literal("z")
    xTerm = Literal("x")
    cTerm = Literal("c")

    eol = OneOrMore(LineEnd()).suppress()
    param = (angle | color | "!" + number | "|" + number)
    self.pList = lBrace + param + ZeroOrMore(comma + param) + rBrace
    literal = ((lBracket + (variable + Optional(self.pList) |
                            plus + Optional(self.pList) |
                            minus + Optional(self.pList)) + rBracket) |
               (variable + Optional(self.pList) |
                plus + Optional(self.pList) |
                minus + Optional(self.pList)))
    terminal = (ZTerm | zTerm | FTerm | fTerm | xTerm | cTerm |
                plus | minus | lBracket | rBracket)
    lprod = ((OneOrMore(terminal) + lAngle + variable + rAngle + OneOrMore(terminal)) |
             (OneOrMore(terminal) + lAngle + variable) |
             (variable + rAngle + OneOrMore(terminal)) |
             variable)
    rProd = OneOrMore(literal | terminal)
    comment = Suppress((LineStart() + "#" + SkipTo(eol, include=True)))
    rules = ((lprod + Literal("=") + rProd + eol).setParseAction(self.addRule) |
             comment)
    defaults = ((("Dimensions" + colon + integer + comma + integer) |
                 ("Position" + colon + integer + comma + integer) |
                 ("Iterations" + colon + integer) |
                 ("Angle" + colon + angle) |
                 ("Linelength" + colon + number) |
                 ("Linewidth" + colon + number) |
                 ("Linecolor" + colon + color) |
                 ("Background" + colon + color) |
                 ("Axiom" + colon + rProd)) + eol).setParseAction(self.setAttribute)
    header = (defaults | comment)
    self.grammar = Suppress(ZeroOrMore(LineEnd())) \
        + ZeroOrMore(header) \
        + OneOrMore(rules)

    try:
        L = self.grammar.parseString(self.stream)
    except ParseException as err:
        print(err.line)
        print(" " * (err.column - 1) + "^")
        print(err)
def parser(text):
    cvtTuple = lambda toks: tuple(toks.asList())
    cvtRaw = lambda toks: RawString(' '.join(map(str, toks.asList())))
    #cvtDict = lambda toks: dict(toks.asList())
    cvtGlobDict = lambda toks: GlobDict(toks.asList())
    cvtDict = cvtGlobDict
    extractText = lambda s, l, t: RawString(s[t._original_start:t._original_end])

    def pythonize(toks):
        s = toks[0]
        if s == 'true':
            return True
        elif s == 'false':
            return False
        elif s == 'none':
            return [None]
        elif s.isdigit():
            return int(s)
        elif re.match('(?i)^-?(\d+\.?e\d+|\d+\.\d*|\.\d+)$', s):
            return float(s)
        return toks[0]

    def noneDefault(s, loc, t):
        return t if len(t) else [RawEOL]

    # define punctuation as suppressed literals
    lbrace, rbrace = map(Suppress, "{}")

    identifier = Word(printables, excludeChars='{}"\'')
    quotedStr = QuotedString('"', escChar='\\', multiline=True) | \
        QuotedString('\'', escChar='\\', multiline=True)
    quotedIdentifier = QuotedString('"', escChar='\\', unquoteResults=False) | \
        QuotedString('\'', escChar='\\', unquoteResults=False)

    dictStr = Forward()
    setStr = Forward()
    objStr = Forward()

    #anyIdentifier = identifier | quotedIdentifier
    oddIdentifier = identifier + quotedIdentifier
    dictKey = dictStr | quotedStr | \
        Combine(oddIdentifier).setParseAction(cvtRaw)
    dictKey.setParseAction(cvtRaw)

    dictValue = quotedStr | dictStr | setStr | \
        Combine(oddIdentifier).setParseAction(cvtRaw)

    if OLD_STYLE_KEYS:
        dictKey |= Combine(identifier + ZeroOrMore(
            White(' ') + (identifier + ~FollowedBy(Optional(White(' ')) + LineEnd()))))
        dictValue |= identifier.setParseAction(pythonize)
    else:
        dictKey |= identifier
        dictValue |= delimitedList(identifier | quotedIdentifier,
                                   delim=White(' '), combine=True).setParseAction(pythonize)

    ParserElement.setDefaultWhitespaceChars(' \t')
    #dictEntry = Group(Combine(OneOrMore(identifier | quotedIdentifier)).setParseAction(cvtRaw) +
    dictEntry = Group(dictKey +
                      Optional(White(' ').suppress() + dictValue).setParseAction(noneDefault) +
                      Optional(White(' ').suppress()) +
                      LineEnd().suppress())
    #dictEntry = Group(SkipTo(dictKey + LineEnd() + dictKey))
    dictStr << (lbrace + ZeroOrMore(dictEntry) + rbrace)
    dictStr.setParseAction(cvtDict)

    ParserElement.setDefaultWhitespaceChars(' \t\r\n')
    setEntry = identifier.setParseAction(pythonize) | quotedString.setParseAction(removeQuotes)
    setStr << (lbrace + delimitedList(setEntry, delim=White()) + rbrace)
    setStr.setParseAction(cvtTuple)

    # TODO: take other literals as arguments
    blobObj = Group(((Literal('ltm') + Literal('rule') + identifier) |
                     (Literal('rule') + identifier)).setParseAction(cvtRaw) +
                    originalTextFor(nestedExpr('{', '}')).setParseAction(extractText))

    objEntry = Group(OneOrMore(identifier | quotedIdentifier).setParseAction(cvtRaw) +
                     Optional(dictStr).setParseAction(noneDefault))
    objStr << (Optional(delimitedList(blobObj | objEntry, delim=LineEnd())))
    objStr.setParseAction(cvtGlobDict)
    #objStr.setParseAction(cvtTuple)
    objStr.ignore(pythonStyleComment)

    return objStr.parseString(text)[0]
def translate_cadp(cex, info):
    def pprint_init_agent(args):
        tid, iface = args[1], args[2][1:]
        agent = pprint_agent(info, tid)
        init = "".join(
            f"{agent}:\t{info.pprint_assign('I', int(k), v)}\n"
            for k, v in enumerate(iface))
        if len(args) == 4:
            return init
        lstig = args[3][1:]
        init += "".join(
            f"{agent}:\t{info.pprint_assign('L', int(k), v[1])},{v[2]}\n"
            for k, v in enumerate(lstig))
        return init

    def pprint_init_env(args):
        return "".join(
            f"\t{info.pprint_assign('E', int(k), v)}\n"
            for k, v in enumerate(args[1:]))

    lines = cex.split('\n')
    first_line = next(
        i + 1 for i, l in enumerate(lines) if "<initial state>" in l)
    last_line = next(
        i for i, l in enumerate(lines[first_line:], first_line)
        if "<goal state>" in l or "<deadlock>" in l)
    lines = [l[1:-1] for l in lines[first_line:last_line] if l and l[0] == '"']  # noqa: E501, E741

    ParserElement.setDefaultWhitespaceChars(' \t\n\x01\x02')
    BOOLEAN = (Keyword("TRUE").setParseAction(replaceWith(True)) |
               Keyword("FALSE").setParseAction(replaceWith(False)))
    NAME = Word(alphanums)
    LPAR, RPAR = map(Suppress, "()")
    RECORD = Forward()
    OBJ = (ppc.number() | BOOLEAN | Group(RECORD))
    RECORD <<= (NAME + LPAR + delimitedList(OBJ) + RPAR)
    QUOTES = dblQuotedString.setParseAction(removeQuotes)
    ASGN = NAME + ZeroOrMore(Suppress("!") + OBJ)
    MONITOR = (Keyword("MONITOR") + Suppress("!") + (BOOLEAN | QUOTES))
    STEP = ppc.number() | ASGN | MONITOR

    yield "<initialization>\n"
    for l in lines:  # noqa: E741
        if "invisible transition" in l:
            # skip internal moves
            continue
        elif "<deadlock>" in l:
            yield l
            continue
        step = STEP.parseString(l, parseAll=True)
        if step[0] == "ENDINIT":
            yield "<end initialization>\n"
        elif step[0] == "MONITOR" and step[1] == "deadlock":
            yield "<deadlock>\n"
        elif step[0] == "MONITOR":
            yield f"""<property {"satisfied" if step[1] else "violated"}>\n"""
        elif step[0] == "E":
            agent = pprint_agent(info, step[1])
            yield f"{step.asList()}"
            yield f"{agent}:\t{info.pprint_assign(*step[:3])}\n"
        elif step[0] == "ATTR":
            agent = pprint_agent(info, step[1])
            yield f"{agent}:\t{info.pprint_assign('I', *step[2:4])}\n"
        elif step[0] == "L":
            agent = pprint_agent(info, step[1])
            if len(step) > 4:
                # This was a stigmergic message sent from another agent
                yield f"{agent}:\t{info.pprint_assign('L', *step[2:4])}\t(from {pprint_agent(info, step[4])})\n"  # noqa: E501
            else:
                # This was an assignment from the agent itself
                yield f"{agent}:\t{info.pprint_assign('L', *step[2:4])}\n"
        else:
            yield f"<could not parse: {step}>\n"
def parse(content, basedir=None, resolve=True):
    """parse a HOCON content

    :param content: HOCON content to parse
    :type content: basestring
    :param resolve: If true, resolve substitutions
    :type resolve: boolean
    :return: a ConfigTree or a list
    """

    def norm_string(value):
        for k, v in ConfigParser.REPLACEMENTS.items():
            value = value.replace(k, v)
        return value

    def unescape_string(tokens):
        return ConfigUnquotedString(norm_string(tokens[0]))

    def parse_multi_string(tokens):
        # remove the first and last 3 "
        return tokens[0][3:-3]

    def convert_number(tokens):
        n = tokens[0]
        try:
            return int(n)
        except ValueError:
            return float(n)

    # ${path} or ${?path} for optional substitution
    SUBSTITUTION_PATTERN = "\$\{(?P<optional>\?)?(?P<variable>[^}]+)\}(?P<ws>[ \t]*)"

    def create_substitution(instring, loc, token):
        # remove the ${ and }
        match = re.match(SUBSTITUTION_PATTERN, token[0])
        variable = match.group('variable')
        ws = match.group('ws')
        optional = match.group('optional') == '?'
        substitution = ConfigSubstitution(variable, optional, ws, instring, loc)
        return substitution

    # ${path} or ${?path} for optional substitution
    STRING_PATTERN = '(")(?P<value>[^"]*)\\1(?P<ws>[ \t]*)'

    def create_quoted_string(instring, loc, token):
        # remove the ${ and }
        match = re.match(STRING_PATTERN, token[0])
        value = norm_string(match.group('value'))
        ws = match.group('ws')
        return ConfigQuotedString(value, ws, instring, loc)

    def include_config(token):
        url = None
        file = None
        if len(token) == 1:  # include "test"
            value = token[0].value if isinstance(token[0], ConfigQuotedString) else token[0]
            if value.startswith("http://") or value.startswith("https://") or value.startswith("file://"):
                url = value
            else:
                file = value
        elif len(token) == 2:  # include url("test") or file("test")
            value = token[1].value if isinstance(token[1], ConfigQuotedString) else token[1]
            if token[0] == 'url':
                url = value
            else:
                file = value

        if url is not None:
            logger.debug('Loading config from url %s', url)
            obj = ConfigFactory.parse_URL(url, resolve=False)

        if file is not None:
            path = file if basedir is None else os.path.join(basedir, file)
            logger.debug('Loading config from file %s', path)
            obj = ConfigFactory.parse_file(path, required=False, resolve=False)

        return ConfigInclude(obj if isinstance(obj, list) else obj.items())

    ParserElement.setDefaultWhitespaceChars(' \t')

    assign_expr = Forward()
    true_expr = Keyword("true", caseless=True).setParseAction(replaceWith(True))
    false_expr = Keyword("false", caseless=True).setParseAction(replaceWith(False))
    null_expr = Keyword("null", caseless=True).setParseAction(replaceWith(NoneValue()))
    key = QuotedString('"', escChar='\\', unquoteResults=False) | Word(alphanums + alphas8bit + '._- ')

    eol = Word('\n\r').suppress()
    eol_comma = Word('\n\r,').suppress()
    comment = (Literal('#') | Literal('//')) - SkipTo(eol | StringEnd())
    comment_eol = Suppress(Optional(eol_comma) + comment)
    comment_no_comma_eol = (comment | eol).suppress()
    number_expr = Regex(
        '[+-]?(\d*\.\d+|\d+(\.\d+)?)([eE]\d+)?(?=$|[ \t]*([\$\}\],#\n\r]|//))',
        re.DOTALL).setParseAction(convert_number)

    # multi line string using """
    # Using fix described in http://pyparsing.wikispaces.com/share/view/3778969
    multiline_string = Regex('""".*?"*"""', re.DOTALL | re.UNICODE).setParseAction(parse_multi_string)
    # single quoted line string
    quoted_string = Regex('".*?"[ \t]*', re.UNICODE).setParseAction(create_quoted_string)
    # unquoted string that takes the rest of the line until an optional comment
    # we support .properties multiline support which is like this:
    # line1 \
    # line2 \
    # so a backslash precedes the \n
    unquoted_string = Regex('(?:\\\\|[^\[\{\s\]\}#,=\$])+[ \t]*').setParseAction(unescape_string)
    substitution_expr = Regex('[ \t]*\$\{[^\}]+\}[ \t]*').setParseAction(create_substitution)
    string_expr = multiline_string | quoted_string | unquoted_string

    value_expr = number_expr | true_expr | false_expr | null_expr | string_expr

    include_expr = (Keyword("include", caseless=True).suppress() + (
        quoted_string | (
            (Keyword('url') | Keyword('file')) - Literal('(').suppress() - quoted_string - Literal(')').suppress()))) \
        .setParseAction(include_config)

    root_dict_expr = Forward()
    dict_expr = Forward()
    list_expr = Forward()
    multi_value_expr = ZeroOrMore(comment_eol | include_expr | substitution_expr | dict_expr | list_expr | value_expr | (Literal('\\') - eol).suppress())
    # for a dictionary : or = is optional
    # last zeroOrMore is because we can have t = {a:4} {b: 6} {c: 7} which is dictionary concatenation
    inside_dict_expr = ConfigTreeParser(ZeroOrMore(comment_eol | include_expr | assign_expr | eol_comma))
    inside_root_dict_expr = ConfigTreeParser(ZeroOrMore(comment_eol | include_expr | assign_expr | eol_comma), root=True)
    dict_expr << Suppress('{') - inside_dict_expr - Suppress('}')
    root_dict_expr << Suppress('{') - inside_root_dict_expr - Suppress('}')
    list_entry = ConcatenatedValueParser(multi_value_expr)
    list_expr << Suppress('[') - ListParser(list_entry - ZeroOrMore(eol_comma - list_entry)) - Suppress(']')

    # special case when we have a value assignment where the string can potentially be the remainder of the line
    assign_expr << Group(
        key -
        ZeroOrMore(comment_no_comma_eol) -
        (dict_expr | (Literal('=') | Literal(':') | Literal('+=')) -
         ZeroOrMore(comment_no_comma_eol) - ConcatenatedValueParser(multi_value_expr))
    )

    # the file can be { ... } where {} can be omitted or []
    config_expr = ZeroOrMore(comment_eol | eol) + (
        list_expr | root_dict_expr | inside_root_dict_expr) + ZeroOrMore(comment_eol | eol_comma)
    config = config_expr.parseString(content, parseAll=True)[0]
    if resolve:
        ConfigParser.resolve_substitutions(config)
    return config
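# Usage sketch: in pyhocon this parser sits behind ConfigFactory, so typical
# callers go through the public helpers rather than calling parse() directly.
from pyhocon import ConfigFactory

conf = ConfigFactory.parse_string('a { b = 1 }')
print(conf['a']['b'])   # 1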
from pyparsing import ParseException, ParseSyntaxException, ParserElement

from pydbml.definitions.index import (index_type, index_setting,
                                      index_settings, subject,
                                      single_index_syntax,
                                      composite_index_syntax,
                                      index, indexes)
from unittest import TestCase

ParserElement.setDefaultWhitespaceChars(' \t\r')


class TestIndexType(TestCase):
    def test_correct(self):
        val = 'Type: BTREE'
        res = index_type.parseString(val, parseAll=True)
        self.assertEqual(res['type'], 'btree')
        val2 = 'type:\nhash'
        res2 = index_type.parseString(val2, parseAll=True)
        self.assertEqual(res2['type'], 'hash')

    def test_incorrect(self):
        val = 'type: wrong'
        with self.assertRaises(ParseSyntaxException):
            index_type.parseString(val, parseAll=True)


class TestIndexSetting(TestCase):
    def test_unique(self):
        val = 'unique'
        res = index_setting.parseString(val, parseAll=True)
        self.assertEqual(res['unique'], 'unique')
def braces_parser(text, opener=BLOB_OPENER, closer=BLOB_CLOSER):
    cvtTuple = lambda toks: tuple(toks.asList())  # @IgnorePep8
    cvtRaw = lambda toks: RawString(' '.join(map(str, toks.asList())))  # @IgnorePep8
    cvtDict = lambda toks: GlobDict(toks.asList())  # @IgnorePep8
    extractText = lambda s, l, t: RawString(s[t._original_start:t._original_end])  # @IgnorePep8

    def pythonize(toks):
        s = toks[0]
        if s == 'true':
            return True
        elif s == 'false':
            return False
        elif s == 'none':
            return [None]
        elif s.isdigit():
            return int(s)
        elif re.match('(?i)^-?(\d+\.?e\d+|\d+\.\d*|\.\d+)$', s):
            return float(s)
        return toks[0]

    def noneDefault(s, loc, t):
        return t if len(t) else [RawEOL]

    # define punctuation as suppressed literals
    lbrace, rbrace = map(Suppress, "{}")

    identifier = Word(printables, excludeChars='{}"\'')
    quotedStr = QuotedString('"', escChar='\\', multiline=True) | \
        QuotedString('\'', escChar='\\', multiline=True)
    quotedIdentifier = QuotedString('"', escChar='\\', unquoteResults=False) | \
        QuotedString('\'', escChar='\\', unquoteResults=False)

    dictStr = Forward()
    setStr = Forward()
    objStr = Forward()

    oddIdentifier = identifier + quotedIdentifier
    dictKey = quotedIdentifier | \
        Combine(oddIdentifier).setParseAction(cvtRaw)
    dictKey.setParseAction(cvtRaw)

    dictValue = quotedStr | dictStr | setStr | \
        Combine(oddIdentifier).setParseAction(cvtRaw)

    if OLD_STYLE_KEYS:
        dictKey |= Combine(identifier + ZeroOrMore(
            White(' ') + (identifier + ~FollowedBy(Optional(White(' ')) + LineEnd()))))
        dictValue |= identifier.setParseAction(pythonize)
    else:
        dictKey |= identifier
        dictValue |= Or([
            delimitedList(identifier | quotedIdentifier,
                          delim=White(' '), combine=True),
            Combine(delimitedList(identifier | quotedIdentifier,
                                  delim=White(' '), combine=True) +
                    Optional(White(' ') +
                             originalTextFor(nestedExpr('{', '}')).setParseAction(extractText))
                    ).setParseAction(cvtRaw)
        ])

    ParserElement.setDefaultWhitespaceChars(' \t')
    dictEntry = Group(dictKey +
                      Optional(White(' ').suppress() + dictValue).setParseAction(noneDefault) +
                      Optional(White(' ').suppress()) +
                      LineEnd().suppress())
    dictStr << (lbrace + ZeroOrMore(dictEntry) + rbrace)
    dictStr.setParseAction(cvtDict)

    ParserElement.setDefaultWhitespaceChars(' \t\r\n')
    setEntry = identifier.setParseAction(pythonize) | \
        quotedString.setParseAction(removeQuotes) | dictStr
    setStr << (lbrace + delimitedList(setEntry, delim=White()) + rbrace)
    setStr.setParseAction(cvtTuple)

    objEntry = dictStr.ignore(pythonStyleComment)
    objStr << delimitedList(objEntry, delim=LineEnd())

    return objStr.parseString(text)[0]
def parse(cls, content, basedir=None, resolve=True, unresolved_value=DEFAULT_SUBSTITUTION):
    """parse a HOCON content

    :param content: HOCON content to parse
    :type content: basestring
    :param resolve: if true, resolve substitutions
    :type resolve: boolean
    :param unresolved_value: assigned value to unresolved substitution.
        If overridden with a default value, it will replace all unresolved values with the default value.
        If it is set to pyhocon.STR_SUBSTITUTION then it will replace the value by its substitution expression (e.g., ${x})
    :type unresolved_value: boolean
    :return: a ConfigTree or a list
    """
    unescape_pattern = re.compile(r'\\.')

    def replace_escape_sequence(match):
        value = match.group(0)
        return cls.REPLACEMENTS.get(value, value)

    def norm_string(value):
        return unescape_pattern.sub(replace_escape_sequence, value)

    def unescape_string(tokens):
        return ConfigUnquotedString(norm_string(tokens[0]))

    def parse_multi_string(tokens):
        # remove the first and last 3 "
        return tokens[0][3: -3]

    def convert_number(tokens):
        n = tokens[0]
        try:
            return int(n, 10)
        except ValueError:
            return float(n)

    # ${path} or ${?path} for optional substitution
    SUBSTITUTION_PATTERN = r"\$\{(?P<optional>\?)?(?P<variable>[^}]+)\}(?P<ws>[ \t]*)"

    def create_substitution(instring, loc, token):
        # remove the ${ and }
        match = re.match(SUBSTITUTION_PATTERN, token[0])
        variable = match.group('variable')
        ws = match.group('ws')
        optional = match.group('optional') == '?'
        substitution = ConfigSubstitution(variable, optional, ws, instring, loc)
        return substitution

    # ${path} or ${?path} for optional substitution
    STRING_PATTERN = '"(?P<value>(?:[^"\\\\]|\\\\.)*)"(?P<ws>[ \t]*)'

    def create_quoted_string(instring, loc, token):
        # remove the ${ and }
        match = re.match(STRING_PATTERN, token[0])
        value = norm_string(match.group('value'))
        ws = match.group('ws')
        return ConfigQuotedString(value, ws, instring, loc)

    def include_config(instring, loc, token):
        url = None
        file = None
        required = False

        if token[0] == 'required':
            required = True
            final_tokens = token[1:]
        else:
            final_tokens = token

        if len(final_tokens) == 1:  # include "test"
            value = final_tokens[0].value if isinstance(final_tokens[0], ConfigQuotedString) else final_tokens[0]
            if value.startswith("http://") or value.startswith("https://") or value.startswith("file://"):
                url = value
            else:
                file = value
        elif len(final_tokens) == 2:  # include url("test") or file("test")
            value = final_tokens[1].value if isinstance(token[1], ConfigQuotedString) else final_tokens[1]
            if final_tokens[0] == 'url':
                url = value
            else:
                file = value

        if url is not None:
            logger.debug('Loading config from url %s', url)
            obj = ConfigFactory.parse_URL(
                url,
                resolve=False,
                required=required,
                unresolved_value=NO_SUBSTITUTION
            )
        elif file is not None:
            path = file if basedir is None else os.path.join(basedir, file)
            logger.debug('Loading config from file %s', path)
            obj = ConfigFactory.parse_file(
                path,
                resolve=False,
                required=required,
                unresolved_value=NO_SUBSTITUTION
            )
        else:
            raise ConfigException('No file or URL specified at: {loc}: {instring}', loc=loc, instring=instring)

        return ConfigInclude(obj if isinstance(obj, list) else obj.items())

    ParserElement.setDefaultWhitespaceChars(' \t')

    assign_expr = Forward()
    true_expr = Keyword("true", caseless=True).setParseAction(replaceWith(True))
    false_expr = Keyword("false", caseless=True).setParseAction(replaceWith(False))
    null_expr = Keyword("null", caseless=True).setParseAction(replaceWith(NoneValue()))
    key = QuotedString('"', escChar='\\', unquoteResults=False) | Word(alphanums + alphas8bit + '._- /')

    eol = Word('\n\r').suppress()
    eol_comma = Word('\n\r,').suppress()
    comment = (Literal('#') | Literal('//')) - SkipTo(eol | StringEnd())
    comment_eol = Suppress(Optional(eol_comma) + comment)
    comment_no_comma_eol = (comment | eol).suppress()
    number_expr = Regex(r'[+-]?(\d*\.\d+|\d+(\.\d+)?)([eE][+\-]?\d+)?(?=$|[ \t]*([\$\}\],#\n\r]|//))',
                        re.DOTALL).setParseAction(convert_number)

    # multi line string using """
    # Using fix described in http://pyparsing.wikispaces.com/share/view/3778969
    multiline_string = Regex('""".*?"*"""', re.DOTALL | re.UNICODE).setParseAction(parse_multi_string)
    # single quoted line string
    quoted_string = Regex(r'"(?:[^"\\\n]|\\.)*"[ \t]*', re.UNICODE).setParseAction(create_quoted_string)
    # unquoted string that takes the rest of the line until an optional comment
    # we support .properties multiline support which is like this:
    # line1 \
    # line2 \
    # so a backslash precedes the \n
    unquoted_string = Regex(r'(?:[^^`+?!@*&"\[\{\s\]\}#,=\$\\]|\\.)+[ \t]*', re.UNICODE).setParseAction(unescape_string)
    substitution_expr = Regex(r'[ \t]*\$\{[^\}]+\}[ \t]*').setParseAction(create_substitution)
    string_expr = multiline_string | quoted_string | unquoted_string

    value_expr = number_expr | true_expr | false_expr | null_expr | string_expr

    include_content = (quoted_string | ((Keyword('url') | Keyword('file')) - Literal('(').suppress() -
                                        quoted_string - Literal(')').suppress()))
    include_expr = (
        Keyword("include", caseless=True).suppress() + (
            include_content | (
                Keyword("required") - Literal('(').suppress() - include_content - Literal(')').suppress()
            )
        )
    ).setParseAction(include_config)

    root_dict_expr = Forward()
    dict_expr = Forward()
    list_expr = Forward()
    multi_value_expr = ZeroOrMore(comment_eol | include_expr | substitution_expr | dict_expr | list_expr | value_expr
                                  | (Literal('\\') - eol).suppress())
    # for a dictionary : or = is optional
    # last zeroOrMore is because we can have t = {a:4} {b: 6} {c: 7} which is dictionary concatenation
    inside_dict_expr = ConfigTreeParser(ZeroOrMore(comment_eol | include_expr | assign_expr | eol_comma))
    inside_root_dict_expr = ConfigTreeParser(ZeroOrMore(comment_eol | include_expr | assign_expr | eol_comma),
                                             root=True)
    dict_expr << Suppress('{') - inside_dict_expr - Suppress('}')
    root_dict_expr << Suppress('{') - inside_root_dict_expr - Suppress('}')
    list_entry = ConcatenatedValueParser(multi_value_expr)
    list_expr << Suppress('[') - ListParser(list_entry - ZeroOrMore(eol_comma - list_entry)) - Suppress(']')

    # special case when we have a value assignment where the string can potentially be the remainder of the line
    assign_expr << Group(
        key - ZeroOrMore(comment_no_comma_eol) -
        (dict_expr | (Literal('=') | Literal(':') | Literal('+=')) -
         ZeroOrMore(comment_no_comma_eol) - ConcatenatedValueParser(multi_value_expr))
    )

    # the file can be { ... } where {} can be omitted or []
    config_expr = ZeroOrMore(comment_eol | eol) + (list_expr | root_dict_expr | inside_root_dict_expr) + \
        ZeroOrMore(comment_eol | eol_comma)
    config = config_expr.parseString(content, parseAll=True)[0]

    if resolve:
        allow_unresolved = resolve and unresolved_value is not DEFAULT_SUBSTITUTION and \
            unresolved_value is not MANDATORY_SUBSTITUTION
        has_unresolved = cls.resolve_substitutions(config, allow_unresolved)
        if has_unresolved and unresolved_value is MANDATORY_SUBSTITUTION:
            raise ConfigSubstitutionException(
                'resolve cannot be set to True and unresolved_value to MANDATORY_SUBSTITUTION')

    if unresolved_value is not NO_SUBSTITUTION and unresolved_value is not DEFAULT_SUBSTITUTION:
        cls.unresolve_substitutions_to_value(config, unresolved_value)
    return config
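# A hedged usage sketch (not part of the parser above): this grammar is normally
# reached through pyhocon's ConfigFactory front end, and with resolve=True a
# ${a} substitution is resolved while the ConfigTree is built.
from pyhocon import ConfigFactory

conf = ConfigFactory.parse_string("""
a = 1
b = ${a}
c { d = "hello" }
""")
print(conf.get('b'))    # 1, the ${a} substitution was resolved
print(conf.get('c.d'))  # 'hello', dotted paths walk the nested ConfigTree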
# Copyright: 2019, NLnet Labs and the Internet.nl contributors
# SPDX-License-Identifier: Apache-2.0
import ipaddress

from pyparsing import Word, Regex, White, Optional, ZeroOrMore, OneOrMore
from pyparsing import Group, CaselessLiteral, ParseException
from pyparsing import alphanums, alphas, nums, printables
from pyparsing import ParserElement, StringEnd, Combine

ParserElement.setDefaultWhitespaceChars('')  # Whitespace is in the grammar

# Parser for SPF records.
#
# The record is parsed based on section 12 (Collected ABNF) of RFC-7208.
# [ https://tools.ietf.org/html/rfc7208#section-12 ]
#
# Most of the tokens have been combined together for easier access to the
# record's parts.
# The terms can be found under <parsed_result>['terms'] if any.


def _parse_ipv6(tokens):
    """
    Helper function to parse IPv6 addresses.

    """
    match = str(tokens[0])
    ipv6 = None
    try:
        ipv6 = ipaddress.IPv6Address(match)
    except ipaddress.AddressValueError:
        try:
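# A self-contained sketch (illustrative only, not internet.nl code) of the kind
# of fallback the truncated try/except above suggests: an SPF ip6 term may carry
# either a bare address or an address with a prefix length, so a failed
# IPv6Address parse is retried as an IPv6Network.
import ipaddress


def classify_ipv6(text):
    try:
        return ipaddress.IPv6Address(text)
    except ValueError:
        try:
            return ipaddress.IPv6Network(text, strict=False)
        except ValueError:
            return None


print(classify_ipv6('2001:db8::1'))    # IPv6Address('2001:db8::1')
print(classify_ipv6('2001:db8::/32'))  # IPv6Network('2001:db8::/32')
print(classify_ipv6('not-an-ip'))      # None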
def parse(content, basedir=None, resolve=True):
    """parse a HOCON content

    :param content: HOCON content to parse
    :type content: basestring
    :param resolve: If true, resolve substitutions
    :type resolve: boolean
    :return: a ConfigTree or a list
    """

    def norm_string(value):
        for k, v in ConfigParser.REPLACEMENTS.items():
            value = value.replace(k, v)
        return value

    def unescape_string(tokens):
        return ConfigUnquotedString(norm_string(tokens[0]))

    def parse_multi_string(tokens):
        # remove the first and last 3 "
        return tokens[0][3: -3]

    def convert_number(tokens):
        n = tokens[0]
        try:
            return int(n)
        except ValueError:
            return float(n)

    # ${path} or ${?path} for optional substitution
    SUBSTITUTION = "\$\{(?P<optional>\?)?(?P<variable>[^}]+)\}(?P<ws>\s*)"

    def create_substitution(instring, loc, token):
        # remove the ${ and }
        match = re.match(SUBSTITUTION, token[0])
        variable = match.group('variable')
        ws = match.group('ws')
        optional = match.group('optional') == '?'
        substitution = ConfigSubstitution(variable, optional, ws, instring, loc)
        return substitution

    def include_config(token):
        url = None
        file = None
        if len(token) == 1:  # include "test"
            if token[0].startswith("http://") or token[0].startswith("https://") or token[0].startswith("file://"):
                url = token[0]
            else:
                file = token[0]
        elif len(token) == 2:  # include url("test") or file("test")
            if token[0] == 'url':
                url = token[1]
            else:
                file = token[1]

        if url is not None:
            logger.debug('Loading config from url %s', url)
            obj = ConfigFactory.parse_URL(url, resolve=False)

        if file is not None:
            path = file if basedir is None else os.path.join(basedir, file)
            logger.debug('Loading config from file %s', path)
            obj = ConfigFactory.parse_file(path, required=False, resolve=False)

        return ConfigInclude(obj if isinstance(obj, list) else obj.items())

    ParserElement.setDefaultWhitespaceChars(' \t')

    assign_expr = Forward()
    true_expr = Keyword("true", caseless=True).setParseAction(replaceWith(True))
    false_expr = Keyword("false", caseless=True).setParseAction(replaceWith(False))
    null_expr = Keyword("null", caseless=True).setParseAction(replaceWith(None))
    key = QuotedString('"', escChar='\\', unquoteResults=False) | Word(alphanums + '._- ')

    eol = Word('\n\r').suppress()
    eol_comma = Word('\n\r,').suppress()
    comment = (Literal('#') | Literal('//')) - SkipTo(eol)
    comment_eol = Suppress(Optional(eol_comma) + comment)
    comment_no_comma_eol = (comment | eol).suppress()
    number_expr = Regex('[+-]?(\d*\.\d+|\d+(\.\d+)?)([eE]\d+)?(?=$|[ \t]*([\$\}\],#\n\r]|//))',
                        re.DOTALL).setParseAction(convert_number)

    # multi line string using """
    # Using fix described in http://pyparsing.wikispaces.com/share/view/3778969
    multiline_string = Regex('""".*?"""', re.DOTALL | re.UNICODE).setParseAction(parse_multi_string)
    # single quoted line string
    quoted_string = QuotedString(quoteChar='"', escChar='\\', multiline=True)
    # unquoted string that takes the rest of the line until an optional comment
    # we support .properties multiline support which is like this:
    # line1 \
    # line2 \
    # so a backslash precedes the \n
    unquoted_string = Regex(r'(\\[ \t]*[\r\n]|[^\[\{\n\r\]\}#,=\$])+?(?=($|\$|[ \t]*(//|[\}\],#\n\r])))',
                            re.DOTALL).setParseAction(unescape_string)
    substitution_expr = Regex('[ \t]*\$\{[^\}]+\}[ \t]*').setParseAction(create_substitution)
    string_expr = multiline_string | quoted_string | unquoted_string

    value_expr = number_expr | true_expr | false_expr | null_expr | string_expr

    include_expr = (Keyword("include", caseless=True).suppress() - (
        quoted_string | ((Keyword('url') | Keyword('file')) - Literal('(').suppress() -
                         quoted_string - Literal(')').suppress()))) \
        .setParseAction(include_config)

    dict_expr = Forward()
    list_expr = Forward()
    multi_value_expr = ZeroOrMore((Literal('\\') - eol).suppress() | comment_eol | include_expr |
                                  substitution_expr | dict_expr | list_expr | value_expr)
    # for a dictionary : or = is optional
    # last zeroOrMore is because we can have t = {a:4} {b: 6} {c: 7} which is dictionary concatenation
    inside_dict_expr = ConfigTreeParser(ZeroOrMore(comment_eol | include_expr | assign_expr | eol_comma))
    dict_expr << Suppress('{') - inside_dict_expr - Suppress('}')
    list_entry = ConcatenatedValueParser(multi_value_expr)
    list_expr << Suppress('[') - ListParser(list_entry - ZeroOrMore(eol_comma - list_entry)) - Suppress(']')

    # special case when we have a value assignment where the string can potentially be the remainder of the line
    assign_expr << Group(
        key - ZeroOrMore(comment_no_comma_eol) -
        (dict_expr | Suppress(Literal('=') | Literal(':')) -
         ZeroOrMore(comment_no_comma_eol) - ConcatenatedValueParser(multi_value_expr))
    )

    # the file can be { ... } where {} can be omitted or []
    config_expr = ZeroOrMore(comment_eol | eol) + (list_expr | dict_expr | inside_dict_expr) + \
        ZeroOrMore(comment_eol | eol_comma)
    config = config_expr.parseString(content, parseAll=True)[0]

    if resolve:
        ConfigParser.resolve_substitutions(config)
    return config
def define_grammar():
    global grammar

    def removechar(s, removechar):
        # Returns string <s> having removed all occurrences of character <removechar>
        return ''.join(s.split(removechar))

    def removechars(s, removechars):
        for c in removechars:
            s = removechar(s, c)
        return s

    # pyparsing's setResultsName() seems to have a bug: it names both a Group AND its children with the tag,
    # which confuses us later when we traverse the tree.
    # So instead, we use setParseAction() to manually create a special tag attribute.
    # This can then be read from any x=ParseResults with x.storyelement (which will be the empty string if it doesn't exist)
    def setattr_choice(this):
        this.__setattr__("storyelement", "choice")

    def setattr_varlookup(this):
        this.__setattr__("storyelement", "var")

    def setattr_varassign(this):
        this[0].insert(0, SETVARINDICATOR)

    def setattr_constassign(this):
        this[0].insert(0, SETCONSTINDICATOR)

    # Our Grammar...
    # Returns a pyparsing object
    ParserElement.setDefaultWhitespaceChars("")

    one_expr = Forward()           # Exactly one expression (of any sort)
    one_or_more_expr = Forward()   # Compound expression (must have at least one expression)
    zero_or_more_expr = Forward()  # Compound expression (can have zero expressions)
    # Any expression which has to be forward-declared MUST then be assigned-to later using "<<" not "=" !!!
    # WARNING: You must put brackets after << otherwise the wrong thing happens,
    # e.g. A << (B | C) without the brackets fails

    # Define allchars as all characters
    allchars = srange("[!-~]")     # All ASCII characters (same as "printables"?)
    allchars = "\t\n " + allchars  # And all whitespace chars

    # Bodytext is all characters which don't have special meaning
    bodychars = removechars(allchars, "[]{|}=<>$")  # Bodytext cannot contain special characters

    # Bodytext (meaning text we want to leave alone) is
    # anything consisting entirely of non-special characters
    bodytext = OneOrMore(Word(bodychars)).leaveWhitespace()  # and don't try to strip off any leading whitespace

    htmlchars = allchars
    htmlchars = removechars(htmlchars, ">")  # HTML tags need to be left alone.
    htmltags = Literal("<") - ZeroOrMore(Word(htmlchars)).leaveWhitespace() - Literal(">")

    # A valid variable name starts with a letter
    varname_expr = Word(alphas, alphanums + '_')

    # A use of a variable looks like [varname]
    # varname can be computed, e.g. [{A|B}] is fine
    # but it can't be empty, e.g. []
    var_expr = Group(Literal("[") - one_or_more_expr - Literal("]")).setParseAction(setattr_varlookup)

    # A choice looks like {} or {A} or {A|B} etc.
    # (and of course A & B can be any expression at all, i.e. recursive)
    # and can include null entries e.g. {A|}
    choice_expr = (Group(Literal("{") -
                         Group(Optional(one_or_more_expr, default=""))("firstargs") -
                         ZeroOrMore(Literal("|").suppress() -
                                    Group(Optional(one_or_more_expr, default="")("subsargs"))) -
                         Literal("}")).setParseAction(setattr_choice))
    # We use Optional(one_or_more_expr) rather than zero_or_more_expr because that way we can explicitly capture null expressions as choices.
    # We use Group() around the subexpressions to ensure that if they are complex they don't get flattened
    # (otherwise we'll end up choosing from things that are just sub-lists).
    # There seems to be a bug with setResultsName() which causes both the Group and its children to be named the same
    # (meaning you can't tell when traversing the tree whether you are a choice group, or an element within a choice group).
    # So instead we just leave the {} brackets unsuppressed and use them as a label

    # Setting a variable looks like [varname]={blah}
    setvar_expr = Group(var_expr + Literal("=") - choice_expr).setParseAction(setattr_varassign)
    # We use "+" (not "-") before Literal("=") because it IS legal for there to be a var_expr on its own
    # (i.e. when a var is used, not set)

    # Using a constant looks like $const
    const_expr = Group(Literal("$") - varname_expr)
    # Setting a constant looks like $const={choice} or $const=[var]
    setconst_expr = Group(const_expr + Literal("=") - (choice_expr ^ var_expr)).setParseAction(setattr_constassign)
    # We use "+" (not "-") before Literal("=") because it IS legal when using rather than assigning a constant

    # Putting it all together.
    # Any earlier Forward-defined expressions must be assigned to using "<<" rather than "="
    # REMEMBER: You MUST put () around the whole RHS of << assignment, or the wrong thing happens
    one_expr << (setconst_expr | const_expr | setvar_expr | var_expr | choice_expr | htmltags | bodytext)
    # "|" is pyparsing's MatchFirst operator, so put bodytext at the end so it only gets matched if nothing else does
    # "^" is pyparsing's Or operator, which finds the longest match
    one_or_more_expr << (OneOrMore(one_expr))
    zero_or_more_expr << (ZeroOrMore(one_expr))

    zero_or_more_expr.validate()  # Check for recursive loops (NOTE: Highly compute-intensive)

    final_expr = zero_or_more_expr + StringEnd()
    grammar = final_expr
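# A small stand-alone sketch of the Forward/"<<" gotcha called out in the
# comments above: "<<" binds tighter than "|", so without parentheses only the
# first alternative would end up inside the Forward. "<<=" avoids the trap.
from pyparsing import Forward, Literal

a, b = Literal("a"), Literal("b")

expr = Forward()
expr << (a | b)                         # correct: the whole alternation is assigned
print(expr.parseString("b").asList())   # ['b']

expr2 = Forward()
expr2 <<= a | b                         # "<<=" also takes the full right-hand side
print(expr2.parseString("b").asList())  # ['b']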
def property_grammar():
    ParserElement.setDefaultWhitespaceChars(' ')

    dpi_setting = Group(Optional('*')('DEFAULT') + INTEGER('DPI') + Suppress('@') + INTEGER('HZ'))('SETTINGS*')
    mount_matrix_row = SIGNED_REAL + ',' + SIGNED_REAL + ',' + SIGNED_REAL
    mount_matrix = Group(mount_matrix_row + ';' + mount_matrix_row + ';' + mount_matrix_row)('MOUNT_MATRIX')
    xkb_setting = Optional(Word(alphanums + '+-/@._'))

    # Although this set doesn't cover all of the characters in database entries, it's enough for test targets.
    name_literal = Word(printables + ' ')

    props = (('MOUSE_DPI', Group(OneOrMore(dpi_setting))),
             ('MOUSE_WHEEL_CLICK_ANGLE', INTEGER),
             ('MOUSE_WHEEL_CLICK_ANGLE_HORIZONTAL', INTEGER),
             ('MOUSE_WHEEL_CLICK_COUNT', INTEGER),
             ('MOUSE_WHEEL_CLICK_COUNT_HORIZONTAL', INTEGER),
             ('ID_AUTOSUSPEND', Or((Literal('0'), Literal('1')))),
             ('ID_PERSIST', Or((Literal('0'), Literal('1')))),
             ('ID_INPUT', Or((Literal('0'), Literal('1')))),
             ('ID_INPUT_ACCELEROMETER', Or((Literal('0'), Literal('1')))),
             ('ID_INPUT_JOYSTICK', Or((Literal('0'), Literal('1')))),
             ('ID_INPUT_KEY', Or((Literal('0'), Literal('1')))),
             ('ID_INPUT_KEYBOARD', Or((Literal('0'), Literal('1')))),
             ('ID_INPUT_MOUSE', Or((Literal('0'), Literal('1')))),
             ('ID_INPUT_POINTINGSTICK', Or((Literal('0'), Literal('1')))),
             ('ID_INPUT_SWITCH', Or((Literal('0'), Literal('1')))),
             ('ID_INPUT_TABLET', Or((Literal('0'), Literal('1')))),
             ('ID_INPUT_TABLET_PAD', Or((Literal('0'), Literal('1')))),
             ('ID_INPUT_TOUCHPAD', Or((Literal('0'), Literal('1')))),
             ('ID_INPUT_TOUCHSCREEN', Or((Literal('0'), Literal('1')))),
             ('ID_INPUT_TRACKBALL', Or((Literal('0'), Literal('1')))),
             ('ID_SIGNAL_ANALYZER', Or((Literal('0'), Literal('1')))),
             ('POINTINGSTICK_SENSITIVITY', INTEGER),
             ('POINTINGSTICK_CONST_ACCEL', REAL),
             ('ID_INPUT_JOYSTICK_INTEGRATION', Or(('internal', 'external'))),
             ('ID_INPUT_TOUCHPAD_INTEGRATION', Or(('internal', 'external'))),
             ('XKB_FIXED_LAYOUT', xkb_setting),
             ('XKB_FIXED_VARIANT', xkb_setting),
             ('XKB_FIXED_MODEL', xkb_setting),
             ('KEYBOARD_LED_NUMLOCK', Literal('0')),
             ('KEYBOARD_LED_CAPSLOCK', Literal('0')),
             ('ACCEL_MOUNT_MATRIX', mount_matrix),
             ('ACCEL_LOCATION', Or(('display', 'base'))),
             ('PROXIMITY_NEAR_LEVEL', INTEGER),
             ('IEEE1394_UNIT_FUNCTION_MIDI', Or((Literal('0'), Literal('1')))),
             ('IEEE1394_UNIT_FUNCTION_AUDIO', Or((Literal('0'), Literal('1')))),
             ('IEEE1394_UNIT_FUNCTION_VIDEO', Or((Literal('0'), Literal('1')))),
             ('ID_VENDOR_FROM_DATABASE', name_literal),
             ('ID_MODEL_FROM_DATABASE', name_literal),
             ('ID_TAG_MASTER_OF_SEAT', Literal('1')),
             ('ID_INFRARED_CAMERA', Or((Literal('0'), Literal('1')))),
             ('ID_CAMERA_DIRECTION', Or(('front', 'rear'))),
             )
    fixed_props = [Literal(name)('NAME') - Suppress('=') - val('VALUE')
                   for name, val in props]
    kbd_props = [Regex(r'KEYBOARD_KEY_[0-9a-f]+')('NAME')
                 - Suppress('=') -
                 ('!' ^ (Optional('!') - Word(alphanums + '_')))('VALUE')
                 ]
    abs_props = [Regex(r'EVDEV_ABS_[0-9a-f]{2}')('NAME')
                 - Suppress('=') -
                 Word(nums + ':')('VALUE')
                 ]

    grammar = Or(fixed_props + kbd_props + abs_props) + EOL

    return grammar
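# A cut-down illustration of how one fixed_props entry matches a property line;
# INTEGER here is a stand-in for the hwdb script's own token, not the real
# definition. The '-' operator (And with error stop) makes a malformed value
# report an error right after the property name instead of backtracking.
from pyparsing import Literal, Suppress, Word, nums

INTEGER = Word(nums)  # stand-in
prop = Literal('MOUSE_WHEEL_CLICK_ANGLE')('NAME') - Suppress('=') - INTEGER('VALUE')

res = prop.parseString('MOUSE_WHEEL_CLICK_ANGLE=15')
print(res['NAME'], res['VALUE'])  # MOUSE_WHEEL_CLICK_ANGLE 15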
    b) larger, then new code block will open
        appropriate { character will be generated
    c) smaller, then as many code blocks will close as matches the history of increasing in the indent stack
        ; will be added to the previous non-empty code line (if there isn't one already)
        + appropriate } character(s) will be generated
4) Where new code block starts after the word 'function', new variable scope opens.
    All variables from assignments and the controlling variable of 'for' cycle will be auto-declared as var.
    To prevent this behavior for single variable(s) use:
        global var1 [, var2, .., varN]   // global doesn't go to the output
    To prevent this behavior completely, set:
        declare_variables = False
"""

import sys
from pyparsing import ParserElement, Word, CharsNotIn, cppStyleComment, QuotedString, Optional, Keyword, alphas, alphanums

ParserElement.setDefaultWhitespaceChars('')  # whitespace is important

# ciderscript
tab_spaces = 4               # tabs will be expanded to <tab_spaces> spaces (only in the whitespace before the code)
continuation_min_indent = 6  # minimal increment of indent to see the line as a continuation line (5+ is always recommended)

# javascript
declare_variables = True     # will declare as 'var ..' all variables not listed in 'global ..'
jsindent = 4                 # number of spaces for javascript indentation step
continuation_shift = 8       # minimum shift of the continuation line in js relative to the significant indentation
                             # if source indentation shift (compared to indent_len) is greater than this, greater value is used

"""
http://stackoverflow.com/questions/1661197/what-characters-are-valid-for-javascript-variable-names
unicodePrintables = u''.join(unichr(c) for c in xrange(65536) if not unichr(c).isspace())
continuation_drift = 8       # minimum shift of the continuation line in js relative to the significant indentation
                             # if source indentation shift (compared to indent_len) is greater than this, greater value is used

gr = Word(alphanums)
>>> src = 'a bb ccc'
>>> for match, start, stop in gr.scanString(src):
        print(match, start, stop)

http://stackoverflow.com/questions/1661197/what-characters-are-valid-for-javascript-variable-names
unicodePrintables = u''.join(unichr(c) for c in xrange(65536) if not unichr(c).isspace())
>>> x = unicode('č')
>>> x.isalnum()

from pyparsing import ParserElement
ParserElement.setDefaultWhitespaceChars('')

lineBreak = Word('\r\n', exact=2) | Word('\n', exact=1)
quotedString = (QuotedString('"', unquoteResults=False)
                | QuotedString("'", unquoteResults=False)
                | QuotedString('`', multiline=True, unquoteResults=False))
parseRules = cppStyleComment() | quotedString | lineBreak | CharsNotIn('"\'`/\r\n')


def javascript(ciderscript):
    """compiles from cjs to js"""
    context = {'js_indent_level': 0,        # current indentation level
               'indent_stack': [],          # stack of source indents, which is tuple (chars, scope) where
                                            #   chars .. number of added spaces in this block begin,
                                            #   scope .. =lineNo+1 for started function, 0 otherwise
               'indent_len': 0,             # current indent length (length of joined indent_stack)
               'multiline_comment': False,  # are we inside the multiline comment /*..*/ ?
               'multiline_string': False,   # are we inside the multiline string `..` ?
               }
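# A runnable version of the scanString note embedded above (plain pyparsing,
# nothing project-specific): scanString yields (tokens, start, end) for every
# match found in the input string.
from pyparsing import Word, alphanums

gr = Word(alphanums)
src = 'a bb ccc'
for match, start, stop in gr.scanString(src):
    print(match.asList(), start, stop)
# ['a'] 0 1
# ['bb'] 2 4
# ['ccc'] 5 8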
# -*- coding: utf-8 -*-
"""Atomic components; probably shouldn't use these directly"""
import string

from pyparsing import Optional, ParserElement, Regex, Suppress, Word
from six.moves.html_parser import HTMLParser

from regparser.grammar.utils import Marker, SuffixMarker, WordBoundaries

# Set whitespace for all parsing; include unicode whitespace chars
ParserElement.setDefaultWhitespaceChars(
    string.whitespace + HTMLParser().unescape('   ‌‍‎‏'))

lower_p = (Suppress("(") +
           Regex(r"[ivx]{1}|[a-hj-uwyz]{1,2}").setResultsName("p1") +
           Suppress(")"))
digit_p = (Suppress("(") +
           Word(string.digits).setResultsName("p2") +
           Suppress(")"))
roman_p = (Suppress("(") +
           Word("ivxlcdm").setResultsName("p3") +
           Suppress(")"))
upper_p = (Suppress("(") +
           Word(string.ascii_uppercase).setResultsName("p4") +
           Suppress(")"))
em_digit_p = (Suppress(Regex(r"\(<E[^>]*>")) +
              Word(string.digits).setResultsName("p5") +
              Suppress("</E>)"))
em_roman_p = (Suppress(Regex(r"\(<E[^>]*>")) +
              Word("ivxlcdm").setResultsName("p5") +
              Suppress("</E>)"))

# Allow a plaintext version of italic paragraph markers
plaintext_level5_p = (Suppress("(") +
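# A quick, self-contained illustration (not from regparser) of how these marker
# elements behave: the surrounding punctuation is suppressed and only the named
# result (p1, p2, ...) is kept.
import string
from pyparsing import Regex, Suppress, Word

lower_demo = Suppress("(") + Regex(r"[ivx]{1}|[a-hj-uwyz]{1,2}")("p1") + Suppress(")")
digit_demo = Suppress("(") + Word(string.digits)("p2") + Suppress(")")

print(lower_demo.parseString("(a)")["p1"])   # 'a'
print(digit_demo.parseString("(12)")["p2"])  # '12'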
# A TAP output line may also indicate abort of the test suite with the line:
#     Bail out!
# optionally followed by a reason for bailing
#
# Copyright 2008, by Paul McGuire
#
from pyparsing import ParserElement, LineEnd, Optional, Word, nums, Regex, \
    Literal, CaselessLiteral, Group, OneOrMore, Suppress, restOfLine, \
    FollowedBy, empty

__all__ = ['tapOutputParser', 'TAPTest', 'TAPSummary']

# newlines are significant whitespace, so set default skippable
# whitespace to just spaces and tabs
ParserElement.setDefaultWhitespaceChars(" \t")
NL = LineEnd().suppress()

integer = Word(nums)
plan = '1..' + integer("ubound")

OK, NOT_OK = map(Literal, ['ok', 'not ok'])
testStatus = (OK | NOT_OK)

description = Regex("[^#\n]+")
description.setParseAction(lambda t: t[0].lstrip('- '))

TODO, SKIP = map(CaselessLiteral, 'TODO SKIP'.split())
directive = Group(Suppress('#') + (TODO + restOfLine |
                                   FollowedBy(SKIP) +
                                   restOfLine.copy().setParseAction(lambda t: ['SKIP', t[0]])))
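# A short usage sketch built only from the pieces defined above (the complete
# tapOutputParser from the pyparsing examples is not reproduced here): a single
# TAP result line is a status, an optional test number, a description and an
# optional directive.
testLine = (testStatus("passed") + Optional(integer("testNumber")) +
            Optional(description("description")) + Optional(directive("directive")))

res = testLine.parseString("ok 1 - input file opened # TODO flaky on windows")
print(res.passed)              # 'ok'
print(res.testNumber)          # '1'
print(res.description)         # 'input file opened '
print(res.directive.asList())  # ['TODO', ' flaky on windows']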
from pyparsing import (Optional, ZeroOrMore, Literal, ParserElement, ParseException,
                       Suppress, FollowedBy, LineEnd)

from rdflib.query import Result, ResultParser
from rdflib.plugins.sparql.parser import (Var, STRING_LITERAL1, STRING_LITERAL2,
                                          IRIREF, BLANK_NODE_LABEL, NumericLiteral,
                                          BooleanLiteral, LANGTAG)
from rdflib.plugins.sparql.parserutils import Comp, Param, CompValue
from rdflib import Literal as RDFLiteral
from rdflib.py3compat import bytestype

ParserElement.setDefaultWhitespaceChars(" \n")

String = STRING_LITERAL1 | STRING_LITERAL2

RDFLITERAL = Comp('literal', Param('string', String) + Optional(
    Param('lang', LANGTAG.leaveWhitespace()) |
    Literal('^^').leaveWhitespace() + Param('datatype', IRIREF).leaveWhitespace()))

NONE_VALUE = object()

EMPTY = FollowedBy(LineEnd()) | FollowedBy("\t")
EMPTY.setParseAction(lambda x: NONE_VALUE)
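# A toy illustration (not rdflib code) of why the TSV result parser keeps "\t"
# out of the skippable whitespace: the tab is the column separator, so it must
# be matched explicitly rather than silently skipped.
from pyparsing import ParserElement, Word, alphas, delimitedList

ParserElement.setDefaultWhitespaceChars(" \n")

cell = Word(alphas)
row = delimitedList(cell, delim="\t")

print(row.parseString("alice\tbob\tcarol").asList())  # ['alice', 'bob', 'carol']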
def create_grammar(container_ids, secret_ids):
    """ Create the grammar for the editfile """
    from pyparsing import (nums, alphas, lineEnd, stringEnd, OneOrMore, ZeroOrMore,
                           SkipTo, Optional, And, Word, CharsNotIn, Empty, QuotedString,
                           Literal, Suppress, Group, Combine, originalTextFor, Forward,
                           ParserElement)
    # Read from bottom to top
    whiteSpaceChars = ' \t'
    ParserElement.setDefaultWhitespaceChars(whiteSpaceChars)

    word = Empty() + CharsNotIn(whiteSpaceChars + '\n')
    quotedString = QuotedString(quoteChar='"', escChar='\\').setParseAction(
        # NOTE the second replace is a work-around for
        # pyparsing bug #68.
        # https://sourceforge.net/p/pyparsing/bugs/68/
        lambda s, l, t: t[0].replace("\\n", "\n").replace("\\\\", "\\"))

    def secretIdNumberParseAction(s, loc, tokens):
        v = int(tokens[0])
        if not v in secret_ids:
            raise ParseException(s, loc, "Not a valid secret id")
        return v
    secretIdNumber = Word(nums).setParseAction(secretIdNumberParseAction)

    def containerIdParseAction(s, loc, tokens):
        v = int(tokens[0])
        if not v in container_ids:
            raise ParseException(s, loc, "Not a valid container id")
        return v
    containerId = Word(nums).setParseAction(containerIdParseAction)

    key = quotedString | word
    secretString = ~Literal('#') + (quotedString | word)
    secretId = Suppress('#') + secretIdNumber
    secret = secretString | secretId
    note = quotedString | originalTextFor(OneOrMore(word))
    containerKeyword = Suppress('CONTAINER')
    entry = (~containerKeyword + Group(key - secret - Optional(note)) - Suppress(lineEnd))
    comment = Suppress(lineEnd | '#' + SkipTo(lineEnd))
    line = comment | entry
    containerLine = containerKeyword + containerId + comment

    # Instead of the following recursive grammar, we could have simply used
    #
    #   containerBlock = ZeroOrMore(comment) + Group(containerLine
    #                       + Group(OneOrMore(line)))
    #   multipleContainers = OneOrMore(containerBlock)
    #
    # but then any parsing error in line will result in a "expected stringEnd"
    # or "expected CONTAINER".
    _multipleContainers_head = Forward()
    _multipleContainers_body = Forward()
    _multipleContainers_head << (stringEnd | comment + _multipleContainers_head
                                 | containerLine + _multipleContainers_body)
    _multipleContainers_body << (stringEnd | (containerLine | line) + _multipleContainers_body)
    _multipleContainers_entry = And([entry])
    multipleContainers = And([_multipleContainers_head])  # TODO ibidem below

    containerLine.setParseAction(lambda s, l, t: [[None, t[0]]])

    def multipleContainersParseAction(s, loc, tokens):
        curEntries = []
        curId = None
        ret = []
        for t in tuple(tokens) + ((None, None),):
            if t[0] is not None:
                assert curId is not None
                curEntries.append(t)
                continue
            if curId is not None:
                ret.append([curId, curEntries])
            curId = t[1]
            curEntries = []
        return ret
    multipleContainers.setParseAction(multipleContainersParseAction)

    oneContainer = ZeroOrMore(line) + stringEnd
    oneContainer.setParseAction(lambda s, l, t: [[None, t]])

    grammar = multipleContainers | oneContainer
    return grammar
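# A hedged usage sketch of the grammar above; the container ids, secret id and
# editfile lines are made up, and the exact token layout of each entry depends
# on the Group/parse actions, so only the per-container grouping is shown.
grammar = create_grammar(container_ids={1, 2}, secret_ids={7})
text = ("CONTAINER 1\n"
        "mail  hunter2  \"personal mailbox\"\n"
        "db    #7\n"
        "CONTAINER 2\n"
        "wifi  correcthorse\n")

for container_id, entries in grammar.parseString(text, parseAll=True):
    print(container_id, [e[0] for e in entries])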