def parser(): global _parser if _parser is None: ParserElement.setDefaultWhitespaceChars("") lbrack, rbrack, lbrace, rbrace, lparen, rparen = map(Literal, "[]{}()") reMacro = Combine("\\" + oneOf(list("dws"))) escapedChar = ~reMacro + Combine("\\" + oneOf(list(printables))) reLiteralChar = "".join( c for c in printables if c not in r"\[]{}().*?+|") + " \t" reRange = Combine(lbrack + SkipTo(rbrack, ignore=escapedChar) + rbrack) reLiteral = (escapedChar | oneOf(list(reLiteralChar))) reDot = Literal(".") repetition = ((lbrace + Word(nums).setResultsName("count") + rbrace) | (lbrace + Word(nums).setResultsName("minCount") + "," + Word(nums).setResultsName("maxCount") + rbrace) | oneOf(list("*+?"))) reRange.setParseAction(handleRange) reLiteral.setParseAction(handleLiteral) reMacro.setParseAction(handleMacro) reDot.setParseAction(handleDot) reTerm = (reLiteral | reRange | reMacro | reDot) reExpr = operatorPrecedence(reTerm, [ (repetition, 1, opAssoc.LEFT, handleRepetition), (None, 2, opAssoc.LEFT, handleSequence), (Suppress('|'), 2, opAssoc.LEFT, handleAlternative), ]) _parser = reExpr return _parser
def parser(): global _parser if _parser is None: ParserElement.setDefaultWhitespaceChars("") lbrack,rbrack,lbrace,rbrace,lparen,rparen = map(Literal,"[]{}()") reMacro = Combine("\\" + oneOf(list("dws"))) escapedChar = ~reMacro + Combine("\\" + oneOf(list(printables))) reLiteralChar = "".join(c for c in printables if c not in r"\[]{}().*?+|") + " \t" reRange = Combine(lbrack + SkipTo(rbrack,ignore=escapedChar) + rbrack) reLiteral = ( escapedChar | oneOf(list(reLiteralChar)) ) reDot = Literal(".") repetition = ( ( lbrace + Word(nums).setResultsName("count") + rbrace ) | ( lbrace + Word(nums).setResultsName("minCount")+","+ Word(nums).setResultsName("maxCount") + rbrace ) | oneOf(list("*+?")) ) reRange.setParseAction(handleRange) reLiteral.setParseAction(handleLiteral) reMacro.setParseAction(handleMacro) reDot.setParseAction(handleDot) reTerm = ( reLiteral | reRange | reMacro | reDot ) reExpr = operatorPrecedence( reTerm, [ (repetition, 1, opAssoc.LEFT, handleRepetition), (None, 2, opAssoc.LEFT, handleSequence), (Suppress('|'), 2, opAssoc.LEFT, handleAlternative), ] ) _parser = reExpr return _parser
sizing the data with the NUMBER similar to Dan Bernstein's netstrings setup. SPACE White space is basically ignored. This is interesting because since Stackish is serialized consistently this means you can use \n as the separation character and perform reasonable diffs on two structures. """ from pyparsingOD import Suppress,Word,nums,alphas,alphanums,Combine,oneOf,\ Optional,QuotedString,Forward,Group,ZeroOrMore,printables,srange MARK,UNMARK,AT,COLON,QUOTE = map(Suppress,"[]@:'") NUMBER = Word(nums) NUMBER.setParseAction(lambda t:int(t[0])) FLOAT = Combine(oneOf("+ -") + Word(nums) + "." + Optional(Word(nums))) FLOAT.setParseAction(lambda t:float(t[0])) STRING = QuotedString('"', multiline=True) WORD = Word(alphas,alphanums+"_:") ATTRIBUTE = Combine(AT + WORD) strBody = Forward() def setBodyLength(tokens): strBody << Word(srange(r'[\0x00-\0xffff]'), exact=int(tokens[0])) return "" BLOB = Combine(QUOTE + Word(nums).setParseAction(setBodyLength) + COLON + strBody + QUOTE) item = Forward() def assignUsing(s): def assignPA(tokens): if s in tokens:
sizing the data with the NUMBER similar to Dan Bernstein's netstrings setup. SPACE White space is basically ignored. This is interesting because since Stackish is serialized consistently this means you can use \n as the separation character and perform reasonable diffs on two structures. """ from pyparsingOD import Suppress,Word,nums,alphas,alphanums,Combine,oneOf,\ Optional,QuotedString,Forward,Group,ZeroOrMore,printables,srange MARK, UNMARK, AT, COLON, QUOTE = map(Suppress, "[]@:'") NUMBER = Word(nums) NUMBER.setParseAction(lambda t: int(t[0])) FLOAT = Combine(oneOf("+ -") + Word(nums) + "." + Optional(Word(nums))) FLOAT.setParseAction(lambda t: float(t[0])) STRING = QuotedString('"', multiline=True) WORD = Word(alphas, alphanums + "_:") ATTRIBUTE = Combine(AT + WORD) strBody = Forward() def setBodyLength(tokens): strBody << Word(srange(r'[\0x00-\0xffff]'), exact=int(tokens[0])) return "" BLOB = Combine(QUOTE + Word(nums).setParseAction(setBodyLength) + COLON + strBody + QUOTE)