def repeat(tokens):
    """Turn a parsed ``operand [repetition-operator]`` result into a parser.

    Parameters:

     - tokens - parse result; it either holds the operand alone
       (``tokens.length() == 1``) or unpacks into ``(operand, operator)``,
       where ``operator`` is indexed by ``"mode"``, ``"exact"``, ``"min"``
       and ``"max"``

    Returns ``tokens.value()`` when there is no operator.  Otherwise the
    operand is wrapped, using ``PLAIN_ENGINE``, according to the operator:

     - no mode, ``exact`` given -> ``Many(operand, exact=...)``
     - no mode otherwise        -> ``Many(operand, min_match=..., max_match=...)``
     - ``*`` or ``*?``          -> ``ZeroOrMore``
     - ``+`` or ``+?``          -> ``OneOrMore``
     - ``?``                    -> ``Optional``

    Any other mode is reported through ``Log.error("not expected")``.
    """
    if tokens.length() == 1:
        return tokens.value()

    try:
        operand, operator = tokens
    except Exception as cause:
        Log.error("not expected", cause=cause)

    mode = operator["mode"]
    if not mode:
        # Explicit repetition counts: either an exact count or a min/max pair.
        if operator["exact"]:
            return Many(operand, PLAIN_ENGINE, exact=int(operator["exact"]))
        return Many(
            operand,
            PLAIN_ENGINE,
            min_match=int(operator["min"]),
            max_match=int(operator["max"]),
        )

    # Compare against whole operator strings.  The previous substring tests
    # (``mode in "*?"``) also matched a bare "?", so "?" was turned into
    # ZeroOrMore and the Optional branch could never be reached.
    if mode in ("*", "*?"):
        return ZeroOrMore(operand, PLAIN_ENGINE)
    if mode in ("+", "+?"):
        return OneOrMore(operand, PLAIN_ENGINE)
    if mode == "?":
        return Optional(operand, PLAIN_ENGINE)
    Log.error("not expected")
def repeat(tokens):
    """Turn a parsed ``operand [repetition-operator]`` result into a parser.

    Parameters:

     - tokens - parse result; it either holds the operand alone
       (``tokens.length() == 1``) or unpacks into ``(operand, operator)``,
       where ``operator`` is indexed by ``"mode"``, ``"exact"``, ``"min"``
       and ``"max"``

    Returns ``tokens.value()`` when there is no operator.  Otherwise the
    operand is wrapped according to the operator:

     - no mode, ``exact`` given -> ``Many(operand, exact=...)``
     - no mode otherwise        -> ``Many(operand, min_match=..., max_match=...)``
     - ``*`` or ``*?``          -> ``ZeroOrMore``
     - ``+`` or ``+?``          -> ``OneOrMore``
     - ``?``                    -> ``Optional``

    Any other mode is reported through ``Log.error("not expected")``.
    """
    if tokens.length() == 1:
        return tokens.value()

    operand, operator = tokens
    mode = operator["mode"]
    if not mode:
        # Explicit repetition counts: either an exact count or a min/max pair.
        if operator["exact"]:
            return Many(operand, exact=int(operator["exact"]))
        return Many(
            operand, min_match=int(operator["min"]), max_match=int(operator["max"])
        )

    # Compare against whole operator strings.  The previous substring tests
    # (``mode in "*?"``) also matched a bare "?", so "?" was turned into
    # ZeroOrMore and the Optional branch could never be reached.
    if mode in ("*", "*?"):
        return ZeroOrMore(operand)
    if mode in ("+", "+?"):
        return OneOrMore(operand)
    if mode == "?":
        return Optional(operand)
    Log.error("not expected")
def dictOf(key, value):
    """Helper to define a dictionary from a key pattern and a value pattern.

    Builds ``Dict(OneOrMore(Group(key + value)))`` so the caller does not have
    to nest the `Dict`, `OneOrMore` and `Group` tokens by hand.

    The key pattern can include delimiting markers or punctuation, as long as
    they are suppressed, thereby leaving the significant key text.  The value
    pattern can include named results, so that the `Dict` results can include
    named token fields.

    Example::

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).addParseAction(' '.join))
        print(OneOrMore(attr_expr).parseString(text))

        attr_label = label
        attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).addParseAction(' '.join)

        # similar to Dict, but simpler call format
        result = dictOf(attr_label, attr_value).parseString(text)
        print(result)
        print(result['shape'])
        print(result.shape)  # object attribute access works too
        print(result)

    prints::

        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: light blue
        - posn: upper left
        - shape: SQUARE
        - texture: burlap
        SQUARE
        SQUARE
        {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
    """
    # One key/value pair, grouped so each entry stays a separate sub-result.
    entry = Group(key + value)
    # At least one entry is required.
    entries = OneOrMore(entry)
    return Dict(entries)
def indentedBlock(blockStatementExpr, indent=True):
    """Helper method for defining space-delimited indentation blocks,
    such as those used to define block statements in Python source code.

    Parameters:

     - blockStatementExpr - expression defining syntax of statement that
       is repeated within the indented block
     - indent - boolean indicating whether block must be indented beyond
       the current level; set to False for block of left-most
       statements (default= ``True``)

    Returns a ``Group`` expression named "indented block" that matches
    optional line ends, an indent (or same-column) check, one or more
    grouped statements that each start at the block's column, and a final
    dedent check.

    Indentation state is kept in ``_indent_stack``, which is defined outside
    this function (not visible here); entries are
    ``(column, PEER, DEDENT)`` tuples.

    A valid block must contain at least one ``blockStatement``.
    """
    # Registers backslash + line end as ignorable on the statement's engine
    # (presumably to allow line continuations -- confirm against Engine.add_ignore).
    blockStatementExpr.engine.add_ignore("\\" + LineEnd())

    # Placeholders whose targets are replaced by the stack actions below.
    PEER = Forward()
    DEDENT = Forward()

    def _reset_stack(p=None, l=None, s=None, ex=None):
        # Fail action: drop the top stack entry and point PEER/DEDENT at the
        # expressions recorded in that entry.
        oldCol, oldPeer, oldDedent = _indent_stack.pop()
        PEER << oldPeer
        DEDENT << oldDedent

    def peer_stack(expectedCol):
        # Build a parse action that requires the current column to equal expectedCol.
        def output(t, l, s):
            # Nothing to check at end of input.
            if l >= len(s):
                return
            curCol = col(l, s)
            if curCol != expectedCol:
                # NOTE(review): the ParseException calls in this function pass
                # (t.type, s, l, msg), (t.type, l, s, msg) and (s, l, msg);
                # they cannot all match one constructor signature -- confirm
                # the intended argument order.
                if curCol > expectedCol:
                    raise ParseException(t.type, s, l, "illegal nesting")
                raise ParseException(t.type, l, s, "not a peer entry")

        return output

    def dedent_stack(expectedCol):
        # Build a parse action that validates and applies a dedent.
        # NOTE(review): expectedCol is not used by the returned action.
        def output(t, l, s):
            # Nothing to check at end of input.
            if l >= len(s):
                return
            curCol = col(l, s)
            # The new column must be one of the columns already on the stack.
            if curCol not in (i for i, _, _ in _indent_stack):
                raise ParseException(s, l, "not an unindent")
            # Left of the current block's column: pop that block's entry.
            if curCol < _indent_stack[-1][0]:
                oldCol, oldPeer, oldDedent = _indent_stack.pop()
                PEER << oldPeer
                DEDENT << oldDedent

        return output

    def indent_stack(t, l, s):
        # Parse action for an indented block: the column must be strictly
        # greater than the column on top of the stack.
        curCol = col(l, s)
        if curCol > _indent_stack[-1][0]:
            PEER << Empty().addParseAction(peer_stack(curCol))
            DEDENT << Empty().addParseAction(dedent_stack(curCol))
            _indent_stack.append((curCol, PEER, DEDENT))
        else:
            raise ParseException(t.type, l, s, "not a subentry")

    def nodent_stack(t, l, s):
        # Parse action for a non-indented block: the column must equal the
        # column on top of the stack.
        curCol = col(l, s)
        if curCol == _indent_stack[-1][0]:
            PEER << Empty().addParseAction(peer_stack(curCol))
            DEDENT << Empty().addParseAction(dedent_stack(curCol))
            _indent_stack.append((curCol, PEER, DEDENT))
        else:
            raise ParseException(t.type, s, l, "not a subentry")

    # One or more line ends, dropped from the results.
    NL = OneOrMore(LineEnd().suppress())
    INDENT = Empty().addParseAction(indent_stack)
    NODENT = Empty().addParseAction(nodent_stack)
    # The two branches differ only in the entry check (INDENT vs NODENT).
    if indent:
        smExpr = Group(
            Optional(NL)
            + INDENT
            + OneOrMore(PEER + Group(blockStatementExpr) + Optional(NL))
            + DEDENT
        )
    else:
        smExpr = Group(
            Optional(NL)
            + NODENT
            + OneOrMore(PEER + Group(blockStatementExpr) + Optional(NL))
            + DEDENT
        )
    # On failure, _reset_stack undoes the stack entry.
    return smExpr.setFailAction(_reset_stack).set_parser_name("indented block")
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString):
    """Helper method for defining nested lists enclosed in opening and
    closing delimiters ("(" and ")" are the default).

    Parameters:

     - opener - opening character for a nested list
       (default= ``"("``); can also be a mo_parsing expression
     - closer - closing character for a nested list
       (default= ``")"``); can also be a mo_parsing expression
     - content - expression for items within the nested lists
       (default= ``None``)
     - ignoreExpr - expression for ignoring opening and closing
       delimiters (default= `quotedString`)

    If an expression is not provided for the content argument, the
    nested expression will capture all whitespace-delimited content
    between delimiters as a list of separate values.  In that case both
    delimiters must be strings.

    Use the ``ignoreExpr`` argument to define expressions that may
    contain opening or closing characters that should not be treated as
    opening or closing characters for nesting, such as quotedString or
    a comment expression.  Specify multiple expressions using an `Or`
    or `MatchFirst`.  The default is `quotedString`, but if no
    expressions are to be ignored, then pass ``None`` for this argument.

    Raises ``ValueError`` when opener equals closer, or when no content
    expression is given and either delimiter is not a string.
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    has_ignore = ignoreExpr is not None

    if content is None:
        both_text = isinstance(opener, text) and isinstance(closer, text)
        if not both_text:
            raise ValueError(
                "opening and closing arguments must be strings if no content expression"
                " is given"
            )

        # Read the white chars of the current engine BEFORE switching to the
        # engine created by Engine("") below.
        white = engine.CURRENT.white_chars
        with Engine(""):

            def strip_token(t):
                return t[0].strip()

            single_char_delims = len(opener) == 1 and len(closer) == 1
            if single_char_delims and has_ignore:
                # One char at a time, so ignoreExpr is re-checked before each char.
                content = Combine(OneOrMore(
                    ~ignoreExpr
                    + CharsNotIn(opener + closer + "".join(white), exact=1)
                )).addParseAction(strip_token)
            elif single_char_delims:
                content = Empty + CharsNotIn(
                    opener + closer + "".join(white)
                ).addParseAction(strip_token)
            elif has_ignore:
                # Multi-character delimiters cannot be excluded by character
                # set, so they are rejected with negative lookaheads.
                content = Combine(OneOrMore(
                    ~ignoreExpr
                    + ~Literal(opener)
                    + ~Literal(closer)
                    + CharsNotIn(white, exact=1)
                )).addParseAction(strip_token)
            else:
                content = Combine(OneOrMore(
                    ~Literal(opener) + ~Literal(closer) + CharsNotIn(white, exact=1)
                )).addParseAction(strip_token)

    ret = Forward()
    # The nested expression refers to itself, hence the Forward.
    if has_ignore:
        alternatives = ignoreExpr | ret | content
    else:
        alternatives = ret | content
    ret <<= Group(Suppress(opener) + ZeroOrMore(alternatives) + Suppress(closer))
    ret.set_parser_name("nested %s%s expression" % (opener, closer))
    return ret
with Engine("") as engine: restOfLine = Regex(r"[^\n]*").set_parser_name("rest of line") dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").set_parser_name("// comment") cppStyleComment = Combine( Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/" | dblSlashComment ).set_parser_name("C++ style comment") javaStyleComment = cppStyleComment pythonStyleComment = Regex(r"#[^\n]*").set_parser_name("Python style comment") _commasepitem = ( Combine(OneOrMore( Word(printables, exclude=",") + Optional(Word(" \t") + ~Literal(",") + ~LineEnd()) )) .addParseAction(lambda t: text(t).strip()) .set_parser_name("commaItem") ) commaSeparatedList = delimitedList(Optional( quotedString | _commasepitem, default="" )).set_parser_name("commaSeparatedList") convertToInteger = tokenMap(int) convertToFloat = tokenMap(float) integer = Word(nums).set_parser_name("integer").addParseAction(convertToInteger) hex_integer = (
_escapedHexChar = ( Regex(r"\\0?[xX][0-9a-fA-F]+").addParseAction(lambda t: unichr(int( t[0].lstrip('\\').lstrip('0').lstrip('xX'), 16 ))) ) _escapedOctChar = Regex(r"\\0[0-7]+").addParseAction(lambda t, l, s: unichr(int( t[0][1:], 8 ))) _singleChar = ( _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r"\]", exact=1) ) _charRange = Group(_singleChar + Suppress("-") + _singleChar) _reBracketExpr = ( Literal("[") + Optional("^").set_token_name("negate") + Group(OneOrMore(_charRange | _singleChar)).set_token_name("body") + "]" ) def srange(s): r"""Helper to easily define string ranges for use in Word construction. Borrows syntax from regexp '[]' string range definitions:: srange("[0-9]") -> "0123456789" srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz" srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_" The input string must be enclosed in []'s, and the returned string is the expanded character set joined into a single string. The
ret.set_parser_name("nested %s%s expression" % (opener, closer)) return ret with Engine(""): _escapedPunc = Word("\\", r"\[]-*.$+^?()~ ", exact=2).addParseAction(lambda t, l, s: t[0][1]) _escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").addParseAction( lambda t: unichr(int(t[0].lstrip("\\").lstrip("0").lstrip("xX"), 16))) _escapedOctChar = Regex(r"\\0[0-7]+").addParseAction( lambda t, l, s: unichr(int(t[0][1:], 8))) _singleChar = (_escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r"\]", exact=1)) _charRange = Group(_singleChar + Suppress("-") + _singleChar) _reBracketExpr = ("[" + Optional("^").set_token_name("negate") + Group( OneOrMore(_charRange | _singleChar)).set_token_name("body") + "]") def srange(s): r"""Helper to easily define string ranges for use in Word construction. Borrows syntax from regexp '[]' string range definitions:: srange("[0-9]") -> "0123456789" srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz" srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_" The input string must be enclosed in []'s, and the returned string is the expanded character set joined into a single string. The values enclosed in the []'s may be:
# Any run of non-newline characters (possibly empty).
restOfLine = Regex(r"[^\n]*").set_parser_name("rest of line")

# "//" followed by non-newline characters or backslash-newline pairs.
dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").set_parser_name("// comment")

# "/* ... */" block comment, or a "//" comment.  The parentheses only spell out
# the precedence Python already applies ("+" binds tighter than "|").
cppStyleComment = Combine(
    (Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/") | dblSlashComment
).set_parser_name("C++ style comment")
javaStyleComment = cppStyleComment

# "#" followed by non-newline characters.
pythonStyleComment = Regex(r"#[^\n]*").set_parser_name("Python style comment")

# One item of a comma separated list: printable words (commas excluded),
# optionally followed by blanks that are not followed by a comma or line end.
_commasepitem = Combine(OneOrMore(
    Word(printables, exclude=",")
    + Optional(Word(" \t") + ~Literal(",") + ~LineEnd())
))
_commasepitem = _commasepitem.addParseAction(lambda t: text(t).strip())
_commasepitem = _commasepitem.set_parser_name("commaItem")

# Items may be quoted strings or plain items; a missing item defaults to "".
commaSeparatedList = delimitedList(
    Optional(quotedString | _commasepitem, default="")
).set_parser_name("commaSeparatedList")

convertToInteger = tokenMap(int)
convertToFloat = tokenMap(float)

# Digits converted with int().
integer = Word(nums).set_parser_name("integer").addParseAction(convertToInteger)

# Hex digits converted with int(..., 16).
hex_integer = (
    Word(hexnums).set_parser_name("hex integer").addParseAction(tokenMap(int, 16))
)
| not_wordchar | not_whitechar | CR | LF | any_char | bs_char | tab_char ) escapedChar = ( ~macro + Combine("\\" + AnyChar()) ).addParseAction(lambda t: Literal(t.value()[1])) plainChar = Char(exclude=r"\]").addParseAction(lambda t: Literal(t.value())) escapedHexChar = Combine( (Literal("\\0x") | Literal("\\x") | Literal("\\X")) # lookup literals is faster + OneOrMore(Char(hexnums)) ).addParseAction(hex_to_char) escapedOctChar = Combine( Literal("\\0") + OneOrMore(Char("01234567")) ).addParseAction(lambda t: Literal(unichr(int(t.value()[2:], 8)))) singleChar = escapedHexChar | escapedOctChar | escapedChar | plainChar charRange = Group(singleChar("min") + "-" + singleChar("max")).addParseAction(to_range) brackets = ( "[" + Optional("^")("negate") + OneOrMore(Group(charRange | singleChar | macro)("body")) + "]"
| not_wordchar | not_whitechar | CR | LF | any_char | bs_char | tab_char) escapedChar = ( ~macro + Combine("\\" + AnyChar())).addParseAction(lambda t: Literal(t.value()[1])) plainChar = Char(exclude=r"\]").addParseAction(lambda t: Literal(t.value())) escapedHexChar = Combine((Literal("\\0x") | Literal("\\x") | Literal("\\X")) # lookup literals is faster + OneOrMore(Char(hexnums))).addParseAction(hex_to_char) escapedOctChar = Combine(Literal("\\0") + OneOrMore(Char("01234567"))).addParseAction( lambda t: Literal(unichr(int(t.value()[2:], 8)))) singleChar = escapedHexChar | escapedOctChar | escapedChar | plainChar charRange = Group(singleChar("min") + "-" + singleChar("max")).addParseAction(to_range) brackets = ("[" + Optional("^")("negate") + OneOrMore(Group(charRange | singleChar | macro)("body")) + "]").addParseAction(to_bracket) #########################################################################################
| not_wordchar | not_whitechar | CR | LF | any_char | bs_char | tab_char) escapedChar = ( ~macro + Combine("\\" + AnyChar())).addParseAction(lambda t: Literal(t.value()[1])) plainChar = Char(exclude=r"\]").addParseAction(lambda t: Literal(t.value())) escapedHexChar = Combine( (Literal("\\0x") | Literal("\\x") | Literal("\\X")) # lookup literals is faster + OneOrMore(Char(hexnums), PLAIN_ENGINE)).addParseAction(hex_to_char) escapedOctChar = Combine( Literal("\\0") + OneOrMore(Char("01234567"), PLAIN_ENGINE)).addParseAction( lambda t: Literal(unichr(int(t.value()[2:], 8)))) singleChar = escapedHexChar | escapedOctChar | escapedChar | plainChar charRange = Group(singleChar("min") + "-" + singleChar("max")).addParseAction(to_range) brackets = ( "[" + Optional("^", PLAIN_ENGINE)("negate") + OneOrMore(Group(charRange | singleChar | macro)("body"), PLAIN_ENGINE) + "]").addParseAction(to_bracket)