def indentedBlock(blockStatementExpr, indent=True):
    """Helper method for defining space-delimited indentation blocks,
    such as those used to define block statements in Python source code.

    Parameters:

     - blockStatementExpr - expression defining syntax of statement that
       is repeated within the indented block
     - indent - boolean indicating whether block must be indented beyond
       the current level; set to False for block of left-most
       statements (default= ``True``)

    Indentation levels are not passed in by the caller: they are kept on
    the ``_indent_stack`` list defined outside this function, as
    ``(column, PEER, DEDENT)`` entries.  Because that list is not local,
    every expression built by this helper reads and mutates the same stack.

    Returns a ``Group`` named "indented block" whose fail action pops one
    entry from ``_indent_stack``.

    A valid block must contain at least one ``blockStatement``.
    """
    # Register backslash + line end as ignorable on the statement's engine
    # (presumably so explicit line continuations do not end a statement).
    blockStatementExpr.engine.add_ignore("\\" + LineEnd())

    # Placeholders that are re-pointed every time a level is pushed or popped.
    PEER = Forward()
    DEDENT = Forward()

    def _reset_stack(p=None, l=None, s=None, ex=None):
        # Drop the innermost level and point PEER/DEDENT at the expressions
        # recorded in the popped entry.  The parameters are unused; they only
        # let this function be installed as a fail action.
        _, oldPeer, oldDedent = _indent_stack.pop()
        PEER << oldPeer
        DEDENT << oldDedent

    def _push_level(curCol):
        # Install column checks for the new level, then record the level.
        PEER << Empty().addParseAction(peer_stack(curCol))
        DEDENT << Empty().addParseAction(dedent_stack(curCol))
        _indent_stack.append((curCol, PEER, DEDENT))

    # NOTE(review): every ParseException below is raised as
    # (expr, loc, string, message), matching the (tokens, loc, string) order
    # of the parse actions and of col(loc, string).  The original mixed
    # (expr, string, loc, msg), (expr, loc, string, msg) and (string, loc, msg);
    # confirm against the ParseException definition.

    def peer_stack(expectedCol):
        def output(t, l, s):
            if l >= len(s):
                # At end of input there is no column left to check.
                return
            curCol = col(l, s)
            if curCol != expectedCol:
                if curCol > expectedCol:
                    raise ParseException(t.type, l, s, "illegal nesting")
                raise ParseException(t.type, l, s, "not a peer entry")

        return output

    def dedent_stack(expectedCol):
        # expectedCol is accepted for symmetry with peer_stack but is not
        # consulted: the check is made against the live stack instead.
        def output(t, l, s):
            if l >= len(s):
                return
            curCol = col(l, s)
            if curCol not in (i for i, _, _ in _indent_stack):
                raise ParseException(t.type, l, s, "not an unindent")
            if curCol < _indent_stack[-1][0]:
                _reset_stack()

        return output

    def indent_stack(t, l, s):
        # Opening an indented block requires a column strictly to the right
        # of the innermost recorded level.
        curCol = col(l, s)
        if curCol > _indent_stack[-1][0]:
            _push_level(curCol)
        else:
            raise ParseException(t.type, l, s, "not a subentry")

    def nodent_stack(t, l, s):
        # Opening a non-indented block requires exactly the innermost
        # recorded column.
        curCol = col(l, s)
        if curCol == _indent_stack[-1][0]:
            _push_level(curCol)
        else:
            raise ParseException(t.type, l, s, "not a subentry")

    NL = OneOrMore(LineEnd().suppress())
    INDENT = Empty().addParseAction(indent_stack)
    NODENT = Empty().addParseAction(nodent_stack)

    # The two block flavours differ only in the expression that opens them.
    opener = INDENT if indent else NODENT
    smExpr = Group(
        Optional(NL)
        + opener
        + OneOrMore(PEER + Group(blockStatementExpr) + Optional(NL))
        + DEDENT
    )
    return smExpr.setFailAction(_reset_stack).set_parser_name("indented block")
# Zero or more characters up to, but not including, the next newline.
restOfLine = Regex(r"[^\n]*").set_parser_name("rest of line")

# "//" comment.  The pattern also consumes a backslash immediately followed by
# a newline, so such a comment carries on onto the next line.
dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").set_parser_name("// comment")
# "+" binds tighter than "|": this is (block-comment body + "*/") OR a "//"
# comment, wrapped in Combine.
cppStyleComment = Combine(
    Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/" | dblSlashComment
).set_parser_name("C++ style comment")
# Alias: the very same expression object as cppStyleComment.
javaStyleComment = cppStyleComment
# "#" followed by everything up to the next newline.
pythonStyleComment = Regex(r"#[^\n]*").set_parser_name("Python style comment")

# One item of a comma separated list: runs of printable non-comma characters,
# each optionally followed by spaces/tabs provided that neither a comma nor a
# line end comes next.  The parse action strips surrounding whitespace from
# the text of the match.
# NOTE(review): a later assignment of this name in this file passes
# ``excludeChars=`` instead of ``exclude=`` -- confirm which keyword Word accepts.
_commasepitem = (
    Combine(OneOrMore(
        Word(printables, exclude=",")
        + Optional(Word(" \t") + ~Literal(",") + ~LineEnd())
    ))
    .addParseAction(lambda t: text(t).strip())
    .set_parser_name("commaItem")
)
# Delimited list whose items are a quoted string or a bare item; the Optional
# supplies "" as its default.
commaSeparatedList = delimitedList(Optional(
    quotedString | _commasepitem, default=""
)).set_parser_name("commaSeparatedList")

# Parse actions built from the ``int`` and ``float`` builtins via tokenMap.
convertToInteger = tokenMap(int)
convertToFloat = tokenMap(float)

# Run of characters from ``nums`` with convertToInteger attached as parse action.
integer = Word(nums).set_parser_name("integer").addParseAction(convertToInteger)

hex_integer = (
# "//" comment.  The pattern also consumes a backslash immediately followed by
# a newline, so such a comment carries on onto the next line.
dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").set_parser_name("// comment")
# "+" binds tighter than "|": this is (block-comment body + "*/") OR a "//"
# comment, wrapped in Combine.
cppStyleComment = Combine(
    Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/"
    | dblSlashComment).set_parser_name("C++ style comment")
# Alias: the very same expression object as cppStyleComment.
javaStyleComment = cppStyleComment
# "#" followed by everything up to the next newline.
pythonStyleComment = Regex(r"#[^\n]*").set_parser_name(
    "Python style comment")
# One item of a comma separated list: runs of printable non-comma characters,
# each optionally followed by spaces/tabs provided that neither a comma nor a
# line end comes next.  The parse action strips surrounding whitespace from
# the text of the match.
# NOTE(review): an earlier assignment of this name in this file passes
# ``exclude=`` instead of ``excludeChars=`` -- confirm which keyword Word
# accepts and whether both assignments are meant to coexist.
_commasepitem = (Combine(
    OneOrMore(
        Word(printables, excludeChars=",")
        + Optional(Word(" \t") + ~Literal(",") + ~LineEnd()))).addParseAction(
            lambda t: text(t).strip()).set_parser_name("commaItem"))
# Delimited list whose items are a quoted string or a bare item; the Optional
# supplies "" as its default.
commaSeparatedList = delimitedList(
    Optional(quotedString | _commasepitem,
             default="")).set_parser_name("commaSeparatedList")
# NOTE(review): the string below is not closed within the visible text; it is
# reproduced unchanged.
"""Here are some common low-level expressions that may be useful in jump-starting parser development: - numeric forms (`integers<integer>`, `reals<real>`, `scientific notation<sci_real>`) - common `programming identifiers<identifier>` - network addresses (`MAC<mac_address>`, `IPv4<ipv4_address>`, `IPv6<ipv6_address>`) - ISO8601 `dates<iso8601_date>` and `datetime<iso8601_datetime>` - `UUID<uuid>`
)).addParseAction(scrub)
    # NOTE(review): the statements down to ``return ret`` are the tail of a
    # function whose header and opening statements are outside the visible
    # text; ``opener``, ``closer``, ``content`` and ``ignoreExpr`` come from
    # that unseen part.

    # Forward placeholder so the grouped expression can contain itself.
    ret = Forward()
    if ignoreExpr is not None:
        # ignoreExpr is tried first, then a nested group, then plain content.
        ret <<= Group(
            Suppress(opener) + ZeroOrMore(ignoreExpr | ret | content) + Suppress(closer)
        )
    else:
        ret <<= Group(Suppress(opener) + ZeroOrMore(ret | content) + Suppress(closer))
    ret.set_parser_name("nested %s%s expression" % (opener, closer))
    return ret


# convenience constants for positional expressions
empty = Empty().set_parser_name("empty")
lineStart = LineStart().set_parser_name("lineStart")
lineEnd = LineEnd().set_parser_name("lineEnd")
stringStart = StringStart().set_parser_name("stringStart")
stringEnd = StringEnd().set_parser_name("stringEnd")

# Exactly two characters: one from ``_bslash`` followed by one of the listed
# punctuation characters.  The parse action returns the second character of
# the match.
_escapedPunc = Word(
    _bslash, r"\[]-*.$+^?()~ ", exact=2
).addParseAction(lambda t, l, s: t[0][1])
# Backslash, optional "0", "x" or "X", then hex digits.  The parse action
# strips the leading backslash, then leading zeros, then the x/X marker,
# reads the rest as base 16 and passes it to ``unichr``.
_escapedHexChar = (
    Regex(r"\\0?[xX][0-9a-fA-F]+").addParseAction(lambda t: unichr(int(
        t[0].lstrip('\\').lstrip('0').lstrip('xX'), 16
    )))
)
# Backslash, "0", then octal digits.  The parse action drops the backslash,
# reads the rest as base 8 and passes it to ``unichr``.
_escapedOctChar = Regex(r"\\0[0-7]+").addParseAction(lambda t, l, s: unichr(int(
    t[0][1:], 8
)))
# Character range inside a bracket expression: <min> "-" <max>, with both ends
# matched by ``singleChar`` and named "min" / "max"; handled by ``to_range``.
charRange = Group(singleChar("min") + "-" + singleChar("max")).addParseAction(to_range)

# Bracket expression: "[", an optional "^" named "negate", one or more
# members (range, single character or macro) each grouped under the name
# "body", then "]"; handled by ``to_bracket``.
brackets = (
    "["
    + Optional("^")("negate")
    + OneOrMore(Group(charRange | singleChar | macro)("body"))
    + "]"
).addParseAction(to_bracket)

#########################################################################################
# REGEX
# Forward placeholder for the regex grammar; it is not bound within this span.
regex = Forward()

# Each parse action below returns a parser expression built from the matched
# regex token (presumably substituted for the token by the framework).
line_start = Literal("^").addParseAction(lambda: LineStart())
line_end = Literal("$").addParseAction(lambda: LineEnd())
# NOTE(review): "\b" is mapped to NotAny(any_wordchar), which only looks at
# what follows -- confirm this is the intended word-boundary behaviour.
word_edge = Literal("\\b").addParseAction(lambda: NotAny(any_wordchar))
# Run of printable characters other than the listed regex metacharacters,
# turned into a Literal of the matched value.
simple_char = Word(
    printables, exclude=r".^$*+{}[]\|()"
).addParseAction(lambda t: Literal(t.value()))
# Backslash followed by any character, turned into a Literal of that second
# character.
esc_char = ("\\" + AnyChar()).addParseAction(lambda t: Literal(t.value()[1]))

with Engine():
    # ALLOW SPACES IN THE RANGE
    # Forms accepted before the closing "}" (the opening "{" is not part of
    # this expression): "n}", "n,m}", "n,}" and ",m}".
    repetition = (
        Word(nums)("exact") + "}"
        | Word(nums)("min") + "," + Word(nums)("max") + "}"
        | Word(nums)("min") + "," + "}"
        | "," + Word(nums)("max") + "}"
    )