def originalTextFor(expr, asString=True):
    """Helper to return the original, untokenized text for a given
    expression.  Useful to restore the parsed fields of an HTML start
    tag into the raw tag text itself, or to revert separate tokens with
    intervening whitespace back to the original matching input text. By
    default, returns a string containing the original parsed text.

    If the optional ``asString`` argument is passed as ``False``, then
    the return value is a `ParseResults` containing any results names
    that were originally matched, and a single token containing the
    original matched text from the input string.  So if the expression
    passed to `originalTextFor` contains expressions with defined
    results names, you must set ``asString`` to ``False`` if you want
    to preserve those results name values.

    Example::

        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b", "i"):
            opener, closer = makeHTMLTags(tag)
            patt = originalTextFor(opener + SkipTo(closer) + closer)
            print(patt.searchString(src)[0])

    prints::

        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    locMarker = Empty().addParseAction(lambda t, l, s: l)
    matchExpr = locMarker("_original_start") + Group(expr) + locMarker("_original_end")
    matchExpr = matchExpr.addParseAction(extractText)
    return matchExpr

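# Illustrative sketch (not part of the library): with ``asString=False`` the
# docstring above says the result keeps any results names defined inside
# ``expr`` alongside the raw matched text.  Reuses makeHTMLTags/SkipTo from
# the existing example; the variable names are hypothetical.
#
#     opener, closer = makeHTMLTags("b")
#     patt = originalTextFor(opener + SkipTo(closer) + closer, asString=False)
#     match = patt.searchString("this is <b> bold </b> text")[0]
#     # match holds one token with the original "<b> bold </b>" text, plus
#     # whatever results names the opener/closer expressions defined
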
def locatedExpr(expr):
    """Helper to decorate a returned token with its starting and ending
    locations in the input string.

    This helper adds the following results names:

    - locn_start = location where matched expression begins
    - locn_end = location where matched expression ends
    - value = the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you may
    want to call `ParserElement.parseWithTabs`

    Example::

        wd = Word(alphas)
        for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints::

        [[0, 'ljsdf', 5]]
        [[8, 'lksdjjf', 15]]
        [[18, 'lkkjj', 23]]
    """
    locator = Empty().addParseAction(lambda t, l, s: l)
    return Group(locator("locn_start") + Group(expr)("value") + locator("locn_end"))

def streamline(self):
    if self.streamlined:
        return self
    self.streamlined = True

    # collapse nested And's of the form And(And(And(a, b), c), d) to And(a, b, c, d)
    # but only if there are no parse actions or resultsNames on the nested And's
    # (likewise for Or's and MatchFirst's)
    if not self.is_annotated() and not self.exprs:
        return Empty(self.parser_name)

    acc = []
    same = True
    for e in self.exprs:
        f = e.streamline()
        same = same and f is e
        if f.is_annotated():
            acc.append(f)
        elif isinstance(f, self.__class__):
            same = False
            acc.extend(f.exprs)
        else:
            acc.append(f)

    if same:
        return self

    output = self.copy()
    output.exprs = acc
    output.streamlined = True
    return output

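# Sketch of the collapse described in the comment above (illustrative only;
# the names a, b, c, d are hypothetical):
#
#     a, b, c, d = Literal("a"), Literal("b"), Literal("c"), Literal("d")
#     expr = ((a + b) + c) + d      # "+" builds And(And(And(a, b), c), d)
#     flat = expr.streamline()      # expected to behave like And(a, b, c, d),
#                                   # provided no parse actions or results
#                                   # names were attached to the inner And's
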
def streamline(self):
    if self.streamlined:
        return self

    output = ParseExpression.streamline(self)
    if isinstance(output, Empty):
        return output

    if not output.is_annotated():
        if len(output.exprs) == 0:
            output = Empty()
        elif len(output.exprs) == 1:
            output = output.exprs[0]

    output.streamlined = True
    output.checkRecursion()
    return output

def copyTokenToRepeater(t, l, s):
    if t:
        if len(t) == 1:
            rep << t[0]
        else:
            # flatten t tokens
            tflat = _flatten(t)
            rep << And(Literal(tt) for tt in tflat)
    else:
        rep << Empty()

def streamline(self):
    if self.streamlined:
        return self

    if not self.exprs:
        return Empty(self.parser_name)

    if len(self.exprs) == 1 and not self.is_annotated():
        return self.exprs[0].streamline()

    # collapse any _PendingSkip's
    same = True
    exprs = self.exprs
    if any(
        isinstance(e, ParseExpression)
        and e.exprs
        and isinstance(e.exprs[-1], _PendingSkip)
        for e in exprs[:-1]
    ):
        same = False
        for i, e in enumerate(exprs[:-1]):
            if (
                isinstance(e, ParseExpression)
                and e.exprs
                and isinstance(e.exprs[-1], _PendingSkip)
            ):
                ee = e.exprs[-1] + exprs[i + 1]
                e.exprs[-1] = ee
                e.streamlined = False
                exprs[i + 1] = None

    # streamline INDIVIDUAL EXPRESSIONS
    acc = []
    for e in exprs:
        if e is None:
            continue
        f = e.streamline()
        same = same and f is e
        if f.is_annotated():
            acc.append(f)
        elif isinstance(f, And) and f.parser_config.engine is self.parser_config.engine:
            same = False
            acc.extend(f.exprs)
        else:
            acc.append(f)

    if same:
        self.streamlined = True
        return self

    output = self.copy()
    output.exprs = acc
    output.streamlined = True
    return output

def __init__(self, exprs):
    if exprs and Ellipsis in exprs:
        tmp = []
        for i, expr in enumerate(exprs):
            if expr is Ellipsis:
                if i < len(exprs) - 1:
                    skipto_arg = (Empty() + exprs[i + 1]).exprs[-1]
                    tmp.append(SkipTo(skipto_arg)("_skipped"))
                else:
                    raise Exception("cannot construct And with sequence ending in ...")
            else:
                tmp.append(expr)
        exprs[:] = tmp
    super(And, self).__init__(exprs)

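# Sketch of what the Ellipsis handling above produces (illustrative only;
# qty/unit are hypothetical names):
#
#     qty, unit = Word(nums), Word(alphas)
#     # an Ellipsis between two expressions is replaced by
#     # SkipTo(unit)("_skipped"), i.e. "match qty, skip ahead to the next
#     # unit, and keep the skipped text under the '_skipped' results name"
#     line = And([qty, ..., unit])
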
def streamline(self):
    if self.streamlined:
        return self
    self.streamlined = True

    # collapse nested And's of the form And(And(And(a, b), c), d) to And(a, b, c, d)
    # but only if there are no parse actions or resultsNames on the nested And's
    # (likewise for Or's and MatchFirst's)
    if not self.exprs:
        return Empty(self.parser_name)

    acc = []
    for e in self.exprs:
        e = e.streamline()
        if isinstance(e, self.__class__) and not is_decorated(e):
            acc.extend(e.exprs)
        else:
            acc.append(e)

    self.exprs = acc
    return self

def streamline(self):
    if self.streamlined:
        return self
    self.streamlined = True

    # collapse nested And's of the form And(And(And(a, b), c), d) to And(a, b, c, d)
    # but only if there are no parse actions or resultsNames on the nested And's
    # (likewise for Or's and MatchFirst's)
    if not self.is_annotated() and not self.exprs:
        return Empty(self.parser_name)

    acc = []
    same = True
    clazz = self.__class__
    if clazz == Or:
        clazz = (
            Or,
            MatchFirst,
        )  # TODO: not correct, but allows merging of the two to a single longer list
    for e in self.exprs:
        f = e.streamline()
        same = same and f is e
        if f.is_annotated():
            acc.append(f)
        elif isinstance(f, clazz):
            same = False
            acc.extend(f.exprs)
        else:
            acc.append(f)

    if same:
        return self

    output = self.copy()
    output.exprs = acc
    output.streamlined = True
    return output

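# Sketch of the merge the TODO above refers to (illustrative; a, b, c, d are
# hypothetical, un-annotated expressions):
#
#     alternatives = Or([a ^ b, c | d])   # an Or holding a nested Or and a MatchFirst
#     # streamline() is expected to fold both children into this Or, carrying
#     # the flat alternative list [a, b, c, d]
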
def indentedBlock(blockStatementExpr, indent=True):
    """Helper method for defining space-delimited indentation blocks,
    such as those used to define block statements in Python source code.

    Parameters:

    - blockStatementExpr - expression defining syntax of statement that
      is repeated within the indented block
    - indent - boolean indicating whether block must be indented beyond
      the current level; set to False for block of left-most statements
      (default= ``True``)

    Indentation levels are tracked on the shared module-level
    ``_indent_stack``, so multiple indented-block expressions within a
    single grammar share a common stack.

    A valid block must contain at least one ``blockStatement``.
    """
    blockStatementExpr.engine.add_ignore("\\" + LineEnd())

    PEER = Forward()
    DEDENT = Forward()

    def _reset_stack(p=None, l=None, s=None, ex=None):
        oldCol, oldPeer, oldDedent = _indent_stack.pop()
        PEER << oldPeer
        DEDENT << oldDedent

    def peer_stack(expectedCol):
        def output(t, l, s):
            if l >= len(s):
                return
            curCol = col(l, s)
            if curCol != expectedCol:
                if curCol > expectedCol:
                    raise ParseException(t.type, l, s, "illegal nesting")
                raise ParseException(t.type, l, s, "not a peer entry")

        return output

    def dedent_stack(expectedCol):
        def output(t, l, s):
            if l >= len(s):
                return
            curCol = col(l, s)
            if curCol not in (i for i, _, _ in _indent_stack):
                raise ParseException(t.type, l, s, "not an unindent")
            if curCol < _indent_stack[-1][0]:
                oldCol, oldPeer, oldDedent = _indent_stack.pop()
                PEER << oldPeer
                DEDENT << oldDedent

        return output

    def indent_stack(t, l, s):
        curCol = col(l, s)
        if curCol > _indent_stack[-1][0]:
            PEER << Empty().addParseAction(peer_stack(curCol))
            DEDENT << Empty().addParseAction(dedent_stack(curCol))
            _indent_stack.append((curCol, PEER, DEDENT))
        else:
            raise ParseException(t.type, l, s, "not a subentry")

    def nodent_stack(t, l, s):
        curCol = col(l, s)
        if curCol == _indent_stack[-1][0]:
            PEER << Empty().addParseAction(peer_stack(curCol))
            DEDENT << Empty().addParseAction(dedent_stack(curCol))
            _indent_stack.append((curCol, PEER, DEDENT))
        else:
            raise ParseException(t.type, l, s, "not a subentry")

    NL = OneOrMore(LineEnd().suppress())
    INDENT = Empty().addParseAction(indent_stack)
    NODENT = Empty().addParseAction(nodent_stack)

    if indent:
        smExpr = Group(
            Optional(NL)
            + INDENT
            + OneOrMore(PEER + Group(blockStatementExpr) + Optional(NL))
            + DEDENT
        )
    else:
        smExpr = Group(
            Optional(NL)
            + NODENT
            + OneOrMore(PEER + Group(blockStatementExpr) + Optional(NL))
            + DEDENT
        )

    return smExpr.setFailAction(_reset_stack).set_parser_name("indented block")

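# Usage sketch (illustrative only; the grammar and its names are hypothetical):
#
#     stmt = Forward()
#     suite = indentedBlock(stmt)
#     funcDef = Keyword("def") + Word(alphas) + ":" + suite
#     stmt <<= funcDef | Word(alphas)
#
#     # intended to parse input such as:
#     #     def f:
#     #         a
#     #         b
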
            + ~Literal(closer)
            + CharsNotIn(ignore_chars, exact=1)
        )
    ).addParseAction(scrub)

    ret = Forward()
    if ignoreExpr is not None:
        ret <<= Group(
            Suppress(opener) + ZeroOrMore(ignoreExpr | ret | content) + Suppress(closer)
        )
    else:
        ret <<= Group(Suppress(opener) + ZeroOrMore(ret | content) + Suppress(closer))
    ret.set_parser_name("nested %s%s expression" % (opener, closer))
    return ret


# convenience constants for positional expressions
empty = Empty().set_parser_name("empty")
lineStart = LineStart().set_parser_name("lineStart")
lineEnd = LineEnd().set_parser_name("lineEnd")
stringStart = StringStart().set_parser_name("stringStart")
stringEnd = StringEnd().set_parser_name("stringEnd")

_escapedPunc = Word(
    _bslash, r"\[]-*.$+^?()~ ", exact=2
).addParseAction(lambda t, l, s: t[0][1])
_escapedHexChar = (
    Regex(r"\\0?[xX][0-9a-fA-F]+").addParseAction(lambda t: unichr(int(
        t[0].lstrip('\\').lstrip('0').lstrip('xX'), 16
    )))
)
_escapedOctChar = Regex(r"\\0[0-7]+").addParseAction(lambda t, l, s: unichr(int(
    singles = [s for s in symbols if len(s) == 1]
    rest = list(sorted(
        [s for s in symbols if len(s) != 1], key=lambda s: -len(s)
    ))

    acc = []
    acc.extend(re.escape(sym) for sym in rest)
    if singles:
        acc.append(regex_range("".join(singles)))
    regex = "|".join(acc)

    return Regex(regex).streamline()


LEFT_ASSOC = object()
RIGHT_ASSOC = object()
_no_op = Empty()


def infixNotation(baseExpr, spec, lpar=Suppress("("), rpar=Suppress(")")):
    """
    :param baseExpr: expression representing the most basic element for the nested
    :param spec: list of tuples, one for each operator precedence level
        in the expression grammar; each tuple is of the form ``(opExpr,
        numTerms, rightLeftAssoc, parseAction)``, where:

        - opExpr is the mo_parsing expression for the operator; may also
          be a string, which will be converted to a Literal; if numTerms
          is 3, opExpr is a tuple of two expressions, for the two
          operators separating the 3 terms
        - numTerms is the number of terms for this operator (must be 1,