def delimitedList(expr, separator=",", combine=False):
    """
    Build an expression for one or more ``expr`` items split by ``separator``.

    :param expr: expression matching a single list item
    :param separator: delimiter expected between items (default ``","``)
    :param combine: when true the whole match, delimiters included, is wrapped
        in ``Combine``; otherwise each delimiter is wrapped in ``Suppress``
    :return: the composed expression

    Example::

        delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc']
        delimitedList(Word(hexnums), separator=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
    """
    if not combine:
        # delimiters are matched but wrapped in Suppress
        return expr + ZeroOrMore(Suppress(separator) + expr)

    # delimiters stay part of the match; everything is wrapped in Combine
    return Combine(expr + ZeroOrMore(separator + expr))
def repeat(tokens):
    """
    Parse action: turn ``(operand, operator)`` into a repeating expression.

    :param tokens: parse results holding either a lone operand, or an operand
        followed by an operator exposing ``"mode"``, ``"exact"``, ``"min"``
        and ``"max"`` entries
    :return: ``tokens.value()`` when there is only one token; otherwise a
        ``Many`` / ``ZeroOrMore`` / ``OneOrMore`` / ``Optional`` built around
        the operand using ``PLAIN_ENGINE``

    Any other mode, or tokens that do not unpack into two values, is reported
    through ``Log.error``.
    """
    if tokens.length() == 1:
        # no repetition operator was matched; hand back the operand untouched
        return tokens.value()

    try:
        operand, operator = tokens
    except Exception as cause:
        # presumably Log.error raises, so execution does not continue past here
        Log.error("not expected", cause=cause)

    mode = operator["mode"]
    if not mode:
        # explicit counts: either one exact count or a min/max pair
        if operator["exact"]:
            return Many(operand, PLAIN_ENGINE, exact=int(operator["exact"]))
        return Many(
            operand,
            PLAIN_ENGINE,
            min_match=int(operator["min"]),
            max_match=int(operator["max"]),
        )

    # Compare against whole mode strings.  A substring test such as
    # `mode in "*?"` is also true for "?", which made the Optional branch
    # unreachable and treated "?" as zero-or-more.
    if mode in ("*", "*?"):
        return ZeroOrMore(operand, PLAIN_ENGINE)
    if mode in ("+", "+?"):
        return OneOrMore(operand, PLAIN_ENGINE)
    if mode == "?":
        return Optional(operand, PLAIN_ENGINE)

    Log.error("not expected")
def makeHTMLTags(tagStr, suppress_LT=Suppress("<"), suppress_GT=Suppress(">")):
    """
    Helper to construct opening and closing tag expressions for HTML.

    Text tag names are matched caselessly; attribute names may contain ``:``,
    and attribute values may be quoted or unquoted.

    :param tagStr: tag name as text (wrapped in a caseless ``Keyword``), or an
        existing expression whose ``parser_name`` supplies the name
    :param suppress_LT: expression placed at the start of the opening tag
    :param suppress_GT: expression placed at the end of the opening tag
    :return: tuple ``(openTag, closeTag)``
    """
    if not isinstance(tagStr, text):
        resname = tagStr.parser_name
    else:
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=True)

    tagAttrName = Word(alphas, alphanums + "_-:")
    tagAttrValue = quotedString.addParseAction(removeQuotes) | Word(
        printables, exclude=">"
    )

    # e.g. "ns:tag" -> "NsTag"; suffix for the "start..."/"end..." token names
    name_words = resname.replace(":", " ").title().split()
    simpler_name = "".join(name_words)

    # one attribute: lower-cased name, optionally followed by "=value"
    attribute = Group(
        tagAttrName.addParseAction(downcaseTokens)
        + Optional(Suppress("=") + tagAttrValue)
    )
    attributes = OpenDict(ZeroOrMore(attribute))

    # "empty" token is True when the tag ends with "/", False otherwise
    self_closing = Optional("/", default=[False])("empty").addParseAction(
        lambda t, l, s: t[0] == "/"
    )

    open_body = suppress_LT + tagStr("tag") + attributes + self_closing + suppress_GT
    openTag = open_body.set_token_name("start" + simpler_name).set_parser_name(
        "<%s>" % resname
    )

    close_body = Combine(Literal("</") + tagStr + ">")
    closeTag = close_body.set_token_name("end" + simpler_name).set_parser_name(
        "</%s>" % resname
    )

    return openTag, closeTag
def repeat(tokens):
    """
    Parse action: turn ``(operand, operator)`` into a repeating expression.

    :param tokens: parse results holding either a lone operand, or an operand
        followed by an operator exposing ``"mode"``, ``"exact"``, ``"min"``
        and ``"max"`` entries
    :return: ``tokens.value()`` when there is only one token; otherwise a
        ``Many`` / ``ZeroOrMore`` / ``OneOrMore`` / ``Optional`` built around
        the operand

    Any other mode is reported through ``Log.error``.
    """
    if tokens.length() == 1:
        # no repetition operator was matched; hand back the operand untouched
        return tokens.value()

    operand, operator = tokens
    mode = operator["mode"]
    if not mode:
        # explicit counts: either one exact count or a min/max pair
        if operator["exact"]:
            return Many(operand, exact=int(operator["exact"]))
        return Many(
            operand, min_match=int(operator["min"]), max_match=int(operator["max"])
        )

    # Compare against whole mode strings.  A substring test such as
    # `mode in "*?"` is also true for "?", which made the Optional branch
    # unreachable and treated "?" as zero-or-more.
    if mode in ("*", "*?"):
        return ZeroOrMore(operand)
    if mode in ("+", "+?"):
        return OneOrMore(operand)
    if mode == "?":
        return Optional(operand)

    Log.error("not expected")
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString):
    """
    Helper for defining nested lists enclosed in opening and closing
    delimiters (``"("`` and ``")"`` by default).

    :param opener: opening delimiter of a nested list (default ``"("``); may
        also be a mo_parsing expression when ``content`` is supplied
    :param closer: closing delimiter of a nested list (default ``")"``); may
        also be a mo_parsing expression when ``content`` is supplied
    :param content: expression for the items inside the nested lists
        (default ``None``)
    :param ignoreExpr: expression that may contain delimiter characters which
        must not be treated as delimiters, such as a quoted string or a
        comment (default ``quotedString``); pass ``None`` to ignore nothing.
        Combine several expressions with ``Or`` or ``MatchFirst``.
    :return: expression matching the (recursively) nested list
    :raises ValueError: if ``opener == closer``, or if ``content`` is ``None``
        and either delimiter is not text

    When no ``content`` expression is given, a default is built from
    characters that are neither a delimiter nor one of the current engine's
    white characters, and each match is stripped.
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        if not (isinstance(opener, text) and isinstance(closer, text)):
            raise ValueError(
                "opening and closing arguments must be strings if no content expression"
                " is given"
            )

        # read the white chars before Engine("") is entered below
        whitespace = engine.CURRENT.white_chars
        with Engine(""):

            def scrub(t):
                return t[0].strip()

            single_char_delims = len(opener) == 1 and len(closer) == 1
            if single_char_delims and ignoreExpr is not None:
                content = Combine(OneOrMore(
                    ~ignoreExpr
                    + CharsNotIn(opener + closer + "".join(whitespace), exact=1,)
                )).addParseAction(scrub)
            elif single_char_delims:
                content = Empty + CharsNotIn(
                    opener + closer + "".join(whitespace)
                ).addParseAction(scrub)
            elif ignoreExpr is not None:
                content = Combine(OneOrMore(
                    ~ignoreExpr
                    + ~Literal(opener)
                    + ~Literal(closer)
                    + CharsNotIn(whitespace, exact=1)
                )).addParseAction(scrub)
            else:
                content = Combine(OneOrMore(
                    ~Literal(opener)
                    + ~Literal(closer)
                    + CharsNotIn(whitespace, exact=1)
                )).addParseAction(scrub)

    # recursive definition: a nested list may itself contain nested lists
    nested = Forward()
    if ignoreExpr is None:
        nested <<= Group(
            Suppress(opener) + ZeroOrMore(nested | content) + Suppress(closer)
        )
    else:
        nested <<= Group(
            Suppress(opener)
            + ZeroOrMore(ignoreExpr | nested | content)
            + Suppress(closer)
        )
    nested.set_parser_name("nested %s%s expression" % (opener, closer))
    return nested
def infixNotation(baseExpr, spec, lpar=Suppress("("), rpar=Suppress(")")):
    """
    Build an expression for operator (infix / prefix / suffix) notation.

    Matching collects a flat run of operands and operators; ``make_tree``
    then folds that run into nested ``ParseResults``, trying the operators
    in the order they appear in ``spec``.

    :param baseExpr: expression representing the most basic element for the nested
    :param spec: list of tuples, one for each operator precedence level in the
        expression grammar; each tuple is of the form
        ``(opExpr, numTerms, rightLeftAssoc, parseAction)``, where:

        - opExpr is the mo_parsing expression for the operator (it is passed
          through ``engine.CURRENT.normalize``); if numTerms is 3, opExpr is
          a pair of two expressions, for the two operators separating the
          3 terms
        - numTerms is the number of terms for this operator (1, 2, or 3);
          entries with any other value are skipped by the loop below
        - rightLeftAssoc is the indicator whether the operator is right or
          left associative, using ``RIGHT_ASSOC`` and ``LEFT_ASSOC``
        - parseAction is the parse action applied to results built for this
          operator (may be omitted); a single function or a list of
          functions is accepted (via ``listwrap``)
    :param lpar: expression for matching left-parentheses (default= ``Suppress('(')``)
    :param rpar: expression for matching right-parentheses (default= ``Suppress(')')``)
    :return: ParserElement
    """

    # cache of normalized operators, keyed by id() of the object given in spec
    all_op = {}

    def norm(op):
        # return (is_suppressed, parser) for op, reusing an earlier answer
        output = all_op.get(id(op))
        if output:
            return output

        def record_self(tok):
            # NOTE(review): the ParseResults built here is not returned, so this
            # action returns None -- confirm whether a `return` was intended
            ParseResults(tok.type, [tok.type.parser_name])

        output = engine.CURRENT.normalize(op)
        is_suppressed = isinstance(output, Suppress)
        if is_suppressed:
            # match with the wrapped expression; remember that it was suppressed
            # so make_tree can leave the operator out of the result
            output = output.expr
        output = output.addParseAction(record_self)
        all_op[id(op)] = is_suppressed, output
        return is_suppressed, output

    opList = []
    """ SCRUBBED LIST OF OPERATORS * expr - used exclusively for ParseResult(expr, [...]), not used to match * op - used to match * arity - same * assoc - same * parse_actions - same """

    for operDef in spec:
        op, arity, assoc, rest = operDef[0], operDef[1], operDef[2], operDef[3:]
        # optional 4th member: one parse action or a list of them
        parse_actions = list(map(wrap_parse_action, listwrap(rest[0]))) if rest else []
        if arity == 1:
            is_suppressed, op = norm(op)
            if assoc == RIGHT_ASSOC:
                # NOTE(review): make_tree treats arity-1 RIGHT_ASSOC as a prefix
                # operator, yet this label is Group(baseExpr + op) -- confirm
                opList.append((
                    Group(baseExpr + op),
                    op,
                    is_suppressed,
                    arity,
                    assoc,
                    parse_actions,
                ))
            else:
                opList.append((
                    Group(op + baseExpr),
                    op,
                    is_suppressed,
                    arity,
                    assoc,
                    parse_actions,
                ))
        elif arity == 2:
            is_suppressed, op = norm(op)
            opList.append((
                Group(baseExpr + op + baseExpr),
                op,
                is_suppressed,
                arity,
                assoc,
                parse_actions,
            ))
        elif arity == 3:
            # norm() yields (is_suppressed, parser) per operator; zip regroups
            # them into ((s0, s1), (op0, op1))
            is_suppressed, op = zip(norm(op[0]), norm(op[1]))
            opList.append((
                Group(baseExpr + op[0] + baseExpr + op[1] + baseExpr),
                op,
                is_suppressed,
                arity,
                assoc,
                parse_actions,
            ))
    opList = tuple(opList)

    def record_op(op):
        # parse action factory: wrap the matched tokens as one (tokens, op) pair
        # so make_tree can tell which parser produced each item
        def output(tokens):
            return ParseResults(NO_PARSER, [(tokens, op)])

        return output

    # arity-1 operators with RIGHT_ASSOC (handled as prefix in make_tree)
    prefix_ops = MatchFirst([
        op.addParseAction(record_op(op))
        for expr, op, is_suppressed, arity, assoc, pa in opList
        if arity == 1 and assoc == RIGHT_ASSOC
    ])
    # arity-1 operators with LEFT_ASSOC (handled as suffix in make_tree)
    suffix_ops = MatchFirst([
        op.addParseAction(record_op(op))
        for expr, op, is_suppressed, arity, assoc, pa in opList
        if arity == 1 and assoc == LEFT_ASSOC
    ])
    # every operator part of the arity-2 and arity-3 entries
    ops = Or([
        opPart.addParseAction(record_op(opPart))
        for expr, op, is_suppressed, arity, assoc, pa in opList
        if arity > 1
        for opPart in (op if isinstance(op, tuple) else [op])
    ])

    def make_tree(tokens, loc, string):
        # Fold the flat list of (result, op) pairs into a single result.
        # Each pass looks for the operator at op_index; on a hit the operator
        # and its operands are replaced by one item and the scan restarts at
        # op_index 0; on a miss the next entry of opList is tried.
        flat_tokens = list(tokens)
        num = len(opList)
        op_index = 0
        while len(flat_tokens) > 1 and op_index < num:
            expr, op, is_suppressed, arity, assoc, parse_actions = opList[op_index]
            if arity == 1:
                if assoc == RIGHT_ASSOC:
                    # PREFIX OPERATOR -3
                    # scan from the right; the operator is at i, its operand at i + 1
                    todo = list(reversed(list(enumerate(flat_tokens[:-1]))))
                    for i, (r, o) in todo:
                        if o == op:
                            if is_suppressed:
                                result = ParseResults(expr, (flat_tokens[i + 1][0],))
                            else:
                                result = ParseResults(expr, (r, flat_tokens[i + 1][0]))
                            break
                    else:
                        # operator not present: move on to the next opList entry
                        op_index += 1
                        continue
                else:
                    # SUFFIX OPERATOR 3!
                    # scan from the left; the operand is at i, its operator at i + 1
                    todo = list(enumerate(flat_tokens[1:]))
                    for i, (r, o) in todo:
                        if o == op:
                            if is_suppressed:
                                result = ParseResults(expr, (flat_tokens[i][0],))
                            else:
                                result = ParseResults(expr, (flat_tokens[i][0], r,))
                            break
                    else:
                        op_index += 1
                        continue
            elif arity == 2:
                # operands at i and i + 2, operator at i + 1
                todo = list(enumerate(flat_tokens[1:-1]))
                if assoc == RIGHT_ASSOC:
                    # right-associative: reduce the rightmost occurrence first
                    todo = list(reversed(todo))
                for i, (r, o) in todo:
                    if o == op:
                        if is_suppressed:
                            result = ParseResults(
                                expr, (flat_tokens[i][0], flat_tokens[i + 2][0])
                            )
                        else:
                            result = ParseResults(
                                expr, (flat_tokens[i][0], r, flat_tokens[i + 2][0])
                            )
                        break
                else:
                    op_index += 1
                    continue
            else:  # arity==3
                # operands at i, i + 2, i + 4; operators at i + 1 and i + 3
                todo = list(enumerate(flat_tokens[1:-3]))
                if assoc == RIGHT_ASSOC:
                    todo = list(reversed(todo))
                for i, (r0, o0) in todo:
                    if o0 == op[0]:
                        r1, o1 = flat_tokens[i + 3]
                        if o1 == op[1]:
                            seq = [
                                flat_tokens[i][0],
                                flat_tokens[i + 2][0],
                                flat_tokens[i + 4][0],
                            ]
                            s0, s1 = is_suppressed
                            # insert the later operator first so index 1 is
                            # still correct for the earlier one
                            if not s1:
                                seq.insert(2, r1)
                            if not s0:
                                seq.insert(1, r0)
                            result = ParseResults(expr, seq)
                            break
                else:
                    op_index += 1
                    continue

            # reached only after a successful match (the loops above `break`)
            for p in parse_actions:
                result = p(result, -1, string)
            # number of flat items consumed, indexed by arity
            offset = (0, 2, 3, 5)[arity]
            flat_tokens[i : i + offset] = [(result, (expr,))]
            op_index = 0

        return flat_tokens[0][0]

    # forward-declared so a parenthesised group can contain a whole expression
    flat = Forward()
    iso = lpar.suppress() + flat + rpar.suppress()
    atom = (baseExpr | iso).addParseAction(record_op(baseExpr))
    modified = ZeroOrMore(prefix_ops) + atom + ZeroOrMore(suffix_ops)
    flat << (modified + ZeroOrMore(ops + modified)).addParseAction(make_tree)
    return flat