def parse(input):
    period = sometok("period")
    string = p.oneplus(sometok("string")) >> (lambda x: " ".join(x))
    number = sometok("number")

    title = string + p.skip(period) >> RecipeTitle
    ingredients_start = sometok("ingredients_start") + p.skip(period) >> IngredientStart

    dry_measure = p.maybe(sometok("measure_type")) + sometok("dry_measure")
    liquid_measure = sometok("liquid_measure")
    mix_measure = sometok("mix_measure")

    # is this valid? 'g of butter', unit w/o initial_value
    ingredient = (p.maybe(number)
                  + p.maybe(dry_measure | liquid_measure | mix_measure)
                  + string
                  >> unarg(Ingredient))
    ingredients = p.many(ingredient)

    cooking_time = (p.skip(sometok("cooking_time"))
                    + (number >> unarg(CookingTime))
                    + p.skip(sometok("period")))
    oven_temp = (p.skip(sometok("oven"))
                 + p.many(number)
                 + p.skip(sometok("oven_temp"))
                 >> unarg(Oven))

    method_start = sometok("method_start") + p.skip(period)
    comment = p.skip(p.many(string | period))
    header = title + p.maybe(comment)

    instruction = (string + p.skip(period)) >> parse_instruction
    instructions = p.many(instruction)
    program = (method_start + instructions) >> unarg(MethodStart)

    serves = (sometok("serve") + number >> (lambda x: Serve("serve", x[1]))) + p.skip(period)
    ingredients_section = (ingredients_start + ingredients) >> unarg(IngredientSection)

    recipe = (
        header
        + p.maybe(ingredients_section)
        + p.maybe(cooking_time)
        + p.maybe(oven_temp)
        + p.maybe(program)
        + p.maybe(serves)
    ) >> RecipeNode

    main_parser = p.oneplus(recipe)
    return main_parser.parse(tokenize(input))
def parser(last_error=None):
    last_error = LastError() if last_error is None else last_error

    def apl(f):
        return lambda x: f(*x)

    def delim(t):
        return skip(_tok(t))

    symbol = _tok(Token.SYMBOL) >> _gen(Symbol)
    string = _tok(Token.STRING) >> _gen(String)
    placeholder = _tok(Token.PLACEHOLDER) >> _gen(Placeholder)
    keyword = _tok(Token.KEYWORD) >> _gen(Keyword)

    # Note: the tokenizer guarantees that the value consists of dots and digits
    # TODO: convert exceptions
    number = _tok(Token.NUMBER) >> _gen(Number, literal_eval)

    expr = forward_decl()
    implicit_tuple = forward_decl()

    list_ = ((_tok(Token.OPEN_BRACKET) + many(expr | keyword) + _tok(Token.CLOSE_BRACKET))
             >> apl(_list))

    dict_ = (error_ctx(_tok(Token.OPEN_BRACE) + many(keyword + expr) + _tok(Token.CLOSE_BRACE),
                       last_error, DICT_ERROR)
             >> apl(_dict))

    inline_args = many(expr | keyword)

    explicit_tuple = (error_ctx(_tok(Token.OPEN_PAREN) + symbol + inline_args
                                + _tok(Token.CLOSE_PAREN),
                                last_error, EXPLICIT_TUPLE_ERROR)
                      >> apl(_tuple))

    indented_arg = oneplus(implicit_tuple | expr + delim(Token.NEWLINE)) >> _maybe_join

    indented_kwarg = ((keyword + expr + delim(Token.NEWLINE))
                      | (keyword + delim(Token.NEWLINE) + delim(Token.INDENT)
                         + indented_arg + delim(Token.DEDENT)))

    indented_args_kwargs = ((many(indented_kwarg) + many(indented_arg))
                            >> apl(lambda pairs, args: list(chain(*(pairs + [args])))))

    implicit_tuple.define(
        error_ctx(symbol + inline_args + delim(Token.NEWLINE)
                  + maybe(delim(Token.INDENT) + indented_args_kwargs + delim(Token.DEDENT)),
                  last_error, IMPLICIT_TUPLE_ERROR)
        >> apl(_implicit_tuple))

    expr.define(symbol | string | number | explicit_tuple | list_ | dict_ | placeholder)

    body = (many(implicit_tuple) + _tok(Token.EOF)) >> apl(_module)
    return body
def grammar():
    lparen = skip(a(LParen()))
    rparen = skip(a(RParen()))

    def collapse(t):
        t[0].terms = t[1]
        return t[0]

    @with_forward_decls
    def ldap_filter():
        return ldap_and | ldap_or | ldap_not | ldap_test

    ldap_and = (lparen + a(And()) + oneplus(ldap_filter) + rparen) >> collapse
    ldap_or = (lparen + a(Or()) + oneplus(ldap_filter) + rparen) >> collapse
    ldap_not = (lparen + a(Not()) + ldap_filter + rparen) >> collapse
    ldap_test = lparen + a(Test()) + rparen

    return ldap_filter + skip(finished)
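# Usage sketch: the LParen/And/Or/Not/Test token classes live elsewhere in the
# original project, so the minimal stand-ins below are assumptions, written
# only to show how grammar() is driven. a() matches tokens by equality, hence
# the type-based __eq__.
class _Tok(object):
    def __eq__(self, other):
        return type(self) is type(other)

    def __hash__(self):
        return hash(type(self))

class LParen(_Tok): pass
class RParen(_Tok): pass
class Test(_Tok): pass
class And(_Tok): terms = None
class Or(_Tok): terms = None
class Not(_Tok): terms = None

# Token stream for a filter shaped like '(&(a=1)(b=2))':
tokens = [LParen(), And(), LParen(), Test(), RParen(),
          LParen(), Test(), RParen(), RParen()]
tree = grammar().parse(tokens)  # -> the And() node, with .terms == [Test(), Test()]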
def parse(tokens):
    ## building blocks
    kw_priority = some(toktype("kw_priority"))
    kw_probability = some(toktype("kw_probability"))
    kw_reaction = some(toktype("kw_reaction"))
    kw_exists = some(toktype("kw_exists"))
    kw_as = some(toktype("kw_as"))
    op_tilde = some(toktype("op_tilde"))
    op_priority_maximal = some(toktype("op_priority_maximal"))
    op_production = some(toktype("op_production"))
    atom = some(toktype("name"))
    number = some(toktype("number"))
    dissolve = some(toktype("op_dissolve"))
    osmose = some(toktype("op_osmose"))
    osmose_location = some(toktype("op_osmose_location"))
    env_open = some(toktype("env_open"))
    env_close = some(toktype("env_close"))
    membrane_open = some(toktype("membrane_open"))
    membrane_close = some(toktype("membrane_close"))

    ## grammar from the bottom up
    name = atom | number
    symbol = (atom
              | (dissolve + maybe(name))
              | (osmose + name + maybe(osmose_location + name)))
    priority = kw_priority + op_tilde + name + op_priority_maximal + name
    reaction = (kw_reaction + maybe(kw_as + name) + op_tilde
                + oneplus(name) + op_production + many(symbol))
    exists = kw_exists + op_tilde + oneplus(name)
    expr = exists | reaction | priority
    statement = with_forward_decls(lambda: membrane | expr) >> Statement
    body = maybe(name) + many(statement)
    membrane = (skip(membrane_open) + body + skip(membrane_close)) >> Membrane
    env = (skip(env_open) + body + skip(env_close)) >> Environment
    program = many(env) + skip(finished) >> Program

    return program.parse(tokens)
def get_number_parser():
    """Return a parser that reads (float and int) numbers with whitespace."""
    number = (parser.some(lambda tok: tok.type == 'NUMBER')
              >> token_value
              >> string_to_number)
    indent = parser.some(lambda t: t.code == token.INDENT)
    dedent = parser.a(Token(token.DEDENT, ''))
    # 54 is presumably tokenize.NL for the CPython version this targeted
    newline = parser.a(Token(54, '\n'))
    ignored_whitespace = parser.skip(indent | dedent | newline)
    return parser.oneplus(number | ignored_whitespace)
def parse(tokens):
    var = some(toktype("name")) | some(toktype("number"))
    open_form = some(toktype("form_open"))
    close_form = some(toktype("form_close"))
    op_lambda = some(toktype("op_lambda"))
    op_map = some(toktype("op_map"))
    prim_bind = some(toktype("kw_bind"))
    prim_halt = some(toktype("kw_halt"))

    exp = with_forward_decls(lambda: lam | var | prim_exp | exprn) >> Expression
    lam = (open_form + op_lambda + many(var) + op_map
           + oneplus(exp) + close_form) >> Lambda
    bind_exp = open_form + prim_bind + var + lam + close_form
    halt_exp = open_form + prim_halt + exp + close_form
    prim_exp = bind_exp | halt_exp
    exprn = open_form + oneplus(exp) + close_form >> Form
    prog = many(exp) + skip(finished) >> Program

    return prog.parse(tokens)
def parse(seq):
    """Sequence(Token) -> object"""
    unarg = lambda f: lambda args: f(*args)
    tokval = lambda x: x.value
    flatten = lambda list: sum(list, [])
    n = lambda s: a(Token(u'Name', s)) >> tokval
    op = lambda s: a(Token(u'Op', s)) >> tokval
    op_ = lambda s: skip(op(s))
    id_types = [u'Name', u'Number', u'String']
    id = some(lambda t: t.type in id_types).named(u'id') >> tokval
    make_graph_attr = lambda args: DefAttrs(u'graph', [Attr(*args)])
    make_edge = lambda x, xs, attrs: Edge([x] + xs, attrs)

    node_id = id  # + maybe(port)
    a_list = (id + maybe(op_(u'=') + id) + skip(maybe(op(u',')))
              >> unarg(Attr))
    attr_list = many(op_(u'[') + many(a_list) + op_(u']')) >> flatten
    attr_stmt = ((n(u'graph') | n(u'node') | n(u'edge')) + attr_list
                 >> unarg(DefAttrs))
    graph_attr = id + op_(u'=') + id >> make_graph_attr
    node_stmt = node_id + attr_list >> unarg(Node)
    # We use a forward_decl because of circular definitions like
    # (stmt_list -> stmt -> subgraph -> stmt_list)
    subgraph = forward_decl()
    edge_rhs = skip(op(u'->') | op(u'--')) + (subgraph | node_id)
    edge_stmt = ((subgraph | node_id) + oneplus(edge_rhs) + attr_list
                 >> unarg(make_edge))
    stmt = attr_stmt | edge_stmt | subgraph | graph_attr | node_stmt
    stmt_list = many(stmt + skip(maybe(op(u';'))))
    subgraph.define(
        skip(n(u'subgraph')) + maybe(id) + op_(u'{') + stmt_list + op_(u'}')
        >> unarg(SubGraph))
    graph = (maybe(n(u'strict')) + maybe(n(u'graph') | n(u'digraph'))
             + maybe(id) + op_(u'{') + stmt_list + op_(u'}')
             >> unarg(Graph))
    dotfile = graph + skip(finished)

    return dotfile.parse(seq)
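# Usage sketch: `tokenize` is the companion lexer from funcparserlib's dot.py
# example (not shown in this excerpt); it yields Token(u'Name', ...) and
# Token(u'Op', ...) objects like the ones matched above.
text = u'''
digraph g {
  a -> b [weight=1];
}
'''
tree = parse(tokenize(text))  # -> Graph AST with one edge statement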
    return _not_parser

a = lambda value: p.some(lambda tok: tok.value == value)
string = lambda s: p.some(lambda tok: tok.value.lower() == s.lower()).named(s)
some_type = lambda t: p.some(lambda tok: tok.type == t).named(t)
not_type = lambda t: p.some(lambda tok: tok.type != t).named("!{}".format(t))
any_type = p.some(lambda _: True).named("Any")
eof = p.finished.named("")
some = p.some
maybe = lambda parser: p.maybe(parser).named("[{}]".format(parser.name))
many = lambda parser: p.many(parser).named("[{}]...".format(parser.name))
skip = lambda parser: p.skip(parser).named("")
oneplus = lambda parser: p.oneplus(parser).named("{},[{}]...".format(parser.name, parser.name))
sparsed = lambda parser: (skip(many(not_parser(parser))) + parser).named("_{}".format(parser.name))

integer = (some_type(TokenType.Integer) >> to_i).named("I")
number = ((some_type(TokenType.Integer) | some_type(TokenType.Float)) >> to_f).named("N")
word = (some_type(TokenType.Word) >> to_s).named("W")
mention = (some_type(TokenType.Mention) >> extract_mention_id).named("M")
channel = (some_type(TokenType.Channel) >> extract_channel_id).named("C")
emoji = (some_type(TokenType.Emoji) >> extract_emoji_id).named("E")
snippet = (some_type(TokenType.Snippet) >> extract_snippet).named("S")

# High level helpers
on_off_switch = (string("on") >> const(True)) | (string("off") >> const(False))
url = p.some(lambda tok: tok.type == TokenType.Word and tok.value.startswith("http"))
## [88] Digit ::= [#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9] | [#x0966-#x096F] | [#x09E6-#x09EF] | [#x0A66-#x0A6F] | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F] | [#x0BE7-#x0BEF] | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F] | [#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29]
## [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] | [#x309D-#x309E] | [#x30FC-#x30FE]
digitsxml = re.compile(ur'[\u0030-\u0039\u0660-\u0669\u06F0-\u06F9\u0966-\u096F\u09E6-\u09EF\u0A66-\u0A6F\u0AE6-\u0AEF\u0B66-\u0B6F\u0BE7-\u0BEF\u0C66-\u0C6F\u0CE6-\u0CEF\u0D66-\u0D6F\u0E50-\u0E59\u0ED0-\u0ED9\u0F20-\u0F29]')
letters = re.compile(ur'[\u0041-\u005A\u0061-\u007A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u0131\u0134-\u013E\u0141-\u0148\u014A-\u017E\u0180-\u01C3\u01CD-\u01F0\u01F4-\u01F5\u01FA-\u0217\u0250-\u02A8\u02BB-\u02C1\u0386\u0388-\u038A\u038C\u038E-\u03A1\u03A3-\u03CE\u03D0-\u03D6\u03DA\u03DC\u03DE\u03E0\u03E2-\u03F3\u0401-\u040C\u040E-\u044F\u0451-\u045C\u045E-\u0481\u0490-\u04C4\u04C7-\u04C8\u04CB-\u04CC\u04D0-\u04EB\u04EE-\u04F5\u04F8-\u04F9\u0531-\u0556\u0559\u0561-\u0586\u05D0-\u05EA\u05F0-\u05F2\u0621-\u063A\u0641-\u064A\u0671-\u06B7\u06BA-\u06BE\u06C0-\u06CE\u06D0-\u06D3\u06D5\u06E5-\u06E6\u0905-\u0939\u093D\u0958-\u0961\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09DC-\u09DD\u09DF-\u09E1\u09F0-\u09F1\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A59-\u0A5C\u0A5E\u0A72-\u0A74\u0A85-\u0A8B\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABD\u0AE0\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B36-\u0B39\u0B3D\u0B5C-\u0B5D\u0B5F-\u0B61\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C60-\u0C61\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CDE\u0CE0-\u0CE1\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D60-\u0D61\u0E01-\u0E2E\u0E30\u0E32-\u0E33\u0E40-\u0E45\u0E81-\u0E82\u0E84\u0E87-\u0E88\u0E8A\u0E8D\u0E94-\u0E97\u0E99-\u0E9F\u0EA1-\u0EA3\u0EA5\u0EA7\u0EAA-\u0EAB\u0EAD-\u0EAE\u0EB0\u0EB2-\u0EB3\u0EBD\u0EC0-\u0EC4\u0F40-\u0F47\u0F49-\u0F69\u10A0-\u10C5\u10D0-\u10F6\u1100\u1102-\u1103\u1105-\u1107\u1109\u110B-\u110C\u110E-\u1112\u113C\u113E\u1140\u114C\u114E\u1150\u1154-\u1155\u1159\u115F-\u1161\u1163\u1165\u1167\u1169\u116D-\u116E\u1172-\u1173\u1175\u119E\u11A8\u11AB\u11AE-\u11AF\u11B7-\u11B8\u11BA\u11BC-\u11C2\u11EB\u11F0\u11F9\u1E00-\u1E9B\u1EA0-\u1EF9\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC2-\u1FC4\u1FC6-\u1FCC\u1FD0-\u1FD3\u1FD6-\u1FDB\u1FE0-\u1FEC\u1FF2-\u1FF4\u1FF6-\u1FFC\u2126\u212A-\u212B\u212E\u2180-\u2182\u3041-\u3094\u30A1-\u30FA\u3105-\u312C\uAC00-\uD7A3\u4E00-\u9FA5\u3007\u3021-\u3029]')
letterxml = p.some(letters.match)
combiningchars = re.compile(ur'[\u0300-\u0345\u0360-\u0361\u0483-\u0486\u0591-\u05A1\u05A3-\u05B9\u05BB-\u05BD\u05BF\u05C1-\u05C2\u05C4\u064B-\u0652\u0670\u06D6-\u06DC\u06DD-\u06DF\u06E0-\u06E4\u06E7-\u06E8\u06EA-\u06ED\u0901-\u0903\u093C\u093E-\u094C\u094D\u0951-\u0954\u0962-\u0963\u0981-\u0983\u09BC\u09BE\u09BF\u09C0-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09E2-\u09E3\u0A02\u0A3C\u0A3E\u0A3F\u0A40-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A70-\u0A71\u0A81-\u0A83\u0ABC\u0ABE-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0B01-\u0B03\u0B3C\u0B3E-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B82-\u0B83\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0C01-\u0C03\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C82-\u0C83\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0D02-\u0D03\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0E31\u0E34-\u0E3A\u0E47-\u0E4E\u0EB1\u0EB4-\u0EB9\u0EBB-\u0EBC\u0EC8-\u0ECD\u0F18-\u0F19\u0F35\u0F37\u0F39\u0F3E\u0F3F\u0F71-\u0F84\u0F86-\u0F8B\u0F90-\u0F95\u0F97\u0F99-\u0FAD\u0FB1-\u0FB7\u0FB9\u20D0-\u20DC\u20E1\u302A-\u302F\u3099\u309A]')
combiningcharxml = p.some(combiningchars.match)
digitxml = p.some(digitsxml.match)

expression = p.forward_decl()

quote = p.skip(p.a("'"))
singlequoted = quote + (p.oneplus(p.a("'") + p.a("'") | p.some(lambda x: x != "'")) >> join) + quote
identifier = ((letterxml
               + (p.many(letterxml | digitxml | p.a('_') | p.a('.') | combiningcharxml) >> join)
               >> join)
              >> string.upper >> tag(u'identifier'))
iri = p.oneplus(p.some(lambda x: x not in u"'#")) >> join >> tag(u'iri')

## Whitespace ::= #x20 | #x09 | #x0a | #x0d
whitespace = oneof(u'\x20\x09\x0a\x0d')
spaces = p.skip(p.many(whitespace))

## Array ::= '{' MatrixRow ( RowSeparator MatrixRow )* '}'
def test_halting():
    many(oneplus(x)).parse('x')
    many(p9 + x).parse('x')
def parse_instruction(spec):
    string = p.oneplus(sometok("string")) >> (lambda x: " ".join(x))
    ordinal = sometok("ordinal")
    bowl = sometok("bowl")
    the = sometok("the")
    dish = sometok("dish")
    to = sometok("to")
    into = sometok("into")
    concat = lambda list: " ".join(list)

    take_i = (sometok("take") + (p.oneplus(string) >> concat)
              + sometok("from") + sometok("refrigerator"))
    put_i = (sometok("put") + p.skip(p.maybe(the)) + (p.oneplus(string) >> concat)
             + p.skip(into) + p.maybe(ordinal | the) + bowl)
    liquefy_1 = sometok("liquefy") + sometok("contents") + p.maybe(ordinal) + bowl
    liquefy_2 = sometok("liquefy") + (p.oneplus(string) >> concat)
    liquefy_i = liquefy_1 | liquefy_2
    pour_i = (sometok("pour") + sometok("contents") + p.maybe(ordinal) + bowl
              + sometok("into") + the + p.maybe(ordinal) + dish)
    fold_i = (sometok("fold") + p.skip(p.maybe(the)) + (p.oneplus(string) >> concat)
              + into + p.maybe(ordinal | the) + bowl)
    # cleanup repetition
    add_i = (sometok("add") + (p.oneplus(string) >> concat)
             + p.maybe(to + p.maybe(ordinal | the) + bowl))
    remove_i = (sometok("remove") + (p.oneplus(string) >> concat)
                + p.maybe(sometok("from") + p.maybe(ordinal | the) + bowl))
    combine_i = (sometok("combine") + (p.oneplus(string) >> concat)
                 + p.maybe(into + p.maybe(ordinal | the) + bowl))
    divide_i = (sometok("divide") + (p.oneplus(string) >> concat)
                + p.maybe(into + p.maybe(ordinal | the) + bowl))
    add_dry_i = sometok("add_dry") + p.maybe(to + p.maybe(ordinal | the) + bowl)
    stir_1 = (sometok("stir") + p.maybe(the + p.maybe(ordinal | the) + bowl)
              + sometok("for") + sometok("number")
              + (sometok("minute") | sometok("minutes")))
    stir_2 = (sometok("stir") + (p.oneplus(string) >> concat)
              + into + the + p.maybe(ordinal) + bowl)
    stir_i = stir_1 | stir_2
    mix_i = sometok("mix") + p.maybe(the + p.maybe(ordinal) + bowl) + sometok("well")
    clean_i = sometok("clean") + p.maybe(ordinal | the) + bowl
    loop_start_i = ((sometok("string") + p.maybe(the) + (p.oneplus(string) >> concat))
                    >> (lambda x: ("loop_start", x)))
    loop_end_i = ((sometok("string") + p.maybe(p.maybe(the) + (p.oneplus(string) >> concat))
                   + sometok("until") + string)
                  >> (lambda x: ("loop_end", x)))
    set_aside_i = sometok("set") >> (lambda x: (x, None))
    serve_with_i = sometok("serve_with") + (p.oneplus(string) >> concat)
    refrigerate_i = (sometok("refrigerate")
                     + p.maybe(sometok("for") + sometok("number")
                               + (sometok("hour") | sometok("hours"))))

    instruction = (
        take_i
        | put_i
        | liquefy_i
        | pour_i
        | add_i
        | fold_i
        | remove_i
        | combine_i
        | divide_i
        | add_dry_i
        | stir_i
        | mix_i
        | clean_i
        | loop_end_i    # -| ORDER matters
        | loop_start_i  # -|
        | set_aside_i
        | serve_with_i
        | refrigerate_i
    ) >> (lambda x: Instruction(x[0].lower().replace(" ", "_"), x[1:]))

    return instruction.parse(tokenize_instruction(spec))
#!/usr/bin/env python
import string

import funcparserlib.parser as p

import nodes
from timeseries import ScalarTimeSeries, TimeSeries, generate_two_dummy_data_sets

if __name__ == "__main__":
    input_ts = dict(zip(['A_TS', 'B_TS'], generate_two_dummy_data_sets()))

    integer = p.oneplus(p.some(lambda c: c.isdigit())) >> (lambda toks: int(''.join(toks)))

    def to_number(toks):
        if not toks[1]:
            return float(toks[0])
        # str.format on "%s.%s" would return the template unchanged;
        # %-interpolation builds the "<int>.<frac>" literal correctly
        return float("%s.%s" % (toks[0], toks[1]))

    number = integer + p.maybe(p.skip(p.a('.')) + integer) >> to_number

    timeseries_name = (p.oneplus(p.some(lambda c: c.isupper() or c == '_'))
                       >> (lambda toks: ''.join(toks)))
    timeseries_expr = timeseries_name >> (lambda name: nodes.TimeSeriesNode(input_ts[name]))
    scalar_expr = number >> (lambda n: nodes.TimeSeriesNode(ScalarTimeSeries(n)))

    expr = p.forward_decl()
    expr_rest = p.forward_decl()
    expr.define(
        timeseries_expr + expr_rest >> (lambda x: x[1](x[0]))
        | scalar_expr + expr_rest >> (lambda x: x[1](x[0])))
def parse(seq):
    """Sequence(Token) -> object"""
    unarg = lambda f: lambda args: f(*args)
    tokval = lambda x: x.value
    flatten = lambda list: sum(list, [])
    n = lambda s: a(Token('Name', s)) >> tokval
    op = lambda s: a(Token('Op', s)) >> tokval
    op_ = lambda s: skip(op(s))

    node_type_keywords = [
        'start', 'startCompensation', 'startConditional', 'startConditionalNon',
        'startError', 'startEscalation', 'startEscalationNon', 'startMessage',
        'startMessageNon', 'startMultiple', 'startMultipleNon',
        'startParallelMultiple', 'startParallelMultipleNon', 'startSignal',
        'startSignalNon', 'startTimer', 'startTimerNon',
        'end', 'endCancel', 'endCompensation', 'endError', 'endEscalation',
        'endMessage', 'endMultiple', 'endSignal', 'endTerminate',
        'intermediate', 'catchCancel', 'catchCompensation', 'throwCompensation',
        'catchError', 'catchEscalation', 'catchEscalationNon', 'throwEscalation',
        'catchLink', 'throwLink', 'catchMessage', 'catchMessageNon',
        'throwMessage', 'catchMultiple', 'catchMultipleNon', 'throwMultiple',
        'catchParallelMultiple', 'catchParallelMultipleNon', 'catchSignal',
        'catchSignalNon', 'throwSignal', 'conditional', 'conditionalNon',
        'timer', 'timerNon',
        'task', 'businessRuleTask', 'manualTask', 'receiveTask', 'scriptTask',
        'sendTask', 'serviceTask', 'userTask',
        'call', 'businessRuleCall', 'manualCall', 'scriptCall', 'userCall',
        'process', 'adhoc', 'transaction',
        'event', 'eventCompensation', 'eventConditional', 'eventConditionalNon',
        'eventError', 'eventEscalation', 'eventEscalationNon', 'eventMessage',
        'eventMessageNon', 'eventMultiple', 'eventMultipleNon',
        'eventParallelMultiple', 'eventParallelMultipleNon', 'eventSignal',
        'eventSignalNon', 'eventTimer', 'eventTimerNon',
        'inclusive', 'exclusive', 'parallel', 'complex', 'eventBased',
        'eventBasedStart', 'eventBasedParallelStart',
        'data', 'dataCollection', 'dataInput', 'dataInputCollection',
        'dataOutput', 'dataOutputCollection', 'dataStore'
    ]
    node_type = some(lambda t: t.value in node_type_keywords).named('type') >> tokval

    id_types = ['Name', 'Number', 'String']
    id = some(lambda t: t.type in id_types).named('id') >> tokval
    make_graph_attr = lambda args: DefAttrs('graph', [Attr(*args)])

    node_id = id
    a_list = id + maybe(op_('=') + id) + skip(maybe(op(';'))) >> unarg(Attr)
    attr_list = many(op_('[') + many(a_list) + op_(']')) >> flatten
    attr_stmt = (n('_') | n('node') | n('edge')) + attr_list >> unarg(DefAttrs)
    graph_attr = id + op_('=') + id >> make_graph_attr
    node_stmt = node_type + node_id + attr_list >> unarg(Node)
    # We use a forward_decl because of circular definitions like
    # (stmt_list -> stmt -> subgraph -> stmt_list)
    lane = forward_decl()
    pool = forward_decl()
    edge_type = some(lambda t: t.type == 'EdgeOp') >> tokval
    edge_rhs = (edge_type + node_id) >> (lambda t: [t[0], t[1]])
    edge_stmt = node_id + oneplus(edge_rhs) + attr_list >> unarg(Edge)
    stmt = attr_stmt | edge_stmt | lane | pool | graph_attr | node_stmt
    stmt_list = many(stmt + skip(maybe(op(';'))))
    pool.define(
        skip(n('pool')) + maybe(id) + op_('{') + stmt_list + op_('}')
        >> unarg(Pool))
    lane.define(
        skip(n('lane')) + maybe(id) + op_('{') + stmt_list + op_('}')
        >> unarg(Lane))
    graph = (maybe(n('graph')) + maybe(id) + op_('{') + stmt_list + op_('}')
             >> unarg(Graph))
    dotfile = graph + skip(finished)

    try:
        return dotfile.parse(seq)
    except NoParseError as e:
        print(e.message)
        return None
def parse(tokens):
    name = some(t('NAME')) >> tokval
    raw_string = some(t('STRING')) >> tokval >> strip('"')
    num = some(t('NUMBER')) >> tokval >> float
    true = const('true') >> always(True)
    false = const('false') >> always(False)
    null_ = const('null') >> always(None)

    enum_item = num | true | false | null_ | name | raw_string
    enum = many(enum_item + skip(op("|"))) + enum_item >> append >> anno("enum")

    boolean = const("bool") >> always(None) >> anno("boolean")
    null = const("null") >> always(None) >> anno("null")

    num_range = (skip(op('{')) + maybe(num) + skip(op(","))
                 + maybe(num) + skip(op('}')) >> tuple)
    regexp = some(t("REGEXP")) >> tokval >> strip("/")
    string = (((skip(const("str")) + maybe(num_range) + maybe(regexp))
               | (maybe(num_range) + regexp))
              >> anno("string"))
    _format = skip(op("%")) + name >> anno("format")

    num_range_step = (skip(op('{')) + maybe(num) + skip(op(",")) + maybe(num)
                      + maybe(skip(op(",")) + num) + skip(op('}')) >> tuple)
    number = skip(const("num")) + maybe(num_range_step) >> anno("number")
    integer = skip(const("int")) + maybe(num_range_step) >> anno("integer")

    schema = forward_decl()
    array = (skip(op('['))
             + (many(schema + skip(op(","))) + maybe(schema) >> append)
             + skip(op(']')) + maybe(num_range) + maybe(op("!"))
             >> anno("array"))

    indent = some(t("INDENT")) >> tokval >> anno("indent")
    dedent = some(t("DEDENT")) >> tokval
    nl = some(t('NL'))

    definition = op("@") + name
    key = (((name | string) + maybe(op("?"))) | definition) + skip(op(":"))
    ref = skip(op("@")) + (name | name + skip(op(":")) + name) >> anno("ref")
    ref_declaration = (skip(op("@")) + name + raw_string
                       >> (lambda name_url: (name_url[0], "extref", name_url[1])))

    base_schema = ref | string | number | integer | boolean | null | _format | array
    oneof = oneplus(base_schema + skip(op("|"))) + base_schema >> append >> anno("oneof")
    anyof = oneplus(base_schema + skip(op("/"))) + base_schema >> append >> anno("anyof")
    allof = oneplus(base_schema + skip(op("&"))) + base_schema >> append >> anno("allof")
    simple_schema = anyof | oneof | allof | base_schema | enum | array

    dots = op("...") >> always((None, "open", None))
    refid = skip(op("@")) + raw_string >> (lambda x: (None, "id", x))

    obj = forward_decl()
    nested_obj = skip(nl) + skip(indent) + obj + skip(dedent)
    obj.define(oneplus(((key + ((simple_schema + skip(nl)) | nested_obj))
                        | ((dots | refid | ref_declaration) + skip(nl)))
                       >> list)
               >> list2dict >> anno("object"))
    schema.define(obj | simple_schema)

    exprs = skip(maybe(nl)) + schema + skip(maybe(nl)) + skip(finished)
    return exprs.parse(list(tokens))
from funcparserlib import parser as p

from lib import ast
from . import tokenizer as t


def build_simple_parser(token_name, ast_class):
    return (p.some(lambda token: token.type == token_name)
            >> (lambda token: ast_class(repeat=token.value)))


p_inc = build_simple_parser(token_name='inc', ast_class=ast.Inc)
p_dec = build_simple_parser(token_name='dec', ast_class=ast.Dec)
p_right = build_simple_parser(token_name='right', ast_class=ast.Right)
p_left = build_simple_parser(token_name='left', ast_class=ast.Left)
p_input = build_simple_parser(token_name='input', ast_class=ast.Input)
p_output = build_simple_parser(token_name='output', ast_class=ast.Output)

p_simple_expression = p_dec | p_inc | p_right | p_left | p_input | p_output

p_loop_expression = p.forward_decl()
p_expression = p.forward_decl()

p_loop_expression.define(
    (p.skip(p.a(t.t_loop_start())) + p.maybe(p_expression) + p.skip(p.a(t.t_loop_end())))
    >> (lambda contains: ast.Loop(contains=(contains if contains else list()))))

p_expression.define(p.oneplus(p_simple_expression | p_loop_expression))

p_program = p_expression >> (lambda contains: ast.Program(contains=contains))
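# Usage sketch: assumes the tokenizer module exposes a helper (called
# `tokenize` here, an assumption) that turns Brainfuck source into the token
# stream these parsers expect.
tokens = t.tokenize('+[->+<]')
program_ast = p_program.parse(tokens)  # -> ast.Program(contains=[...])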
def test_non_halting_many():
    assert_raises(GrammarError, lambda: many(many(x)).parse(''))
    assert_raises(GrammarError, lambda: oneplus(many(x)).parse(''))
    assert_raises(GrammarError, lambda: many(p1).parse(''))
    assert_raises(GrammarError, lambda: many(p5).parse(''))
    assert_raises(GrammarError, lambda: (x + many(p4)).parse(''))
def qname(x):
    return p.oneplus(qchar(x)) >> join
dash = tokenType("Dash")
plus = tokenType("Plus")
content = tokenType("VariableContents")
string = tokenType("String") >> make_string
unString = tokenType("String")  # When the double quotes are still needed for internal processing
seqString = tokenType("SequenceString") >> make_seqString
unseqString = tokenType("SequenceString") >> make_unseqString  # For use with variables

# Code variants
code_end = tokenType("CodeEnd")

# Scan Codes
scanCode_start = tokenType("ScanCodeStart")
scanCode_range = number + skip(dash) + number >> make_scanCode_range
scanCode_listElem = number >> listElem
scanCode_innerList = oneplus((scanCode_range | scanCode_listElem) + skip(maybe(comma))) >> flatten
scanCode_expanded = skip(scanCode_start) + scanCode_innerList + skip(code_end)
scanCode_elem = scanCode >> listElem
scanCode_combo = oneplus((scanCode_expanded | scanCode_elem) + skip(maybe(plus)))
scanCode_sequence = oneplus(scanCode_combo + skip(maybe(comma)))

# USB Codes
usbCode_start = tokenType("USBCodeStart")
usbCode_number = number >> make_usbCode_number
usbCode_range = (usbCode_number | unString) + skip(dash) + (number | unString) >> make_usbCode_range
usbCode_listElemTag = unString >> make_usbCode
usbCode_listElem = (usbCode_number | usbCode_listElemTag) >> listElem
usbCode_innerList = oneplus((usbCode_range | usbCode_listElem) + skip(maybe(comma))) >> flatten
usbCode_expanded = skip(usbCode_start) + usbCode_innerList + skip(code_end)
usbCode_elem = usbCode >> listElem
usbCode_combo = oneplus((usbCode_expanded | usbCode_elem) + skip(maybe(plus))) >> listElem
# ("hello"):len constant_method = ConstantMethod.match(opt_iden + p.skip(colon) + p.skip(close_rnd_brace) + (tok_string | tok_number) + p.skip(open_rnd_brace)) # ["foo"] or [42] constant_index_lookup = (p.skip(close_sq_brace) + (tok_string | tok_number) + p.skip(open_sq_brace)) # either .name or ["name"] dot_or_index_lookup = constant_index_lookup | (iden + p.skip(dot)) # foo[0]["bar"].baz # TODO: cleanup this rule _attr_chain = p.oneplus(dot_or_index_lookup) + first_iden _obj = _attr_chain | first_iden _attr_chain_noprefix = p.pure("") + p.skip(dot) + _obj obj_attr_chain = ObjectAttribute.match(_attr_chain | _attr_chain_noprefix) # foo["bar"] obj_indexed_complete = ObjectIndexedComplete.match( p.skip(close_sq_brace) + (tok_string | tok_number) + p.skip(open_sq_brace) + _obj) # foo["bar obj_attr_indexed = ObjectAttributeIndexed.match( opt_iden + # FIXME: spaces in keys quote + p.skip(open_sq_brace) + _obj) # foo.bar:baz
from __future__ import absolute_import

import funcparserlib.parser as p

string = lambda x: x or ''
cat = ''.join

negative = p.maybe(p.a('-')) >> string
digits = p.oneplus(p.some(lambda char: char.isdigit())) >> cat
decimal_part = p.maybe(p.a('.') + digits) >> string >> cat
number = (negative + digits + decimal_part) >> cat >> float

addition = number + p.skip(p.a('+')) + number >> sum

expression = addition | number
expression = expression + p.finished


def calculate(text):
    return expression.parse(text)[0]
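# The grammar above works directly on character sequences, so it can be
# exercised without a separate tokenizer:
assert calculate('2+3') == 5.0
assert calculate('-1.5') == -1.5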
def test_oneplus(self):
    x = a(u'x')
    y = a(u'y')
    expr = oneplus(x + y)
    self.assertEqual(expr.parse(u'xyxyxy'),
                     [(u'x', u'y'), (u'x', u'y'), (u'x', u'y')])
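# oneplus(p) parses p at least once and returns a list of results, i.e. it
# accepts the same inputs as p + many(p). A small sanity check of that
# reading (a sketch, reusing the `a` and `oneplus` helpers from the test
# above):
def test_oneplus_single(self):
    self.assertEqual(oneplus(a(u'x')).parse(u'xxx'), [u'x', u'x', u'x'])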
plus = tokenType('Plus')
content = tokenType('VariableContents')
string = tokenType('String') >> make_string
unString = tokenType('String')  # When the double quotes are still needed for internal processing
seqString = tokenType('SequenceString') >> make_seqString
unseqString = tokenType('SequenceString') >> make_unseqString  # For use with variables

# Code variants
code_start = tokenType('CodeStart')
code_end = tokenType('CodeEnd')

# Scan Codes
scanCode_start = tokenType('ScanCodeStart')
scanCode_range = number + skip(dash) + number >> make_scanCode_range
scanCode_listElem = number >> listElem
scanCode_innerList = oneplus((scanCode_range | scanCode_listElem) + skip(maybe(comma))) >> flatten
scanCode_expanded = skip(scanCode_start) + scanCode_innerList + skip(code_end)
scanCode_elem = scanCode >> listElem
scanCode_combo = oneplus((scanCode_expanded | scanCode_elem) + skip(maybe(plus)))
scanCode_sequence = oneplus(scanCode_combo + skip(maybe(comma)))

# USB Codes
usbCode_start = tokenType('USBCodeStart')
usbCode_number = number >> make_usbCode_number
usbCode_range = (usbCode_number | unString) + skip(dash) + (number | unString) >> make_usbCode_range
usbCode_listElemTag = unString >> make_usbCode
usbCode_listElem = (usbCode_number | usbCode_listElemTag) >> listElem
usbCode_innerList = oneplus((usbCode_range | usbCode_listElem) + skip(maybe(comma))) >> flatten
usbCode_expanded = skip(usbCode_start) + usbCode_innerList + skip(code_end)
usbCode_elem = usbCode >> listElem
usbCode_combo = oneplus((usbCode_expanded | usbCode_elem) + skip(maybe(plus))) >> listElem
attr_list = many(op_('[') + many(a_list) + op_(']')) >> flatten
attr_stmt = (n('graph') | n('node') | n('edge')) + attr_list >> unarg(DefAttrs)
graph_attr = id + op_('=') + id >> make_graph_attr
node_stmt = node_id + attr_list >> unarg(Node)
# We use a fwd() because of circular definitions like
# (stmt_list -> stmt -> subgraph -> stmt_list)
subgraph = fwd()
edge_rhs = skip(op('->') | op('--')) + (subgraph | node_id)
edge_stmt = ((subgraph | node_id) + oneplus(edge_rhs) + attr_list
             >> unarg(make_edge))
stmt = attr_stmt | edge_stmt | subgraph | graph_attr | node_stmt
stmt_list = many(stmt + skip(maybe(op(';'))))
subgraph.define(memoize(
    skip(n('subgraph')) + maybe(id) + op_('{') + stmt_list +
def make_name(n):
    assert len(n) == 2
    return PLName(n[0], n[1])


def make_array(n):
    return PLArray(n)


number = sometok('number') >> make_number
string = sometok('string') >> make_string
comment = sometok('comment') >> make_comment
name = (sometok('name') | sometok('string')) + maybe(comment) >> make_name
value = fwd()
member = name + op_('=') + value >> make_member
section = maybe(comment) + oneplus(member + op_(';')) + maybe(comment) >> make_section
object = (op_('{') + many(section) + op_('}')) >> make_object
array = (op_('(') + many(value + op_(',')) + op_(')')) >> make_array
value.define(object | number | name | array)
pbxproj_file = value + skip(eof)
    return _helper

if _DEBUG:
    _annotate = _annotate_debug
    _mkstr = _mkstr_debug
    _satisfies = _satisfies_debug
else:
    _annotate = _annotate_production
    _mkstr = _mkstr_production
    _satisfies = _satisfies_production

# ---------------------------------------------------------------------
# elementary parts
# ---------------------------------------------------------------------

_nat = fp.oneplus(_satisfies(lambda c: c.isdigit())) >> (lambda x: int(_mkstr(x)))
_nl = fp.skip(_oneof("\r\n"))
_comma = fp.skip(_oneof(","))
_semicolon = fp.skip(_oneof(";"))
_fullstop = fp.skip(_oneof("."))
# horizontal only
_sp = fp.skip(_many_char(lambda x: x not in "\r\n" and x.isspace()))
_allsp = fp.skip(_many_char(lambda x: x.isspace()))
_alphanum_str = _many_char(lambda x: x.isalnum())
_eof = fp.skip(fp.finished)


class _OptionalBlock:
    """
    For use with `_lines` only: wraps a parser so that we not only
    take into account that it's optional but that one of the
    newlines around it is optional too
    """
    | (LSQB + EXPR + a("for") + NAME + a("in") + EXPR + RSQB) >> make_list_comp
    | (LSQB + EXPR + a("for") + NAME + a("in") + EXPR + a("if") + EXPR + RSQB)
    >> make_list_comp_with_if
    | (a("[") + a("]")) >> make_empty_list)

DICT_MAKER.define(
    (LBRACE + EXPR + COLON + EXPR + many(COMMA + EXPR + COLON + EXPR) + maybe(COMMA) + RBRACE)
    >> make_dict  # type: ignore
    | (a("{") + a("}")) >> make_empty_dict)

TMPL: Final = (OPEN_TMPL + EXPR + CLOSE_TMPL) | (OPEN_TMPL2 + EXPR + CLOSE_TMPL2)
TEXT: Final = some(make_toktype_predicate("TEXT")) >> make_text
PARSER: Final = oneplus(TMPL | TEXT) + skip(finished)


class BaseExpr(Generic[_T], abc.ABC):
    @abc.abstractmethod
    async def eval(self, root: RootABC) -> Optional[_T]:
        pass


IMPLICIT_STR_CONCAT: Final[Tuple[type, ...]] = (str, RemotePath, LocalPath, URL)


class Expr(BaseExpr[_T]):
    allow_none: ClassVar[bool] = True
    allow_expr: ClassVar[bool] = True
# Scan Codes
scanCode_start = tokenType('ScanCodeStart')
scanCode_range = number + skip(dash) + number >> Make.scanCode_range
scanCode_listElem = number >> Make.scanCode
scanCode_specifier = ((scanCode_range | scanCode_listElem) + maybe(specifier_list)
                      >> unarg(Make.specifierUnroll))
scanCode_innerList = many(scanCode_specifier + skip(maybe(comma))) >> flatten
scanCode_expanded = (skip(scanCode_start) + scanCode_innerList + skip(code_end)
                     + maybe(specifier_list) >> unarg(Make.specifierUnroll))
scanCode_elem = scanCode + maybe(specifier_list) >> unarg(Make.specifierUnroll)
scanCode_combo_elem = scanCode_expanded | scanCode_elem
scanCode_single = (skip(scanCode_start) + scanCode_listElem + skip(code_end)) | scanCode
scanCode_il_nospec = oneplus((scanCode_range | scanCode_listElem) + skip(maybe(comma)))
scanCode_nospecifier = skip(scanCode_start) + scanCode_il_nospec + skip(code_end)

# Cons Codes
consCode_start = tokenType('ConsCodeStart')
consCode_number = number >> Make.consCode_number
consCode_range = ((consCode_number | unString) + skip(dash) + (number | unString)
                  >> Make.consCode_range)
consCode_listElemTag = unString >> Make.consCode
consCode_listElem = consCode_number | consCode_listElemTag
consCode_specifier = ((consCode_range | consCode_listElem) + maybe(specifier_list)
                      >> unarg(Make.specifierUnroll))
consCode_innerList = oneplus(consCode_specifier + skip(maybe(comma))) >> flatten
dash = tokenType('Dash')
plus = tokenType('Plus')
content = tokenType('VariableContents')
string = tokenType('String') >> make_string
unString = tokenType('String')  # When the double quotes are still needed for internal processing
seqString = tokenType('SequenceString') >> make_seqString
unseqString = tokenType('SequenceString') >> make_unseqString  # For use with variables

# Code variants
code_end = tokenType('CodeEnd')

# Scan Codes
scanCode_start = tokenType('ScanCodeStart')
scanCode_range = number + skip(dash) + number >> make_scanCode_range
scanCode_listElem = number >> listElem
scanCode_innerList = oneplus((scanCode_range | scanCode_listElem) + skip(maybe(comma))) >> flatten
scanCode_expanded = skip(scanCode_start) + scanCode_innerList + skip(code_end)
scanCode_elem = scanCode >> listElem
scanCode_combo = oneplus((scanCode_expanded | scanCode_elem) + skip(maybe(plus)))
scanCode_sequence = oneplus(scanCode_combo + skip(maybe(comma)))

# USB Codes
usbCode_start = tokenType('USBCodeStart')
usbCode_range = (number | unString) + skip(dash) + (number | unString) >> make_usbCode_range
usbCode_listElemTag = unString >> make_usbCode
usbCode_listElem = (number | usbCode_listElemTag) >> listElem
usbCode_innerList = oneplus((usbCode_range | usbCode_listElem) + skip(maybe(comma))) >> flatten
usbCode_expanded = skip(usbCode_start) + usbCode_innerList + skip(code_end)
usbCode_elem = usbCode >> listElem
usbCode_combo = oneplus((usbCode_expanded | usbCode_elem) + skip(maybe(plus))) >> listElem
usbCode_sequence = oneplus((usbCode_combo | seqString) + skip(maybe(comma))) >> oneLayerFlatten
# Specifier
specifier_basic = (timing >> Make.specifierTiming) | (name >> Make.specifierState)
specifier_complex = (name + skip(operator(':')) + timing) >> unarg(Make.specifierState)
specifier_state = specifier_complex | specifier_basic
specifier_analog = number >> Make.specifierAnalog
specifier_list = (skip(parenthesis('('))
                  + many((specifier_state | specifier_analog) + skip(maybe(comma)))
                  + skip(parenthesis(')')))

# Scan Codes
scanCode_start = tokenType('ScanCodeStart')
scanCode_range = number + skip(dash) + number >> Make.scanCode_range
scanCode_listElem = number >> Make.scanCode
scanCode_specifier = ((scanCode_range | scanCode_listElem) + maybe(specifier_list)
                      >> unarg(Make.specifierUnroll))
scanCode_innerList = many(scanCode_specifier + skip(maybe(comma))) >> flatten
scanCode_expanded = (skip(scanCode_start) + scanCode_innerList + skip(code_end)
                     + maybe(specifier_list) >> unarg(Make.specifierUnroll))
scanCode_elem = scanCode + maybe(specifier_list) >> unarg(Make.specifierUnroll)
scanCode_combo = oneplus((scanCode_expanded | scanCode_elem) + skip(maybe(plus)))
scanCode_sequence = oneplus(scanCode_combo + skip(maybe(comma)))
scanCode_single = (skip(scanCode_start) + scanCode_listElem + skip(code_end)) | scanCode
scanCode_il_nospec = oneplus((scanCode_range | scanCode_listElem) + skip(maybe(comma)))
scanCode_nospecifier = skip(scanCode_start) + scanCode_il_nospec + skip(code_end)

# Cons Codes
consCode_start = tokenType('ConsCodeStart')
consCode_number = number >> Make.consCode_number
consCode_range = ((consCode_number | unString) + skip(dash) + (number | unString)
                  >> Make.consCode_range)
consCode_listElemTag = unString >> Make.consCode
consCode_listElem = consCode_number | consCode_listElemTag
consCode_specifier = ((consCode_range | consCode_listElem) + maybe(specifier_list)
                      >> unarg(Make.specifierUnroll))
consCode_innerList = oneplus(consCode_specifier + skip(maybe(comma))) >> flatten
consCode_expanded = (skip(consCode_start) + consCode_innerList + skip(code_end)
                     + maybe(specifier_list) >> unarg(Make.specifierUnroll))
consCode_elem = consCode + maybe(specifier_list) >> unarg(Make.specifierUnroll)
def parse(seq):
    'Sequence(Token) -> object'
    Host = namedtuple('Host', 'exename arch host np')
    LogSummary = namedtuple('LogSummary', 'host wtime stages options')

    def mkLevel(s):
        rfloat = r'[-+]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]?\d+)?'
        rint = r'[-+]?\d+'
        capture = lambda m: '(' + m + ')'
        within_space = lambda m: r'\s*' + m + r'\s*'
        cfloat, cint = [within_space(capture(m)) for m in [rfloat, rint]]
        x = within_space('x')
        m = re.match('Level' + cint + r'domain size \(m\)' + cfloat + x + cfloat + x + cfloat
                     + ', num elements' + cint + x + cint + x + cint + r'\(' + cint
                     + r'\), size \(m\)' + cfloat + x + cfloat + x + cfloat, s)
        return Level(*m.groups())

    def mkSNESIt(s):
        resline, ksp = s[0], s[1]
        res = float(resline.strip().split()[4])
        indent = len(re.match(r'^( *)(?: | \d|\d\d)\d', resline).groups()[0]) // 2
        return SNESIt(indent, res, ksp)

    def mkKSPIt(s):
        return float(s.strip().split()[4])

    def mkKSP(s):
        return KSP(reason=('UNKNOWN' if len(s) == 1 else s[1]), res=s[0])

    def mkSNES(s):
        res = s[0]
        indent = res[0].indent
        for it in res[1:]:
            if it.indent != indent:
                raise RuntimeError(
                    'SNES monitors changed levels, perhaps -snes_converged_reason is missing:'
                    '\n\tstarted with: %s\n\tunexpected: %s' % (res[0], it))
        return SNES(level=None, indent=indent, reason=s[1], res=s[0])

    def mkEvent(s):
        s = s.split()
        return Event(name=s[0], count=int(s[1]), time=float(s[3]),
                     flops=float(s[5]), Mflops=float(s[-1]))

    def mkStage(stageheader, events):
        name = re.match(r'^--- Event Stage \d+: (.*)', stageheader).groups()[0]
        eventdict = dict((e.name, e) for e in events)
        return Stage(name, eventdict)

    def mkOption(s):
        return re.match(r'^(-\w+)(?:\s+(.+))?$', s).groups()

    def mkRun(levels, solves, logs):
        for x in solves:
            x.level = levels[-1 - x.indent]
        if not logs:
            # Wrap in a list so the logs[0] accesses below keep working
            logs = [LogSummary(host=Host('unknown', 'unknown', 'unknown', -1),
                               wtime=[], stages=[], options=[])]
        exename = logs[0].host.exename
        arch = logs[0].host.arch
        host = logs[0].host.host
        np = logs[0].host.np
        wtime = logs[0].wtime
        stages = logs[0].stages
        options = logs[0].options
        if len(logs) > 1:
            meanwtime = sum(h.wtime for h in logs) / len(logs)
            rej = 0.15
            # Exclude outliers
            logs = [h for h in logs
                    if h.wtime < (1 + rej) * meanwtime and h.wtime > (1 - rej) * meanwtime]
            nlogs = len(logs)
            wtime = sum(h.wtime for h in logs) / nlogs
            for i, stage in enumerate(stages):
                for event in stage.events:
                    # _replace returns a new namedtuple, so reassign the averaged fields
                    stage.events[event] = stage.events[event]._replace(
                        time=sum(h.stages[i].events[event].time for h in logs) / nlogs,
                        flops=sum(h.stages[i].events[event].flops for h in logs) / nlogs,
                        Mflops=sum(h.stages[i].events[event].Mflops for h in logs) / nlogs)
        return Run(levels, solves, exename, arch, host, np, wtime, stages, options)

    def mkHost(s):
        (exename, arch, host, np) = re.match(
            r'^(\S+) on a (\S+) named (\S+) with (\d+) processors?, by .*$', s).groups()
        return Host(exename, arch, host, int(np))

    def mkMWTime(s):
        mwtime = re.match(
            r'^Time \(sec\):\s+(\d\.\d{3}e[-+]\d\d)\s+\d\.\d{5}\s+\d\.\d{3}e[-+]\d\d$',
            s).groups()[0]
        return float(mwtime)

    level = sometok('level') >> mkLevel
    kspit = sometok('ksp_monitor') >> mkKSPIt
    ksp_converged = sometok('ksp_converged') >> (lambda s: s.strip().split()[5])
    ksp_diverged = sometok('ksp_diverged') >> (lambda s: s.strip().split()[7])
    ksp = many(kspit) + maybe(ksp_converged | ksp_diverged) >> mkKSP
    snesit = sometok('snes_monitor') + maybe(ksp) >> mkSNESIt
    snes_converged = sometok('snes_converged') >> (lambda s: s.strip().split()[5])
    snes_diverged = sometok('snes_diverged') >> (lambda s: s.strip().split()[7])
    snes = oneplus(snesit) + (snes_converged | snes_diverged) >> mkSNES
    event = sometok('event') >> mkEvent
    stage = sometok('stage') + many(event) >> unarg(mkStage)
    memory_usage = sometok('memory_usage') + many(sometok('stage'))  # No plans for memory usage
    option_table_entry = sometok('option_table_entry') >> mkOption
    option_table = (skip(sometok('option_table_begin')) + many(option_table_entry)
                    + skip(sometok('option_table_end')) >> dict)
    host = sometok('hostline') >> mkHost
    max_wall_time = sometok('max_wall_time') >> mkMWTime
    log_summary = (skip(sometok('summary_begin')) + host + max_wall_time
                   + many(stage) + skip(memory_usage) + option_table >> unarg(LogSummary))
    petsc_log = (many(level) + many(snes) + many(log_summary)
                 + skip(finished) >> unarg(mkRun))

    return petsc_log.parse(seq)