def parse_identifier(line: str):
    """Parses just the identifier (first element) of the write"""
    tokval = lambda t: t.value
    joinval = "".join

    someToken = lambda type: some(lambda t: t.type == type)
    char = someToken('Char') >> tokval
    space = someToken('Space') >> tokval
    comma = someToken('Comma') >> tokval
    quote = someToken('Quote') >> tokval
    escape = someToken('Escape') >> tokval
    equal = someToken('Equal') >> tokval

    escape_space = skip(escape) + space >> joinval
    escape_comma = skip(escape) + comma >> joinval
    escape_equal = skip(escape) + equal >> joinval
    escape_escape = skip(escape) + escape >> joinval

    plain_int_text = someToken('Int') >> tokval
    plain_float_text = someToken('Float') >> tokval

    identifier = many(char | plain_float_text | plain_int_text |
                      escape_space | escape_comma | escape_equal |
                      escape_escape | quote) >> joinval

    toplevel = identifier >> (lambda x: x)

    parsed = toplevel.parse(LineTokenizer.tokenize(line))
    if len(parsed) == 0:
        raise NoParseError('parsed nothing')
    else:
        return parsed
def parse(seq):
    'Sequence(Token) -> object'
    unarg = lambda f: lambda args: f(*args)
    tokval = lambda x: x.value
    flatten = lambda list: sum(list, [])
    value_flatten = lambda l: sum([[l[0]]] + list(l[1:]), [])
    n = lambda s: a(Token('Name', s)) >> tokval
    op = lambda s: a(Token('Op', s)) >> tokval
    op_ = lambda s: skip(op(s))
    id = some(lambda t: t.type in ['Name', 'Number', 'Color', 'String']).named('id') >> tokval
    make_chart_attr = lambda args: DefAttrs(u'chart', [Attr(*args)])

    node_id = id  # + maybe(port)
    pair = (
        op_('(') + id + skip(maybe(op(','))) + id + op_(')')
        >> tuple)
    value = (id | pair)
    value_list = (
        value
        + many(op_(',') + value)
        >> value_flatten)
    a_list = (
        id
        + maybe(op_('=') + id)
        + skip(maybe(op(',')))
        >> unarg(Attr))
    attr_list = (
        many(op_('[') + many(a_list) + op_(']'))
        >> flatten)
    chart_attr = id + (op_('=') | op_(':')) + value_list >> make_chart_attr
    node_stmt = node_id + attr_list >> unarg(Node)

    stmt = (
        chart_attr
        | node_stmt
    )
    stmt_list = many(stmt + skip(maybe(op(';'))))
    chart_type = (
          n('p')   | n('pie')   | n('piechart')
        | n('p3')  | n('pie3d') | n('piechart_3d')
        | n('lc')  | n('line')  | n('linechart')
        | n('lxy') | n('linechartxy')
        | n('bhs') | n('holizontal_barchart')
        | n('bvs') | n('vertical_barchart')
        | n('bhg') | n('holizontal_bargraph')
        | n('bvg') | n('vertical_bargraph')
        | n('v')   | n('venn')  | n('venndiagram')
        | n('s')   | n('plot')  | n('plotchart')
    )
    chart = (
        chart_type
        + maybe(id)
        + op_('{')
        + stmt_list
        + op_('}')
        >> unarg(Chart))
    dotfile = chart + skip(finished)

    return dotfile.parse(seq)
def parse(seq):
    """Returns the AST of the given token sequence."""
    global depth
    unarg = lambda f: lambda x: f(*x)
    tokval = lambda x: x.value  # returns the value of a token
    toktype = lambda t: some(lambda x: x.type == t) >> tokval  # checks type of token
    paren = lambda s: a(Token('Parentheses', s)) >> tokval  # return the value if the token is a parenthesis
    paren_ = lambda s: skip(paren(s))  # match a parenthesis token and ignore it

    def application(z, list):
        return reduce(lambda s, x: Application(s, x), list, z)

    depth = 0
    variable = lambda x: Variable(str(x) + ":" + str(depth))

    def abstraction(x):
        global depth
        abst = Abstraction(str(x[0]) + ":" + str(depth), x[1])
        depth += 1
        return abst

    variable = toktype('Name') >> variable
    term = variable | with_forward_decls(lambda: paren_('(') + exp + paren_(')')) | \
        with_forward_decls(lambda: skip(toktype('Lambda')) + toktype('Name') +
                           skip(toktype('Dot')) + exp >> abstraction)
    exp = term + many(term) >> unarg(application)

    return exp.parse(seq)
def parse(seq):
    'Sequence(Token) -> object'
    unarg = lambda f: lambda args: f(*args)
    tokval = lambda x: x.value
    flatten = lambda list: sum(list, [])
    n = lambda s: a(Token('Name', s)) >> tokval
    op = lambda s: a(Token('Op', s)) >> tokval
    op_ = lambda s: skip(op(s))
    date = some(lambda t: t.type == 'Date').named('date') >> tokval
    id = some(lambda t: t.type in ['Name', 'Number', 'String']).named(
        'id') >> tokval
    make_chart_attr = lambda args: DefAttrs(u'chart', [Attr(*args)])

    node_id = id  # + maybe(port)
    term = date + op_('-') + date
    value = (id | term | date)
    a_list = (id + maybe(op_('=') + id) + skip(maybe(op(','))) >> unarg(Attr))
    attr_list = (many(op_('[') + many(a_list) + op_(']')) >> flatten)
    chart_attr = id + (op_('=') | op_(':')) + value >> make_chart_attr
    node_stmt = node_id + attr_list >> unarg(Node)
    stmt = (chart_attr | node_stmt)
    stmt_list = many(stmt + skip(maybe(op(';'))))
    chart = (maybe(n('diagram')) + maybe(id) + op_('{') + stmt_list + op_('}')
             >> unarg(Chart))
    dotfile = chart + skip(finished)

    return dotfile.parse(seq)
def parse(tokens):
    t = lambda s: some(lambda tok: tok.type == s)
    inttype = t('Int')
    chartype = t('Char')
    unsignedtype = t('Unsigned')
    name = t('Name')
    star = t('Star')
    void = t('Void')
    lpar = skip(t('LPar'))
    rpar = skip(t('RPar'))
    comma = skip(t('Comma'))
    semicolon = skip(t('SemiColon'))

    def collapse(x):
        if len(x[1]) > 0:
            # TODO: handle multiple stars
            return Token("UserTypePointer", x[0].value + " " + x[1][0].value)
        else:
            return Token("UserType", x[0].value)

    def make_func(x):
        return Token('Function', x.value)

    def make_type(x):
        if len(x) == 3:
            return Token("UnsignedTypePointer", x)
        elif len(x) == 2:
            if x[0].type == "Unsigned":
                return Token("UnsignedType", x)
            else:
                return Token("TypePointer", x)
        else:
            return Token("Type", x)

    udt = name + many(star) >> collapse
    prim = (inttype
            | chartype
            | unsignedtype + inttype
            | unsignedtype + chartype) + many(star) >> make_type
    voidptr = void + star + many(star)
    func = name >> make_func

    accepted_types = voidptr | prim | udt

    # Return Type
    rettype = void | accepted_types

    # Argument List
    decl = accepted_types + name
    decl_list = decl + many(comma + decl)
    arg_list = void | decl_list

    func_decl = rettype + func + lpar + arg_list + rpar + semicolon
    return func_decl.parse(tokens)
def get_marginal_parser():
    """Return parser for tokens describing marginals."""
    solution_type = parser.skip(parser.a(Token(token.NAME, 'MAR')))
    minus = parser.a(Token(token.OP, '-'))
    begin = parser.skip(
        parser.maybe(minus + parser.a(Token(token.NAME, 'BEGIN')) + minus))
    marginal_parser = (solution_type + parser.many(number_parser + begin) +
                       end_parser)
    return marginal_parser
def parse_expression(expression):
    """Parse an expression that appears in an execution node, i.e. a block
    delimited by ``{% %}``.

    This can be a compound expression like a ``for`` statement with several
    sub-expressions, or it can just be a single statement such as ``endif``.

    :param list expression: Tokenised expression.
    """
    from funcparserlib.parser import a, skip, some

    # For if expressions, we rely on the Python parser to process the
    # expression rather than using our own parser.
    if expression[0] == 'if':
        return IfNode(ast.parse(' '.join(expression[1:]), mode="eval"))

    variable_name = some(lambda x: re.match(r'[a-zA-Z_]+', x))

    # TODO We use the same function twice, first to match the token
    # and then to extract the value we care about from the token
    # (namely the contents of the quoted string). This smells wrong.
    def extract_quoted_string(x):
        result = re.match(r'\"([^\"]*)\"', x)
        if result:
            return result.groups(1)

    quoted_string = some(extract_quoted_string)

    for_expression = (skip(a('for')) + (variable_name >> str) +
                      skip(a('in')) + (variable_name >> str))
    extends_expression = (skip(a('extends')) +
                          (quoted_string >> extract_quoted_string))
    block_expression = (skip(a('block')) + (variable_name >> str))

    def make_for_node(x):
        return ForNode(*x)

    def make_extends_node(x):
        return ExtendsNode(*x)

    parser = ((for_expression >> make_for_node) |
              (extends_expression >> make_extends_node) |
              (block_expression >> BlockNode))
    try:
        return parser.parse(expression)
    except funcparserlib.parser.NoParseError as e:
        raise Exception("Invalid expression '%s'" % expression)
def _parse(seq):
    const = lambda x: lambda _: x
    tokval = lambda x: x.value
    toktype = lambda t: some(lambda x: x.type == t) >> tokval
    op = lambda s: a(Token(u'Op', s)) >> tokval
    op_ = lambda s: skip(op(s))

    def make_string(args):
        context, value = args
        if not context:
            context = 'any:'
        return String(unescape_str(value[1:-1]), context[:-1])

    def make_regex(args):
        context, value = args
        value, modifiers = value.rsplit('/', 1)
        value = value[1:]
        if not context:
            context = 'any:'
        return Regex(unescape_regex(value), modifiers, context[:-1])

    def make_or(args):
        return Or(*args)

    def make_and(args):
        return And(*args)

    def make_not(x):
        return Not(x)

    context = maybe(toktype(u'Prefix'))
    string = (context + toktype(u'String')) >> make_string
    regex = (context + toktype(u'Regex')) >> make_regex

    par_term = forward_decl()
    simple_term = forward_decl()
    term = forward_decl()
    not_term = forward_decl()
    and_term = forward_decl()
    or_term = forward_decl()

    par_term.define(op_(u'(') + term + op_(u')'))
    simple_term.define(par_term | string | regex)
    not_term.define(op_('not') + not_term >> make_not | simple_term)
    and_term.define(not_term + op_('and') + and_term >> make_and | not_term)
    or_term.define(and_term + op_('or') + or_term >> make_or | and_term)
    term.define(or_term)

    eof = skip(toktype(u'EOF'))
    filter_expr = (term + eof) | (eof >> const(Any()))

    return filter_expr.parse(seq)
def parse(seq):
    'Sequence(Token) -> object'
    tokval = lambda x: x.value
    op = lambda s: a(Token('Op', s)) >> tokval
    op_ = lambda s: skip(op(s))
    id = some(lambda t: t.type in ['Name', 'Number', 'Color', 'String']).named(
        'id') >> tokval
    date = some(lambda t: t.type == 'Date').named('date') >> tokval
    make_node = lambda args: Node(*args)

    node_stmt = id + op_(':') + date + maybe(op_('-') + date) >> make_node
    chart = (many(node_stmt + skip(maybe(op(';')))) >> Chart)
    dotfile = chart + skip(finished)

    return dotfile.parse(seq)
def test_fun_decl_param_decl(self):
    parser = tango.fun_decl_param + skip(finished)

    result = parser.parse(tango.tokenize('cst x: Int'))
    self.assertIsInstance(result, ast.FunctionParameterDecl)
    self.assertEqual(result.label, 'x')
    self.assertEqual(result.name, 'x')
    self.assertFalse(result.attributes)
    self.assertEqual(result.type_annotation.name, 'Int')

    result = parser.parse(tango.tokenize('mut x: Int'))
    self.assertIn('mutable', result.attributes)

    result = parser.parse(tango.tokenize('cst a x: Int'))
    self.assertIsInstance(result, ast.FunctionParameterDecl)
    self.assertEqual(result.label, 'a')
    self.assertEqual(result.name, 'x')
    self.assertFalse(result.attributes)
    self.assertEqual(result.type_annotation.name, 'Int')
def test_switch_case_clause(self):
    parser = tango.switch_case_clause + skip(finished)

    result = parser.parse(tango.tokenize('case a { }'))
    self.assertIsInstance(result, ast.SwitchCaseClause)
    self.assertEqual(result.pattern.name, 'a')
    self.assertFalse(result.body.statements)
def test_matching_pattern(self):
    parser = tango.matching_pattern + skip(finished)

    result = parser.parse(tango.tokenize('a ~= cst x'))
    self.assertIsInstance(result, ast.MatchingPattern)
    self.assertIsInstance(result.value, ast.Identifier)
    self.assertIsInstance(result.pattern, ast.ValueBindingPattern)
def test_expr(self):
    parser = tango.expr + skip(finished)

    result = parser.parse(tango.tokenize('x'))
    self.assertIsInstance(result, ast.Identifier)

    result = parser.parse(tango.tokenize('0'))
    self.assertIsInstance(result, ast.Literal)

    result = parser.parse(tango.tokenize('(((0)))'))
    self.assertIsInstance(result, ast.Literal)

    result = parser.parse(tango.tokenize('f()'))
    self.assertIsInstance(result, ast.Call)

    result = parser.parse(tango.tokenize('a.b'))
    self.assertIsInstance(result, ast.Select)

    result = parser.parse(tango.tokenize('+0'))
    self.assertIsInstance(result, ast.PrefixExpression)

    result = parser.parse(tango.tokenize('a?'))
    self.assertIsInstance(result, ast.PostfixExpression)

    result = parser.parse(tango.tokenize('0 + 0'))
    self.assertIsInstance(result, ast.BinaryExpression)

    result = parser.parse(tango.tokenize('Int.+(0 - -9)'))
    self.assertIsInstance(result, ast.Call)
    self.assertIsInstance(result.callee, ast.Select)
    self.assertIsInstance(result.arguments[0].value, ast.BinaryExpression)
    self.assertIsInstance(result.arguments[0].value.right, ast.PrefixExpression)
def test_binary_precedence(self):
    parser = tango.bin_expr + skip(finished)

    operators = [('or', 'and'), ('and', '|'), ('|', '^'), ('^', '&'),
                 ('&', '=='), ('==', 'as?'), ('as?', '<'), ('as?', '<'),
                 ('<', '+'), ('+', '*'), ('+', '>>')]

    for lower, higher in operators:
        result = parser.parse(
            tango.tokenize('a %s b %s c' % (lower, higher)))
        self.assertEqual(result.operator, lower)
        self.assertEqual(result.left.name, 'a')
        self.assertEqual(result.right.operator, higher)
        self.assertEqual(result.right.left.name, 'b')
        self.assertEqual(result.right.right.name, 'c')

        result = parser.parse(
            tango.tokenize('a %s b %s c' % (higher, lower)))
        self.assertEqual(result.operator, lower)
        self.assertEqual(result.left.operator, higher)
        self.assertEqual(result.left.left.name, 'a')
        self.assertEqual(result.left.right.name, 'b')
        self.assertEqual(result.right.name, 'c')

        result = parser.parse(
            tango.tokenize('(a %s b) %s c' % (lower, higher)))
        self.assertEqual(result.operator, higher)
        self.assertEqual(result.left.operator, lower)
        self.assertEqual(result.left.left.name, 'a')
        self.assertEqual(result.left.right.name, 'b')
        self.assertEqual(result.right.name, 'c')
def test_protocol_decl(self):
    parser = tango.protocol_decl + skip(finished)

    result = parser.parse(tango.tokenize('protocol P {}'))
    self.assertIsInstance(result, ast.ProtocolDecl)
    self.assertEqual(result.name, 'P')
    self.assertFalse(result.conformance_list)
    self.assertFalse(result.import_list)
    self.assertFalse(result.body.statements)

    result = parser.parse(tango.tokenize('protocol P : Q, R {}'))
    self.assertEqual(result.conformance_list[0].name, 'Q')
    self.assertEqual(result.conformance_list[1].name, 'R')
    self.assertFalse(result.import_list)

    result = parser.parse(tango.tokenize('protocol P: Q, R import T {}'))
    self.assertEqual(result.conformance_list[0].name, 'Q')
    self.assertEqual(result.conformance_list[1].name, 'R')
    self.assertEqual(result.import_list[0].name, 'T')

    result = parser.parse(
        tango.tokenize('''protocol P {
            mut x: Int
        }'''))
    self.assertIsInstance(result, ast.ProtocolDecl)
    self.assertEqual(result.body.statements[0].name, 'x')
    self.assertEqual(result.body.statements[0].type_annotation.name, 'Int')

    result = parser.parse(
        tango.tokenize('''protocol P {
            fun f(cst self: Self)
        }'''))
    self.assertIsInstance(result, ast.ProtocolDecl)
    self.assertIsInstance(result.body.statements[0], ast.FunctionDecl)
def test_identifier(self):
    parser = tango.identifier + skip(finished)

    result = parser.parse(tango.tokenize('Int'))
    self.assertIsInstance(result, ast.Identifier)
    self.assertEqual(result.name, 'Int')
    self.assertFalse(result.specializations)

    result = parser.parse(tango.tokenize('+'))
    self.assertIsInstance(result, ast.Identifier)
    self.assertEqual(result.name, '+')
    self.assertFalse(result.specializations)

    result = parser.parse(tango.tokenize('Array<T = Int>'))
    self.assertIsInstance(result, ast.Identifier)
    self.assertEqual(result.name, 'Array')
    self.assertEqual(result.specializations[0].name, 'T')
    self.assertEqual(result.specializations[0].value.name, 'Int')

    result = parser.parse(
        tango.tokenize('Dictionary<Key = Int, Value = String>'))
    self.assertIsInstance(result, ast.Identifier)
    self.assertEqual(result.name, 'Dictionary')
    self.assertEqual(result.specializations[0].name, 'Key')
    self.assertEqual(result.specializations[0].value.name, 'Int')
    self.assertEqual(result.specializations[1].name, 'Value')
    self.assertEqual(result.specializations[1].value.name, 'String')
def parse(seq):
    """Returns the AST of the given token sequence."""
    def eval_expr(z, list):
        return reduce(lambda s, fx: fx[0](s, fx[1]), list, z)

    unarg = lambda f: lambda x: f(*x)
    const = lambda x: lambda _: x  # like ^^^ in Scala
    tokval = lambda x: x.value  # returns the value of a token
    op = lambda s: a(Token('Op', s)) >> tokval  # return the value if token is Op
    op_ = lambda s: skip(op(s))  # checks if token is Op and ignores it
    toktype = lambda t: some(lambda x: x.type == t) >> tokval  # checks type of token

    def lst(h, t):
        return [h] + t

    makeop = lambda s, f: op(s) >> const(f)

    or_op = makeop('|', Or)

    char = with_forward_decls(lambda:
                              toktype('Char') >> Char | op_('(') + exp + op_(')'))
    star = char + op_('*') >> Star | char
    lst2_exp = star + many(star) >> unarg(lst)
    lst_exp = lst2_exp >> Lst
    exp = lst_exp + many(or_op + lst_exp) >> unarg(eval_expr)

    return exp.parse(seq)
def parse(sequence, query):
    tokval = lambda x: x.value
    toktype = lambda t: (some(lambda x: x.type == t).named('(type %s)' % t)
                         >> tokval)
    operation = lambda s: a(Token('Op', s)) >> tokval
    operation_ = lambda s: skip(operation(s))

    create_param = lambda param_name: query.get_aliased_param(param_name)
    make_and = lambda params: And(params[0], params[1])
    make_or = lambda params: Or(params[0], params[1])
    make_not = lambda inner: Not(inner)

    word = toktype('Word')

    inner_bracket = forward_decl()
    left_of_and = forward_decl()
    right_of_and = forward_decl()
    left_of_or = forward_decl()
    not_ = forward_decl()

    bracket = operation_('(') + inner_bracket + operation_(')')
    and_ = left_of_and + operation_('&') + right_of_and >> make_and
    or_ = left_of_or + operation_('|') + inner_bracket >> make_or
    param = word >> create_param

    not_.define(operation_('!') + (bracket | param))
    not_ = not_ >> make_not

    left_of_or.define(and_ | bracket | not_ | param)
    left_of_and.define(bracket | not_ | param)
    right_of_and.define(left_of_and)
    inner_bracket.define(or_ | and_ | bracket | not_ | param)

    definition = (bracket | inner_bracket) + finished
    return definition.parse(sequence)
def test_fun_decl(self):
    parser = tango.fun_decl + skip(finished)

    result = parser.parse(tango.tokenize('fun f() {}'))
    self.assertIsInstance(result, ast.FunctionDecl)
    self.assertEqual(result.name, 'f')
    self.assertFalse(result.generic_parameters)
    self.assertFalse(result.signature.parameters)
    self.assertEqual(result.signature.return_type.name, 'Nothing')

    result = parser.parse(
        tango.tokenize('fun f(cst x: Int, cst y: String) -> Int {}'))
    self.assertIsInstance(result, ast.FunctionDecl)
    self.assertEqual(result.name, 'f')
    self.assertFalse(result.generic_parameters)
    self.assertEqual(result.signature.parameters[0].name, 'x')
    self.assertEqual(result.signature.parameters[1].name, 'y')
    self.assertEqual(result.signature.return_type.name, 'Int')

    result = parser.parse(tango.tokenize('fun f<T>(cst x: T) {}'))
    self.assertIsInstance(result, ast.FunctionDecl)
    self.assertEqual(result.name, 'f')
    self.assertEqual(result.generic_parameters[0], 'T')
    self.assertEqual(result.signature.parameters[0].name, 'x')
    self.assertEqual(result.signature.return_type.name, 'Nothing')
def test_prefix_expr(self):
    parser = tango.prefix_expr + skip(finished)

    result = parser.parse(tango.tokenize('not x'))
    self.assertIsInstance(result, ast.PrefixExpression)
    self.assertEqual(result.operator, 'not')
    self.assertEqual(result.operand.name, 'x')
def whole(parsers):
    """Parse the parsers in the given list one after another, then expect
    the end of the input."""
    if len(parsers) == 0:
        return finished >> (lambda x: [])
    if len(parsers) == 1:
        return parsers[0] + finished >> (lambda x: x[:-1])
    return reduce(add, parsers) + skip(finished)
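# Hedged usage sketch for whole(): `number` and `comma` below are hypothetical
# token parsers, not defined in this module. whole() chains the given parsers
# and then requires end-of-input, so leftover tokens raise NoParseError.
#
#     pair = whole([number, skip(comma), number])
#     pair.parse(tokens)  # -> (first, second); fails if tokens remain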
def test_error_info(self):
    tokenize = make_tokenizer([
        ('keyword', (r'(is|end)',)),
        ('id', (r'[a-z]+',)),
        ('space', (r'[ \t]+',)),
        ('nl', (r'[\n\r]+',)),
    ])
    try:
        list(tokenize('f is ф'))
    except LexerError as e:
        self.assertEqual(str(e),
                         'cannot tokenize data: 1,6: "f is \u0444"')
    else:
        self.fail('must raise LexerError')

    sometok = lambda type: some(lambda t: t.type == type)
    keyword = lambda s: a(Token('keyword', s))

    id = sometok('id')
    is_ = keyword('is')
    end = keyword('end')
    nl = sometok('nl')

    equality = id + skip(is_) + id >> tuple
    expr = equality + skip(nl)
    file = many(expr) + end

    msg = """\
spam is eggs
eggs isnt spam
end"""
    toks = [x for x in tokenize(msg) if x.type != 'space']
    try:
        file.parse(toks)
    except NoParseError as e:
        self.assertEqual(e.msg,
                         "got unexpected token: 2,11-2,14: id 'spam'")
        self.assertEqual(e.state.pos, 4)
        self.assertEqual(e.state.max, 7)
        # May raise KeyError
        t = toks[e.state.max]
        self.assertEqual(t, Token('id', 'spam'))
        self.assertEqual((t.start, t.end), ((2, 11), (2, 14)))
    else:
        self.fail('must raise NoParseError')
def test_error_info(self):
    tokenize = make_tokenizer([
        ('keyword', (r'(is|end)',)),
        ('id', (r'[a-z]+',)),
        ('space', (r'[ \t]+',)),
        ('nl', (r'[\n\r]+',)),
    ])
    try:
        list(tokenize('f is ф'))
    except LexerError as e:
        self.assertEqual(six.text_type(e),
                         'cannot tokenize data: 1,6: "f is \u0444"')
    else:
        self.fail('must raise LexerError')

    sometok = lambda type: some(lambda t: t.type == type)
    keyword = lambda s: a(Token('keyword', s))

    id = sometok('id')
    is_ = keyword('is')
    end = keyword('end')
    nl = sometok('nl')

    equality = id + skip(is_) + id >> tuple
    expr = equality + skip(nl)
    file = many(expr) + end

    msg = """\
spam is eggs
eggs isnt spam
end"""
    toks = [x for x in tokenize(msg) if x.type != 'space']
    try:
        file.parse(toks)
    except NoParseError as e:
        self.assertEqual(e.msg,
                         "got unexpected token: 2,11-2,14: id 'spam'")
        self.assertEqual(e.state.pos, 4)
        self.assertEqual(e.state.max, 7)
        # May raise KeyError
        t = toks[e.state.max]
        self.assertEqual(t, Token('id', 'spam'))
        self.assertEqual((t.start, t.end), ((2, 11), (2, 14)))
    else:
        self.fail('must raise NoParseError')
def grammar():
    lparen = skip(a(LParen()))
    rparen = skip(a(RParen()))

    def collapse(t):
        t[0].terms = t[1]
        return t[0]

    @with_forward_decls
    def ldap_filter():
        return (ldap_and | ldap_or | ldap_not | ldap_test)

    ldap_and = (lparen + a(And()) + oneplus(ldap_filter) + rparen) >> collapse
    ldap_or = (lparen + a(Or()) + oneplus(ldap_filter) + rparen) >> collapse
    ldap_not = (lparen + a(Not()) + ldap_filter + rparen) >> collapse
    ldap_test = lparen + a(Test()) + rparen

    return ldap_filter + skip(finished)
def parse(tokenSequence):
    """Sequence(Token) -> object"""
    # Top-level Parser
    expression = (ignore_expression | scanCode_expression | usbCode_expression
                  | variable_expression | capability_expression
                  | define_expression)

    kll_text = many(expression)
    kll_file = maybe(kll_text) + skip(finished)

    return kll_file.parse(tokenSequence)
def parse( tokenSequence ):
    """Sequence(Token) -> object"""
    # Top-level Parser
    expression = scanCode_expression | usbCode_expression | variable_expression | capability_expression | define_expression

    kll_text = many( expression )
    kll_file = maybe( kll_text ) + skip( finished )

    return kll_file.parse( tokenSequence )
def get_number_parser():
    """Return parser that reads (float and int) numbers with whitespace."""
    number = (parser.some(lambda tok: tok.type == 'NUMBER')
              >> token_value
              >> string_to_number)
    indent = parser.some(lambda t: t.code == token.INDENT)
    dedent = parser.a(Token(token.DEDENT, ''))
    newline = parser.a(Token(54, '\n'))
    ignored_whitespace = parser.skip(indent | dedent | newline)
    return parser.oneplus(number | ignored_whitespace)
def test_break_stmt(self):
    parser = tango.break_stmt + skip(finished)

    result = parser.parse(tango.tokenize('break'))
    self.assertIsInstance(result, ast.Break)
    self.assertIsNone(result.label)

    result = parser.parse(tango.tokenize('break foo'))
    self.assertIsInstance(result, ast.Break)
    self.assertEqual(result.label, 'foo')
def test_continue_stmt(self):
    parser = tango.continue_stmt + skip(finished)

    result = parser.parse(tango.tokenize('continue'))
    self.assertIsInstance(result, ast.Continue)
    self.assertIsNone(result.label)

    result = parser.parse(tango.tokenize('continue foo'))
    self.assertIsInstance(result, ast.Continue)
    self.assertEqual(result.label, 'foo')
def parse(tokens):
    ## building blocks
    kw_priority = some(toktype("kw_priority"))
    kw_probability = some(toktype("kw_probability"))
    kw_reaction = some(toktype("kw_reaction"))
    kw_exists = some(toktype("kw_exists"))
    kw_as = some(toktype("kw_as"))

    op_tilde = some(toktype("op_tilde"))
    op_priority_maximal = some(toktype("op_priority_maximal"))
    op_production = some(toktype("op_production"))

    atom = some(toktype("name"))
    number = some(toktype("number"))

    dissolve = some(toktype("op_dissolve"))
    osmose = some(toktype("op_osmose"))
    osmose_location = some(toktype("op_osmose_location"))
    env_open = some(toktype("env_open"))
    env_close = some(toktype("env_close"))
    membrane_open = some(toktype("membrane_open"))
    membrane_close = some(toktype("membrane_close"))

    ## grammar from the bottom up
    name = atom | number
    symbol = atom | (dissolve + maybe(name)) | (osmose + name + maybe(osmose_location + name))

    priority = kw_priority + op_tilde + name + op_priority_maximal + name
    reaction = (kw_reaction + maybe(kw_as + name) + op_tilde + oneplus(name)
                + op_production + many(symbol))
    exists = kw_exists + op_tilde + oneplus(name)

    expr = (exists | reaction | priority)
    statement = with_forward_decls(lambda: membrane | expr) >> Statement
    body = maybe(name) + many(statement)

    membrane = (skip(membrane_open) + body + skip(membrane_close)) >> Membrane
    env = (skip(env_open) + body + skip(env_close)) >> Environment

    program = many(env) + skip(finished) >> Program

    return program.parse(tokens)
def parse(tokens):
    ## building blocks
    kw_priority = some(toktype("kw_priority"))
    kw_probability = some(toktype("kw_probability"))
    kw_reaction = some(toktype("kw_reaction"))
    kw_exists = some(toktype("kw_exists"))
    kw_as = some(toktype("kw_as"))

    op_tilde = some(toktype("op_tilde"))
    op_priority_maximal = some(toktype("op_priority_maximal"))
    op_production = some(toktype("op_production"))

    atom = some(toktype("name"))
    number = some(toktype("number"))

    dissolve = some(toktype("op_dissolve"))
    osmose = some(toktype("op_osmose"))
    osmose_location = some(toktype("op_osmose_location"))
    env_open = some(toktype("env_open"))
    env_close = some(toktype("env_close"))
    membrane_open = some(toktype("membrane_open"))
    membrane_close = some(toktype("membrane_close"))

    ## grammar from the bottom up
    name = atom | number
    symbol = atom | (dissolve + maybe(name)) | (osmose + name + maybe(osmose_location + name))

    priority = kw_priority + op_tilde + name + op_priority_maximal + name
    reaction = kw_reaction + maybe(kw_as + name) + op_tilde + oneplus(name) + op_production + many(symbol)
    exists = kw_exists + op_tilde + oneplus(name)

    expr = exists | reaction | priority
    statement = with_forward_decls(lambda: membrane | expr) >> Statement
    body = maybe(name) + many(statement)

    membrane = (skip(membrane_open) + body + skip(membrane_close)) >> Membrane
    env = (skip(env_open) + body + skip(env_close)) >> Environment

    program = many(env) + skip(finished) >> Program

    return program.parse(tokens)
def parse(input):
    period = sometok("period")
    string = p.oneplus(sometok("string")) >> (lambda x: " ".join(x))
    number = sometok("number")

    title = string + p.skip(period) >> RecipeTitle
    ingredients_start = sometok("ingredients_start") + p.skip(period) >> IngredientStart

    dry_measure = p.maybe(sometok("measure_type")) + sometok("dry_measure")
    liquid_measure = sometok("liquid_measure")
    mix_measure = sometok("mix_measure")

    # is this valid ? 'g of butter', unit w/o initial_value
    ingredient = p.maybe(number) + p.maybe(dry_measure | liquid_measure | mix_measure) + string >> unarg(Ingredient)

    ingredients = p.many(ingredient)

    cooking_time = p.skip(sometok("cooking_time")) + (number >> unarg(CookingTime)) + p.skip(sometok("period"))

    oven_temp = p.skip(sometok("oven")) + p.many(number) + p.skip(sometok("oven_temp")) >> unarg(Oven)

    method_start = sometok("method_start") + p.skip(period)

    comment = p.skip(p.many(string | period))
    header = title + p.maybe(comment)

    instruction = (string + p.skip(period)) >> parse_instruction

    instructions = p.many(instruction)

    program = (method_start + instructions) >> unarg(MethodStart)

    serves = (sometok("serve") + number >> (lambda x: Serve("serve", x[1]))) + p.skip(period)

    ingredients_section = (ingredients_start + ingredients) >> unarg(IngredientSection)

    recipe = (
        header
        + p.maybe(ingredients_section)
        + p.maybe(cooking_time)
        + p.maybe(oven_temp)
        + p.maybe(program)
        + p.maybe(serves)
    ) >> RecipeNode

    main_parser = p.oneplus(recipe)

    return main_parser.parse(tokenize(input))
def _parse_rule(seq):
    tokval = lambda x: x.value
    toktype = lambda t: some(lambda x: x.type == t) >> tokval
    sep = lambda s: a(Token(u'Sep', s)) >> tokval
    s_sep = lambda s: skip(sep(s))

    level = toktype(u'Level')
    comparator = toktype(u'Comparator') >> COMPARATORS.get
    number = toktype(u'Number') >> float
    historical = toktype(u'Historical')
    unit = toktype(u'Unit')
    operator = toktype(u'Operator')
    logical_operator = toktype(u'LogicalOperator') >> LOGICAL_OPERATORS.get

    exp = comparator + ((number + maybe(unit)) | historical) + maybe(operator + number)
    rule = (level + s_sep(':') + exp + many(logical_operator + exp))

    overall = rule + skip(finished)
    return overall.parse(seq)
def field_query_parser():
    """Return a parser for numeric queries.

    Example queries: '1900-1995' or '>= 1998'
    """
    number = token_type('number')
    field_name = token_type('name')
    lt = token_type('lt')
    le = token_type('le')
    gt = token_type('gt')
    ge = token_type('ge')
    eq = token_type('equals')
    approx = token_type('approx')

    # Simple comparisons
    # NOTE: We put le before lt to parse both
    comparison = parser.maybe(token_type('not'))\
        + field_name\
        + (le | lt | ge | gt)\
        + number

    # Values can be given as intervals ('1990-2000')
    interval = parser.maybe(token_type('not'))\
        + field_name\
        + skip('equals')\
        + number\
        + skip('dash')\
        + number

    # Values can be given as ranges ('1990<=year<=2000')
    # NOTE: We put le before lt to parse both
    range_ = parser.maybe(token_type('not'))\
        + number\
        + (le | lt)\
        + field_name\
        + (le | lt)\
        + number

    # Field value queries ('year=2000' or 'author~Augustus')
    field_value = parser.maybe(token_type('not'))\
        + field_name\
        + (eq | approx)\
        + (token_type('name') | token_type('number') | token_type('any'))

    # Field occurrence ('publisher' or '^publisher')
    field_occurrence = parser.maybe(token_type('not')) + field_name

    return (interval >> make_query_result('interval')
            | comparison >> make_query_result('comparison')
            | range_ >> make_query_result('range')
            | field_value >> make_query_result('value')
            | field_occurrence >> make_query_result('occurrence'))\
        + parser.skip(parser.finished)
def test_error_info():
    tokenize = make_tokenizer([
        Spec('keyword', r'(is|end)'),
        Spec('id', r'[a-z]+'),
        Spec('space', r'[ \t]+'),
        Spec('nl', r'[\n\r]+'),
    ])
    try:
        list(tokenize('f is ф'))
    except LexerError as e:
        pass
    else:
        ok_(False, 'must raise LexerError')

    keyword = lambda s: tok('keyword', s)

    id = tok('id')
    is_ = keyword('is')
    end = keyword('end')
    nl = tok('nl')

    equality = id + skip(is_) + id >> tuple
    expr = equality + skip(nl)
    file = many(expr) + end

    msg = """\
rake is eggs
eggs isnt spam
end"""
    toks = [x for x in tokenize(msg) if x.type != 'space']
    try:
        file.parse(toks)
    except ParserError as e:
        msg, pos, i = e.args
        eq_(msg, "got unexpected token: id 'spam'")
        eq_(pos, ((2, 11), (2, 14)))
        # May raise KeyError
        t = toks[i]
        eq_(t, Token('id', 'spam'))
    else:
        ok_(False, 'must raise ParserError')
def _create_type_rules():
    comma = _token_type("comma")
    colon = _token_type("colon")
    question_mark = _token_type("question-mark")
    bar = _token_type("bar")
    equals = _token_type("equals")
    attr_name = type_name = arg_name = _token_type("name") >> _make_name

    primary_type = forward_decl()
    union_type = _one_or_more_with_separator(primary_type, bar) >> _make_union_type
    type_ = union_type
    type_ref = type_name >> _make_type_ref
    applied_type = (
        type_ref +
        skip(_token_type("open")) +
        _one_or_more_with_separator(type_, comma) +
        skip(_token_type("close"))
    ) >> _make_apply

    arg = (maybe(question_mark) + maybe(arg_name + skip(colon)) + type_) >> _make_arg
    generic_params = maybe(type_name + _token_type("fat-arrow")) >> _make_params
    args = _zero_or_more_with_separator(arg, comma)
    signature = (generic_params + args + _token_type("arrow") + type_) >> _make_signature
    sub_signature = (_token_type("paren-open") + signature + _token_type("paren-close")) >> (lambda result: result[1])

    primary_type.define(sub_signature | applied_type | type_ref)

    explicit_type = signature | type_

    type_definition = (type_name + skip(equals) + type_ + skip(finished)) >> _make_type_definition

    structural_type_attr = (attr_name + skip(colon) + explicit_type) >> tuple
    structural_type_attrs = many(structural_type_attr)
    structural_type_definition = (type_name + skip(colon) + structural_type_attrs + skip(finished)) >> _make_structural_type_definition

    generic = (_one_or_more_with_separator(type_name, comma) + skip(finished)) >> _make_generic

    return explicit_type + skip(finished), type_definition, structural_type_definition, generic
def test_enum_case_param_decl(self):
    parser = tango.enum_case_param + skip(finished)

    result = parser.parse(tango.tokenize('_: Int'))
    self.assertIsInstance(result, ast.EnumCaseParameterDecl)
    self.assertIsNone(result.label)
    self.assertEqual(result.type_annotation.name, 'Int')

    result = parser.parse(tango.tokenize('x: Int'))
    self.assertIsInstance(result, ast.EnumCaseParameterDecl)
    self.assertEqual(result.label, 'x')
    self.assertEqual(result.type_annotation.name, 'Int')
def create_grammar():
    tokval = lambda x: x.value
    toktype = lambda t: some(lambda x: x.type == t) >> tokval
    op = lambda s: a(Token('Op', s)) >> tokval
    op_ = lambda s: skip(op(s))
    n = lambda s: a(Token('Name', s)) >> tokval

    null = n('null')
    true = n('true')
    false = n('false')
    number = toktype('Number')
    string = toktype('String')
    value = forward_decl()
    member = string + op_(':') + value
    object_ = (op_('{') + maybe(member + many(op_(',') + member)) + op_('}'))
    array = (op_('[') + maybe(value + many(op_(',') + value)) + op_(']'))
    value.define(null | true | false | object_ | array | number | string)
    json_text = object_ | array
    json_file = json_text + skip(finished)

    return json_file
def test_tuple_signature(self):
    parser = tango.tuple_signature + skip(finished)

    result = parser.parse(tango.tokenize('(cst x: Int)'))
    self.assertIsInstance(result, ast.TupleSignature)
    self.assertEqual(len(result.parameters), 1)
    self.assertEqual(result.parameters[0].label, 'x')

    result = parser.parse(tango.tokenize('(cst x: Int, cst y: Int)'))
    self.assertIsInstance(result, ast.TupleSignature)
    self.assertEqual(result.parameters[0].label, 'x')
    self.assertEqual(result.parameters[1].label, 'y')
def _parse_rule(seq):
    tokval = lambda x: x.value
    toktype = lambda t: some(lambda x: x.type == t) >> tokval
    sep = lambda s: a(Token(u'Sep', s)) >> tokval
    s_sep = lambda s: skip(sep(s))

    level = toktype(u'Level')
    comparator = toktype(u'Comparator') >> COMPARATORS.get
    number = toktype(u'Number') >> float
    historical = toktype(u'Historical')
    unit = toktype(u'Unit')
    operator = toktype(u'Operator')
    logical_operator = toktype(u'LogicalOperator') >> LOGICAL_OPERATORS.get

    exp = comparator + ((number + maybe(unit)) | historical) + maybe(operator + number)
    rule = (
        level + s_sep(':') + exp + many(logical_operator + exp)
    )
    overall = rule + skip(finished)
    return overall.parse(seq)
def parse(source):
    task = Task()

    get_value = lambda x: x.value
    value_of = lambda t: some(lambda x: x.type == t) >> get_value
    keyword = lambda s: skip(value_of(s))

    make_rule = lambda x: task.add_rule(Rule(**{x[0]: x[1][1:-1]}))
    set_root = lambda value: task.set_root_dir(value[1:-1])
    set_mask = lambda value: task.set_mask(value[1:-1])

    root = keyword('In') + value_of('Value') >> set_root
    mask = keyword('With') + value_of('Value') >> set_mask
    rule = keyword('Set') + \
        value_of('Attribute') + \
        keyword('Equals') + \
        value_of('Value') \
        >> make_rule

    parser = maybe(mask) + root + many(rule)
    parser.parse(source)

    return task
def parse(tokens):
    var = some(toktype("name")) | some(toktype("number"))
    open_form = some(toktype("form_open"))
    close_form = some(toktype("form_close"))
    op_lambda = some(toktype("op_lambda"))
    op_map = some(toktype("op_map"))
    prim_bind = some(toktype("kw_bind"))
    prim_halt = some(toktype("kw_halt"))

    exp = with_forward_decls(lambda: lam | var | prim_exp | exprn) >> Expression
    lam = open_form + op_lambda + many(var) + op_map + oneplus(exp) + close_form >> Lambda

    bind_exp = open_form + prim_bind + var + lam + close_form
    halt_exp = open_form + prim_halt + exp + close_form
    prim_exp = bind_exp | halt_exp

    exprn = open_form + oneplus(exp) + close_form >> Form

    prog = many(exp) + skip(finished) >> Program

    return prog.parse(tokens)
def parse(seq):
    """Returns the AST of the given token sequence."""
    def eval_expr(z, list):
        return reduce(lambda s, fx: fx[0](s, fx[1]), list, z)

    unarg = lambda f: lambda x: f(*x)
    const = lambda x: lambda _: x  # like ^^^ in Scala
    tokval = lambda x: x.value  # returns the value of a token
    op = lambda s: a(Token('Op', s)) >> tokval  # return the value if token is Op
    op_ = lambda s: skip(op(s))  # checks if token is Op and ignores it
    toktype = lambda t: some(lambda x: x.type == t) >> tokval  # checks type of token

    def lst(h, t):
        return [h] + t

    call = lambda x: Call(x[0], x[1])

    makeop = lambda s, f: op(s) >> const(f)

    add = makeop('+', Plus)
    sub = makeop('-', Minus)
    mul = makeop('*', Times)
    div = makeop('/', Div)

    def make_const(i):
        return const(int(i))

    number = toktype('Number') >> Const

    mul_op = mul | div
    add_op = add | sub

    factor = with_forward_decls(lambda:
                                number | op_('(') + exp + op_(')') | call)
    term = factor + many(mul_op + factor) >> unarg(eval_expr)
    exp = term + many(add_op + term) >> unarg(eval_expr)
    exp_lst = with_forward_decls(lambda:
                                 exp + many(op_(',') + exp) >> unarg(lst))
    call = toktype('Name') + op_('(') + exp_lst + op_(')') >> call

    return exp.parse(seq)
def parse(sequence, query):
    tokval = lambda x: x.value
    toktype = lambda t: (
        some(lambda x: x.type == t).named('(type %s)' % t) >> tokval
    )
    operation = lambda s: a(Token('Op', s)) >> tokval
    operation_ = lambda s: skip(operation(s))

    create_param = lambda param_name: query.get_aliased_param(
        param_name
    )
    make_and = lambda params: And(params[0], params[1])
    make_or = lambda params: Or(params[0], params[1])
    make_not = lambda inner: Not(inner)

    word = toktype('Word')

    inner_bracket = forward_decl()
    left_of_and = forward_decl()
    right_of_and = forward_decl()
    left_of_or = forward_decl()
    not_ = forward_decl()

    bracket = operation_('(') + inner_bracket + operation_(')')
    and_ = left_of_and + operation_('&') + right_of_and >> make_and
    or_ = left_of_or + operation_('|') + inner_bracket >> make_or
    param = word >> create_param

    not_.define(operation_('!') + (bracket | param))
    not_ = not_ >> make_not

    left_of_or.define(and_ | bracket | not_ | param)
    left_of_and.define(bracket | not_ | param)
    right_of_and.define(left_of_and)
    inner_bracket.define(or_ | and_ | bracket | not_ | param)

    definition = (bracket | inner_bracket) + finished
    return definition.parse(sequence)
            list(tokenize(u'f is ф'))
        except LexerError, e:
            self.assertEqual(unicode(e),
                             u'cannot tokenize data: 1,6: "f is \u0444"')
        else:
            self.fail(u'must raise LexerError')

        sometok = lambda type: some(lambda t: t.type == type)
        keyword = lambda s: a(Token(u'keyword', s))

        id = sometok(u'id')
        is_ = keyword(u'is')
        end = keyword(u'end')
        nl = sometok(u'nl')

        equality = id + skip(is_) + id >> tuple
        expr = equality + skip(nl)
        file = many(expr) + end

        msg = """\
spam is eggs
eggs isnt spam
end"""
        toks = [x for x in tokenize(msg) if x.type != u'space']
        try:
            file.parse(toks)
        except NoParseError, e:
            self.assertEqual(e.msg,
                             u"got unexpected token: 2,11-2,14: id 'spam'")
            self.assertEqual(e.state.pos, 4)
            self.assertEqual(e.state.max, 7)
string = tokenType('String') >> Make.string
unString = tokenType('String')  # When the double quotes are still needed for internal processing
seqString = tokenType('SequenceString') >> Make.seqString
unseqString = tokenType('SequenceString') >> Make.unseqString  # For use with variables

colRowOperator = lambda s: a( Token( 'ColRowOperator', s ) )
relCROperator = lambda s: a( Token( 'RelCROperator', s ) )
pixelOperator = tokenType('PixelOperator')

# Code variants
code_begin = tokenType('CodeBegin')
code_end = tokenType('CodeEnd')

# Specifier
specifier_basic = ( timing >> Make.specifierTiming ) | ( name >> Make.specifierState )
specifier_complex = ( name + skip( operator(':') ) + timing ) >> unarg( Make.specifierState )
specifier_state = specifier_complex | specifier_basic
specifier_analog = number >> Make.specifierAnalog
specifier_list = skip( parenthesis('(') ) + many( ( specifier_state | specifier_analog ) + skip( maybe( comma ) ) ) + skip( parenthesis(')') )

# Scan Codes
scanCode_start = tokenType('ScanCodeStart')
scanCode_range = number + skip( dash ) + number >> Make.scanCode_range
scanCode_listElem = number >> Make.scanCode
scanCode_specifier = ( scanCode_range | scanCode_listElem ) + maybe( specifier_list ) >> unarg( Make.specifierUnroll )
scanCode_innerList = many( scanCode_specifier + skip( maybe( comma ) ) ) >> flatten
scanCode_expanded = skip( scanCode_start ) + scanCode_innerList + skip( code_end ) + maybe( specifier_list ) >> unarg( Make.specifierUnroll )
scanCode_elem = scanCode + maybe( specifier_list ) >> unarg( Make.specifierUnroll )
scanCode_combo = oneplus( ( scanCode_expanded | scanCode_elem ) + skip( maybe( plus ) ) )
scanCode_sequence = oneplus( scanCode_combo + skip( maybe( comma ) ) )
scanCode_single = ( skip( scanCode_start ) + scanCode_listElem + skip( code_end ) ) | scanCode
def delim(t):
    return skip(_tok(t))
# (right to left), starting from a token at cursor.
tok_number = token("number")
tok_string = token("string")
dot = token(".")
colon = token(":")
single_quote = token('"')
double_quote = token("'")
quote = (single_quote | double_quote)
open_sq_brace = token("[")
close_sq_brace = token("]")
open_rnd_brace = token("(")
close_rnd_brace = token(")")
tok_constant = p.some(lambda t: t.value in {'nil', 'true', 'false'})
iden_start = p.skip(p.some(lambda t: t.type not in ".:"))
tok_splash = (p.a(Token("iden", "splash")) + iden_start) >> token_value
iden = token("iden")
opt_iden = iden | p.pure("")

# =========== Expressions parser
# FIXME: it should be rewritten using full Lua 5.2 grammar.
BINARY_OPS = set("+-*/^%><") | {"..", "==", "~=", ">=", "<=", "and", "or"}
UNARY_OPS = {"not", "-", "#"}
binary_op = p.some(lambda t: t.value in BINARY_OPS) >> token_value
unary_op = p.some(lambda t: t.value in UNARY_OPS) >> token_value

# expressions with binary and unary ops + parenthesis
@p.with_forward_decls
def sym(wanted):
    "Parse and skip the given symbol or keyword."
    if wanted.startswith(":"):
        return skip(a(HyKeyword(wanted[1:])))
    return skip(some(lambda x: isinstance(x, HySymbol) and x == wanted))
def parse_instruction(spec):
    string = p.oneplus(sometok("string")) >> (lambda x: " ".join(x))
    ordinal = sometok("ordinal")
    bowl = sometok("bowl")
    the = sometok("the")
    dish = sometok("dish")
    to = sometok("to")
    into = sometok("into")

    concat = lambda list: " ".join(list)

    take_i = sometok("take") + (p.oneplus(string) >> concat) + sometok("from") + sometok("refrigerator")

    put_i = (
        sometok("put")
        + p.skip(p.maybe(the))
        + (p.oneplus(string) >> concat)
        + p.skip(into)
        + p.maybe(ordinal | the)
        + bowl
    )

    liquefy_1 = sometok("liquefy") + sometok("contents") + p.maybe(ordinal) + bowl
    liquefy_2 = sometok("liquefy") + (p.oneplus(string) >> concat)
    liquefy_i = liquefy_1 | liquefy_2

    pour_i = (
        sometok("pour")
        + sometok("contents")
        + p.maybe(ordinal)
        + bowl
        + sometok("into")
        + the
        + p.maybe(ordinal)
        + dish
    )

    fold_i = (
        sometok("fold")
        + p.skip(p.maybe(the))
        + (p.oneplus(string) >> concat)
        + into
        + p.maybe(ordinal | the)
        + bowl
    )

    # cleanup repetition
    add_i = sometok("add") + (p.oneplus(string) >> concat) + p.maybe(to + p.maybe(ordinal | the) + bowl)

    remove_i = (
        sometok("remove")
        + (p.oneplus(string) >> concat)
        + p.maybe(sometok("from") + p.maybe(ordinal | the) + bowl)
    )

    combine_i = sometok("combine") + (p.oneplus(string) >> concat) + p.maybe(into + p.maybe(ordinal | the) + bowl)

    divide_i = sometok("divide") + (p.oneplus(string) >> concat) + p.maybe(into + p.maybe(ordinal | the) + bowl)

    add_dry_i = sometok("add_dry") + p.maybe(to + p.maybe(ordinal | the) + bowl)

    stir_1 = (
        sometok("stir")
        + p.maybe(the + p.maybe(ordinal | the) + bowl)
        + sometok("for")
        + sometok("number")
        + (sometok("minute") | sometok("minutes"))
    )
    stir_2 = sometok("stir") + (p.oneplus(string) >> concat) + into + the + p.maybe(ordinal) + bowl
    stir_i = stir_1 | stir_2

    mix_i = sometok("mix") + p.maybe(the + p.maybe(ordinal) + bowl) + sometok("well")

    clean_i = sometok("clean") + p.maybe(ordinal | the) + bowl

    loop_start_i = (sometok("string") + p.maybe(the) + (p.oneplus(string) >> concat)) >> (lambda x: ("loop_start", x))
    loop_end_i = (
        sometok("string") + p.maybe(p.maybe(the) + (p.oneplus(string) >> concat)) + sometok("until") + string
    ) >> (lambda x: ("loop_end", x))

    set_aside_i = sometok("set") >> (lambda x: (x, None))

    serve_with_i = sometok("serve_with") + (p.oneplus(string) >> concat)

    refrigerate_i = sometok("refrigerate") + p.maybe(
        sometok("for") + sometok("number") + (sometok("hour") | sometok("hours"))
    )

    instruction = (
        take_i
        | put_i
        | liquefy_i
        | pour_i
        | add_i
        | fold_i
        | remove_i
        | combine_i
        | divide_i
        | add_dry_i
        | stir_i
        | mix_i
        | clean_i
        | loop_end_i    # -| ORDER matters
        | loop_start_i  # -|
        | set_aside_i
        | serve_with_i
        | refrigerate_i
    ) >> (lambda x: Instruction(x[0].lower().replace(" ", "_"), x[1:]))

    return instruction.parse(tokenize_instruction(spec))
# *Mark here are not really required, but if you are going to do
# anything complex that requires that you discern between different
# parsing paths, marks often give you the least hassle.
expr = with_forward_decls(
    lambda: (number + pure(NumberMark) + expr_rest
             | paren_expr + pure(ParenMark) + expr_rest) >> make_expr)

# This one allows us to add more complex expressions like function
# application and ternary operators to the above definition with ease.
# Otherwise terms such as `apply = expr lparen many(expr) rparen`
# would be impossible to add, always leading to infinite left recursion.
expr_rest = maybe(op + expr)

toplev = expr + skip(eof)


@py.test.mark.parametrize("given, parser, expected", [
    ("1", number, Number("1")),
    ("+", op, "+"),
    ("-", op, "-"),
    ("*", op, "*"),
    ("^", op, "^"),
])
def test_parse_primitives(given, parser, expected):
    data = parser.parse(list(tokenize(given))[:-1])
    assert data == expected


@py.test.mark.parametrize("given, expected", [
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

from funcparserlib.parser import skip, tok
from functools import reduce

__all__ = [
    'const', 'flatten', 'unarg', 'tokval', 'mktok', 'n', 'op', 'op_',
    'sometok', 'sometoks',
]

# Well-known functions
const = lambda x: lambda _: x
flatten = lambda list: sum(list, [])
unarg = lambda f: lambda args: f(*args)

# Auxiliary functions for lexers
tokval = lambda tok: tok.value

# Auxiliary functions for parsers
mktok = lambda type: lambda value: tok(type, value) >> tokval
n = mktok('name')
op = mktok('op')
op_ = lambda s: skip(op(s))
sometok = lambda type: tok(type) >> tokval
sometoks = lambda types: reduce(
    lambda p, type: p | tok(type) if p else tok(type), types, None) >> tokval
def parse(seq):
    """Sequence(Token) -> object"""
    const = lambda x: lambda _: x
    tokval = lambda x: x.value
    toktype = lambda t: some(lambda x: x.type == t) >> tokval
    op = lambda s: a(Token('Op', s)) >> tokval
    op_ = lambda s: skip(op(s))
    n = lambda s: a(Token('Name', s)) >> tokval

    def make_array(n):
        if n is None:
            return []
        else:
            return [n[0]] + n[1]

    def make_object(n):
        return dict(make_array(n))

    def make_number(n):
        try:
            return int(n)
        except ValueError:
            return float(n)

    def unescape(s):
        std = {
            '"': '"', '\\': '\\', '/': '/', 'b': '\b', 'f': '\f',
            'n': '\n', 'r': '\r', 't': '\t',
        }

        def sub(m):
            if m.group('standard') is not None:
                return std[m.group('standard')]
            else:
                return chr(int(m.group('unicode'), 16))

        return re_esc.sub(sub, s)

    def make_string(n):
        return unescape(n[1:-1])

    null = n('null') >> const(None)
    true = n('true') >> const(True)
    false = n('false') >> const(False)
    number = toktype('Number') >> make_number
    string = toktype('String') >> make_string
    value = forward_decl()
    member = string + op_(':') + value >> tuple
    object = (
        op_('{') +
        maybe(member + many(op_(',') + member)) +
        op_('}')
        >> make_object)
    array = (
        op_('[') +
        maybe(value + many(op_(',') + value)) +
        op_(']')
        >> make_array)
    value.define(
        null
        | true
        | false
        | object
        | array
        | number
        | string)
    json_text = object | array
    json_file = json_text + skip(finished)

    return json_file.parse(seq)
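# Hedged usage sketch for the JSON parser above, assuming a companion
# tokenize() lexer that yields 'Op', 'Name', 'Number', and 'String' tokens
# for JSON source text:
#
#     parse(tokenize('{"answer": 42, "items": [1, 2, 3]}'))
#     # -> {'answer': 42, 'items': [1, 2, 3]}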
def evaluate(expression, environment):
    """Evaluate an expression in the specified variable environment."""
    # Well known functions
    const = lambda x: lambda _: x
    unarg = lambda f: lambda args: f(*args)

    # Semantic actions and auxiliary functions
    tokval = lambda tok: tok.value
    makeop = lambda s, f: op(s) >> const(f)
    sometok = lambda type: some(lambda tok: tok.type == type)

    def eval_name(s):
        try:
            return environment[s]  # Case-sensitive
        except KeyError:
            raise ValueError('unbound variable: %s' % s)

    def make_number(s):
        try:
            return int(s)
        except ValueError:
            return float(s)

    def eval_expr(expr, op_expr_pairs):
        result = expr
        for op, expr in op_expr_pairs:
            result = op(result, expr)
        return result

    def eval_call(func_name, maybe_expr_and_exprs):
        if maybe_expr_and_exprs:
            expr, exprs = maybe_expr_and_exprs
            args = [expr] + exprs
        else:
            args = []

        f = eval_name(func_name)
        if not callable(f):
            raise TypeError('variable is not callable: %s' % func_name)

        argcount = len(args)
        f_argcount = f.func_code.co_argcount
        if f_argcount != argcount:
            raise TypeError('%s takes %d arguments (%d given)' %
                            (func_name, f_argcount, argcount))
        return f(*args)

    # Primitives
    number = (
        sometok('number')
        >> tokval
        >> make_number)
    raw_name = sometok('name') >> tokval
    name = raw_name >> eval_name
    op = lambda s: a(Token('op', s)) >> tokval
    op_ = lambda s: skip(op(s))

    add = makeop('+', operator.add)
    sub = makeop('-', operator.sub)
    mul = makeop('*', operator.mul)
    div = makeop('/', operator.div)

    mul_op = mul | div
    add_op = add | sub

    # Means of composition
    expr = forward_decl()
    call = (
        raw_name + op_('(') + maybe(expr + many(op_(',') + expr)) + op_(')')
        >> unarg(eval_call))
    primary = (
        number
        | call
        | name
        | op_('(') + expr + op_(')'))
    term = (
        primary + many(mul_op + primary)
        >> unarg(eval_expr))
    expr.define(
        term + many(add_op + term)
        >> unarg(eval_expr))

    # Toplevel parsers
    toplevel = maybe(expr) + skip(finished)

    return toplevel.parse(tokenize(expression))
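# Hedged usage sketch, assuming the module's tokenize() lexer emits 'number',
# 'name', and 'op' tokens, and that the environment maps names to values:
#
#     evaluate('2 * (x + 3)', {'x': 7})  # -> 20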
from __future__ import absolute_import
import funcparserlib.parser as p

string = lambda x: x or ''
cat = ''.join

negative = p.maybe(p.a('-')) >> string
digits = p.oneplus(p.some(lambda char: char.isdigit())) >> cat
decimal_part = (p.maybe(p.a('.') + digits)) >> string >> cat
number = (negative + digits + decimal_part) >> cat >> float
addition = number + p.skip(p.a('+')) + number >> sum
expression = addition | number
expression = expression + p.finished


def calculate(text):
    return expression.parse(text)[0]
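# Usage sketch for the calculator above: the grammar consumes the raw
# character sequence directly, so no separate tokenizer is needed.
#
#     calculate('2+3')   # -> 5.0
#     calculate('-1.5')  # -> -1.5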
def decode_tokens(s: typing.List[Token]):
    def _to_int(token):
        _log.debug('_to_int: %r', token)
        return int(token)

    str_strip_re = re.compile(rb'^\d+:')

    def _to_string(s):
        _log.debug('_to_string: %r', s)
        return str_strip_re.sub(b'', s)

    def _to_list(_tokens):
        _log.debug('_to_list: %r', _tokens)
        return _tokens

    def _to_dict(n):
        _log.debug('_to_dict: %r', n)
        return dict(_to_list(n))

    def token_value(x):
        return x.value

    def token_type(t):
        return p.some(lambda x: x.name == t) >> token_value

    def type_decl(type_name):
        return p.a(
            Token('Type', type_name)
        ).named('Type({})'.format(type_name))

    value = p.forward_decl().named('Value')
    integer = token_type('Number')
    end = p.a(Token('End', b'e'))

    # String is special, has no type
    str_decl = (
        token_type('String') >> _to_string
    ).named('String')
    dict_decl = (
        p.skip(type_decl(b'd')) + p.many(value + value) + p.skip(end) >> _to_dict
    ).named('Dict')
    list_decl = (
        p.skip(type_decl(b'l')) + p.many(value) + p.skip(end) >> _to_list
    ).named('List')
    integer_decl = (
        p.skip(type_decl(b'i')) + integer + p.skip(end) >> _to_int
    ).named('Integer')

    value.define(
        integer_decl | dict_decl | list_decl | str_decl
    )

    bencode_decl = (
        value + p.skip(p.finished)
    ).named('Bencode')

    return bencode_decl.parse(s)