def parse_parameter_list(index):
    """Parse a function parameter list.

    Returns a list of decl_nodes.Root arguments and the index right after
    the last argument token. That index should point at a closing
    parenthesis, but verifying this is left to the caller.

    index - index right past the opening parenthesis.
    """
    params = []

    # An immediate close paren means an empty parameter list.
    if token_is(index, token_kinds.close_paren):
        return params, index

    while True:
        # Each parameter is declaration specifiers followed by a declarator.
        specs, index = parse_decl_specifiers(index)
        declarator, index = parse_declarator(index)
        params.append(decl_nodes.Root(specs, [declarator]))

        # Parameters are separated by commas; anything else ends the list.
        if not token_is(index, token_kinds.comma):
            break
        index += 1

    return params, index
def get_for_clauses(index):
    """Get the three clauses of a for-statement.

    index - Index of the beginning of the first clause.

    returns - Tuple (Node, Node, Node, index). Each Node is the
    corresponding clause, or None if that clause is empty. The index is
    that of the first token after the close paren terminating the for
    clauses. Raises an exception on malformed input.
    """
    first, index = get_first_for_clause(index)

    # Second clause: empty if the next token is already the semicolon.
    # match_token on a present semicolon simply advances past it, which is
    # exactly what the empty-clause case needs as well.
    second = None
    if not token_is(index, token_kinds.semicolon):
        second, index = parse_expression(index)
    index = match_token(index, token_kinds.semicolon, ParserError.AFTER)

    # Third clause: empty if the next token is already the close paren.
    third = None
    if not token_is(index, token_kinds.close_paren):
        third, index = parse_expression(index)
    index = match_token(index, token_kinds.close_paren, ParserError.AFTER)

    return first, second, third, index
def parse_postfix(index):
    """ Parse postfix expression

    Repeatedly extends the primary expression at `index` with postfix
    operators: array subscript, member access (`.`/`->`), function call,
    and postfix increment/decrement. Returns the expression node and the
    index one past its last token.
    """
    cur, index = parse_primary(index)

    while True:
        # Remember the range of the expression built so far; the range is
        # extended at the bottom of the loop to cover the new operator.
        old_range = cur.r

        if token_is(index, token_kinds.open_sq_brack):
            # Array subscript: cur[arg]
            index += 1
            arg, index = parse_expression(index)
            cur = expr_nodes.ArraySubsc(cur, arg)
            match_token(index, token_kinds.close_sq_brack, ParserError.GOT)
            index += 1

        elif token_is(index, token_kinds.dot) or token_is(
                index, token_kinds.arrow):
            # Member access: cur.member or cur->member. match_token is used
            # only for its error side effect; index is advanced manually.
            index += 1
            match_token(index, token_kinds.identifier, ParserError.AFTER)
            member = utils.tokens[index]

            # index - 1 is the `.` or `->` token itself.
            if token_is(index - 1, token_kinds.dot):
                cur = expr_nodes.ObjMember(cur, member)
            else:
                cur = expr_nodes.ObjPtrMember(cur, member)
            index += 1

        elif token_is(index, token_kinds.open_paren):
            # Function call: cur(arg, arg, ...). Note both returns here
            # leave the loop directly, skipping the range update below.
            args = []
            index += 1

            if token_is(index, token_kinds.close_paren):
                return expr_nodes.FuncCall(cur, args), index + 1

            while True:
                arg, index = parse_assignment(index)
                args.append(arg)
                if token_is(index, token_kinds.comma):
                    index += 1
                else:
                    break

            index = match_token(
                index, token_kinds.close_paren, ParserError.GOT)

            return expr_nodes.FuncCall(cur, args), index

        elif token_is(index, token_kinds.incr):
            index += 1
            cur = expr_nodes.PostIncr(cur)
        elif token_is(index, token_kinds.decr):
            index += 1
            cur = expr_nodes.PostDecr(cur)
        else:
            # No postfix operator follows; the expression is complete.
            return cur, index

        # Extend the node's range through the last token just consumed.
        cur.r = old_range + utils.tokens[index - 1].r
def find_const(index):
    """Check for a continuous sequence of `const` tokens.

    Returns a tuple of a boolean indicating whether any `const` sequence
    exists and the first index that is not a `const`. If no `const` is
    found, the second element is the index passed in.
    """
    start = index
    while token_is(index, token_kinds.const_kw):
        index += 1

    # Any advancement means at least one `const` was consumed.
    return index != start, index
def parse_expr_statement(index):
    """Parse a statement that is an expression.

    Ex: a = 3 + 4
    """
    # A bare semicolon is the empty statement.
    if token_is(index, token_kinds.semicolon):
        return nodes.EmptyStatement(), index + 1

    expr, index = parse_expression(index)
    index = match_token(index, token_kinds.semicolon, ParserError.AFTER)
    return nodes.ExprStatement(expr), index
def parse_primary(index):
    """Parse a primary expression.

    Handles parenthesized expressions, number literals, identifiers that
    are not typedef names, string literals, and character constants.
    Raises an error if no primary expression starts at `index`.
    """
    if token_is(index, token_kinds.open_paren):
        inner, index = parse_expression(index + 1)
        index = match_token(index, token_kinds.close_paren, ParserError.GOT)
        return expr_nodes.ParenExpr(inner), index

    if token_is(index, token_kinds.number):
        return expr_nodes.Number(utils.tokens[index]), index + 1

    # A typedef name in expression position is not an identifier use.
    if (token_is(index, token_kinds.identifier)
            and not utils.symbols.is_typedef(utils.tokens[index])):
        return expr_nodes.Identifier(utils.tokens[index]), index + 1

    if token_is(index, token_kinds.string):
        return expr_nodes.String(utils.tokens[index].content), index + 1

    if token_is(index, token_kinds.char_string):
        # A character constant is treated as the number of its first char.
        chars = utils.tokens[index].content
        return expr_nodes.Number(chars[0]), index + 1

    raise_error("expected expression", index, ParserError.GOT)
def find_decl_end(index):
    """Find the end of the declarator that starts at the given index.

    If a valid declarator starts at the given index, this function is
    guaranteed to return the correct end point. Returns an index one
    greater than the last index in this declarator.
    """
    # Iterative scan: consume declarator tokens until an unrecognized one.
    while True:
        if (token_is(index, token_kinds.star)
                or token_is(index, token_kinds.identifier)
                or token_is(index, token_kinds.const_kw)):
            # Pointers, names, and qualifiers are single tokens.
            index += 1
        elif token_is(index, token_kinds.open_paren):
            # Skip a balanced parenthesized group in one step.
            index = find_pair_forward(index) + 1
        elif token_is(index, token_kinds.open_sq_brack):
            # Skip a balanced array-size bracket group.
            mess = "mismatched square brackets in declaration"
            close = find_pair_forward(index, token_kinds.open_sq_brack,
                                      token_kinds.close_sq_brack, mess)
            index = close + 1
        else:
            # Unknown token. If this declaration is correctly formatted,
            # this must be the end of the declaration.
            return index
def parse_struct_members(index):
    """Parse the list of members of a struct as a list of Root nodes.

    index - index right past the open bracket starting the members list.
    """
    members = []

    # Each member is a declaration without initializers; the list ends at
    # the closing bracket, which is consumed here.
    while not token_is(index, token_kinds.close_brack):
        member, index = parse_decls_inits(index, False)
        members.append(member)

    return members, index + 1
def parse_return(index):
    """Parse a return statement.

    Ex: return 5;

    Returns a tuple of the nodes.Return node and the index one past the
    terminating semicolon.
    """
    index = match_token(index, token_kinds.return_kw, ParserError.GOT)

    # Bare `return;` — consume the semicolon here. Previously the index
    # was returned still pointing at the semicolon, which the statement
    # parser would then re-parse as a spurious empty statement; this also
    # makes the function consistent with parse_expr_statement, which
    # consumes its terminating semicolon.
    if token_is(index, token_kinds.semicolon):
        return nodes.Return(None), index + 1

    node, index = parse_expression(index)
    index = match_token(index, token_kinds.semicolon, ParserError.AFTER)
    return nodes.Return(node), index
def parse_decls_inits(index, parse_inits=True):
    """Parse declarations and initializers into a decl_nodes.Root node.

    The decl_nodes node is used by the caller to create a
    tree.nodes.Declaration node, and is traversed during the IL
    generation step to convert it into an appropriate ctype.

    If `parse_inits` is false, do not permit initializers. This is
    useful for parsing struct objects.
    """
    specs, index = parse_decl_specifiers(index)

    # Declaration specifiers followed directly by a semicolon produce a
    # Root with no declarators (e.g. a bare struct definition).
    if token_is(index, token_kinds.semicolon):
        return decl_nodes.Root(specs, []), index + 1

    # A typedef storage specifier changes how declarators are parsed.
    is_typedef = any(tok.kind == token_kinds.typedef_kw for tok in specs)

    decls = []
    inits = []
    while True:
        declarator, index = parse_declarator(index, is_typedef)
        decls.append(declarator)

        # An equals sign introduces an initializer, when permitted.
        if parse_inits and token_is(index, token_kinds.equals):
            # Imported here to avoid a circular module dependency.
            from myparser.expression import parse_assignment
            init, index = parse_assignment(index + 1)
            inits.append(init)
        else:
            inits.append(None)

        # Declarators are comma-separated; anything else ends the list.
        if not token_is(index, token_kinds.comma):
            break
        index += 1

    index = match_token(index, token_kinds.semicolon, ParserError.AFTER)
    return decl_nodes.Root(specs, decls, inits), index
def parse_struct_spec(index, node_type):
    """Parse a struct specifier.

    A struct specifier includes everything from the `struct` keyword to
    the end of the member list, if one exists. At least one of the tag
    name and the member list must be present.

    index - index right past the type definition keyword.
    node_type - the node class to construct, e.g. decl_nodes.Struct.
    """
    # index - 1 is the `struct` keyword itself; its range starts the spec.
    spec_range = p.tokens[index - 1].r

    # Optional tag name.
    if token_is(index, token_kinds.identifier):
        tag = p.tokens[index]
        index += 1
    else:
        tag = None

    # Optional brace-enclosed member list.
    if token_is(index, token_kinds.open_brack):
        members, index = parse_struct_members(index + 1)
    else:
        members = None

    if tag is None and members is None:
        raise_error("expected identifier or member list",
                    index, ParserError.AFTER)

    # Extend the range through the last token consumed.
    spec_range = spec_range + p.tokens[index - 1].r
    return node_type(tag, members, spec_range), index
def try_parse_func_decl(start, end, is_typedef=False):
    """ Parse a function declarator between start and end.

    Expects that tokens[end-1] is a close parenthesis. If a function
    declarator is successfully parsed, returns the decl_node.Function
    object. Otherwise, returns None.
    """
    # Not ending in `)` — cannot be a function declarator.
    if not token_is(end - 1, token_kinds.close_paren):
        return None

    # Find the `(` matching the trailing `)`.
    open_paren = find_pair_backward(end - 1)

    # NOTE(review): log_error appears to suppress parser errors raised in
    # its body — confirm. If parse_parameter_list raises, execution resumes
    # after the `with` block and the function returns None below.
    with log_error():
        params, index = parse_parameter_list(open_paren + 1)
        # A valid parameter list must end exactly at the close paren.
        if index == end - 1:
            return decl_nodes.Function(
                params, _parse_declarator(start, open_paren, is_typedef))

    # Parameter list failed to parse or did not span the parentheses.
    return None
def parse_if_statement(index):
    """Parse an if statement, with an optional else clause.

    Ex: if (a == 1) b = 2; else b = 3;
    """
    index = match_token(index, token_kinds.if_kw, ParserError.GOT)
    index = match_token(index, token_kinds.open_paren, ParserError.AFTER)
    cond, index = parse_expression(index)
    index = match_token(index, token_kinds.close_paren, ParserError.AFTER)
    body, index = parse_statement(index)

    # An `else` keyword directly after the body introduces an else clause.
    else_body = None
    if token_is(index, token_kinds.else_kw):
        index = match_token(index, token_kinds.else_kw, ParserError.GOT)
        else_body, index = parse_statement(index)

    return nodes.IfStatement(cond, body, else_body), index
def get_first_for_clause(index):
    """Get the first clause of a for-statement.

    index - Index of the beginning of the first clause in the
    for-statement.

    returns - Tuple. First element is a node if a clause is found and
    None if there is no clause (i.e. semicolon terminating the clause).
    Second element is an integer index where the next token begins.

    If malformed, raises exception.
    """
    # An immediate semicolon means the clause is empty.
    if token_is(index, token_kinds.semicolon):
        return None, index + 1

    # First, try to parse the clause as a declaration. NOTE(review):
    # log_error appears to suppress parser errors raised in its body —
    # confirm. On success the `return` exits here; on a suppressed error,
    # execution falls through to the expression parse below.
    with log_error():
        return parse_declaration(index)

    # Not a declaration — parse as an expression terminated by `;`.
    clause, index = parse_expression(index)
    index = match_token(index, token_kinds.semicolon, ParserError.AFTER)
    return clause, index
def parse_series(index, parse_base, separators):
    """Parse a series of symbols joined together with given separator(s).

    index (int) - Index at which to start searching.
    parse_base (function) - A parse_* function that parses the base
    symbol.
    separators (Dict(TokenKind -> Node)) - The separators that join
    instances of the base symbol. Each separator corresponds to a Node,
    which is the Node produced to join two expressions connected with
    that separator.
    """
    cur, index = parse_base(index)

    while True:
        # Find the first separator kind matching the current token, in
        # the dict's iteration order; None if no separator follows.
        sep_kind = next(
            (kind for kind in separators if token_is(index, kind)), None)
        if sep_kind is None:
            return cur, index

        # Join the expression so far with the next base symbol using the
        # Node class registered for this separator.
        op_tok = utils.tokens[index]
        rhs, index = parse_base(index + 1)
        cur = separators[sep_kind](cur, rhs, op_tok)
def parse_decl_specifiers(index, spec_qual=False):
    """ Parse a declaration specifier.

    Examples: int / const char

    The returned `specs` list may contain two types of elements: tokens
    and Node objects. A Node object will be included for a struct
    declaration, and a token for all other declaration specifiers.

    index - index at which to start parsing specifiers.
    spec_qual - when True, storage specifiers are reported as errors
    rather than accepted (specifier-qualifier context).

    Raises an error if no declaration specifier is found at `index`.
    """
    # Token kinds accepted as type specifiers (simple arithmetic types
    # plus signed/unsigned), type qualifiers, and storage specifiers.
    type_specs = set(ctypes.simple_types.keys())
    type_specs |= {token_kinds.signed_kw, token_kinds.unsigned_kw}

    type_quals = {token_kinds.const_kw}

    storage_specs = {
        token_kinds.auto_kw, token_kinds.static_kw,
        token_kinds.extern_kw, token_kinds.typedef_kw
    }

    specs = []

    # The type specifier class, either SIMPLE, STRUCT, or TYPEDEF,
    # represents the allowed kinds of type specifiers. Once the first
    # specifier is parsed, the type specifier class is set. If the type
    # specifier class is set to STRUCT or TYPEDEF, no further type
    # specifiers are permitted in the type specifier list. If it is set
    # to SIMPLE, more simple type specifiers are permitted. This is
    # important for typedef parsing.
    SIMPLE = 1
    STRUCT = 2
    TYPEDEF = 3
    type_spec_class = None

    while True:
        # Parse a struct specifier if there is one. Only permitted if no
        # type specifier has been seen yet.
        if not type_spec_class and token_is(index, token_kinds.struct_kw):
            node, index = parse_struct(index + 1)
            specs.append(node)
            type_spec_class = STRUCT

        # Match a typedef name. Only permitted if no type specifier has
        # been seen yet.
        elif not type_spec_class and token_is(
                index, token_kinds.identifier) and p.symbols.is_typedef(
                p.tokens[index]):
            specs.append(p.tokens[index])
            index += 1
            type_spec_class = TYPEDEF

        # Simple type specifiers may repeat (e.g. `unsigned int`), so
        # they are also accepted when the class is already SIMPLE.
        elif type_spec_class in {None, SIMPLE} and token_in(index, type_specs):
            specs.append(p.tokens[index])
            index += 1
            type_spec_class = SIMPLE

        # Type qualifiers are accepted at any point in the list.
        elif token_in(index, type_quals):
            specs.append(p.tokens[index])
            index += 1

        # Storage specifiers are accepted unless spec_qual is set, in
        # which case an error is recorded but parsing continues.
        elif token_in(index, storage_specs):
            if not spec_qual:
                specs.append(p.tokens[index])
            else:
                err = "storage specifier not permitted here"
                error_collector.add(CompilerError(err, p.tokens[index].r))
            index += 1

        else:
            break

    if specs:
        return specs, index
    else:
        raise_error("expected declaration specifier", index, ParserError.AT)