def _parse_struct_union_spec(index, node_type):
    """Parse a struct or union specifier into a node of type `node_type`.

    The specifier spans from the `struct`/`union` keyword through the end
    of the member list, when a member list is present.

    index - index just past the struct/union keyword.
    node_type - either decl_nodes.Struct or decl_nodes.Union.
    """
    # Range of the struct/union keyword itself, used to build the full range.
    kw_range = p.tokens[index - 1].r

    tag = None
    if token_is(index, token_kinds.identifier):
        tag = p.tokens[index]
        index += 1

    member_list = None
    if token_is(index, token_kinds.open_brack):
        member_list, index = parse_struct_union_members(index + 1)

    # At least one of the tag and the member list must be present.
    if tag is None and member_list is None:
        raise_error("expected identifier or member list",
                    index, ParserError.AFTER)

    full_range = kw_range + p.tokens[index - 1].r
    return node_type(tag, member_list, full_range), index
def parse_root(index):
    """Parse the given tokens into an AST."""
    items = []

    # Repeatedly try each top-level parser in order; stop once neither
    # can make progress. Failed attempts are logged, not fatal.
    progressing = True
    while progressing:
        progressing = False
        for parse_item in (parse_main, parse_declaration):
            try:
                item, index = parse_item(index)
            except ParserError as e:
                log_error(e)
            else:
                items.append(item)
                progressing = True
                break

    # Any leftover tokens mean the input was not fully parsed.
    if p.tokens[index:]:
        raise_error("unexpected token", index, ParserError.AT)
    return nodes.Root(items), index
def parse_primary(index):
    """Parse primary expression."""
    if token_is(index, token_kinds.open_paren):
        # Parenthesized subexpression; the close paren is required.
        inner, index = parse_expression(index + 1)
        index = match_token(index, token_kinds.close_paren, ParserError.GOT)
        return expr_nodes.ParenExpr(inner), index

    if token_is(index, token_kinds.number):
        return expr_nodes.Number(p.tokens[index]), index + 1

    if token_is(index, token_kinds.identifier):
        return expr_nodes.Identifier(p.tokens[index]), index + 1

    if token_is(index, token_kinds.string):
        return expr_nodes.String(p.tokens[index].content), index + 1

    if token_is(index, token_kinds.char_string):
        # A character constant becomes a Number node holding its first
        # element of `content`.
        content = p.tokens[index].content
        return expr_nodes.Number(content[0]), index + 1

    raise_error("expected expression", index, ParserError.GOT)
def parse_primary(index):
    """Parse primary expression."""
    if token_is(index, token_kinds.open_paren):
        # Parenthesized subexpression; the close paren is required.
        inner, index = parse_expression(index + 1)
        index = match_token(index, token_kinds.close_paren, ParserError.GOT)
        return expr_nodes.ParenExpr(inner), index

    if token_is(index, token_kinds.number):
        return expr_nodes.Number(p.tokens[index]), index + 1

    # Identifiers count as primary expressions only when they are not
    # registered typedef names.
    is_plain_identifier = (token_is(index, token_kinds.identifier)
                           and not p.symbols.is_typedef(p.tokens[index]))
    if is_plain_identifier:
        return expr_nodes.Identifier(p.tokens[index]), index + 1

    if token_is(index, token_kinds.string):
        return expr_nodes.String(p.tokens[index].content), index + 1

    if token_is(index, token_kinds.char_string):
        # A character constant becomes a Number node holding the first
        # element of `content`.
        content = p.tokens[index].content
        return expr_nodes.Number(content[0]), index + 1

    raise_error("expected expression", index, ParserError.GOT)
def _parse_declarator_raw(start, end, is_typedef):
    """Like _parse_declarator, but doesn't add `.r` range attribute."""
    # Empty span: abstract declarator with no identifier.
    if start == end:
        return decl_nodes.Identifier(None)

    # Single identifier token: the declarator leaf. Register it in the
    # symbol table so later parsing can distinguish typedef names.
    if (start + 1 == end
            and p.tokens[start].kind == token_kinds.identifier):
        p.symbols.add_symbol(p.tokens[start], is_typedef)
        return decl_nodes.Identifier(p.tokens[start])

    # Leading star: pointer declarator, possibly const-qualified.
    if p.tokens[start].kind == token_kinds.star:
        is_const, after = _find_const(start + 1)
        pointee = _parse_declarator(after, end, is_typedef)
        return decl_nodes.Pointer(pointee, is_const)

    # Try a function declarator before treating parens as grouping.
    func = _try_parse_func_decl(start, end, is_typedef)
    if func:
        return func

    # First and last elements make a parenthesis pair
    if (p.tokens[start].kind == token_kinds.open_paren
            and _find_pair_forward(start) == end - 1):
        return _parse_declarator(start + 1, end - 1, is_typedef)

    # Last element indicates an array type
    if p.tokens[end - 1].kind == token_kinds.close_sq_brack:
        open_sq = _find_pair_backward(
            end - 1, token_kinds.open_sq_brack, token_kinds.close_sq_brack,
            "mismatched square brackets in declaration")

        if open_sq == end - 2:
            # `[]` with no size expression.
            size = None
        else:
            size, after = parse_expression(open_sq + 1)
            # The size expression must reach exactly the closing bracket.
            if after != end - 1:
                raise_error("unexpected token in array size",
                            after, ParserError.AFTER)

        return decl_nodes.Array(
            size, _parse_declarator(start, open_sq, is_typedef))

    raise_error("faulty declaration syntax", start, ParserError.AT)
def _parse_declarator_raw(start, end, is_typedef):
    """Like _parse_declarator, but doesn't add `.r` range attribute."""
    if start == end:
        # No tokens at all: abstract declarator with no identifier.
        return decl_nodes.Identifier(None)

    elif (start + 1 == end
          and p.tokens[start].kind == token_kinds.identifier):
        # Lone identifier leaf; record it so typedef names are known later.
        p.symbols.add_symbol(p.tokens[start], is_typedef)
        return decl_nodes.Identifier(p.tokens[start])

    elif p.tokens[start].kind == token_kinds.star:
        # Pointer declarator; _find_const skips an optional const qualifier.
        const, rest = _find_const(start + 1)
        return decl_nodes.Pointer(
            _parse_declarator(rest, end, is_typedef), const)

    # Attempt a function declarator before the remaining cases.
    maybe_func = _try_parse_func_decl(start, end, is_typedef)
    if maybe_func:
        return maybe_func

    # First and last elements make a parenthesis pair
    elif (p.tokens[start].kind == token_kinds.open_paren
          and _find_pair_forward(start) == end - 1):
        return _parse_declarator(start + 1, end - 1, is_typedef)

    # Last element indicates an array type
    elif p.tokens[end - 1].kind == token_kinds.close_sq_brack:
        open_sq = _find_pair_backward(
            end - 1, token_kinds.open_sq_brack, token_kinds.close_sq_brack,
            "mismatched square brackets in declaration")

        if open_sq == end - 2:
            # Empty brackets `[]` — no size expression.
            num_el = None
        else:
            num_el, next_index = parse_expression(open_sq + 1)
            if next_index != end - 1:
                # Size expression did not consume everything up to `]`.
                raise_error("unexpected token in array size",
                            next_index, ParserError.AFTER)

        return decl_nodes.Array(
            num_el, _parse_declarator(start, open_sq, is_typedef))

    raise_error("faulty declaration syntax", start, ParserError.AT)
def parse_decl_specifiers(index):
    """Parse a declaration specifier.

    Examples:
        int
        const char
        typedef int

    The returned `specs` list may contain two types of elements: tokens
    and Node objects. A Node object will be included for a struct or
    union declaration, and a token for all other declaration specifiers.
    """
    # Token kinds that may appear as plain declaration specifiers.
    spec_kinds = list(ctypes.simple_types.keys())
    spec_kinds += [token_kinds.signed_kw, token_kinds.unsigned_kw,
                   token_kinds.auto_kw, token_kinds.static_kw,
                   token_kinds.extern_kw, token_kinds.const_kw]

    specs = []
    while True:
        # A struct specifier is parsed into a Node rather than a token.
        if token_is(index, token_kinds.struct_kw):
            struct_node, index = parse_struct_spec(index + 1)
            specs.append(struct_node)
            continue

        # Otherwise, accept any one of the plain specifier kinds.
        matched = next(
            (kind for kind in spec_kinds if token_is(index, kind)), None)
        if matched is None:
            break
        specs.append(p.tokens[index])
        index += 1

    if not specs:
        raise_error("expected declaration specifier", index, ParserError.AT)
    return specs, index
def parse_declarator(start, end):
    """Parse the given tokens that comprise a declarator.

    This function parses both declarators and abstract-declarators. For an
    abstract declarator, the Identifier node at the leaf of the generated
    tree has the identifier None.

    Expects the declarator to start at start and end at end-1 inclusive.
    Returns a decl_nodes.Node.
    """
    # Empty span: an abstract declarator with no identifier.
    if start == end:
        return decl_nodes.Identifier(None)

    elif (start + 1 == end
          and p.tokens[start].kind == token_kinds.identifier):
        return decl_nodes.Identifier(p.tokens[start])

    # First and last elements make a parenthesis pair
    elif (p.tokens[start].kind == token_kinds.open_paren
          and find_pair_forward(start) == end - 1):
        return parse_declarator(start + 1, end - 1)

    elif p.tokens[start].kind == token_kinds.star:
        const, index = find_const(start + 1)
        return decl_nodes.Pointer(parse_declarator(index, end), const)

    # Last element indicates a function type
    elif p.tokens[end - 1].kind == token_kinds.close_paren:
        open_paren = find_pair_backward(end - 1)
        params, index = parse_parameter_list(open_paren + 1)
        if index == end - 1:
            return decl_nodes.Function(
                params, parse_declarator(start, open_paren))
        # Otherwise fall through to the syntax error below.

    # Last element indicates an array type
    elif p.tokens[end - 1].kind == token_kinds.close_sq_brack:
        # Bug fix: guard the backward lookups. Without `end - 3 >= start`,
        # a declarator span shorter than three tokens would read tokens
        # before `start` — outside this declarator — or wrap to the end of
        # the token list via a negative index, and could misparse.
        if end - 3 >= start:
            first = p.tokens[end - 3].kind == token_kinds.open_sq_brack
            number = p.tokens[end - 2].kind == token_kinds.number
            if first and number:
                return decl_nodes.Array(int(p.tokens[end - 2].content),
                                        parse_declarator(start, end - 3))

    raise_error("faulty declaration syntax", start, ParserError.AT)
def find_pair_backward(index, open=token_kinds.open_paren,
                       close=token_kinds.close_paren,
                       mess="mismatched parentheses in declaration"):
    """Find the opening parenthesis for the closing at given index.

    Same parameters as _find_pair_forward above.
    """
    depth = 0
    # Scan backward from `index` down to token 0, tracking nesting depth.
    for i in reversed(range(index + 1)):
        kind = p.tokens[i].kind
        if kind == close:
            depth += 1
        elif kind == open:
            depth -= 1
        if depth == 0:
            # Found the matching opener.
            break
    else:
        # Loop exhausted without balancing: no matching opener exists.
        raise_error(mess, index, ParserError.AT)
    return i
def _find_pair_backward(index, open=token_kinds.open_paren,
                        close=token_kinds.close_paren,
                        mess="mismatched parentheses in declaration"):
    """Find the opening parenthesis for the closing at given index.

    Same parameters as _find_pair_forward above.
    """
    depth = 0
    # Walk from `index` back toward the start of the token list.
    for pos in range(index, -1, -1):
        tok_kind = p.tokens[pos].kind
        if tok_kind == close:
            depth += 1
        elif tok_kind == open:
            depth -= 1
        if depth == 0:
            # Balanced: `pos` is the matching opener.
            break
    else:
        # Never balanced — report the mismatch at the original position.
        raise_error(mess, index, ParserError.AT)
    return pos
def parse_struct_spec(index):
    """Parse a struct specifier as a decl_nodes.Struct node.

    index - index right past the `struct` keyword
    """
    # Range of the `struct` keyword, combined below into the full range.
    keyword_r = p.tokens[index - 1].r

    tag = None
    if token_is(index, token_kinds.identifier):
        tag = p.tokens[index]
        index += 1

    member_list = None
    if token_is(index, token_kinds.open_brack):
        member_list, index = parse_struct_members(index + 1)

    # A struct specifier needs a tag, a member list, or both.
    if tag is None and member_list is None:
        raise_error("expected identifier or member list",
                    index, ParserError.AFTER)

    spec_range = keyword_r + p.tokens[index - 1].r
    return decl_nodes.Struct(tag, member_list, spec_range), index
def parse_root(index):
    """Parse the given tokens into an AST."""
    items = []

    # Alternate between the two top-level parsers until neither can make
    # progress. `log_error()` suppresses and records a failed attempt.
    while True:
        parsed_any = False
        for parse_fn in (parse_func_definition, parse_declaration):
            with log_error():
                item, index = parse_fn(index)
                items.append(item)
                parsed_any = True
            if parsed_any:
                # Successful parse: restart from the first parser.
                break
        if not parsed_any:
            break

    # Leftover tokens mean the input was not fully consumed.
    if p.tokens[index:]:
        raise_error("unexpected token", index, ParserError.AT)
    return nodes.Root(items), index
def find_pair_forward(index, open=token_kinds.open_paren,
                      close=token_kinds.close_paren,
                      mess="mismatched parentheses in declaration"):
    """Find the closing parenthesis for the opening at given index.

    index - position to start search, should be of kind `open`
    open - token kind representing the open parenthesis
    close - token kind representing the close parenthesis
    mess - message for error on mismatch
    """
    depth = 0
    # Scan forward from `index`, tracking nesting depth.
    for pos in range(index, len(p.tokens)):
        tok_kind = p.tokens[pos].kind
        if tok_kind == open:
            depth += 1
        elif tok_kind == close:
            depth -= 1
        if depth == 0:
            # Balanced: `pos` is the matching closer.
            break
    else:
        # Exhausted the token list without balancing.
        raise_error(mess, index, ParserError.AT)
    return pos
def _find_pair_forward(index, open=token_kinds.open_paren,
                       close=token_kinds.close_paren,
                       mess="mismatched parentheses in declaration"):
    """Find the closing parenthesis for the opening at given index.

    index - position to start search, should be of kind `open`
    open - token kind representing the open parenthesis
    close - token kind representing the close parenthesis
    mess - message for error on mismatch
    """
    nesting = 0
    # Move right through the token stream until the opener is matched.
    for cursor in range(index, len(p.tokens)):
        current = p.tokens[cursor].kind
        if current == open:
            nesting += 1
        elif current == close:
            nesting -= 1
        if nesting == 0:
            break
    else:
        # No matching close token anywhere in the remaining stream.
        raise_error(mess, index, ParserError.AT)
    return cursor
def parse_decl_specifiers(index, _spec_qual=False):
    """Parse a declaration specifier list.

    Examples:
        int
        const char
        typedef int

    If _spec_qual=True, produces a CompilerError if given any specifiers
    that are neither type specifier nor type qualifier.

    The returned `specs` list may contain two types of elements: tokens
    and Node objects. A Node object will be included for a struct or
    union declaration, and a token for all other declaration specifiers.
    """
    # Simple type specifiers: the basic type keywords plus sign keywords.
    type_specs = set(ctypes.simple_types.keys())
    type_specs |= {token_kinds.signed_kw, token_kinds.unsigned_kw}

    # Type qualifiers; these may repeat and never set the specifier class.
    type_quals = {token_kinds.const_kw}

    # Storage-class specifiers; rejected when _spec_qual=True.
    storage_specs = {token_kinds.auto_kw, token_kinds.static_kw,
                     token_kinds.extern_kw, token_kinds.typedef_kw}

    specs = []

    # The type specifier class, either SIMPLE, STRUCT, or TYPEDEF,
    # represents the allowed kinds of type specifiers. Once the first
    # specifier is parsed, the type specifier class is set. If the type
    # specifier class is set to STRUCT or TYPEDEF, no further type
    # specifiers are permitted in the type specifier list. If it is set to
    # SIMPLE, more simple type specifiers are permitted. This is important
    # for typedef parsing.
    SIMPLE = 1
    STRUCT = 2
    TYPEDEF = 3
    type_spec_class = None

    while True:
        # Parse a struct specifier if there is one.
        if not type_spec_class and token_is(index, token_kinds.struct_kw):
            node, index = parse_struct_spec(index + 1)
            specs.append(node)
            type_spec_class = STRUCT

        # Parse a union specifier if there is one.
        elif not type_spec_class and token_is(index, token_kinds.union_kw):
            node, index = parse_union_spec(index + 1)
            specs.append(node)
            type_spec_class = STRUCT

        # Match a typedef name. Identifiers count as specifiers only when
        # the symbol table says they name a typedef.
        elif (not type_spec_class
              and token_is(index, token_kinds.identifier)
              and p.symbols.is_typedef(p.tokens[index])):
            specs.append(p.tokens[index])
            index += 1
            type_spec_class = TYPEDEF

        # Simple type specifiers may accumulate (e.g. `unsigned int`).
        elif (type_spec_class in {None, SIMPLE}
              and token_in(index, type_specs)):
            specs.append(p.tokens[index])
            index += 1
            type_spec_class = SIMPLE

        # Qualifiers are always accepted, regardless of specifier class.
        elif token_in(index, type_quals):
            specs.append(p.tokens[index])
            index += 1

        # Storage specifiers: collected normally, but reported as an
        # error (and skipped) in specifier-qualifier-list context.
        elif token_in(index, storage_specs):
            if not _spec_qual:
                specs.append(p.tokens[index])
            else:
                err = "storage specifier not permitted here"
                error_collector.add(CompilerError(err, p.tokens[index].r))
            index += 1

        else:
            break

    if specs:
        return specs, index
    else:
        raise_error("expected declaration specifier", index, ParserError.AT)
def parse_decl_specifiers(index, _spec_qual=False):
    """Parse a declaration specifier list.

    Examples:
        int
        const char
        typedef int

    If _spec_qual=True, produces a CompilerError if given any specifiers
    that are neither type specifier nor type qualifier.

    The returned `specs` list may contain two types of elements: tokens
    and Node objects. A Node object will be included for a struct or
    union declaration, and a token for all other declaration specifiers.
    """
    # Simple type specifier kinds: basic type keywords plus sign keywords.
    simple_kinds = set(ctypes.simple_types.keys())
    simple_kinds |= {token_kinds.signed_kw, token_kinds.unsigned_kw}

    qualifier_kinds = {token_kinds.const_kw}

    storage_kinds = {token_kinds.auto_kw, token_kinds.static_kw,
                     token_kinds.extern_kw, token_kinds.typedef_kw}

    # The specifier class constrains which further type specifiers are
    # allowed. It is unset until the first type specifier is seen; once
    # set to CLASS_STRUCT or CLASS_TYPEDEF, no more type specifiers may
    # follow, while CLASS_SIMPLE permits additional simple specifiers
    # (e.g. `unsigned int`). This distinction matters for typedef parsing.
    CLASS_SIMPLE, CLASS_STRUCT, CLASS_TYPEDEF = 1, 2, 3
    spec_class = None

    specs = []
    while True:
        # A struct specifier, parsed into a Node.
        if spec_class is None and token_is(index, token_kinds.struct_kw):
            struct_node, index = parse_struct_spec(index + 1)
            specs.append(struct_node)
            spec_class = CLASS_STRUCT

        # A union specifier, also parsed into a Node.
        elif spec_class is None and token_is(index, token_kinds.union_kw):
            union_node, index = parse_union_spec(index + 1)
            specs.append(union_node)
            spec_class = CLASS_STRUCT

        # An identifier acts as a specifier only if it names a typedef.
        elif (spec_class is None
              and token_is(index, token_kinds.identifier)
              and p.symbols.is_typedef(p.tokens[index])):
            specs.append(p.tokens[index])
            index += 1
            spec_class = CLASS_TYPEDEF

        # Simple specifiers may continue accumulating after one another.
        elif (spec_class in (None, CLASS_SIMPLE)
              and token_in(index, simple_kinds)):
            specs.append(p.tokens[index])
            index += 1
            spec_class = CLASS_SIMPLE

        # Qualifiers are accepted at any point, any number of times.
        elif token_in(index, qualifier_kinds):
            specs.append(p.tokens[index])
            index += 1

        # Storage specifiers: kept normally; in specifier-qualifier-list
        # context they are reported as errors and skipped.
        elif token_in(index, storage_kinds):
            if _spec_qual:
                err = "storage specifier not permitted here"
                error_collector.add(CompilerError(err, p.tokens[index].r))
            else:
                specs.append(p.tokens[index])
            index += 1

        else:
            break

    if not specs:
        raise_error("expected declaration specifier", index, ParserError.AT)
    return specs, index