def parse_id_expr():
    """Parse an expression that starts with an identifier.

    The identifier text is read from ``GLOBALS["CUR_VALUE"]`` and the
    ``TK.ID`` token is consumed. The token that follows selects the result:

    * anything other than ``TK.LPAREN``: a variable reference. The name is
      looked up in ``GLOBALS["SYMBOL_TABLE"]``; a hit yields a
      ``VariableExprAST``, wrapped in a ``UnaryExprAST`` (third argument
      ``True``) when ``is_prepost_token()`` reports an operator right after
      it. A miss is reported through ``processing_error``.
    * ``TK.LPAREN``: a call. Comma-separated arguments are parsed with
      ``parse_expression`` up to the closing ``TK.RPAREN`` and a
      ``CallExprAST`` is returned.
    """
    name = GLOBALS["CUR_VALUE"]
    match(TK.ID)

    if GLOBALS["CUR_TOKEN"] != TK.LPAREN:
        # Variable reference.
        symbol = GLOBALS["SYMBOL_TABLE"].lookup(name)
        if not symbol:
            # NOTE(review): if processing_error returns, control continues
            # into the call-parsing code below -- confirm it aborts.
            processing_error("'{}' undeclared".format(name))
        else:
            variable = VariableExprAST(symbol)
            if not is_prepost_token():
                return variable
            # Postfix operator directly after the variable.
            operator = GLOBALS["CUR_TOKEN"]
            get_token()
            return UnaryExprAST(operator, variable, True)

    # Function call: '(' [expr {',' expr}] ')'
    match(TK.LPAREN)
    arguments = []
    if GLOBALS["CUR_TOKEN"] != TK.RPAREN:
        arguments.append(parse_expression())
        while GLOBALS["CUR_TOKEN"] != TK.RPAREN:
            match(TK.COMMA)
            arguments.append(parse_expression())
    match(TK.RPAREN)
    return CallExprAST(name, arguments)
def insert(self, symb):
    """Store ``symb`` in this table under ``symb["name"]`` and return it.

    If ``self.lookup`` already finds an entry for that name,
    ``processing_error`` is called with an "already initialized" message.
    The symbol is tagged with its owning table via ``symb["table"]``.
    """
    key = symb["name"]
    existing = self.lookup(key)
    if existing:
        processing_error("'{}' already initialized".format(key))

    # Back-reference first, then register, mirroring the original order.
    symb["table"] = self
    self.table[key] = symb
    return symb
def parse_type_flags():
    """Consume a run of type-modifier tokens and return them as a list.

    Tokens are read for as long as ``is_typemod_token()`` is true. A token
    found in ``TYPE_FLAG_TOKENS`` or ``TYPE_STORAGE_TOKENS`` is recorded once;
    a second storage-class token in the same run is reported through
    ``processing_error``.

    Returns:
        list: the distinct modifier tokens, in the order they first appeared.
    """
    flags = []
    storage = None
    while is_typemod_token():
        token = GLOBALS["CUR_TOKEN"]
        if token in TYPE_FLAG_TOKENS:
            if token not in flags:
                flags.append(token)
        elif token in TYPE_STORAGE_TOKENS:
            # Compare against None explicitly: a storage token whose value
            # happens to be falsy must still count as "already seen".
            if storage is not None:
                processing_error("multiple storage classes in declaration")
            if token not in flags:
                flags.append(token)
            storage = token
        get_token()
    # A list kept in first-seen order gives callers a reproducible result;
    # iterating a set would yield the same tokens in arbitrary order.
    return flags
def parse_unary():
    """Parse a unary expression, or defer to ``parse_primary``.

    When the current token is not one of ``TK.NOT``, ``TK.BIT_NOT``,
    ``TK.INCR`` or ``TK.DECR`` the result of ``parse_primary()`` is returned.
    Otherwise the token is taken as a prefix operator:

    * if ``is_prepost_token()`` is true the operand must come back from
      ``parse_id_expr()`` as exactly a ``VariableExprAST``; anything else is
      reported through ``processing_error``;
    * for the remaining operators the operand is parsed recursively and the
      function returns ``None`` when that operand is falsy.
    """
    prefix_op = GLOBALS["CUR_TOKEN"]
    if prefix_op not in (TK.NOT, TK.BIT_NOT, TK.INCR, TK.DECR):
        return parse_primary()

    if is_prepost_token():
        get_token()
        target = parse_id_expr()
        if type(target) is VariableExprAST:
            return UnaryExprAST(prefix_op, target)
        # NOTE(review): if processing_error returns, parsing continues with
        # the generic operand path below -- confirm it aborts.
        processing_error("Expected variable after {}".format(prefix_op))

    # Skip the operator and parse its operand.
    get_token()
    operand = parse_unary()
    return UnaryExprAST(prefix_op, operand) if operand else None
def parse_id_decl(params=None):
    """Parse a variable declaration, function prototype or function definition.

    Args:
        params: optional ``(flags, typ, signed)`` triple already parsed by the
            caller. When falsy, the modifiers (via ``parse_type_flags``) and
            the type (via ``parse_type_dec``) are parsed here.

    Returns:
        * ``VariableExprAST(symbol, True)`` when the identifier is not
          followed by ``TK.LPAREN``;
        * the value of ``parse_prototype`` when the prototype is followed by
          ``TK.SEMICOLON``;
        * ``FunctionAST(prototype, body)`` when a braced body follows.
    """
    if params:
        flags, typ, signed = params
    else:
        flags = parse_type_flags() if is_typemod_token() else []
        typ, signed = parse_type_dec()

    name = GLOBALS["CUR_VALUE"]
    match(TK.ID)

    # Variable declaration: no parameter list follows the name.
    if GLOBALS["CUR_TOKEN"] != TK.LPAREN:
        if typ == TYPE.VOID:
            processing_error("variable '{}' declared void".format(name))
        variable = make_symbol(name, TK.VAR, flags, typ, signed)
        GLOBALS["SYMBOL_TABLE"].insert_symbol(variable)
        return VariableExprAST(variable, True)

    # Function: reuse the symbol found by lookup, otherwise create one.
    function = GLOBALS["SYMBOL_TABLE"].lookup(name) or GLOBALS[
        "SYMBOL_TABLE"
    ].insert(name, TK.FUNC, flags, typ, signed)
    prototype = parse_prototype(function)

    # Bare prototype ends at the semicolon.
    if GLOBALS["CUR_TOKEN"] == TK.SEMICOLON:
        get_token()
        return prototype

    # Definition: the parameters are inserted into a new scope around the body.
    match(TK.LBRACE)
    GLOBALS["SYMBOL_TABLE"].enter_scope()
    for parameter in prototype.args:
        GLOBALS["SYMBOL_TABLE"].insert_symbol(parameter.symbol)
    body = parse_body()
    match(TK.RBRACE)
    GLOBALS["SYMBOL_TABLE"].exit_scope()
    return FunctionAST(prototype, body)
def match(token):
    """Consume the current token if it equals ``token``, else report an error.

    On a match ``get_token()`` advances the lexer. On a mismatch
    ``processing_error`` is called with the expected and actual tokens and
    nothing is consumed.
    """
    actual = GLOBALS["CUR_TOKEN"]
    if actual != token:
        processing_error("Expected {}, but got {}".format(token, actual))
        return
    get_token()
def get_token():
    """Get the next token from the source code.

    On return ``GLOBALS["CUR_TOKEN"]`` holds the token kind and
    ``GLOBALS["CUR_VALUE"]`` its payload:

    * identifier / keyword: the scanned text;
    * numeric literal: an ``int`` (``TK.INTLIT``) or ``float``
      (``TK.DOUBLELIT``);
    * character literal: an ``int`` built by packing each character's
      ``ord`` value into successive bytes (``TK.INTLIT``);
    * string literal: the characters between the quotes
      (``TK.STRINGLIT``);
    * operators, ``TK.EOF``: the empty string.

    The module-level ``curr_char`` is the one-character lookahead and is left
    on the first character after the token. Whitespace (newlines included)
    and ``//`` / ``/* */`` comments are skipped. Lexical errors are reported
    through ``processing_error``.

    NOTE(review): the error paths assume ``processing_error`` does not
    return -- confirm against its definition.
    """
    global curr_char

    # Reinitialize current token variables
    GLOBALS["CUR_VALUE"] = ""

    # Skip whitespace. Newlines are ordinary whitespace here (the EOLN
    # special case is disabled).
    while True:
        if curr_char == TK.EOF:
            GLOBALS["CUR_TOKEN"] = TK.EOF
            return
        if not curr_char.isspace():
            break
        curr_char = get_char()

    # Identifier or keyword: [A-Za-z_][A-Za-z0-9_]*
    if curr_char.isalpha() or curr_char == '_':
        while True:
            GLOBALS["CUR_VALUE"] += curr_char
            curr_char = get_char()
            if not (curr_char.isalnum() or curr_char == '_'):
                break
        if GLOBALS["CUR_VALUE"] in KEYWORDS:
            GLOBALS["CUR_TOKEN"] = KEYWORDS[GLOBALS["CUR_VALUE"]]
        else:
            GLOBALS["CUR_TOKEN"] = TK.ID

    # Numeric literal: digits, optional fraction, optional e/E exponent
    elif curr_char.isdigit() or curr_char == '.':
        in_decimal = curr_char == '.'
        in_e = False
        while True:
            GLOBALS["CUR_VALUE"] += curr_char
            curr_char = get_char()
            if curr_char.isdigit():
                continue

            if in_e:
                # Exponent digits are finished.
                GLOBALS["CUR_VALUE"] = float(GLOBALS["CUR_VALUE"])
                GLOBALS["CUR_TOKEN"] = TK.DOUBLELIT
                break

            if curr_char == 'e' or curr_char == 'E':
                exponent_marker = curr_char
                GLOBALS["CUR_VALUE"] += curr_char
                in_e = True
                curr_char = get_char()
                # Optional sign, then at least one digit is required so the
                # text handed to float() is always well-formed.
                if curr_char == '+' or curr_char == '-':
                    GLOBALS["CUR_VALUE"] += curr_char
                    curr_char = get_char()
                if curr_char.isdigit():
                    continue
                processing_error("Expected integer after " + exponent_marker)

            if in_decimal:
                GLOBALS["CUR_VALUE"] = float(GLOBALS["CUR_VALUE"])
                GLOBALS["CUR_TOKEN"] = TK.DOUBLELIT
                break

            if curr_char == '.':
                # First '.' switches to the fractional part.
                in_decimal = True
                continue

            GLOBALS["CUR_VALUE"] = int(GLOBALS["CUR_VALUE"])
            GLOBALS["CUR_TOKEN"] = TK.INTLIT
            break

    # Line comment, block comment, '/=' or '/'
    elif curr_char == '/':
        curr_char = get_char()
        if curr_char == '/':
            # Skip to end of line, or to EOF when the last line has no
            # trailing newline.
            while curr_char != '\n' and curr_char != TK.EOF:
                curr_char = get_char()
            # Rescan: yields the token after the comment, or TK.EOF.
            get_token()
        elif curr_char == '*':
            curr_char = get_char()
            while True:
                if curr_char == '*':
                    curr_char = get_char()
                    if curr_char == '/':
                        curr_char = get_char()
                        get_token()
                        break
                    # Not '/': re-examine this character, since it may be
                    # another '*' that starts the real terminator ("**/").
                elif curr_char == TK.EOF:
                    processing_error("Unterminated comment")
                    # Only reached if processing_error returns: report EOF
                    # instead of looping forever.
                    GLOBALS["CUR_TOKEN"] = TK.EOF
                    return
                else:
                    curr_char = get_char()
        elif curr_char == '=':
            GLOBALS["CUR_TOKEN"] = TK.DIV_EQ
            curr_char = get_char()
        else:
            GLOBALS["CUR_TOKEN"] = TK.DIV

    # Character literal: each character is packed into the next byte
    elif curr_char == '\'':
        GLOBALS["CUR_VALUE"] = 0
        while True:
            curr_char = get_char()
            if curr_char == '\'':
                GLOBALS["CUR_TOKEN"] = TK.INTLIT
                curr_char = get_char()
                break
            elif curr_char == '\n' or curr_char == TK.EOF:
                processing_error("Unterminated character string")
            else:
                GLOBALS["CUR_VALUE"] <<= 8
                GLOBALS["CUR_VALUE"] += ord(curr_char)

    # String literal: everything up to the closing quote on the same line
    elif curr_char == '"':
        while True:
            curr_char = get_char()
            if curr_char == '"':
                GLOBALS["CUR_TOKEN"] = TK.STRINGLIT
                curr_char = get_char()
                break
            elif curr_char == '\n' or curr_char == TK.EOF:
                processing_error("Unterminated string literal")
            else:
                GLOBALS["CUR_VALUE"] += curr_char

    # Operator: longest match over the one-, two- and three-character tables
    elif curr_char in OPERATORS["L1"]:
        # Keep each prefix in its own variable instead of slicing the
        # lookahead back off; the lookup then does not depend on how long
        # the lookahead string is (e.g. at EOF).
        one = curr_char
        curr_char = get_char()
        two = one + curr_char
        if two in OPERATORS["L2"]:
            curr_char = get_char()
            three = two + curr_char
            if three in OPERATORS["L3"]:
                GLOBALS["CUR_TOKEN"] = OPERATORS["L3"][three]
                curr_char = get_char()
            else:
                GLOBALS["CUR_TOKEN"] = OPERATORS["L2"][two]
        else:
            GLOBALS["CUR_TOKEN"] = OPERATORS["L1"][one]

    # If character is an unrecognized token, raise error
    else:
        processing_error("Unrecognized token: {}".format(curr_char))