def visit_call_rvalue(self, callStmt):
    """Type check a function call and record the type of its result.

    The function's symbol-table entry is read as a pair: element 0 holds
    the parameter types and element 1 the return type.  Each argument is
    visited and compared against the matching parameter type; a nil
    argument is accepted for any parameter.

    On success ``self.current_type`` becomes the call's function token,
    whose ``tokentype`` is overwritten with the return type (primitive
    ``*TYPE`` kinds are converted to the matching ``*VAL`` kinds).

    Raises:
        error.MyPLError: if the function is unknown, the argument count
            differs from the parameter count, or an argument has the
            wrong type.
    """
    fun_token = callStmt.fun
    fun_name = fun_token.lexeme
    if not self.sym_table.id_exists(fun_name):
        raise error.MyPLError("function " + fun_name + " not recognized",
                              fun_token.line, fun_token.column)

    signature = self.sym_table.get_info(fun_name)
    param_types = signature[0]
    if len(param_types) != len(callStmt.args):
        raise error.MyPLError("wrong number of parameters " + fun_name,
                              fun_token.line, fun_token.column)

    for index, arg in enumerate(callStmt.args):
        arg.accept(self)
        arg_type = self.current_type.tokentype
        if arg_type != token.NIL and param_types[index] != arg_type:
            message = ("parameter " + str(index) + " is the wrong type" +
                       param_types[index] + self.current_type.lexeme)
            raise error.MyPLError(message, self.current_type.line,
                                  self.current_type.column)

    # Declared primitive types are compared elsewhere as value kinds.
    value_kinds = {
        token.INTTYPE: token.INTVAL,
        token.FLOATTYPE: token.FLOATVAL,
        token.BOOLTYPE: token.BOOLVAL,
        token.STRINGTYPE: token.STRINGVAL,
    }
    return_type = signature[1]
    fun_token.tokentype = value_kinds.get(return_type, return_type)
    self.current_type = fun_token
def visit_complex_expr(self, complExpr):
    """Type check a binary arithmetic expression.

    Both operands are visited; they must have the same type, and that
    type may be neither bool nor nil.  Strings only support ``+``.
    On success ``self.current_type`` is the left operand's type token.

    Raises:
        error.MyPLError: on an operand type mismatch, a bool/nil operand,
            or a non-``+`` operator applied to strings.  The error is
            located at the right-hand operand.
    """
    complExpr.first_operand.accept(self)
    lhs = self.current_type
    complExpr.rest.accept(self)
    rhs = self.current_type

    if (lhs.tokentype != rhs.tokentype
            or lhs.tokentype in (token.BOOLVAL, token.NIL)):
        message = ("cant combine a " + str(lhs.tokentype) + " to a " +
                   str(rhs.tokentype))
        # Line and column must come from the same token; the original
        # paired rhs.line with lhs.column.
        raise error.MyPLError(message, rhs.line, rhs.column)

    if complExpr.math_rel.lexeme != '+' and lhs.tokentype == token.STRINGVAL:
        message = ("can only add a " + str(lhs.tokentype) + " to a " +
                   str(rhs.tokentype))
        raise error.MyPLError(message, rhs.line, rhs.column)

    self.current_type = lhs
def visit_bool_expr(self, boolStmt):
    """Type check a boolean expression.

    The first expression is always visited.  When a relational operator
    is present the second expression is visited too and must have the
    same type as the first, unless it is nil; nil may only be compared
    with ``==`` or ``!=``.  When a connector is present the remainder of
    the expression is visited as well.

    Raises:
        error.MyPLError: on mismatched operand types or an ordering
            comparison against nil.
    """
    boolStmt.first_expr.accept(self)
    lhs = self.current_type

    if boolStmt.bool_rel is not None:
        boolStmt.second_expr.accept(self)
        rhs = self.current_type
        if lhs.tokentype != rhs.tokentype and rhs.tokentype != token.NIL:
            message = ("cant compare a " + str(lhs.tokentype) + " with a " +
                       str(rhs.tokentype))
            # MyPLError takes (message, line, column); the original call
            # passed column and line the wrong way round.
            raise error.MyPLError(message, rhs.line, rhs.column)
        if (rhs.tokentype == token.NIL
                and boolStmt.bool_rel.lexeme not in ("!=", "==")):
            raise error.MyPLError("can only check if == or != with nil",
                                  rhs.line, rhs.column)

    if boolStmt.bool_connector is not None:
        boolStmt.rest.accept(self)
def visit_new_rvalue(self, newStmt):
    """Type check a ``new <struct>`` expression.

    The struct name must be present in the symbol table.  On success
    ``self.current_type`` is the struct-name token, and that token's
    ``tokentype`` is overwritten with the struct's name so later
    comparisons see the struct type.

    Raises:
        error.MyPLError: if the struct name is not in the symbol table.
    """
    struct_token = newStmt.struct_type
    if not self.sym_table.id_exists(struct_token.lexeme):
        message = struct_token.lexeme + " does not exist"
        # MyPLError takes (message, line, column); the original call
        # passed column and line the wrong way round.
        raise error.MyPLError(message, struct_token.line, struct_token.column)
    self.current_type = struct_token
    self.current_type.tokentype = struct_token.lexeme
def visit_var_decl_stmt(self, var_decl):
    """Type check a variable declaration and record the variable's type.

    The initializer is visited exactly once, *before* the new name is
    added to the symbol table, so an initializer such as ``var x = x``
    resolves ``x`` against what was already declared rather than against
    the half-declared new variable.  (The original visited it a second
    time after ``add_id``.)

    With an explicit type annotation the variable gets the declared type
    (primitive ``*TYPE`` kinds are stored as the matching ``*VAL`` kinds,
    struct types by name) and the initializer must match it or be nil.
    Without an annotation the variable takes the initializer's type, and
    nil is rejected.  While ``self.inStruct`` is true the member type is
    also recorded in ``self.dict``.

    Raises:
        error.MyPLError: on redeclaration in the current environment, an
            unknown struct type, a type mismatch, or an untyped nil
            initializer.
    """
    var_id = var_decl.var_id
    var_name = var_id.lexeme

    var_decl.var_expr.accept(self)
    rhs = self.current_type

    # check that variable isn't already defined
    curr_env = self.sym_table.get_env_id()
    if self.sym_table.id_exists_in_env(var_name, curr_env):
        msg = 'variable already defined in current environment'
        self.__error(msg, var_id)
    self.sym_table.add_id(var_name)

    declared = var_decl.var_type
    if declared is None:
        # Implicit typing: the variable takes the initializer's type.
        var_type = rhs.tokentype
        self.sym_table.set_info(var_name, var_type)
        if var_type == token.NIL:
            raise error.MyPLError(
                "can only assign a nil to an explicitly defined variable",
                rhs.line, rhs.column)
    else:
        value_kinds = {
            token.INTTYPE: token.INTVAL,
            token.STRINGTYPE: token.STRINGVAL,
            token.FLOATTYPE: token.FLOATVAL,
            token.BOOLTYPE: token.BOOLVAL,
        }
        if declared.tokentype == token.ID:
            if not self.sym_table.id_exists(declared.lexeme):
                message = declared.lexeme + " does not exist"
                # MyPLError takes (message, line, column); the original
                # call passed column and line the wrong way round.
                raise error.MyPLError(message, declared.line, declared.column)
            var_type = declared.lexeme
        else:
            var_type = value_kinds.get(declared.tokentype, declared.tokentype)
        self.sym_table.set_info(var_name, var_type)
        if var_type != rhs.tokentype and rhs.tokentype != token.NIL:
            message = ("cant assign a " + str(rhs.tokentype) + " to a " +
                       str(var_type))
            raise error.MyPLError(message, rhs.line, rhs.column)

    if self.inStruct:
        self.dict[var_name] = var_type
def visit_assign_stmt(self, assign_stmt):
    """Type check an assignment statement.

    The target is visited first; whatever that leaves in
    ``self.current_type`` is taken as the target's type and compared
    directly with the ``tokentype`` of the visited right-hand side.
    A nil right-hand side is accepted for any target.

    Raises:
        error.MyPLError: if the right-hand side is not nil and its type
            differs from the target's type.
    """
    assign_stmt.lhs.accept(self)
    target_type = self.current_type

    assign_stmt.rhs.accept(self)
    value = self.current_type
    value_type = value.tokentype

    if value_type == token.NIL or value_type == target_type:
        return
    raise error.MyPLError(
        'mismatch type in assignment ' + target_type + " " + value_type,
        value.line, value.column)
def visit_simple_rvalue(self, rvalue):
    """Record the type of a simple rvalue (a literal or a bare name).

    For a literal, ``self.current_type`` is the literal's own token.
    For an identifier, it is a deep copy of the token whose ``tokentype``
    is replaced by the symbol-table info for that name, so the AST token
    itself is left untouched.

    Raises:
        error.MyPLError: if an identifier is not in the symbol table.
    """
    value = rvalue.val
    self.current_type = value
    if value.tokentype == token.ID:
        if not self.sym_table.id_exists(value.lexeme):
            message = value.lexeme + " does not exist"
            # MyPLError takes (message, line, column); the original call
            # passed column and line the wrong way round.
            raise error.MyPLError(message, value.line, value.column)
        resolved = copy.deepcopy(value)
        resolved.tokentype = self.sym_table.get_info(value.lexeme)
        self.current_type = resolved
def __type(self):
    """<type> ::= ID | INTTYPE | FLOATTYPE | BOOLTYPE | STRINGTYPE

    Consume the current token when it is one of the type tokens above.

    Raises:
        error.MyPLError: if the current token is not a type token.
    """
    current = self.current_token
    type_tokens = (token.ID, token.INTTYPE, token.FLOATTYPE,
                   token.BOOLTYPE, token.STRINGTYPE)
    if current.tokentype in type_tokens:
        self.__advance()
        return
    raise error.MyPLError(
        'expected a type, found"' + current.lexeme + '" in parser',
        current.line, current.column)
def visit_lvalue(self, lvalueStmt):
    """Resolve the type of an assignment target.

    For a single name, ``self.current_type`` becomes that name's
    symbol-table info.  For a dotted path (``a.b.c``) the first name's
    info is taken as a struct name, and each further member is looked up
    in the info of the current struct (indexed by member lexeme).  The
    final member type is converted from a primitive ``*TYPE`` kind to the
    matching ``*VAL`` kind before being stored in ``self.current_type``.

    Raises:
        error.MyPLError: if the first name, or any struct reached while
            walking the path, is not in the symbol table.
    """
    path = lvalueStmt.path
    if len(path) == 1:
        target = path[-1]
        if not self.sym_table.id_exists(target.lexeme):
            raise error.MyPLError(target.lexeme + " is not initialized",
                                  target.line, target.column)
        self.current_type = self.sym_table.get_info(target.lexeme)
        return

    root = path[0]
    if not self.sym_table.id_exists(root.lexeme):
        # The original indexed the path with ``i`` before it was assigned,
        # which raised UnboundLocalError instead of the intended error.
        raise error.MyPLError(root.lexeme + "- does not exist",
                              root.line, root.column)

    # struct_name is the type of the path prefix walked so far; the
    # struct is checked before every member lookup, including the last.
    struct_name = self.sym_table.get_info(root.lexeme)
    for member in path[1:]:
        if not self.sym_table.id_exists(struct_name):
            raise error.MyPLError(str(struct_name) + "- does not exist",
                                  member.line, member.column)
        struct_name = self.sym_table.get_info(struct_name)[member.lexeme]

    value_kinds = {
        token.INTTYPE: token.INTVAL,
        token.FLOATTYPE: token.FLOATVAL,
        token.BOOLTYPE: token.BOOLVAL,
        token.STRINGTYPE: token.STRINGVAL,
    }
    self.current_type = value_kinds.get(struct_name, struct_name)
def visit_id_rvalue(self, idrVal):
    """Resolve the type of an identifier rvalue (``x`` or ``a.b.c``).

    For a single name, ``self.current_type`` is that name's token with
    its ``tokentype`` overwritten by the symbol-table info.  For a dotted
    path the first name's info is taken as a struct name and each further
    member is looked up in the info of the current struct (indexed by
    member lexeme).  The final member type is converted from a primitive
    ``*TYPE`` kind to the matching ``*VAL`` kind and written into the
    last path token, which becomes ``self.current_type``.

    Raises:
        error.MyPLError: if the first name, or any struct reached while
            walking the path, is not in the symbol table.
    """
    path = idrVal.path
    root = path[0]

    if len(path) == 1:
        if not self.sym_table.id_exists(root.lexeme):
            message = root.lexeme + " does not exist"
            # MyPLError takes (message, line, column); the original call
            # passed column and line the wrong way round.
            raise error.MyPLError(message, root.line, root.column)
        resolved = self.sym_table.get_info(root.lexeme)
        self.current_type = root
        self.current_type.tokentype = resolved
        return

    if not self.sym_table.id_exists(root.lexeme):
        # The original indexed the path with ``i`` before it was assigned,
        # which raised UnboundLocalError instead of the intended error.
        raise error.MyPLError(root.lexeme + "- does not exist",
                              root.line, root.column)

    # struct_name is the type of the path prefix walked so far; the
    # struct is checked before every member lookup, including the last.
    struct_name = self.sym_table.get_info(root.lexeme)
    for member in path[1:]:
        if not self.sym_table.id_exists(struct_name):
            raise error.MyPLError(str(struct_name) + " - does not exist",
                                  member.line, member.column)
        struct_name = self.sym_table.get_info(struct_name)[member.lexeme]

    value_kinds = {
        token.INTTYPE: token.INTVAL,
        token.FLOATTYPE: token.FLOATVAL,
        token.BOOLTYPE: token.BOOLVAL,
        token.STRINGTYPE: token.STRINGVAL,
    }
    self.current_type = path[-1]
    self.current_type.tokentype = value_kinds.get(struct_name, struct_name)
def __mathrel(self):
    """<mathrel> ::= PLUS | MINUS | DIVIDE | MULTIPLY | MODULO

    Consume the current token when it is an arithmetic operator and
    return that operator token.

    Raises:
        error.MyPLError: if the current token is not an arithmetic
            operator.
    """
    operator = self.current_token
    math_operators = (token.PLUS, token.MINUS, token.DIVIDE,
                      token.MULTIPLY, token.MODULO)
    if operator.tokentype not in math_operators:
        raise error.MyPLError(
            'expected a math operator, found "' + operator.lexeme +
            '" in parser',
            operator.line, operator.column)
    self.__advance()
    return operator
def __boolrel(self):
    """<boolrel> ::= EQUAL | LESS_THAN | GREATER_THAN | LESS_THAN_EQUAL |
                     GREATER_THAN_EQUAL | NOT_EQUAL

    Consume the current token when it is a relational operator.

    Raises:
        error.MyPLError: if the current token is not a relational
            operator.
    """
    current = self.current_token
    relational_operators = (
        token.EQUAL, token.LESS_THAN, token.LESS_THAN_EQUAL,
        token.GREATER_THAN_EQUAL, token.GREATER_THAN, token.NOT_EQUAL,
    )
    if current.tokentype in relational_operators:
        self.__advance()
        return
    raise error.MyPLError(
        'expected a conditional operator, found "' + current.lexeme +
        '" in parser',
        current.line, current.column)
def __rvalue(self):
    """<rvalue> ::= STRINGVAL | INTVAL | BOOLVAL | FLOATVAL | NIL | NEW ID | <idrval>

    Recognize one rvalue and consume its tokens; nothing is returned.

    Raises:
        error.MyPLError: if the current token cannot start an rvalue.
    """
    kind = self.current_token.tokentype
    literal_kinds = (token.STRINGVAL, token.INTVAL, token.BOOLVAL,
                     token.FLOATVAL, token.NIL)

    if kind == token.NEW:
        self.__advance()
        self.__eat(token.ID, "expecting an identifier")
    elif kind == token.ID:
        self.__idrval()
    elif kind in literal_kinds:
        self.__advance()
    else:
        offending = self.current_token
        raise error.MyPLError(
            'expected a rvalue found"' + offending.lexeme + '" in parser',
            offending.line, offending.column)
def __rvalue(self):
    """<rvalue> ::= STRINGVAL | INTVAL | BOOLVAL | FLOATVAL | NIL | NEW ID | <idrval>

    Parse one rvalue and return its AST node: an ``ast.NewRValue`` for
    ``new ID``, whatever ``__idrval`` returns for an identifier, or an
    ``ast.SimpleRValue`` wrapping a literal token.

    Raises:
        error.MyPLError: if the current token cannot start an rvalue.
    """
    kind = self.current_token.tokentype

    if kind == token.NEW:
        node = ast.NewRValue()
        self.__advance()
        # After skipping ``new`` the current token is the struct name.
        node.struct_type = self.current_token
        self.__eat(token.ID, "expecting an identifier")
        return node

    if kind == token.ID:
        return self.__idrval()

    literal_kinds = (token.STRINGVAL, token.INTVAL, token.BOOLVAL,
                     token.FLOATVAL, token.NIL)
    if kind in literal_kinds:
        node = ast.SimpleRValue()
        node.val = self.current_token
        self.__advance()
        return node

    offending = self.current_token
    raise error.MyPLError(
        'expected a rvalue found"' + offending.lexeme + '" in parser',
        offending.line, offending.column)
def visit_fun_decl_stmt(self, funStmt):
    """Type check a function declaration and record its signature.

    The function name is added to the symbol table, then a new
    environment is pushed for the parameters and body.  The stored
    signature is a two-element list: the list of parameter types, then
    the return type (the declared token kind, or the struct name when
    the return type is an identifier).

    While the body is visited, the symbol-table entry ``"return"`` holds
    the declared return token kind; its previous value is restored after
    the environment is popped.

    Raises:
        error.MyPLError: if two parameters share a name.
    """
    fun_name = funStmt.fun_name.lexeme
    self.sym_table.add_id(fun_name)

    param_types = []
    param_names = []
    self.sym_table.push_environment()
    for param in funStmt.params:
        param.accept(self)
        declared = self.current_type
        if declared.lexeme in param_names:
            message = "cant have parameters with the same name" + declared.lexeme
            # MyPLError takes (message, line, column); the original call
            # passed column and line the wrong way round.
            raise error.MyPLError(message, declared.line, declared.column)
        param_types.append(declared.tokentype)
        param_names.append(declared.lexeme)

    return_token = funStmt.return_type
    if return_token.tokentype != token.ID:
        return_type = return_token.tokentype
    else:
        return_type = return_token.lexeme
    self.sym_table.set_info(fun_name, [param_types, return_type])

    enclosing_return = self.sym_table.get_info("return")
    self.sym_table.set_info("return", return_token.tokentype)
    for stmt in funStmt.stmt_list.stmts:
        self.current_type = None
        stmt.accept(self)
    self.sym_table.pop_environment()
    self.sym_table.set_info("return", enclosing_return)
def next_token(self):
    """Return the next token from the input.

    Whitespace and ``#`` comments are skipped first.  ``self.line`` and
    ``self.column`` are updated by this method as characters are
    consumed; a token's column is the column of its first character.

    Fixes relative to the previous implementation:
      * whitespace after a newline (indentation, tabs, blank lines made
        of spaces) is skipped instead of being returned as an ID token;
      * an unterminated string at end of input raises instead of looping
        forever;
      * a lone ``!`` raises (the error was built but never raised);
      * a character that starts no token raises instead of being
        returned as an ID token.

    Raises:
        error.MyPLError: on a malformed number or string, a lone ``!``,
            or an unexpected character.
    """
    # Skip any mix of whitespace and comments before the token.
    while True:
        upcoming = self.__peek()
        if upcoming == '\n':
            self.__read()
            self.line += 1
            self.column = 0
        elif upcoming == '#':
            # Drop the rest of the line; the newline itself is handled
            # by the branch above on the next pass.
            while self.__peek() not in ('\n', ''):
                self.__read()
                self.column += 1
        elif upcoming.isspace():
            self.__read()
            self.column += 1
        else:
            break

    if upcoming == '':
        return token.Token(token.EOS, '', self.line, self.column)

    # Keywords, boolean literals and identifiers.
    if upcoming.isalpha():
        word = ''
        while (self.__peek().isalpha() or self.__peek().isdigit()
               or self.__peek() == '_'):
            word += self.__read()
            self.column += 1
        keywords = {
            'bool': token.BOOLTYPE, 'int': token.INTTYPE,
            'float': token.FLOATTYPE, 'string': token.STRINGTYPE,
            'struct': token.STRUCTTYPE, 'and': token.AND, 'or': token.OR,
            'not': token.NOT, 'while': token.WHILE, 'do': token.DO,
            'if': token.IF, 'then': token.THEN, 'else': token.ELSE,
            'elif': token.ELIF, 'end': token.END, 'fun': token.FUN,
            'var': token.VAR, 'set': token.SET, 'return': token.RETURN,
            'new': token.NEW, 'nil': token.NIL,
            'true': token.BOOLVAL, 'false': token.BOOLVAL,
        }
        return token.Token(keywords.get(word, token.ID), word, self.line,
                           self.column - len(word) + 1)

    # String literal; the token's lexeme excludes the quotes.
    if upcoming == '"':
        self.__read()
        self.column += 1
        text = ''
        while self.__peek() not in ('"', '\n', ''):
            text += self.__read()
            self.column += 1
        stopper = self.__peek()
        if stopper == '\n':
            raise error.MyPLError("reached newline reading string",
                                  self.line, self.column + 1)
        if stopper == '':
            raise error.MyPLError('missing closing quotes', self.line,
                                  self.column + 1)
        self.__read()
        self.column += 1
        # The column now sits on the closing quote; step back over the
        # text and the opening quote.
        return token.Token(token.STRINGVAL, text, self.line,
                           self.column - len(text) - 1)

    # Integer or float literal.
    if upcoming.isdigit():
        number = ''
        while self.__peek().isdigit():
            number += self.__read()
            self.column += 1
            # A leading zero may not be followed by more digits.
            if number == '0' and self.__peek().isdigit():
                raise error.MyPLError(
                    'unexpected symbol "%s"' % self.__peek(), self.line,
                    self.column + 1)
        number_type = token.INTVAL
        if self.__peek() == '.':
            number_type = token.FLOATVAL
            number += self.__read()
            self.column += 1
            fraction_digits = 0
            while self.__peek().isdigit():
                number += self.__read()
                self.column += 1
                fraction_digits += 1
            if fraction_digits == 0:
                raise error.MyPLError("missing digit in float value",
                                      self.line, self.column + 1)
            if self.__peek() == '.':
                raise error.MyPLError(
                    "Too many decimal points in float value", self.line,
                    self.column + 1)
        if self.__peek().isalpha():
            raise error.MyPLError('unexpected symbol "%s"' % self.__peek(),
                                  self.line, self.column + 1)
        return token.Token(number_type, number, self.line,
                           self.column - len(number) + 1)

    # Punctuation and operators.
    symbol = self.__read()
    self.column += 1
    if symbol in ('>', '<', '=', '!') and self.__peek() == '=':
        symbol += self.__read()
        self.column += 1
        comparisons = {
            '==': token.EQUAL, '>=': token.GREATER_THAN_EQUAL,
            '<=': token.LESS_THAN_EQUAL, '!=': token.NOT_EQUAL,
        }
        return token.Token(comparisons[symbol], symbol, self.line,
                           self.column - 1)
    if symbol == '!':
        raise error.MyPLError('missing "=" after "!"', self.line,
                              self.column)
    punctuation = {
        '=': token.ASSIGN, ',': token.COMMA, ':': token.COLON,
        '/': token.DIVIDE, '.': token.DOT, '>': token.GREATER_THAN,
        '<': token.LESS_THAN, '(': token.LPAREN, ')': token.RPAREN,
        '-': token.MINUS, '%': token.MODULO, '*': token.MULTIPLY,
        '+': token.PLUS, ';': token.SEMICOLON,
    }
    if symbol not in punctuation:
        raise error.MyPLError('unexpected symbol "%s"' % symbol, self.line,
                              self.column)
    return token.Token(punctuation[symbol], symbol, self.line, self.column)
def __error(self, error_msg, error_token):
    """Raise a type-checker error located at ``error_token``.

    The message is ``error_msg`` followed by the offending token's
    lexeme and the phrase "in type checker".

    Raises:
        error.MyPLError: always.
    """
    message = ''.join(
        [error_msg, ', found "', error_token.lexeme, '" in type checker'])
    raise error.MyPLError(message, error_token.line, error_token.column)
def __error(self, error_msg):
    """Raise a parser error located at the current token.

    The message is ``error_msg`` followed by the current token's lexeme
    and the phrase "in parser".

    Raises:
        error.MyPLError: always.
    """
    offending = self.current_token
    message = ''.join([error_msg, ', found "', offending.lexeme, '" in parser'])
    raise error.MyPLError(message, offending.line, offending.column)
def next_token(self):
    """Return the next token from the input.

    Whitespace and ``#`` comments are skipped first.  This method never
    assigns ``self.line`` / ``self.column`` itself; it only reads them
    after calling ``self.__read()`` (presumably ``__read`` keeps them
    current -- confirm against its definition).  INTVAL and FLOATVAL
    tokens carry ``int`` / ``float`` lexemes; every other token carries
    a string lexeme.

    Fixes relative to the previous implementation:
      * whitespace and comments are skipped in a loop rather than with
        one recursive call per character, so long blank or commented
        regions cannot exhaust the recursion limit;
      * ``!`` and ``#`` end a numeric literal, so ``1!=2`` and a comment
        directly after a number are lexed instead of rejected.

    Raises:
        error.MyPLError: on a malformed number or string, a lone ``!``,
            a ``.`` directly followed by a digit, or an unexpected
            character.
    """
    # Skip any mix of whitespace and comments before the token.
    while True:
        peekValue = self.__peek()
        if peekValue == "#":
            while self.__peek() != "\n" and self.__peek() != "":
                self.__read()
        elif peekValue.isspace():
            self.__read()
        else:
            break

    if peekValue == "":
        return token.Token(token.EOS, "", self.line, self.column)

    # Integer or float literal.
    if peekValue.isdigit():
        curr_lexeme = self.__read()
        isFloat = False
        # Read until a character that may follow a number, whitespace,
        # or end of input; anything else inside the number is an error.
        while True:
            upcoming = self.__peek()
            if (upcoming == "" or upcoming.isspace()
                    or upcoming in ';=+-*/%<>(),"!#'):
                break
            if upcoming == ".":
                if isFloat:
                    raise error.MyPLError(
                        "two decimal points in one number", self.line,
                        self.column)
                # The integer part must itself be a valid intval: a
                # single digit, or several digits without a leading zero.
                if len(curr_lexeme) > 1 and curr_lexeme[0] == "0":
                    raise error.MyPLError(
                        "float starts with invalid int", self.line,
                        self.column - (len(curr_lexeme) - 1))
                isFloat = True
            elif not upcoming.isdigit():
                raise error.MyPLError(
                    'unexpected symbol "' + upcoming + '"', self.line,
                    self.column)
            curr_lexeme += self.__read()

        start_column = self.column - (len(curr_lexeme) - 1)
        if isFloat:
            if curr_lexeme[-1] == ".":
                raise error.MyPLError("missing digit in float value",
                                      self.line, self.column)
            return token.Token(token.FLOATVAL, float(curr_lexeme),
                               self.line, start_column)
        if len(curr_lexeme) > 1 and curr_lexeme[0] == "0":
            # The offending symbol is always the digit after the
            # leading zero.
            raise error.MyPLError(
                'unexpected symbol "' + curr_lexeme[1] + '"', self.line,
                start_column)
        return token.Token(token.INTVAL, int(curr_lexeme), self.line,
                           start_column)

    # Keywords, boolean literals and identifiers.
    if peekValue.isalpha():
        curr_lexeme = self.__read()
        while (self.__peek().isalpha() or self.__peek().isdigit()
               or self.__peek() == "_"):
            curr_lexeme += self.__read()
        keywords = {
            "bool": token.BOOLTYPE, "int": token.INTTYPE,
            "float": token.FLOATTYPE, "string": token.STRINGTYPE,
            "struct": token.STRUCTTYPE, "and": token.AND, "or": token.OR,
            "not": token.NOT, "while": token.WHILE, "do": token.DO,
            "if": token.IF, "then": token.THEN, "else": token.ELSE,
            "elif": token.ELIF, "end": token.END, "fun": token.FUN,
            "var": token.VAR, "set": token.SET, "return": token.RETURN,
            "new": token.NEW, "nil": token.NIL,
            "true": token.BOOLVAL, "false": token.BOOLVAL,
        }
        return token.Token(keywords.get(curr_lexeme, token.ID), curr_lexeme,
                           self.line, self.column - (len(curr_lexeme) - 1))

    # String literal; the token's lexeme excludes the quotes.
    if peekValue == '"':
        curr_lexeme = ""
        self.__read()  # throw out opening quotes
        while self.__peek() not in ('"', "\n", ""):
            curr_lexeme += self.__read()
        if self.__peek() == '"':
            self.__read()
            # -2 to account for the quotes
            return token.Token(token.STRINGVAL, curr_lexeme, self.line,
                               self.column - (len(curr_lexeme) - 1) - 2)
        if self.__peek() == "\n":
            raise error.MyPLError("reached newline reading string",
                                  self.line, self.column)
        # +1 to column to show where the closing quotes should be
        raise error.MyPLError("reached EOS reading string", self.line,
                              self.column + 1)

    # Operators that may be followed by "=" to form a comparison.
    comparisons = {
        "=": (token.ASSIGN, token.EQUAL),
        ">": (token.GREATER_THAN, token.GREATER_THAN_EQUAL),
        "<": (token.LESS_THAN, token.LESS_THAN_EQUAL),
    }
    if peekValue in comparisons:
        single_type, double_type = comparisons[peekValue]
        self.__read()
        if self.__peek() == "=":
            self.__read()
            return token.Token(double_type, peekValue + "=", self.line,
                               self.column - 1)
        return token.Token(single_type, peekValue, self.line, self.column)

    if peekValue == "!":
        self.__read()
        if self.__peek() == "=":
            self.__read()
            return token.Token(token.NOT_EQUAL, "!=", self.line,
                               self.column - 1)
        # -1 to column to go back one since we read it already
        raise error.MyPLError('unexpected symbol "!"', self.line,
                              self.column - 1)

    if peekValue == ".":
        self.__read()
        # IDs and keywords never start with a digit, so a dot directly
        # followed by a digit can only be a float missing its int part.
        if self.__peek().isdigit():
            raise error.MyPLError("missing intval before decimal point",
                                  self.line, self.column - 1)
        return token.Token(token.DOT, ".", self.line, self.column)

    punctuation = {
        ";": token.SEMICOLON, ",": token.COMMA, ":": token.COLON,
        "/": token.DIVIDE, "(": token.LPAREN, ")": token.RPAREN,
        "-": token.MINUS, "%": token.MODULO, "*": token.MULTIPLY,
        "+": token.PLUS,
    }
    if peekValue in punctuation:
        # Read before building the token so line/column are current.
        self.__read()
        return token.Token(punctuation[peekValue], peekValue, self.line,
                           self.column)

    # Nothing in MyPL starts with this character.
    raise error.MyPLError('unexpected symbol "' + peekValue + '"',
                          self.line, self.column)
def next_token(self):
    """Scan and return the next token from the input stream.

    Whitespace, newlines and '#' comments are skipped. The returned
    token carries the line and the column at which its lexeme starts.
    When the input is exhausted an EOS token with an empty lexeme is
    returned.

    Returns:
        token.Token: the next token.

    Raises:
        error.MyPLError: on a number with a leading zero, a malformed
            float (second '.', trailing '.'), a letter directly after
            a number, an unterminated string, or an unexpected symbol.
    """
    # Skip blanks and comments with a loop rather than one recursive call
    # per skipped character, so long runs of whitespace cannot exhaust the
    # interpreter's recursion limit.
    while True:
        tokenStartCol = self.column + 1
        if not self.__peek():
            return token.Token(token.EOS, "", self.line, tokenStartCol - 1)
        symbol = self.__read()
        self.column += 1
        if symbol == "#":
            # Stop at end of input as well as at '\n': a comment on the
            # last line without a trailing newline must not loop forever.
            while self.__peek() and self.__peek() != "\n":
                self.__read()
            if self.__peek():
                self.__read()  # consume the newline that ends the comment
                self.column = 0
                self.line += 1
        elif symbol == "\n":
            self.line += 1
            self.column = 0
        elif not symbol.isspace():
            break

    word = symbol

    # Keywords and identifiers: a letter followed by letters, digits or '_'.
    if symbol.isalpha():
        while (self.__peek().isalpha() or self.__peek() == "_"
               or self.__peek().isdigit()):
            word += self.__read()
            self.column += 1
        keywords = {
            'and': token.AND, 'or': token.OR, 'not': token.NOT,
            'while': token.WHILE, 'do': token.DO, 'if': token.IF,
            'then': token.THEN, 'else': token.ELSE, 'elif': token.ELIF,
            'end': token.END, 'fun': token.FUN, 'var': token.VAR,
            'set': token.SET, 'return': token.RETURN, 'new': token.NEW,
            'nil': token.NIL, 'true': token.BOOLVAL,
            'false': token.BOOLVAL, 'int': token.INTTYPE,
            'bool': token.BOOLTYPE, 'float': token.FLOATTYPE,
            'string': token.STRINGTYPE, 'struct': token.STRUCTTYPE,
        }
        return token.Token(keywords.get(word, token.ID), word, self.line,
                           tokenStartCol)

    # Integer and float literals.
    if symbol.isdigit():
        if symbol == "0" and self.__peek().isdigit():
            raise error.MyPLError(
                'unexpected symbol "' + self.__peek() + '"', self.line,
                tokenStartCol)
        # floats cannot have more than one decimal point
        hasDotYet = False
        while self.__peek().isdigit() or self.__peek() == '.':
            symbol = self.__read()
            word += symbol
            self.column += 1
            if symbol == '.':
                if hasDotYet:
                    raise error.MyPLError(
                        'unexpected symbol "' + self.__peek() + '"',
                        self.line, tokenStartCol)
                hasDotYet = True
        if word[-1] == '.':
            # float ends in '.' and is invalid
            raise error.MyPLError('missing digit in float value', self.line,
                                  self.column + 1)
        if self.__peek().isalpha():
            raise error.MyPLError(
                'unexpected symbol "' + self.__peek() + '"', self.line,
                tokenStartCol)
        numberType = token.FLOATVAL if hasDotYet else token.INTVAL
        return token.Token(numberType, word, self.line, tokenStartCol)

    # String literals: everything up to the closing quote on the same line.
    if symbol == '"':
        while self.__peek() and self.__peek() not in ('"', '\n'):
            word += self.__read()
            self.column += 1
        if self.__peek() != '"':
            # stopped on a newline or on end of input
            raise error.MyPLError("reached newline reading string",
                                  self.line, self.column)
        word += self.__read()
        self.column += 1
        # the lexeme excludes the surrounding quotes
        return token.Token(token.STRINGVAL, word[1:-1], self.line,
                           tokenStartCol)

    # Punctuation that is always a single character.
    singleCharTokens = {
        '(': token.LPAREN, ')': token.RPAREN, ',': token.COMMA,
        '%': token.MODULO, '+': token.PLUS, '-': token.MINUS,
        ';': token.SEMICOLON, ':': token.COLON, '*': token.MULTIPLY,
        '/': token.DIVIDE, '.': token.DOT,
    }
    if symbol in singleCharTokens:
        return token.Token(singleCharTokens[symbol], symbol, self.line,
                           tokenStartCol)

    # Operators that may be followed by '=': (plain type, type with '=').
    maybeEqualTokens = {
        '=': (token.ASSIGN, token.EQUAL),
        '>': (token.GREATER_THAN, token.GREATER_THAN_EQUAL),
        '<': (token.LESS_THAN, token.LESS_THAN_EQUAL),
    }
    if symbol in maybeEqualTokens:
        plainType, withEqualType = maybeEqualTokens[symbol]
        if self.__peek() == '=':
            word += self.__read()
            self.column += 1
            return token.Token(withEqualType, word, self.line, tokenStartCol)
        return token.Token(plainType, symbol, self.line, tokenStartCol)

    # '!' is only valid as the first half of '!='.
    if symbol == '!' and self.__peek() == '=':
        word += self.__read()
        self.column += 1
        return token.Token(token.NOT_EQUAL, word, self.line, tokenStartCol)

    raise error.MyPLError('unexpected symbol "' + symbol + '"', self.line,
                          tokenStartCol)
def next_token(self):
    """Scan and return the next token from the input stream.

    Blank characters, newlines and '#' comments are skipped. Token types
    are the string names used throughout this lexer ('PLUS', 'ID', ...).
    Every returned token carries the line and the column of the first
    character of its lexeme. At end of input an 'EOS' token is returned.

    Returns:
        token.Token: the next token.

    Raises:
        error.MyPLError: on a lone '!', a number with a leading zero, a
            letter directly after a number, a float with no digit after
            the '.', a newline or end of input inside a string, an
            identifier starting with '_', or any other unexpected
            character.
    """
    # Skip blanks and comments iteratively; recursing once per skipped
    # character can exhaust the recursion limit on long blank runs.
    while True:
        symbol = self.__read()
        # check EOF
        if symbol == '':
            return token.Token('EOS', symbol, self.line, self.column)
        self.column += 1
        if symbol == '\n':
            # newline resets column and updates line
            self.column = 0
            self.line += 1
        elif symbol == '#':
            # A comment runs to the end of the line. Also stop at end of
            # input so a final comment without '\n' cannot loop forever.
            while self.__peek() not in ('', '\n'):
                self.__read()
            if self.__peek() == '\n':
                self.__read()
                self.line += 1
                self.column = 0
        elif not symbol.isspace():
            # Tabs and '\r' are skipped like spaces; previously they were
            # glued onto the following word and reported as unexpected.
            break

    # Column of the first character of the token being scanned.
    start = self.column

    # Symbols that are always one character long.
    single_char = {
        '+': 'PLUS', ',': 'COMMA', ':': 'COLON', '/': 'DIVIDE',
        '.': 'DOT', '(': 'LPAREN', ')': 'RPAREN', '-': 'MINUS',
        '%': 'MODULO', '*': 'MULTIPLY', ';': 'SEMICOLON',
    }
    if symbol in single_char:
        return token.Token(single_char[symbol], symbol, self.line, start)

    # Symbols that may be followed by '=': (plain type, type with '=').
    maybe_equal = {
        '=': ('ASSIGN', 'EQUAL'),
        '>': ('GREATER_THAN', 'GREATER_THAN_EQUAL'),
        '<': ('LESS_THAN', 'LESS_THAN_EQUAL'),
    }
    if symbol in maybe_equal:
        plain_type, with_equal_type = maybe_equal[symbol]
        if self.__peek() == '=':
            symbol += self.__read()
            self.column += 1
            return token.Token(with_equal_type, symbol, self.line, start)
        return token.Token(plain_type, symbol, self.line, start)

    if symbol == '!':
        if self.__peek() == '=':
            symbol += self.__read()
            self.column += 1
            return token.Token('NOT_EQUAL', symbol, self.line, start)
        raise error.MyPLError("Unexpected symbol '!'", self.line,
                              self.column)

    # Numeric constants: no leading zero allowed before further digits.
    if symbol == '0' and self.__peek().isnumeric():
        raise error.MyPLError(
            "unexpected symbol '" + self.__peek() + "' ", self.line,
            self.column)
    if symbol.isnumeric():
        while self.__peek().isnumeric():
            symbol += self.__read()
            self.column += 1
        # a letter directly after the digits is a malformed number
        if self.__peek().isalpha():
            raise error.MyPLError(
                "unexpected symbol '" + self.__peek() + "' ", self.line,
                start)
        if self.__peek() != '.':
            return token.Token('INTVAL', symbol, self.line, start)
        # floating point: at least one digit must follow the '.'
        symbol += self.__read()
        self.column += 1
        if not self.__peek().isnumeric():
            raise error.MyPLError('Missing digit in float value', self.line,
                                  start)
        while self.__peek().isnumeric():
            symbol += self.__read()
            self.column += 1
        if self.__peek().isalpha():
            raise error.MyPLError(
                "unexpected symbol '" + self.__peek() + "' ", self.line,
                start)
        return token.Token('FLOATVAL', symbol, self.line, start)

    # String constants: read up to the closing quote on the same line.
    if symbol == '"':
        while self.__peek() != '"':
            if self.__peek() == '\n':
                raise error.MyPLError('Newline in middle of string',
                                      self.line, start)
            symbol += self.__read()
            self.column += 1
            if self.__peek() == '':
                raise error.MyPLError('Missing " in string', self.line,
                                      start)
        # read the ending "
        self.__read()
        self.column += 1
        # drop the opening quote; the closing one was never appended
        return token.Token('STRINGVAL', symbol[1:], self.line, start)

    # Words: extend with letters, digits and underscores.
    while (self.__peek().isalpha() or self.__peek() == '_'
           or self.__peek().isnumeric()):
        symbol += self.__read()
        self.column += 1

    reserved = {
        'bool': 'BOOLTYPE', 'int': 'INTTYPE', 'float': 'FLOATTYPE',
        'string': 'STRINGTYPE', 'struct': 'STRUCTTYPE', 'and': 'AND',
        'or': 'OR', 'not': 'NOT', 'while': 'WHILE', 'do': 'DO',
        'if': 'IF', 'then': 'THEN', 'else': 'ELSE', 'elif': 'ELIF',
        'end': 'END', 'fun': 'FUN', 'var': 'VAR', 'set': 'SET',
        'return': 'RETURN', 'new': 'NEW', 'nil': 'NIL',
        'true': 'BOOLVAL', 'false': 'BOOLVAL',
    }
    if symbol in reserved:
        return token.Token(reserved[symbol], symbol, self.line, start)

    # identifiers must start with a letter
    if symbol[0].isalpha():
        return token.Token('ID', symbol, self.line, start)
    if symbol[0] == '_':
        raise error.MyPLError('Poorly formed identifier', self.line, start)

    # unexpected char error
    raise error.MyPLError("Unexpected character(s): '" + symbol + "'",
                          self.line, start)
def __error(self, error_msg, target_token):
    """Raise a MyPLError carrying error_msg at target_token's position.

    The line and column are read from the token first, then the
    exception is built and raised; this method never returns.
    """
    line, column = target_token.line, target_token.column
    raise error.MyPLError(error_msg, line, column)
def next_token(self):
    """Scan the next lexeme and return it as a token.Token.

    The token type starts as token.ID and is refined by the checks below
    (EOS, STRINGVAL, INTVAL/FLOATVAL, operators, keywords). The token is
    built from self.line and self.column; afterwards self.column is
    overwritten with self.column_index (and with 0 for an EOS token).

    Raises:
        error.MyPLError: for a '.' followed by a digit, a newline or end
            of input inside a string, a number starting with '0', a
            non-numeric character inside a number, or a malformed float.

    NOTE(review): this block was reconstructed from a source whose
    indentation had been collapsed; the nesting shown here is a best
    guess and should be confirmed against the original file.
    NOTE(review): check_char(), __comment_check() and column_index are
    defined outside this block. From their use here, check_char() == 0
    appears to mean "next character is a special/terminating symbol" and
    == 1 "next character ends the current token" -- TODO confirm.
    """
    tokentype = token.ID
    item = ''
    isStringVal = False  # set once a string literal is read, so later checks leave it alone
    error_message = "Lexer Error "
    while self.__peek().isspace():
        # only a literal space advances the column; other whitespace is
        # consumed without changing it
        if self.__peek() == " ":
            self.column += 1
        self.__read()
    self.__comment_check()
    if self.__peek() == '':  # EOS end of file
        tokentype = token.EOS
    # special character such as plus or minus
    if self.check_char() == 0:
        self.column += 1
        item += self.__read()
        # checks for an invalid dot (a '.' directly followed by a digit)
        if item == "." and str(self.__peek()).isnumeric():
            error_message += "invalid float value"
            e = error.MyPLError(error_message, self.line, self.column_index - 1)
            raise e
        # checks for two-character comparison operators ('==', '>=', '<=', '!=')
        if item == '=' or item == '>' or item == '<' or item == '!':
            if self.__peek() == '=':
                item += self.__read()
    elif self.__peek() == '"':  # string value
        self.column += 1  # increments column to a tokens starting index
        self.__read()  # discard the opening quote
        while self.__peek() != '"':
            if self.__peek() == '\n':
                error_message += "reached newline character in string"
                e = error.MyPLError(error_message, self.line, self.column_index)
                raise e
            elif self.__peek() == '':
                error_message += "reached EOS character in string"
                e = error.MyPLError(error_message, self.line, self.column_index)
                raise e
            else:
                item += self.__read()
        self.__read()  # discard the closing quote
        tokentype = token.STRINGVAL
        isStringVal = True;
    else:  # any other type of character
        self.column += 1  # increments column to a tokens starting index
        isnum = False  # tracks if you are entering a number
        item += self.__read()
        # remember that the token started with a digit
        if item.isnumeric():
            isnum = True
        if self.check_char() == 1:  # checks for characters that should end the token
            # checks if a number starts with zero
            # the previous if statement will check if the next
            # character is a decimal point
            if isnum and item == '0':
                error_message += "unexpected symbol '" + str(self.__peek()) + "'"
                e = error.MyPLError(error_message, self.line, self.column_index)
                raise e
        # runs until it reaches a character that marks the end of the token
        while self.check_char() != 0 and self.__peek() != '"':
            if isnum and not item.isnumeric():
                # a token that began as a number picked up a non-digit
                error_message += "unexpected value '" + str(item[len(item) - 1]) + "'"
                e = error.MyPLError(error_message, self.line, self.column_index - 1)
                raise e
            else:
                self.__comment_check()
                item += self.__read()
    if not isStringVal:
        if item == "string":
            tokentype = token.STRINGTYPE
        else:
            # strips spaces picked up while reading the token
            item = item.strip()
    if item.isnumeric() and tokentype != token.STRINGVAL:  # int and float check
        if self.__peek() == ".":
            item += self.__read()
            decimal = str(self.__peek())
            if not decimal.isnumeric():  # checks for an invalid float character
                error_message += "missing digit in float value"
                e = error.MyPLError(error_message, self.line, self.column_index - 1)
                raise e
            # NOTE(review): `decimal.isnumeric` is referenced without being
            # called, so this operand is always truthy -- confirm whether
            # `decimal.isnumeric()` was intended.
            while decimal.isnumeric and (self.check_char() != 0 or self.__peek() == "."):
                decimal += self.__peek()
                if not decimal.isnumeric() and self.__peek() != ";":
                    # NOTE(review): the bare peek below has no effect.
                    self.__peek()
                    error_message += "unexpected character '" + str(self.__peek()) + "'"
                    e = error.MyPLError(error_message, self.line, self.column_index + 1)
                    raise e
                else:
                    item += self.__read()
        item = item.strip()
        # a '.' anywhere in the lexeme makes it a float, otherwise an int
        if item.count('.', 0, len(item)):
            tokentype = token.FLOATVAL
        else:
            tokentype = token.INTVAL
    # checks if the token is a special character or keyword and sets the
    # token type accordingly; anything unmatched keeps its current type
    if not isStringVal:
        if item == "=":
            tokentype = token.ASSIGN
        elif item == ",":
            tokentype = token.COMMA
        elif item == ":":
            tokentype = token.COLON
        elif item == "/":
            tokentype = token.DIVIDE
        elif item == ".":
            tokentype = token.DOT
        elif item == "==":
            tokentype = token.EQUAL
        elif item == ">":
            tokentype = token.GREATER_THAN
        elif item == ">=":
            tokentype = token.GREATER_THAN_EQUAL
        elif item == "<":
            tokentype = token.LESS_THAN
        elif item == "<=":
            tokentype = token.LESS_THAN_EQUAL
        elif item == "!=":
            tokentype = token.NOT_EQUAL
        elif item == "(":
            tokentype = token.LPAREN
        elif item == ")":
            tokentype = token.RPAREN
        elif item == "-":
            tokentype = token.MINUS
        elif item == "%":
            tokentype = token.MODULO
        elif item == "*":
            tokentype = token.MULTIPLY
        elif item == "+":
            tokentype = token.PLUS
        elif item == "true" or item == "false":
            tokentype = token.BOOLVAL
        elif item == ";":
            tokentype = token.SEMICOLON
        elif item == "bool":
            tokentype = token.BOOLTYPE
        elif item == "int":
            tokentype = token.INTTYPE
        elif item == "float":
            tokentype = token.FLOATTYPE
        elif item == "struct":
            tokentype = token.STRUCTTYPE
        elif item == "and":
            tokentype = token.AND
        elif item == "or":
            tokentype = token.OR
        elif item == "not":
            tokentype = token.NOT
        elif item == "while":
            tokentype = token.WHILE
        elif item == "do":
            tokentype = token.DO
        elif item == "if":
            tokentype = token.IF
        elif item == "then":
            tokentype = token.THEN
        elif item == "else":
            tokentype = token.ELSE
        elif item == "elif":
            tokentype = token.ELIF
        elif item == "end":
            tokentype = token.END
        elif item == "fun":
            tokentype = token.FUN
        elif item == "var":
            tokentype = token.VAR
        elif item == "set":
            tokentype = token.SET
        elif item == "return":
            tokentype = token.RETURN
        elif item == "new":
            tokentype = token.NEW
        elif item == "nil":
            tokentype = token.NIL
    final_token = token.Token(tokentype, item, self.line, self.column)
    self.column = self.column_index  # sets column to new value
    if tokentype == token.EOS:  # sets column to 0 once end of input is reached
        self.column = 0
    return final_token
def next_token(self):
    """Scan and return the next token from the input stream.

    Whitespace and '#' comments are skipped. At end of input an EOS
    token is returned. Column values passed to the tokens follow this
    lexer's existing convention: single-character tokens and the plain
    forms of '=', '<', '>' use self.column after the read, two-character
    operators use self.column after their first character, and
    numbers/strings/words use the value `self.column - 1` taken after
    their first character.

    Lexical errors are raised as error.MyPLError instead of being
    printed and followed by exit(), so callers can handle them. A lone
    '!' or an unrecognised character raises instead of falling off the
    end of the function and returning None.

    Returns:
        token.Token: the next token.

    Raises:
        error.MyPLError: on a lone '!', a number with a leading zero, a
            malformed float, a letter directly after a number, an
            unterminated string, or an unexpected symbol.
    """
    while True:
        while self.__peek().isspace():
            if self.__peek() == '\n':
                self.line += 1
                self.column = 0
            self.__read()
            self.column += 1
        symbol = self.__read()
        self.column += 1
        if symbol != '#':
            break
        # Comment: discard the rest of the line. Stop at end of input too,
        # so a final comment without '\n' cannot loop forever. The newline
        # itself is left for the whitespace loop above, which bumps the
        # line counter and resets the column.
        while self.__peek() not in ('', '\n'):
            self.__read()

    if symbol == '':
        return token.Token(token.EOS, '', self.line, self.column - 1)

    # Operators that may be followed by '=': (plain type, type with '=').
    maybe_equal = {
        '=': (token.ASSIGN, token.EQUAL),
        '>': (token.GREATER_THAN, token.GREATER_THAN_EQUAL),
        '<': (token.LESS_THAN, token.LESS_THAN_EQUAL),
    }
    if symbol in maybe_equal:
        plain_type, with_equal_type = maybe_equal[symbol]
        if self.__peek() == '=':
            symbol += self.__read()
            col = self.column
            self.column += 1
            return token.Token(with_equal_type, symbol, self.line, col)
        return token.Token(plain_type, symbol, self.line, self.column)

    if symbol == '!':
        # '!' is only valid as the first half of '!='
        if self.__peek() == '=':
            symbol += self.__read()
            col = self.column
            self.column += 1
            return token.Token(token.NOT_EQUAL, symbol, self.line, col)
        raise error.MyPLError('unexpected symbol', self.line, self.column)

    single_char = {
        ',': token.COMMA, ':': token.COLON, '/': token.DIVIDE,
        '.': token.DOT, '(': token.LPAREN, ')': token.RPAREN,
        '-': token.MINUS, '%': token.MODULO, '*': token.MULTIPLY,
        '+': token.PLUS, ';': token.SEMICOLON,
    }
    if symbol in single_char:
        return token.Token(single_char[symbol], symbol, self.line,
                           self.column)

    # Integer and float literals.
    if symbol.isdigit():
        flt = False
        col = self.column - 1
        if symbol == '0' and self.__peek().isdigit():
            raise error.MyPLError('unexpected number', self.line,
                                  self.column)
        while self.__peek().isdigit() or self.__peek() == ".":
            symbol += self.__read()
            self.column += 1
            if symbol[-1] == ".":
                if flt:
                    # a second decimal point
                    raise error.MyPLError('invalid number', self.line, col)
                if not self.__peek().isdigit():
                    raise error.MyPLError('missing digit in float value',
                                          self.line, self.column)
                symbol += self.__read()
                self.column += 1
                flt = True
        if self.__peek().isalpha():
            raise error.MyPLError('unexpected symbol', self.line,
                                  self.column)
        number_type = token.FLOATVAL if flt else token.INTVAL
        return token.Token(number_type, symbol, self.line, col)

    # String literals; the lexeme excludes the surrounding quotes.
    if symbol == '"':
        col = self.column - 1
        chars = []
        while self.__peek() != '"':
            if self.__peek() == '':
                # end of input before the closing quote
                raise error.MyPLError('Improper string', self.line, col)
            chars.append(self.__read())
            # every character is counted, including the first one
            self.column += 1
        self.__read()  # closing quote
        self.column += 1
        return token.Token(token.STRINGVAL, ''.join(chars), self.line, col)

    # Keywords and identifiers.
    if symbol.isalpha():
        col = self.column - 1
        while (self.__peek().isalpha() or self.__peek().isdigit()
               or self.__peek() == '_'):
            symbol += self.__read()
            self.column += 1
        keywords = {
            'and': token.AND, 'or': token.OR, 'not': token.NOT,
            'bool': token.BOOLTYPE, 'int': token.INTTYPE,
            'float': token.FLOATTYPE, 'string': token.STRINGTYPE,
            'struct': token.STRUCTTYPE, 'while': token.WHILE,
            'do': token.DO, 'if': token.IF, 'then': token.THEN,
            'else': token.ELSE, 'elif': token.ELIF, 'end': token.END,
            'fun': token.FUN, 'var': token.VAR, 'set': token.SET,
            'return': token.RETURN, 'new': token.NEW, 'nil': token.NIL,
            'true': token.BOOLVAL, 'false': token.BOOLVAL,
        }
        return token.Token(keywords.get(symbol, token.ID), symbol,
                           self.line, col)

    # Anything else is not part of the language.
    raise error.MyPLError('unexpected symbol', self.line, self.column)
def __error(self, msg, the_token):
    """Raise a MyPLError with msg, located at the_token's line and column.

    This method never returns normally.
    """
    failure = error.MyPLError(msg, the_token.line, the_token.column)
    raise failure
def next_token(self):
    """Scan the next lexeme and return it as a token.Token.

    Characters are accumulated into `input` until whitespace, end of
    input, or one of the stop conditions below clears `keepLooping`.
    The lexeme is then classified as INTVAL, FLOATVAL, STRINGVAL, an
    operator/keyword, EOS (empty lexeme) or ID. The token is created
    with `oldLine`/`oldColumn`, which are captured on entry and updated
    while leading whitespace and comments are skipped.

    Raises:
        error.MyPLError: for a newline inside a string, a digit after a
            leading '0', a letter directly after digits, a '!' not
            followed by '=', or a malformed float.

    NOTE(review): this block was reconstructed from a source whose
    indentation had been collapsed; the nesting (in particular which
    `if` each `else` belongs to, and what sits inside `if keepLooping`)
    is a best guess and should be confirmed against the original file.
    NOTE(review): the local name `input` shadows the builtin.
    """
    input = ""
    oldLine = self.line
    oldColumn = self.column
    temp = ""
    inputType = ""
    intFloatOrString = 0 #1 if input is an int, 2 if float, 3 if string, 0 otherwise
    checker = True
    keepLooping = True
    noSpaceSymbols = ['.', '<=', '>=', '==', '!=', '*',':', '/', '!', '+', '-', ';', '=', ')', '(', '<', '>', ',', '%'] #list of symbols that don't need whitespace around them
    comparisonsOp = ['<=', '>=', '==', '!=', '<', '>'] #list of comparisons

    #take care of whitespace; note self.line is not changed in this loop
    while self.__peek().isspace(): # == " " or self.__peek() == '\n' or self.__peek == '\t':
        temp = self.__read()
        self.column += 1
        oldColumn = self.column
    # NOTE(review): '\n' satisfies isspace(), so the loop above already
    # consumes newlines and this loop looks unreachable -- confirm.
    while self.__peek() == '\n':
        temp = self.__read()
        self.column += 1
        oldColumn = self.column

    while self.__peek() != " " and self.__peek() != "" and self.__peek() != '\n' and not self.__peek().isspace() and keepLooping:
        #token is a string (either quote style opens one)
        if self.__peek() == '"' or self.__peek() == "'":
            isSingle = True;
            if self.__peek() == '"':
                isSingle = False
            temp = self.__read()
            self.column += 1
            # keep reading until the quote character that opened the string
            while (self.__peek() != "'" and self.__peek() != '"') or ((self.__peek() != "'" or not isSingle) and (self.__peek() != '"' or isSingle)):
                input += self.__read()
                if self.__peek() == '\n':
                    raise error.MyPLError("uh oh, you had a new line in a string ", self.line, self.column)
                self.column += 1
            temp = self.__read()  # consume the closing quote
            self.column += 1
            intFloatOrString = 3
            keepLooping = False
        #token is not a string
        #reads the next character
        if keepLooping:
            input += self.__read()
            #what to do for comments: discard the rest of the line
            if input == '#':
                while self.__peek() != '\n':
                    input = self.__read()
                input = ""
                # NOTE(review): `==` compares instead of assigning; the
                # read still happens, its result is discarded.
                temp == self.__read()
                while self.__peek() == " " or self.__peek().isspace():
                    temp = self.__read()
                while self.__peek() == '\n':
                    temp = self.__read()
                self.line += 1
                self.column = 1
                oldLine = self.line
                oldColumn = self.column
            #bad number checking for number preceded by 0
            if input == '0' and self.__peek().isdigit():
                m = 'unexpected symbol "%s" at: ' %self.__peek()
                raise error.MyPLError(m, self.line, self.column)
            #bad number checking for digit followed by alphabetical letter
            if input.isdigit() and self.__peek().isalpha():
                temp = self.__read()
                m = 'unexpected symbol "%s" at:' % temp
                raise error.MyPLError(m, self.line, self.column)
            # digits followed by '.' may be a float, so skip the operator
            # boundary check for this character
            checker = True
            if self.__peek() == '.' and input.isdigit():
                checker = False
            #checking for operators: decide whether the token ends here
            if checker == True:
                if (input in noSpaceSymbols or self.__peek() in noSpaceSymbols) and input != "":
                    if input == '!' and self.__peek() != '=':
                        raise error.MyPLError("extra exclamation mark at ", self.line, self.column)
                    temp = input + self.__peek()
                    if len(input) == 1:
                        if temp != '!=' and temp != '==':
                            if input in comparisonsOp:
                                # NOTE(review): Python parses this as the chained
                                # comparison `(peek in comparisonsOp) and
                                # (comparisonsOp == False)`, which is never true.
                                if self.__peek() in comparisonsOp == False:
                                    keepLooping = False
                            else:
                                keepLooping = False
                    else:
                        keepLooping = False
            #increment column count
            self.column += 1

    if intFloatOrString != 3:
        #checks to see if the input is an int
        if(input.isdigit()):
            intFloatOrString = 1
        #checks to see if it is a float
        else:
            soFarSoGood = True #stays True while every non-dot character is a digit
            numberOfDots = 0 #keeps track of the dots
            if len(input) > 1:
                for i, x in enumerate(input):
                    if x == '.':
                        numberOfDots += 1
                        #float error where there is nothing after the dot
                        if i + 1 == len(input):
                            m = input + "missing digit in float value at "
                            raise error.MyPLError(m, self.line, self.column)
                        #float error where there is an invalid character after the dot
                        #pre dot invalid number checking is taken care of up above
                        elif input[i+1].isdigit() == False and soFarSoGood:
                            m = 'unexpected character "%s" at ' % (self.__peek())
                            raise error.MyPLError(m, self.line, self.column)
                    elif x.isdigit() == False:
                        soFarSoGood = False
                if soFarSoGood and numberOfDots == 1:
                    intFloatOrString = 2

    #end of the line: consume the newline and start the next line
    if self.__peek() == '\n':
        self.line += 1
        self.column = 1;
        temp = self.__read()

    #input was an int, float or string
    if intFloatOrString != 0:
        if intFloatOrString == 1:
            inputType = token.INTVAL
        elif intFloatOrString == 2:
            inputType = token.FLOATVAL
        else:
            inputType = token.STRINGVAL
    #input was not a value except for maybe a bool
    else:
        if input == '=':
            inputType = token.ASSIGN
        elif input == ',':
            inputType = token.COMMA
        elif input == ':':
            inputType = token.COLON
        elif input == '/':
            inputType = token.DIVIDE
        elif input == '.':
            inputType = token.DOT
        elif input == '==':
            inputType = token.EQUAL
        elif input == '':
            # nothing was read: report end of stream
            oldColumn -= 1
            inputType = token.EOS
        elif input == '>':
            inputType = token.GREATER_THAN
        elif input == '>=':
            inputType = token.GREATER_THAN_EQUAL
        elif input == '<':
            inputType = token.LESS_THAN
        elif input == '<=':
            inputType = token.LESS_THAN_EQUAL
        elif input == '!=':
            inputType = token.NOT_EQUAL
        elif input == '(':
            inputType = token.LPAREN
        elif input == ')':
            inputType = token.RPAREN
        elif input == '-':
            inputType = token.MINUS
        elif input == '%':
            inputType = token.MODULO
        elif input == '*':
            inputType = token.MULTIPLY
        elif input == '+':
            inputType = token.PLUS
        elif input == ';':
            inputType = token.SEMICOLON
        elif input == 'bool':
            inputType = token.BOOLTYPE
        elif input == 'int':
            inputType = token.INTTYPE
        elif input == 'float':
            inputType = token.FLOATTYPE
        elif input == 'string':
            inputType = token.STRINGTYPE
        elif input == 'struct':
            inputType = token.STRUCTTYPE
        elif input == 'and':
            inputType = token.AND
        elif input == 'or':
            inputType = token.OR
        elif input == 'not':
            inputType = token.NOT
        elif input == 'while':
            inputType = token.WHILE
        elif input == 'do':
            inputType = token.DO
        elif input == 'if':
            inputType = token.IF
        elif input == 'then':
            inputType = token.THEN
        elif input == 'else':
            inputType = token.ELSE
        elif input == 'elif':
            inputType = token.ELIF
        elif input == 'end':
            inputType = token.END
        elif input == 'fun':
            inputType = token.FUN
        elif input == 'var':
            inputType = token.VAR
        elif input == 'set':
            inputType = token.SET
        elif input == 'return':
            inputType = token.RETURN
        elif input == 'new':
            inputType = token.NEW
        elif input == 'false' or input == 'true':
            inputType = token.BOOLVAL
        elif input == 'nil':
            inputType = token.NIL
        else:
            # anything unrecognised is treated as an identifier
            inputType = token.ID
    newToken = token.Token(inputType, input, oldLine, oldColumn)
    return newToken