def visit_if_stmt(self, if_stmt):
    """Type-check an if statement and every branch hanging off it.

    The guard of the ``if`` part and of each ``elif`` part is visited, and
    the type left in ``self.current_type`` by that visit must equal
    ``token.BOOLTYPE``; otherwise the problem is reported via
    ``self.__error``. The statement list of each branch is visited after
    its guard, and the else statements are visited last when
    ``if_stmt.has_else`` is set.
    """
    def check_branch(branch, complaint):
        # Visiting the guard records its type in self.current_type.
        branch.bool_expr.accept(self)
        guard_type = self.current_type
        if guard_type != token.BOOLTYPE:
            # The reported token carries the offending type; no source
            # position is available here, so line and column are 0.
            self.__error(complaint, token.Token(guard_type, "", 0, 0))
        branch.stmt_list.accept(self)

    check_branch(if_stmt.if_part, 'Non-boolean expression in if statement')
    for clause in if_stmt.elseifs:
        check_branch(clause, 'Non-boolean expression in elif statement')
    if if_stmt.has_else:
        if_stmt.else_stmts.accept(self)
def visit_fun_param(self, paramStmt):
    """Register a function parameter and record its type.

    The parameter name is added to ``self.sym_table`` and its info is set
    to the value-token type matching the declared type (or, for a
    user-defined type, the declared type's lexeme). ``self.current_type``
    is replaced by a fresh token whose lexeme is the parameter name and
    whose tokentype is that same info value. A declared type that matches
    none of the known cases is recorded as the empty string.
    """
    name = paramStmt.param_name.lexeme
    declared = paramStmt.param_type

    # Placeholder token; its lexeme and tokentype are overwritten below.
    placeholder = token.Token("temp", "temp", 0, 0)
    self.current_type = placeholder
    placeholder.lexeme = name
    self.sym_table.add_id(name)

    # Declared primitive type -> token type of a value of that type,
    # tried in order with the first match winning.
    primitive_values = (
        (token.INTTYPE, token.INTVAL),
        (token.STRINGTYPE, token.STRINGVAL),
        (token.BOOLTYPE, token.BOOLVAL),
        (token.FLOATTYPE, token.FLOATVAL),
    )
    resolved = ""
    for type_tag, value_tag in primitive_values:
        if declared.tokentype == type_tag:
            resolved = value_tag
            break
    else:
        # Not a primitive: an identifier names a user-defined type.
        if declared.tokentype == token.ID:
            resolved = declared.lexeme

    self.sym_table.set_info(name, resolved)
    self.current_type.tokentype = resolved
def next_token(self):
    """Scan and return the next token from the input stream.

    Whitespace, newlines and ``#`` comments are skipped first;
    ``self.line`` and ``self.column`` are updated as characters are
    consumed. An empty result from ``self.__peek()`` is treated as end of
    input.

    Returns:
        token.Token: the next token with its lexeme, the current line and
        the column of its first character. At end of input an EOS token
        with an empty lexeme is returned. String tokens carry their text
        without the surrounding double quotes.

    Raises:
        error.MyPLError: for a number with a leading zero, a second or
            trailing decimal point, or a letter directly after it; for a
            string that reaches a newline or the end of input before its
            closing quote; and for any character that starts no token
            (including ``!`` not followed by ``=``).
    """
    # Skip blanks and comments in a loop instead of recursing once per
    # skipped character, so long runs of whitespace or comment lines
    # cannot exhaust the interpreter's recursion limit.
    while True:
        if not self.__peek():
            return token.Token(token.EOS, "", self.line, self.column)
        tokenStartCol = self.column + 1
        symbol = self.__read()
        self.column += 1
        if symbol == "#":
            # Discard the rest of the line. Stopping on an empty peek too
            # keeps a comment on the final line (no trailing newline) from
            # looping forever.
            while self.__peek() and self.__peek() != "\n":
                self.__read()
            if self.__peek() == "\n":
                self.__read()
                self.column = 0
                self.line += 1
        elif symbol == "\n":
            self.line += 1
            self.column = 0
        elif not symbol.isspace():
            break
    word = symbol

    if symbol.isalpha():
        # After the first letter, letters, digits and '_' may follow.
        upcoming = self.__peek()
        while upcoming.isalpha() or upcoming == "_" or upcoming.isdigit():
            word += self.__read()
            self.column += 1
            upcoming = self.__peek()
        keywords = {
            'and': token.AND,
            'or': token.OR,
            'not': token.NOT,
            'while': token.WHILE,
            'do': token.DO,
            'if': token.IF,
            'then': token.THEN,
            'else': token.ELSE,
            'elif': token.ELIF,
            'end': token.END,
            'fun': token.FUN,
            'var': token.VAR,
            'set': token.SET,
            'return': token.RETURN,
            'new': token.NEW,
            'nil': token.NIL,
            'true': token.BOOLVAL,
            'false': token.BOOLVAL,
            'int': token.INTTYPE,
            'bool': token.BOOLTYPE,
            'float': token.FLOATTYPE,
            'string': token.STRINGTYPE,
            'struct': token.STRUCTTYPE,
        }
        # Anything that is not a keyword is an identifier.
        return token.Token(keywords.get(word, token.ID), word, self.line,
                           tokenStartCol)

    if symbol.isdigit():
        # A multi-digit number may not start with 0.
        if symbol == "0" and self.__peek().isdigit():
            raise error.MyPLError(
                'unexpected symbol "' + self.__peek() + '"', self.line,
                tokenStartCol)
        # floats cannot have more than one decimal
        hasDotYet = False
        while self.__peek().isdigit() or self.__peek() == '.':
            symbol = self.__read()
            word += symbol
            self.column += 1
            if symbol == '.':
                if hasDotYet:
                    raise error.MyPLError(
                        'unexpected symbol "' + self.__peek() + '"',
                        self.line, tokenStartCol)
                hasDotYet = True
        if symbol == '.':
            # float ends in '.' and is invalid
            raise error.MyPLError('missing digit in float value', self.line,
                                  self.column + 1)
        if self.__peek().isalpha():
            raise error.MyPLError(
                'unexpected symbol "' + self.__peek() + '"', self.line,
                tokenStartCol)
        number_type = token.FLOATVAL if hasDotYet else token.INTVAL
        return token.Token(number_type, word, self.line, tokenStartCol)

    if symbol == '"':
        # Collect characters up to the closing quote; a newline or the end
        # of input before it is an error.
        while self.__peek() and self.__peek() not in ('"', '\n'):
            word += self.__read()
            self.column += 1
        if self.__peek() != '"':
            raise error.MyPLError("reached newline reading string",
                                  self.line, self.column)
        word += self.__read()
        self.column += 1
        # Strip the surrounding quotes from the lexeme.
        return token.Token(token.STRINGVAL, word[1:-1], self.line,
                           tokenStartCol)

    single_char_tokens = {
        '(': token.LPAREN,
        ')': token.RPAREN,
        ',': token.COMMA,
        '%': token.MODULO,
        '+': token.PLUS,
        '-': token.MINUS,
        ';': token.SEMICOLON,
        ':': token.COLON,
        '*': token.MULTIPLY,
        '/': token.DIVIDE,
        '.': token.DOT,
    }
    if symbol in single_char_tokens:
        return token.Token(single_char_tokens[symbol], symbol, self.line,
                           tokenStartCol)

    # Operators that change meaning when directly followed by '=':
    # symbol -> (type when alone, type when followed by '=').
    comparison_ops = {
        '=': (token.ASSIGN, token.EQUAL),
        '>': (token.GREATER_THAN, token.GREATER_THAN_EQUAL),
        '<': (token.LESS_THAN, token.LESS_THAN_EQUAL),
    }
    if symbol in comparison_ops:
        plain, with_equals = comparison_ops[symbol]
        if self.__peek() == '=':
            word += self.__read()
            self.column += 1
            return token.Token(with_equals, word, self.line, tokenStartCol)
        return token.Token(plain, symbol, self.line, tokenStartCol)

    # '!' is only valid as the first half of '!='.
    if symbol == '!' and self.__peek() == '=':
        word += self.__read()
        self.column += 1
        return token.Token(token.NOT_EQUAL, word, self.line, tokenStartCol)

    raise error.MyPLError('unexpected symbol "' + symbol + '"', self.line,
                          tokenStartCol)
def next_token(self):
    """Scan and return the next token from the input stream.

    Whitespace, newlines and ``#`` comments are skipped first;
    ``self.line`` and ``self.column`` are updated as characters are
    consumed. An empty string from ``self.__peek()`` is treated as end of
    input.

    Returns:
        token.Token: the next token with its lexeme, the current line and
        the column of its first character. At end of input an EOS token
        with an empty lexeme is returned. String tokens carry their text
        without the surrounding double quotes.

    Raises:
        error.MyPLError: for a multi-digit number starting with 0, a float
            with no digit after the decimal point or with a second decimal
            point, a letter directly after a number, a string that reaches
            a newline or the end of input before its closing quote, a
            ``!`` not followed by ``=``, and any character that starts no
            token.
    """
    # Skip every mix of blanks, newlines and comments in one loop. Doing
    # this iteratively (rather than one pass of spaces, one of newlines
    # and a recursive call per comment) means indentation after a newline
    # and tabs are skipped too instead of leaking out as tokens.
    while True:
        upcoming = self.__peek()
        if upcoming == '\n':
            self.__read()
            self.line += 1
            self.column = 0
        elif upcoming.isspace():
            self.__read()
            self.column += 1
        elif upcoming == '#':
            # Drop the rest of the line; the newline itself (if any) is
            # handled by the next iteration.
            while self.__peek() != '\n' and self.__peek() != '':
                self.__read()
                self.column += 1
        else:
            break

    if upcoming == '':
        return token.Token(token.EOS, '', self.line, self.column)

    # Column of the first character of the token about to be read.
    startCol = self.column + 1
    curSymbol = ''

    # A letter starts a keyword or an identifier.
    if upcoming.isalpha():
        while (self.__peek().isalpha() or self.__peek().isdigit()
               or self.__peek() == '_'):
            curSymbol += self.__read()
            self.column += 1
        keywords = {
            'bool': token.BOOLTYPE,
            'int': token.INTTYPE,
            'float': token.FLOATTYPE,
            'string': token.STRINGTYPE,
            'struct': token.STRUCTTYPE,
            'and': token.AND,
            'or': token.OR,
            'not': token.NOT,
            'while': token.WHILE,
            'do': token.DO,
            'if': token.IF,
            'then': token.THEN,
            'else': token.ELSE,
            'elif': token.ELIF,
            'end': token.END,
            'fun': token.FUN,
            'var': token.VAR,
            'set': token.SET,
            'return': token.RETURN,
            'new': token.NEW,
            'nil': token.NIL,
            'true': token.BOOLVAL,
            'false': token.BOOLVAL,
        }
        return token.Token(keywords.get(curSymbol, token.ID), curSymbol,
                           self.line, startCol)

    # A double quote starts a string literal.
    if upcoming == '"':
        self.__read()
        self.column += 1
        # Stop at end of input as well, otherwise an unterminated string on
        # the last line would never leave this loop.
        while self.__peek() not in ('"', '\n', ''):
            curSymbol += self.__read()
            self.column += 1
        if self.__peek() == '\n':
            raise error.MyPLError("reached newline reading string",
                                  self.line, self.column + 1)
        if self.__peek() != '"':
            raise error.MyPLError('missing closing quotes', self.line,
                                  self.column + 1)
        self.__read()
        self.column += 1
        return token.Token(token.STRINGVAL, curSymbol, self.line, startCol)

    # A digit starts an int or float literal.
    if upcoming.isdigit():
        while self.__peek().isdigit():
            curSymbol += self.__read()
            self.column += 1
            # A multi-digit number may not start with 0.
            if curSymbol == '0' and self.__peek().isdigit():
                raise error.MyPLError(
                    'unexpected symbol "%s"' % self.__peek(), self.line,
                    self.column + 1)
        # A period makes it a float.
        if self.__peek() == '.':
            curSymbol += self.__read()
            self.column += 1
            sawFraction = False
            while self.__peek().isdigit():
                curSymbol += self.__read()
                self.column += 1
                sawFraction = True
            if self.__peek().isalpha():
                raise error.MyPLError(
                    'unexpected symbol "%s"' % self.__peek(), self.line,
                    self.column + 1)
            # Make sure there are digits after the decimal point ...
            if not sawFraction:
                raise error.MyPLError("missing digit in float value",
                                      self.line, self.column + 1)
            # ... and that there is only one decimal point.
            if self.__peek() == '.':
                raise error.MyPLError(
                    "Too many decimal points in float value", self.line,
                    self.column + 1)
            return token.Token(token.FLOATVAL, curSymbol, self.line,
                               startCol)
        if self.__peek().isalpha():
            raise error.MyPLError(
                'unexpected symbol "%s"' % self.__peek(), self.line,
                self.column + 1)
        return token.Token(token.INTVAL, curSymbol, self.line, startCol)

    # Anything else must be punctuation or an operator.
    curSymbol = self.__read()
    if curSymbol == '!' and self.__peek() != '=':
        # '!' is only valid as the first half of '!='.
        raise error.MyPLError('missing "=" after "!"', self.line,
                              self.column)
    self.column += 1
    # Fold a following '=' into >=, <=, == and !=.
    if curSymbol in ('>', '=', '!', '<') and self.__peek() == '=':
        curSymbol += self.__read()
        self.column += 1

    punctuation = {
        '=': token.ASSIGN,
        ',': token.COMMA,
        ':': token.COLON,
        '/': token.DIVIDE,
        '.': token.DOT,
        '==': token.EQUAL,
        '>': token.GREATER_THAN,
        '>=': token.GREATER_THAN_EQUAL,
        '<': token.LESS_THAN,
        '<=': token.LESS_THAN_EQUAL,
        '!=': token.NOT_EQUAL,
        '(': token.LPAREN,
        ')': token.RPAREN,
        '-': token.MINUS,
        '%': token.MODULO,
        '*': token.MULTIPLY,
        '+': token.PLUS,
        ';': token.SEMICOLON,
    }
    if curSymbol not in punctuation:
        # Report characters that start no token instead of passing them
        # on as identifiers.
        raise error.MyPLError('unexpected symbol "%s"' % curSymbol,
                              self.line, startCol)
    return token.Token(punctuation[curSymbol], curSymbol, self.line,
                       startCol)
def next_token(self):
    """Scan and return the next token from the input stream.

    Whitespace and ``#`` comments are skipped first. This method never
    assigns ``self.line`` or ``self.column`` itself; it presumably relies
    on ``self.__read()`` to keep them current (confirm against the reader).
    Token columns are derived from ``self.column`` after the token's last
    character has been read. An empty string from ``self.__peek()`` is
    treated as end of input.

    Returns:
        token.Token: the next token. Int and float tokens carry the
        converted ``int`` / ``float`` value as their lexeme; string tokens
        carry their text without the surrounding quotes; at end of input
        an EOS token with an empty lexeme is returned.

    Raises:
        error.MyPLError: for malformed numbers (unexpected character,
            leading zero, two decimal points, no digit after or before the
            decimal point), for strings that reach a newline or the end of
            input before the closing quote, for ``!`` not followed by
            ``=``, and for any character that starts no token.
    """
    peekValue = self.__peek()

    # Skip whitespace and comments in a loop rather than recursing once
    # per skipped character, so long indented or commented stretches
    # cannot exhaust the interpreter's recursion limit.
    while peekValue.isspace() or peekValue == "#":
        if peekValue == "#":
            # Drop the comment up to (not including) the newline or EOS.
            while self.__peek() != "\n" and self.__peek() != "":
                self.__read()
        else:
            self.__read()
        peekValue = self.__peek()

    # if EOS, return EOS token
    if peekValue == "":
        return token.Token(token.EOS, "", self.line, self.column)

    # if digit, determine if float or int, then return
    if peekValue.isdigit():
        curr_lexeme = self.__read()
        # used to ensure a float only contains one decimal point
        isFloat = False
        # Characters that may directly follow a number without whitespace
        # (e.g. x=x+5; or 5!=x). Any other non-digit is an error.
        number_enders = ';=+-*/%<>(),"!:#'
        while True:
            upcoming = self.__peek()
            if (upcoming == "" or upcoming in number_enders
                    or upcoming.isspace()):
                break
            if upcoming == ".":
                if isFloat:
                    raise error.MyPLError(
                        "two decimal points in one number", self.line,
                        self.column)
                # The part before the point must itself be a valid int:
                # more than one digit may not start with 0.
                if len(curr_lexeme) > 1 and curr_lexeme[0] == "0":
                    raise error.MyPLError(
                        "float starts with invalid int", self.line,
                        self.column - (len(curr_lexeme) - 1))
                isFloat = True
            elif not upcoming.isdigit():
                raise error.MyPLError(
                    'unexpected symbol "' + upcoming + '"', self.line,
                    self.column)
            # if all is well, read the character
            curr_lexeme += self.__read()

        start_col = self.column - (len(curr_lexeme) - 1)
        if isFloat:
            # nothing after the decimal point
            if curr_lexeme[-1] == ".":
                raise error.MyPLError("missing digit in float value",
                                      self.line, self.column)
            return token.Token(token.FLOATVAL, float(curr_lexeme),
                               self.line, start_col)
        if len(curr_lexeme) > 1 and curr_lexeme[0] == "0":
            # Longer than one digit and starting with 0: the unexpected
            # symbol is always the second digit, even if it is also 0.
            raise error.MyPLError(
                'unexpected symbol "' + curr_lexeme[1] + '"', self.line,
                start_col)
        return token.Token(token.INTVAL, int(curr_lexeme), self.line,
                           start_col)

    # if alpha, determine if ID or keyword and return accordingly
    if peekValue.isalpha():
        curr_lexeme = self.__read()
        # read until something that isn't alphanumeric or underscore
        while (self.__peek().isalpha() or self.__peek().isdigit()
               or self.__peek() == "_"):
            curr_lexeme += self.__read()
        keywords = {
            "bool": token.BOOLTYPE,
            "int": token.INTTYPE,
            "float": token.FLOATTYPE,
            "string": token.STRINGTYPE,
            "struct": token.STRUCTTYPE,
            "and": token.AND,
            "or": token.OR,
            "not": token.NOT,
            "while": token.WHILE,
            "do": token.DO,
            "if": token.IF,
            "then": token.THEN,
            "else": token.ELSE,
            "elif": token.ELIF,
            "end": token.END,
            "fun": token.FUN,
            "var": token.VAR,
            "set": token.SET,
            "return": token.RETURN,
            "new": token.NEW,
            "nil": token.NIL,
            "true": token.BOOLVAL,
            "false": token.BOOLVAL,
        }
        # Words containing a digit or underscore can never match a
        # keyword, so a plain lookup covers both keywords and IDs.
        return token.Token(keywords.get(curr_lexeme, token.ID), curr_lexeme,
                           self.line, self.column - (len(curr_lexeme) - 1))

    # if quotes, return stringval token
    if peekValue == '"':
        curr_lexeme = ""
        self.__read()  # throw out opening quotes
        # read and stop before closing quotes or newline or EOS
        while self.__peek() not in ('"', "\n", ""):
            curr_lexeme += self.__read()
        if self.__peek() == '"':
            self.__read()
            # -2 to account for the two quotes
            return token.Token(token.STRINGVAL, curr_lexeme, self.line,
                               self.column - (len(curr_lexeme) - 1) - 2)
        if self.__peek() == "\n":
            raise error.MyPLError("reached newline reading string",
                                  self.line, self.column)
        # +1 to column to show where the closing quotes should be
        raise error.MyPLError("reached EOS reading string", self.line,
                              self.column + 1)

    # Tokens that are always exactly one character long.
    single_char_tokens = {
        ";": token.SEMICOLON,
        ",": token.COMMA,
        ":": token.COLON,
        "/": token.DIVIDE,
        "(": token.LPAREN,
        ")": token.RPAREN,
        "-": token.MINUS,
        "%": token.MODULO,
        "*": token.MULTIPLY,
        "+": token.PLUS,
    }
    if peekValue in single_char_tokens:
        # Read before building the token so that the column used below
        # is the one after the read.
        self.__read()
        return token.Token(single_char_tokens[peekValue], peekValue,
                           self.line, self.column)

    # Operators that change meaning when directly followed by '=':
    # symbol -> (type when alone, type when followed by '=').
    comparison_ops = {
        "=": (token.ASSIGN, token.EQUAL),
        ">": (token.GREATER_THAN, token.GREATER_THAN_EQUAL),
        "<": (token.LESS_THAN, token.LESS_THAN_EQUAL),
    }
    if peekValue in comparison_ops:
        plain, with_equals = comparison_ops[peekValue]
        self.__read()
        if self.__peek() == "=":
            self.__read()
            return token.Token(with_equals, peekValue + "=", self.line,
                               self.column - 1)
        return token.Token(plain, peekValue, self.line, self.column)

    if peekValue == "!":
        self.__read()
        if self.__peek() == "=":
            self.__read()
            return token.Token(token.NOT_EQUAL, "!=", self.line,
                               self.column - 1)
        # -1 to column to go back one since we read it already
        raise error.MyPLError('unexpected symbol "!"', self.line,
                              self.column - 1)

    if peekValue == ".":
        self.__read()
        # IDs and keywords never start with a digit, so a dot directly
        # followed by a digit can only be a float missing its int part.
        if self.__peek().isdigit():
            raise error.MyPLError("missing intval before decimal point",
                                  self.line, self.column - 1)
        return token.Token(token.DOT, ".", self.line, self.column)

    # if we reach this point, there is something very strange in the source
    raise error.MyPLError('unexpected symbol "' + peekValue + '"',
                          self.line, self.column)
def next_token(self): symbol = '' currToken = '' currColumn = self.column + 1 currLine = self.line if self.__peek() == '': symbol = token.EOS currToken = '' the_token = token.Token(symbol, currToken, currLine, 0) return the_token elif self.__peek() == '#': while self.__peek() != '\n': self.__read() return self.next_token() elif self.__peek() == '\n': self.__read() self.line += 1 self.column = 0 return self.next_token() elif self.__peek() == '\t': while self.__peek() == '\t': self.__read() self.column += 1 return self.next_token() elif self.__peek() == ' ': while self.__peek() == ' ': self.__read() self.column += 1 return self.next_token() elif self.__peek() == '"': self.__read() self.column += 1 symbol = token.STRINGVAL while self.__peek() != '"': currToken += self.__read() self.column += 1 self.__read() self.column += 1 elif self.__peek() == '=': self.__read() self.column += 1 if self.__peek() == '=': self.__read() self.column += 1 symbol = token.EQUAL currToken = '==' else: symbol = token.ASSIGN currToken = '=' elif self.__peek() == ':': self.__read() self.column += 1 symbol = token.COLON currToken = ':' elif self.__peek() == ',': self.__read() self.column += 1 symbol = token.COMMA currToken = ',' elif self.__peek() == '/': self.__read() self.column += 1 symbol = token.DIVIDE currToken = '/' elif self.__peek().isdigit(): symbol = token.INTVAL while self.__peek().isdigit(): currToken += self.__read() self.column += 1 if self.__peek() == '.': symbol = token.FLOATVAL currToken += self.__read() self.column += 1 if self.__peek().isdigit(): while self.__peek().isdigit(): currToken += self.__read() self.column += 1 #else: #exception elif self.__peek() == '.': self.__read() self.column += 1 symbol = token.DOT currToken = '.' 
elif self.__peek() == '<': self.__read() self.column += 1 symbol = token.LESS_THAN currToken = '<' if self.__peek() == '=': self.__read() self.column += 1 symbol = token.LESS_THAN_EQUAL currToken = '<=' elif self.__peek() == '>': self.__read() self.column += 1 symbol = token.GREATER_THAN currToken = '>' if self.__peek() == '=': self.__read() self.column += 1 symbol = token.GREATER_THAN_EQUAL currToken = '>=' elif self.__peek() == '!': self.__read() self.column += 1 #symbol = token.NOT currToken = '!' if self.__peek() == '=': self.__read() self.column += 1 symbol = token.NOT_EQUAL currToken = '!=' elif self.__peek() == '(': self.__read() self.column += 1 symbol = token.LPAREN currToken = '(' elif self.__peek() == ')': self.__read() self.column += 1 symbol = token.RPAREN currToken = ')' elif self.__peek() == '-': self.__read() self.column += 1 symbol = token.MINUS currToken = '-' elif self.__peek() == '%': self.__read() self.column += 1 symbol = token.MODULO currToken = '%' elif self.__peek() == '*': self.__read() self.column += 1 symbol = token.MULTIPLY currToken = '*' elif self.__peek() == '+': self.__read() self.column += 1 symbol = token.PLUS currToken = '+' elif self.__peek() == ';': self.__read() self.column += 1 symbol = token.SEMICOLON currToken = ';' elif self.__peek() == 'b': while self.__peek() != ' ' and self.__peek() != '' and self.__peek( ) != '\t' and self.__peek() != '\n' and self.__peek( ) != '.' 
and self.__peek() != ';' and self.__peek( ) != ',' and self.__peek() != '(' and self.__peek( ) != ')' and self.__peek() != '-' and self.__peek( ) != '+' and self.__peek() != '%' and self.__peek( ) != '*' and self.__peek() != '>' and self.__peek( ) != '<' and self.__peek() != '=' and self.__peek( ) != '/' and self.__peek() != ':': currToken += self.__read() self.column += 1 if currToken == 'bool': symbol = token.BOOLTYPE else: symbol = token.ID elif self.__peek() == 'i': while self.__peek() != ' ' and self.__peek() != '' and self.__peek( ) != '\t' and self.__peek() != '\n' and self.__peek( ) != '.' and self.__peek() != ';' and self.__peek( ) != ',' and self.__peek() != '(' and self.__peek( ) != ')' and self.__peek() != '-' and self.__peek( ) != '+' and self.__peek() != '%' and self.__peek( ) != '*' and self.__peek() != '>' and self.__peek( ) != '<' and self.__peek() != '=' and self.__peek( ) != '/' and self.__peek() != ':': currToken += self.__read() self.column += 1 if currToken == 'int': symbol = token.INTTYPE elif currToken == 'if': symbol = token.IF else: symbol = token.ID elif self.__peek() == 's': while self.__peek() != ' ' and self.__peek() != '' and self.__peek( ) != '\t' and self.__peek() != '\n' and self.__peek( ) != '.' and self.__peek() != ';' and self.__peek( ) != ',' and self.__peek() != '(' and self.__peek( ) != ')' and self.__peek() != '-' and self.__peek( ) != '+' and self.__peek() != '%' and self.__peek( ) != '*' and self.__peek() != '>' and self.__peek( ) != '<' and self.__peek() != '=' and self.__peek( ) != '/' and self.__peek() != ':': currToken += self.__read() self.column += 1 if currToken == 'string': symbol = token.STRINGTYPE elif currToken == 'struct': symbol = token.STRUCTTYPE elif currToken == 'set': symbol = token.SET else: symbol = token.ID elif self.__peek() == 'a': while self.__peek() != ' ' and self.__peek() != '' and self.__peek( ) != '\t' and self.__peek() != '\n' and self.__peek( ) != '.' 
and self.__peek() != ';' and self.__peek( ) != ',' and self.__peek() != '(' and self.__peek( ) != ')' and self.__peek() != '-' and self.__peek( ) != '+' and self.__peek() != '%' and self.__peek( ) != '*' and self.__peek() != '>' and self.__peek( ) != '<' and self.__peek() != '=' and self.__peek( ) != '/' and self.__peek() != ':': currToken += self.__read() self.column += 1 if currToken == 'and': symbol = token.AND else: symbol = token.ID elif self.__peek() == 'o': while self.__peek() != ' ' and self.__peek() != '' and self.__peek( ) != '\t' and self.__peek() != '\n' and self.__peek( ) != '.' and self.__peek() != ';' and self.__peek( ) != ',' and self.__peek() != '(' and self.__peek( ) != ')' and self.__peek() != '-' and self.__peek( ) != '+' and self.__peek() != '%' and self.__peek( ) != '*' and self.__peek() != '>' and self.__peek( ) != '<' and self.__peek() != '=' and self.__peek( ) != '/' and self.__peek() != ':': currToken += self.__read() self.column += 1 if currToken == 'or': symbol = token.OR else: symbol = token.ID elif self.__peek() == 'w': while self.__peek() != ' ' and self.__peek() != '' and self.__peek( ) != '\t' and self.__peek() != '\n' and self.__peek( ) != '.' and self.__peek() != ';' and self.__peek( ) != ',' and self.__peek() != '(' and self.__peek( ) != ')' and self.__peek() != '-' and self.__peek( ) != '+' and self.__peek() != '%' and self.__peek( ) != '*' and self.__peek() != '>' and self.__peek( ) != '<' and self.__peek() != '=' and self.__peek( ) != '/' and self.__peek() != ':': currToken += self.__read() self.column += 1 if currToken == 'while': symbol = token.WHILE else: symbol = token.ID elif self.__peek() == 'd': while self.__peek() != ' ' and self.__peek() != '' and self.__peek( ) != '\t' and self.__peek() != '\n' and self.__peek( ) != '.' 
and self.__peek() != ';' and self.__peek( ) != ',' and self.__peek() != '(' and self.__peek( ) != ')' and self.__peek() != '-' and self.__peek( ) != '+' and self.__peek() != '%' and self.__peek( ) != '*' and self.__peek() != '>' and self.__peek( ) != '<' and self.__peek() != '=' and self.__peek( ) != '/' and self.__peek() != ':': currToken += self.__read() self.column += 1 if currToken == 'do': symbol = token.DO else: symbol = token.ID elif self.__peek() == 't': while self.__peek() != ' ' and self.__peek() != '' and self.__peek( ) != '\t' and self.__peek() != '\n' and self.__peek( ) != '.' and self.__peek() != ';' and self.__peek( ) != ',' and self.__peek() != '(' and self.__peek( ) != ')' and self.__peek() != '-' and self.__peek( ) != '+' and self.__peek() != '%' and self.__peek( ) != '*' and self.__peek() != '>' and self.__peek( ) != '<' and self.__peek() != '=' and self.__peek( ) != '/' and self.__peek() != ':': currToken += self.__read() self.column += 1 if currToken == 'then': symbol = token.THEN elif currToken == 'true': symbol = token.BOOLVAL else: symbol = token.ID elif self.__peek() == 'e': while self.__peek() != ' ' and self.__peek() != '' and self.__peek( ) != '\t' and self.__peek() != '\n' and self.__peek( ) != '.' and self.__peek() != ';' and self.__peek( ) != ',' and self.__peek() != '(' and self.__peek( ) != ')' and self.__peek() != '-' and self.__peek( ) != '+' and self.__peek() != '%' and self.__peek( ) != '*' and self.__peek() != '>' and self.__peek( ) != '<' and self.__peek() != '=' and self.__peek( ) != '/' and self.__peek() != ':': currToken += self.__read() self.column += 1 if currToken == 'else': symbol = token.ELSE elif currToken == 'elif': symbol = token.ELIF elif currToken == 'end': symbol = token.END else: symbol = token.ID elif self.__peek() == 'f': while self.__peek() != ' ' and self.__peek() != '' and self.__peek( ) != '\t' and self.__peek() != '\n' and self.__peek( ) != '.' 
and self.__peek() != ';' and self.__peek( ) != ',' and self.__peek() != '(' and self.__peek( ) != ')' and self.__peek() != '-' and self.__peek( ) != '+' and self.__peek() != '%' and self.__peek( ) != '*' and self.__peek() != '>' and self.__peek( ) != '<' and self.__peek() != '=' and self.__peek( ) != '/' and self.__peek() != ':': currToken += self.__read() self.column += 1 if currToken == 'fun': symbol = token.FUN elif currToken == 'false': symbol = token.BOOLVAL elif currToken == 'float': symbol = token.FLOATTYPE else: symbol = token.ID elif self.__peek() == 'v': while self.__peek() != ' ' and self.__peek() != '' and self.__peek( ) != '\t' and self.__peek() != '\n' and self.__peek( ) != '.' and self.__peek() != ';' and self.__peek( ) != ',' and self.__peek() != '(' and self.__peek( ) != ')' and self.__peek() != '-' and self.__peek( ) != '+' and self.__peek() != '%' and self.__peek( ) != '*' and self.__peek() != '>' and self.__peek( ) != '<' and self.__peek() != '=' and self.__peek( ) != '/' and self.__peek() != ':': currToken += self.__read() self.column += 1 if currToken == 'var': symbol = token.VAR else: symbol = token.ID elif self.__peek() == 'r': while self.__peek() != ' ' and self.__peek() != '' and self.__peek( ) != '\t' and self.__peek() != '\n' and self.__peek( ) != '.' and self.__peek() != ';' and self.__peek( ) != ',' and self.__peek() != '(' and self.__peek( ) != ')' and self.__peek() != '-' and self.__peek( ) != '+' and self.__peek() != '%' and self.__peek( ) != '*' and self.__peek() != '>' and self.__peek( ) != '<' and self.__peek() != '=' and self.__peek( ) != '/' and self.__peek() != ':': currToken += self.__read() self.column += 1 if currToken == 'return': symbol = token.RETURN else: symbol = token.ID elif self.__peek() == 'n': while self.__peek() != ' ' and self.__peek() != '' and self.__peek( ) != '\t' and self.__peek() != '\n' and self.__peek( ) != '.' 
and self.__peek() != ';' and self.__peek( ) != ',' and self.__peek() != '(' and self.__peek( ) != ')' and self.__peek() != '-' and self.__peek( ) != '+' and self.__peek() != '%' and self.__peek( ) != '*' and self.__peek() != '>' and self.__peek( ) != '<' and self.__peek() != '=' and self.__peek( ) != '/' and self.__peek() != ':': currToken += self.__read() self.column += 1 if currToken == 'new': symbol = token.NEW elif currToken == 'nil': symbol = token.NIL elif currToken == 'not': symbol = token.NOT else: symbol = token.ID else: while self.__peek() != ' ' and self.__peek() != '' and self.__peek( ) != '\t' and self.__peek() != '\n' and self.__peek( ) != '.' and self.__peek() != ';' and self.__peek( ) != ',' and self.__peek() != '(' and self.__peek( ) != ')' and self.__peek() != '-' and self.__peek( ) != '+' and self.__peek() != '%' and self.__peek( ) != '*' and self.__peek() != '>' and self.__peek( ) != '<' and self.__peek() != '=' and self.__peek( ) != '/' and self.__peek() != ':': currToken += self.__read() self.column += 1 symbol = token.ID the_token = token.Token(symbol, currToken, currLine, currColumn) return the_token
# next_token: scan the input and return the next token as
# token.Token(tokentype, item, self.line, self.column).
#
# Flow, as far as the visible code shows:
#   * leading whitespace is consumed with self.__read(); only ' ' characters
#     advance self.column, and self.__comment_check() is invoked.
#   * tokentype defaults to token.ID and becomes token.EOS when
#     self.__peek() returns ''.
#   * the result of self.check_char() selects the branch: 0 leads to the
#     operator/punctuation path (with an optional trailing '=' for
#     '=', '>', '<' and '!'), a '"' leads to the string path, anything else
#     to the word/number path.
#   * numbers become token.INTVAL, or token.FLOATVAL when the lexeme
#     contains a '.'; the remaining lexemes are mapped by the final
#     if/elif chain.
#   * before returning, self.column is set to self.column_index (0 on EOS).
#
# Raises error.MyPLError for: a '.' directly followed by a digit, a newline
# or end of input inside a string, a number that starts with '0', a
# non-digit inside a number, and a missing or invalid digit after a decimal
# point.
#
# NOTE(review): check_char(), __comment_check() and column_index are defined
# elsewhere; the inline comments suggest check_char() returns 0 for
# operator characters and 1 for characters that end a token -- TODO confirm.
# NOTE(review): `decimal.isnumeric` in the float loop is missing its call
# parentheses, so that part of the loop condition is always truthy.
# NOTE(review): this definition is collapsed onto three physical lines, so
# each inline '#' comment swallows the statements that follow it; the
# original indentation has to be restored before the code can run.
def next_token(self): tokentype = token.ID item = '' isStringVal = False # makes sure that string values are set as string values error_message = "Lexer Error " while self.__peek().isspace(): # increments column for every character of whitespace found if self.__peek() == " ": self.column += 1 self.__read() self.__comment_check() if self.__peek() == '': # EOS end of file tokentype = token.EOS # special character such as plus or minus if self.check_char() == 0: self.column += 1 item += self.__read() # checks for and invalid dot if item == "." and str(self.__peek()).isnumeric(): error_message += "invalid float value" e = error.MyPLError(error_message, self.line, self.column_index - 1) raise e # checks for comparison operators if item == '=' or item == '>' or item == '<' or item == '!': if self.__peek() == '=': item += self.__read() elif self.__peek() == '"': # string value self.column += 1 # increments column to a tokens starting index self.__read() while self.__peek() != '"': if self.__peek() == '\n': error_message += "reached newline character in string" e = error.MyPLError(error_message, self.line, self.column_index) raise e elif self.__peek() == '': error_message += "reached EOS character in string" e = error.MyPLError(error_message, self.line, self.column_index) raise e else: item += self.__read() self.__read() tokentype = token.STRINGVAL isStringVal = True; else: # any other type of character self.column += 1 # increments column to a tokens starting index isnum = False # tracks if you are entering a number item += self.__read() # sets while loop to true if you are entering a number if item.isnumeric(): isnum = True if self.check_char() == 1: # checks for characters that should end the token # checks if a number starts with zero # the previous if statement will check if the next # character is a decimal point if isnum and item == '0': error_message += "unexpected symbol '" + str(self.__peek()) + "'" e = error.MyPLError(error_message, self.line, self.column_index) 
raise e # runs until it reaches a character that marks the end of the token while self.check_char() != 0 and self.__peek() != '"': if isnum and not item.isnumeric(): error_message += "unexpected value '" + str(item[len(item) - 1]) + "'" e = error.MyPLError(error_message, self.line, self.column_index - 1) raise e else: self.__comment_check() item += self.__read() if not isStringVal: if item == "string": # strips spaces tokentype = token.STRINGTYPE else: item = item.strip() if item.isnumeric() and tokentype != token.STRINGVAL: # int and float check if self.__peek() == ".": item += self.__read() decimal = str(self.__peek()) if not decimal.isnumeric(): # checks for an invalid float character error_message += "missing digit in float value" e = error.MyPLError(error_message, self.line, self.column_index - 1) raise e while decimal.isnumeric and (self.check_char() != 0 or self.__peek() == "."): decimal += self.__peek() if not decimal.isnumeric() and self.__peek() != ";": self.__peek() error_message += "unexpected character '" + str(self.__peek()) + "'" e = error.MyPLError(error_message, self.line, self.column_index + 1) raise e else: item += self.__read() item = item.strip() if item.count('.', 0, len(item)): tokentype = token.FLOATVAL else: tokentype = token.INTVAL # checks if the token is a special character and sets token type accordingly if not isStringVal: if item == "=": tokentype = token.ASSIGN elif item == ",": tokentype = token.COMMA elif item == ":": tokentype = token.COLON elif item == "/": tokentype = token.DIVIDE elif item == ".": tokentype = token.DOT elif item == "==": tokentype = token.EQUAL elif item == ">": tokentype = token.GREATER_THAN elif item == ">=": tokentype = token.GREATER_THAN_EQUAL elif item == "<": tokentype = token.LESS_THAN elif item == "<=": tokentype = token.LESS_THAN_EQUAL elif item == "!=": tokentype = token.NOT_EQUAL elif item == "(": tokentype = token.LPAREN elif item == ")": tokentype = token.RPAREN elif item == "-": tokentype = 
token.MINUS elif item == "%": tokentype = token.MODULO elif item == "*": tokentype = token.MULTIPLY elif item == "+": tokentype = token.PLUS elif item == "true" or item == "false": tokentype = token.BOOLVAL elif item == ";": tokentype = token.SEMICOLON elif item == "bool": tokentype = token.BOOLTYPE elif item == "int": tokentype = token.INTTYPE elif item == "float": tokentype = token.FLOATTYPE elif item == "struct": tokentype = token.STRUCTTYPE elif item == "and": tokentype = token.AND elif item == "or": tokentype = token.OR elif item == "not": tokentype = token.NOT elif item == "while": tokentype = token.WHILE elif item == "do": tokentype = token.DO elif item == "if": tokentype = token.IF elif item == "then": tokentype = token.THEN elif item == "else": tokentype = token.ELSE elif item == "elif": tokentype = token.ELIF elif item == "end": tokentype = token.END elif item == "fun": tokentype = token.FUN elif item == "var": tokentype = token.VAR elif item == "set": tokentype = token.SET elif item == "return": tokentype = token.RETURN elif item == "new": tokentype = token.NEW elif item == "nil": tokentype = token.NIL final_token = token.Token(tokentype, item, self.line, self.column) self.column = self.column_index # sets column to new value if tokentype == token.EOS: # sets column to 0 at the end of the line self.column = 0 return final_token
def next_token(self):
    """Scan and return the next token from the input stream.

    Whitespace and ``#`` comments (which run to the end of the line) are
    skipped first.  ``self.line`` and ``self.column`` are updated as
    characters are consumed through ``self.__read()``.

    Column convention (kept from the previous implementation): operator
    and punctuation tokens carry the value of ``self.column`` after their
    first character has been counted, while numbers, strings, keywords
    and identifiers carry that value minus one.

    Returns:
        token.Token: ``Token(tokentype, lexeme, line, column)``.  When the
        input is exhausted an ``EOS`` token with an empty lexeme is
        returned.

    Raises:
        error.MyPLError: for a number with a leading zero, a malformed
            float, a letter directly after a number, a ``!`` that is not
            part of ``!=``, an unterminated string, or a character that
            starts no token.
    """
    # The lookup tables are locals so the method stays self-contained
    # wherever it is placed (module level or inside the lexer class).
    keywords = {
        'and': token.AND,
        'or': token.OR,
        'not': token.NOT,
        'bool': token.BOOLTYPE,
        'int': token.INTTYPE,
        'float': token.FLOATTYPE,
        'string': token.STRINGTYPE,
        'struct': token.STRUCTTYPE,
        'while': token.WHILE,
        'do': token.DO,
        'if': token.IF,
        'then': token.THEN,
        'else': token.ELSE,
        'elif': token.ELIF,
        'end': token.END,
        'fun': token.FUN,
        'var': token.VAR,
        'set': token.SET,
        'return': token.RETURN,
        'new': token.NEW,
        'nil': token.NIL,
        'true': token.BOOLVAL,
        'false': token.BOOLVAL,
    }
    punctuation = {
        ',': token.COMMA,
        ':': token.COLON,
        '/': token.DIVIDE,
        '.': token.DOT,
        '(': token.LPAREN,
        ')': token.RPAREN,
        '-': token.MINUS,
        '%': token.MODULO,
        '*': token.MULTIPLY,
        '+': token.PLUS,
        ';': token.SEMICOLON,
    }
    # symbol -> (type when it stands alone, type when followed by '=')
    comparisons = {
        '=': (token.ASSIGN, token.EQUAL),
        '>': (token.GREATER_THAN, token.GREATER_THAN_EQUAL),
        '<': (token.LESS_THAN, token.LESS_THAN_EQUAL),
    }

    # Skip whitespace and comments iteratively (no recursion per comment).
    while True:
        while self.__peek().isspace():
            if self.__peek() == '\n':
                self.line += 1
                self.column = 0
            self.__read()
            self.column += 1
        symbol = self.__read()
        self.column += 1
        if symbol != '#':
            break
        # Stop at the newline *or* at end of input, so a comment on the
        # last line cannot loop forever.  The newline itself is left for
        # the whitespace loop above, which also resets the column.
        while self.__peek() not in ('', '\n'):
            self.__read()

    if symbol == '':
        return token.Token(token.EOS, '', self.line, self.column - 1)

    if symbol in comparisons:
        alone, with_equal = comparisons[symbol]
        if self.__peek() == '=':
            symbol += self.__read()
            col = self.column
            self.column += 1
            return token.Token(with_equal, symbol, self.line, col)
        return token.Token(alone, symbol, self.line, self.column)

    if symbol == '!':
        if self.__peek() == '=':
            symbol += self.__read()
            col = self.column
            self.column += 1
            return token.Token(token.NOT_EQUAL, symbol, self.line, col)
        # A lone '!' used to fall through and return None.
        raise error.MyPLError("unexpected symbol '!'", self.line,
                              self.column)

    if symbol in punctuation:
        return token.Token(punctuation[symbol], symbol, self.line,
                           self.column)

    if symbol.isdigit():
        col = self.column - 1
        if symbol == '0' and self.__peek().isdigit():
            raise error.MyPLError('unexpected number', self.line,
                                  self.column)
        is_float = False
        while self.__peek().isdigit() or self.__peek() == '.':
            char = self.__read()
            symbol += char
            self.column += 1
            if char == '.':
                if is_float:
                    # a second decimal point, e.g. 1.2.3
                    raise error.MyPLError('invalid number', self.line, col)
                if not self.__peek().isdigit():
                    raise error.MyPLError('missing digit in float value',
                                          self.line, self.column)
                symbol += self.__read()
                self.column += 1
                is_float = True
        if self.__peek().isalpha():
            raise error.MyPLError('unexpected symbol', self.line,
                                  self.column)
        number_type = token.FLOATVAL if is_float else token.INTVAL
        return token.Token(number_type, symbol, self.line, col)

    if symbol == '"':
        col = self.column - 1
        chars = []
        while self.__peek() != '"':
            if self.__peek() == '':
                # An unterminated string is an error, not a token.
                raise error.MyPLError('Improper string', self.line, col)
            chars.append(self.__read())
            self.column += 1  # every string character advances the column
        self.__read()  # closing quote
        self.column += 1
        return token.Token(token.STRINGVAL, ''.join(chars), self.line, col)

    if symbol.isalpha():
        col = self.column - 1
        while (self.__peek().isalpha() or self.__peek().isdigit()
               or self.__peek() == '_'):
            symbol += self.__read()
            self.column += 1
        return token.Token(keywords.get(symbol, token.ID), symbol,
                           self.line, col)

    # Anything else (e.g. '@' or a leading '_') starts no token.
    raise error.MyPLError("unexpected symbol '" + symbol + "'", self.line,
                          self.column)
def next_token(self):
    """Scan and return the next token from the input stream.

    Whitespace and ``#`` comments (which run to the end of the line) are
    skipped.  ``self.column`` is incremented before each character is
    read and reset to 0 when a newline is consumed, so every token
    carries the column of its first character; ``self.line`` is
    incremented on each newline.

    Returns:
        token.Token: ``Token(tokentype, lexeme, line, column)``.  An
        ``EOS`` token with an empty lexeme is returned once the input is
        exhausted.  String values may be delimited by ``"`` or ``'`` and
        are returned without their quotes.

    Raises:
        error.MyPLError: for a ``!`` that is not part of ``!=``, a decimal
            point that is not followed by a digit, a letter directly after
            a number, an unterminated string, or a character that starts
            no token.
    """
    # The lookup tables are locals so the method stays self-contained
    # wherever it is placed (module level or inside the lexer class).
    keywords = {
        'bool': token.BOOLTYPE,
        'int': token.INTTYPE,
        'float': token.FLOATTYPE,
        'string': token.STRINGTYPE,
        'struct': token.STRUCTTYPE,
        'and': token.AND,
        'or': token.OR,
        'not': token.NOT,
        'while': token.WHILE,
        'do': token.DO,
        'if': token.IF,
        'then': token.THEN,
        'else': token.ELSE,
        'elif': token.ELIF,
        'end': token.END,
        'fun': token.FUN,
        'var': token.VAR,
        'set': token.SET,
        'return': token.RETURN,
        'new': token.NEW,
        'nil': token.NIL,
        'true': token.BOOLVAL,
        'false': token.BOOLVAL,
    }
    punctuation = {
        ':': token.COLON,
        ',': token.COMMA,
        '/': token.DIVIDE,
        '.': token.DOT,
        '(': token.LPAREN,
        ')': token.RPAREN,
        '-': token.MINUS,
        '%': token.MODULO,
        '*': token.MULTIPLY,
        '+': token.PLUS,
        ';': token.SEMICOLON,
    }
    # symbol -> (type when it stands alone, type when followed by '=')
    comparisons = {
        '=': (token.ASSIGN, token.EQUAL),
        '>': (token.GREATER_THAN, token.GREATER_THAN_EQUAL),
        '<': (token.LESS_THAN, token.LESS_THAN_EQUAL),
    }

    # Skip newlines, whitespace and comments with a loop instead of one
    # recursive call per skipped character.
    while True:
        self.column += 1
        if self.__peek() == '\n':
            self.__read()
            self.line += 1
            self.column = 0
            continue
        symbol = self.__read()
        # Test the character that was read, not the one after it;
        # otherwise the final character of the input is dropped.
        if symbol == '':
            return token.Token(token.EOS, '', self.line, self.column)
        if symbol == '#':
            # Stop at the newline or at end of input; the newline is
            # handled by the next iteration.
            while self.__peek() not in ('', '\n'):
                self.__read()
            continue
        if not symbol.isspace():
            break

    start = self.column

    if symbol in comparisons:
        alone, with_equal = comparisons[symbol]
        if self.__peek() == '=':
            self.__read()
            self.column += 1
            return token.Token(with_equal, symbol + '=', self.line, start)
        return token.Token(alone, symbol, self.line, start)

    if symbol == '!':
        if self.__peek() == '=':
            self.__read()
            self.column += 1
            return token.Token(token.NOT_EQUAL, '!=', self.line, start)
        raise error.MyPLError("unexpected symbol '!'", self.line, start)

    if symbol in punctuation:
        return token.Token(punctuation[symbol], symbol, self.line, start)

    if symbol.isalpha():
        lexeme = symbol
        # A word ends at the first character that is not a letter, digit
        # or underscore (this includes end of input, where peek is '').
        while self.__peek().isalnum() or self.__peek() == '_':
            lexeme += self.__read()
            self.column += 1
        return token.Token(keywords.get(lexeme, token.ID), lexeme,
                           self.line, start)

    if symbol.isdigit():
        lexeme = symbol
        number_type = token.INTVAL
        while self.__peek().isdigit():
            lexeme += self.__read()
            self.column += 1
        if self.__peek() == '.':
            lexeme += self.__read()
            self.column += 1
            if not self.__peek().isdigit():
                raise error.MyPLError('missing digit in float value',
                                      self.line, self.column)
            while self.__peek().isdigit():
                lexeme += self.__read()
                self.column += 1
            number_type = token.FLOATVAL
        if self.__peek().isalpha():
            raise error.MyPLError(
                "unexpected symbol '" + self.__peek() + "'",
                self.line, self.column + 1)
        return token.Token(number_type, lexeme, self.line, start)

    if symbol in ('"', "'"):
        chars = []
        # The string ends at the same quote character that opened it.
        while self.__peek() != symbol:
            if self.__peek() == '':
                raise error.MyPLError('unterminated string value',
                                      self.line, start)
            chars.append(self.__read())
            self.column += 1
        self.__read()  # closing quote
        self.column += 1
        return token.Token(token.STRINGVAL, ''.join(chars), self.line,
                           start)

    # No rule matched: report it instead of silently returning None.
    raise error.MyPLError("unexpected symbol '" + symbol + "'", self.line,
                          start)
def next_token(self):
    """Scan and return the next token from the input stream.

    Whitespace and ``#`` comments (which run to the end of the line) are
    skipped.  ``self.column`` is incremented for every character read and
    reset to 0 on a newline, at which point ``self.line`` is incremented;
    every token carries the column of its first character.

    Returns:
        token.Token: ``Token(tokentype, lexeme, line, column)`` where
        ``tokentype`` is one of the string names used below (``'PLUS'``,
        ``'ID'``, ...).  An ``'EOS'`` token with an empty lexeme is
        returned once the input is exhausted.  String values are returned
        without their quotes.

    Raises:
        error.MyPLError: for a ``!`` that is not part of ``!=``, a number
            with a leading zero, a letter directly after a number, a
            decimal point that is not followed by a digit, a newline or
            end of input inside a string, an identifier that starts with
            ``_``, or any other unexpected character.
    """
    # The lookup tables are locals so the method stays self-contained
    # wherever it is placed (module level or inside the lexer class).
    keywords = {
        'bool': 'BOOLTYPE',
        'int': 'INTTYPE',
        'float': 'FLOATTYPE',
        'string': 'STRINGTYPE',
        'struct': 'STRUCTTYPE',
        'and': 'AND',
        'or': 'OR',
        'not': 'NOT',
        'while': 'WHILE',
        'do': 'DO',
        'if': 'IF',
        'then': 'THEN',
        'else': 'ELSE',
        'elif': 'ELIF',
        'end': 'END',
        'fun': 'FUN',
        'var': 'VAR',
        'set': 'SET',
        'return': 'RETURN',
        'new': 'NEW',
        'nil': 'NIL',
        'true': 'BOOLVAL',
        'false': 'BOOLVAL',
    }
    punctuation = {
        '+': 'PLUS',
        ',': 'COMMA',
        ':': 'COLON',
        '/': 'DIVIDE',
        '.': 'DOT',
        '(': 'LPAREN',
        ')': 'RPAREN',
        '-': 'MINUS',
        '%': 'MODULO',
        '*': 'MULTIPLY',
        ';': 'SEMICOLON',
    }
    # symbol -> (type when it stands alone, type when followed by '=')
    comparisons = {
        '=': ('ASSIGN', 'EQUAL'),
        '>': ('GREATER_THAN', 'GREATER_THAN_EQUAL'),
        '<': ('LESS_THAN', 'LESS_THAN_EQUAL'),
    }

    # Skip whitespace, newlines and comments with a loop; one recursive
    # call per skipped character can exhaust the recursion limit on long
    # runs of blanks or comment lines.
    while True:
        symbol = self.__read()
        if symbol == '':
            return token.Token('EOS', symbol, self.line, self.column)
        self.column += 1
        if symbol == '\n':
            self.column = 0
            self.line += 1
        elif symbol == '#':
            # Stop at the newline or at end of input; the newline is
            # consumed (and counted) by the next iteration.
            while self.__peek() not in ('', '\n'):
                self.__read()
        elif not symbol.isspace():
            # Tabs and other whitespace are skipped like spaces.
            break

    start = self.column

    if symbol in comparisons:
        alone, with_equal = comparisons[symbol]
        if self.__peek() == '=':
            symbol += self.__read()
            self.column += 1
            return token.Token(with_equal, symbol, self.line, start)
        return token.Token(alone, symbol, self.line, start)

    if symbol == '!':
        if self.__peek() == '=':
            symbol += self.__read()
            self.column += 1
            return token.Token('NOT_EQUAL', symbol, self.line, start)
        raise error.MyPLError("Unexpected symbol '!'", self.line,
                              self.column)

    if symbol in punctuation:
        return token.Token(punctuation[symbol], symbol, self.line, start)

    if symbol.isnumeric():
        if symbol == '0' and self.__peek().isnumeric():
            raise error.MyPLError(
                "unexpected symbol '" + self.__peek() + "' ",
                self.line, self.column)
        while self.__peek().isnumeric():
            symbol += self.__read()
            self.column += 1
        if self.__peek().isalpha():
            raise error.MyPLError(
                "unexpected symbol '" + self.__peek() + "' ",
                self.line, start)
        if self.__peek() != '.':
            return token.Token('INTVAL', symbol, self.line, start)
        symbol += self.__read()
        self.column += 1
        if not self.__peek().isnumeric():
            raise error.MyPLError('Missing digit in float value',
                                  self.line, start)
        while self.__peek().isnumeric():
            symbol += self.__read()
            self.column += 1
        if self.__peek().isalpha():
            raise error.MyPLError(
                "unexpected symbol '" + self.__peek() + "' ",
                self.line, start)
        return token.Token('FLOATVAL', symbol, self.line, start)

    if symbol == '"':
        chars = []
        while self.__peek() != '"':
            if self.__peek() == '\n':
                raise error.MyPLError('Newline in middle of string',
                                      self.line, start)
            if self.__peek() == '':
                raise error.MyPLError('Missing " in string', self.line,
                                      start)
            chars.append(self.__read())
            self.column += 1
        self.__read()  # closing quote
        self.column += 1
        return token.Token('STRINGVAL', ''.join(chars), self.line, start)

    # Everything else is collected as a word: the first character plus
    # any following letters, digits or underscores.
    while (self.__peek().isalpha() or self.__peek() == '_'
           or self.__peek().isnumeric()):
        symbol += self.__read()
        self.column += 1

    if symbol in keywords:
        return token.Token(keywords[symbol], symbol, self.line, start)
    if symbol[0].isalpha():
        return token.Token('ID', symbol, self.line, start)
    if symbol[0] == '_':
        raise error.MyPLError('Poorly formed identifier', self.line, start)
    raise error.MyPLError("Unexpected character(s): '" + symbol + "'",
                          self.line, start)
def next_token(self):
    """Scan the input stream and return the next token.

    Leading whitespace and ``#`` comments are skipped. A lexeme is then
    accumulated one character at a time until whitespace, end of input, or
    an operator/punctuation boundary is reached. The lexeme is classified
    as a string, int or float literal, a reserved word or symbol, or (when
    nothing else matches) an identifier. An empty lexeme, which only occurs
    at end of input, produces an EOS token.

    ``self.column`` is advanced by one for every consumed character and is
    reset to 1 after a newline; ``self.line`` is advanced on every newline.

    Returns:
        token.Token: built from the token type, the lexeme text (without
        quotes for strings), and the line/column where the lexeme started.

    Raises:
        error.MyPLError: for a newline or end of input inside a string, an
            integer with a leading zero, digits followed directly by a
            letter, a '!' not followed by '=', or a malformed float.
    """
    # Symbols that terminate a lexeme even without surrounding whitespace.
    no_space_symbols = ['.', '<=', '>=', '==', '!=', '*', ':', '/', '!', '+',
                        '-', ';', '=', ')', '(', '<', '>', ',', '%']
    comparison_ops = ['<=', '>=', '==', '!=', '<', '>']

    lexeme = ""
    value_kind = 0  # 1 = int, 2 = float, 3 = string, 0 = anything else
    keep_looping = True

    # Skip leading whitespace. Newlines are counted here so tokens that
    # follow blank lines or trailing spaces still report the right line.
    while self.__peek().isspace():
        if self.__read() == '\n':
            self.line += 1
            self.column = 1
        else:
            self.column += 1
    start_line = self.line
    start_column = self.column

    while keep_looping:
        upcoming = self.__peek()
        if upcoming == "" or upcoming.isspace():
            break

        # String literal: consume through the matching closing quote.
        if upcoming == '"' or upcoming == "'":
            closing_quote = self.__read()
            self.column += 1
            while self.__peek() != closing_quote:
                if self.__peek() == "":
                    # Without this check an unterminated string at end of
                    # input would spin forever on empty reads.
                    raise error.MyPLError("unterminated string at ", self.line, self.column)
                lexeme += self.__read()
                if self.__peek() == '\n':
                    raise error.MyPLError("uh oh, you had a new line in a string ", self.line, self.column)
                self.column += 1
            self.__read()  # closing quote
            self.column += 1
            value_kind = 3
            break

        lexeme += self.__read()

        # Comment: discard the rest of the line, then restart the lexeme
        # at the next non-whitespace character.
        if lexeme == '#':
            # Stop on end of input as well as newline so a comment on the
            # last line cannot loop forever.
            while self.__peek() not in ("", '\n'):
                self.__read()
            lexeme = ""
            while self.__peek().isspace():
                if self.__read() == '\n':
                    self.line += 1
                    self.column = 1
                else:
                    self.column += 1
            start_line = self.line
            start_column = self.column
            continue

        upcoming = self.__peek()

        # An integer may not start with a redundant leading zero.
        if lexeme == '0' and upcoming.isdigit():
            m = 'unexpected symbol "%s" at: ' % upcoming
            raise error.MyPLError(m, self.line, self.column)

        # Digits may not run straight into a letter.
        if lexeme.isdigit() and upcoming.isalpha():
            offending = self.__read()
            m = 'unexpected symbol "%s" at:' % offending
            raise error.MyPLError(m, self.line, self.column)

        # Digits followed by '.' are the start of a float, so the dot must
        # not be treated as a lexeme boundary.
        float_pending = upcoming == '.' and lexeme.isdigit()
        if not float_pending and (lexeme in no_space_symbols or upcoming in no_space_symbols):
            if lexeme == '!' and upcoming != '=':
                raise error.MyPLError("extra exclamation mark at ", self.line, self.column)
            # Keep reading only when a single character can still grow into
            # a two-character comparison operator ('<=', '>=', '==', '!=').
            keep_looping = len(lexeme) == 1 and (lexeme + upcoming) in comparison_ops

        # Account for the character read in this iteration.
        self.column += 1

    # Classify numeric literals (strings were already classified above).
    if value_kind != 3:
        if lexeme.isdigit():
            value_kind = 1
        else:
            only_digits_and_dots = True
            dot_count = 0
            if len(lexeme) > 1:
                for i, ch in enumerate(lexeme):
                    if ch == '.':
                        dot_count += 1
                        if i + 1 == len(lexeme):
                            # nothing after the decimal point
                            m = lexeme + "missing digit in float value at "
                            raise error.MyPLError(m, self.line, self.column)
                        elif not lexeme[i + 1].isdigit() and only_digits_and_dots:
                            # non-digit directly after the decimal point
                            m = 'unexpected character "%s" at ' % (self.__peek())
                            raise error.MyPLError(m, self.line, self.column)
                    elif not ch.isdigit():
                        only_digits_and_dots = False
            if only_digits_and_dots and dot_count == 1:
                value_kind = 2

    # A newline directly after the lexeme is consumed and counted here.
    if self.__peek() == '\n':
        self.line += 1
        self.column = 1
        self.__read()

    if value_kind == 1:
        token_type = token.INTVAL
    elif value_kind == 2:
        token_type = token.FLOATVAL
    elif value_kind == 3:
        token_type = token.STRINGVAL
    else:
        reserved = {
            '=': token.ASSIGN,
            ',': token.COMMA,
            ':': token.COLON,
            '/': token.DIVIDE,
            '.': token.DOT,
            '==': token.EQUAL,
            '': token.EOS,
            '>': token.GREATER_THAN,
            '>=': token.GREATER_THAN_EQUAL,
            '<': token.LESS_THAN,
            '<=': token.LESS_THAN_EQUAL,
            '!=': token.NOT_EQUAL,
            '(': token.LPAREN,
            ')': token.RPAREN,
            '-': token.MINUS,
            '%': token.MODULO,
            '*': token.MULTIPLY,
            '+': token.PLUS,
            ';': token.SEMICOLON,
            'bool': token.BOOLTYPE,
            'int': token.INTTYPE,
            'float': token.FLOATTYPE,
            'string': token.STRINGTYPE,
            'struct': token.STRUCTTYPE,
            'and': token.AND,
            'or': token.OR,
            'not': token.NOT,
            'while': token.WHILE,
            'do': token.DO,
            'if': token.IF,
            'then': token.THEN,
            'else': token.ELSE,
            'elif': token.ELIF,
            'end': token.END,
            'fun': token.FUN,
            'var': token.VAR,
            'set': token.SET,
            'return': token.RETURN,
            'new': token.NEW,
            'false': token.BOOLVAL,
            'true': token.BOOLVAL,
            'nil': token.NIL,
        }
        if lexeme == '':
            # EOS is reported one column before the current position.
            start_column -= 1
        # Anything that is not a reserved word or symbol is an identifier.
        token_type = reserved.get(lexeme, token.ID)

    return token.Token(token_type, lexeme, start_line, start_column)