Ejemplo n.º 1
0
 def visit_call_rvalue(self, callStmt):
     """Type check a function call that is used as an rvalue.

     Verifies that the callee is a known identifier, that the call passes
     the declared number of arguments, and that each argument's type
     matches the declared parameter type (a nil argument is accepted for
     any parameter).  On success ``self.current_type`` is set to the
     callee's name token, whose ``tokentype`` is overwritten in place with
     the value type the call produces.

     Raises:
         error.MyPLError: if the function is unknown, the argument count is
             wrong, or an argument has the wrong type.
     """
     fun_token = callStmt.fun
     fun_name = fun_token.lexeme
     if not self.sym_table.id_exists(fun_name):
         message = "function " + fun_name + " not recognized"
         raise error.MyPLError(message, fun_token.line, fun_token.column)

     # The stored info is indexed as [parameter types, return type].
     fun_info = self.sym_table.get_info(fun_name)
     param_types = fun_info[0]
     if len(param_types) != len(callStmt.args):
         raise error.MyPLError(
             "wrong number of parameters " + fun_name,
             fun_token.line, fun_token.column)

     for i, arg in enumerate(callStmt.args):
         arg.accept(self)
         arg_type = self.current_type
         if arg_type.tokentype not in (param_types[i], token.NIL):
             # Separate the pieces so the message stays readable.
             message = ("parameter " + str(i) + " is the wrong type:"
                        + " expected " + str(param_types[i])
                        + ", found " + str(arg_type.lexeme))
             raise error.MyPLError(message, arg_type.line, arg_type.column)

     # A declared primitive return type is reported as the matching value
     # type; any other return type is passed through unchanged.
     value_types = {
         token.INTTYPE: token.INTVAL,
         token.FLOATTYPE: token.FLOATVAL,
         token.BOOLTYPE: token.BOOLVAL,
         token.STRINGTYPE: token.STRINGVAL,
     }
     return_type = fun_info[1]
     # NOTE: this rewrites the tokentype of the call's own name token.
     fun_token.tokentype = value_types.get(return_type, return_type)
     self.current_type = fun_token
Ejemplo n.º 2
0
 def visit_complex_expr(self, complExpr):
     """Type check a binary arithmetic expression.

     Both operands are visited; they must have the same type, and that type
     may be neither bool nor nil.  String operands may only be combined
     with '+'.  On success ``self.current_type`` is the left operand's type
     token.

     Raises:
         error.MyPLError: if the operand types cannot be combined.  The
             error is reported at the right-hand operand.
     """
     complExpr.first_operand.accept(self)
     lhs = self.current_type
     complExpr.rest.accept(self)
     rhs = self.current_type

     if (lhs.tokentype != rhs.tokentype
             or lhs.tokentype in (token.BOOLVAL, token.NIL)):
         message = "cant combine a " + lhs.tokentype + " to a " + rhs.tokentype
         # Line and column are taken from the same token so that the
         # reported position is an actual source location.
         raise error.MyPLError(message, rhs.line, rhs.column)

     if lhs.tokentype == token.STRINGVAL and complExpr.math_rel.lexeme != '+':
         message = "can only add a " + lhs.tokentype + " to a " + rhs.tokentype
         raise error.MyPLError(message, rhs.line, rhs.column)

     self.current_type = lhs
Ejemplo n.º 3
0
 def visit_bool_expr(self, boolStmt):
     """Type check a boolean expression.

     The first expression is always visited.  When a relational operator is
     present the second expression is visited as well; the two types must
     match unless the right-hand side is nil, and nil on the right may only
     be compared with '==' or '!='.  When a boolean connector is present
     the remainder of the expression is visited too.

     Raises:
         error.MyPLError: if the two sides cannot be compared.
     """
     boolStmt.first_expr.accept(self)
     lhs = self.current_type

     if boolStmt.bool_rel is not None:
         boolStmt.second_expr.accept(self)
         rhs = self.current_type
         # NOTE(review): nil is only tolerated on the right-hand side here;
         # confirm whether `nil == x` is meant to be rejected.
         if lhs.tokentype != rhs.tokentype and rhs.tokentype != token.NIL:
             message = ("cant compare a " + lhs.tokentype + " with a "
                        + rhs.tokentype)
             # MyPLError takes (message, line, column), in that order.
             raise error.MyPLError(message, rhs.line, rhs.column)
         if (rhs.tokentype == token.NIL
                 and boolStmt.bool_rel.lexeme not in ("!=", "==")):
             raise error.MyPLError("can only check if == or != with nil",
                                   rhs.line, rhs.column)

     if boolStmt.bool_connector is not None:
         boolStmt.rest.accept(self)
Ejemplo n.º 4
0
 def visit_new_rvalue(self, newStmt):
     """Type check a ``new <struct>`` expression.

     The struct name must be a known identifier.  On success
     ``self.current_type`` is the struct-name token, whose ``tokentype`` is
     overwritten in place with the struct's name.

     Raises:
         error.MyPLError: if the struct name is not in the symbol table.
     """
     struct_token = newStmt.struct_type
     if not self.sym_table.id_exists(struct_token.lexeme):
         message = struct_token.lexeme + " does not exist"
         # MyPLError takes (message, line, column), in that order.
         raise error.MyPLError(message, struct_token.line,
                               struct_token.column)
     # NOTE: this mutates the AST token; the struct name becomes its type.
     struct_token.tokentype = struct_token.lexeme
     self.current_type = struct_token
Ejemplo n.º 5
0
    def visit_var_decl_stmt(self, var_decl):
        """Type check a variable declaration and record the variable's type.

        The initializer is checked once, before the new name is added to
        the symbol table.  The name must not already exist in the current
        environment.  With an explicit type the initializer must match it
        (nil is always accepted); without one the type is inferred from the
        initializer, which may then not be nil.  When ``self.inStruct`` is
        set, the member's type is also recorded in ``self.dict``.

        Raises:
            error.MyPLError: on a redeclaration, an unknown struct type, a
                type mismatch, or a nil initializer with no explicit type.
        """
        # Evaluate the initializer exactly once, while the new name is not
        # yet declared.  Visiting it again after add_id() would presumably
        # make a reference to a shadowed outer variable resolve to the new,
        # still-untyped entry.
        var_decl.var_expr.accept(self)
        rhs = self.current_type

        var_id = var_decl.var_id
        curr_env = self.sym_table.get_env_id()
        if self.sym_table.id_exists_in_env(var_id.lexeme, curr_env):
            msg = 'variable already defined in current environment'
            self.__error(msg, var_id)

        self.sym_table.add_id(var_id.lexeme)

        declared = var_decl.var_type
        if declared is not None:
            if declared.tokentype == token.ID:
                # A user-defined (struct) type must already be known.
                if not self.sym_table.id_exists(declared.lexeme):
                    message = declared.lexeme + " does not exist"
                    # MyPLError takes (message, line, column).
                    raise error.MyPLError(message, declared.line,
                                          declared.column)
                var_type = declared.lexeme
            else:
                # Primitive type keywords map onto the matching value type.
                value_types = {
                    token.INTTYPE: token.INTVAL,
                    token.STRINGTYPE: token.STRINGVAL,
                    token.FLOATTYPE: token.FLOATVAL,
                    token.BOOLTYPE: token.BOOLVAL,
                }
                var_type = value_types.get(declared.tokentype,
                                           declared.tokentype)
            self.sym_table.set_info(var_id.lexeme, var_type)
            if var_type != rhs.tokentype and rhs.tokentype != token.NIL:
                message = "cant assign a " + rhs.tokentype + " to a " + var_type
                raise error.MyPLError(message, rhs.line, rhs.column)
        else:
            var_type = rhs.tokentype
            self.sym_table.set_info(var_id.lexeme, var_type)
            if var_type == token.NIL:
                raise error.MyPLError(
                    "can only assign a nil to an explicitly defined variable",
                    rhs.line, rhs.column)

        if self.inStruct:
            self.dict[var_id.lexeme] = var_type
Ejemplo n.º 6
0
 def visit_assign_stmt(self, assign_stmt):
     """Type check an assignment statement.

     The target is visited first and whatever it leaves in
     ``self.current_type`` is taken as the target type; the value is then
     visited and its ``tokentype`` must equal that target type unless the
     value is nil.

     Raises:
         error.MyPLError: if the value's type does not match the target.
     """
     assign_stmt.lhs.accept(self)
     target_type = self.current_type

     assign_stmt.rhs.accept(self)
     value = self.current_type
     value_type = value.tokentype

     mismatch = value_type != token.NIL and value_type != target_type
     if mismatch:
         raise error.MyPLError(
             'mismatch type in assignment ' + target_type + " " + value_type,
             value.line, value.column)
Ejemplo n.º 7
0
 def visit_simple_rvalue(self, rvalue):
     """Type check a simple rvalue (a literal or a bare identifier).

     For a literal, ``self.current_type`` becomes the literal's own token.
     For an identifier, the name must exist in the symbol table;
     ``self.current_type`` then becomes a deep copy of the token whose
     ``tokentype`` is the type recorded for that name, so the original
     token is left untouched.

     Raises:
         error.MyPLError: if the identifier is not in the symbol table.
     """
     val = rvalue.val
     self.current_type = val
     if val.tokentype != token.ID:
         return

     if not self.sym_table.id_exists(val.lexeme):
         message = val.lexeme + " does not exist"
         # MyPLError takes (message, line, column), in that order.
         raise error.MyPLError(message, val.line, val.column)

     resolved = copy.deepcopy(val)
     resolved.tokentype = self.sym_table.get_info(val.lexeme)
     self.current_type = resolved
Ejemplo n.º 8
0
 def __type(self):
     """Consume one type token.

     <type> ::= ID | INTTYPE | FLOATTYPE | BOOLTYPE | STRINGTYPE

     Raises:
         error.MyPLError: if the current token is not a type.
     """
     current = self.current_token
     type_tokens = (token.ID, token.INTTYPE, token.FLOATTYPE,
                    token.BOOLTYPE, token.STRINGTYPE)
     if current.tokentype not in type_tokens:
         text = 'expected a type, found"' + current.lexeme + '" in parser'
         raise error.MyPLError(text, current.line, current.column)
     self.__advance()
Ejemplo n.º 9
0
    def visit_lvalue(self, lvalueStmt):
        """Resolve the type of an assignment target.

        A single-name path yields the type recorded for that name.  A
        dotted path starts from the type recorded for the first name and
        follows each further name through the member table recorded for
        the current struct type.  ``self.current_type`` is set to the
        resulting type value (not to a token).

        Raises:
            error.MyPLError: if the first name, or a struct type met while
                walking the path, is not in the symbol table.
        """
        path = lvalueStmt.path
        head = path[0]
        root = head.lexeme

        if len(path) == 1:
            if not self.sym_table.id_exists(root):
                message = root + " is not initialized"
                raise error.MyPLError(message, head.line, head.column)
            self.current_type = self.sym_table.get_info(root)
            return

        if not self.sym_table.id_exists(root):
            message = root + "- does not exist"
            # Report at the root name; no path index exists yet here.
            raise error.MyPLError(message, head.line, head.column)

        # Walk the intermediate members; struct_name is the type of the
        # path prefix processed so far.
        struct_name = self.sym_table.get_info(root)
        for member in path[1:-1]:
            if not self.sym_table.id_exists(struct_name):
                # Name the type that is missing, not the previous lookup.
                message = str(struct_name) + "- does not exist"
                raise error.MyPLError(message, member.line, member.column)
            members = self.sym_table.get_info(struct_name)
            struct_name = members[member.lexeme]

        members = self.sym_table.get_info(struct_name)
        field_type = members[path[-1].lexeme]

        # Primitive type keywords map onto the matching value type.
        value_types = {
            token.INTTYPE: token.INTVAL,
            token.FLOATTYPE: token.FLOATVAL,
            token.BOOLTYPE: token.BOOLVAL,
            token.STRINGTYPE: token.STRINGVAL,
        }
        self.current_type = value_types.get(field_type, field_type)
Ejemplo n.º 10
0
    def visit_id_rvalue(self, idrVal):
        """Resolve the type of an identifier path used as an rvalue.

        A single-name path yields the type recorded for that name.  A
        dotted path starts from the type recorded for the first name and
        follows each further name through the member table recorded for
        the current struct type.  ``self.current_type`` is set to the last
        token of the path, whose ``tokentype`` is overwritten in place with
        the resolved type.

        Raises:
            error.MyPLError: if the first name, or a struct type met while
                walking the path, is not in the symbol table.
        """
        path = idrVal.path
        head = path[0]
        root = head.lexeme

        if len(path) == 1:
            if not self.sym_table.id_exists(root):
                message = root + " does not exist"
                # MyPLError takes (message, line, column), in that order.
                raise error.MyPLError(message, head.line, head.column)
            head.tokentype = self.sym_table.get_info(root)
            self.current_type = head
            return

        if not self.sym_table.id_exists(root):
            message = root + "- does not exist"
            # Report at the root name; no path index exists yet here.
            raise error.MyPLError(message, head.line, head.column)

        # Walk the intermediate members; struct_name is the type of the
        # path prefix processed so far.
        struct_name = self.sym_table.get_info(root)
        for member in path[1:-1]:
            if not self.sym_table.id_exists(struct_name):
                # Name the type that is missing, not the previous lookup.
                message = str(struct_name) + " - does not exist"
                raise error.MyPLError(message, member.line, member.column)
            members = self.sym_table.get_info(struct_name)
            struct_name = members[member.lexeme]

        last = path[-1]
        members = self.sym_table.get_info(struct_name)
        field_type = members[last.lexeme]

        # Primitive type keywords map onto the matching value type.
        value_types = {
            token.INTTYPE: token.INTVAL,
            token.FLOATTYPE: token.FLOATVAL,
            token.BOOLTYPE: token.BOOLVAL,
            token.STRINGTYPE: token.STRINGVAL,
        }
        # NOTE: this rewrites the tokentype of the path's last AST token.
        last.tokentype = value_types.get(field_type, field_type)
        self.current_type = last
Ejemplo n.º 11
0
 def __mathrel(self):
     """Consume one arithmetic operator and return its token.

     <mathrel> ::= PLUS | MINUS | DIVIDE | MULTIPLY | MODULO

     Raises:
         error.MyPLError: if the current token is not an arithmetic
             operator.
     """
     operator = self.current_token
     math_ops = (token.PLUS, token.MINUS, token.DIVIDE, token.MULTIPLY,
                 token.MODULO)
     if operator.tokentype not in math_ops:
         text = ('expected a math operator, found "' + operator.lexeme
                 + '" in parser')
         raise error.MyPLError(text, operator.line, operator.column)
     self.__advance()
     return operator
Ejemplo n.º 12
0
 def __boolrel(self):
     """Consume one relational operator token.

     <boolrel> ::= EQUAL | LESS_THAN | GREATER_THAN | LESS_THAN_EQUAL |
                   GREATER_THAN_EQUAL | NOT_EQUAL

     Raises:
         error.MyPLError: if the current token is not a relational
             operator.
     """
     # The grammar rule is kept in one docstring; a second string literal
     # after the first would be a no-op statement, not documentation.
     current = self.current_token
     relational_ops = (token.EQUAL, token.LESS_THAN, token.LESS_THAN_EQUAL,
                       token.GREATER_THAN_EQUAL, token.GREATER_THAN,
                       token.NOT_EQUAL)
     if current.tokentype not in relational_ops:
         s = ('expected a conditional operator, found "' + current.lexeme
              + '" in parser')
         raise error.MyPLError(s, current.line, current.column)
     self.__advance()
Ejemplo n.º 13
0
 def __rvalue(self):
     """Recognize one rvalue without building any node.

     <rvalue> ::= STRINGVAL | INTVAL | BOOLVAL | FLOATVAL | NIL | NEW ID | <idrval>

     Raises:
         error.MyPLError: if the current token cannot start an rvalue.
     """
     literal_kinds = (token.STRINGVAL, token.INTVAL, token.BOOLVAL,
                      token.FLOATVAL, token.NIL)
     kind = self.current_token.tokentype

     if kind == token.NEW:
         # 'new' must be followed by the struct's identifier.
         self.__advance()
         self.__eat(token.ID, "expecting an identifier")
         return
     if kind == token.ID:
         self.__idrval()
         return
     if kind in literal_kinds:
         self.__advance()
         return

     offending = self.current_token
     text = 'expected a rvalue found"' + offending.lexeme + '" in parser'
     raise error.MyPLError(text, offending.line, offending.column)
Ejemplo n.º 14
0
 def __rvalue(self):
     """Parse one rvalue and return the AST node built for it.

     <rvalue> ::= STRINGVAL | INTVAL | BOOLVAL | FLOATVAL | NIL | NEW ID | <idrval>

     Returns:
         An ``ast.NewRValue`` for ``new ID``, an ``ast.SimpleRValue`` for a
         literal, or whatever ``__idrval`` returns for an identifier.

     Raises:
         error.MyPLError: if the current token cannot start an rvalue.
     """
     literal_kinds = (token.STRINGVAL, token.INTVAL, token.BOOLVAL,
                      token.FLOATVAL, token.NIL)
     kind = self.current_token.tokentype

     if kind == token.NEW:
         node = ast.NewRValue()
         self.__advance()
         # The token after 'new' is recorded first, then validated.
         node.struct_type = self.current_token
         self.__eat(token.ID, "expecting an identifier")
         return node

     if kind == token.ID:
         return self.__idrval()

     if kind in literal_kinds:
         node = ast.SimpleRValue()
         node.val = self.current_token
         self.__advance()
         return node

     offending = self.current_token
     text = 'expected a rvalue found"' + offending.lexeme + '" in parser'
     raise error.MyPLError(text, offending.line, offending.column)
Ejemplo n.º 15
0
    def visit_fun_decl_stmt(self, funStmt):
        """Type check a function declaration.

        The function name is added to the symbol table before the body is
        checked.  A new environment is pushed, each parameter is visited
        and its type collected, and the function's info is stored as
        ``[parameter types, return type]`` (the struct name is stored when
        the return type is an identifier).  The body is then checked with
        the "return" entry temporarily set to this function's return token
        type; afterwards the environment is popped and the previous
        "return" entry is restored.

        Raises:
            error.MyPLError: if two parameters share a name.
        """
        fun_name = funStmt.fun_name.lexeme
        self.sym_table.add_id(fun_name)

        param_types = []
        seen_names = set()
        self.sym_table.push_environment()
        for param in funStmt.params:
            param.accept(self)
            param_token = self.current_type
            if param_token.lexeme in seen_names:
                message = ("cant have parameters with the same name "
                           + param_token.lexeme)
                # MyPLError takes (message, line, column), in that order.
                raise error.MyPLError(message, param_token.line,
                                      param_token.column)
            param_types.append(param_token.tokentype)
            seen_names.add(param_token.lexeme)

        # The return type is stored as declared; no conversion to a value
        # type is done in this method.
        return_token = funStmt.return_type
        if return_token.tokentype != token.ID:
            declared_return = return_token.tokentype
        else:
            declared_return = return_token.lexeme
        self.sym_table.set_info(fun_name, [param_types, declared_return])

        # NOTE(review): "return" receives the raw token type, so a struct
        # return type is recorded as token.ID here - confirm this is what
        # the return-statement check expects.
        enclosing_return = self.sym_table.get_info("return")
        self.sym_table.set_info("return", return_token.tokentype)
        for stmt in funStmt.stmt_list.stmts:
            self.current_type = None
            stmt.accept(self)
        self.sym_table.pop_environment()
        self.sym_table.set_info("return", enclosing_return)
Ejemplo n.º 16
0
    def next_token(self):
        """Scan and return the next token from the input.

        Blanks, newlines and '#' comments (which run to the end of the
        line) are skipped first, in any order.  The lexeme is then read
        according to its first character: a letter starts an identifier or
        keyword, a double quote starts a string literal, a digit starts an
        int or float literal, and anything else is read as punctuation or
        end of stream.  ``self.line`` and ``self.column`` are updated as
        characters are consumed.

        Returns:
            token.Token: the next token.  Every token except EOS carries
            the column of its first character; a string token's lexeme has
            its surrounding quotes removed.

        Raises:
            error.MyPLError: on an unterminated string, a malformed
                number, or a '!' that is not followed by '='.
        """
        # Skip blanks, newlines and comments in whatever order they come,
        # so that indentation after a newline and runs of comment lines
        # are handled without recursion.
        while True:
            upcoming = self.__peek()
            if upcoming == ' ':
                self.__read()
                self.column += 1
            elif upcoming == '\n':
                self.__read()
                self.line += 1
                self.column = 0
            elif upcoming == '#':
                # Drop the comment text; the terminating newline (if any)
                # is consumed by the next pass of this loop.
                while self.__peek() not in ('\n', ''):
                    self.__read()
                    self.column += 1
            else:
                break

        lexeme = ''
        first = self.__peek()

        if first.isalpha():
            # Identifier or keyword: letters, digits and underscores.
            while (self.__peek().isalpha() or self.__peek().isdigit()
                   or self.__peek() == '_'):
                lexeme += self.__read()
                self.column += 1

        elif first == '"':
            # String literal: opening quote, body, closing quote.
            lexeme += self.__read()
            self.column += 1
            # Stop at end of input too, otherwise an unterminated string
            # at the end of the file would loop forever.
            while self.__peek() not in ('"', '\n', ''):
                lexeme += self.__read()
                self.column += 1
            if self.__peek() == '\n':
                raise error.MyPLError("reached newline reading string",
                                      self.line, self.column + 1)
            if self.__peek() != '"':
                raise error.MyPLError('missing closing quotes', self.line,
                                      self.column + 1)
            lexeme += self.__read()
            self.column += 1

        elif first.isdigit():
            # Integer part; a leading zero may not be followed by a digit.
            while self.__peek().isdigit():
                lexeme += self.__read()
                self.column += 1
                if lexeme == '0' and self.__peek().isdigit():
                    raise error.MyPLError(
                        'unexpected symbol "%s"' % self.__peek(), self.line,
                        self.column + 1)
            if self.__peek() == '.':
                # Fractional part makes this a float literal.
                lexeme += self.__read()
                self.column += 1
                has_fraction = False
                while self.__peek().isdigit():
                    lexeme += self.__read()
                    self.column += 1
                    has_fraction = True
                if self.__peek().isalpha():
                    raise error.MyPLError(
                        'unexpected symbol "%s"' % self.__peek(), self.line,
                        self.column + 1)
                if not has_fraction:
                    raise error.MyPLError("missing digit in float value",
                                          self.line, self.column + 1)
                if self.__peek() == '.':
                    raise error.MyPLError(
                        "Too many decimal points in float value", self.line,
                        self.column + 1)
            elif self.__peek().isalpha():
                raise error.MyPLError(
                    'unexpected symbol "%s"' % self.__peek(), self.line,
                    self.column + 1)

        else:
            # Punctuation, or the empty string at end of stream.
            lexeme += self.__read()
            if lexeme == '!' and self.__peek() != '=':
                # The '!' has not been counted yet, so it sits one column
                # past the current count.
                raise error.MyPLError('missing "=" after "!"', self.line,
                                      self.column + 1)
            # At end of stream nothing was read; leave the column alone.
            if lexeme != '':
                self.column += 1
            # Fold a following '=' into >=, <=, == or !=.
            if lexeme in ('>', '=', '!', '<') and self.__peek() == '=':
                lexeme += self.__read()
                self.column += 1

        if lexeme == '':
            return token.Token(token.EOS, lexeme, self.line, self.column)

        # self.column is the column of the last character consumed.
        start_column = self.column - len(lexeme) + 1

        fixed_lexemes = {
            '=': token.ASSIGN,
            ',': token.COMMA,
            ':': token.COLON,
            '/': token.DIVIDE,
            '.': token.DOT,
            '==': token.EQUAL,
            '>': token.GREATER_THAN,
            '>=': token.GREATER_THAN_EQUAL,
            '<': token.LESS_THAN,
            '<=': token.LESS_THAN_EQUAL,
            '!=': token.NOT_EQUAL,
            '(': token.LPAREN,
            ')': token.RPAREN,
            '-': token.MINUS,
            '%': token.MODULO,
            '*': token.MULTIPLY,
            '+': token.PLUS,
            ';': token.SEMICOLON,
            'bool': token.BOOLTYPE,
            'int': token.INTTYPE,
            'float': token.FLOATTYPE,
            'string': token.STRINGTYPE,
            'struct': token.STRUCTTYPE,
            'and': token.AND,
            'or': token.OR,
            'not': token.NOT,
            'while': token.WHILE,
            'do': token.DO,
            'if': token.IF,
            'then': token.THEN,
            'else': token.ELSE,
            'elif': token.ELIF,
            'end': token.END,
            'fun': token.FUN,
            'var': token.VAR,
            'set': token.SET,
            'return': token.RETURN,
            'new': token.NEW,
            'nil': token.NIL,
            'true': token.BOOLVAL,
            'false': token.BOOLVAL,
        }
        if lexeme in fixed_lexemes:
            return token.Token(fixed_lexemes[lexeme], lexeme, self.line,
                               start_column)
        if '"' in lexeme:
            # Only the string branch can put a quote into the lexeme.
            return token.Token(token.STRINGVAL, lexeme[1:-1], self.line,
                               start_column)
        if lexeme[0].isdigit():
            number_kind = token.FLOATVAL if '.' in lexeme else token.INTVAL
            return token.Token(number_kind, lexeme, self.line, start_column)
        return token.Token(token.ID, lexeme, self.line, start_column)
Ejemplo n.º 17
0
 def __error(self, error_msg, error_token):
     """Raise a type-checker error located at ``error_token``.

     The message is ``error_msg`` followed by the offending token's lexeme
     and a note that the error comes from the type checker.

     Raises:
         error.MyPLError: always.
     """
     text = error_msg + ', found "' + error_token.lexeme + '" in type checker'
     raise error.MyPLError(text, error_token.line, error_token.column)
Ejemplo n.º 18
0
 def __error(self, error_msg):
     """Raise a parser error located at the current token.

     The message is ``error_msg`` followed by the current token's lexeme
     and a note that the error comes from the parser.

     Raises:
         error.MyPLError: always.
     """
     offending = self.current_token
     text = error_msg + ', found "' + offending.lexeme + '" in parser'
     raise error.MyPLError(text, offending.line, offending.column)
Ejemplo n.º 19
0
    def next_token(self):
        peekValue = self.__peek()  # for efficiency purposes

        # check the next character and act from there

        # if EOS, return EOS token
        if peekValue == "":
            return token.Token(token.EOS, "", self.line, self.column)

        # if digit, determine if float or int, then return
        elif peekValue.isdigit():
            curr_lexeme = self.__read()
            # used to ensure a float only contains one decimal point
            isFloat = False

            # read until it's another important character in MyPL or whitespace because people might not put whitespace (ex. x=x+5; )
            # if there is a non-digit character, we'll raise an error in the loop
            while self.__peek(
            ) not in ';=+-*/%<>(),"' and not self.__peek().isspace():
                # if not digit or decimal, error
                if not self.__peek().isdigit() and self.__peek() != ".":
                    raise error.MyPLError(
                        'unexpected symbol "' + self.__peek() + '"', self.line,
                        self.column)

                # if decimal point, ensure this is the first one and that the first half of the number is a valid intval
                elif self.__peek() == "." and not isFloat:
                    if len(curr_lexeme) == 1 or (curr_lexeme[0] != "0"
                                                 and len(curr_lexeme) > 1):
                        isFloat = True
                    else:
                        raise error.MyPLError(
                            "float starts with invalid int", self.line,
                            self.column - (len(curr_lexeme) - 1))
                elif self.__peek() == "." and isFloat:
                    raise error.MyPLError("two decimal points in one number",
                                          self.line, self.column)

                # if all is well, read the character
                curr_lexeme += self.__read()

            # after we have the whole number,
            # verify it's valid and return the corresponding token
            if isFloat:
                # if there is nothing after the decimal point
                if curr_lexeme[-1] == ".":
                    raise error.MyPLError(
                        "missing digit in float value", self.line, self.column
                    )  # +1 to column to show where digit needs to be
                else:
                    return token.Token(token.FLOATVAL, float(curr_lexeme),
                                       self.line,
                                       self.column - (len(curr_lexeme) - 1))
            else:
                # check if valid intval
                if len(curr_lexeme) == 1 or (curr_lexeme[0] != "0"
                                             and len(curr_lexeme) > 1):
                    return token.Token(token.INTVAL, int(curr_lexeme),
                                       self.line,
                                       self.column - (len(curr_lexeme) - 1))
                else:
                    # this happens if the number is longer than one digit
                    # and starts with 0
                    # the unexpected symbol will always be the second digit,
                    # even if it's also 0
                    raise error.MyPLError(
                        'unexpected symbol "' + curr_lexeme[1] + '"',
                        self.line, self.column - (len(curr_lexeme) - 1))

        # if alpha, determine if ID or keyword and return accordingly
        elif peekValue.isalpha():
            curr_lexeme = self.__read()
            # expedites identification if the word has an underscore or digit
            isDefinitelyID = False

            # read until something that isn't alphanumeric or underscore
            while self.__peek().isalpha() or self.__peek().isdigit(
            ) or self.__peek() == "_":
                if self.__peek().isdigit() or self.__peek() == "_":
                    isDefinitelyID = True
                curr_lexeme += self.__read()

            # identify what keyword the token is, or if it's an ID
            if isDefinitelyID:
                return token.Token(token.ID, curr_lexeme, self.line,
                                   self.column - (len(curr_lexeme) - 1))
            elif curr_lexeme == "bool":
                return token.Token(token.BOOLTYPE, curr_lexeme, self.line,
                                   self.column - (len(curr_lexeme) - 1))
            elif curr_lexeme == "int":
                return token.Token(token.INTTYPE, curr_lexeme, self.line,
                                   self.column - (len(curr_lexeme) - 1))
            elif curr_lexeme == "float":
                return token.Token(token.FLOATTYPE, curr_lexeme, self.line,
                                   self.column - (len(curr_lexeme) - 1))
            elif curr_lexeme == "string":
                return token.Token(token.STRINGTYPE, curr_lexeme, self.line,
                                   self.column - (len(curr_lexeme) - 1))
            elif curr_lexeme == "struct":
                return token.Token(token.STRUCTTYPE, curr_lexeme, self.line,
                                   self.column - (len(curr_lexeme) - 1))
            elif curr_lexeme == "and":
                return token.Token(token.AND, curr_lexeme, self.line,
                                   self.column - (len(curr_lexeme) - 1))
            elif curr_lexeme == "or":
                return token.Token(token.OR, curr_lexeme, self.line,
                                   self.column - (len(curr_lexeme) - 1))
            elif curr_lexeme == "not":
                return token.Token(token.NOT, curr_lexeme, self.line,
                                   self.column - (len(curr_lexeme) - 1))
            elif curr_lexeme == "while":
                return token.Token(token.WHILE, curr_lexeme, self.line,
                                   self.column - (len(curr_lexeme) - 1))
            elif curr_lexeme == "do":
                return token.Token(token.DO, curr_lexeme, self.line,
                                   self.column - (len(curr_lexeme) - 1))
            elif curr_lexeme == "if":
                return token.Token(token.IF, curr_lexeme, self.line,
                                   self.column - (len(curr_lexeme) - 1))
            elif curr_lexeme == "then":
                return token.Token(token.THEN, curr_lexeme, self.line,
                                   self.column - (len(curr_lexeme) - 1))
            elif curr_lexeme == "else":
                return token.Token(token.ELSE, curr_lexeme, self.line,
                                   self.column - (len(curr_lexeme) - 1))
            elif curr_lexeme == "elif":
                return token.Token(token.ELIF, curr_lexeme, self.line,
                                   self.column - (len(curr_lexeme) - 1))
            elif curr_lexeme == "end":
                return token.Token(token.END, curr_lexeme, self.line,
                                   self.column - (len(curr_lexeme) - 1))
            elif curr_lexeme == "fun":
                return token.Token(token.FUN, curr_lexeme, self.line,
                                   self.column - (len(curr_lexeme) - 1))
            elif curr_lexeme == "var":
                return token.Token(token.VAR, curr_lexeme, self.line,
                                   self.column - (len(curr_lexeme) - 1))
            elif curr_lexeme == "set":
                return token.Token(token.SET, curr_lexeme, self.line,
                                   self.column - (len(curr_lexeme) - 1))
            elif curr_lexeme == "return":
                return token.Token(token.RETURN, curr_lexeme, self.line,
                                   self.column - (len(curr_lexeme) - 1))
            elif curr_lexeme == "new":
                return token.Token(token.NEW, curr_lexeme, self.line,
                                   self.column - (len(curr_lexeme) - 1))
            elif curr_lexeme == "nil":
                return token.Token(token.NIL, curr_lexeme, self.line,
                                   self.column - (len(curr_lexeme) - 1))
            elif curr_lexeme == "true" or curr_lexeme == "false":
                return token.Token(token.BOOLVAL, curr_lexeme, self.line,
                                   self.column - (len(curr_lexeme) - 1))
            else:  # must be ID by process of elimination
                return token.Token(token.ID, curr_lexeme, self.line,
                                   self.column - (len(curr_lexeme) - 1))

        # if whitespace, read past it without doing anything
        elif peekValue.isspace():
            self.__read()
            return self.next_token()

        # if semicolon... (first because it's extremely common)
        elif peekValue == ";":
            self.__read()
            return token.Token(token.SEMICOLON, ";", self.line, self.column)

        # if quotes, return stringval token
        # need to ensure the opening quotes are closed
        elif peekValue == '"':
            curr_lexeme = ""
            self.__read()  # throw out opening quotes

            # read and stop before closing quotes or newline or EOS
            while self.__peek() != '"' and self.__peek(
            ) != "\n" and self.__peek() != "":
                curr_lexeme += self.__read()
            if self.__peek() == '"':  # if it ended with closing quotes
                self.__read()
                return token.Token(token.STRINGVAL, curr_lexeme, self.line,
                                   self.column - (len(curr_lexeme) - 1) -
                                   2)  # -2 to account for quotes
            elif self.__peek() == "\n":
                raise error.MyPLError("reached newline reading string",
                                      self.line, self.column)
            elif self.__peek() == "":
                raise error.MyPLError(
                    "reached EOS reading string", self.line, self.column + 1
                )  # +1 to column to show where the closing quotes should be
            else:
                raise error.MyPLError("something strange happened", self.line,
                                      self.column)

        # if comment, go until the end of the line/file without keeping anything
        elif peekValue == "#":
            while self.__peek() != "\n" and self.__peek() != "":
                self.__read()
            return self.next_token()

        # if any other recognized character in MyPL
        elif peekValue == "=":
            self.__read()
            if self.__peek() == "=":  # if there is a second "="
                self.__read()
                return token.Token(token.EQUAL, "==", self.line,
                                   self.column - 1)
            else:
                return token.Token(token.ASSIGN, "=", self.line, self.column)
        elif peekValue == ",":
            # reading here instead of in the return
            # so that line and column update properly
            self.__read()
            return token.Token(token.COMMA, ",", self.line, self.column)
        elif peekValue == ":":
            self.__read()
            return token.Token(token.COLON, ":", self.line, self.column)
        elif peekValue == "/":
            self.__read()
            return token.Token(token.DIVIDE, "/", self.line, self.column)
        elif peekValue == ".":
            self.__read()
            # a dot will never have numbers after it unless it's a float
            # because IDs cannot start with numbers, and no keywords do either
            # so if we got here, a dot with numbers after is a malformed float
            if self.__peek().isdigit():
                raise error.MyPLError("missing intval before decimal point",
                                      self.line, self.column - 1)
            else:
                return token.Token(token.DOT, ".", self.line, self.column)
        elif peekValue == ">":
            self.__read()
            if self.__peek() == "=":  # if the next character is "="
                self.__read()
                return token.Token(token.GREATER_THAN_EQUAL, ">=", self.line,
                                   self.column - 1)
            else:
                return token.Token(token.GREATER_THAN, ">", self.line,
                                   self.column)
        elif peekValue == "<":
            self.__read()
            if self.__peek() == "=":  # if the next character is "="
                self.__read()
                return token.Token(token.LESS_THAN_EQUAL, "<=", self.line,
                                   self.column - 1)
            else:
                return token.Token(token.LESS_THAN, "<", self.line,
                                   self.column)
        elif peekValue == "!":
            self.__read()
            if self.__peek() == "=":
                self.__read()
                return token.Token(token.NOT_EQUAL, "!=", self.line,
                                   self.column - 1)
            else:
                raise error.MyPLError(
                    'unexpected symbol "!"', self.line, self.column -
                    1)  # -1 to column to go back one since we read it already
        elif peekValue == "(":
            self.__read()
            return token.Token(token.LPAREN, "(", self.line, self.column)
        elif peekValue == ")":
            self.__read()
            return token.Token(token.RPAREN, ")", self.line, self.column)
        elif peekValue == "-":
            self.__read()
            return token.Token(token.MINUS, "-", self.line, self.column)
        elif peekValue == "%":
            self.__read()
            return token.Token(token.MODULO, "%", self.line, self.column)
        elif peekValue == "*":
            self.__read()
            return token.Token(token.MULTIPLY, "*", self.line, self.column)
        elif peekValue == "+":
            self.__read()
            return token.Token(token.PLUS, "+", self.line, self.column)

        # if we reach this point, there is something very strange in the source
        else:
            raise error.MyPLError('unexpected symbol "' + peekValue + '"',
                                  self.line, self.column)
Ejemplo n.º 20
0
    def next_token(self):
        """Scan the input stream and return the next token.

        Whitespace, newlines and '#' comments are skipped. ``self.line`` and
        ``self.column`` are updated as characters are consumed.

        Returns:
            token.Token: the next token, positioned at the line and column of
            its first character. Once the stream is exhausted an EOS token
            with an empty lexeme is returned.

        Raises:
            error.MyPLError: for a malformed number (leading zero, second
            decimal point, trailing decimal point, letter directly after the
            digits), an unterminated string, or a character that cannot start
            a token.
        """
        # Skip whitespace and comments with a loop rather than one recursive
        # call per skipped character, so long runs of blanks or comment lines
        # cannot exhaust the interpreter's recursion limit.
        while True:
            tokenStartCol = self.column + 1

            # __peek() yields an empty (falsy) string at end of stream
            if not self.__peek():
                return token.Token(token.EOS, "", self.line,
                                   tokenStartCol - 1)

            symbol = self.__read()
            self.column += 1

            if symbol == "#":
                # Discard the rest of the comment. Stop at end of stream as
                # well as at a newline; otherwise a comment on the last line
                # with no trailing newline would loop forever.
                while self.__peek() and self.__peek() != "\n":
                    self.__read()
                if self.__peek() == "\n":
                    self.__read()
                    self.column = 0
                    self.line += 1
                continue

            if symbol == "\n":
                self.line += 1
                self.column = 0
                continue

            if symbol.isspace():
                continue

            break

        word = symbol

        # identifiers and keywords
        if symbol.isalpha():
            # after the first letter, letters, digits and '_' are all allowed
            while (self.__peek().isalpha() or self.__peek() == "_"
                   or self.__peek().isdigit()):
                word += self.__read()
                self.column += 1

            keywords = {
                'and': token.AND,
                'or': token.OR,
                'not': token.NOT,
                'while': token.WHILE,
                'do': token.DO,
                'if': token.IF,
                'then': token.THEN,
                'else': token.ELSE,
                'elif': token.ELIF,
                'end': token.END,
                'fun': token.FUN,
                'var': token.VAR,
                'set': token.SET,
                'return': token.RETURN,
                'new': token.NEW,
                'nil': token.NIL,
                'true': token.BOOLVAL,
                'false': token.BOOLVAL,
                'int': token.INTTYPE,
                'bool': token.BOOLTYPE,
                'float': token.FLOATTYPE,
                'string': token.STRINGTYPE,
                'struct': token.STRUCTTYPE,
            }
            # anything that is not a keyword is an identifier
            return token.Token(keywords.get(word, token.ID), word, self.line,
                               tokenStartCol)

        # integer and float literals
        if symbol.isdigit():
            # a multi-digit number may not start with 0
            if symbol == "0" and self.__peek().isdigit():
                raise error.MyPLError(
                    'unexpected symbol "' + self.__peek() + '"', self.line,
                    tokenStartCol)

            # floats cannot have more than one decimal point
            hasDotYet = False
            while self.__peek().isdigit() or self.__peek() == '.':
                symbol = self.__read()
                word += symbol
                self.column += 1

                if symbol == '.':
                    if hasDotYet:
                        raise error.MyPLError(
                            'unexpected symbol "' + self.__peek() + '"',
                            self.line, tokenStartCol)
                    hasDotYet = True

            if symbol == '.':
                # the literal ends in '.', so the fractional digits are missing
                raise error.MyPLError('missing digit in float value',
                                      self.line, self.column + 1)

            if self.__peek().isalpha():
                raise error.MyPLError(
                    'unexpected symbol "' + self.__peek() + '"', self.line,
                    tokenStartCol)

            numberType = token.FLOATVAL if hasDotYet else token.INTVAL
            return token.Token(numberType, word, self.line, tokenStartCol)

        # string literals
        if symbol == '"':
            while (self.__peek() and self.__peek() != '"'
                   and self.__peek() != '\n'):
                word += self.__read()
                self.column += 1

            # report running out of input separately from hitting a newline
            if not self.__peek():
                raise error.MyPLError("reached EOS reading string",
                                      self.line, self.column)
            if self.__peek() == '\n':
                raise error.MyPLError("reached newline reading string",
                                      self.line, self.column)

            # consume the closing quote; the lexeme excludes both quotes
            self.__read()
            self.column += 1
            return token.Token(token.STRINGVAL, word[1:], self.line,
                               tokenStartCol)

        # single-character punctuation and operators
        singleCharTokens = {
            '(': token.LPAREN,
            ')': token.RPAREN,
            ',': token.COMMA,
            '%': token.MODULO,
            '+': token.PLUS,
            '-': token.MINUS,
            ';': token.SEMICOLON,
            ':': token.COLON,
            '*': token.MULTIPLY,
            '/': token.DIVIDE,
            '.': token.DOT,
        }
        if symbol in singleCharTokens:
            return token.Token(singleCharTokens[symbol], symbol, self.line,
                               tokenStartCol)

        # operators that may be followed by '='
        withEquals = {
            '=': token.EQUAL,
            '>': token.GREATER_THAN_EQUAL,
            '<': token.LESS_THAN_EQUAL,
            '!': token.NOT_EQUAL,
        }
        withoutEquals = {
            '=': token.ASSIGN,
            '>': token.GREATER_THAN,
            '<': token.LESS_THAN,
        }
        if symbol in withEquals:
            if self.__peek() == '=':
                word += self.__read()
                self.column += 1
                return token.Token(withEquals[symbol], word, self.line,
                                   tokenStartCol)
            # a lone '!' is not a token and falls through to the error below
            if symbol in withoutEquals:
                return token.Token(withoutEquals[symbol], symbol, self.line,
                                   tokenStartCol)

        raise error.MyPLError('unexpected symbol "' + symbol + '"', self.line,
                              tokenStartCol)
Ejemplo n.º 21
0
    def next_token(self):
        """Read and return the next token from the input stream.

        Whitespace, newlines and '#' comments are skipped. ``self.column`` is
        advanced for every character consumed on the current line and reset
        to 0 when a newline is consumed; ``self.line`` is incremented then.

        Returns:
            token.Token: the next token. Its column is the column of the
            token's first character. An 'EOS' token with an empty lexeme is
            returned when the stream is exhausted.

        Raises:
            error.MyPLError: for a lone '!', a number with a leading zero, a
            letter directly after a number, a float with no digit after the
            '.', an unterminated string, an identifier starting with '_', or
            any other unexpected character.
        """
        # Skip whitespace and comments with a loop rather than a recursive
        # call per skipped character, so long runs cannot hit the recursion
        # limit.
        while True:
            symbol = self.__read()

            # __read() yields '' once the stream is exhausted
            if symbol == '':
                return token.Token('EOS', symbol, self.line, self.column)

            self.column += 1

            # newline resets column and updates line
            if symbol == '\n':
                self.column = 0
                self.line += 1
                continue

            # any other whitespace (spaces, tabs, carriage returns) is skipped
            if symbol.isspace():
                continue

            # Skip a comment up to, and including, its newline. Also stop at
            # end of stream so a comment on the final line with no trailing
            # newline cannot loop forever.
            if symbol == '#':
                while self.__peek() not in ('', '\n'):
                    self.__read()
                if self.__peek() == '\n':
                    self.__read()
                    self.line += 1
                    self.column = 0
                continue

            break

        # column of the first character of the token being built
        start_col = self.column

        # single-character symbols
        single_char = {
            '+': 'PLUS',
            ',': 'COMMA',
            ':': 'COLON',
            '/': 'DIVIDE',
            '.': 'DOT',
            '(': 'LPAREN',
            ')': 'RPAREN',
            '-': 'MINUS',
            '%': 'MODULO',
            '*': 'MULTIPLY',
            ';': 'SEMICOLON',
        }
        if symbol in single_char:
            return token.Token(single_char[symbol], symbol, self.line,
                               start_col)

        # symbols that may be followed by '=': (type alone, type with '=')
        comparison = {
            '=': ('ASSIGN', 'EQUAL'),
            '>': ('GREATER_THAN', 'GREATER_THAN_EQUAL'),
            '<': ('LESS_THAN', 'LESS_THAN_EQUAL'),
        }
        if symbol in comparison:
            plain_type, equals_type = comparison[symbol]
            if self.__peek() == '=':
                symbol += self.__read()
                self.column += 1
                return token.Token(equals_type, symbol, self.line, start_col)
            return token.Token(plain_type, symbol, self.line, start_col)

        # '!' is only valid as part of '!='
        if symbol == '!':
            if self.__peek() == '=':
                symbol += self.__read()
                self.column += 1
                return token.Token('NOT_EQUAL', symbol, self.line, start_col)
            raise error.MyPLError("Unexpected symbol '!'", self.line,
                                  self.column)

        # int / float values
        if symbol.isnumeric():
            # a number longer than one digit may not start with 0
            if symbol == '0' and self.__peek().isnumeric():
                raise error.MyPLError(
                    "unexpected symbol '" + self.__peek() + "' ", self.line,
                    self.column)

            while self.__peek().isnumeric():
                symbol += self.__read()
                self.column += 1

            # poorly formed due to letters
            if self.__peek().isalpha():
                raise error.MyPLError(
                    "unexpected symbol '" + self.__peek() + "' ", self.line,
                    start_col)

            if self.__peek() != '.':
                return token.Token('INTVAL', symbol, self.line, start_col)

            # floating point: at least one digit must follow the '.'
            symbol += self.__read()
            self.column += 1
            if not self.__peek().isnumeric():
                raise error.MyPLError('Missing digit in float value',
                                      self.line, start_col)
            while self.__peek().isnumeric():
                symbol += self.__read()
                self.column += 1

            # letters directly after a float
            if self.__peek().isalpha():
                raise error.MyPLError(
                    "unexpected symbol '" + self.__peek() + "' ", self.line,
                    start_col)
            return token.Token('FLOATVAL', symbol, self.line, start_col)

        # string value
        if symbol == '"':
            while self.__peek() != '"':
                if self.__peek() == '\n':
                    raise error.MyPLError('Newline in middle of string',
                                          self.line, start_col)
                if self.__peek() == '':
                    raise error.MyPLError('Missing " in string', self.line,
                                          start_col)
                symbol += self.__read()
                self.column += 1
            # read the ending "
            self.__read()
            self.column += 1
            # symbol still holds the opening quote; drop it from the lexeme
            return token.Token('STRINGVAL', symbol[1:], self.line, start_col)

        # Gather a run of letters, digits and underscores. This also runs when
        # the first character is not a letter, so the error raised at the end
        # reports the whole offending run.
        while (self.__peek().isalpha() or self.__peek() == '_'
               or self.__peek().isnumeric()):
            symbol += self.__read()
            self.column += 1

        # reserved words, including the two boolean values
        keywords = {
            'bool': 'BOOLTYPE',
            'int': 'INTTYPE',
            'float': 'FLOATTYPE',
            'string': 'STRINGTYPE',
            'struct': 'STRUCTTYPE',
            'and': 'AND',
            'or': 'OR',
            'not': 'NOT',
            'while': 'WHILE',
            'do': 'DO',
            'if': 'IF',
            'then': 'THEN',
            'else': 'ELSE',
            'elif': 'ELIF',
            'end': 'END',
            'fun': 'FUN',
            'var': 'VAR',
            'set': 'SET',
            'return': 'RETURN',
            'new': 'NEW',
            'nil': 'NIL',
            'true': 'BOOLVAL',
            'false': 'BOOLVAL',
        }
        if symbol in keywords:
            return token.Token(keywords[symbol], symbol, self.line, start_col)

        # identifiers
        if symbol[0].isalpha():
            return token.Token('ID', symbol, self.line, start_col)
        if symbol[0] == '_':
            raise error.MyPLError('Poorly formed identifier', self.line,
                                  start_col)

        # unexpected char error
        raise error.MyPLError("Unexpected character(s): '" + symbol + "'",
                              self.line, start_col)
 def __error(self, error_msg, target_token):
     """Raise a MyPLError positioned at the given token.

     Args:
         error_msg: message to attach to the error.
         target_token: object whose ``line`` and ``column`` attributes give
             the position to report.

     Raises:
         error.MyPLError: always.
     """
     # read the position first, then build and raise the error
     line, column = target_token.line, target_token.column
     raise error.MyPLError(error_msg, line, column)
Ejemplo n.º 23
0
    def next_token(self):
        tokentype = token.ID
        item = ''
        isStringVal = False  # makes sure that string values are set as string values
        error_message = "Lexer Error "

        while self.__peek().isspace():
            # increments column for every character of whitespace found
            if self.__peek() == " ":
                self.column += 1
            self.__read()

        self.__comment_check()

        if self.__peek() == '':  # EOS end of file
            tokentype = token.EOS

        # special character such as plus or minus
        if self.check_char() == 0:
            self.column += 1
            item += self.__read()

            # checks for and invalid dot
            if item == "." and str(self.__peek()).isnumeric():
                error_message += "invalid float value"
                e = error.MyPLError(error_message, self.line, self.column_index - 1)
                raise e
            # checks for comparison operators
            if item == '=' or item == '>' or item == '<' or item == '!':
                if self.__peek() == '=':
                    item += self.__read()

        elif self.__peek() == '"':  # string value
            self.column += 1  # increments column to a tokens starting index
            self.__read()
            while self.__peek() != '"':
                if self.__peek() == '\n':
                    error_message += "reached newline character in string"
                    e = error.MyPLError(error_message, self.line, self.column_index)
                    raise e
                elif self.__peek() == '':
                    error_message += "reached EOS character in string"
                    e = error.MyPLError(error_message, self.line, self.column_index)
                    raise e
                else:
                    item += self.__read()
            self.__read()
            tokentype = token.STRINGVAL
            isStringVal = True;
        else:  # any other type of character
            self.column += 1  # increments column to a tokens starting index
            isnum = False   # tracks if you are entering a number
            item += self.__read()
            # sets while loop to true if you are entering a number

            if item.isnumeric():
                isnum = True
            if self.check_char() == 1:
                # checks for characters that should end the token
                # checks if a number starts with zero
                # the previous if statement will check if the next
                # character is a decimal point
                if isnum and item == '0':
                    error_message += "unexpected symbol '" + str(self.__peek()) + "'"
                    e = error.MyPLError(error_message, self.line, self.column_index)
                    raise e
                # runs until it reaches a character that marks the end of the token

                while self.check_char() != 0 and self.__peek() != '"':
                    if isnum and not item.isnumeric():
                        error_message += "unexpected value '" + str(item[len(item) - 1]) + "'"
                        e = error.MyPLError(error_message, self.line, self.column_index - 1)
                        raise e
                    else:
                        self.__comment_check()
                        item += self.__read()
        if not isStringVal:
            if item == "string": # strips spaces
                tokentype = token.STRINGTYPE
            else:
                item = item.strip()

        if item.isnumeric() and tokentype != token.STRINGVAL:  # int and float check
            if self.__peek() == ".":
                item += self.__read()
                decimal = str(self.__peek())
                if not decimal.isnumeric():  # checks for an invalid float character
                    error_message += "missing digit in float value"
                    e = error.MyPLError(error_message, self.line, self.column_index - 1)
                    raise e
                while decimal.isnumeric and (self.check_char() != 0 or self.__peek() == "."):

                    decimal += self.__peek()
                    if not decimal.isnumeric() and self.__peek() != ";":
                        self.__peek()
                        error_message += "unexpected character '" + str(self.__peek()) + "'"
                        e = error.MyPLError(error_message, self.line, self.column_index + 1)
                        raise e
                    else:
                        item += self.__read()

            item = item.strip()
            if item.count('.', 0, len(item)):
                tokentype = token.FLOATVAL
            else:
                tokentype = token.INTVAL
        # checks if the token is a special character and sets token type accordingly
        if not isStringVal:
            if item == "=":
                tokentype = token.ASSIGN
            elif item == ",":
                tokentype = token.COMMA
            elif item == ":":
                tokentype = token.COLON
            elif item == "/":
                tokentype = token.DIVIDE
            elif item == ".":
                tokentype = token.DOT
            elif item == "==":
                tokentype = token.EQUAL
            elif item == ">":
                tokentype = token.GREATER_THAN
            elif item == ">=":
                tokentype = token.GREATER_THAN_EQUAL
            elif item == "<":
                tokentype = token.LESS_THAN
            elif item == "<=":
                tokentype = token.LESS_THAN_EQUAL
            elif item == "!=":
                tokentype = token.NOT_EQUAL
            elif item == "(":
                tokentype = token.LPAREN
            elif item == ")":
                tokentype = token.RPAREN
            elif item == "-":
                tokentype = token.MINUS
            elif item == "%":
                tokentype = token.MODULO
            elif item == "*":
                tokentype = token.MULTIPLY
            elif item == "+":
                tokentype = token.PLUS
            elif item == "true" or item == "false":
                tokentype = token.BOOLVAL
            elif item == ";":
                tokentype = token.SEMICOLON
            elif item == "bool":
                tokentype = token.BOOLTYPE
            elif item == "int":
                tokentype = token.INTTYPE
            elif item == "float":
                tokentype = token.FLOATTYPE
            elif item == "struct":
                tokentype = token.STRUCTTYPE
            elif item == "and":
                tokentype = token.AND
            elif item == "or":
                tokentype = token.OR
            elif item == "not":
                tokentype = token.NOT
            elif item == "while":
                tokentype = token.WHILE
            elif item == "do":
                tokentype = token.DO
            elif item == "if":
                tokentype = token.IF
            elif item == "then":
                tokentype = token.THEN
            elif item == "else":
                tokentype = token.ELSE
            elif item == "elif":
                tokentype = token.ELIF
            elif item == "end":
                tokentype = token.END
            elif item == "fun":
                tokentype = token.FUN
            elif item == "var":
                tokentype = token.VAR
            elif item == "set":
                tokentype = token.SET
            elif item == "return":
                tokentype = token.RETURN
            elif item == "new":
                tokentype = token.NEW
            elif item == "nil":
                tokentype = token.NIL

        final_token = token.Token(tokentype, item, self.line, self.column)
        self.column = self.column_index  # sets column to new value

        if tokentype == token.EOS:  # sets column to 0 at the end of the line
            self.column = 0

        return final_token
Ejemplo n.º 24
0
    def next_token(self):
        """Scan the input and return the next token.

        Whitespace and ``#`` comments (which run to the end of the line) are
        skipped first.  The next character then decides the token: an
        operator or punctuation mark, a numeric literal, a double-quoted
        string literal, or a word that is either a keyword or an identifier.

        Returns:
            token.Token: the scanned token.  A token of type ``token.EOS``
            is returned once ``__read`` yields the empty string.

        Lexical errors are reported the way this lexer has always reported
        them: the ``error.MyPLError`` is printed and the process is ended
        with ``exit()``.  A lone ``!`` and characters that start no token
        are now reported the same way instead of silently returning ``None``.
        """
        def fail(message, column):
            # NOTE(review): printing and exiting is kept for compatibility
            # with existing callers; raising the MyPLError would be the more
            # conventional choice -- confirm what the driver expects.
            print(error.MyPLError(message, self.line, column))
            exit()

        # Skip whitespace and comments.  This is a loop rather than a
        # recursive call so long runs of comment lines cannot exhaust the
        # recursion limit.
        while True:
            while self.__peek().isspace():
                if self.__peek() == '\n':
                    self.line += 1
                    self.column = 0
                self.__read()
                self.column += 1

            symbol = self.__read()
            self.column += 1
            if symbol != '#':
                break

            # Discard the comment text.  Stopping on '' as well keeps a
            # comment on the last line (no trailing newline) from looping
            # forever.
            while self.__peek() not in ('\n', ''):
                self.__read()
            if self.__peek() == '\n':
                self.__read()
                self.line += 1
                # same state the whitespace loop leaves behind after a newline
                self.column = 1

        if symbol == '':
            return token.Token(token.EOS, '', self.line, self.column - 1)

        # Operators that may be followed by '=' to form a two-character token:
        # symbol -> (type on its own, type when followed by '=').
        compound = {
            '=': (token.ASSIGN, token.EQUAL),
            '>': (token.GREATER_THAN, token.GREATER_THAN_EQUAL),
            '<': (token.LESS_THAN, token.LESS_THAN_EQUAL),
        }
        if symbol in compound:
            single_type, double_type = compound[symbol]
            if self.__peek() == '=':
                symbol += self.__read()
                col = self.column
                self.column += 1
                return token.Token(double_type, symbol, self.line, col)
            return token.Token(single_type, symbol, self.line, self.column)

        if symbol == '!':
            if self.__peek() == '=':
                symbol += self.__read()
                col = self.column
                self.column += 1
                return token.Token(token.NOT_EQUAL, symbol, self.line, col)
            # '!' is only meaningful as part of '!='.
            fail("unexpected symbol '!'", self.column)

        single_char = {
            ',': token.COMMA,
            ':': token.COLON,
            '/': token.DIVIDE,
            '.': token.DOT,
            '(': token.LPAREN,
            ')': token.RPAREN,
            '-': token.MINUS,
            '%': token.MODULO,
            '*': token.MULTIPLY,
            '+': token.PLUS,
            ';': token.SEMICOLON,
        }
        if symbol in single_char:
            return token.Token(single_char[symbol], symbol, self.line,
                               self.column)

        if symbol.isdigit():
            is_float = False
            col = self.column - 1

            # a multi-digit number may not start with zero
            if symbol == '0' and self.__peek().isdigit():
                fail('unexpected number', self.column)

            while self.__peek().isdigit() or self.__peek() == ".":
                symbol += self.__read()
                self.column += 1
                if symbol[-1] == ".":
                    if is_float:
                        # second decimal point in the same literal
                        fail('invalid number', col)
                    elif not self.__peek().isdigit():
                        fail('missing digit in float value', self.column)
                    else:
                        symbol += self.__read()
                        self.column += 1
                        is_float = True

            # a letter glued to the end of a number is not a valid token
            if self.__peek().isalpha():
                fail('unexpected symbol', self.column)

            number_type = token.FLOATVAL if is_float else token.INTVAL
            return token.Token(number_type, symbol, self.line, col)

        if symbol == '"':
            col = self.column - 1
            if self.__peek() == '"':
                # empty string literal
                self.__read()
                self.column += 1
                return token.Token(token.STRINGVAL, '', self.line, col)
            symbol = self.__read()
            # count the first character of the literal like every other
            # character that is consumed
            self.column += 1
            while not self.__peek() == '"':
                symbol += self.__read()
                self.column += 1
                if self.__peek() == '':
                    # input ended before the closing quote; the text read so
                    # far is still returned as a string token
                    print("Improper string")
                    break
            self.__read()
            self.column += 1
            return token.Token(token.STRINGVAL, symbol, self.line, col)

        if symbol.isalpha():
            col = self.column - 1
            while (self.__peek().isalpha() or self.__peek().isdigit()
                   or self.__peek() == '_'):
                symbol += self.__read()
                self.column += 1

            keywords = {
                'and': token.AND,
                'or': token.OR,
                'not': token.NOT,
                'bool': token.BOOLTYPE,
                'int': token.INTTYPE,
                'float': token.FLOATTYPE,
                'string': token.STRINGTYPE,
                'struct': token.STRUCTTYPE,
                'while': token.WHILE,
                'do': token.DO,
                'if': token.IF,
                'then': token.THEN,
                'else': token.ELSE,
                'elif': token.ELIF,
                'end': token.END,
                'fun': token.FUN,
                'var': token.VAR,
                'set': token.SET,
                'return': token.RETURN,
                'new': token.NEW,
                'nil': token.NIL,
                'true': token.BOOLVAL,
                'false': token.BOOLVAL,
            }
            # any word that is not a keyword is an identifier
            return token.Token(keywords.get(symbol, token.ID), symbol,
                               self.line, col)

        # no token starts with this character
        fail("unexpected symbol '" + symbol + "'", self.column)
Ejemplo n.º 25
0
 def __error(self, msg, the_token):
     """Raise a MyPLError carrying ``msg`` and the position of ``the_token``.

     The line and column stored on ``the_token`` are passed to the error
     as its location.
     """
     line, column = the_token.line, the_token.column
     failure = error.MyPLError(msg, line, column)
     raise failure
    def next_token(self):
        """Read one lexeme from the input and return it as a token.

        Whitespace and ``#`` comments in front of the lexeme are discarded.
        Characters are then accumulated until whitespace, end of input, or a
        symbol that needs no surrounding whitespace ends the lexeme.  The
        lexeme is classified as a string, int or float literal, a symbol or
        keyword, or (by default) an identifier.  An empty lexeme, which is
        what remains when the input is exhausted, yields an ``EOS`` token.

        Returns:
            token.Token: built from the token type, the lexeme text (string
            literals without their quotes) and the line and column recorded
            where the lexeme started.

        Raises:
            error.MyPLError: for a number with a leading zero, a digit
                immediately followed by a letter, a lone ``!``, a malformed
                float, a newline inside a string, or a string that is never
                closed.
        """
        lexeme = ""
        value_kind = 0  # 1 if the lexeme is an int, 2 if float, 3 if string, 0 otherwise
        keep_looping = True
        # symbols that end a lexeme even without surrounding whitespace
        no_space_symbols = ('.', '<=', '>=', '==', '!=', '*', ':', '/', '!',
                            '+', '-', ';', '=', ')', '(', '<', '>', ',', '%')
        two_char_operators = ('<=', '>=', '==', '!=')

        def skip_whitespace():
            # Consume whitespace while keeping the position in step: a
            # newline moves to column 1 of the next line (the same state the
            # end-of-line handling further down leaves behind), anything
            # else advances the column by one.
            while self.__peek().isspace():
                if self.__read() == '\n':
                    self.line += 1
                    self.column = 1
                else:
                    self.column += 1

        skip_whitespace()
        start_line = self.line
        start_column = self.column

        while (keep_looping and self.__peek() != ""
               and not self.__peek().isspace()):
            # A quote or '#' directly after a partial lexeme ends that
            # lexeme; the string/comment is handled by the next call.
            if lexeme and self.__peek() in ('"', "'", '#'):
                break

            # token is a string
            if self.__peek() == '"' or self.__peek() == "'":
                closing_quote = self.__read()
                self.column += 1
                while self.__peek() != closing_quote:
                    if self.__peek() == "":
                        # end of input before the closing quote
                        raise error.MyPLError("unterminated string ",
                                              self.line, self.column)
                    lexeme += self.__read()
                    if self.__peek() == '\n':
                        raise error.MyPLError("uh oh, you had a new line in a string ", self.line, self.column)
                    self.column += 1
                self.__read()  # closing quote
                self.column += 1
                value_kind = 3
                break

            # token is not a string: take the next character
            lexeme += self.__read()

            # comments run to the end of the line
            if lexeme == '#':
                # stopping on '' as well keeps a comment on the last line
                # from looping forever
                while self.__peek() not in ('\n', ''):
                    self.__read()
                lexeme = ""
                # the newline, blank lines and indentation that follow are
                # skipped with line/column tracking, and the token start is
                # moved to whatever comes next
                skip_whitespace()
                start_line = self.line
                start_column = self.column
                continue

            # bad number checking for number preceded by 0
            if lexeme == '0' and self.__peek().isdigit():
                m = 'unexpected symbol "%s" at: ' % self.__peek()
                raise error.MyPLError(m, self.line, self.column)
            # bad number checking for digit followed by alphabetical letter
            if lexeme.isdigit() and self.__peek().isalpha():
                offending = self.__read()
                m = 'unexpected symbol "%s" at:' % offending
                raise error.MyPLError(m, self.line, self.column)

            # A '.' right after digits belongs to a float literal, so the
            # symbol check below must not end the lexeme there.
            inside_number = self.__peek() == '.' and lexeme.isdigit()
            if not inside_number:
                if lexeme in no_space_symbols or self.__peek() in no_space_symbols:
                    if lexeme == '!' and self.__peek() != '=':
                        raise error.MyPLError("extra exclamation mark at ", self.line, self.column)
                    if len(lexeme) > 1:
                        keep_looping = False
                    elif lexeme + self.__peek() not in two_char_operators:
                        # A one-character lexeme keeps going only when the
                        # next character completes '<=', '>=', '==' or '!='.
                        # (The previous test for '<' and '>' was a chained
                        # comparison that was always false, so e.g. "<5"
                        # was read as a single lexeme.)
                        keep_looping = False

            # increment column count
            self.column += 1

        if value_kind != 3:
            # checks to see if the lexeme is an int
            if lexeme.isdigit():
                value_kind = 1
            # checks to see if it is a float
            else:
                only_digits_so_far = True  # False once a non-digit, non-dot character is seen
                number_of_dots = 0
                if len(lexeme) > 1:
                    for i, ch in enumerate(lexeme):
                        if ch == '.':
                            number_of_dots += 1
                            # float error where there is nothing after the dot
                            if i + 1 == len(lexeme):
                                m = lexeme + "missing digit in float value at "
                                raise error.MyPLError(m, self.line, self.column)
                            # float error where a non-digit follows the dot;
                            # report that character itself
                            elif not lexeme[i + 1].isdigit() and only_digits_so_far:
                                m = 'unexpected character "%s" at ' % (lexeme[i + 1])
                                raise error.MyPLError(m, self.line, self.column)
                        elif not ch.isdigit():
                            only_digits_so_far = False
                if only_digits_so_far and number_of_dots == 1:
                    value_kind = 2

        # end of the line: consume the newline right after the lexeme
        if self.__peek() == '\n':
            self.line += 1
            self.column = 1
            self.__read()

        if value_kind == 1:
            token_type = token.INTVAL
        elif value_kind == 2:
            token_type = token.FLOATVAL
        elif value_kind == 3:
            token_type = token.STRINGVAL
        else:
            # lexeme was not a literal value, except for maybe a bool
            fixed_lexemes = {
                '=': token.ASSIGN,
                ',': token.COMMA,
                ':': token.COLON,
                '/': token.DIVIDE,
                '.': token.DOT,
                '==': token.EQUAL,
                '': token.EOS,
                '>': token.GREATER_THAN,
                '>=': token.GREATER_THAN_EQUAL,
                '<': token.LESS_THAN,
                '<=': token.LESS_THAN_EQUAL,
                '!=': token.NOT_EQUAL,
                '(': token.LPAREN,
                ')': token.RPAREN,
                '-': token.MINUS,
                '%': token.MODULO,
                '*': token.MULTIPLY,
                '+': token.PLUS,
                ';': token.SEMICOLON,
                'bool': token.BOOLTYPE,
                'int': token.INTTYPE,
                'float': token.FLOATTYPE,
                'string': token.STRINGTYPE,
                'struct': token.STRUCTTYPE,
                'and': token.AND,
                'or': token.OR,
                'not': token.NOT,
                'while': token.WHILE,
                'do': token.DO,
                'if': token.IF,
                'then': token.THEN,
                'else': token.ELSE,
                'elif': token.ELIF,
                'end': token.END,
                'fun': token.FUN,
                'var': token.VAR,
                'set': token.SET,
                'return': token.RETURN,
                'new': token.NEW,
                'false': token.BOOLVAL,
                'true': token.BOOLVAL,
                'nil': token.NIL,
            }
            # anything that is not a known symbol or keyword is an identifier
            token_type = fixed_lexemes.get(lexeme, token.ID)
            if lexeme == '':
                # the EOS token is reported one column back
                start_column -= 1

        return token.Token(token_type, lexeme, start_line, start_column)