예제 #1
0
파일: lexer.py 프로젝트: IsacSvensson/Six10
    def check_indent(self):
        """
        Measures the indentation at the current position.

        A newline at the current position is skipped first. Any other
        character that is not a space is consumed and reported as an
        invalid token. Spaces are then counted; four spaces make one level.

        Returns
            int - Level of indentation, or None when self.error was set
        """
        first = self.current_character
        if first == "\n":
            self.advance()
        elif first != " ":
            begin = self.position.copy()
            self.advance()
            finish = self.position.copy()
            self.error = Error("IndentationError: Unexpected character")
            self.tokens.append(Token(tt._INVALID, first, begin, finish))
            return None

        spaces = 0
        # Position where the most recent (possibly incomplete) group of four
        # spaces began; used as the start of the invalid token below.
        group_start = None
        while self.current_character == " ":
            if spaces % 4 == 0:
                group_start = self.position.copy()
            spaces += 1
            self.advance()

        levels, leftover = divmod(spaces, 4)
        if leftover == 0:
            return levels

        self.error = Error("IndentationError: Invalid indentation")
        finish = self.position.copy()
        self.tokens.append(
            Token(tt._INVALID, " " * leftover, group_start, finish))
        return None
예제 #2
0
파일: lexer.py 프로젝트: IsacSvensson/Six10
    def make_symbol(self):
        """
        Builds an identifier or keyword token.

        The first character must be a lowercase letter or underscore
        according to self.allowed_character; otherwise it is consumed and
        returned as an invalid token. Following characters may also be
        digits. The collected text is classified by isKeyword.

        RETURNS:
            Token
        """
        digits = "1234567890"
        first_chars = "_abcdefghijklmnopqrstuvwxyz"
        begin = self.position.copy()

        # Reject a symbol that does not open with a letter or underscore.
        if not self.allowed_character(first_chars):
            bad_char = self.current_character
            self.advance()
            finish = self.position.copy()
            self.error = Error(
                "ValueError: Unexpected illegal character {}".format(bad_char))
            return Token(tt._INVALID, bad_char, begin, finish)

        pieces = []
        while self.allowed_character(digits + first_chars):
            pieces.append(self.current_character)
            # advance() reporting a falsy result ends the symbol.
            if not self.advance():
                break
        name = "".join(pieces)

        finish = self.position.copy()

        kind, problem = isKeyword(name)
        if problem:
            self.error = problem
            kind = tt._INVALID

        return Token(kind, name, begin, finish)
예제 #3
0
파일: lexer.py 프로젝트: IsacSvensson/Six10
    def make_hexadecimal(self):
        """
        Reads a hexadecimal literal and returns a hexadecimal token.

        Consumes a two-character prefix, which must equal '0x' when
        lower-cased, and then every following character accepted by
        self.allowed_character for the hexadecimal digit set.

        RETURNS:
            Token - tt._HEX holding the integer value on success. When the
            prefix is wrong, the input ends inside the prefix, or no digit
            follows the prefix, self.error is set and a tt._INVALID token
            holding the consumed text is returned.
        """
        allowed_chars = "0123456789abcdef"
        start_position = self.position.copy()

        # Read at most two prefix characters, stopping at end of input so a
        # missing character becomes an invalid token instead of a TypeError
        # from concatenating None onto the string.
        hex_string = ""
        while len(hex_string) < 2 and self.current_character:
            hex_string += self.current_character
            self.advance()

        if hex_string.lower() != '0x':
            end_position = self.position.copy()
            self.error = Error("ValueError: Can not convert to a number")
            return Token(tt._INVALID, hex_string, start_position, end_position)

        while self.current_character and self.allowed_character(allowed_chars):
            hex_string += self.current_character
            self.advance()

        end_position = self.position.copy()

        # Only the prefix was read: there is no digit to convert.
        if len(hex_string) < 3:
            self.error = Error("ValueError: Can not convert to a number")
            return Token(tt._INVALID, hex_string, start_position, end_position)

        # int() with base=16 accepts the leading '0x' prefix.
        return Token(tt._HEX, int(hex_string, base=16), start_position,
                     end_position)
예제 #4
0
파일: lexer.py 프로젝트: IsacSvensson/Six10
    def make_octodecimal(self):
        """
        Reads an octal literal and returns an octal token.

        Consumes a two-character prefix, which must equal '0o' when
        lower-cased, and then every following character accepted by
        self.allowed_character for the octal digit set.

        RETURNS:
            Token - tt._OCT holding the integer value on success. When the
            prefix is wrong, the input ends inside the prefix, or no digit
            follows the prefix, self.error is set and a tt._INVALID token
            holding the consumed text is returned.
        """
        allowed_chars = "01234567"
        start_position = self.position.copy()

        # Read at most two prefix characters, stopping at end of input so a
        # missing character becomes an invalid token instead of a TypeError
        # from concatenating None onto the string.
        oct_string = ""
        while len(oct_string) < 2 and self.current_character:
            oct_string += self.current_character
            self.advance()

        if oct_string.lower() != '0o':
            end_position = self.position.copy()
            self.error = Error("ValueError: Can not convert to a number")
            return Token(tt._INVALID, oct_string, start_position, end_position)

        while self.current_character and self.allowed_character(allowed_chars):
            oct_string += self.current_character
            self.advance()

        end_position = self.position.copy()

        # Only the prefix was read: there is no digit to convert.
        if len(oct_string) < 3:
            self.error = Error("ValueError: Can not convert to a number")
            return Token(tt._INVALID, oct_string, start_position, end_position)

        # int() with base=8 accepts the leading '0o' prefix.
        return Token(tt._OCT, int(oct_string, base=8), start_position,
                     end_position)
예제 #5
0
파일: lexer.py 프로젝트: IsacSvensson/Six10
    def make_binary(self):
        """
        Reads a binary literal and returns a binary token.

        Consumes a two-character prefix, which must equal '0b' when
        lower-cased, and then every following character accepted by
        self.allowed_character for the binary digit set.

        RETURNS:
            Token - tt._BIN holding the integer value on success. When the
            prefix is wrong, the input ends inside the prefix, or no digit
            follows the prefix, self.error is set and a tt._INVALID token
            holding the consumed text is returned.
        """
        allowed_chars = "01"
        start_position = self.position.copy()

        # Read at most two prefix characters, stopping at end of input so a
        # missing character becomes an invalid token instead of a TypeError
        # from concatenating None onto the string.
        binary_string = ""
        while len(binary_string) < 2 and self.current_character:
            binary_string += self.current_character
            self.advance()

        if binary_string.lower() != '0b':
            end_position = self.position.copy()
            self.error = Error("ValueError: Can not convert to a number")
            return Token(tt._INVALID, binary_string, start_position,
                         end_position)

        while self.current_character and self.allowed_character(allowed_chars):
            binary_string += self.current_character
            self.advance()

        end_position = self.position.copy()

        # Only the prefix was read: there is no digit to convert.
        if len(binary_string) < 3:
            self.error = Error("ValueError: Can not convert to a number")
            return Token(tt._INVALID, binary_string, start_position,
                         end_position)

        # int() with base=2 accepts the leading '0b' prefix.
        return Token(tt._BIN, int(binary_string, base=2), start_position,
                     end_position)
예제 #6
0
파일: lexer.py 프로젝트: IsacSvensson/Six10
    def make_tokens(self):
        """
        Performs the lexical analysis on the source code and breaks it down
        to terminal tokens.

        Tokens are appended to self.tokens. Scanning stops as soon as
        self.error is set, or when self.current_character becomes falsy.

        RETURNS:
            None - the result is collected in self.tokens
        """
        while self.current_character:
            if self.allowed_character("0123456789"):
                self.tokens.append(self.make_number())
            elif self.current_character == "\n":
                start = self.position.copy()
                self.advance()
                self.tokens.append(
                    Token(tt._NEWLINE, '\n', start, self.position.copy()))
                indent = self.check_indent()
                # check_indent returns None on error, so stop before the
                # value is compared or handed to change_indent.
                if self.error:
                    return
                if indent != self.position.indent:
                    self.change_indent(indent)
            elif self.allowed_character("'\""):
                self.tokens.append(self.make_string())
            elif self.is_operator():
                self.tokens.append(self.make_operator())
            else:
                letter_result, error = isLetter(self.current_character)
                if error:
                    self.error = error
                    return
                if letter_result:
                    # An error raised while reading the symbol is caught by
                    # the shared check at the bottom of the loop, the same
                    # as for every other token kind.
                    self.tokens.append(self.make_symbol())
                elif self.allowed_character(" \t"):
                    # Spaces and tabs inside a line produce no token.
                    self.advance()
                    continue
                else:
                    start = self.position.copy()
                    char = self.current_character
                    self.advance()
                    end = self.position.copy()
                    self.tokens.append(Token(tt._INVALID, char, start, end))
                    self.error = Error("ValueError: Unexpected character")

            # Single stop point for errors raised by any branch above.
            if self.error:
                return
예제 #7
0
파일: lexer.py 프로젝트: IsacSvensson/Six10
    def change_indent(self, indent):
        """
        Generates indent and dedent tokens to change indentation level.

        self.position.indent is moved one level at a time until it equals
        indent; one tt._INDENT or tt._DEDENT token is appended to
        self.tokens per level. When indent is not a non-negative integer,
        self.error is set and nothing else changes.

        RETURNS:
            None
        """
        if not isinstance(indent, int) or indent < 0:
            self.error = Error("ValueError: Positive integer expected")
            return

        # Tokens receive snapshots of the position, as everywhere else in
        # the lexer. Passing self.position itself would let later changes to
        # the live position alter tokens that were already emitted.
        while self.position.indent < indent:
            self.position.indent += 1
            self.tokens.append(
                Token(tt._INDENT, "    ", self.position.copy(),
                      self.position.copy()))

        while self.position.indent > indent:
            self.position.indent -= 1
            self.tokens.append(
                Token(tt._DEDENT, "    ", self.position.copy(),
                      self.position.copy()))
예제 #8
0
파일: lexer.py 프로젝트: IsacSvensson/Six10
    def make_string(self):
        """
        Reads a quoted string literal and returns a string token.

        The current character is taken as the opening quotation mark. The
        string ends at the next unescaped occurrence of the same mark.
        Recognised escapes: backslash-n (newline), backslash-t (tab),
        doubled backslash, backslash before either quotation mark, and a
        backslash directly before a line break (line continuation, adds
        nothing to the string). A backslash before any other character is
        kept literally.

        RETURNS:
            Token - tt._STRING on success. On a raw line break inside the
            string, or when the input ends before the closing mark,
            self.error is set and a tt._INVALID token holding the text read
            so far is returned.
        """
        start = self.position.copy()
        quote = self.current_character
        terminators = quote + "\n"
        # Character following a backslash -> character added to the string.
        escapes = {'n': '\n', 't': '\t', '\\': '\\', '"': '"', "'": "'"}

        pieces = []
        while self.advance() and not self.allowed_character(terminators):
            if self.current_character == '\\':
                following = self.look_ahead()
                if following == '\n':
                    # Line continuation: step onto the line break; the loop
                    # condition then moves past it.
                    self.advance()
                    continue
                if following in escapes:
                    # Step onto the escaped character so the loop condition
                    # does not treat an escaped quote as the terminator.
                    self.advance()
                    pieces.append(escapes[following])
                    continue

            pieces.append(self.current_character)

        string = "".join(pieces)

        if self.current_character == quote:
            self.advance()
            end = self.position.copy()
            return Token(tt._STRING, string, start, end)

        if self.current_character == "\n":
            self.advance()
            end = self.position.copy()
            self.error = Error("StringError: Incorrect line break in string")
            return Token(tt._INVALID, string, start, end)

        # Input ended before the closing quotation mark was found.
        end = self.position.copy()
        self.error = Error("StringError: Unterminated string")
        return Token(tt._INVALID, string, start, end)
예제 #9
0
파일: lexer.py 프로젝트: IsacSvensson/Six10
    def make_decimal(self):
        """
        Reads a decimal literal and returns an integer or float token.

        Digits are collected together with at most one dot; a second dot
        ends the number and is left unconsumed. A leading dot is allowed,
        so ".5" yields a float.

        RETURNS:
            Token - tt._FLOAT when the text contains a dot, otherwise
            tt._INT. When the current character is neither a digit nor a
            dot, or the text consists of a dot only, self.error is set and
            a tt._INVALID token is returned.
        """
        allowed_chars = "1234567890."

        start_position = self.position.copy()

        char = self.current_character
        if char is None or char not in allowed_chars:
            self.error = Error("ValueError: Expected a digit or dot '.'")
            # Nothing to consume at end of input.
            if char is not None:
                self.advance()
            end_position = self.position.copy()
            return Token(tt._INVALID, char, start_position, end_position)

        pieces = []
        seen_dot = False
        while self.current_character and self.allowed_character(allowed_chars):
            if self.current_character == ".":
                # Dots are counted as they are read, so a leading dot is
                # counted too. A second dot belongs to whatever follows.
                if seen_dot:
                    break
                seen_dot = True
            pieces.append(self.current_character)
            self.advance()

        number_string = "".join(pieces)
        end_position = self.position.copy()

        # A dot with no digit on either side is not a number.
        if number_string == ".":
            self.error = Error("ValueError: Can not convert to a number")
            return Token(tt._INVALID, number_string, start_position,
                         end_position)

        if seen_dot:
            return Token(tt._FLOAT, float(number_string), start_position,
                         end_position)
        return Token(tt._INT, int(number_string), start_position,
                     end_position)
예제 #10
0
파일: lexer.py 프로젝트: IsacSvensson/Six10
    def make_operator(self):
        """
        Builds an operator or punctuation token using longest match.

        The current character is joined with the look-ahead text and tried
        against the operator table as a three-character, then
        two-character, then one-character operator. The matched characters
        are consumed.

        RETURNS:
            Token - the operator token, or a tt._INVALID token with
            self.error set when nothing matches or no current character
            exists
        """
        lookahead = self.look_ahead(2) or self.look_ahead()

        first = self.current_character
        if first is None:
            self.error = Error("LexicalError: No characters in buffer")
            return Token(tt._INVALID, None, None, None)

        candidate = first + lookahead if lookahead else first
        start = self.position.copy()

        table = {
            '=': tt._ASSIGN,
            '==': tt._BITWISE_EQ,
            '+': tt._PLUS,
            '++': tt._INCR,
            '+=': tt._PLUS_ASSIGN,
            '-': tt._MINUS,
            '--': tt._DECR,
            '-=': tt._MINUS_ASSIGN,
            '*': tt._MULT,
            '*=': tt._MULT_ASSIGN,
            '**': tt._EXP,
            '**=': tt._POWER_ASSIGN,
            '/': tt._DIV,
            '/=': tt._DIV_ASSIGN,
            '//': tt._FLOOR,
            '//=': tt._FLOOR_ASSIGN,
            '%': tt._MOD,
            '%=': tt._MOD_ASSIGN,
            '&=': tt._AND_ASSIGN,
            '&': tt._BITWISE_AND,
            '|=': tt._OR_ASSIGN,
            '|': tt._BITWISE_OR,
            '^': tt._BITWISE_XOR,
            '^=': tt._XOR_ASSIGN,
            '<': tt._BITWISE_LT,
            '<=': tt._BITWISE_LTE,
            '<<': tt._BITWISE_LSHIFT,
            '<<=': tt._LSHIFT_ASSIGN,
            '>': tt._BITWISE_GT,
            '>=': tt._BITWISE_GTE,
            '>>': tt._BITWISE_RSHIFT,
            '>>=': tt._RSHIFT_ASSIGN,
            '(': tt._LPARAN,
            ')': tt._RPARAN,
            '[': tt._LSQBRACK,
            ']': tt._RSQBRACK,
            '{': tt._LCURLBRACK,
            '}': tt._RCURLBRACK,
            '.': tt._DOT,
            ',': tt._COMMA,
            ':': tt._COLON,
        }

        # Three-character operators are tried first.
        if len(candidate) == 3 and table.get(candidate):
            self.advance(3)
            return Token(table.get(candidate), candidate, start,
                         self.position.copy())

        # Then two-character operators.
        pair = candidate[:2]
        if len(candidate) >= 2 and table.get(pair):
            self.advance(2)
            return Token(table.get(pair), pair, start, self.position.copy())

        # Finally the single current character; if it is not an operator
        # either, it is still consumed and returned as an invalid token.
        kind = table.get(first)
        if not kind:
            self.error = Error("ValueError: Token not a operator")
            kind = tt._INVALID
        self.advance()
        return Token(kind, first, start, self.position.copy())