Ejemplo n.º 1
0
    def generate_keyword_or_ident_token(self):
        """Lex a keyword or an identifier starting at the current character.

        Returns None, consuming nothing, when the current character is
        neither a letter nor an underscore. Otherwise the following run of
        alphanumeric/underscore characters is appended to ``self.tokenValue``
        and a token is stored in ``self.token`` and returned: its type is the
        entry found for that text in ``token_type_repr``, or
        ``TokenType.VALUE_ID`` when there is no (truthy) entry.

        Raises:
            LexerError: if the identifier is longer than
                ``self.maxIdentLength`` characters.
        """

        def is_ident_char(char):
            return char.isalnum() or char == '_'

        # An identifier or keyword must start with a letter or an underscore.
        if not self.current_char.isalpha() and self.current_char != '_':
            return None

        remaining = self.maxIdentLength
        more_input = True

        # Consume identifier characters until the run ends, the input ends
        # (get_next_char() returned a falsy value) or the limit is used up.
        while (more_input and remaining > 0
               and is_ident_char(self.current_char)):
            self.tokenValue += self.current_char
            remaining -= 1
            more_input = bool(self.get_next_char())

        # The limit is exceeded only when yet another identifier character
        # follows the maximum allowed number of characters. An identifier of
        # exactly maxIdentLength characters is valid wherever it appears,
        # not only at the very end of the input.
        if (more_input and remaining <= 0
                and is_ident_char(self.current_char)):
            stop = self.readCursorPosition
            raise LexerError(
                self.current_char, stop,
                " (Exceeded maximum length of a identifier literal)")

        # Keywords are listed in token_type_repr; anything else is an
        # identifier.
        token_type = token_type_repr.get(self.tokenValue) or TokenType.VALUE_ID
        self.token = new_token(token_type, self.tokenValue, self.start)

        return self.token
Ejemplo n.º 2
0
    def generate_comment_token(self):
        """Lex either a division operator or a ``//`` line comment.

        Returns None, consuming nothing, when the current character is not
        ``/``. After a single ``/`` a ``TokenType.DIV`` token is produced.
        After ``//`` every character up to (not including) the next newline
        or the end of the text is appended to ``self.tokenValue`` and a
        ``TokenType.VALUE_COMMENT`` token is produced. The token is stored
        in ``self.token`` and returned.
        """
        if self.current_char != '/':
            return None

        # Step past the first slash to inspect what follows it.
        self.get_next_char()

        if self.current_char == '/':
            # Step past the second slash, then collect the comment text.
            self.get_next_char()
            while not (self.current_char == '\n'
                       or self.textSource.is_end_of_text()):
                self.tokenValue += self.current_char
                self.get_next_char()
            token_type = TokenType.VALUE_COMMENT
        else:
            # A lone slash is the division operator.
            token_type = TokenType.DIV

        self.token = new_token(token_type, self.tokenValue, self.start)
        return self.token
Ejemplo n.º 3
0
    def generate_eot_token(self):
        """Produce an end-of-text token when the source is exhausted.

        Returns None while ``self.textSource`` still has text left.
        Otherwise stores and returns a ``TokenType.EOT`` token that uses the
        current read cursor position as both of its position arguments.
        """
        if not self.textSource.is_end_of_text():
            return None

        self.token = new_token(TokenType.EOT, self.tokenValue,
                               self.readCursorPosition,
                               self.readCursorPosition)
        return self.token
Ejemplo n.º 4
0
    def generate_two_char_operator(self):
        """Try to extend the current token value with a trailing ``=``.

        When the current character is ``=`` it is appended to
        ``self.tokenValue``. If the extended text has a (truthy) entry in
        ``token_type_repr``, ``self.token`` is replaced by a token of that
        type and the ``=`` is consumed. Nothing is returned; the result is
        left in ``self.token``.
        """
        if self.current_char != "=":
            return

        self.tokenValue += self.current_char
        operator_type = token_type_repr.get(self.tokenValue)
        if not operator_type:
            return

        self.token = new_token(operator_type, self.tokenValue, self.start)
        self.get_next_char()
Ejemplo n.º 5
0
    def __init__(self, maxIdentLength, maxStringLength, textSource=None):
        """Set up the lexer state and read the first character.

        Args:
            maxIdentLength: stored as the limit used when lexing identifiers.
            maxStringLength: stored as the limit used when lexing string
                literals.
            textSource: object the lexer reads from (defaults to None).
        """
        # Limits and input source.
        self.maxIdentLength = maxIdentLength
        self.maxStringLength = maxStringLength
        self.textSource = textSource

        # Cursor and token-start positions are separate Position objects.
        self.readCursorPosition = Position(row=1, column=-1)
        self.start = Position(row=1, column=-1)

        # Begin with an UNKNOWN placeholder token and empty buffers.
        placeholder_start = Position(1, 0)
        placeholder_stop = Position(1, 0)
        self.token = new_token(TokenType.UNKNOWN, 0, placeholder_start,
                               placeholder_stop)
        self.tokenValue = ''
        self.current_char = ''

        # Load the first character so the generate_* methods can inspect it.
        self.get_next_char()
Ejemplo n.º 6
0
    def generate_special_char_or_unknown_token(self):
        """Lex a special-character token, possibly followed by ``=``.

        Returns None when ``self.tokenValue`` is already alphanumeric or the
        current token is ``TokenType.EOT``. Otherwise the current character
        is appended to ``self.tokenValue``; if that text has a (truthy)
        entry in ``token_type_repr`` the character is consumed and a token
        of that type is stored in ``self.token``. For ``!``, ``=``, ``>``
        and ``<`` the token may then be widened by
        ``generate_two_char_operator``. When the text has no entry,
        ``self.token`` is returned unchanged.
        """
        # Nothing to do if a value was already lexed or the text has ended.
        already_handled = (self.tokenValue.isalnum()
                           or self.token.type == TokenType.EOT)
        if already_handled:
            return None

        self.tokenValue += self.current_char

        token_type = token_type_repr.get(self.tokenValue)
        if not token_type:
            return self.token

        # Advance first so the next get_token call starts on a fresh char.
        self.get_next_char()
        self.token = new_token(token_type, self.tokenValue, self.start)

        if self.tokenValue in ("!", "=", ">", "<"):
            self.generate_two_char_operator()

        return self.token
Ejemplo n.º 7
0
    def generate_double_token(self, numberTokenValue):
        """Lex the fractional part of a number and build a double token.

        The callers in this file invoke this method while the current
        character is ``.``; that character is skipped and the digits that
        follow are accumulated.

        Args:
            numberTokenValue: the already-parsed integer part.

        Returns:
            The ``TokenType.VALUE_DOUBLE`` token, also stored in
            ``self.token``. It is built from the integer part, the fraction
            digits read as an integer, and the number of fraction digits.
        """
        fraction_value = 0
        fraction_digits = 0

        # Step past the current character before reading the fraction.
        self.get_next_char()

        # NOTE(review): str.isdigit() also accepts characters (e.g.
        # superscript digits) that int() rejects - confirm the input
        # alphabet.
        more_input = True
        while more_input and self.current_char.isdigit():
            fraction_value = 10 * fraction_value + int(self.current_char)
            fraction_digits += 1
            more_input = bool(self.get_next_char())

        self.token = new_token(TokenType.VALUE_DOUBLE, numberTokenValue,
                               self.start, fraction_value, fraction_digits)
        return self.token
Ejemplo n.º 8
0
    def generate_zero_integer_token(self):
        """Lex a number whose first digit is zero.

        Sets ``self.tokenValue`` to ``'0'``, consumes the current character
        and inspects the next one:

        * ``.``: delegates to ``generate_double_token`` with an integer
          part of 0;
        * another digit: raises an error;
        * anything else: produces a ``TokenType.VALUE_INT`` token with
          value 0.

        Raises:
            LexerError: when a digit directly follows the zero.
        """
        self.tokenValue = '0'
        self.get_next_char()

        if self.current_char == ".":
            return self.generate_double_token(0)

        if self.current_char.isdigit():
            raise LexerError(self.tokenValue, self.readCursorPosition, "")

        self.token = new_token(TokenType.VALUE_INT, 0, self.start)
        return self.token
Ejemplo n.º 9
0
    def generate_nonzero_integer_token(self):
        """Lex a number by accumulating its integer digits.

        Consecutive digits are folded into an integer value. If a ``.``
        follows, ``generate_double_token`` builds the token from that value;
        otherwise a ``TokenType.VALUE_INT`` token is created. In both cases
        ``self.tokenValue`` ends up as the decimal text of the integer part,
        and ``self.token`` is returned.
        """
        integer_value = 0

        # NOTE(review): str.isdigit() also accepts characters (e.g.
        # superscript digits) that int() rejects - confirm the input
        # alphabet.
        more_input = True
        while more_input and self.current_char.isdigit():
            integer_value = 10 * integer_value + int(self.current_char)
            more_input = bool(self.get_next_char())

        if self.current_char != ".":
            self.token = new_token(TokenType.VALUE_INT, integer_value,
                                   self.start)
        else:
            # A dot means the number continues with a fractional part.
            self.generate_double_token(integer_value)

        self.tokenValue = str(integer_value)
        return self.token
Ejemplo n.º 10
0
    def generate_string_token(self):
        """Lex a double-quoted string literal.

        Returns None, consuming nothing, when the current character is not
        ``"``. Otherwise characters are appended to ``self.tokenValue``
        until an unescaped ``"``, the end of the text, or the length limit
        is reached. A backslash is dropped and the character after it is
        taken literally. The character the scan stopped on is then skipped,
        and a ``TokenType.VALUE_STRING`` token is stored in ``self.token``
        and returned.

        Raises:
            LexerError: if the literal holds more than
                ``self.maxStringLength`` characters.
        """
        if self.current_char != '"':
            return None

        # Step past the opening quote.
        self.get_next_char()
        remaining = self.maxStringLength

        while (self.current_char != '"' and remaining > 0
               and not self.textSource.is_end_of_text()):

            # A backslash escapes the next char; the backslash itself is
            # not written to the string value.
            if self.current_char == "\\":
                self.get_next_char()

            self.tokenValue += self.current_char
            self.get_next_char()

            remaining -= 1

        # The limit is exceeded only when it is used up and the literal
        # still has not been closed. A literal of exactly maxStringLength
        # characters followed by its closing quote is valid.
        if remaining <= 0 and self.current_char != '"':
            stop = self.readCursorPosition
            raise LexerError(self.current_char, stop,
                             " (Exceeded maximum length of a string literal)")

        # Step past the closing quote.
        self.get_next_char()

        self.token = new_token(TokenType.VALUE_STRING, self.tokenValue,
                               self.start)

        return self.token
Ejemplo n.º 11
0
    def generate_unknown_token_placeholder(self):
        """Reset ``self.token`` to an UNKNOWN placeholder and return it.

        The placeholder has value 0 and two separate ``Position(1, 0)``
        objects as its position arguments.
        """
        placeholder_start = Position(1, 0)
        placeholder_stop = Position(1, 0)
        self.token = new_token(TokenType.UNKNOWN, 0, placeholder_start,
                               placeholder_stop)
        return self.token