def generate_keyword_or_ident_token(self):
    """Lex a keyword or an identifier starting at the current character.

    Returns None, consuming nothing, when the current character is neither
    a letter nor an underscore.  Otherwise letters, digits and underscores
    are appended to ``self.tokenValue``.  The collected text is looked up
    in ``token_type_repr``: a truthy hit yields a token of that type,
    anything else yields a ``TokenType.VALUE_ID`` token.  The token is
    stored in ``self.token`` and returned.

    Raises:
        LexerError: if the identifier is longer than
            ``self.maxIdentLength`` characters.
    """
    def continues_identifier():
        return self.current_char.isalnum() or self.current_char == '_'

    if not self.current_char.isalpha() and self.current_char != '_':
        return None

    remaining = self.maxIdentLength
    while remaining > 0 and continues_identifier():
        self.tokenValue += self.current_char
        # A falsy result from get_next_char() ends the scan
        # (presumably no more input is available -- confirm).
        if not self.get_next_char():
            break
        remaining -= 1

    # Spending the whole budget is only an error when the identifier
    # actually goes on: an identifier of exactly maxIdentLength characters
    # followed by some other character is still within the limit.
    if remaining <= 0 and continues_identifier():
        raise LexerError(
            self.current_char,
            self.readCursorPosition,
            " (Exceeded maximum length of a identifier literal)",
        )

    # Keywords are the entries of token_type_repr; everything else is an
    # ordinary identifier.
    keyword_type = token_type_repr.get(self.tokenValue)
    if keyword_type:
        self.token = new_token(keyword_type, self.tokenValue, self.start)
    else:
        self.token = new_token(TokenType.VALUE_ID, self.tokenValue, self.start)
    return self.token
def generate_comment_token(self):
    """Lex a token that begins with '/': a '//' comment or a division sign.

    Returns None, consuming nothing, when the current character is not
    '/'.  After the first slash is consumed, a second '/' starts a comment
    whose text (up to, but not including, the next newline, or until the
    text source reports end of text) is appended to ``self.tokenValue``
    and returned as a ``TokenType.VALUE_COMMENT`` token.  Any other
    character yields a ``TokenType.DIV`` token.  The token is stored in
    ``self.token`` and returned.
    """
    if self.current_char != '/':
        return None

    self.get_next_char()
    if self.current_char == '/':
        # Step past the second slash, then gather the rest of the line.
        self.get_next_char()
        while not (self.current_char == '\n'
                   or self.textSource.is_end_of_text()):
            self.tokenValue += self.current_char
            self.get_next_char()
        token_kind = TokenType.VALUE_COMMENT
    else:
        # A lone slash is the division operator.
        token_kind = TokenType.DIV

    self.token = new_token(token_kind, self.tokenValue, self.start)
    return self.token
def generate_eot_token(self):
    """Produce the end-of-text token when the text source is exhausted.

    Returns None when ``self.textSource.is_end_of_text()`` is false.
    Otherwise builds a ``TokenType.EOT`` token whose two position
    arguments are both the current read cursor position, stores it in
    ``self.token`` and returns it.
    """
    if not self.textSource.is_end_of_text():
        return None

    cursor = self.readCursorPosition
    self.token = new_token(TokenType.EOT, self.tokenValue, cursor, cursor)
    return self.token
def generate_two_char_operator(self):
    """Extend a one-character operator with a following '='.

    Does nothing unless the current character is '='.  In that case the
    '=' is appended to ``self.tokenValue``; if the resulting text has a
    truthy entry in ``token_type_repr``, ``self.token`` is replaced by a
    token of that type and the '=' is consumed.  Always returns None.
    """
    if self.current_char != "=":
        return

    self.tokenValue += self.current_char
    operator_type = token_type_repr.get(self.tokenValue)
    if not operator_type:
        return

    self.token = new_token(operator_type, self.tokenValue, self.start)
    self.get_next_char()
def __init__(self, maxIdentLength, maxStringLength, textSource=None):
    """Set up the lexer state and read the first character.

    Args:
        maxIdentLength: stored as ``self.maxIdentLength``.
        maxStringLength: stored as ``self.maxStringLength``.
        textSource: stored as ``self.textSource``; defaults to None.
    """
    # Limits and input supplied by the caller.
    self.textSource = textSource
    self.maxStringLength = maxStringLength
    self.maxIdentLength = maxIdentLength

    # Text accumulated for the token being built, and the current character.
    self.tokenValue = ''
    self.current_char = ''

    # Both positions start at row 1, column -1.
    self.readCursorPosition = Position(row=1, column=-1)
    self.start = Position(row=1, column=-1)

    # Placeholder token until the first real token is produced.
    self.token = new_token(
        TokenType.UNKNOWN, 0, Position(1, 0), Position(1, 0))

    # Load the first character.
    self.get_next_char()
def generate_special_char_or_unknown_token(self):
    """Lex a special-character token, or leave the current token untouched.

    Returns None when ``self.tokenValue`` is already alphanumeric or the
    current token is of type ``TokenType.EOT``.  Otherwise the current
    character is appended to ``self.tokenValue``; if that text has a
    truthy entry in ``token_type_repr``, the character is consumed and
    ``self.token`` becomes a token of that type.  When the text is one of
    "!", "=", ">" or "<", ``generate_two_char_operator`` is then called.
    Returns ``self.token``.
    """
    already_handled = (self.tokenValue.isalnum()
                       or self.token.type == TokenType.EOT)
    if already_handled:
        return None

    self.tokenValue += self.current_char

    # On a match, consume the character before building the token.
    special_type = token_type_repr.get(self.tokenValue)
    if special_type:
        self.get_next_char()
        self.token = new_token(special_type, self.tokenValue, self.start)

    # These four characters may be the first half of a two-char operator.
    if self.tokenValue in ("!", "=", ">", "<"):
        self.generate_two_char_operator()

    return self.token
def generate_double_token(self, numberTokenValue):
    """Lex the fractional part of a number and build a double token.

    One character is consumed first (the callers visible in this file
    invoke this method while the current character is '.').  The digits
    that follow are folded into an integer, and the number of digits read
    is counted.

    Args:
        numberTokenValue: the integer part, passed through to the token.

    Returns:
        The ``TokenType.VALUE_DOUBLE`` token, also stored in
        ``self.token``, built from the integer part, ``self.start``, the
        fractional digits as an integer, and the fractional digit count.
    """
    self.get_next_char()

    fraction_value = 0
    fraction_digits = 0
    while self.current_char.isdigit():
        fraction_value = 10 * fraction_value + int(self.current_char)
        fraction_digits += 1
        # A falsy result from get_next_char() ends the scan.
        if not self.get_next_char():
            break

    self.token = new_token(
        TokenType.VALUE_DOUBLE,
        numberTokenValue,
        self.start,
        fraction_value,
        fraction_digits,
    )
    return self.token
def generate_zero_integer_token(self):
    """Lex a number whose first digit is zero.

    Sets ``self.tokenValue`` to '0', consumes one character and inspects
    the one that follows:

    * '.' -- the result of ``generate_double_token(0)`` is returned;
    * a digit -- a ``LexerError`` is raised;
    * anything else -- a ``TokenType.VALUE_INT`` token with value 0 is
      stored in ``self.token`` and returned.

    Raises:
        LexerError: if the zero is directly followed by another digit.
    """
    self.tokenValue = '0'
    self.get_next_char()

    following = self.current_char
    if following == ".":
        return self.generate_double_token(0)
    if following.isdigit():
        raise LexerError(self.tokenValue, self.readCursorPosition, "")

    self.token = new_token(TokenType.VALUE_INT, 0, self.start)
    return self.token
def generate_nonzero_integer_token(self):
    """Lex a number that starts at the current character.

    Consecutive digits are folded into an integer.  If the character
    after them is '.', ``generate_double_token`` is called with that
    integer part; otherwise a ``TokenType.VALUE_INT`` token is stored in
    ``self.token``.  In both cases ``self.tokenValue`` is set to the
    decimal text of the integer part, and ``self.token`` is returned.
    """
    integer_part = 0
    while self.current_char.isdigit():
        integer_part = 10 * integer_part + int(self.current_char)
        # A falsy result from get_next_char() ends the scan.
        if not self.get_next_char():
            break

    if self.current_char != ".":
        self.token = new_token(TokenType.VALUE_INT, integer_part, self.start)
    else:
        # A dot after the digits makes this a double.
        self.generate_double_token(integer_part)

    self.tokenValue = str(integer_part)
    return self.token
def generate_string_token(self):
    """Lex a double-quoted string literal starting at the current character.

    Returns None, consuming nothing, when the current character is not a
    double quote.  Otherwise characters are appended to
    ``self.tokenValue`` until an unescaped double quote is reached.  A
    backslash is dropped and the character after it is taken literally.
    The closing quote is consumed, and a ``TokenType.VALUE_STRING`` token
    is stored in ``self.token`` and returned.

    Raises:
        LexerError: if the literal holds more than
            ``self.maxStringLength`` characters.
    """
    if self.current_char != '"':
        return None

    # Step past the opening quote.
    self.get_next_char()

    remaining = self.maxStringLength
    while (self.current_char != '"' and remaining > 0
           and not self.textSource.is_end_of_text()):
        # Skip the backslash so the escaped character is stored verbatim.
        if self.current_char == "\\":
            self.get_next_char()
        self.tokenValue += self.current_char
        self.get_next_char()
        remaining -= 1

    # Spending the whole budget is only an error when the literal goes on:
    # if the closing quote is the current character, the literal holds
    # exactly maxStringLength characters and is within the limit.
    if remaining <= 0 and self.current_char != '"':
        raise LexerError(
            self.current_char,
            self.readCursorPosition,
            " (Exceeded maximum length of a string literal)",
        )

    # NOTE(review): when the loop stops because the text source reports end
    # of text, a VALUE_STRING token is still produced from what was read.

    # Step past the closing quote.
    self.get_next_char()
    self.token = new_token(TokenType.VALUE_STRING, self.tokenValue, self.start)
    return self.token
def generate_unknown_token_placeholder(self):
    """Reset ``self.token`` to the UNKNOWN placeholder token and return it.

    The placeholder has type ``TokenType.UNKNOWN``, value 0 and two
    ``Position(1, 0)`` arguments.
    """
    placeholder = new_token(
        TokenType.UNKNOWN, 0, Position(1, 0), Position(1, 0))
    self.token = placeholder
    return self.token