def check_indent(self):
    """
    Measures the indentation at the start of a line.

    If the current character is a newline (an empty line) it is skipped
    first. Consecutive spaces are then consumed and counted; every four
    spaces make one indentation level. A line that starts with a
    non-whitespace character, or the end of the input, is level 0.

    Errors (self.error is set, an _INVALID token is appended to
    self.tokens and None is returned):
        - the line starts with whitespace other than a space or newline
          (e.g. a tab); that character is consumed
        - the number of spaces is not a multiple of four

    RETURNS: int - level of indentation, or None on error
    """
    char = self.current_character
    if char == "\n":
        self.advance()
    elif char != " " and isinstance(char, str) and char.isspace():
        # Only stray whitespace is rejected here. Ordinary characters and
        # end of input simply mean "no indentation"; rejecting them made
        # every unindented line that follows a newline a lexing error.
        start = self.position.copy()
        self.advance()
        end = self.position.copy()
        self.error = Error("IndentationError: Unexpected character")
        self.tokens.append(Token(tt._INVALID, char, start, end))
        return None

    count = 0
    start = None
    while self.current_character == " ":
        if count % 4 == 0:
            # Remember where the current group of (up to) four spaces
            # begins so a partial group can be reported precisely.
            start = self.position.copy()
        count += 1
        self.advance()

    if count % 4 == 0:
        return count // 4

    self.error = Error("IndentationError: Invalid indentation")
    end = self.position.copy()
    self.tokens.append(Token(tt._INVALID, " " * (count % 4), start, end))
    return None
def make_symbol(self):
    """
    Builds an identifier or keyword token.

    The first character must be accepted by self.allowed_character for
    the set "_a-z"; if it is not, that single character is consumed,
    self.error is set and an _INVALID token is returned. Otherwise
    characters are collected for as long as they are accepted for the set
    "0-9_a-z", and isKeyword() decides the token type of the collected
    text (or reports an error, which also yields an _INVALID token).

    RETURNS: Token
    """
    leading_chars = "_abcdefghijklmnopqrstuvwxyz"
    symbol_chars = "1234567890" + leading_chars
    begin = self.position.copy()

    # Guard clause: a symbol may not start with a digit or anything else.
    if not self.allowed_character(leading_chars):
        bad_char = self.current_character
        self.advance()
        finish = self.position.copy()
        self.error = Error(
            "ValueError: Unexpected illegal character {}".format(bad_char))
        return Token(tt._INVALID, bad_char, begin, finish)

    text = ""
    while self.allowed_character(symbol_chars):
        text += self.current_character
        if not self.advance():
            # advance() reported that nothing is left to read
            break
    finish = self.position.copy()

    kind, problem = isKeyword(text)
    if problem:
        self.error = problem
        return Token(tt._INVALID, text, begin, finish)
    return Token(kind, text, begin, finish)
def make_hexadecimal(self):
    """
    Reads a hexadecimal literal such as 0x1f.

    The first two characters must be the prefix "0x" (compared
    case-insensitively). After the prefix, characters are consumed for as
    long as self.allowed_character accepts them as hexadecimal digits.

    On a wrong or truncated prefix, or when no digit follows the prefix,
    self.error is set and an _INVALID token holding the text read so far
    is returned.

    RETURNS: Token - _HEX token whose value is the parsed integer
    """
    allowed_chars = "0123456789abcdef"
    start_position = self.position.copy()

    # Read at most two prefix characters. Stopping when the input runs out
    # avoids concatenating None (a TypeError) on a truncated literal.
    hex_string = ""
    while len(hex_string) < 2 and self.current_character:
        hex_string += self.current_character
        self.advance()

    if hex_string.lower() != "0x":
        end_position = self.position.copy()
        self.error = Error("ValueError: Can not convert to a number")
        return Token(tt._INVALID, hex_string, start_position, end_position)

    while self.current_character and self.allowed_character(allowed_chars):
        hex_string += self.current_character
        self.advance()
    end_position = self.position.copy()

    # Only the prefix was found: there is no digit to convert.
    if len(hex_string) < 3:
        self.error = Error("ValueError: Can not convert to a number")
        return Token(tt._INVALID, hex_string, start_position, end_position)

    return Token(tt._HEX, int(hex_string, base=16),
                 start_position, end_position)
def make_octodecimal(self):
    """
    Reads an octal literal such as 0o17.

    The first two characters must be the prefix "0o" (compared
    case-insensitively). After the prefix, characters are consumed for as
    long as self.allowed_character accepts them as octal digits.

    On a wrong or truncated prefix, or when no digit follows the prefix,
    self.error is set and an _INVALID token holding the text read so far
    is returned.

    RETURNS: Token - _OCT token whose value is the parsed integer
    """
    allowed_chars = "01234567"
    start_position = self.position.copy()

    # Read at most two prefix characters. Stopping when the input runs out
    # avoids concatenating None (a TypeError) on a truncated literal.
    oct_string = ""
    while len(oct_string) < 2 and self.current_character:
        oct_string += self.current_character
        self.advance()

    if oct_string.lower() != "0o":
        end_position = self.position.copy()
        self.error = Error("ValueError: Can not convert to a number")
        return Token(tt._INVALID, oct_string, start_position, end_position)

    while self.current_character and self.allowed_character(allowed_chars):
        oct_string += self.current_character
        self.advance()
    end_position = self.position.copy()

    # Only the prefix was found: there is no digit to convert.
    if len(oct_string) < 3:
        self.error = Error("ValueError: Can not convert to a number")
        return Token(tt._INVALID, oct_string, start_position, end_position)

    return Token(tt._OCT, int(oct_string, base=8),
                 start_position, end_position)
def make_binary(self):
    """
    Reads a binary literal such as 0b101.

    The first two characters must be the prefix "0b" (compared
    case-insensitively). After the prefix, characters are consumed for as
    long as self.allowed_character accepts them as binary digits.

    On a wrong or truncated prefix, or when no digit follows the prefix,
    self.error is set and an _INVALID token holding the text read so far
    is returned.

    RETURNS: Token - _BIN token whose value is the parsed integer
    """
    allowed_chars = "01"
    start_position = self.position.copy()

    # Read at most two prefix characters. Stopping when the input runs out
    # avoids concatenating None (a TypeError) on a truncated literal.
    binary_string = ""
    while len(binary_string) < 2 and self.current_character:
        binary_string += self.current_character
        self.advance()

    if binary_string.lower() != "0b":
        end_position = self.position.copy()
        self.error = Error("ValueError: Can not convert to a number")
        return Token(tt._INVALID, binary_string,
                     start_position, end_position)

    while self.current_character and self.allowed_character(allowed_chars):
        binary_string += self.current_character
        self.advance()
    end_position = self.position.copy()

    # Only the prefix was found: there is no digit to convert.
    if len(binary_string) < 3:
        self.error = Error("ValueError: Can not convert to a number")
        return Token(tt._INVALID, binary_string,
                     start_position, end_position)

    return Token(tt._BIN, int(binary_string, base=2),
                 start_position, end_position)
def make_tokens(self):
    """
    Performs the lexical analysis on the source code and breaks it down
    to terminal tokens.

    Tokens are appended to self.tokens. Lexing stops at the first error:
    self.error is set and the method returns right after the offending
    token has been appended. Spaces and tabs between tokens are skipped.

    RETURNS: None - results are left in self.tokens and self.error
    """
    while self.current_character:
        if self.allowed_character("0123456789"):
            self.tokens.append(self.make_number())
        elif self.current_character == "\n":
            start = self.position.copy()
            self.advance()
            self.tokens.append(
                Token(tt._NEWLINE, '\n', start, self.position.copy()))
            indent = self.check_indent()
            # check_indent returns None on error, so stop before the
            # level is compared or applied.
            if self.error:
                return
            if indent != self.position.indent:
                self.change_indent(indent)
        elif self.allowed_character("'\""):
            self.tokens.append(self.make_string())
        elif self.is_operator():
            self.tokens.append(self.make_operator())
        else:
            letter_result, error = isLetter(self.current_character)
            if error:
                self.error = error
                return
            if letter_result:
                # No `continue` here: an error raised while building the
                # symbol must reach the shared check below, like the
                # errors of every other token kind.
                self.tokens.append(self.make_symbol())
            elif self.allowed_character(" \t"):
                self.advance()
                continue
            else:
                start = self.position.copy()
                char = self.current_character
                self.advance()
                end = self.position.copy()
                self.tokens.append(Token(tt._INVALID, char, start, end))
                self.error = Error("ValueError: Unexpected character")

        # Shared exit: whichever branch ran, stop at the first error.
        if self.error:
            return
def change_indent(self, indent):
    """
    Generates indent and dedent tokens to change indentation level.

    self.position.indent is moved one level at a time towards `indent`;
    an _INDENT token is appended to self.tokens for every level gained
    and a _DEDENT token for every level lost. Nothing is emitted when the
    level is already equal.

    indent: int - target indentation level, must not be negative

    If `indent` is not an int or is negative, self.error is set and no
    tokens are generated.
    """
    if not isinstance(indent, int) or indent < 0:
        self.error = Error("ValueError: Positive integer expected")
        return

    # Tokens get copies of the position, as everywhere else in the lexer.
    # Handing out self.position itself would let later changes to the
    # live position alter tokens that were already emitted.
    while self.position.indent < indent:
        self.position.indent += 1
        self.tokens.append(
            Token(tt._INDENT, "    ",
                  self.position.copy(), self.position.copy()))

    while self.position.indent > indent:
        self.position.indent -= 1
        self.tokens.append(
            Token(tt._DEDENT, "    ",
                  self.position.copy(), self.position.copy()))
def make_string(self):
    r"""
    Reads a quoted string literal.

    The current character is taken as the opening quote; characters are
    collected until the same quote character is met again.

    Escape sequences handled inside the literal:
        \n          -> newline character
        \t          -> tab character
        \\  \"  \'  -> the escaped character itself
        \<newline>  -> line continuation, both characters are dropped
    A backslash followed by anything else is kept as it is.

    Errors (self.error is set and an _INVALID token with the text read so
    far is returned):
        - an unescaped line break inside the string
        - the input ends before the closing quote

    RETURNS: Token - _STRING token holding the unescaped text
    """
    start = self.position.copy()
    qm = self.current_character
    not_allowed_chars = qm + "\n"
    string = ""

    while self.advance() and not self.allowed_character(not_allowed_chars):
        if self.current_character == '\\':
            upcoming = self.look_ahead()
            if upcoming == '\n':
                self.advance()
                continue
            if upcoming == 'n':
                self.advance()
                string += '\n'
                continue
            if upcoming == 't':
                self.advance()
                string += '\t'
                continue
            if upcoming in ('\\', '"', "'"):
                # Step onto the escaped character so that it, not the
                # backslash, is appended below. The loop then advances
                # past it, so an escaped quote cannot close the string.
                self.advance()
        string += self.current_character

    if self.current_character == qm:
        self.advance()
        end = self.position.copy()
        return Token(tt._STRING, string, start, end)

    if self.current_character == "\n":
        self.advance()
        end = self.position.copy()
        self.error = Error("StringError: Incorrect line break in string")
        return Token(tt._INVALID, string, start, end)

    # Neither a closing quote nor a line break stopped the loop: the
    # input ran out. Report it instead of silently returning None.
    end = self.position.copy()
    self.error = Error("StringError: Unterminated string")
    return Token(tt._INVALID, string, start, end)
def make_decimal(self):
    """
    Reads a decimal literal.

    Digits and at most one dot are consumed; reading stops in front of a
    second dot, which is left for the next token. A literal may start
    with the dot (".5").

    Errors (self.error is set and an _INVALID token is returned):
        - the current character is neither a digit nor a dot, or the
          input has already ended
        - no digit was read at all (a lone ".")

    RETURNS: Token - _FLOAT token if the literal contains a dot,
             otherwise _INT token
    """
    allowed_chars = "1234567890."
    start_position = self.position.copy()

    char = self.current_character
    if not char or char not in allowed_chars:
        self.error = Error("ValueError: Expected a digit or dot '.'")
        if char:
            # Consume the offending character; at end of input there is
            # nothing to consume.
            self.advance()
        end_position = self.position.copy()
        return Token(tt._INVALID, char, start_position, end_position)

    number_string = ""
    seen_dot = False
    while self.current_character and self.allowed_character(allowed_chars):
        if self.current_character == ".":
            if seen_dot:
                break
            # Count the dot when it is consumed, so a leading dot is
            # recognised as part of a float as well.
            seen_dot = True
        number_string += self.current_character
        self.advance()
    end_position = self.position.copy()

    # "" or "." cannot be converted; report it like the other number
    # readers do instead of letting int()/float() raise.
    if number_string in ("", "."):
        self.error = Error("ValueError: Can not convert to a number")
        return Token(tt._INVALID, number_string,
                     start_position, end_position)

    if seen_dot:
        return Token(tt._FLOAT, float(number_string),
                     start_position, end_position)
    return Token(tt._INT, int(number_string),
                 start_position, end_position)
def make_operator(self):
    """
    Builds an operator or punctuation token.

    The current character is combined with up to two characters of
    look-ahead and the longest known operator is taken: the full
    three-character text first, then its first two characters, then the
    current character alone. The matched characters are consumed.

    Errors (self.error is set and an _INVALID token is returned):
        - there is no current character (nothing is consumed)
        - the current character does not start a known operator (that
          single character is consumed)

    RETURNS: Token
    """
    lookahead = self.look_ahead(2)
    if not lookahead:
        # Fewer than two characters available, try a single one.
        lookahead = self.look_ahead()

    candidate = self.current_character
    if candidate is None:
        self.error = Error("LexicalError: No characters in buffer")
        return Token(tt._INVALID, None, None, None)
    if lookahead:
        candidate += lookahead

    start = self.position.copy()
    values = {
        '=': tt._ASSIGN,
        '==': tt._BITWISE_EQ,
        '+': tt._PLUS,
        '++': tt._INCR,
        '+=': tt._PLUS_ASSIGN,
        '-': tt._MINUS,
        '--': tt._DECR,
        '-=': tt._MINUS_ASSIGN,
        '*': tt._MULT,
        '*=': tt._MULT_ASSIGN,
        '**': tt._EXP,
        '**=': tt._POWER_ASSIGN,
        '/': tt._DIV,
        '/=': tt._DIV_ASSIGN,
        '//': tt._FLOOR,
        '//=': tt._FLOOR_ASSIGN,
        '%': tt._MOD,
        '%=': tt._MOD_ASSIGN,
        '&=': tt._AND_ASSIGN,
        '&': tt._BITWISE_AND,
        '|=': tt._OR_ASSIGN,
        '|': tt._BITWISE_OR,
        '^': tt._BITWISE_XOR,
        '^=': tt._XOR_ASSIGN,
        '<': tt._BITWISE_LT,
        '<=': tt._BITWISE_LTE,
        '<<': tt._BITWISE_LSHIFT,
        '<<=': tt._LSHIFT_ASSIGN,
        '>': tt._BITWISE_GT,
        '>=': tt._BITWISE_GTE,
        '>>': tt._BITWISE_RSHIFT,
        '>>=': tt._RSHIFT_ASSIGN,
        '(': tt._LPARAN,
        ')': tt._RPARAN,
        '[': tt._LSQBRACK,
        ']': tt._RSQBRACK,
        '{': tt._LCURLBRACK,
        '}': tt._RCURLBRACK,
        '.': tt._DOT,
        ',': tt._COMMA,
        ':': tt._COLON,
    }

    # Longest match first: the whole three characters, then the first two.
    matched = None
    if len(candidate) == 3 and values.get(candidate):
        matched = candidate
    elif len(candidate) >= 2 and values.get(candidate[:2]):
        matched = candidate[:2]

    if matched is not None:
        self.advance(len(matched))
        end = self.position.copy()
        return Token(values.get(matched), matched, start, end)

    # Single character: either a one-character operator or an error.
    single = self.current_character
    token_type = values.get(single)
    if not token_type:
        self.error = Error("ValueError: Token not a operator")
    self.advance()
    end = self.position.copy()
    return Token(token_type or tt._INVALID, single, start, end)