def _handle_word(self, state):
    """Read the word starting at the cursor and emit the matching token.

    The cursor (``state.column``) is advanced over every consecutive
    character accepted by ``is_word``. The collected text is then classified:

    * found in ``Tokens.Keyword.value``      -> ``Keyword`` token, text in ``spec``
    * found in ``Tokens.ConstLiteral.value`` -> ``ConstLiteral`` token, text in ``spec``
    * anything else                          -> ``Identifier`` registered through
      ``_add_correct_token`` (the text goes to the symbol table).
    """
    begin = state.column
    while state.column < len(state.line) and is_word(state.line[state.column]):
        state.column += 1
    text = state.line[begin:state.column]

    # Reserved words are checked in priority order: keywords first.
    for category in (Tokens.Keyword, Tokens.ConstLiteral):
        if text in category.value:
            self._token.append(
                Token(token_type=category, row=state.row, column=begin,
                      spec=text))
            return

    self._add_correct_token(token_type=Tokens.Identifier, row=state.row,
                            column=begin, lit_value=text)
def _interpolate(self, state):
    """Tokenize an interpolation section until its closing symbol is found.

    Emits ``InterpolationStart`` at the cursor, skips two characters
    (presumably the two-character opener -- confirm against the caller) and
    then keeps tokenizing, line after line, through
    ``_get_next_token_interpolate`` until that helper reports the end.

    If the input runs out first, the cursor is parked on the last character
    of the last line, any still-open multi-line construct is reported via
    ``_check_multi_line_mode``, and a synthetic ``InterpolationEnd`` token is
    emitted and recorded as invalid.
    """
    opener = Token(token_type=Tokens.InterpolationStart, row=state.row,
                   column=state.column)
    self._token.append(opener)
    state.column += 2

    while state.row < len(state.all_lines):
        state.line = state.all_lines[state.row]
        while state.column < len(state.line):
            finished = self._get_next_token_interpolate(state)
            if finished:
                return
        # Current line exhausted: continue at the start of the next one.
        state.row += 1
        state.column = 0

    # Ran past the last line without meeting the end of the interpolation.
    state.row -= 1
    state.column = len(state.all_lines[-1]) - 1
    self._check_multi_line_mode(state)

    closer = Token(token_type=Tokens.InterpolationEnd, row=state.row,
                   column=state.column)
    self._token.append(closer)
    self._invalid_token.append(
        WrongToken(message="There was no end for interpolation",
                   token=closer))
def _handle_punctuation(self, state):
    """Emit a ``Punctuation`` token for the character under the cursor.

    The character itself is stored in the token's ``spec``; the cursor then
    moves one position to the right.
    """
    here = state.column
    mark = Token(token_type=Tokens.Punctuation, row=state.row, column=here,
                 spec=state.line[here])
    self._token.append(mark)
    state.column = here + 1
def _add_wrong_token(self, token_type, row, column, lit_value, error_message):
    """Register a malformed literal as a token and as an error.

    :param token_type: token category to assign to the new token
    :param row: row of the token start
    :param column: column of the token start
    :param lit_value: literal text, appended to the symbol table
    :param error_message: message stored in the ``WrongToken`` record

    The token's ``index`` is the position its literal takes in the symbol
    table.
    """
    slot = len(self._symbol_table)
    broken = Token(token_type=token_type, row=row, column=column, index=slot)
    self._token.append(broken)
    self._symbol_table.append(lit_value)
    self._invalid_token.append(
        WrongToken(message=error_message, token=broken))
def _get_next_token_interpolate(self, state):
    """Consume one token inside an interpolation section.

    :return: ``True`` when the cursor is on the interpolation end symbol
        (and no multi-line construct is open) -- an ``InterpolationEnd``
        token is emitted in that case; ``False`` after delegating to the
        regular ``_get_next_token``.
    """
    symbol = state.line[state.column]
    if state.multi_line_mode or symbol != Tokens.InterpolationEnd.value:
        # Either inside a multi-line construct or on an ordinary symbol.
        self._get_next_token(state)
        return False

    self._token.append(
        Token(token_type=Tokens.InterpolationEnd, row=state.row,
              column=state.column))
    return True
def _handle_whitespaces(self, state):
    """Emit one token per whitespace character starting at the cursor.

    A space yields ``Space``, a tab yields ``Tab`` and a newline yields
    ``Enter``. Scanning stops at the first other character or at the end of
    the line; the cursor is left on that position.
    """
    kind_of = {
        ' ': Tokens.Space,
        '\t': Tokens.Tab,
        '\n': Tokens.Enter,
    }
    while state.column < len(state.line):
        symbol = state.line[state.column]
        if symbol not in kind_of:
            break
        self._token.append(
            Token(token_type=kind_of[symbol], row=state.row,
                  column=state.column))
        state.column += 1
def _init_template_string(self, state):
    """Open a template string at the cursor.

    Emits ``StartTemplateString`` followed by an (initially empty)
    ``StringLiteral`` whose ``spec`` is the opening delimiter character.
    That literal becomes ``state.token`` so later chunks can be appended to
    its symbol-table entry, multi-line mode is switched on, and the cursor
    moves past the delimiter.
    """
    opener_col = state.column
    self._token.append(
        Token(token_type=Tokens.StartTemplateString, row=state.row,
              column=opener_col))
    self._add_correct_token(token_type=Tokens.StringLiteral, row=state.row,
                            column=opener_col, lit_value="",
                            token_spec=state.line[opener_col])
    # The literal just registered is the construct that is now "open".
    state.token = self._token[-1]
    state.multi_line_mode = True
    state.column = opener_col + 1
def _add_correct_token(self, token_type, row, column, lit_value, token_spec=None):
    """Register a valid literal-bearing token.

    :param token_type: token category
    :param row: row of the token start
    :param column: column of the token start
    :param lit_value: literal text, appended to the symbol table
    :param token_spec: optional ``spec`` stored on the token

    The token's ``index`` is the position its literal takes in the symbol
    table.
    """
    symbols = self._symbol_table
    entry = Token(token_type=token_type, row=row, column=column,
                  index=len(symbols), spec=token_spec)
    self._token.append(entry)
    symbols.append(lit_value)
def _handle_template_string(self, state):
    """Consume the part of an open template string that lies on this line.

    The text from the cursor onward is searched for the closing delimiter
    (``state.token.spec``).

    * No delimiter on the line: unless ``_check_interpolation`` takes over,
      the rest of the line is appended to the literal's symbol-table entry
      and the cursor moves to the end of the line (the string stays open).
    * Delimiter found: unless ``_check_interpolation`` takes over, a
      delimiter preceded by an odd number of backslashes is escaped and the
      search resumes after it. Otherwise the text before the delimiter is
      appended to the literal, ``EndTemplateString`` is emitted, and
      multi-line mode is closed.

    Only backslashes at or after the cursor are counted when deciding
    whether a delimiter is escaped.
    """
    delimiter = state.token.spec
    search_from = state.column

    while search_from < len(state.line):
        # Search in place instead of slicing a copy of the tail each time.
        hit = state.line.find(delimiter, search_from)

        if hit == -1:
            if self._check_interpolation(state, state.column,
                                         len(state.line)):
                return
            # The string continues on the next line: keep everything.
            # (A trailing backslash needs no special handling here; the
            # remainder of the line is stored in either case.)
            self._symbol_table[state.token.index] += state.line[state.column:]
            state.column = len(state.line)
            return

        if self._check_interpolation(state, state.column, hit):
            return

        # Count the run of backslashes immediately before the delimiter.
        backslashes = 0
        probe = hit - 1
        while probe >= state.column and state.line[probe] == '\\':
            backslashes += 1
            probe -= 1
        if backslashes % 2 == 1:
            # Escaped delimiter: it is part of the text, look further.
            search_from = hit + 1
            continue

        self._symbol_table[state.token.index] += state.line[state.column:hit]
        self._token.append(
            Token(token_type=Tokens.EndTemplateString, row=state.row,
                  column=hit))
        state.column = hit + 1
        state.token = None
        state.multi_line_mode = False
        return
def _check_multi_line_mode(self, state):
    """Report a multi-line construct that is still open.

    Does nothing unless ``state.multi_line_mode`` is set. Otherwise:

    * open multi-line comment -> error attached to the comment token;
    * open string literal whose delimiter is not a backtick -> error
      attached to the string token;
    * open backtick string -> a synthetic ``EndTemplateString`` token is
      emitted at the cursor and the error is attached to it.
    """
    if not state.multi_line_mode:
        return

    if state.token.type == Tokens.MultiLineComment:
        self._invalid_token.append(
            WrongToken(message="Multi line comment requires end",
                       token=state.token))

    if state.token.type == Tokens.StringLiteral:
        if state.token.spec != '`':
            self._invalid_token.append(
                WrongToken(message="String literal requires the end",
                           token=state.token))
        else:
            closer = Token(token_type=Tokens.EndTemplateString,
                           row=state.row, column=state.column)
            self._token.append(closer)
            self._invalid_token.append(
                WrongToken(message="String literal requires the end",
                           token=closer))
def _handle_operation(self, state):
    """Read the longest operator starting at the cursor.

    Characters are taken one at a time for as long as the extended text is
    still listed in ``Tokens.Operators.value``. A lone ``/`` is ambiguous
    (division or start of a regex) and is handed to ``_check_regex``; every
    other result is emitted as an ``Operators`` token with the text in
    ``spec``.
    """
    begin = state.column
    matched = ""
    while state.column < len(state.line):
        candidate = matched + state.line[state.column]
        if candidate not in Tokens.Operators.value:
            break
        matched = candidate
        state.column += 1

    if matched == '/':
        self._check_regex(state)
        return

    self._token.append(
        Token(token_type=Tokens.Operators, row=state.row, column=begin,
              spec=matched))
def _check_regex(self, state):
    """Decide whether the ``/`` just consumed is division or opens a regex.

    The cursor is expected to sit right after the slash. The previous
    non-whitespace token decides: after an ``Identifier`` or a
    ``NumberLiteral`` the slash is emitted as an ``Operators`` token;
    otherwise (including when there is no previous token) a regex literal is
    read.

    Regex reading:

    * the body runs up to the next ``/`` on the same line (every ``/``
      terminates the body; escaped slashes are not recognised);
    * if the line ends first, the token is recorded with the error
      "Regex has no end"; a trailing newline is left unconsumed and kept
      out of the literal;
    * after the closing slash, characters accepted by ``is_symbol`` are
      taken as flags; a digit or underscore right after them is included
      in the literal, consumed, and reported as "Regex incorrect ending".

    The literal text is stored in the symbol table and the ``Regex`` token
    refers to it by ``index``.
    """
    from TokenChecks import is_whitespace_token

    start_pos = state.column - 1

    # Find the closest preceding token that is not whitespace.
    i = len(self._token) - 1
    while i >= 0 and is_whitespace_token(self._token[i].type):
        i -= 1

    if i >= 0 and (self._token[i].type == Tokens.Identifier
                   or self._token[i].type == Tokens.NumberLiteral):
        self._token.append(
            Token(token_type=Tokens.Operators, row=state.row,
                  column=start_pos, spec='/'))
        return

    # Read the body up to and including the closing slash.
    regex = '/'
    closed = False
    while state.column < len(state.line):
        c = state.line[state.column]
        state.column += 1
        regex += c
        if c == '/':
            closed = True
            break

    error_message = None
    if not closed:
        # Track the closing slash explicitly: a regex that closes on the
        # very last character of a line without a trailing newline must not
        # be mistaken for an unterminated one.
        if state.line[-1] == '\n':
            # Leave the newline for the whitespace handler.
            state.column = len(state.line) - 1
            regex = regex[:-1]
        error_message = "Regex has no end"
    else:
        # Flags after the closing slash.
        while state.column < len(state.line):
            c = state.line[state.column]
            if not is_symbol(c) or c == '_':
                if is_number(c) or c == '_':
                    # Offending character becomes part of the bad literal
                    # and is skipped.
                    regex += c
                    state.column += 1
                    error_message = "Regex incorrect ending"
                break
            regex += c
            state.column += 1

    regex_token = Token(token_type=Tokens.Regex, row=state.row,
                        column=start_pos, index=len(self._symbol_table))
    if error_message is not None:
        self._invalid_token.append(
            WrongToken(message=error_message, token=regex_token))
    self._symbol_table.append(regex)
    self._token.append(regex_token)
def _handle_number(self, state):
    """Tokenize a numeric literal, or a leading dot, at the cursor.

    Order of processing:

    1. ``_check_nonnormal_numbers`` gets the first chance; a truthy result
       means it already handled the input.
    2. A dot not followed by a digit is either the ``...`` operator (three
       dots) or a single ``Punctuation`` token.
    3. ``0`` followed by a character that is neither accepted by
       ``is_correct_after_number`` nor a dot is reported as
       "Not allowed start from zero" (both characters are consumed).
    4. Otherwise the integer part, an optional fraction and an optional
       exponent (with optional sign) are read. Underscore separators are
       accepted inside digit runs, but an underscore directly before a dot,
       an exponent marker or the end of the number is an error.
    5. The character that stopped the scan decides the outcome: one accepted
       by ``is_correct_after_number`` ends a valid number, ``n`` makes a
       big-integer literal (invalid after a fraction or exponent), and
       anything else is an error.

    Valid numbers get ``spec`` ``Tokens.NumberLiteral.value[0]`` when a dot
    or exponent was seen, ``value[1]`` otherwise, and ``value[5]`` for the
    ``n`` suffix. Literal text is stored through ``_add_correct_token`` /
    ``_add_wrong_token``.
    """
    if self._check_nonnormal_numbers(state):
        return

    bad_end = "Unexpected symbol in the number end"
    current_symbol = state.line[state.column]

    if is_dot(current_symbol):
        if state.column + 1 >= len(state.line) or not is_number(
                state.line[state.column + 1]):
            # Bounds are checked before indexing, so a dot that is the last
            # character of the line no longer raises IndexError.
            if state.column + 2 < len(state.line) and \
                    is_dot(state.line[state.column + 1]) and \
                    is_dot(state.line[state.column + 2]):
                self._token.append(
                    Token(token_type=Tokens.Operators, spec='...',
                          row=state.row, column=state.column))
                state.column += 3
            else:
                self._token.append(
                    Token(token_type=Tokens.Punctuation, row=state.row,
                          column=state.column, spec=current_symbol))
                state.column += 1
            return

    if current_symbol == '0' and state.column + 1 < len(state.line) and \
            not is_correct_after_number(state.line[state.column + 1]) and \
            not is_dot(state.line[state.column + 1]):
        state.column += 1
        self._add_wrong_token(token_type=Tokens.NumberLiteral,
                              row=state.row,
                              column=state.column - 1,
                              lit_value=current_symbol + state.line[state.column],
                              error_message="Not allowed start from zero")
        state.column += 1
        return

    new_number = ""
    was_dot, was_e, last_dash = False, False, False
    start_pos = state.column
    unknown = False

    # Integer part: digits and underscores up to a dot, an exponent marker
    # or any other character.
    while state.column < len(state.line):
        current_symbol = state.line[state.column]
        if is_number(current_symbol):
            new_number += current_symbol
            last_dash = False
        elif is_dot(current_symbol):
            new_number += current_symbol
            if last_dash:
                self._add_wrong_token(
                    token_type=Tokens.NumberLiteral, row=state.row,
                    column=start_pos, lit_value=new_number,
                    error_message=bad_end)
                state.column += 1
                return
            was_dot = True
            break
        elif is_e(current_symbol):
            new_number += current_symbol
            if last_dash:
                self._add_wrong_token(
                    token_type=Tokens.NumberLiteral, row=state.row,
                    column=start_pos, lit_value=new_number,
                    error_message=bad_end)
                state.column += 1
                return
            was_e = True
            break
        elif is_down_dash(current_symbol):
            last_dash = True
            new_number += current_symbol
        else:
            unknown = True
            break
        state.column += 1

    # Fraction part.
    if was_dot:
        state.column += 1  # step over the dot
        if state.column < len(state.line) and \
                (is_number(state.line[state.column])
                 or is_e(state.line[state.column])):
            while state.column < len(state.line):
                current_symbol = state.line[state.column]
                if is_number(current_symbol):
                    new_number += current_symbol
                    last_dash = False
                elif is_e(current_symbol):
                    new_number += current_symbol
                    if last_dash:
                        self._add_wrong_token(
                            token_type=Tokens.NumberLiteral, row=state.row,
                            column=start_pos, lit_value=new_number,
                            error_message=bad_end)
                        state.column += 1
                        return
                    was_e = True
                    break
                elif is_down_dash(current_symbol):
                    last_dash = True
                    new_number += current_symbol
                else:
                    unknown = True
                    break
                state.column += 1

    # Exponent part.
    if was_e:
        state.column += 1  # step over the exponent marker
        if state.column < len(state.line) and \
                (is_number(state.line[state.column])
                 or state.line[state.column] in '+-'):
            if state.line[state.column] in '+-':
                new_number += state.line[state.column]
                state.column += 1
                if state.column >= len(state.line) or not is_number(
                        state.line[state.column]):
                    # A sign without digits: drop the sign from the literal
                    # and leave it unconsumed.
                    new_number = new_number[:-1]
                    self._add_wrong_token(
                        token_type=Tokens.NumberLiteral, row=state.row,
                        column=start_pos, lit_value=new_number,
                        error_message=bad_end)
                    state.column -= 1
                    return
            while state.column < len(state.line):
                current_symbol = state.line[state.column]
                if is_number(current_symbol):
                    new_number += current_symbol
                    last_dash = False
                elif is_down_dash(current_symbol):
                    last_dash = True
                    new_number += current_symbol
                else:
                    unknown = True
                    break
                state.column += 1
        else:
            # Exponent marker with nothing usable after it.
            self._add_wrong_token(
                token_type=Tokens.NumberLiteral, row=state.row,
                column=start_pos, lit_value=new_number,
                error_message=bad_end)
            return

    if last_dash:
        # The number may not end with an underscore.
        self._add_wrong_token(
            token_type=Tokens.NumberLiteral, row=state.row,
            column=start_pos, lit_value=new_number,
            error_message=bad_end)
        state.column += 1
        return

    if unknown:
        if is_correct_after_number(state.line[state.column]):
            cur_spec = Tokens.NumberLiteral.value[1]
            if was_dot or was_e:
                cur_spec = Tokens.NumberLiteral.value[0]
            self._add_correct_token(token_type=Tokens.NumberLiteral,
                                    row=state.row,
                                    column=start_pos,
                                    lit_value=new_number,
                                    token_spec=cur_spec)
        elif state.line[state.column] == 'n':
            new_number += 'n'
            if was_dot or was_e:
                self._add_wrong_token(
                    token_type=Tokens.NumberLiteral, row=state.row,
                    column=start_pos, lit_value=new_number,
                    error_message="Invalid biginteger literal")
            else:
                self._add_correct_token(
                    token_type=Tokens.NumberLiteral, row=state.row,
                    column=start_pos, lit_value=new_number,
                    token_spec=Tokens.NumberLiteral.value[5])
            state.column += 1
        else:
            self._add_wrong_token(
                token_type=Tokens.NumberLiteral, row=state.row,
                column=start_pos,
                lit_value=new_number + state.line[state.column],
                error_message=bad_end)
            state.column += 1
    else:
        # The scan stopped at the end of the line (or right after a bare dot).
        cur_spec = Tokens.NumberLiteral.value[1]
        if was_dot or was_e:
            cur_spec = Tokens.NumberLiteral.value[0]
        self._add_correct_token(token_type=Tokens.NumberLiteral,
                                row=state.row,
                                column=start_pos,
                                lit_value=new_number,
                                token_spec=cur_spec)
def __init__(self):
    """Create a fresh scanner state positioned at row 0, column 0."""
    # Cursor position.
    self.row = 0
    self.column = 0
    # Text of the line currently being scanned.
    self.line = ""
    # Set while a construct spanning several lines is open; ``token`` then
    # refers to that construct's token. It starts as an Invalid placeholder.
    self.multi_line_mode = False
    self.token = Token(token_type=Tokens.Invalid)