def _generic_builder(self, type_1char, type_2chars):
    """Build a one- or two-character token whose first char is type_1char.value.

    Returns None when the current character does not start this token;
    otherwise consumes one or two characters and returns the Token.
    """
    if self.source.current_char != type_1char.value:
        return None
    self.source.move_to_next_char()
    # A matching second character upgrades to the two-character variant.
    if self.source.current_char != type_2chars.value[-1]:
        return Token(type=type_1char)
    self.source.move_to_next_char()
    return Token(type=type_2chars)
def run(document):
    """Lex, parse, build the AST for, and execute *document*.

    On any reported InternalError, prints a colored diagnostic (plus extra
    detail when DETAILED_ERROR is set) and exits the process with status 1.
    """
    tokens = lexer.run(document)
    tokens = cut_of_eos(tokens)
    tokens = remove_repeated_eos(tokens)
    # Guarantee a trailing end-of-statement token for the parser.
    tokens.append(Token("\n", "", tokens[-1].line))

    parse_tree = None
    try:
        parse_tree = parser.parse(tokens)
    except InternalError as e:
        print(bcolors.HEADER + "There is a syntax error in your script!" + bcolors.ENDC)
        print(str(e.args[0]))
        if DETAILED_ERROR:
            print("\n\n-------Detailed Parse Tree-------")
            try:
                print(e.args[1])
            except Exception:  # best effort: detail may be absent
                pass
        sys.exit(1)

    ast = None
    try:
        ast = abstract_syntax_tree.generate(parse_tree, None)
    except InternalError as e:
        print(
            bcolors.HEADER + "There is a parse tree sequence without a corresponding AST rule, this most likely "
            "isn't an error with your script but with the interpreter." + bcolors.ENDC)
        try:
            body = ""
            for t in e.args[1].body:
                body += repr(t) + " "
            print(e.args[1].head + " -> " + body)
        except Exception:  # best effort: rule detail may be absent
            pass
        sys.exit(1)

    state = State()
    try:
        ast.generate(state)
    except InternalError as e:
        msg = bcolors.FAIL + str(e.args[0]) + bcolors.ENDC
        print(bcolors.HEADER + "An error occurred while executing your script!" + bcolors.ENDC)
        print(msg)
        if DETAILED_ERROR:
            print("\n\n-------Detailed Abstract Syntax Tree-------")
            try:
                print(ast)
            except Exception:  # best effort
                pass
        sys.exit(1)
def try_to_build_string(self):
    """Try to lex a double-quoted string literal; return None if not at one.

    Supports the escapes \\" and \\\\; any other backslash sequence is kept
    verbatim (backslash included).  Reports TOKEN_BUILD_FAIL when the text
    ends (ETX) before the closing quote.
    """
    if self.source.current_char != '"':
        return None
    self.source.move_to_next_char()

    chars = []
    escapes = {'"': '"', '\\': '\\'}
    while self.source.current_char != '"':
        # Unterminated string: end-of-text reached before closing quote.
        if self.source.current_char == TokenType.ETX.value:
            self.error(error_code=ErrorCode.TOKEN_BUILD_FAIL)
        if self.source.current_char == '\\':
            self.source.move_to_next_char()
            escaped = self.source.current_char
            chars.append(escapes.get(escaped, f'\\{escaped}'))
        else:
            chars.append(self.source.current_char)
        self.source.move_to_next_char()

    # Consume the closing quote.
    self.source.move_to_next_char()
    return Token(
        type=TokenType.STRING,
        value=''.join(chars),
    )
def scan(self):
    """Finish the scan: append the end-of-file token and resolve keywords."""
    # Write the end-of-file token ("FIM") into the token stream.
    self.tokens.append(Token("FIM", '', self.linha))
    # Check the token table to update the type of reserved-word tokens.
    self.scanReserved()
    return self.tokens
def test_etx_on_comment_line(self):
    """A trailing comment must be skipped, leaving ETX directly after it."""
    lexer = Lexer(StringSource('not_comment = 1; # a comment'))
    expected = [
        Token(TokenType.ID, 'not_comment'),
        Token(TokenType.ASSIGN),
        Token(TokenType.SCALAR, 1),
        Token(TokenType.SEMI),
        Token(TokenType.ETX),
    ]
    for want in expected:
        got = lexer.current_token
        self.assertEqual(want.type, got.type)
        self.assertEqual(want.value, got.value)
        lexer.build_next_token()
def handle_number(document):
    """Consume a numeric literal at the global *pos* and append a LITERAL token.

    Accepts digit and '.' characters; the collected text is converted with
    float().  Advances the global *pos* past the literal.
    """
    global pos, cur_line
    total_str = ""
    # Parenthesized so the bounds check guards BOTH alternatives: the
    # original `a and b or c` form still evaluated document[pos] == "."
    # after pos reached len(document), raising IndexError at end of input.
    while pos < len(document) and (is_number(document[pos])
                                   or document[pos] == "."):
        total_str += document[pos]
        pos += 1
    tokens.append(Token("LITERAL", float(total_str), cur_line))
def try_to_build_neq(self):
    """Try to lex the '!=' operator; a lone '!' is a lexing error."""
    if self.source.current_char != '!':
        return None
    self.source.move_to_next_char()
    # '!' is only legal as the first half of '!='.
    if self.source.current_char != '=':
        self.error(error_code=ErrorCode.TOKEN_BUILD_FAIL)
    self.source.move_to_next_char()
    return Token(type=TokenType.NEQ)
def try_to_build_single_char_token(self):
    """Try to lex a token whose TokenType value equals the current character."""
    current = self.source.current_char
    try:
        # TokenType(...) raises ValueError when no member matches `current`.
        token_type = TokenType(current)
    except ValueError:
        return None
    self.source.move_to_next_char()
    return Token(type=token_type)
def handle_string(document):
    """Consume a double-quoted string starting at the global *pos*.

    Appends a LITERAL token with the string body; leaves *pos* on the
    closing quote (the caller advances past it).  Raises Exception when the
    document ends before the string is closed.
    """
    global pos, tokens, cur_line
    pos += 1  # skip the opening quote
    string = ""
    while pos < len(document) and document[pos] != "\"":
        string += document[pos]
        pos += 1
    if pos >= len(document):
        # str() is required: cur_line is an int, and the original
        # `"..." + cur_line` raised TypeError instead of this message.
        raise Exception("Unclosed string, in line: " + str(cur_line))
    tokens.append(Token("LITERAL", string, cur_line))
def scanTokens(self):
    """Scan self.programa into self.tokens, one Token per lexeme.

    Whitespace is skipped, newlines bump self.linha, and token types for
    delimiters/operators are resolved by the dedicated helper methods.
    """
    while self.atual < len(self.programa):
        self.inicio = self.atual
        char = self.nextChar()
        if char in (" ", "\t", "\r"):
            # Inline whitespace separates tokens but produces none.
            continue
        if char == "\n":
            self.linha += 1
        elif char in "(){}":
            # Delimiters: type resolved by delimitadoresToken.
            self.tokens.append(
                Token(self.delimitadoresToken(char),
                      self.programa[self.inicio:self.atual],
                      self.linha))
        elif char in "+-*/":
            # Arithmetic operators.
            self.tokens.append(
                Token(self.opAritmeticaToken(char),
                      self.programa[self.inicio:self.atual],
                      self.linha))
        elif char in ":=!<>":
            # Relational / assignment operators.
            self.tokens.append(
                Token(self.opBolleanaToken(char),
                      self.programa[self.inicio:self.atual],
                      self.linha))
        elif char == ",":
            # Comma.
            self.tokens.append(
                Token("COMMA", self.programa[self.inicio:self.atual],
                      self.linha))
        elif "0" <= char <= "9":
            # Numbers: consume the remaining ASCII digit run.
            while "0" <= self.lookAhead() <= "9":
                self.nextChar()
            self.tokens.append(
                Token("NUM", self.programa[self.inicio:self.atual],
                      self.linha))
        elif char.isalpha():
            # Letters / identifiers / reserved words.
            while self.lookAhead().isalnum():
                self.nextChar()
            self.tokens.append(
                Token("ID", self.programa[self.inicio:self.atual],
                      self.linha))
        else:
            print("Caractere inválido na linha ", self.linha)
            exit(2)
def try_to_build_scalar(self):
    """Try to lex a numeric literal (SCALAR); return None if not at a digit.

    Accepted form: either a single '0' or a run of digits with no leading
    zero, optionally followed by a '.' fraction, optionally followed by an
    'e'/'E' exponent with an optional sign.  Reports TOKEN_BUILD_FAIL on a
    leading zero followed by a digit, a decimal point with no digits, or an
    exponent with no digits.  The value is always stored as float.
    """
    if not self.source.current_char.isdigit():
        return None
    collected_chars = []
    # Handle integer part of scalar
    if self.source.current_char == '0':
        collected_chars.append('0')
        self.source.move_to_next_char()
        # A digit right after a leading '0' (e.g. "01") is rejected.
        if self.source.current_char.isdigit():
            self.error(error_code=ErrorCode.TOKEN_BUILD_FAIL)
    else:
        while self.source.current_char.isdigit():
            collected_chars.append(self.source.current_char)
            self.source.move_to_next_char()
    # Handle decimal part of scalar
    if self.source.current_char == '.':
        collected_chars.append(self.source.current_char)
        self.source.move_to_next_char()
        # At least one digit must follow the decimal point.
        if not self.source.current_char.isdigit():
            self.error(error_code=ErrorCode.TOKEN_BUILD_FAIL)
        while self.source.current_char.isdigit():
            collected_chars.append(self.source.current_char)
            self.source.move_to_next_char()
    # Handle scientific notation
    if self.source.current_char == 'e' or self.source.current_char == 'E':
        collected_chars.append(self.source.current_char)
        self.source.move_to_next_char()
        # Optional exponent sign.
        if self.source.current_char == '-' or self.source.current_char == '+':
            collected_chars.append(self.source.current_char)
            self.source.move_to_next_char()
        # At least one exponent digit is required.
        if not self.source.current_char.isdigit():
            self.error(error_code=ErrorCode.TOKEN_BUILD_FAIL)
        while self.source.current_char.isdigit():
            collected_chars.append(self.source.current_char)
            self.source.move_to_next_char()
    # convert to string
    result = ''.join(collected_chars)
    return Token(
        type=TokenType.SCALAR,
        value=float(result),
    )
def run(document):
    """Tokenize *document* into the global token list and return it.

    Recognizes special characters, the two-character operators
    >=, ==, <=, +=, -=, string and numeric literals, keywords and symbols,
    and tracks cur_line on newlines.
    """
    # The original declared `global pos, cur_line` a second time; the
    # duplicate statement was redundant and has been removed.
    global pos, tokens, cur_line
    buffer = ""
    while pos < len(document):
        if document[pos] in SPECIAL_CHARS:
            # Flush any identifier/keyword collected so far, then
            # re-process this special character on the next iteration.
            if len(buffer) > 0:
                if is_keyword(buffer):
                    tokens.append(Token(buffer, "", cur_line))
                else:
                    tokens.append(Token("SYMBOL", buffer, cur_line))
                buffer = ""
                continue
            # Two-character operators take priority over single chars.
            if pos + 1 < len(document) and document[pos] + document[
                    pos + 1] in [">=", "==", "<=", "+=", "-="]:
                tokens.append(
                    Token(document[pos] + document[pos + 1], "", cur_line))
                pos += 2
            else:
                if document[pos] == "\"":
                    handle_string(document)
                elif document[pos] != " ":
                    tokens.append(Token(document[pos], "", cur_line))
                pos += 1
        elif is_number(document[pos]) and len(buffer) == 0:
            handle_number(document)
        else:
            buffer += document[pos]
            pos += 1
        if pos < len(document) and document[pos] == "\n":
            cur_line += 1
    # NOTE(review): a non-empty trailing buffer is silently dropped when the
    # document does not end with a special character — confirm inputs always
    # terminate with one (e.g. a newline).
    return tokens
def create_expected_binary_operator(parameters):
    """Build the expected nested Assignment/BinaryOperator tree.

    Each parameter is a triple of (kind, value) pairs; kind 'prev' splices
    in the tree built so far, so the fold nests left-to-right.  An op value
    of '=' produces an Assignment, anything else a BinaryOperator.
    """
    binop = None
    actions = {
        'scalar': lambda value: Scalar(value),
        'op': lambda type: Token(TokenType(type)),
        'id': lambda id: Identifier(id),
        # Reads the *current* binop at call time (late binding).
        'prev': lambda _: binop,
    }

    def build(node):
        kind, value = node[0], node[1]
        return actions[kind](value)

    for param in parameters:
        lhs, op, rhs = param[0], param[1], param[2]
        if op[1] == '=':
            binop = Assignment(build(lhs), build(rhs))
        else:
            binop = BinaryOperator(
                lvalue=build(lhs),
                op=build(op),
                rvalue=build(rhs),
            )
    return binop
def test_double_char_operators(self):
    """Comparison operators must lex correctly in any adjacency order."""
    source = '<= > <= < < >= = < > >= < <= <= < >= <= >= >= != > >='
    T = TokenType
    expected_types = [
        T.LEQ, T.GRE, T.LEQ, T.LESS, T.LESS, T.GEQ, T.ASSIGN,
        T.LESS, T.GRE, T.GEQ, T.LESS, T.LEQ, T.LEQ, T.LESS,
        T.GEQ, T.LEQ, T.GEQ, T.GEQ, T.NEQ, T.GRE, T.GEQ,
    ]
    lexer = Lexer(StringSource(source))
    for token_type in expected_types:
        expected = Token(token_type)
        actual = lexer.current_token
        self.assertEqual(expected.type, actual.type)
        self.assertEqual(expected.value, actual.value)
        lexer.build_next_token()
def scanTokens(self):
    """Scan self.programa, appending one Token per lexeme to self.tokens.

    Whitespace is skipped, newlines bump self.linha; '=', '<' and '>' use
    one character of lookahead to form ==, <>, <= and >=.
    """
    # Single-character tokens that need no lookahead.
    simple = {
        '(': "LBRACK", ')': "RBRACK",
        '{': "LCBRACK", '}': "RCBRACK",
        '+': "SUM", '-': "SUB", '*': "MUL", '/': "DIV",
        ',': "COMMA", ';': "SEMICOLON",
    }

    def emit(kind):
        # The token text is the slice scanned since self.inicio.
        self.tokens.append(
            Token(kind, self.programa[self.inicio:self.atual], self.linha))

    while self.atual < len(self.programa):
        self.inicio = self.atual
        char = self.nextChar()
        if char in (' ', '\t', '\r'):
            continue  # whitespace produces no token
        if char == '\n':
            self.linha += 1
        elif char in simple:
            emit(simple[char])
        elif char == '=':
            # '==' is equality; a lone '=' is assignment.
            if self.lookAhead() == '=':
                self.atual += 1
                emit("EQUAL")
            else:
                emit("ATTR")
        elif char == '<':
            # '<>' not-equal, '<=' less-or-equal, '<' less-than.
            if self.lookAhead() == '>':
                self.atual += 1
                emit("DIFF")
            elif self.lookAhead() == '=':
                self.atual += 1
                emit("LESSEQUAL")
            else:
                emit("LESS")
        elif char == '>':
            # '>=' greater-or-equal, '>' greater-than.
            if self.lookAhead() == '=':
                self.atual += 1
                emit("GREATEQUAL")
            else:
                emit("GREAT")
        elif '0' <= char <= '9':
            # Numbers: consume the trailing ASCII digit run.
            while '0' <= self.lookAhead() <= '9':
                self.nextChar()
            emit("NUMBER")
        elif char.isalpha():
            # Letters / identifiers / reserved words.
            while self.lookAhead().isalnum():
                self.nextChar()
            emit("ID")
        else:
            print('Caractere Inválido na linha:', self.linha)
            exit(2)
def test_all_tokens(self):
    """Every token type must be recognized from the all_tokens fixture file."""
    keyword_tokens = [Token(t) for t in RESERVED_KEYWORDS.values()]
    id_tokens = [
        Token(TokenType.ID, name)
        for name in ('a', 'aaa', 'a123', 'a_', 'a_123', 'abc_def_123gh')
    ]
    operator_tokens = [
        Token(t) for t in (
            TokenType.SEMI, TokenType.COMMA, TokenType.COLON,
            TokenType.PLUS, TokenType.MINUS, TokenType.MUL,
            TokenType.FLOAT_DIV, TokenType.ASSIGN,
            TokenType.LPAREN, TokenType.RPAREN,
            TokenType.LBRACK, TokenType.RBRACK,
            TokenType.LCURB, TokenType.RCURB,
            TokenType.LESS, TokenType.GRE, TokenType.LEQ, TokenType.GEQ,
            TokenType.EQ, TokenType.NEQ, TokenType.POW,
        )
    ]
    scalar_tokens = [
        Token(TokenType.SCALAR, v) for v in (
            0, 12, 12.345, 12.345,
            float('12.345e6'), float('12.345e-6'),
            0, 0.01, float('0.001e2'), float('0.0001e-2'),
        )
    ]
    tokens = (keyword_tokens + id_tokens + operator_tokens
              + scalar_tokens + [Token(TokenType.ETX)])

    lexer = Lexer(FileSource('tokens/all_tokens.txt'))
    for expected_token in tokens:
        token = lexer.current_token
        self.assertEqual(expected_token.type, token.type)
        self.assertEqual(expected_token.value, token.value)
        lexer.build_next_token()
while len(collected_chars) <= Lexer.MAX_ID_LENGTH and \ (self.source.current_char.isalnum() or self.source.current_char == '_'): collected_chars.append(self.source.current_char) self.source.move_to_next_char() if len(collected_chars) > Lexer.MAX_ID_LENGTH: self.error(error_code=ErrorCode.EXCEED_MAX_ID_SIZE) # convert to string result = ''.join(collected_chars) if not (token_type := RESERVED_KEYWORDS.get(result)): token_type = TokenType.ID return Token( type=token_type, value=result, ) def try_to_build_string(self): if self.source.current_char != '"': return None collected_chars = [] self.source.move_to_next_char() while self.source.current_char != '"': # string hasn't been finished, ETX appeared if self.source.current_char == TokenType.ETX.value: self.error(error_code=ErrorCode.TOKEN_BUILD_FAIL)