from pytest import raises

# NOTE: the import paths below are assumptions about the project layout; the
# imported names are exactly the ones these tests and the Compiler class use.
from lexer import Lexer, LexerError, Token, TokenKind, IndentSpaceKind
from parser import Parser
from typechecker import Typechecker
from log import with_logger


def test_lexer_fails_with_single_exclamation_mark():
    lexer = Lexer('!')
    with raises(LexerError) as exc_info:
        lexer.lex()
    assert (exc_info.value.message, exc_info.value.row, exc_info.value.column) == (
        "Encountered unexpected character: '!'",
        0,
        0,
    )

def test_lexer_fails_with_unclosed_delimiter_for_long_byte_string():
    lexer = Lexer('b"""hello there""')
    with raises(LexerError) as exc_info:
        lexer.lex()
    assert (exc_info.value.message, exc_info.value.row, exc_info.value.column) == (
        "Unexpected end of string. Closing delimiter not found",
        0,
        16,
    )

def test_lexer_fails_with_non_ascii_char_in_long_byte_string():
    lexer = Lexer('b"""hello thereΣ"""')
    with raises(LexerError) as exc_info:
        lexer.lex()
    assert (exc_info.value.message, exc_info.value.row, exc_info.value.column) == (
        "Encountered unexpected non-ASCII character: 'Σ'",
        0,
        14,
    )

def test_lexer_fails_with_newline_char_in_short_string():
    lexer = Lexer('"\n"')
    with raises(LexerError) as exc_info:
        lexer.lex()
    assert (exc_info.value.message, exc_info.value.row, exc_info.value.column) == (
        "Encountered unexpected newline character",
        0,
        0,
    )

def test_lexer_fails_on_invalid_indentation():
    # Mixed space types in indentation
    lexer0 = Lexer(
        "lambda *args:\n"
        "\t\t[1, 2, 3]\r\n"
        "\t\t 0x110"
    )

    # Wrong number of spaces in indent
    lexer1 = Lexer(
        "lambda *args:\n"
        "\t\t[1, 2, 3]\r\n"
        "\t\t\t0x110"
    )

    # Wrong number of spaces in dedent
    lexer2 = Lexer(
        "lambda *args:\n"
        "\t\t[1, 2, 3]\r\n"
        "\t0x110"
    )

    # Mixed space types in separate indentation
    lexer3 = Lexer(
        "lambda *args:\n"
        "\t\t[1, 2, 3]\r\n"
        "   0x110"
    )

    with raises(LexerError) as exc_info0:
        lexer0.lex()

    with raises(LexerError) as exc_info1:
        lexer1.lex()

    with raises(LexerError) as exc_info2:
        lexer2.lex()

    with raises(LexerError) as exc_info3:
        lexer3.lex()

    assert (exc_info0.value.message, exc_info0.value.row, exc_info0.value.column) == (
        "Unexpected mix of different types of spaces in indentation",
        2,
        3,
    )
    assert (exc_info1.value.message, exc_info1.value.row, exc_info1.value.column) == (
        "Expected an indent of 2 spaces",
        2,
        2,
    )
    assert (exc_info2.value.message, exc_info2.value.row, exc_info2.value.column) == (
        "Unexpected number of spaces in dedent",
        2,
        0,
    )
    assert (exc_info3.value.message, exc_info3.value.row, exc_info3.value.column) == (
        "Unexpected mix of different types of spaces in indentation",
        2,
        3,
    )

def test_lexer_fails_with_coefficient_literal_on_non_dec_numeric_literal():
    lexer0 = Lexer("0b1_110f")
    lexer1 = Lexer("0x1234fereef")
    lexer2 = Lexer("0o23_347good")

    with raises(LexerError) as exc_info0:
        lexer0.lex()

    with raises(LexerError) as exc_info1:
        lexer1.lex()

    with raises(LexerError) as exc_info2:
        lexer2.lex()

    assert (exc_info0.value.message, exc_info0.value.row, exc_info0.value.column) == (
        "Encountered invalid coefficient literal: '0b1110f'",
        0,
        7,
    )
    assert (exc_info1.value.message, exc_info1.value.row, exc_info1.value.column) == (
        "Encountered invalid coefficient literal: '0x1234fereef'",
        0,
        11,
    )
    assert (exc_info2.value.message, exc_info2.value.row, exc_info2.value.column) == (
        "Encountered invalid coefficient literal: '0o23347good'",
        0,
        11,
    )

def test_lexer_fails_with_incomplete_non_decimal_integer_literal():
    lexer0 = Lexer("0o")
    lexer1 = Lexer("0btt")
    lexer2 = Lexer("0x")

    with raises(LexerError) as exc_info0:
        lexer0.lex()

    with raises(LexerError) as exc_info1:
        lexer1.lex()

    with raises(LexerError) as exc_info2:
        lexer2.lex()

    assert (exc_info0.value.message, exc_info0.value.row, exc_info0.value.column) == (
        "Unexpected end of integer literal",
        0,
        1,
    )
    assert (exc_info1.value.message, exc_info1.value.row, exc_info1.value.column) == (
        "Unexpected end of integer literal",
        0,
        1,
    )
    assert (exc_info2.value.message, exc_info2.value.row, exc_info2.value.column) == (
        "Unexpected end of integer literal",
        0,
        1,
    )

def test_lexer_fails_on_invalid_line_continuation():
    lexer0 = Lexer(r"\ \n")
    lexer1 = Lexer(r"\x")

    with raises(LexerError) as exc_info0:
        lexer0.lex()

    with raises(LexerError) as exc_info1:
        lexer1.lex()

    assert (exc_info0.value.message, exc_info0.value.row, exc_info0.value.column) == (
        "Unexpected character after line continuation character: ' '",
        0,
        0,
    )
    assert (exc_info1.value.message, exc_info1.value.row, exc_info1.value.column) == (
        "Unexpected character after line continuation character: 'x'",
        0,
        0,
    )

def test_lexer_fails_with_consecutive_underscores_in_dec_float_literal():
    lexer0 = Lexer("1_234.0__5")
    lexer1 = Lexer(".111__0")
    lexer2 = Lexer("1_23.e-4__5")
    lexer3 = Lexer("1_23.100e-4__5")

    with raises(LexerError) as exc_info0:
        lexer0.lex()

    with raises(LexerError) as exc_info1:
        lexer1.lex()

    with raises(LexerError) as exc_info2:
        lexer2.lex()

    with raises(LexerError) as exc_info3:
        lexer3.lex()

    assert (exc_info0.value.message, exc_info0.value.row, exc_info0.value.column) == (
        "Unexpected consecutive underscores in floating point literal",
        0,
        8,
    )
    assert (exc_info1.value.message, exc_info1.value.row, exc_info1.value.column) == (
        "Unexpected consecutive underscores in floating point literal",
        0,
        5,
    )
    assert (exc_info2.value.message, exc_info2.value.row, exc_info2.value.column) == (
        "Unexpected consecutive underscores in floating point literal",
        0,
        9,
    )
    assert (exc_info3.value.message, exc_info3.value.row, exc_info3.value.column) == (
        "Unexpected consecutive underscores in floating point literal",
        0,
        12,
    )

def test_lexer_fails_with_consecutive_underscores_in_integer_literal():
    lexer0 = Lexer("0o1_234__5")
    lexer1 = Lexer("0b1_111__0")
    lexer2 = Lexer("0x1_234__5")
    lexer3 = Lexer("1_234__5")

    with raises(LexerError) as exc_info0:
        lexer0.lex()

    with raises(LexerError) as exc_info1:
        lexer1.lex()

    with raises(LexerError) as exc_info2:
        lexer2.lex()

    with raises(LexerError) as exc_info3:
        lexer3.lex()

    assert (exc_info0.value.message, exc_info0.value.row, exc_info0.value.column) == (
        "Unexpected consecutive underscores in integer literal",
        0,
        8,
    )
    assert (exc_info1.value.message, exc_info1.value.row, exc_info1.value.column) == (
        "Unexpected consecutive underscores in integer literal",
        0,
        8,
    )
    assert (exc_info2.value.message, exc_info2.value.row, exc_info2.value.column) == (
        "Unexpected consecutive underscores in integer literal",
        0,
        8,
    )
    assert (exc_info3.value.message, exc_info3.value.row, exc_info3.value.column) == (
        "Unexpected consecutive underscores in integer literal",
        0,
        6,
    )

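# For contrast with the failure cases above, a sketch of the valid
# single-underscore case. The expected token is an assumption extrapolated
# from the indentation test below, where "1_000_234" lexes to
# Token("1000234", TokenKind.DEC_INTEGER, ...): underscores are dropped from
# the token text, and `column` points at the literal's last character.
#
#     Lexer("1_234").lex() == [Token("1234", TokenKind.DEC_INTEGER, 0, 4)]
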
class Compiler:
    def __init__(self):
        self.parser = Parser()
        self.typechecker = Typechecker()

    @with_logger.log_result('LEXER')
    def lex(self, source):
        # The tests above show that Lexer takes the source text in its
        # constructor and that lex() takes no arguments, so a fresh Lexer is
        # built per call rather than calling lex(source) on a sourceless one.
        return Lexer(source).lex()

    @with_logger.log_result('PARSER')
    def parse(self, source):
        return self.parser.parse(self.lex(source))

    @with_logger.log_result('TYPECHECKER')
    def typecheck(self, source):
        return self.typechecker.typecheck(self.parse(source))

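# A minimal usage sketch for the Compiler above. The source string and the
# shapes of the returned values are assumptions; each phase's decorator
# (with_logger.log_result) is expected to log that phase's result as a side
# effect. Prefixed with an underscore so pytest does not collect it as a test.
def _example_compile_pipeline(source="lambda *args:\n\t\targs"):
    compiler = Compiler()
    tokens = compiler.lex(source)            # phase 1: source -> [Token, ...]
    tree = compiler.parse(source)            # phase 2: tokens -> parse tree
    typed_tree = compiler.typecheck(source)  # phase 3: parse tree -> typed tree
    return tokens, tree, typed_tree
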
def test_lexer_fails_with_return_char_in_short_string():
    lexer = Lexer('"\r"')
    with raises(LexerError) as exc_info:
        lexer.lex()
    # Only the error position is asserted here; unlike the newline test above,
    # the exact message for a bare '\r' in a short string is not pinned down.
    assert (exc_info.value.row, exc_info.value.column) == (0, 0)

def test_lexer_tokenizes_valid_indentations_successfully():
    # Indentation with spaces
    lexer0 = Lexer("name \n    age \n        gender")
    result0 = lexer0.lex()

    # Indentation with tabs
    lexer1 = Lexer("name \n\t\tage \n\t\t\t\tgender\nhello")
    result1 = lexer1.lex()

    # Indentation in nested brackets
    lexer2 = Lexer("name \n\t(age \n{\n\t\n\t\tgender\n} try)\n\thello")
    result2 = lexer2.lex()

    # Unmatched indentation for parentheses with block inside
    lexer3 = Lexer(
        "name (\r\n"
        "\t\tlambda:\n"
        "\t\t\t\tname, match (x, y): \t\n"
        "\t\t\t\t\t\tage)"
    )
    result3 = lexer3.lex()

    # Matched indentation for parentheses with block inside
    lexer4 = Lexer(
        "name (\n"
        " 1_000_234, lambda:\n"
        "   name, match (x, y): \t\r\n"
        "     age \n"
        ")"
    )
    result4 = lexer4.lex()

    # Matched indentation for parentheses with block inside
    lexer5 = Lexer(
        "name (\n"
        "  1_000_234\n"
        "    lambda:\n"
        " \n"
        "        ( name, lambda: \t\n"
        "            age\r\n"
        "            hello)\n"
        "        gem)"
    )
    result5 = lexer5.lex()

    # Unmatched indentation for parentheses with block inside, but not
    # currently in block
    lexer6 = Lexer(
        "name (\n"
        "  lambda:\n"
        "    name, match (x, y): \n"
        "      age\n"
        " \r\n"
        "  { lambda: x})"
    )
    result6 = lexer6.lex()

    assert result0 == [
        Token("name", TokenKind.IDENTIFIER, 0, 3),
        Token("", TokenKind.INDENT, 1, 3),
        Token("age", TokenKind.IDENTIFIER, 1, 6),
        Token("", TokenKind.INDENT, 2, 7),
        Token("gender", TokenKind.IDENTIFIER, 2, 13),
        Token("", TokenKind.DEDENT, 2, 13),
        Token("", TokenKind.DEDENT, 2, 13),
    ]
    assert (lexer0.indent_factor, lexer0.indent_space_type) == (4, IndentSpaceKind.SPACE)

    assert result1 == [
        Token("name", TokenKind.IDENTIFIER, 0, 3),
        Token("", TokenKind.INDENT, 1, 1),
        Token("age", TokenKind.IDENTIFIER, 1, 4),
        Token("", TokenKind.INDENT, 2, 3),
        Token("gender", TokenKind.IDENTIFIER, 2, 9),
        Token("", TokenKind.DEDENT, 3, -1),
        Token("", TokenKind.DEDENT, 3, -1),
        Token("hello", TokenKind.IDENTIFIER, 3, 4),
    ]
    assert (lexer1.indent_factor, lexer1.indent_space_type) == (2, IndentSpaceKind.TAB)

    assert result2 == [
        Token("name", TokenKind.IDENTIFIER, 0, 3),
        Token("", TokenKind.INDENT, 1, 0),
        Token("(", TokenKind.DELIMITER, 1, 1),
        Token("age", TokenKind.IDENTIFIER, 1, 4),
        Token("{", TokenKind.DELIMITER, 2, 0),
        Token("gender", TokenKind.IDENTIFIER, 4, 7),
        Token("}", TokenKind.DELIMITER, 5, 0),
        Token("try", TokenKind.KEYWORD, 5, 4),
        Token(")", TokenKind.DELIMITER, 5, 5),
        Token("", TokenKind.NEWLINE, 6, 0),
        Token("hello", TokenKind.IDENTIFIER, 6, 5),
        Token("", TokenKind.DEDENT, 6, 5),
    ]
    assert (lexer2.indent_factor, lexer2.indent_space_type) == (1, IndentSpaceKind.TAB)

    assert result3 == [
        Token("name", TokenKind.IDENTIFIER, 0, 3),
        Token("(", TokenKind.DELIMITER, 0, 5),
        Token("lambda", TokenKind.KEYWORD, 1, 7),
        Token(":", TokenKind.DELIMITER, 1, 8),
        Token("", TokenKind.INDENT, 2, 3),
        Token("name", TokenKind.IDENTIFIER, 2, 7),
        Token(",", TokenKind.DELIMITER, 2, 8),
        Token("match", TokenKind.KEYWORD, 2, 14),
        Token("(", TokenKind.DELIMITER, 2, 16),
        Token("x", TokenKind.IDENTIFIER, 2, 17),
        Token(",", TokenKind.DELIMITER, 2, 18),
        Token("y", TokenKind.IDENTIFIER, 2, 20),
        Token(")", TokenKind.DELIMITER, 2, 21),
        Token(":", TokenKind.DELIMITER, 2, 22),
        Token("", TokenKind.INDENT, 3, 5),
        Token("age", TokenKind.IDENTIFIER, 3, 8),
        Token("", TokenKind.DEDENT, 3, 9),
        Token("", TokenKind.DEDENT, 3, 9),
        Token(")", TokenKind.DELIMITER, 3, 9),
    ]
    assert (lexer3.indent_factor, lexer3.indent_space_type) == (2, IndentSpaceKind.TAB)

    assert result4 == [
        Token("name", TokenKind.IDENTIFIER, 0, 3),
        Token("(", TokenKind.DELIMITER, 0, 5),
        Token("1000234", TokenKind.DEC_INTEGER, 1, 9),
        Token(",", TokenKind.DELIMITER, 1, 10),
        Token("lambda", TokenKind.KEYWORD, 1, 17),
        Token(":", TokenKind.DELIMITER, 1, 18),
        Token("", TokenKind.INDENT, 2, 2),
        Token("name", TokenKind.IDENTIFIER, 2, 6),
        Token(",", TokenKind.DELIMITER, 2, 7),
        Token("match", TokenKind.KEYWORD, 2, 13),
        Token("(", TokenKind.DELIMITER, 2, 15),
        Token("x", TokenKind.IDENTIFIER, 2, 16),
        Token(",", TokenKind.DELIMITER, 2, 17),
        Token("y", TokenKind.IDENTIFIER, 2, 19),
        Token(")", TokenKind.DELIMITER, 2, 20),
        Token(":", TokenKind.DELIMITER, 2, 21),
        Token("", TokenKind.INDENT, 3, 4),
        Token("age", TokenKind.IDENTIFIER, 3, 7),
        Token("", TokenKind.DEDENT, 4, -1),
        Token("", TokenKind.DEDENT, 4, -1),
        Token(")", TokenKind.DELIMITER, 4, 0),
    ]
    assert (lexer4.indent_factor, lexer4.indent_space_type) == (2, IndentSpaceKind.SPACE)

    assert result5 == [
        Token("name", TokenKind.IDENTIFIER, 0, 3),
        Token("(", TokenKind.DELIMITER, 0, 5),
        Token("1000234", TokenKind.DEC_INTEGER, 1, 10),
        Token("lambda", TokenKind.KEYWORD, 2, 9),
        Token(":", TokenKind.DELIMITER, 2, 10),
        Token("", TokenKind.INDENT, 4, 7),
        Token("(", TokenKind.DELIMITER, 4, 8),
        Token("name", TokenKind.IDENTIFIER, 4, 13),
        Token(",", TokenKind.DELIMITER, 4, 14),
        Token("lambda", TokenKind.KEYWORD, 4, 21),
        Token(":", TokenKind.DELIMITER, 4, 22),
        Token("", TokenKind.INDENT, 5, 11),
        Token("age", TokenKind.IDENTIFIER, 5, 14),
        Token("", TokenKind.NEWLINE, 6, 11),
        Token("hello", TokenKind.IDENTIFIER, 6, 16),
        Token("", TokenKind.DEDENT, 6, 17),
        Token(")", TokenKind.DELIMITER, 6, 17),
        Token("", TokenKind.NEWLINE, 7, 7),
        Token("gem", TokenKind.IDENTIFIER, 7, 10),
        Token("", TokenKind.DEDENT, 7, 11),
        Token(")", TokenKind.DELIMITER, 7, 11),
    ]
    assert (lexer5.indent_factor, lexer5.indent_space_type) == (4, IndentSpaceKind.SPACE)

    assert result6 == [
        Token("name", TokenKind.IDENTIFIER, 0, 3),
        Token("(", TokenKind.DELIMITER, 0, 5),
        Token("lambda", TokenKind.KEYWORD, 1, 7),
        Token(":", TokenKind.DELIMITER, 1, 8),
        Token("", TokenKind.INDENT, 2, 3),
        Token("name", TokenKind.IDENTIFIER, 2, 7),
        Token(",", TokenKind.DELIMITER, 2, 8),
        Token("match", TokenKind.KEYWORD, 2, 14),
        Token("(", TokenKind.DELIMITER, 2, 16),
        Token("x", TokenKind.IDENTIFIER, 2, 17),
        Token(",", TokenKind.DELIMITER, 2, 18),
        Token("y", TokenKind.IDENTIFIER, 2, 20),
        Token(")", TokenKind.DELIMITER, 2, 21),
        Token(":", TokenKind.DELIMITER, 2, 22),
        Token("", TokenKind.INDENT, 3, 5),
        Token("age", TokenKind.IDENTIFIER, 3, 8),
        Token("", TokenKind.DEDENT, 5, 1),
        Token("", TokenKind.DEDENT, 5, 1),
        Token("{", TokenKind.DELIMITER, 5, 2),
        Token("lambda", TokenKind.KEYWORD, 5, 9),
        Token(":", TokenKind.DELIMITER, 5, 10),
        Token("x", TokenKind.IDENTIFIER, 5, 12),
        Token("}", TokenKind.DELIMITER, 5, 13),
        Token(")", TokenKind.DELIMITER, 5, 14),
    ]
    assert (lexer6.indent_factor, lexer6.indent_space_type) == (2, IndentSpaceKind.SPACE)