Example #1
def test_lexer_fails_with_single_exclamation_mark():
    lexer = Lexer('!')
    with raises(LexerError) as exc_info:
        lexer.lex()

    assert (exc_info.value.message, exc_info.value.row,
            exc_info.value.column) == (
                "Encountered unexpected character: '!'",
                0,
                0,
            )
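These error tests rely on pytest's raises context manager and on a LexerError that exposes message, row, and column attributes. The sketch below shows the minimal scaffolding those assertions imply; it is an assumption for illustration and may differ from the project's actual definitions.

# Illustrative scaffolding assumed by the error tests; not the project's real code.
from pytest import raises


class LexerError(Exception):
    """Stand-in error type: the assertions read .message, .row, and .column."""

    def __init__(self, message, row, column):
        super().__init__(message)
        self.message = message  # human-readable description of the failure
        self.row = row          # line position reported by the lexer
        self.column = column    # column position reported by the lexer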
Example #2
def test_lexer_fails_with_unclosed_delimiter_for_long_byte_string():
    lexer = Lexer('b"""hello there""')
    with raises(LexerError) as exc_info:
        lexer.lex()

    assert (exc_info.value.message, exc_info.value.row,
            exc_info.value.column) == (
                "Unexpected end of string. Closing delimiter not found",
                0,
                16,
            )
Example #3
def test_lexer_fails_with_non_ascii_char_in_long_byte_string():
    lexer = Lexer('b"""hello thereΣ"""')
    with raises(LexerError) as exc_info:
        lexer.lex()

    assert (exc_info.value.message, exc_info.value.row,
            exc_info.value.column) == (
                "Encountered unexpected non-ASCII character: 'Σ'",
                0,
                14,
            )
Example #4
def test_lexer_fails_with_newline_char_in_short_string():
    lexer = Lexer('"\n"')
    with raises(LexerError) as exc_info:
        lexer.lex()

    assert (exc_info.value.message, exc_info.value.row,
            exc_info.value.column) == (
                "Encountered unexpected newline character",
                0,
                0,
            )
Example #5
def test_lexer_fails_on_invalid_indentation():
    # Mixed space types in indentation
    lexer0 = Lexer("lambda *args:\n" "\t\t[1, 2, 3]\r\n" "\t\t  0x110")

    # Wrong number of spaces in indent
    lexer1 = Lexer("lambda *args:\n" "\t\t[1, 2, 3]\r\n" "\t\t\t0x110")

    # Wrong number of spaces in dedent
    lexer2 = Lexer("lambda *args:\n" "\t\t[1, 2, 3]\r\n" "\t0x110")

    # Mixed space types in separate indentation
    lexer3 = Lexer("lambda *args:\n" "\t\t[1, 2, 3]\r\n" "    0x110")

    with raises(LexerError) as exc_info0:
        lexer0.lex()

    with raises(LexerError) as exc_info1:
        lexer1.lex()

    with raises(LexerError) as exc_info2:
        lexer2.lex()

    with raises(LexerError) as exc_info3:
        lexer3.lex()

    assert (exc_info0.value.message, exc_info0.value.row,
            exc_info0.value.column) == (
                "Unexpected mix of different types of spaces in indentation",
                2,
                3,
            )

    assert (exc_info1.value.message, exc_info1.value.row,
            exc_info1.value.column) == (
                "Expected an indent of 2 spaces",
                2,
                2,
            )

    assert (exc_info2.value.message, exc_info2.value.row,
            exc_info2.value.column) == (
                "Unexpected number of spaces in dedent",
                2,
                0,
            )

    assert (exc_info3.value.message, exc_info3.value.row,
            exc_info3.value.column) == (
                "Unexpected mix of different types of spaces in indentation",
                2,
                3,
            )
Example #6
def test_lexer_fails_with_coefficient_literal_on_non_dec_numeric_literal():
    lexer0 = Lexer("0b1_110f")
    lexer1 = Lexer("0x1234fereef")
    lexer2 = Lexer("0o23_347good")

    with raises(LexerError) as exc_info0:
        lexer0.lex()

    with raises(LexerError) as exc_info1:
        lexer1.lex()

    with raises(LexerError) as exc_info2:
        lexer2.lex()

    assert (exc_info0.value.message, exc_info0.value.row,
            exc_info0.value.column) == (
                "Encountered invalid coefficient literal: '0b1110f'",
                0,
                7,
            )

    assert (exc_info1.value.message, exc_info1.value.row,
            exc_info1.value.column) == (
                "Encountered invalid coefficient literal: '0x1234fereef'",
                0,
                11,
            )

    assert (exc_info2.value.message, exc_info2.value.row,
            exc_info2.value.column) == (
                "Encountered invalid coefficient literal: '0o23347good'",
                0,
                11,
            )
Example #7
def test_lexer_fails_with_incomplete_non_decimal_integer_literal():
    lexer0 = Lexer("0o")
    lexer1 = Lexer("0btt")
    lexer2 = Lexer("0x")

    with raises(LexerError) as exc_info0:
        lexer0.lex()

    with raises(LexerError) as exc_info1:
        lexer1.lex()

    with raises(LexerError) as exc_info2:
        lexer2.lex()

    assert (exc_info0.value.message, exc_info0.value.row,
            exc_info0.value.column) == (
                "Unexpected end of integer literal",
                0,
                1,
            )

    assert (exc_info1.value.message, exc_info1.value.row,
            exc_info1.value.column) == (
                "Unexpected end of integer literal",
                0,
                1,
            )

    assert (exc_info2.value.message, exc_info2.value.row,
            exc_info2.value.column) == (
                "Unexpected end of integer literal",
                0,
                1,
            )
Example #8
def test_lexer_fails_on_invalid_line_continuation():
    lexer0 = Lexer(r"\    \n")
    lexer1 = Lexer(r"\x")

    with raises(LexerError) as exc_info0:
        lexer0.lex()

    with raises(LexerError) as exc_info1:
        lexer1.lex()

    assert (exc_info0.value.message, exc_info0.value.row,
            exc_info0.value.column) == (
                "Unexpected character after line continuation character: ' '",
                0,
                0,
            )

    assert (exc_info1.value.message, exc_info1.value.row,
            exc_info1.value.column) == (
                "Unexpected character after line continuation character: 'x'",
                0,
                0,
            )
Example #9
def test_lexer_fails_with_consecutive_underscores_in_dec_float_literal():
    lexer0 = Lexer("1_234.0__5")
    lexer1 = Lexer(".111__0")
    lexer2 = Lexer("1_23.e-4__5")
    lexer3 = Lexer("1_23.100e-4__5")

    with raises(LexerError) as exc_info0:
        lexer0.lex()

    with raises(LexerError) as exc_info1:
        lexer1.lex()

    with raises(LexerError) as exc_info2:
        lexer2.lex()

    with raises(LexerError) as exc_info3:
        lexer3.lex()

    assert (exc_info0.value.message, exc_info0.value.row,
            exc_info0.value.column) == (
                "Unexpected consecutive underscores in floating point literal",
                0,
                8,
            )

    assert (exc_info1.value.message, exc_info1.value.row,
            exc_info1.value.column) == (
                "Unexpected consecutive underscores in floating point literal",
                0,
                5,
            )

    assert (exc_info2.value.message, exc_info2.value.row,
            exc_info2.value.column) == (
                "Unexpected consecutive underscores in floating point literal",
                0,
                9,
            )

    assert (exc_info3.value.message, exc_info3.value.row,
            exc_info3.value.column) == (
                "Unexpected consecutive underscores in floating point literal",
                0,
                12,
            )
Example #10
def test_lexer_fails_with_consecutive_underscores_in_integer_literal():
    lexer0 = Lexer("0o1_234__5")
    lexer1 = Lexer("0b1_111__0")
    lexer2 = Lexer("0x1_234__5")
    lexer3 = Lexer("1_234__5")

    with raises(LexerError) as exc_info0:
        lexer0.lex()

    with raises(LexerError) as exc_info1:
        lexer1.lex()

    with raises(LexerError) as exc_info2:
        lexer2.lex()

    with raises(LexerError) as exc_info3:
        lexer3.lex()

    assert (exc_info0.value.message, exc_info0.value.row,
            exc_info0.value.column) == (
                "Unexpected consecutive underscores in integer literal",
                0,
                8,
            )

    assert (exc_info1.value.message, exc_info1.value.row,
            exc_info1.value.column) == (
                "Unexpected consecutive underscores in integer literal",
                0,
                8,
            )

    assert (exc_info2.value.message, exc_info2.value.row,
            exc_info2.value.column) == (
                "Unexpected consecutive underscores in integer literal",
                0,
                8,
            )

    assert (exc_info3.value.message, exc_info3.value.row,
            exc_info3.value.column) == (
                "Unexpected consecutive underscores in integer literal",
                0,
                6,
            )
Example #11
File: compiler.py Project: vkolmakov/minic
class Compiler:
    def __init__(self):
        self.lexer = Lexer()
        self.parser = Parser()
        self.typechecker = Typechecker()

    @with_logger.log_result('LEXER')
    def lex(self, source):
        return self.lexer.lex(source)

    @with_logger.log_result('PARSER')
    def parse(self, source):
        return self.parser.parse(self.lex(source))

    @with_logger.log_result('TYPECHECKER')
    def typecheck(self, source):
        return self.typechecker.typecheck(self.parse(source))
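Example #11 wires the stages into a pipeline: typecheck calls parse, which calls lex, and each decorated method has its result logged under the given tag. A hypothetical driver for this Compiler might look like the sketch below; the import path and the sample source string are assumptions, not part of the minic project.

# Hypothetical usage of the Compiler from example #11; names outside it are assumptions.
from compiler import Compiler


def run_pipeline(source):
    compiler = Compiler()
    # typecheck() drives the earlier stages internally (lex -> parse -> typecheck),
    # and each @with_logger.log_result decorator logs its stage's result.
    return compiler.typecheck(source)


if __name__ == "__main__":
    print(run_pipeline("int main() { return 0; }"))  # illustrative input only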
Example #12
def test_lexer_fails_with_return_char_in_short_string():
    lexer = Lexer('"\r"')
    with raises(LexerError) as exc_info:
        lexer.lex()

    assert (exc_info.value.row, exc_info.value.column) == (0, 0)
Example #13
def test_lexer_tokenizes_valid_indentations_successfully():
    # Indentation with spaces
    lexer0 = Lexer("name \n    age \n        gender")
    result0 = lexer0.lex()

    # Indentation with tabs
    lexer1 = Lexer("name \n\t\tage \n\t\t\t\tgender\nhello")
    result1 = lexer1.lex()

    # Indentation in nested brackets
    lexer2 = Lexer("name \n\t(age \n{\n\t\n\t\tgender\n} try)\n\thello")
    result2 = lexer2.lex()

    # Unmatched indentation for parentheses with block inside
    lexer3 = Lexer("name (\r\n"
                   "\t\tlambda:\n"
                   "\t\t\t\tname, match (x, y): \t\n"
                   "\t\t\t\t\t\tage)")
    result3 = lexer3.lex()

    # Matched indentation for parentheses with block inside
    lexer4 = Lexer("name (\n"
                   " 1_000_234, lambda:\n"
                   "   name, match (x, y): \t\r\n"
                   "     age   \n"
                   ")")
    result4 = lexer4.lex()

    # Matched indentation for parentheses with block inside
    lexer5 = Lexer("name (\n"
                   "  1_000_234\n"
                   "    lambda:\n"
                   "          \n"
                   "        ( name, lambda: \t\n"
                   "            age\r\n"
                   "            hello)\n"
                   "        gem)")
    result5 = lexer5.lex()

    # Unmatched indentation for parentheses with block inside, but not currently in block
    lexer6 = Lexer("name (\n"
                   "  lambda:\n"
                   "    name, match (x, y): \n"
                   "      age\n"
                   "      \r\n"
                   "  { lambda: x})")
    result6 = lexer6.lex()

    assert result0 == [
        Token("name", TokenKind.IDENTIFIER, 0, 3),
        Token("", TokenKind.INDENT, 1, 3),
        Token("age", TokenKind.IDENTIFIER, 1, 6),
        Token("", TokenKind.INDENT, 2, 7),
        Token("gender", TokenKind.IDENTIFIER, 2, 13),
        Token("", TokenKind.DEDENT, 2, 13),
        Token("", TokenKind.DEDENT, 2, 13),
    ]
    assert (lexer0.indent_factor,
            lexer0.indent_space_type) == (4, IndentSpaceKind.SPACE)

    assert result1 == [
        Token("name", TokenKind.IDENTIFIER, 0, 3),
        Token("", TokenKind.INDENT, 1, 1),
        Token("age", TokenKind.IDENTIFIER, 1, 4),
        Token("", TokenKind.INDENT, 2, 3),
        Token("gender", TokenKind.IDENTIFIER, 2, 9),
        Token("", TokenKind.DEDENT, 3, -1),
        Token("", TokenKind.DEDENT, 3, -1),
        Token("hello", TokenKind.IDENTIFIER, 3, 4),
    ]
    assert (lexer1.indent_factor,
            lexer1.indent_space_type) == (2, IndentSpaceKind.TAB)

    assert result2 == [
        Token("name", TokenKind.IDENTIFIER, 0, 3),
        Token("", TokenKind.INDENT, 1, 0),
        Token("(", TokenKind.DELIMITER, 1, 1),
        Token("age", TokenKind.IDENTIFIER, 1, 4),
        Token("{", TokenKind.DELIMITER, 2, 0),
        Token("gender", TokenKind.IDENTIFIER, 4, 7),
        Token("}", TokenKind.DELIMITER, 5, 0),
        Token("try", TokenKind.KEYWORD, 5, 4),
        Token(")", TokenKind.DELIMITER, 5, 5),
        Token("", TokenKind.NEWLINE, 6, 0),
        Token("hello", TokenKind.IDENTIFIER, 6, 5),
        Token("", TokenKind.DEDENT, 6, 5),
    ]
    assert (lexer2.indent_factor,
            lexer2.indent_space_type) == (1, IndentSpaceKind.TAB)

    assert result3 == [
        Token("name", TokenKind.IDENTIFIER, 0, 3),
        Token("(", TokenKind.DELIMITER, 0, 5),
        Token("lambda", TokenKind.KEYWORD, 1, 7),
        Token(":", TokenKind.DELIMITER, 1, 8),
        Token("", TokenKind.INDENT, 2, 3),
        Token("name", TokenKind.IDENTIFIER, 2, 7),
        Token(",", TokenKind.DELIMITER, 2, 8),
        Token("match", TokenKind.KEYWORD, 2, 14),
        Token("(", TokenKind.DELIMITER, 2, 16),
        Token("x", TokenKind.IDENTIFIER, 2, 17),
        Token(",", TokenKind.DELIMITER, 2, 18),
        Token("y", TokenKind.IDENTIFIER, 2, 20),
        Token(")", TokenKind.DELIMITER, 2, 21),
        Token(":", TokenKind.DELIMITER, 2, 22),
        Token("", TokenKind.INDENT, 3, 5),
        Token("age", TokenKind.IDENTIFIER, 3, 8),
        Token("", TokenKind.DEDENT, 3, 9),
        Token("", TokenKind.DEDENT, 3, 9),
        Token(")", TokenKind.DELIMITER, 3, 9)
    ]
    assert (lexer3.indent_factor,
            lexer3.indent_space_type) == (2, IndentSpaceKind.TAB)

    assert result4 == [
        Token("name", TokenKind.IDENTIFIER, 0, 3),
        Token("(", TokenKind.DELIMITER, 0, 5),
        Token("1000234", TokenKind.DEC_INTEGER, 1, 9),
        Token(",", TokenKind.DELIMITER, 1, 10),
        Token("lambda", TokenKind.KEYWORD, 1, 17),
        Token(":", TokenKind.DELIMITER, 1, 18),
        Token("", TokenKind.INDENT, 2, 2),
        Token("name", TokenKind.IDENTIFIER, 2, 6),
        Token(",", TokenKind.DELIMITER, 2, 7),
        Token("match", TokenKind.KEYWORD, 2, 13),
        Token("(", TokenKind.DELIMITER, 2, 15),
        Token("x", TokenKind.IDENTIFIER, 2, 16),
        Token(",", TokenKind.DELIMITER, 2, 17),
        Token("y", TokenKind.IDENTIFIER, 2, 19),
        Token(")", TokenKind.DELIMITER, 2, 20),
        Token(":", TokenKind.DELIMITER, 2, 21),
        Token("", TokenKind.INDENT, 3, 4),
        Token("age", TokenKind.IDENTIFIER, 3, 7),
        Token("", TokenKind.DEDENT, 4, -1),
        Token("", TokenKind.DEDENT, 4, -1),
        Token(")", TokenKind.DELIMITER, 4, 0)
    ]
    assert (lexer4.indent_factor,
            lexer4.indent_space_type) == (2, IndentSpaceKind.SPACE)

    assert result5 == [
        Token("name", TokenKind.IDENTIFIER, 0, 3),
        Token("(", TokenKind.DELIMITER, 0, 5),
        Token("1000234", TokenKind.DEC_INTEGER, 1, 10),
        Token("lambda", TokenKind.KEYWORD, 2, 9),
        Token(":", TokenKind.DELIMITER, 2, 10),
        Token("", TokenKind.INDENT, 4, 7),
        Token("(", TokenKind.DELIMITER, 4, 8),
        Token("name", TokenKind.IDENTIFIER, 4, 13),
        Token(",", TokenKind.DELIMITER, 4, 14),
        Token("lambda", TokenKind.KEYWORD, 4, 21),
        Token(":", TokenKind.DELIMITER, 4, 22),
        Token("", TokenKind.INDENT, 5, 11),
        Token("age", TokenKind.IDENTIFIER, 5, 14),
        Token("", TokenKind.NEWLINE, 6, 11),
        Token("hello", TokenKind.IDENTIFIER, 6, 16),
        Token("", TokenKind.DEDENT, 6, 17),
        Token(")", TokenKind.DELIMITER, 6, 17),
        Token("", TokenKind.NEWLINE, 7, 7),
        Token("gem", TokenKind.IDENTIFIER, 7, 10),
        Token("", TokenKind.DEDENT, 7, 11),
        Token(")", TokenKind.DELIMITER, 7, 11)
    ]
    assert (lexer5.indent_factor,
            lexer5.indent_space_type) == (4, IndentSpaceKind.SPACE)

    assert result6 == [
        Token("name", TokenKind.IDENTIFIER, 0, 3),
        Token("(", TokenKind.DELIMITER, 0, 5),
        Token("lambda", TokenKind.KEYWORD, 1, 7),
        Token(":", TokenKind.DELIMITER, 1, 8),
        Token("", TokenKind.INDENT, 2, 3),
        Token("name", TokenKind.IDENTIFIER, 2, 7),
        Token(",", TokenKind.DELIMITER, 2, 8),
        Token("match", TokenKind.KEYWORD, 2, 14),
        Token("(", TokenKind.DELIMITER, 2, 16),
        Token("x", TokenKind.IDENTIFIER, 2, 17),
        Token(",", TokenKind.DELIMITER, 2, 18),
        Token("y", TokenKind.IDENTIFIER, 2, 20),
        Token(")", TokenKind.DELIMITER, 2, 21),
        Token(":", TokenKind.DELIMITER, 2, 22),
        Token("", TokenKind.INDENT, 3, 5),
        Token("age", TokenKind.IDENTIFIER, 3, 8),
        Token("", TokenKind.DEDENT, 5, 1),
        Token("", TokenKind.DEDENT, 5, 1),
        Token("{", TokenKind.DELIMITER, 5, 2),
        Token("lambda", TokenKind.KEYWORD, 5, 9),
        Token(":", TokenKind.DELIMITER, 5, 10),
        Token("x", TokenKind.IDENTIFIER, 5, 12),
        Token("}", TokenKind.DELIMITER, 5, 13),
        Token(")", TokenKind.DELIMITER, 5, 14)
    ]
    assert (lexer6.indent_factor,
            lexer6.indent_space_type) == (2, IndentSpaceKind.SPACE)
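The assertions in example #13 construct tokens positionally as Token(value, kind, row, column) and compare whole token lists with ==, which implies value-based equality on Token and enumerations for TokenKind and IndentSpaceKind. The sketch below is a minimal shape consistent with that usage; the field names and the set of kinds are assumptions, and the real project's classes are likely richer.

# Minimal sketch inferred from the assertions above; not the project's real definitions.
from dataclasses import dataclass
from enum import Enum, auto


class TokenKind(Enum):
    # Only the kinds that appear in the examples above.
    IDENTIFIER = auto()
    KEYWORD = auto()
    DELIMITER = auto()
    DEC_INTEGER = auto()
    INDENT = auto()
    DEDENT = auto()
    NEWLINE = auto()


class IndentSpaceKind(Enum):
    SPACE = auto()
    TAB = auto()


@dataclass
class Token:
    """@dataclass generates the __eq__ that the list comparisons rely on."""

    value: str       # token text; field name is an assumption (tests pass it positionally)
    kind: TokenKind
    row: int
    column: int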