Example No. 1
def test_lexer_tokenizes_coefficient_expression_with_adjacent_number_and_identifier_successfully(
):
    result0 = Lexer("045_").lex()
    result1 = Lexer("123f").lex()
    result2 = Lexer("1_234e_00").lex()
    result3 = Lexer("1_000_500r_ac").lex()
    result3 = Lexer("1_000.500r_ac").lex()

    assert result0 == [
        Token("045", TokenKind.DEC_INTEGER, 0, 2),
        Token("*", TokenKind.OPERATOR, 0, 2),
        Token("_", TokenKind.IDENTIFIER, 0, 3),
    ]

    assert result1 == [
        Token("123", TokenKind.DEC_INTEGER, 0, 2),
        Token("*", TokenKind.OPERATOR, 0, 2),
        Token("f", TokenKind.IDENTIFIER, 0, 3),
    ]

    assert result2 == [
        Token("1234", TokenKind.DEC_INTEGER, 0, 4),
        Token("*", TokenKind.OPERATOR, 0, 4),
        Token("e_00", TokenKind.IDENTIFIER, 0, 8),
    ]

    assert result3 == [
        Token("1000.500", TokenKind.DEC_FLOAT, 0, 8),
        Token("*", TokenKind.OPERATOR, 0, 8),
        Token("r_ac", TokenKind.IDENTIFIER, 0, 12),
    ]
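These assertions construct `Token(value, kind, row, column)` values and compare against `TokenKind` members, but the definitions themselves aren't shown. The following is a minimal sketch of what a compatible `Token`/`TokenKind` pair could look like; the field names and enum members are inferred from the assertions in these tests and are assumptions, not the project's actual code:

from dataclasses import dataclass
from enum import Enum, auto

class TokenKind(Enum):
    # Hypothetical members; only the kinds that appear in these tests.
    IDENTIFIER = auto()
    KEYWORD = auto()
    OPERATOR = auto()
    DELIMITER = auto()
    DEC_INTEGER = auto()
    DEC_FLOAT = auto()
    DEC_INTEGER_IMAG = auto()
    DEC_FLOAT_IMAG = auto()
    HEX_INTEGER = auto()
    STRING = auto()
    PREFIXED_STRING = auto()
    BYTE_STRING = auto()
    NEWLINE = auto()
    INDENT = auto()
    DEDENT = auto()

@dataclass(frozen=True)
class Token:
    value: str        # normalized lexeme, e.g. "1234" for the source "1_234"
    kind: TokenKind
    row: int          # 0-based row of the token's last character
    column: int       # 0-based column of the token's last character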
Example No. 2
def test_lexer_skips_comment():
    result0 = Lexer("# hello world!\n").lex()
    result1 = Lexer("# dhdgsdgjf,adtw%$#@5C%^@VY2;P'K9(").lex()
    result2 = Lexer("#").lex()

    assert result0 == [Token("", TokenKind.NEWLINE, 1, -1)]
    assert result1 == []
    assert result2 == []
Example No. 3
def test_lexer_tokenizes_valid_hex_integer_successfully():
    result0 = Lexer("0x1234567890aAbBcCdDeEfF").lex()
    result1 = Lexer("0x_23A_b4_567dD_90aBcCeEfF").lex()
    assert result0 == [
        Token("1234567890aAbBcCdDeEfF", TokenKind.HEX_INTEGER, 0, 23)
    ]
    assert result1 == [
        Token("23Ab4567dD90aBcCeEfF", TokenKind.HEX_INTEGER, 0, 25)
    ]
Example No. 4
def test_lexer_tokenizes_valid_prefixed_strings_successfully():
    result0 = Lexer(r"r'hello'").lex()
    result1 = Lexer(r'u"hello"').lex()
    result2 = Lexer(r'f"""hello"""').lex()
    result3 = Lexer(r"rf'hello'").lex()

    assert result0 == [Token(r"hello", TokenKind.PREFIXED_STRING, 0, 7)]
    assert result1 == [Token(r"hello", TokenKind.PREFIXED_STRING, 0, 7)]
    assert result2 == [Token(r"hello", TokenKind.PREFIXED_STRING, 0, 11)]
    assert result3 == [Token(r"hello", TokenKind.PREFIXED_STRING, 0, 8)]
Example No. 5
def test_lexer_fails_with_non_ascii_char_in_long_byte_string():
    lexer = Lexer('b"""hello thereΣ"""')
    with raises(LexerError) as exc_info:
        lexer.lex()

    assert (exc_info.value.message, exc_info.value.row,
            exc_info.value.column) == (
                "Encountered unexpected non-ASCII character: 'Σ'",
                0,
                14,
            )
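Here `raises` is pytest's `pytest.raises`, and the assertions read `message`, `row`, and `column` directly off the raised exception. A minimal sketch of a `LexerError` compatible with that usage, assuming the constructor simply stores the three values:

class LexerError(Exception):
    """Hypothetical exception type matching the attributes the tests inspect."""

    def __init__(self, message: str, row: int, column: int):
        super().__init__(message)
        self.message = message  # human-readable description of the failure
        self.row = row          # 0-based row where lexing stopped
        self.column = column    # 0-based column where lexing stopped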
Example No. 6
def test_lexer_fails_with_single_exclamation_mark():
    lexer = Lexer('!')
    with raises(LexerError) as exc_info:
        lexer.lex()

    assert (exc_info.value.message, exc_info.value.row,
            exc_info.value.column) == (
                "Encountered unexpected character: '!'",
                0,
                0,
            )
Example No. 7
def test_lexer_fails_with_unclosed_delimiter_for_long_byte_string():
    lexer = Lexer('b"""hello there""')
    with raises(LexerError) as exc_info:
        lexer.lex()

    assert (exc_info.value.message, exc_info.value.row,
            exc_info.value.column) == (
                "Unexpected end of string. Closing delimiter not found",
                0,
                16,
            )
Example No. 8
def test_lexer_fails_with_newline_char_in_short_string():
    lexer = Lexer('"\n"')
    with raises(LexerError) as exc_info:
        lexer.lex()

    assert (exc_info.value.message, exc_info.value.row,
            exc_info.value.column) == (
                "Encountered unexpected newline character",
                0,
                0,
            )
Example No. 9
def test_lexer_tokenizes_valid_single_char_prefixed_double_quote_short_string_successfully(
):
    result = Lexer(r'r"erCA63y8hbb 54^58*(@$Q#3qDSHHScTw62-+"').lex()
    assert result == [
        Token(r"erCA63y8hbb 54^58*(@$Q#3qDSHHScTw62-+",
              TokenKind.PREFIXED_STRING, 0, 39)
    ]
Example No. 10
def test_lexer_tokenizes_keywords_successfully():
    result = Lexer(
        "False None True and as assert async await break class continue def del elif else except "
        "finally for from global if import in is lambda nonlocal not or pass raise return try "
        "while with yield const ref ptr val match let var enum true false interface where macro "
        "typealias").lex()

    assert result == [
        Token("False", TokenKind.KEYWORD, 0, 4),
        Token("None", TokenKind.KEYWORD, 0, 9),
        Token("True", TokenKind.KEYWORD, 0, 14),
        Token("and", TokenKind.KEYWORD, 0, 18),
        Token("as", TokenKind.KEYWORD, 0, 21),
        Token("assert", TokenKind.KEYWORD, 0, 28),
        Token("async", TokenKind.KEYWORD, 0, 34),
        Token("await", TokenKind.KEYWORD, 0, 40),
        Token("break", TokenKind.KEYWORD, 0, 46),
        Token("class", TokenKind.KEYWORD, 0, 52),
        Token("continue", TokenKind.KEYWORD, 0, 61),
        Token("def", TokenKind.KEYWORD, 0, 65),
        Token("del", TokenKind.KEYWORD, 0, 69),
        Token("elif", TokenKind.KEYWORD, 0, 74),
        Token("else", TokenKind.KEYWORD, 0, 79),
        Token("except", TokenKind.KEYWORD, 0, 86),
        Token("finally", TokenKind.KEYWORD, 0, 94),
        Token("for", TokenKind.KEYWORD, 0, 98),
        Token("from", TokenKind.KEYWORD, 0, 103),
        Token("global", TokenKind.KEYWORD, 0, 110),
        Token("if", TokenKind.KEYWORD, 0, 113),
        Token("import", TokenKind.KEYWORD, 0, 120),
        Token("in", TokenKind.KEYWORD, 0, 123),
        Token("is", TokenKind.KEYWORD, 0, 126),
        Token("lambda", TokenKind.KEYWORD, 0, 133),
        Token("nonlocal", TokenKind.KEYWORD, 0, 142),
        Token("not", TokenKind.KEYWORD, 0, 146),
        Token("or", TokenKind.KEYWORD, 0, 149),
        Token("pass", TokenKind.KEYWORD, 0, 154),
        Token("raise", TokenKind.KEYWORD, 0, 160),
        Token("return", TokenKind.KEYWORD, 0, 167),
        Token("try", TokenKind.KEYWORD, 0, 171),
        Token("while", TokenKind.KEYWORD, 0, 177),
        Token("with", TokenKind.KEYWORD, 0, 182),
        Token("yield", TokenKind.KEYWORD, 0, 188),
        Token("const", TokenKind.KEYWORD, 0, 194),
        Token("ref", TokenKind.KEYWORD, 0, 198),
        Token("ptr", TokenKind.KEYWORD, 0, 202),
        Token("val", TokenKind.KEYWORD, 0, 206),
        Token("match", TokenKind.KEYWORD, 0, 212),
        Token("let", TokenKind.KEYWORD, 0, 216),
        Token("var", TokenKind.KEYWORD, 0, 220),
        Token("enum", TokenKind.KEYWORD, 0, 225),
        Token("true", TokenKind.KEYWORD, 0, 230),
        Token("false", TokenKind.KEYWORD, 0, 236),
        Token("interface", TokenKind.KEYWORD, 0, 246),
        Token("where", TokenKind.KEYWORD, 0, 252),
        Token("macro", TokenKind.KEYWORD, 0, 258),
        Token("typealias", TokenKind.KEYWORD, 0, 268),
    ]
Example No. 11
def test_lexer_tokenizes_valid_dec_float_literal_successfully():
    result0 = Lexer("0123.456789").lex()
    result1 = Lexer(".00").lex()
    result2 = Lexer(".12_34").lex()
    result3 = Lexer(".12_34e-100").lex()
    result4 = Lexer("1_234e00").lex()
    result5 = Lexer("1234.").lex()
    result6 = Lexer("1234.e-56789").lex()
    result7 = Lexer("12_34.5_67e+8_900").lex()
    result8 = Lexer("00.1_23456_789").lex()
    assert result0 == [Token("0123.456789", TokenKind.DEC_FLOAT, 0, 10)]
    assert result1 == [Token("0.00", TokenKind.DEC_FLOAT, 0, 2)]
    assert result2 == [Token("0.1234", TokenKind.DEC_FLOAT, 0, 5)]
    assert result3 == [Token("0.1234e-100", TokenKind.DEC_FLOAT, 0, 10)]
    assert result4 == [Token("1234e00", TokenKind.DEC_FLOAT, 0, 7)]
    assert result5 == [Token("1234.0", TokenKind.DEC_FLOAT, 0, 4)]
    assert result6 == [Token("1234.0e-56789", TokenKind.DEC_FLOAT, 0, 11)]
    assert result7 == [Token("1234.567e+8900", TokenKind.DEC_FLOAT, 0, 16)]
    assert result8 == [Token("00.123456789", TokenKind.DEC_FLOAT, 0, 13)]
Example No. 12
def test_lexer_tokenizes_valid_code_that_looks_like_dec_float_literal_successfully(
):
    result0 = Lexer("12_34.e_00").lex()
    result1 = Lexer("12_34.f100").lex()
    result2 = Lexer("12_34e_00").lex()

    assert result0 == [
        Token("1234", TokenKind.DEC_INTEGER, 0, 4),
        Token(".", TokenKind.DELIMITER, 0, 5),
        Token("e_00", TokenKind.IDENTIFIER, 0, 9),
    ]

    assert result1 == [
        Token("1234", TokenKind.DEC_INTEGER, 0, 4),
        Token(".", TokenKind.DELIMITER, 0, 5),
        Token("f100", TokenKind.IDENTIFIER, 0, 9),
    ]

    assert result2 == [
        Token("1234", TokenKind.DEC_INTEGER, 0, 4),
        Token("*", TokenKind.OPERATOR, 0, 4),
        Token("e_00", TokenKind.IDENTIFIER, 0, 8),
    ]
Example No. 13
class Compiler:
    def __init__(self):
        self.lexer = Lexer()
        self.parser = Parser()
        self.typechecker = Typechecker()

    @with_logger.log_result('LEXER')
    def lex(self, source):
        return self.lexer.lex(source)

    @with_logger.log_result('PARSER')
    def parse(self, source):
        return self.parser.parse(self.lex(source))

    @with_logger.log_result('TYPECHECKER')
    def typecheck(self, source):
        return self.typechecker.typecheck(self.parse(source))
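The `@with_logger.log_result(...)` decorators suggest a helper that logs each phase's return value under a label before passing it through. The real `with_logger` module isn't shown; the following is only a sketch of one plausible implementation of such a decorator factory:

import functools
import logging

class with_logger:
    """Hypothetical stand-in for the logging helper used above."""

    logger = logging.getLogger("compiler")

    @classmethod
    def log_result(cls, phase: str):
        """Return a decorator that logs the wrapped function's result."""
        def decorator(func):
            @functools.wraps(func)
            def wrapper(*args, **kwargs):
                result = func(*args, **kwargs)
                cls.logger.debug("%s result: %r", phase, result)
                return result
            return wrapper
        return decorator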
Example No. 14
def test_lexer_tokenizes_valid_dec_integer_successfully():
    result0 = Lexer("0123456789").lex()
    result1 = Lexer("123456789").lex()
    result2 = Lexer("01_33456_789").lex()
    result3 = Lexer("5_37450_99").lex()
    result4 = Lexer("1").lex()
    result5 = Lexer("0").lex()
    assert result0 == [Token("0123456789", TokenKind.DEC_INTEGER, 0, 9)]
    assert result1 == [Token("123456789", TokenKind.DEC_INTEGER, 0, 8)]
    assert result2 == [Token("0133456789", TokenKind.DEC_INTEGER, 0, 11)]
    assert result3 == [Token("53745099", TokenKind.DEC_INTEGER, 0, 9)]
    assert result4 == [Token("1", TokenKind.DEC_INTEGER, 0, 0)]
    assert result5 == [Token("0", TokenKind.DEC_INTEGER, 0, 0)]
Example No. 15
def test_lexer_fails_on_invalid_line_continuation():
    lexer0 = Lexer(r"\    \n")
    lexer1 = Lexer(r"\x")

    with raises(LexerError) as exc_info0:
        lexer0.lex()

    with raises(LexerError) as exc_info1:
        lexer1.lex()

    assert (exc_info0.value.message, exc_info0.value.row,
            exc_info0.value.column) == (
                "Unexpected character after line continuation character: ' '",
                0,
                0,
            )

    assert (exc_info1.value.message, exc_info1.value.row,
            exc_info1.value.column) == (
                "Unexpected character after line continuation character: 'x'",
                0,
                0,
            )
Example No. 16
def test_lexer_continues_on_valid_line_continuation():
    result = Lexer(r"""\
    """).lex()
    assert result == []
Example No. 17
def test_lexer_fails_with_consecutive_underscores_in_dec_float_literal():
    lexer0 = Lexer("1_234.0__5")
    lexer1 = Lexer(".111__0")
    lexer2 = Lexer("1_23.e-4__5")
    lexer3 = Lexer("1_23.100e-4__5")

    with raises(LexerError) as exc_info0:
        lexer0.lex()

    with raises(LexerError) as exc_info1:
        lexer1.lex()

    with raises(LexerError) as exc_info2:
        lexer2.lex()

    with raises(LexerError) as exc_info3:
        lexer3.lex()

    assert (exc_info0.value.message, exc_info0.value.row,
            exc_info0.value.column) == (
                "Unexpected consecutive underscores in floating point literal",
                0,
                8,
            )

    assert (exc_info1.value.message, exc_info1.value.row,
            exc_info1.value.column) == (
                "Unexpected consecutive underscores in floating point literal",
                0,
                5,
            )

    assert (exc_info2.value.message, exc_info2.value.row,
            exc_info2.value.column) == (
                "Unexpected consecutive underscores in floating point literal",
                0,
                9,
            )

    assert (exc_info3.value.message, exc_info3.value.row,
            exc_info3.value.column) == (
                "Unexpected consecutive underscores in floating point literal",
                0,
                12,
            )
Example No. 18
def test_lexer_tokenizes_valid_double_quote_short_string_successfully():
    result = Lexer(r'"erCA63y8hbb 54^58*(@$Q#3qDSHHScTw62-+"').lex()
    assert result == [
        Token(r"erCA63y8hbb 54^58*(@$Q#3qDSHHScTw62-+", TokenKind.STRING, 0,
              38)
    ]
Example No. 19
def test_lexer_tokenizes_underscore_as_identifier():
    result = Lexer("_").lex()
    assert result == [Token(r"_", TokenKind.IDENTIFIER, 0, 0)]
Example No. 20
def test_lexer_tokenizes_identifier_that_starts_with_underscore_as_identifier(
):
    result = Lexer(r"_hello").lex()
    assert result == [Token(r"_hello", TokenKind.IDENTIFIER, 0, 5)]
Example No. 21
def test_lexer_tokenizes_valid_delimiter_successfully():
    result0 = Lexer("(").lex()
    result1 = Lexer(")").lex()
    result2 = Lexer("[").lex()
    result3 = Lexer("]").lex()
    result4 = Lexer("{").lex()
    result5 = Lexer("}").lex()
    result6 = Lexer(",").lex()
    result8 = Lexer(":").lex()
    result9 = Lexer(".").lex()
    result10 = Lexer(";").lex()
    result11 = Lexer("@").lex()
    result12 = Lexer("=").lex()
    result13 = Lexer("->").lex()
    result14 = Lexer("+=").lex()
    result15 = Lexer("-=").lex()
    result16 = Lexer("*=").lex()
    result17 = Lexer("/=").lex()
    result18 = Lexer("//=").lex()
    result19 = Lexer("%=").lex()
    result20 = Lexer("@=").lex()
    result21 = Lexer("&=").lex()
    result22 = Lexer("|=").lex()
    result23 = Lexer("^=").lex()
    result24 = Lexer(">>=").lex()
    result25 = Lexer("<<=").lex()
    result26 = Lexer("||=").lex()

    assert result0 == [Token("(", TokenKind.DELIMITER, 0, 0)]
    assert result1 == [Token(")", TokenKind.DELIMITER, 0, 0)]
    assert result2 == [Token("[", TokenKind.DELIMITER, 0, 0)]
    assert result3 == [Token("]", TokenKind.DELIMITER, 0, 0)]
    assert result4 == [Token("{", TokenKind.DELIMITER, 0, 0)]
    assert result5 == [Token("}", TokenKind.DELIMITER, 0, 0)]
    assert result6 == [Token(",", TokenKind.DELIMITER, 0, 0)]
    assert result8 == [Token(":", TokenKind.DELIMITER, 0, 0)]
    assert result9 == [Token(".", TokenKind.DELIMITER, 0, 0)]
    assert result10 == [Token(";", TokenKind.DELIMITER, 0, 0)]
    assert result11 == [Token("@", TokenKind.DELIMITER, 0, 0)]
    assert result12 == [Token("=", TokenKind.DELIMITER, 0, 0)]
    assert result13 == [Token("->", TokenKind.DELIMITER, 0, 1)]
    assert result14 == [Token("+=", TokenKind.DELIMITER, 0, 1)]
    assert result15 == [Token("-=", TokenKind.DELIMITER, 0, 1)]
    assert result16 == [Token("*=", TokenKind.DELIMITER, 0, 1)]
    assert result17 == [Token("/=", TokenKind.DELIMITER, 0, 1)]
    assert result18 == [Token("//=", TokenKind.DELIMITER, 0, 2)]
    assert result19 == [Token("%=", TokenKind.DELIMITER, 0, 1)]
    assert result20 == [Token("@=", TokenKind.DELIMITER, 0, 1)]
    assert result21 == [Token("&=", TokenKind.DELIMITER, 0, 1)]
    assert result22 == [Token("|=", TokenKind.DELIMITER, 0, 1)]
    assert result23 == [Token("^=", TokenKind.DELIMITER, 0, 1)]
    assert result24 == [Token(">>=", TokenKind.DELIMITER, 0, 2)]
    assert result25 == [Token("<<=", TokenKind.DELIMITER, 0, 2)]
    assert result26 == [Token("||=", TokenKind.DELIMITER, 0, 2)]
Example No. 22
def test_lexer_fails_on_invalid_indentation():
    # Mixed space types in indentation
    lexer0 = Lexer("lambda *args:\n" "\t\t[1, 2, 3]\r\n" "\t\t  0x110")

    # Wrong number of spaces in indent
    lexer1 = Lexer("lambda *args:\n" "\t\t[1, 2, 3]\r\n" "\t\t\t0x110")

    # Wrong number of spaces in dedent
    lexer2 = Lexer("lambda *args:\n" "\t\t[1, 2, 3]\r\n" "\t0x110")

    # Mixed space types in separate indentation
    lexer3 = Lexer("lambda *args:\n" "\t\t[1, 2, 3]\r\n" "    0x110")

    with raises(LexerError) as exc_info0:
        lexer0.lex()

    with raises(LexerError) as exc_info1:
        lexer1.lex()

    with raises(LexerError) as exc_info2:
        lexer2.lex()

    with raises(LexerError) as exc_info3:
        lexer3.lex()

    assert (exc_info0.value.message, exc_info0.value.row,
            exc_info0.value.column) == (
                "Unexpected mix of different types of spaces in indentation",
                2,
                3,
            )

    assert (exc_info1.value.message, exc_info1.value.row,
            exc_info1.value.column) == (
                "Expected an indent of 2 spaces",
                2,
                2,
            )

    assert (exc_info2.value.message, exc_info2.value.row,
            exc_info2.value.column) == (
                "Unexpected number of spaces in dedent",
                2,
                0,
            )

    assert (exc_info3.value.message, exc_info3.value.row,
            exc_info3.value.column) == (
                "Unexpected mix of different types of spaces in indentation",
                2,
                3,
            )
Example No. 23
def test_lexer_tokenizes_valid_indentations_successfully():
    # Indentation with spaces
    lexer0 = Lexer("name \n    age \n        gender")
    result0 = lexer0.lex()

    # Indentation with tabs
    lexer1 = Lexer("name \n\t\tage \n\t\t\t\tgender\nhello")
    result1 = lexer1.lex()

    # Indentation in nested brackets
    lexer2 = Lexer("name \n\t(age \n{\n\t\n\t\tgender\n} try)\n\thello")
    result2 = lexer2.lex()

    # Unmatched indentation for parentheses with block inside
    lexer3 = Lexer("name (\r\n"
                   "\t\tlambda:\n"
                   "\t\t\t\tname, match (x, y): \t\n"
                   "\t\t\t\t\t\tage)")
    result3 = lexer3.lex()

    # Matched indentation for parentheses with block inside
    lexer4 = Lexer("name (\n"
                   " 1_000_234, lambda:\n"
                   "   name, match (x, y): \t\r\n"
                   "     age   \n"
                   ")")
    result4 = lexer4.lex()

    # Matched indentation for parentheses with block inside
    lexer5 = Lexer("name (\n"
                   "  1_000_234\n"
                   "    lambda:\n"
                   "          \n"
                   "        ( name, lambda: \t\n"
                   "            age\r\n"
                   "            hello)\n"
                   "        gem)")
    result5 = lexer5.lex()

    # Unmatched indentation for parentheses with block inside, but not currently in block
    lexer6 = Lexer("name (\n"
                   "  lambda:\n"
                   "    name, match (x, y): \n"
                   "      age\n"
                   "      \r\n"
                   "  { lambda: x})")
    result6 = lexer6.lex()

    assert result0 == [
        Token("name", TokenKind.IDENTIFIER, 0, 3),
        Token("", TokenKind.INDENT, 1, 3),
        Token("age", TokenKind.IDENTIFIER, 1, 6),
        Token("", TokenKind.INDENT, 2, 7),
        Token("gender", TokenKind.IDENTIFIER, 2, 13),
        Token("", TokenKind.DEDENT, 2, 13),
        Token("", TokenKind.DEDENT, 2, 13),
    ]
    assert (lexer0.indent_factor,
            lexer0.indent_space_type) == (4, IndentSpaceKind.SPACE)

    assert result1 == [
        Token("name", TokenKind.IDENTIFIER, 0, 3),
        Token("", TokenKind.INDENT, 1, 1),
        Token("age", TokenKind.IDENTIFIER, 1, 4),
        Token("", TokenKind.INDENT, 2, 3),
        Token("gender", TokenKind.IDENTIFIER, 2, 9),
        Token("", TokenKind.DEDENT, 3, -1),
        Token("", TokenKind.DEDENT, 3, -1),
        Token("hello", TokenKind.IDENTIFIER, 3, 4),
    ]
    assert (lexer1.indent_factor,
            lexer1.indent_space_type) == (2, IndentSpaceKind.TAB)

    assert result2 == [
        Token("name", TokenKind.IDENTIFIER, 0, 3),
        Token("", TokenKind.INDENT, 1, 0),
        Token("(", TokenKind.DELIMITER, 1, 1),
        Token("age", TokenKind.IDENTIFIER, 1, 4),
        Token("{", TokenKind.DELIMITER, 2, 0),
        Token("gender", TokenKind.IDENTIFIER, 4, 7),
        Token("}", TokenKind.DELIMITER, 5, 0),
        Token("try", TokenKind.KEYWORD, 5, 4),
        Token(")", TokenKind.DELIMITER, 5, 5),
        Token("", TokenKind.NEWLINE, 6, 0),
        Token("hello", TokenKind.IDENTIFIER, 6, 5),
        Token("", TokenKind.DEDENT, 6, 5),
    ]
    assert (lexer2.indent_factor,
            lexer2.indent_space_type) == (1, IndentSpaceKind.TAB)

    assert result3 == [
        Token("name", TokenKind.IDENTIFIER, 0, 3),
        Token("(", TokenKind.DELIMITER, 0, 5),
        Token("lambda", TokenKind.KEYWORD, 1, 7),
        Token(":", TokenKind.DELIMITER, 1, 8),
        Token("", TokenKind.INDENT, 2, 3),
        Token("name", TokenKind.IDENTIFIER, 2, 7),
        Token(",", TokenKind.DELIMITER, 2, 8),
        Token("match", TokenKind.KEYWORD, 2, 14),
        Token("(", TokenKind.DELIMITER, 2, 16),
        Token("x", TokenKind.IDENTIFIER, 2, 17),
        Token(",", TokenKind.DELIMITER, 2, 18),
        Token("y", TokenKind.IDENTIFIER, 2, 20),
        Token(")", TokenKind.DELIMITER, 2, 21),
        Token(":", TokenKind.DELIMITER, 2, 22),
        Token("", TokenKind.INDENT, 3, 5),
        Token("age", TokenKind.IDENTIFIER, 3, 8),
        Token("", TokenKind.DEDENT, 3, 9),
        Token("", TokenKind.DEDENT, 3, 9),
        Token(")", TokenKind.DELIMITER, 3, 9)
    ]
    assert (lexer3.indent_factor,
            lexer3.indent_space_type) == (2, IndentSpaceKind.TAB)

    assert result4 == [
        Token("name", TokenKind.IDENTIFIER, 0, 3),
        Token("(", TokenKind.DELIMITER, 0, 5),
        Token("1000234", TokenKind.DEC_INTEGER, 1, 9),
        Token(",", TokenKind.DELIMITER, 1, 10),
        Token("lambda", TokenKind.KEYWORD, 1, 17),
        Token(":", TokenKind.DELIMITER, 1, 18),
        Token("", TokenKind.INDENT, 2, 2),
        Token("name", TokenKind.IDENTIFIER, 2, 6),
        Token(",", TokenKind.DELIMITER, 2, 7),
        Token("match", TokenKind.KEYWORD, 2, 13),
        Token("(", TokenKind.DELIMITER, 2, 15),
        Token("x", TokenKind.IDENTIFIER, 2, 16),
        Token(",", TokenKind.DELIMITER, 2, 17),
        Token("y", TokenKind.IDENTIFIER, 2, 19),
        Token(")", TokenKind.DELIMITER, 2, 20),
        Token(":", TokenKind.DELIMITER, 2, 21),
        Token("", TokenKind.INDENT, 3, 4),
        Token("age", TokenKind.IDENTIFIER, 3, 7),
        Token("", TokenKind.DEDENT, 4, -1),
        Token("", TokenKind.DEDENT, 4, -1),
        Token(")", TokenKind.DELIMITER, 4, 0)
    ]
    assert (lexer4.indent_factor,
            lexer4.indent_space_type) == (2, IndentSpaceKind.SPACE)

    assert result5 == [
        Token("name", TokenKind.IDENTIFIER, 0, 3),
        Token("(", TokenKind.DELIMITER, 0, 5),
        Token("1000234", TokenKind.DEC_INTEGER, 1, 10),
        Token("lambda", TokenKind.KEYWORD, 2, 9),
        Token(":", TokenKind.DELIMITER, 2, 10),
        Token("", TokenKind.INDENT, 4, 7),
        Token("(", TokenKind.DELIMITER, 4, 8),
        Token("name", TokenKind.IDENTIFIER, 4, 13),
        Token(",", TokenKind.DELIMITER, 4, 14),
        Token("lambda", TokenKind.KEYWORD, 4, 21),
        Token(":", TokenKind.DELIMITER, 4, 22),
        Token("", TokenKind.INDENT, 5, 11),
        Token("age", TokenKind.IDENTIFIER, 5, 14),
        Token("", TokenKind.NEWLINE, 6, 11),
        Token("hello", TokenKind.IDENTIFIER, 6, 16),
        Token("", TokenKind.DEDENT, 6, 17),
        Token(")", TokenKind.DELIMITER, 6, 17),
        Token("", TokenKind.NEWLINE, 7, 7),
        Token("gem", TokenKind.IDENTIFIER, 7, 10),
        Token("", TokenKind.DEDENT, 7, 11),
        Token(")", TokenKind.DELIMITER, 7, 11)
    ]
    assert (lexer5.indent_factor,
            lexer5.indent_space_type) == (4, IndentSpaceKind.SPACE)

    assert result6 == [
        Token("name", TokenKind.IDENTIFIER, 0, 3),
        Token("(", TokenKind.DELIMITER, 0, 5),
        Token("lambda", TokenKind.KEYWORD, 1, 7),
        Token(":", TokenKind.DELIMITER, 1, 8),
        Token("", TokenKind.INDENT, 2, 3),
        Token("name", TokenKind.IDENTIFIER, 2, 7),
        Token(",", TokenKind.DELIMITER, 2, 8),
        Token("match", TokenKind.KEYWORD, 2, 14),
        Token("(", TokenKind.DELIMITER, 2, 16),
        Token("x", TokenKind.IDENTIFIER, 2, 17),
        Token(",", TokenKind.DELIMITER, 2, 18),
        Token("y", TokenKind.IDENTIFIER, 2, 20),
        Token(")", TokenKind.DELIMITER, 2, 21),
        Token(":", TokenKind.DELIMITER, 2, 22),
        Token("", TokenKind.INDENT, 3, 5),
        Token("age", TokenKind.IDENTIFIER, 3, 8),
        Token("", TokenKind.DEDENT, 5, 1),
        Token("", TokenKind.DEDENT, 5, 1),
        Token("{", TokenKind.DELIMITER, 5, 2),
        Token("lambda", TokenKind.KEYWORD, 5, 9),
        Token(":", TokenKind.DELIMITER, 5, 10),
        Token("x", TokenKind.IDENTIFIER, 5, 12),
        Token("}", TokenKind.DELIMITER, 5, 13),
        Token(")", TokenKind.DELIMITER, 5, 14)
    ]
    assert (lexer6.indent_factor,
            lexer6.indent_space_type) == (2, IndentSpaceKind.SPACE)
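Besides the token stream, these assertions read `indent_factor` (the indent width detected for the file) and `indent_space_type` off the lexer, comparing the latter to `IndentSpaceKind.SPACE` or `IndentSpaceKind.TAB`. A minimal sketch of that enum, assuming only the members the tests reference plus an assumed default:

from enum import Enum, auto

class IndentSpaceKind(Enum):
    # Hypothetical members; UNKNOWN is an assumed default for source that
    # never indents, the other two appear in the assertions above.
    UNKNOWN = auto()
    SPACE = auto()
    TAB = auto()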
Example No. 24
def test_lexer_fails_with_coefficient_literal_on_non_dec_numeric_literal():
    lexer0 = Lexer("0b1_110f")
    lexer1 = Lexer("0x1234fereef")
    lexer2 = Lexer("0o23_347good")

    with raises(LexerError) as exc_info0:
        lexer0.lex()

    with raises(LexerError) as exc_info1:
        lexer1.lex()

    with raises(LexerError) as exc_info2:
        lexer2.lex()

    assert (exc_info0.value.message, exc_info0.value.row,
            exc_info0.value.column) == (
                "Encountered invalid coefficient literal: '0b1110f'",
                0,
                7,
            )

    assert (exc_info1.value.message, exc_info1.value.row,
            exc_info1.value.column) == (
                "Encountered invalid coefficient literal: '0x1234fereef'",
                0,
                11,
            )

    assert (exc_info2.value.message, exc_info2.value.row,
            exc_info2.value.column) == (
                "Encountered invalid coefficient literal: '0o23347good'",
                0,
                11,
            )
Example No. 25
def test_lexer_tokenizes_valid_empty_short_string_successfully():
    result = Lexer(r'""').lex()
    assert result == [Token(r"", TokenKind.STRING, 0, 1)]
Example No. 26
def test_lexer_tokenizes_valid_byte_strings_successfully():
    result0 = Lexer(r"b'hello'").lex()
    result6 = Lexer(r"rb'''hello'''").lex()

    assert result0 == [Token(r"hello", TokenKind.BYTE_STRING, 0, 7)]
    assert result6 == [Token(r"hello", TokenKind.BYTE_STRING, 0, 12)]
Example No. 27
def test_lexer_fails_with_return_char_in_short_string():
    lexer = Lexer('"\r"')
    with raises(LexerError) as exc_info:
        lexer.lex()

    assert (exc_info.value.row, exc_info.value.column) == (0, 0)
Example No. 28
def test_lexer_tokenizes_valid_dec_imaginary_literal_successfully():
    result0 = Lexer("1_234.0_5im").lex()
    result1 = Lexer("1_234im").lex()
    assert result0 == [Token("1234.05", TokenKind.DEC_FLOAT_IMAG, 0, 10)]
    assert result1 == [Token("1234", TokenKind.DEC_INTEGER_IMAG, 0, 6)]
Example No. 29
def __init__(self):
    self.lexer = Lexer()
    self.parser = Parser()
    self.typechecker = Typechecker()
Example No. 30
def test_lexer_tokenizes_valid_operator_successfully():
    result0 = Lexer("+").lex()
    result1 = Lexer("-").lex()
    result2 = Lexer("/").lex()
    result3 = Lexer("*").lex()
    result4 = Lexer("//").lex()
    result5 = Lexer("%").lex()
    result6 = Lexer("<<").lex()
    result8 = Lexer(">>").lex()
    result9 = Lexer("&").lex()
    result10 = Lexer("|").lex()
    result11 = Lexer("^").lex()
    result12 = Lexer("~").lex()
    result13 = Lexer("<").lex()
    result14 = Lexer(">").lex()
    result15 = Lexer("<=").lex()
    result16 = Lexer(">=").lex()
    result17 = Lexer("==").lex()
    result18 = Lexer("!=").lex()
    result19 = Lexer("||").lex()
    result20 = Lexer("**").lex()
    result21 = Lexer("²").lex()
    result22 = Lexer("√").lex()

    assert result0 == [Token("+", TokenKind.OPERATOR, 0, 0)]
    assert result1 == [Token("-", TokenKind.OPERATOR, 0, 0)]
    assert result2 == [Token("/", TokenKind.OPERATOR, 0, 0)]
    assert result3 == [Token("*", TokenKind.OPERATOR, 0, 0)]
    assert result4 == [Token("//", TokenKind.OPERATOR, 0, 1)]
    assert result5 == [Token("%", TokenKind.OPERATOR, 0, 0)]
    assert result6 == [Token("<<", TokenKind.OPERATOR, 0, 1)]
    assert result8 == [Token(">>", TokenKind.OPERATOR, 0, 1)]
    assert result9 == [Token("&", TokenKind.OPERATOR, 0, 0)]
    assert result10 == [Token("|", TokenKind.OPERATOR, 0, 0)]
    assert result11 == [Token("^", TokenKind.OPERATOR, 0, 0)]
    assert result12 == [Token("~", TokenKind.OPERATOR, 0, 0)]
    assert result13 == [Token("<", TokenKind.OPERATOR, 0, 0)]
    assert result14 == [Token(">", TokenKind.OPERATOR, 0, 0)]
    assert result15 == [Token("<=", TokenKind.OPERATOR, 0, 1)]
    assert result16 == [Token(">=", TokenKind.OPERATOR, 0, 1)]
    assert result17 == [Token("==", TokenKind.OPERATOR, 0, 1)]
    assert result18 == [Token("!=", TokenKind.OPERATOR, 0, 1)]
    assert result19 == [Token("||", TokenKind.OPERATOR, 0, 1)]
    assert result20 == [Token("**", TokenKind.OPERATOR, 0, 1)]
    assert result21 == [Token("²", TokenKind.OPERATOR, 0, 0)]
    assert result22 == [Token("√", TokenKind.OPERATOR, 0, 0)]