def test_symbol(self):
    symbols = "((+-*/))"
    lexer = Lexer(symbols)
    for expected in symbols:
        with self.subTest(expected=expected):
            self.assertToken(lexer.next(), TokenType.Symbol, expected)
    self.assertToken(lexer.next(), TokenType.EndOfStream)

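# The assertToken helper is defined elsewhere in the suite. A minimal sketch of
# what these tests appear to assume: token attributes named `type`, `value`,
# and `span` (all three names are assumptions, not confirmed by this section),
# where a span is a ((line, column), (line, column)) pair, 1-based, with an
# exclusive end column.
#
#     def assertToken(self, token, type, value=None, span=None):
#         self.assertEqual(token.type, type)
#         if value is not None:
#             self.assertEqual(token.value, value)
#         if span is not None:
#             self.assertEqual(token.span, span)
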
def test_integer(self):
    lexer = Lexer("1 2 34 -56 789")
    self.assertToken(lexer.next(), TokenType.Integer, "1")
    self.assertToken(lexer.next(), TokenType.Integer, "2")
    self.assertToken(lexer.next(), TokenType.Integer, "34")
    # A leading minus is lexed as a separate Symbol token, not as part of
    # the integer literal.
    self.assertToken(lexer.next(), TokenType.Symbol, "-")
    self.assertToken(lexer.next(), TokenType.Integer, "56")
    self.assertToken(lexer.next(), TokenType.Integer, "789")
    self.assertToken(lexer.next(), TokenType.EndOfStream)

def test_peeking(self):
    symbols = "+-*/"
    lexer = Lexer(symbols)
    for i in range(len(symbols)):
        next_expected = symbols[i]
        with self.subTest(next_token=next_expected):
            # Peek ahead through every remaining symbol, then check that
            # the peeked tokens are restored once the context exits.
            with lexer.peeking():
                for j in range(len(symbols) - i):
                    expected = symbols[i + j]
                    with self.subTest(peek_token=expected):
                        self.assertToken(lexer.next(), TokenType.Symbol, expected)
            self.assertToken(lexer.next(), TokenType.Symbol, next_expected)

def test_peeking_save(self):
    lexer = Lexer("1 2 3 4 5")
    self.assertToken(lexer.next(), TokenType.Integer, "1")
    with lexer.peeking() as p:
        self.assertToken(lexer.next(), TokenType.Integer, "2")
        self.assertToken(lexer.next(), TokenType.Integer, "3")
        # save() commits "2" and "3"; only tokens consumed after this
        # point are restored when the context exits.
        p.save()
        self.assertToken(lexer.next(), TokenType.Integer, "4")
    self.assertToken(lexer.next(), TokenType.Integer, "4")
    self.assertToken(lexer.next(), TokenType.Integer, "5")
    self.assertToken(lexer.next(), TokenType.EndOfStream)

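# How lexer.peeking() appears to behave, based on the tests above: on entry it
# remembers the current position, save() advances that restore point to the
# position at the time of the call, and on exit the lexer rewinds to the
# restore point. A hypothetical sketch, assuming the Lexer keeps an index
# `self.pos` into a buffer of already-scanned tokens (the names Peek and pos
# are assumptions; only the observable behaviour comes from these tests):
#
#     @contextmanager
#     def peeking(self):
#         peek = Peek(self)        # restore point starts at the current pos
#         try:
#             yield peek
#         finally:
#             self.pos = peek.restore
#
#     class Peek:
#         def __init__(self, lexer):
#             self.lexer = lexer
#             self.restore = lexer.pos
#
#         def save(self):
#             # Commit everything consumed so far by moving the restore
#             # point forward; tokens consumed later are still rewound.
#             self.restore = self.lexer.pos
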
def test_string_unterminated_eol(self):
    # An escaped quote, or an unescaped quote of the other kind, must not
    # terminate the literal, so each of these reaches the end of the line
    # unterminated.
    strings = [
        '"Hello World\n',
        '"Hello World\\"\n',
        '"Hello World\'\n',
        "'Hello World\n",
        "'Hello World\\'\n",
        "'Hello World\"\n",
    ]
    for string in strings:
        with self.subTest(string=string):
            lexer = Lexer(string)
            with self.assertRaisesRegex(
                LexerError,
                r"^Unexpected end of line while scanning string literal \(\d+:\d+, \d+:\d+\)$",
            ):
                lexer.next()

def test_string_unterminated_eos(self):
    strings = [
        '"Hello World',
        '"Hello World\\"',
        '"Hello World\'',
        "'Hello World",
        "'Hello World\\'",
        "'Hello World\"",
    ]
    for string in strings:
        with self.subTest(string=string):
            lexer = Lexer(string)
            with self.assertRaisesRegex(LexerError, r"Unexpected end of stream"):
                lexer.next()

def test_bracket(self):
    # Inside brackets, newlines are still emitted but indentation tracking
    # is suspended: no Indent/Dedent tokens appear until the bracket closes.
    lexer = Lexer(dedent("""\
        A
        \tB (
        C
        )
        D
        """))
    self.assertToken(lexer.next(), TokenType.Identifier, "A")
    self.assertToken(lexer.next(), TokenType.Newline, "\n")
    self.assertToken(lexer.next(), TokenType.Indent, "\t")
    self.assertToken(lexer.next(), TokenType.Identifier, "B")
    self.assertToken(lexer.next(), TokenType.Symbol, "(")
    self.assertToken(lexer.next(), TokenType.Newline, "\n")
    self.assertToken(lexer.next(), TokenType.Identifier, "C")
    self.assertToken(lexer.next(), TokenType.Newline, "\n")
    self.assertToken(lexer.next(), TokenType.Symbol, ")")
    self.assertToken(lexer.next(), TokenType.Newline, "\n")
    self.assertToken(lexer.next(), TokenType.Dedent, "")
    self.assertToken(lexer.next(), TokenType.Identifier, "D")
    self.assertToken(lexer.next(), TokenType.Newline, "\n")
    self.assertToken(lexer.next(), TokenType.EndOfStream, "")

def test_identifier(self):
    identifiers = [
        "x",
        "_x",
        "xyz",
        "xyz123",
    ]
    for identifier in identifiers:
        with self.subTest(identifier=identifier):
            lexer = Lexer(identifier)
            self.assertToken(lexer.next(), TokenType.Identifier, identifier, ((1, 1), (1, len(identifier) + 1)))
            self.assertToken(lexer.next(), TokenType.EndOfStream, "", ((1, len(identifier) + 1), (1, len(identifier) + 1)))

def test_string(self):
    # Escaped quotes of either kind, and unescaped quotes of the other
    # kind, are allowed inside a string literal; the token value keeps
    # the quotes and escapes verbatim.
    strings = [
        '"Hello World"',
        '"Hello \\" World"',
        '"Hello \\\' World"',
        '"Hello \' World"',
        "'Hello World'",
        "'Hello \\' World'",
        "'Hello \\\" World'",
        "'Hello \" World'",
    ]
    for string in strings:
        with self.subTest(string=string):
            lexer = Lexer(string)
            self.assertToken(lexer.next(), TokenType.String, string, ((1, 1), (1, len(string) + 1)))
            self.assertToken(lexer.next(), TokenType.EndOfStream, "", ((1, len(string) + 1), (1, len(string) + 1)))

def test_nested_peeking(self):
    lexer = Lexer("1 2 3")
    self.assertToken(lexer.next(), TokenType.Integer, "1")
    with lexer.peeking():
        self.assertToken(lexer.next(), TokenType.Integer, "2")
        with lexer.peeking():
            self.assertToken(lexer.next(), TokenType.Integer, "3")
            self.assertToken(lexer.next(), TokenType.EndOfStream)
        # The inner context restores to just after "2" ...
        self.assertToken(lexer.next(), TokenType.Integer, "3")
        self.assertToken(lexer.next(), TokenType.EndOfStream)
    # ... and the outer context restores to just after "1".
    self.assertToken(lexer.next(), TokenType.Integer, "2")
    self.assertToken(lexer.next(), TokenType.Integer, "3")
    self.assertToken(lexer.next(), TokenType.EndOfStream)

def test_nested_peeking_save_multiple(self):
    lexer = Lexer("1 2 3 4 5")
    self.assertToken(lexer.next(), TokenType.Integer, "1")
    with lexer.peeking() as p1:
        self.assertToken(lexer.next(), TokenType.Integer, "2")
        with lexer.peeking() as p2:
            self.assertToken(lexer.next(), TokenType.Integer, "3")
            p2.save()
            self.assertToken(lexer.next(), TokenType.Integer, "4")
        # p2 saved after "3", so the inner exit rewinds to just after "3".
        p1.save()
        self.assertToken(lexer.next(), TokenType.Integer, "4")
    # p1 saved at that same position, so the outer exit rewinds there too.
    self.assertToken(lexer.next(), TokenType.Integer, "4")
    self.assertToken(lexer.next(), TokenType.Integer, "5")
    self.assertToken(lexer.next(), TokenType.EndOfStream)

def test_empty(self):
    string = ""
    with self.subTest(string=string):
        lexer = Lexer(string)
        # EndOfStream is sticky: repeated calls keep returning it.
        self.assertToken(lexer.next(), TokenType.EndOfStream, "", ((1, 1), (1, 1)))
        self.assertToken(lexer.next(), TokenType.EndOfStream, "", ((1, 1), (1, 1)))
        self.assertToken(lexer.next(), TokenType.EndOfStream, "", ((1, 1), (1, 1)))

    # Two spaces: trailing whitespace is skipped, EndOfStream at column 3.
    string = "  "
    with self.subTest(string=string):
        lexer = Lexer(string)
        self.assertToken(lexer.next(), TokenType.EndOfStream, "", ((1, 3), (1, 3)))

    string = "\n"
    with self.subTest(string=string):
        lexer = Lexer(string)
        self.assertToken(lexer.next(), TokenType.Newline, "\n", ((1, 1), (2, 1)))
        self.assertToken(lexer.next(), TokenType.EndOfStream, "", ((2, 1), (2, 1)))

    string = "\r\n"
    with self.subTest(string=string):
        lexer = Lexer(string)
        self.assertToken(lexer.next(), TokenType.Newline, "\r\n", ((1, 1), (2, 1)))
        self.assertToken(lexer.next(), TokenType.EndOfStream, "", ((2, 1), (2, 1)))

    # Three whitespace characters before the newline put it at column 4.
    string = "  \t\n"
    with self.subTest(string=string):
        lexer = Lexer(string)
        self.assertToken(lexer.next(), TokenType.Newline, "\n", ((1, 4), (2, 1)))
        self.assertToken(lexer.next(), TokenType.EndOfStream, "", ((2, 1), (2, 1)))

    string = "  \t\r\n"
    with self.subTest(string=string):
        lexer = Lexer(string)
        self.assertToken(lexer.next(), TokenType.Newline, "\r\n", ((1, 4), (2, 1)))
        self.assertToken(lexer.next(), TokenType.EndOfStream, "", ((2, 1), (2, 1)))

    # Three whitespace characters on the final line put EndOfStream at column 4.
    string = "\n\t  "
    with self.subTest(string=string):
        lexer = Lexer(string)
        self.assertToken(lexer.next(), TokenType.Newline, "\n", ((1, 1), (2, 1)))
        self.assertToken(lexer.next(), TokenType.EndOfStream, "", ((2, 4), (2, 4)))

    string = "\r\n\t  "
    with self.subTest(string=string):
        lexer = Lexer(string)
        self.assertToken(lexer.next(), TokenType.Newline, "\r\n", ((1, 1), (2, 1)))
        self.assertToken(lexer.next(), TokenType.EndOfStream, "", ((2, 4), (2, 4)))

    string = "\n\n"
    with self.subTest(string=string):
        lexer = Lexer(string)
        self.assertToken(lexer.next(), TokenType.Newline, "\n", ((1, 1), (2, 1)))
        self.assertToken(lexer.next(), TokenType.Newline, "\n", ((2, 1), (3, 1)))
        self.assertToken(lexer.next(), TokenType.EndOfStream, "", ((3, 1), (3, 1)))

    string = "\r\n\r\n"
    with self.subTest(string=string):
        lexer = Lexer(string)
        self.assertToken(lexer.next(), TokenType.Newline, "\r\n", ((1, 1), (2, 1)))
        self.assertToken(lexer.next(), TokenType.Newline, "\r\n", ((2, 1), (3, 1)))
        self.assertToken(lexer.next(), TokenType.EndOfStream, "", ((3, 1), (3, 1)))

def test_indentation_dedent(self):
    lexer = Lexer(dedent("""\
        A
        \tB
        \t\tC
        \t\t\tD
        \tE
        F
        """))
    self.assertToken(lexer.next(), TokenType.Identifier, "A", ((1, 1), (1, 2)))
    self.assertToken(lexer.next(), TokenType.Newline, "\n", ((1, 2), (2, 1)))
    self.assertToken(lexer.next(), TokenType.Indent, "\t", ((2, 1), (2, 2)))
    self.assertToken(lexer.next(), TokenType.Identifier, "B", ((2, 2), (2, 3)))
    self.assertToken(lexer.next(), TokenType.Newline, "\n", ((2, 3), (3, 1)))
    self.assertToken(lexer.next(), TokenType.Indent, "\t\t", ((3, 1), (3, 3)))
    self.assertToken(lexer.next(), TokenType.Identifier, "C", ((3, 3), (3, 4)))
    self.assertToken(lexer.next(), TokenType.Newline, "\n", ((3, 4), (4, 1)))
    self.assertToken(lexer.next(), TokenType.Indent, "\t\t\t", ((4, 1), (4, 4)))
    self.assertToken(lexer.next(), TokenType.Identifier, "D", ((4, 4), (4, 5)))
    self.assertToken(lexer.next(), TokenType.Newline, "\n", ((4, 5), (5, 1)))
    # Dropping from three tabs to one emits one zero-width Dedent per
    # abandoned level, each carrying the indentation that remains.
    self.assertToken(lexer.next(), TokenType.Dedent, "\t\t", ((5, 2), (5, 2)))
    self.assertToken(lexer.next(), TokenType.Dedent, "\t", ((5, 2), (5, 2)))
    self.assertToken(lexer.next(), TokenType.Identifier, "E", ((5, 2), (5, 3)))
    self.assertToken(lexer.next(), TokenType.Newline, "\n", ((5, 3), (6, 1)))
    self.assertToken(lexer.next(), TokenType.Dedent, "", ((6, 1), (6, 1)))
    self.assertToken(lexer.next(), TokenType.Identifier, "F", ((6, 1), (6, 2)))
    self.assertToken(lexer.next(), TokenType.Newline, "\n", ((6, 2), (7, 1)))
    self.assertToken(lexer.next(), TokenType.EndOfStream, "", ((7, 1), (7, 1)))
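
# The indentation algorithm implied by the expectations above, sketched as a
# hypothetical fragment (the names `stack` and `emit` are assumptions): the
# lexer keeps a stack of indentation prefixes, pushes and emits one Indent
# when a line starts with a longer prefix, and pops once per abandoned level,
# emitting a zero-width Dedent whose value is the prefix left on the stack.
#
#     stack = [""]
#     def handle_line_start(prefix):
#         if len(prefix) > len(stack[-1]):
#             stack.append(prefix)
#             emit(TokenType.Indent, prefix)
#         else:
#             while len(prefix) < len(stack[-1]):
#                 stack.pop()
#                 emit(TokenType.Dedent, stack[-1])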