def test_triple_quote():
    test.equal(
        list(lex_string(r'''( """hi""" """world""" )''')),
        [
            base.Token(None, '(', None),
            base.Token(None, 'STR', 'hi'),
            base.Token(None, 'STR', 'world'),
            base.Token(None, ')', None),
            base.Token(None, 'EOF', None),
        ],
    )
    test.equal(
        list(lex_string(r"""( '''hi''' '''world''' )""")),
        [
            base.Token(None, '(', None),
            base.Token(None, 'STR', 'hi'),
            base.Token(None, 'STR', 'world'),
            base.Token(None, ')', None),
            base.Token(None, 'EOF', None),
        ],
    )


def process_indents_adapter(tokens):
    # Stack of raw indent strings seen so far; stack[0] is the top level.
    stack = ['']
    for token in tokens:
        if token.type == 'EOF':
            # Close any indentation still open at end of input.
            while len(stack) > 1:
                stack.pop()
                yield base.Token(token.mark, 'DEDENT', None)
        if token.type == 'NEWLINE':
            yield base.Token(token.mark, 'NEWLINE', None)
            indent = token.value
            if indent != stack[-1]:
                if indent.startswith(stack[-1]):
                    # Deeper indentation: open a new block.
                    yield base.Token(token.mark, 'INDENT', None)
                    stack.append(indent)
                elif stack[-1].startswith(indent):
                    # Shallower indentation: close blocks until it matches.
                    while (stack[-1] != indent
                           and stack[-1].startswith(indent)):
                        stack.pop()
                        yield base.Token(token.mark, 'DEDENT', None)
                if indent != stack[-1]:
                    raise base.Error([token.mark], 'Invalid indent')
        else:
            yield token


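# Illustrative sketch only (not part of the lexer): drive process_indents_adapter
# with a hand-built token stream to show how NEWLINE tokens carrying a raw indent
# string become NEWLINE/INDENT/DEDENT tokens. It assumes only the
# base.Token(mark, type, value) shape used throughout this module.
def _demo_process_indents_adapter():
    raw = [
        base.Token(None, 'NEWLINE', ''),
        base.Token(None, 'ID', 'foo'),
        base.Token(None, 'NEWLINE', '    '),
        base.Token(None, 'ID', 'bar'),
        base.Token(None, 'EOF', None),
    ]
    return [(t.type, t.value) for t in process_indents_adapter(raw)]
    # -> [('NEWLINE', None), ('ID', 'foo'), ('NEWLINE', None), ('INDENT', None),
    #     ('ID', 'bar'), ('DEDENT', None), ('EOF', None)]

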
def test_empty():
    test.equal(
        list(lex_string(r"""
""")),
        [base.Token(None, 'NEWLINE', None), base.Token(None, 'EOF', None)],
    )


def test_keyword():
    test.equal(
        list(lex_string('for')),
        [
            base.Token(None, 'for', None),
            base.Token(None, 'EOF', None),
        ],
    )


def test_id():
    test.equal(
        list(lex_string('hi')),
        [
            base.Token(None, 'ID', 'hi'),
            base.Token(None, 'EOF', None),
        ],
    )


def test_line_comment():
    test.equal(
        list(lex_string("""
# this is a comment
hi
""")),
        [
            base.Token(None, 'ID', 'hi'),
            base.Token(None, 'EOF', None),
        ])


def test_non_greedy_str_literals():
    test.equal(
        list(lex_string(r''' "hi" "world" ''')),
        [
            base.Token(None, 'STR', 'hi'),
            base.Token(None, 'STR', 'world'),
            base.Token(None, 'EOF', None),
        ],
    )
    test.equal(
        list(lex_string(r''' "hi" """world""" ''')),
        [
            base.Token(None, 'STR', 'hi'),
            base.Token(None, 'STR', 'world'),
            base.Token(None, 'EOF', None),
        ],
    )
    test.equal(
        list(lex_string(r''' """hi""" "world" ''')),
        [
            base.Token(None, 'STR', 'hi'),
            base.Token(None, 'STR', 'world'),
            base.Token(None, 'EOF', None),
        ],
    )


def test_triple_quote():
    test.equal(
        list(lex_string(r''' """hi""" """world""" ''')),
        [
            base.Token(None, 'STR', 'hi'),
            base.Token(None, 'STR', 'world'),
            base.Token(None, 'EOF', None),
        ],
    )


def test_block_comment():
    test.equal(
        list(
            lex_string("""
/* this is a comment
 * this is a block comment
 */
hi
""")),
        [
            base.Token(None, 'ID', 'hi'),
            base.Token(None, 'EOF', None),
        ])


def test_id():
    test.equal(list(lex_string('`hi`')), [
        base.Token(None, 'ID', 'hi'),
        base.Token(None, 'EOF', None),
    ])
    test.equal(list(lex_string('`class`')), [
        base.Token(None, 'ID', 'class'),
        base.Token(None, 'EOF', None),
    ])

    @test.throws(errors.LexError)
    def on_c_keyword():
        list(lex_string('`struct`'))


def test_string_and_char_literals():
    test.equal(
        list(lex_string(r""" "hi" 'h' "h\nh" "\123" """)),
        [
            base.Token(None, 'STR', 'hi'),
            base.Token(None, 'CHAR', 'h'),
            base.Token(None, 'STR', 'h\nh'),
            base.Token(None, 'STR', chr(int('123', 8))),
            base.Token(None, 'EOF', None),
        ],
    )


def int_literal(m, mark):
    text = m.group()
    if text.endswith(('l', 'L')):
        type = 'LONG'
    else:
        type = 'INT'
    value = int(text.strip('lL'))
    return [base.Token(mark, type, value)]


def float_literal(m, mark):
    text = m.group()
    if text.endswith(('f', 'F')):
        type = 'FLOAT'
    else:
        type = 'DOUBLE'
    value = float(text.strip('fFdD'))
    return [base.Token(mark, type, value)]


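# Hedged sketch of the suffix handling in float_literal above: an 'f'/'F' suffix
# selects FLOAT, anything else DOUBLE, and the suffix is stripped before float().
# The regex below is only for illustration; it is not the lexer's real pattern.
def _demo_float_literal():
    import re  # local import to keep this sketch self-contained
    pattern = re.compile(r'\d*\.\d+[fFdD]?')
    return [
        (tok.type, tok.value)
        for text in ('1.5f', '1.5', '2.0D')
        for tok in float_literal(pattern.match(text), None)
    ]
    # -> [('FLOAT', 1.5), ('DOUBLE', 1.5), ('DOUBLE', 2.0)]

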
def escaped_id(m, mark):
    # Allow user to specify valid C identifiers as identifiers.
    name = m.group()[1:-1]
    if name in C_KEYWORDS:
        raise errors.LexError(
            [mark],
            'C keywords cannot be used as identifiers even '
            f'if they are escaped ({name})',
        )
    return [base.Token(mark, 'ID', name)]


def test_eof_dedents():
    test.equal(
        list(lex_string(r"""
foo
    bar""")),
        [
            base.Token(None, 'NEWLINE', None),
            base.Token(None, 'ID', 'foo'),
            base.Token(None, 'NEWLINE', None),
            base.Token(None, 'INDENT', None),
            base.Token(None, 'ID', 'bar'),
            base.Token(None, 'DEDENT', None),
            base.Token(None, 'EOF', None),
        ],
    )


def test_separators_and_operators():
    test.equal(
        list(lex_string(r""" ( ) , . += + """)),
        [
            base.Token(None, '(', None),
            base.Token(None, ')', None),
            base.Token(None, ',', None),
            base.Token(None, '.', None),
            base.Token(None, '+=', None),
            base.Token(None, '+', None),
            base.Token(None, 'EOF', None),
        ],
    )


def test_decimal_float():
    test.equal(
        list(lex_string(""" 1.0 .5 1.5f 1.5F 1.5D 1.5d """)),
        [
            base.Token(None, 'DOUBLE', 1.0),
            base.Token(None, 'DOUBLE', 0.5),
            base.Token(None, 'FLOAT', 1.5),
            base.Token(None, 'FLOAT', 1.5),
            base.Token(None, 'DOUBLE', 1.5),
            base.Token(None, 'DOUBLE', 1.5),
            base.Token(None, 'EOF', None),
        ],
    )


def test_decimal_int():
    test.equal(
        list(lex_string(""" 11l 22L 33 44 0 """)),
        [
            base.Token(None, 'LONG', 11),
            base.Token(None, 'LONG', 22),
            base.Token(None, 'INT', 33),
            base.Token(None, 'INT', 44),
            base.Token(None, 'INT', 0),
            base.Token(None, 'EOF', None),
        ],
    )


def id_or_keyword(m, mark):
    name = m.group()
    if name in KEYWORDS:
        return [base.Token(mark, name, None)]
    else:
        return [base.Token(mark, 'ID', name)]


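# Minimal sketch of the rule-callback contract used above: the lexer (not shown
# here) matches a pattern against the source and hands the re.Match plus a
# source mark to the callback, which returns the token(s) to emit. The pattern
# below and the assumption that 'def' is in KEYWORDS are for illustration only.
def _demo_id_or_keyword():
    import re  # local import to keep this sketch self-contained
    tokens = []
    for m in re.finditer(r'[A-Za-z_]\w*', 'def foo'):
        tokens.extend(id_or_keyword(m, None))
    return [(t.type, t.value) for t in tokens]
    # -> [('def', None), ('ID', 'foo')]

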
def test_newline_and_grouping():
    # Newlines should only appear:
    #   * at top level,
    #   * or inside '{}' grouping symbols
    test.equal(
        list(lex_string(r"""
(
)[
]{
}""")),
        [
            base.Token(None, 'NEWLINE', None),
            base.Token(None, '(', None),
            base.Token(None, ')', None),
            base.Token(None, '[', None),
            base.Token(None, ']', None),
            base.Token(None, '{', None),
            base.Token(None, 'NEWLINE', None),
            base.Token(None, '}', None),
            base.Token(None, 'EOF', None),
        ],
    )
    test.equal(
        list(lex_string(r"""({
})""")),
        [
            base.Token(None, '(', None),
            base.Token(None, '{', None),
            base.Token(None, 'NEWLINE', None),
            base.Token(None, '}', None),
            base.Token(None, ')', None),
            base.Token(None, 'EOF', None),
        ],
    )

    @test.throws(errors.InvalidGrouping)
    def throws():
        list(lex_string('( ]'))

    @test.throws(errors.InvalidGrouping)
    def throws():
        list(lex_string(']'))

    list(lex_string('[ ]'))


def test_sample_simple_python_code():
    test.equal(
        list(lex_string(r"""
# Some comments
def foo(
):
    pass
""")),
        [
            base.Token(None, 'NEWLINE', None),
            base.Token(None, 'NEWLINE', None),
            base.Token(None, 'def', None),
            base.Token(None, 'ID', 'foo'),
            base.Token(None, '(', None),
            base.Token(None, ')', None),
            base.Token(None, ':', None),
            base.Token(None, 'NEWLINE', None),
            base.Token(None, 'INDENT', None),
            base.Token(None, 'pass', None),
            base.Token(None, 'NEWLINE', None),
            base.Token(None, 'DEDENT', None),
            base.Token(None, 'EOF', None),
        ],
    )


def separators_and_operators(m, mark):
    return [base.Token(mark, m.group(), None)]


def double_quote_str_literal(m, mark):
    value = resolve_str(m.group()[1:-1], mark)
    return [base.Token(mark, 'STR', value)]


def triple_single_quote_str_literal(m, mark):
    value = resolve_str(m.group()[3:-3], mark)
    return [base.Token(mark, 'STR', value)]


def int_literal(m, mark):
    text = m.group()
    return [base.Token(mark, 'INT', int(text))]


def float_literal(m, mark):
    text = m.group()
    return [base.Token(mark, 'FLOAT', float(text))]


def char_literal(m, mark):
    value = resolve_str(m.group()[1:-1], mark)
    return [base.Token(mark, 'CHAR', value)]


def newline_and_raw_indent(m, mark):
    # Keep the raw indentation text as the token value so that
    # process_indents_adapter can turn it into INDENT/DEDENT tokens.
    raw_indent = m.group()[1:]
    return [base.Token(mark, 'NEWLINE', raw_indent)]


def test_sample_code():
    test.equal(
        list(
            lex_string(r"""
# Hi, this is some code
import <stdio.h>
def main() int {
    printf("Hello world!\n");
    return 0;
}
""")),
        [
            base.Token(None, 'import', None),
            base.Token(None, '<', None),
            base.Token(None, 'ID', 'stdio'),
            base.Token(None, '.', None),
            base.Token(None, 'ID', 'h'),
            base.Token(None, '>', None),
            base.Token(None, 'def', None),
            base.Token(None, 'ID', 'main'),
            base.Token(None, '(', None),
            base.Token(None, ')', None),
            base.Token(None, 'int', None),
            base.Token(None, '{', None),
            base.Token(None, 'ID', 'printf'),
            base.Token(None, '(', None),
            base.Token(None, 'STR', 'Hello world!\n'),
            base.Token(None, ')', None),
            base.Token(None, ';', None),
            base.Token(None, 'return', None),
            base.Token(None, 'INT', 0),
            base.Token(None, ';', None),
            base.Token(None, '}', None),
            base.Token(None, 'EOF', None),
        ],
    )


def newline_and_raw_indent(m, mark):
    # This variant drops the raw indentation: the lexer it belongs to does not
    # emit INDENT/DEDENT tokens, so only the NEWLINE itself is kept.
    return [base.Token(mark, 'NEWLINE', None)]