def parse_declaration(self) -> list:
    """Parse a variable declaration, with or without an initial value.

    Accepted forms::

        Int thing
        Int thing = expr

    Returns a list of nodes because ``Int thing = expr`` produces two
    overlapping nodes: a Declaration followed by an Assignment.
    """
    # TODO: module.Thingy thing
    type_node = self.parse_name()       # TODO: module's Thing
    target = self.parse_name()

    # Anything other than a newline after the variable name must be
    # the "= expr" part.
    has_initial_value = self.tokens.coming_up().kind != 'NEWLINE'
    if has_initial_value:
        self.tokens.check_and_pop('OP', '=')
        initial_value = self.parse_expression()
    self.tokens.pop_newline()

    nodes = [
        Declaration(Location.between(type_node, target),
                    type_node, target.name),
    ]
    if has_initial_value:
        nodes.append(
            Assignment(Location.between(target, initial_value),
                       target, initial_value))
    return nodes
def test_parse_string():
    """A string literal parses to one String node without the quotes."""
    plain = get_ast('"hello"')
    assert plain == [ast.String(Location(0, 7), 'hello')]

    # TODO: do something to escapes
    with_escapes = get_ast(r'"hello \n\t"')
    assert with_escapes == [
        ast.String(Location(0, 12), r'hello \n\t'),
    ]
def test_integers():
    """Decimal digits form one INTEGER token; hex literals are not supported."""
    decimal_tokens = tokenize('1234')
    assert decimal_tokens == [Token('INTEGER', '1234', Location(0, 4))]

    # TODO: fix this
    hex_tokens = tokenize('0x1234')
    assert hex_tokens == [
        Token('INTEGER', '0', Location(0, 1)),
        Token('NAME', 'x1234', Location(1, 6)),
    ]
def test_names():
    """Names may contain digits, but leading digits become an INTEGER token."""
    letters_only = tokenize('hello')
    assert letters_only == [Token('NAME', 'hello', Location(0, 5))]

    trailing_digits = tokenize('lol123')
    assert trailing_digits == [Token('NAME', 'lol123', Location(0, 6))]

    # i'm not sure if the tokenizer should raise an error about this,
    # but this behaviour is ok for now
    leading_digits = tokenize('123wolo')
    assert leading_digits == [
        Token('INTEGER', '123', Location(0, 3)),
        Token('NAME', 'wolo', Location(3, 7)),
    ]
def test_trailing_commas(error_at):
    """One trailing comma is accepted; leading or doubled commas are errors."""
    parsed = get_ast('lol(1,)\n'
                     'lol(1, 2, 3,)')
    assert parsed == [
        ast.FunctionCall(
            Location(0, 7),
            ast.Name(Location(0, 3), 'lol'),
            [ast.Integer(Location(4, 5), '1')]),
        ast.FunctionCall(
            Location(0, 13, 2),
            ast.Name(Location(0, 3, 2), 'lol'),
            [
                ast.Integer(Location(4, 5, 2), '1'),
                ast.Integer(Location(7, 8, 2), '2'),
                ast.Integer(Location(10, 11, 2), '3'),
            ]),
    ]

    # (code, error start, error end, error message)
    bad_calls = [
        ('lol(,)', 4, 5, "don't put a ',' here"),
        ('lol(,,)', 4, 5, "don't put a ',' here"),
        ('lol(something,,)', 13, 15, "two ',' characters"),
        # this doesn't matter much because it's unlikely that anyone will
        # accidentally put 3 commas next to each other
        ('lol(something,,,)', 13, 15, "two ',' characters"),
    ]
    for code, start, end, message in bad_calls:
        with error_at(start, end, msg=message):
            get_ast(code)
def test_function_calls():
    """Calls with zero, one and several arguments, one call per line."""
    parsed = get_ast('lol()\n'
                     'lol(1)\n'
                     'lol(1, 2, 3)\n')

    no_args = ast.FunctionCall(
        Location(0, 5),
        ast.Name(Location(0, 3), 'lol'),
        [])
    one_arg = ast.FunctionCall(
        Location(0, 6, 2),
        ast.Name(Location(0, 3, 2), 'lol'),
        [ast.Integer(Location(4, 5, 2), '1')])
    three_args = ast.FunctionCall(
        Location(0, 12, 3),
        ast.Name(Location(0, 3, 3), 'lol'),
        [
            ast.Integer(Location(4, 5, 3), '1'),
            ast.Integer(Location(7, 8, 3), '2'),
            ast.Integer(Location(10, 11, 3), '3'),
        ])

    assert parsed == [no_args, one_arg, three_args]
def _parse_comma_list(self, start='(', stop=')', parsemethod=None):
    """Parse a delimited, comma-separated list of elements.

    Accepted shapes (shown with the default delimiters)::

        ( )
        ( element )
        ( element , )
        ( element , element )
        ( element , element , )
        ...

    Args:
        start: value of the OP token that opens the list.
        stop: value of the OP token that closes the list.
        parsemethod: zero-argument callable that parses one element;
            defaults to ``self.parse_expression``.

    Returns:
        An ``(elements, stop_token)`` tuple, where ``stop_token`` is the
        popped closing token.

    Raises:
        CompileError: if the list starts with a comma, or if two commas
            appear next to each other.
    """
    if parsemethod is None:
        parsemethod = self.parse_expression

    self.tokens.check_and_pop('OP', start)

    # A comma can only be "leading" right after the opening token; once
    # an element has been parsed, a misplaced comma is always caught by
    # the double-comma check below, so this check lives outside the loop.
    if self.tokens.coming_up().startswith(['OP', ',']):
        raise CompileError("don't put a ',' here",
                           self.tokens.coming_up().location)

    elements = []
    while not self.tokens.coming_up().startswith(['OP', stop]):
        elements.append(parsemethod())
        if self.tokens.coming_up().startswith(['OP', stop]):
            break

        comma = self.tokens.check_and_pop('OP', ',')
        if self.tokens.coming_up().startswith(['OP', ',']):
            raise CompileError(
                "two ',' characters",
                Location.between(comma, self.tokens.coming_up()))
        # a stop token right after the comma (trailing comma) ends the
        # loop through the while condition

    return (elements, self.tokens.pop())
def parse_expression(self):
    """Parse one expression and return its AST node.

    An expression is a name, a string, an integer or a parenthesized
    expression, followed by any number of call argument lists.

    Raises:
        CompileError: if the upcoming token cannot start an expression.
    """
    first = self.tokens.coming_up()

    simple_parsers = {
        'NAME': self.parse_name,            # hello
        'STRING': self.parse_string,        # "hello"
        'INTEGER': self.parse_integer,      # 123
    }
    parse_simple = simple_parsers.get(first.kind)

    if parse_simple is not None:
        expr = parse_simple()
    elif first.startswith(['OP', '(']):
        expr = self.parse_parentheses()
    else:
        raise CompileError(
            "this should be variable name, string, integer or '('",
            first.location)

    # check for function calls, this is a while loop to allow
    # function calls like thing()()()
    while self.tokens.coming_up().startswith(['OP', '(']):
        call_args, closing_paren = self._parse_comma_list('(', ')')
        expr = FunctionCall(Location.between(expr, closing_paren),
                            expr, call_args)
    return expr
def parse_function_def(self):
    """Parse a function definition and return a FunctionDef node.

    Accepted forms::

        function main() { ... }
        function thing() returns Int { ... }

    The arguments are parsed with ``self._type_and_name``. The return
    type in the resulting node is None when there is no ``returns``
    part. A newline right after the opening brace is optional, and a
    newline is required after the closing brace.
    """
    function_keyword = self.tokens.check_and_pop('NAME', 'function')
    name = self.parse_name()
    # the closing ')' token is not needed here
    args, _ = self._parse_comma_list('(', ')',
                                     parsemethod=self._type_and_name)

    returntype = None
    if self.tokens.coming_up().startswith(['NAME', 'returns']):
        self.tokens.pop()
        returntype = self.parse_name()

    self.tokens.check_and_pop('OP', '{')
    if self.tokens.coming_up().kind == 'NEWLINE':
        self.tokens.pop_newline()

    # parse_statement() results are merged with extend(), so one
    # statement may contribute several nodes
    body = []
    while not self.tokens.coming_up().startswith(['OP', '}']):
        body.extend(self.parse_statement())

    closing_brace = self.tokens.check_and_pop('OP', '}')
    self.tokens.pop_newline()

    return FunctionDef(Location.between(function_keyword, closing_brace),
                       name.name, args, returntype, body)
def test_parse_name(fake_keyword, error_at):
    """A plain name parses to a Name node; the 'fake' keyword is rejected."""
    parsed = get_ast('hello')
    assert parsed == [ast.Name(Location(0, 5), 'hello')]

    keyword_message = ("fake is not a valid variable name "
                       "because it has a special meaning")
    with error_at(0, 4, msg=keyword_message):
        get_ast('fake')
def assignment(self):
    """Parse ``thing = value`` followed by a newline.

    Returns an Assignment node whose location spans from the target
    name to the value.
    """
    # TODO: thing's stuff = value
    lhs = self.parse_name()
    self.tokens.check_and_pop('OP', '=')
    rhs = self.parse_expression()
    self.tokens.pop_newline()

    span = Location.between(lhs, rhs)
    return Assignment(span, lhs, rhs)
def test_declaration_and_assignment():
    """Declarations, declarations with an initial value, and assignments."""
    declared = get_ast('Int i')
    assert declared == [
        ast.Declaration(Location(0, 5),
                        ast.Name(Location(0, 3), 'Int'),
                        'i'),
    ]

    declared_and_assigned = get_ast('Int i = 123')
    assert declared_and_assigned == [
        # these overlap
        ast.Declaration(
            Location(0, 5),         # Int i
            ast.Name(Location(0, 3), 'Int'),
            'i'),
        ast.Assignment(
            Location(4, 11),        # i = 123
            ast.Name(Location(4, 5), 'i'),
            ast.Integer(Location(8, 11), '123')),
    ]

    assigned = get_ast('i = 123')
    assert assigned == [
        ast.Assignment(Location(0, 7),
                       ast.Name(Location(0, 1), 'i'),
                       ast.Integer(Location(4, 7), '123')),
    ]
def parse_if(self):
    """Parse ``if cond { statements }`` and return an If node.

    A newline is required after the opening brace unless the closing
    brace follows immediately, and a newline is required after the
    closing brace.
    """
    def at_closing_brace():
        return self.tokens.coming_up().startswith(['OP', '}'])

    if_keyword = self.tokens.check_and_pop('NAME', 'if')
    condition = self.parse_expression()
    self.tokens.check_and_pop('OP', '{')

    # allow "if thing { }" without a newline
    if not at_closing_brace():
        self.tokens.pop_newline()

    statements = []
    while not at_closing_brace():
        statements.extend(self.parse_statement())

    closing_brace = self.tokens.check_and_pop('OP', '}')
    self.tokens.pop_newline()

    span = Location.between(if_keyword, closing_brace)
    return If(span, condition, statements)
def parse_file(self):
    """Yield the AST nodes of every statement in the token stream.

    Parsing stops cleanly when ``self.tokens.coming_up(1)`` raises
    EOFError, i.e. when no tokens are left between statements.

    Raises:
        CompileError: if the tokens run out in the middle of a
            statement. The error location is the 3 columns right after
            the last popped token, and the EOFError is attached as the
            explicit cause.
    """
    while True:
        try:
            self.tokens.coming_up(1)
        except EOFError:
            # no more statements
            break

        try:
            yield from self.parse_statement()
        except EOFError as eof_error:
            # underline 3 blanks after last token
            last_location = self.tokens.last_popped.location
            mark_here = Location(last_location.end,
                                 last_location.end + 3,
                                 last_location.lineno)
            # python abbreviates this as EOF and beginners don't
            # understand it, but i guess this one is good enough
            raise CompileError("unexpected end of file",
                               mark_here) from eof_error
def test_if():
    """An empty one-line if and a multi-line if with two statements."""
    one_liner = get_ast('if thing { }')
    assert one_liner == [
        ast.If(Location(0, 12), ast.Name(Location(3, 8), 'thing'), []),
    ]

    multi_line = get_ast('if thing {\n'
                         '\tstuff\n'
                         '\tmore_stuff\n'
                         '}')
    expected_body = [
        ast.Name(Location(4, 9, 2), 'stuff'),
        ast.Name(Location(4, 14, 3), 'more_stuff'),
    ]
    assert multi_line == [
        ast.If(
            Location(0, None),      # None because it's not a 1-liner
            ast.Name(Location(3, 8), 'thing'),
            expected_body),
    ]
def test_parse_integer():
    """An integer literal parses to one Integer node holding its text."""
    parsed = get_ast('123')
    expected = [ast.Integer(Location(0, 3), '123')]
    assert parsed == expected
def test_strings():
    """A STRING token keeps its quotes; a newline inside a string is an error."""
    tokens = tokenize('"hello world"')
    assert tokens == [
        Token('STRING', '"hello world"', Location(0, 13)),
    ]

    with pytest.raises(CompileError):
        tokenize('"hello \n world"')
def parse_return(self):
    """Parse ``return <expression>`` followed by a newline.

    Returns a Return node whose location spans from the ``return``
    keyword to the returned value.
    """
    return_keyword = self.tokens.check_and_pop('NAME', 'return')
    returned = self.parse_expression()
    self.tokens.pop_newline()

    span = Location.between(return_keyword, returned)
    return Return(span, returned)
def test_whitespace():
    """Whitespace-only input yields no tokens; surrounding whitespace is dropped."""
    only_whitespace = tokenize(' \n \t \r ')
    assert only_whitespace == []

    padded_integer = tokenize(' \n \t 123 \r ')
    assert padded_integer == [
        Token('INTEGER', '123', Location(5, 8, 2)),
    ]
def test_comments():
    """Line comments and block comments produce no tokens of their own."""
    after_line_comment = tokenize('// hello\n123')
    assert after_line_comment == [
        Token('NEWLINE', '\n', Location(8, 11, 1)),
        Token('INTEGER', '123', Location(0, 3, 2)),
    ]

    block_comment_only = tokenize('/* hello\nhello\nhello */')
    assert block_comment_only == []
def test_hello_world():
    """Tokenize the whole _HELLO_WORLD program and check every token."""
    tokens = tokenize(_HELLO_WORLD)

    expected = [
        # line 2
        Token('NEWLINE', '\n', Location(37, 40, 2)),
        # line 3
        Token('NAME', 'import', Location(0, 6, 3)),
        Token('NAME', 'stdout', Location(7, 13, 3)),
        Token('NAME', 'from', Location(14, 18, 3)),
        Token('STRING', '"io.weird"', Location(19, 29, 3)),
        Token('NEWLINE', '\n', Location(29, 32, 3)),
        # line 4
        Token('NEWLINE', '\n', Location(0, 3, 4)),
        # line 5
        Token('NAME', 'function', Location(0, 8, 5)),
        Token('NAME', 'main', Location(9, 13, 5)),
        Token('OP', '(', Location(13, 14, 5)),
        Token('OP', ')', Location(14, 15, 5)),
        Token('OP', '{', Location(16, 17, 5)),
        Token('NEWLINE', '\n', Location(17, 20, 5)),
        # line 6
        Token('NAME', 'stdout', Location(4, 10, 6)),
        Token('OP', '.', Location(10, 11, 6)),
        Token('NAME', 'print', Location(11, 16, 6)),
        Token('OP', '(', Location(16, 17, 6)),
        Token('STRING', '"Hello World!"', Location(17, 31, 6)),
        Token('OP', ')', Location(31, 32, 6)),
        Token('NEWLINE', '\n', Location(32, 35, 6)),
        # line 7
        Token('OP', '}', Location(0, 1, 7)),
        Token('NEWLINE', '\n', Location(1, 4, 7)),
    ]
    assert tokens == expected
def test_function_returns_function():
    """Chained calls like lol()()() nest with the first call innermost."""
    chained = get_ast('lol()()()')
    first_call = ast.FunctionCall(
        Location(0, 5), ast.Name(Location(0, 3), 'lol'), [])
    second_call = ast.FunctionCall(Location(0, 7), first_call, [])
    third_call = ast.FunctionCall(Location(0, 9), second_call, [])
    assert chained == [third_call]

    chained_with_args = get_ast('lol(1, 2)(3, 4)')
    inner_call = ast.FunctionCall(
        Location(0, 9),
        ast.Name(Location(0, 3), 'lol'),
        [
            ast.Integer(Location(4, 5), '1'),
            ast.Integer(Location(7, 8), '2'),
        ])
    outer_call = ast.FunctionCall(
        Location(0, 15),
        inner_call,
        [
            ast.Integer(Location(10, 11), '3'),
            ast.Integer(Location(13, 14), '4'),
        ])
    assert chained_with_args == [outer_call]
def test_return():
    """A return statement parses to a Return node wrapping its value."""
    parsed = get_ast('return 123')
    returned_value = ast.Integer(Location(7, 10), '123')
    assert parsed == [
        ast.Return(Location(0, 10), returned_value),
    ]
def test_function_defs():
    """Function definitions with a body, and with arguments and a return type."""
    with_body = get_ast('function thing() {\n'
                        '\tlol()\n'
                        '\tlol()\n'
                        '}')
    expected_body = [
        ast.FunctionCall(Location(4, 9, 2),
                         ast.Name(Location(4, 7, 2), 'lol'), []),
        ast.FunctionCall(Location(4, 9, 3),
                         ast.Name(Location(4, 7, 3), 'lol'), []),
    ]
    assert with_body == [
        ast.FunctionDef(
            Location(0, None),
            'thing',            # function name
            [],                 # arguments
            None,               # return type
            expected_body),     # body
    ]

    # 0-statement 1-liner
    one_liner = get_ast(
        'function thing(Int i, String s) returns String { }')
    # arguments are (type, name) tuples
    expected_args = [
        (ast.Name(Location(15, 18), 'Int'),
         ast.Name(Location(19, 20), 'i')),
        (ast.Name(Location(22, 28), 'String'),
         ast.Name(Location(29, 30), 's')),
    ]
    assert one_liner == [
        ast.FunctionDef(
            Location(0, 50),
            'thing',
            expected_args,
            ast.Name(Location(40, 46), 'String'),
            []),
    ]
def test_parentheses():
    """Redundant parentheses around an expression add no AST nodes."""
    parsed = get_ast('(((("hello"))))')

    # FIXME: currently the whole expression starts at column 4 and
    # that's not quite right...
    expected = [ast.String(Location(4, 11), 'hello')]
    assert parsed == expected
def test_ops(error_at):
    """Each known operator is a single OP token; ';' is rejected."""
    operators = '= ( ) { } [ ] , .'.split()
    for symbol in operators:
        expected = [Token('OP', symbol, Location(0, len(symbol)))]
        assert tokenize(symbol) == expected

    with error_at(0, 1, msg="I don't know what this is"):
        tokenize(';')