def test_array_mixed1(self):
    input_data = '{1, "hello"}'
    tokens = Lexer(input_data, lexer.STRING_INPUT_FILE).tokenize()
    parser = Parser(tokens)
    ast = parser.parse()
    self.assertEqual(1, len(ast))

    l_curly_token = Token(TokenType.L_CURLY, lexer.STRING_INPUT_FILE, 1, 1)
    number_token = Token(TokenType.NUMBER, lexer.STRING_INPUT_FILE, 1, 2, value='1')
    l_double_quotes_token = Token(TokenType.DOUBLE_QUOTES, lexer.STRING_INPUT_FILE, 1, 5)
    string_token = Token(TokenType.WORD, lexer.STRING_INPUT_FILE, 1, 6, value='hello')
    r_double_quotes_token = Token(TokenType.DOUBLE_QUOTES, lexer.STRING_INPUT_FILE, 1, 11)
    r_curly_token = Token(TokenType.R_CURLY, lexer.STRING_INPUT_FILE, 1, 12)

    constant = Constant(number_token)
    string_literal = StringLiteral([l_double_quotes_token, string_token, r_double_quotes_token])
    array = Array(l_curly_token, [constant, string_literal], r_curly_token)
    self.assertEqual([array], ast)
def test_assignment3(self):
    input_data = "a = 'Hello';"
    tokens = Lexer(input_data, lexer.STRING_INPUT_FILE).tokenize()
    parser = Parser(tokens)
    ast = parser.parse()
    self.assertEqual(1, len(ast))

    left_token = Token(TokenType.WORD, lexer.STRING_INPUT_FILE, 1, 1, value='a')
    equals_token = Token(TokenType.EQUALS, lexer.STRING_INPUT_FILE, 1, 3)
    right_tokens = [
        Token(TokenType.QUOTE, lexer.STRING_INPUT_FILE, 1, 5),
        Token(TokenType.WORD, lexer.STRING_INPUT_FILE, 1, 6, value='Hello'),
        Token(TokenType.QUOTE, lexer.STRING_INPUT_FILE, 1, 11)
    ]
    semicolon_token = Token(TokenType.SEMICOLON, lexer.STRING_INPUT_FILE, 1, 12)

    left_ast = Identifier(left_token)
    right_ast = StringLiteral(right_tokens)
    assignment = Assignment(left_ast, equals_token, right_ast, semicolon_token)
    self.assertEqual([assignment], ast)
def test_array_simple5(self):
    input_data = "array[] = {};"
    tokens = Lexer(input_data, lexer.STRING_INPUT_FILE).tokenize()
    parser = Parser(tokens)
    ast = parser.parse()
    self.assertEqual(1, len(ast))

    name = Token(TokenType.WORD, lexer.STRING_INPUT_FILE, 1, 1, value='array')
    l_square_token = Token(TokenType.L_SQUARE, lexer.STRING_INPUT_FILE, 1, 6)
    r_square_token = Token(TokenType.R_SQUARE, lexer.STRING_INPUT_FILE, 1, 7)
    identifier = Identifier(name)
    array_declaration = ArrayDeclaration(identifier, l_square_token, r_square_token)

    equals_token = Token(TokenType.EQUALS, lexer.STRING_INPUT_FILE, 1, 9)
    l_curly_token = Token(TokenType.L_CURLY, lexer.STRING_INPUT_FILE, 1, 11)
    r_curly_token = Token(TokenType.R_CURLY, lexer.STRING_INPUT_FILE, 1, 12)
    array = Array(l_curly_token, [], r_curly_token)
    semicolon_token = Token(TokenType.SEMICOLON, lexer.STRING_INPUT_FILE, 1, 13)

    assignment = Assignment(array_declaration, equals_token, array, semicolon_token)
    self.assertEqual([assignment], ast)
def test_assignment2(self):
    input_data = "a=1337;"
    tokens = Lexer(input_data, lexer.STRING_INPUT_FILE).tokenize()
    parser = Parser(tokens)
    ast = parser.parse()
    self.assertEqual(1, len(ast))

    left_token = Token(TokenType.WORD, lexer.STRING_INPUT_FILE, 1, 1, value='a')
    equals_token = Token(TokenType.EQUALS, lexer.STRING_INPUT_FILE, 1, 2)
    right_token = Token(TokenType.NUMBER, lexer.STRING_INPUT_FILE, 1, 3, value='1337')
    semicolon_token = Token(TokenType.SEMICOLON, lexer.STRING_INPUT_FILE, 1, 7)

    left_ast = Identifier(left_token)
    right_ast = Constant(right_token)
    assignment = Assignment(left_ast, equals_token, right_ast, semicolon_token)
    self.assertEqual([assignment], ast)
def test_peek(self):
    input_data = 'hello'
    lexer = Lexer(input_data, armaclassparser.lexer.STRING_INPUT_FILE)
    self.assertEqual('h', lexer.peek())
    self.assertEqual('h', lexer.peek(1))
    self.assertEqual('he', lexer.peek(2))
    self.assertEqual('hello', lexer.peek(5))
def test_include1(self):
    dir_path = os.path.dirname(os.path.realpath(__file__))
    file_path = os.path.join(dir_path, "examples/include/01_include_test_config.cpp")
    with open(file_path, 'r', encoding='utf-8', newline=None) as fp:
        input_data = fp.read()

    tokens = Lexer(input_data, file_path).tokenize()
    preprocessor = PreProcessor(tokens, file_path)
    preprocessor.preprocess()
    tokens = preprocessor.tokens
    output = generator.from_tokens(tokens)

    expected_output = """1_include_test_file1_line1
1_include_test_file1_line2
1_include_test_file1_line3
1_include_test_file2_line1
1_include_test_file2_line2
1_include_test_file2_line3
class Foo {};
1_include_test_file3_line1
1_include_test_file3_line2
1_include_test_file3_line3"""
    self.assertEqual(expected_output, output)
def test_assignment4(self):
    input_data = 'author = "Schwaggot";'
    tokens = Lexer(input_data, lexer.STRING_INPUT_FILE).tokenize()
    parser = Parser(tokens)
    ast = parser.parse()
    self.assertEqual(1, len(ast))

    left_token = Token(TokenType.WORD, lexer.STRING_INPUT_FILE, 1, 1, value='author')
    equals_token = Token(TokenType.EQUALS, lexer.STRING_INPUT_FILE, 1, 8)
    right_tokens = [
        Token(TokenType.DOUBLE_QUOTES, lexer.STRING_INPUT_FILE, 1, 10),
        Token(TokenType.WORD, lexer.STRING_INPUT_FILE, 1, 11, value='Schwaggot'),
        Token(TokenType.DOUBLE_QUOTES, lexer.STRING_INPUT_FILE, 1, 20)
    ]
    semicolon_token = Token(TokenType.SEMICOLON, lexer.STRING_INPUT_FILE, 1, 21)

    left_ast = Identifier(left_token)
    right_ast = StringLiteral(right_tokens)
    assignment = Assignment(left_ast, equals_token, right_ast, semicolon_token)
    self.assertEqual([assignment], ast)
def test_array_simple4(self):
    input_data = "{1, 2,3}"
    tokens = Lexer(input_data, lexer.STRING_INPUT_FILE).tokenize()
    parser = Parser(tokens)
    ast = parser.parse()
    self.assertEqual(1, len(ast))

    l_curly_token = Token(TokenType.L_CURLY, lexer.STRING_INPUT_FILE, 1, 1)
    number_token1 = Token(TokenType.NUMBER, lexer.STRING_INPUT_FILE, 1, 2, value='1')
    number_token2 = Token(TokenType.NUMBER, lexer.STRING_INPUT_FILE, 1, 5, value='2')
    number_token3 = Token(TokenType.NUMBER, lexer.STRING_INPUT_FILE, 1, 7, value='3')
    r_curly_token = Token(TokenType.R_CURLY, lexer.STRING_INPUT_FILE, 1, 8)

    constant1 = Constant(number_token1)
    constant2 = Constant(number_token2)
    constant3 = Constant(number_token3)
    array = Array(l_curly_token, [constant1, constant2, constant3], r_curly_token)
    self.assertEqual([array], ast)
def test_string_literal1(self):
    input_data = 'hello'
    expected = [
        Token(TokenType.WORD, armaclassparser.lexer.STRING_INPUT_FILE, 1, 1, 'hello')
    ]
    tokens = Lexer(input_data, lexer.STRING_INPUT_FILE).tokenize()
    self.assertEqual(expected, tokens)
def test_include_simple(self):
    input_data = '#include'
    expected = [
        Token(TokenType.KEYWORD_INCLUDE, armaclassparser.lexer.STRING_INPUT_FILE, 1, 1)
    ]
    tokens = Lexer(input_data, lexer.STRING_INPUT_FILE).tokenize()
    self.assertEqual(expected, tokens)
def test_class_simple(self):
    input_data = 'class'
    expected = [
        Token(TokenType.KEYWORD_CLASS, armaclassparser.lexer.STRING_INPUT_FILE, 1, 1)
    ]
    tokens = Lexer(input_data, lexer.STRING_INPUT_FILE).tokenize()
    self.assertEqual(expected, tokens)
def test_remove_multi_line_comment3(self):
    input_data = "/* Hello World*/"
    from armaclassparser import lexer
    tokens = Lexer(input_data, lexer.STRING_INPUT_FILE).tokenize()
    preprocessor = PreProcessor(tokens, None)
    preprocessor.tokens = tokens
    preprocessor._remove_comments()
    self.assertEqual(0, len(preprocessor.tokens))
def test_number_literal4(self):
    input_data = '-12.34'
    expected = [
        Token(TokenType.NUMBER, armaclassparser.lexer.STRING_INPUT_FILE, 1, 1, '-12.34')
    ]
    tokens = Lexer(input_data, lexer.STRING_INPUT_FILE).tokenize()
    self.assertEqual(expected, tokens)
def test_file_sample01(self):
    dir_path = os.path.dirname(os.path.realpath(__file__))
    file_path = os.path.join(dir_path, "examples/01_simple_config.cpp")
    with open(file_path, 'r', encoding='utf-8') as fp:
        input_data = fp.read()
    tokens = Lexer(input_data, file_path).tokenize()
    parser = Parser(tokens)
    parser.parse()
def test_remove_single_line_comment6(self):
    input_data = """
// hello"""
    from armaclassparser import lexer
    tokens = Lexer(input_data, lexer.STRING_INPUT_FILE).tokenize()
    preprocessor = PreProcessor(tokens, None)
    preprocessor.tokens = tokens
    preprocessor._remove_comments()
    self.assertEqual(1, len(preprocessor.tokens))
def test_escaped_newlines(self):
    input_data = """\\
"""
    tokens = Lexer(input_data, lexer.STRING_INPUT_FILE).tokenize()
    preprocessor = PreProcessor(tokens, lexer.STRING_INPUT_FILE)
    preprocessor._remove_escaped_newlines()
    output = generator.from_tokens(preprocessor.tokens)
    expected_output = ""
    self.assertEqual(expected_output, output)
def test_remove_single_line_comment1(self):
    input_data = '''#include "script_component.hpp"
// Hello World
class Foo {};'''
    from armaclassparser import lexer
    tokens = Lexer(input_data, lexer.STRING_INPUT_FILE).tokenize()
    len_before = len(tokens)
    preprocessor = PreProcessor(tokens, None)
    preprocessor.tokens = tokens
    preprocessor._remove_comments()
    self.assertEqual(len_before - 5, len(preprocessor.tokens))
def test_remove_multi_line_comment2(self):
    input_data = '''/* Hello World */
class Foo {};'''
    from armaclassparser import lexer
    tokens = Lexer(input_data, lexer.STRING_INPUT_FILE).tokenize()
    len_before = len(tokens)
    preprocessor = PreProcessor(tokens, None)
    preprocessor.tokens = tokens
    preprocessor._remove_comments()
    self.assertEqual(len_before - 7, len(preprocessor.tokens))
def test_array_empty(self):
    input_data = "{}"
    tokens = Lexer(input_data, lexer.STRING_INPUT_FILE).tokenize()
    parser = Parser(tokens)
    ast = parser.parse()
    self.assertEqual(1, len(ast))

    l_curly_token = Token(TokenType.L_CURLY, lexer.STRING_INPUT_FILE, 1, 1)
    r_curly_token = Token(TokenType.R_CURLY, lexer.STRING_INPUT_FILE, 1, 2)
    array = Array(l_curly_token, [], r_curly_token)
    self.assertEqual([array], ast)
def test_next(self):
    input_data = 'hello'
    lexer = Lexer(input_data, armaclassparser.lexer.STRING_INPUT_FILE)
    self.assertEqual('h', lexer.next())
    self.assertEqual('e', lexer.next())
    self.assertEqual('l', lexer.next())
    self.assertEqual('l', lexer.next())
    self.assertEqual('o', lexer.next())
    # advancing past the end of the input is expected to raise
    try:
        lexer.next()
        self.fail('expected exception')
    except RuntimeError:
        pass
def test_identifier3(self):
    input_data = "class0"
    tokens = Lexer(input_data, lexer.STRING_INPUT_FILE).tokenize()
    self.assertEqual(1, len(tokens))

    parser = Parser(tokens)
    ast = parser.parse()
    self.assertEqual(1, len(ast))

    expected_token = Token(TokenType.WORD, lexer.STRING_INPUT_FILE, 1, 1, value="class0")
    identifier = Identifier(expected_token)
    self.assertEqual([identifier], ast)
def _replace_includes(self):
    """Processes #include directives by parsing the included file and inserting its contents in place of the #include."""
    self.index = 0
    while self.index < len(self.tokens):
        token = self.token()
        if token.token_type == TokenType.KEYWORD_INCLUDE:
            # consume '#include', the following whitespace and the file path
            include_start = self.index
            self.expect(TokenType.KEYWORD_INCLUDE)
            self.expect_next(TokenType.WHITESPACE)
            self.index += 1
            include_file_path = self._parse_include_file_path()
            include_end = self.index

            # recursively process the file to be included
            dst_file_path = self._resolve_include_file_path(include_file_path)
            with open(dst_file_path, 'r', encoding='utf-8', newline=None) as fp:
                input_data = fp.read()
            tokens = Lexer(input_data, dst_file_path).tokenize()
            preprocessor = PreProcessor(tokens, dst_file_path)
            preprocessor.defines = self.defines
            tokens = preprocessor.preprocess()

            # replace include statement with included content
            del self.tokens[include_start:include_end]
            self.tokens = self.tokens[:include_start] + tokens + self.tokens[include_start:]
            self.index += len(tokens) - (include_end - include_start)
        elif token.token_type in [TokenType.COMMENT, TokenType.MCOMMENT_START, TokenType.MCOMMENT_END]:
            msg = 'expected comments to have been handled already, but found {}'.format(repr(token))
            raise PreProcessingError(msg)
        else:
            self.index += 1
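# Usage sketch (illustrative only, not part of the original sources): the flow that
# _replace_includes participates in is lex -> preprocess -> regenerate, as exercised by
# test_include1 above. Comments are removed before includes are replaced, which is why
# _replace_includes raises on any remaining comment tokens. The file_path argument here
# is a hypothetical example path.
def _example_preprocess_file(file_path):
    with open(file_path, 'r', encoding='utf-8', newline=None) as fp:
        input_data = fp.read()
    tokens = Lexer(input_data, file_path).tokenize()
    preprocessor = PreProcessor(tokens, file_path)
    preprocessor.preprocess()  # comment removal and #include replacement happen here
    return generator.from_tokens(preprocessor.tokens)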
def test_array_simple2(self):
    input_data = "{ 1 }"
    tokens = Lexer(input_data, lexer.STRING_INPUT_FILE).tokenize()
    parser = Parser(tokens)
    ast = parser.parse()
    self.assertEqual(1, len(ast))

    l_curly_token = Token(TokenType.L_CURLY, lexer.STRING_INPUT_FILE, 1, 1)
    number_token = Token(TokenType.NUMBER, lexer.STRING_INPUT_FILE, 1, 3, value='1')
    r_curly_token = Token(TokenType.R_CURLY, lexer.STRING_INPUT_FILE, 1, 5)
    constant = Constant(number_token)
    array = Array(l_curly_token, [constant], r_curly_token)
    self.assertEqual([array], ast)
def test_class(self):
    input_data = 'class Foo {};'
    expected = [
        Token(TokenType.KEYWORD_CLASS, armaclassparser.lexer.STRING_INPUT_FILE, 1, 1),
        Token(TokenType.WHITESPACE, armaclassparser.lexer.STRING_INPUT_FILE, 1, 6),
        Token(TokenType.WORD, armaclassparser.lexer.STRING_INPUT_FILE, 1, 7, 'Foo'),
        Token(TokenType.WHITESPACE, armaclassparser.lexer.STRING_INPUT_FILE, 1, 10),
        Token(TokenType.L_CURLY, armaclassparser.lexer.STRING_INPUT_FILE, 1, 11),
        Token(TokenType.R_CURLY, armaclassparser.lexer.STRING_INPUT_FILE, 1, 12),
        Token(TokenType.SEMICOLON, armaclassparser.lexer.STRING_INPUT_FILE, 1, 13)
    ]
    tokens = Lexer(input_data, lexer.STRING_INPUT_FILE).tokenize()
    self.assertEqual(expected, tokens)
def test_include(self):
    input_data = '#include "script_component.hpp"'
    expected = [
        Token(TokenType.KEYWORD_INCLUDE, armaclassparser.lexer.STRING_INPUT_FILE, 1, 1),
        Token(TokenType.WHITESPACE, armaclassparser.lexer.STRING_INPUT_FILE, 1, 9),
        Token(TokenType.DOUBLE_QUOTES, armaclassparser.lexer.STRING_INPUT_FILE, 1, 10),
        Token(TokenType.WORD, armaclassparser.lexer.STRING_INPUT_FILE, 1, 11, "script_component"),
        Token(TokenType.DOT, armaclassparser.lexer.STRING_INPUT_FILE, 1, 27),
        Token(TokenType.WORD, armaclassparser.lexer.STRING_INPUT_FILE, 1, 28, "hpp"),
        Token(TokenType.DOUBLE_QUOTES, armaclassparser.lexer.STRING_INPUT_FILE, 1, 31)
    ]
    tokens = Lexer(input_data, lexer.STRING_INPUT_FILE).tokenize()
    self.assertEqual(expected, tokens)
def test_array_declaration1(self):
    input_data = "array[]"
    tokens = Lexer(input_data, lexer.STRING_INPUT_FILE).tokenize()
    parser = Parser(tokens)
    ast = parser.parse()
    self.assertEqual(1, len(ast))

    name = Token(TokenType.WORD, lexer.STRING_INPUT_FILE, 1, 1, value='array')
    l_square_token = Token(TokenType.L_SQUARE, lexer.STRING_INPUT_FILE, 1, 6)
    r_square_token = Token(TokenType.R_SQUARE, lexer.STRING_INPUT_FILE, 1, 7)
    identifier = Identifier(name)
    array_declaration = ArrayDeclaration(identifier, l_square_token, r_square_token)
    self.assertEqual([array_declaration], ast)
def test_multiline1(self):
    input_data = '''#include "script_component.hpp"
class Foo {};'''
    expected = [
        Token(TokenType.KEYWORD_INCLUDE, armaclassparser.lexer.STRING_INPUT_FILE, 1, 1),
        Token(TokenType.WHITESPACE, armaclassparser.lexer.STRING_INPUT_FILE, 1, 9),
        Token(TokenType.DOUBLE_QUOTES, armaclassparser.lexer.STRING_INPUT_FILE, 1, 10),
        Token(TokenType.WORD, armaclassparser.lexer.STRING_INPUT_FILE, 1, 11, "script_component"),
        Token(TokenType.DOT, armaclassparser.lexer.STRING_INPUT_FILE, 1, 27),
        Token(TokenType.WORD, armaclassparser.lexer.STRING_INPUT_FILE, 1, 28, "hpp"),
        Token(TokenType.DOUBLE_QUOTES, armaclassparser.lexer.STRING_INPUT_FILE, 1, 31),
        Token(TokenType.NEWLINE, armaclassparser.lexer.STRING_INPUT_FILE, 1, 32),
        Token(TokenType.KEYWORD_CLASS, armaclassparser.lexer.STRING_INPUT_FILE, 2, 1),
        Token(TokenType.WHITESPACE, armaclassparser.lexer.STRING_INPUT_FILE, 2, 6),
        Token(TokenType.WORD, armaclassparser.lexer.STRING_INPUT_FILE, 2, 7, 'Foo'),
        Token(TokenType.WHITESPACE, armaclassparser.lexer.STRING_INPUT_FILE, 2, 10),
        Token(TokenType.L_CURLY, armaclassparser.lexer.STRING_INPUT_FILE, 2, 11),
        Token(TokenType.R_CURLY, armaclassparser.lexer.STRING_INPUT_FILE, 2, 12),
        Token(TokenType.SEMICOLON, armaclassparser.lexer.STRING_INPUT_FILE, 2, 13)
    ]
    tokens = Lexer(input_data, lexer.STRING_INPUT_FILE).tokenize()
    self.assertEqual(expected, tokens)
def _test_generator_tokens(self, input_data, expected_output):
    tokens = Lexer(input_data, lexer.STRING_INPUT_FILE).tokenize()
    output = generator.from_tokens(tokens)
    self.assertEqual(expected_output, output)
def _test_preprocessor(self, input_data, expected_output):
    tokens = Lexer(input_data, lexer.STRING_INPUT_FILE).tokenize()
    preprocessor = PreProcessor(tokens, lexer.STRING_INPUT_FILE)
    preprocessor.preprocess()
    output = generator.from_tokens(preprocessor.tokens)
    self.assertEqual(expected_output, output)
def test_file_sample01(self):
    dir_path = os.path.dirname(os.path.realpath(__file__))
    file_path = os.path.join(dir_path, "examples/01_simple_config.cpp")
    with open(file_path, 'r', encoding='utf-8', newline=None) as fp:
        input_data = fp.read()
    Lexer(input_data, lexer.STRING_INPUT_FILE).tokenize()