def test_compile_func_call(self):
    # Compile a function declaration, its body scope and a call to it, then
    # compare the generated C file line by line.
    # This also tests scope_begin and scope_over
    out_path = "testing.c"
    opcodes = [
        OpCode("func_decl", "hello---", ""),
        OpCode("scope_begin", "", ""),
        OpCode("print", '"World"', None),
        OpCode("scope_over", "", ""),
        OpCode("func_call", "hello---", ""),
    ]
    table = SymbolTable()
    table.symbol_table = {
        1: ["hello", "var", "function"],
        2: ['"World"', "string", "constant"],
    }
    expected_lines = [
        "#include <stdio.h>",
        "",
        "void hello(void) {",
        '\tprintf("World");',
        "}",
        "\thello();",
        "",
    ]

    compile(opcodes, out_path, table)
    with open(out_path, "r") as c_file:
        generated_lines = c_file.read().split("\n")
    # The generated file is deleted before the comparison runs
    os.remove(out_path)

    self.assertEqual(generated_lines, expected_lines)
def test_compile_while(self):
    # Compile a variable assignment followed by a while loop with a print
    # body and compare the generated C file line by line.
    out_path = "testing.c"
    opcodes = [
        OpCode("var_assign", "i---0", "int"),
        OpCode("while", "i < 10", None),
        OpCode("scope_begin", "", ""),
        OpCode("print", '"%d", i', None),
        OpCode("scope_over", "", ""),
    ]
    table = SymbolTable()
    table.symbol_table = {
        1: ["i", "int", "variable"],
        2: ["0", "int", "constant"],
        3: ["10", "int", "constant"],
    }
    expected_lines = [
        "#include <stdio.h>",
        "\tint i = 0;",
        "\twhile(i < 10) {",
        '\tprintf("%d", i);',
        "}",
        "",
    ]

    compile(opcodes, out_path, table)
    with open(out_path, "r") as c_file:
        generated_lines = c_file.read().split("\n")
    # The generated file is deleted before the comparison runs
    os.remove(out_path)

    self.assertEqual(generated_lines, expected_lines)
def test_compile_for(self):
    # Compile a for opcode (fields joined by &&&) with a print body and
    # compare the generated C file line by line.
    out_path = "testing.c"
    opcodes = [
        OpCode("for", "i&&&1&&&10&&&+&&&<&&&1", None),
        OpCode("scope_begin", "", ""),
        OpCode("print", '"%d", i', None),
        OpCode("scope_over", "", ""),
    ]
    table = SymbolTable()
    table.symbol_table = {
        1: ["i", "int", "variable"],
        2: ["1", "int", "constant"],
        3: ["10", "int", "constant"],
        4: ["1", "int", "constant"],
    }
    expected_lines = [
        "#include <stdio.h>",
        "\tfor(int i = 1; i < 10; i+=1) {",
        '\tprintf("%d", i);',
        "}",
        "",
    ]

    compile(opcodes, out_path, table)
    with open(out_path, "r") as c_file:
        generated_lines = c_file.read().split("\n")
    # The generated file is deleted before the comparison runs
    os.remove(out_path)

    self.assertEqual(generated_lines, expected_lines)
def test_compile_single_multi_line_comments(self):
    # Test single and multi line comments: compile one opcode of each kind
    # and compare the generated C file line by line.
    opcodes = [
        OpCode("single_line_comment", " single line", ""),
        # NOTE(review): the whitespace inside this literal is significant --
        # the expected output below shows the comment body on its own line.
        # Confirm the exact line breaks against the compiler's
        # multi_line_comment handling before reformatting.
        OpCode(
            "multi_line_comment",
            """
 Multi line
 """,
            "",
        ),
    ]
    # No symbols are registered; the table is passed to compile() as created
    table = SymbolTable()
    compile(opcodes, "testing.c", table)
    with open("testing.c", "r") as file:
        data = file.read().split("\n")
    # The generated file is deleted before the comparison runs
    os.remove("testing.c")
    self.assertEqual(
        data,
        [
            "",
            "\t// single line ",
            "/* ",
            " Multi line",
            " */",
            "",
        ],
    )
def unary_statement(tokens, i, table, func_ret_type):
    """
    Parse unary statement

    Params
    ======
    tokens        (list)        = List of tokens
    i             (int)         = Current index in token
    table         (SymbolTable) = Symbol table constructed holding information about identifiers and constants
    func_ret_type               = Function return type bookkeeping; forwarded to expression() and handed back to the caller

    Returns
    =======
    OpCode, int, func_ret_type: The opcode for the unary code, the index after parsing unary statement
                                and the function return type bookkeeping

    Grammar
    =======
    unary_statement -> id operator | operator id
    id              -> [a-zA-Z_]?[a-zA-Z0-9_]*
    operator        -> ++ | --
    """
    unary_tokens = ["increment", "decrement"]

    # Prefix form: the operator is the current token and the identifier follows it
    if tokens[i].type in unary_tokens:
        check_if(
            tokens[i + 1].type,
            "id",
            "Expected identifier after unary operator",
            tokens[i + 1].line_num,
        )

        operator = "++" if tokens[i].type == "increment" else "--"

        # Only the identifier's name is needed here. The symbol's datatype must
        # not be unpacked into func_ret_type: that value is returned to the
        # caller and would otherwise be replaced by a datatype string.
        value, _, _ = table.get_by_id(tokens[i + 1].val)

        return OpCode("unary", operator + " --- " + str(value)), i + 2, func_ret_type

    # Postfix form: the identifier is the current token and the operator follows it
    check_if(
        tokens[i + 1].type,
        unary_tokens,
        "Expected unary operator after identifier",
        tokens[i + 1].line_num,
    )

    # Let expression() build the text for the identifier and its operator
    op_value, _, i, func_ret_type = expression(
        tokens,
        i,
        table,
        "",
        accept_empty_expression=True,
        expect_paren=False,
        func_ret_type=func_ret_type,
    )

    # Return the opcode and i (the token after unary statement)
    return OpCode("unary", op_value), i, func_ret_type
def test_check_include(self):
    # A print opcode in the list yields the stdio include line
    with_print = [OpCode("print", ""), OpCode("var_assign", "")]
    self.assertEqual(check_include(with_print), "#include <stdio.h>")

    # Without a print opcode no include line is produced
    without_print = [OpCode("var_assign", "")]
    self.assertEqual(check_include(without_print), "")
class TestOpCodeClass(unittest.TestCase):
    """Unit tests for the string form and the opcode-to-digit mapping of OpCode."""

    def setUp(self):
        # Every test starts from the same var_assign opcode
        self.opcode = OpCode("var_assign", "a---1 + 2", "int")

    def test__str__(self):
        expected_text = "OpCode('var_assign', 'a---1 + 2', 'int')"
        self.assertEqual(str(self.opcode), expected_text)

    def test_opcode2dig(self):
        # Two known opcode names and one name ("hello") that is expected to map to 0
        expected_digits = (("var_assign", 2), ("unary", 12), ("hello", 0))
        for opcode_name, digit in expected_digits:
            self.assertEqual(self.opcode.opcode2dig(opcode_name), digit)
def test_compile_main_end_main(self):
    # Compile a MAIN / END_MAIN pair and compare the generated C file line by line.
    out_path = "testing.c"
    opcodes = [OpCode("MAIN", "", ""), OpCode("END_MAIN", "", "")]
    table = SymbolTable()
    expected_lines = ["", "", "int main() {", "", "\treturn 0;", "}"]

    compile(opcodes, out_path, table)
    with open(out_path, "r") as c_file:
        generated_lines = c_file.read().split("\n")
    # The generated file is deleted before the comparison runs
    os.remove(out_path)

    self.assertEqual(generated_lines, expected_lines)
def test_compile_continue_break(self):
    # Test continue and break statements
    out_path = "testing.c"
    opcodes = [OpCode("continue", "", ""), OpCode("break", "", "")]
    table = SymbolTable()
    expected_lines = ["", "\tcontinue;", "\tbreak;", ""]

    compile(opcodes, out_path, table)
    with open(out_path, "r") as c_file:
        generated_lines = c_file.read().split("\n")
    # The generated file is deleted before the comparison runs
    os.remove(out_path)

    self.assertEqual(generated_lines, expected_lines)
def switch_statement(tokens, i, table, func_ret_type):
    """
    Parse switch statement

    Params
    ======
    tokens        (list)        = List of tokens
    i             (int)         = Current index in token
    table         (SymbolTable) = Symbol table constructed holding information about identifiers and constants
    func_ret_type               = Function return type bookkeeping; forwarded to expression() and handed back to the caller

    Returns
    =======
    OpCode, int, func_ret_type: The opcode for the switch code, the index after parsing switch statement
                                and the function return type bookkeeping
    """
    # The switch keyword must be followed by (
    opening_token = tokens[i]
    check_if(
        opening_token.type,
        "left_paren",
        "Expected ( after switch",
        opening_token.line_num,
    )

    # Parse the expression being switched on
    switch_expr, _, i, func_ret_type = expression(
        tokens,
        i + 1,
        table,
        "Expected expression inside switch statement",
        func_ret_type=func_ret_type,
    )

    # The token just before the returned index must be the closing )
    closing_token = tokens[i - 1]
    check_if(
        closing_token.type,
        "right_paren",
        "Expected ) after expression in switch",
        closing_token.line_num,
    )

    # The switch body must open with {; the line reported is that of tokens[i]
    check_if(
        tokens[i + 1].type,
        "left_brace",
        "Expected { after switch statement",
        tokens[i].line_num,
    )

    # The last character of the expression text is dropped
    # (presumably the trailing ")" -- confirm against expression())
    switch_opcode = OpCode("switch", switch_expr[:-1], "")
    return switch_opcode, i + 1, func_ret_type
def test_compile_assign(self):
    # Compile a declaration without a value followed by an assignment and
    # compare the generated C file line by line.
    out_path = "testing.c"
    opcodes = [
        OpCode("var_no_assign", "a", None),
        OpCode("assign", "a---=---3.14159", ""),
    ]
    table = SymbolTable()
    table.symbol_table = {
        1: ["a", "float", "variable"],
        2: ["3.14159", "float", "constant"],
    }
    expected_lines = ["", "\tfloat a;", "\ta = 3.14159;", ""]

    compile(opcodes, out_path, table)
    with open(out_path, "r") as c_file:
        generated_lines = c_file.read().split("\n")
    # The generated file is deleted before the comparison runs
    os.remove(out_path)

    self.assertEqual(generated_lines, expected_lines)
def test_compile_unary(self):
    # Compile a variable assignment followed by a unary opcode and compare
    # the generated C file line by line.
    out_path = "testing.c"
    opcodes = [
        OpCode("var_assign", "a---1", "int"),
        OpCode("unary", "a ++ ", None),
    ]
    table = SymbolTable()
    table.symbol_table = {
        1: ["a", "int", "variable"],
        2: ["1", "int", "constant"],
    }
    expected_lines = ["", "\tint a = 1;", "\ta++;", ""]

    compile(opcodes, out_path, table)
    with open(out_path, "r") as c_file:
        generated_lines = c_file.read().split("\n")
    # The generated file is deleted before the comparison runs
    os.remove(out_path)

    self.assertEqual(generated_lines, expected_lines)
def exit_statement(tokens, i, table, func_ret_type):
    """
    Parse exit statement

    Params
    ======
    tokens        (list)        = List of tokens
    i             (int)         = Current index in token
    table         (SymbolTable) = Symbol table constructed holding information about identifiers and constants
    func_ret_type               = Function return type bookkeeping; forwarded to expression() and handed back to the caller

    Returns
    =======
    OpCode, int, func_ret_type: The opcode for the exit code, the index after parsing exit statement
                                and the function return type bookkeeping

    Grammar
    =======
    exit_statement -> exit(expr)
    expr            -> number
    number          -> [0-9]+
    """
    # Check if ( follows exit statement
    check_if(
        tokens[i].type,
        "left_paren",
        "Expected ( after exit statement",
        tokens[i].line_num,
    )

    # Check if number follows ( in exit statement
    # (the line reported is that of the opening parenthesis)
    check_if(
        tokens[i + 1].type,
        "number",
        "Expected number after ( in exit statement",
        tokens[i].line_num,
    )

    # Check if expression follows ( in exit statement
    op_value, _, i, func_ret_type = expression(
        tokens,
        i + 1,
        table,
        "Expected expression inside exit statement",
        func_ret_type=func_ret_type,
    )

    # Check if ) follows expression in exit statement
    check_if(
        tokens[i - 1].type,
        "right_paren",
        "Expected ) after expression in exit statement",
        tokens[i - 1].line_num,
    )

    # The last character of the expression text is dropped
    # (presumably the trailing ")" -- confirm against expression())
    return OpCode("exit", op_value[:-1]), i, func_ret_type
def test_compile_ptr_assign(self):
    # Compile a variable assignment followed by a pointer assignment that
    # takes its address and compare the generated C file line by line.
    out_path = "testing.c"
    opcodes = [
        OpCode("var_assign", "a---1", "int"),
        OpCode("ptr_assign", "n---&a---1", "int"),
    ]
    table = SymbolTable()
    table.symbol_table = {
        1: ["a", "int", "variable"],
        2: ["1", "int", "constant"],
        3: ["n", "int", "variable"],
    }
    expected_lines = ["", "\tint a = 1;", "\tint *n = &a;", ""]

    compile(opcodes, out_path, table)
    with open(out_path, "r") as c_file:
        generated_lines = c_file.read().split("\n")
    # The generated file is deleted before the comparison runs
    os.remove(out_path)

    self.assertEqual(generated_lines, expected_lines)
def test_compile_return(self):
    # Compile a function whose body is a single return and compare the
    # generated C file line by line.
    out_path = "testing.c"
    opcodes = [
        OpCode("func_decl", "hello---", ""),
        OpCode("scope_begin", "", ""),
        OpCode("return", "1", ""),
        OpCode("scope_over", "", ""),
    ]
    table = SymbolTable()
    table.symbol_table = {
        1: ["hello", "int", "function"],
        2: ["1", "int", "constant"],
    }
    expected_lines = ["", "", "int hello(void) {", "", "\treturn 1;", "}", ""]

    compile(opcodes, out_path, table)
    with open(out_path, "r") as c_file:
        generated_lines = c_file.read().split("\n")
    # The generated file is deleted before the comparison runs
    os.remove(out_path)

    self.assertEqual(generated_lines, expected_lines)
def test_compile_exit(self):
    # Compile a single exit opcode and compare the generated C file line by line.
    out_path = "testing.c"
    opcodes = [OpCode("exit", "0", None)]
    table = SymbolTable()
    table.symbol_table = {1: ["0", "int", "constant"]}
    expected_lines = ["", "\texit(0);", ""]

    compile(opcodes, out_path, table)
    with open(out_path, "r") as c_file:
        generated_lines = c_file.read().split("\n")
    # The generated file is deleted before the comparison runs
    os.remove(out_path)

    self.assertEqual(generated_lines, expected_lines)
def test_compile_print(self):
    # Compile a single print opcode and compare the generated C file line by line.
    out_path = "testing.c"
    opcodes = [OpCode("print", '"%d", 1')]
    table = SymbolTable()
    expected_lines = ["#include <stdio.h>", '\tprintf("%d", 1);', ""]

    compile(opcodes, out_path, table)
    with open(out_path, "r") as c_file:
        generated_lines = c_file.read().split("\n")
    # The generated file is deleted before the comparison runs
    os.remove(out_path)

    self.assertEqual(generated_lines, expected_lines)
def test_compile_var_no_assign(self):
    # Compile a declaration without a value and compare the generated C file
    # line by line; the symbol's type string ("declared") appears in the output.
    out_path = "testing.c"
    opcodes = [OpCode("var_no_assign", "a", None)]
    table = SymbolTable()
    table.symbol_table = {1: ["a", "declared", "variable"]}
    expected_lines = ["", "\tdeclared a;", ""]

    compile(opcodes, out_path, table)
    with open(out_path, "r") as c_file:
        generated_lines = c_file.read().split("\n")
    # The generated file is deleted before the comparison runs
    os.remove(out_path)

    self.assertEqual(generated_lines, expected_lines)
def case_statement(tokens, i, table, func_ret_type):
    """
    Parse case statement

    Params
    ======
    tokens        (list)        = List of tokens
    i             (int)         = Current index in token
    table         (SymbolTable) = Symbol table constructed holding information about identifiers and constants
    func_ret_type               = Function return type bookkeeping; forwarded to expression() and handed back to the caller

    Returns
    =======
    OpCode, int, func_ret_type: The opcode for the case code, the index after parsing case statement
                                and the function return type bookkeeping
    """
    # Parse the expression that follows the case keyword (no parentheses expected)
    op_value, _, i, func_ret_type = expression(
        tokens,
        i,
        table,
        "Expected expression after case",
        expect_paren=False,
        func_ret_type=func_ret_type,
    )

    # Check if : follows the case expression
    check_if(
        tokens[i].type,
        "colon",
        "Expected : after case in switch statement",
        tokens[i].line_num,
    )

    # Return the opcode and i+1 (the token after the colon)
    return OpCode("case", op_value, ""), i + 1, func_ret_type
def print_statement(tokens, i, table, func_ret_type):
    r"""
    Parse print statement

    Params
    ======
    tokens        (list)        = List of tokens
    i             (int)         = Current index in token
    table         (SymbolTable) = Symbol table constructed holding information about identifiers and constants
    func_ret_type               = Function return type bookkeeping; forwarded to expression() and handed back to the caller

    Returns
    =======
    OpCode, int, func_ret_type: The opcode for the print code, the index after parsing print statement
                                and the function return type bookkeeping

    Grammar
    =======
    print_statement -> print(expr)
    expr            -> string | number | id | operator
    string          -> quote [a-zA-Z0-9`~!@#$%^&*()_-+={[]}:;,.?/|\]+ quote
    quote           -> "
    number          -> [0-9]+
    id              -> [a-zA-Z_]?[a-zA-Z0-9_]*
    operator        -> + | - | * | /
    """
    # NOTE: the docstring is a raw string because the grammar contains a
    # backslash that is not a valid escape sequence in a normal string.

    # Check if ( follows print statement
    check_if(
        tokens[i].type,
        "left_paren",
        "Expected ( after print statement",
        tokens[i].line_num,
    )

    # Check if expression follows ( in print statement
    op_value, op_type, i, func_ret_type = expression(
        tokens,
        i + 1,
        table,
        "Expected expression inside print statement",
        func_ret_type=func_ret_type,
    )

    # Map the type code returned by expression() to the printf format prefix
    prec_to_type = {
        0: "",
        1: '"%s", ',
        2: '"%c", ',
        3: '"%d", ',
        4: '"%f", ',
        5: '"%lf", ',
    }

    # Prefix the format specifier and drop the last character of the
    # expression text (presumably the trailing ")" -- confirm against expression())
    op_value = prec_to_type[op_type] + op_value[:-1]

    # Check if print statement has closing )
    check_if(
        tokens[i - 1].type,
        "right_paren",
        "Expected ) after expression in print statement",
        tokens[i - 1].line_num,
    )

    # Return the opcode and i+1 (the token after print statement)
    return OpCode("print", op_value), i + 1, func_ret_type
def test_compile_if_else_if_else(self):
    # Testing if, else if, else
    out_path = "testing.c"
    opcodes = [
        OpCode("var_assign", "i---0", "int"),
        OpCode("if", "i == 1", None),
        OpCode("scope_begin", "", ""),
        OpCode("print", '"%d", 1', None),
        OpCode("scope_over", "", ""),
        OpCode("else_if", "i == 2", None),
        OpCode("scope_begin", "", ""),
        OpCode("print", '"%d", 2', None),
        OpCode("scope_over", "", ""),
        OpCode("else", "", ""),
        OpCode("scope_begin", "", ""),
        OpCode("print", '"Else"', None),
        OpCode("scope_over", "", ""),
    ]
    table = SymbolTable()
    table.symbol_table = {
        1: ["i", "int", "variable"],
        2: ["0", "int", "constant"],
        3: ["1", "int", "constant"],
        4: ["1", "int", "constant"],
        5: ["2", "int", "constant"],
        6: ["2", "int", "constant"],
        7: ['"Else"', "string", "constant"],
    }
    expected_lines = [
        "#include <stdio.h>",
        "\tint i = 0;",
        "\tif(i == 1) {",
        '\tprintf("%d", 1);',
        "}",
        "\telse if(i == 2) {",
        '\tprintf("%d", 2);',
        "}",
        "\telse {",
        '\tprintf("Else");',
        "}",
        "",
    ]

    compile(opcodes, out_path, table)
    with open(out_path, "r") as c_file:
        generated_lines = c_file.read().split("\n")
    # The generated file is deleted before the comparison runs
    os.remove(out_path)

    self.assertEqual(generated_lines, expected_lines)
def _for_loop_descends(starting_val, ending_val):
    """Return True when the loop's start value is greater than its end value."""
    # Compare numerically when both values parse as numbers; comparing the
    # raw strings would order "9" after "10".
    try:
        return float(starting_val) > float(ending_val)
    except (TypeError, ValueError):
        # Fall back to the plain comparison for values that are not numeric text
        return starting_val > ending_val


def for_statement(tokens, i, table, func_ret_type):
    """
    Parse for for_loop

    Params
    ======
    tokens        (list)        = List of tokens
    i             (int)         = Current index in token
    table         (SymbolTable) = Symbol table constructed holding information about identifiers and constants
    func_ret_type               = Function return type bookkeeping; returned to the caller unchanged

    Returns
    =======
    OpCode, int, func_ret_type: The opcode for the for loop code, the index the caller resumes at
                                and the function return type bookkeeping

    Grammar
    =======
    for_loop    -> for id in number to number by operator number
    number      -> [0-9]+
    id          -> [a-zA-Z_]?[a-zA-Z0-9_]*
    operator    -> + | - | * | /
    """
    # Operator tokens accepted after the by keyword and their C spelling
    word_to_op = {"plus": "+", "minus": "-", "multiply": "*", "divide": "/"}

    # Check if identifier follows for keyword
    check_if(tokens[i].type, "id", "Expected variable name", tokens[i].line_num)

    # Check if in follows identifier
    check_if(tokens[i + 1].type, "in", "Expected in keyword", tokens[i + 1].line_num)

    # Check if number follows in keyword
    check_if(tokens[i + 2].type, "number", "Expected starting value", tokens[i + 2].line_num)

    # Check if to keyword follows number
    check_if(tokens[i + 3].type, "to", "Expected to keyword", tokens[i + 3].line_num)

    # Check if number follows to keyword
    check_if(tokens[i + 4].type, "number", "Expected ending value", tokens[i + 4].line_num)

    # Check if by keyword follows number
    check_if(tokens[i + 5].type, "by", "Expected by keyword", tokens[i + 5].line_num)

    # Check if operator follows by keyword, so that an unsupported token is
    # reported as a syntax error instead of failing the dictionary lookup below
    check_if(
        tokens[i + 6].type,
        list(word_to_op),
        "Expected operator after by keyword",
        tokens[i + 6].line_num,
    )

    # Check if number follows operator
    check_if(
        tokens[i + 7].type,
        "number",
        "Expected value for change",
        tokens[i + 7].line_num,
    )

    # Get required values
    var_name, _, _ = table.get_by_id(tokens[i].val)
    # The loop variable's datatype is recorded as int
    table.symbol_table[tokens[i].val][1] = "int"
    starting_val, _, _ = table.get_by_id(tokens[i + 2].val)
    ending_val, _, _ = table.get_by_id(tokens[i + 4].val)
    operator_type = word_to_op[tokens[i + 6].type]
    change_val, _, _ = table.get_by_id(tokens[i + 7].val)

    # To determine the > or < sign
    sign_needed = ">" if _for_loop_descends(starting_val, ending_val) else "<"

    # Fields of the for opcode are joined with &&&
    op_value = "&&&".join(
        [
            str(var_name),
            str(starting_val),
            str(ending_val),
            str(operator_type),
            sign_needed,
            str(change_val),
        ]
    )

    # Return the opcode and i+1; the caller resumes at the token right after
    # the loop variable (the remaining tokens of the header are not skipped here)
    return OpCode("for", op_value), i + 1, func_ret_type
def if_statement(tokens, i, table, func_ret_type):
    r"""
    Parse if statement

    Params
    ======
    tokens        (list)        = List of tokens
    i             (int)         = Current index in token
    table         (SymbolTable) = Symbol table constructed holding information about identifiers and constants
    func_ret_type               = Function return type bookkeeping; forwarded to expression() and handed back to the caller

    Returns
    =======
    OpCode, int, func_ret_type: The opcode for the if code, the index of the { that opens the if body
                                and the function return type bookkeeping

    Grammar
    =======
    if_statement -> if(condition) { body }
    condition    -> expr
    expr         -> string | number | id | operator
    string       -> quote [a-zA-Z0-9`~!@#$%^&*()_-+={[]}:;,.?/|\]+ quote
    quote        -> "
    number       -> [0-9]+
    id           -> [a-zA-Z_]?[a-zA-Z0-9_]*
    operator     -> + | - | * | /
    """
    # NOTE: the docstring is a raw string because the grammar contains a
    # backslash that is not a valid escape sequence in a normal string.

    # Check if ( follows if statement
    check_if(
        tokens[i].type,
        "left_paren",
        "Expected ( after if statement",
        tokens[i].line_num,
    )

    # Check if expression follows ( in if statement
    op_value, _, i, func_ret_type = expression(
        tokens,
        i + 1,
        table,
        "Expected expression inside if statement",
        func_ret_type=func_ret_type,
    )

    # Check if ) follows expression in if statement
    check_if(
        tokens[i - 1].type,
        "right_paren",
        "Expected ) after expression in if statement",
        tokens[i - 1].line_num,
    )

    # If \n follows ) then skip all the \n characters
    if tokens[i + 1].type == "newline":
        i += 1
        while tokens[i].type == "newline":
            i += 1
        i -= 1

    # Check if { follows ) in if statement
    check_if(
        tokens[i + 1].type,
        "left_brace",
        "Expected { before if body",
        tokens[i + 1].line_num,
    )

    # Index of the first token inside the body
    body_start = i + 2

    # The body must be closed by a } somewhere after the opening brace
    found_right_brace = any(
        token.type == "right_brace" for token in tokens[body_start:]
    )

    # If right brace is not found then produce error; the whole token list was
    # scanned, so the last token's line is reported
    if not found_right_brace:
        error("Expected } after if body", tokens[-1].line_num)

    # The last character of the condition text is dropped (presumably the
    # trailing ")" -- confirm against expression()); the returned index points
    # at the opening brace
    return OpCode("if", op_value[:-1]), body_start - 1, func_ret_type
def function_call_statement(tokens, i, table, func_ret_type):
    r"""
    Parse function calling statement

    Params
    ======
    tokens        (list)        = List of tokens
    i             (int)         = Current index in token
    table         (SymbolTable) = Symbol table constructed holding information about identifiers and constants
    func_ret_type (dict)        = Functions whose return type is not figured yet, keyed by function name;
                                  the stored value is the token index passed to expression() to infer the type

    Returns
    =======
    OpCode, int, dict: The opcode for the func_call code, index after parsing function calling statement
                       and function return type

    Grammar
    =======
    function_call_statement -> id([actual_params,]*)
    actual_params           -> expr
    body                    -> statement
    expr                    -> string | number | id | operator
    string                  -> quote [a-zA-Z0-9`~!@#$%^&*()_-+={[]}:;,.?/|\]+ quote
    quote                   -> "
    number                  -> [0-9]+
    id                      -> [a-zA-Z_]?[a-zA-Z0-9_]*
    operator                -> + | - | * | /
    """
    # NOTE: the docstring is a raw string because the grammar contains a
    # backslash that is not a valid escape sequence in a normal string.

    # Get information about the function from symbol table
    func_name, _, metadata = table.get_by_id(tokens[i].val)

    # Extract params from functions metadata (typedata), these are stored as
    # <id>---[<param 1>, . . . , <param n>]; [")"] stands for an empty list
    params = metadata.split("---")[1:] if "---" in metadata else [")"]
    num_formal_params = len(params) if params != [")"] else 0

    # Parse the params
    op_value, op_type, i, func_ret_type = expression(
        tokens,
        i + 2,
        table,
        "",
        True,
        True,
        expect_paren=True,
        func_ret_type=func_ret_type,
    )

    # Split the parsed text into the individual actual parameters
    op_value_list = op_value.replace(" ", "").split(",")
    if not op_value_list[0]:
        op_value_list = []
    num_actual_params = len(op_value_list) if op_value_list != [")"] else 0

    # Check if number of actual and formal parameters match
    if num_formal_params != num_actual_params:
        error(
            "Expected %d parameters but got %d parameters in function %s"
            % (num_formal_params, num_actual_params, func_name),
            tokens[i].line_num,
        )

    # Assign datatype to formal parameters
    for j, formal_param in enumerate(params):
        # If parameter list is empty
        if formal_param == ")":
            continue

        # Fetch the datatype of corresponding actual parameter from symbol table
        actual_param = op_value_list[j].replace(")", "")
        _, dtype, _ = table.get_by_id(table.get_by_symbol(actual_param))

        # Set the datatype of the formal parameter
        table.symbol_table[table.get_by_symbol(formal_param)][1] = dtype

    # If the return type of this function is still pending, infer it now from
    # the token index recorded for it
    if func_name in func_ret_type:
        _, op_type, _, _ = expression(tokens, func_ret_type[func_name], table, "")

        # Map datatype to appropriate datatype in C
        prec_to_type = {
            0: "char*",
            1: "char*",
            2: "char",
            3: "int",
            4: "float",
            5: "double",
        }
        table.symbol_table[table.get_by_symbol(func_name)][1] = prec_to_type[op_type]

        # The return type is known now, so the pending entry is removed
        del func_ret_type[func_name]

    # Actual parameters are joined with &&& and the last character of the
    # joined text is dropped
    return (
        OpCode("func_call", func_name + "---" + "&&&".join(op_value_list)[:-1], ""),
        i + 1,
        func_ret_type,
    )
def function_definition_statement(tokens, i, table, func_ret_type):
    r"""
    Parse function definition statement

    Params
    ======
    tokens        (list)        = List of tokens
    i             (int)         = Current index in token
    table         (SymbolTable) = Symbol table constructed holding information about identifiers and constants
    func_ret_type               = Function return type bookkeeping; forwarded to expression() and handed back to the caller

    Returns
    =======
    OpCode, int, string, func_ret_type: The opcode for the func_decl code, the index of the { that opens
                                        the function body, the name of the function and the function
                                        return type bookkeeping

    Grammar
    =======
    function_definition_statement   -> fun id([formal_params,]*) { body }
    formal_params                   -> expr
    body                            -> statement
    expr                            -> string | number | id | operator
    string                          -> quote [a-zA-Z0-9`~!@#$%^&*()_-+={[]}:;,.?/|\]+ quote
    quote                           -> "
    number                          -> [0-9]+
    id                              -> [a-zA-Z_]?[a-zA-Z0-9_]*
    operator                        -> + | - | * | /
    """
    # NOTE: the docstring is a raw string because the grammar contains a
    # backslash that is not a valid escape sequence in a normal string.

    # Check if identifier follows fun
    check_if(tokens[i].type, "id", "Expected function name", tokens[i].line_num)

    # Store the id of function name in symbol table
    func_idx = tokens[i].val

    # Get function name
    func_name, _, _ = table.get_by_id(func_idx)

    # Check if ( follows id in function
    check_if(
        tokens[i + 1].type,
        "left_paren",
        "Expected ( after function name",
        tokens[i + 1].line_num,
    )

    # Check if expression follows ( in function statement
    op_value, _, i, func_ret_type = expression(
        tokens, i + 2, table, "", True, True, func_ret_type=func_ret_type
    )
    op_value_list = op_value.replace(" ", "").replace(")", "").split(",")

    # Check if ) follows expression in function
    check_if(
        tokens[i - 1].type,
        "right_paren",
        "Expected ) after function params list",
        tokens[i - 1].line_num,
    )

    # If \n follows ) then skip all the \n characters
    if tokens[i + 1].type == "newline":
        i += 1
        while tokens[i].type == "newline":
            i += 1
        i -= 1

    # Check if { follows ) in function
    check_if(
        tokens[i + 1].type,
        "left_brace",
        "Expected { before function body",
        tokens[i + 1].line_num,
    )

    # Index of the first token inside the body
    body_start = i + 2

    # The body must be closed by a } somewhere after the opening brace
    found_right_brace = any(
        token.type == "right_brace" for token in tokens[body_start:]
    )

    # If right brace is not found then produce error; the whole token list was
    # scanned, so the last token's line is reported
    if not found_right_brace:
        error("Expected } after function body", tokens[-1].line_num)

    # Add the identifier types to function's typedata
    has_params = len(op_value_list) > 0 and len(op_value_list[0]) > 0
    table.symbol_table[func_idx][2] = (
        "function---" + "---".join(op_value_list) if has_params else "function"
    )

    # The returned index points at the opening brace of the body
    return (
        OpCode("func_decl", func_name + "---" + "&&&".join(op_value_list), ""),
        body_start - 1,
        func_name,
        func_ret_type,
    )
def parse(tokens, table):
    """
    Parse tokens and generate opcodes

    Params
    ======
    tokens (list)        = List of tokens
    table  (SymbolTable) = Symbol table constructed holding information about identifiers and constants

    Returns
    =======
    list: The list of opcodes

    Grammar
    =======
    statement -> print_statement | var_statement | assign_statement | function_definition_statement
    """
    # List of opcodes
    op_codes = []

    # Current function's name
    func_name = ""

    # Do while started or not
    in_do = False

    # Count main functions
    main_fn_count = 0

    # Count if conditions
    if_count = 0

    # Brace count
    brace_count = 0

    # If function return type could not be figured out during return then do it while calling
    func_ret_type = {}

    # Loop through all the tokens
    i = 0
    while i <= len(tokens) - 1:
        # If token is of type print then generate print opcode
        if tokens[i].type == "print":
            print_opcode, i, func_ret_type = print_statement(
                tokens, i + 1, table, func_ret_type
            )
            op_codes.append(print_opcode)
        # If token is of type var then generate var opcode
        elif tokens[i].type == "var":
            var_opcode, i, func_ret_type = var_statement(
                tokens, i + 1, table, func_ret_type
            )
            op_codes.append(var_opcode)
        # If token is of type id then generate assign opcode
        elif tokens[i].type == "id":
            # If '(' follows id then it is function calling else variable assignment
            if tokens[i + 1].type == "left_paren":
                fun_opcode, i, func_ret_type = function_call_statement(
                    tokens, i, table, func_ret_type
                )
                op_codes.append(fun_opcode)
            elif tokens[i + 1].type in ["increment", "decrement"]:
                unary_opcode, i, func_ret_type = unary_statement(
                    tokens, i, table, func_ret_type
                )
                op_codes.append(unary_opcode)
            else:
                assign_opcode, i, func_ret_type = assign_statement(
                    tokens, i + 1, table, func_ret_type
                )
                op_codes.append(assign_opcode)
        # If token is of type fun then generate function opcode
        elif tokens[i].type == "fun":
            fun_opcode, i, func_name, func_ret_type = function_definition_statement(
                tokens, i + 1, table, func_ret_type
            )
            op_codes.append(fun_opcode)
        # If token is of type left_brace then generate scope_begin opcode
        elif tokens[i].type == "left_brace":
            op_codes.append(OpCode("scope_begin", "", ""))
            brace_count += 1
            i += 1
        # If token is of type right_brace then generate scope_over opcode
        elif tokens[i].type == "right_brace":
            op_codes.append(OpCode("scope_over", "", ""))
            brace_count -= 1

            if brace_count < 0:
                error(
                    "Closing brace doesn't match any previous opening brace",
                    tokens[i].line_num,
                )
            i += 1
        # If token is of type MAIN then generate MAIN opcode
        elif tokens[i].type == "MAIN":
            op_codes.append(OpCode("MAIN", "", ""))
            main_fn_count += 1
            if main_fn_count > 1:
                error("Presence of two MAIN in a single file", tokens[i].line_num)
            i += 1
        # If token is of type END_MAIN then generate END_MAIN opcode
        elif tokens[i].type == "END_MAIN":
            op_codes.append(OpCode("END_MAIN", "", ""))
            main_fn_count -= 1
            i += 1
        # If token is of type for then generate for code
        elif tokens[i].type == "for":
            for_opcode, i, func_ret_type = for_statement(
                tokens, i + 1, table, func_ret_type
            )
            op_codes.append(for_opcode)
        # If token is of type do then generate do_while code
        elif tokens[i].type == "do":
            check_if(
                tokens[i + 1].type,
                "left_brace",
                "Expected { after do statement",
                tokens[i + 1].line_num,
            )
            in_do = True
            op_codes.append(OpCode("do", "", ""))
            i += 1
        # If token is of type while then generate while opcode
        elif tokens[i].type == "while":
            while_opcode, i, func_ret_type = while_statement(
                tokens, i + 1, table, in_do, func_ret_type
            )
            # A while seen after do closes that do; the flag is cleared
            if in_do:
                in_do = False
            op_codes.append(while_opcode)
        # If token is of type if then generate if opcode
        elif tokens[i].type == "if":
            if_opcode, i, func_ret_type = if_statement(
                tokens, i + 1, table, func_ret_type
            )
            op_codes.append(if_opcode)

            # Increment if count on encountering if
            if_count += 1
        # If token is of type exit then generate exit opcode
        elif tokens[i].type == "exit":
            exit_opcode, i, func_ret_type = exit_statement(
                tokens, i + 1, table, func_ret_type
            )
            op_codes.append(exit_opcode)
        # If token is of type else then check whether it is else if or else
        elif tokens[i].type == "else":
            # Type of the token after else (None when else is the last token)
            next_type = tokens[i + 1].type if i + 1 < len(tokens) else None

            # If the next token is if, then it is else if
            if next_type == "if":
                if_opcode, i, func_ret_type = if_statement(
                    tokens, i + 2, table, func_ret_type
                )
                if_opcode.type = "else_if"
                op_codes.append(if_opcode)
            # Otherwise it is else
            elif next_type == "left_brace":
                op_codes.append(OpCode("else", "", ""))

                # Decrement if count on encountering else, to make sure there aren't extra else conditions
                if_count -= 1

                # If if_count is negative then the current else is extra
                if if_count < 0:
                    error("Else does not match any if!", tokens[i].line_num)

                i += 1
            # Anything else is a syntax error; the index still advances so the
            # loop can never stall on this token
            else:
                error("Expected { or if after else", tokens[i].line_num)
                i += 1
        # If token is of type return then generate return opcode
        elif tokens[i].type == "return":
            # Index of the token after return, remembered so the return type
            # can be inferred later if it is not known yet
            beg_idx = i + 1
            if tokens[i + 1].type not in ["id", "number", "string"]:
                # Nothing is returned: type code 6 maps to void below
                op_value = ""
                op_type = 6
                i += 2
            else:
                op_value, op_type, i, func_ret_type = expression(
                    tokens,
                    i + 1,
                    table,
                    "Expected expression after return",
                    True,
                    True,
                    expect_paren=False,
                    func_ret_type=func_ret_type,
                )
            if func_name == "":
                error("Return statement outside any function", tokens[i].line_num)
            else:
                # Map datatype to appropriate datatype in C
                prec_to_type = {
                    -1: "not_known",
                    0: "char*",
                    1: "char*",
                    2: "char",
                    3: "int",
                    4: "float",
                    5: "double",
                    6: "void",
                }

                # Type not known yet: remember where the returned expression starts
                if op_type == -1:
                    func_ret_type[func_name] = beg_idx

                # Change return type of function
                table.symbol_table[table.get_by_symbol(func_name)][1] = prec_to_type[
                    op_type
                ]

                # Set func_name to an empty string after processing
                func_name = ""
            op_codes.append(OpCode("return", op_value, ""))
        # If token is of type break then generate break opcode
        elif tokens[i].type == "break":
            op_codes.append(OpCode("break", "", ""))
            i += 1
        # If token is of type continue then generate continue opcode
        elif tokens[i].type == "continue":
            op_codes.append(OpCode("continue", "", ""))
            i += 1
        # If token is of type single_line_comment then generate single_line_comment opcode
        elif tokens[i].type == "single_line_comment":
            op_codes.append(OpCode("single_line_comment", tokens[i].val, ""))
            i += 1
        # If token is of type multi_line_comment then generate multi_line_comment opcode
        elif tokens[i].type == "multi_line_comment":
            op_codes.append(OpCode("multi_line_comment", tokens[i].val, ""))
            i += 1
        # If token is of type switch then generate switch opcode
        elif tokens[i].type == "switch":
            switch_opcode, i, func_ret_type = switch_statement(
                tokens, i + 1, table, func_ret_type
            )
            op_codes.append(switch_opcode)
        # If token is of type case then generate case opcode
        elif tokens[i].type == "case":
            case_opcode, i, func_ret_type = case_statement(
                tokens, i + 1, table, func_ret_type
            )
            op_codes.append(case_opcode)
        # If token is of type default then generate default opcode
        elif tokens[i].type == "default":
            check_if(
                tokens[i + 1].type,
                "colon",
                "Expected : after default statement in switch",
                tokens[i + 1].line_num,
            )
            op_codes.append(OpCode("default", "", ""))
            i += 2
        # If token is the type increment or decrement then generate unary_opcode
        elif tokens[i].type in ["increment", "decrement"]:
            unary_opcode, i, func_ret_type = unary_statement(
                tokens, i, table, func_ret_type
            )
            op_codes.append(unary_opcode)
        # Otherwise increment the index
        else:
            i += 1

    # Errors that may occur after parsing loop
    if main_fn_count != 0:
        error("MAIN not ended with END_MAIN", tokens[i - 1].line_num + 1)

    # Return opcodes
    return op_codes
def var_statement(tokens, i, table, func_ret_type):
    r"""
    Parse variable declaration [/initialization] statement

    Params
    ======
    tokens        (list)        = List of tokens
    i             (int)         = Current index in token
    table         (SymbolTable) = Symbol table constructed holding information about identifiers and constants
    func_ret_type               = Function return type bookkeeping; forwarded to expression() and handed back to the caller

    Returns
    =======
    OpCode, int, func_ret_type: The opcode for the var_assign/var_no_assign (or ptr_assign/ptr_no_assign) code,
                                the index after parsing var statement and the function return type bookkeeping

    Grammar
    =======
    var_statement   -> var id [= expr]?
    expr            -> string | number | id | operator
    string          -> quote [a-zA-Z0-9`~!@#$%^&*()_-+={[]}:;,.?/|\]+ quote
    quote           -> "
    number          -> [0-9]+
    id              -> [a-zA-Z_]?[a-zA-Z0-9_]*
    operator        -> + | - | * | /
    """
    # NOTE: the docstring is a raw string because the grammar contains a
    # backslash that is not a valid escape sequence in a normal string.

    # Detect a pointer declaration; check_ptr also returns the index to continue from
    is_ptr, count_ast, i = check_ptr(tokens, i)

    # Check if identifier is present after var
    check_if(tokens[i].type, "id", "Expected id after var keyword", tokens[i].line_num)

    # Tokens that are not accepted after declaration of a variable
    invalid_tokens = [
        "plus_equal",
        "minus_equal",
        "divide_equal",
        "multiply_equal",
        "plus",
        "minus",
        "divide",
        "multiply",
        "modulus",
        "modulus_equal",
        "equal",
        "not_equal",
    ]

    # Check if variable is also initialized
    if i + 1 < len(tokens) and tokens[i + 1].type == "assignment":
        # Store the index of identifier
        id_idx = i

        # Check if expression follows = in var statement
        op_value, op_type, i, func_ret_type = expression(
            tokens,
            i + 2,
            table,
            "Required expression after assignment operator",
            expect_paren=False,
            func_ret_type=func_ret_type,
        )

        # Map datatype to appropriate datatype in C
        prec_to_type = {
            0: "string",
            1: "string",
            2: "char",
            3: "int",
            4: "float",
            5: "double",
        }
        dtype = prec_to_type[op_type]

        # Modify datatype of the identifier
        symbol_entry = table.symbol_table[tokens[id_idx].val]
        symbol_entry[1] = dtype

        if is_ptr:
            # Pointer opcodes also carry the pointer depth
            return (
                OpCode(
                    "ptr_assign",
                    symbol_entry[0] + "---" + op_value + "---" + str(count_ast),
                    dtype,
                ),
                i,
                func_ret_type,
            )

        # Return the opcode and i (the token after var statement)
        return (
            OpCode("var_assign", symbol_entry[0] + "---" + op_value, dtype),
            i,
            func_ret_type,
        )
    elif i + 1 < len(tokens) and tokens[i + 1].type in invalid_tokens:
        error("Invalid Syntax for declaration", tokens[i].line_num)
    else:
        # Get the value from symbol table by id
        value, dtype, _ = table.get_by_id(tokens[i].val)

        # If already declared then throw error
        if dtype in [
            "declared",
            "int",
            "char",
            "float",
            "double",
            "string",
            "char *",
            "char*",
        ]:
            error("Variable %s already declared" % value, tokens[i].line_num)

        # Set declared
        table.symbol_table[tokens[i].val][1] = "declared"

        # Return the opcode and i+1 (the token after var statement)
        if is_ptr:
            return OpCode("ptr_no_assign", value), i + 1, func_ret_type

        return OpCode("var_no_assign", value), i + 1, func_ret_type
def setUp(self):
    # Build the opcode that the tests of this case read from self.opcode
    opcode_fields = ("var_assign", "a---1 + 2", "int")
    self.opcode = OpCode(*opcode_fields)
def assign_statement(tokens, i, table, func_ret_type):
    r"""
    Parse assignment statement

    Params
    ======
    tokens        (list)        = List of tokens
    i             (int)         = Current index in token (the assignment operator; the identifier is at i - 1)
    table         (SymbolTable) = Symbol table constructed holding information about identifiers and constants
    func_ret_type               = Function return type bookkeeping; forwarded to expression() and handed back to the caller

    Returns
    =======
    OpCode, int, func_ret_type: The opcode for the assign/ptr_only_assign code, the index after parsing
                                assign statement and the function return type bookkeeping

    Grammar
    =======
    assign_statement -> id assign_operator expr
    expr             -> string | number | id | operator
    string           -> quote [a-zA-Z0-9`~!@#$%^&*()_-+={[]}:;,.?/|\]+ quote
    quote            -> "
    number           -> [0-9]+
    id               -> [a-zA-Z_]?[a-zA-Z0-9_]*
    operator         -> + | - | * | /
    """
    # NOTE: the docstring is a raw string because the grammar contains a
    # backslash that is not a valid escape sequence in a normal string.

    # Check if the identifier is a pointer
    is_ptr = False

    # Count depth of pointer
    count_ast = 0

    # Walk left over the run of * tokens in front of the identifier. The index
    # is bounded at 0 so that a negative index never wraps around to the end
    # of the token list.
    first_star_idx = i - 2
    if first_star_idx >= 0 and tokens[first_star_idx].type == "multiply":
        star_idx = first_star_idx
        while star_idx >= 0 and tokens[star_idx].type == "multiply":
            star_idx -= 1
        count_ast = first_star_idx - star_idx
        is_ptr = True

    # Check if variable is declared or not
    value, dtype, _ = table.get_by_id(tokens[i - 1].val)

    if dtype == "var":
        error("Variable %s used before declaration" % value, tokens[i - 1].line_num)

    # Dictionary to convert tokens to their corresponding assignment types
    assignment_type = {
        "assignment": "=",
        "plus_equal": "+=",
        "minus_equal": "-=",
        "multiply_equal": "*=",
        "divide_equal": "/=",
        "modulus_equal": "%=",
    }

    # Check if assignment operator follows identifier name
    check_if(
        tokens[i].type,
        [
            "assignment",
            "plus_equal",
            "minus_equal",
            "multiply_equal",
            "divide_equal",
            "modulus_equal",
        ],
        "Expected assignment operator after identifier",
        tokens[i].line_num,
    )

    # Convert the token to respective symbol
    converted_type = assignment_type[tokens[i].type]

    # Store the index of identifier
    id_idx = i - 1

    # Check if expression follows = in assign statement
    op_value, op_type, i, func_ret_type = expression(
        tokens,
        i + 1,
        table,
        "Required expression after assignment operator",
        expect_paren=False,
        func_ret_type=func_ret_type,
    )

    # Map datatype to appropriate datatype in C
    prec_to_type = {
        0: "string",
        1: "string",
        2: "char",
        3: "int",
        4: "float",
        5: "double",
    }

    # The opcode value carries the operator and the expression joined by ---
    op_value = converted_type + "---" + op_value

    # Modify datatype of the identifier
    table.symbol_table[tokens[id_idx].val][1] = prec_to_type[op_type]

    # Check if a pointer is being assigned
    if is_ptr:
        # Pointer opcodes also carry the pointer depth
        return (
            OpCode(
                "ptr_only_assign",
                table.symbol_table[tokens[id_idx].val][0]
                + "---"
                + op_value
                + "---"
                + str(count_ast),
                "",
            ),
            i,
            func_ret_type,
        )

    # Return the opcode and i (the token after assign statement)
    return (
        OpCode(
            "assign", table.symbol_table[tokens[id_idx].val][0] + "---" + op_value, ""
        ),
        i,
        func_ret_type,
    )
def test_compile_switch_case_default(self):
    # Test switch, case, and default statements
    out_path = "testing.c"
    opcodes = [
        OpCode("var_assign", "a---1", "int"),
        OpCode("switch", "a", ""),
        OpCode("scope_begin", "", ""),
        OpCode("case", "1", ""),
        OpCode("scope_begin", "", ""),
        OpCode("print", '"Hello"', None),
        OpCode("scope_over", "", ""),
        OpCode("default", "", ""),
        OpCode("scope_begin", "", ""),
        OpCode("print", '"Bye"', None),
        OpCode("scope_over", "", ""),
        OpCode("scope_over", "", ""),
    ]
    table = SymbolTable()
    table.symbol_table = {
        1: ["a", "int", "variable"],
        2: ["1", "int", "constant"],
        3: ["1", "int", "constant"],
        4: ['"Hello"', "string", "constant"],
        5: ['"Bye"', "string", "constant"],
    }
    expected_lines = [
        "#include <stdio.h>",
        "\tint a = 1;",
        "\tswitch(a) {",
        "\tcase 1:",
        "{",
        '\tprintf("Hello");',
        "}",
        "\tdefault:",
        "{",
        '\tprintf("Bye");',
        "}",
        "}",
        "",
    ]

    compile(opcodes, out_path, table)
    with open(out_path, "r") as c_file:
        generated_lines = c_file.read().split("\n")
    # The generated file is deleted before the comparison runs
    os.remove(out_path)

    self.assertEqual(generated_lines, expected_lines)