def p_postfix_expression_4(p):  # struct ref
    '''
    postfix_expression : postfix_expression ARROW IDENTIFIER
    '''
    # Semantic action for `expr -> member`.
    # p[1] must be a pointer whose pointee is a struct; the member name p[3]
    # is then looked up in that struct's own symbol table. On success p[0] is
    # a "struct ref" node typed with the member's type; on any failure p[0]
    # is Node(type="error").
    base = p[1]
    if base.type == "error":
        # Operand already failed: propagate without reporting again.
        p[0] = Node(type="error")
        return

    if (base.type.class_type != "PointerType"
            or base.type.type.class_type != "StructType"):
        p[0] = Node(type="error")
        Errors(errorType='TypeError',
               errorText='not pointer of struct type',
               token_object=p.slice[-1])
        return

    struct_sym = base.type.type.symbol_table
    member = struct_sym._look_up(p[3],
                                 token_object=p.slice[3],
                                 in_struct=True,
                                 no_error=True)
    # Identity test: the lookup signals "not found" with None.
    if member is None:
        Errors(errorType='DeclarationError',
               errorText='variable ' + p[3] + ' not declared in struct',
               token_object=p.slice[-1])
        p[0] = Node(type="error")
        return

    # Wrap the raw identifier text in an AST node before attaching it.
    p[3] = Node(name="id", value=p[3])
    p[0] = Node(name="struct ref",
                value=p[2],
                type=member.type,
                children=[p[1], p[3]])
def type_check_relational(node1, node2, op, token, is_bool=False):
    """Type-check a relational/equality operator and build its AST node.

    Both operands must be ``BasicType``. When ``is_bool`` is set and both
    operands are ``bool`` they are compared as-is (operator tagged
    ``"bool" + op``). Otherwise both base types must be numeric and the
    operands go through ``implicit_casting`` first.

    Args:
        node1: Left operand node.
        node2: Right operand node.
        op: Operator text, used in the node value and in diagnostics.
        token: Token passed to ``Errors`` for reporting.
        is_bool: Enables the bool-vs-bool shortcut.

    Returns:
        A ``binary_op`` node typed ``bool``, or ``Node(type="error")`` if an
        operand is already an error or the operand types are rejected.
    """
    numeric_bases = {'int', 'long', 'char', 'double', 'float'}

    lhs_type = node1.type
    rhs_type = node2.type
    if lhs_type == "error" or rhs_type == "error":
        return Node(type="error")

    def reject():
        # Shared diagnostic for both rejection paths.
        Errors(errorType='TypeError',
               errorText=op + ' not support type ' + lhs_type.stype + ',' +
               rhs_type.stype,
               token_object=token)
        return Node(type="error")

    if not (lhs_type.class_type == 'BasicType'
            and rhs_type.class_type == 'BasicType'):
        return reject()

    if is_bool and lhs_type.type == "bool" and rhs_type.type == "bool":
        return Node(name="binary_op",
                    value="bool" + op,
                    children=[node1, node2],
                    type=BasicType('bool'))

    if not (lhs_type.type in numeric_bases
            and rhs_type.type in numeric_bases):
        return reject()

    node1, node2, typ = implicit_casting(node1, node2)
    return Node(name="binary_op",
                value=typ.stype + op,
                children=[node1, node2],
                type=BasicType('bool'))
def p_conditional_expression(p):
    '''
    conditional_expression : logical_or_expression
                           | logical_or_expression QUES_MARK expression COLON conditional_expression
    '''
    # Semantic action for the ternary operator `cond ? a : b`.
    # The condition must be a BasicType with an allowed base type, and both
    # branches must have equal types. The condition is wrapped in a cast to
    # bool and the result is a "ternary_op" node typed like the first branch.
    if len(p) == 2:
        # Plain pass-through production.
        p[0] = p[1]
        return

    if p[1].type == "error" or p[3].type == "error" or p[5].type == "error":
        p[0] = Node(type="error")
        return

    allowed_base = {'int', 'long', 'char', 'bool', 'double', 'float'}
    if (p[1].type.class_type != 'BasicType'
            or p[1].type.type not in allowed_base):
        Errors(errorType='TypeError',
               errorText=p[2] + ' not support type ' + p[1].type.stype,
               token_object=p.slice[2])
        p[0] = Node(type="error")
        return

    if p[3].type != p[5].type:
        Errors(errorType='TypeError',
               errorText='type should be same',
               token_object=p.slice[2])
        p[0] = Node(type="error")
        # Stop here: without this return the error node was overwritten by
        # a ternary_op node built from mismatched branches.
        return

    p[1] = Node(name="type_cast",
                value='bool',
                children=[p[1]],
                type=BasicType('bool'))
    p[0] = Node("ternary_op", children=[p[1], p[3], p[5]], type=p[3].type)
def type_check_multi(node1, node2, op, token, decimal=True):
    """Type-check a multiplicative operator and build its AST node.

    Both operands must be ``BasicType`` with an integral base type
    (``int``/``long``/``char``); ``float`` and ``double`` are accepted too
    when ``decimal`` is true. Accepted operands go through
    ``implicit_casting`` and the result carries the common type.

    Args:
        node1: Left operand node.
        node2: Right operand node.
        op: Operator text, used in the node value and in diagnostics.
        token: Token passed to ``Errors`` for reporting.
        decimal: Whether floating-point operands are permitted.

    Returns:
        A ``binary_op`` node, or ``Node(type="error")`` on failure.
    """
    integral = {'int', 'long', 'char'}
    accepted = (integral | {'float', 'double'}) if decimal else integral

    if node1.type == "error" or node2.type == "error":
        return Node(type="error")

    # Evaluated left to right with short-circuiting: classes first, then bases.
    operands_ok = (node1.type.class_type == 'BasicType'
                   and node2.type.class_type == 'BasicType'
                   and node1.type.type in accepted
                   and node2.type.type in accepted)
    if not operands_ok:
        Errors(errorType='TypeError',
               errorText=op + ' not support type ' + node1.type.stype + ',' +
               node2.type.stype,
               token_object=token)
        return Node(type="error")

    node1, node2, typ = implicit_casting(node1, node2)
    return Node(name="binary_op",
                value=typ.stype + op,
                children=[node1, node2],
                type=typ)
def p_func_rparen_2(p):
    '''
    func_rparen_2 : R_PAREN
    '''
    # Embedded action run at the closing parenthesis of a function header
    # that has no parameters. It reads the (declarator, FunctionType) pair
    # from the top of the parser stack and either reconciles the definition
    # with an earlier declaration or registers the function in the parent
    # of the current scope.
    if p.stack[-2].value == "error":
        return

    header = p.stack[-1].value
    func_name = header[0].value
    token = header[0].data['token']
    func_type = header[1]
    func_type.param_list = []

    success = sym_table.curr_symbol_table.parent._look_up(name=func_name,
                                                          token_object=token,
                                                          no_error=True)
    # Same guard as p_func_rparen_1: only a FunctionType entry has the
    # `defined` / `symbol_table` attributes read below. Any other existing
    # symbol falls through to _add_entry, exactly as in p_func_rparen_1.
    if success and success.type.class_type == "FunctionType":
        if success.type.defined == True:
            Errors(errorType='DeclarationError',
                   errorText='Function is already defined',
                   token_object=p.slice[-1])
        # checking if same parameters
        elif func_type.is_same(success.type) == False:
            Errors(
                errorType='DeclarationError',
                errorText=
                'Function is declared with different parameters/return type',
                token_object=p.slice[-1])
        else:
            # Retire the declaration's scope and adopt the definition's.
            success.type.symbol_table.unused = True
            success.type.symbol_table = sym_table.curr_symbol_table
    else:
        sym_table.curr_symbol_table.parent._add_entry(name=func_name,
                                                      type=func_type,
                                                      token_object=token)
def type_check_bit(node1, node2, op, token):
    """Type-check a bitwise binary operator and emit its three-address code.

    Both operands must be ``BasicType`` with base ``int``, ``long``, ``char``
    or ``bool``. Accepted operands go through ``implicit_casting``; the
    result node holds both operands' code followed by the instruction from
    ``get_opcode``, and its place is the temporary ``get_opcode`` returned.

    Args:
        node1: Left operand node.
        node2: Right operand node.
        op: Operator text, used in the node value, opcode and diagnostics.
        token: Token passed to ``Errors`` for reporting.

    Returns:
        A ``binary_op`` node with ``code`` and ``place`` filled in, or
        ``Node(type="error")`` on failure.
    """
    bit_bases = {'int', 'long', 'char', 'bool'}

    if node1.type == "error" or node2.type == "error":
        return Node(type="error")

    # Classes are checked before base types, left operand before right.
    acceptable = (node1.type.class_type == 'BasicType'
                  and node2.type.class_type == 'BasicType'
                  and node1.type.type in bit_bases
                  and node2.type.type in bit_bases)
    if not acceptable:
        Errors(errorType='TypeError',
               errorText=op + ' not support type ' + node1.type.stype + ',' +
               node2.type.stype,
               token_object=token)
        return Node(type="error")

    node1, node2, typ = implicit_casting(node1, node2)
    result = Node(name="binary_op",
                  value=typ.stype + op,
                  children=[node1, node2],
                  type=typ)
    result.code = node1.code + node2.code
    result_place, instruction = get_opcode(op=typ.stype + op,
                                           place1=node1.place,
                                           place2=node2.place,
                                           type=typ)
    result.code += [instruction]
    result.place = result_place
    return result
def p_func_rparen_1(p):
    '''
    func_rparen_1 : R_PAREN
    '''
    # Embedded action run at the closing parenthesis of a function header
    # that has a parameter list. The (declarator, FunctionType) pair sits
    # two entries down the parser stack and the parameter list is on top.
    # The function is reconciled with an earlier declaration, or added to
    # the parent of the current scope once its parameter list is attached.
    if p.stack[-2].value == "error":
        return

    header = p.stack[-2].value
    declarator = header[0]
    func_name = declarator.value
    token = declarator.data['token']
    func_type = header[1]
    func_type.add_param_list(p.stack[-1].value)

    enclosing = sym_table.curr_symbol_table.parent
    previous = enclosing._look_up(name=func_name,
                                  token_object=p.slice[-1],
                                  no_error=True)

    if not (previous and previous.type.class_type == "FunctionType"):
        # No earlier function entry under this name: register it.
        sym_table.curr_symbol_table.parent._add_entry(name=func_name,
                                                      type=func_type,
                                                      token_object=token)
        return

    if previous.type.defined == True:
        Errors(errorType='DeclarationError',
               errorText='Function is already defined',
               token_object=p.slice[-1])
        return

    # The definition must match the earlier declaration's signature.
    if func_type.is_same(previous.type) == False:
        Errors(errorType='DeclarationError',
               errorText=
               'Function is declared with different parameters/return type',
               token_object=p.slice[-1])
        return

    # Drop the declaration's symbol table in favour of the current scope.
    previous.type.symbol_table.unused = True
    previous.type.symbol_table = sym_table.curr_symbol_table
def type_check_logical(node1, node2, op, token):
    """Type-check a logical binary operator and build its AST node.

    Both operands must be ``BasicType`` with one of the listed base types.
    Any operand that is not already ``bool`` is wrapped in a ``type_cast``
    node to ``bool``.

    Args:
        node1: Left operand node.
        node2: Right operand node.
        op: Operator text, used as the node value and in diagnostics.
        token: Token passed to ``Errors`` for reporting.

    Returns:
        A ``binary_op`` node typed ``bool``, or ``Node(type="error")``.
    """
    truth_bases = {'int', 'long', 'char', 'double', 'float', 'bool'}

    if node1.type == "error" or node2.type == "error":
        return Node(type="error")

    supported = (node1.type.class_type == 'BasicType'
                 and node2.type.class_type == 'BasicType'
                 and node1.type.type in truth_bases
                 and node2.type.type in truth_bases)
    if not supported:
        Errors(errorType='TypeError',
               errorText=op + ' not support type ' + node1.type.stype + ',' +
               node2.type.stype,
               token_object=token)
        return Node(type="error")

    def as_bool(operand):
        # Leave bool operands untouched; cast everything else.
        if operand.type.type == "bool":
            return operand
        return Node(name="type_cast",
                    value='bool',
                    children=[operand],
                    type=BasicType('bool'))

    node1 = as_bool(node1)
    node2 = as_bool(node2)
    return Node(name="binary_op",
                value=op,
                children=[node1, node2],
                type=BasicType('bool'))
def type_check_add(node1, node2, op, token):
    """Type-check an additive operator (``+`` / ``-``) and build its AST node.

    Accepted operand combinations:

    * pointer (op) integer: result has the pointer's type;
    * integer ``+`` pointer: operands are swapped so the pointer comes
      first, result has the pointer's type;
    * basic (op) basic: operands go through ``implicit_casting``.

    ``integer - pointer`` is rejected. It is not a valid C expression, and
    the operand swap used for ``+`` would otherwise turn it into
    ``pointer - integer``.

    Args:
        node1: Left operand node.
        node2: Right operand node.
        op: Operator text, used in the node value and in diagnostics.
        token: Token passed to ``Errors`` for reporting.

    Returns:
        A ``binary_op`` node, or ``Node(type="error")`` on failure.
    """
    integral = {'int', 'long', 'char'}
    arithmetic = {'int', 'long', 'char', 'double', 'float'}

    if node1.type == "error" or node2.type == "error":
        return Node(type="error")

    def unsupported():
        # Single diagnostic shared by every rejection path.
        Errors(errorType='TypeError',
               errorText=op + ' not support type ' + node1.type.stype + ',' +
               node2.type.stype,
               token_object=token)
        return Node(type="error")

    class1 = node1.type.class_type
    class2 = node2.type.class_type

    if class1 == 'PointerType' and class2 == 'BasicType':
        if node2.type.type not in integral:
            return unsupported()
        return Node(name="binary_op",
                    value=node1.type.stype + op,
                    type=node1.type,
                    children=[node1, node2])

    if class1 == 'BasicType' and class2 == 'PointerType':
        # Subtraction is not commutative, so the swap below is only valid
        # for "+".
        if op == "-" or node1.type.type not in integral:
            return unsupported()
        return Node(name="binary_op",
                    value=node2.type.stype + op,
                    type=node2.type,
                    children=[node2, node1])

    if class1 == 'BasicType' and class2 == 'BasicType':
        if (node1.type.type not in arithmetic
                or node2.type.type not in arithmetic):
            return unsupported()
        node1, node2, typ = implicit_casting(node1, node2)
        return Node(name="binary_op",
                    value=typ.stype + op,
                    children=[node1, node2],
                    type=typ)

    return unsupported()
def type_check_assign(node1, node2, token):
    """Type-check ``node1 = node2`` and emit the store instruction.

    Rejects assignment to a constant place, assignment of a string constant
    (outside a declaration), and assignment to an array. Otherwise the right
    operand is converted to the left operand's type via ``typecast``. A store
    is emitted either through a ``load$`` address (``<type>_eq``) or directly
    to the target place (``<type>=``).

    Args:
        node1: Assignment target node.
        node2: Value node.
        token: Token passed to ``Errors`` for reporting.

    Returns:
        The assignment node after ``load_place``, or ``Node(type="error")``.
    """
    if node1.type == "error" or node2.type == "error":
        return Node(type="error")

    # NOTE: "sconst@" contains "const@", so string constants on the left
    # are caught by this check as well.
    if "const@" in node1.place:
        Errors(errorType='TypeError',
               errorText="cannot assign to constant",
               token_object=token)
        return Node(type="error")

    if "sconst@" in node2.place:
        Errors(errorType='TypeError',
               errorText="can assign string constant only in declaration",
               token_object=token)
        return Node(type="error")

    if node1.type.class_type == "PointerType" and node1.type.is_array == True:
        Errors(errorType='TypeError',
               errorText="cannot assign to array",
               token_object=token)
        return Node(type="error")

    if not node2.type.is_convertible_to(node1.type):
        Errors(errorType='TypeError',
               errorText="cannot assign " + node2.type.stype + " to " +
               node1.type.stype,
               token_object=token)
        return Node(type="error")

    node2 = typecast(node2, type=node1.type)
    node = Node("binary_op",
                node1.type.stype + "=",
                children=[node1, node2],
                type=node1.type)
    node.code = node1.code + node2.code

    if "load$" in node1.place:
        # Target is a dereference: store through the address that follows
        # the "load$" marker.
        place = node1.place.split("load$")[-1]
        node.code += [
            gen(op=get_type(node2) + "_eq", place1=node2.place, place3=place)
        ]
    else:
        node.code += [
            gen(op=get_type(node2) + "=",
                place1=node2.place,
                place3=node1.place)
        ]

    if "const@" in node2.place:
        const_use(node2.place)

    node.place = node1.place
    node = load_place(node)
    return node
def p_postfix_expression(p):
    '''
    postfix_expression : primary_expression
                       | postfix_expression INCREMENT
                       | postfix_expression DECREMENT
    '''
    # Semantic action for postfix ++ / --.
    # The operand must be a BasicType with an arithmetic base type or a
    # PointerType. The result is a "unary_op" node whose value is
    # "<type>: p<op>" and whose type is the operand's. Otherwise the
    # operand node itself is marked as an error and a TypeError is reported.
    if len(p) == 2:
        p[0] = p[1]
        return

    operand = p[1]
    if operand.type == 'error':
        # Already an error: pass the node through untouched.
        p[0] = operand
        return

    arithmetic_bases = {'int', 'float', 'double', 'char', 'long'}
    kind = operand.type.class_type
    steppable = ((kind == 'BasicType'
                  and operand.type.type in arithmetic_bases)
                 or kind == 'PointerType')
    if steppable:
        p[0] = Node(name="unary_op",
                    value=str(operand.type) + ': p' + p[2],
                    children=[operand],
                    type=operand.type)
        return

    # Unsupported operand: reuse its node, flagged as an error.
    p[0] = operand
    operand.type = 'error'
    Errors(errorType='TypeError',
           errorText='increment/decrement not possible',
           token_object=p.slice[-1])
def p_function_definition_1(p):
    '''
    function_definition : type_specifier declarator func_scope parameter_type_list R_PAREN SEMI_COLON pop_sym
                        | type_specifier declarator func_scope R_PAREN SEMI_COLON pop_sym
    '''
    # Semantic action for a function declaration (prototype ending in ';').
    # Reports a DeclarationError if the name is already known; otherwise
    # records a FunctionType with defined=False, the parameter list (empty
    # for the second alternative) and the value of the pop_sym symbol as
    # its symbol table.
    p[0] = []

    success = sym_table.look_up(name=p[2].value,
                                token_object=p[2].data['token'],
                                no_error=True)
    # Identity test: the lookup signals "not found" with None.
    if success is not None:
        Errors(errorType='DeclarationError',
               errorText='Function is already declared/defined',
               token_object=p[2].data['token'])
        return

    # Positive indices only: negative indices on a PLY production read the
    # parser stack rather than the right-hand side.
    if len(p) == 8:
        param_list, scope_table = p[4], p[7]
    else:
        param_list, scope_table = [], p[6]

    sym_table.add_entry(name=p[2].value,
                        type=FunctionType(return_type=p[2].type,
                                          param_list=param_list,
                                          defined=False,
                                          symbol_table=scope_table))
def op_on_const(op, place1, place2):
    """Fold a binary operator applied to two constant operands.

    The operator is identified from the END of ``op``, so any prefix before
    the operator symbol is ignored. Two-character operators are matched
    before one-character ones so that ``<=`` / ``>=`` / ``>>`` / ``<<`` are
    not mistaken for ``<`` / ``>``.

    Args:
        op: Operator string ending in the operator symbol.
        place1: Place of the left constant (resolved by ``get_const_value``).
        place2: Place of the right constant (resolved by ``get_const_value``).

    Returns:
        The folded value. Comparisons return 0 or 1. Division or modulo by
        zero reports a ``RuntimeError`` through ``Errors`` and returns 0.

    Raises:
        AssertionError: If ``op`` does not end in a supported operator.
    """
    value1 = get_const_value(place1)
    value2 = get_const_value(place2)

    # length 2 operators: <= , >= , == , != , >> , <<
    suffix2 = op[-2:]
    if suffix2 == "<=":
        return int(value1 <= value2)
    if suffix2 == ">=":
        return int(value1 >= value2)
    if suffix2 == "==":
        return int(value1 == value2)
    if suffix2 == "!=":
        return int(value1 != value2)
    if suffix2 == ">>":
        return value1 >> value2
    if suffix2 == "<<":
        return value1 << value2

    # length 1 operators: + , - , / , * , % , ^ , & , | , > , <
    suffix1 = op[-1]
    if suffix1 == "+":
        return value1 + value2
    if suffix1 == "-":
        return value1 - value2
    if suffix1 == "/":
        if value2 == 0:
            Errors(
                errorType='RuntimeError',
                errorText='division by zero',
            )
            return 0
        # NOTE(review): this is Python true division, so integer operands
        # yield a float. Confirm that callers truncate for integer types.
        return value1 / value2
    if suffix1 == "*":
        return value1 * value2
    if suffix1 == "%":
        if value2 == 0:
            # Same handling as division; otherwise Python raises an
            # uncaught ZeroDivisionError while folding.
            Errors(
                errorType='RuntimeError',
                errorText='modulo by zero',
            )
            return 0
        return value1 % value2
    if suffix1 == "^":
        return value1 ^ value2
    if suffix1 == "&":
        return value1 & value2
    if suffix1 == "|":
        return value1 | value2
    if suffix1 == ">":
        return int(value1 > value2)
    if suffix1 == "<":
        return int(value1 < value2)

    # Raised explicitly so the check survives `python -O`.
    raise AssertionError(op + " not in list")
def p_postfix_expression_2(p):  # function ref
    '''
    postfix_expression : postfix_expression L_PAREN R_PAREN
                       | postfix_expression L_PAREN argument_expression_list R_PAREN
    '''
    # Semantic action for a function call.
    # The callee must have FunctionType and the number of supplied
    # arguments must equal the number of declared parameters. Argument
    # types are not compared here. The result is a "func_call" node typed
    # with the callee's return type.
    callee = p[1]
    if callee.type == "error":
        p[0] = Node(type="error")
        return

    if callee.type.class_type != "FunctionType":
        p[0] = Node(type="error")
        Errors(errorType='TypeError',
               errorText='not function type',
               token_object=p.slice[-1])
        return

    param_list = callee.type.param_list
    return_type = callee.type.return_type

    if len(p) == 4:
        # Call written with empty parentheses.
        if len(param_list) > 0:
            p[0] = Node(type="error")
            Errors(errorType='TypeError',
                   errorText='Function require more arguments',
                   token_object=p.slice[-1])
            return
        p[0] = Node(name="func_call", type=return_type, children=[p[1]])
        return

    arg_list = p[3].data['args_type']
    if len(arg_list) != len(param_list):
        p[0] = Node(type="error")
        Errors(errorType='TypeError',
               errorText='No of arguments is not matching',
               token_object=p.slice[-1])
        return

    # check for type cast if possible
    p[0] = Node(name="func_call", type=return_type, children=[p[1], p[3]])
def typecast(node1, type, token=None, hard=False):
    """Convert ``node1`` to ``type``, emitting a conversion when needed.

    Behaviour, in order:

    * If the source and target ``stype`` are equal the node is returned
      as-is (``hard=True`` additionally rebinds ``node1.type`` to ``type``).
    * A string constant cannot be cast. With ``token`` a ``TypeError`` is
      reported and an error node returned; without it the call is treated
      as an internal error.
    * Otherwise a ``type_cast`` node is created. Pointers are treated as
      ``long``. No instruction is emitted when the underlying types are
      equal or both in ``{long, int}``. Constants are re-created with the
      target type. Any other case gets a ``<from>_to_<to>`` instruction
      writing to a fresh temporary.

    Args:
        node1: Node to convert; it is first passed through ``load_place``.
        type: Target ``Type`` (Basic, Pointer or Struct).
        token: Optional token used to report a string-constant cast.
        hard: Rebind the node's type even when the ``stype`` already matches.

    Returns:
        The original node, a new ``type_cast`` node, or ``Node(type="error")``.

    Raises:
        AssertionError: On an invalid target type, or on a string-constant
            cast when no ``token`` is available for reporting.
    """
    node1 = load_place(node1)
    assert isinstance(type, Type), "not of class Type"
    assert type.class_type in {"BasicType", "PointerType",
                               "StructType"}, "not valid type"

    if node1.type.stype == type.stype:
        if hard:
            node1.type = type
        return node1

    if "sconst@" in node1.place:
        if token:
            Errors(errorType='TypeError',
                   errorText="cannot assign string constant to type " +
                   type.stype,
                   token_object=token)
            return Node(type="error")
        # The original `assert "string in typecast"` tested a non-empty
        # string, never failed, and let the function return None.
        raise AssertionError("string in typecast")

    node = Node(name="type_cast",
                value=type.stype,
                children=[node1],
                type=type)
    node.code = node1.code
    node.place = node1.place

    # Pointers are converted as if they were `long`.
    type1 = 'long' if type.class_type == 'PointerType' else type.type
    type2 = ('long'
             if node1.type.class_type == 'PointerType' else node1.type.type)

    if type1 == type2:
        return node
    if type1 in {"long", "int"} and type2 in {"long", "int"}:
        # long <-> int needs no conversion instruction.
        return node

    if "const@" in node1.place:
        # Re-create the constant with the target type instead of emitting
        # a conversion instruction.
        node.place = get_const(const=get_const_value(node1.place),
                               type=BasicType(type1))
        return node

    node.place = get_newtmp(type=BasicType(type1))
    node.code += [
        gen(op=type2 + "_to_" + type1,
            place1=node1.place,
            place3=node.place,
            code=node.place + " = " + type2 + "_to_" + type1 + " " +
            node1.place)
    ]
    return node
def p_jump_statement_1(p):
    '''
    jump_statement : RETURN SEMI_COLON
                   | RETURN expression SEMI_COLON
    '''
    # Semantic action for `return;` and `return expr;`.
    # The enclosing function's return type is read from the 'return' entry
    # in the symbol table. A bare return requires that type to equal
    # Type(). A valued return requires the expression to be convertible,
    # and a type_cast node is inserted when the printed types differ.
    expected = sym_table.look_up(name='return',
                                 token_object=p.slice[1],
                                 no_error=True)
    if not expected:
        # No 'return' entry visible: not inside a function.
        p[0] = Node(type="error")
        Errors(errorType='TypeError',
               errorText='Not Function',
               token_object=p.slice[2])
        return

    if len(p) == 3:
        # `return;`
        if expected.type != Type():
            p[0] = Node(type="error")
            Errors(errorType='TypeError',
                   errorText='return type not matching',
                   token_object=p.slice[2])
            return
        p[0] = Node(name="return", type="ok")
        return

    # `return expr;`
    if p[2].type == "error":
        p[0] = Node(type="error")
        return

    if not p[2].type.is_convertible_to(expected.type):
        p[0] = Node(type="error")
        Errors(errorType='TypeError',
               errorText='return type not matching',
               token_object=p.slice[2])
        return

    if str(p[2].type) != str(expected.type):
        p[2] = Node(name="type_cast",
                    value=expected.type.stype,
                    children=[p[2]],
                    type=expected.type)
    p[0] = Node(name="return", children=[p[2]], type="ok")
def type_check_init(init, type, token):
    """Type-check an initializer against the declared type.

    ``init`` is either a single ``Node`` (scalar initializer) or a list
    whose items are nodes or nested lists (brace initializer). A scalar is
    accepted when its type is convertible to ``type``. A list requires
    ``type`` to be a ``PointerType``, requires every item to match the
    first item's length (a node counts as length 0), and checks each item
    recursively against ``type.type``.

    Args:
        init: ``Node`` or (nested) list of initializer items.
        type: Declared type to check against.
        token: Token passed to ``Errors`` for reporting.

    Returns:
        The scalar node itself, an ``init_array`` node for a list, or
        ``Node(type="error")`` on any failure.
    """
    if isinstance(init, Node):
        if init.type == "error":
            return Node(type="error")
        if not init.type.is_convertible_to(type):
            Errors(errorType='TypeError',
                   errorText="cannot assign " + init.type.stype + " to " +
                   type.stype,
                   token_object=token)
            return Node(type="error")
        return init

    if type.class_type != "PointerType":
        Errors(errorType='TypeError',
               errorText="cannot assign array to " + type.stype,
               token_object=token)
        return Node(type="error")

    first = init[0]
    expected_len = 0 if isinstance(first, Node) else len(first)

    def mismatched(item):
        # Nodes count as length 0; lists must match the first item's length.
        if isinstance(item, Node):
            return expected_len != 0
        if isinstance(item, list):
            return expected_len != len(item)
        return False

    if any(mismatched(item) for item in init[1:]):
        Errors(errorType='TypeError',
               errorText="size of arrays not matching",
               token_object=token)
        return Node(type="error")

    checked_items = []
    for item in init:
        checked = type_check_init(item, type.type, token)
        if checked.type == "error":
            return Node(type="error")
        checked_items.append(checked)

    return Node(name="init_array",
                value="{}",
                children=checked_items,
                type=type)
def p_postfix_expression_1(p):  # Array ref
    '''
    postfix_expression : postfix_expression L_SQBR expression R_SQBR
    '''
    # Semantic action for `base[index]`.
    # The index must be a BasicType int/long and the base must be a
    # PointerType. Both operands are always examined, so each can report
    # its own diagnostic. The result is an "array_ref" node typed with the
    # pointee type, or an error node if either check fails.
    index_type = p[3].type
    index_ok = True
    if index_type == "error":
        index_ok = False
        p[0] = Node(type="error")
    elif not (index_type.class_type == 'BasicType'
              and index_type.type in {'int', 'long'}):
        Errors(errorType='TypeError',
               errorText='wrong index type ' + p[3].type.stype,
               token_object=p.slice[-1])
        index_ok = False
        p[0] = Node(type="error")

    base_type = p[1].type
    base_ok = True
    if base_type == "error":
        base_ok = False
        p[0] = Node(type="error")
    elif base_type.class_type != 'PointerType':
        Errors(errorType='TypeError',
               errorText='cannot reference type ' + p[1].type.stype,
               token_object=p.slice[-1])
        base_ok = False
        p[0] = Node(type="error")

    if index_ok and base_ok:
        p[0] = Node(name="array_ref",
                    value="[]",
                    type=p[1].type.type,
                    children=[p[1], p[3]])
def p_func_scope(p):
    '''
    func_scope : L_PAREN
    '''
    # Embedded action run at the opening parenthesis of a function header.
    # The declarator on top of the parser stack supplies the function name
    # and return type. A new scope named after the function is opened, and
    # a pseudo-entry called 'return' records the return type inside it.
    # p[0] becomes the pair (declarator, FunctionType).
    decl = p.stack[-1].value
    return_type = decl.type

    returns_array = (return_type.class_type == "PointerType"
                     and len(return_type.array_size) != 0)
    if returns_array:
        Errors(errorType='DeclarationError',
               errorText='Function cannot have array return type',
               token_object=p.slice[-1])

    sym_table.start_scope(name=decl.value)
    sym_table.add_entry(name='return', type=return_type)

    func_type = FunctionType(return_type=return_type,
                             symbol_table=sym_table.curr_symbol_table)
    p[0] = (decl, func_type)
def type_check_assign(node1, node2, token):
    """Type-check ``node1 = node2`` and build the assignment AST node.

    The value must be convertible to the target's type. When the printed
    types differ the value is wrapped in a ``type_cast`` node first.

    Args:
        node1: Assignment target node.
        node2: Value node.
        token: Token passed to ``Errors`` for reporting.

    Returns:
        A ``binary_op`` node typed like the target, or ``Node(type="error")``.
    """
    target_type = node1.type
    if target_type == "error" or node2.type == "error":
        return Node(type="error")

    if not node2.type.is_convertible_to(target_type):
        Errors(errorType='TypeError',
               errorText="cannot assign " + node2.type.stype + " to " +
               node1.type.stype,
               token_object=token)
        return Node(type="error")

    if str(node2.type) != str(target_type):
        node2 = Node(name="type_cast",
                     value=target_type.stype,
                     children=[node2],
                     type=target_type)

    return Node("binary_op",
                target_type.stype + "=",
                children=[node1, node2],
                type=target_type)
def type_check_assign_op(node1, node2, op, token):
    """Type-check an assignment operator (``=``, ``+=``, ``-=``, ...).

    Plain ``=`` is delegated to ``type_check_assign``. A compound operator
    is split: the binary operator (``op`` without its trailing ``=``) is
    checked by the matching ``type_check_*`` helper, and the result is then
    assigned to ``node1``.

    Args:
        node1: Assignment target node.
        node2: Right-hand operand node.
        op: Full operator text, including the trailing ``=``.
        token: Token passed to ``Errors`` for reporting.

    Returns:
        The node produced by ``type_check_assign``, or ``Node(type="error")``
        for error operands or an unsupported operator.
    """
    if node1.type == "error" or node2.type == "error":
        return Node(type="error")

    if op == "=":
        return type_check_assign(node1, node2, token)

    # Strip the trailing "=" to get the underlying binary operator.
    op = op[:-1]

    if op in ("+", "-"):
        combined = type_check_add(node1, node2, op, token)
    elif op in ("/", "*"):
        combined = type_check_multi(node1, node2, op, token)
    elif op == "%":
        combined = type_check_multi(node1, node2, op, token, decimal=False)
    elif op in ("^", "&", "|"):
        combined = type_check_bit(node1, node2, op, token)
    else:
        Errors(errorType='SyntaxError',
               errorText="invalid " + op + "=",
               token_object=token)
        return Node(type="error")

    return type_check_assign(node1, combined, token)
def p_cast_expression(p):
    '''
    cast_expression : unary_expression
                    | L_PAREN type_name R_PAREN cast_expression
    '''
    # Semantic action for an explicit cast `(type) expr`.
    # The operand's type must be convertible to the named type. The result
    # is a "type_cast" node carrying the target type, or an error node.
    if len(p) == 2:
        p[0] = p[1]
        return

    if p[2].type == 'error' or p[4].type == 'error':
        p[0] = Node(type='error')
        return

    if p[4].type.is_convertible_to(p[2].type):
        p[0] = Node(name="type_cast",
                    value=p[2].type.stype,
                    type=p[2].type,
                    children=[p[4]])
        return

    # The original read `p[4].types.stype`, which raises AttributeError
    # instead of reporting the diagnostic.
    Errors(errorType='TypeError',
           errorText="cannot typecast " + p[4].type.stype + " to " +
           p[2].type.stype,
           token_object=p.slice[1])
    p[0] = Node(type='error')
def type_check_logical(node1, node2, op, token):
    """Type-check ``&&`` / ``||`` and emit short-circuit three-address code.

    Operands may be ``BasicType`` (with an allowed base type) or
    ``PointerType``; string constants are rejected. If both operands are
    constants the result is folded into a ``bool`` constant. Otherwise the
    emitted code evaluates the left operand, jumps to a shared label as soon
    as the outcome is decided, and writes 0/1 into a fresh ``bool``
    temporary.

    Any ``op`` other than ``"&&"`` is handled as logical OR.

    Args:
        node1: Left operand node.
        node2: Right operand node.
        op: Operator text (``"&&"`` or ``"||"``).
        token: Token passed to ``Errors`` for reporting.

    Returns:
        A ``binary_op`` node typed ``bool`` with ``place`` (and ``code``
        unless folded) set, or ``Node(type="error")``.
    """
    allowed_class = {'BasicType', 'PointerType'}
    allowed_base = {'int', 'long', 'char', 'float', 'bool'}

    if node1.type == "error" or node2.type == "error":
        return Node(type="error")

    if "sconst@" in node1.place or "sconst@" in node2.place:
        Errors(errorType='TypeError',
               errorText=op + ' not support string constant',
               token_object=token)
        return Node(type="error")

    def unsupported():
        # One diagnostic shared by every type-rejection path.
        Errors(errorType='TypeError',
               errorText=op + ' not support type ' + node1.type.stype + ',' +
               node2.type.stype,
               token_object=token)
        return Node(type="error")

    if not (node1.type.class_type in allowed_class
            and node2.type.class_type in allowed_class):
        return unsupported()
    for operand in (node1, node2):
        if (operand.type.class_type == "BasicType"
                and operand.type.type not in allowed_base):
            return unsupported()

    node = Node(name="binary_op",
                value=op,
                children=[node1, node2],
                type=BasicType('bool'))

    is_and = op == "&&"

    if "const@" in node1.place and "const@" in node2.place:
        # Constant folding. The right value is only fetched when the left
        # one does not already decide the result.
        left_value = get_const_value(node1.place)
        if is_and:
            outcome = left_value and get_const_value(node2.place)
        else:
            outcome = left_value or get_const_value(node2.place)
        node.place = get_const(1 if outcome else 0, "bool")
        return node

    if "const@" in node1.place:
        const_use(node1.place)
    if "const@" in node2.place:
        const_use(node2.place)

    # "&&" short-circuits on zero to the value 0;
    # "||" short-circuits on non-zero to the value 1.
    if is_and:
        jump_op, fallthrough_value, shortcut_value = "ifz", 1, 0
    else:
        jump_op, fallthrough_value, shortcut_value = "ifnz", 0, 1

    node.place = get_newtmp(BasicType('bool'))
    shortcut_label = get_newlabel()
    end_label = get_newlabel()

    node.code = node1.code
    node.code += [gen(op=jump_op, place1=node1.place, place2=shortcut_label)]
    node.code += node2.code
    node.code += [gen(op=jump_op, place1=node2.place, place2=shortcut_label)]
    node.code += [
        gen(op="bool=",
            place1=get_const(fallthrough_value, type='bool', use=True),
            place3=node.place)
    ]
    node.code += [gen(op="goto", place1=end_label)]
    node.code += [gen(op="label", place1=shortcut_label)]
    node.code += [
        gen(op="bool=",
            place1=get_const(shortcut_value, type='bool', use=True),
            place3=node.place)
    ]
    node.code += [gen(op="label", place1=end_label)]
    return node
def main():
    """Command-line driver: compile a C source file and emit artifacts.

    Parses the command line, reads the source file, and runs it through
    ``get_grammar``. If any errors were collected they are printed and the
    function returns. Otherwise the three-address code is cleaned up and
    written out as assembly (``temp.asm``). Unless ``-n`` is given the
    assembly is assembled with ``yasm`` and linked with ``ld``. Optional
    flags additionally dump the assembly, object file, AST image, symbol
    table, three-address code and lexeme table.

    Exits with status -1 if the source file cannot be found.
    """
    # Local import: only this function needs shell quoting.
    import shlex

    # read source code provided by user
    arg_parser = argparse.ArgumentParser(description="C compiler for x86_64")
    arg_parser.add_argument('source_code', help="location of source code file")
    # The help texts previously contained "\{" / "\}", which are invalid
    # escape sequences (a SyntaxWarning on recent Python versions).
    arg_parser.add_argument('-o',
                            help="output file name, {default a.out}",
                            default="a.out")
    arg_parser.add_argument(
        '-f',
        help="name of file for additional files, {default a}",
        default="a")
    arg_parser.add_argument('-c',
                            action='store_true',
                            help="output object file")
    arg_parser.add_argument('-d', action='store_true', help="output assembly")
    arg_parser.add_argument('-a', action='store_true', help="output ast")
    arg_parser.add_argument('-s',
                            action='store_true',
                            help="output symbol table")
    arg_parser.add_argument('-t',
                            action='store_true',
                            help="output 3 address code")
    arg_parser.add_argument('-l',
                            action='store_true',
                            help="output lexeme table")
    arg_parser.add_argument(
        '-stdc',
        action='store_true',
        help=
        "linker method, if specified it'll uses custom elf entry else from standard X86-64-linux.so"
    )
    # store_false: args.n is True by default and becomes False when -n is
    # passed, which skips assembling and linking below.
    arg_parser.add_argument(
        '-n',
        action="store_false",
        help="only create till asm, , do not create executable")
    args = arg_parser.parse_args()

    try:
        # Context manager so the file handle is always closed.
        with open(args.source_code, "r") as source_handle:
            source_code = source_handle.read()
    except FileNotFoundError:
        print(
            "source file cannot be open/read.\nCheck the file name or numbers of arguments!!"
        )
        sys.exit(-1)

    source_file = args.source_code
    file_name = args.f

    grammar = get_grammar(source_file, source_code, debug=1)
    if len(Errors.get_all_error()):
        for error in Errors.get_all_error():
            print(error)
        return

    tac_code = grammar.code
    tac_code = remove_none(tac_code)
    # to remove redundant labels
    tac_code = remove_label(tac_code)
    print_asm(tac_code, stdc=args.stdc)

    # User-supplied names are quoted before being spliced into shell
    # commands, so spaces or shell metacharacters cannot alter them.
    quoted_output = shlex.quote(args.o)

    # assembly
    if args.d:
        os.system("cp temp.asm {}".format(shlex.quote(file_name + ".asm")))

    if args.n:
        os.system('yasm -g dwarf2 -f elf64 temp.asm 2> temp')
        os.system('touch temp')
        if args.c:
            os.system("cp temp.o " + shlex.quote(file_name + ".o"))
        if args.stdc:
            os.system(
                "ld -dynamic-linker /lib64/ld-linux-x86-64.so.2 -o {} temp.o -lc -lm 2> temp"
                .format(quoted_output))
        else:
            os.system(
                "ld -o {} -dynamic-linker /lib64/ld-linux-x86-64.so.2 /usr/lib/x86_64-linux-gnu/crt1.o /usr/lib/x86_64-linux-gnu/crti.o -lc temp.o /usr/lib/x86_64-linux-gnu/crtn.o -lm 2> temp"
                .format(quoted_output))
        os.system("rm -rf temp.asm temp.o temp")
    else:
        os.system("rm -rf temp.asm")

    # ast
    if args.a:
        Graph = draw_ast(grammar)
        Graph.draw(file_name + '.png', format='png')

    # symbol table
    if args.s:
        print_csv(sym_table=sym_table, filename=file_name + ".csv")

    # 3AC
    if args.t:
        print_code(tac_code, filename=file_name + ".3ac")

    # lexeme
    if args.l:
        print_lexeme(source_code, file_name + ".lex")
def type_check_unary(node1, op, token, is_typename=False):
    """Type-check a unary operator and emit its three-address code.

    Supported operators and their handling:

    * ``++`` / ``--``: not on constants; basic arithmetic or pointer
      operand. Implemented as ``operand (+|-)= 1`` via
      ``type_check_assign_op``.
    * ``+`` / ``-``: basic arithmetic operand. ``-`` on a constant is
      folded, otherwise a ``u<type>-`` instruction is emitted.
    * ``&``: not on constants. Taking the address of a ``load$`` place
      reuses the address that follows the marker.
    * ``*``: pointer operand, not a constant. Produces a lazy ``load$``
      place.
    * ``~``: integral operand. Constants are folded.
    * ``!``: operand convertible to ``bool``. Constants are folded.
    * ``sizeof``: constant ``long`` holding the type's ``width``.

    Args:
        node1: Operand node.
        op: Operator text.
        token: Token passed to ``Errors`` for reporting.
        is_typename: For ``sizeof``, whether the operand is a type name.

    Returns:
        A ``unary_op`` node, or ``Node(type="error")`` on failure. For an
        unrecognised ``op`` nothing is returned (``None``).
    """
    allowed_base = {'int', 'float', 'char', 'long'}
    allowed_base_1 = {'int', 'char', 'long'}
    error = False
    error_const = False

    if node1.type == "error":
        return Node(type="error")

    # Constness is sampled once, before any load_place() call below.
    const = 'const@' in node1.place

    if op == "++" or op == "--":
        if const:
            error_const = True
        elif (node1.type.class_type == "BasicType" and node1.type.type
              in allowed_base) or node1.type.class_type == "PointerType":
            node = Node(name="unary_op",
                        value=node1.type.stype + op,
                        children=[node1],
                        type=node1.type)
            node.place = node1.place
            # Build `operand (+|-)= 1` and reuse its generated code.
            const_place = get_const(const=1, type=BasicType("long"))
            node_1 = Node(type=BasicType("long"))
            node_1.place = const_place
            node_assign = type_check_assign_op(node1,
                                               node_1,
                                               op=op[0] + "=",
                                               token=token)
            if node_assign.type == "error":
                return Node(type="error")
            node.code = node_assign.code
            return node
        else:
            error = True

    elif op == "+" or op == "-":
        node1 = load_place(node1)
        if (node1.type.class_type == "BasicType"
                and node1.type.type in allowed_base):
            node = Node(name="unary_op",
                        value=node1.type.stype + op,
                        children=[node1],
                        type=node1.type)
            if op == "+":
                # Unary plus is a no-op on the value, but the operand's
                # code must still be carried along. Previously it was
                # dropped here.
                node.code = node1.code
                node.place = node1.place
                return node
            if "const@" in node1.place:
                neg = -1 if op == "-" else 1
                node.place = get_const(neg * get_const_value(node1.place),
                                       type=node1.type)
                return node
            node.code = node1.code
            node.place = get_newtmp(type=node1.type)
            node.code += [
                gen(op="u" + node1.type.stype + op,
                    place1=node1.place,
                    place3=node.place)
            ]
            return node
        error = True

    elif op == "&":
        if const:
            error_const = True
        else:
            node = Node(name="unary_op",
                        value=op,
                        children=[node1],
                        type=PointerType(node1.type))
            node.code = node1.code
            if "load$" in node1.place:
                # &(*p): the address is whatever follows the load marker.
                node.place = node1.place.split("load$")[-1]
                return node
            node.place = get_newtmp()
            node.code += [
                gen(op="addr",
                    place1=node1.place,
                    place3=node.place,
                    code=node.place + " = " + "addr(" + node1.place + ")")
            ]
            return node

    elif op == "*":
        node1 = load_place(node1)
        if const:
            error_const = True
        elif node1.type.class_type == "PointerType":
            node = Node(name="unary_op",
                        value=op,
                        children=[node1],
                        type=node1.type.type)
            if node.type.class_type == "PointerType":
                # Dereferencing peels off the outermost array dimension.
                node.type.array_size = node1.type.array_size[1:]
            node.code = node1.code
            # Lazy load: the place is only marked here, not loaded.
            node.place = "load$" + node1.place
            return node
        else:
            Errors(errorType='TypeError',
                   errorText="cannot dereference non-pointer type " +
                   node1.type.stype,
                   token_object=token)
            return Node(type="error")

    elif op == "~":
        node1 = load_place(node1)
        if (node1.type.class_type == "BasicType"
                and node1.type.type in allowed_base_1):
            node = Node(name="unary_op",
                        value=node1.type.stype + op,
                        children=[node1],
                        type=node1.type)
            if "const@" in node1.place:
                node.place = get_const(~get_const_value(node1.place),
                                       type=node1.type)
                return node
            node.code = node1.code
            node.place = get_newtmp(type=node1.type)
            node.code += [
                gen(op=node1.type.stype + op,
                    place1=node1.place,
                    place3=node.place)
            ]
            return node
        error = True

    elif op == "!":
        node1 = load_place(node1)
        if node1.type.is_convertible_to(BasicType('bool')):
            node = Node(name="unary_op",
                        value=node1.type.stype + op,
                        children=[node1],
                        type=BasicType('bool'))
            if "const@" in node1.place:
                # Logical NOT: zero folds to 1, anything else to 0. The
                # original folded to the operand's truth value instead of
                # its negation.
                value = 1 if get_const_value(node1.place) == 0 else 0
                node.place = get_const(value, type='bool')
                return node
            node.code = node1.code
            node.place = get_newtmp(type=BasicType('bool'))
            node.code += [
                gen(op="not_bool", place1=node1.place, place3=node.place)
            ]
            return node
        error = True

    elif op == "sizeof":
        if is_typename:
            node = Node(name="unary_op",
                        value=op + ':' + node1.type.stype,
                        type=BasicType(type='long'))
            node.place = get_const(const=node1.type.width, type="long")
            return node
        if isinstance(node1.type, Type) == False:
            Errors(errorType='TypeError',
                   errorText="cannot do sizeof on non type",
                   token_object=token)
            return Node(type="error")
        if node1.type.class_type in {'BasicType', 'PointerType', 'StructType'}:
            node = Node(name="unary_op",
                        value=op,
                        type=BasicType(type='long'),
                        children=[node1])
            node.place = get_const(const=node1.type.width, type="long")
            return node
        error = True

    if error_const:
        Errors(errorType='TypeError',
               errorText=op + " not valid on constant",
               token_object=token)
        return Node(type="error")
    if error:
        Errors(errorType='TypeError',
               errorText=op + ' not support type ' + node1.type.stype,
               token_object=token)
        return Node(type="error")
def main():
    """Driver code for Abstract syntax tree Generation.

    Parses the command line, reads the source file, runs the parser on it
    and then either prints every collected error, or renders the AST to a
    PNG and emits the dot script (to the console with ``-p``, otherwise to
    the ``-o`` file followed by the symbol-table CSV dump).

    Exits the process with status -1 when the source file cannot be
    opened or read.
    """
    arg_parser = argparse.ArgumentParser(
        description="Lexer for Source Language C")
    arg_parser.add_argument('source_code', help="source code file location")
    arg_parser.add_argument('-o',
                            help="take the name of dot script",
                            default="ast.dot")
    arg_parser.add_argument('-f',
                            help="take the name of png file",
                            default="ast.png")
    arg_parser.add_argument('-d',
                            help="take the name of csv file",
                            default="dump.csv")
    arg_parser.add_argument('-p',
                            action='store_true',
                            help="output dot script to console")
    arg_parser.add_argument('-l',
                            action='store_true',
                            help="output lexeme table")
    args = arg_parser.parse_args()

    # OSError (not just FileNotFoundError) so that permission problems or a
    # directory path produce the same friendly message instead of a traceback.
    try:
        with open(args.source_code, "r") as source_file:
            source_code = source_file.read()
    except OSError:
        print(
            "source file cannot be open/read.\nCheck the file name or numbers of arguments!!"
        )
        sys.exit(-1)

    if args.l:
        print_lexeme(source_code)

    parser = yacc.yacc(debug=1)
    lexer.lexer.filename = args.source_code
    result = parser.parse(source_code, lexer=lexer.lexer)

    # Any reported error suppresses AST output entirely.
    errors = Errors.get_all_error()
    if len(errors):
        for error in errors:
            print(error)
        return

    graph = draw_ast(result)
    # The PNG is rendered in both output modes.
    graph.draw(args.f, format='png')

    if args.p:
        # Console mode: print the dot script and skip the file/CSV outputs.
        print(graph.string())
        return

    with open(args.o, 'w') as dot_file:
        dot_file.write(graph.string())
    print_csv(sym_table=sym_table, filename=args.d)
def type_check_unary(node1, op, token, is_typename=False):
    """Type-check a unary operator applied to ``node1``.

    Args:
        node1: operand node; its ``type`` is either the string "error" or
            a type object exposing ``class_type``, ``type`` and ``stype``.
        op: operator text, one of "++", "--", "+", "-", "&", "*", "~", "!"
            or "sizeof".
        token: token object attached to any reported error.
        is_typename: when true, "sizeof" is treated as applied to a type
            name and the result node carries no children.

    Returns:
        A "unary_op" ``Node`` on success, a ``Node(type="error")`` when the
        operand is already an error or the check fails (an ``Errors`` entry
        is recorded in the latter case), or ``None`` for an operator that
        is not listed above.
    """
    operand_type = node1.type
    if operand_type == "error":
        return Node(type="error")

    arithmetic_bases = {'int', 'float', 'double', 'char', 'long'}
    integral_bases = {'int', 'char', 'long'}

    def is_basic_in(bases):
        # True when the operand is a BasicType whose base name is in `bases`.
        return (operand_type.class_type == "BasicType"
                and operand_type.type in bases)

    def tagged_node(result_type):
        # Result node whose value is "<operand stype><op>".
        return Node(name="unary_op",
                    value=operand_type.stype + op,
                    children=[node1],
                    type=result_type)

    if op in ("++", "--"):
        # Increment/decrement accept arithmetic basics and pointers alike.
        if is_basic_in(arithmetic_bases) or operand_type.class_type == "PointerType":
            return tagged_node(operand_type)
    elif op in ("+", "-"):
        if is_basic_in(arithmetic_bases):
            return tagged_node(operand_type)
    elif op == "&":
        # Address-of is accepted for every non-error operand.
        return Node(name="unary_op",
                    value=op,
                    children=[node1],
                    type=PointerType(operand_type))
    elif op == "*":
        if operand_type.class_type != "PointerType":
            Errors(errorType='TypeError',
                   errorText="cannot dereference non-pointer type " +
                   operand_type.stype,
                   token_object=token)
            return Node(type="error")
        return Node(name="unary_op",
                    value=op,
                    children=[node1],
                    type=operand_type.type)
    elif op == "~":
        if is_basic_in(integral_bases):
            return tagged_node(operand_type)
    elif op == "!":
        if operand_type.is_convertible_to(BasicType('bool')):
            return tagged_node(BasicType('bool'))
    elif op == "sizeof":
        if is_typename:
            return Node(name="unary_op",
                        value=op + ':' + operand_type.stype,
                        type=BasicType(type='long'))
        if not isinstance(operand_type, Type):
            Errors(errorType='TypeError',
                   errorText="cannot do sizeof on non type",
                   token_object=token)
            return Node(type="error")
        if operand_type.class_type in {'BasicType', 'PointerType', 'StructType'}:
            return Node(name="unary_op",
                        value=op,
                        type=BasicType(type='long'),
                        children=[node1])
    else:
        # Unrecognised operator: nothing is reported and nothing is built.
        return None

    # Every recognised operator that did not return above was applied to an
    # operand type it does not support.
    Errors(errorType='TypeError',
           errorText=op + ' not support type ' + operand_type.stype,
           token_object=token)
    return Node(type="error")
def _pointer_offset_node(pointer, offset, op, code):
    """Build the node and three-address code for ``pointer op offset``.

    The integer offset is first multiplied by the pointer type's
    ``type_size`` (presumably the pointee size; confirm against the type
    classes) and the product is then combined with the pointer using
    ``op``. ``code`` is the already concatenated operand code the new
    instructions are appended to.
    """
    node = Node(name="binary_op",
                value=pointer.type.stype + op,
                type=pointer.type,
                children=[pointer, offset])
    node.code = code
    stride_place = get_const(pointer.type.type_size, type="long")
    scaled_place, instruction = get_opcode(op="long*",
                                           place1=offset.place,
                                           place2=stride_place,
                                           type="long")
    node.code += [instruction]
    result_place, instruction = get_opcode(op="long" + op,
                                           place1=pointer.place,
                                           place2=scaled_place,
                                           type="long")
    node.code += [instruction]
    node.place = result_place
    return node


def type_check_add(node1, node2, op, token):
    """Type-check an additive expression ``node1 op node2`` and emit code.

    Accepted operand combinations are pointer/integer, integer/pointer
    (not for "-", where C does not allow a pointer on the right-hand
    side) and basic/basic; string constants are always rejected.

    Args:
        node1: left operand node (``type``, ``place`` and ``code`` are read).
        node2: right operand node.
        op: operator text, e.g. "+" or "-".
        token: token object attached to any reported error.

    Returns:
        A "binary_op" ``Node`` with ``code`` and ``place`` filled in, or
        ``Node(type="error")`` when an operand is already an error or the
        check fails (an ``Errors`` entry is recorded in the latter case).
    """
    pointer_offset_bases = {'int', 'long', 'char'}
    # NOTE(review): 'double' is not accepted here although the unary
    # arithmetic checks accept it; confirm this is intentional.
    arithmetic_bases = {'int', 'long', 'char', 'float', 'bool'}

    if node1.type == "error" or node2.type == "error":
        return Node(type="error")

    if "sconst@" in node1.place or "sconst@" in node2.place:
        Errors(errorType='TypeError',
               errorText=op + ' not support string constant',
               token_object=token)
        return Node(type="error")

    def unsupported():
        # Report the operand type pair as unsupported for this operator.
        Errors(errorType='TypeError',
               errorText=op + ' not support type ' + node1.type.stype + ',' +
               node2.type.stype,
               token_object=token)
        return Node(type="error")

    classes = (node1.type.class_type, node2.type.class_type)

    if classes == ('PointerType', 'BasicType'):
        if node2.type.type not in pointer_offset_bases:
            return unsupported()
        return _pointer_offset_node(node1, node2, op, node1.code + node2.code)

    if classes == ('BasicType', 'PointerType'):
        # "integer - pointer" is not valid C; previously it was silently
        # compiled as "pointer - integer".
        if op == "-" or node1.type.type not in pointer_offset_bases:
            return unsupported()
        # Operand code keeps source order even though the pointer is the
        # right-hand operand.
        return _pointer_offset_node(node2, node1, op, node1.code + node2.code)

    if classes == ('BasicType', 'BasicType'):
        if (node1.type.type not in arithmetic_bases
                or node2.type.type not in arithmetic_bases):
            return unsupported()
        node1, node2, typ = implicit_casting(node1, node2)
        node = Node(name="binary_op",
                    value=typ.stype + op,
                    children=[node1, node2],
                    type=typ)
        node.code = node1.code + node2.code
        result_place, instruction = get_opcode(op=typ.stype + op,
                                               place1=node1.place,
                                               place2=node2.place,
                                               type=typ)
        node.code += [instruction]
        node.place = result_place
        return node

    # Any other class combination (e.g. pointer/pointer, structs).
    return unsupported()