def _gen_code_boolop(self, **kwargs):
    """ Emit short-circuit code for a logical AND / OR expression.

    Jump targets are taken from the `on_true` / `on_false` keyword arguments when present.
    If `has_jump_codes(kwargs)` is false, the result is used as a value: 1 or 0 is pushed.
    Raises InternalError when the operator is neither LP.AND nor LP.OR.
    """
    # Fallback labels are allocated unconditionally and in this order, even when the caller
    # supplied a target that is used instead.
    fallback_true = CC.new_label()
    self.label_true = kwargs.get('on_true', fallback_true)
    fallback_false = CC.new_label()
    self.label_false = kwargs.get('on_false', fallback_false)
    # Additional label marking the start of the right operand.
    self.label_right = CC.new_label()

    operator = self.type.type
    if operator == LP.AND:
        # A true left operand continues with the right one, a false one settles the result.
        left_targets = dict(on_true=self.label_right, on_false=self.label_false)
    elif operator == LP.OR:
        # A true left operand settles the result, a false one continues with the right one.
        left_targets = dict(on_true=self.label_true, on_false=self.label_right)
    else:
        raise InternalError('wrong bool op type %s' % str(self.type))
    self.add_child_by_idx(0, **left_targets)
    self.add_instr(CC.LABEL, label=self.label_right)
    self.add_child_by_idx(1, on_true=self.label_true, on_false=self.label_false)

    if self.has_jump_codes(kwargs):
        return
    # No jump keywords were given, so the result is needed as a value -- push it.
    self.label_after = CC.new_label()
    self.add_instr(CC.LABEL, label=self.label_true)
    self.add_instr(CC.PUSH, src=Loc.const(1))
    self.add_instr(CC.JUMP, label=self.label_after)
    self.add_instr(CC.LABEL, label=self.label_false)
    self.add_instr(CC.PUSH, src=Loc.const(0))
    self.add_instr(CC.LABEL, label=self.label_after)
def gen_code(self, **kwargs):
    """ Emit code for a unary operator: integer negation (LP.NEG) or logical not (LP.NOT).

    For LP.NOT with jump codes in `kwargs`, the operand is generated with the two jump
    targets swapped and nothing else is emitted. Raises InternalError for other operators.
    """
    operator = self.type.type
    if operator == LP.NEG:
        # Integer negation: negate the operand's value in a register and push it back.
        self.add_child_by_idx(0)
        self.add_instr(CC.POP, dest=Loc.reg('a'))
        self.add_instr(CC.NEG, rhs=Loc.reg('a'))
        self.add_instr(CC.PUSH, src=Loc.reg('a'))
    elif operator == LP.NOT:
        if self.has_jump_codes(kwargs):
            # Part of a condition evaluation -- just forward the jump labels, swapped.
            self.add_child_by_idx(0, on_true=kwargs['on_false'],
                                  on_false=kwargs['on_true'])
            return
        # Used as a value: the result is (operand == 0).
        self.add_child_by_idx(0)
        self.add_instr(CC.POP, dest=Loc.reg('a'))
        self.add_instr(CC.BOOL_OP, lhs=Loc.const(0), rhs=Loc.reg('a'), op='sete',
                       dest=Loc.reg('a'))
        self.add_instr(CC.PUSH, src=Loc.reg('a'))
    else:
        raise InternalError('wrong unop value type')
    self.check_unused_result()
def _gen_code_in_cond(self, **kwargs):
    """ Emit condition code for a boolean variable, array element or attribute.

    The value is loaded into register `a`, compared with 0, and a jump is emitted to
    kwargs['on_false'] on equality, otherwise to kwargs['on_true'].
    Raises InternalError when the expression is not one of the boolean cases above.
    """
    # [0]: load the bool value into a register.
    for case in switch(self.type.type):
        if case(LP.IDENT) and self.tree.symbol(self.value).type == LP.BOOLEAN:
            # A variable referenced while instantiating is surely a class member.
            if NewCode.instantiating_class:
                NewCode.new_member_val(self, self.value, Loc.reg('a'))
            else:
                self.add_instr(CC.MOV, src=Loc.sym(self.tree.symbol(self.value)),
                               dest=Loc.reg('a'))
            break
        if case(LP.ELEM) and self.tree.value_type.type == LP.BOOLEAN:
            self._gen_code_load_array_elem(dest_reg=Loc.reg('a'))
            break
        if case(LP.ATTR) and self.tree.value_type.type == LP.BOOLEAN:
            self._gen_code_load_member(dest_reg=Loc.reg('a'))
            # Without this break the default case below would always raise for a
            # boolean attribute.
            break
        if case():
            raise InternalError(
                'jump-expr codes for non-bool %s expression at %s!' %
                (str(self.type), self.tree.pos))
    # [1]: Compare and jump (note: comparing with 0, so on equality jump to false!)
    self.add_instr(CC.IF_JUMP, lhs=Loc.const(0), rhs=Loc.reg('a'), op='je',
                   label=kwargs['on_false'])
    self.add_instr(CC.JUMP, label=kwargs['on_true'])
def _gen_code_intop(self):
    """ Emit code for a binary integer operation (+, -, *, /, %).

    Both operands are generated first (left, then right), so the right operand is popped
    first. The result is pushed on the stack. Raises InternalError for other operators.
    """
    self.add_child_by_idx(0)
    self.add_child_by_idx(1)
    operator = self.type.type
    if operator in (LP.PLUS, LP.MINUS, LP.MULT):
        arith_codes = {LP.PLUS: CC.ADD, LP.MINUS: CC.SUB, LP.MULT: CC.MUL}
        instr = arith_codes[operator.id]
        self.add_instr(CC.POP, dest=Loc.reg('d'))  # right operand
        self.add_instr(CC.POP, dest=Loc.reg('a'))  # left operand
        self.add_instr(instr, lhs=Loc.reg('d'), rhs=Loc.reg('a'))
        self.add_instr(CC.PUSH, src=Loc.reg('a'))
    elif operator in (LP.DIV, LP.MOD):
        self.add_instr(CC.POP, dest=Loc.reg('c'))  # right operand
        self.add_instr(CC.POP, dest=Loc.reg('a'))  # left operand
        # Quotient ends up in eax, remainder in edx.
        result_regs = {LP.DIV: Loc.reg('a'), LP.MOD: Loc.reg('d')}
        outcome = result_regs[operator.id]
        div_codes = {LP.DIV: CC.DIV, LP.MOD: CC.MOD}
        instr = div_codes[operator.id]
        self.add_instr(instr, lhs=Loc.reg('c'), rhs=Loc.reg('a'), dest=outcome)
        self.add_instr(CC.PUSH, src=outcome)
    else:
        raise InternalError('wrong int op type %s' % str(self.type))
def gen_code(self, **kwargs):
    """ Emit code for a statement: return, if, while, assignment, increment or decrement.

    Raises NotImplementedError for any other statement type.
    """
    kind = self.type.type
    if kind == LP.RETURN:
        if self.children:
            # Evaluate the returned expression and pop its value into eax.
            self.add_child_by_idx(0)
            self.add_instr(CC.POP, dest=Loc.reg('a'))
        # Continue in the function's return section.
        self.add_instr(CC.JUMP, label=self.get_cur_fun().ret_label)
    elif kind == LP.IF:
        # Children: cond, (then-block)?, (else-block)?
        self.add_child_by_idx(0, on_true=self.label_then, on_false=self.label_else)
        if len(self.children) > 1:
            # A then-block is present.
            self.add_instr(CC.LABEL, label=self.label_then)
            self.add_child_by_idx(1)
        if len(self.children) > 2:
            # An else-block is present: the then-block must skip over it.
            self.add_instr(CC.JUMP, label=self.label_after)
            self.add_instr(CC.LABEL, label=self.label_else)
            self.add_child_by_idx(2)
        self.add_instr(CC.LABEL, label=self.label_after)
    elif kind == LP.WHILE:
        # Children: cond, (block)?
        if len(self.children) > 1:
            # A loop block is present: jump to the condition, which follows the block.
            self.add_instr(CC.JUMP, label=self.label_cond)
            self.add_instr(CC.LABEL, label=self.label_block)
            self.add_child_by_idx(1)
        self.add_instr(CC.LABEL, label=self.label_cond)
        self.add_child_by_idx(0, on_true=self.label_block, on_false=self.label_after)
        self.add_instr(CC.LABEL, label=self.label_after)
    elif kind == LP.ASSIGN:
        # Assigned value goes on the stack first.
        self.add_child_by_idx(1)
        # Then the destination address is computed, if needed.
        self.add_child_by_idx(0, addr_only=True)
        # Finally the value is moved into the destination.
        self.add_instr(CC.POP, dest=Loc.reg('a'))
        target = self.children[0].get_loc()
        self.add_instr(CC.MOV, src=Loc.reg('a'), dest=target)
    elif kind in (LP.INCR, LP.DECR):
        step_code = CC.ADD if kind == LP.INCR else CC.SUB
        # Compute the destination address, if needed.
        self.add_child_by_idx(0, addr_only=True)
        target = self.children[0].get_loc()
        self.add_instr(step_code, lhs=Loc.const(1), rhs=target)
    else:
        raise NotImplementedError('unknown statement type: %s' % str(self.type))
def _stmt_constructor(tree, **kwargs):
    """ Choose the code-generator constructor for a statement-level tree node.

    Returns ExprFactory for ExprTree instances, BlockCode for LP.BLOCK, DeclCode for LP.DECL
    and StmtCode for everything else. `kwargs` is accepted but not inspected here.
    """
    if isinstance(tree, ExprTree):
        return ExprFactory
    kind = tree.type.type
    if kind == LP.BLOCK:
        return BlockCode
    if kind == LP.DECL:
        return DeclCode
    return StmtCode
def get_loc(self):
    """ Return the location of the variable once its generated code has been executed.

    An identifier resolves to its symbol's location; attributes and array elements resolve
    to memory addressed through `Loc.reg_d`. Returns None for any other expression type.
    """
    kind = self.type.type
    if kind == LP.IDENT:
        return Loc.sym(self.tree.symbol(self.value))
    if kind in (LP.ATTR, LP.ELEM):
        return Loc.mem(Loc.reg_d)
    return None
def _gen_code_as_value(self, **kwargs):
    """ Push the literal on the stack: a constant, or a string-literal location for strings.

    Raises InternalError for an unsupported literal type.
    """
    kind = self.type.type
    if kind in (LP.BOOLEAN, LP.INT, LP.OBJECT, LP.ARRAY):
        operand = Loc.const(self.value)
    elif kind == LP.STRING:
        operand = Loc.stringlit(self)
    else:
        raise InternalError('invalid literal type %s' % str(self.type.type))
    self.add_instr(CC.PUSH, src=operand)
def default_asm_value(type):
    """ Return the Loc of the value assigned to allocated but not yet set objects.

    Raises InternalError when `type` has no default value.
    """
    if type in (LP.INT, LP.BOOLEAN, LP.ARRAY, LP.OBJECT):
        # False=0 for boolean, NULL=0 for array and object.
        return Loc.const(0)
    if type == LP.STRING:
        # Pointer to the "" constant.
        empty_literal = LiteralTree(LP.STRING, '""')
        return Loc.stringlit(empty_literal)
    raise InternalError('no default asm value for type %s' % str(type))
def __init__(self, tree, **kwargs):
    """ Set up code generation for a literal and normalize its value.

    Booleans become 1 / 0, object and array literals become 0, and every other literal
    keeps the value stored in `tree`.
    """
    super(LiteralCode, self).__init__(tree, **kwargs)
    kind = self.type.type
    if kind == LP.BOOLEAN:
        # The boolean was already validated; from here on it is a number.
        self.value = 1 if tree.value == 'true' else 0
    elif kind in (LP.OBJECT, LP.ARRAY):
        # The only case of an object literal is NULL.
        self.value = 0
    else:
        self.value = tree.value
def _gen_code_in_cond(self, **kwargs):
    """ Emit an unconditional jump for a boolean literal used as a condition.

    The target is kwargs['on_true'] for value 1 and kwargs['on_false'] for value 0.
    Raises InternalError for a non-boolean literal.
    """
    if self.type.type == LP.BOOLEAN:
        # Both targets are looked up before the choice is made.
        when_false = kwargs['on_false']
        when_true = kwargs['on_true']
        target = {0: when_false, 1: when_true}[self.value]
        self.add_instr(CC.JUMP, label=target)
    else:
        raise InternalError(
            'jump-expr codes for non-bool %s literal at %s!' %
            (str(self.type), self.tree.pos))
def _expr_constructor(tree, **kwargs):
    """ Choose the code-generator class for an expression tree node.

    `kwargs` is accepted but not inspected here. Raises InternalError when the node type
    matches no known expression kind.
    """
    kind = tree.type.type
    if kind in (LP.INT, LP.STRING, LP.BOOLEAN, LP.ARRAY, LP.OBJECT):
        return LiteralCode
    elif kind in (LP.IDENT, LP.ATTR, LP.ELEM):
        return VarCode
    elif kind in (LP.NOT, LP.NEG):
        return UnopCode
    elif kind in (LP.MULT, LP.DIV, LP.MOD, LP.PLUS, LP.MINUS,
                  LP.LT, LP.LEQ, LP.GT, LP.GEQ, LP.EQ, LP.NEQ,
                  LP.AND, LP.OR):
        return BinopCode
    elif kind == LP.FUNCALL:
        return FuncallCode
    elif kind == LP.NEW:
        return NewCode
    raise InternalError('wrong expr code for construction: ' + str(tree.type))
def _gen_code_as_value(self, addr_only=False, **kwargs):
    """ Emit code pushing the value of a variable, attribute or array element.

    With `addr_only` set, only the location is computed and no value is pushed.
    Raises InternalError for an invalid attribute or an unsupported expression type.
    """
    kind = self.type.type
    if kind == LP.IDENT:
        if addr_only:
            return
        if NewCode.instantiating_class:
            # A variable referenced while instantiating is surely a class member.
            NewCode.new_member_val(self, self.value, Loc.reg('a'))
        else:
            self.add_instr(CC.MOV, src=Loc.sym(self.tree.symbol(self.value)),
                           dest=Loc.reg('a'))
        self.add_instr(CC.PUSH, src=Loc.reg('a'))
    elif kind == LP.ATTR:
        owner_kind = self.tree.obj_type.type
        if owner_kind == LP.ARRAY and self.value == Builtins.LENGTH:
            # Array length is stored in the first element of its memory block.
            self.add_child_by_idx(0)
            self.add_instr(CC.POP, dest=Loc.reg('d'))
            if not addr_only:
                self.add_instr(CC.MOV, src=Loc.mem(Loc.reg_d), dest=Loc.reg('d'))
                self.add_instr(CC.PUSH, src=Loc.reg('d'))
        elif owner_kind == LP.OBJECT:
            self._gen_code_load_member(dest_reg=Loc.reg('d'), addr_only=addr_only)
            if not addr_only:
                self.add_instr(CC.PUSH, src=Loc.reg('d'))
        else:
            raise InternalError('invalid attr `%s` for type `%s`' %
                                (self.value, str(self.tree.obj_type)))
    elif kind == LP.ELEM:
        self._gen_code_load_array_elem(dest_reg=Loc.reg('d'), addr_only=addr_only)
        if not addr_only:
            self.add_instr(CC.PUSH, src=Loc.reg('d'))
    else:
        raise InternalError('invalid variable type %s' % str(self.type.type))
def gen_code(self, **kwargs):
    """ Emit code for a binary operator, dispatching on the type of its result.

    Integer results go to `_gen_code_intop`, string results to `_gen_code_stringop`.
    Boolean results go to `_gen_code_relop` when the operator is listed in
    `BinopTree._rel_ops`, otherwise to `_gen_code_boolop`; both receive `kwargs`.
    Raises InternalError when the result type is none of these.
    """
    # detect operator type
    for case in switch(self.value_type.type):
        if case(LP.INT):
            self._gen_code_intop()
            break
        if case(LP.BOOLEAN):
            if self.type.type in BinopTree._rel_ops:
                self._gen_code_relop(**kwargs)  # comparison
            else:
                self._gen_code_boolop(**kwargs)  # logical operation
            break
        if case(LP.STRING):
            self._gen_code_stringop()
            break
        if case():
            # The offending type is formatted into the message; the placeholder used to
            # be emitted literally.
            raise InternalError('wrong binop value type %s' % str(self.value_type))
    self.check_unused_result()
def __init__(self, tree, **kwargs):
    """ Set up statement code generation: build the children and allocate jump labels.

    Children are created through StmtFactory unless `no_children` is among the keyword
    arguments. Labels are allocated only for `if` and `while` statements.
    """
    super(StmtCode, self).__init__(tree, **kwargs)
    self.type = tree.type
    if 'no_children' not in kwargs:
        for subtree in tree.children:
            self.add_child(StmtFactory(subtree))

    kind = self.type.type
    if kind == LP.IF:
        self.label_after = CC.new_label()
        # With fewer blocks, the condition simply jumps to label_after.
        if len(self.children) > 1:
            self.label_then = CC.new_label()
        else:
            self.label_then = self.label_after
        if len(self.children) > 2:
            self.label_else = CC.new_label()
        else:
            self.label_else = self.label_after
    elif kind == LP.WHILE:
        self.label_after = CC.new_label()
        self.label_cond = CC.new_label()
        # Without a loop block, a true condition jumps back to the condition itself.
        if len(self.children) > 1:
            self.label_block = CC.new_label()
        else:
            self.label_block = self.label_cond
def __str__(self):
    """ Return the assembly operand text for this location.

    Constants and string literals are prefixed with `$`, register names are mapped to
    their `%`-prefixed form, and memory locations are returned as stored in `self.value`.
    Raises InternalError for an unknown register name or location type.
    """
    for case in switch(self.type):
        if case(self.CONST):
            return '$' + str(self.value)
        if case(self.REG):
            r = self.value
            if r in ['a', 'b', 'c', 'd']:
                return '%e' + r + 'x'
            if r == 'cmp':
                return '%al'
            if r == 'top':
                return '%esp'
            if r in ['edi', 'esi', 'ebp']:
                return '%' + r
            if r == self.ANY:
                return r  # TODO remove after debugging
            raise InternalError('invalid register name: `%s`' % r)
        if case(self.MEM):
            return self.value
        if case(self.STRINGLIT):
            return '$' + self.value
    # Formatted with % rather than concatenated, so that a non-string type code still
    # produces the intended InternalError instead of a TypeError.
    raise InternalError('invalid loc type: %s' % (self.type,))
def _asm_instr(cls, code):
    """ Generator of assembly instructions from a single intermediate code.

    `code` is a dict whose 'type' entry selects the instruction kind; the remaining
    entries ('src', 'dest', 'lhs', 'rhs', 'label', 'op', ...) are read as each kind needs.
    Yields the formatted assembly line(s) for that code.

    Side effects: for DIV / MOD, `code['lhs']` may be replaced by register `c`; in debug
    mode SCOPE / ENDSCOPE adjust `cls._scope_depth`.
    Raises InternalError for CHILD codes and for unknown code types.
    """
    for case in switch(code['type']):
        if case(cls.PUSH):
            yield cls._str_asm('pushl', [str(code['src'])], code)
            return
        if case(cls.POP):
            yield cls._str_asm('popl', [str(code['dest'])], code)
            return
        if case(cls.MOV):
            yield cls._str_asm('movl', [str(code['src']), str(code['dest'])], code)
            return
        if case(cls.LEA):
            yield cls._str_asm('leal', [str(code['src']), str(code['dest'])], code)
            return
        if case(cls.JUMP):
            yield cls._str_asm('jmp', [code['label']], code)
            return
        if case(cls.IF_JUMP):
            yield cls._str_asm('cmpl', [str(code['lhs']), str(code['rhs'])], code)
            yield cls._str_asm(code['op'], [code['label']], code)
            return
        if case(cls.LABEL):
            yield cls._str_asm(code['label'] + ':', [], code)
            return
        if case(cls.CALL):
            yield cls._str_asm('call', [code['label']], code)
            return
        if case(cls.FUNC):
            # asm declaration
            yield cls._str_asm('.globl', [code['label']], code)
            yield cls._str_asm('.type', [code['label'], '@function'], code)
            # function label
            yield cls._str_asm(code['label'] + ':', [], code)
            # standard prologue
            yield cls._str_asm('pushl', ['%ebp'], code)
            yield cls._str_asm('movl', ['%esp', '%ebp'], code)
            if code['tree'].var_count:
                # reserve stack space for the function's variables
                var_space = Loc.const((code['tree'].var_count) * cls.var_size)
                yield cls._str_asm('subl', [str(var_space), '%esp'], code)
            return
        if case(cls.ENDFUNC):
            if code['tree'].var_count:
                # release the space reserved in the prologue
                var_space = Loc.const((code['tree'].var_count) * cls.var_size)
                yield cls._str_asm('addl', [str(var_space), '%esp'], code)
            yield cls._str_asm('leave', [], code)
            yield cls._str_asm(
                'ret', [],
                {'comment': 'function ' + code['tree'].tree.fun_symbol.full_name()})
            return
        if case(cls.ADD, cls.SUB, cls.MUL):
            op = {
                cls.ADD: 'addl',
                cls.SUB: 'subl',
                cls.MUL: 'imull'
            }[code['type']]
            yield cls._str_asm(op, [str(code['lhs']), str(code['rhs'])], code)
            return
        if case(cls.DIV, cls.MOD):
            if not code['lhs'].is_reg() or code['lhs'].value in ['a', 'd']:
                # the operand must be in a register other than %eax, %edx
                yield cls._str_asm('movl', [str(code['lhs']), str(Loc.reg('c'))], code)
                code['lhs'] = Loc.reg('c')
            if code['rhs'] != Loc.reg('a'):  # the first operand must be in %eax
                yield cls._str_asm('movl', [str(code['rhs']), str(Loc.reg('a'))], code)
            yield cls._str_asm('cdq', [], code)
            yield cls._str_asm('idivl', [str(code['lhs'])], code)
            return
        if case(cls.NEG):
            yield cls._str_asm('negl', [str(code['rhs'])], code)
            return
        if case(cls.BOOL_OP):
            yield cls._str_asm('cmpl', [str(code['lhs']), str(code['rhs'])], code)
            yield cls._str_asm(code['op'], [str(Loc.reg('cmp'))], code)
            yield cls._str_asm('movzbl', [str(Loc.reg('cmp')), str(code['dest'])], code)
            return
        if case(cls.ASM):
            yield cls._str_asm(code['parts'][0], code['parts'][1:], code)
            return
        if case(cls.EMPTY):
            yield ''
            return
        if case(cls.DELETED):
            if Flags.debug:
                d = code.copy()
                del d['type']
                yield '\t# [deleted] ' + cls._str_code(d)
            return
        if case(cls.SCOPE):
            if Flags.debug:
                yield '# ' + (' ' * cls._scope_depth * 2) + '{' + code.get('comment', '')
                cls._scope_depth += 1
            return
        if case(cls.ENDSCOPE):
            if Flags.debug:
                cls._scope_depth -= 1
                yield '# ' + (' ' * cls._scope_depth * 2) + '}' + code.get('comment', '')
            return
        if case(cls.CHILD):
            # The code name is formatted into the message; it used to be passed as a
            # separate exception argument, leaving the placeholder unformatted.
            raise InternalError('code type %s not allowed here' %
                                cls._code_name(code['type']))
        if case():
            raise InternalError('invalid code: ' + str(code))
def gen_code(self, **kwargs):
    """ Emit code for the `new` operator, allocating an array or an object.

    The pointer returned by the allocation call (in register `a`) is pushed as the
    expression result. Raises InternalError when the allocated type is neither
    LP.ARRAY nor LP.OBJECT.
    """
    kind = self.value_type.type
    if kind == LP.ARRAY:
        # Evaluate the array size and keep a copy on the stack for later.
        self.add_child_by_idx(0)
        self.add_instr(CC.POP, dest=Loc.reg('a'))
        self.add_instr(CC.PUSH, src=Loc.reg('a'))
        # Convention: an array of size N is a memory block for (N+1) variables, the first
        # of which holds the array's size ( = N). Compute the memory size accordingly.
        self.add_instr(CC.LEA,
                       src=Loc.mem('', offset=CC.var_size, idx=Loc.reg_a,
                                   mult=CC.var_size),
                       dest=Loc.reg('a'), drop_reg1=Loc.reg('a'))
        fill_value = self.default_asm_value(self.value_type.type.subtype)
        debug('init value for %s:' % self.value_type, str(fill_value))
        self.add_instr(CC.PUSH, src=fill_value)
        self.add_instr(CC.PUSH, src=Loc.reg('a'))
        self.add_instr(CC.CALL, label=Builtins.MALLOC_FUNCTION)
        # Drop the two call arguments from the stack.
        self.add_instr(CC.ADD, lhs=Loc.const(2 * CC.var_size), rhs=Loc.reg('top'))
        # Reload the size saved earlier and write it into the first slot.
        self.add_instr(CC.POP, dest=Loc.reg('d'))
        self.add_instr(CC.MOV, src=Loc.reg('d'), dest=Loc.mem(Loc.reg_a))
        # The memory pointer is the expression result.
        self.add_instr(CC.PUSH, src=Loc.reg('a'))
    elif kind == LP.OBJECT:
        # Allocate zero-filled space for the whole object, superclass members included.
        self.add_instr(CC.PUSH, src=Loc.const(0))
        self.add_instr(CC.PUSH, src=Loc.const(self.cls.total_var_count * CC.var_size))
        self.add_instr(CC.CALL, label=Builtins.MALLOC_FUNCTION)
        # Drop the two call arguments from the stack.
        self.add_instr(CC.ADD, lhs=Loc.const(2 * CC.var_size), rhs=Loc.reg('top'))
        if self.cls.has_nonzero_initializers():
            # Save %ebx and keep the class base pointer in it.
            self.add_instr(CC.PUSH, src=Loc.reg('b'))
            self.add_instr(CC.MOV, src=Loc.reg('a'), dest=Loc.reg('b'))
            # Assign the non-0 default values and specified initializations while the
            # class is marked as being instantiated.
            previous_context = NewCode.instantiating_class
            NewCode.instantiating_class = (self.cls, Loc.reg('b'))
            self._add_nonzero_inits(self.cls)
            NewCode.instantiating_class = previous_context
            # Move the pointer back to %eax and restore %ebx.
            self.add_instr(CC.MOV, src=Loc.reg('b'), dest=Loc.reg('a'))
            self.add_instr(CC.POP, dest=Loc.reg('b'))
        # The object memory pointer is the expression result.
        self.add_instr(CC.PUSH, src=Loc.reg('a'))
    else:
        raise InternalError('invalid type for new operator: ' + str(self.value_type))
    self.check_unused_result()
def gen_code(self, **kwargs):
    """ Emit code for a function or method call.

    Arguments are evaluated in source order and stored so that the i-th one lands at
    i * CC.var_size above the stack top; for a method called through an attribute the
    object expression is generated last. With jump codes in `kwargs`, the result in
    register `a` drives a jump to kwargs['on_false'] / kwargs['on_true']; otherwise a
    non-void result is pushed on the stack.
    Raises InternalError for an invalid callee expression, or when jump codes are
    requested for a non-boolean function.
    """
    obj_expr = None
    if self.fsym.cls:
        # Calling a method: the callee is either an attribute access or a plain identifier.
        callee_kind = self.children[0].type.type
        if callee_kind == LP.ATTR:
            obj_expr = self.children[0].children[0]
        elif callee_kind == LP.IDENT:
            obj_expr = None
        else:
            raise InternalError('expr of invalid type `%s` to call' %
                                str(self.children[0].type))

    # [1] Compute memory usage for arguments.
    args_bytes = CC.var_size * self.arg_count
    popped_bytes = args_bytes + (CC.var_size if obj_expr else 0)

    # [2] Push arguments.
    # They must end up in reverse order but be evaluated in normal order, so stack space for
    # all of them is reserved first and each value is moved into place after evaluation.
    if self.arg_count > 1:
        self.add_instr(CC.SUB, lhs=Loc.const(args_bytes), rhs=Loc.reg('top'))
        # Child [0] is the function itself, so arguments start at index 1.
        for child_idx in xrange(1, len(self.children)):
            slot = child_idx - 1
            self.add_child_by_idx(child_idx)  # Leaves the value on stack.
            self.add_instr(CC.POP, dest=Loc.reg('a'))
            # The argument in slot i belongs at 4*i(%esp).
            self.add_instr(CC.MOV, src=Loc.reg('a'),
                           dest=Loc.mem(Loc.top, slot * CC.var_size))
    elif self.arg_count > 0:
        # A single argument can simply be pushed on the stack.
        self.add_child_by_idx(1)

    # [2a] When calling a method, push the reference to `self` (the first argument).
    if obj_expr:
        self.add_child_code(obj_expr)

    # [3] Call and pop arguments.
    self.add_instr(CC.CALL, label=self.fsym.call_name())
    if popped_bytes > 0:
        self.add_instr(CC.ADD, lhs=Loc.const(popped_bytes), rhs=Loc.reg('top'))

    # [4] Finish depending on how we were called.
    if not self.has_jump_codes(kwargs):
        # [4b] Normal expression -- push the return value on stack if needed.
        if self.fsym.ret_type.type != LP.VOID:
            self.add_instr(CC.PUSH, src=Loc.reg('a'))
    else:
        if self.fsym.ret_type.type != LP.BOOLEAN:
            raise InternalError(
                'jump-expr codes for non-bool function %s %s at %s!' %
                (self.fsym.full_name(), str(self.fsym), self.tree.pos))
        # [4a] Bool function as part of a condition -- jump based on the result.
        # Note: comparing with 0, so on equality jump to false!
        self.add_instr(CC.IF_JUMP, lhs=Loc.const(0), rhs=Loc.reg('a'), op='je',
                       label=kwargs['on_false'])
        self.add_instr(CC.JUMP, label=kwargs['on_true'])
    self.check_unused_result()