class ArmTranslator(InstructionTranslator):
    """ARM to IR Translator.

    Translates one ARM instruction at a time into a list of REIL
    instructions. The per-mnemonic translation routines are looked up in
    ``translators.dispatcher``; this class supplies the shared state (flag
    registers, builder, architecture information) and the helpers those
    routines use to update flags and to evaluate condition codes.
    """

    def __init__(self, architecture_mode=ARCH_ARM_MODE_THUMB):
        """Set up the translator state for the given architecture mode.

        :param architecture_mode: one of the ``ARCH_ARM_MODE_*`` constants;
            defaults to ``ARCH_ARM_MODE_THUMB``.
        """
        super(ArmTranslator, self).__init__()

        # Set *Architecture Mode*. The translation of each instruction
        # into the REIL language is based on this.
        self._arch_mode = architecture_mode

        # An instance of *ArchitectureInformation*.
        self._arch_info = ArmArchitectureInformation(architecture_mode)

        # Factory for the individual REIL instructions emitted below.
        self._builder = ReilBuilder()

        # Condition flags, each modelled as a 1-bit REIL register.
        self._flags = {
            "nf": ReilRegisterOperand("nf", 1),
            "zf": ReilRegisterOperand("zf", 1),
            "cf": ReilRegisterOperand("cf", 1),
            "vf": ReilRegisterOperand("vf", 1),
        }

        # NOTE(review): for any other mode value none of _sp/_pc/_lr/_ws is
        # created, so later reads of them would raise AttributeError.
        if self._arch_mode in [ARCH_ARM_MODE_ARM, ARCH_ARM_MODE_THUMB]:
            self._sp = ReilRegisterOperand("r13", 32)  # TODO: Implement alias
            self._pc = ReilRegisterOperand("r15", 32)
            self._lr = ReilRegisterOperand("r14", 32)

            self._ws = ReilImmediateOperand(4, 32)  # word size

    def translate(self, instruction):
        """Return IR representation of an instruction.

        If the translation raises ``NotImplementedError``, the instruction
        is logged as not supported and a one-element list holding an
        unknown (``gen_unkn``) instruction is returned instead. Any other
        exception is logged and re-raised.
        """
        try:
            trans_instrs = self.__translate(instruction)
        except NotImplementedError:
            unkn_instr = self._builder.gen_unkn()
            # Native address goes in the upper bits; the low 8 bits (here 0)
            # hold the index of the REIL instruction within the translation.
            unkn_instr.address = instruction.address << 8 | (0x0 & 0xff)
            trans_instrs = [unkn_instr]

            self._log_not_supported_instruction(instruction)
        except Exception:
            self._log_translation_exception(instruction)

            raise

        return trans_instrs

    def __translate(self, instruction):
        """Build the REIL instruction list for ``instruction``.

        Mnemonics missing from ``translators.dispatcher`` produce a single
        unknown instruction and are logged as not supported.
        """
        # Retrieve translation function.
        mnemonic = instruction.mnemonic

        # NOTE(review): self._ir_name_generator is not assigned in this
        # class; presumably the base class provides it.
        tb = ArmTranslationBuilder(self._ir_name_generator, self._arch_mode)

        # TODO: Improve this.
        # Branch mnemonics listed here skip the generic condition-code
        # prologue; a missing condition code is normalised to AL instead.
        if instruction.mnemonic in [
                "b", "bl", "bx", "blx", "bne", "beq", "bpl", "ble", "bcs",
                "bhs", "blt", "bge", "bhi", "blo", "bls"
        ]:
            if instruction.condition_code is None:
                instruction.condition_code = ARM_COND_CODE_AL  # TODO: unify translations
        else:
            # Pre-processing: evaluate flags
            if instruction.condition_code is not None:
                self._evaluate_condition_code(tb, instruction)

        # Translate instruction.
        if mnemonic in translators.dispatcher:
            translators.dispatcher[mnemonic](self, tb, instruction)
        else:
            tb.add(self._builder.gen_unkn())

            self._log_not_supported_instruction(instruction)

        return tb.instanciate(instruction.address)

    # Flag translation.
    # ======================================================================== #
    def _update_nf(self, tb, oprnd0, oprnd1, result):
        """Store bit ``oprnd0.size - 1`` of ``result`` into the N flag."""
        sign = tb._extract_bit(result, oprnd0.size - 1)
        tb.add(self._builder.gen_str(sign, self._flags["nf"]))

    def _carry_from_uf(self, tb, oprnd0, oprnd1, result):
        """Store bit ``oprnd0.size`` of ``result`` into the C flag.

        ``result`` must be exactly twice as wide as ``oprnd0``, so the bit
        just above the operand width holds the carry.
        """
        assert (result.size == oprnd0.size * 2)

        carry = tb._extract_bit(result, oprnd0.size)
        tb.add(self._builder.gen_str(carry, self._flags["cf"]))

    def _borrow_from_uf(self, tb, oprnd0, oprnd1, result):
        """Store the borrow of a subtraction into the C flag."""
        # BorrowFrom as defined in the ARM Reference Manual has the same implementation as CarryFrom
        self._carry_from_uf(tb, oprnd0, oprnd1, result)

    def _overflow_from_add_uf(self, tb, oprnd0, oprnd1, result):
        """Set the V flag for an addition.

        Overflow is flagged when both operands have the same sign bit and
        the result's sign bit differs from the first operand's.
        """
        # All three sign bits are taken at index oprnd0.size - 1.
        op1_sign = tb._extract_bit(oprnd0, oprnd0.size - 1)
        op2_sign = tb._extract_bit(oprnd1, oprnd0.size - 1)
        res_sign = tb._extract_bit(result, oprnd0.size - 1)

        overflow = tb._and_regs(tb._equal_regs(op1_sign, op2_sign),
                                tb._unequal_regs(op1_sign, res_sign))
        tb.add(self._builder.gen_str(overflow, self._flags["vf"]))

    def _overflow_from_sub_uf(self, tb, oprnd0, oprnd1, result):
        """Set the V flag for a subtraction via ``tb._overflow_from_sub``."""
        # Evaluate overflow and update the flag
        tb.add(
            self._builder.gen_str(
                tb._overflow_from_sub(oprnd0, oprnd1, result),
                self._flags["vf"]))

    def _update_zf(self, tb, oprnd0, oprnd1, result):
        """Set the Z flag from the low ``oprnd0.size`` bits of ``result``."""
        zf = self._flags["zf"]

        # Mask selecting the low oprnd0.size bits, as wide as the result.
        imm0 = tb.immediate((2**oprnd0.size) - 1, result.size)

        tmp0 = tb.temporal(oprnd0.size)

        tb.add(self._builder.gen_and(result, imm0, tmp0))  # filter low part of result
        tb.add(self._builder.gen_bisz(tmp0, zf))

    def _carry_out(self, tb, carry_operand, oprnd0, oprnd1, result):
        """Update the C flag with the shifter carry-out of ``carry_operand``.

        Immediate and plain register operands return without emitting
        anything, as does an LSL by an immediate of 0. Only LSL is
        implemented for shifted-register operands. ``oprnd0``, ``oprnd1``
        and ``result`` are not used here.

        :raises NotImplementedError: for any shift type other than 'lsl'.
        :raises Exception: for an unknown operand or shift-amount type.
        """
        if isinstance(carry_operand, ArmImmediateOperand):
            return
        elif isinstance(carry_operand, ArmRegisterOperand):
            return
        elif isinstance(carry_operand, ArmShiftedRegisterOperand):
            base = ReilRegisterOperand(carry_operand.base_reg.name,
                                       carry_operand.size)
            shift_type = carry_operand.shift_type
            shift_amount = carry_operand.shift_amount

            if shift_type == 'lsl':

                if isinstance(shift_amount, ArmImmediateOperand):
                    if shift_amount.immediate == 0:
                        return
                    else:
                        # carry_out = Rm[32 - shift_imm]
                        shift_carry_out = tb._extract_bit(
                            base, 32 - shift_amount.immediate)

                elif isinstance(shift_amount, ArmRegisterOperand):
                    # Rs: register with shift amount
                    # if Rs[7:0] == 0 then
                    #     carry_out = C Flag
                    # else if Rs[7:0] <= 32 then
                    #     carry_out = Rm[32 - Rs[7:0]]
                    # else /* Rs[7:0] > 32 */
                    #     carry_out = 0

                    # Start from the current C flag; the branches below
                    # overwrite it only when the shift amount is non-zero.
                    shift_carry_out = tb.temporal(1)
                    tb.add(
                        self._builder.gen_str(self._flags["cf"],
                                              shift_carry_out))
                    rs = ReilRegisterOperand(shift_amount.name,
                                             shift_amount.size)
                    rs_7_0 = tb._and_regs(rs, tb.immediate(0xFF, rs.size))

                    end_label = tb.label('end_label')
                    rs_greater_32_label = tb.label('rs_greater_32_label')

                    # if Rs[7:0] == 0 then
                    #     carry_out = C Flag
                    tb._jump_if_zero(
                        rs_7_0, end_label
                    )  # shift_carry_out already has the C flag set, so do nothing

                    # Rs[7:0] >= 33 is the "greater than 32" case.
                    tb.add(
                        self._builder.gen_jcc(
                            tb._greater_than_or_equal(
                                rs_7_0, tb.immediate(33, rs_7_0.size)),
                            rs_greater_32_label))

                    # Rs > 0 and Rs <= 32
                    #     carry_out = Rm[32 - Rs[7:0]]
                    extract_bit_number = tb.temporal(rs_7_0.size)
                    tb.add(
                        self._builder.gen_sub(tb.immediate(32, rs_7_0.size),
                                              rs_7_0, extract_bit_number))
                    tb.add(
                        self._builder.gen_str(
                            tb._extract_bit_with_register(
                                base, extract_bit_number), shift_carry_out))
                    tb._jump_to(end_label)

                    # else /* Rs[7:0] > 32 */
                    #     carry_out = 0
                    tb.add(rs_greater_32_label)
                    tb.add(
                        self._builder.gen_str(tb.immediate(0, 1),
                                              shift_carry_out))
                    # No jump is needed here: end_label follows directly.
                    # tb._jump_to(end_label)

                    tb.add(end_label)

                else:
                    raise Exception("carry_out: Unknown shift amount type.")

            else:
                # TODO: Implement other shift types
                raise NotImplementedError(
                    "Instruction Not Implemented: carry_out: shift type " +
                    carry_operand.shift_type)

        else:
            raise Exception("carry_out: Unknown operand type.")

        # Commit the computed carry-out to the C flag.
        tb.add(self._builder.gen_str(shift_carry_out, self._flags["cf"]))

    def _update_flags_data_proc_add(self, tb, oprnd0, oprnd1, result):
        """Update Z, N, C (carry) and V (add overflow) after an addition."""
        self._update_zf(tb, oprnd0, oprnd1, result)
        self._update_nf(tb, oprnd0, oprnd1, result)
        self._carry_from_uf(tb, oprnd0, oprnd1, result)
        self._overflow_from_add_uf(tb, oprnd0, oprnd1, result)

    def _update_flags_data_proc_sub(self, tb, oprnd0, oprnd1, result):
        """Update Z, N, C (NOT borrow) and V after a subtraction."""
        self._update_zf(tb, oprnd0, oprnd1, result)
        self._update_nf(tb, oprnd0, oprnd1, result)
        self._borrow_from_uf(tb, oprnd0, oprnd1, result)
        # C Flag = NOT BorrowFrom (to be used by subsequent instructions like SBC and RSC)
        tb.add(
            self._builder.gen_str(tb._negate_reg(self._flags["cf"]),
                                  self._flags["cf"]))
        self._overflow_from_sub_uf(tb, oprnd0, oprnd1, result)

    def _update_flags_data_proc_other(self, tb, second_operand, oprnd0,
                                      oprnd1, result):
        """Update Z and N, and take C from the shifter carry-out."""
        self._update_zf(tb, oprnd0, oprnd1, result)
        self._update_nf(tb, oprnd0, oprnd1, result)
        self._carry_out(tb, second_operand, oprnd0, oprnd1, result)
        # Overflow Flag (V) unaffected

    def _update_flags_other(self, tb, oprnd0, oprnd1, result):
        """Update only the Z and N flags."""
        self._update_zf(tb, oprnd0, oprnd1, result)
        self._update_nf(tb, oprnd0, oprnd1, result)
        # Carry Flag (C) unaffected
        # Overflow Flag (V) unaffected

    def _undefine_flag(self, tb, flag):
        """Model an undefined flag by storing 0 into it."""
        # NOTE: In every test I've made, each time a flag is left
        # undefined it is always set to 0.
        imm = tb.immediate(0, flag.size)

        tb.add(self._builder.gen_str(imm, flag))

    def _clear_flag(self, tb, flag):
        """Store 0 into ``flag``."""
        imm = tb.immediate(0, flag.size)

        tb.add(self._builder.gen_str(imm, flag))

    def _set_flag(self, tb, flag):
        """Store 1 into ``flag``."""
        imm = tb.immediate(1, flag.size)

        tb.add(self._builder.gen_str(imm, flag))

    # Helpers.
    # ======================================================================== #
    # Each _evaluate_* helper returns an operand holding the value of one
    # condition: either a flag register itself or a temporal computed from
    # the flags through instructions appended to ``tb``.
    def _evaluate_eq(self, tb):
        # EQ: Z set
        return self._flags["zf"]

    def _evaluate_ne(self, tb):
        # NE: Z clear
        return tb._negate_reg(self._flags["zf"])

    def _evaluate_cs(self, tb):
        # CS: C set
        return self._flags["cf"]

    def _evaluate_cc(self, tb):
        # CC: C clear
        return tb._negate_reg(self._flags["cf"])

    def _evaluate_mi(self, tb):
        # MI: N set
        return self._flags["nf"]

    def _evaluate_pl(self, tb):
        # PL: N clear
        return tb._negate_reg(self._flags["nf"])

    def _evaluate_vs(self, tb):
        # VS: V set
        return self._flags["vf"]

    def _evaluate_vc(self, tb):
        # VC: V clear
        return tb._negate_reg(self._flags["vf"])

    def _evaluate_hi(self, tb):
        # HI: C set and Z clear
        return tb._and_regs(self._flags["cf"],
                            tb._negate_reg(self._flags["zf"]))

    def _evaluate_ls(self, tb):
        # LS: C clear or Z set
        return tb._or_regs(tb._negate_reg(self._flags["cf"]),
                           self._flags["zf"])

    def _evaluate_ge(self, tb):
        # GE: N == V
        return tb._equal_regs(self._flags["nf"], self._flags["vf"])

    def _evaluate_lt(self, tb):
        # LT: N != V
        return tb._negate_reg(self._evaluate_ge(tb))

    def _evaluate_gt(self, tb):
        # GT: (Z == 0) and (N == V)
        return tb._and_regs(tb._negate_reg(self._flags["zf"]),
                            self._evaluate_ge(tb))

    def _evaluate_le(self, tb):
        # LE: (Z == 1) or (N != V)
        return tb._or_regs(self._flags["zf"], self._evaluate_lt(tb))

    def _evaluate_condition_code(self, tb, instruction):
        """Emit the conditional-execution prologue for ``instruction``.

        Nothing is emitted for the AL condition. Otherwise the condition
        is evaluated, negated, and used to guard a JCC to
        ``(instruction.address + instruction.size) << 8``, so the rest of
        the translation is skipped when the condition does not hold.

        A condition code missing from the table below raises ``KeyError``.
        """
        if instruction.condition_code == ARM_COND_CODE_AL:
            return

        # HS and LO share the evaluators of CS and CC respectively.
        eval_cc_fn = {
            ARM_COND_CODE_EQ: self._evaluate_eq,
            ARM_COND_CODE_NE: self._evaluate_ne,
            ARM_COND_CODE_CS: self._evaluate_cs,
            ARM_COND_CODE_HS: self._evaluate_cs,
            ARM_COND_CODE_CC: self._evaluate_cc,
            ARM_COND_CODE_LO: self._evaluate_cc,
            ARM_COND_CODE_MI: self._evaluate_mi,
            ARM_COND_CODE_PL: self._evaluate_pl,
            ARM_COND_CODE_VS: self._evaluate_vs,
            ARM_COND_CODE_VC: self._evaluate_vc,
            ARM_COND_CODE_HI: self._evaluate_hi,
            ARM_COND_CODE_LS: self._evaluate_ls,
            ARM_COND_CODE_GE: self._evaluate_ge,
            ARM_COND_CODE_LT: self._evaluate_lt,
            ARM_COND_CODE_GT: self._evaluate_gt,
            ARM_COND_CODE_LE: self._evaluate_le,
        }

        neg_cond = tb._negate_reg(eval_cc_fn[instruction.condition_code](tb))

        # Jump target operand is 8 bits wider than a native address to
        # make room for the shifted-in low byte.
        end_addr = ReilImmediateOperand(
            (instruction.address + instruction.size) << 8,
            self._arch_info.address_size + 8)

        tb.add(self._builder.gen_jcc(neg_cond, end_addr))

        return
class TranslationBuilder(object):
    """Collect the REIL instructions that translate one native instruction.

    Instructions (and ``Label`` placeholders) are queued with ``add`` and
    turned into the final, addressed list by ``instanciate``. The
    underscore-prefixed helpers emit small instruction sequences and return
    the operand holding their result.
    """

    def __init__(self, ir_name_generator, architecture_information):
        """Create an empty builder.

        :param ir_name_generator: object whose ``get_next()`` yields the
            names used for temporal registers.
        :param architecture_information: object whose ``address_size`` is
            used when jump targets are resolved.
        """
        self._arch_info = architecture_information
        self._builder = ReilBuilder()
        self._instructions = []
        self._ir_name_generator = ir_name_generator

    def add(self, instr):
        """Queue ``instr`` (an instruction or a label) at the end."""
        self._instructions.append(instr)

    def temporal(self, size):
        """Return a fresh register operand of ``size`` bits."""
        fresh_name = self._ir_name_generator.get_next()
        return ReilRegisterOperand(fresh_name, size)

    def immediate(self, value, size):
        """Return an immediate operand holding ``value`` in ``size`` bits."""
        return ReilImmediateOperand(value, size)

    def label(self, name):
        """Return a ``Label`` placeholder called ``name``."""
        return Label(name)

    def instanciate(self, address):
        """Finalize the queued items for the instruction at ``address``.

        Every queued item first gets ``address << 8`` as its address; labels
        are then removed and jump targets resolved. The internal list is
        modified in place and returned.
        """
        queued = self._instructions
        for item in queued:
            item.address = address << 8
        return self._resolve_loops(queued)

    # Auxiliary functions
    # ======================================================================== #
    def _resolve_loops(self, instrs):
        """Strip labels from ``instrs`` and patch JCC targets that use them.

        A label stands for the index of the first real instruction that
        follows it. Each instruction's index is OR-ed into its address, and
        a JCC whose third operand is a label gets an immediate built from
        the instruction's own address with the low byte replaced by the
        label's index. ``instrs`` is modified in place and returned.
        """
        index_of_label = {}
        real_instrs = []
        for item in instrs:
            if isinstance(item, Label):
                # Number of real instructions seen so far == target index.
                index_of_label[item.name] = len(real_instrs)
            else:
                real_instrs.append(item)

        # Replace the contents without rebinding the caller's list.
        instrs[:] = real_instrs

        # Resolve instruction addresses and JCC targets.
        for position, instr in enumerate(instrs):
            assert isinstance(instr, ReilInstruction)

            instr.address |= position

            is_jump = instr.mnemonic == ReilMnemonic.JCC
            if not is_jump:
                continue

            target = instr.operands[2]
            if not isinstance(target, Label):
                continue

            target_index = index_of_label[target.name]
            target_address = (instr.address & ~0xff) | target_index

            instr.operands[2] = ReilImmediateOperand(
                target_address, self._arch_info.address_size + 8)

        return instrs

    def _emit_into_temporal(self, generate, first, second):
        """Emit ``generate(first, second, t)`` for a fresh temporal ``t``.

        The temporal is as wide as ``first`` and is returned.
        """
        destination = self.temporal(first.size)
        self.add(generate(first, second, destination))
        return destination

    def _all_ones_imm(self, reg):
        """Return an immediate as wide as ``reg`` with every bit set."""
        width = reg.size
        return self.immediate((2**width) - 1, width)

    def _negate_reg(self, reg):
        """Emit ``reg XOR all-ones`` and return the temporal holding it."""
        inverted = self.temporal(reg.size)
        all_ones = self._all_ones_imm(reg)
        self.add(self._builder.gen_xor(reg, all_ones, inverted))
        return inverted

    def _and_regs(self, reg1, reg2):
        """Emit ``reg1 AND reg2`` and return the temporal holding it."""
        return self._emit_into_temporal(self._builder.gen_and, reg1, reg2)

    def _or_regs(self, reg1, reg2):
        """Emit ``reg1 OR reg2`` and return the temporal holding it."""
        return self._emit_into_temporal(self._builder.gen_or, reg1, reg2)

    def _xor_regs(self, reg1, reg2):
        """Emit ``reg1 XOR reg2`` and return the temporal holding it."""
        return self._emit_into_temporal(self._builder.gen_xor, reg1, reg2)

    def _equal_regs(self, reg1, reg2):
        """Return the negation of ``reg1 XOR reg2``."""
        difference = self._xor_regs(reg1, reg2)
        return self._negate_reg(difference)

    def _unequal_regs(self, reg1, reg2):
        """Return ``reg1 XOR reg2``."""
        return self._xor_regs(reg1, reg2)

    def _shift_reg(self, reg, sh):
        """Emit a BSH of ``reg`` by ``sh`` and return the temporal result."""
        return self._emit_into_temporal(self._builder.gen_bsh, reg, sh)

    def _extract_bit(self, reg, bit):
        """Return a 1-bit temporal holding bit number ``bit`` of ``reg``.

        ``bit`` is a Python integer known at translation time and must lie
        in ``[0, reg.size)``.
        """
        assert 0 <= bit < reg.size

        shifted = self.temporal(reg.size)
        extracted = self.temporal(1)

        # Shift by -bit to bring the requested bit down to the LSB.
        amount = self.immediate(-bit, reg.size)
        self.add(self._builder.gen_bsh(reg, amount, shifted))

        # Keep only the LSB.
        lsb_mask = self.immediate(1, reg.size)
        self.add(self._builder.gen_and(shifted, lsb_mask, extracted))

        return extracted

    def _extract_bit_with_register(self, reg, bit):
        """Like ``_extract_bit`` but ``bit`` is an operand resolved at runtime.

        The bit number is assumed to be within range; it is not checked.
        """
        shifted = self.temporal(reg.size)
        negated_bit = self.temporal(reg.size)
        extracted = self.temporal(1)

        # Compute 0 - bit: the shift towards the LSB is expressed as a
        # negative amount.
        zero = self.immediate(0, bit.size)
        self.add(self._builder.gen_sub(zero, bit, negated_bit))

        # Shift to LSB.
        self.add(self._builder.gen_bsh(reg, negated_bit, shifted))

        # Keep only the LSB.
        lsb_mask = self.immediate(1, reg.size)
        self.add(self._builder.gen_and(shifted, lsb_mask, extracted))

        return extracted

    def _extract_msb(self, reg):
        """Return the most significant bit of ``reg``."""
        top_bit = reg.size - 1
        return self._extract_bit(reg, top_bit)

    def _extract_sign_bit(self, reg):
        """Return the sign bit of ``reg`` (its most significant bit)."""
        return self._extract_msb(reg)

    def _greater_than_or_equal(self, reg1, reg2):
        """Return a 1-bit operand for ``reg1 >= reg2``.

        ``reg1 - reg2`` is computed into a double-width temporal; the
        outcome is whether the sign bit of the difference (taken at the
        operand width) equals the subtraction overflow.
        """
        assert reg1.size == reg2.size

        difference = self.temporal(reg1.size * 2)
        self.add(self._builder.gen_sub(reg1, reg2, difference))

        sign = self._extract_bit(difference, reg1.size - 1)
        overflow = self._overflow_from_sub(reg1, reg2, difference)

        return self._equal_regs(sign, overflow)

    def _jump_to(self, target):
        """Emit a JCC to ``target`` whose condition is the constant 1."""
        always = self.immediate(1, 1)
        self.add(self._builder.gen_jcc(always, target))

    def _jump_if_zero(self, reg, label):
        """Emit a BISZ on ``reg`` followed by a JCC to ``label`` on its result."""
        zero_test = self.temporal(1)
        self.add(self._builder.gen_bisz(reg, zero_test))
        self.add(self._builder.gen_jcc(zero_test, label))

    def _add_to_reg(self, reg, value):
        """Emit ``reg + value`` and return the temporal holding it."""
        return self._emit_into_temporal(self._builder.gen_add, reg, value)

    def _sub_to_reg(self, reg, value):
        """Emit ``reg - value`` and return the temporal holding it."""
        return self._emit_into_temporal(self._builder.gen_sub, reg, value)

    def _overflow_from_sub(self, oprnd0, oprnd1, result):
        """Return a 1-bit operand flagging overflow of ``oprnd0 - oprnd1``.

        Overflow is flagged when the operands' sign bits differ and the
        result's sign bit differs from the first operand's. All three sign
        bits are taken at index ``oprnd0.size - 1``.
        """
        sign_index = oprnd0.size - 1

        first_sign = self._extract_bit(oprnd0, sign_index)
        second_sign = self._extract_bit(oprnd1, sign_index)
        result_sign = self._extract_bit(result, sign_index)

        operands_differ = self._unequal_regs(first_sign, second_sign)
        result_differs = self._unequal_regs(first_sign, result_sign)

        return self._and_regs(operands_differ, result_differs)