class RISCV64(RISCV):
    """64-bit variant of the ``RISCV`` architecture definition.

    The register table is derived from ``RISCV.regs``: every register that
    is 4 bytes wide there is redeclared as 8 bytes wide, while registers of
    any other width keep the size they have in the base class.
    """

    name = "riscv64"
    address_size = 8
    default_int_size = 4

    disassembler = RVDisassembler(address_size)
    lifter = Lifter(address_size)

    # The width is spelled as a literal 8 because class-level names such as
    # ``address_size`` are not visible from inside a generator expression.
    regs = dict(
        (reg_name,
         RegisterInfo(reg_name, 8 if reg_info.size == 4 else reg_info.size))
        for reg_name, reg_info in RISCV.regs.items()
    )
def read_register(self, reg_name: str) -> int:
    """Return the emulated value of the register called ``reg_name``.

    Register values are read from the ``"emulator.registers"`` entry of
    ``self.view.session_data``; a register with no stored value reads as 0.

    Names starting with ``'temp'`` are treated as full-width registers of
    ``self.view.address_size`` bytes. Any other name is looked up in
    ``self.view.arch.regs``. For a sub-register, the value is extracted
    from its full-width register using the sub-register's byte ``offset``
    and ``size``.

    Raises:
        UninitializedRegisterError: if ``reg_name`` is not a temporary and
            is not present in ``self.view.arch.regs``.
    """
    regs = dict(self.view.session_data.get("emulator.registers", {}))

    if reg_name.startswith('temp'):
        register = RegisterInfo(reg_name, self.view.address_size)
    else:
        register = self.view.arch.regs.get(reg_name)

    if register is None:
        # ``register`` is None on this path, so report the requested name
        # instead; otherwise the error carries no useful information.
        raise UninitializedRegisterError(reg_name)

    full_width_reg = register.full_width_reg
    if reg_name == full_width_reg:
        return regs.get(reg_name, 0)

    # Sub-register: shift the containing register down to the sub-register's
    # first byte and keep only ``size`` bytes.
    shift = register.offset * 8
    width_mask = (1 << (register.size * 8)) - 1
    return (regs.get(full_width_reg, 0) >> shift) & width_mask
class RISCV64(RISCV):
    """64-bit variant of the ``RISCV`` architecture definition.

    Reuses the register names of ``RISCV.regs`` but declares every one of
    them as 8 bytes wide, regardless of its width in the base class.
    """

    name = "riscv64"
    address_size = 8
    default_int_size = 8
    max_instr_length = 4
    endianness = Endianness.LittleEndian

    disassembler = RVDisassembler(address_size)
    lifter = Lifter(address_size)

    # Only the register names are needed from the base table.
    regs = dict(
        (reg_name, RegisterInfo(reg_name, 8)) for reg_name in RISCV.regs
    )
class RISCV(Architecture):
    """32-bit RISC-V architecture definition.

    Decoding is delegated to ``RVDisassembler`` and lifting to ``Lifter``;
    this class supplies the register table and classifies control flow.
    """

    name = "riscv"
    address_size = 4
    default_int_size = 4

    # TODO: This actually depends on whether the F, D, Q extension is
    # implemented, but we'll just assume it is the Q extension (128 bit)
    default_float_size = 16

    # TODO: not sure this is true for all extensions?
    max_instr_length = 4

    endianness = Endianness.LittleEndian

    disassembler = RVDisassembler(address_size)
    lifter = Lifter(address_size)

    # The ABI names are used here, as those are also the register names
    # returned by capstone.
    regs = {
        # x0 - hard-wired zero
        "zero": RegisterInfo("zero", address_size),
        # x1 - return address (caller saved)
        "ra": RegisterInfo("ra", address_size),
        # x2 - stack pointer (callee saved)
        "sp": RegisterInfo("sp", address_size),
        # x3 - global pointer
        "gp": RegisterInfo("gp", address_size),
        # x4 - thread pointer
        "tp": RegisterInfo("tp", address_size),
        # x5-7 - temporaries (caller saved)
        "t0": RegisterInfo("t0", address_size),
        "t1": RegisterInfo("t1", address_size),
        "t2": RegisterInfo("t2", address_size),
        # x8 - saved register / frame pointer (callee saved)
        "s0": RegisterInfo("s0", address_size),
        # x9 - saved register (callee saved)
        "s1": RegisterInfo("s1", address_size),
        # x10-11 - first function arguments and return values (caller saved)
        "a0": RegisterInfo("a0", address_size),
        "a1": RegisterInfo("a1", address_size),
        # x12-17 - function arguments (caller saved)
        "a2": RegisterInfo("a2", address_size),
        "a3": RegisterInfo("a3", address_size),
        "a4": RegisterInfo("a4", address_size),
        "a5": RegisterInfo("a5", address_size),
        "a6": RegisterInfo("a6", address_size),
        "a7": RegisterInfo("a7", address_size),
        # x18-27 - saved registers (callee saved)
        "s2": RegisterInfo("s2", address_size),
        "s3": RegisterInfo("s3", address_size),
        "s4": RegisterInfo("s4", address_size),
        "s5": RegisterInfo("s5", address_size),
        "s6": RegisterInfo("s6", address_size),
        "s7": RegisterInfo("s7", address_size),
        "s8": RegisterInfo("s8", address_size),
        "s9": RegisterInfo("s9", address_size),
        "s10": RegisterInfo("s10", address_size),
        "s11": RegisterInfo("s11", address_size),
        # x28-31 - temporaries (caller saved)
        "t3": RegisterInfo("t3", address_size),
        "t4": RegisterInfo("t4", address_size),
        "t5": RegisterInfo("t5", address_size),
        "t6": RegisterInfo("t6", address_size),
        # pc - program counter
        "pc": RegisterInfo("pc", address_size),
        # f0-7 - FP temporaries (caller saved)
        "ft0": RegisterInfo("ft0", default_float_size),
        "ft1": RegisterInfo("ft1", default_float_size),
        "ft2": RegisterInfo("ft2", default_float_size),
        "ft3": RegisterInfo("ft3", default_float_size),
        "ft4": RegisterInfo("ft4", default_float_size),
        "ft5": RegisterInfo("ft5", default_float_size),
        "ft6": RegisterInfo("ft6", default_float_size),
        "ft7": RegisterInfo("ft7", default_float_size),
        # f8-9 - FP saved registers (callee saved)
        "fs0": RegisterInfo("fs0", default_float_size),
        "fs1": RegisterInfo("fs1", default_float_size),
        # f10-11 - FP arguments/return values (caller saved)
        "fa0": RegisterInfo("fa0", default_float_size),
        "fa1": RegisterInfo("fa1", default_float_size),
        # f12-17 - FP arguments (caller saved)
        "fa2": RegisterInfo("fa2", default_float_size),
        "fa3": RegisterInfo("fa3", default_float_size),
        "fa4": RegisterInfo("fa4", default_float_size),
        "fa5": RegisterInfo("fa5", default_float_size),
        "fa6": RegisterInfo("fa6", default_float_size),
        "fa7": RegisterInfo("fa7", default_float_size),
        # f18-27 - FP saved registers (callee saved)
        "fs2": RegisterInfo("fs2", default_float_size),
        "fs3": RegisterInfo("fs3", default_float_size),
        "fs4": RegisterInfo("fs4", default_float_size),
        "fs5": RegisterInfo("fs5", default_float_size),
        "fs6": RegisterInfo("fs6", default_float_size),
        "fs7": RegisterInfo("fs7", default_float_size),
        "fs8": RegisterInfo("fs8", default_float_size),
        "fs9": RegisterInfo("fs9", default_float_size),
        "fs10": RegisterInfo("fs10", default_float_size),
        "fs11": RegisterInfo("fs11", default_float_size),
        # f28-31 - FP temporaries (caller saved)
        "ft8": RegisterInfo("ft8", default_float_size),
        "ft9": RegisterInfo("ft9", default_float_size),
        "ft10": RegisterInfo("ft10", default_float_size),
        "ft11": RegisterInfo("ft11", default_float_size),
    }

    stack_pointer = "sp"

    def get_instruction_info(self, data, addr):
        """Describe the length and outgoing branches of one instruction.

        Returns ``None`` when the disassembler cannot decode ``data``;
        otherwise an ``InstructionInfo`` carrying the instruction size and,
        for control-flow instructions, the matching branch entries.
        """
        decoded = self.disassembler.decode(data, addr)
        if decoded is None:
            return None

        info = InstructionInfo()
        info.length = decoded.size

        # Immediates are taken relative to the instruction's own address.
        target = None if decoded.imm is None else addr + decoded.imm

        if decoded.name == 'ret' or self._looks_like_ret(decoded):
            info.add_branch(BranchType.FunctionReturn)
        elif decoded.name in branch_ins:
            info.add_branch(BranchType.TrueBranch, target)
            info.add_branch(BranchType.FalseBranch, addr + decoded.size)
        elif decoded.name in direct_jump_ins:
            info.add_branch(BranchType.UnconditionalBranch, target)
        elif decoded.name in indirect_jump_ins:
            info.add_branch(BranchType.UnresolvedBranch)
        elif decoded.name in direct_call_ins:
            info.add_branch(BranchType.CallDestination, target)
        elif decoded.name in indirect_call_ins:
            info.add_branch(BranchType.UnresolvedBranch)

        return info

    def _looks_like_ret(self, instr):
        """
        Check for jump instructions that look like function returns.

        Any register jump to 'ra', the return address register, is probably
        a function return. Two shapes are accepted:

        * ``jalr zero, ra`` without an immediate. If ``jalr`` does not link
          into ``zero``, something unusual is going on and it is not marked
          as a return.
        * ``jr ra`` without an immediate.

        In both cases an offset added to ``ra`` (via ``imm``) disqualifies
        the instruction, as that does not look like a function return.
        """
        mnemonic = instr.name
        if mnemonic == 'jalr':
            matches = (instr.operands[0] == 'zero'
                       and instr.operands[1] == 'ra'
                       and not instr.imm)
        elif mnemonic == 'jr':
            matches = instr.operands[0] == 'ra' and not instr.imm
        else:
            matches = False
        return True if matches else False

    def get_instruction_text(self, data, addr):
        """Return ``(tokens, size)`` for one instruction, or ``None`` if it
        cannot be decoded."""
        decoded = self.disassembler.decode(data, addr)
        if decoded is None:
            return None
        return gen_token(decoded), decoded.size

    def get_instruction_low_level_il(self, data, addr, il):
        """Lift one instruction into ``il`` and return its size, or ``None``
        if it cannot be decoded."""
        decoded = self.disassembler.decode(data, addr)
        if decoded is None:
            return None
        self.lifter.lift(il, decoded, decoded.name)
        return decoded.size
class Moxie(Architecture):
    """Binary Ninja architecture for Moxie (disassembly only).

    Instructions start with a big-endian 16-bit word whose top two bits
    select the encoding handled by ``decode_instruction``. Lifting is not
    implemented: every instruction is emitted as ``unimplemented``.
    """

    name = "moxie"
    address_size = 4
    default_int_size = 4
    stack_pointer = 'sp'

    # NOTE(review): registers are declared 2 bytes wide although
    # address_size is 4 -- confirm this is intended.
    regs = {
        'sp': RegisterInfo('sp', 2),
        'fp': RegisterInfo('fp', 2),
        'r0': RegisterInfo('r0', 2),
        'r1': RegisterInfo('r1', 2),
        'r2': RegisterInfo('r2', 2),
        'r3': RegisterInfo('r3', 2),
        'r4': RegisterInfo('r4', 2),
        'r5': RegisterInfo('r5', 2),
        'r6': RegisterInfo('r6', 2),
        'r7': RegisterInfo('r7', 2),
        'r8': RegisterInfo('r8', 2),
        'r9': RegisterInfo('r9', 2),
        'r10': RegisterInfo('r10', 2),
        'r11': RegisterInfo('r11', 2),
        'r12': RegisterInfo('r12', 2),
        'r13': RegisterInfo('r13', 2),
    }

    def get_operands(self, instr, word):
        """Return ``(src, dst)`` register names encoded in ``word``.

        Register A is taken from bits 4-7 and register B from bits 0-3.
        One-register instructions use A as destination; two-register
        instructions use A as destination and B as source. Instructions in
        ``NO_IMM_INSTRUCTIONS`` (and unknown ones) yield ``(None, None)``.
        """
        if instr in NO_IMM_INSTRUCTIONS:
            return None, None

        reg_a = (word & 0xF0) >> 4
        reg_b = word & 0xF

        src = None
        dst = None
        if instr in ONE_REG_INSTRUCTIONS:
            dst = REGISTERS[reg_a]
        elif instr in TWO_REG_INSTRUCTIONS:
            dst = REGISTERS[reg_a]
            src = REGISTERS[reg_b]
        return src, dst

    def decode_instruction(self, data, addr):
        """Decode the instruction at the start of ``data``.

        Returns the 8-tuple ``(instr, src, src_op, dst, dst_op, src_value,
        dst_value, length)``. ``instr`` is ``None`` when the bytes cannot be
        decoded: fewer than 2 bytes, an unknown branch opcode, an encoding
        with top bits ``0b01``, or a truncated trailing immediate.
        """
        length = 2
        # Result for anything that cannot be decoded.
        invalid = (None, None, DEFAULT_MODE, None, DEFAULT_MODE,
                   None, None, length)
        if len(data) < 2:
            return invalid

        instr = None
        extra = None
        src, dst = None, None
        src_op, dst_op = DEFAULT_MODE, DEFAULT_MODE
        src_value, dst_value = None, None

        word = struct.unpack('>H', data[:2])[0]
        opcode_type = word >> 14

        if opcode_type == 0b11:
            # Branch: 4-bit condition in bits 10-13, 10-bit offset below.
            branch_type = (word & 0x3c00) >> 10
            if branch_type >= len(BRANCH_INSTRUCTIONS):
                log_error('[%x] Bad branch opcode: %x' % (addr, branch_type))
                return invalid
            instr = BRANCH_INSTRUCTIONS[branch_type]
            branch_offset = word & 0x3ff
            dst_value = (branch_offset << 1) + addr
            src_op = EMPTY_MODE
            dst_op = IMM_ADDRESS_MODE
        elif opcode_type == 0b10:
            # Register in bits 8-11, 8-bit immediate in bits 0-7.
            instr = SPECIAL_INSTRUCTIONS[(word >> 12) & 0x3]
            # Parenthesised on purpose: ``word & 0xf00 >> 8`` parses as
            # ``word & 0xf`` and would read the low immediate nibble.
            dst = REGISTERS[(word & 0xf00) >> 8]
            dst_op = REGISTER_MODE
            src_value = word & 0xff
            src_op = IMM_INTEGER_MODE
        elif opcode_type == 0b00:
            opcode = word >> 8
            instr = INSTRUCTIONS[opcode]
            src, dst = self.get_operands(instr, word)

            # Some instructions carry a 16- or 32-bit immediate after the
            # opcode word; treat a truncated immediate as undecodable
            # instead of letting struct.error escape.
            if instr in IMM_INSTRUCTION_16:
                if len(data) < 4:
                    return invalid
                extra = struct.unpack('>H', data[2:4])[0]
                length += 2
            elif instr in IMM_INSTRUCTION_32:
                if len(data) < 6:
                    return invalid
                extra = struct.unpack('>I', data[2:6])[0]
                length += 4

            # ``extra is not None`` (rather than truthiness) so that an
            # immediate of 0 is still shown as an operand.
            if instr in ONE_REG_INSTRUCTIONS:
                dst_op = REGISTER_MODE
                src_op = EMPTY_MODE
                if extra is not None:
                    src_value = extra
                    src_op = IMM_INTEGER_MODE
            elif instr in TWO_REG_INSTRUCTIONS:
                src_op = REGISTER_MODE
                dst_op = REGISTER_MODE
            elif instr in NO_IMM_INSTRUCTIONS and extra is not None:
                src = None
                dst = None
                src_op = EMPTY_MODE
                dst_op = IMM_INTEGER_MODE
                dst_value = extra
            else:
                src_op = EMPTY_MODE
                dst_op = EMPTY_MODE

        return instr, src, src_op, dst, dst_op, src_value, dst_value, length

    def perform_get_instruction_info(self, data, addr):
        """Return length and branch information for one instruction, or
        ``None`` when it cannot be decoded."""
        (instr, _src, _src_op, _dst, _dst_op, _src_value, dst_value,
         length) = self.decode_instruction(data, addr)
        if instr is None:
            # Consistent with the text and IL callbacks.
            return None

        res = InstructionInfo()
        res.length = length

        if instr == 'ret':
            res.add_branch(BranchType.FunctionReturn)
        elif instr in BRANCH_INSTRUCTIONS:
            res.add_branch(BranchType.TrueBranch, dst_value)
            # The not-taken path falls through to the next instruction.
            res.add_branch(BranchType.FalseBranch, addr + length)
        elif instr == 'jsra':
            res.add_branch(BranchType.CallDestination, dst_value)
        elif instr == 'jmpa':
            res.add_branch(BranchType.UnconditionalBranch, dst_value)
        return res

    def perform_get_instruction_text(self, data, addr):
        """Return ``(tokens, length)`` for one instruction, or ``None`` when
        it cannot be decoded.

        The destination operand is printed first, then ``', '`` and the
        source operand when both are present.
        """
        (instr, src, src_op, dst, dst_op, src_value, dst_value,
         length) = self.decode_instruction(data, addr)
        if instr is None:
            return None

        tokens = [
            InstructionTextToken(InstructionTextTokenType.TextToken,
                                 '{:9s}'.format(instr))
        ]

        has_dst = dst_op != EMPTY_MODE
        has_src = src_op != EMPTY_MODE

        if has_dst:
            tokens += OperandTokens[dst_op](dst, dst_value)
        src_token = OperandTokens[src_op](src, src_value) if has_src else None
        if has_src and has_dst:
            tokens += [
                InstructionTextToken(InstructionTextTokenType.TextToken, ', ')
            ]
        if src_token:
            tokens += src_token

        return tokens, length

    def perform_get_instruction_low_level_il(self, data, addr, il):
        """Emit an ``unimplemented`` IL instruction and return the length,
        or ``None`` when the bytes cannot be decoded."""
        decoded = self.decode_instruction(data, addr)
        instr, length = decoded[0], decoded[-1]
        if instr is None:
            return None
        il.append(il.unimplemented())
        return length
class Clemency(Architecture):
    """Binary Ninja architecture for cLEMENCy (disassembly only).

    Instructions are decoded by ``disassemble`` from a ``BitReader16`` over
    the raw data. Instruction lengths are reported as
    ``reader.nytes_read() * 2`` and code offsets encoded in operands are
    likewise scaled by 2. Lifting is not implemented yet.
    """

    name = "clemency"
    address_size = 4
    default_int_size = 4

    # Register setup
    regs = {
        'R0': RegisterInfo('R0', 4),
        'R1': RegisterInfo('R1', 4),
        'R2': RegisterInfo('R2', 4),
        'R3': RegisterInfo('R3', 4),
        'R4': RegisterInfo('R4', 4),
        'R5': RegisterInfo('R5', 4),
        'R6': RegisterInfo('R6', 4),
        'R7': RegisterInfo('R7', 4),
        'R8': RegisterInfo('R8', 4),
        'R9': RegisterInfo('R9', 4),
        'R10': RegisterInfo('R10', 4),
        'R11': RegisterInfo('R11', 4),
        'R12': RegisterInfo('R12', 4),
        'R13': RegisterInfo('R13', 4),
        'R14': RegisterInfo('R14', 4),
        'R15': RegisterInfo('R15', 4),
        'R16': RegisterInfo('R16', 4),
        'R17': RegisterInfo('R17', 4),
        'R18': RegisterInfo('R18', 4),
        'R19': RegisterInfo('R19', 4),
        'R20': RegisterInfo('R20', 4),
        'R21': RegisterInfo('R21', 4),
        'R22': RegisterInfo('R22', 4),
        'R23': RegisterInfo('R23', 4),
        'R24': RegisterInfo('R24', 4),
        'R25': RegisterInfo('R25', 4),
        'R26': RegisterInfo('R26', 4),
        'R27': RegisterInfo('R27', 4),
        'R28': RegisterInfo('R28', 4),
        'ST': RegisterInfo('ST', 4),
        'RA': RegisterInfo('RA', 4),
        'PC': RegisterInfo('PC', 4)
    }
    stack_pointer = 'ST'

    # Flag setup
    flags = ['s', 'o', 'c', 'z']
    flag_roles = {
        's': FlagRole.NegativeSignFlagRole,
        'o': FlagRole.OverflowFlagRole,
        'c': FlagRole.CarryFlagRole,
        'z': FlagRole.ZeroFlagRole
    }
    flag_write_types = ['', '*']
    flags_written_by_flag_write_type = {'*': ['s', 'o', 'c', 'z']}
    flags_required_for_flag_condition = {
        LowLevelILFlagCondition.LLFC_NE: ['z'],  # not equal
        LowLevelILFlagCondition.LLFC_E: ['z'],  # equal
        LowLevelILFlagCondition.LLFC_ULT: ['c', 'z'],  # unsigned less than
        LowLevelILFlagCondition.LLFC_ULE: ['c', 'z'],  # unsigned less than or equal
        LowLevelILFlagCondition.LLFC_UGT: ['c', 'z'],  # unsigned greater than
        LowLevelILFlagCondition.LLFC_UGE: ['c', 'z'],  # unsigned greater than or equal
        LowLevelILFlagCondition.LLFC_SLT: ['s'],  # signed less than
        LowLevelILFlagCondition.LLFC_SLE: ['s', 'z'],  # signed less than or equal
        LowLevelILFlagCondition.LLFC_SGT: ['s', 'z'],  # signed greater than
        LowLevelILFlagCondition.LLFC_SGE: ['s'],  # signed greater than or equal
        LowLevelILFlagCondition.LLFC_NEG: ['s'],  # negative
        LowLevelILFlagCondition.LLFC_POS: ['s'],  # positive
        LowLevelILFlagCondition.LLFC_O: ['o'],  # overflow
        LowLevelILFlagCondition.LLFC_NO: ['o']  # no overflow
    }

    def perform_get_instruction_info(self, data, addr):
        """Return length and branch information for one instruction.

        Returns ``None`` (after logging) when the bytes do not form a valid
        instruction.
        """
        reader = BitReader16(BytestringReader(data))
        try:
            ins = disassemble(reader)
        except InvalidMachineCodeException as e:
            log_error("InvalidMachineCodeException at address: " + hex(addr) +
                      " {0}".format(e))
            return None

        insInfo = InstructionInfo()
        insInfo.length = reader.nytes_read() * 2
        fallthrough = addr + insInfo.length

        op = ins.mnemonic
        if op in ['re', 'ht']:
            insInfo.add_branch(BranchType.FunctionReturn)
        elif op in ['b', 'brr']:
            # relative direct unconditional
            insInfo.add_branch(BranchType.UnconditionalBranch,
                               addr + 2 * ins.op1.value)
        elif op in [
                'bn', 'be', 'bl', 'ble', 'bg', 'bge', 'bno', 'bo', 'bns',
                'bs', 'bsl', 'bsle', 'bsg', 'bsge'
        ]:
            # relative direct conditional
            insInfo.add_branch(BranchType.TrueBranch,
                               addr + 2 * ins.op1.value)
            insInfo.add_branch(BranchType.FalseBranch, fallthrough)
        elif op == 'br':
            # absolute indirect unconditional
            insInfo.add_branch(BranchType.IndirectBranch)
        elif op in [
                'brn', 'bre', 'brl', 'brle', 'brg', 'brge', 'brno', 'bro',
                'brns', 'brs', 'brsl', 'brsle', 'brsg', 'brsge'
        ]:
            # absolute indirect conditional
            insInfo.add_branch(BranchType.TrueBranch)
            insInfo.add_branch(BranchType.FalseBranch, fallthrough)
        elif op == 'bra':
            # absolute direct
            insInfo.add_branch(BranchType.UnconditionalBranch,
                               2 * ins.op1.value)
        elif op in ['c', 'car']:
            # relative direct unconditional call
            insInfo.add_branch(BranchType.CallDestination,
                               addr + 2 * ins.op1.value)
        elif op in [
                'cn', 'ce', 'cl', 'cle', 'cg', 'cge', 'cno', 'co', 'cns',
                'cs', 'csl', 'csle', 'csg', 'csge'
        ]:
            # relative direct conditional call
            insInfo.add_branch(BranchType.CallDestination,
                               addr + 2 * ins.op1.value)
            insInfo.add_branch(BranchType.FalseBranch, fallthrough)
        elif op == 'caa':
            # absolute direct call
            insInfo.add_branch(BranchType.CallDestination, 2 * ins.op1.value)
        elif op == 'cr':
            # indirect unconditional call
            insInfo.add_branch(BranchType.CallDestination)
        elif op in [
                'crn', 'cre', 'crl', 'crle', 'crg', 'crge', 'crno', 'cro',
                'crns', 'crs', 'crsl', 'crsle', 'crsg', 'crsge'
        ]:
            # indirect conditional call
            insInfo.add_branch(BranchType.CallDestination)
            insInfo.add_branch(BranchType.FalseBranch, fallthrough)

        return insInfo

    def perform_get_instruction_text(self, data, addr):
        """Return ``(tokens, length)`` for one instruction.

        Returns ``None`` (after logging) when the bytes do not form a valid
        instruction. For ``mov``, the immediate is shown both raw and
        doubled, and a trailing string comment is added when the immediate
        is a key of the module-level ``strings`` mapping.
        """
        reader = BitReader16(BytestringReader(data))
        try:
            ins = disassemble(reader)
        except InvalidMachineCodeException:
            # ``addr`` is an integer; it must be formatted before being
            # concatenated, otherwise the log call itself raises TypeError.
            log_error("InvalidMachineCodeException at address: " + hex(addr))
            return None

        def reg(operand):
            return InstructionTextToken(
                InstructionTextTokenType.RegisterToken, str(operand.reg))

        def sep(text):
            return InstructionTextToken(
                InstructionTextTokenType.OperandSeparatorToken, text)

        def imm(text, value):
            return InstructionTextToken(
                InstructionTextTokenType.IntegerToken, text, value)

        tokens = [
            InstructionTextToken(InstructionTextTokenType.InstructionToken,
                                 ins.mnemonic),
            sep(" "),
        ]

        if ins.is_load_or_store():
            scaled = 2 * ins.op3.value
            tokens.extend([
                reg(ins.op1),
                sep(', ['),
                reg(ins.op2),
                sep(' + '),
                imm(hex(scaled), scaled),
                sep(', '),
                imm(str(ins.op4.value), ins.op4.value),
                sep(']'),
            ])
        elif ins.is_direct_relative_jmp_or_call():
            target = addr + 2 * ins.op1.value
            tokens.append(
                InstructionTextToken(
                    InstructionTextTokenType.CodeRelativeAddressToken,
                    hex(target), target))
        elif ins.is_direct_jmp_or_call():
            target = 2 * ins.op1.value
            tokens.append(
                InstructionTextToken(
                    InstructionTextTokenType.PossibleAddressToken,
                    hex(target), target))
        elif ins.mnemonic == 'mov':
            raw = ins.op2.value
            tokens.extend([
                reg(ins.op1),
                sep(', '),
                imm(hex(raw), raw),
                sep(' / '),
                imm(hex(2 * raw), 2 * raw),
            ])
        else:
            for i, op in enumerate(ins.operands):
                if op.is_reg():
                    tokens.append(reg(op))
                elif op.is_imm():
                    tokens.append(imm(hex(op.value), op.value))
                if i + 1 != ins.arity:
                    tokens.append(sep(', '))

        # ``strings`` is only read here, so no ``global`` declaration is
        # needed to reach the module-level mapping.
        if ins.mnemonic == 'mov' and ins.operands[1].value in strings:
            string = strings[ins.operands[1].value]
            tokens.append(
                InstructionTextToken(InstructionTextTokenType.StringToken,
                                     ' // {}'.format(string)))

        return tokens, reader.nytes_read() * 2

    def perform_get_instruction_low_level_il(self, data, addr, il):
        """Return the instruction length without emitting any IL.

        Returns ``None`` (after logging) when the bytes do not form a valid
        instruction.
        """
        reader = BitReader16(BytestringReader(data))
        try:
            disassemble(reader)
        except InvalidMachineCodeException:
            # Format the integer address; str + int would raise TypeError.
            log_error("InvalidMachineCodeException at address: " + hex(addr))
            return None

        # TODO: lift the decoded instruction into ``il``.
        return reader.nytes_read() * 2
class Spu(Architecture): name = 'spu' address_size = 4 default_int_size = 4 max_instr_length = 4 regs = dict((reg, RegisterInfo(reg, 16)) for reg in registers) stack_pointer = 'sp' flags = ('c', 'z', 'i', 'd', 'b', 'v', 's') flag_write_types = ('*', 'czs', 'zvs', 'zs') flag_roles = { 'c': FlagRole. SpecialFlagRole, # Not a normal carry flag, subtract result is inverted 'z': FlagRole.ZeroFlagRole, 'v': FlagRole.OverflowFlagRole, 's': FlagRole.NegativeSignFlagRole } flags_required_for_flag_condition = { LowLevelILFlagCondition.LLFC_UGE: ['c'], LowLevelILFlagCondition.LLFC_ULT: ['c'], LowLevelILFlagCondition.LLFC_E: ['z'], LowLevelILFlagCondition.LLFC_NE: ['z'], LowLevelILFlagCondition.LLFC_NEG: ['s'], LowLevelILFlagCondition.LLFC_POS: ['s'] } flags_written_by_flag_write_type = { '*': ['c', 'z', 'v', 's'], 'czs': ['c', 'z', 's'], 'zvs': ['z', 'v', 's'], 'zs': ['z', 's'] } itable = [None] * 2048 _comma_separator = InstructionTextToken(OperandSeparatorToken, ', ') def __init__(self, *args, **kwargs): super(Spu, self).__init__(*args, **kwargs) self.init_instructions() def init_instructions(self): # Start idef classes class idef(object): def __init__(self, name): self.name = name def decode(self, opcode, addr): raise NotImplementedError def get_text(self, opcode, addr): raise NotImplementedError class idef_RR(idef): def decode(self, opcode, addr): op, rb, ra, rt = decode_RR(opcode) return ThreeRegisters(registers[rb], registers[ra], registers[rt]) def get_text(self, opcode, addr): rb, ra, rt = self.decode(opcode, addr) return ( InstructionTextToken(TextToken, '{:10s}'.format(self.name)), InstructionTextToken(RegisterToken, rt), Spu._comma_separator, InstructionTextToken(RegisterToken, ra), Spu._comma_separator, InstructionTextToken(RegisterToken, rb), ) class idef_ROHROL(idef_RR): def decode(self, opcode, addr): op, roh, ra, rol = decode_RR(opcode) # prefetch = roh & 0x40 != 0 roh &= 3 val = roh << 7 | rol if val & 0x100: val -= 0x200 val = (val << 2) + addr # if 
prefetch: # if p.cmd.Op2.reg == 0: # p.cmd.Op2.type = o_void # if val == 0: # p.cmd.Op1.type = o_void return ImmediateRegister(val, registers[ra]) def get_text(self, opcode, addr): brinst, brtarg = self.decode(opcode, addr) return ( InstructionTextToken(TextToken, '{:10s}'.format(self.name)), InstructionTextToken(PossibleAddressToken, '{:#x}'.format(brinst), brinst), Spu._comma_separator, InstructionTextToken(RegisterToken, brtarg), ) class idef_R(idef_RR): def __init__(self, name, noRA=False): self.name = name self.noRA = noRA def get_text(self, opcode, addr): _, ra, rt = self.decode(opcode, addr) tokens = [ InstructionTextToken(TextToken, '{:10s}'.format(self.name)) ] if not self.noRA: tokens.extend( (InstructionTextToken(RegisterToken, ra), Spu._comma_separator)) tokens.append(InstructionTextToken(RegisterToken, rt)) return tokens class idef_SPR(idef): def __init__(self, name, swap=False, offset=128): self.name = name self.swap = swap self.offset = offset def decode(self, opcode, addr): op, iii, sa, rt = decode_RR(opcode) sa += self.offset if self.swap: rt, sa = sa, rt return ImmediateTwoRegisters(iii, registers[sa], registers[rt]) def get_text(self, opcode, addr): _, sa, rt = self.decode(opcode, addr) return ( InstructionTextToken(TextToken, '{:10s}'.format(self.name)), InstructionTextToken(RegisterToken, rt), Spu._comma_separator, InstructionTextToken(RegisterToken, sa), ) class idef_CH(idef_SPR): def __init__(self, name, swap=False): idef_SPR.__init__(self, name, swap, 256) class idef_noops(idef): def __init__(self, name, cbit=False): self.name = name self.cbit = cbit self.cf = 0 def decode(self, opcode, addr): op, iii1, iii2, iii3 = decode_RR(opcode) # if self.cbit and p.cmd.Op3.reg & 0x40 != 0: # iii1 &= ~0x40 return def get_text(self, opcode, addr): # TODO: To add false targets or not to add.. 
that is the question return InstructionTextToken(TextToken, '{:10s}'.format(self.name)), class idef_RRR(idef): def decode(self, opcode, addr): op, rt, rb, ra, rc = decode_RRR(opcode) return FourRegisters(registers[rt], registers[rb], registers[ra], registers[rc]) def get_text(self, opcode, addr): rt, rb, ra, rc = self.decode(opcode, addr) return ( InstructionTextToken(TextToken, '{:10s}'.format(self.name)), InstructionTextToken(RegisterToken, rt), Spu._comma_separator, InstructionTextToken(RegisterToken, ra), Spu._comma_separator, InstructionTextToken(RegisterToken, rb), Spu._comma_separator, InstructionTextToken(RegisterToken, rc), ) class idef_Branch(idef_RR): def __init__(self, name, no2=False, uncond=False): self.name = name self.no2 = no2 def get_text(self, opcode, addr): rb, ra, rt = self.decode(opcode, addr) tokens = [ InstructionTextToken(TextToken, '{:10s}'.format(self.name)) ] if not self.no2: tokens.extend( (InstructionTextToken(RegisterToken, ra), Spu._comma_separator, InstructionTextToken(RegisterToken, rb))) tokens.append(InstructionTextToken(RegisterToken, rt)) return tokens class idef_RI7(idef): def __init__(self, name, signed=True): self.name = name self.signed = signed def decode(self, opcode, addr): op, i7, ra, rt = decode_RI7(opcode) if self.signed and i7 & 0x40: i7 -= 0x80 return ImmediateTwoRegisters(i7, registers[ra], registers[rt]) def get_text(self, opcode, addr): i7, ra, rt = self.decode(opcode, addr) return ( InstructionTextToken(TextToken, '{:10s}'.format(self.name)), InstructionTextToken(RegisterToken, rt), Spu._comma_separator, InstructionTextToken(RegisterToken, ra), Spu._comma_separator, InstructionTextToken(IntegerToken, '{:#x}'.format(i7), i7), ) class idef_RI8(idef): def __init__(self, name, bias): self.name = name self.bias = bias # self.cf = CF_CHG1 | CF_USE2 | CF_USE3 def decode(self, opcode): op, i8, ra, rt = decode_RI8(opcode) i8 = self.bias - i8 return ImmediateTwoRegisters(i8, registers[ra], registers[rt]) class 
idef_RI7_ls(idef_RI7): pass # def decode(self, opcode, addr): # # _, p.cmd.Op2.addr, p.cmd.Op2.reg, p.cmd.Op1.reg = decode_RI7(opcode) # return decode_RI7(opcode) # # p.cmd.Op1.type = o_reg # # p.cmd.Op2.type = o_displ # # p.cmd.Op2.dtyp = dt_byte16 # # if p.cmd.Op2.addr & 0x40: # # p.cmd.Op2.addr -= 0x80 # # p.cmd.Op2.specval |= spu_processor_t.FL_SIGNED class idef_RI10(idef): def __init__(self, name, signed=True): self.name = name self.signed = signed def decode(self, opcode, addr): op, i10, ra, rt = decode_RI10(opcode) if self.signed: if i10 & 0x200: i10 -= 0x400 return ImmediateTwoRegisters(i10, registers[ra], registers[rt]) def get_text(self, opcode, addr): i10, ra, rt = self.decode(opcode, addr) name = self.name if i10 == 0 and name is 'ori': name = 'lr' tokens = [ InstructionTextToken(TextToken, '{:10s}'.format(name)), InstructionTextToken(RegisterToken, rt), Spu._comma_separator, InstructionTextToken(RegisterToken, ra) ] if name is not 'lr': tokens.extend( (Spu._comma_separator, InstructionTextToken(IntegerToken, '{:#x}'.format(i10), i10))) return tokens class idef_RI10_ls(idef_RI10): def decode(self, opcode, addr): op, i10, ra, rt = decode_RI10(opcode) i10 <<= 4 if i10 & 0x2000: i10 -= 0x4000 return ImmediateTwoRegisters(i10, registers[ra], registers[rt]) class idef_RI16(idef): def __init__(self, name, flags=0, noRA=False, isBranch=True, signext=False): self.name = name self.noRA = noRA self.isBranch = isBranch self.signext = signext def decode(self, opcode, addr): op, i16, rt = decode_RI16(opcode) if self.signext and i16 & 0x8000: i16 -= 0x10000 # self.fixRA() return ImmediateRegister(i16, registers[rt]) def get_text(self, opcode, addr): i16, rt = self.decode(opcode, addr) tokens = [ InstructionTextToken(TextToken, '{:10s}'.format(self.name)) ] if not self.noRA: tokens.extend( (InstructionTextToken(RegisterToken, rt), Spu._comma_separator)) tokens.append( InstructionTextToken(PossibleAddressToken, '{:#x}'.format(i16), i16)) return tokens class 
idef_RI16_abs(idef_RI16): def decode(self, opcode, addr): i16, rt = idef_RI16.decode(self, opcode, addr) i16 <<= 2 return ImmediateRegister(i16, rt) class idef_RI16_rel(idef_RI16_abs): def decode(self, opcode, addr): i16, rt = idef_RI16.decode(self, opcode, addr) i16 = (i16 << 2) + addr if i16 & 0x40000: i16 &= ~0x40000 return ImmediateRegister(i16, rt) def get_text(self, opcode, addr): i16, rt = self.decode(opcode, addr) tokens = [ InstructionTextToken(TextToken, '{:10s}'.format(self.name)) ] if not self.noRA: tokens.extend( (InstructionTextToken(RegisterToken, rt), Spu._comma_separator)) tokens.append( InstructionTextToken(PossibleAddressToken, '{:#x}'.format(i16), i16)) return tokens class idef_RI18(idef): def decode(self, opcode, addr): op, i18, rt = decode_RI18(opcode) return ImmediateRegister(i18, registers[rt]) def get_text(self, opcode, addr): i18, rt = self.decode(opcode, addr) return ( InstructionTextToken(TextToken, '{:10s}'.format(self.name)), InstructionTextToken(RegisterToken, rt), Spu._comma_separator, InstructionTextToken(PossibleAddressToken, '{:#x}'.format(i18), i18), ) class idef_I16RO(idef): def __init__(self, name, rel=False): self.name = name self.cf = 0 self.rel = rel def decode(self, opcode, addr): op, roh, i16, rol = decode_I16RO(opcode) val = (roh << 7) | rol if val & 0x200: val -= 0x400 val = (val << 2) + addr if self.rel: # i16 is signed relative offset if i16 & 0x8000: i16 -= 0x10000 i16 = addr + (i16 << 2) else: i16 <<= 2 return TwoImmediates(val, i16) def get_text(self, opcode, addr): brinst, brtarg = self.decode(opcode, addr) return ( InstructionTextToken(TextToken, '{:10s}'.format(self.name)), InstructionTextToken(PossibleAddressToken, '{:#x}'.format(brinst), brinst), Spu._comma_separator, InstructionTextToken(PossibleAddressToken, '{:#x}'.format(brtarg), brtarg), ) class idef_stop(idef): def decode(self, opcode, addr): _, t = decode_STOP(opcode) # p.cmd.Op1.type = o_imm # p.cmd.Op1.value = t return t def get_text(self, opcode, 
addr): # t = self.decode(opcode, addr) return tuple() # End idef classes itable_RI10 = { 0x04: idef_RI10('ori'), 0x05: idef_RI10('orhi'), 0x06: idef_RI10('orbi'), 0x0c: idef_RI10('sfi'), 0x0d: idef_RI10('sfhi'), 0x14: idef_RI10('andi'), 0x15: idef_RI10('andhi'), 0x16: idef_RI10('andbi'), 0x1c: idef_RI10('ai'), 0x1d: idef_RI10('ahi'), 0x24: idef_RI10_ls('stqd'), 0x34: idef_RI10_ls('lqd'), 0x44: idef_RI10('xori'), 0x45: idef_RI10('xorhi'), 0x46: idef_RI10('xorbi', signed=False), 0x4c: idef_RI10('cgti'), 0x4d: idef_RI10('cgthi'), 0x4e: idef_RI10('cgtbi'), 0x4f: idef_RI10('hgti'), # false target 0x5c: idef_RI10('clgti'), 0x5d: idef_RI10('clgthi'), 0x5e: idef_RI10('clgtbi'), 0x5f: idef_RI10('hlgti'), # false target 0x74: idef_RI10('mpyi'), 0x75: idef_RI10('mpyui'), 0x7c: idef_RI10('ceqi'), 0x7d: idef_RI10('ceqhi'), 0x7e: idef_RI10('ceqbi'), 0x7f: idef_RI10('heqi'), } # 11-bit opcodes (bits 0:10) itable_RR = { 0x000: idef_stop('stop'), 0x001: idef_noops('lnop'), # no regs 0x002: idef_noops('sync', cbit=True), # C/#C 0x003: idef_noops('dsync'), # no regs 0x00c: idef_SPR('mfspr'), # SA = number 0x00d: idef_CH('rdch'), # //, CA, RT 0x00f: idef_CH('rchcnt'), # //, CA, RT 0x040: idef_RR('sf'), 0x041: idef_RR('or'), 0x042: idef_RR('bg'), 0x048: idef_RR('sfh'), 0x049: idef_RR('nor'), 0x053: idef_RR('absdb'), 0x058: idef_RR('rot'), 0x059: idef_RR('rotm'), 0x05a: idef_RR('rotma'), 0x05b: idef_RR('shl'), 0x05c: idef_RR('roth'), 0x05d: idef_RR('rothm'), 0x05e: idef_RR('rotmah'), 0x05f: idef_RR('shlh'), 0x07f: idef_RR('shlhi'), 0x0c0: idef_RR('a'), 0x0c1: idef_RR('and'), 0x0c2: idef_RR('cg'), 0x0c8: idef_RR('ah'), 0x0c9: idef_RR('nand'), 0x0d3: idef_RR('avgb'), 0x10c: idef_SPR('mtspr', swap=True), # SA = number 0x10d: idef_CH('wrch', swap=True), # // CA RT 0x128: idef_Branch('biz'), # branch 0x129: idef_Branch('binz'), # branch 0x12a: idef_Branch('bihz'), # branch 0x12b: idef_Branch('bihnz'), # branch 0x140: idef_RR('stopd'), 0x144: idef_RR('stqx'), 0x1a8: idef_Branch('bi', 
no2=True, uncond=True), # branch 0x1a9: idef_Branch('bisl'), # branch 0x1aa: idef_Branch('iret', no2=True, uncond=True), # branch 0x1ab: idef_Branch('bisled'), # branch 0x1ac: idef_ROHROL('hbr'), # ROH/ROL form 0x1b0: idef_R('gb'), # no first reg 0x1b1: idef_R('gbh'), # no first reg 0x1b2: idef_R('gbb'), # no first reg 0x1b4: idef_R('fsm'), # no first reg 0x1b5: idef_R('fsmh'), # no first reg 0x1b6: idef_R('fsmb'), # no first reg 0x1b8: idef_R('frest'), # no first reg 0x1b9: idef_R('frsqest'), # no first reg 0x1c4: idef_RR('lqx'), 0x1cc: idef_RR('rotqbybi'), 0x1cd: idef_RR('rotqmbybi'), 0x1cf: idef_RR('shlqbybi'), 0x1d4: idef_RR('cbx'), 0x1d5: idef_RR('chx'), 0x1d6: idef_RR('cwx'), 0x1d7: idef_RR('cdx'), 0x1d8: idef_RR('rotqbi'), 0x1d9: idef_RR('rotqmbi'), 0x1db: idef_RR('shlqbi'), 0x1dc: idef_RR('rotqby'), 0x1dd: idef_RR('rotqmby'), 0x1df: idef_RR('shlqby'), 0x1f0: idef_R('orx'), # no first reg 0x201: idef_noops('nop'), # no regs 0x240: idef_RR('cgt'), 0x241: idef_RR('xor'), 0x248: idef_RR('cgth'), 0x249: idef_RR('eqv'), 0x250: idef_RR('cgtb'), 0x253: idef_RR('sumb'), 0x258: idef_RR('hgt'), 0x2a5: idef_R('clz'), # no first reg 0x2a6: idef_R('xswd'), # no first reg 0x2ae: idef_R('xshw'), # no first 0x2b4: idef_R('cntb'), # no first reg 0x2b6: idef_R('xsbh'), # no first reg 0x2c0: idef_RR('clgt'), 0x2c1: idef_RR('andc'), 0x2c2: idef_RR('fcgt'), 0x2c3: idef_RR('dfcgt'), 0x2c4: idef_RR('fa'), 0x2c5: idef_RR('fs'), 0x2c6: idef_RR('fm'), 0x2c8: idef_RR('clgth'), 0x2c9: idef_RR('orc'), 0x2ca: idef_RR('fcmgt'), 0x2cb: idef_RR('dfcmgt'), 0x2cc: idef_RR('dfa'), 0x2cd: idef_RR('dfs'), 0x2ce: idef_RR('dfm'), 0x2d0: idef_RR('clgtb'), 0x2d8: idef_RR('hlgt'), # false target 0x340: idef_RR('addx'), 0x341: idef_RR('sfx'), 0x342: idef_RR('cgx'), 0x343: idef_RR('bgx'), 0x346: idef_RR('mpyhha'), 0x34e: idef_RR('mpyhhau'), 0x35c: idef_RR('dfma'), 0x35d: idef_RR('dfms'), 0x35e: idef_RR('dfnms'), 0x35f: idef_RR('dfnma'), 0x398: idef_R('fscrrd', noRA=True), # no first and second 0x3b8: 
idef_R('fesd'), # no first 0x3b9: idef_R('frds'), # no first 0x3ba: idef_R('fscrwr'), # no first, rt is false target 0x3c0: idef_RR('ceq'), 0x3c2: idef_RR('fceq'), 0x3c3: idef_RR('dfceq'), 0x3c4: idef_RR('mpy'), 0x3c5: idef_RR('mpyh'), 0x3c7: idef_RR('mpys'), 0x3c6: idef_RR('mpyhh'), 0x3c8: idef_RR('ceqh'), 0x3ca: idef_RR('fcmeq'), 0x3cb: idef_RR('dfcmeq'), 0x3cc: idef_RR('mpyu'), 0x3ce: idef_RR('mpyhhu'), 0x3d0: idef_RR('ceqb'), 0x3d4: idef_RR('fi'), 0x3d8: idef_RR('heq'), # rt is false target } # 4-bit opcodes (bits 0:3) itable_RRR = { 0x8: idef_RRR('selb'), 0xb: idef_RRR('shufb'), 0xc: idef_RRR('mpya'), 0xd: idef_RRR('fnms'), 0xe: idef_RRR('fma'), 0xf: idef_RRR('fms'), } itable_RI16 = { 0x040: idef_RI16_rel('brz'), 0x041: idef_RI16_abs('stqa', isBranch=False), 0x042: idef_RI16_rel('brnz'), 0x044: idef_RI16_rel('brhz'), 0x046: idef_RI16_rel('brhnz'), 0x047: idef_RI16_rel('stqr', isBranch=False), 0x060: idef_RI16_abs('bra', noRA=True), 0x061: idef_RI16_abs('lqa', isBranch=False), 0x062: idef_RI16_abs('brasl'), 0x064: idef_RI16_rel('br', noRA=True), 0x065: idef_RI16('fsmbi'), 0x066: idef_RI16_rel('brsl'), 0x067: idef_RI16_rel('lqr', isBranch=False), 0x081: idef_RI16('il', signext=True), 0x082: idef_RI16('ilhu'), 0x083: idef_RI16('ilh'), 0x0c1: idef_RI16('iohl'), } itable_RI7 = { 0x078: idef_RI7('roti'), 0x079: idef_RI7('rotmi'), 0x07a: idef_RI7('rotmai'), 0x07b: idef_RI7('shli'), 0x07c: idef_RI7('rothi'), 0x07d: idef_RI7('rothmi'), 0x07e: idef_RI7('rotmahi'), 0x1f4: idef_RI7_ls('cbd'), 0x1f5: idef_RI7_ls('chd'), 0x1f6: idef_RI7_ls('cwd'), 0x1f7: idef_RI7_ls('cdd'), 0x1f8: idef_RI7('rotqbii'), 0x1f9: idef_RI7('rotqmbii'), 0x1fb: idef_RI7('shlqbii'), 0x1fc: idef_RI7('rotqbyi'), 0x1fd: idef_RI7('rotqmbyi'), 0x1ff: idef_RI7('shlqbyi'), 0x3bf: idef_RI7('dftsv', signed=False), } itable_RI18 = { 0x21: idef_RI18('ila'), 0x08: idef_I16RO('hbra'), # roh/rol 0x09: idef_I16RO('hbrr', rel=True), # roh/rol } # 10-bit opcodes (bits 0:9) itable_RI8 = { 0x1d8: idef_RI8('cflts', 
173), 0x1d9: idef_RI8('cfltu', 173), 0x1da: idef_RI8('csflt', 155), 0x1db: idef_RI8('cuflt', 155), } for i in xrange(2048): opcode = i << 21 RR = decode_RR(opcode) RRR = decode_RRR(opcode) RI7 = decode_RI7(opcode) RI8 = decode_RI8(opcode) RI10 = decode_RI10(opcode) RI16 = decode_RI16(opcode) RI18 = decode_RI18(opcode) ins = (itable_RR.get(RR[0], None) or itable_RRR.get(RRR[0], None) or itable_RI7.get(RI7[0], None) or itable_RI8.get(RI8[0], None) or itable_RI10.get(RI10[0], None) or itable_RI16.get(RI16[0], None) or itable_RI18.get(RI18[0], None)) if ins: self.itable[i] = ins def retrieve_instruction(self, data): try: opcode = struct.unpack('>I', data[:self.address_size])[0] except struct.error: return return self.itable[IBITS(opcode, 0, 10)], opcode def perform_get_instruction_info(self, data, addr): instruction, opcode = self.retrieve_instruction(data) if not instruction: return result = InstructionInfo() result.length = self.address_size inst_name = instruction.name if inst_name in ('bi', 'iret'): result.add_branch(FunctionReturn) elif inst_name in ('brsl', 'brasl'): branch_addr, _ = instruction.decode(opcode, addr) result.add_branch(CallDestination, branch_addr) elif inst_name == ('bisl', 'biz', 'binz', 'bihnz', 'bisled'): _, ra, _ = instruction.decode(opcode, addr) result.add_branch(IndirectBranch, ra) elif inst_name in ('brz', 'brnz', 'brhz', 'brhnz'): branch_addr, _ = instruction.decode(opcode, addr) result.add_branch(TrueBranch, branch_addr) result.add_branch(FalseBranch, addr + self.address_size) elif inst_name in ('br', 'bra'): branch_addr, _ = instruction.decode(opcode, addr) result.add_branch(UnconditionalBranch, branch_addr) return result def perform_get_instruction_text(self, data, addr): instruction, opcode = self.retrieve_instruction(data) if instruction is None: return return instruction.get_text(opcode, addr), self.address_size def perform_get_instruction_low_level_il(self, data, addr, il): instruction, opcode = self.retrieve_instruction(data) if 
instruction is None: return il_func = instruction_il.get(instruction.name, lambda *x: il.unimplemented()) decoded = instruction.decode(opcode, addr) lifted = il_func(il, addr, decoded) if isinstance(lifted, LowLevelILExpr): il.append(lifted) elif lifted: for llil in lifted: il.append(llil) return self.address_size
class MSP430(Architecture):
    """Binary Ninja architecture definition registered under the name 'msp430'.

    All registers are declared two bytes wide. Decoding is table driven and
    relies on module-level tables defined elsewhere in this file
    (InstructionNames, InstructionMask, InstructionMaskShift, GetOperands,
    OperandLengths, OperandTokens, InstructionIL, TYPE*_INSTRUCTIONS).
    """

    name = 'msp430'
    address_size = 2
    default_int_size = 2

    regs = {
        'pc': RegisterInfo('pc', 2),
        'sp': RegisterInfo('sp', 2),
        'sr': RegisterInfo('sr', 2),
        'cg': RegisterInfo('cg', 2),
        'r4': RegisterInfo('r4', 2),
        'r5': RegisterInfo('r5', 2),
        'r6': RegisterInfo('r6', 2),
        'r7': RegisterInfo('r7', 2),
        'r8': RegisterInfo('r8', 2),
        'r9': RegisterInfo('r9', 2),
        'r10': RegisterInfo('r10', 2),
        'r11': RegisterInfo('r11', 2),
        'r12': RegisterInfo('r12', 2),
        'r13': RegisterInfo('r13', 2),
        'r14': RegisterInfo('r14', 2),
        'r15': RegisterInfo('r15', 2),
    }

    flags = ['v', 'n', 'c', 'z']

    # The first flag write type is ignored currently.
    # See: https://github.com/Vector35/binaryninja-api/issues/513
    flag_write_types = ['', '*', 'cnv', 'cnz']

    flags_written_by_flag_write_type = {
        '*': ['v', 'n', 'c', 'z'],
        'cnv': ['v', 'n', 'c'],
        'cnz': ['c', 'n', 'z']
    }

    flag_roles = {
        'c': FlagRole.CarryFlagRole,
        'n': FlagRole.NegativeSignFlagRole,
        'z': FlagRole.ZeroFlagRole,
        'v': FlagRole.OverflowFlagRole
    }

    flags_required_for_flag_condition = {
        LowLevelILFlagCondition.LLFC_UGE: ['c'],
        LowLevelILFlagCondition.LLFC_ULT: ['c'],
        LowLevelILFlagCondition.LLFC_SGE: ['n', 'v'],
        LowLevelILFlagCondition.LLFC_SLT: ['n', 'v'],
        LowLevelILFlagCondition.LLFC_E: ['z'],
        LowLevelILFlagCondition.LLFC_NE: ['z'],
        LowLevelILFlagCondition.LLFC_NEG: ['n'],
        LowLevelILFlagCondition.LLFC_POS: ['n']
    }

    stack_pointer = 'sp'

    def decode_instruction(self, data, addr):
        """Decode the instruction at the start of ``data``.

        Returns a 9-tuple
        ``(instr, width, src_operand, dst_operand, src, dst, length,
        src_value, dst_value)``. Every element is None when ``data`` is too
        short or the opcode is not recognised.
        """
        error_value = (None, None, None, None, None, None, None, None, None)

        if len(data) < 2:
            return error_value

        instruction = struct.unpack('<H', data[0:2])[0]

        # emulated instructions
        if instruction == 0x4130:
            return 'ret', None, None, None, None, None, 2, None, None

        opcode = (instruction & 0xf000) >> 12

        mask = InstructionMask.get(opcode)
        shift = InstructionMaskShift.get(opcode)

        if mask and shift:
            instr = InstructionNames[opcode][(instruction & mask) >> shift]
        else:
            instr = InstructionNames[opcode]

        if instr is None:
            log_error('[{:x}] Bad opcode: {:x}'.format(addr, opcode))
            return error_value

        # Bit 6 selects byte width; TYPE3 (jump) instructions have no width.
        if instr not in TYPE3_INSTRUCTIONS:
            width = 1 if (instruction & 0x40) >> 6 else 2
        else:
            width = None

        src, src_operand, dst, dst_operand = GetOperands(instr, instruction)

        operand_length = 0
        if src_operand is not None:
            operand_length = OperandLengths[src_operand]
        if dst_operand is not None:
            operand_length += OperandLengths[dst_operand]

        length = 2 + operand_length

        if len(data) < length:
            return error_value

        src_value, dst_value = None, None

        if instr in TYPE3_INSTRUCTIONS:
            # 10-bit offset field scaled by two: an 11-bit signed byte offset.
            branch_target = (instruction & 0x3ff) << 1

            # Bit 10 is the sign bit of the scaled offset. Testing any lower
            # bit would misread large forward jumps as backward jumps.
            if branch_target & 0x400:
                branch_target |= 0xf800
                branch_target -= 0x10000

            src_value = addr + 2 + branch_target
        elif operand_length == 2:
            # One extension word: it belongs to whichever operand needs one.
            value = struct.unpack('<H', data[2:4])[0]
            if OperandLengths[src_operand]:
                src_value = value
            else:
                dst_value = value
        elif operand_length == 4:
            src_value, dst_value = struct.unpack('<HH', data[2:6])

        # A move into pc is presented as a branch.
        if instr == 'mov' and dst == 'pc':
            instr = 'br'

        return (instr, width, src_operand, dst_operand, src, dst, length,
                src_value, dst_value)

    def perform_get_instruction_info(self, data, addr):
        """Return an InstructionInfo with length and branches, or None."""
        instr, _, _, _, _, _, length, src_value, _ = self.decode_instruction(
            data, addr)

        if instr is None:
            return None

        result = InstructionInfo()
        result.length = length

        # Add branches
        if instr in ['ret', 'reti']:
            result.add_branch(BranchType.FunctionReturn)
        elif instr in ['jmp', 'br'] and src_value is not None:
            result.add_branch(BranchType.UnconditionalBranch, src_value)
        elif instr in TYPE3_INSTRUCTIONS:
            result.add_branch(BranchType.TrueBranch, src_value)
            result.add_branch(BranchType.FalseBranch, addr + 2)
        elif instr == 'call' and src_value is not None:
            result.add_branch(BranchType.CallDestination, src_value)

        return result

    def perform_get_instruction_text(self, data, addr):
        """Return ``(tokens, length)`` for the instruction, or None."""
        (instr, width, src_operand, dst_operand, src, dst, length,
         src_value, dst_value) = self.decode_instruction(data, addr)

        if instr is None:
            return None

        instruction_text = instr
        if width == 1:
            instruction_text += '.b'

        tokens = [
            InstructionTextToken(InstructionTextTokenType.TextToken,
                                 '{:7s}'.format(instruction_text))
        ]

        if instr in TYPE1_INSTRUCTIONS:
            tokens += OperandTokens[src_operand](src, src_value)
            tokens += [
                InstructionTextToken(InstructionTextTokenType.TextToken, ',')
            ]
            tokens += OperandTokens[dst_operand](dst, dst_value)
        elif instr in TYPE2_INSTRUCTIONS:
            tokens += OperandTokens[src_operand](src, src_value)
        elif instr in TYPE3_INSTRUCTIONS:
            tokens += OperandTokens[src_operand](src, src_value)

        return tokens, length

    def perform_get_instruction_low_level_il(self, data, addr, il):
        """Append the LLIL for the instruction to ``il``.

        Returns the instruction length, or None if decoding failed.
        """
        (instr, width, src_operand, dst_operand, src, dst, length,
         src_value, dst_value) = self.decode_instruction(data, addr)

        if instr is None:
            return None

        if InstructionIL.get(instr) is None:
            log_error('[0x{:4x}]: {} not implemented'.format(addr, instr))
            il.append(il.unimplemented())
        else:
            il_instr = InstructionIL[instr](il, src_operand, dst_operand, src,
                                            dst, width, src_value, dst_value)
            # A lifter may return one expression, a list of them, or None.
            if isinstance(il_instr, list):
                for i in [i for i in il_instr if i is not None]:
                    il.append(i)
            elif il_instr is not None:
                il.append(il_instr)

        return length
class CLEM(Architecture):
    """Binary Ninja architecture definition registered under the name 'clem'.

    Instruction decoding is delegated to the module-level ``Instructions``
    table and ``read_memory_value``; assembling goes through ``asm`` and the
    module-level ``FILE_BYTE_STREAM``.
    """

    name = 'clem'
    # NOTE(review): registers are declared 4 bytes wide while address_size is
    # 3 - confirm that this is intended.
    address_size = 3
    default_int_size = 3

    regs = {
        'r0': RegisterInfo('r0', 4),
        'r1': RegisterInfo('r1', 4),
        'r2': RegisterInfo('r2', 4),
        'r3': RegisterInfo('r3', 4),
        'r4': RegisterInfo('r4', 4),
        'r5': RegisterInfo('r5', 4),
        'r6': RegisterInfo('r6', 4),
        'r7': RegisterInfo('r7', 4),
        'r8': RegisterInfo('r8', 4),
        'r9': RegisterInfo('r9', 4),
        'r10': RegisterInfo('r10', 4),
        'r11': RegisterInfo('r11', 4),
        'r12': RegisterInfo('r12', 4),
        'r13': RegisterInfo('r13', 4),
        'r14': RegisterInfo('r14', 4),
        'r15': RegisterInfo('r15', 4),
        'r16': RegisterInfo('r16', 4),
        'r17': RegisterInfo('r17', 4),
        'r18': RegisterInfo('r18', 4),
        'r19': RegisterInfo('r19', 4),
        'r20': RegisterInfo('r20', 4),
        'r21': RegisterInfo('r21', 4),
        'r22': RegisterInfo('r22', 4),
        'r23': RegisterInfo('r23', 4),
        'r24': RegisterInfo('r24', 4),
        'r25': RegisterInfo('r25', 4),
        'r26': RegisterInfo('r26', 4),
        'r27': RegisterInfo('r27', 4),
        'r28': RegisterInfo('r28', 4),
        'st': RegisterInfo('st', 4),
        'ra': RegisterInfo('ra', 4),
        'pc': RegisterInfo('pc', 4),
    }

    flags = ['s', 'o', 'c', 'z']

    # The first flag write type is ignored currently.
    # See: https://github.com/Vector35/binaryninja-api/issues/513
    flag_write_types = ['', '*']

    flags_written_by_flag_write_type = {
        '*': ['s', 'o', 'c', 'z'],
    }

    flag_roles = {
        's': FlagRole.NegativeSignFlagRole,
        'o': FlagRole.OverflowFlagRole,
        'c': FlagRole.CarryFlagRole,
        'z': FlagRole.ZeroFlagRole,
    }

    flags_required_for_flag_condition = {
        LowLevelILFlagCondition.LLFC_UGE: ['c', 'z'],
        LowLevelILFlagCondition.LLFC_ULT: ['c'],
        LowLevelILFlagCondition.LLFC_SGE: ['s', 'o', 'z'],
        LowLevelILFlagCondition.LLFC_SLT: ['s', 'o'],
        LowLevelILFlagCondition.LLFC_E: ['z'],
        LowLevelILFlagCondition.LLFC_NE: ['z'],
        LowLevelILFlagCondition.LLFC_NEG: ['s'],
        LowLevelILFlagCondition.LLFC_POS: ['s']
    }

    stack_pointer = 'st'
    link_reg = 'ra'

    def find_instruction(self, addr):
        """Try every known instruction type at ``addr`` and return the match.

        Returns None when nothing decodes. When several candidates decode,
        the one named "LA" is preferred; otherwise RuntimeError is raised.
        """
        found = []
        # Cache of memory reads keyed by instruction size, so each size is
        # fetched only once per address.
        bytes_per_size = {}
        for name, (inst_type, values) in Instructions.items():
            size = inst_type.SIZE
            if size not in bytes_per_size:
                bytes_per_size[size] = read_memory_value(addr, size)

            # If we weren't able to get the memory (we're past the end of
            # the unpacked bytes)
            if bytes_per_size[size] is None:
                continue

            inst = inst_type.decode(inst_type, name, values, addr,
                                    bytes_per_size[size])
            if inst is not None:
                found.append(inst)

        if len(found) > 1:
            for inst in found:
                if inst.name == "LA":
                    return inst
            raise RuntimeError("Multiple instructions found {}".format(
                [x.__class__.__name__ for x in found]))
        elif len(found) == 0:
            return None
        return found[0]

    def decode_instruction(self, data, addr):
        """Return the decoded instruction at ``addr``, or None.

        ``data`` is only used for a minimum-length check; the bytes that are
        decoded are fetched by address in ``find_instruction``.
        """
        if len(data) < 4:
            return None

        instr = self.find_instruction(addr)
        if instr is None:
            log_error('[{:x}] Bad opcode'.format(addr))
            return None

        return instr

    def perform_get_instruction_info(self, data, addr):
        """Return an InstructionInfo with length and branches, or None."""
        instr = self.decode_instruction(data, addr)
        if instr is None:
            return None

        result = InstructionInfo()
        result.length = instr.SIZE
        instr.add_branches(result)
        return result

    def perform_get_instruction_text(self, data, addr):
        """Return ``(tokens, length)`` for the instruction, or None."""
        instr = self.decode_instruction(data, addr)
        if instr is None:
            return None

        instruction_text = instr.get_name()
        if instr.conditional_sets_flags():
            instruction_text += '.'

        tokens = [
            InstructionTextToken(InstructionTextTokenType.InstructionToken,
                                 '{:7s}'.format(instruction_text))
        ]

        operand_tokens = instr.get_operand_tokens()
        if instr.add_commas:
            # Interleave a separator between operands, none after the last.
            last = len(operand_tokens) - 1
            for i, operand_token in enumerate(operand_tokens):
                tokens.append(operand_token)
                if i != last:
                    tokens.append(
                        InstructionTextToken(
                            InstructionTextTokenType.OperandSeparatorToken,
                            ","))
        else:
            tokens.extend(operand_tokens)

        return tokens, instr.SIZE

    def perform_get_instruction_low_level_il(self, data, addr, il):
        """Lifting is not implemented; always returns None."""
        return None

    def perform_assemble(self, code, addr):
        """Assemble ``code`` at ``addr`` into the backing byte stream.

        Updates ``FILE_BYTE_STREAM`` and rewrites the input file if anything
        changed. Returns ``(placeholder_bytes, "")``, where the placeholder
        has one character per byte written.
        """
        global FILE_BYTE_STREAM
        if FILE_BYTE_STREAM is None:
            make_file_contents()

        new_insts = asm.asm(code)
        num_bytes_changed = sum(len(x) for x in new_insts)

        # Update FILE_BYTE_STREAM
        for new_inst in new_insts:
            for new_byte in new_inst:
                FILE_BYTE_STREAM.bytes[addr] = (new_byte, 1)
                addr += 1

        # Rewrite the input file
        if num_bytes_changed != 0:
            rewrite_file()

        # Give binja something so it reloads the instructions
        return ("A" * num_bytes_changed, "")

    def perform_convert_to_nop(self, data, addr):
        """Overwrite the instruction at ``addr`` with a no-op equivalent."""
        # There's no NOP instruction, so do an AND with r0 without flag update
        bytes_changed, _ = self.perform_assemble("AN r0, r0, r0", addr)
        return bytes_changed
class Brainfuck(Architecture):
    """Binary Ninja architecture definition registered as 'Brainfuck'.

    Every instruction is one byte long. The byte is mapped to an operation
    name through the module-level ``opcodes`` table.
    """

    name = "Brainfuck"
    address_size = 1
    default_int_size = 1
    max_instr_length = 1

    stack_pointer = 's'

    regs = {
        'tmp': RegisterInfo('tmp', 1),
        'ptr': RegisterInfo('ptr', 4),
        's': RegisterInfo('s', 1)
    }

    def parse_instruction(self, data, addr):
        """Return ``(opcode, length)``; the length is always 1.

        ``data`` is passed through ``ord`` when that is possible and is
        returned unchanged otherwise.
        """
        try:
            ret_data = ord(data)
        except TypeError:
            # ord() rejects anything that is not a single character/byte.
            ret_data = data

        return ret_data, 1

    def get_instruction_info(self, data, addr):
        """Return an InstructionInfo with length and loop branches."""
        opcode, length = self.parse_instruction(data, addr)

        info = InstructionInfo()
        info.length = length

        if opcodes[opcode] == 'Close':
            info.add_branch(BranchType.UnresolvedBranch)
            info.add_branch(BranchType.FalseBranch, addr + 1)
        elif opcodes[opcode] == 'Open':
            info.add_branch(BranchType.TrueBranch, addr + 1)
            info.add_branch(BranchType.UnresolvedBranch)

        return info

    def get_instruction_text(self, data, addr):
        """Return ``(tokens, length)`` with the operation name as one token."""
        opcode, length = self.parse_instruction(data, addr)

        tokens = []
        op = opcodes[opcode]
        tokens.append(
            InstructionTextToken(
                InstructionTextTokenType.InstructionToken,
                "{}".format(op)
            )
        )

        return tokens, length

    def get_instruction_low_level_il(self, data, addr, il):
        """Append the LLIL for one instruction to ``il``.

        Returns the instruction length, or None when a loop bracket has no
        partner inside the view.
        """
        opcode, length = self.parse_instruction(data, addr)
        op = opcodes[opcode]

        # The data pointer is zeroed when lifting the instruction at 0x10000.
        if addr == 0x10000:
            il.append(
                il.set_reg(4, 'ptr', il.const(1, 0))
            )

        if op == "Right":
            il.append(
                il.set_reg(4, 'ptr', il.add(
                    4, il.reg(4, 'ptr'), il.const(1, 1)), None)
            )
        elif op == "Left":
            il.append(
                il.set_reg(4, 'ptr', il.sub(
                    4, il.reg(4, 'ptr'), il.const(1, 1)), None)
            )
        elif op == "Add":
            il.append(
                il.store(1, il.reg(4, 'ptr'), il.add(
                    1, il.load(1, il.reg(4, 'ptr')), il.const(1, 1)), None)
            )
        elif op == "Subtract":
            il.append(
                il.store(1, il.reg(4, 'ptr'), il.sub(
                    1, il.load(1, il.reg(4, 'ptr')), il.const(1, 1)), None)
            )
        elif op == "In":
            il.append(
                il.unimplemented()
            )
        elif op == "Out":
            il.append(
                il.unimplemented()
            )
        elif op == "Open":
            true_label = il.get_label_for_address(
                Architecture['Brainfuck'], addr + 1)

            # Scan forward for the matching "Close", tracking nesting depth.
            br = BinaryReader(il._source_function._view)
            br.seek(addr + 1)
            counter = 1
            while counter != 0:
                instr = opcodes[br.read8()]
                if instr == "Open":
                    counter += 1
                elif instr == "Close":
                    counter -= 1
                    if counter == 0:
                        # br.offset is now one past the matching "Close".
                        false_label = il.get_label_for_address(
                            Architecture['Brainfuck'], br.offset)
                        break
                elif br.offset == il._source_function._view.end:
                    print("Unfinished loop! This should never happen!")
                    return

            il.append(
                il.if_expr(il.compare_not_equal(1, il.load(
                    1, il.reg(4, 'ptr')), il.const(1, 0)),
                    true_label, false_label)
            )
        elif op == "Close":
            false_label = il.get_label_for_address(
                Architecture['Brainfuck'], addr + 1)

            # Scan backward for the matching "Open". Each iteration steps
            # back two and reads one byte, so starting at addr + 1 makes the
            # first byte examined addr - 1, the byte just before this
            # "Close". Starting at addr would skip that byte and mismatch
            # "[]" and "]]" sequences.
            br = BinaryReader(il._source_function._view)
            br.seek(addr + 1)
            counter = 1
            while counter != 0:
                br.seek_relative(-2)
                instr = opcodes[br.read8()]
                if instr == "Close":
                    counter += 1
                elif instr == "Open":
                    counter -= 1
                    if counter == 0:
                        # br.offset is now one past the matching "Open".
                        true_label = il.get_label_for_address(
                            Architecture['Brainfuck'], br.offset)
                        break
                # NOTE(review): this guard compares against the view's end
                # although the scan runs backward - confirm whether the
                # view's start was intended.
                elif br.offset == il._source_function._view.end:
                    print("Unfinished loop! This should never happen!")
                    return

            il.append(
                il.if_expr(il.compare_not_equal(1, il.load(
                    1, il.reg(4, 'ptr')), il.const(1, 0)),
                    true_label, false_label)
            )
        else:
            il.append(
                il.nop()
            )

        return length
class Intel8086(Architecture):
    """Binary Ninja architecture definition registered under the name '8086'.

    Decoding, rendering, lifting and re-encoding are delegated to the ``mc``
    module.
    """

    name = "8086"
    endianness = Endianness.LittleEndian

    default_int_size = 2
    address_size = 3

    stack_pointer = 'sp'
    regs = {
        # General
        'ax': RegisterInfo('ax', 2, 0),
        'al': RegisterInfo('ax', 1, 0),
        'ah': RegisterInfo('ax', 1, 1),
        'cx': RegisterInfo('cx', 2, 0),
        'cl': RegisterInfo('cx', 1, 0),
        'ch': RegisterInfo('cx', 1, 1),
        'bx': RegisterInfo('bx', 2, 0),
        'bl': RegisterInfo('bx', 1, 0),
        'bh': RegisterInfo('bx', 1, 1),
        'dx': RegisterInfo('dx', 2, 0),
        'dl': RegisterInfo('dx', 1, 0),
        'dh': RegisterInfo('dx', 1, 1),
        'sp': RegisterInfo('sp', 2),
        'bp': RegisterInfo('bp', 2),
        'si': RegisterInfo('si', 2),
        'di': RegisterInfo('di', 2),
        # Segment
        'cs': RegisterInfo('cs', 2),
        'ds': RegisterInfo('ds', 2),
        'es': RegisterInfo('es', 2),
        'ss': RegisterInfo('ss', 2),
        # Instruction pointer
        'ip': RegisterInfo('ip', 2)
    }
    flags = [
        # Status
        'c',  # carry
        'p',  # parity
        'a',  # aux carry
        'z',  # zero
        's',  # sign
        'o',  # overflow
        # Control
        'i',  # interrupt
        'd',  # direction
        't',  # trap
    ]
    flag_roles = {
        'c': FlagRole.CarryFlagRole,
        'p': FlagRole.OddParityFlagRole,
        'a': FlagRole.HalfCarryFlagRole,
        'z': FlagRole.ZeroFlagRole,
        's': FlagRole.NegativeSignFlagRole,
        't': FlagRole.SpecialFlagRole,
        'i': FlagRole.SpecialFlagRole,
        'd': FlagRole.SpecialFlagRole,
        'o': FlagRole.OverflowFlagRole,
    }
    flag_write_types = [
        '',
        '*',
        '!c',
        'co',
    ]
    flags_written_by_flag_write_type = {
        '*': ['c', 'p', 'a', 'z', 's', 'o'],
        '!c': ['p', 'a', 'z', 's', 'o'],
        'co': ['c', 'o'],
    }
    flags_required_for_flag_condition = {
        LowLevelILFlagCondition.LLFC_E: ['z'],
        LowLevelILFlagCondition.LLFC_NE: ['z'],
        LowLevelILFlagCondition.LLFC_SLT: ['s', 'o'],
        LowLevelILFlagCondition.LLFC_ULT: ['c'],
        LowLevelILFlagCondition.LLFC_SLE: ['z', 's', 'o'],
        LowLevelILFlagCondition.LLFC_ULE: ['c', 'z'],
        LowLevelILFlagCondition.LLFC_SGE: ['s', 'o'],
        LowLevelILFlagCondition.LLFC_UGE: ['c'],
        LowLevelILFlagCondition.LLFC_SGT: ['z', 's', 'o'],
        LowLevelILFlagCondition.LLFC_UGT: ['c', 'z'],
        LowLevelILFlagCondition.LLFC_NEG: ['s'],
        LowLevelILFlagCondition.LLFC_POS: ['s'],
        LowLevelILFlagCondition.LLFC_O: ['o'],
        LowLevelILFlagCondition.LLFC_NO: ['o'],
    }

    intrinsics = {
        'outb': IntrinsicInfo([Type.int(2), Type.int(1)], []),
        'outw': IntrinsicInfo([Type.int(2), Type.int(2)], []),
        'inb': IntrinsicInfo([Type.int(1)], [Type.int(2)]),
        'inw': IntrinsicInfo([Type.int(2)], [Type.int(2)]),
    }

    def get_instruction_info(self, data, addr):
        """Return an InstructionInfo filled in by the decoder, or None."""
        insn = mc.decode(data, addr)
        if not insn:
            return None
        result = InstructionInfo()
        insn.analyze(result, addr)
        return result

    def get_instruction_text(self, data, addr):
        """Return ``(tokens, length)`` for the instruction, or None.

        The instruction is also re-encoded and compared with the input
        bytes; a mismatch is logged but does not prevent rendering.
        """
        insn = mc.decode(data, addr)
        if not insn:
            return None

        original_bytes = data[:insn.total_length()]
        reencoded_bytes = mc.encode(insn, addr)
        if original_bytes != reencoded_bytes:
            log_error("Instruction roundtrip error")
            log_error("".join(map(str, insn.render(addr))))
            log_error("Orig: {}".format(original_bytes.hex()))
            log_error("New: {}".format(reencoded_bytes.hex()))

        return insn.render(addr), insn.total_length()

    def get_instruction_low_level_il(self, data, addr, il):
        """Lift the instruction into ``il``; return its length, or None."""
        insn = mc.decode(data, addr)
        if not insn:
            return None
        insn.lift(il, addr)
        return insn.total_length()

    def convert_to_nop(self, data, addr):
        """Return one 0x90 byte for every byte of ``data``."""
        return len(data) * b'\x90'

    def is_always_branch_patch_available(self, data, addr):
        """Tell whether the instruction is a conditional jump (None if undecodable)."""
        insn = mc.decode(data, addr)
        if not insn:
            return None
        return isinstance(insn, mc.instr.jmp.JmpCond)

    def always_branch(self, data, addr):
        """Return the encoding of the jump made unconditional."""
        unconditional = mc.decode(data, addr).to_always()
        return mc.encode(unconditional, addr)

    def is_invert_branch_patch_available(self, data, addr):
        """Tell whether the instruction is a conditional jump (None if undecodable)."""
        insn = mc.decode(data, addr)
        if not insn:
            return None
        return isinstance(insn, mc.instr.jmp.JmpCond)

    def invert_branch(self, data, addr):
        """Return the encoding of the jump with its condition inverted."""
        inverted = mc.decode(data, addr).to_inverted()
        return mc.encode(inverted, addr)
class SuperH(Architecture):
    """Binary Ninja architecture definition registered under the name 'superh'.

    Disassembly comes from ``disasm_single``; branch discovery and lifting
    are delegated to ``Brancher`` and ``Lifter``.
    """

    name = "superh"
    endianness = Endianness.LittleEndian
    address_size = 4
    default_int_size = 2
    max_instr_length = 4
    instr_alignment = 2

    # Every general, system and control register has the same width (RSIZE).
    regs = dict()
    for r in registers:
        regs[r] = RegisterInfo(r, RSIZE)
    for r in system_registers:
        regs[r] = RegisterInfo(r, RSIZE)
    for r in control_registers:
        regs[r] = RegisterInfo(r, RSIZE)

    flags = ['t']
    flag_roles = {'t': FlagRole.SpecialFlagRole}

    stack_pointer = 'R15'
    link_reg = 'PR'
    system_regs = system_registers + control_registers

    def __init__(self):
        super().__init__()

    def get_instruction_info(self, data, addr):
        """Return an InstructionInfo for the instruction at ``addr``.

        When disassembly fails the result still carries the default length
        ISIZE and no branches.
        """
        info = InstructionInfo()
        info.length = ISIZE

        decoded = disasm_single(data, addr)
        if decoded:
            info.length = decoded.size
            Brancher.find_branches(decoded, info)
        return info

    def get_instruction_text(self, data, addr):
        """Return ``(tokens, length)``; undecodable input renders as "<unknown>"."""
        out = list()

        decoded = disasm_single(data, addr)
        if decoded:
            for kind, text in decoded.tokens:
                out.append(InstructionTextToken(kind, text))
            return out, decoded.size

        out.append(
            InstructionTextToken(InstructionTextTokenType.TextToken,
                                 "<unknown>"))
        return out, ISIZE

    def get_instruction_low_level_il(self, data, addr, il):
        """Lift the instruction into ``il`` and return its size.

        When disassembly fails an ``unimplemented`` expression is appended
        and None is returned.
        """
        decoded = disasm_single(data, addr)
        if decoded:
            Lifter.lift(il, decoded)
            return decoded.size

        il.append(il.unimplemented())
        return None
def get_regs():
    """Build and return the register table (name -> RegisterInfo).

    Contains the sixteen address registers a0..a15 followed by the special
    registers, in the order listed below.
    """
    # (name, size in bytes), in insertion order.
    layout = [("a{}".format(index), 4) for index in range(16)]
    layout += [
        ('pc', 4),
        ('sar', 1),  # 6 bits?
        ('lbeg', 4),
        ('lend', 4),
        ('lcount', 4),
        ('acclo', 4),
        ('acchi', 4),
        ('m0', 4),
        ('m1', 4),
        ('m2', 4),
        ('m3', 4),
        ('br', 2),
        ('litbase', 3),  # 21 bits?
        ('scompare1', 4),
        ('ps', 2),  # 15 bits?
    ]
    # Could do like ps.intlevel here too?
    # There are a bunch of other "Special registers" that we could implement
    # here
    return {
        reg_name: RegisterInfo(reg_name, width)
        for reg_name, width in layout
    }
class RenesasM16CArchitecture(Architecture):
    """Binary Ninja architecture definition registered under the name 'm16c'.

    Decoding, rendering, lifting and re-encoding are delegated to the ``mc``
    module.
    """

    name = "m16c"
    endianness = Endianness.LittleEndian

    default_int_size = 2
    address_size = 3

    stack_pointer = 'SP'
    regs = {
        # Data (banked in hardware)
        # The byte halves are sub-registers of the 32-bit pair, like the
        # 16-bit halves, so a write to any part is seen by the others.
        'R2R0': RegisterInfo('R2R0', 4, 0),
        'R2': RegisterInfo('R2R0', 2, 2),
        'R0': RegisterInfo('R2R0', 2, 0),
        'R0H': RegisterInfo('R2R0', 1, 1),
        'R0L': RegisterInfo('R2R0', 1, 0),
        'R3R1': RegisterInfo('R3R1', 4, 0),
        'R3': RegisterInfo('R3R1', 2, 2),
        'R1': RegisterInfo('R3R1', 2, 0),
        'R1H': RegisterInfo('R3R1', 1, 1),
        'R1L': RegisterInfo('R3R1', 1, 0),
        # Address
        'A1A0': RegisterInfo('A1A0', 4, 0),
        'A1': RegisterInfo('A1A0', 2, 2),
        'A0': RegisterInfo('A1A0', 2, 0),
        # Frame base (banked in hardware)
        'FB': RegisterInfo('FB', 2, 0),
        # Program counter
        'PC': RegisterInfo('PC', 3, 0),
        # Stack pointer (banked in hardware as USP/ISP)
        'SP': RegisterInfo('SP', 2, 0),
        # Static base
        'SB': RegisterInfo('SB', 2, 0),
        # Interrupt base
        'INTB': RegisterInfo('INTB', 4, 0),
        'INTBH': RegisterInfo('INTB', 1, 2),
        'INTBL': RegisterInfo('INTB', 2, 0),
    }
    flags = [
        'C',  # Carry
        'D',  # Debug
        'Z',  # Zero
        'S',  # Sign
        'B',  # Register bank select
        'O',  # Overflow
        'I',  # Interrupt enable
        'U',  # Stack pointer select
        # IPL is not modelled
    ]
    flag_roles = {
        'C': FlagRole.CarryFlagRole,
        'D': FlagRole.SpecialFlagRole,
        'Z': FlagRole.ZeroFlagRole,
        'S': FlagRole.NegativeSignFlagRole,
        'B': FlagRole.SpecialFlagRole,
        'O': FlagRole.OverflowFlagRole,
        'I': FlagRole.SpecialFlagRole,
        'U': FlagRole.SpecialFlagRole,
    }
    flags_required_for_flag_condition = {
        LowLevelILFlagCondition.LLFC_E: ['Z'],
        LowLevelILFlagCondition.LLFC_NE: ['Z'],
        LowLevelILFlagCondition.LLFC_POS: ['S'],
        LowLevelILFlagCondition.LLFC_NEG: ['S'],
        LowLevelILFlagCondition.LLFC_SGE: ['S', 'O'],
        LowLevelILFlagCondition.LLFC_SLT: ['S', 'O'],
        LowLevelILFlagCondition.LLFC_SGT: ['Z', 'S', 'O'],
        LowLevelILFlagCondition.LLFC_SLE: ['Z', 'S', 'O'],
        LowLevelILFlagCondition.LLFC_UGE: ['C'],
        LowLevelILFlagCondition.LLFC_ULT: ['C'],
        LowLevelILFlagCondition.LLFC_UGT: ['C', 'Z'],
        LowLevelILFlagCondition.LLFC_ULE: ['C', 'Z'],
        LowLevelILFlagCondition.LLFC_O: ['O'],
        LowLevelILFlagCondition.LLFC_NO: ['O'],
    }

    def get_instruction_info(self, data, addr):
        """Return an InstructionInfo filled in by the decoder, or None."""
        decoded = mc.decode(data, addr)
        if decoded:
            info = InstructionInfo()
            decoded.analyze(info, addr)
            return info

    def get_instruction_text(self, data, addr):
        """Return ``(tokens, length)`` for the instruction, or None.

        The instruction is also re-encoded and compared with the input
        bytes; a mismatch is logged but does not prevent rendering.
        """
        decoded = mc.decode(data, addr)
        if decoded:
            encoded = data[:decoded.length()]
            recoded = mc.encode(decoded, addr)
            if encoded != recoded:
                log.log_error("Instruction roundtrip error")
                log.log_error("".join([str(x) for x in decoded.render(addr)]))
                log.log_error("Orig: {}".format(encoded.hex()))
                log.log_error("New: {}".format(recoded.hex()))

            # The user setting controls whether the rendered text carries
            # the suffix.
            decoded.show_suffix = Settings().get_bool('arch.m16c.showSuffix')
            return decoded.render(addr), decoded.length()

    def get_instruction_low_level_il(self, data, addr, il):
        """Lift the instruction into ``il``; return its length, or None."""
        decoded = mc.decode(data, addr)
        if decoded:
            decoded.lift(il, addr)
            return decoded.length()

    def convert_to_nop(self, data, addr):
        """Return one 0x04 byte for every byte of ``data``."""
        return b'\x04' * len(data)
class RISCV(Architecture):
    """Binary Ninja architecture definition registered under the name 'riscv'.

    Decoding is delegated to ``RVDisassembler`` and lifting to ``Lifter``;
    both are constructed with this class's ``address_size``.
    """

    name = "riscv"

    address_size = 4
    default_int_size = 4

    max_instr_length = 4

    endianness = Endianness.LittleEndian

    disassembler = RVDisassembler(address_size)
    lifter = Lifter(address_size)

    # we are using the ABI names here, as those are also the register names
    # returned by capstone.
    regs = {
        # x0 - hard-wired zero
        "zero": RegisterInfo("zero", address_size),
        # x1 - return address (caller saved)
        "ra": RegisterInfo("ra", address_size),
        # x2 - stack pointer (callee saved)
        "sp": RegisterInfo("sp", address_size),
        # x3 - global pointer
        "gp": RegisterInfo("gp", address_size),
        # x4 - thread pointer
        "tp": RegisterInfo("tp", address_size),
        # x5-7 - temporaries (caller saved)
        "t0": RegisterInfo("t0", address_size),
        "t1": RegisterInfo("t1", address_size),
        "t2": RegisterInfo("t2", address_size),
        # x8 - saved register / frame pointer (callee saved)
        "s0": RegisterInfo("s0", address_size),
        # x9 - saved register (callee saved)
        "s1": RegisterInfo("s1", address_size),
        # x10-x11 - first function argument and return value (caller saved)
        "a0": RegisterInfo("a0", address_size),
        "a1": RegisterInfo("a1", address_size),
        # x12-17 - function arguments (caller saved)
        "a2": RegisterInfo("a2", address_size),
        "a3": RegisterInfo("a3", address_size),
        "a4": RegisterInfo("a4", address_size),
        "a5": RegisterInfo("a5", address_size),
        "a6": RegisterInfo("a6", address_size),
        "a7": RegisterInfo("a7", address_size),
        # x18-27 - saved registers (callee saved)
        "s2": RegisterInfo("s2", address_size),
        "s3": RegisterInfo("s3", address_size),
        "s4": RegisterInfo("s4", address_size),
        "s5": RegisterInfo("s5", address_size),
        "s6": RegisterInfo("s6", address_size),
        "s7": RegisterInfo("s7", address_size),
        "s8": RegisterInfo("s8", address_size),
        "s9": RegisterInfo("s9", address_size),
        "s10": RegisterInfo("s10", address_size),
        "s11": RegisterInfo("s11", address_size),
        # x28-31 - temporaries (caller saved)
        "t3": RegisterInfo("t3", address_size),
        "t4": RegisterInfo("t4", address_size),
        "t5": RegisterInfo("t5", address_size),
        "t6": RegisterInfo("t6", address_size),
        # pc
        "pc": RegisterInfo("pc", address_size),
    }

    stack_pointer = "sp"

    def get_instruction_info(self, data, addr):
        """Return an InstructionInfo with length and branches, or None."""
        instr = self.disassembler.decode(data, addr)

        if instr is None:
            return None

        result = InstructionInfo()
        result.length = instr.size

        # Target of pc-relative control transfers.
        dest = addr + instr.imm

        if instr.name == 'ret':
            result.add_branch(BranchType.FunctionReturn)
        elif instr.name in branch_ins:
            result.add_branch(BranchType.TrueBranch, dest)
            # Fall through to the next instruction. Use the decoded size
            # rather than a fixed 4 so the address matches result.length.
            result.add_branch(BranchType.FalseBranch, addr + instr.size)
        elif instr.name in direct_call_ins:
            result.add_branch(BranchType.CallDestination, dest)
        elif instr.name in indirect_call_ins:
            result.add_branch(BranchType.UnresolvedBranch)

        return result

    def get_instruction_text(self, data, addr):
        """Return ``(tokens, length)`` for the instruction, or None."""
        instr = self.disassembler.decode(data, addr)

        if instr is None:
            return None

        tokens = gen_token(instr)

        return tokens, instr.size

    def get_instruction_low_level_il(self, data, addr, il):
        """Lift the instruction into ``il``; return its size, or None."""
        instr = self.disassembler.decode(data, addr)

        if instr is None:
            return None

        self.lifter.lift(il, instr, instr.name)

        return instr.size
class M6800(Architecture):
    '''M6800 Architecture class.

    Instructions are decoded from the module-level ``INSTRUCTIONS`` table and
    lifted through the ``LLIL_OPERATIONS`` dispatch table.
    '''
    name = 'M6800'
    address_size = 2
    default_int_size = 2

    regs = {
        'SP': RegisterInfo('SP', 2),  # Stack Pointer
        'PC': RegisterInfo('PC', 2),  # Program Counter
        'IX': RegisterInfo('IX', 2),  # Index Register
        'ACCA': RegisterInfo('ACCA', 1),  # Accumulator A
        'ACCB': RegisterInfo('ACCB', 1)  # Accumulator B
    }

    flags = ['C', 'V', 'Z', 'N', 'I', 'H']

    flag_roles = {
        'C': FlagRole.CarryFlagRole,
        'V': FlagRole.OverflowFlagRole,
        'Z': FlagRole.ZeroFlagRole,
        'N': FlagRole.NegativeSignFlagRole,
        'I': FlagRole.SpecialFlagRole,  # Interrupt Flag
        'H': FlagRole.HalfCarryFlagRole
    }

    flag_write_types = ['', 'HNZVC', 'NZVC', 'NZV', 'Z']

    flags_written_by_flag_write_type = {
        'HNZVC': ['H', 'N', 'Z', 'V', 'C'],
        'NZVC': ['N', 'Z', 'V', 'C'],
        'NZV': ['N', 'Z', 'V'],
        'Z': ['Z']
    }

    flags_required_for_flag_condition = {
        LowLevelILFlagCondition.LLFC_UGE: ['C'],
        LowLevelILFlagCondition.LLFC_UGT: ['C', 'Z'],
        LowLevelILFlagCondition.LLFC_ULE: ['C', 'Z'],
        LowLevelILFlagCondition.LLFC_ULT: ['C'],
        LowLevelILFlagCondition.LLFC_SGE: ['N', 'V'],
        LowLevelILFlagCondition.LLFC_SLT: ['N', 'V'],
        LowLevelILFlagCondition.LLFC_SGT: ['Z', 'N', 'V'],
        LowLevelILFlagCondition.LLFC_SLE: ['Z', 'N', 'V'],
        LowLevelILFlagCondition.LLFC_E: ['Z'],
        LowLevelILFlagCondition.LLFC_NE: ['Z'],
        LowLevelILFlagCondition.LLFC_NEG: ['N'],
        LowLevelILFlagCondition.LLFC_POS: ['N'],
        LowLevelILFlagCondition.LLFC_O: ['V'],
        LowLevelILFlagCondition.LLFC_NO: ['V']
    }

    stack_pointer = 'SP'

    # pylint: disable=invalid-name
    @staticmethod
    def _handle_jump(il: LowLevelILFunction, value):
        '''Build (but do not append) the IL for a jump to address *value*.

        Returns a ``goto`` when a label already exists for the address,
        otherwise a ``jump`` to the constant address.
        '''
        label = il.get_label_for_address(Architecture['M6800'], value)
        if label is None:
            return il.jump(il.const(2, value))
        return il.goto(label)

    # pylint: disable=invalid-name
    @staticmethod
    def _handle_branch(il: LowLevelILFunction, nmemonic, inst_length, value):
        '''Append the IL for a conditional branch to address *value*.'''
        true_label = il.get_label_for_address(Architecture['M6800'], value)
        # No existing label for the target: branch to a fresh label and
        # emit an explicit jump from there.
        indirect = true_label is None
        if indirect:
            true_label = LowLevelILLabel()

        false_label = il.get_label_for_address(
            Architecture['M6800'], il.current_address + inst_length)
        false_label_found = false_label is not None
        if not false_label_found:
            false_label = LowLevelILLabel()

        condition = LLIL_OPERATIONS[nmemonic](il, None, None)
        il.append(il.if_expr(condition, true_label, false_label))

        if indirect:
            il.mark_label(true_label)
            il.append(il.jump(il.const(2, value)))

        if not false_label_found:
            il.mark_label(false_label)

    @staticmethod
    def _decode_instruction(data, addr):
        '''Decode one instruction from *data*.

        Returns ``(nmemonic, inst_length, inst_operand, inst_type, mode,
        value)`` where *value* is the operand (absolute target for RELATIVE
        mode) or ``None`` for modes without an encoded operand.

        Raises ``LookupError`` when the opcode is unknown or *data* is too
        short to hold the whole instruction.
        '''
        if not data:
            raise LookupError(
                f'Unable to decode instruction at address 0x{addr:X}')

        opcode = data[0]
        try:
            nmemonic, inst_length, inst_operand, inst_type, mode = INSTRUCTIONS[
                opcode]
        except KeyError as error:
            raise LookupError(
                f'Opcode 0x{opcode:X} at address 0x{addr:X} is invalid.'
            ) from error

        # A truncated RELATIVE operand would otherwise silently decode as a
        # zero offset (int.from_bytes of an empty slice is 0).
        if len(data) < inst_length:
            raise LookupError(
                f'Unable to decode instruction at address 0x{addr:X}')

        value = None
        # need to collect information based on each address mode
        # INHERENT addressing => value is None
        # ACCUMULATOR addressing => value is in accumulator
        try:
            if mode == AddressMode.RELATIVE:
                # calculate absolute address here
                # should always be 2 bytes long, second byte is 2's complement
                value = addr + inst_length + int.from_bytes(
                    data[1:2], 'big', signed=True)
                # use address mask to set value to real space
                value &= ADDRESS_MASK
            elif mode == AddressMode.IMMEDIATE:
                if inst_length == 2:
                    value = data[1]
                else:
                    value = struct.unpack('>H', data[1:3])[0]
                    # use address mask to set value to real space
                    value &= ADDRESS_MASK
            elif mode == AddressMode.EXTENDED:
                value = struct.unpack('>H', data[1:3])[0]
                # use address mask to set value to real space
                value &= ADDRESS_MASK
            elif mode in [AddressMode.INDEXED, AddressMode.DIRECT]:
                value = data[1]
        except struct.error as error:
            raise LookupError(
                f'Unable to decode instruction at address 0x{addr:X}'
            ) from error

        return nmemonic, inst_length, inst_operand, inst_type, mode, value

    def get_instruction_text(self, data, addr):
        '''Return ``(tokens, length)`` for the instruction, or ``None``.'''
        try:
            (nmemonic, inst_length, inst_operand, _, mode,
             value) = M6800._decode_instruction(data, addr)
        except LookupError as error:
            log_error(str(error))
            return None

        def separator():
            return InstructionTextToken(ITTT.OperandSeparatorToken, ' ')

        tokens = [InstructionTextToken(ITTT.InstructionToken, nmemonic)]

        if mode == AddressMode.ACCUMULATOR:
            tokens.append(separator())
            tokens.append(
                InstructionTextToken(ITTT.RegisterToken, inst_operand))
        elif mode in [
                AddressMode.DIRECT, AddressMode.EXTENDED, AddressMode.RELATIVE
        ]:
            tokens.append(separator())
            tokens.append(
                InstructionTextToken(ITTT.PossibleAddressToken,
                                     f'0x{value:X}', value))
        elif mode == AddressMode.IMMEDIATE:
            if inst_operand in ['ACCA', 'ACCB']:
                tokens.append(separator())
                tokens.append(
                    InstructionTextToken(ITTT.RegisterToken, inst_operand))
            tokens.append(separator())
            tokens.append(
                InstructionTextToken(ITTT.IntegerToken, f'0x{value:X}', value))
        elif mode == AddressMode.INDEXED:
            if inst_operand in ['ACCA', 'ACCB']:
                tokens.append(separator())
                tokens.append(
                    InstructionTextToken(ITTT.RegisterToken, inst_operand))
            tokens.append(separator())
            tokens.append(
                InstructionTextToken(ITTT.BeginMemoryOperandToken, '['))
            tokens.append(InstructionTextToken(ITTT.RegisterToken, 'IX'))
            tokens.append(
                InstructionTextToken(ITTT.OperandSeparatorToken, ' + '))
            tokens.append(
                InstructionTextToken(ITTT.IntegerToken, f'0x{value:X}', value))
            tokens.append(InstructionTextToken(ITTT.EndMemoryOperandToken, ']'))

        return tokens, inst_length

    def get_instruction_info(self, data, addr):
        '''Return an ``InstructionInfo`` with length and branches, or ``None``.'''
        try:
            (_, inst_length, _, inst_type, mode,
             value) = M6800._decode_instruction(data, addr)
        except LookupError as error:
            log_error(str(error))
            return None

        inst = InstructionInfo()
        inst.length = inst_length

        # Indexed targets depend on IX and cannot be resolved statically.
        indexed = mode == AddressMode.INDEXED

        if inst_type == InstructionType.CONDITIONAL_BRANCH:
            if indexed:
                inst.add_branch(BranchType.UnresolvedBranch)
            else:
                inst.add_branch(BranchType.TrueBranch, value)
                inst.add_branch(BranchType.FalseBranch, addr + inst_length)
        elif inst_type == InstructionType.UNCONDITIONAL_BRANCH:
            if indexed:
                inst.add_branch(BranchType.UnresolvedBranch)
            else:
                inst.add_branch(BranchType.UnconditionalBranch, value)
        elif inst_type == InstructionType.CALL:
            if indexed:
                inst.add_branch(BranchType.UnresolvedBranch)
            else:
                inst.add_branch(BranchType.CallDestination, value)
        elif inst_type == InstructionType.RETURN:
            inst.add_branch(BranchType.FunctionReturn)

        return inst

    def get_instruction_low_level_il(self, data, addr, il: LowLevelILFunction):
        '''Lift the instruction into *il*; return its length or ``None``.'''
        try:
            (nmemonic, inst_length, inst_operand, inst_type, mode,
             value) = M6800._decode_instruction(data, addr)
        except LookupError as error:
            log_error(str(error))
            return None

        # Figure out what the instruction uses
        load_size = 2 if nmemonic in BIGGER_LOADS else 1
        operand, second_operand = None, None

        # if this is a conditional branch, handle that separately
        if inst_type == InstructionType.CONDITIONAL_BRANCH:
            M6800._handle_branch(il, nmemonic, inst_length, value)
            return inst_length

        # if this is an unconditional branch, handle that separately
        if inst_type == InstructionType.UNCONDITIONAL_BRANCH:
            if mode == AddressMode.INDEXED:
                # value is only the offset here; the target is IX + offset,
                # computed the same way as the indexed operand below.
                target = il.add(2, il.reg(2, 'IX'), il.const(1, value))
                il.append(il.jump(target))
            else:
                # _handle_jump only builds the expression; it must be
                # appended or the branch lifts to nothing.
                il.append(M6800._handle_jump(il, value))
            return inst_length

        if mode == AddressMode.ACCUMULATOR:
            # handle the case where we need the name, not the reg, for pop
            operand = inst_operand if nmemonic == 'PUL' else il.reg(
                1, inst_operand)
        elif mode == AddressMode.INDEXED:
            # set the destination variable for the memory store operations
            destination = il.add(2, il.reg(2, 'IX'), il.const(1, value))
            operand = il.load(load_size, destination)
        elif mode in [AddressMode.DIRECT, AddressMode.EXTENDED]:
            # set the destination variable for the memory store operations
            destination = il.const(inst_length - 1, value)
            operand = il.load(load_size, destination)
        elif mode == AddressMode.IMMEDIATE:
            operand = il.const(inst_length - 1, value)
        elif mode == AddressMode.RELATIVE:
            # we have already calculated the absolute address
            # set the destination variable for the memory store operations
            destination = il.const(2, value)
            operand = il.load(load_size, destination)

        # if we are dual mode, we have to handle things special
        if inst_type == InstructionType.DUAL:
            second_operand = inst_operand

        # calculate the base LLIL
        operation = LLIL_OPERATIONS[nmemonic](il, operand, second_operand)

        # if the instruction has different destinations, set them appropriately
        if nmemonic in REGISTER_OR_MEMORY_DESTINATIONS:
            if mode == AddressMode.ACCUMULATOR:
                operation = il.set_reg(1, inst_operand, operation)
            else:
                operation = il.store(1, destination, operation)

        # Finally, calculate and append the instruction(s)
        il.append(operation)

        return inst_length
class AVR(Architecture):
    """Binary Ninja architecture description for AVR.

    Decoding relies on the module-level helpers ``get_instr_name``,
    ``GetOperands`` and ``OperandTokenGen``. Lifting to LLIL is not
    implemented yet.
    """
    name = 'AVR'
    address_size = 2
    default_int_size = 2
    max_instr_length = 4

    # r0-r31: the 8-bit general purpose registers.
    regs = {
        'r{}'.format(i): RegisterInfo('r{}'.format(i), 1) for i in range(32)
    }
    # stack_pointer below must name an entry of regs, so declare it.
    regs['SP'] = RegisterInfo('SP', 2)

    stack_pointer = 'SP'

    flags = ['C', 'Z', 'N', 'V', 'S', 'H', 'T', 'I']

    flag_write_types = [
        '', '*', 'onlyT', 'svnz', 'onlyC', 'onlyH', 'onlyI', 'onlyN', 'onlyS',
        'onlyV', 'onlyZ', 'svnzc', 'hsvnzc', 'zc'
    ]

    flags_written_by_flag_write_type = {
        '*': ['C', 'Z', 'N', 'V', 'S', 'H', 'T', 'I'],
        'onlyT': ['T'],
        'svnz': ['S', 'V', 'N', 'Z'],
        'onlyC': ['C'],
        'onlyH': ['H'],
        'onlyI': ['I'],
        'onlyN': ['N'],
        'onlyS': ['S'],
        'onlyV': ['V'],
        'onlyZ': ['Z'],
        'svnzc': ['S', 'V', 'N', 'Z', 'C'],
        'hsvnzc': ['H', 'S', 'V', 'N', 'Z', 'C'],
        'zc': ['Z', 'C']
    }

    flag_roles = {
        'C': FlagRole.CarryFlagRole,
        'Z': FlagRole.ZeroFlagRole,
        'N': FlagRole.NegativeSignFlagRole,
        'V': FlagRole.OverflowFlagRole,
        'S': FlagRole.SpecialFlagRole,  #TODO
        'H': FlagRole.SpecialFlagRole,  #TODO
        'T': FlagRole.SpecialFlagRole,  #TODO
        'I': FlagRole.SpecialFlagRole  #TODO
    }

    # flags_required_for_flag_condition = {
    #     LLFC_E : ['Z'], #Equal
    #     LLFC_NE : ['Z'], #Not Equal
    #     LLFC_SLT : ['N'], #Signed Less Than
    #     LLFC_ULT : [''], #Unsigned Less Than
    #     LLFC_SLE : ['N'], #Signed Less Then or Equal to
    #     LLFC_ULE : [''], #Unsigned Less Than or Equal to
    #     LLFC_SGE : ['N'], #Signed Greather Than
    #     LLFC_UGE : [''], #Unsigned Greater Than
    #     LLFC_SGT : ['N'], #Signed Greater Than
    #     LLFC_UGT : ['C'], #Unsigned Greater Than
    #     LLFC_NEG : ['N'], #Negative
    #     LLFC_POS : ['N'], #Positive
    #     LLFC_O : ['V'], #Overflow
    #     LLFC_NO : ['V'] #No Overflow
    # }

    # Instructions whose full 16-bit encoding is fixed (no operand bits).
    _FIXED_OPCODES = {
        0x95C8: 'lpm',
        0x95D8: 'elpm',
        0x0000: 'nop',
        0x9508: 'ret',
        0x9518: 'reti',
        0x9408: 'sec',
        0x9458: 'seh',
        0x9478: 'sei',
        0x9428: 'sen',
        0x9448: 'ses',
        0x9468: 'set',
        0x9438: 'sev',
        0x9418: 'sez',
        0x9588: 'sleep',
        0x95E8: 'spm',
        0x95F8: 'spm z+',  #TODO
        0x95A8: 'wdr',
        0x9598: 'break',
        0x9488: 'clc',
        0x94D8: 'clh',
        0x94F8: 'cli',
        0x94A8: 'cln',
        0x94C8: 'cls',
        0x94E8: 'clt',
        0x94B8: 'clv',
        0x9498: 'clz',
        0x9519: 'eicall',
        0x9419: 'eijmp',
        0x9509: 'icall',
        0x9409: 'ijmp',
    }

    # Instructions followed by a second 16-bit word holding an address.
    _TWO_WORD_INSTRUCTIONS = ('sts', 'lds', 'call', 'jmp')

    # Conditional branches with a relative word offset in ``dst``.
    _RELATIVE_BRANCHES = (
        'breq', 'brne', 'brcs', 'brcc', 'brsh', 'brlo', 'brmi', 'brpl',
        'brge', 'brlt', 'brhs', 'brhc', 'brts', 'brtc', 'brvs', 'brvc',
        'brie', 'brid', 'brbs', 'brbc',
    )

    # Instructions that conditionally skip the following instruction.
    _SKIP_INSTRUCTIONS = ('cpse', 'sbrc', 'sbrs', 'sbic', 'sbis')

    def decode_instruction(self, data, addr):
        """Decode one instruction from *data*.

        Returns the 9-tuple ``(instr, width, src_operand_type,
        dst_operand_type, src, dst, length, src_value, dst_value)``. On
        failure (too little data or an unknown opcode) every element is
        ``None``.
        """
        error_value = (None, None, None, None, None, None, None, None, None)
        if len(data) < 2:
            return error_value

        instruction = struct.unpack('<H', data[0:2])[0]

        fixed = self._FIXED_OPCODES.get(instruction)
        if fixed is not None:
            return fixed, None, None, None, None, None, 2, None, None

        #High byte most significant nibble
        high_msn = (instruction & 0xf000) >> 12

        instr = get_instr_name(instruction, high_msn)
        if instr is None:
            log_error('Bad opcode: {:x}'.format(instruction))
            return error_value

        width = 2 if instr in self._TWO_WORD_INSTRUCTIONS else None

        src, src_operand_type, dst, dst_operand_type = GetOperands(
            instr, instruction)

        length = 2 if width is None else 2 + width

        if length == 4:
            # The second word may be missing at the end of a buffer;
            # treat that as undecodable instead of raising struct.error.
            if len(data) < 4:
                return error_value
            direct_addr = struct.unpack('<H', data[2:4])[0]
            if instr == 'lds':
                src = direct_addr
            else:
                # sts, call and jmp all carry the address as destination
                dst = direct_addr

        src_value, dst_value = None, None
        return (instr, width, src_operand_type, dst_operand_type, src, dst,
                length, src_value, dst_value)

    def perform_get_instruction_info(self, data, addr):
        """Return an ``InstructionInfo`` with length and branches, or ``None``."""
        instr, _, _, _, _, dst, length, _, _ = self.decode_instruction(
            data, addr)
        if instr is None:
            return None

        result = InstructionInfo()
        result.length = length

        # Targets in ``dst`` are multiplied by 2 to turn them into byte
        # addresses; relative ones are taken from the following
        # instruction (addr + 2).
        if instr in ('ret', 'reti'):
            result.add_branch(BranchType.FunctionReturn)
        elif instr == 'call':
            result.add_branch(BranchType.CallDestination, dst * 2)
        elif instr == 'rcall':
            result.add_branch(BranchType.CallDestination,
                              addr + dst * 2 + 1 * 2)
        elif instr == 'jmp':
            result.add_branch(BranchType.UnconditionalBranch, dst * 2)
        elif instr == 'rjmp':
            result.add_branch(BranchType.UnconditionalBranch,
                              addr + dst * 2 + 1 * 2)
        elif instr in self._RELATIVE_BRANCHES:
            result.add_branch(BranchType.TrueBranch, addr + dst * 2 + 1 * 2)
            result.add_branch(BranchType.FalseBranch, addr + 1 * 2)
        elif instr in self._SKIP_INSTRUCTIONS:
            # NOTE(review): the skip target assumes the skipped instruction
            # is 2 bytes long - confirm behaviour for 4-byte instructions.
            result.add_branch(BranchType.TrueBranch, addr + 2 * 2)
            result.add_branch(BranchType.FalseBranch, addr + 1 * 2)
        elif instr in ('icall', 'ijmp'):
            result.add_branch(BranchType.IndirectBranch)
        #TODO
        return result

    def perform_get_instruction_text(self, data, addr):
        """Return ``(tokens, length)`` for the instruction, or ``None``."""
        (instr, width, src_operand_type, dst_operand_type, src, dst, length,
         src_value, dst_value) = self.decode_instruction(data, addr)
        if instr is None:
            return None

        tokens = [
            InstructionTextToken(InstructionTextTokenType.TextToken,
                                 '{:7s}'.format(instr))
        ]

        if dst_operand_type is not None:
            tokens += OperandTokenGen[dst_operand_type](dst, addr, instr)

        if dst_operand_type is not None and src_operand_type is not None:
            tokens += [
                InstructionTextToken(InstructionTextTokenType.TextToken, ',')
            ]

        if src_operand_type is not None:
            tokens += OperandTokenGen[src_operand_type](src, addr, instr)

        return tokens, length

    #TODO
    def perform_get_instruction_low_level_il(self, data, addr, il):
        """Lifting is not implemented; always returns ``None``."""
        (instr, width, src_operand_type, dst_operand_type, src, dst, length,
         src_value, dst_value) = self.decode_instruction(data, addr)
        if instr is None:
            return None

        # if InstructionIL.get(instr) is None:
        #     log_error('[0x{:4x}]: {} not implemented'.format(addr, instr))
        #     il.append(il.unimplemented())
        #
        # return length
        return None

    def perform_get_flag_write_low_level_il(self, op, size, write_type, flag,
                                            operands, il):
        """Flag-write lifting is not implemented; returns ``None``."""
        return

    def perform_get_flag_condition_low_level_il(self, cond, il):
        """Flag-condition lifting is not implemented; returns ``None``."""
        return
def write_register(self, reg_name: str, value: int):
    """Write *value* to register *reg_name* in the emulator state.

    The state is the list of ``(name, value)`` pairs stored under
    ``session_data["emulator.registers"]``; only full-width registers are
    stored there. A write to a sub-register is merged into its full-width
    register according to the sub-register's offset, size and
    ``ImplicitRegisterExtend`` mode. Names starting with ``temp`` are
    treated as address-sized full-width registers.

    The register model's ``startUpdate``/``endUpdate`` are invoked on the
    main thread around the list mutation, and a full-width write to the
    architecture's stack pointer also updates the stack model.

    Raises:
        KeyError: if *reg_name* is neither a temp register nor a register
            of the view's architecture.
    """
    session = self.view.session_data

    registers = session.get("emulator.registers", [])
    if not registers:
        session["emulator.registers"] = registers

    if reg_name.startswith('temp'):
        register = RegisterInfo(reg_name, self.view.address_size)
    else:
        register = self.view.arch.regs[reg_name]

    size = register.size
    offset = register.offset
    extend = register.extend
    full_width_reg = register.full_width_reg

    def store(name, new_value):
        # Replace the existing entry for `name`, or append one when the
        # register has not been written yet. Appending (instead of
        # writing to slot 0) keeps an unrelated register from being
        # overwritten and also works on an empty list.
        slots = {entry[0]: index for index, entry in enumerate(registers)}
        execute_on_main_thread_and_wait(
            session["emulator.registers.model"].startUpdate)
        if name in slots:
            registers[slots[name]] = (name, new_value)
        else:
            registers.append((name, new_value))
        execute_on_main_thread_and_wait(
            session["emulator.registers.model"].endUpdate)

    if full_width_reg == reg_name:
        store(reg_name, value)

        if reg_name == self.view.arch.stack_pointer:
            execute_on_main_thread_and_wait(
                lambda: session['emulator.stack.model'].update(value))
        return

    full_width_value = self.read_register(full_width_reg)

    low_bit = offset * 8
    high_bit = (size + offset) * 8

    below_mask = (1 << low_bit) - 1  # bits under the sub-register
    upto_mask = (1 << high_bit) - 1  # bits under and inside it
    field_mask = upto_mask ^ below_mask  # bits of the sub-register itself

    full_width_size = self.view.arch.regs[full_width_reg].size
    all_mask = (1 << (full_width_size * 8)) - 1

    field = (value << low_bit) & field_mask

    if extend == ImplicitRegisterExtend.NoExtend:
        # Only the field changes; everything else is preserved.
        full_width_value = field | (full_width_value & (all_mask ^ field_mask))
    elif extend == ImplicitRegisterExtend.ZeroExtendToFullWidth:
        # Bits above the field become zero; bits below are preserved. The
        # old field bits must be dropped, not OR-ed with the new ones.
        full_width_value = field | (full_width_value & below_mask)
    elif extend == ImplicitRegisterExtend.SignExtendToFullWidth:
        full_width_value = field | (full_width_value & below_mask)
        # The sign is the most significant bit of the field.
        if field & (1 << (high_bit - 1)):
            # Fill only the bits above the field with ones.
            full_width_value |= all_mask ^ upto_mask

    store(full_width_reg, full_width_value)
class VMArch(Architecture):
    """Binary Ninja architecture for a small byte-oriented VM.

    Every instruction is three bytes: opcode, offset, value. Mnemonics come
    from the module-level ``opcodes`` table.
    """
    name = "VMArch"
    address_size = 1
    default_int_size = 1
    max_instr_length = 3

    stack_pointer = 's'

    regs = {
        'k': RegisterInfo('k', 1),
        'c': RegisterInfo('c', 1),
        's': RegisterInfo('s', 1)
    }

    def parse_instruction(self, data, addr):
        """Split the first three bytes into ``(opcode, offset, value, 3)``.

        Raises ``ValueError`` when *data* holds fewer than three bytes.
        """
        opcode, offset, value = data[:3]
        return opcode, offset, value, 3

    def _decode(self, data, addr):
        """Return ``(op, opcode, offset, value, length)`` or ``None``.

        ``None`` is returned when *data* is too short or the opcode has no
        entry in ``opcodes``, so the callbacks can report "not decodable"
        instead of raising.
        """
        if len(data) < 3:
            return None
        opcode, offset, value, length = self.parse_instruction(data, addr)
        try:
            op = opcodes[opcode]
        except LookupError:
            # covers both KeyError (mapping) and IndexError (sequence)
            return None
        return op, opcode, offset, value, length

    def get_instruction_info(self, data, addr):
        """Return an ``InstructionInfo`` for the instruction, or ``None``."""
        decoded = self._decode(data, addr)
        if decoded is None:
            return None
        op, _, _, _, length = decoded

        info = InstructionInfo()
        info.length = length

        if op == 'hlt':
            info.add_branch(BranchType.FunctionReturn)

        return info

    def get_instruction_text(self, data, addr):
        """Return ``(tokens, length)`` for the instruction, or ``None``."""
        decoded = self._decode(data, addr)
        if decoded is None:
            return None
        op, opcode, offset, value, length = decoded

        # create the opcode token
        tokens = [
            InstructionTextToken(InstructionTextTokenType.InstructionToken,
                                 f'{op:<.6s}',
                                 value=opcode)
        ]

        # create the offset token
        if op != 'hlt':
            tokens.append(
                InstructionTextToken(
                    InstructionTextTokenType.PossibleAddressToken,
                    f' {offset}',
                    value=offset,
                    size=1))

        if op == 'set':
            tokens.append(
                InstructionTextToken(InstructionTextTokenType.IntegerToken,
                                     f' {value}',
                                     value=value,
                                     size=1))

        return tokens, length

    def get_instruction_low_level_il(self, data, addr, il):
        """Lift the instruction into *il*; return its length or ``None``."""
        decoded = self._decode(data, addr)
        if decoded is None:
            return None
        op, _, offset, value, length = decoded

        # [offset].b = value
        if op == 'set':
            il.append(il.store(1, il.const(1, offset), il.const(1, value)))
        # c = [offset].b
        elif op == 'get':
            il.append(il.set_reg(1, 'c', il.load(1, il.const(1, offset))))
        # [offset].b = [offset].b ^ c
        elif op == 'xor':
            il.append(il.set_reg(1, 'k', il.load(1, il.const(1, offset))))
            il.append(
                il.store(1, il.const(1, offset),
                         il.xor_expr(1, il.reg(1, 'k'), il.reg(1, 'c'))))
        elif op == 'hlt':
            il.append(il.no_ret())

        return length
class VMNDH(Architecture):
    """Binary Ninja architecture for the NDH 2k12 virtual machine.

    Instruction classes are looked up in the module-level
    ``instruction_dict``; the assembler uses ``mnemonics`` and
    ``register_indexes``. Instruction bytes are handled as ``bytes`` and
    converted to/from ``str`` with the ``charmap`` codec while assembling.
    """
    name = 'vmndh-2k12'
    address_size = 2
    default_int_size = 2
    max_instr_length = 5

    regs = {
        'r0': RegisterInfo('r0', 2),
        'r1': RegisterInfo('r1', 2),
        'r2': RegisterInfo('r2', 2),
        'r3': RegisterInfo('r3', 2),
        'r4': RegisterInfo('r4', 2),
        'r5': RegisterInfo('r5', 2),
        'r6': RegisterInfo('r6', 2),
        'r7': RegisterInfo('r7', 2),
        'sp': RegisterInfo('sp', 2),
        'bp': RegisterInfo('bp', 2),
        'pc': RegisterInfo('pc', 2),
    }

    flags = ['a', 'b', 'z']

    # The first flag write type is ignored currently.
    # See: https://github.com/Vector35/binaryninja-api/issues/513
    flag_write_types = ['', '*', 'a', 'b', 'z']

    flags_written_by_flag_write_type = {'*': ['a', 'b', 'z'], 'z': ['z']}

    flag_roles = {
        'a': FlagRole.CarryFlagRole,
        'b': FlagRole.NegativeSignFlagRole,
        'z': FlagRole.ZeroFlagRole,
        #'v': FlagRole.OverflowFlagRole
    }

    # WHAT IS THIS????
    flags_required_for_flag_condition = {
        # LowLevelILFlagCondition.LLFC_UGE: ['c'],
        # LowLevelILFlagCondition.LLFC_ULT: ['c'],
        LowLevelILFlagCondition.LLFC_SGT: ['a'],
        LowLevelILFlagCondition.LLFC_SLT: ['b'],
        LowLevelILFlagCondition.LLFC_E: ['z'],
        LowLevelILFlagCondition.LLFC_NE: ['z'],
        # LowLevelILFlagCondition.LLFC_NEG: ['n'],
        # LowLevelILFlagCondition.LLFC_POS: ['n']
    }

    stack_pointer = 'sp'

    # Opcodes accepted by the invert/always-branch patches.
    _CONDITIONAL_BRANCH_OPCODES = (0x10, 0x11, 0x1e, 0x1f)
    # Opcodes accepted by the never-branch patch.
    _NEVER_BRANCH_OPCODES = (0x10, 0x11, 0x16, 0x1b, 0x1e, 0x1f)
    # First byte required by the skip-and-return patches.
    _CALL_OPCODE = b"\x19"

    @staticmethod
    def _first_opcode(data):
        """Return the first byte of *data* as an int, or ``None`` if empty."""
        return ord(data[0:1]) if data else None

    def _is_patchable_call(self, data):
        # data is bytes, so the opcode must be compared against a bytes
        # literal; comparing with a str is always False on Python 3.
        return data[0:1] == self._CALL_OPCODE and len(data) == 4

    def is_never_branch_patch_available(self, data, addr):
        return self._first_opcode(data) in self._NEVER_BRANCH_OPCODES

    def is_invert_branch_patch_available(self, data, addr):
        return self._first_opcode(data) in self._CONDITIONAL_BRANCH_OPCODES

    def is_always_branch_patch_available(self, data, addr):
        return self._first_opcode(data) in self._CONDITIONAL_BRANCH_OPCODES

    def is_skip_and_return_zero_patch_available(self, data, addr):
        return self._is_patchable_call(data)

    def is_skip_and_return_value_patch_available(self, data, addr):
        return self._is_patchable_call(data)

    def convert_to_nop(self, data, addr):
        """Return ``len(data)`` bytes of the one-byte 0x02 opcode."""
        return b"\x02" * len(data)

    def never_branch(self, data, addr):
        return self.convert_to_nop(data, addr)

    def always_branch(self, data, addr):
        """Replace the opcode with 0x1b, keeping the operand bytes."""
        if self._first_opcode(data) not in self._CONDITIONAL_BRANCH_OPCODES:
            return None
        return b"\x1b" + data[1:]

    def invert_branch(self, data, addr):
        """Flip the lowest opcode bit to invert the branch condition."""
        if self._first_opcode(data) not in self._CONDITIONAL_BRANCH_OPCODES:
            return None
        return chr(ord(data[0:1]) ^ 0x01).encode('charmap') + data[1:]

    def skip_and_return_value(self, data, addr, value):
        """Replace a 4-byte 0x19 instruction with a load of *value* (low byte).

        Returns the replacement bytes, or ``None`` when *data* is not a
        4-byte instruction starting with 0x19.
        """
        if not self._is_patchable_call(data):
            return None
        return ("\x04" + chr(OP_FLAG_REG_DIRECT08) + "\x00" +
                chr(value & 0xff)).encode('charmap')

    def assemble(self, code, addr):
        """Assemble one instruction given as bytes; return its encoding.

        Raises ``ValueError`` for an unknown mnemonic or an operand
        combination the instruction does not support, and ``KeyError``
        for an unknown register name.
        """
        code = code.decode('charmap')
        if ".b" in code:
            code = code.replace(".b", "")
        code = list(filter(None, code.replace(", ", " ").split(" ")))
        mnemonic = code[0]
        if mnemonic not in mnemonics:
            raise ValueError("Invalid mnemonic {}".format(code))
        assembly = mnemonics[mnemonic]
        cls = instruction_dict[ord(assembly)]

        # Instructions deriving directly from Instruction take no operands.
        if cls.__base__ == Instruction:
            return assembly.encode('charmap')

        if FlagInstruction in cls.__mro__:
            valid_flags = cls.valid_flags
        else:
            valid_flags = [cls.flag]

        dst_flag = None
        dst = code[1]
        if dst[0] == '[' and dst[-1] == ']':
            if OP_FLAG_REGINDIRECT_REG not in valid_flags:
                raise ValueError("Invalid destination operand {}".format(dst))
            dst = dst[1:-1]
            dst_flag = OP_FLAG_REGINDIRECT_REG

        # python2 0x8723L
        if dst[-1:] == 'L':
            dst = dst[:-1]

        try:
            dst_value = int(dst, 0) & 0xffff
        except ValueError:
            # not a number: must be a register name
            dst = register_indexes[dst]
            flag = OP_FLAG_REG
        else:
            if dst_value < 0x100:
                flag = OP_FLAG_DIRECT08
                dst = chr(dst_value)
            else:
                if BranchInstruction in cls.__mro__:
                    # 16-bit branch targets are encoded relative to the
                    # end of the instruction, whose length depends on the
                    # mnemonic.
                    dst_value -= addr
                    if mnemonic == 'jns':
                        dst_value -= 2
                    elif mnemonic == 'call':
                        dst_value -= 4
                    else:
                        dst_value -= 3
                    dst_value &= 0xffff
                flag = OP_FLAG_DIRECT16
                dst = struct.pack("<H", dst_value).decode('charmap')

        if len(code) == 2:
            if flag not in valid_flags:
                raise ValueError("Invalid destination operand {}".format(dst))
            if len(valid_flags) > 1:
                assembly += chr(flag)
            assembly += dst
            return assembly.encode('charmap')

        # Every two-operand flag has a register (or register-indirect)
        # destination, so anything else is rejected up front.
        if flag != OP_FLAG_REG:
            raise ValueError("Invalid destination register: {}".format(dst))

        src_flag = None
        src = code[2]
        if src[0] == '[' and src[-1] == ']':
            if OP_FLAG_REG_REGINDIRECT not in valid_flags:
                raise ValueError("Invalid source operand {}".format(src))
            # Translate the register name to its encoding, as is done
            # for the destination; otherwise the raw text would be
            # emitted into the instruction bytes.
            src = register_indexes[src[1:-1]]
            src_flag = OP_FLAG_REGINDIRECT_REG

        flag = None
        if not src_flag:
            try:
                src_value = int(src, 0) & 0xffff
            except ValueError:
                src = register_indexes[src]
                src_flag = OP_FLAG_REG
            else:
                if src_value < 0x100:
                    src_flag = OP_FLAG_DIRECT08
                    src = chr(src_value)
                else:
                    src_flag = OP_FLAG_DIRECT16
                    src = struct.pack("<H", src_value).decode('charmap')

        if dst_flag:
            if src_flag == dst_flag:
                flag = OP_FLAG_REGINDIRECT_REGINDIRECT
            elif src_flag == OP_FLAG_DIRECT08:
                flag = OP_FLAG_REGINDIRECT_DIRECT08
            elif src_flag == OP_FLAG_DIRECT16:
                flag = OP_FLAG_REGINDIRECT_DIRECT16
            elif src_flag == OP_FLAG_REG:
                flag = OP_FLAG_REGINDIRECT_REG
            else:
                raise ValueError("src_flag is bugged: {:x}".format(src_flag))
        else:
            if src_flag == OP_FLAG_REGINDIRECT_REG:
                flag = OP_FLAG_REG_REGINDIRECT
            elif src_flag == OP_FLAG_DIRECT08:
                flag = OP_FLAG_REG_DIRECT08
            elif src_flag == OP_FLAG_DIRECT16:
                flag = OP_FLAG_REG_DIRECT16
            elif src_flag == OP_FLAG_REG:
                flag = OP_FLAG_REG_REG
            else:
                raise ValueError("src_flag is bugged: {:x}".format(src_flag))

        if flag not in valid_flags:
            raise ValueError(
                "Invalid operands for operation: {}".format(mnemonic))

        if len(valid_flags) > 1:
            assembly += chr(flag)
        assembly += dst + src
        return assembly.encode('charmap')

    def decode_instruction(self, data, addr):
        """Return the instruction object at *addr*, or ``None``.

        Addresses below 0x8000 are never decoded.
        """
        if addr < 0x8000:
            return None

        if not data:
            return None

        opcode = ord(data[:1])
        if opcode not in instruction_dict:
            log_error('0x{:x} : Bad opcode: {:x}'.format(addr, opcode))
            return None

        try:
            return instruction_dict[opcode](data, addr)
        except VMNDHError as e:
            log_error('0x{:x} : Bad instruction: {:s}'.format(addr, str(e)))
            return None

    def get_instruction_info(self, data, addr):
        """Return an ``InstructionInfo`` with length and branches, or ``None``."""
        instr_obj = self.decode_instruction(data, addr)
        if not instr_obj:
            return None

        result = InstructionInfo()
        result.length = instr_obj.length

        instr_name = instr_obj.getName()

        # TODO: update this properly
        # Add branches
        if instr_name in ['ret', 'end']:
            result.add_branch(BranchType.FunctionReturn)
        elif instr_name.startswith('jmp'):
            result.add_branch(BranchType.UnconditionalBranch,
                              instr_obj.dst_value)
        elif instr_name in BRANCH_INSTRUCTIONS:
            result.add_branch(BranchType.TrueBranch, instr_obj.dst_value)
            result.add_branch(BranchType.FalseBranch, addr + instr_obj.length)
        elif instr_name == 'call':
            result.add_branch(BranchType.CallDestination, instr_obj.dst_value)
        elif instr_name == 'syscall':
            result.add_branch(BranchType.SystemCall)

        return result

    def get_instruction_text(self, data, addr):
        """Return ``(tokens, length)`` for the instruction, or ``None``."""
        instr_obj = self.decode_instruction(data, addr)
        if not instr_obj:
            return None

        instruction_text = instr_obj.getName()

        # byte-sized operand forms get a ".b" suffix
        if hasattr(instr_obj, "flag") and flag_word_size[instr_obj.flag] == 1:
            instruction_text += '.b'

        tokens = [
            InstructionTextToken(InstructionTextTokenType.InstructionToken,
                                 '{:7s}'.format(instruction_text))
        ]
        tokens += instr_obj.getTextToken()
        return tokens, instr_obj.length

    def get_instruction_low_level_il(self, data, addr, il):
        """Lift the instruction into *il*; return its length or ``None``."""
        instr_obj = self.decode_instruction(data, addr)
        if not instr_obj:
            return None

        insns = instr_obj.do_il(data, addr, il)
        if isinstance(insns, list):
            for insn in insns:
                il.append(insn)
        elif insns is not None:
            try:
                il.append(insns)
            except Exception:
                # best effort: report the failure but keep analysis going
                traceback.print_exc()
                print(type(insns), insns, instr_obj.getName(), hex(addr))

        return instr_obj.length
class Synacor(Architecture):
    """Binary Ninja architecture for the Synacor challenge VM.

    Instructions are sequences of little-endian 16-bit words: one opcode
    word followed by one word per operand. Operation classes come from the
    module-level ``operations`` / ``lookup`` tables and the word size from
    the module-level ``size``.
    """
    name = 'Synacor'
    address_size = size
    default_int_size = size
    instr_alignment = 1
    max_instr_length = max([op.size for op in operations])

    regs = {
        'R0': RegisterInfo('R0', size),
        'R1': RegisterInfo('R1', size),
        'R2': RegisterInfo('R2', size),
        'R3': RegisterInfo('R3', size),
        'R4': RegisterInfo('R4', size),
        'R5': RegisterInfo('R5', size),
        'R6': RegisterInfo('R6', size),
        'R7': RegisterInfo('R7', size),
        # Not sure if used, but required by Binary Ninja
        'sp': RegisterInfo('sp', size)
    }

    stack_pointer = 'sp'

    def assemble(self, code, _addr):
        """Assemble one instruction given as bytes; return its encoding.

        Raises ``ValueError`` for an unknown operation or a wrong number
        of operands.
        """
        parts = re.split('[ ,]+', code.decode().strip())
        instr = parts.pop(0)

        # the operation may be given by name or by numeric opcode
        op_cls = lookup.get(instr) or lookup.get(safeint(instr, 0))
        if op_cls is None:
            raise ValueError("No operation found for '%s'" % instr)

        types = op_cls.operand_types
        if len(parts) != len(types):
            raise ValueError("'%s' requires exactly %d operands" %
                             (op_cls.label, len(types)))

        values = [op_cls.opcode]
        values.extend(
            Operand.assemble(i, optype, parts[i])
            for (i, optype) in enumerate(types))

        return struct.pack('<%iH' % len(values), *values)

    def convert_to_nop(self, data, _addr):
        """Return as many noop words as fit in ``len(data)`` bytes."""
        nop = struct.pack('<1H', NoopOperation.opcode)
        return nop * (len(data) // size)

    def decode(self, data, count, offset=0):
        """Unpack *count* words starting *offset* words into *data*.

        Returns the values, or a list of *count* ``None`` entries when
        *data* does not reach the end of the requested range.
        """
        start = offset * size
        end = start + count * size

        # Compare against the absolute end: checking only the span length
        # ignored `offset` and let truncated operands reach struct.unpack.
        if len(data) < end:
            return [None] * count

        return struct.unpack('<%iH' % count, data[start:end])

    def decode_operation(self, data, addr):
        """Return the operation object at *addr*, or ``None``."""
        opcode, = self.decode(data, count=1)
        if opcode is None:
            return None

        op_cls = lookup.get(opcode)
        if op_cls is None:
            return None

        types = op_cls.operand_types
        values = self.decode(data, count=len(types), offset=1)
        # decode() signals truncated data with None entries, not with None
        if values is None or None in values:
            return None

        operands = [
            Operand(i, optype, values[i]) for (i, optype) in enumerate(types)
        ]
        return op_cls(self, addr, operands)

    def get_instruction_info(self, data, addr):
        """Return an ``InstructionInfo`` for the instruction, or ``None``."""
        op = self.decode_operation(data, addr)
        if op is None:
            return None

        ii = InstructionInfo()
        ii.length = op.size
        op.branching(ii)
        return ii

    def get_instruction_text(self, data, addr):
        """Return ``(tokens, length)`` for the instruction, or ``None``."""
        op = self.decode_operation(data, addr)
        if op is None:
            return None

        tokens = []
        op.tokenize(tokens)
        return tokens, op.size

    def get_instruction_low_level_il(self, data, addr, il):
        """Lift the instruction into *il*; return its length or ``None``."""
        op = self.decode_operation(data, addr)
        if op is None:
            return None

        op.low_level_il(il)
        return op.size
class EVM(Architecture):
    """Binary Ninja architecture for Ethereum Virtual Machine bytecode.

    Decoding is delegated to ``disassemble_one`` and lifting to the
    per-mnemonic handlers in ``insn_il``.
    """
    name = "EVM"

    # Actual size is 32 but we're going to truncate everything
    address_size = ADDR_SIZE  # should be 32
    default_int_size = ADDR_SIZE

    instr_alignment = 1
    max_instr_length = 33

    endianness = Endianness.BigEndian

    regs = {
        "sp": RegisterInfo("sp", ADDR_SIZE),
    }

    stack_pointer = "sp"

    def get_instruction_info(self, data, addr):
        """Return an ``InstructionInfo`` with length and branches."""
        instruction = disassemble_one(data, addr)

        result = InstructionInfo()
        result.length = instruction.size

        # Jump targets are not known at this point, hence "unresolved".
        if instruction.name == "JUMP":
            result.add_branch(BranchType.UnresolvedBranch)
        elif instruction.name == "JUMPI":
            result.add_branch(BranchType.UnresolvedBranch)
            result.add_branch(BranchType.FalseBranch, addr + 1)
        elif instruction.name in ('RETURN', 'REVERT', 'SUICIDE', 'INVALID',
                                  'STOP', 'SELFDESTRUCT'):
            result.add_branch(BranchType.FunctionReturn)

        return result

    def get_instruction_text(self, data, addr):
        """Return ``(tokens, length)`` for the instruction."""
        instruction = disassemble_one(data, addr)

        tokens = [
            InstructionTextToken(
                InstructionTextTokenType.TextToken,
                "{:7} ".format(instruction.name)
            )
        ]

        if instruction.name.startswith('PUSH'):
            # zero-padded to the operand_size reported by the disassembler
            tokens.append(
                InstructionTextToken(
                    InstructionTextTokenType.IntegerToken,
                    '#{:0{i.operand_size}x}'.format(
                        instruction.operand, i=instruction
                    ),
                    instruction.operand
                )
            )

        return tokens, instruction.size

    def get_instruction_low_level_il(self, data, addr, il):
        """Lift the instruction into *il* and return its length.

        Instructions without a handler in ``insn_il`` are modelled only by
        their stack effect: one pop into a temp register per popped item
        and one push of ``unimplemented`` per pushed item.
        """
        instruction = disassemble_one(data, addr)

        ill = insn_il.get(instruction.name, None)
        if ill is None:
            for i in range(instruction.pops):
                il.append(
                    il.set_reg(ADDR_SIZE, LLIL_TEMP(i), il.pop(ADDR_SIZE))
                )

            for _ in range(instruction.pushes):
                il.append(il.push(ADDR_SIZE, il.unimplemented()))

            il.append(il.nop())

            return instruction.size

        ils = ill(il, addr, instruction.operand)
        if isinstance(ils, list):
            for expr in ils:
                # append each expression, not the IL function itself
                il.append(expr)
        else:
            il.append(ils)

        return instruction.size

    def assemble(self, code, addr=0):
        """Assemble *code*; return ``(bytes, '')`` or ``(None, error_text)``."""
        try:
            return assemble(code, addr), ''
        except Exception as e:
            return None, str(e)
class Subleq(Architecture):
    """Architecture definition for a subleq (subtract and branch if <= 0) machine.

    Every instruction is three little-endian 32-bit words ``a``, ``b``, ``c``.
    The operands are word indices: they are multiplied by 4 to obtain byte
    addresses. Operands whose byte address is at or above 0x4400 are treated
    as an invalid instruction by the text and IL callbacks.
    """

    name = "subleq"
    address_size = 4
    default_int_size = 4
    max_instr_length = 12  # Each instruction is 3 dwords

    # SP register is required, even if we are not going to use it
    regs = {'sp': RegisterInfo('sp', 2)}
    stack_pointer = 'sp'

    def perform_get_instruction_info(self, data, addr):
        """Return length and branch targets for the instruction at *addr*.

        Returns None when fewer than 12 bytes are available.
        """
        # If we can't decode an instruction return None
        if len(data) < 12:
            return None

        # Unpack our operands from the data
        a, b, c = struct.unpack('<3I', data[:12])

        # Create the InstructionInfo object for our instruction
        res = InstructionInfo()
        res.length = 12

        if c != 0:
            if b == a:
                # Unconditional branch jumps to integer index c
                res.add_branch(BranchType.UnconditionalBranch, c * 4)
            else:
                # True branch jumps to integer index c
                res.add_branch(BranchType.TrueBranch, c * 4)
                # False branch continues to next instruction
                res.add_branch(BranchType.FalseBranch, addr + 12)

        return res

    def perform_get_instruction_text(self, data, addr):
        """Return ``(tokens, 12)`` describing the instruction at *addr*.

        The text is ``invalid``, ``clear [B]`` (when a == b) or
        ``sub [B], [A]``, followed by a ``; jmp C`` suffix when c != 0.
        Returns None when fewer than 12 bytes are available.
        """
        # If we can't decode an instruction return None
        if len(data) < 12:
            return None

        # Unpack our operands from the data
        a, b, c = struct.unpack('<3I', data[:4 * 3])

        tokens = []

        # Check for invalid instructions that would crash
        if b * 4 >= 0x4400 or a * 4 >= 0x4400:
            tokens.append(makeToken('i', '{:7s}'.format('invalid')))
            return tokens, 4 * 3

        # Clear instruction to be less verbose
        # clear [B]
        elif a == b:
            tokens.append(makeToken('i', '{:7s}'.format('clear')))
            tokens.append(makeToken('t', '['))
            tokens.append(makeToken('a', hexr(b * 4), b * 4))
            tokens.append(makeToken('t', ']'))

        # Normal sub instruction
        # sub [B], [A]
        else:
            tokens.append(makeToken('i', '{:7s}'.format('sub')))
            tokens.append(makeToken('t', '['))
            tokens.append(makeToken('a', hexr(b * 4), b * 4))
            tokens.append(makeToken('t', ']'))
            tokens.append(makeToken('s', ', '))
            tokens.append(makeToken('t', '['))
            tokens.append(makeToken('a', hexr(a * 4), a * 4))
            tokens.append(makeToken('t', ']'))

        # Unconditional jump
        # ; jmp C
        if c != 0 and b == a:
            tokens.append(makeToken('s', '; '))
            tokens.append(makeToken('i', '{:7s}'.format('jmp')))
            tokens.append(makeToken('a', hex(c * 4), c * 4))

        # Conditional jump
        # ; jmp C if [B] <= 0
        elif c != 0:
            tokens.append(makeToken('s', '; '))
            tokens.append(makeToken('i', '{:7s}'.format('jmp')))
            tokens.append(makeToken('a', hex(c * 4), c * 4))
            tokens.append(makeToken('s', ' if '))
            tokens.append(makeToken('t', '['))
            tokens.append(makeToken('a', hex(b * 4), b * 4))
            tokens.append(makeToken('t', ']'))
            tokens.append(makeToken('t', ' <= 0'))

        return tokens, 4 * 3

    # Full LLIL lifting for subleq
    def perform_get_instruction_low_level_il(self, data, addr, il):
        """Lift the instruction at *addr* into *il* and return its length (12).

        Emits ``*B = 0`` when a == b, otherwise ``*B = *B - *A``, followed by
        an unconditional jump (a == b) or a signed ``*B <= 0`` conditional
        jump to C when c != 0. Invalid instructions are lifted as a nop.
        Returns None when fewer than 12 bytes are available.
        """
        # If we can't decode an instruction return None
        if len(data) < 12:
            return None

        # Unpack our operands from the data
        a, b, c = struct.unpack('<3I', data[:4 * 3])

        # If this instruction would crash, ignore it
        if b * 4 >= 0x4400 or a * 4 >= 0x4400:
            il.append(il.nop())
            return 4 * 3

        # A, B, and C as pointers.
        # addr_b must be built from b (not a): it is both the store target
        # and the location tested by the conditional jump.
        addr_a = il.const_pointer(4, a * 4)
        addr_b = il.const_pointer(4, b * 4)
        addr_c = il.const_pointer(4, c * 4)

        # mem[A] and mem[B] pointers
        mem_a = il.load(4, addr_a)
        mem_b = il.load(4, addr_b)

        # For a clear instruction just store 0
        if a == b:
            # *B = 0
            store_b = il.store(4, addr_b, il.const(4, 0))
            il.append(store_b)

        # For normal operation, construct a subtraction
        else:
            # *B = *B - *A
            sub_op = il.sub(4, mem_b, mem_a)
            store_b = il.store(4, addr_b, sub_op)
            il.append(store_b)

        # Unconditional jump
        if c != 0 and b == a:
            # goto C
            jmp = il.jump(addr_c)
            il.append(jmp)

        # Conditional jump
        elif c != 0:
            # See if we have marked the True jump target before
            t_target = il.get_label_for_address(
                Architecture['subleq'],
                il[il.const_pointer(4, c * 4)].constant)

            # Create the False jump label
            f_target = LowLevelILLabel()

            # If we have to create a jump IL for the True target
            indirect = t_target is None
            if indirect:
                t_target = LowLevelILLabel()

            less_op = il.compare_signed_less_equal(4, mem_b, il.const(4, 0))
            if_op = il.if_expr(less_op, t_target, f_target)
            il.append(if_op)

            # We need to create a jump to the true target if it doesn't exist
            if indirect:
                il.mark_label(t_target)
                jmp = il.jump(addr_c)
                il.append(jmp)

            # Last is the fall though for the false target
            il.mark_label(f_target)

        return 12