class XtensaLE(Architecture):
    """Little-endian Xtensa architecture backed by radare2.

    Disassembly text and ESIL are obtained from an r2pipe session; the ESIL
    string is then translated into Binary Ninja LLIL by a small stack machine
    (see ``esil_to_llil``). A handful of instructions (jumps, calls, returns,
    zero-overhead loops, ``entry``, ``memw`` and ``l32r`` with a literal base)
    are lifted by hand instead.
    """

    name = "Xtensa LE"
    endianness = Endianness.LittleEndian
    address_size = 4
    default_int_size = 4
    instr_alignment = 1
    max_instr_length = 3

    # Include extra useless garbage in the LLIL
    # and also dump ESIL in the instruction
    VERBOSE_IL = False

    regs = {
        "pc": RegisterInfo("pc", 4),
        "sar": RegisterInfo("sar", 1),  # actually 6 bits but whatever
        "lbegin": RegisterInfo("lbegin", 4),
        "lend": RegisterInfo("lend", 4),
        "lcount": RegisterInfo("lcount", 4),
        # ours actually just has the full number, not the log_2 of it
        "PS.CALLINC": RegisterInfo("PS.CALLINC", 1),
    }

    stack_pointer = 'a1'  # Standard ABI

    # Note: not using a reg_stack because this is intended for x87/FPU
    # it's not intended for windowed registers
    for i in range(16):
        n = "a{0}".format(i)
        regs[n] = RegisterInfo(n, 4)

    intrinsics = {
        "memw": IntrinsicInfo([], []),
        "entry": IntrinsicInfo([], []),
    }

    # Conditional branch mnemonics: the fixed list below plus every
    # b<cond><operand-kind> combination generated by the loops.
    _branch_instrs = ["bbci", "bbsi", "bgeu", "bltu", "bany", "bnone",
                      "ball", "bnall", "bbc", "bbs"]
    for operand in ["z", "i", "ui", ""]:
        for cond_code in ["eq", "ne", "ge", "lt"]:
            _branch_instrs.append("b" + cond_code + operand)

    # ESIL binary operator -> name of the LowLevelILFunction builder method.
    _esil_to_llil = {
        "-": "sub",
        "+": "add",
        "&": "and_expr",
        "|": "or_expr",
        "^": "xor_expr",
        ">>": "logical_shift_right",  # ??
        "<<": "shift_left",
        "==": "compare_equal",
        ">=": "compare_unsigned_greater_equal",
        "<=": "compare_unsigned_less_equal",
        ">": "compare_unsigned_greater_than",
        "<": "compare_unsigned_less_than",
    }

    def __init__(self):
        """Open the radare2 session and set up the command cache and loop map."""
        super(XtensaLE, self).__init__()
        self.r2 = self._init_r2()
        # r2 command string -> output. Never evicted.
        self.cache = {}
        self._lock = threading.Lock()
        self._looplock = threading.Lock()
        # loop end address -> loop begin address, filled while lifting
        # loop/loopnez/loopgtz instructions.
        self.loops = {}

    def _init_r2(self):
        """Return an r2pipe session on /dev/null configured for Xtensa."""
        r = r2pipe.open('/dev/null')
        r.cmd("e asm.arch=xtensa")
        return r

    def _r2_cache(self, cmd):
        """Run r2 command ``cmd`` (memoized) and return its output.

        The lock serializes both the cache and the shared r2 pipe.
        """
        with self._lock:
            if cmd in self.cache:
                return self.cache[cmd]
            res = self.r2.cmd(cmd)
            self.cache[cmd] = res
            return res

    def _inst_length(self, name):
        """Return the byte length for mnemonic ``name``: 2 for ``*.n``, else 3."""
        return 2 if name.endswith(".n") else 3

    def _get_asm(self, data, addr):
        """Disassemble ``data`` at ``addr`` with r2's ``pad`` command.

        Returns ``(mnemonic, operands)`` taken from the first output line.
        The mnemonic is lower-cased only when operands are present; a line
        without a space is returned unchanged with an empty operand list.
        """
        asm = self._r2_cache("s {0}; pad {1}".format(addr, hexlify(data)))
        firstline = asm.strip().split("\n")[0].encode("ascii")
        if " " not in firstline:
            return firstline, []
        inst, args = firstline.split(" ", 1)
        inst = inst.lower()
        args = args.split(", ")
        return inst, args

    def _get_esil(self, data, addr):
        """Return the ESIL string r2's ``pade`` produces for ``data`` at ``addr``."""
        cmd = "s {0}; pade {1}".format(addr, hexlify(data))
        return self._r2_cache(cmd).strip().encode("ascii")

    def _get_reil(self, esil):
        """Return the output of r2's ``aetr`` command for the ESIL string ``esil``."""
        return self._r2_cache("aetr '" + esil + "'")

    def get_instruction_info(self, data, addr):
        """Describe the length and control flow of the instruction at ``addr``.

        Returns ``None`` when r2 reports an illegal instruction (``ill``),
        otherwise an ``InstructionInfo`` with branches added for jumps,
        calls, returns, loop set-up instructions and conditional branches.
        """
        inst, args = self._get_asm(data, addr)
        if inst == "ill":
            return None
        res = InstructionInfo()
        res.length = self._inst_length(inst)
        # Was `inst in ("jx")`, which is a substring test against the string
        # "jx" (it also matched "" and crashed on args[0]). "j" is handled
        # by its own branch below with the same result as before.
        if inst == "jx":
            if args[0] in self.regs:
                res.add_branch(BranchType.IndirectBranch)
            else:
                res.add_branch(BranchType.UnconditionalBranch, int(args[0], 16))
        elif inst in ("callx0", "callx4", "callx8", "callx12"):
            res.add_branch(BranchType.CallDestination)
        elif inst in ("ret", "retw", "ret.n", "retw.n"):
            res.add_branch(BranchType.FunctionReturn)
        elif inst == "j":
            res.add_branch(BranchType.UnconditionalBranch, int(args[0], 16))
        elif inst in ("call0", "call4", "call8", "call12"):
            res.add_branch(BranchType.CallDestination, int(args[0], 16))
        elif inst in ("loopgtz", "loopnez"):
            # The loop may be skipped entirely: false edge goes to the loop
            # end operand, true edge falls into the loop body.
            res.add_branch(BranchType.FalseBranch, int(args[1], 16))
            res.add_branch(BranchType.TrueBranch, addr + res.length)
        elif inst in self._branch_instrs or (
                inst.endswith(".n") and inst[:-2] in self._branch_instrs):
            res.add_branch(BranchType.TrueBranch, int(args[-1], 16))
            res.add_branch(BranchType.FalseBranch, addr + res.length)
        return res

    def _decode_l32r(self, litbase, addr, bytes):
        """Decode an ``l32r`` instruction by hand.

        :param litbase: literal base value; bit 0 selects literal-base
            relative addressing, the upper 20 bits are the base.
        :param addr: address of the instruction.
        :param bytes: instruction bytes (at least 3).
        :returns: ``(register_name, literal_address)``.
        """
        # bytearray yields integers on both Python 2 and 3, unlike the
        # previous ord()/str() combination.
        a, b, c = tuple(reversed(bytearray(bytes[0:3])))
        imm16 = (a << 8) + b
        t = (c >> 4)
        # 16-bit word offset, extended with ones in the upper bits.
        offset = (0x3FFF << 18) | (imm16 << 2)
        # Use the parameter; the original read the LITBASE global here and
        # silently ignored the argument.
        if litbase & 0x1:
            target = (litbase & 0xFFFFF000) + offset
        else:
            target = ((addr + 3) & 0xFFFFFFFC) + offset
        target = target % (1 << 32)
        return ("a{0}".format(t), target)

    def get_instruction_text(self, data, addr):
        """Return ``(tokens, length)`` for the instruction at ``addr``.

        Returns ``None`` for an illegal instruction. When ``VERBOSE_IL`` is
        set, the ESIL string is appended as a text token.
        """
        inst, args = self._get_asm(data, addr)
        if inst == "ill":
            return None
        # override buggy l32r in radare
        if inst == "l32r" and LITBASE & 0x1 == 1:
            _, target = self._decode_l32r(LITBASE, addr, data)
            # Explicit formatting: hex() appends "L" to Python 2 longs.
            args[1] = "0x{0:x}".format(target)
        tokens = [makeToken("inst", inst), makeToken("sep", " ")]
        for position, arg in enumerate(args):
            if position != 0:
                tokens.append(makeToken("sep", ", "))
            if arg.startswith("0x"):
                tokens.append(makeToken("addr", arg))
            elif arg.isdigit():
                tokens.append(makeToken("int", arg))
            else:
                tokens.append(makeToken("reg", arg))
        if self.VERBOSE_IL:
            esil = self._get_esil(data, addr)
            tokens.append(makeToken("sep", " "))
            tokens.append(makeToken("text", "esil='" + esil + "'"))
        return tokens, self._inst_length(inst)

    def force_label(self, il, a):
        """Return the IL label for address ``a``, adding one if needed.

        Returns ``None`` if no label can be obtained even after adding one.
        Every caller in this class handles that case; the previous version
        recursed without bound instead.
        """
        t = il.get_label_for_address(self, a)
        if t is None:
            t = il.add_label_for_address(self, a)
            if t is None:
                t = il.get_label_for_address(self, a)
        return t

    def goto_or_jmp(self, il, a):
        """Append a ``goto`` to the label for ``a``, or a plain jump if none."""
        t = self.force_label(il, a)
        if t is None:
            il.append(il.jump(il.const_pointer(4, a)))
        else:
            il.append(il.goto(t))

    def get_instruction_low_level_il(self, data, addr, il):
        """Lift the instruction at ``addr`` into ``il``.

        Returns the instruction length, or ``None`` for an illegal
        instruction. Instructions not handled explicitly are lifted from
        their ESIL; if r2 yields no ESIL, ``unimplemented`` is emitted.
        """
        inst, args = self._get_asm(data, addr)
        if inst == "ill":
            return None
        l = self._inst_length(inst)
        # The original test was `inst in ("jx")`, a substring match on the
        # string "jx", so "j" was always lowered by this branch as well (its
        # dedicated branch further down was unreachable). Keep that lowering
        # but spell it out, so "" and "x" no longer match.
        if inst in ("jx", "j"):
            if args[0] in self.regs:
                il.append(il.jump(il.reg(4, args[0])))
            else:
                self.goto_or_jmp(il, int(args[0], 16))
            return l
        elif inst.startswith("call"):
            # Numeric suffix of call<N>/callx<N>: the window increment.
            spilled_regs = int(inst[5 if inst.startswith("callx") else 4:])
            # The author left a full model of the windowed call disabled
            # (push a0..a<N-1>, shift the remaining registers down, store the
            # return address in a<N>, and undo all of it after the call).
            # Only PS.CALLINC is recorded, and only in verbose mode.
            if spilled_regs != 0 and self.VERBOSE_IL:
                il.append(il.set_reg(1, "PS.CALLINC", il.const(1, spilled_regs)))
            if inst.startswith("callx"):
                target = il.reg(4, args[0])
            else:
                target = il.const_pointer(4, int(args[0], 16))
            il.append(il.call(target))
            return l
        elif inst in ("ret", "retw", "ret.n", "retw.n"):
            il.append(il.ret(il.reg(4, "a0")))
            return l
        elif inst in ("loopgtz", "loopnez", "loop"):
            lbegin = addr + l
            lend = int(args[1], 16)
            r = il.reg(4, args[0])
            lcount = il.sub(4, r, il.const(4, 1))
            # lend must come before lbegin for loop detection to work lower down
            if self.VERBOSE_IL:
                il.append(il.set_reg(4, "lend", il.const_pointer(4, lend)))
                il.append(il.set_reg(4, "lbegin", il.const_pointer(4, lbegin)))
            # NOTE(review): lcount is written outside the VERBOSE_IL guard
            # because the loop-end check below always reads it -- confirm
            # against upstream.
            il.append(il.set_reg(4, "lcount", lcount))
            if inst in ("loopgtz", "loopnez"):
                t = self.force_label(il, lbegin)
                f = self.force_label(il, lend)
                set_t = False
                set_f = False
                if t is None:
                    set_t = True
                    t = LowLevelILLabel()
                if f is None:
                    set_f = True
                    f = LowLevelILLabel()
                # Enter the loop only for a non-zero (loopnez) or strictly
                # positive (loopgtz) count. The previous ">= 0" tests were
                # always true for the unsigned case and admitted zero for
                # the signed one.
                if inst == "loopnez":
                    cond = il.compare_not_equal(4, r, il.const(4, 0))
                else:
                    cond = il.compare_signed_greater_than(4, r, il.const(4, 0))
                il.append(il.if_expr(cond, t, f))
                if set_f:
                    il.mark_label(f)
                    self.goto_or_jmp(il, lend)
                if set_t:
                    il.mark_label(t)
                    # fallthrough
            with self._looplock:
                self.loops[lend] = lbegin
            return l
        elif inst == "entry":
            # Entry doesn't *do* anything, basically
            il.append(il.intrinsic([], "entry", []))
            return l
        elif inst == "memw":
            il.append(il.intrinsic([], "memw", []))
            return l
        elif inst == "l32r" and LITBASE & 0x1 == 1:
            # override buggy l32r in radare
            reg_name, target = self._decode_l32r(LITBASE, addr, data)
            il.append(il.set_reg(4, reg_name,
                                 il.load(4, il.const_pointer(4, target))))
            return l

        esil = self._get_esil(data[0:l], addr)
        if esil == "":
            il.append(il.unimplemented())
            return l
        parts = esil.split(",")
        # For basic instructions, interpret the ESIL
        self.esil_to_llil(inst, parts, il, addr, l)

        # If a previously lifted loop instruction ends right after this
        # instruction, emit the branch back to the loop start.
        lbegin = None
        n = addr + l
        with self._looplock:
            if n in self.loops:
                lbegin = self.loops[n]
        if lbegin is not None:
            # NOTE(review): nothing in this class decrements lcount, so this
            # condition does not change between iterations -- confirm intent.
            cond = il.compare_unsigned_greater_than(
                4, il.reg(4, "lcount"), il.const(4, 0))
            f = self.force_label(il, n)
            t = self.force_label(il, lbegin)
            set_f = False
            set_t = False
            if f is None:
                set_f = True
                f = LowLevelILLabel()
            if t is None:
                set_t = True
                t = LowLevelILLabel()
            il.append(il.if_expr(cond, t, f))
            if set_t:
                il.mark_label(t)
                self.goto_or_jmp(il, lbegin)
            if set_f:
                il.mark_label(f)
                # fallthrough
        return l

    def esil_to_llil(self, inst, parts, il, addr, l):
        """Translate ESIL tokens into LLIL with a basic stack machine.

        :param inst: mnemonic, used to pick the branch-offset width.
        :param parts: ESIL tokens (the ESIL string split on ``,``).
        :param il: LLIL function to append to. A ``ThreaderILDuck`` is passed
            for the speculative pass over a conditional body.
        :param addr: address of the instruction.
        :param l: length of the instruction in bytes.
        :raises ValueError: on an unsupported token, an unsupported store
            destination or an invalid load size.
        """
        stack = []
        label_stack = []
        skip_to_close = False

        # pop for reading - interprets the PC register as
        # the value of the next instruction
        def popr():
            r = stack.pop()
            if r == "pc":
                return il.const_pointer(4, addr + l)
            return r

        for index, token in enumerate(parts):
            # No idea why I need this
            if token == "" and index == len(parts) - 1:
                break
            if skip_to_close and token != "}":
                continue
            if token == "$$":
                stack.append(il.const_pointer(4, addr))
                continue
            if token == "pc":
                stack.append("pc")
                continue
            if token in self.regs:
                stack.append(il.reg(4, token))
                continue
            if token in self._esil_to_llil:
                dst = popr()
                src = popr()
                builder = getattr(il, self._esil_to_llil[token])
                stack.append(builder(4, dst, src))
                continue
            if token == "$z" or token == "!":
                # The operand is read but left on the stack.
                stack.append(il.compare_equal(4, stack[-1], il.const(4, 0)))
                continue
            if token == "DUP":
                stack.append(stack[-1])
                continue
            if token == "=":
                dst = stack.pop()
                src = popr()
                if dst == "pc":
                    srci = il[src]
                    if srci.operation == LowLevelILOperation.LLIL_CONST:
                        self.goto_or_jmp(il, srci.operands[0])
                        continue
                    il.append(il.jump(src))
                    continue
                dst = il[dst]
                if dst.operation != LowLevelILOperation.LLIL_REG:
                    raise ValueError("unimplemented il store to {0!r}".format(dst))
                il.append(il.set_reg(4, dst.operands[0].name, src))
                continue
            if token == "+=":
                dste = stack.pop()
                src = popr()
                if dste == "pc":
                    srci = il[src]
                    # Note in ESIL this is w.r.t. the *next* address
                    # For narrow branch instructions, it calculates the pc
                    # relative wrong in the ESIL and uses 3 bytes anyway
                    # also, srci.operands[0] is 8 bits *signed* but ESIL
                    # doesn't seem to reflect this?
                    # Note: except beqz, bnez, bgez, bltz which have 12 bits
                    # *signed* and beqz.n and bnez.n which are 4 bits unsigned
                    if srci.operation == LowLevelILOperation.LLIL_CONST:
                        offset = srci.operands[0]
                        if inst in ("beqz", "bnez", "bgez", "bltz"):
                            if offset > (1 << 11) - 1:
                                offset = ((1 << 12) - offset) * -1
                        elif inst in ("beqz.n", "bnez.n"):
                            pass
                        elif offset > 127:
                            offset = (256 - offset) * -1
                        self.goto_or_jmp(il, offset + addr + 3)
                    else:
                        il.append(il.jump(
                            il.add(4, il.const_pointer(4, addr + 3), src)))
                    continue
                dst = il[dste]
                if dst.operation != LowLevelILOperation.LLIL_REG:
                    raise ValueError("unimplemented il store to {0!r}".format(dst))
                il.append(il.set_reg(4, dst.operands[0].name,
                                     il.add(4, dste, src)))
                continue
            if token.startswith("=["):
                sz = int(token[2:-1])
                dst = popr()
                src = popr()
                il.append(il.store(sz, dst, src))
                continue
            if token.startswith("["):
                sz = int(token[1:-1])
                if sz == 1 or sz == 2:
                    stack.append(il.zero_extend(4, il.load(sz, popr())))
                elif sz == 4:
                    stack.append(il.load(4, popr()))
                else:
                    raise ValueError("Invalid load size {0}".format(sz))
                continue

            # Base 16 constants. A separate name keeps the enumerate index
            # intact (the original reused `i` for both).
            try:
                value = int(token, 16)
            except ValueError:
                pass
            else:
                stack.append(il.const(4, value))
                continue

            # Base 10 constants
            try:
                value = int(token)
            except ValueError:
                pass
            else:
                stack.append(il.const(4, value))
                continue

            # Hack to support branch instructions
            if token == "?{":
                t = None
                set_t = False
                end = parts.index("}", index + 1)
                f = None
                # Don't create useless labels if this is at the end
                # of the instruction (e.g. a branch)
                if end == len(parts) - 1:
                    f = self.force_label(il, addr + l)
                if f is None:
                    f = LowLevelILLabel()
                    label_stack.append(f)
                inner = parts[index + 1:end]
                # Speculatively run the conditional body against a stand-in
                # IL object to see whether it resolves to a branch target.
                fakeil = ThreaderILDuck()
                try:
                    self.esil_to_llil(inst, inner, fakeil, addr, l)
                except AttributeError:
                    # Deliberate best effort: the prediction just fails.
                    pass
                except IndexError:
                    # Tried to access the stack outside! Bad!
                    pass
                except Exception as e:
                    log.log_error("{0} {1}".format(e, inner))
                    # Bare raise keeps the original traceback.
                    raise
                else:
                    if fakeil.target is not None:
                        t = self.force_label(il, fakeil.target)
                if t is None:
                    set_t = True
                    t = LowLevelILLabel()
                il.append(il.if_expr(stack.pop(), t, f))
                if set_t:
                    il.mark_label(t)
                elif len(label_stack) == 0:
                    break
                else:
                    skip_to_close = True
                continue
            if token == "}":
                if len(label_stack) == 0:
                    break
                il.mark_label(label_stack.pop())
                skip_to_close = False
                continue

            # The original formatted an undefined name `esil` here, so this
            # path raised NameError; rebuild the ESIL text from the tokens.
            raise ValueError("Unimplemented esil {0} in {1} for {2}".format(
                token, ",".join(parts), inst))
class Z80(Architecture):
    """Z80 architecture: disassembly text and branch info only.

    Disassembly comes from ``skwrapper.disasm``; branch information and text
    tokens are derived by pattern matching on the returned instruction text.
    No LLIL is produced.
    """

    name = 'Z80'
    address_size = 2
    default_int_size = 1
    instr_alignment = 1
    max_instr_length = 4

    # register related stuff
    regs = {
        # main registers
        'AF': RegisterInfo('AF', 2),
        'BC': RegisterInfo('BC', 2),
        'DE': RegisterInfo('DE', 2),
        'HL': RegisterInfo('HL', 2),

        # alternate registers
        'AF_': RegisterInfo('AF_', 2),
        'BC_': RegisterInfo('BC_', 2),
        'DE_': RegisterInfo('DE_', 2),
        'HL_': RegisterInfo('HL_', 2),

        # main registers (sub)
        'A': RegisterInfo('AF', 1, 1),
        'B': RegisterInfo('BC', 1, 1),
        'C': RegisterInfo('BC', 1, 0),
        'D': RegisterInfo('DE', 1, 1),
        'E': RegisterInfo('DE', 1, 0),
        'H': RegisterInfo('HL', 1, 1),
        'L': RegisterInfo('HL', 1, 0),
        'Flags': RegisterInfo('AF', 0),

        # alternate registers (sub)
        'A_': RegisterInfo('AF_', 1, 1),
        'B_': RegisterInfo('BC_', 1, 1),
        'C_': RegisterInfo('BC_', 1, 0),
        'D_': RegisterInfo('DE_', 1, 1),
        'E_': RegisterInfo('DE_', 1, 0),
        'H_': RegisterInfo('HL_', 1, 1),
        'L_': RegisterInfo('HL_', 1, 0),
        'Flags_': RegisterInfo('AF_', 0),

        # index registers
        'IX': RegisterInfo('IX', 2),
        'IY': RegisterInfo('IY', 2),
        'SP': RegisterInfo('SP', 2),

        # other registers
        'I': RegisterInfo('I', 1),
        'R': RegisterInfo('R', 1),

        # program counter
        'PC': RegisterInfo('PC', 2),

        # status
        'status': RegisterInfo('status', 1)
    }

    stack_pointer = "SP"

    # internal: names as they appear in the disassembly text, used only to
    # classify text tokens
    cond_strs = ['C', 'NC', 'Z', 'NZ', 'M', 'P', 'PE', 'PO']
    reg8_strs = list('ABDHCELIR') + ['A\'', 'B\'', 'C\'', 'D\'', 'E\'', 'H\'', 'L\'', 'Flags', 'Flags\'', 'IXh', 'IXl', 'IYh', 'IYl']
    reg16_strs = ['AF', 'BC', 'DE', 'HL', 'AF', 'AF\'', 'BC\'', 'DE\'', 'HL\'', 'IX', 'IY', 'SP', 'PC']
    reg_strs = reg8_strs + reg16_strs

    def get_instruction_info(self, data, addr):
        """Describe the length and control flow of the instruction at ``addr``.

        Returns ``None`` when the disassembler reports a zero length,
        otherwise an ``InstructionInfo`` with branches added according to
        the first pattern that matches the instruction text.
        """
        (instrTxt, instrLen) = skwrapper.disasm(data, addr)
        if instrLen == 0:
            return None
        result = InstructionInfo()
        result.length = instrLen

        rccs = r'(?:C|NC|Z|NZ|M|P|PE|PO)'
        regexes = [
            r'^(?:JP|JR) '+rccs+r',\$(.*)$',   # 0: conditional jump          eg: JP PE,$DEAD
            r'^(?:JP|JR) \$(.*)$',             # 1: unconditional jump        eg: JP $DEAD
            r'^(?:JP|JR) \((?:HL|IX|IY)\)$',   # 2: unconditional indirect    eg: JP (IX)
            r'^DJNZ \$(.*)$',                  # 3: dec, jump if not zero     eg: DJNZ $DEAD
            r'^CALL '+rccs+r',\$(.*)$',        # 4: conditional call          eg: CALL PE,$DEAD
            r'^CALL \$(.*)$',                  # 5: unconditional call        eg: CALL $DEAD
            r'^RET '+rccs+'$',                 # 6: conditional return
            r'^(?:RET|RETN|RETI)$',            # 7: return, return (nmi), return (interrupt)
        ]

        for (i, regex) in enumerate(regexes):
            m = re.match(regex, instrTxt)
            if not m:
                continue
            if i == 0 or i == 3:
                dest = int(m.group(1), 16)
                result.add_branch(BranchType.TrueBranch, dest)
                result.add_branch(BranchType.FalseBranch, addr + instrLen)
            elif i == 1:
                dest = int(m.group(1), 16)
                result.add_branch(BranchType.UnconditionalBranch, dest)
            elif i == 2:
                result.add_branch(BranchType.IndirectBranch)
            elif i == 4 or i == 5:
                dest = int(m.group(1), 16)
                result.add_branch(BranchType.CallDestination, dest)
            elif i == 6:
                pass  # conditional returns don't end block
            elif i == 7:
                result.add_branch(BranchType.FunctionReturn)
            # Only the first matching pattern is applied.
            break

        return result

    def get_instruction_text(self, data, addr):
        """Return ``(tokens, length)`` for the instruction at ``addr``.

        Returns ``None`` when the disassembler reports a zero length.

        :raises Exception: if an operand atom cannot be classified.
        """
        (instrTxt, instrLen) = skwrapper.disasm(data, addr)
        if instrLen == 0:
            return None

        result = []
        # delimiters are kept because they are in a capture group
        atoms = [t for t in re.split(r'([, ()\+])', instrTxt) if t]
        result.append(InstructionTextToken(InstructionTextTokenType.InstructionToken, atoms[0]))
        if atoms[1:]:
            result.append(InstructionTextToken(InstructionTextTokenType.TextToken, ' '))

        for atom in atoms[1:]:
            if not atom or atom == ' ':
                continue
            # PROBLEM: cond 'C' conflicts with register C
            # eg: "RET C" is it "RET <reg>" or "RET <cc>" ?
            # eg: "CALL C" is it "CALL <reg>" or "CALL C,$0000" ?
            elif atom == 'C' and atoms[0] in ['CALL', 'RET']:
                # flag, condition code
                result.append(InstructionTextToken(InstructionTextTokenType.TextToken, atom))
            elif atom in self.reg16_strs or atom in self.reg8_strs:
                result.append(InstructionTextToken(InstructionTextTokenType.RegisterToken, atom))
            elif atom in self.cond_strs:
                result.append(InstructionTextToken(InstructionTextTokenType.TextToken, atom))
            elif atom[0] == '#':
                result.append(InstructionTextToken(InstructionTextTokenType.IntegerToken, atom, int(atom[1:], 16)))
            elif atom[0] == '$':
                # '$' plus four hex digits is treated as a possible address,
                # any other width as a plain integer.
                if len(atom) == 5:
                    result.append(InstructionTextToken(InstructionTextTokenType.PossibleAddressToken, atom, int(atom[1:], 16)))
                else:
                    result.append(InstructionTextToken(InstructionTextTokenType.IntegerToken, atom, int(atom[1:], 16)))
            elif atom.isdigit():
                result.append(InstructionTextToken(InstructionTextTokenType.IntegerToken, atom, int(atom)))
            elif atom == '(':
                result.append(InstructionTextToken(InstructionTextTokenType.BeginMemoryOperandToken, atom))
            elif atom == ')':
                result.append(InstructionTextToken(InstructionTextTokenType.EndMemoryOperandToken, atom))
            elif atom == '+':
                result.append(InstructionTextToken(InstructionTextTokenType.TextToken, atom))
            elif atom == ',':
                result.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, atom))
            else:
                # The original formatted an undefined name `tok` here, which
                # raised NameError instead of this message.
                raise Exception('unfamiliar token: %s from instruction %s' % (atom, instrTxt))

        return result, instrLen

    def get_instruction_low_level_il(self, data, addr, il):
        """No lifting is implemented for Z80; always returns ``None``."""
        return None
class MCS48_Base(Architecture): address_size = 2 default_int_size = 1 instr_alignment = 1 max_instr_length = 2 regs = { 'A': RegisterInfo('A', 1), 'T': RegisterInfo('T', 1), 'PSW': RegisterInfo('PSW', 1), 'SP': RegisterInfo('SP', 1), } if WREG_REG: for reg in range(8): regs['R{}'.format(reg)] = RegisterInfo('R{}'.format(reg), 1) for reg in range(8): regs['R{}\''.format(reg)] = RegisterInfo('R{}\''.format(reg), 1) stack_pointer = 'SP' global_regs = ['T', 'PSW'] # PSW: CY, AC, F0, BS, 1, S2, S1, S0 # carry, aux carry, flag 0, bank select, stack pointer # BS:0, addr=0, BS:1, addr=24 flags = [ 'CY', # carry 'AC', # auxiliary carry 'F0', # flag 0 'BS', # bank switch 'DBF', 'F1', # flag 1 'T0', # test 0 'T1', # test 1 'TF', # timer flag 'INT' # interrupt ] # The first flag write type is ignored currently. # See: https://github.com/Vector35/binaryninja-api/issues/513 flag_write_types = ['', 'C'] flags_written_by_flag_write_type = { 'C': ['CY'], } flag_roles = { 'CY': FlagRole.CarryFlagRole, 'AC': FlagRole.HalfCarryFlagRole, 'F0': FlagRole.SpecialFlagRole, 'BS': FlagRole.SpecialFlagRole, 'DBF': FlagRole.SpecialFlagRole, 'F1': FlagRole.SpecialFlagRole, 'T0': FlagRole.SpecialFlagRole, 'T1': FlagRole.SpecialFlagRole, 'TF': FlagRole.SpecialFlagRole, 'INT': FlagRole.SpecialFlagRole, } #flags_required_for_flag_condition = {} instructions = [ # 0x00-0x0f [('NOP', 1), [], lambda self, il: il.nop()], None, [('OUTL', 1), ['BUS', 'A'], lambda self, il: il.reg(1, 'A')], # dummy read [('ADD', 2), ['A', '#IMM8'], lambda self, il, imm: il.set_reg(1, 'A', il.add(1, il.reg(1, 'A'), il.const(1, imm), 'C'))], [('JMP', 2), ['ADDR11'], lambda self, il, imm: branch(il, CODE_ADDR(0x000, imm))], [('EN', 1), ['I']], None, [('DEC', 1), ['A'], lambda self, il: il.set_reg(1, 'A', il.sub(1, il.reg(1, 'A'), il.const(1, 1)))], [('INS', 1), ['A', 'BUS'], lambda self, il: il.set_reg(1, 'A', il.unimplemented())], [('IN', 1), ['A', 'P1'], lambda self, il: il.set_reg(1, 'A', il.unimplemented())], [('IN', 1), 
['A', 'P2'], lambda self, il: il.set_reg(1, 'A', il.unimplemented())], None, [('MOVD', 1), ['A', 'P4'], lambda self, il: il.set_reg(1, 'A', il.unimplemented())], [('MOVD', 1), ['A', 'P5'], lambda self, il: il.set_reg(1, 'A', il.unimplemented())], [('MOVD', 1), ['A', 'P6'], lambda self, il: il.set_reg(1, 'A', il.unimplemented())], [('MOVD', 1), ['A', 'P7'], lambda self, il: il.set_reg(1, 'A', il.unimplemented())], # 0x10-0x1f [('INC', 1), ['@R0'], lambda self, il: il.store(1, self.wreg_get(il, 0), il.add(1, il.load(1, self.wreg_get(il, 0)), il.const(1, 1)))], [('INC', 1), ['@R1'], lambda self, il: il.store(1, self.wreg_get(il, 1), il.add(1, il.load(1, self.wreg_get(il, 1)), il.const(1, 1)))], [('JB0', 2), ['ADDR8'], lambda self, il, imm: cond_branch(il, CODE_ADDR(il.current_address, imm), 'B', 0)], [('ADDC', 2), ['A', '#IMM8'], lambda self, il, imm: il.set_reg(1, 'A', il.add_carry(1, il.reg(1, 'A'), il.const(1, imm), il.flag('CY'), 'C'))], [('CALL', 2), ['ADDR11'], lambda self, il, imm: call_helper(il, CODE_ADDR(0x000, imm))], [('DIS', 1), ['I']], [('JTF', 2), ['ADDR8'], lambda self, il, imm: cond_branch(il, CODE_ADDR(il.current_address, imm), 'TF', 1)], [('INC', 1), ['A'], lambda self, il: il.set_reg(1, 'A', il.add(1, il.reg(1, 'A'), il.const(1, 1)))], [('INC', 1), ['R0'], lambda self, il: self.wreg_set(il, 0, il.add(1, self.wreg_get(il, 0), il.const(1, 1)))], [('INC', 1), ['R1'], lambda self, il: self.wreg_set(il, 1, il.add(1, self.wreg_get(il, 1), il.const(1, 1)))], [('INC', 1), ['R2'], lambda self, il: self.wreg_set(il, 2, il.add(1, self.wreg_get(il, 2), il.const(1, 1)))], [('INC', 1), ['R3'], lambda self, il: self.wreg_set(il, 3, il.add(1, self.wreg_get(il, 3), il.const(1, 1)))], [('INC', 1), ['R4'], lambda self, il: self.wreg_set(il, 4, il.add(1, self.wreg_get(il, 4), il.const(1, 1)))], [('INC', 1), ['R5'], lambda self, il: self.wreg_set(il, 5, il.add(1, self.wreg_get(il, 5), il.const(1, 1)))], [('INC', 1), ['R6'], lambda self, il: self.wreg_set(il, 6, 
il.add(1, self.wreg_get(il, 6), il.const(1, 1)))], [('INC', 1), ['R7'], lambda self, il: self.wreg_set(il, 7, il.add(1, self.wreg_get(il, 7), il.const(1, 1)))], # 0x20-0x2f [('XCH', 1), ['A', '@R0'], lambda self, il: [ il.set_reg(1, LLIL_TEMP(1), il.reg(1, 'A')), il.set_reg(1, 'A', il.load(1, self.wreg_get(il, 0))), self.wreg_set(il, 0, il.reg(1, LLIL_TEMP(1))) ]], [('XCH', 1), ['A', '@R1'], lambda self, il: [ il.set_reg(1, LLIL_TEMP(1), il.reg(1, 'A')), il.set_reg(1, 'A', il.load(1, self.wreg_get(il, 1))), self.wreg_set(il, 1, il.reg(1, LLIL_TEMP(1))) ]], None, [('MOV', 2), ['A', '#IMM8'], lambda self, il, imm: il.set_reg(1, 'A', il.const(1, imm))], [('JMP', 2), ['ADDR11'], lambda self, il, imm: branch(il, CODE_ADDR(0x100, imm))], [('EN', 1), ['TCNTI']], [('JNT0', 2), ['ADDR8'], lambda self, il, imm: cond_branch(il, CODE_ADDR(il.current_address, imm), 'T0', 0)], [('CLR', 1), ['A'], lambda self, il: il.set_reg(1, 'A', il.const(1, 0))], [('XCH', 1), ['A', 'R0'], lambda self, il: [il.set_reg(1, LLIL_TEMP(1), il.reg(1, 'A')), il.set_reg(1, 'A', self.wreg_get(il, 0)), self.wreg_set(il, 0, il.reg(1, LLIL_TEMP(1)))]], [('XCH', 1), ['A', 'R1'], lambda self, il: [il.set_reg(1, LLIL_TEMP(1), il.reg(1, 'A')), il.set_reg(1, 'A', self.wreg_get(il, 1)), self.wreg_set(il, 1, il.reg(1, LLIL_TEMP(1)))]], [('XCH', 1), ['A', 'R2'], lambda self, il: [il.set_reg(1, LLIL_TEMP(1), il.reg(1, 'A')), il.set_reg(1, 'A', self.wreg_get(il, 2)), self.wreg_set(il, 2, il.reg(1, LLIL_TEMP(1)))]], [('XCH', 1), ['A', 'R3'], lambda self, il: [il.set_reg(1, LLIL_TEMP(1), il.reg(1, 'A')), il.set_reg(1, 'A', self.wreg_get(il, 3)), self.wreg_set(il, 3, il.reg(1, LLIL_TEMP(1)))]], [('XCH', 1), ['A', 'R4'], lambda self, il: [il.set_reg(1, LLIL_TEMP(1), il.reg(1, 'A')), il.set_reg(1, 'A', self.wreg_get(il, 4)), self.wreg_set(il, 4, il.reg(1, LLIL_TEMP(1)))]], [('XCH', 1), ['A', 'R5'], lambda self, il: [il.set_reg(1, LLIL_TEMP(1), il.reg(1, 'A')), il.set_reg(1, 'A', self.wreg_get(il, 5)), self.wreg_set(il, 
5, il.reg(1, LLIL_TEMP(1)))]], [('XCH', 1), ['A', 'R6'], lambda self, il: [il.set_reg(1, LLIL_TEMP(1), il.reg(1, 'A')), il.set_reg(1, 'A', self.wreg_get(il, 6)), self.wreg_set(il, 6, il.reg(1, LLIL_TEMP(1)))]], [('XCH', 1), ['A', 'R7'], lambda self, il: [il.set_reg(1, LLIL_TEMP(1), il.reg(1, 'A')), il.set_reg(1, 'A', self.wreg_get(il, 7)), self.wreg_set(il, 7, il.reg(1, LLIL_TEMP(1)))]], # 0x30-0x3f [('XCHD', 1), ['A', '@R0']], [('XCHD', 1), ['A', '@R1']], [('JB1', 2), ['ADDR8'], lambda self, il, imm: cond_branch(il, CODE_ADDR(il.current_address, imm), 'B', 1)], None, [('CALL', 2), ['ADDR11'], lambda self, il, imm: call_helper(il, CODE_ADDR(0x100, imm))], [('DIS', 1), ['TCNTI']], [('JT0', 2), ['ADDR8'], lambda self, il, imm: cond_branch(il, CODE_ADDR(il.current_address, imm), 'T0', 1)], [('CPL', 1), ['A'], lambda self, il: il.set_reg(1, 'A', il.not_expr(1, il.reg(1, 'A')))], None, [('OUTL', 1), ['P1', 'A'], lambda self, il: il.reg(1, 'A')], # dummy read [('OUTL', 1), ['P2', 'A'], lambda self, il: il.reg(1, 'A')], # dummy read None, [('MOVD', 1), ['P4', 'A'], lambda self, il: il.reg(1, 'A')], # dummy read [('MOVD', 1), ['P5', 'A'], lambda self, il: il.reg(1, 'A')], # dummy read [('MOVD', 1), ['P6', 'A'], lambda self, il: il.reg(1, 'A')], # dummy read [('MOVD', 1), ['P7', 'A'], lambda self, il: il.reg(1, 'A')], # dummy read # 0x40-0x4f [('ORL', 1), ['A', '@R0'], lambda self, il: il.set_reg(1, 'A', il.or_expr(1, il.reg(1, 'A'), il.load(1, self.wreg_get(il, 0))))], [('ORL', 1), ['A', '@R1'], lambda self, il: il.set_reg(1, 'A', il.or_expr(1, il.reg(1, 'A'), il.load(1, self.wreg_get(il, 1))))], [('MOV', 1), ['A', 'T'], lambda self, il: il.set_reg(1, 'A', il.reg(1, 'T'))], [('ORL', 2), ['A', '#IMM8'], lambda self, il, imm: il.set_reg(1, 'A', il.or_expr(1, il.reg(1, 'A'), il.const(1, imm)))], [('JMP', 2), ['ADDR11'], lambda self, il, imm: branch(il, CODE_ADDR(0x200, imm))], [('STRT', 1), ['CNT']], [('JNT1', 2), ['ADDR8'], lambda self, il, imm: cond_branch(il, 
CODE_ADDR(il.current_address, imm), 'T1', 0)], [('SWAP', 1), ['A'], lambda self, il: il.set_reg(1, 'A', il.rotate_left(1, il.reg(1, 'A'), il.const(1, 4)))], [('ORL', 1), ['A', 'R0'], lambda self, il: il.set_reg(1, 'A', il.or_expr(1, il.reg(1, 'A'), self.wreg_get(il, 0)))], [('ORL', 1), ['A', 'R1'], lambda self, il: il.set_reg(1, 'A', il.or_expr(1, il.reg(1, 'A'), self.wreg_get(il, 1)))], [('ORL', 1), ['A', 'R2'], lambda self, il: il.set_reg(1, 'A', il.or_expr(1, il.reg(1, 'A'), self.wreg_get(il, 2)))], [('ORL', 1), ['A', 'R3'], lambda self, il: il.set_reg(1, 'A', il.or_expr(1, il.reg(1, 'A'), self.wreg_get(il, 3)))], [('ORL', 1), ['A', 'R4'], lambda self, il: il.set_reg(1, 'A', il.or_expr(1, il.reg(1, 'A'), self.wreg_get(il, 4)))], [('ORL', 1), ['A', 'R5'], lambda self, il: il.set_reg(1, 'A', il.or_expr(1, il.reg(1, 'A'), self.wreg_get(il, 5)))], [('ORL', 1), ['A', 'R6'], lambda self, il: il.set_reg(1, 'A', il.or_expr(1, il.reg(1, 'A'), self.wreg_get(il, 6)))], [('ORL', 1), ['A', 'R7'], lambda self, il: il.set_reg(1, 'A', il.or_expr(1, il.reg(1, 'A'), self.wreg_get(il, 7)))], # 0x50-0x5f [('ANL', 1), ['A', '@R0'], lambda self, il: il.set_reg(1, 'A', il.and_expr(1, il.reg(1, 'A'), il.load(1, self.wreg_get(il, 0))))], [('ANL', 1), ['A', '@R1'], lambda self, il: il.set_reg(1, 'A', il.and_expr(1, il.reg(1, 'A'), il.load(1, self.wreg_get(il, 1))))], [('JB2', 2), ['ADDR8'], lambda self, il, imm: cond_branch(il, CODE_ADDR(il.current_address, imm), 'B', 2)], [('ANL', 2), ['A', '#IMM8'], lambda self, il, imm: il.set_reg(1, 'A', il.and_expr(1, il.reg(1, 'A'), il.const(1, imm)))], [('CALL', 2), ['ADDR11'], lambda self, il, imm: call_helper(il, CODE_ADDR(0x200, imm))], [('STRT', 1), ['T'], lambda self, il: il.reg(1, 'T')], # DUMMY [('JT1', 2), ['ADDR8'], lambda self, il, imm: cond_branch(il, CODE_ADDR(il.current_address, imm), 'T1', 1)], [('DA', 1), ['A']], [('ANL', 1), ['A', 'R0'], lambda self, il: il.set_reg(1, 'A', il.and_expr(1, il.reg(1, 'A'), self.wreg_get(il, 0)))], 
[('ANL', 1), ['A', 'R1'], lambda self, il: il.set_reg(1, 'A', il.and_expr(1, il.reg(1, 'A'), self.wreg_get(il, 1)))], [('ANL', 1), ['A', 'R2'], lambda self, il: il.set_reg(1, 'A', il.and_expr(1, il.reg(1, 'A'), self.wreg_get(il, 2)))], [('ANL', 1), ['A', 'R3'], lambda self, il: il.set_reg(1, 'A', il.and_expr(1, il.reg(1, 'A'), self.wreg_get(il, 3)))], [('ANL', 1), ['A', 'R4'], lambda self, il: il.set_reg(1, 'A', il.and_expr(1, il.reg(1, 'A'), self.wreg_get(il, 4)))], [('ANL', 1), ['A', 'R5'], lambda self, il: il.set_reg(1, 'A', il.and_expr(1, il.reg(1, 'A'), self.wreg_get(il, 5)))], [('ANL', 1), ['A', 'R6'], lambda self, il: il.set_reg(1, 'A', il.and_expr(1, il.reg(1, 'A'), self.wreg_get(il, 6)))], [('ANL', 1), ['A', 'R7'], lambda self, il: il.set_reg(1, 'A', il.and_expr(1, il.reg(1, 'A'), self.wreg_get(il, 7)))], # 0x60-0x6f [('ADD', 1), ['A', '@R0'], lambda self, il: il.set_reg(1, 'A', il.add(1, il.reg(1, 'A'), il.load(1, self.wreg_get(il, 0)), 'C'))], [('ADD', 1), ['A', '@R1'], lambda self, il: il.set_reg(1, 'A', il.add(1, il.reg(1, 'A'), il.load(1, self.wreg_get(il, 1)), 'C'))], [('MOV', 1), ['T', 'A'], lambda self, il: il.set_reg(1, 'T', il.reg(1, 'A'))], None, [('JMP', 2), ['ADDR11'], lambda self, il, imm: branch(il, CODE_ADDR(0x300, imm))], [('STOP', 1), ['TCNT']], None, [('RRC', 1), ['A'], lambda self, il: il.set_reg(1, 'A', il.rotate_right_carry(1, il.reg(1, 'A'), il.const(1, 1), il.flag('CY'), 'C'))], [('ADD', 1), ['A', 'R0'], lambda self, il: il.set_reg(1, 'A', il.add(1, il.reg(1, 'A'), self.wreg_get(il, 0), 'C'))], [('ADD', 1), ['A', 'R1'], lambda self, il: il.set_reg(1, 'A', il.add(1, il.reg(1, 'A'), self.wreg_get(il, 1), 'C'))], [('ADD', 1), ['A', 'R2'], lambda self, il: il.set_reg(1, 'A', il.add(1, il.reg(1, 'A'), self.wreg_get(il, 2), 'C'))], [('ADD', 1), ['A', 'R3'], lambda self, il: il.set_reg(1, 'A', il.add(1, il.reg(1, 'A'), self.wreg_get(il, 3), 'C'))], [('ADD', 1), ['A', 'R4'], lambda self, il: il.set_reg(1, 'A', il.add(1, il.reg(1, 'A'), 
self.wreg_get(il, 4), 'C'))], [('ADD', 1), ['A', 'R5'], lambda self, il: il.set_reg(1, 'A', il.add(1, il.reg(1, 'A'), self.wreg_get(il, 5), 'C'))], [('ADD', 1), ['A', 'R6'], lambda self, il: il.set_reg(1, 'A', il.add(1, il.reg(1, 'A'), self.wreg_get(il, 6), 'C'))], [('ADD', 1), ['A', 'R7'], lambda self, il: il.set_reg(1, 'A', il.add(1, il.reg(1, 'A'), self.wreg_get(il, 7), 'C'))], # 0x70-0x7f [('ADDC', 1), ['A', '@R0'], lambda self, il: il.set_reg(1, 'A', il.add_carry(1, il.reg(1, 'A'), il.load(1, self.wreg_get(il, 0)), il.flag('CY'), 'C'))], [('ADDC', 1), ['A', '@R1'], lambda self, il: il.set_reg(1, 'A', il.add_carry(1, il.reg(1, 'A'), il.load(1, self.wreg_get(il, 1)), il.flag('CY'), 'C'))], [('JB3', 2), ['ADDR8'], lambda self, il, imm: cond_branch(il, CODE_ADDR(il.current_address, imm), 'B', 3)], None, [('CALL', 2), ['ADDR11'], lambda self, il, imm: call_helper(il, CODE_ADDR(0x300, imm))], [('ENT0', 1), ['CLK']], [('JF1', 2), ['ADDR8'], lambda self, il, imm: cond_branch(il, CODE_ADDR(il.current_address, imm), 'F1', 1)], [('RR', 1), ['A'], lambda self, il: il.set_reg(1, 'A', il.rotate_right(1, il.reg(1, 'A'), il.const(1, 1)))], [('ADDC', 1), ['A', 'R0'], lambda self, il: il.set_reg(1, 'A', il.add_carry(1, il.reg(1, 'A'), self.wreg_get(il, 0), il.flag('CY'), 'C'))], [('ADDC', 1), ['A', 'R1'], lambda self, il: il.set_reg(1, 'A', il.add_carry(1, il.reg(1, 'A'), self.wreg_get(il, 1), il.flag('CY'), 'C'))], [('ADDC', 1), ['A', 'R2'], lambda self, il: il.set_reg(1, 'A', il.add_carry(1, il.reg(1, 'A'), self.wreg_get(il, 2), il.flag('CY'), 'C'))], [('ADDC', 1), ['A', 'R3'], lambda self, il: il.set_reg(1, 'A', il.add_carry(1, il.reg(1, 'A'), self.wreg_get(il, 3), il.flag('CY'), 'C'))], [('ADDC', 1), ['A', 'R4'], lambda self, il: il.set_reg(1, 'A', il.add_carry(1, il.reg(1, 'A'), self.wreg_get(il, 4), il.flag('CY'), 'C'))], [('ADDC', 1), ['A', 'R5'], lambda self, il: il.set_reg(1, 'A', il.add_carry(1, il.reg(1, 'A'), self.wreg_get(il, 5), il.flag('CY'), 'C'))], [('ADDC', 
1), ['A', 'R6'], lambda self, il: il.set_reg(1, 'A', il.add_carry(1, il.reg(1, 'A'), self.wreg_get(il, 6), il.flag('CY'), 'C'))], [('ADDC', 1), ['A', 'R7'], lambda self, il: il.set_reg(1, 'A', il.add_carry(1, il.reg(1, 'A'), self.wreg_get(il, 7), il.flag('CY'), 'C'))], # 0x80-0x8f [('MOVX', 1), ['A', '@R0']], [('MOVX', 1), ['A', '@R1']], None, [('RET', 1), [], lambda self, il: ret_helper(il, False)], [('JMP', 2), ['ADDR11'], lambda self, il, imm: branch(il, CODE_ADDR(0x400, imm))], [('CLR', 1), ['F0'], lambda self, il: il.set_flag('F0', il.const(0, 0))], [('JNI', 2), ['ADDR8'], lambda self, il, imm: cond_branch(il, CODE_ADDR(il.current_address, imm), 'INT', 0)], None, [('ORL', 2), ['BUS', '#IMM8']], [('ORL', 2), ['P1', '#IMM8']], [('ORL', 2), ['P2', '#IMM8']], None, [('ORLD', 1), ['P4', 'A'], lambda self, il: il.reg(1, 'A')], # dummy read [('ORLD', 1), ['P5', 'A'], lambda self, il: il.reg(1, 'A')], # dummy read [('ORLD', 1), ['P6', 'A'], lambda self, il: il.reg(1, 'A')], # dummy read [('ORLD', 1), ['P7', 'A'], lambda self, il: il.reg(1, 'A')], # dummy read # 0x90-0x9f [('MOVX', 1), ['@R0', 'A']], [('MOVX', 1), ['@R1', 'A']], [('JB4', 2), ['ADDR8'], lambda self, il, imm: cond_branch(il, CODE_ADDR(il.current_address, imm), 'B', 4)], [('RETR', 1), [], lambda self, il: ret_helper(il, True)], [('CALL', 2), ['ADDR11'], lambda self, il, imm: call_helper(il, CODE_ADDR(0x400, imm))], [('CPL', 1), ['F0'], lambda self, il: il.set_flag('F0', il.not_expr(0, il.flag('F0')))], [('JNZ', 2), ['ADDR8'], lambda self, il, imm: cond_branch(il, CODE_ADDR(il.current_address, imm), 'NZ')], [('CLR', 1), ['C'], lambda self, il: il.set_flag('CY', il.const(0, 0))], [('ANL', 2), ['BUS', '#IMM8']], [('ANL', 2), ['P1', '#IMM8']], [('ANL', 2), ['P2', '#IMM8']], None, [('ANLD', 1), ['P4', 'A'], lambda self, il: il.reg(1, 'A')], # dummy read [('ANLD', 1), ['P5', 'A'], lambda self, il: il.reg(1, 'A')], # dummy read [('ANLD', 1), ['P6', 'A'], lambda self, il: il.reg(1, 'A')], # dummy read [('ANLD', 
1), ['P7', 'A'], lambda self, il: il.reg(1, 'A')], # dummy read # 0xa0-0xaf [('MOV', 1), ['@R0', 'A'], lambda self, il: il.store(1, self.wreg_get(il, 0), il.reg(1, 'A'))], [('MOV', 1), ['@R1', 'A'], lambda self, il: il.store(1, self.wreg_get(il, 1), il.reg(1, 'A'))], None, [('MOVP', 1), ['A', '@A'], lambda self, il: il.set_reg(1, 'A', il.load(1, il.or_expr(2, il.const(2, CODE_ADDR(il.current_address + 1, 0)), il.reg(1, 'A'))))], [('JMP', 2), ['ADDR11'], lambda self, il, imm: branch(il, CODE_ADDR(0x500, imm))], [('CLR', 1), ['F1'], lambda self, il: il.set_flag('F1', il.const(0, 0))], None, [('CPL', 1), ['C'], lambda self, il: il.set_flag('CY', il.not_expr(0, il.flag('CY')))], [('MOV', 1), ['R0', 'A'], lambda self, il: self.wreg_set(il, 0, il.reg(1, 'A'))], [('MOV', 1), ['R1', 'A'], lambda self, il: self.wreg_set(il, 1, il.reg(1, 'A'))], [('MOV', 1), ['R2', 'A'], lambda self, il: self.wreg_set(il, 2, il.reg(1, 'A'))], [('MOV', 1), ['R3', 'A'], lambda self, il: self.wreg_set(il, 3, il.reg(1, 'A'))], [('MOV', 1), ['R4', 'A'], lambda self, il: self.wreg_set(il, 4, il.reg(1, 'A'))], [('MOV', 1), ['R5', 'A'], lambda self, il: self.wreg_set(il, 5, il.reg(1, 'A'))], [('MOV', 1), ['R6', 'A'], lambda self, il: self.wreg_set(il, 6, il.reg(1, 'A'))], [('MOV', 1), ['R7', 'A'], lambda self, il: self.wreg_set(il, 7, il.reg(1, 'A'))], # 0xb0-0xbf [('MOV', 2), ['@R0', '#IMM8'], lambda self, il, imm: il.store(1, self.wreg_get(il, 0), il.const(1, imm))], [('MOV', 2), ['@R1', '#IMM8'], lambda self, il, imm: il.store(1, self.wreg_get(il, 1), il.const(1, imm))], [('JB5', 2), ['ADDR8'], lambda self, il, imm: cond_branch(il, CODE_ADDR(il.current_address, imm), 'B', 5)], [('JMPP', 1), ['@A'], lambda self, il: il.jump(il.or_expr(2, il.const(2, CODE_ADDR(il.current_address, 0)), il.reg(1, 'A')))], # FIXME: addr + 1? 
[('CALL', 2), ['ADDR11'], lambda self, il, imm: call_helper(il, CODE_ADDR(0x500, imm))], [('CPL', 1), ['F1'], lambda self, il: il.set_flag('F1', il.not_expr(0, il.flag('F1')))], [('JF0', 2), ['ADDR8'], lambda self, il, imm: cond_branch(il, CODE_ADDR(il.current_address, imm), 'F0', 1)], None, [('MOV', 2), ['R0', '#IMM8'], lambda self, il, imm: self.wreg_set(il, 0, il.const(1, imm))], [('MOV', 2), ['R1', '#IMM8'], lambda self, il, imm: self.wreg_set(il, 1, il.const(1, imm))], [('MOV', 2), ['R2', '#IMM8'], lambda self, il, imm: self.wreg_set(il, 2, il.const(1, imm))], [('MOV', 2), ['R3', '#IMM8'], lambda self, il, imm: self.wreg_set(il, 3, il.const(1, imm))], [('MOV', 2), ['R4', '#IMM8'], lambda self, il, imm: self.wreg_set(il, 4, il.const(1, imm))], [('MOV', 2), ['R5', '#IMM8'], lambda self, il, imm: self.wreg_set(il, 5, il.const(1, imm))], [('MOV', 2), ['R6', '#IMM8'], lambda self, il, imm: self.wreg_set(il, 6, il.const(1, imm))], [('MOV', 2), ['R7', '#IMM8'], lambda self, il, imm: self.wreg_set(il, 7, il.const(1, imm))], # 0xc0-0xcf None, None, None, None, [('JMP', 2), ['ADDR11'], lambda self, il, imm: branch(il, CODE_ADDR(0x600, imm))], [('SEL', 1), ['RB0'], lambda self, il: il.set_flag('BS', il.const(0, 0))], [('JZ', 2), ['ADDR8'], lambda self, il, imm: cond_branch(il, CODE_ADDR(il.current_address, imm), 'Z')], [('MOV', 1), ['A', 'PSW'], lambda self, il: il.set_reg(1, 'A', il.reg(1, 'PSW'))], [('DEC', 1), ['R0'], lambda self, il: self.wreg_set(il, 0, il.sub(1, self.wreg_get(il, 0), il.const(1, 1)))], [('DEC', 1), ['R1'], lambda self, il: self.wreg_set(il, 1, il.sub(1, self.wreg_get(il, 1), il.const(1, 1)))], [('DEC', 1), ['R2'], lambda self, il: self.wreg_set(il, 2, il.sub(1, self.wreg_get(il, 2), il.const(1, 1)))], [('DEC', 1), ['R3'], lambda self, il: self.wreg_set(il, 3, il.sub(1, self.wreg_get(il, 3), il.const(1, 1)))], [('DEC', 1), ['R4'], lambda self, il: self.wreg_set(il, 4, il.sub(1, self.wreg_get(il, 4), il.const(1, 1)))], [('DEC', 1), ['R5'], lambda 
self, il: self.wreg_set(il, 5, il.sub(1, self.wreg_get(il, 5), il.const(1, 1)))], [('DEC', 1), ['R6'], lambda self, il: self.wreg_set(il, 6, il.sub(1, self.wreg_get(il, 6), il.const(1, 1)))], [('DEC', 1), ['R7'], lambda self, il: self.wreg_set(il, 7, il.sub(1, self.wreg_get(il, 7), il.const(1, 1)))], # 0xd0-0xdf [('XRL', 1), ['A', '@R0'], lambda self, il: il.set_reg(1, 'A', il.xor_expr(1, il.reg(1, 'A'), il.load(1, self.wreg_get(il, 0))))], [('XRL', 1), ['A', '@R1'], lambda self, il: il.set_reg(1, 'A', il.xor_expr(1, il.reg(1, 'A'), il.load(1, self.wreg_get(il, 1))))], [('JB6', 2), ['ADDR8'], lambda self, il, imm: cond_branch(il, CODE_ADDR(il.current_address, imm), 'B', 6)], [('XRL', 2), ['A', '#IMM8'], lambda self, il, imm: il.set_reg(1, 'A', il.xor_expr(1, il.reg(1, 'A'), il.const(1, imm)))], [('CALL', 2), ['ADDR11'], lambda self, il, imm: call_helper(il, CODE_ADDR(0x600, imm))], [('SEL', 1), ['RB1'], lambda self, il: il.set_flag('BS', il.const(0, 1))], None, [('MOV', 1), ['PSW', 'A'], lambda self, il: il.set_reg(1, 'PSW', il.reg(1, 'A'))], # TODO: set/clear flags [('XRL', 1), ['A', 'R0'], lambda self, il: il.set_reg(1, 'A', il.xor_expr(1, il.reg(1, 'A'), self.wreg_get(il, 0)))], [('XRL', 1), ['A', 'R1'], lambda self, il: il.set_reg(1, 'A', il.xor_expr(1, il.reg(1, 'A'), self.wreg_get(il, 1)))], [('XRL', 1), ['A', 'R2'], lambda self, il: il.set_reg(1, 'A', il.xor_expr(1, il.reg(1, 'A'), self.wreg_get(il, 2)))], [('XRL', 1), ['A', 'R3'], lambda self, il: il.set_reg(1, 'A', il.xor_expr(1, il.reg(1, 'A'), self.wreg_get(il, 3)))], [('XRL', 1), ['A', 'R4'], lambda self, il: il.set_reg(1, 'A', il.xor_expr(1, il.reg(1, 'A'), self.wreg_get(il, 4)))], [('XRL', 1), ['A', 'R5'], lambda self, il: il.set_reg(1, 'A', il.xor_expr(1, il.reg(1, 'A'), self.wreg_get(il, 5)))], [('XRL', 1), ['A', 'R6'], lambda self, il: il.set_reg(1, 'A', il.xor_expr(1, il.reg(1, 'A'), self.wreg_get(il, 6)))], [('XRL', 1), ['A', 'R7'], lambda self, il: il.set_reg(1, 'A', il.xor_expr(1, il.reg(1, 
'A'), self.wreg_get(il, 7)))], # 0xe0-0xef None, None, None, [('MOVP3', 1), ['A', '@A'], lambda self, il: il.set_reg(1, 'A', il.load(1, il.or_expr(2, il.const(2, CODE_ADDR(0x300, 0)), il.reg(1, 'A'))))], [('JMP', 2), ['ADDR11'], lambda self, il, imm: branch(il, CODE_ADDR(0x700, imm))], [('SEL', 1), ['MB0'], lambda self, il: il.set_flag('DBF', il.const(0, 0))], [('JNC', 2), ['ADDR8'], lambda self, il, imm: cond_branch(il, CODE_ADDR(il.current_address, imm), 'CY', 0)], [('RL', 1), ['A'], lambda self, il: il.set_reg(1, 'A', il.rotate_left(1, il.reg(1, 'A'), il.const(1, 1)))], [('DJNZ', 2), ['R0', 'ADDR8'], lambda self, il, imm: self.djnz_helper(il, CODE_ADDR(il.current_address, imm), 0)], [('DJNZ', 2), ['R1', 'ADDR8'], lambda self, il, imm: self.djnz_helper(il, CODE_ADDR(il.current_address, imm), 1)], [('DJNZ', 2), ['R2', 'ADDR8'], lambda self, il, imm: self.djnz_helper(il, CODE_ADDR(il.current_address, imm), 2)], [('DJNZ', 2), ['R3', 'ADDR8'], lambda self, il, imm: self.djnz_helper(il, CODE_ADDR(il.current_address, imm), 3)], [('DJNZ', 2), ['R4', 'ADDR8'], lambda self, il, imm: self.djnz_helper(il, CODE_ADDR(il.current_address, imm), 4)], [('DJNZ', 2), ['R5', 'ADDR8'], lambda self, il, imm: self.djnz_helper(il, CODE_ADDR(il.current_address, imm), 5)], [('DJNZ', 2), ['R6', 'ADDR8'], lambda self, il, imm: self.djnz_helper(il, CODE_ADDR(il.current_address, imm), 6)], [('DJNZ', 2), ['R7', 'ADDR8'], lambda self, il, imm: self.djnz_helper(il, CODE_ADDR(il.current_address, imm), 7)], # 0xf0-0xff [('MOV', 1), ['A', '@R0'], lambda self, il: il.set_reg(1, 'A', il.load(1, self.wreg_get(il, 0)))], [('MOV', 1), ['A', '@R1'], lambda self, il: il.set_reg(1, 'A', il.load(1, self.wreg_get(il, 1)))], [('JB7', 2), ['ADDR8'], lambda self, il, imm: cond_branch(il, CODE_ADDR(il.current_address, imm), 'B', 7)], None, [('CALL', 2), ['ADDR11'], lambda self, il, imm: call_helper(il, CODE_ADDR(0x700, imm))], [('SEL', 1), ['MB1'], lambda self, il: il.set_flag('DBF', il.const(0, 1))], [('JC', 
2), ['ADDR8'], lambda self, il, imm: cond_branch(il, CODE_ADDR(il.current_address, imm), 'CY', 1)], [('RLC', 1), ['A'], lambda self, il: il.set_reg(1, 'A', il.rotate_left_carry(1, il.reg(1, 'A'), il.const(1, 1), il.flag('CY')))], [('MOV', 1), ['A', 'R0'], lambda self, il: il.set_reg(1, 'A', self.wreg_get(il, 0))], [('MOV', 1), ['A', 'R1'], lambda self, il: il.set_reg(1, 'A', self.wreg_get(il, 1))], [('MOV', 1), ['A', 'R2'], lambda self, il: il.set_reg(1, 'A', self.wreg_get(il, 2))], [('MOV', 1), ['A', 'R3'], lambda self, il: il.set_reg(1, 'A', self.wreg_get(il, 3))], [('MOV', 1), ['A', 'R4'], lambda self, il: il.set_reg(1, 'A', self.wreg_get(il, 4))], [('MOV', 1), ['A', 'R5'], lambda self, il: il.set_reg(1, 'A', self.wreg_get(il, 5))], [('MOV', 1), ['A', 'R6'], lambda self, il: il.set_reg(1, 'A', self.wreg_get(il, 6))], [('MOV', 1), ['A', 'R7'], lambda self, il: il.set_reg(1, 'A', self.wreg_get(il, 7))], ] def get_instruction_info(self, data, addr): # instruction lookup instruction = self.instructions[ord(data[0])] if instruction is None: return None (opcode, length) = instruction[0] result = InstructionInfo() result.length = length # add branches if opcode in ['RET', 'RETI', 'RETR']: result.add_branch(BranchType.FunctionReturn) elif opcode in ['JMP']: # TODO: memory bank selection result.add_branch(BranchType.UnconditionalBranch, CODE_ADDR((ord(data[0]) & 0xe0) << 3, ord(data[1]))) elif opcode in ['JMPP']: result.add_branch(BranchType.UnresolvedBranch) elif opcode == 'DJNZ' or opcode[0] == 'J': # conditional branches result.add_branch(BranchType.TrueBranch, CODE_ADDR(addr, ord(data[1]))) result.add_branch(BranchType.FalseBranch, addr + length) elif opcode == 'CALL': # TODO: memory bank selection result.add_branch(BranchType.CallDestination, CODE_ADDR((ord(data[0]) & 0xe0) << 3, ord(data[1]))) elif opcode == 'SEL': # FIXME: fake branches to support bank switching if instruction[1][0] == 'RB0': result.add_branch(BranchType.UnconditionalBranch, addr + length, 
Architecture['{}_rb{}mb{}'.format(self.device, 0, self.mb)]) elif instruction[1][0] == 'RB1': result.add_branch(BranchType.UnconditionalBranch, addr + length, Architecture['{}_rb{}mb{}'.format(self.device, 1, self.mb)]) elif instruction[1][0] == 'MB0': result.add_branch(BranchType.UnconditionalBranch, addr + length, Architecture['{}_rb{}mb{}'.format(self.device, self.rb, 0)]) elif instruction[1][0] == 'MB1': result.add_branch(BranchType.UnconditionalBranch, addr + length, Architecture['{}_rb{}mb{}'.format(self.device, self.rb, 1)]) return result def get_instruction_text(self, data, addr): # instruction lookup instruction = self.instructions[ord(data[0])] if instruction is None: return None (opcode, length) = instruction[0] # opcode tokens = [InstructionTextToken(InstructionTextTokenType.InstructionToken, '{:6}'.format(opcode))] # operands for operand in instruction[1]: # add a separator if needed if len(tokens) > 1: tokens += [InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, ',')] # append suffix for second bank working registers if self.rb == 1 and re.match('\@?R\d', operand) is not None: operand += '\'' if operand == '#IMM8': immediate = ord(data[1]) tokens += [InstructionTextToken(InstructionTextTokenType.IntegerToken, '#{:X}H'.format(immediate), immediate)] elif operand == 'ADDR8': address = (addr & 0xf00) | ord(data[1]) tokens += [InstructionTextToken(InstructionTextTokenType.PossibleAddressToken, '{:X}H'.format(address), CODE_ADDR(0, address))] elif operand == 'ADDR11': # TODO: memory bank selection address = ((ord(data[0]) & 0xe0) << 3) | ord(data[1]) tokens += [InstructionTextToken(InstructionTextTokenType.PossibleAddressToken, '{:X}H'.format(address), CODE_ADDR(0, address))] elif operand in self.regs: tokens += [InstructionTextToken(InstructionTextTokenType.RegisterToken, operand)] elif operand[0] == '@' and operand[1:] in self.regs: tokens += [InstructionTextToken(InstructionTextTokenType.InstructionToken, '@'), 
InstructionTextToken(InstructionTextTokenType.RegisterToken, operand[1:])] else: tokens += [InstructionTextToken(InstructionTextTokenType.TextToken, operand)] return tokens, length def get_instruction_low_level_il(self, data, addr, il): # instruction lookup instruction = self.instructions[ord(data[0])] if instruction is None: return None (opcode, length) = instruction[0] if len(instruction) == 3: # instructions are either one byte (opcode) or two bytes (opcode + immediate) if length == 1: il_instr = instruction[2](self, il) else: il_instr = instruction[2](self, il, ord(data[1])) if isinstance(il_instr, list): for i in [i for i in il_instr if i is not None]: il.append(i) elif il_instr is not None: il.append(il_instr) else: il.append(il.unimplemented()) return length def get_flag_write_low_level_il(self, op, size, write_type, flag, operands, il): if flag == 'CY': if op == LowLevelILOperation.LLIL_RRC: return il.and_expr(1, il.reg(1, operands[0]), il.const(1, 0x01)) elif op == LowLevelILOperation.LLIL_RLC: return il.and_expr(1, il.reg(1, operands[0]), il.const(1, 0x80)) return Architecture.perform_get_flag_write_low_level_il(self, op, size, write_type, flag, operands, il) def wreg_set(self, il, reg, expr): if WREG_REG: il.append(il.set_reg(1, 'R{}'.format(reg) if self.rb == 0 else 'R{}\''.format(reg), expr)) else: il.append(il.store(1, il.const_pointer(1, reg if self.rb == 0 else reg + 24), expr)) def wreg_get(self, il, reg): if WREG_REG: return il.reg(1, 'R{}'.format(reg) if self.rb == 0 else 'R{}\''.format(reg)) else: return il.load(1, il.const_pointer(1, reg if self.rb == 0 else reg + 24)) def djnz_helper(self, il, addr, reg): # decrement the register self.wreg_set(il, reg, il.sub(1, self.wreg_get(il, reg), il.const(1, 1))) # try to find a label for the branch target taken = il.get_label_for_address(il.arch, addr) # create taken target taken_found = True if taken is None: taken = LowLevelILLabel() taken_found = False # create untaken target untaken_found = True 
untaken = il.get_label_for_address(il.arch, il.current_address + 2) if untaken is None: untaken = LowLevelILLabel() untaken_found = False # generate the conditional branch LLIL il.append(il.if_expr(il.compare_not_equal(1, self.wreg_get(il, reg), il.const(1, 0)), taken, untaken)) # generate a jump to the branch target if a label couldn't be found if not taken_found: il.mark_label(taken) il.append(il.jump(il.const(2, addr))) # generate a label for the untaken branch if not untaken_found: il.mark_label(untaken)
class XTENSA(Architecture):
    """Binary Ninja architecture for Xtensa built on the module's ``decode`` helper.

    Disassembly text and control-flow information are derived from the object
    returned by ``decode(data, addr)``, of which this class reads ``name``
    (mnemonic), ``len`` (size in bytes) and ``prop["format"]`` (a sequence of
    operand entries whose first item is a type tag and second item the value).
    Lifting is a stub: every instruction is emitted as ``unimplemented``.
    """

    name = 'XTENSA'
    address_size = 4
    default_int_size = 4
    # Instructions are at most 3 bytes wide (narrow ".N" forms such as RET.N
    # are shorter) and follow each other directly (see the fall-through edge
    # at addr + obj.len below), so an instruction may start at any byte
    # address.  The former value of 3 described the instruction width, not
    # its alignment.
    instr_alignment = 1
    max_instr_length = 3

    # register related stuff
    # main registers a0..a15
    regs = {'a%d' % n: RegisterInfo('a%d' % n, 4) for n in range(16)}
    # program counter
    regs['pc'] = RegisterInfo('pc', 4)
    # special status
    regs['sar'] = RegisterInfo('sar', 4)

    stack_pointer = "a1"

    # Mnemonic groups used to classify control flow.
    # NOTE(review): narrow branches (BEQZ.N/BNEZ.N), windowed returns (RETW)
    # and CALLX* are not classified here -- confirm which names decode() emits
    # before adding them.
    _RETURNS = frozenset(["RET", "RET.N"])
    _CONDITIONAL_BRANCHES = frozenset([
        "BALL", "BNALL", "BANY", "BNONE",
        "BBC", "BBCI", "BBS", "BBSI",
        "BEQ", "BEQI", "BEQZ",
        "BNE", "BNEI", "BNEZ",
        "BGE", "BGEI", "BGEU", "BGEUI", "BGEZ",
        "BLT", "BLTI", "BLTU", "BLTUI", "BLTZ",
    ])
    _JUMPS = frozenset(["J"])
    _DIRECT_CALLS = frozenset(["CALL0", "CALL4", "CALL8", "CALL12"])
    _REGISTER_JUMPS = frozenset(["JX"])

    # Operand type tag -> register name prefix used in the disassembly text.
    _REGISTER_PREFIXES = {
        "TYPE_REG": "a",
        "TYPE_FREG": "f",
        "TYPE_BREG": "b",
        "TYPE_SREG": "s",
        "TYPE_UREG": "u",
        "TYPE_MREG": "m",
    }

    def _decode(self, data, addr):
        """Decode the instruction at ``addr``; return None if it cannot be decoded.

        Fewer than 2 bytes is never enough for an instruction.  Buffers longer
        than ``max_instr_length`` are truncated instead of rejected, so callers
        that hand in a larger read still get a result.
        """
        if len(data) < 2:
            return None
        obj = decode(data[:self.max_instr_length], addr)
        if obj.name == "UNKNOWN":
            return None
        return obj

    @staticmethod
    def _label_targets(obj):
        """Return the values of all TYPE_LABEL operands of a decoded instruction."""
        return [op[1] for op in obj.prop["format"] if op[0] == "TYPE_LABEL"]

    #------------------------------------------------------------------------------
    # CFG building
    #------------------------------------------------------------------------------

    def get_instruction_info(self, data, addr):
        """Return length and outgoing branches of the instruction at ``addr``.

        Returns None when the bytes do not decode to a known instruction.
        """
        obj = self._decode(data, addr)
        if obj is None:
            return None

        result = InstructionInfo()
        result.length = obj.len
        mnemonic = obj.name

        if mnemonic in self._RETURNS:
            result.add_branch(BranchType.FunctionReturn)
        elif mnemonic in self._CONDITIONAL_BRANCHES:
            for target in self._label_targets(obj):
                result.add_branch(BranchType.TrueBranch, target)
            # A conditional branch that is not taken continues with the
            # instruction directly behind it.
            result.add_branch(BranchType.FalseBranch, addr + obj.len)
        elif mnemonic in self._JUMPS:
            for target in self._label_targets(obj):
                result.add_branch(BranchType.UnconditionalBranch, target)
        elif mnemonic in self._DIRECT_CALLS:
            for target in self._label_targets(obj):
                result.add_branch(BranchType.CallDestination, target)
        elif mnemonic in self._REGISTER_JUMPS:
            # The target lives in a register and is not known statically.
            result.add_branch(BranchType.IndirectBranch)
        # Calls through a register (CALLX0/4/8/12) are deliberately left
        # without a branch entry, as in the original implementation.
        return result

    def get_instruction_text(self, data, addr):
        """Return ``(tokens, length)`` for the instruction at ``addr``.

        The text is the mnemonic followed by the operands; each operand is
        preceded by a space and followed by a comma unless it is the last one.
        Returns None when the bytes do not decode to a known instruction.
        """
        obj = self._decode(data, addr)
        if obj is None:
            return None

        tokens = [
            InstructionTextToken(InstructionTextTokenType.InstructionToken,
                                 obj.name)
        ]
        operands = obj.prop["format"]
        last = len(operands) - 1
        for position, operand in enumerate(operands):
            tokens.append(
                InstructionTextToken(
                    InstructionTextTokenType.OperandSeparatorToken, ' '))

            kind = operand[0]
            if kind in self._REGISTER_PREFIXES:
                tokens.append(
                    InstructionTextToken(
                        InstructionTextTokenType.RegisterToken,
                        self._REGISTER_PREFIXES[kind] + str(operand[1])))
            elif kind == "TYPE_IMM":
                tokens.append(
                    InstructionTextToken(
                        InstructionTextTokenType.IntegerToken,
                        str(operand[1]), operand[1]))
            elif kind == "TYPE_LABEL":
                # PossibleAddressToken may be the better token type here.
                tokens.append(
                    InstructionTextToken(
                        InstructionTextTokenType.CodeRelativeAddressToken,
                        '0x%08x' % (operand[1]), operand[1]))
            # Operands with any other type tag produce no token of their own.

            if position < last:
                tokens.append(
                    InstructionTextToken(
                        InstructionTextTokenType.OperandSeparatorToken, ','))
        return tokens, obj.len

    def get_flag_write_low_level_il(self, op, size, write_type, flag, operands,
                                    il):
        """Delegate flag-write lifting to the base ``Architecture`` implementation."""
        return Architecture.get_flag_write_low_level_il(
            self, op, size, write_type, flag, operands, il)

    def get_instruction_low_level_il(self, data, addr, il):
        """Append an ``unimplemented`` IL instruction and return the instruction length.

        Returns None when the bytes do not decode to a known instruction.
        """
        obj = self._decode(data, addr)
        if obj is None:
            return None
        il.append(il.unimplemented())
        return obj.len
class VTIL(Architecture):
    """Binary Ninja pseudo-architecture that renders instructions of a VTIL file.

    Every instruction occupies exactly one address unit.  The instruction for
    an address is looked up with ``find_instruction(addr, active_vtil_file)``,
    which is unpacked here as ``(next_vip, sp_index, sp_reset, sp_offset,
    code)``; ``code`` is the textual instruction (mnemonic and operands
    separated by single spaces) or None when nothing is found.
    """

    name = "VTIL"
    max_instr_length = 1
    stack_pointer = "$sp"

    regs = {
        "$sp": RegisterInfo("$sp", 1)
    }

    # Display templates keyed by mnemonic.  "tokens" is the token sequence with
    # "UNKNOWN" placeholders and "operands" lists, in operand order, the
    # indices of the placeholders to replace.  The templates are shared by all
    # instances and must never be modified after class creation.
    instructions = {
        "str": {
            "tokens": [
                InstructionTextToken(InstructionTextTokenType.InstructionToken, "str"),
                InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, " "),
                InstructionTextToken(InstructionTextTokenType.TextToken, "["),
                InstructionTextToken(InstructionTextTokenType.RegisterToken, "UNKNOWN"),
                InstructionTextToken(InstructionTextTokenType.TextToken, "+"),
                InstructionTextToken(InstructionTextTokenType.TextToken, "UNKNOWN"),
                InstructionTextToken(InstructionTextTokenType.TextToken, "]"),
                InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, ", "),
                InstructionTextToken(InstructionTextTokenType.RegisterToken, "UNKNOWN"),
            ],
            "operands": [3, 5, 8],
        },
        "ldd": {
            "tokens": [
                InstructionTextToken(InstructionTextTokenType.InstructionToken, "ldd"),
                InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, " "),
                InstructionTextToken(InstructionTextTokenType.RegisterToken, "UNKNOWN"),
                InstructionTextToken(InstructionTextTokenType.TextToken, ", ["),
                InstructionTextToken(InstructionTextTokenType.RegisterToken, "UNKNOWN"),
                InstructionTextToken(InstructionTextTokenType.TextToken, "+"),
                InstructionTextToken(InstructionTextTokenType.TextToken, "UNKNOWN"),
                InstructionTextToken(InstructionTextTokenType.TextToken, "]"),
            ],
            "operands": [2, 4, 6],
        },
    }

    # The comparison instructions all render as "<op> dst := (lhs <cmp> rhs)"
    # and differ only in mnemonic and comparison text.
    for _mnemonic, _operator in (
            ("te", " == "),
            ("tne", " != "),
            ("tg", " > "),
            ("tge", " >= "),
            ("tl", " < "),
            ("tle", " <= "),
            ("tug", " u> "),
            ("tuge", " u>= "),
            ("tul", " u< "),
            ("tule", " u<= "),
    ):
        instructions[_mnemonic] = {
            "tokens": [
                InstructionTextToken(InstructionTextTokenType.InstructionToken, _mnemonic),
                InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, " "),
                InstructionTextToken(InstructionTextTokenType.RegisterToken, "UNKNOWN"),
                InstructionTextToken(InstructionTextTokenType.TextToken, " := ("),
                InstructionTextToken(InstructionTextTokenType.RegisterToken, "UNKNOWN"),
                InstructionTextToken(InstructionTextTokenType.TextToken, _operator),
                InstructionTextToken(InstructionTextTokenType.TextToken, "UNKNOWN"),
                InstructionTextToken(InstructionTextTokenType.TextToken, ")"),
            ],
            "operands": [2, 4, 6],
        }
    del _mnemonic, _operator

    # NOTE(review): only two operand slots are filled for "ifs" and nothing is
    # rendered in front of " := " -- confirm this is the intended layout.
    instructions["ifs"] = {
        "tokens": [
            InstructionTextToken(InstructionTextTokenType.InstructionToken, "ifs"),
            InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, " := "),
            InstructionTextToken(InstructionTextTokenType.RegisterToken, "UNKNOWN"),
            InstructionTextToken(InstructionTextTokenType.TextToken, " ? "),
            InstructionTextToken(InstructionTextTokenType.RegisterToken, "UNKNOWN"),
            InstructionTextToken(InstructionTextTokenType.TextToken, " : "),
            InstructionTextToken(InstructionTextTokenType.IntegerToken, "0"),
        ],
        "operands": [2, 4],
    }
    instructions["js"] = {
        "tokens": [
            InstructionTextToken(InstructionTextTokenType.InstructionToken, "js"),
            InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, " "),
            InstructionTextToken(InstructionTextTokenType.RegisterToken, "UNKNOWN"),
            InstructionTextToken(InstructionTextTokenType.TextToken, " ? "),
            InstructionTextToken(InstructionTextTokenType.RegisterToken, "UNKNOWN"),
            InstructionTextToken(InstructionTextTokenType.TextToken, " : "),
            InstructionTextToken(InstructionTextTokenType.RegisterToken, "UNKNOWN"),
        ],
        "operands": [2, 4, 6],
    }

    def get_instruction_info(self, data, addr):
        """Return length (always 1) and outgoing branches of the instruction at ``addr``.

        Branches are derived from the instruction text: ``js`` yields a
        true/false pair, ``vxcall`` and single-target ``jmp`` an unconditional
        branch, multi-target ``jmp`` an indirect branch plus one unconditional
        branch per candidate, and ``vexit`` a function return.  VIPs are
        translated to addresses with ``find_block_address``.
        """
        result = InstructionInfo()
        result.length = 1

        next_vip, _, _, _, code = find_instruction(addr, active_vtil_file)
        if code is None:
            return result

        if code.startswith("js"):
            # Text layout: "js <cond> <true-vip> <false-vip>".
            _, _, taken_vip, fallthrough_vip = code.split(" ")
            taken = find_block_address(int(taken_vip, 16), active_vtil_file)
            fallthrough = find_block_address(int(fallthrough_vip, 16), active_vtil_file)
            result.add_branch(BranchType.TrueBranch, taken)
            result.add_branch(BranchType.FalseBranch, fallthrough)
        elif code.startswith("vxcall"):
            target = find_block_address(next_vip[0], active_vtil_file)
            result.add_branch(BranchType.UnconditionalBranch, target)
        elif code.startswith("jmp"):
            if len(next_vip) == 1:
                target = find_block_address(next_vip[0], active_vtil_file)
                result.add_branch(BranchType.UnconditionalBranch, target)
            else:
                result.add_branch(BranchType.IndirectBranch)
                for vip in next_vip:
                    result.add_branch(BranchType.UnconditionalBranch,
                                      find_block_address(vip, active_vtil_file))
        elif code.startswith("vexit"):
            result.add_branch(BranchType.FunctionReturn)

        return result

    @staticmethod
    def _operand_token(mnemonic, operand, next_vip):
        """Build the display token for one textual operand.

        Operands without "0x" are shown as registers.  Hex operands become a
        ``vip_...`` goto label for ``js`` (taken from the operand text) and
        ``jmp`` (taken from ``next_vip[0]``), and an integer token otherwise.
        """
        if "0x" not in operand:
            return InstructionTextToken(InstructionTextTokenType.RegisterToken, operand)
        if mnemonic == "js":
            return InstructionTextToken(InstructionTextTokenType.GotoLabelToken,
                                        f"vip_{operand[2:]}")
        if mnemonic == "jmp":
            return InstructionTextToken(InstructionTextTokenType.GotoLabelToken,
                                        f"vip_{hex(next_vip[0])[2:]}")
        return InstructionTextToken(InstructionTextTokenType.IntegerToken, operand,
                                    value=int(operand, 16), size=64)

    def get_instruction_text(self, data, addr):
        """Return ``(tokens, 1)`` describing the instruction at ``addr``.

        The line starts with the stack index column (``[n] `` when positive),
        followed by the signed stack offset padded to six characters and
        prefixed with ``>`` when ``sp_reset`` is positive, then the
        instruction itself.  Mnemonics present in ``instructions`` are
        rendered through their template; everything else falls back to
        ``mnemonic op, op, ...``.  A single ``ERROR`` token is returned when
        no instruction is found.
        """
        next_vip, sp_index, sp_reset, sp_offset, code = find_instruction(addr, active_vtil_file)
        if code is None:
            return [InstructionTextToken(InstructionTextTokenType.TextToken, "ERROR")], 1

        tokens = []

        # Stack index column.
        if sp_index > 0:
            tokens.append(InstructionTextToken(InstructionTextTokenType.TextToken, "["))
            tokens.append(InstructionTextToken(InstructionTextTokenType.IntegerToken,
                                               f"{int(sp_index):>2}", value=sp_index, size=64))
            tokens.append(InstructionTextToken(InstructionTextTokenType.TextToken, "] "))
        else:
            tokens.append(InstructionTextToken(InstructionTextTokenType.TextToken, " "))

        # Stack offset column.
        sign = "+" if sp_offset >= 0 else "-"
        marker = ">" if sp_reset > 0 else " "
        txt = f"{marker}{sign}{hex(abs(sp_offset))}"
        tokens.append(InstructionTextToken(InstructionTextTokenType.TextToken, f"{txt:<6}"))
        tokens.append(InstructionTextToken(InstructionTextTokenType.TextToken, " "))

        mnemonic, separator, operand_text = code.partition(" ")
        if not separator:
            # Instruction without operands.
            tokens.append(InstructionTextToken(InstructionTextTokenType.InstructionToken, code))
            return tokens, 1

        operands = operand_text.split(" ")
        template = self.instructions.get(mnemonic)
        if template is not None:
            # Work on a copy: the template list is class-level state shared by
            # every call (and every analysis thread) and must stay pristine.
            rendered = list(template["tokens"])
            # zip() leaves the "UNKNOWN" placeholder in place when the text
            # carries fewer operands than the template expects.
            for slot, operand in zip(template["operands"], operands):
                rendered[slot] = self._operand_token(mnemonic, operand, next_vip)
            tokens.extend(rendered)
        else:
            # fallback
            tokens.append(InstructionTextToken(InstructionTextTokenType.InstructionToken, mnemonic))
            tokens.append(InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, " "))
            for position, operand in enumerate(operands):
                if position:
                    tokens.append(InstructionTextToken(
                        InstructionTextTokenType.OperandSeparatorToken, ", "))
                tokens.append(self._operand_token(mnemonic, operand, next_vip))

        return tokens, 1

    def get_instruction_low_level_il(self, data, addr, il):
        """Lifting is not implemented; nothing is appended to ``il`` and None is returned."""
        return None
class HigherSubleq64(Architecture):
    """Binary Ninja architecture for the "hsq64" higher-subleq instruction set.

    Decoding is delegated to ``disassembler``.  Its ``instrs`` table is
    indexed by word number, so every byte address handed in by Binary Ninja
    is divided by ``address_size`` and every operand or branch target taken
    from an instruction is multiplied by it.
    """

    name = "hsq64"
    address_size = 8
    default_int_size = 8
    instr_alignment = 1
    max_instr_length = address_size * 32
    # Never assigned inside this class; it has to be set from outside before
    # any callback runs, because every callback dereferences it.
    disassembler: HsqDisassembler = None
    regs = {
        "sp": RegisterInfo("sp", 8),
        "bp": RegisterInfo("bp", 8),
        "ax": RegisterInfo("ax", 8),
    }
    stack_pointer = "sp"

    def get_instruction_info(self, data, addr):
        """Report the length and outgoing branches of the instruction at *addr*.

        Returns ``None`` when the disassembler has no instruction there,
        otherwise an ``InstructionInfo``.  Calls get a call destination,
        ``Ret``/``Exit`` a function return, and anything else one or two
        branches depending on how many successors the instruction reports.
        """
        instr = self.disassembler.instrs[addr // self.address_size]
        if instr is None:
            return None

        word = self.address_size
        info = InstructionInfo()
        info.length = instr.width * word
        successors = instr.get_next_addr()

        if isinstance(instr, Call):
            info.add_branch(BranchType.CallDestination, instr.c * word)
        elif isinstance(instr, (Ret, Exit)):
            info.add_branch(BranchType.FunctionReturn)
        elif len(successors) == 2:
            # successors[1] is reported as the taken edge, successors[0] as
            # the not-taken edge.
            info.add_branch(BranchType.TrueBranch, successors[1] * word)
            info.add_branch(BranchType.FalseBranch, successors[0] * word)
        elif len(successors) == 1:
            info.add_branch(BranchType.UnconditionalBranch,
                            successors[0] * word)
        return info

    def get_instruction_text(self, data, addr):
        """Render the instruction at *addr* as display tokens.

        Returns ``None`` when there is no instruction, otherwise a
        ``(tokens, length_in_bytes)`` pair.  The mnemonic is the lower-cased
        class name of the decoded instruction.
        """
        instr = self.disassembler.instrs[addr // self.address_size]
        if instr is None:
            return None

        tokens = [
            InstructionTextToken(InstructionTextTokenType.TextToken,
                                 type(instr).__name__.lower()),
            InstructionTextToken(InstructionTextTokenType.TextToken, " "),
        ]
        for index, operand in enumerate(instr.operands):
            if index:
                tokens.append(
                    InstructionTextToken(
                        InstructionTextTokenType.OperandSeparatorToken, ", "))
            if self.disassembler.is_register(operand):
                tokens.append(
                    InstructionTextToken(
                        InstructionTextTokenType.RegisterToken,
                        self.disassembler.symbol[operand],
                    ))
            else:
                # Non-register operands are word indices; show them as byte
                # addresses.
                tokens.append(
                    InstructionTextToken(
                        InstructionTextTokenType.PossibleAddressToken,
                        hex(operand * self.address_size),
                    ))
        return tokens, instr.width * self.address_size

    def get_instruction_low_level_il(self, data, addr, il: LowLevelILFunction):
        """Lift the instruction at *addr* into *il*.

        Returns ``None`` when there is no instruction, otherwise the
        instruction length in bytes.  Instruction classes that match none of
        the cases below emit no IL but still report their length.
        """
        instr = self.disassembler.instrs[addr // self.address_size]
        if instr is None:
            return None

        size = self.address_size
        # The isinstance order is kept as-is in case the instruction classes
        # derive from one another.
        if isinstance(instr, Subleq):
            self._lift_subleq(instr, addr, il)
        elif isinstance(instr, Clear):
            addr_b, _ = self.get_addr_mem_il(instr.b, il)
            il.append(
                self._store_operand_il(instr.b, addr_b, il.const(size, 0), il))
            il.append(il.jump(il.const(size, instr.c * size)))
        elif isinstance(instr, Push):
            _, mem_v = self.get_addr_mem_il(instr.v, il)
            il.append(il.push(size, mem_v))
        elif isinstance(instr, Mov):
            _, mem_a = self.get_addr_mem_il(instr.a, il)
            addr_b, _ = self.get_addr_mem_il(instr.b, il)
            il.append(self._store_operand_il(instr.b, addr_b, mem_a, il))
        elif isinstance(instr, Ret):
            il.append(il.ret(il.load(size, il.reg(size, "sp"))))
        elif isinstance(instr, Pop):
            addr_v, _ = self.get_addr_mem_il(instr.v, il)
            il.append(
                self._store_operand_il(instr.v, addr_v, il.pop(size), il))
        elif isinstance(instr, Call):
            il.append(il.call(il.const(size, instr.c * size)))
        elif isinstance(instr, Inc):
            self._lift_add_const(instr.b, 1, il)
        elif isinstance(instr, Dec):
            self._lift_add_const(instr.b, -1, il)
        elif isinstance(instr, Exit):
            il.append(il.no_ret())
        elif isinstance(instr, Jmp):
            il.append(il.jump(il.const(size, instr.c * size)))
        return instr.width * size

    def _lift_subleq(self, instr, addr, il):
        """Emit ``b -= a`` followed by a branch to ``c`` when ``b <= 0``."""
        size = self.address_size
        _, mem_a = self.get_addr_mem_il(instr.a, il)
        addr_b, mem_b = self.get_addr_mem_il(instr.b, il)
        il.append(
            self._store_operand_il(instr.b, addr_b,
                                   il.sub(size, mem_b, mem_a), il))

        condition = il.compare_signed_less_equal(size, mem_b,
                                                 il.const(size, 0))

        taken_addr = instr.c * size
        taken = il.get_label_for_address(il.arch, taken_addr)
        taken_known = taken is not None
        if not taken_known:
            taken = LowLevelILLabel()

        # The fall-through address is the end of this instruction, i.e. its
        # width in words scaled to bytes (the same length that is returned to
        # Binary Ninja).
        fallthrough = il.get_label_for_address(il.arch,
                                               addr + instr.width * size)
        fallthrough_known = fallthrough is not None
        if not fallthrough_known:
            fallthrough = LowLevelILLabel()

        il.append(il.if_expr(condition, taken, fallthrough))
        if not taken_known:
            # No existing label for the target: branch through an explicit
            # jump placed under a fresh label.
            il.mark_label(taken)
            il.append(il.jump(il.const(size, taken_addr)))
        if not fallthrough_known:
            il.mark_label(fallthrough)

    def _lift_add_const(self, operand, delta, il):
        """Emit ``operand += delta`` (shared by Inc and Dec)."""
        size = self.address_size
        addr_il, mem_il = self.get_addr_mem_il(operand, il)
        total = il.add(size, mem_il, il.const(size, delta))
        il.append(self._store_operand_il(operand, addr_il, total, il))

    def _store_operand_il(self, operand, addr_il, value_il, il):
        """Build the IL that writes *value_il* into *operand*.

        Register operands become a ``set_reg``; anything else becomes a
        memory store through *addr_il*.
        """
        if self.disassembler.is_register(operand):
            return il.set_reg(self.address_size,
                              self.disassembler.symbol[operand], value_il)
        return il.store(self.address_size, addr_il, value_il)

    def get_addr_mem_il(self, addr, il):
        """Return ``(address_il, value_il)`` for operand *addr*.

        For a register operand both entries are the same register read.  For
        a memory operand the first is a constant pointer to the byte address
        and the second a load through it.
        """
        if self.disassembler.is_register(addr):
            addr_il = il.reg(self.address_size, self.disassembler.symbol[addr])
            mem_il = addr_il
        else:
            addr_il = il.const_pointer(self.address_size,
                                       addr * self.address_size)
            mem_il = il.load(self.address_size, addr_il)
        return addr_il, mem_il
class MCS51(Architecture):
    """Binary Ninja architecture for the 8051.

    Instruction sizing, decoding, text rendering and lifting are all driven
    by the per-opcode tables exposed through :attr:`lut`.

    Capitalization convention: memory-mapped stuff in allcaps, bits and true
    registers lower? Except r0-r7, also lower? Foolish consistency.
    """

    name = "8051"

    # C 'pointers' tend to be 3 bytes, but architecture-wise it's just 2?
    # Our fake address space that keeps all flash banks mapped needs 3.
    # Full XRAM/IRAM tags need 5.
    address_size = 2  # sets default return value size, nothing else... ???
    endianness = Endianness.BigEndian  # up to compiler... needs to be chosen
    default_int_size = 1
    max_instr_length = 3
    stack_pointer = 'SP'

    regs = {r: RegisterInfo(r, 1) for r in ['SP', 'A', 'B']}
    regs['DPTR'] = RegisterInfo('DPTR', 2)
    regs['DPL'] = RegisterInfo('DPTR', 1)
    regs['DPH'] = RegisterInfo('DPTR', 1, 1)  # FIXME what endianness is this?

    # R0-R7 are modelled as byte slices of two 4-byte registers (Y0, Y4)
    # rather than as eight independent 1-byte registers.
    #
    # This is cute, but I'm not yet sure if it's useful. Register merging
    # doesn't come in until HLIL?
    #
    # On closer look, this might be the only way to make calling
    # conventions work. At least as they are now.
    # Need to re-visit once this subregister bug is fixed:
    # https://github.com/Vector35/binaryninja-api/issues/715
    regs['PTR'] = RegisterInfo('Y0', 3, 1)  # C pointers under some compilers
    regs['Y0'] = RegisterInfo('Y0', 4)
    regs['Y4'] = RegisterInfo('Y4', 4)
    regs['T0'] = RegisterInfo('Y0', 2)
    regs['T2'] = RegisterInfo('Y0', 2, 2)
    regs['T4'] = RegisterInfo('Y4', 2)
    regs['T6'] = RegisterInfo('Y4', 2, 2)
    regs['R0'] = RegisterInfo('Y0', 1)
    regs['R1'] = RegisterInfo('Y0', 1, 1)
    regs['R2'] = RegisterInfo('Y0', 1, 2)
    regs['R3'] = RegisterInfo('Y0', 1, 3)
    regs['R4'] = RegisterInfo('Y4', 1)
    regs['R5'] = RegisterInfo('Y4', 1, 1)
    regs['R6'] = RegisterInfo('Y4', 1, 2)
    regs['R7'] = RegisterInfo('Y4', 1, 3)

    flags = [
        # actual flags stored in PSW special function register:
        'p',  # parity of accumulator
        #'ud', # user defined/unused by base hardware
        'ov',  # signed overflow on add
        #'rs0', 'rs1', # R0-R7 register bank select
        #'f0', # software use, like ud
        'ac',  # aux carry, because BCD is *important*!
        'c',
        # synthesized flags:
        'z',  # "There is no zero bit in the PSW. The JZ and JNZ instructions
        's',  # test the Accumulator data for that condition."
    ]

    flag_write_types = [
        '',  # first element *might* be ignored due to known bug
        'c',
        'zsp',  # modify A, without touching other flags
        'zspc',  # modify A and carry flag
        'zspc ov',  # */ operations
        #'zspc ov ac', # +- operations
        '*',  # +- operations
        # should mov indirect into PSW/ACC have its own flag settings?
    ]

    flags_written_by_flag_write_type = {
        'c': ['c'],
        'zsp': ['z', 's', 'p'],
        'zspc': ['z', 's', 'p', 'c'],
        #'zspc ov': ['z','s','p','c','ov'],
        '*': ['z', 's', 'p', 'c', 'ov', 'ac'],
    }

    flag_roles = {
        # real:
        'c': FlagRole.CarryFlagRole,
        'ac': FlagRole.HalfCarryFlagRole,
        'ov': FlagRole.OverflowFlagRole,
        'p': FlagRole.OddParityFlagRole,
        # imaginary:
        's': FlagRole.NegativeSignFlagRole,
        'z': FlagRole.ZeroFlagRole,
    }

    flags_required_for_flag_condition = {
        LowLevelILFlagCondition.LLFC_E: ["z"],
        LowLevelILFlagCondition.LLFC_NE: ["z"],
        LowLevelILFlagCondition.LLFC_NEG: ["s"],
        LowLevelILFlagCondition.LLFC_POS: ["s"],
        LowLevelILFlagCondition.LLFC_UGE: ["c"],
        LowLevelILFlagCondition.LLFC_ULT: ["c"],
        # not set by nes.py, going to try setting:
        LowLevelILFlagCondition.LLFC_O: ["ov"],
        LowLevelILFlagCondition.LLFC_NO: ["ov"],
    }

    def perform_get_instruction_info(self, data, addr):
        """Return size and branch information for the opcode at *addr*.

        Returns ``None`` for empty *data*.  The branch table entry for the
        first byte supplies the size and, for branching opcodes, a
        ``(branch_type, target)`` pair whose target may be a callable that
        computes the destination from the raw bytes.
        """
        if not data:
            return None  # edge case during linear sweep

        nfo = InstructionInfo()

        # ana
        size, branch = self.lut.branches[ord(data[0])]
        nfo.length = size

        # emu
        if branch:
            branch_type, target = branch
            if callable(target):
                # Only compute the target when the whole instruction is
                # available; otherwise fall back to 0.
                target = target(data, addr, size) if size <= len(data) else 0
            if branch_type == BranchType.CallDestination:
                # TODO: keep track of return-effect functions, tweak target +=dx
                pass
            # TODO: arch is probably global; need to store this in bv somehow :|
            nfo.add_branch(branch_type, target=target)
            if branch_type == BranchType.TrueBranch:
                nfo.add_branch(BranchType.FalseBranch, addr + size)
        return nfo

    def perform_get_instruction_text(self, data, addr):
        """Return ``(tokens, size)`` for the opcode at *addr*."""
        opcode = ord(data[0])

        # ana
        size, decoders = self.lut.decoders[opcode]
        assert len(data) >= size
        vals = [decoder(data, addr, size) for decoder in decoders]

        # out / outop
        toks = self.lut.text[opcode]
        return out.render(toks, vals), size

    def perform_get_instruction_low_level_il(self, data, addr, il):
        """Lift the opcode at *addr* into *il* and return its size.

        The builder comes from ``llil_mangler.patch_at`` when that yields
        one for this address, otherwise from the per-opcode table.  A builder
        may return a size that replaces the table-derived one.
        """
        # ana
        code = ord(data[0])
        size, decoders = self.lut.decoders[code]
        if len(data) < size:
            # incomplete code due to disassembling data or missing memory
            return size  # abort further analysis before it errors
        vals = [decoder(data, addr, size) for decoder in decoders]

        # sem
        build = llil_mangler.patch_at(self, addr) or self.lut.llil[code]
        size_override = build(il, vals, addr)
        return size_override if size_override is not None else size

    #def perform_get_flag_condition_low_level_il(self, cond, il):
    #    il.append(il.unimplemented())

    def perform_get_flag_write_low_level_il(self, op, size, write_type, flag,
                                            operands, il):
        """Lift a flag write by deferring to the base ``Architecture``."""
        # This can't be right; why doesn't it work on its own?
        #
        # Hand-written alternatives that were tried here and left disabled:
        #   - carry (and every flag) through
        #     get_default_flag_write_low_level_il with the flag's role;
        #   - RLC / RRC carry as test_bit of the operand register against
        #     0x80 / 0x01;
        #   - SBB carry as (left - (right + carry)) >> 8.
        retval = Architecture.perform_get_flag_write_low_level_il(
            self, op, size, write_type, flag, operands, il)
        #log_info('flag_write '+hex(il.current_address)+' | '+repr(retval)+' | '+repr((op, size, write_type, flag, operands, il)))
        return retval

    @specification.lazy_memoized_property
    def lut(self):
        """Look up tables generated once.

        All available architectures are *instantiated* on start, even if never
        used. To be a good neighbour but still get to write fun code, complex
        processing should be deferred until needed using this decorator.
        """
        luts = Tables()
        if 1:  # DEBUG
            urls = [
                ('spu plugin',
                 'https://github.com/bambu/binaryninja-spu/blob/master/spu.py'
                 ),
                ('nes plugin',
                 'https://github.com/Vector35/binaryninja-api/blob/dev/python/examples/nes.py'
                 ),
                ('m68k plugin',
                 'https://github.com/alexforencich/binaryninja-m68k/blob/master/__init__.py'
                 ),
            ]
            md = '## Still Unlifted\n\n' + luts.unlifted
            md += '\n\n## Reference Examples\n\n'
            for title, url in urls:
                md += '- [{0}]({1})\n'.format(title, url)
            binaryninja.show_markdown_report("Architecture Progress", md)
        return luts

    def perform_get_associated_arch_by_address(self, addr):
        """Return this architecture unchanged for every address."""
        # Waaait a second. add_branch has an optional 'arch' argument
        #
        # Can I branch from x86 into BPF? Or .NET IL? Or obfs. interpreter
        # uops? In one idb?
        # OMG IF YES TEST TEST TEST THIS omg, there's even a hinter
        #
        # guess this is from arm thumb shenanigans? or 32/64 in general?
        return self, addr

    ##
    ## That from-IDA patching thing them game hackers are so keen on...
    ##

    def perform_always_branch(self, data, addr):
        """Not implemented; returns ``None``."""
        return  # TODO do this, even if that's not how you normally patch

    def perform_convert_to_nop(self, data, addr):
        """Not implemented; returns ``None``."""
        return

    def perform_assemble(self, code, addr):
        """Not implemented; returns ``None``."""
        # TODO either hand-assemble, or find some nice embeddable asm /w
        # macros and proper labels and stuff? will need to double-check syntax
        # compat
        # also TODO: sdcc 8051 training binary
        return
class Z80(Architecture):
    """Binary Ninja architecture for the Z80.

    Decoding is done by the module-level ``decode`` function; lifting is
    delegated to ``Z80IL``.  This class supplies the register and flag
    model, control-flow information and disassembly text.
    """

    name = 'Z80'
    address_size = 2
    default_int_size = 1
    instr_alignment = 1
    max_instr_length = 4

    # register related stuff
    regs = {
        # main registers
        'AF': RegisterInfo('AF', 2),
        'BC': RegisterInfo('BC', 2),
        'DE': RegisterInfo('DE', 2),
        'HL': RegisterInfo('HL', 2),

        # alternate registers
        "AF'": RegisterInfo("AF'", 2),
        "BC'": RegisterInfo("BC'", 2),
        "DE'": RegisterInfo("DE'", 2),
        "HL'": RegisterInfo("HL'", 2),

        # main registers (sub)
        "A": RegisterInfo("AF", 1, 1),
        "F": RegisterInfo("AF", 1, 0),
        "B": RegisterInfo("BC", 1, 1),
        "C": RegisterInfo("BC", 1, 0),
        "D": RegisterInfo("DE", 1, 1),
        "E": RegisterInfo("DE", 1, 0),
        "H": RegisterInfo("HL", 1, 1),
        "L": RegisterInfo("HL", 1, 0),
        "Flags": RegisterInfo("AF", 0),

        # alternate registers (sub)
        "A'": RegisterInfo("AF'", 1, 1),
        "F'": RegisterInfo("AF'", 1, 0),
        "B'": RegisterInfo("BC'", 1, 1),
        "C'": RegisterInfo("BC'", 1, 0),
        "D'": RegisterInfo("DE'", 1, 1),
        "E'": RegisterInfo("DE'", 1, 0),
        "H'": RegisterInfo("HL'", 1, 1),
        "L'": RegisterInfo("HL'", 1, 0),
        "Flags'": RegisterInfo("AF'", 0),

        # index registers
        'IX': RegisterInfo('IX', 2),
        'IY': RegisterInfo('IY', 2),
        'SP': RegisterInfo('SP', 2),

        # other registers
        'I': RegisterInfo('I', 1),
        'R': RegisterInfo('R', 1),

        # program counter
        'PC': RegisterInfo('PC', 2),

        # status
        'status': RegisterInfo('status', 1)
    }

    stack_pointer = "SP"

    #------------------------------------------------------------------------------
    # FLAG fun
    #------------------------------------------------------------------------------

    flags = ['s', 'z', 'h', 'pv', 'n', 'c']

    # remember, class None is default/integer
    semantic_flag_classes = ['class_bitstuff']

    # flag write types and their mappings
    flag_write_types = ['dummy', '*', 'c', 'z', 'cszpv', 'not_c']
    flags_written_by_flag_write_type = {
        'dummy': [],
        '*': ['s', 'z', 'h', 'pv', 'n', 'c'],
        'c': ['c'],
        'z': ['z'],
        # Declared in flag_write_types above, so it needs a mapping too;
        # the flag set is the one spelled out by the write type's name.
        'cszpv': ['c', 's', 'z', 'pv'],
        'not_c': ['s', 'z', 'h', 'pv', 'n']  # eg: z80's DEC
    }
    semantic_class_for_flag_write_type = {
        # by default, everything is type None (integer)
        # '*': 'class_integer',
        # 'c': 'class_integer',
        # 'z': 'class_integer',
        # 'cszpv': 'class_integer',
        # 'not_c': 'class_integer'
    }

    # groups and their mappings
    semantic_flag_groups = ['group_e', 'group_ne', 'group_lt']
    flags_required_for_semantic_flag_group = {
        'group_lt': ['c'],
        'group_e': ['z'],
        'group_ne': ['z']
    }
    flag_conditions_for_semantic_flag_group = {
        #'group_e': {None: LowLevelILFlagCondition.LLFC_E},
        #'group_ne': {None: LowLevelILFlagCondition.LLFC_NE}
    }

    # roles
    flag_roles = {
        's': FlagRole.NegativeSignFlagRole,
        'z': FlagRole.ZeroFlagRole,
        'h': FlagRole.HalfCarryFlagRole,
        # actually overflow or parity: TODO: implement later
        'pv': FlagRole.OverflowFlagRole,
        # set if last instruction was a subtraction (incl. CP)
        'n': FlagRole.SpecialFlagRole,
        'c': FlagRole.CarryFlagRole
    }

    # (condition) -> flags, for the default (None / integer) semantic class
    _integer_condition_flags = {
        # Z, zero flag for == and !=
        LowLevelILFlagCondition.LLFC_E: ['z'],
        LowLevelILFlagCondition.LLFC_NE: ['z'],
        # S, sign flag is in NEG and POS
        LowLevelILFlagCondition.LLFC_NEG: ['s'],
        LowLevelILFlagCondition.LLFC_POS: ['s'],
        # H, half carry for ???
        # P, parity for ???
        # s> s>= s< s<= (LLFC_SGT, LLFC_SGE, LLFC_SLT, LLFC_SLE) are done
        # by sub and overflow test, so they need no entry here.
        # C, for these
        LowLevelILFlagCondition.LLFC_UGE: ['c'],
        LowLevelILFlagCondition.LLFC_ULT: ['c'],
    }

    # MAP (condition x class) -> flags
    def get_flags_required_for_flag_condition(self, cond, sem_class):
        """Return the flag names needed to evaluate *cond*.

        Only the default semantic class (``None``) has mappings; any other
        class, and any unmapped condition, yields an empty list.
        """
        #LogDebug('incoming cond: %s, incoming sem_class: %s' % (str(cond), str(sem_class)))
        if sem_class is None:
            return self._integer_condition_flags.get(cond, [])
        return []

    #------------------------------------------------------------------------------
    # CFG building
    #------------------------------------------------------------------------------

    def get_instruction_info(self, data, addr):
        """Return length and branch information for the instruction at *addr*.

        Returns ``None`` when decoding fails or yields a zero length.
        Raises ``Exception`` when a JP, JR or CALL has an operand type this
        method does not know how to handle.
        """
        decoded = decode(data, addr)

        # on error, return nothing
        if decoded.status == DECODE_STATUS.ERROR or decoded.len == 0:
            return None

        # on non-branching, return length
        result = InstructionInfo()
        result.length = decoded.len
        if decoded.typ != INSTRTYPE.JUMP_CALL_RETURN:
            return result

        fallthrough = addr + decoded.len

        # jp has several variations
        if decoded.op == OP.JP:
            (oper_type, oper_val) = decoded.operands[0]

            # jp pe,0xDEAD
            if oper_type == OPER_TYPE.COND:
                assert decoded.operands[1][0] == OPER_TYPE.ADDR
                result.add_branch(BranchType.TrueBranch,
                                  decoded.operands[1][1])
                result.add_branch(BranchType.FalseBranch, fallthrough)
            # jp (hl); jp (ix); jp (iy)
            elif oper_type in [
                    OPER_TYPE.REG_DEREF, OPER_TYPE.MEM_DISPL_IX,
                    OPER_TYPE.MEM_DISPL_IY
            ]:
                result.add_branch(BranchType.IndirectBranch)
            # jp 0xDEAD
            elif oper_type == OPER_TYPE.ADDR:
                result.add_branch(BranchType.UnconditionalBranch, oper_val)
            else:
                raise Exception('handling JP')

        # jr can be conditional
        elif decoded.op == OP.JR:
            (oper_type, oper_val) = decoded.operands[0]

            # jr c,0xdf07
            if oper_type == OPER_TYPE.COND:
                assert decoded.operands[1][0] == OPER_TYPE.ADDR
                result.add_branch(BranchType.TrueBranch,
                                  decoded.operands[1][1])
                result.add_branch(BranchType.FalseBranch, fallthrough)
            # jr 0xdf07
            elif oper_type == OPER_TYPE.ADDR:
                result.add_branch(BranchType.UnconditionalBranch, oper_val)
            else:
                raise Exception('handling JR')

        # djnz is implicitly conditional
        elif decoded.op == OP.DJNZ:
            (oper_type, oper_val) = decoded.operands[0]
            assert oper_type == OPER_TYPE.ADDR
            result.add_branch(BranchType.TrueBranch, oper_val)
            result.add_branch(BranchType.FalseBranch, fallthrough)

        # call can be conditional
        elif decoded.op == OP.CALL:
            (oper_type, oper_val) = decoded.operands[0]

            # call c,0xdf07
            if oper_type == OPER_TYPE.COND:
                assert decoded.operands[1][0] == OPER_TYPE.ADDR
                result.add_branch(BranchType.CallDestination,
                                  decoded.operands[1][1])
            # call 0xdf07
            elif oper_type == OPER_TYPE.ADDR:
                result.add_branch(BranchType.CallDestination, oper_val)
            else:
                raise Exception('handling CALL')

        # ret can be conditional
        elif decoded.op == OP.RET:
            conditional = (decoded.operands
                           and decoded.operands[0][0] == OPER_TYPE.COND)
            # conditional returns don't end the block
            if not conditional:
                result.add_branch(BranchType.FunctionReturn)

        # ret from interrupts
        elif decoded.op == OP.RETI or decoded.op == OP.RETN:
            result.add_branch(BranchType.FunctionReturn)

        return result

    #------------------------------------------------------------------------------
    # STRING building, disassembly
    #------------------------------------------------------------------------------

    def reg2str(self, r):
        """Return the display name of register *r*.

        A trailing underscore in ``r.name`` is shown as a prime (``'``).
        """
        reg_name = r.name
        return reg_name if reg_name[-1] != '_' else reg_name[:-1] + "'"

    # from api/python/function.py:
    #
    #        TextToken                  Text that doesn't fit into the other tokens
    #        InstructionToken           The instruction mnemonic
    #        OperandSeparatorToken      The comma or whatever else separates tokens
    #        RegisterToken              Registers
    #        IntegerToken               Integers
    #        PossibleAddressToken       Integers that are likely addresses
    #        BeginMemoryOperandToken    The start of memory operand
    #        EndMemoryOperandToken      The end of a memory operand
    #        FloatingPointToken         Floating point number

    def _operand_tokens(self, oper_type, oper_val):
        """Return the text tokens for a single decoded operand.

        Raises ``Exception`` for an operand type that is not handled.
        """
        kind = InstructionTextTokenType
        open_paren = InstructionTextToken(kind.BeginMemoryOperandToken, '(')
        close_paren = InstructionTextToken(kind.EndMemoryOperandToken, ')')

        if oper_type == OPER_TYPE.REG:
            return [
                InstructionTextToken(kind.RegisterToken,
                                     self.reg2str(oper_val))
            ]

        if oper_type == OPER_TYPE.REG_DEREF:
            return [
                open_paren,
                InstructionTextToken(kind.RegisterToken,
                                     self.reg2str(oper_val)),
                close_paren,
            ]

        if oper_type == OPER_TYPE.ADDR:
            # negative values are shown as their 16-bit wrap-around
            if oper_val < 0:
                oper_val = oper_val & 0xFFFF
            txt = '0x%04x' % oper_val
            return [
                InstructionTextToken(kind.PossibleAddressToken, txt, oper_val)
            ]

        if oper_type == OPER_TYPE.ADDR_DEREF:
            txt = '0x%04x' % oper_val
            return [
                open_paren,
                InstructionTextToken(kind.PossibleAddressToken, txt, oper_val),
                close_paren,
            ]

        if oper_type in [OPER_TYPE.MEM_DISPL_IX, OPER_TYPE.MEM_DISPL_IY]:
            txt = 'IX' if oper_type == OPER_TYPE.MEM_DISPL_IX else 'IY'
            tokens = [open_paren,
                      InstructionTextToken(kind.RegisterToken, txt)]
            if oper_val == 0:
                # omit displacement of 0
                pass
            elif oper_val >= 16:
                # (iy+0x28)
                tokens.append(InstructionTextToken(kind.TextToken, '+'))
                tokens.append(
                    InstructionTextToken(kind.IntegerToken,
                                         '0x%X' % oper_val, oper_val))
            elif oper_val > 0:
                tokens.append(InstructionTextToken(kind.TextToken, '+'))
                tokens.append(
                    InstructionTextToken(kind.IntegerToken, '%d' % oper_val,
                                         oper_val))
            elif oper_val <= -16:
                # adc a,(ix-0x55)
                tokens.append(InstructionTextToken(kind.TextToken, '-'))
                tokens.append(
                    InstructionTextToken(kind.IntegerToken,
                                         '0x%X' % (-oper_val), oper_val))
            else:
                # -15..-1: the sign is already part of the '%d' text
                tokens.append(
                    InstructionTextToken(kind.IntegerToken, '%d' % oper_val,
                                         oper_val))
            tokens.append(close_paren)
            return tokens

        if oper_type == OPER_TYPE.IMM:
            if oper_val == 0:
                txt = '0'
            elif oper_val >= 16:
                txt = '0x%x' % oper_val
            else:
                txt = '%d' % oper_val
            return [InstructionTextToken(kind.IntegerToken, txt, oper_val)]

        if oper_type == OPER_TYPE.COND:
            return [InstructionTextToken(kind.TextToken, CC_TO_STR[oper_val])]

        if oper_type in [OPER_TYPE.REG_C_DEREF, OPER_TYPE.REG_BC_DEREF,
                         OPER_TYPE.REG_DE_DEREF, OPER_TYPE.REG_HL_DEREF,
                         OPER_TYPE.REG_SP_DEREF]:
            return [
                open_paren,
                InstructionTextToken(kind.RegisterToken,
                                     self.reg2str(oper_val)),
                close_paren,
            ]

        raise Exception('unknown operand type: ' + str(oper_type))

    def get_instruction_text(self, data, addr):
        """Return ``(tokens, length)`` for the instruction at *addr*.

        Returns ``None`` when decoding does not succeed or yields a zero
        length.  Raises ``Exception`` on an unhandled operand type.
        """
        decoded = decode(data, addr)
        if decoded.status != DECODE_STATUS.OK or decoded.len == 0:
            return None

        kind = InstructionTextTokenType

        # opcode
        result = [InstructionTextToken(kind.InstructionToken, decoded.op.name)]

        # space for operand
        if decoded.operands:
            result.append(InstructionTextToken(kind.TextToken, ' '))

        # operands
        last = len(decoded.operands) - 1
        for i, (oper_type, oper_val) in enumerate(decoded.operands):
            result.extend(self._operand_tokens(oper_type, oper_val))

            # if this isn't the last operand, add comma
            if i < last:
                result.append(
                    InstructionTextToken(kind.OperandSeparatorToken, ','))

        # Instructions carrying a metaLoad register are shown with an
        # "ld <reg>," prefix (presumably the undocumented opcodes that also
        # copy their result into a register -- TODO confirm against decoder).
        if decoded.metaLoad:
            (oper_type, oper_val) = decoded.metaLoad
            assert oper_type == OPER_TYPE.REG
            extras = [
                InstructionTextToken(kind.InstructionToken, 'ld'),
                InstructionTextToken(kind.TextToken, ' '),
                InstructionTextToken(kind.RegisterToken,
                                     self.reg2str(oper_val)),
                InstructionTextToken(kind.OperandSeparatorToken, ','),
            ]
            result = extras + result

        return result, decoded.len

    #------------------------------------------------------------------------------
    # LIFTING
    #------------------------------------------------------------------------------

    def get_flag_write_low_level_il(self, op, size, write_type, flag, operands,
                                    il):
        """Lift a flag write, preferring ``Z80IL.gen_flag_il``.

        Falls back to the base ``Architecture`` implementation when
        ``gen_flag_il`` returns a falsy value.
        """
        flag_il = Z80IL.gen_flag_il(op, size, write_type, flag, operands, il)
        if flag_il:
            return flag_il

        return Architecture.get_flag_write_low_level_il(
            self, op, size, write_type, flag, operands, il)

    def get_instruction_low_level_il(self, data, addr, il):
        """Lift the instruction at *addr* via ``Z80IL`` and return its length.

        Returns ``None`` when decoding does not succeed or yields a zero
        length.
        """
        decoded = decode(data, addr)
        if decoded.status != DECODE_STATUS.OK or decoded.len == 0:
            return None

        Z80IL.gen_instr_il(addr, decoded, il)

        return decoded.len
class Smali(Architecture):  # type: ignore
    """Architecture class for disassembling Dalvik bytecode into Smali

    Initializing the class calls android.smali.load_insns(), which imports
    cached instruction information from "android/instruction_data.pickle".

    The three mandatory Architecture functions are implemented:
        - get_instruction_info
        - get_instruction_text
        - get_instruction_low_level_il

    There is also load_dex(), which is called the first time any of the three
    functions are called. It grabs the reference to DexFile.
    """

    name = "Smali"

    # FIXME there should be 65536 registers, but binja hangs when the number gets above a thousand or so
    regs = dict(
        {f"v{i}": RegisterInfo(f"v{i}", 4) for i in range(256)},
        pc=RegisterInfo("pc", 4),
        fp=RegisterInfo("fp", 4),
        sp=RegisterInfo("sp", 4),
    )
    stack_pointer = "sp"
    max_instr_length = 200
    instr_alignment = 2

    def __init__(self) -> None:
        self.insns = load_insns()
        # (sic) attribute name kept as-is in case other code reads it.
        self.inialized_df: bool = False
        super().__init__()

    def load_dex(self) -> None:
        """Cache the DexFile stored on the registered 'Smali' architecture.

        Copies ``Architecture['Smali'].df`` to ``self.df`` and marks this
        instance as initialized so the lookup happens only once.
        """
        # FIXME all tabs in a window share the same Architecture class,
        # apparently. This means that, as far as I know, there's no way to
        # store this information per-tab. This could be hacked around if there
        # was a way to determine what binary is opened, but I don't see a way
        # to do that either.
        #
        # Edit: The settings API seems to provide a way to do this, but the
        # 'Context' instance doesn't seem to work.
        # https://api.binary.ninja/binaryninja.settings-module.html
        # Settings('Context').register_group('newgrp', 'asdfasdf')
        # Settings('Context').register_setting(
        #     'newgrp.asdff',
        #     '{"description" : "test descr", "title" : "test title", "default" : "asd", "type" : "string"}',
        # )
        # Setting group: newgrp does not exist!
        self.df: DexFile = Architecture['Smali'].df
        self.inialized_df = True

    def _ensure_dex(self) -> None:
        """Run load_dex() if it has not been run on this instance yet."""
        if not self.inialized_df:
            self.load_dex()

    def _decode_args(self, data: bytes, insn_info):
        """Parse the operand fields of the instruction at the start of *data*.

        Only the instruction's own code units are parsed, after swapping the
        bytes of each 16-bit unit.
        """
        raw = endian_swap_shorts(data[: 2 * insn_info.fmt.insn_len])
        return parse_with_format(raw, insn_info.fmt.format_)

    @staticmethod
    def _signed_field(args, insn_info, field: str):
        """Return operand *field* sign-extended from its width in the format."""
        return sign(args[field], insn_info.fmt.format_.count(field))

    def get_instruction_info(self, data: bytes, addr: FileOffset) -> InstructionInfo:
        """Return length and branch information for the code at *addr*.

        Branch offsets are counted in 16-bit code units, hence the ``* 2``
        when converting them to byte addresses.
        """
        self._ensure_dex()
        ii = InstructionInfo()

        # Handle pseudoinstructions
        if data[0] == 0 and data[1] != 0:
            if data[1] > 3:
                ii.length = 2
                return ii
            # Payload sizes come from the DexFile, capped at the longest
            # instruction this architecture may report.
            ii.length = min(
                self.max_instr_length, self.df.pseudoinstructions[addr]._total_size
            )
            ii.add_branch(BranchType.FunctionReturn)
            return ii

        # Handle normal instructions
        insn_info = self.insns[data[0]]
        mnemonic = insn_info.mnemonic
        insn_bytes = insn_info.fmt.insn_len * 2
        ii.length = insn_bytes

        if mnemonic.startswith("return"):
            ii.add_branch(BranchType.FunctionReturn)
        elif mnemonic == "throw":
            ii.add_branch(BranchType.ExceptionBranch)
            # TODO
        elif mnemonic.startswith("goto"):
            args = self._decode_args(data, insn_info)
            offset = self._signed_field(args, insn_info, "A")
            ii.add_branch(BranchType.UnconditionalBranch, target=addr + offset * 2)
        elif mnemonic == "packed-switch" or mnemonic == "sparse-switch":
            ii.add_branch(BranchType.UnresolvedBranch)
            # Adding more than 2 branches causes binja to segfault, so this has
            # to be handled in LLIL instead.
        elif mnemonic == "fill-array-data":
            args = self._decode_args(data, insn_info)
            offset = self._signed_field(args, insn_info, "B")
            ii.add_branch(BranchType.TrueBranch, target=addr + offset * 2)
            ii.add_branch(BranchType.FalseBranch, target=addr + insn_bytes)
        elif mnemonic.startswith("if-"):
            args = self._decode_args(data, insn_info)
            # The branch offset is field C when the format has one,
            # otherwise field B.
            var = "C" if "C" in args else "B"
            offset = self._signed_field(args, insn_info, var)
            ii.add_branch(BranchType.TrueBranch, target=addr + offset * 2)
            ii.add_branch(BranchType.FalseBranch, target=addr + insn_bytes)
        elif mnemonic.startswith("invoke-"):
            if mnemonic.startswith("invoke-custom"):
                log_warn("Resolution of invoke-custom is not implemented")
                ii.add_branch(BranchType.UnresolvedBranch)
            else:
                args = self._decode_args(data, insn_info)
                meth = self.df.method_ids[args["B"]]
                # Methods without a recorded code offset get no call edge.
                if meth._insns_off is not None:
                    ii.add_branch(BranchType.CallDestination, target=meth._insns_off)
        return ii

    def get_instruction_text(
        self, data: bytes, addr: FileOffset
    ) -> Tuple[List[InstructionTextToken], int]:
        """Disassemble the instruction at *addr* via ``disassemble``."""
        self._ensure_dex()
        return disassemble(self.df, data, addr)

    def get_instruction_low_level_il(
        self, data: bytes, addr: FileOffset, il: LowLevelILFunction
    ) -> int:
        """Return the instruction length in bytes; no IL is appended yet.

        For packed-switch (0x2B) a label is ensured for every target in the
        payload, in preparation for a jump table that is not emitted yet.
        sparse-switch (0x2C) is not handled.
        """
        self._ensure_dex()
        insn_info = self.insns[data[0]]

        if data[0] == 0x2B:  # packed-switch
            args = self._decode_args(data, insn_info)
            offset = self._signed_field(args, insn_info, "B")
            payload = cast(
                SmaliPackedSwitchPayload,
                self.df.pseudoinstructions[cast(FileOffset, addr + offset * 2)],
            )
            branches = []
            for rel_target in payload.targets:
                target_addr = addr + rel_target * 2
                label = il.get_label_for_address(self, target_addr)
                if label is None:
                    il.add_label_for_address(self, target_addr)
                    label = il.get_label_for_address(self, target_addr)
                branches.append(label)
            # TODO: emit the switch itself. Sketch of the intended lifting:
            #   reg = il.add(4, il.reg(4, f'v{args["A"]}'), il.const(4, 1))
            #   branches_list = il.add_label_list(branches)
            #   il.append(il.expr(LowLevelILOperation.LLIL_JUMP_TO, reg, branches))
            # TODO: sparse-switch (0x2C) -- would pair payload.keys with
            # payload.targets (addr + target * 2).
        return insn_info.fmt.insn_len * 2
class Smali(Architecture):  # type: ignore
    """Architecture class for disassembling Dalvik bytecode into Smali.

    Initializing the class calls android.smali.load_insns(), which imports
    cached instruction information from "android/instruction_data.pickle".

    The three mandatory Architecture functions are implemented:
    - get_instruction_info
    - get_instruction_text
    - get_instruction_low_level_il

    There is also load_dex(), which is called at the beginning of all three
    functions. It grabs the reference to the DexFile in view.
    """

    name = "Smali"

    # FIXME there should be 65536 registers, but binja hangs when the number
    # gets above a thousand or so
    regs = dict(
        {f"v{i}": RegisterInfo(f"v{i}", 4) for i in range(256)},
        pc=RegisterInfo("pc", 4),
        fp=RegisterInfo("fp", 4),
        sp=RegisterInfo("sp", 4),
    )
    stack_pointer = "sp"
    max_instr_length = 200
    instr_alignment = 2

    def __init__(self) -> None:
        # The instruction table is loaded before the base-class initializer
        # runs, matching the original ordering.
        self.insns = load_insns()
        super().__init__()

    def load_dex(self) -> None:
        """Set self.df to DexFile of focused file."""
        smali_arch = Architecture["Smali"]
        self.df: DexFile = smali_arch.dfs[smali_arch.frame]

    @staticmethod
    def _parse_operands(data: bytes, insn_info):
        """Return the parsed operand fields of the instruction at the start of *data*.

        Only the instruction's own code units (``insn_len`` 16-bit units) are
        byte-swapped and handed to ``parse_with_format``.
        """
        raw = endian_swap_shorts(data[: 2 * insn_info.fmt.insn_len])
        return parse_with_format(raw, insn_info.fmt.format_)

    @staticmethod
    def _signed_operand(args, insn_info, field: str) -> int:
        """Return operand *field* passed through ``sign``.

        The second argument to ``sign`` is the number of times the field
        letter occurs in the format string (presumably its width in nibbles;
        verify against ``sign``).
        """
        return sign(args[field], insn_info.fmt.format_.count(field))

    def get_instruction_info(self, data: bytes, addr: FileOffset) -> InstructionInfo:
        """Report the length and control-flow effect of the instruction at *addr*.

        Branch offsets in the bytecode are multiplied by 2 before being added
        to *addr*, as instruction lengths are (``insn_len * 2``).
        """
        self.load_dex()
        ii = InstructionInfo()

        # Handle pseudoinstructions: opcode byte 0 with a non-zero second byte.
        if data[0] == 0 and data[1] != 0:
            if data[1] > 3:
                # Not one of the payload identifiers 1..3; consume one unit.
                ii.length = 2
                return ii
            ii.length = min(
                self.max_instr_length,
                self.df.pseudoinstructions[addr]._total_size,
            )
            ii.add_branch(BranchType.FunctionReturn)
            return ii

        # Handle normal instructions
        insn_info = self.insns[data[0]]
        mnemonic = insn_info.mnemonic
        fallthrough = addr + insn_info.fmt.insn_len * 2
        ii.length = insn_info.fmt.insn_len * 2

        if mnemonic.startswith("return"):
            ii.add_branch(BranchType.FunctionReturn)
        elif mnemonic == "throw":
            ii.add_branch(BranchType.ExceptionBranch)
            # TODO
        elif mnemonic.startswith("goto"):
            args = self._parse_operands(data, insn_info)
            offset = self._signed_operand(args, insn_info, "A")
            ii.add_branch(BranchType.UnconditionalBranch, target=addr + offset * 2)
        elif mnemonic in ("packed-switch", "sparse-switch"):
            # Adding more than 2 branches causes binja to segfault, so this has
            # to be handled in LLIL instead.
            ii.add_branch(BranchType.UnresolvedBranch)
        elif mnemonic == "fill-array-data" or mnemonic.startswith("if-"):
            args = self._parse_operands(data, insn_info)
            if mnemonic == "fill-array-data":
                field = "B"
            else:
                # Two-register compares carry the offset in C, the
                # compare-with-zero forms carry it in B.
                field = "C" if "C" in args else "B"
            offset = self._signed_operand(args, insn_info, field)
            ii.add_branch(BranchType.TrueBranch, target=addr + offset * 2)
            ii.add_branch(BranchType.FalseBranch, target=fallthrough)
        elif mnemonic.startswith("invoke-"):
            if mnemonic.startswith("invoke-custom"):
                log_warn("Resolution of invoke-custom is not implemented")
                ii.add_branch(BranchType.UnresolvedBranch)
            else:
                args = self._parse_operands(data, insn_info)
                meth = self.df.method_ids[args["B"]]
                # Methods without code (no instruction offset) get no call edge.
                if meth._insns_off is not None:
                    ii.add_branch(BranchType.CallDestination, target=meth._insns_off)
        return ii

    def get_instruction_text(
        self, data: bytes, addr: FileOffset
    ) -> Tuple[List[InstructionTextToken], int]:
        """Disassemble the instruction at *addr* via ``disassemble``."""
        self.load_dex()
        return disassemble(self.df, data, addr)

    def get_instruction_low_level_il(
        self, data: bytes, addr: FileOffset, il: LowLevelILFunction
    ) -> int:
        """Register IL labels for packed-switch targets; return the instruction length.

        No IL expressions are emitted yet. For packed-switch (0x2B) a label is
        created for every target in the payload that does not have one.
        Sparse-switch (0x2C) is deliberately not handled: the original
        condition read ``data[0] == 0x2B or data[0] == 0x2C and False``, which
        by operator precedence is just ``data[0] == 0x2B``.

        TODO: emit an LLIL_JUMP_TO over the collected labels and implement
        sparse-switch (targets come from the payload's keys/targets tables).
        """
        self.load_dex()
        insn_info = self.insns[data[0]]

        if data[0] == 0x2B:  # packed-switch
            args = self._parse_operands(data, insn_info)
            offset = self._signed_operand(args, insn_info, "B")
            payload = cast(
                SmaliPackedSwitchPayload,
                self.df.pseudoinstructions[cast(FileOffset, addr + offset * 2)],
            )
            for relative_target in payload.targets:
                target_addr = addr + relative_target * 2
                if il.get_label_for_address(self, target_addr) is None:
                    il.add_label_for_address(self, target_addr)

        return insn_info.fmt.insn_len * 2
class GB(Architecture):
    """Binary Ninja architecture for the Game Boy CPU (disassembly only).

    Decoding is table driven: the module-level ``opcodes`` mapping is looked
    up by ``hex(opcode)`` and supplies mnemonic, length, operands and flags.
    """

    name = "GB"
    address_size = 2
    default_int_size = 1
    max_instr_length = 3

    regs = {
        'a': RegisterInfo('a', 1),
        'b': RegisterInfo('b', 1),
        'c': RegisterInfo('c', 1),
        'd': RegisterInfo('d', 1),
        'e': RegisterInfo('e', 1),
        'f': RegisterInfo('f', 1),
        'h': RegisterInfo('h', 1),
        'l': RegisterInfo('l', 1),
        'af': RegisterInfo('af', 2),
        'bc': RegisterInfo('bc', 2),
        # NOTE(review): 'cb' looks like a transposition of 'bc' - confirm
        # before removing, other code may refer to it.
        'cb': RegisterInfo('cb', 2),
        'de': RegisterInfo('de', 2),
        'hl': RegisterInfo('hl', 2),
        'sp': RegisterInfo('sp', 2),
        'pc': RegisterInfo('pc', 2),
    }
    stack_pointer = 'sp'

    flags = ["z", "n", "h", "c"]
    flag_write_types = ["*", "czn", "zn"]
    flag_roles = {
        'z': FlagRole.ZeroFlagRole,
        'n': FlagRole.NegativeSignFlagRole,
        'h': FlagRole.HalfCarryFlagRole,
        'c': FlagRole.CarryFlagRole,
    }
    flags_written_by_flag_write_type = {
        "*": ["c", "z", "h", "n"],
        "czn": ["c", "z", "n"],
        "zn": ["z", "n"],
    }

    def decode_operand(self, operand):
        """Return *operand* if it names a register in ``regs``, else None."""
        return operand if operand in self.regs else None

    def decode_instruction(self, data, addr):
        """Decode one instruction from *data*.

        Returns ``(mnemonic, length, operands, flags, value)``; every element
        is None when *data* is empty, the opcode is not in the table, or
        *data* is shorter than the instruction. ``operands`` and ``flags``
        are lower-cased; ``value`` is the 8-bit or little-endian 16-bit
        immediate (None for 1-byte instructions).
        """
        undecodable = (None, None, None, None, None)
        if len(data) < 1:
            return undecodable

        try:
            info = opcodes[hex(data[0])]
        except KeyError:
            return undecodable

        length = info['length']
        if len(data) < length:
            # Truncated instruction: previously raised IndexError/struct.error.
            return undecodable

        operands = [
            info[key].lower() for key in ('operand1', 'operand2') if key in info
        ]
        flags = [f.lower() for f in info['flags']]

        if length == 2:
            value = data[1]
        elif length == 3:
            value = struct.unpack('<H', data[1:3])[0]
        else:
            value = None
        return info['mnemonic'], length, operands, flags, value

    def perform_get_instruction_info(self, data, addr):
        """Return length and branch information, or None if undecodable."""
        instr, length, _operands, _flags, _value = self.decode_instruction(
            data, addr)
        if instr is None:
            return None

        result = InstructionInfo()
        result.length = length
        opcode = data[0]

        if instr == 'JR':
            # Signed 8-bit displacement relative to the next instruction.
            raw = data[1]
            displacement = raw if raw < 128 else raw - 256
            taken = addr + 2 + displacement
            if opcode in (0x28, 0x38):
                result.add_branch(BranchType.TrueBranch, taken)
                result.add_branch(BranchType.FalseBranch, addr + 2)
            elif opcode in (0x20, 0x30):
                # Negated conditions are modelled with the arms swapped.
                result.add_branch(BranchType.TrueBranch, addr + 2)
                result.add_branch(BranchType.FalseBranch, taken)
            else:
                result.add_branch(BranchType.UnconditionalBranch, taken)
        elif instr == 'JP':
            if opcode == 0xe9:
                # Register-indirect jump: the target is not known statically.
                # (Was an unconditional branch to the placeholder 0xdead.)
                result.add_branch(BranchType.IndirectBranch)
            else:
                target = struct.unpack('<H', data[1:3])[0]
                if opcode in (0xca, 0xda):
                    result.add_branch(BranchType.TrueBranch, target)
                    result.add_branch(BranchType.FalseBranch, addr + 3)
                elif opcode in (0xc2, 0xd2):
                    result.add_branch(BranchType.TrueBranch, addr + 3)
                    result.add_branch(BranchType.FalseBranch, target)
                else:
                    result.add_branch(BranchType.UnconditionalBranch, target)
        elif instr == 'RET':
            result.add_branch(BranchType.FunctionReturn)
        elif instr == 'CALL':
            result.add_branch(BranchType.CallDestination,
                              struct.unpack("<H", data[1:3])[0])
        return result

    def get_token(self, mnemonic, operand, data):
        """Build the text token for one (lower-cased) operand string.

        Operands mentioning ``d8``/``r8``/``a8`` or ``d16``/``r16``/``a16``
        are rendered from the immediate in *data*; anything else is rendered
        as a register token.
        """
        if re.search(r'(d|r|a)8', operand) is not None:
            value = data[1]
            if re.match(r'(d|r|a)8', operand) is not None:
                return InstructionTextToken(
                    InstructionTextTokenType.IntegerToken,
                    "0x%.2x" % value, value)
            if re.match(r'\(a8\)', operand) is not None:
                # High-page access: the byte is an offset into 0xff00-0xffff.
                return InstructionTextToken(
                    InstructionTextTokenType.PossibleAddressToken,
                    "0xff%.2x" % value, value | 0xff00)
            return InstructionTextToken(
                InstructionTextTokenType.PossibleAddressToken,
                "0x%.4x" % value, value)

        if re.search(r'(d|r|a)16', operand) is not None:
            value = struct.unpack('<H', data[1:3])[0]
            if re.match(r'(d|r|a)16', operand) is None:
                # Immediate wrapped in something else, e.g. "(a16)".
                return InstructionTextToken(
                    InstructionTextTokenType.PossibleAddressToken,
                    "0x%.4x" % value, value)
            if mnemonic == "CALL":
                return InstructionTextToken(
                    InstructionTextTokenType.DataSymbolToken,
                    "sub_%x" % value, value)
            return InstructionTextToken(
                InstructionTextTokenType.IntegerToken,
                "0x%.4x" % value, value)

        # Every remaining case produced the same register token originally.
        return InstructionTextToken(
            InstructionTextTokenType.RegisterToken, operand.lower())

    def perform_get_instruction_text(self, data, addr):
        """Return ``(tokens, length)`` for the instruction, or None."""
        instr, length, operands, _flags, _value = self.decode_instruction(
            data, addr)
        # Check before touching data: empty input used to raise IndexError.
        if instr is None:
            return None

        tokens = [
            InstructionTextToken(InstructionTextTokenType.InstructionToken,
                                 instr.lower())
        ]
        if len(operands) >= 1:
            # Pad the mnemonic out to an 8-character column.
            tokens.append(
                InstructionTextToken(
                    InstructionTextTokenType.OperandSeparatorToken,
                    ''.rjust(8 - len(instr))))
            tokens.append(self.get_token(instr, operands[0], data))
        if len(operands) == 2:
            tokens.append(
                InstructionTextToken(
                    InstructionTextTokenType.OperandSeparatorToken, ', '))
            tokens.append(self.get_token(instr, operands[1], data))
        return tokens, length

    def perform_get_instruction_low_level_il(self, data, addr, il):
        """Lifting is not implemented."""
        return None
class SN8F2288(Architecture):
    """Binary Ninja architecture for the SN8F2288 (disassembly only).

    Every instruction is one little-endian 16-bit word, so byte addresses
    must be even and the word address is ``addr // 2``.
    """

    name = "SN8F2288"
    endianness = Endianness.LittleEndian
    address_size = 2
    default_int_size = 1
    instr_alignment = 1
    max_instr_length = 2

    regs = {
        "STKP": RegisterInfo("STKP", 1),
        "A": RegisterInfo("A", 1),
        "R": RegisterInfo("R", 1),
        "Z": RegisterInfo("Z", 1),
        "Y": RegisterInfo("Y", 1),
        "PFLAG": RegisterInfo("PFLAG", 1),
        "RBANK": RegisterInfo("RBANK", 1)
    }
    stack_pointer = "STKP"

    flags = []
    flag_write_types = []
    flag_roles = {}
    flags_required_for_flag_condition = {}
    flags_written_by_flag_write_type = {}

    def perform_get_instruction_info(self, data, addr):
        """Return length and branch info for the word at *addr*, or None.

        None is returned for short data, an odd address, or an opcode that
        is missing from ``opcode_dict``.
        """
        # print(...) with one parenthesised argument behaves the same on
        # Python 2 and Python 3.
        if len(data) < 2:
            print("perform_get_instruction_info(%s, 0x%04x), not enough data!" % (
                tohex(data), addr))
            return None
        if addr % 2 != 0:
            print("perform_get_instruction_info(%s, 0x%04x), address not aligned!" % (
                tohex(data), addr))
            return None

        info = InstructionInfo()
        info.length = self.max_instr_length

        # workaround for a Binary Ninja bug, data is not guaranteed to be
        # max_instr_length bytes
        data = data[:self.max_instr_length]
        instruction = unpack('<H', data)[0]

        # The table key is derived from the high byte; how many of its bits
        # are significant depends on the range it falls in.
        bincode = instruction >> 8
        if bincode >= 0x80:
            opcode_key = bincode & 0xc0
        elif bincode >= 0x40:
            opcode_key = bincode & 0xf8
        else:
            opcode_key = bincode

        try:
            mask, _opspace, jump_action, _opcode, _caption = opcode_dict[
                opcode_key]
        except KeyError:
            return None  # TODO is it possible to get more information?

        operand = instruction & mask
        branches_by_action = {
            NONXT: [(BranchType.FunctionReturn, 0)],
            NEXTI: [],
            BRNCH: [(BranchType.TrueBranch, addr + 4),
                    (BranchType.FalseBranch, addr + 2)],
            # ROM addresses are 16 bits of data per address
            JUMPI: [(BranchType.UnconditionalBranch, operand * 2)],
            # ROM addresses are 16 bits of data per address
            CALLI: [(BranchType.CallDestination, operand * 2)],
        }
        for branch_type, target in branches_by_action[jump_action]:
            info.add_branch(branch_type, target)
        return info

    def perform_get_instruction_text(self, data, addr):
        """Return ``(tokens, length)`` for the word at *addr*, or None.

        Both failure paths return None; the misaligned-address path
        previously returned the tuple ``(None, None)``.
        """
        if len(data) < 2:
            print("perform_get_instruction_text(%s, 0x%04x), not enough data!" % (
                tohex(data), addr))
            return None
        if addr % 2 != 0:
            print("perform_get_instruction_text(%s, 0x%04x), address not aligned!" % (
                tohex(data), addr))
            return None

        # workaround for a Binary Ninja bug, data is not guaranteed to be
        # max_instr_length bytes
        data = data[:self.max_instr_length]
        instruction = unpack('<H', data)[0]

        # Floor division keeps the word address an integer on Python 3 too.
        tokens = disassemble(addr // 2, instruction)
        return tokens, self.max_instr_length

    def perform_get_instruction_low_level_il(self, data, addr, il):
        """Lifting is not implemented."""
        return None
class Z80(Architecture):
    # Register-table-only definition of the Z80 architecture: this class
    # declares sizes and registers but no decoding methods.
    name = 'Z80'
    address_size = 2
    default_int_size = 1
    instr_alignment = 1
    max_instr_length = 4

    # register related stuff
    # RegisterInfo arguments as used here: (full-width register, size in
    # bytes[, byte offset inside the full-width register]).
    regs = {
        # main registers
        'AF': RegisterInfo('AF', 2),
        'BC': RegisterInfo('BC', 2),
        'DE': RegisterInfo('DE', 2),
        'HL': RegisterInfo('HL', 2),

        # alternate registers
        'AF_': RegisterInfo('AF_', 2),
        'BC_': RegisterInfo('BC_', 2),
        'DE_': RegisterInfo('DE_', 2),
        'HL_': RegisterInfo('HL_', 2),

        # main registers (sub)
        # offset 1 selects the high byte of the pair, offset 0 the low byte
        'A': RegisterInfo('AF', 1, 1),
        'B': RegisterInfo('BC', 1, 1),
        'C': RegisterInfo('BC', 1, 0),
        'D': RegisterInfo('DE', 1, 1),
        'E': RegisterInfo('DE', 1, 0),
        'H': RegisterInfo('HL', 1, 1),
        'L': RegisterInfo('HL', 1, 0),
        # NOTE(review): declared with size 0 - presumably a placeholder for
        # the F half of AF; confirm this is intended.
        'Flags': RegisterInfo('AF', 0),

        # alternate registers (sub)
        'A_': RegisterInfo('AF_', 1, 1),
        'B_': RegisterInfo('BC_', 1, 1),
        'C_': RegisterInfo('BC_', 1, 0),
        'D_': RegisterInfo('DE_', 1, 1),
        'E_': RegisterInfo('DE_', 1, 0),
        'H_': RegisterInfo('HL_', 1, 1),
        'L_': RegisterInfo('HL_', 1, 0),
        # NOTE(review): size 0, same caveat as 'Flags' above.
        'Flags_': RegisterInfo('AF_', 0),

        # index registers
        'IX': RegisterInfo('IX', 2),
        'IY': RegisterInfo('IY', 2),
        'SP': RegisterInfo('SP', 2),

        # other registers
        'I': RegisterInfo('I', 1),
        'R': RegisterInfo('R', 1),

        # program counter
        'PC': RegisterInfo('PC', 2),

        # status
        'status': RegisterInfo('status', 1)
    }

    stack_pointer = "SP"
class LR35902(Architecture):
    """Binary Ninja architecture for the LR35902 (disassembly only).

    Instruction metadata is loaded from ``opcodes.json`` next to this file;
    its ``unprefixed`` and ``cbprefixed`` tables are keyed by ``'%#x' % byte``.
    """

    name = 'LR35902'
    address_size = 2        # 16-bit addresses
    default_int_size = 1    # 1-byte integers
    instr_alignment = 1     # no instruction alignment
    max_instr_length = 3    # maximum length (opcodes 1-2, operands 0-2 bytes)

    regs = {
        # Main registers
        'AF': RegisterInfo('AF', 2),
        'BC': RegisterInfo('BC', 2),
        'DE': RegisterInfo('DE', 2),
        'HL': RegisterInfo('HL', 2),
        'SP': RegisterInfo('SP', 2),
        'PC': RegisterInfo('PC', 2),

        # Sub registers
        'A': RegisterInfo('AF', 1, 1),
        'Flags': RegisterInfo('AF', 0),
        'B': RegisterInfo('BC', 1, 1),
        'C': RegisterInfo('BC', 1, 0),
        'D': RegisterInfo('DE', 1, 1),
        'E': RegisterInfo('DE', 1, 0),
        'H': RegisterInfo('HL', 1, 1),
        'L': RegisterInfo('HL', 1, 0),
    }

    flags = ["z", "n", "h", "c"]
    flag_write_types = ["*", "czn", "zn"]
    flag_roles = {
        'z': FlagRole.ZeroFlagRole,
        'n': FlagRole.NegativeSignFlagRole,
        'h': FlagRole.HalfCarryFlagRole,
        'c': FlagRole.CarryFlagRole,
    }
    flags_written_by_flag_write_type = {
        "*": ["c", "z", "h", "n"],
        "czn": ["c", "z", "n"],
        "zn": ["z", "n"],
    }

    stack_pointer = "SP"

    # Result of _decode_instruction for anything that cannot be decoded.
    INVALID_INS = (None, None, None, None, None)

    conditions_strings = ['C', 'NC', 'Z', 'NZ']
    bit_instructions = ['BIT', 'RES', 'SET']

    # (address, name)
    IO_REGISTERS = {
        0xFF00: "P1",
        0xFF01: "SB",
        0xFF02: "SC",
        0xFF04: "DIV",
        0xFF05: "TIMA",
        0xFF06: "TMA",
        0xFF07: "TAC",
        0xFF0F: "IF",
        0xFF10: "NR10",
        0xFF11: "NR11",
        0xFF12: "NR12",
        0xFF13: "NR13",
        0xFF14: "NR14",
        0xFF16: "NR21",
        0xFF17: "NR22",
        0xFF18: "NR23",
        0xFF19: "NR24",
        0xFF1A: "NR30",
        0xFF1B: "NR31",
        0xFF1C: "NR32",
        0xFF1D: "NR33",
        0xFF1E: "NR34",
        0xFF20: "NR41",
        0xFF21: "NR42",
        0xFF22: "NR43",
        0xFF23: "NR44",
        0xFF24: "NR50",
        0xFF25: "NR51",
        0xFF26: "NR52",
        0xFF30: "WAV0",
        0xFF31: "WAV1",
        0xFF32: "WAV2",
        0xFF33: "WAV3",
        0xFF34: "WAV4",
        0xFF35: "WAV5",
        0xFF36: "WAV6",
        0xFF37: "WAV7",
        0xFF38: "WAV8",
        0xFF39: "WAV9",
        0xFF3A: "WAVA",
        0xFF3B: "WAVB",
        0xFF3C: "WAVC",
        0xFF3D: "WAVD",
        0xFF3E: "WAVE",
        0xFF3F: "WAVF",
        0xFF40: "LCDC",
        0xFF41: "STAT",
        0xFF42: "SCY",
        0xFF43: "SCX",
        0xFF44: "LY",
        0xFF45: "LYC",
        0xFF46: "DMA",
        0xFF47: "BGP",
        0xFF48: "OBP0",
        0xFF49: "OBP1",
        0xFF4A: "WY",
        0xFF4B: "WX",
        0xFF4D: "KEY1",
        0xFF4F: "VBK",
        0xFF51: "HDMA1",
        0xFF52: "HDMA2",
        0xFF53: "HDMA3",
        0xFF54: "HDMA4",
        0xFF55: "HDMA5",
        0xFF56: "RP",
        0xFF68: "BCPS",
        0xFF69: "BCPD",
        0xFF6A: "OCPS",
        0xFF6B: "OCPD",
        0xFF70: "SVBK",
        0xFFFF: "IE",
    }

    # Digit atoms accepted as the bit index of BIT/RES/SET.
    _BIT_INDEX_ATOMS = tuple(str(bit) for bit in range(8))

    def __init__(self):
        Architecture.__init__(self)
        basepath = os.path.dirname(os.path.realpath(__file__))
        with open(os.path.join(basepath, 'opcodes.json')) as fin:
            self.opcodes = json.load(fin)

    def _get_io_register(self, addr):
        """Return the symbolic name for high-page address *addr*.

        0xFF80-0xFFFE map to ``HRAM_xx``; everything else is looked up in
        ``IO_REGISTERS``. Raises KeyError for an unnamed address.
        """
        if 0xFF80 <= addr < 0xFFFF:
            return f'HRAM_{addr-0xFF80:02X}'
        return self.IO_REGISTERS[addr]

    def _decode_instruction(self, data: bytes, addr: int):
        """Decode one instruction.

        Returns ``(mnemonic, length, operands, flags, value)`` or
        ``INVALID_INS`` when *data* is empty, the opcode is unknown, or
        *data* is shorter than the instruction.
        """
        if len(data) == 0:
            return self.INVALID_INS

        if data[0] == 0xCB:
            # Two-byte opcode: the second byte selects the operation.
            if len(data) < 2:
                return self.INVALID_INS
            ins_entry = self.opcodes['cbprefixed'].get('%#x' % data[1], None)
        else:
            ins_entry = self.opcodes['unprefixed'].get('%#x' % data[0], None)
        if not ins_entry:
            return self.INVALID_INS

        ins_length = ins_entry['length']
        if len(data) < ins_length:
            # Truncated instruction: previously raised IndexError/struct.error
            # when the immediate was read below.
            return self.INVALID_INS

        ins_operands = [
            ins_entry[key] for key in ('operand1', 'operand2') if key in ins_entry
        ]
        ins_flags = [f.lower() for f in ins_entry['flags']]

        if ins_length == 2:
            ins_value = data[1]
        elif ins_length == 3:
            ins_value = struct.unpack('<H', data[1:3])[0]
        else:
            ins_value = None
        return ins_entry['mnemonic'], ins_length, ins_operands, ins_flags, ins_value

    def _get_token(self, mnemonic: str, operand: str, data: bytes, addr: int,
                   instruction_length: int):
        """Return the list of text tokens for one operand string.

        The operand is split on ``( ) + -`` and each atom is rendered by
        kind. Raises ValueError for an unnamed I/O register, unbalanced
        parentheses, or an unrecognized atom.
        """
        if mnemonic == 'STOP':
            return [InstructionTextToken(InstructionTextTokenType.TextToken, '0')]
        if mnemonic == 'RST':
            # Operand looks like "38H": the first two characters are hex.
            value = bytes.fromhex(operand[:2])[0]
            return [InstructionTextToken(
                InstructionTextTokenType.AddressDisplayToken,
                f"irs_usr{value//8}", value)]

        result = []
        depth = 0
        atoms = [t for t in re.split(r'([()\+\-])', operand) if t]

        for atom in atoms:
            if atom == 'd8':
                value = data[1]
                result.append(InstructionTextToken(
                    InstructionTextTokenType.PossibleValueToken,
                    f'{value:#04x}', value))
            elif atom == 'd16':
                value = struct.unpack('<H', data[1:3])[0]
                result.append(InstructionTextToken(
                    InstructionTextTokenType.PossibleAddressToken,
                    f'{value:#06x}', value))
            elif atom == 'a8':
                value = struct.unpack('<B', data[1:2])[0]
                # Only the name lookup is guarded, and only for the failure
                # it can produce (was a bare ``except:``).
                try:
                    io_name = self._get_io_register(0xFF00+value)
                except KeyError as err:
                    raise ValueError(
                        f'Invalid IO register offset {value} in {mnemonic} {atoms} at addr {addr:#0x}') from err
                result.append(InstructionTextToken(
                    InstructionTextTokenType.DataSymbolToken,
                    io_name, 0xFF00+value))
            elif atom == 'a16':
                value = struct.unpack('<H', data[1:3])[0]
                result.append(InstructionTextToken(
                    InstructionTextTokenType.PossibleAddressToken,
                    f'{value:#06x}', value))
            elif atom == 'r8':
                value = struct.unpack('<b', data[1:2])[0]
                if atoms[0] == 'SP':
                    # SP+r8: show the signed displacement itself
                    result.append(InstructionTextToken(
                        InstructionTextTokenType.PossibleAddressToken,
                        f'{value:#04x}', value))
                else:
                    # r8: show the resolved target of the relative jump
                    result.append(InstructionTextToken(
                        InstructionTextTokenType.PossibleAddressToken,
                        f'{addr+value+instruction_length:#06x}',
                        addr+instruction_length+value))
            elif atom == '(':
                depth += 1
                result.append(InstructionTextToken(
                    InstructionTextTokenType.BeginMemoryOperandToken, atom))
            elif atom == ')':
                depth -= 1
                if depth < 0:
                    raise ValueError(f'Unbalanced parenthesis in {atoms}')
                result.append(InstructionTextToken(
                    InstructionTextTokenType.EndMemoryOperandToken, atom))
            elif atom in ('+', '-'):
                result.append(InstructionTextToken(
                    InstructionTextTokenType.TextToken, atom))
            elif atom in self.conditions_strings and mnemonic in ['CALL', 'RET', 'JR', 'JP']:
                # Checked before registers so that condition 'C' wins over
                # register 'C' for control-flow instructions.
                result.append(InstructionTextToken(
                    InstructionTextTokenType.TextToken, atom))
            elif atom in self.regs:
                result.append(InstructionTextToken(
                    InstructionTextTokenType.RegisterToken, atom))
            elif mnemonic in self.bit_instructions and atom in self._BIT_INDEX_ATOMS:
                result.append(InstructionTextToken(
                    InstructionTextTokenType.TextToken, atom))
            else:
                raise ValueError(
                    f'Unrecognized atom {atom} in {atoms} for instruction {mnemonic}')
        return result

    def get_instruction_info(self, data: bytes, addr: int):
        """Return length and branch information, or None if undecodable."""
        ins_mnem, ins_len, _, _, _ = self._decode_instruction(data, addr)
        if not ins_mnem:
            return None

        result = InstructionInfo()
        result.length = ins_len
        ins_end = addr + ins_len
        opcode = data[0]

        if ins_mnem == 'JR':
            offset = struct.unpack('<b', data[1:2])[0]
            if opcode in (0x28, 0x38):
                result.add_branch(BranchType.TrueBranch, ins_end + offset)
                result.add_branch(BranchType.FalseBranch, ins_end)
            elif opcode in (0x20, 0x30):
                # Negated conditions are modelled with the arms swapped.
                result.add_branch(BranchType.TrueBranch, ins_end)
                result.add_branch(BranchType.FalseBranch, ins_end + offset)
            else:
                result.add_branch(
                    BranchType.UnconditionalBranch, ins_end + offset)
        elif ins_mnem == 'JP':
            if opcode == 0xe9:
                result.add_branch(BranchType.IndirectBranch)
            else:
                arg = struct.unpack('<H', data[1:3])[0]
                if opcode in (0xca, 0xda):
                    result.add_branch(BranchType.TrueBranch, arg)
                    result.add_branch(BranchType.FalseBranch, ins_end)
                elif opcode in (0xc2, 0xd2):
                    result.add_branch(BranchType.TrueBranch, ins_end)
                    result.add_branch(BranchType.FalseBranch, arg)
                else:
                    result.add_branch(BranchType.UnconditionalBranch, arg)
        elif ins_mnem in ('RET', 'RETI'):
            result.add_branch(BranchType.FunctionReturn)
        elif ins_mnem == 'CALL':
            result.add_branch(BranchType.CallDestination,
                              struct.unpack("<H", data[1:3])[0])
        return result

    def get_instruction_text(self, data, addr):
        """Return ``(tokens, length)`` for the instruction, or None."""
        ins_mnem, ins_len, operands, _, _ = self._decode_instruction(
            data, addr)
        if ins_mnem is None:
            return None

        tokens = [InstructionTextToken(
            InstructionTextTokenType.InstructionToken, ins_mnem.lower())]
        if len(operands) >= 1:
            # Pad the mnemonic out to an 8-character column.
            tokens.append(InstructionTextToken(
                InstructionTextTokenType.IndentationToken,
                ''.rjust(8 - len(ins_mnem))))
            tokens += self._get_token(ins_mnem, operands[0], data, addr, ins_len)
        if len(operands) == 2:
            tokens.append(InstructionTextToken(
                InstructionTextTokenType.OperandSeparatorToken, ', '))
            tokens += self._get_token(ins_mnem, operands[1], data, addr, ins_len)
        return tokens, ins_len

    def get_instruction_low_level_il(self, data, addr, il: LowLevelILFunction):
        """Lifting is not implemented."""
        return None
class Z80(Architecture):
    """Binary Ninja architecture for the Z80 (disassembly only).

    Decoding is delegated to ``skwrapper.disasm``; this class only reports
    the length and tokenizes the returned text.
    """

    name = 'Z80'
    address_size = 2
    default_int_size = 1
    instr_alignment = 1
    max_instr_length = 4

    # register related stuff
    regs = {
        # main registers
        'AF': RegisterInfo('AF', 2),
        'BC': RegisterInfo('BC', 2),
        'DE': RegisterInfo('DE', 2),
        'HL': RegisterInfo('HL', 2),

        # alternate registers
        'AF_': RegisterInfo('AF_', 2),
        'BC_': RegisterInfo('BC_', 2),
        'DE_': RegisterInfo('DE_', 2),
        'HL_': RegisterInfo('HL_', 2),

        # main registers (sub)
        'A': RegisterInfo('AF', 1, 1),
        'B': RegisterInfo('BC', 1, 1),
        'C': RegisterInfo('BC', 1, 0),
        'D': RegisterInfo('DE', 1, 1),
        'E': RegisterInfo('DE', 1, 0),
        'H': RegisterInfo('HL', 1, 1),
        'L': RegisterInfo('HL', 1, 0),
        'Flags': RegisterInfo('AF', 0),

        # alternate registers (sub)
        'A_': RegisterInfo('AF_', 1, 1),
        'B_': RegisterInfo('BC_', 1, 1),
        'C_': RegisterInfo('BC_', 1, 0),
        'D_': RegisterInfo('DE_', 1, 1),
        'E_': RegisterInfo('DE_', 1, 0),
        'H_': RegisterInfo('HL_', 1, 1),
        'L_': RegisterInfo('HL_', 1, 0),
        'Flags_': RegisterInfo('AF_', 0),

        # index registers
        'IX': RegisterInfo('IX', 2),
        'IY': RegisterInfo('IY', 2),
        'SP': RegisterInfo('SP', 2),

        # other registers
        'I': RegisterInfo('I', 1),
        'R': RegisterInfo('R', 1),

        # program counter
        'PC': RegisterInfo('PC', 2),

        # status
        'status': RegisterInfo('status', 1)
    }

    stack_pointer = "SP"

    # internal: spellings used to classify atoms of the disassembly text
    cond_strs = ['C', 'NC', 'Z', 'NZ', 'M', 'P', 'PE', 'PO']
    reg8_strs = list('ABDHCELIR') + [
        'A\'', 'B\'', 'C\'', 'D\'', 'E\'', 'H\'', 'L\'', 'Flags', 'Flags\'',
        'IXh', 'IXl', 'IYh', 'IYl'
    ]
    reg16_strs = [
        'AF', 'BC', 'DE', 'HL', 'AF', 'AF\'', 'BC\'', 'DE\'', 'HL\'', 'IX',
        'IY', 'SP', 'PC'
    ]
    reg_strs = reg8_strs + reg16_strs

    def get_instruction_info(self, data, addr):
        """Return the instruction length (no branch info), or None."""
        (instrTxt, instrLen) = skwrapper.disasm(data, addr)
        if instrLen == 0:
            return None
        result = InstructionInfo()
        result.length = instrLen
        return result

    def get_instruction_text(self, data, addr):
        """Return ``(tokens, length)`` for the instruction, or None.

        Raises Exception when the disassembly text contains an atom that
        cannot be classified.
        """
        (instrTxt, instrLen) = skwrapper.disasm(data, addr)
        if instrLen == 0:
            return None

        token_type = InstructionTextTokenType
        # delimiters kept if in capture group
        atoms = [t for t in re.split(r'([, ()\+])', instrTxt) if t]

        result = [InstructionTextToken(token_type.InstructionToken, atoms[0])]
        if atoms[1:]:
            result.append(InstructionTextToken(token_type.TextToken, ' '))

        for atom in atoms[1:]:
            if not atom or atom == ' ':
                continue

            # PROBLEM: cond 'C' conflicts with register C
            # eg: "RET C" is it "RET <reg>" or "REG <cc>" ?
            # eg: "CALL C" is it "CALL <reg>" or "CALL C,$0000" ?
            if atom == 'C' and atoms[0] in ['CALL', 'RET']:
                # flag, condition code
                result.append(InstructionTextToken(token_type.TextToken, atom))
            elif atom in self.reg16_strs or atom in self.reg8_strs:
                result.append(
                    InstructionTextToken(token_type.RegisterToken, atom))
            elif atom in self.cond_strs:
                result.append(InstructionTextToken(token_type.TextToken, atom))
            elif atom[0] == '#':
                result.append(
                    InstructionTextToken(token_type.IntegerToken, atom,
                                         int(atom[1:], 16)))
            elif atom[0] == '$':
                # "$" plus four hex digits is treated as an address, any
                # other width as a plain integer.
                kind = (token_type.PossibleAddressToken
                        if len(atom) == 5 else token_type.IntegerToken)
                result.append(
                    InstructionTextToken(kind, atom, int(atom[1:], 16)))
            elif atom.isdigit():
                result.append(
                    InstructionTextToken(token_type.IntegerToken, atom,
                                         int(atom)))
            elif atom == '(':
                result.append(
                    InstructionTextToken(token_type.BeginMemoryOperandToken,
                                         atom))
            elif atom == ')':
                result.append(
                    InstructionTextToken(token_type.EndMemoryOperandToken,
                                         atom))
            elif atom == '+':
                result.append(InstructionTextToken(token_type.TextToken, atom))
            elif atom == ',':
                result.append(
                    InstructionTextToken(token_type.OperandSeparatorToken,
                                         atom))
            else:
                # Was formatted with the undefined name ``tok``, which turned
                # this diagnostic into a NameError.
                raise Exception('unfamiliar token: %s from instruction %s' %
                                (atom, instrTxt))

        return result, instrLen

    def get_instruction_low_level_il(self, data, addr, il):
        """Lifting is not implemented."""
        return None
class Chip8(Architecture):
    """Binary Ninja architecture for CHIP-8 (disassembly only).

    Every instruction is 2 bytes; decoding is delegated to ``Disassembler``.
    """

    name = 'CHIP-8'
    endianness = Endianness.BigEndian
    address_size = 2
    default_int_size = 2
    instr_alignment = 2
    max_instr_length = 2
    opcode_display_length = 2

    regs = {
        'PC': RegisterInfo('PC', 2),
        'SP': RegisterInfo('SP', 1),
        'I': RegisterInfo('I', 2),
        'DT': RegisterInfo('DT', 1),
        'ST': RegisterInfo('ST', 1),
        'V0': RegisterInfo('V0', 1),
        'V1': RegisterInfo('V1', 1),
        'V2': RegisterInfo('V2', 1),
        'V3': RegisterInfo('V3', 1),
        'V4': RegisterInfo('V4', 1),
        'V5': RegisterInfo('V5', 1),
        'V6': RegisterInfo('V6', 1),
        'V7': RegisterInfo('V7', 1),
        'V8': RegisterInfo('V8', 1),
        'V9': RegisterInfo('V9', 1),
        'Va': RegisterInfo('Va', 1),
        'Vb': RegisterInfo('Vb', 1),
        'Vc': RegisterInfo('Vc', 1),
        'Vd': RegisterInfo('Vd', 1),
        'Ve': RegisterInfo('Ve', 1),
        'Vf': RegisterInfo('Vf', 1)
    }
    stack_pointer = 'SP'

    def __init__(self):
        super().__init__()
        self.dis = Disassembler()

    def get_instruction_info(self, data, addr):
        """Establish instruction length and branch info.

        Returns None when fewer than 2 bytes are available.
        """
        if len(data) < 2:
            return None
        data = data[:2]

        result = InstructionInfo()
        result.length = 2

        fields = self.dis._vars(data)
        baddr = fields['addr']
        binfo = self.dis.get_branch_info(data)

        if binfo in (BranchType.UnconditionalBranch, BranchType.CallDestination):
            result.add_branch(binfo, baddr)
        elif binfo in (BranchType.FunctionReturn, BranchType.IndirectBranch):
            result.add_branch(binfo)
        elif binfo in (BranchType.TrueBranch, BranchType.FalseBranch):
            # Both conditional kinds were modelled identically: the "true"
            # arm skips the following instruction, the "false" arm falls
            # through to it.
            result.add_branch(BranchType.TrueBranch, addr + 4)
            result.add_branch(BranchType.FalseBranch, addr + 2)
        return result

    def get_instruction_text(self, data, addr):
        """Return display text for the tokenized instruction.

        Returns ``(tokens, 2)``, or None when fewer than 2 bytes are
        available. Words the disassembler yields no tokens for are shown as
        ``_emit <byte0>, <byte1>``.
        """
        if len(data) < 2:
            # The fallback below reads data[1]; short input used to raise.
            return None
        data = data[:2]

        tokens = self.dis.disasm(data, addr)
        if not tokens:
            tokens = [
                InstructionTextToken(InstructionTextTokenType.InstructionToken,
                                     '_emit'),
                InstructionTextToken(InstructionTextTokenType.TextToken, ' '),
                InstructionTextToken(InstructionTextTokenType.IntegerToken,
                                     hex(data[0]), data[0]),
                InstructionTextToken(
                    InstructionTextTokenType.OperandSeparatorToken, ', '),
                InstructionTextToken(InstructionTextTokenType.IntegerToken,
                                     hex(data[1]), data[1])
            ]
        return tokens, 2

    def get_instruction_low_level_il(self, data, addr, il):
        """ TODO: Implement a lifter here """
        return None
class M6502(Architecture):
    """Binary Ninja architecture for the 6502.

    Decoding, text and IL are table driven (``InstructionNames``,
    ``InstructionOperandTypes``, ``OperandLengths``, ``OperandTokens``,
    ``OperandIL``, ``InstructionIL``).

    Byte handling works for both Python 2 ``str`` and Python 3 ``bytes``
    input: bytes are extracted by slicing (``data[i:i + 1]``), because
    indexing yields a 1-character string on Python 2 but an int on Python 3.
    """

    name = "6502"
    address_size = 2
    default_int_size = 1
    max_instr_length = 3

    regs = {
        "a": RegisterInfo("a", 1),
        "x": RegisterInfo("x", 1),
        "y": RegisterInfo("y", 1),
        "s": RegisterInfo("s", 1)
    }
    stack_pointer = "s"

    flags = ["c", "z", "i", "d", "b", "v", "s"]
    flag_write_types = ["*", "czs", "zvs", "zs"]
    flag_roles = {
        # Not a normal carry flag, subtract result is inverted
        "c": FlagRole.SpecialFlagRole,
        "z": FlagRole.ZeroFlagRole,
        "v": FlagRole.OverflowFlagRole,
        "s": FlagRole.NegativeSignFlagRole
    }
    flags_required_for_flag_condition = {
        LowLevelILFlagCondition.LLFC_UGE: ["c"],
        LowLevelILFlagCondition.LLFC_ULT: ["c"],
        LowLevelILFlagCondition.LLFC_E: ["z"],
        LowLevelILFlagCondition.LLFC_NE: ["z"],
        LowLevelILFlagCondition.LLFC_NEG: ["s"],
        LowLevelILFlagCondition.LLFC_POS: ["s"]
    }
    flags_written_by_flag_write_type = {
        "*": ["c", "z", "v", "s"],
        "czs": ["c", "z", "s"],
        "zvs": ["z", "v", "s"],
        "zs": ["z", "s"]
    }

    # Opcodes of the eight relative conditional branches. Flipping bit 5
    # (xor 0x20) turns each one into its opposite condition.
    _CONDITIONAL_BRANCH_OPCODES = frozenset(
        (0x10, 0x30, 0x50, 0x70, 0x90, 0xb0, 0xd0, 0xf0))
    _JSR_OPCODE = 0x20   # jsr absolute
    _LDA_IMM = b"\xa9"   # lda #imm
    _NOP = b"\xea"       # nop

    @staticmethod
    def _byte_at(data, index):
        """Return byte *index* of *data* as an int, or None if out of range."""
        chunk = bytearray(data[index:index + 1])
        return chunk[0] if chunk else None

    def _is_conditional_branch(self, data):
        """Return True when *data* starts with a conditional branch opcode."""
        return self._byte_at(data, 0) in self._CONDITIONAL_BRANCH_OPCODES

    def _is_jsr(self, data):
        """Return True when *data* is exactly one 3-byte jsr instruction."""
        return self._byte_at(data, 0) == self._JSR_OPCODE and len(data) == 3

    def decode_instruction(self, data, addr):
        """Decode one instruction.

        Returns ``(mnemonic, operand_type, length, value)``; all four are
        None when *data* is empty, too short, or the opcode has no name.
        ``value`` is the branch target for relative operands, the raw 8- or
        16-bit operand otherwise, and None for implied operands.
        """
        undecodable = (None, None, None, None)
        opcode = self._byte_at(data, 0)
        if opcode is None:
            return undecodable

        instr = InstructionNames[opcode]
        if instr is None:
            return undecodable

        operand = InstructionOperandTypes[opcode]
        length = 1 + OperandLengths[operand]
        if len(data) < length:
            return undecodable

        if OperandLengths[operand] == 0:
            value = None
        elif operand == REL:
            # Signed displacement from the end of the 2-byte instruction.
            value = (addr + 2 + struct.unpack("b", data[1:2])[0]) & 0xffff
        elif OperandLengths[operand] == 1:
            value = self._byte_at(data, 1)
        else:
            value = struct.unpack("<H", data[1:3])[0]
        return instr, operand, length, value

    def perform_get_instruction_info(self, data, addr):
        """Return length and branch information, or None if undecodable."""
        instr, operand, length, value = self.decode_instruction(data, addr)
        if instr is None:
            return None

        result = InstructionInfo()
        result.length = length
        if instr == "jmp":
            if operand == ADDR:
                result.add_branch(BranchType.UnconditionalBranch,
                                  struct.unpack("<H", data[1:3])[0])
            else:
                # Indirect jmp: target is read from memory at run time.
                result.add_branch(BranchType.UnresolvedBranch)
        elif instr == "jsr":
            result.add_branch(BranchType.CallDestination,
                              struct.unpack("<H", data[1:3])[0])
        elif instr in ["rti", "rts"]:
            result.add_branch(BranchType.FunctionReturn)

        if instr in ["bcc", "bcs", "beq", "bmi", "bne", "bpl", "bvc", "bvs"]:
            dest = (addr + 2 + struct.unpack("b", data[1:2])[0]) & 0xffff
            result.add_branch(BranchType.TrueBranch, dest)
            result.add_branch(BranchType.FalseBranch, addr + 2)
        return result

    def perform_get_instruction_text(self, data, addr):
        """Return ``(tokens, length)`` for the instruction, or None."""
        instr, operand, length, value = self.decode_instruction(data, addr)
        if instr is None:
            return None

        tokens = [
            InstructionTextToken(InstructionTextTokenType.TextToken,
                                 "%-7s " % instr.replace("@", ""))
        ]
        tokens += OperandTokens[operand](value)
        return tokens, length

    def perform_get_instruction_low_level_il(self, data, addr, il):
        """Append the IL for one instruction; return its length or None."""
        instr, operand, length, value = self.decode_instruction(data, addr)
        if instr is None:
            return None

        operand = OperandIL[operand](il, value)
        instr = InstructionIL[instr](il, operand)
        # A lifter may return one expression, a list of them, or None.
        if isinstance(instr, list):
            for i in instr:
                il.append(i)
        elif instr is not None:
            il.append(instr)
        return length

    def perform_is_never_branch_patch_available(self, data, addr):
        """Conditional branches can be patched to never branch."""
        return self._is_conditional_branch(data)

    def perform_is_invert_branch_patch_available(self, data, addr):
        """Conditional branches can be inverted."""
        return self._is_conditional_branch(data)

    def perform_is_always_branch_patch_available(self, data, addr):
        """Always-branch patching is not offered."""
        return False

    def perform_is_skip_and_return_zero_patch_available(self, data, addr):
        """A 3-byte jsr can be replaced by code that yields zero."""
        return self._is_jsr(data)

    def perform_is_skip_and_return_value_patch_available(self, data, addr):
        """A 3-byte jsr can be replaced by code that yields a value."""
        return self._is_jsr(data)

    def perform_convert_to_nop(self, data, addr):
        """Return nop bytes covering all of *data*."""
        return self._NOP * len(data)

    def perform_never_branch(self, data, addr):
        """Replace a conditional branch with nops; None if not a branch."""
        if self._is_conditional_branch(data):
            return self._NOP * len(data)
        return None

    def perform_invert_branch(self, data, addr):
        """Flip the condition of a conditional branch; None if not a branch."""
        if not self._is_conditional_branch(data):
            return None
        flipped = self._byte_at(data, 0) ^ 0x20
        return bytes(bytearray([flipped])) + data[1:]

    def perform_skip_and_return_value(self, data, addr, value):
        """Replace a 3-byte jsr with ``lda #value; nop``; None otherwise."""
        if not self._is_jsr(data):
            return None
        return self._LDA_IMM + bytes(bytearray([value & 0xff])) + self._NOP
class S1C88(Architecture):
    """Binary Ninja architecture description for the S1C88 CPU.

    All decoding is delegated to the module-level ``decode(data, addr)``
    helper.  It returns ``None`` for undecodable bytes, otherwise an
    indexable result where ``[0]`` is the instruction text token list,
    ``[1]`` is the instruction info object (with a ``length`` attribute) and
    ``[2]`` is either ``None`` or an iterable of callables that each emit
    IL when called with the ``il`` function object.
    """

    name = 's1c88:s1c88'
    address_size = 2
    max_instr_length = 8

    # RegisterInfo(full_width_reg, size[, offset]): sub-registers name the
    # full-width register they live in and their byte offset inside it.
    regs = {
        # 16-bit pairs and their 8-bit halves.
        'BA': RegisterInfo('BA', 2),
        'A': RegisterInfo('BA', 1, 0),
        'B': RegisterInfo('BA', 1, 1),

        'HL': RegisterInfo('HL', 2),
        'L': RegisterInfo('HL', 1, 0),
        'H': RegisterInfo('HL', 1, 1),

        'IX': RegisterInfo('IX', 2),
        'IY': RegisterInfo('IY', 2),

        'PC': RegisterInfo('PC', 2),
        'SP': RegisterInfo('SP', 2),

        'BR': RegisterInfo('BR', 1),
        'NB': RegisterInfo('NB', 1),
        'CB': RegisterInfo('CB', 1),
        'EP': RegisterInfo('EP', 1),

        # IP is the 16-bit pairing of XP and YP.  Both halves must name IP
        # as their full-width register so that a write to either half (or
        # to IP itself) is seen by the others, exactly like A/B within BA.
        # NOTE(review): the byte order (XP at offset 0, YP at offset 1) is
        # carried over from the previous table -- confirm against the CPU
        # manual.
        'IP': RegisterInfo('IP', 2),
        'XP': RegisterInfo('IP', 1, 0),
        'YP': RegisterInfo('IP', 1, 1),

        'SC': RegisterInfo('SC', 1),
    }

    stack_pointer = 'SP'

    flags = ['z', 'c', 'v', 'n', 'd', 'u', 'i0', 'i1']

    flag_roles = {
        'z': FlagRole.ZeroFlagRole,
        'c': FlagRole.CarryFlagRole,
        'v': FlagRole.OverflowFlagRole,
        'n': FlagRole.NegativeSignFlagRole,
        'd': FlagRole.SpecialFlagRole,
        'u': FlagRole.SpecialFlagRole,
        'i0': FlagRole.SpecialFlagRole,
        'i1': FlagRole.SpecialFlagRole,
    }

    flags_required_for_flag_condition = {
        # Unsigned comparisons
        LowLevelILFlagCondition.LLFC_UGE: ['c'],
        LowLevelILFlagCondition.LLFC_ULT: ['c'],

        # Signed comparisons
        LowLevelILFlagCondition.LLFC_SGE: ['n', 'v'],
        LowLevelILFlagCondition.LLFC_SGT: ['z', 'n', 'v'],
        LowLevelILFlagCondition.LLFC_SLE: ['z', 'n', 'v'],
        LowLevelILFlagCondition.LLFC_SLT: ['n', 'v'],

        # Equals or not
        LowLevelILFlagCondition.LLFC_E: ['z'],
        LowLevelILFlagCondition.LLFC_NE: ['z'],

        # Overflow or not
        LowLevelILFlagCondition.LLFC_NO: ['v'],
        LowLevelILFlagCondition.LLFC_O: ['v'],

        # Negative or not
        LowLevelILFlagCondition.LLFC_NEG: ['n'],
        LowLevelILFlagCondition.LLFC_POS: ['n'],
    }

    # Every name listed here has a matching key in
    # flags_written_by_flag_write_type below.  Keep one literal per entry:
    # two adjacent string literals without a comma are silently
    # concatenated by Python into a single element.
    flag_write_types = [
        "*",
        "zcvn",
        "zn",
        "z",
        "zcn",
    ]

    flags_written_by_flag_write_type = {
        "*": ['z', 'c', 'v', 'n', 'd', 'u', 'i0', 'i1'],
        "zcvn": ["z", "c", "v", "n"],
        "zn": ["z", "n"],
        "z": ["z"],
        "zcn": ["z", "c", "n"],
    }

    def get_instruction_info(self, data, addr):
        """Return the decoder's instruction info for the bytes at ``addr``.

        Returns ``None`` when ``decode`` cannot decode ``data``.
        """
        decoded = decode(data, addr)
        if decoded is None:
            return None
        return decoded[1]

    def get_instruction_text(self, data, addr):
        """Return ``(tokens, length)`` for the instruction at ``addr``.

        Returns ``None`` when ``decode`` cannot decode ``data``.
        """
        decoded = decode(data, addr)
        if decoded is None:
            return None
        return decoded[0], decoded[1].length

    def get_instruction_low_level_il(self, data, addr, il):
        """Emit IL for the instruction at ``addr`` and return its length.

        Each callable supplied by the decoder is invoked with ``il``, in
        order.  When the decoder supplies none, nothing is emitted but the
        length is still returned.  Returns ``None`` when ``decode`` cannot
        decode ``data``.
        """
        decoded = decode(data, addr)
        if decoded is None:
            return None

        emitters = decoded[2]
        if emitters is not None:
            for emit in emitters:
                emit(il)

        return decoded[1].length
class BPFArch(Architecture):
    """Binary Ninja architecture description for BPF programs.

    Decoding is done by the module-level ``get_instruction`` helper, which
    returns a ``(valid, instr)`` pair.  Per-opcode behaviour comes from the
    module-level tables ``InstructionInfoModders``, ``InstructionNames``,
    ``InstructionFormatters`` and ``InstructionLLIL``, all keyed by
    ``instr.opcode``.  Every decoded instruction is reported as 8 bytes.
    """

    name = "BPF"
    address_size = 4
    default_int_size = 4
    max_instr_length = 8

    regs = {
        "a": RegisterInfo("a", 4),  # accumulator
        "x": RegisterInfo("x", 4),  # index
    }

    # BPF only has 16 memory slots to store to, M[0]..M[15].  Binary Ninja
    # has no concept of separate address spaces, so the BPF memory is
    # modelled as the registers m0..m15.
    regs.update(
        ("m%d" % slot, RegisterInfo("m%d" % slot, 4)) for slot in range(16)
    )

    # For the same reason, all packet accesses go through a virtual "pkt"
    # register that notionally holds the address of the packet start at
    # program entry.
    regs["pkt"] = RegisterInfo("pkt", 4)
    # Virtual register that notionally holds the size of the packet at
    # program entry.
    regs["len"] = RegisterInfo("len", 4)
    # Binary Ninja needs a stack or it is unhappy.
    regs["dummystack"] = RegisterInfo("dummystack", 4)
    # Virtual register that makes the return value explicit in the LLIL.
    regs["dummyret"] = RegisterInfo("dummyret", 4)
    # Virtual link register to return to; a BPF return is more akin to a
    # halt.
    regs["dummylr"] = RegisterInfo("dummylr", 4)

    # A stack pointer is mandatory even though BPF has no stack.
    stack_pointer = "dummystack"

    def perform_get_instruction_info(self, data, addr):
        """Return an ``InstructionInfo`` for the instruction at ``addr``.

        The length is always 8.  If the opcode has an entry in
        ``InstructionInfoModders`` that callable is given the info object
        and the decoded instruction so it can adjust the info.  Returns
        ``None`` for invalid data.
        """
        ok, insn = get_instruction(data, addr)
        info = InstructionInfo()
        if not ok:
            # Returning None on failure is essential: anything else makes
            # things break in creative ways for invalid data.
            return None

        info.length = 8
        if insn.opcode in InstructionInfoModders:
            InstructionInfoModders[insn.opcode](info, insn)
        return info

    def perform_get_instruction_text(self, data, addr):
        """Return ``(tokens, 8)`` describing the instruction at ``addr``.

        Opcodes missing from ``InstructionNames`` are logged and rendered
        as a single "unk opcode" token.  Otherwise the mnemonic token is
        followed, when the opcode's formatter yields any tokens, by one
        space and those tokens.  Returns ``None`` for invalid data.
        """
        ok, insn = get_instruction(data, addr)
        if not ok:
            # Returning None on failure is essential: anything else makes
            # things break in creative ways for invalid data.
            return None

        if insn.opcode in InstructionNames:
            mnemonic = InstructionNames[insn.opcode]
            tokens = [
                InstructionTextToken(
                    InstructionTextTokenType.InstructionToken, mnemonic)
            ]
            operand_tokens = InstructionFormatters[insn.opcode](insn)
            if len(operand_tokens) > 0:
                tokens.extend(
                    [InstructionTextToken(
                        InstructionTextTokenType.TextToken, " ")]
                    + operand_tokens)
            return tokens, 8

        log('debug: %s' % insn)
        unknown = InstructionTextToken(
            InstructionTextTokenType.InstructionToken,
            "unk opcode 0x%x" % insn.opcode)
        return ([unknown], 8)

    def perform_get_instruction_low_level_il(self, data, addr, il):
        """Append LLIL for the instruction at ``addr`` and return 8.

        Only the first 8 bytes of ``data`` are decoded.  Opcodes without an
        ``InstructionLLIL`` entry are lifted as ``il.undefined()``.  When an
        entry exists but produces ``None`` nothing is appended.  Returns
        ``None`` for invalid data.
        """
        log('Asking to decode %d bytes at 0x%x' % (len(data), addr))
        ok, insn = get_instruction(data[0:8], addr)
        if not ok:
            log('*********** Tried an failed **********')
            # Returning None on failure is essential: anything else makes
            # things break in creative ways for invalid data.
            return None

        if insn.opcode in InstructionLLIL:
            expr = InstructionLLIL[insn.opcode](il, insn)
            if expr is None:
                log('Failed to generate il')
            else:
                il.append(expr)
                log('appended: %s' % LowLevelILInstruction(il, expr.index))
        else:
            log('Adding il.undefined()')
            il.append(il.undefined())

        log('Full IL Decode was successful len(il): %d' % len(il))
        return 8
class Z80(Architecture):
    """Binary Ninja architecture description for the Z80.

    Disassembly is delegated to ``skwrapper.disasm(data, addr)``, which
    returns an ``(instruction_text, instruction_length)`` pair; a length of
    0 signals that the bytes could not be decoded.  Lifting to LLIL is not
    implemented.
    """

    name = 'Z80'
    address_size = 2
    default_int_size = 1
    instr_alignment = 1
    max_instr_length = 4

    # RegisterInfo(full_width_reg, size[, offset]): sub-registers name the
    # full-width register they live in and their byte offset inside it.
    regs = {
        # main registers
        'AF': RegisterInfo('AF', 2),
        'BC': RegisterInfo('BC', 2),
        'DE': RegisterInfo('DE', 2),
        'HL': RegisterInfo('HL', 2),

        # alternate registers
        'AF_': RegisterInfo('AF_', 2),
        'BC_': RegisterInfo('BC_', 2),
        'DE_': RegisterInfo('DE_', 2),
        'HL_': RegisterInfo('HL_', 2),

        # main registers (sub)
        'A': RegisterInfo('AF', 1, 1),
        'B': RegisterInfo('BC', 1, 1),
        'C': RegisterInfo('BC', 1, 0),
        'D': RegisterInfo('DE', 1, 1),
        'E': RegisterInfo('DE', 1, 0),
        'H': RegisterInfo('HL', 1, 1),
        'L': RegisterInfo('HL', 1, 0),
        # The flags byte is the half of AF not covered by A (offset 1), so
        # it is one byte wide at offset 0.  A zero-sized register would
        # never alias AF.
        'Flags': RegisterInfo('AF', 1, 0),

        # alternate registers (sub)
        'A_': RegisterInfo('AF_', 1, 1),
        'B_': RegisterInfo('BC_', 1, 1),
        'C_': RegisterInfo('BC_', 1, 0),
        'D_': RegisterInfo('DE_', 1, 1),
        'E_': RegisterInfo('DE_', 1, 0),
        'H_': RegisterInfo('HL_', 1, 1),
        'L_': RegisterInfo('HL_', 1, 0),
        # Same layout as 'Flags', within the alternate AF_ pair.
        'Flags_': RegisterInfo('AF_', 1, 0),

        # index registers
        'IX': RegisterInfo('IX', 2),
        'IY': RegisterInfo('IY', 2),
        'SP': RegisterInfo('SP', 2),

        # other registers
        'I': RegisterInfo('I', 1),
        'R': RegisterInfo('R', 1),

        # program counter
        'PC': RegisterInfo('PC', 2),

        # status
        'status': RegisterInfo('status', 1),
    }

    stack_pointer = "SP"

    def get_instruction_info(self, data, addr):
        """Return an ``InstructionInfo`` carrying only the decoded length.

        Returns ``None`` when the disassembler reports a length of 0.
        """
        _, instrLen = skwrapper.disasm(data, addr)
        if instrLen == 0:
            return None

        result = InstructionInfo()
        result.length = instrLen
        return result

    def get_instruction_text(self, data, addr):
        """Return ``(tokens, length)`` with the disassembly as one text token.

        Returns ``None`` when the disassembler reports a length of 0, so
        that undecodable bytes are rejected here the same way
        ``get_instruction_info`` rejects them instead of being reported as
        a zero-length instruction.
        """
        instrTxt, instrLen = skwrapper.disasm(data, addr)
        if instrLen == 0:
            return None

        tokens = [
            InstructionTextToken(InstructionTextTokenType.TextToken, instrTxt)
        ]
        return tokens, instrLen

    def get_instruction_low_level_il(self, data, addr, il):
        """Lifting is not implemented; ``None`` is returned for every input."""
        return None