def get_stack_offset(cpu_context, ip, n):
    """
    Resolve the stack address referenced by operand ``n`` of the instruction at ``ip``.

    The operand text must mention one of rsp/esp/rbp/ebp.  The operand's
    ``addr`` field is converted with ``signed(offset, width)`` and added to
    the value of "RSP" or "RBP" read from ``cpu_context``.  When the text
    mentions both a stack-pointer and a frame-pointer register, the stack
    pointer wins.

    :param cpu_context.ProcessorContext cpu_context: processor context
    :param int ip: EA of interest
    :param int n: Operand of interest (0 - first operand, 1 - second operand, ...)
    :return int: register value plus the signed offset of the operand
    :raises ValueError: if the operand text mentions no stack register
    """
    operand_text = idc.GetOpnd(ip, n)
    mentions_sp = "esp" in operand_text or "rsp" in operand_text
    mentions_bp = "rbp" in operand_text or "ebp" in operand_text
    if not (mentions_sp or mentions_bp):
        raise ValueError(
            "Opnd {} does not appear to reference the stack.".format(operand_text))

    # decode_insn() must be called before reading idaapi.cmd.Operands.
    idaapi.decode_insn(ip)
    displacement = idaapi.cmd.Operands[n].addr
    width = get_bits()

    base_register = "RSP" if mentions_sp else "RBP"
    return cpu_context.reg_read(base_register) + signed(displacement, width)
def _find_leafs(self):
    """
    Collect every leaf function in the database into ``self.functions``.

    A function counts as a leaf when no call instruction is found while
    stepping through it in ``self.arch.insn_size`` increments.  Each leaf
    is appended as a ``Function`` record, and the list is finally sorted
    by ``xrefs``, largest first.
    """
    # Loop through every function
    for func_ea in idautils.Functions():
        func = idaapi.get_func(func_ea)
        if not func:
            continue

        # Loop through all instructions in this function looking for call
        # instructions; if found, then this is not a leaf.  endEA is the
        # first address *past* the function, so it is excluded: scanning
        # it would inspect an instruction that belongs to whatever follows.
        leaf_function = True
        ea = func.startEA
        while ea < func.endEA:
            idaapi.decode_insn(ea)
            if idaapi.is_call_insn(ea):
                leaf_function = False
                break
            ea += self.arch.insn_size

        if leaf_function:
            self.functions.append(
                Function(
                    start=func.startEA,
                    end=func.endEA,
                    leaf=True,
                    loop=self.has_loop(func),
                    argc=self.argp.argc(func),
                )
            )

    # Sort leafs by xref count, largest first
    self.functions.sort(key=lambda f: f.xrefs, reverse=True)
def get_operand_value_replacement(ea, pos, state):
    """
    A replacement for Ida's idc.GetOperandValue that handles displacements
    more reasonably.

    For displacement operands the raw ``addr`` field is reinterpreted as a
    signed value of the current bitness; if the operand text mentions the
    stack pointer (rsp/esp), the stack delta from ``idc.GetSpd`` is added.
    Every other operand kind is delegated to ``idc.GetOperandValue``.

    :param ea: memory location
    :param pos: argument location
                example: add eax, ebx
                eax is position 0, ebx is position 1
    :param state: the current stack pointer register family (usually sp);
                  not read by this implementation
    :return: computes a numerical replacement for an operand
    """
    if not is_displ(ea, pos):
        return idc.GetOperandValue(ea, pos)

    bit_size = 64 if is_64_bit() else 32
    stack_reg = 'rsp' if bit_size == 64 else 'esp'

    idaapi.decode_insn(ea)
    displacement = idaapi.cmd.Operands[pos].addr

    # Convert the offset to a signed value
    sign_bit = 1 << (bit_size - 1)
    if displacement & sign_bit:
        displacement -= (1 << bit_size)

    if stack_reg in get_opnd_replacement(ea, pos):
        # GetSpd may yield a falsy value; treat that as "no adjustment".
        displacement += idc.GetSpd(ea) or 0

    return displacement
def _calc_displacement(self):
    """
    Calculate the displacement offset of the operand's text.

    e.g:
        word ptr [rdi+rbx]

    The result is ``base + index * scale + offset`` using register values
    from the CPU context.  When IDA reports a stack variable for the
    operand, ``(frame_id, stack_offset)`` is recorded in the context's
    ``stack_variables`` under the computed address.

    :return int: calculated value
    """
    reg_size = 8 if idc.__EA64__ else 4

    insn = idaapi.insn_t()
    idaapi.decode_insn(insn, self.ip)
    op = insn.ops[self.idx]

    offset = utils.signed(op.addr, utils.get_bits())
    scale = utils.sib_scale(op)
    base_reg = utils.x86_base_reg(insn, op)
    indx_reg = utils.x86_index_reg(insn, op)

    base_val = self._cpu_context.registers[utils.reg2str(base_reg, reg_size)]
    if indx_reg != -1:
        indx_val = self._cpu_context.registers[utils.reg2str(indx_reg, reg_size)]
    else:
        # -1 means the operand has no index register.
        indx_val = 0

    result = base_val + indx_val * scale + offset
    logger.debug("calc_displacement :: Displacement {} -> {}".format(
        self.text, result))

    # Before returning, record the frame_id and stack_offset for this address.
    # (This can become useful information for retrieving the original
    # location of a variable)
    frame_id = idc.get_frame_id(self.ip)
    stack_var = ida_frame.get_stkvar(insn, op, offset)
    if stack_var:
        stack_offset = stack_var[1]
        self._cpu_context.stack_variables[result] = (frame_id, stack_offset)

    return result
def _find_leafs(self):
    """
    Collect every leaf function in the database into ``self.functions``.

    A function counts as a leaf when no call instruction is found while
    stepping through it in ``self.arch.insn_size`` increments.  Each leaf
    is appended as a ``Function`` record, and the list is finally sorted
    by ``xrefs``, largest first.
    """
    # Loop through every function
    for func_ea in idautils.Functions():
        func = idaapi.get_func(func_ea)
        if not func:
            continue

        # Loop through all instructions in this function looking for call
        # instructions; if found, then this is not a leaf.  endEA is the
        # first address *past* the function, so it is excluded: scanning
        # it would inspect an instruction that belongs to whatever follows.
        leaf_function = True
        ea = func.startEA
        while ea < func.endEA:
            idaapi.decode_insn(ea)
            if idaapi.is_call_insn(ea):
                leaf_function = False
                break
            ea += self.arch.insn_size

        if leaf_function:
            self.functions.append(
                Function(start=func.startEA,
                         end=func.endEA,
                         leaf=True,
                         loop=self.has_loop(func),
                         argc=self.argp.argc(func)))

    # Sort leafs by xref count, largest first
    self.functions.sort(key=lambda f: f.xrefs, reverse=True)
def get_operand_value_replacement(ea, pos, state):
    '''
    A replacement for Ida's idc.GetOperandValue that handles displacements
    more reasonably.

    For displacement operands the raw ``addr`` field is compared with its
    two's-complement negation; whichever is smaller in magnitude is used
    (negated when it is the flipped form).  If the operand text mentions one
    of the stack-pointer register names in ``state``, the stack delta from
    ``idc.GetSpd`` is added.  Other operand kinds are delegated to
    ``idc.GetOperandValue``.

    :param ea: memory location
    :param pos: argument location
                example: add eax, ebx
                eax is position 0, ebx is position 1
    :param state: the current stack pointer register family (usually sp)
    :return: computes a numerical replacement for an operand
    '''
    if not is_displ(ea, pos):
        return idc.GetOperandValue(ea, pos)

    idaapi.decode_insn(ea)
    offset = idaapi.cmd.Operands[pos].addr

    mask = 0xffffffffffffffff if is_64_bit(ea) else 0xffffffff
    flipped = (offset ^ mask) + 1

    # Use reg_fam[2] here as opposed to reg_fam[0] like usual because we
    # need to match the reg name string
    touches_stack = any(
        reg_fam[2] in get_opnd_replacement(ea, pos)
        for reg_fam in state.stack_pointer_reg_fams)
    adjustment = idc.GetSpd(ea) if touches_stack else 0
    # GetSpd may yield a falsy value; normalise that to 0.
    adjustment = adjustment or 0

    if flipped < offset:
        return -flipped + adjustment
    return offset + adjustment
def decode_insn(ea):
    """
    Decode the instruction at ``ea`` with whichever IDAPython API applies.

    When the SDK version is at least 700 and the interpreter is 64-bit
    (``sys.maxsize > 2**32``), the two-argument ``idaapi.decode_insn`` is
    used with a fresh ``insn_t``.  Otherwise the one-argument form is used
    and a copy of the global ``idaapi.cmd`` is returned.

    :param ea: address of the instruction to decode
    :return: the decoded instruction, or None when decoding fails
    """
    use_insn_t_api = idaapi.IDA_SDK_VERSION >= 700 and sys.maxsize > 2**32
    if use_insn_t_api:
        insn = idaapi.insn_t()
        return insn if idaapi.decode_insn(insn, ea) > 0 else None

    if idaapi.decode_insn(ea):
        return idaapi.cmd.copy()
    return None
def iterateInstructions():
    """
    Decode every head in the database and hand it to each registered callback.

    Heads are visited in ascending order starting from the first head after
    address 0.  For each one ``idaapi.decode_insn`` is called and every
    entry of ``InstructionCallbacks`` receives ``idaapi.cmd`` through its
    ``instruction`` method.

    Iteration stops as soon as ``idc.NextHead`` reports ``BADADDR``; the
    sentinel itself is never decoded or dispatched.
    """
    # get first instruction
    ea = idc.NextHead(0)
    while ea != idaapi.BADADDR:
        idaapi.decode_insn(ea)
        for handler in InstructionCallbacks:
            handler.instruction(idaapi.cmd)
        # get next instruction
        ea = idc.NextHead(ea)
def _get_width(self):
    """
    Based on the dtyp value, return the size of the operand in bytes.

    :return: size of data type, as mapped by ``self.TYPE_DICT``
    """
    # Decoding has the side effect of filling idaapi.cmd.Operands.
    idaapi.decode_insn(self.ip)
    operand = idaapi.cmd.Operands[self.idx]
    return self.TYPE_DICT[operand.dtyp]
def refreshitems(self):
    """
    Rebuild ``self.items`` from the ROP engine's current gadget list.

    Returns early when the engine is not initialised, when the gadget list
    compares equal to the cached one, or when there are no gadgets.
    Gadgets whose ``opcodes`` field is empty are reconstructed from the
    database (raw bytes plus disassembly) and replace the original entry in
    the engine's list; gadgets that fail to decode are not displayed.
    """
    # Pb : rop engine has not been init
    if self.idarop.rop == None:
        return

    # No new data present
    if self.rop_list_cache == self.idarop.rop.gadgets:
        return

    self.items = []

    # No data present
    if len(self.idarop.rop.gadgets) == 0:
        return

    # Large lists show a wait box for the duration of the rebuild.
    if len(self.idarop.rop.gadgets) > 10000:
        idaapi.show_wait_box("Ida rop : loading rop list ...")

    for i, g in enumerate(self.idarop.rop.gadgets):

        # reconstruct disas
        if g.opcodes == "":

            bad_gadget = False
            # Byte range is inclusive of ret_address, hence the + 1.
            opcodes = idc.GetManyBytes(g.address, g.ret_address - g.address + 1)
            instructions = list()
            ea = g.address
            while ea <= g.ret_address:
                instructions.append(
                    idc.GetDisasmEx(ea, idaapi.GENDSM_FORCE_CODE))
                ea += idaapi.decode_insn(ea)

                # Badly decoded gadget: nothing decodes at the address just
                # reached (this also stops the loop if ea did not advance).
                if idaapi.decode_insn(ea) == 0:
                    bad_gadget = True
                    break

            if not bad_gadget:
                h = Gadget(address=g.address,
                           ret_address=g.ret_address,
                           instructions=instructions,
                           opcodes=opcodes,
                           size=len(opcodes))
                # Store the reconstructed gadget back into the engine's list.
                self.idarop.rop.gadgets[i] = h
                self.items.append(
                    h.get_display_list(self.idarop.addr_format))
        else:
            self.items.append(g.get_display_list(self.idarop.addr_format))

    self.rop_list_cache = self.idarop.rop.gadgets
    if len(self.idarop.rop.gadgets) > 10000:
        idaapi.hide_wait_box()
def handleDebugStepOver(self):
    """
    Move the debug breakpoint hook from the current instruction to the next one.

    Does nothing without a client socket or an active ``debugBreakId``, or
    when the address following the current instruction is not code.  Any
    hook registered for the current address is deleted on the client (and
    its entry reused); a hook for the next address is then sent, stored in
    ``idbHookMap``, and execution is resumed via ``handleDebugContinue``.
    """
    if self.clientSocket is None:
        return

    if self.debugBreakId is None:
        return

    # The next address is the current one plus the decoded instruction size.
    cur_ea = self.debugBreakId
    decode_insn(cur_ea)
    next_ea = cur_ea + idaapi.cmd.size

    if isCode(getFlags(next_ea)) == False:
        return

    entry = None
    # remove current
    if self.debugBreakId in self.idbHookMap:
        entry = self.idbHookMap[self.debugBreakId]
        outJSON = json.dumps({
            "req_id": kFridaLink_DelHookRequest,
            "data": entry.genDelRequest()
        })

        del self.idbHookMap[self.debugBreakId]
        self.clientSocket.sendto(outJSON, self.clientAddress)
        SetColor(self.debugBreakId, CIC_ITEM, kIDAViewColor_Reset)
        refresh_idaview_anyway()

    offset, moduleName = self.getAddressDetails(next_ea)

    # install next
    if entry == None:
        hook = InstHook()
        hook.id = next_ea
        # NOTE(review): `once` is not defined in this function; it must
        # resolve to a module-level name - confirm.
        hook.once = once
        hook.breakpoint = True
        entry = HookEntry(hook)

    # Retarget the (new or reused) entry at the next instruction.
    entry.hook.id = next_ea
    entry.hook.mnemonic = GetDisasm(next_ea)
    entry.hook.address = offset
    entry.hook.module = moduleName

    outJSON = json.dumps({
        "req_id": kFridaLink_SetHookRequest,
        "data": entry.genSetRequest()
    })

    self.clientSocket.sendto(outJSON, self.clientAddress)
    self.idbHookMap[next_ea] = entry

    self.idbHooksView.setContent(self.idbHookMap)

    self.handleDebugContinue()
def trace(self, ea):
    '''
    Given an EA where an argument register is set, attempt to trace what
    function call that argument is passed to.

    @ea - The address of an instruction that modifies a function argument register.

    Returns a tuple of (function EA, argv index, argument register name) on success.
    Returns (None, None, None) on failure.
    '''
    idaapi.decode_insn(ea)
    features = idaapi.cmd.get_canon_feature()

    if self.arch.unknown:
        return (None, None, None)

    for n in range(0, len(self.CHANGE_OPND)):
        if idaapi.cmd.Operands[n].type in [
                idaapi.o_reg, idaapi.o_displ, idaapi.o_phrase
        ]:
            try:
                regname = self.arch.registers[idaapi.cmd.Operands[n].reg]
                index = self.arch.argv.index(regname)
            except ValueError:
                # Not an argument register; try the next operand.
                continue

            if features & self.CHANGE_OPND[n]:
                # Step back over the delay slot(s) before scanning forward.
                ea = ea - (self.arch.delay_slot * self.arch.insn_size)

                # NOTE(review): decode_insn() below overwrites idaapi.cmd,
                # so later iterations of the outer loop read the operands of
                # the last scanned instruction rather than the original one.
                while True:
                    idaapi.decode_insn(ea)

                    if idaapi.is_call_insn(ea):
                        for xref in idautils.XrefsFrom(ea):
                            if xref.type in [idaapi.fl_CF, idaapi.fl_CN]:
                                return (xref.to, index, regname)
                        # If we couldn't figure out where the function call was going to, just quit
                        break

                    # is_basic_block_end() is retried with a second argument
                    # when the one-argument call raises TypeError.
                    try:
                        is_block_end = idaapi.is_basic_block_end(ea)
                    except TypeError:
                        is_block_end = idaapi.is_basic_block_end(ea, True)

                    if is_block_end:
                        break

                    # TODO: Use idc.NextHead(ea) instead...
                    ea += self.arch.insn_size

    return (None, None, None)
def findRspRbpDifference(curr_ea):
    """
    Accumulate a stack difference while walking up to 256 instructions from ``curr_ea``.

    Each ``push`` adds 8, and each ``sub`` whose first operand value equals
    ``OperandValueRegister.RSP`` with an immediate second operand adds that
    immediate.  The walk stops at the first ``mov``/``lea`` whose first
    operand value equals ``OperandValueRegister.RBP``; for ``lea`` the
    second operand's value is applied to the running difference first.

    NOTE(review): ``insn`` is not defined in this function; it must resolve
    to a module-level ``insn_t`` - confirm.

    :param curr_ea: address to start walking from
    :return: the accumulated difference
    """
    difference = 0
    for i in range(0, 256):
        mnem = idc.print_insn_mnem(curr_ea)
        debug(mnem)
        # Decoded only so that insn.size can advance curr_ea below.
        idaapi.decode_insn(insn, curr_ea)
        if mnem == 'push':
            push_offset = 8
            difference += push_offset
        elif mnem == 'sub':
            if idc.get_operand_value(
                    curr_ea, 0
            ) == OperandValueRegister.RSP and idc.get_operand_type(
                    curr_ea, 1) == OperandType.IMMEDIATE_VALUE:
                rsp_substraction = idc.get_operand_value(curr_ea, 1)
                difference += rsp_substraction
        elif mnem == 'mov' or mnem == 'lea':
            #debug('type: ', idc.get_operand_type(curr_ea, 0), ' val: ', idc.get_operand_value(curr_ea, 0))
            debug(idc.generate_disasm_line(curr_ea, 0))
            if idc.get_operand_value(curr_ea, 0) == OperandValueRegister.RBP:
                debug(
                    mnem, ' type: ', idc.get_operand_type(curr_ea, 1),
                    ' val: ',
                    'bp: 0x{:X}'.format(idc.get_operand_value(curr_ea, 1)))
                #case 1: mov
                if mnem == 'mov':
                    if idc.get_operand_type(
                            curr_ea, 1
                    ) == OperandType.GENERAL_REG and idc.get_operand_value(
                            curr_ea, 1) == OperandValueRegister.RSP:
                        displacement = 0
                #case 2: lea
                if mnem == 'lea':
                    if idc.get_operand_type(curr_ea, 1) == OperandType.MEMORY_REG:
                        # Values above 0xF000000000000000 are treated as a
                        # 64-bit two's-complement negative displacement.
                        if idc.get_operand_value(curr_ea, 1) > 0xF000000000000000:
                            displacement = 0x10000000000000000 - idc.get_operand_value(
                                curr_ea, 1)
                            difference += displacement
                        else:
                            displacement = idc.get_operand_value(
                                curr_ea, 1)
                            difference -= displacement
                break
        curr_ea += insn.size
    return difference
def stackwindow(ea, delta, direction=-1):
    '''return the block containing all instructions within the specified stack delta

    Walks forward (direction > 0) or backward (direction < 0) from ``ea``
    until the stack delta has moved at least ``delta`` away from its value
    at ``ea``.  ``direction`` must be non-zero.  Returns a 2-tuple of
    addresses bounding the window.
    '''
    assert direction != 0, 'you make no sense with your lack of direction'

    if direction > 0:
        step = database.next
    else:
        step = database.prev

    origin = ea
    origin_sp = getSpDelta(ea)
    sp = origin_sp

    while abs(sp - origin_sp) < delta:
        sp = getSpDelta(ea)
        ea = step(ea)

    if ea < origin:
        # Walked backwards: move both bounds past their instruction.
        low = ea + idaapi.decode_insn(ea)
        high = origin + idaapi.decode_insn(origin)
        return low, high

    return (origin, ea)
def argc(self, function):
    '''
    Counts the number of arguments used by the specified function.

    Steps through the function in ``self.arch.insn_size`` increments.  An
    argument register that is read before being overwritten marks itself
    and every lower-indexed argument register as used; one that is written
    before being read marks itself and every higher-indexed register as not
    an argument.  Scanning stops once every argument register is classified.

    Returns 0 when the architecture is unknown.
    '''
    used = set()
    clobbered = set()
    ea = function.startEA

    if self.arch.unknown:
        return 0

    while ea < function.endEA:
        idaapi.decode_insn(ea)
        features = idaapi.cmd.get_canon_feature()
        register_like = (idaapi.o_reg, idaapi.o_displ, idaapi.o_phrase)

        # Operands that are read by this instruction.
        for n, use_flag in enumerate(self.USE_OPND):
            operand = idaapi.cmd.Operands[n]
            if operand.type not in register_like:
                continue
            try:
                regname = self.arch.registers[operand.reg]
                index = self.arch.argv.index(regname)
            except ValueError:
                # Not an argument register.
                continue
            if features & use_flag and regname not in clobbered:
                used.update(self.arch.argv[:index + 1])

        # Operands that are modified by this instruction.
        for n, change_flag in enumerate(self.CHANGE_OPND):
            operand = idaapi.cmd.Operands[n]
            if operand.type not in register_like:
                continue
            try:
                regname = self.arch.registers[operand.reg]
                index = self.arch.argv.index(regname)
            except ValueError:
                continue
            if features & change_flag and regname not in used:
                clobbered.update(self.arch.argv[index:])

        if used.union(clobbered) == set(self.arch.argv):
            break

        # TODO: Use idc.NextHead(ea) instead...
        ea += self.arch.insn_size

    return len(used)
def trace(self, ea):
    '''
    Given an EA where an argument register is set, attempt to trace what
    function call that argument is passed to.

    @ea - The address of an instruction that modifies a function argument register.

    Returns a tuple of (function EA, argv index, argument register name) on success.
    Returns (None, None, None) on failure.
    '''
    idaapi.decode_insn(ea)
    features = idaapi.cmd.get_canon_feature()

    if self.arch.unknown:
        return (None, None, None)

    for n in range(0, len(self.CHANGE_OPND)):
        if idaapi.cmd.Operands[n].type in [idaapi.o_reg, idaapi.o_displ, idaapi.o_phrase]:
            try:
                regname = self.arch.registers[idaapi.cmd.Operands[n].reg]
                index = self.arch.argv.index(regname)
            except ValueError:
                # Not an argument register; try the next operand.
                continue

            if features & self.CHANGE_OPND[n]:
                # Step back over the delay slot(s) before scanning forward.
                ea = ea - (self.arch.delay_slot * self.arch.insn_size)

                # NOTE(review): decode_insn() below overwrites idaapi.cmd,
                # so later iterations of the outer loop read the operands of
                # the last scanned instruction rather than the original one.
                while True:
                    idaapi.decode_insn(ea)

                    if idaapi.is_call_insn(ea):
                        for xref in idautils.XrefsFrom(ea):
                            if xref.type in [idaapi.fl_CF, idaapi.fl_CN]:
                                return (xref.to, index, regname)
                        # If we couldn't figure out where the function call was going to, just quit
                        break

                    # is_basic_block_end() is retried with a second argument
                    # when the one-argument call raises TypeError.
                    try:
                        is_block_end = idaapi.is_basic_block_end(ea)
                    except TypeError:
                        is_block_end = idaapi.is_basic_block_end(ea, True)

                    if is_block_end:
                        break

                    # TODO: Use idc.NextHead(ea) instead...
                    ea += self.arch.insn_size

    return (None, None, None)
def argv(self, func):
    '''
    Attempts to identify what types of arguments are passed to a given function.
    Currently unused.

    Generator: for each call-instruction xref to ``func`` it scans backwards
    (at most ten instruction slots) for instructions that modify an argument
    register and yields the ``args`` list, one slot per entry of
    ``self.arch.argv``.  The same list object is yielded every time and is
    never reset between call sites.
    '''
    args = [None for x in self.arch.argv]

    if not self.arch.unknown:
        for xref in idautils.XrefsTo(func.startEA):
            if idaapi.is_call_insn(xref.frm):
                idaapi.decode_insn(xref.frm)

                # Start in the delay slot (if any) and walk backwards.
                ea = xref.frm + (self.arch.delay_slot * self.arch.insn_size)
                end_ea = (xref.frm - (self.arch.insn_size * 10))

                while ea >= end_ea:
                    # Stop searching if we've reached a conditional block or another call
                    if idaapi.is_basic_block_end(ea) or (
                            ea != xref.frm and idaapi.is_call_insn(ea)):
                        break

                    idaapi.decode_insn(ea)
                    features = idaapi.cmd.get_canon_feature()

                    for n in range(0, len(self.CHANGE_OPND)):
                        if idaapi.cmd.Operands[n].type in [
                                idaapi.o_reg, idaapi.o_displ, idaapi.o_phrase
                        ]:
                            try:
                                regname = self.arch.registers[
                                    idaapi.cmd.Operands[n].reg]
                                index = self.arch.argv.index(regname)
                            except ValueError:
                                continue

                            if features & self.CHANGE_OPND[n]:
                                # NOTE(review): this loop rebinds `xref`, so
                                # the `xref.frm` comparison above sees the
                                # inner xref afterwards - confirm intended.
                                for xref in idautils.XrefsFrom(ea):
                                    # TODO: Where is this xref type defined?
                                    if xref.type == 1:
                                        string = idc.GetString(xref.to)
                                        if string and len(string) > 4:
                                            # Stores the builtin type `str`,
                                            # not the string value.
                                            args[index] = str
                                        break

                    ea -= self.arch.insn_size

                yield args
def hook(self, hook_addr=0):
    """
    Install a hook on the function named ``self.name``.

    With ``hook_addr == 0`` a conditional breakpoint is placed on the
    function.  Otherwise the function prologue is overwritten with a jump
    to ``hook_addr``, where the inline hook code, the displaced prologue
    and a jump back are written.

    Nothing is patched when the name cannot be resolved or when the
    prologue cannot be decoded; 0 is returned in both cases.

    Args:
        hook_addr(int): address for inline hook code, 0 indicates bpt hook.

    Returns:
        memory size in bytes used for inline hook.
    """
    self.hook_addr = hook_addr
    self.func_addr = idc.get_name_ea_simple(self.name)

    # An unresolved name is reported as BADADDR; 0 is kept for callers
    # that relied on the previous check.
    if self.func_addr in (0, idc.BADADDR):
        return 0

    print("Hooking %s at 0x%x" % (self.name, self.func_addr))
    if self.hook_addr == 0:
        idc.add_bpt(self.func_addr)
        idc.set_bpt_cond(self.func_addr, self.bpt_cond_hook_code)
        return 0

    # assemble jmp code
    jmp_code = "jmp 0x%x" % self.hook_addr
    jmp_buf, _ = assemble(jmp_code, self.func_addr)

    # read function prologue according to jmp code length
    # NOTE: instructions like 'call $+5' in prologue will
    # cause problems.
    insn = idaapi.insn_t()
    move_length = 0
    while move_length < len(jmp_buf):
        decoded = idaapi.decode_insn(insn, self.func_addr + move_length)
        if decoded <= 0 or insn.size <= 0:
            # Without this check the loop would never advance.  Bail out
            # before any byte has been patched.
            print("Cannot decode prologue of %s at 0x%x" %
                  (self.name, self.func_addr + move_length))
            return 0
        move_length += insn.size
    prologue = idaapi.get_bytes(self.func_addr, move_length)

    # write jmp code
    idaapi.patch_bytes(self.func_addr, jmp_buf)

    # assemble hook code
    hook_buf, _ = assemble(self.inline_hook_code, self.hook_addr)
    hook_buf += prologue
    jmp_back_code = 'jmp 0x%x' % (self.func_addr + move_length)
    jmp_back_buf, _ = assemble(jmp_back_code,
                               self.hook_addr + len(hook_buf))
    hook_buf += jmp_back_buf

    # write hook code
    idaapi.patch_bytes(self.hook_addr, hook_buf)
    return len(hook_buf)
def decode_instr(eip):
    """
    Decode the instruction at ``eip`` into a small dictionary.

    The result has three keys: ``inst`` (mnemonic), ``ops`` (IR operands)
    and ``nextip`` (possible successor addresses: the fall-through address
    unless the instruction is ``jmp``/``retn``, plus any near target).

    Raises UnknownError for an operand type that is not handled.
    """
    MakeCode(eip)
    mnem = idaapi.ua_mnem(eip)
    sz = idaapi.decode_insn(eip)

    x = dict(nextip=[], inst=mnem, ops=[])
    if idaapi.cmd.itype not in (idaapi.NN_jmp, idaapi.NN_retn):
        x['nextip'].append(eip + sz)

    # dtyp code -> operand width in bits.  Other codes leave `bits` at
    # whatever the previous operand set it to.
    widths = {0: 8, 1: 16, 2: 32}

    for n, op in enumerate(idaapi.cmd.Operands):
        if op.type == 0:
            break

        ty = op.type
        text = idaapi.tag_remove(idaapi.ua_outop2(eip, n))
        if op.dtyp in widths:
            bits = widths[op.dtyp]

        if ty == idc.o_reg:
            ir_op = IR.Var(text, bits)
        elif ty == idc.o_mem:
            ir_op = IR.Mem(IR.Const(op.addr), bits)
        elif ty in (idc.o_phrase, idc.o_displ):
            ir_op = IR.Mem(parse_mem(op), bits)
        elif ty == idc.o_imm:
            ir_op = IR.Const(op.value, bits)
        elif ty == idc.o_near:
            ir_op = IR.Const(op.addr)
        else:
            raise UnknownError

        x['ops'].append(ir_op)
        if ty == idc.o_near:
            # A near target is also a possible successor.
            x['nextip'].append(op.addr)

    return x
def main():
    """
    Colour the calls, returns and conditional jumps of the current function.

    Iterates the code items of the function under the cursor.  Calls are
    coloured blue, returns red and conditional jumps yellow; indirect calls
    and indirect conditional jumps are coloured green instead.  Items that
    fail to decode are skipped.
    """
    eip = idaapi.get_screen_ea()
    function = idaapi.func_item_iterator_t()
    function.set(idaapi.get_func(eip))

    b_ok = function.first()
    while b_ok:
        pc = function.current()
        inslen = idaapi.decode_insn(function.current())

        if inslen != 0:
            if inst_is_call(pc):
                color = get_green() if (get_blue(), is_indirect(pc))[1] else get_blue()
                idc.SetColor(pc, CIC_ITEM, color)
            elif inst_is_ret(pc):
                idc.SetColor(pc, CIC_ITEM, get_red())
            elif inst_is_jcc(pc):
                color = get_green() if (get_yellow(), is_indirect(pc))[1] else get_yellow()
                idc.SetColor(pc, CIC_ITEM, color)

        b_ok = function.next_code()
def decode_instr(eip):
    """
    Decode the instruction at ``eip`` into a small dictionary.

    The result has three keys: ``inst`` (mnemonic), ``ops`` (IR operands)
    and ``nextip`` (possible successor addresses: the fall-through address
    unless the instruction is ``jmp``/``retn``, plus any near target).

    Raises UnknownError for an operand type that is not handled.
    """
    MakeCode(eip)
    mnem = idaapi.ua_mnem(eip)
    sz = idaapi.decode_insn(eip)

    x = dict(nextip=[], inst=mnem, ops=[])
    if idaapi.cmd.itype not in (idaapi.NN_jmp, idaapi.NN_retn):
        x['nextip'].append(eip + sz)

    # dtyp code -> operand width in bits.  Other codes leave `bits` at
    # whatever the previous operand set it to.
    widths = {0: 8, 1: 16, 2: 32}

    for n, op in enumerate(idaapi.cmd.Operands):
        if op.type == 0:
            break

        ty = op.type
        text = idaapi.tag_remove(idaapi.ua_outop2(eip, n))
        if op.dtyp in widths:
            bits = widths[op.dtyp]

        if ty == idc.o_reg:
            ir_op = IR.Var(text, bits)
        elif ty == idc.o_mem:
            ir_op = IR.Mem(IR.Const(op.addr), bits)
        elif ty in (idc.o_phrase, idc.o_displ):
            ir_op = IR.Mem(parse_mem(op), bits)
        elif ty == idc.o_imm:
            ir_op = IR.Const(op.value, bits)
        elif ty == idc.o_near:
            ir_op = IR.Const(op.addr)
        else:
            raise UnknownError

        x['ops'].append(ir_op)
        if ty == idc.o_near:
            # A near target is also a possible successor.
            x['nextip'].append(op.addr)

    return x
def yara_sig_code_selection():
    """Return some internal details for how we want to signature the selection

    Walks the selected range item by item.  Code items contribute their
    disassembly text as a comment and a wildcarded rule line; non-code items
    contribute a note and their raw bytes in hex.

    Returns a tuple ``(selection start, comments, rulelines)``.
    """
    cur_ea = SelStart()
    end_ea = SelEnd()
    d = i386DisasmParts()
    comments = []
    rulelines = []

    # Iterate over selected code bytes
    while cur_ea < end_ea:
        # NextHead() yields BADADDR when no further head exists before
        # end_ea.  Clamp it so the last item ends at the selection end
        # instead of running up to BADADDR.
        next_ea = NextHead(cur_ea, end_ea)
        if next_ea == idaapi.BADADDR:
            next_ea = end_ea

        # Did we inadvertently select something that wasn't code?
        if not idc.isCode(idaapi.getFlags(cur_ea)):
            noncodebytes = "".join(
                [chr(Byte(x)) for x in xrange(cur_ea, next_ea)])
            comments.append("Non-code at %08X: %d bytes" %
                            (cur_ea, len(noncodebytes)))
            rulelines.append(binhex_spaced(noncodebytes))
        else:
            curlen = idaapi.decode_insn(cur_ea)
            # Match IDA's disassembly format
            comments.append(GetDisasm(cur_ea))
            # But we need our custom object to process
            curbytes = "".join(
                [chr(Byte(b)) for b in xrange(cur_ea, cur_ea + curlen)])
            codefrag = d.disasm(curbytes, 0, cur_ea)
            rulelines.append(yara_wildcard_instruction(codefrag))

        # move along
        cur_ea = next_ea

    return (SelStart(), comments, rulelines)
def DecodeInstruction(ea):
    """
    Decodes an instruction and returns a insn_t like class

    The returned object is a plain copy: every non-dunder attribute of the
    current instruction is copied onto it, and ``Operands`` holds similar
    copies of each operand up to the first ``o_void`` one.

    @param ea: address to decode
    @return: None or an insn_t like structure
    """
    if idaapi.decode_insn(ea) == 0:
        return None

    insn = idaapi.get_current_instruction()
    if not insn:
        return None

    class dup:
        """Snapshot of an object's non-dunder attributes."""

        def __init__(self, op):
            for attr in dir(op):
                is_dunder = attr.startswith("__") and attr.endswith("__")
                if not is_dunder:
                    setattr(self, attr, getattr(op, attr))

    snapshot = dup(insn)
    snapshot.Operands = []
    for index in range(0, idaapi.UA_MAXOP):
        operand = idaapi.get_instruction_operand(insn, index)
        if operand.type == idaapi.o_void:
            # First void operand marks the end of the operand list.
            break
        snapshot.Operands.append(dup(operand))
    return snapshot
def _does_instruction_match(self, ea, instruction, regex=False):
    """
    Check whether the instruction at ``ea`` matches an instruction template.

    The mnemonic matches when the template's is empty, equal, or (with
    ``regex``) matched by ``re.match``.  Every non-empty template operand
    must then equal, or with ``regex`` match, the operand text at the same
    position; empty template operands are wildcards.

    :param ea: address of the instruction to test
    :param instruction: template exposing ``mnem`` and ``operands``
    :param regex: treat template fields as regular expressions
    :return: True on a match, False otherwise
    """
    # The decode result is not consulted; the call is kept as-is.
    insn = idaapi.insn_t()
    idaapi.decode_insn(insn, ea)

    actual_mnem = idc.print_insn_mnem(ea)
    wanted_mnem = instruction.mnem
    mnem_ok = ((not wanted_mnem) or (wanted_mnem == actual_mnem)
               or (regex and re.match(wanted_mnem, actual_mnem)))
    if not mnem_ok:
        return False

    mismatches = 0
    for position, wanted in enumerate(instruction.operands):
        if not wanted:
            continue
        actual = idc.print_operand(ea, position)
        if regex:
            matched = re.match(wanted, actual)
        else:
            matched = wanted == actual
        if not matched:
            mismatches += 1

    return mismatches == 0
def main():
    """
    Colour the calls, returns and conditional jumps of the current function.

    Iterates the code items of the function under the cursor.  Calls are
    coloured blue, returns red and conditional jumps yellow; indirect calls
    and indirect conditional jumps are coloured green instead.  Items that
    fail to decode are skipped.
    """
    eip = idaapi.get_screen_ea()
    function = idaapi.func_item_iterator_t()
    function.set(idaapi.get_func(eip))

    b_ok = function.first()
    while b_ok:
        pc = function.current()
        inslen = idaapi.decode_insn(function.current())

        if inslen != 0:
            paint = True
            if inst_is_call(pc):
                color = get_blue()
                if is_indirect(pc):
                    color = get_green()
            elif inst_is_ret(pc):
                color = get_red()
            elif inst_is_jcc(pc):
                color = get_yellow()
                if is_indirect(pc):
                    color = get_green()
            else:
                # Not an instruction kind we highlight.
                paint = False

            if paint:
                idc.SetColor(pc, CIC_ITEM, color)

        b_ok = function.next_code()
def yara_sig_code_selection():
    """Return some internal details for how we want to signature the selection

    Walks the selected range item by item.  Code items contribute their
    disassembly text as a comment and a wildcarded rule line; non-code items
    contribute a note and their raw bytes in hex.

    Returns a tuple ``(selection start, comments, rulelines)``.
    """
    cur_ea = SelStart()
    end_ea = SelEnd()
    d = i386DisasmParts()
    comments = []
    rulelines = []

    # Iterate over selected code bytes
    while cur_ea < end_ea:
        # NextHead() yields BADADDR when no further head exists before
        # end_ea.  Clamp it so the last item ends at the selection end
        # instead of running up to BADADDR.
        next_ea = NextHead(cur_ea, end_ea)
        if next_ea == idaapi.BADADDR:
            next_ea = end_ea

        # Did we inadvertently select something that wasn't code?
        if not idc.isCode(idaapi.getFlags(cur_ea)):
            noncodebytes = "".join([
                chr(Byte(x)) for x in xrange(cur_ea, next_ea)
            ])
            comments.append("Non-code at %08X: %d bytes" %
                            (cur_ea, len(noncodebytes)))
            rulelines.append(binhex_spaced(noncodebytes))
        else:
            curlen = idaapi.decode_insn(cur_ea)
            # Match IDA's disassembly format
            comments.append(GetDisasm(cur_ea))
            # But we need our custom object to process
            curbytes = "".join(
                [chr(Byte(b)) for b in xrange(cur_ea, cur_ea + curlen)])
            codefrag = d.disasm(curbytes, 0, cur_ea)
            rulelines.append(yara_wildcard_instruction(codefrag))

        # move along
        cur_ea = next_ea

    return (SelStart(), comments, rulelines)
def _addToSig(self, sigIndex) -> bool:
    """
    Extend signature ``sigIndex`` by the instruction at its current address.

    Instructions shorter than 5 bytes are added through
    ``_addBytesToSig``; longer ones through ``_addInsToSig``.  On success
    the signature's current address advances by the instruction size and
    its op count is incremented.

    :param sigIndex: index into ``self.Sigs``
    :return: False if the address cannot be decoded, True otherwise
    """
    insn = idaapi.insn_t()
    insn.size = 0
    sig = self.Sigs[sigIndex]
    start = sig.dwCurrentAddress

    if not idaapi.can_decode(start):
        return False

    decoded = idaapi.decode_insn(insn, start)
    if decoded == 0 or insn.size == 0:
        return False

    if insn.size >= 5:
        self._addInsToSig(insn, sigIndex)
    else:
        self._addBytesToSig(sigIndex, start, insn.size)

    sig.dwCurrentAddress += insn.size
    sig.iOpCount += 1
    self.Sigs[sigIndex] = sig

    return True
def get_struc_offset(ea, opn):
    """
    Describe the structure member(s) addressed by operand ``opn`` at ``ea``.

    The operand's immediate value is used as an offset into the structure
    returned by ``get_stroff_path``.  Nested structure members are appended
    as ``[struct id, struct name]`` and the walk continues; the first plain
    member is appended as ``[member name, member flags]`` and the list is
    returned.

    Prints an error and returns None when the instruction cannot be
    decoded; also returns None when the offset reaches 0 without hitting a
    plain member.
    """
    path = idaapi.tid_array(1)
    delta = idaapi.sval_pointer()
    idaapi.get_stroff_path(ea, opn, path.cast(), delta.cast())
    struct = path[0]
    if idaapi.decode_insn(ea) == 0:
        print 'error in {0}'.format(GetDisasm(ea))
    else:
        op = idaapi.cmd.Operands[opn]
        offset = op.value
        result = []
        # Queried again after decoding; path[0] is the structure id.
        idaapi.get_stroff_path(ea, opn, path.cast(), delta.cast())
        struct = path[0]
        while offset:
            member_id = idc.GetMemberId(struct, offset)
            member_name = idc.GetMemberName(member_id)
            field_struct_id = idc.GetMemberStrId(struct, offset)
            if field_struct_id != idc.BADADDR:
                # Member is itself a structure.
                result.append(
                    [field_struct_id,
                     idc.GetStrucName(field_struct_id)])
            else:
                result.append([member_name, idc.GetMemberFlag(struct, offset)])
                return result
            offset -= idc.GetMemberOffset(member_name)
def gen_skeleton_and_eas(reg):
    """
    Collect the memory operands based on ``reg`` in the function under the cursor.

    Each of the first two operands of every instruction is checked for a
    phrase/displacement operand whose ``phrase`` equals ``phrase[reg]``.

    :param reg: key into the module-level ``phrase`` table
    :return: ``[skeleton, eas]`` where ``skeleton`` is a list of
             ``(offset, 'field_<hex offset>', dtyp[...])`` tuples sorted by
             offset, and ``eas`` is a list of ``(ea, operand index, 0)``
    """
    ea = idc.ScreenEA()
    start = idc.GetFunctionAttr(ea, idc.FUNCATTR_START)
    end = idc.GetFunctionAttr(ea, idc.FUNCATTR_END)
    ea = start
    eas = []
    fields = {}

    # FUNCATTR_END is the first address past the function, so it is
    # excluded: the item there belongs to whatever follows.
    while ea < end:
        if idaapi.decode_insn(ea) == 0:
            print('error in {0}'.format(GetDisasm(ea)))
        else:
            for opn in (0, 1):
                op = idaapi.cmd.Operands[opn]
                offset = 0
                if op.type == idaapi.o_idpspec3:
                    continue
                if op.type in (idaapi.o_phrase,
                               idaapi.o_displ) and op.phrase == phrase[reg]:
                    # Keyed by displacement, so repeated accesses to the
                    # same offset collapse into one field.
                    fields[op.addr] = ('field_{0}'.format(hex(op.addr)),
                                       dtyp[op.dtyp])
                    eas.append((ea, opn, offset))
        ea = idc.NextHead(ea)

    skeleton = [(addr, field[0], field[1])
                for addr, field in sorted(fields.items(),
                                          key=lambda item: item[0])]
    return [skeleton, eas]
def get_operands(self, ip=None):
    """
    Gets the Operand objects of all operands in the current instruction and
    returns them in a list.

    :param int ip: location of instruction pointer to pull operands from
                   (defaults to current rip in context)
    :return: list of Operand objects
    """
    if ip is None:
        ip = self.ip

    insn = idaapi.insn_t()
    # The decode_insn() return value serves as the upper bound on indexes.
    upper_bound = idaapi.decode_insn(insn, ip)

    collected = []
    for index in range(upper_bound):
        try:
            candidate = Operand(self, ip, index)
            is_fake = candidate.is_fake
        except (IndexError, RuntimeError):
            # IDA will identify more operands than there actually are
            # causing an issue.  Just stop if this happens.
            # IDA 7 throws RuntimeError instead of IndexError
            break

        # IDA will sometimes create hidden or "fake" operands.  These are
        # there to represent things like an implicit EAX register.  To help
        # avoid confusion to the opcode developer, these fake operands will
        # not be included.
        if not is_fake:
            collected.append(candidate)

    return collected
def iterateInstructions():
    """
    Colour every call, branch and return instruction in the database.

    Heads are visited in ascending order starting from the first head after
    address 0.  Each is decoded and its ``itype`` is looked up in
    ``call_instructions``, ``branch_instructions`` and ``ret_instructions``.

    Iteration stops as soon as ``idc.NextHead`` reports ``BADADDR``; the
    sentinel itself is never decoded or coloured.
    """
    # get first instruction
    ea = idc.NextHead(0)
    while ea != idaapi.BADADDR:
        idaapi.decode_insn(ea)
        itype = idaapi.cmd.itype
        if itype in call_instructions:
            colorize(idaapi.cmd.ea, COLOR_CALL)
        if itype in branch_instructions:
            colorize(idaapi.cmd.ea, COLOR_BRANCH)
        if itype in ret_instructions:
            colorize(idaapi.cmd.ea, COLOR_RET)
        # get next instruction
        ea = idc.NextHead(ea)
def _does_instruction_match(self, ea, instruction, regex=False):
    """
    Check whether the instruction at ``ea`` matches an instruction template.

    The mnemonic matches when the template's is empty, equal, or (with
    ``regex``) matched by ``re.match``.  Every non-empty template operand
    must then equal, or with ``regex`` match, the operand text at the same
    position; empty template operands are wildcards.

    :param ea: address of the instruction to test
    :param instruction: template exposing ``mnem`` and ``operands``
    :param regex: treat template fields as regular expressions
    :return: True on a match, False otherwise
    """
    # The decode result is not consulted; the call is kept as-is.
    idaapi.decode_insn(ea)

    actual_mnem = idc.GetMnem(ea)
    wanted_mnem = instruction.mnem
    mnem_ok = ((not wanted_mnem) or (wanted_mnem == actual_mnem)
               or (regex and re.match(wanted_mnem, actual_mnem)))
    if not mnem_ok:
        return False

    mismatches = 0
    for position, wanted in enumerate(instruction.operands):
        if not wanted:
            continue
        actual = idc.GetOpnd(ea, position)
        if regex:
            matched = re.match(wanted, actual)
        else:
            matched = wanted == actual
        if not matched:
            mismatches += 1

    return mismatches == 0
def graph_down(ea, path=None):
    """
    Recursively collect all function calls.

    Copied with minor modifications from
    http://hooked-on-mnemonics.blogspot.com/2012/07/renaming-subroutine-blocks-and.html

    :param ea: address of the function to start from
    :param path: set of function addresses already visited; a fresh set is
                 created when omitted, so separate top-level calls do not
                 share results
    :return: the set of visited function addresses (``path``)
    """
    if path is None:
        path = set()
    path.add(ea)

    #
    # extract all the call instructions from the current function
    #

    call_instructions = []
    instruction_info = idaapi.insn_t()
    for address in idautils.FuncItems(ea):

        # decode the instruction
        if not idaapi.decode_insn(instruction_info, address):
            continue

        # check if this instruction is a call
        if not idaapi.is_call_insn(instruction_info):
            continue

        # save this address as a call instruction
        call_instructions.append(address)

    #
    # iterate through all the instructions in the target function (ea) and
    # inspect all the call instructions
    #

    for x in call_instructions:

        # TODO
        for r in idautils.XrefsFrom(x, idaapi.XREF_FAR):
            #print(0x%08X" % h, "--calls-->", "0x%08X" % r.to)
            if not r.iscode:
                continue

            # get the function pointed at by this call
            func = idaapi.get_func(r.to)
            if not func:
                continue

            # ignore calls to imports / library calls / thunks
            if (func.flags & (idaapi.FUNC_THUNK | idaapi.FUNC_LIB)) != 0:
                continue

            #
            # if we have not traversed to the destination function that this
            # call references, recurse down to it to continue our traversal
            #

            if r.to not in path:
                graph_down(r.to, path)

    return path
def op_count(ea):
    '''Return the number of operands of given instruction

    The count is the index of the first ``o_void`` operand.  When no
    operand is void, the index of the last operand is returned.
    '''
    idaapi.decode_insn(ea)
    for index, operand in enumerate(idaapi.cmd.Operands):
        if operand.type == idaapi.o_void:
            break
    # Either the first void slot, or (no void found) the last index:
    # maximum operand count. ida might be wrong here...
    return index
def disasm_func(cls, fn):
    """
    Decode every item of function ``fn``.

    :param fn: function object exposing ``startEA``
    :return: list of dicts with keys ``ea`` (item address), ``fn_ea``
             (function start) and ``dis`` (a copy of the decoded
             instruction, or None when decoding failed)
    """
    records = []
    for item_ea in list(FuncItems(fn.startEA)):
        if idaapi.decode_insn(item_ea) > 0:
            decoded = idaapi.cmd.copy()
        else:
            decoded = None
        records.append({'ea': item_ea, 'fn_ea': fn.startEA, 'dis': decoded})
    return records
def find_ref_loc(ea, ref):
    """
    Locate the byte range of a reference operand inside the instruction at ``ea``.

    type ea: idc.ea_t
    type ref: idc.ea_t

    Returns ``(start, end)`` offsets taken from the operands' ``offb``
    fields for the first operand of type reg/mem/imm/far/near, or
    ``(idc.BADADDR, idc.BADADDR)`` when no such operand exists.

    NOTE(review): the two parameter checks return a bare ``idc.BADADDR``
    rather than a tuple, and ``ref`` is only validated, never compared
    against the operands - confirm callers handle both shapes.
    """
    logger = logging.getLogger("idb2pat:find_ref_loc")
    if ea == idc.BADADDR:
        logger.debug("Bad parameter: ea")
        return idc.BADADDR
    if ref == idc.BADADDR:
        logger.debug("Bad parameter: ref")
        return idc.BADADDR

    insn = idaapi.insn_t()
    idaapi.decode_insn(insn, ea)

    # Count operands: the index of the first o_void slot (0 if none found).
    op_num = 0
    for idx in range(len(insn.ops)):
        if insn.ops[idx].type == idaapi.o_void:
            op_num = idx
            break

    for idx in range(op_num):
        op = insn.ops[idx]
        if op.type not in [
                idaapi.o_reg, idaapi.o_mem, idaapi.o_imm, idaapi.o_far,
                idaapi.o_near
        ]:
            continue

        # HTC - raw, dummy fix
        # A zero offb falls back to the next operand's offb as the start.
        start = op.offb
        if start == 0:
            start = insn.ops[idx + 1].offb

        if idx < op_num - 1:
            # Not the last operand: it ends where the next one starts,
            # unless that would produce an empty range.
            end = insn.ops[idx + 1].offb
            if end == start:
                end = insn.size
        else:
            end = insn.size
        return (start, end)

    return (idc.BADADDR, idc.BADADDR)
def argc(self, function):
    '''
    Counts the number of arguments used by the specified function.

    Steps through the function in ``self.arch.insn_size`` increments.  An
    argument register that is read before being overwritten marks itself
    and every lower-indexed argument register as used; one that is written
    before being read marks itself and every higher-indexed register as not
    an argument.  Scanning stops once every argument register is classified.

    Returns 0 when the architecture is unknown.
    '''
    used = set()
    clobbered = set()
    ea = function.startEA

    if self.arch.unknown:
        return 0

    while ea < function.endEA:
        idaapi.decode_insn(ea)
        features = idaapi.cmd.get_canon_feature()
        register_like = (idaapi.o_reg, idaapi.o_displ, idaapi.o_phrase)

        # Operands that are read by this instruction.
        for n, use_flag in enumerate(self.USE_OPND):
            operand = idaapi.cmd.Operands[n]
            if operand.type not in register_like:
                continue
            try:
                regname = self.arch.registers[operand.reg]
                index = self.arch.argv.index(regname)
            except ValueError:
                # Not an argument register.
                continue
            if features & use_flag and regname not in clobbered:
                used.update(self.arch.argv[:index + 1])

        # Operands that are modified by this instruction.
        for n, change_flag in enumerate(self.CHANGE_OPND):
            operand = idaapi.cmd.Operands[n]
            if operand.type not in register_like:
                continue
            try:
                regname = self.arch.registers[operand.reg]
                index = self.arch.argv.index(regname)
            except ValueError:
                continue
            if features & change_flag and regname not in used:
                clobbered.update(self.arch.argv[index:])

        if used.union(clobbered) == set(self.arch.argv):
            break

        # TODO: Use idc.NextHead(ea) instead...
        ea += self.arch.insn_size

    return len(used)
def argv(self, func):
    '''
    Attempts to identify what types of arguments are passed to a given function.
    Currently unused.

    Generator: for each call-instruction xref to ``func`` it scans backwards
    (at most ten instruction slots) for instructions that modify an argument
    register and yields the ``args`` list, one slot per entry of
    ``self.arch.argv``.  The same list object is yielded every time and is
    never reset between call sites.
    '''
    args = [None for x in self.arch.argv]

    if not self.arch.unknown:
        for xref in idautils.XrefsTo(func.startEA):
            if idaapi.is_call_insn(xref.frm):
                idaapi.decode_insn(xref.frm)

                # Start in the delay slot (if any) and walk backwards.
                ea = xref.frm + (self.arch.delay_slot * self.arch.insn_size)
                end_ea = (xref.frm - (self.arch.insn_size * 10))

                while ea >= end_ea:
                    # Stop searching if we've reached a conditional block or another call
                    if idaapi.is_basic_block_end(ea) or (ea != xref.frm and idaapi.is_call_insn(ea)):
                        break

                    idaapi.decode_insn(ea)
                    features = idaapi.cmd.get_canon_feature()

                    for n in range(0, len(self.CHANGE_OPND)):
                        if idaapi.cmd.Operands[n].type in [idaapi.o_reg, idaapi.o_displ, idaapi.o_phrase]:
                            try:
                                regname = self.arch.registers[idaapi.cmd.Operands[n].reg]
                                index = self.arch.argv.index(regname)
                            except ValueError:
                                continue

                            if features & self.CHANGE_OPND[n]:
                                # NOTE(review): this loop rebinds `xref`, so
                                # the `xref.frm` comparison above sees the
                                # inner xref afterwards - confirm intended.
                                for xref in idautils.XrefsFrom(ea):
                                    # TODO: Where is this xref type defined?
                                    if xref.type == 1:
                                        string = idc.GetString(xref.to)
                                        if string and len(string) > 4:
                                            # Stores the builtin type `str`,
                                            # not the string value.
                                            args[index] = str
                                        break

                    ea -= self.arch.insn_size

                yield args
def colorize(self, node, color):
    '''
    Colorize the entire code block.

    Nothing is done when ``node`` has no block or when the block's first
    item already has ``color``.

    @node  - Node identifier passed to self.LookupBlock.
    @color - Color value to apply to every item in the block.

    Returns None.
    '''
    block = self.LookupBlock(node)
    if not block or idc.GetColor(block.startEA, idc.CIC_ITEM) == color:
        return

    ea = block.startEA
    while ea < block.endEA:
        idc.SetColor(ea, idc.CIC_ITEM, color)
        size = idaapi.decode_insn(ea)
        if size > 0:
            ea += size
        else:
            # decode_insn() returns 0 for undecodable bytes, which would
            # leave ea unchanged forever.  Skip to the next defined head;
            # BADADDR (no more heads) also ends the loop.
            ea = idc.NextHead(ea, block.endEA)
def __init__(self, ea, struct_vars):
    """
    Build the node for the instruction at ``ea``.

    Instructions for which ``should_print_verbatim`` is true are only
    flagged as ``verbatim``.  Otherwise an ``op_node`` is created for every
    operand that is not void and has the ``OF_SHOW`` flag set.

    :param ea: address of the instruction
    :param struct_vars: passed through to each ``op_node``
    """
    self.ea = ea
    self.ops = []
    self.verbatim = False

    idaapi.decode_insn(ea)

    if should_print_verbatim(idaapi.cmd):
        self.verbatim = True
        return

    # poor man's get_num_ops; can't find real one
    if idaapi.cmd.itype in op_arity_overrides:
        operand_indexes = op_arity_overrides[idaapi.cmd.itype]
    else:
        operand_indexes = range(idaapi.UA_MAXOP)

    for n in operand_indexes:
        operand = idaapi.cmd.Operands[n]
        if operand.type == idaapi.o_void:
            continue
        if (operand.flags & idaapi.OF_SHOW) == 0:
            # Operand is not shown.
            continue
        self.ops.append(op_node(ea, n, struct_vars))
def DecodeInstruction(ea):
    """
    Decodes an instruction and returns an insn_t like class

    @param ea: address to decode
    @return: None or a new insn_t instance
    """
    decoded_length = idaapi.decode_insn(ea)
    # A zero length means nothing could be decoded at ea.
    return None if decoded_length == 0 else idaapi.cmd.copy()
def _find_controllable_jumps(self, start_ea, end_ea):
    """
    Find gadgets that load $t9 or $ra and then jump through that register.

    For each control-instruction template the range is searched for
    matches; for every match with a non-empty second operand, each paired
    jump template is searched for after it and a "Controllable Jump"
    ``ROPGadget`` is recorded for every hit.

    :param start_ea: first address to search
    :param end_ea: last address to search
    :return: list of ROPGadget objects
    """
    controllable_jumps = []

    # Instructions that load the jump register from another operand.
    t9_controls = [
        MIPSInstruction("move", "\$t9"),
        MIPSInstruction("addiu", "\$t9", "^\$"),
    ]

    t9_jumps = [
        MIPSInstruction("jalr", "\$t9"),
        MIPSInstruction("jr", "\$t9"),
    ]

    ra_controls = [
        MIPSInstruction("lw", "\$ra"),
    ]

    ra_jumps = [
        # TODO: Search for jumps to registers other than $ra.
        MIPSInstruction("jr", "\$ra"),
    ]

    # Registers passed as dont_overwrite to the jump search below.
    t9_musnt_clobber = ["$t9"]
    ra_musnt_clobber = ["$ra"]

    for possible_control_instruction in t9_controls+ra_controls:
        ea = start_ea
        found = 0

        # Pick the jump templates that pair with this control template.
        if possible_control_instruction in t9_controls:
            jumps = t9_jumps
            musnt_clobber = t9_musnt_clobber
        else:
            jumps = ra_jumps
            musnt_clobber = ra_musnt_clobber

        while ea <= end_ea:

            ea = self._find_next_instruction_ea(ea, possible_control_instruction, end_ea, regex=True)
            if ea != idc.BADADDR:
                ins_size = idaapi.decode_insn(ea)

                control_instruction = self._get_instruction(ea)
                control_register = control_instruction.operands[1]

                if control_register:
                    for jump in jumps:
                        jump_ea = self._find_next_instruction_ea(ea+ins_size, jump, end_ea, no_baddies=True, regex=True, dont_overwrite=musnt_clobber)
                        if jump_ea != idc.BADADDR:
                            jump_instruction = self._get_instruction(jump_ea)
                            controllable_jumps.append(ROPGadget(control_instruction, jump_instruction, description="Controllable Jump", base=self.base))
                            # Resume the search after the jump just found.
                            ea = jump_ea

                ea += ins_size

    return controllable_jumps
def _fix_data_offsets(self): ea = 0 count = 0 print "Fixing unresolved offset xrefs...", while ea != idaapi.BADADDR: (ea, n) = idaapi.find_notype(ea, idaapi.SEARCH_DOWN) if idaapi.decode_insn(ea): for i in range(0, len(idaapi.cmd.Operands)): op = idaapi.cmd.Operands[i] if op.type == idaapi.o_imm and idaapi.getseg(op.value): idaapi.add_dref(ea, op.value, (idaapi.dr_O | idaapi.XREF_USER)) count += 1 print "created %d new data xrefs" % count
def gen_skeleton_and_eas(reg): ea = idc.ScreenEA() start = idc.GetFunctionAttr(ea, idc.FUNCATTR_START) end = idc.GetFunctionAttr(ea, idc.FUNCATTR_END) ea = start eas = [] skeleton = {} while ea <= end: if idaapi.decode_insn(ea) == 0: print 'error in {0}'.format(GetDisasm(ea)) else: for opn in (0, 1): op = idaapi.cmd.Operands[opn] offset = 0 if op.type == idaapi.o_idpspec3: continue if op.type in (idaapi.o_phrase, idaapi.o_displ) and op.phrase == phrase[reg]: skeleton[op.addr] = ('field_{0}'.format(hex(op.addr)), dtyp[op.dtyp]) eas.append((ea, opn, offset)) ea = idc.NextHead(ea) skeleton = [(elem[0], elem[1][0], elem[1][1]) for elem in sorted(skeleton.items(), key = lambda x: x[0])] return [skeleton, eas]
def get_struc_offset(ea, opn):
    """
    Walk the structure path applied to operand ``opn`` of the instruction
    at ``ea`` and describe the members that the operand value passes through.

    :param ea: address of the instruction
    :param opn: operand index
    :return: a list of two-element lists. Nested-structure steps are recorded
             as ``[struct id, struct name]``; the first non-structure member
             ends the walk and is recorded as ``[member name, member flag]``.
             Nothing is returned explicitly (i.e. None) when decoding fails
             or when the remaining offset reaches 0 before a plain member
             is found.
    """
    path = idaapi.tid_array(1)
    delta = idaapi.sval_pointer()
    idaapi.get_stroff_path(ea, opn, path.cast(), delta.cast())
    struct = path[0]
    if idaapi.decode_insn(ea) == 0:
        print 'error in {0}'.format(GetDisasm(ea))
    else:
        op = idaapi.cmd.Operands[opn]
        offset = op.value
        result = []
        # NOTE(review): this repeats the lookup done at the top of the
        # function with the same arguments.
        idaapi.get_stroff_path(ea, opn, path.cast(), delta.cast())
        struct = path[0]
        while offset:
            member_id = idc.GetMemberId(struct, offset)
            # NOTE(review): GetMemberName / GetMemberOffset are called with a
            # single argument here -- confirm against the idc signatures.
            member_name = idc.GetMemberName(member_id)
            field_struct_id = idc.GetMemberStrId(struct, offset)
            if field_struct_id != idc.BADADDR:
                # The member is itself a structure: record it and keep going.
                result.append([field_struct_id, idc.GetStrucName(field_struct_id)])
            else:
                # Plain member: record it and stop.
                result.append([member_name, idc.GetMemberFlag(struct, offset)])
                return result
            # NOTE(review): ``struct`` is not updated to the nested structure
            # before the next iteration -- verify this is intended.
            offset -= idc.GetMemberOffset(member_name)
def block(self, block):
    '''
    Build the signature material for one basic block.

    Every instruction from block.startEA up to (not including) block.endEA
    is visited; its mnemonic always goes into the formal signature, and the
    rest depends on whether it is a call, has data references, or neither.

    Returns a tuple:

        (formal block signature hash,
         fuzzy block signature hash,
         [interesting, immediate, values],
         [called, function, names])

    The immediates are returned as a list in encounter order; it is not
    de-duplicated.
    '''
    formal = []
    fuzzy = []
    functions = []
    immediates = []

    ea = block.startEA
    while ea < block.endEA:
        idaapi.decode_insn(ea)

        # Get a list of all data/code references from the current instruction
        drefs = [x for x in idautils.DataRefsFrom(ea)]
        crefs = [x for x in idautils.CodeRefsFrom(ea, False)]

        # Add all instruction mnemonics to the formal block hash
        formal.append(idc.GetMnem(ea))

        # If this is a call instruction, be sure to note the name of the function
        # being called. This is used to apply call-based signatures to functions.
        #
        # For fuzzy signatures, we can't use the actual name or EA of the function,
        # but rather just want to note that a function call was made.
        #
        # Formal signatures already have the call instruction mnemonic, which is more
        # specific than just saying that a call was made.
        if idaapi.is_call_insn(ea):
            for cref in crefs:
                func_name = idc.Name(cref)
                if func_name:
                    functions.append(func_name)
                    fuzzy.append("funcref")
        # If there are data references from the instruction, check to see if any of them
        # are strings. These are looked up in the pre-generated strings dictionary.
        #
        # String values are easily identifiable, and are used as part of both the fuzzy
        # and the formal signatures.
        #
        # It is more difficult to determine if non-string values are constants or not;
        # for both fuzzy and formal signatures, just use "data" to indicate that some data
        # was referenced.
        elif drefs:
            for dref in drefs:
                if dref in self.strings:
                    formal.append(self.strings[dref].value)
                    fuzzy.append(self.strings[dref].value)
                else:
                    formal.append("dataref")
                    fuzzy.append("dataref")
        # If there are no data or code references from the instruction, use every operand as
        # part of the formal signature.
        #
        # Fuzzy signatures are only concerned with interesting immediate values, that is, values
        # that are at least 0xFFFF (65,535), are not memory addresses, and are not displayed as
        # negative values.
        elif not crefs:
            # drefs is known to be empty here (the branch above handled it).
            for n in range(0, len(idaapi.cmd.Operands)):
                opnd_text = idc.GetOpnd(ea, n)
                formal.append(opnd_text)

                op = idaapi.cmd.Operands[n]
                if op.type == idaapi.o_imm and not opnd_text.startswith('-'):
                    if op.value >= 0xFFFF:
                        # Flags of 0 mean the value is not an address known
                        # to the database.
                        if idaapi.getFlags(op.value) == 0:
                            fuzzy.append(str(op.value))
                            immediates.append(op.value)

        ea = idc.NextHead(ea)

    return (self.sighash(''.join(formal)), self.sighash(''.join(fuzzy)), immediates, functions)
def AnalyzeRange( self, startEA, endEA ):
    """
    Split the code in [startEA, endEA) into blocks and record their links.

    Walks the range address by address. For every code item it builds a
    textual disassembly line (mnemonic plus up to six operands), stores it in
    self.BlockData under the current block's start address, and reports
    block-to-block relations ('link', 'call', 'from') through self.AddToMap.
    Non-code bytes are skipped one at a time.

    A new block is started when the current address has a far xref to it,
    and after an instruction with far code xrefs from it or a 'ret*'
    instruction.
    """
    CurrentAddress = startEA
    CurrentBlockAddress = CurrentAddress
    NewBlockStart = True
    last_op_code = ''
    while CurrentAddress < endEA:
        if idaapi.isCode( idaapi.get_flags_novalue( CurrentAddress ) ):
            idaapi.decode_insn( CurrentAddress )
            op_code = idaapi.ua_mnem( CurrentAddress )

            # Build "mnemonic op0,op1,..." from the tag-stripped operand text;
            # the first empty operand ends the list.
            operands=[]
            disasm_line = op_code + ' '
            for i in range(0, 6, 1):
                operand = idaapi.ua_outop2( CurrentAddress, i )
                if not operand:
                    break;
                operand = idaapi.tag_remove( operand )
                operands.append( operand )
                if i != 0:
                    disasm_line += ','
                disasm_line += operand
            #disasm_line = idaapi.tag_remove( idaapi.generate_disasm_line( CurrentAddress ) )

            # Any far xref *to* this address makes it the start of a block.
            xref = idaapi.xrefblk_t()
            ret = xref.first_to( CurrentAddress, idaapi.XREF_FAR )
            while ret:
                ret = xref.next_to()
                NewBlockStart = True

            # Fall-through from the previous block, unless it ended in a ret.
            # NOTE(review): nothing in this function ever sets last_op_code
            # to 'new block'.
            if NewBlockStart and last_op_code[0:3] != 'ret' and last_op_code != 'new block':
                self.AddToMap( CurrentBlockAddress, CurrentAddress, None, 'link')

            if NewBlockStart:
                CurrentBlockAddress = CurrentAddress
                self.BlockData[CurrentBlockAddress]=[]
                if self.DebugLevel > 2: print '='*80

            if self.DebugLevel > 2: print hex(CurrentAddress), disasm_line
            self.BlockData[CurrentBlockAddress].append( ( CurrentAddress, disasm_line ) )

            NewBlockStart = False
            CallIsResolved = False
            # Far code xrefs *from* this instruction.
            ret = xref.first_from( CurrentAddress, idaapi.XREF_FAR )
            while ret:
                if xref.iscode:
                    if op_code == 'jmp' and xref.to == CurrentAddress + idaapi.cvar.cmd.size:
                        # A jmp to the very next instruction only ends the block.
                        NewBlockStart = True
                    elif op_code == 'call':
                        CallIsResolved = True
                        self.AddToMap( CurrentBlockAddress,xref.to, operands[0], 'call')
                    else:
                        if len(operands) > 0 :
                            self.AddToMap( CurrentBlockAddress,xref.to, operands[0], 'from')
                        # NOTE(review): placement of this assignment relative
                        # to the operand check should be confirmed.
                        NewBlockStart = True
                ret = xref.next_from()

            # Calls without a code xref are recorded by operand text.
            # NOTE(review): operands[0] is read without checking that the
            # instruction has an operand.
            if ( op_code == 'call' or op_code =='' ) and not CallIsResolved:
                self.AddToMap( CurrentBlockAddress, operands[0], operands[0], 'call')

            # The block ends here: link it to the instruction that follows,
            # except after a jmp.
            if NewBlockStart and op_code != 'jmp':
                self.AddToMap( CurrentBlockAddress, CurrentAddress + idaapi.cvar.cmd.size, '', 'link')

            if op_code[0:3] == 'ret':
                NewBlockStart = True

            last_op_code = op_code
            CurrentAddress += idaapi.cvar.cmd.size
        else:
            CurrentAddress += 1
def _profile_function(self):
    """
    Record every place in the current function that mentions the highlighted
    identifier.

    The function containing the cursor is scanned from its start to its end.
    An instruction matches when the highlighted text occurs in its mnemonic,
    in the name of a call target, in one of its operands, or (failing those)
    in its regular or repeatable comment. Matches are stored in
    ``self.xrefs[ea]`` as a dict with the keys 'offset', 'mnem', 'type',
    'direction' and 'text'.

    Nothing is scanned when no identifier is highlighted or the cursor is
    not inside a function.
    """
    current_ea = ScreenEA()
    current_function = idc.GetFunctionName(current_ea)
    current_function_ea = idc.LocByName(current_function)

    if current_function:
        self.function = current_function

    ea = idc.GetFunctionAttr(current_function_ea, idc.FUNCATTR_START)
    end_ea = idc.GetFunctionAttr(current_function_ea, idc.FUNCATTR_END)

    self.highlighted = idaapi.get_highlighted_identifier()

    while ea < end_ea and ea != idc.BADADDR and self.highlighted:
        i = 0
        match = False
        optype = self.READ

        insn_size = idaapi.decode_insn(ea)

        mnem = idc.GetMnem(ea)

        if self.highlighted in mnem:
            match = True
        elif idaapi.is_call_insn(ea):
            for xref in idautils.XrefsFrom(ea):
                # 21 is the xref type of ordinary flow to the next
                # instruction (fl_F in the IDA SDK); only real targets count.
                if xref.type != 21:
                    name = idc.Name(xref.to)
                    if name and self.highlighted in name:
                        match = True
                        break
        else:
            # Walk the operands until the first empty one.
            while True:
                opnd = idc.GetOpnd(ea, i)
                if not opnd:
                    break
                if self.highlighted in opnd:
                    match = True
                    # The instruction's feature bits tell whether operand i
                    # is modified by it.
                    if (idaapi.insn_t_get_canon_feature(idaapi.cmd.itype) & self.OPND_WRITE_FLAGS[i]):
                        optype = self.WRITE
                i += 1

        if not match:
            # Fall back to the regular (0), then the repeatable (1) comment.
            for repeatable in (0, 1):
                comment = idc.GetCommentEx(ea, repeatable)
                if comment and self.highlighted in comment:
                    match = True
                    break

        if match:
            if ea > current_ea:
                direction = self.DOWN
            elif ea < current_ea:
                direction = self.UP
            else:
                direction = self.THIS

            self.xrefs[ea] = {
                'offset' : idc.GetFuncOffset(ea),
                'mnem' : mnem,
                'type' : optype,
                'direction' : direction,
                'text' : idc.GetDisasm(ea),
            }

        if insn_size > 0:
            ea += insn_size
        else:
            # Undecodable bytes inside the function (e.g. embedded data):
            # a size of 0 would never advance, so skip to the next defined
            # item instead. NextHead yields BADADDR when there is none
            # before end_ea, which ends the loop.
            ea = idc.NextHead(ea, end_ea)
def build_gadget(self, ea, ea_end):
    """
    Assemble a gadget from the instructions between ea and ea_end.

    Returns a Gadget when a branch terminator (blr / bctr / bctrl) is found
    exactly at ea_end, or when a previously built gadget is cached at one of
    the visited addresses. Returns None when decoding fails, when a cached
    failure is hit, when a terminator shows up before ea_end, or when the
    range is exhausted without a terminator.
    """
    disassembly = []
    changed = set()
    used = set()
    ops = set()
    stack_pivot = 0

    while ea <= ea_end:

        # Gadget level cache: a gadget (built or failed) already known to
        # start at this address supplies the remainder, so no address is
        # ever processed twice.
        if ea in self.gadgets_cache:
            known = self.gadgets_cache[ea]

            # A falsy entry records an earlier failure at this address.
            if not known:
                return None

            disassembly.extend(known.instructions)
            changed.update(known.chg_registers)
            used.update(known.use_registers)
            ops.update(known.operations)
            stack_pivot += known.pivot
            return Gadget(disassembly, stack_pivot, ops, changed, used)

        # NOTE: decode_insn also fills the global idaapi.cmd (insn_t).
        length = idaapi.decode_insn(ea)

        # A byte sequence that does not decode invalidates the gadget.
        if not length:
            return None

        # Instruction level cache, keyed by the raw instruction bytes taken
        # from the memory cache, so each unique encoding is analysed once.
        window_start = ea - (ea_end - (len(self.dbg_mem_cache) - self.dbg_read_extra))
        raw = self.dbg_mem_cache[window_start:window_start + length]

        if raw not in self.insn_cache:
            # NOTE: copy() is expensive, so the global insn_t is passed
            # directly; this keeps the code single-threaded.
            self.insn_cache[raw] = self.decode_instruction(idaapi.cmd, ea, ea_end)

        info = self.insn_cache[raw]

        # An earlier analysis of these bytes invalidated the gadget.
        if not info:
            return None

        mnemonic = info["insn_mnem"]
        disassembly.append(info["insn_disas"])

        # Instructions that change the instruction pointer are only
        # acceptable at the expected end address.
        if mnemonic in ("blr", "bctr", "bctrl"):
            if ea == ea_end:
                return Gadget(disassembly, stack_pivot, ops, changed, used)
            return None

        # Fold this instruction's characteristics into the gadget.
        changed.update(info["insn_chg_registers"])
        used.update(info["insn_use_registers"])
        ops.update(info["insn_operations"])
        stack_pivot += info["insn_pivot"]

        # NOTE: advancing happens outside the caches.
        ea += length

    # No terminating instruction was found within the range.
    return None
def single_operand_parser(self, address, op, idx):
    """Parse a PPC operand.

    Translates one IDA operand into the nested-list AST used by this
    exporter. The result normally starts with the operand width followed by
    one or more node lists of the form ``[node type, value, position]``.

    :param address: address of the instruction owning the operand
    :param op: the IDA operand (``type``, ``dtyp``, ``reg``, ``addr``,
               ``value``, ``specflag1`` and ``specflag2`` are read)
    :param idx: operand index; used for the stack-variable lookup
    :return: None for OPERAND_TYPE_NO_OPERAND, otherwise the AST list. For
             OPERAND_TYPE_IDPSPEC2 the result is instead a list of one to
             three complete operand ASTs.
    """

    def constraint_value(value):
        # Values above 2**16 are reinterpreted as negative 32-bit numbers.
        if value>2**16:
            return -(2**32-value)
        return value


    # Operand parsing
    #

    if op.type == OPERAND_TYPE_NO_OPERAND:
        return None

    #print '>>>', hex(address), idx, op.type

    segment = idaapi.getseg(address)
    # NOTE(review): addressing_mode is not used anywhere below.
    addressing_mode = segment.bitness

    # Start creating the AST, the root entry is always the width of the
    # operand
    operand = [self.OPERAND_WIDTH[self.as_byte_value(op.dtyp)]]


    # Compose the rest of the AST
    #

    if op.type == OPERAND_TYPE_DISPLACEMENT:

        # A displacement operator might refer to a variable...
        #
        var_name = None

        # Try to get any name that might have been assigned to the
        # variable. It is only looked up when IDA flags this operand as a
        # stack variable.
        # NOTE(review): the original remark here spoke of sp/esp (4) or
        # bp/ebp (5), which are x86 register numbers -- confirm relevance.
        #
        flags = idc.GetFlags(address)
        if (idx==0 and idc.isStkvar0(flags)) or (
            idx==1 and idc.isStkvar1(flags)):

            var_name = self.get_operand_stack_variable_name(address, op, idx)

        #if has_sib_byte(op) is True:
            # when SIB byte set, process the SIB indexing
        #    phrase = parse_phrase(op)
        #else:
        phrase = [
            self.NODE_TYPE_OPERATOR_PLUS,
                [self.NODE_TYPE_REGISTER,
                    self.REGISTERS[self.as_byte_value(op.reg)], 0]]

        if var_name:
            value = arch.ExpressionNamedValue(long(op.addr), var_name)
        else:
            value = constraint_value(op.addr)

        # Result: dereference of (register + displacement value).
        operand.extend([
            [self.NODE_TYPE_DEREFERENCE,
                phrase+[ [self.NODE_TYPE_VALUE, value, 1]] ] ])

    elif op.type == OPERAND_TYPE_REGISTER:
        operand.extend([
            [self.NODE_TYPE_REGISTER, self.REGISTERS[self.as_byte_value(op.reg)], 1]])

    elif op.type == OPERAND_TYPE_MEMORY:

        # Prefer the symbolic name of the address when one exists.
        addr_name = self.get_address_name(op.addr)

        if addr_name:
            value = arch.ExpressionNamedValue(long(op.addr), addr_name)
        else:
            value = op.addr

        operand.extend([
            [self.NODE_TYPE_DEREFERENCE,
                [self.NODE_TYPE_VALUE, value, 0]] ])

    elif op.type == OPERAND_TYPE_IMMEDIATE:

        # Keep the value's size
        #
        if self.as_byte_value(op.dtyp) == 0:
            mask = 0xff
        elif self.as_byte_value(op.dtyp) == 1:
            mask = 0xffff
        else:
            mask = 0xffffffff

        operand.extend([[self.NODE_TYPE_VALUE, op.value&mask, 0]])

    elif op.type in (OPERAND_TYPE_NEAR, OPERAND_TYPE_FAR):

        addr_name = self.get_address_name(op.addr)

        if addr_name:
            value = arch.ExpressionNamedValue(long(op.addr), addr_name)
        else:
            value = op.addr

        operand.extend([[self.NODE_TYPE_VALUE, value, 0]])

    elif op.type == OPERAND_TYPE_PHRASE:
        # Not supported: emit a placeholder symbol describing the operand.
        print '***Dunno how to parse PHRASE'
        operand.extend([[self.NODE_TYPE_SYMBOL,
            'UNK_PHRASE(val:%d, reg:%d, type:%d)' % (
                op.value, self.as_byte_value(op.reg), op.type), 0]])

    elif op.type == OPERAND_TYPE_IDPSPEC0:

        # Handle Special Purpose Registers
        #
        register = self.SPR_REGISTERS.get(
            op.value, 'UNKNOWN_REGISTER(val:%x)' % op.value)

        operand.extend([
            [self.NODE_TYPE_REGISTER, register, 0]])

    elif op.type == OPERAND_TYPE_IDPSPEC1:
        #print '***Dunno how to parse OPERAND_TYPE_IDPSPEC1'
        #operand.extend([[self.NODE_TYPE_SYMBOL,
        #    'UNK_IDPSPEC1(val:%d, reg:%d, type:%d)' % (
        #        op.value, op.reg, op.type), 0]])
        # Two registers: one from op.reg, one from op.specflag1.
        operand.extend([
            [self.NODE_TYPE_REGISTER, self.REGISTERS[self.as_byte_value(op.reg)], 1]])
        operand.extend([
            [self.NODE_TYPE_REGISTER, self.REGISTERS[self.as_byte_value(op.specflag1)], 2]])

    elif op.type == OPERAND_TYPE_IDPSPEC2:
        # IDSPEC2 is operand type for all rlwinm and rlwnm
        # instructions which are in general op reg, reg, byte, byte, byte
        # or equivalent. simplified mnemonics sometimes take less than
        # five arguments.
        #
        # Keep the value's size
        #
        if self.as_byte_value(op.dtyp) == 0:
            mask = 0xff
        elif self.as_byte_value(op.dtyp) == 1:
            mask = 0xffff
        else:
            mask = 0xffffffff

        operand_1 = []
        operand_2 = []
        operand_3 = []

        # Get the object representing the instruction's data.
        # It varies between IDA pre-5.7 and 5.7 onwards, the following check
        # will take care of it (for more detail look into the similar
        # construct in arch.py)
        #
        if hasattr(idaapi, 'cmd' ):
            idaapi.decode_insn(address)
            ida_instruction = idaapi.cmd
        else:
            idaapi.ua_code(address)
            ida_instruction = idaapi.cvar.cmd

        # auxpref bit 0x0020: op.reg holds a value rather than a register.
        if (ida_instruction.auxpref & 0x0020):
            #print "SH"
            operand_1 = [self.OPERAND_WIDTH[self.as_byte_value(op.dtyp)]]
            operand_1.extend([[self.NODE_TYPE_VALUE, self.as_byte_value(op.reg)&mask, 0]])
        else:
            operand_1 = [self.OPERAND_WIDTH[self.as_byte_value(op.dtyp)]]
            operand_1.extend([[self.NODE_TYPE_REGISTER, self.REGISTERS[self.as_byte_value(op.reg)], 0]])
        #print operand_1

        # auxpref bit 0x0040: a second value is taken from op.specflag1.
        if (ida_instruction.auxpref & 0x0040):
            #print "MB"
            operand_2 = [self.OPERAND_WIDTH[self.as_byte_value(op.dtyp)]]
            operand_2.extend([[self.NODE_TYPE_VALUE, self.as_byte_value(op.specflag1)&mask, 0]])
            #print operand_2

        # auxpref bit 0x0080: a third value is taken from op.specflag2.
        if (ida_instruction.auxpref & 0x0080):
            #print "ME"
            operand_3 = [self.OPERAND_WIDTH[self.as_byte_value(op.dtyp)]]
            operand_3.extend([[self.NODE_TYPE_VALUE, self.as_byte_value(op.specflag2)&mask, 0]])
            #print operand_3

        # The result replaces the AST started above with a list of the
        # individual operand ASTs.
        operand = [operand_1]
        #operand = operand_1
        if (ida_instruction.auxpref & 0x0040):
            #print "MB2"
            operand.append(operand_2)
        if (ida_instruction.auxpref & 0x0080):
            #print "ME2"
            operand.append(operand_3)

        #print operand
        # operand = operand_1
        #print operand
        #print '>>>', hex(address), idx, op.type, op.reg
        #operand.extend([[self.NODE_TYPE_OPERATOR_COMMA, [self.NODE_TYPE_VALUE, op.reg&mask, 0], [self.NODE_TYPE_VALUE, self.as_byte_value(op.specflag1)&mask, 1], [self.NODE_TYPE_VALUE, self.as_byte_value(op.specflag2)&mask, 2]]])

    elif op.type == OPERAND_TYPE_IDPSPEC3:
        # CR registers
        #
        operand.extend([
            [self.NODE_TYPE_REGISTER, self.CR_REGISTERS[self.as_byte_value(op.reg)], 0]])

    elif op.type == OPERAND_TYPE_IDPSPEC4:
        # The bit in the CR to check for
        #
        operand.extend([[self.NODE_TYPE_REGISTER, self.as_byte_value(op.reg), 0]])

    elif op.type == OPERAND_TYPE_IDPSPEC5:
        # Device Control Register, implementation specific
        operand.extend([[self.NODE_TYPE_REGISTER, 'DCR(%x)' % op.value, 0]])

    return operand
def build_gadget(self, ea, ea_end):
    """
    Assemble a gadget from the instructions between ea and ea_end.

    Returns a Gadget when an instruction decodes exactly at ea_end, or when
    a previously built gadget is cached at one of the visited addresses.
    Returns None when a cached failure is hit, when an instruction reaches
    past the readable window, when retn/jmp/call appears before ea_end, when
    undecodable bytes are not tolerated, or when the range is exhausted.
    """
    disassembly = []
    changed = set()
    used = set()
    ops = set()
    stack_pivot = 0

    while ea <= ea_end:

        # Gadget level cache: a gadget (built or failed) already known to
        # start at this address supplies the remainder, so no address is
        # ever processed twice.
        if ea in self.gadgets_cache:
            known = self.gadgets_cache[ea]

            # A falsy entry records an earlier failure at this address.
            if not known:
                return None

            disassembly.extend(known.instructions)
            changed.update(known.chg_registers)
            used.update(known.use_registers)
            ops.update(known.operations)
            stack_pivot += known.pivot
            return Gadget(disassembly, stack_pivot, ops, changed, used)

        # NOTE: decode_insn also fills the global idaapi.cmd (insn_t).
        length = idaapi.decode_insn(ea)

        if length:
            # Decoded instruction is too big to be a RETN or RETN imm16
            if ea + length > ea_end + self.dbg_read_extra:
                return None

            # Instruction level cache, keyed by the raw instruction bytes
            # taken from the memory cache, so each unique encoding is
            # analysed only once.
            window_start = ea - (ea_end - (len(self.dbg_mem_cache) - self.dbg_read_extra))
            raw = self.dbg_mem_cache[window_start:window_start + length]

            if raw not in self.insn_cache:
                # NOTE: copy() is expensive, so the global insn_t is passed
                # directly; this keeps the code single-threaded.
                self.insn_cache[raw] = self.decode_instruction(idaapi.cmd, ea, ea_end)

            info = self.insn_cache[raw]

            # An earlier analysis of these bytes invalidated the gadget.
            if not info:
                return None

            mnemonic = info["insn_mnem"]
            disassembly.append(info["insn_disas"])

            # Expected ending instruction of the gadget
            if ea == ea_end:
                return Gadget(disassembly, stack_pivot, ops, changed, used)

            # ROP/JOP/COP terminators are only allowed in the last position.
            if mnemonic in ("retn", "jmp", "call"):
                return None

            # Fold this instruction's characteristics into the gadget.
            changed.update(info["insn_chg_registers"])
            used.update(info["insn_use_registers"])
            ops.update(info["insn_operations"])
            stack_pivot += info["insn_pivot"]

            # NOTE: advancing happens outside the caches.
            ea += length
            continue

        # Failed decoding of the instruction.
        # NOTE: Gadgets may have bad instructions in the middle which can be
        #       tolerated as long as a useful instruction follows.
        #
        # HACK: IDA does not disassemble "\x00\x00" unless "Disassemble zero
        #       opcode instructions" is enabled in Processor Options. Since
        #       that option is normally disabled, the instruction is
        #       recognised manually from the memory cache.
        window_start = ea - (ea_end - self.maxRopOffset)
        raw = self.dbg_mem_cache[window_start:window_start + 2]

        # BUGFIX: the slice may come back short (perhaps the cache ran out),
        #         so verify its size before using it.
        if len(raw) != 2:
            return None

        if raw[:2] == "\x00\x00":
            zero_insn = "add [rax],al" if self.sploiter.addr64 else "add [eax],al"
            disassembly.append(zero_insn)
            used.add("al")
            ops.add("reg-to-mem")
            ea += 2

        # "MOV Sreg, r/m16" instructions result in an illegal instruction
        # exception (c000001d) or an unreadable memory exception (c0000005),
        # neither of which is wanted in a gadget.
        elif raw[0] == "\x8E":
            return None

        # Record a "bad byte" if allowed
        elif raw and not self.ropNoBadBytes:
            bad_byte = raw[0]
            disassembly.append("db %sh" % binascii.hexlify(bad_byte))
            ea += 1

        # Invalidate the gadget
        else:
            return None

    # Failed to build a gadget, because RETN instruction was not found
    return None
def process_instruction_generic(self, addr):
    """Architecture agnostic instruction parsing.

    Returns the tuple ``(instruction, mnemonic, operands, operand_strings,
    data)`` for the instruction at ``addr``, or five ``None`` values when no
    mnemonic can be obtained for that address.
    """

    mnemonic = self.get_mnemonic(addr)
    if not mnemonic:
        return None, None, None, None, None

    # Up to IDA 5.7 the decoder was called ua_code(); its presence is what
    # tells the two API generations apart.
    #
    # ua_code()/decode_insn() together with the global cmd structure are
    # used instead of jumpto() + get_current_instruction(): the latter
    # always made IDA reposition the cursor and refresh the GUI, which was
    # quite painful.
    if hasattr(idaapi, 'ua_code'):
        idaapi.ua_code(addr)
        raw_insn = idaapi.cvar.cmd
    else:
        # now it's called decode_insn()
        idaapi.decode_insn(addr)
        raw_insn = idaapi.cmd

    instruction = Instruction(raw_insn.itype, raw_insn.size, raw_insn.ip)
    self.current_instruction_type = instruction.itype

    # Try to process as many operands as IDA supports
    #
    if hasattr(idaapi, 'ua_code'):
        operand_pairs = [
            (idaapi.get_instruction_operand(raw_insn, slot), slot)
            for slot in range(6)]
    else:
        operand_pairs = [
            (raw_insn.Operands[slot], slot)
            for slot in range(6)]
    operands = self.operands_parser(addr, operand_pairs)

    # Retrieve the operand strings
    #
    operand_strings = []
    for slot in range(len(operands)):
        operand_strings.append(idc.GetOpnd(addr, slot))

    # Get the instruction data
    #
    item_bytes = [
        chr(idaapi.get_byte(addr + delta))
        for delta in range(idc.ItemSize(addr))]
    data = ''.join(item_bytes)

    # Return the mnemonic and the operand AST
    #
    return instruction, mnemonic, operands, operand_strings, data
def search_pointers(self):
    """
    Find writable function pointers used by indirect CALL/JMP instructions.

    For every module in self.modules:
      1. executable segments inside the module are searched for the byte
         patterns "FF 15" and "FF 25";
      2. each hit is decoded, and hits whose first operand is a direct
         memory reference into a writable segment become Ptr objects;
      3. when self.searchP2P is set, module memory is searched for values
         equal to those pointer addresses (pointers-to-pointers);
      4. the resulting objects are appended to self.ptrs.

    The search stops early when the user cancels or when self.maxPtrs is
    reached. A wait box is shown for the duration and the total number of
    pointers is printed at the end.
    """

    # HACK: A separate flag is used to track user canceling the search,
    #       because multiple calls to idaapi.wasBreak() do not properly
    #       detect cancellations.
    breakFlag = False

    # Show wait dialog
    idaapi.show_wait_box("Searching writable function pointers...")

    for m in self.modules:

        ###################################################################
        # Locate all of the CALL and JMP instructions in the current module
        # which use an immediate operand.

        # List of call/jmp pointer calls in a given module
        ptr_calls = list()

        # Iterate over segments in the module
        # BUG: Iterating over all loaded segments is more stable than looking up by address
        for n in xrange(idaapi.get_segm_qty()):
            seg = idaapi.getnseg(n)

            # Segment in a selected modules
            if seg and seg.startEA >= m.addr and seg.endEA <= (m.addr + m.size):

                # Locate executable segments
                # NOTE: Each module may have multiple executable segments
                # TODO: Search for "MOV REG, PTR # CALL REG"
                if seg.perm & idaapi.SEGPERM_EXEC:

                    # Search all instances of CALL /2 imm32/64 - FF 15
                    # TODO: Alternative pointer calls using SIB: FF 14 E5 11 22 33 44 - call dword/qword ptr [0x44332211]
                    #                                            FF 14 65 11 22 33 44
                    #                                            FF 14 25 11 22 33 44
                    # NOTE(review): each search starts at call_ea + 1, so a
                    # match located exactly at seg.startEA is not reported.
                    call_ea = seg.startEA
                    while True:
                        call_ea = idaapi.find_binary(call_ea + 1, seg.endEA, "FF 15", 16, idaapi.SEARCH_DOWN)
                        if call_ea == idaapi.BADADDR: break
                        ptr_calls.append(call_ea)

                    # Search all instances of JMP /2 imm32/64 - FF 25
                    # TODO: Alternative pointer calls using SIB: FF 24 E5 11 22 33 44 - jmp dword/qword ptr [0x44332211]
                    #                                            FF 24 65 11 22 33 44
                    #                                            FF 24 25 11 22 33 44
                    call_ea = seg.startEA
                    while True:
                        call_ea = idaapi.find_binary(call_ea + 1, seg.endEA, "FF 25", 16, idaapi.SEARCH_DOWN)
                        if call_ea == idaapi.BADADDR: break
                        ptr_calls.append(call_ea)

        ###################################################################
        # Extract all of the function pointers and make sure they
        # are writable.

        # List of writable function pointer objects in a given module
        ptrs = list()

        for call_ea in ptr_calls:

            # Decode CALL/JMP instruction
            # NOTE: May result in invalid disassembly of split instructions
            insn_size = idaapi.decode_insn(call_ea)

            if insn_size:

                insn = idaapi.cmd
                # NOTE(review): insn_op1 is assigned but never read.
                insn_op1 = insn.Operands[0].type

                # Verify first operand is a direct memory reference
                if insn.Operands[0].type == idaapi.o_mem:

                    # Get operand address
                    ptr_ea = insn.Operands[0].addr

                    # Apply pointer offset
                    ptr_ea -= self.ptrOffset

                    # Locate segment where the pointer is located
                    ptr_seg = idaapi.getseg(ptr_ea)

                    # Make sure a valid writable segment was found
                    if ptr_seg and ptr_seg.perm & idaapi.SEGPERM_WRITE:

                        # Get pointer charset
                        ptr_charset = self.sploiter.get_ptr_charset(ptr_ea)

                        # Filter the pointer itself; when self.filterP2P is
                        # set the same charset filters are applied to the
                        # pointer-to-pointer further down instead.
                        if not self.filterP2P:
                            if ptr_charset == None: continue
                            if self.ptrNonull and not "nonull" in ptr_charset: continue
                            if self.ptrUnicode and not "unicode" in ptr_charset: continue
                            if self.ptrAscii and not "ascii" in ptr_charset: continue
                            if self.ptrAsciiPrint and not "asciiprint" in ptr_charset: continue
                            if self.ptrAlphaNum and not "alphanum" in ptr_charset: continue
                            if self.ptrNum and not "numeric" in ptr_charset: continue
                            if self.ptrAlpha and not "alpha" in ptr_charset: continue

                        # Get pointer disassembly
                        insn_disas = idc.GetDisasmEx(call_ea, idaapi.GENDSM_FORCE_CODE)

                        # Add pointer to the list
                        ptr = Ptr(m.file, ptr_ea, self.ptrOffset, ptr_charset, call_ea, insn_disas)
                        ptrs.append(ptr)

        ###################################################################
        # Cache Pointers to Pointers

        # Maps a full address value read from memory to the set of
        # addresses at which that value was found.
        ptr_ea_prefix_cache = dict()

        if self.searchP2P:

            # CACHE: Running repeated searches over the entire memory space is
            #        very expensive. Let's cache all of the addresses containing
            #        bytes corresponding to discovered function pointers in a
            #        single search and simply reference this cache for each
            #        function pointer. Specifically running idaapi.find_binary()
            #        is much more expensive than idaapi.dbg_read_memory().
            #
            # NOTE:  For performance considerations, the cache works on a per
            #        module basis, but could be expanded for the entire memory
            #        space.
            #
            # prefix_offset - how many bytes of discovered function
            #        pointers to cache.
            #
            # Example: For function pointers 0x00401234, 0x00404321, 0x00405678
            #        we are going to use prefix_offset 2, so we will cache all of the
            #        values located at addresses 0x0040XXXX

            if self.sploiter.addr64:
                pack_format = "<Q"
                addr_bytes = 8
                prefix_offset = 6
            else:
                pack_format = "<I"
                addr_bytes = 4
                prefix_offset = 2

            # Set of unique N-byte address prefixes to search in memory
            ea_prefix_set = set()

            for ptr in ptrs:
                ptr_ea = ptr.ptr_ea

                # Little-endian packing puts the most significant bytes
                # last, so the trailing slice is the shared address prefix.
                ptr_bytes = struct.pack(pack_format, ptr_ea)
                ptr_bytes = ptr_bytes[-prefix_offset:]

                ea_prefix_set.add(ptr_bytes)

            # Search the module for all bytes corresponding to the prefix
            # and use them as candidates for pointers-to-pointers

            for ea_prefix in ea_prefix_set:

                # NOTE: Make sure you search using 44 33 22 11 format and not 11223344
                ea_prefix_str = " ".join(["%02x" % ord(b) for b in ea_prefix])

                # Initialize search parameters for a given module
                ea = m.addr
                maxea = m.addr + m.size

                while True:
                    ea = idaapi.find_binary(ea + 1, maxea, ea_prefix_str, 16, idaapi.SEARCH_DOWN)
                    if ea == idaapi.BADADDR: break

                    # Step back from the matched prefix to the start of the
                    # full address value and read it.
                    p2p_ea = ea - (addr_bytes - prefix_offset)

                    dbg_mem = read_module_memory(p2p_ea, addr_bytes)
                    # NOTE(review): bare unpack() here versus struct.pack()
                    # above -- confirm both names are imported.
                    ptr_ea_prefix = unpack(pack_format, dbg_mem)[0]

                    if ptr_ea_prefix in ptr_ea_prefix_cache:
                        ptr_ea_prefix_cache[ptr_ea_prefix].add(p2p_ea)
                    else:
                        ptr_ea_prefix_cache[ptr_ea_prefix] = set([p2p_ea, ])

                    # Detect search cancellation, but allow the loop below
                    # to run to create already cached/found function pointers

                    # Canceled
                    if breakFlag or idaapi.wasBreak():
                        breakFlag = True
                        break

                # Canceled
                if breakFlag or idaapi.wasBreak():
                    breakFlag = True
                    break

        ###################################################################
        # Locate Pointer to Pointers

        for ptr in ptrs:

            ptr_ea = ptr.ptr_ea

            # Locate pointers-to-pointers for a given function pointer in the cache
            if self.searchP2P and ptr_ea in ptr_ea_prefix_cache:

                for p2p_ea in ptr_ea_prefix_cache[ptr_ea]:

                    # Apply pointer-to-pointer offset
                    p2p_ea -= self.p2pOffset

                    p2p_charset = self.sploiter.get_ptr_charset(p2p_ea)

                    # Filter the pointer
                    if self.filterP2P:
                        if p2p_charset == None: continue
                        if self.ptrNonull and not "nonull" in p2p_charset: continue
                        if self.ptrUnicode and not "unicode" in p2p_charset: continue
                        if self.ptrAscii and not "ascii" in p2p_charset: continue
                        if self.ptrAsciiPrint and not "asciiprint" in p2p_charset: continue
                        if self.ptrAlphaNum and not "alphanum" in p2p_charset: continue
                        if self.ptrNum and not "numeric" in p2p_charset: continue
                        if self.ptrAlpha and not "alpha" in p2p_charset: continue

                    # Copy existing pointer object to modify it for the particular p2p
                    p2p = copy.copy(ptr)
                    p2p.p2p_ea = p2p_ea
                    p2p.p2p_offset = self.p2pOffset
                    p2p.p2p_charset = p2p_charset

                    # Append p2p specific pointer object to the global list
                    self.ptrs.append(p2p)

                    # Exceeded maximum number of pointers
                    if self.maxPtrs and len(self.ptrs) >= self.maxPtrs:
                        breakFlag = True
                        print "[idasploiter] Maximum number of pointers exceeded."
                        break

            # Simply append pointer object to the global list
            else:
                self.ptrs.append(ptr)

                # Exceeded maximum number of pointers
                if self.maxPtrs and len(self.ptrs) >= self.maxPtrs:
                    breakFlag = True
                    print "[idasploiter] Maximum number of pointers exceeded."
                    break

            if breakFlag or idaapi.wasBreak():
                breakFlag = True
                break

        # Canceled
        # NOTE: Only works when started from GUI not script.
        if breakFlag or idaapi.wasBreak():
            breakFlag = True
            print "[idasploiter] Canceled."
            break

    print "[idasploiter] Found %d total pointers." % len(self.ptrs)
    # NOTE(review): the wait box is only hidden on this normal exit path; an
    # exception raised above would leave it showing.
    idaapi.hide_wait_box()
def _findTail(ea):
    """Walk forward over code starting at ``ea`` and return where it stops.

    Instructions are decoded one after another for as long as
    ``database.isCode`` accepts the current address.

    :param ea: address to start from
    :return: the first address that is not code, or the address at which
             decoding failed
    """
    while database.isCode(ea):
        n = idaapi.decode_insn(ea)
        if n <= 0:
            # Nothing was decoded, so ``ea`` would never advance and the
            # loop would spin forever on this address.
            break
        ea += n
    return ea