def _is_bad_instruction(self, ea, bad_instructions=('j', 'b'), no_clobber=()):
    """
    Decide whether the instruction at ``ea`` must be rejected.

    The instruction is rejected when the first character of its mnemonic is
    listed in ``bad_instructions``, or when its canonical features carry
    ``CF_CHG1`` (the first operand is changed) and the text of operand 0
    equals one of the ``no_clobber`` entries.

    :param ea: address of the instruction to inspect
    :param bad_instructions: first characters of mnemonics that are rejected
    :param no_clobber: operand texts that the instruction must not modify
    :return: True if the instruction is rejected, False otherwise
    """
    mnem = idc.print_insn_mnem(ea)
    if mnem and mnem[0] in bad_instructions:
        return True

    if not no_clobber:
        return False

    if idaapi.IDA_SDK_VERSION < 700:
        # Legacy API: reads the global idaapi.cmd, which this function does
        # not populate -- presumably the caller decoded ``ea`` beforehand.
        feature = idaapi.insn_t_get_canon_feature(idaapi.cmd.itype)
    else:
        # A freshly constructed insn_t describes no instruction, so it has to
        # be decoded before its canonical features mean anything.
        insn = idaapi.insn_t()
        if not idaapi.decode_insn(insn, ea):
            return False
        feature = insn.get_canon_feature()

    if (feature & idaapi.CF_CHG1) != idaapi.CF_CHG1:
        return False

    first_operand = idc.print_operand(ea, 0)
    return any(first_operand == register for register in no_clobber)
def _calc_displacement(self):
    """
    Calculate the displacement offset of the operand's text.

    e.g:
        word ptr [rdi+rbx]

    The result is ``base + index * scale + offset``, using register values
    taken from the CPU context. As a side effect, when IDA reports a stack
    variable for the operand, ``(frame_id, stack_offset)`` is stored in the
    context's ``stack_variables`` under the computed value.

    :return int: calculated value
    """
    reg_width = 8 if idc.__EA64__ else 4

    insn = idaapi.insn_t()
    idaapi.decode_insn(insn, self.ip)
    op = insn.ops[self.idx]

    offset = utils.signed(op.addr, utils.get_bits())
    scale = utils.sib_scale(op)
    base_reg = utils.x86_base_reg(insn, op)
    index_reg = utils.x86_index_reg(insn, op)

    base_value = self._cpu_context.registers[utils.reg2str(base_reg, reg_width)]
    if index_reg != -1:
        index_value = self._cpu_context.registers[utils.reg2str(index_reg, reg_width)]
    else:
        # -1 means the operand has no index register.
        index_value = 0

    address = base_value + index_value * scale + offset
    logger.debug("calc_displacement :: Displacement {} -> {}".format(
        self.text, address))

    # Before returning, record the frame_id and stack_offset for this address.
    # (This can become useful information for retrieving the original location of a variable)
    frame_id = idc.get_frame_id(self.ip)
    stack_var = ida_frame.get_stkvar(insn, op, offset)
    if stack_var:
        _, stack_offset = stack_var
        self._cpu_context.stack_variables[address] = (frame_id, stack_offset)

    return address
def _addToSig(self, sigIndex) -> bool:
    """
    Append the instruction at the signature's current address to it.

    Instructions shorter than 5 bytes are added through ``_addBytesToSig``;
    longer ones through ``_addInsToSig``. On success the signature's current
    address advances past the instruction and its op count is incremented.

    :param sigIndex: index into ``self.Sigs`` of the signature to extend
    :return: False if the instruction cannot be decoded, True otherwise
    """
    insn = idaapi.insn_t()
    insn.size = 0
    sig = self.Sigs[sigIndex]
    address = sig.dwCurrentAddress

    if not idaapi.can_decode(address):
        return False

    decoded = idaapi.decode_insn(insn, address)
    if decoded == 0 or insn.size == 0:
        return False

    if insn.size >= 5:
        self._addInsToSig(insn, sigIndex)
    else:
        self._addBytesToSig(sigIndex, address, insn.size)

    sig.dwCurrentAddress += insn.size
    sig.iOpCount += 1
    self.Sigs[sigIndex] = sig
    return True
def get_operands(self, ip=None):
    """
    Gets the Operand objects of all operands in the current instruction and returns them in a list.

    :param int ip: location of instruction pointer to pull operands from (defaults to current rip in context)

    :return: list of Operand objects (empty if the instruction cannot be decoded)
    """
    if ip is None:
        ip = self.ip

    insn = idaapi.insn_t()
    if not idaapi.decode_insn(insn, ip):
        return []

    operands = []
    # decode_insn() returns the instruction length in bytes, which is not an
    # operand count (a one-byte instruction may still have two operands).
    # Walk the operand slots instead and stop at the first unused one.
    for index in range(len(insn.ops)):
        if insn.ops[index].type == idaapi.o_void:
            break
        try:
            operand = Operand(self, ip, index)
            # IDA will sometimes create hidden or "fake" operands.
            # These are there to represent things like an implicit EAX register.
            # To help avoid confusion to the opcode developer, these fake operands will not be included.
            if not operand.is_fake:
                operands.append(operand)
        except (IndexError, RuntimeError):
            # IDA will identify more operands than there actually are causing an issue.
            # Just break out of the loop if this happens.
            # IDA 7 throws RuntimeError instead of IndexError
            break

    return operands
def get_operand_value_replacement(ea, pos, state):
    """
    A replacement for Ida's idc.get_operand_value that handles displacements more reasonably

    :param ea: memory location
    :param pos: argument location
                example:
                    add eax, ebx
                    eax is position 0, ebx is position 1
    :param state: the current stack pointer register family (usually sp); not read by this function
    :return: computes a numerical replacement for an operand
    """
    if not is_displ(ea, pos):
        return idc.get_operand_value(ea, pos)

    if is_64_bit():
        bit_size, stack_reg = 64, 'rsp'
    else:
        bit_size, stack_reg = 32, 'esp'

    insn = idaapi.insn_t()
    idaapi.decode_insn(insn, ea)
    offset = insn.ops[pos].addr

    # Convert the offset to a signed value
    sign_bit = 1 << (bit_size - 1)
    if offset & sign_bit:
        offset -= (1 << bit_size)

    # Stack-relative operands are adjusted by the stack pointer delta at ea.
    if stack_reg in get_opnd_replacement(ea, pos):
        offset += idc.get_spd(ea) or 0

    return offset
def _does_instruction_match(self, ea, instruction, regex=False):
    """
    Check whether the instruction at ``ea`` matches an instruction template.

    The mnemonic matches when the template has no mnemonic, when it equals
    the disassembled mnemonic, or (with ``regex``) when it matches as a
    regular expression. Every non-empty template operand must then match the
    operand text at the same position; empty template operands are ignored.

    :param ea: address of the instruction to test
    :param instruction: template exposing ``mnem`` and ``operands``
    :param regex: treat the template mnemonic and operands as regex patterns
    :return: True if mnemonic and all specified operands match
    """
    mnem = idc.print_insn_mnem(ea)
    wanted_mnem = instruction.mnem

    if (wanted_mnem and wanted_mnem != mnem
            and not (regex and re.match(wanted_mnem, mnem))):
        return False

    for position, pattern in enumerate(instruction.operands):
        if not pattern:
            # An unspecified operand acts as a wildcard.
            continue
        operand_text = idc.print_operand(ea, position)
        if regex:
            if not re.match(pattern, operand_text):
                return False
        elif pattern != operand_text:
            return False

    return True
def graph_down(ea, path=None):
    """
    Recursively collect all function calls.

    Copied with minor modifications from
    http://hooked-on-mnemonics.blogspot.com/2012/07/renaming-subroutine-blocks-and.html

    :param ea: address of the function to start from
    :param path: set of addresses already visited; it is updated in place.
                 A fresh set is created when omitted, so separate top-level
                 calls do not share state.
    :return: the set of visited addresses, including ``ea``
    """
    if path is None:
        path = set()
    path.add(ea)

    #
    # extract all the call instructions from the current function
    #

    insn = idaapi.insn_t()
    call_instructions = [
        address
        for address in idautils.FuncItems(ea)
        if idaapi.decode_insn(insn, address) and idaapi.is_call_insn(insn)
    ]

    #
    # inspect the far cross-references leaving every call instruction
    #

    skipped_flags = idaapi.FUNC_THUNK | idaapi.FUNC_LIB
    for call_ea in call_instructions:
        for xref in idautils.XrefsFrom(call_ea, idaapi.XREF_FAR):
            if not xref.iscode:
                continue

            # get the function pointed at by this call
            func = idaapi.get_func(xref.to)
            if not func:
                continue

            # ignore calls to imports / library calls / thunks
            if (func.flags & skipped_flags) != 0:
                continue

            #
            # if we have not traversed to the destination function that this
            # call references, recurse down to it to continue our traversal
            #

            if xref.to not in path:
                graph_down(xref.to, path)

    return path
def decode_insn(ea):
    """
    Decode the instruction at ``ea`` using whichever IDA API applies.

    With SDK >= 700 and ``sys.maxsize > 2**32`` the ``insn_t``-based call is
    used; otherwise the legacy single-argument call is used and a copy of the
    global ``idaapi.cmd`` is returned.

    :param ea: address of the instruction
    :return: the decoded instruction object, or None if decoding failed
    """
    use_insn_api = idaapi.IDA_SDK_VERSION >= 700 and sys.maxsize > 2**32

    if not use_insn_api:
        if idaapi.decode_insn(ea):
            return idaapi.cmd.copy()
        return None

    insn = idaapi.insn_t()
    if idaapi.decode_insn(insn, ea) > 0:
        return insn
    return None
def hook(self, hook_addr=0):
    """
    Install a hook on the function named ``self.name``.

    Args:
        hook_addr(int): address for inline hook code, 0 indicates bpt hook.

    Returns:
        memory size in bytes used for inline hook. 0 is returned for a
        breakpoint hook, when the function name cannot be resolved, and when
        the function prologue cannot be decoded (nothing is patched then).
    """
    self.hook_addr = hook_addr
    self.func_addr = idc.get_name_ea_simple(self.name)

    # A failed name lookup is reported as BADADDR; 0 is kept for callers that
    # relied on the previous check.
    if self.func_addr in (0, idc.BADADDR):
        return 0

    print("Hooking %s at 0x%x" % (self.name, self.func_addr))
    if self.hook_addr == 0:
        idc.add_bpt(self.func_addr)
        idc.set_bpt_cond(self.func_addr, self.bpt_cond_hook_code)
        return 0

    # assemble jmp code
    jmp_code = "jmp 0x%x" % self.hook_addr
    jmp_buf, _ = assemble(jmp_code, self.func_addr)

    # read function prologue according to jmp code length
    # NOTE: instructions like 'call $+5' in prologue will
    # cause problems.
    insn = idaapi.insn_t()
    move_length = 0
    while move_length < len(jmp_buf):
        decoded = idaapi.decode_insn(insn, self.func_addr + move_length)
        if not decoded or insn.size <= 0:
            # Without this check an undecodable byte would never advance
            # move_length and the loop would spin forever.
            print("Cannot decode prologue of %s at 0x%x, not hooking"
                  % (self.name, self.func_addr + move_length))
            return 0
        move_length += insn.size
    prologue = idaapi.get_bytes(self.func_addr, move_length)

    # write jmp code
    idaapi.patch_bytes(self.func_addr, jmp_buf)

    # assemble hook code: hook body, relocated prologue, jump back
    hook_buf, _ = assemble(self.inline_hook_code, self.hook_addr)
    hook_buf += prologue
    jmp_back_code = 'jmp 0x%x' % (self.func_addr + move_length)
    jmp_back_buf, _ = assemble(jmp_back_code, self.hook_addr + len(hook_buf))
    hook_buf += jmp_back_buf

    # write hook code
    idaapi.patch_bytes(self.hook_addr, hook_buf)
    return len(hook_buf)
def _is_bad_instruction(self, ea, bad_instructions=('j', 'b'), no_clobber=()):
    """
    Decide whether the instruction at ``ea`` must be rejected.

    The instruction is rejected when the first character of its mnemonic is
    listed in ``bad_instructions``, or when its canonical features carry
    ``CF_CHG1`` (the first operand is changed) and the text of operand 0
    equals one of the ``no_clobber`` entries.

    :param ea: address of the instruction to inspect
    :param bad_instructions: first characters of mnemonics that are rejected
    :param no_clobber: operand texts that the instruction must not modify
    :return: True if the instruction is rejected, False otherwise
    """
    mnem = idc.GetMnem(ea)
    if mnem and mnem[0] in bad_instructions:
        return True

    if not no_clobber:
        return False

    # A freshly constructed insn_t describes no instruction, so it has to be
    # decoded before its canonical features mean anything.
    insn = idaapi.insn_t()
    if not idaapi.decode_insn(insn, ea):
        return False

    if (insn.get_canon_feature() & idaapi.CF_CHG1) != idaapi.CF_CHG1:
        return False

    first_operand = idc.GetOpnd(ea, 0)
    return any(first_operand == register for register in no_clobber)
def handle_string_mov(ea, state):
    """Updates the stack based on a movs instruction. Used by create_stack

        If a rep/repne prefix is used, takes the count from ecx. If the count cannot be
        determined, will ignore the instruction. Also assumes that esi points to memory
        within the executable, and edi points to the stack. On any errors, this will
        ignore the instruction.

    :param ea: instruction location
    :param state: the current TraceState

    :return: None - updates stack or regs
    """
    # A leading 0xF2 / 0xF3 byte is treated as a repne / rep prefix.
    first_byte = idaapi.get_bytes(ea, 1)
    rep_inst = first_byte in [b"\xf2", b"\xf3"]

    if rep_inst:
        count = state.get_reg_value("ecx")
    else:
        count = 1
    if not count or count < 0:
        return

    insn = idaapi.insn_t()
    idaapi.decode_insn(insn, ea)
    dtype = insn.ops[0].dtype
    # dtype 0/1/2 map to 1/2/4 bytes; anything else is treated as 4 bytes.
    word_size = [1, 2, 4][dtype] if dtype < 3 else 4
    count *= word_size

    src = state.get_reg_value("esi")
    dst = state.get_reg_value("edi")
    if src is None or dst is None:
        return

    # In IDA 7, get_bytes doesn't return None on failure, instead it will return
    # a string of \xff the size of count. My theory is that the function changed
    # to return -1 for each byte within the c code and something is casting it to a string before returning.
    # Since, all \xff's could be valid we need to check if src is valid instead.
    if not idc.is_loaded(src):
        return

    data = idaapi.get_bytes(src, count)
    # Keep this around in-case they fix it in a future version.
    if data in (None, -1):
        return

    for offset in range(count):
        state.stack[dst + offset] = ((data[offset]), ea)

    if rep_inst:
        state.set_reg_value("ecx", 0, ea)
    state.set_reg_value("esi", src + count, ea)
    state.set_reg_value("edi", dst + count, ea)
def find_ref_loc(ea, ref):
    """
    Locate the byte range inside the instruction at ``ea`` that is reported
    for its first register/memory/immediate/far/near operand.

    type ea: idc.ea_t
    type ref: idc.ea_t

    Returns a ``(start, end)`` tuple of byte offsets within the instruction,
    or ``(BADADDR, BADADDR)`` if no operand of an accepted type exists.
    NOTE: when ``ea`` or ``ref`` is BADADDR a bare ``idc.BADADDR`` is
    returned instead of a tuple.
    """
    logger = logging.getLogger("idb2pat:find_ref_loc")

    if ea == idc.BADADDR:
        logger.debug("Bad parameter: ea")
        return idc.BADADDR
    if ref == idc.BADADDR:
        logger.debug("Bad parameter: ref")
        return idc.BADADDR

    insn = idaapi.insn_t()
    idaapi.decode_insn(insn, ea)

    # The operand count is the index of the first unused (o_void) slot;
    # it stays 0 when no slot is void.
    operand_count = 0
    for slot in range(len(insn.ops)):
        if insn.ops[slot].type == idaapi.o_void:
            operand_count = slot
            break

    accepted_types = [
        idaapi.o_reg, idaapi.o_mem, idaapi.o_imm, idaapi.o_far, idaapi.o_near
    ]
    last_slot = operand_count - 1

    for slot in range(operand_count):
        operand = insn.ops[slot]
        if operand.type in accepted_types:
            # HTC - raw, dummy fix
            # An operand offset of 0 falls back to the next slot's offset.
            start = operand.offb or insn.ops[slot + 1].offb

            if slot < last_slot:
                end = insn.ops[slot + 1].offb
                if end == start:
                    end = insn.size
            else:
                end = insn.size

            return (start, end)

    return (idc.BADADDR, idc.BADADDR)
def IsPrevInsnCall(ea):
    """
    Given a return address, this function tries to check if previous instruction
    is a CALL instruction

    Each ``(delta, opcodes)`` entry of the module-level ``CallPattern`` is
    tried in order. The first candidate whose bytes equal ``opcodes`` and
    which decodes as a call instruction is returned.

    Returns the address of the call instruction, or None if none is found.
    """
    if ea == idaapi.BADADDR or ea < 10:
        return None

    for delta, opcodes in CallPattern:
        # assume caller's ea
        caller = ea + delta

        # get the bytes and compare them with the expected pattern
        candidate = list(GetDataList(caller, len(opcodes), 1))
        if candidate != opcodes:
            continue

        # the bytes match; is it really a call instruction?
        insn = idaapi.insn_t()
        if idaapi.decode_insn(insn, caller) and idaapi.is_call_insn(insn):
            return caller

    return None
def _sig_instruction(self, addr: int) -> Optional[str]:
    """
    Get the bytes for a single instruction with wildcards

    Instructions shorter than 5 bytes, or whose opcode size is reported as
    0, are emitted verbatim. Otherwise the opcode bytes are emitted, followed
    by either the operand bytes or wildcards, depending on
    ``_match_operands``.

    :param addr: Instruction address
    :return: A signature chunk, or None if ``addr`` is not decodable code
    """
    # I'm not sure if either of these checks will ever happen
    # So let it explode until it does by trying to join None
    if not (idaapi.is_code(idaapi.get_flags(addr)) and idaapi.can_decode(addr)):
        return None

    insn = idaapi.insn_t()
    insn.size = 0
    idaapi.decode_insn(insn, addr)

    total_size = insn.size
    if total_size == 0:
        return None
    if total_size < 5:
        return self._sig_bytes(insn.ea, total_size)

    op_size = self._get_current_opcode_size(insn)
    if op_size == 0:
        return self._sig_bytes(insn.ea, insn.size)

    operand_size = insn.size - op_size
    opcode_part = self._sig_bytes(insn.ea, op_size)

    if self._match_operands(insn.ea):
        operand_part = self._sig_bytes(insn.ea + op_size, operand_size)
    else:
        operand_part = self._sig_wildcards(operand_size)

    return opcode_part + ' ' + operand_part
def get_byte_size_of_operand(ea, pos):
    """
    Gets the byte size of the operand at the given ea and position

    The operand's ``dtype`` is looked up in ``OPERAND_BYTE_SIZES``;
    4 is returned when the dtype is not in that table.
    """
    insn = idaapi.insn_t()
    idaapi.decode_insn(insn, ea)
    dtype = insn.ops[pos].dtype
    return OPERAND_BYTE_SIZES.get(dtype, 4)  # 4 is Unknown
def _find_controllable_jumps(self, start_ea, end_ea):
    """
    Find "Controllable Jump" gadgets between ``start_ea`` and ``end_ea``.

    For every instruction that matches one of the control patterns (loading
    ``$t9`` or ``$ra``), the following code is searched for a jump through
    that same register, with the register listed as not to be overwritten.
    Each control/jump pair found is recorded as a ``ROPGadget``.

    :param start_ea: address at which the search starts
    :param end_ea: address at which the search ends (inclusive)
    :return: list of ROPGadget objects
    """
    controllable_jumps = []

    # Raw strings: "\$" is a regex escape, not a Python escape sequence.
    t9_controls = [
        MIPSInstruction("move", r"\$t9"),
        MIPSInstruction("addiu", r"\$t9", r"^\$"),
    ]
    t9_jumps = [
        MIPSInstruction("jalr", r"\$t9"),
        MIPSInstruction("jr", r"\$t9"),
    ]
    ra_controls = [
        MIPSInstruction("lw", r"\$ra"),
    ]
    ra_jumps = [
        # TODO: Search for jumps to registers other than $ra.
        MIPSInstruction("jr", r"\$ra"),
    ]
    t9_musnt_clobber = ["$t9"]
    ra_musnt_clobber = ["$ra"]

    for possible_control_instruction in t9_controls + ra_controls:
        ea = start_ea

        if possible_control_instruction in t9_controls:
            jumps = t9_jumps
            musnt_clobber = t9_musnt_clobber
        else:
            jumps = ra_jumps
            musnt_clobber = ra_musnt_clobber

        while ea <= end_ea:
            ea = self._find_next_instruction_ea(
                ea, possible_control_instruction, end_ea, regex=True)
            if ea == idc.BADADDR:
                # No further control instruction in range.
                break

            insn = idaapi.insn_t()
            ins_size = idaapi.decode_insn(insn, ea)

            control_instruction = self._get_instruction(ea)
            control_register = control_instruction.operands[1]

            if control_register:
                for jump in jumps:
                    jump_ea = self._find_next_instruction_ea(
                        ea + ins_size,
                        jump,
                        end_ea,
                        no_baddies=True,
                        regex=True,
                        dont_overwrite=musnt_clobber)
                    if jump_ea != idc.BADADDR:
                        jump_instruction = self._get_instruction(jump_ea)
                        controllable_jumps.append(
                            ROPGadget(control_instruction,
                                      jump_instruction,
                                      description="Controllable Jump",
                                      base=self.base))
                        # Resume the search after the jump that was found.
                        ea = jump_ea

            ea += ins_size

    return controllable_jumps
def block(self, block):
    """
    Build the signature data for one basic block.

    :param block: basic block exposing ``start_ea`` and ``end_ea``
    :return: a tuple ``(formal_hash, fuzzy_hash, immediates, functions)``:
             ``self.sighash`` of the joined formal tokens, ``self.sighash``
             of the joined fuzzy tokens, the list of interesting immediate
             values, and the list of called function names
    """
    formal = []
    fuzzy = []
    functions = []
    immediates = []

    ea = block.start_ea
    insn = idaapi.insn_t()

    while ea < block.end_ea:
        # If decoding fails, ``insn`` may hold stale data, so immediates are
        # only examined for successfully decoded instructions.
        decoded = idaapi.decode_insn(insn, ea) > 0

        # Get a list of all data/code references from the current instruction
        drefs = list(idautils.DataRefsFrom(ea))
        crefs = list(idautils.CodeRefsFrom(ea, False))

        # Add all instruction mnemonics to the formal block hash
        formal.append(idc.print_insn_mnem(ea))

        # If this is a call instruction, be sure to note the name of the function
        # being called. This is used to apply call-based signatures to functions.
        #
        # For fuzzy signatures, we can't use the actual name or EA of the function,
        # but rather just want to note that a function call was made.
        #
        # Formal signatures already have the call instruction mnemonic, which is more
        # specific than just saying that a call was made.
        if idaapi.is_call_insn(ea):
            for cref in crefs:
                func_name = idc.get_name(cref, ida_name.GN_VISIBLE)
                if not func_name:
                    continue
                functions.append(func_name)
                fuzzy.append('funcref')
        # If there are data references from the instruction, check to see if any of them
        # are strings. These are looked up in the pre-generated strings dictionary.
        #
        # String values are easily identifiable, and are used as part of both the fuzzy
        # and the formal signatures.
        #
        # It is more difficult to determine if non-string values are constants or not;
        # for both fuzzy and formal signatures, just use "data" to indicate that some data
        # was referenced.
        elif drefs:
            for dref in drefs:
                if dref in self.strings:
                    formal.append(self.strings[dref].value)
                    fuzzy.append(self.strings[dref].value)
                else:
                    formal.append('dataref')
                    fuzzy.append('dataref')
        # If there are no data or code references from the instruction, use every operand as
        # part of the formal signature.
        #
        # Fuzzy signatures are only concerned with interesting immediate values, that is,
        # immediates of at least 0xFFFF whose value has no flags in the database
        # (presumably meaning it is not a mapped address) and that are not displayed
        # as negative values.
        elif not crefs:
            # The operands must be read from the decoded ``insn``; a fresh
            # idaapi.insn_t() has no operands, so it would never yield an
            # immediate.
            for n in range(len(insn.ops)):
                opnd_text = idc.print_operand(ea, n)
                formal.append(opnd_text)

                if not decoded:
                    continue
                operand = insn.ops[n]
                if operand.type != idaapi.o_imm or opnd_text.startswith('-'):
                    continue
                if operand.value < 0xFFFF:
                    continue
                if idaapi.get_full_flags(operand.value) != 0:
                    continue

                fuzzy.append(str(operand.value))
                immediates.append(operand.value)

        ea = idc.next_head(ea)

    return self.sighash(''.join(formal)), self.sighash(''.join(fuzzy)), immediates, functions
def _profile_function(self):
    """
    Collect every instruction of the current function that mentions the
    highlighted identifier.

    The function containing the screen cursor is walked instruction by
    instruction. An instruction is recorded in ``self.xrefs`` when the
    highlighted text appears in its mnemonic, in the name of a call target,
    in one of its operands, or in its regular or repeatable comment. An
    operand hit is marked as a write when the instruction's canonical
    features contain the matching ``OPND_WRITE_FLAGS`` bit.

    NOTE(review): uses the legacy single-argument ``idaapi.decode_insn(ea)``
    and the global ``idaapi.cmd``.
    """
    current_ea = idc.ScreenEA()
    current_function = idc.GetFunctionName(current_ea)
    current_function_ea = idc.LocByName(current_function)

    if current_function:
        self.function = current_function

    ea = idc.GetFunctionAttr(current_function_ea, idc.FUNCATTR_START)
    end_ea = idc.GetFunctionAttr(current_function_ea, idc.FUNCATTR_END)

    self.highlighted = idaapi.get_highlighted_identifier()

    while ea < end_ea and ea != idc.BADADDR and self.highlighted:
        i = 0
        match = False
        optype = self.READ
        comment = None

        idaapi.decode_insn(ea)
        mnem = idc.GetMnem(ea)

        if self.highlighted in mnem:
            match = True
        elif idaapi.is_call_insn(ea):
            for xref in idautils.XrefsFrom(ea):
                # NOTE(review): 21 looks like the ordinary-flow xref type -- confirm.
                if xref.type != 21:
                    name = idc.Name(xref.to)
                    if name and self.highlighted in name:
                        match = True
                        break
        else:
            while True:
                opnd = idc.GetOpnd(ea, i)
                if not opnd:
                    break
                if self.highlighted in opnd:
                    try:
                        canon_feature = idaapi.insn_t_get_canon_feature(
                            idaapi.cmd.itype)
                    except AttributeError:
                        # Query the instruction decoded above; a blank
                        # insn_t() carries no features.
                        canon_feature = idaapi.cmd.get_canon_feature()
                    match = True
                    if canon_feature & self.OPND_WRITE_FLAGS[i]:
                        optype = self.WRITE
                i += 1

        if not match:
            # Fall back to the regular comment, then the repeatable one.
            comment = idc.GetCommentEx(ea, 0)
            if comment and self.highlighted in comment:
                match = True
            else:
                comment = idc.GetCommentEx(ea, 1)
                if comment and self.highlighted in comment:
                    match = True
                else:
                    comment = None

        if match:
            if ea > current_ea:
                direction = self.DOWN
            elif ea < current_ea:
                direction = self.UP
            else:
                direction = self.THIS

            self.xrefs[ea] = {
                'offset': idc.GetFuncOffset(ea),
                'mnem': mnem,
                'type': optype,
                'direction': direction,
                'text': idc.GetDisasm(ea),
            }

        ea += idaapi.cmd.size