def find_exported_eas():
    """Find the address of all exported functions.

    Exported functions are entrypoints into this program that external code
    can execute. Every entry reported by `idautils.Entries()` is kept unless
    it is referenced only by data or `has_segment_type` says it is not in a
    code segment. Kept entries for which `idc.hasName(ea)` is false are named
    in the database as a side effect.

    Returns:
        A set with the address of every entrypoint that passed the filters.
    """
    exported_eas = set()
    for index, ordinal, ea, name in idautils.Entries():

        # Not sure how this happens, but IDA seemed to treat
        # `obstack_alloc_failed_handler` in `call cs:[obstack_alloc_failed_handler]`
        # as an entrypoint.
        num_data_refs = len(tuple(idautils.DataRefsTo(ea)))
        # A single query is enough: only "zero or non-zero" matters below.
        num_code_refs = len(tuple(idautils.CodeRefsTo(ea, True)))
        if num_data_refs and not num_code_refs:
            log.warning(
                "Ignoring entrypoint {:08x}, it's only referenced by data".
                format(ea))
            continue

        if not has_segment_type(ea, idc.SEG_CODE):
            log.warning(
                "Ignoring entrypoint {:08x}, it is not in a code segment".
                format(ea))
            continue

        if not idc.hasName(ea):
            old_name = name
            # Names starting with "." are replaced by the comment at `ea`.
            if name.startswith("."):
                name = idc.GetCommentEx(ea, 0)

            log.info("Renaming `{}` at {:08x} to `{}`".format(
                old_name, ea, name))
            idc.MakeName(ea, name)

        # Record the surviving entrypoint; without this the function always
        # returned an empty set.
        exported_eas.add(ea)

    return exported_eas
def get_real_code_refs_to(func_addr):
    """Yield code references to `func_addr`, looking through stub ranges.

    A reference whose address lies in [plt_start, plt_end] or in
    [plt_got_start, plt_got_end] (module-level bounds, presumably the
    `.plt` and `.plt.got` sections -- confirm against where they are set) is
    not yielded itself; the code references to that address are yielded
    instead. Every other reference is yielded unchanged.

    Args:
        func_addr: Address whose incoming code references are wanted.

    Yields:
        Addresses of the referencing instructions.
    """
    for code_ref in idautils.CodeRefsTo(func_addr, True):
        in_plt = plt_start <= code_ref <= plt_end
        in_plt_got = plt_got_start <= code_ref <= plt_got_end

        # The two range tests used to be independent `if` statements, so a
        # reference inside the first range fell into the second test's `else`
        # and was yielded a second time as a direct reference.
        if in_plt or in_plt_got:
            for transient_code_ref in idautils.CodeRefsTo(code_ref, True):
                yield transient_code_ref
        else:
            yield code_ref
def getCfg(func, externs_eas, ea_externs):
    """Build a directed graph of the blocks of `func`.

    Blocks come from `obtain_block_sequence(func)`; each one is identified by
    its `(start, end)` pair and gets a small integer node id. The pair is
    stored in the node's 'label' attribute, and the blocks touching the
    function's start / end address are tagged through the 'c' attribute.
    An edge is added from every block that `control_blocks` associates with
    an address referencing a block start to the referenced block. Finally
    `attributingRe` is run on the graph.

    Args:
        func: IDA function object (uses `startEA` / `endEA`).
        externs_eas: Passed through to `attributingRe`.
        ea_externs: Passed through to `attributingRe`.

    Returns:
        The tuple `(cfg, 0)`.
    """
    entry_ea = func.startEA
    exit_ea = func.endEA
    cfg = nx.DiGraph()
    control_blocks, _ = obtain_block_sequence(func)
    node_ids = {}

    for key in control_blocks:
        start = control_blocks[key][0]
        end = control_blocks[key][1]
        src_node = (start, end)

        if src_node in node_ids:
            src_id = node_ids[src_node]
        else:
            # Fresh ids are always the current node count of the graph.
            src_id = len(cfg)
            node_ids[src_node] = src_id
            cfg.add_node(src_id)
            cfg.node[src_id]['label'] = src_node

        if start == entry_ea:
            cfg.node[src_id]['c'] = "start"
        if end == exit_ea:
            cfg.node[src_id]['c'] = "end"

        # Same bookkeeping for both reference queries: first flow=0, then
        # flow=1.
        for flow in (0, 1):
            for ref in idautils.CodeRefsTo(start, flow):
                if ref not in control_blocks:
                    continue
                pred_node = control_blocks[ref]
                if pred_node not in node_ids:
                    node_ids[pred_node] = len(cfg)
                pred_id = node_ids[pred_node]
                # add_edge creates the predecessor node when it is new.
                cfg.add_edge(pred_id, src_id)
                cfg.node[pred_id]['label'] = pred_node

    attributingRe(cfg, externs_eas, ea_externs)
    return cfg, 0
def scanDatabase(self, json_configuration):
    """Tag every function that references the imports listed in a config.

    The configuration is a JSON document with the keys "name", "comment" and
    "tag_list"; each tag has a "name" and an "import_list" of symbol names.
    For every code reference to a listed import, the name of the function
    containing the reference is passed to `self._addTagToFunction` together
    with the tag name. Imports that cannot be resolved are skipped.

    Args:
        json_configuration: The configuration as JSON text.

    Returns:
        None. If the text cannot be parsed, a message is printed and the
        scan is abandoned.
    """
    try:
        configuration = json.loads(json_configuration)
    except (TypeError, ValueError):
        # json.loads raises ValueError (JSONDecodeError) on malformed text
        # and TypeError on non-text input; anything else is a real error and
        # is no longer swallowed.
        print("IDA Function Tagger: Invalid configuration file")
        return

    print("IDA Function Tagger: Loading configuration: %s" %
          configuration["name"])
    print("IDA Function Tagger: Configuration comment: %s" %
          configuration["comment"])

    for tag in configuration["tag_list"]:
        print("IDA Function Tagger: Scanning for tag '%s'..." % tag["name"])

        for imported_function in tag["import_list"]:
            function_address = idc.get_name_ea_simple(str(imported_function))
            if function_address == BADADDR:
                continue

            for xref in idautils.CodeRefsTo(function_address, 0):
                function_name = idc.get_func_name(xref)
                self._addTagToFunction(function_name, str(tag["name"]))
def get_code_refs(self):
    """Return how many code references point at `self.addr`.

    When there are fewer than two references and the instruction at
    `self.addr` is a `nop` or a `jmp`, the sentinel value 9999 is returned
    instead of the real count.
    """
    ref_count = len(list(idautils.CodeRefsTo(self.addr, 1)))
    if ref_count >= 2:
        return ref_count

    if idc.print_insn_mnem(self.addr).lower() in ("nop", "jmp"):
        return 9999
    return ref_count
def find_dispatch_by_cfg():
    """Guess dispatcher functions from the cross-reference structure.

    Every non-library function is inspected. Each code reference to it marks
    that function's name as "called" and adds one to a counter kept for the
    name of the function containing the reference. The result is the list of
    counted names that were never marked as called, ordered from the highest
    counter to the lowest.

    A dispatch function is normally not called directly but references many
    other functions, so the first entries are the most likely candidates.

    Returns:
        A list of function names.
    """
    called_names = set()
    reference_counts = {}

    for function_ea in idautils.Functions():
        # Library functions are not considered as reference targets.
        if idc.get_func_flags(function_ea) & idc.FUNC_LIB:
            continue

        target_name = idc.get_func_name(function_ea)
        for ref_ea in idautils.CodeRefsTo(function_ea, 0):
            called_names.add(target_name)
            source_name = idc.get_func_name(ref_ea)
            reference_counts[source_name] = reference_counts.get(
                source_name, 0) + 1

    # A stable descending sort visits the names in the same order as
    # repeatedly extracting the current maximum.
    ranked = sorted(reference_counts, key=reference_counts.get, reverse=True)
    return [name for name in ranked if name not in called_names]
def decrypt_all_strings(decrypt_string_func_ea):
    """Decrypt the blob passed at every call of the string-decrypt routine.

    For each code reference to `decrypt_string_func_ea`, the instruction that
    supplies the blob address is inspected (the previous instruction, except
    for one hard-coded call site). If it is a `push` or `lea`, the blob length
    is read from the four bytes preceding the blob, the blob is run through
    `decrypt_config`, and the non-zero result bytes are stored as a string in
    the global `STRING_DICTIONARY`, keyed by `translate_ea_to_offset` of the
    blob address.

    Args:
        decrypt_string_func_ea: Address of the decrypt routine.
    """
    global STRING_DICTIONARY

    for call_ea in idautils.CodeRefsTo(decrypt_string_func_ea, 1):
        # One call site is special-cased: its argument instruction sits at a
        # fixed address and carries the blob address in operand 1.
        if call_ea == translate_ea(0x1db3):
            arg_insn_ea = 0x472DAC
            operand_index = 1
        else:
            arg_insn_ea = idc.prev_head(call_ea)
            operand_index = 0

        if idc.print_insn_mnem(arg_insn_ea) not in ('push', 'lea'):
            continue

        blob_ea = idc.get_operand_value(arg_insn_ea, operand_index)
        # Little-endian length stored immediately before the blob.
        length = int.from_bytes(idaapi.get_bytes(blob_ea - 4, 4), 'little')

        blob = list(idaapi.get_bytes(blob_ea, length))
        blob = decrypt_config(blob, length)

        STRING_DICTIONARY[translate_ea_to_offset(blob_ea)] = ''.join(
            chr(value) for value in blob if value != 0)
def rename_parents(self, fn, user_prefix, layer):
    """Recursively rename the default-named callers of `fn`.

    Each function containing a code reference to `fn` whose name does not
    already contain `user_prefix` and does contain 'sub' has its 'sub_' part
    replaced by `<user_prefix>p<layer>_`; the walk then continues from that
    caller with `layer + 1`.

    Args:
        fn: Function whose callers are examined.
        user_prefix: Prefix marking names that were already handled.
        layer: Current distance from the starting function.

    Returns:
        1 if at least one caller was renamed at this level, 0 otherwise, and
        None when `layer` exceeds 10 (the walk stops there).
    """
    if layer > 10:
        print("layer >10")
        return

    renamed_any = 0
    for ref in idautils.CodeRefsTo(self.start_ea_of(fn), 1):
        ref_hex = '{:x}'.format(ref)
        parent_name = idaapi.get_func_name(ref)
        print(ref_hex + ":" + parent_name)

        if user_prefix in parent_name:
            print("user_prefix \"" + user_prefix + "\" in (" + ref_hex +
                  ") " + parent_name)
            continue

        parent_prefix = user_prefix + 'p' + str(layer) + '_'
        print("user_prefix=" + user_prefix)

        # Only default-style names are renamed; everything else is left alone.
        if 'sub' not in parent_name:
            continue

        parent_new_name = parent_name.replace('sub_', parent_prefix)
        parent_fn = idaapi.get_func(ref)
        force_name(self.start_ea_of(parent_fn), parent_new_name)
        print("[parent]rename \"" + parent_name + "\" (" + ref_hex +
              ") to " + parent_new_name)
        self.rename_parents(parent_fn, user_prefix, layer + 1)
        renamed_any = 1

    return renamed_any
def func_callee_weight(f):
    """Score address `f` by where the code referencing it is located.

    Every code reference within `MAX_CALL` bytes of `f` contributes the
    natural logarithm of its distance, negated when the reference lies below
    `f`. A reference at distance zero contributes 0.

    Args:
        f: Address of the function being scored.

    Returns:
        The mean contribution, or 0 when no reference is close enough.
    """
    total = 0
    counted = 0

    for xref in idautils.CodeRefsTo(f, 0):
        offset = xref - f
        dist = abs(offset)
        if dist > MAX_CALL:
            continue

        # dist == 0 is a recursive call; log(0) is undefined, so use 0.
        magnitude = math.log(dist) if dist != 0 else 0
        total += -magnitude if offset < 0 else magnitude
        counted += 1

    return total / counted if counted != 0 else 0
def resolve_all_APIs(resolve_ea, mode):
    """Annotate each call of the resolver routine with the API it resolves.

    For every code reference to `resolve_ea`, instructions are walked
    backwards from the call until the one carrying the hash is found. When
    that hash is a key of the module-level `export_hashes` mapping, the
    mapped name is printed and set as a comment on the call.

    Args:
        resolve_ea: Address of the resolver routine, or None when it could
            not be located (a message is printed and nothing is done).
        mode: 1 means the hash is operand 0 of the nearest preceding `push`;
            any other value means operand 1 of the nearest preceding `mov`.
    """
    if resolve_ea is None:
        print('resolve fails..')
        return
    for ref in idautils.CodeRefsTo(resolve_ea, 1):
        # only 1 ref
        curr_ea = ref
        # NOTE(review): the backward walk has no lower bound; it relies on a
        # matching instruction existing before the call.
        while True:
            prev_instruction_ea = idc.prev_head(curr_ea)
            if mode == 1:
                if idc.print_insn_mnem(prev_instruction_ea) == 'push':
                    hash_val = idc.get_operand_value(prev_instruction_ea, 0)
                    if hash_val in export_hashes:
                        print(hex(ref) + ' : ' + export_hashes[hash_val])
                        idc.set_cmt(ref, export_hashes[hash_val], 0)
                    # Stop at the first `push`, whether or not it matched.
                    break
            else:
                if idc.print_insn_mnem(prev_instruction_ea) == 'mov':
                    hash_val = idc.get_operand_value(prev_instruction_ea, 1)
                    print(hex(hash_val))
                    if hash_val in export_hashes:
                        print(hex(ref) + ' : ' + export_hashes[hash_val])
                        idc.set_cmt(ref, export_hashes[hash_val], 0)
                    # Stop at the first `mov`, whether or not it matched.
                    break
            curr_ea = prev_instruction_ea
def btn_func_xref_count(self, code=0):
    """Show a chooser with the call count of every function.

    For each function the number of code references to its start address is
    collected; the functions are then listed from the most to the least
    referenced in an `AnalysisChooser` window. Entries are keyed by function
    name, so a later function with the same name replaces the earlier data.

    Args:
        code: Not used by this method.
    """
    stats_by_name = OrderedDict()
    for func_ea in idautils.Functions():
        ref_total = sum(1 for _ in idautils.CodeRefsTo(func_ea, 0))
        stats_by_name[ida_funcs.get_func_name(func_ea)] = [func_ea, ref_total]

    ranked = sorted(stats_by_name.items(),
                    key=lambda entry: entry[1][1],
                    reverse=True)

    # Column titles: (blank), function name, address, count.
    cols = [['', 0 | ida_kernwin.Choose.CHCOL_DEC],
            ['函数名', 15 | ida_kernwin.Choose.CHCOL_PLAIN],
            ['地址', 10 | ida_kernwin.Choose.CHCOL_HEX],
            ['次数', 10 | ida_kernwin.Choose.CHCOL_PLAIN]]

    items = [
        AnalysisChooseData(vuln=0,
                           name=func_name,
                           ea=stats[0],
                           other1=str(stats[1]))
        for func_name, stats in ranked
    ]

    chooser = AnalysisChooser(title='函数调用次数统计', cols=cols, item=items)
    chooser.Show()
def find_vm_codes(dispatcher_ea):
    """Collect the pushed operand in front of every jump to the dispatcher.

    The dispatcher must start with `pusha`. Each code reference to it is
    expected to be preceded by a `push` (pattern: `push offset` followed by
    `jmp dispatcher`); the pushed operand is converted with `str2int`.

    Args:
        dispatcher_ea: Address of the dispatcher's first instruction.

    Returns:
        A list of `(push_ea, operand_value)` tuples, one per reference.

    Raises:
        AssertionError: If the dispatcher does not start with `pusha`, or a
            reference is not preceded by a `push`.
    """
    mnem = idc.GetMnem(dispatcher_ea)
    if mnem != "pusha":
        # The `%` operator was missing here, which made this line raise
        # "TypeError: 'str' object is not callable" instead of reporting.
        message = "dispatcher_ea: 0x%08x, bad mnem: %s" % (dispatcher_ea, mnem)
        print(message)
        raise AssertionError(message)

    refs = list(idautils.CodeRefsTo(dispatcher_ea, 1))
    print("vms found: %d" % len(refs))
    for ref in refs:
        print(hex(ref))

    vms = []
    for ref in refs:
        # push offset
        # jmp dispatcher
        push_ea = prev_head(ref)
        mnem = idc.GetMnem(push_ea)
        if mnem != "push":
            print("push_ea: %s" % hex(push_ea))
            print("unexpected mnem: %s" % mnem)
            raise AssertionError("unexpected mnem: %s" % mnem)

        op = str2int(idc.GetOpnd(push_ea, 0))
        vms.append((push_ea, op))
    return vms
def DecryptStackStrings(addrDecryptFunction):
    """Emulate every call of a string-decrypt routine and record the result.

    For each code reference to `addrDecryptFunction`, the call parameters and
    the emulation boundaries are resolved with the helper routines, the
    global emulator `emu` is prepared and run, and a non-empty result is
    written back to the database: as a repeatable comment on the call and,
    when the destination is neither 0 nor -1, as a name plus patched,
    NUL-terminated bytes at the destination address.

    Any exception raised while emulating or updating the database is reported
    (boundaries, register dump, exception type) and the next call is
    processed.

    Args:
        addrDecryptFunction: Address of the decrypt routine.
    """
    global emu
    print "[+]DecryptStackStrings"
    # Get all code references to this function
    calls = idautils.CodeRefsTo(addrDecryptFunction, 1)
    # Iterate over all calls of the decrypt routine
    for call in calls:
        print "[+]Call at 0x%08X %s" % (call, idc.GetFunctionName(call))
        # Resolve Parameters
        # Param1. DestBuffer
        # Param2. Length
        # Param3. StackStringEncrypted
        destBuffer, length = GetDecryptString1Parameters(call)
        print "[+]Params dest = 0x%08X len = 0x%08X" % (destBuffer, length)
        # Get Emulation Boundaries
        emulStart, emulEnd = GetDecryptString1EmulationBoundaries(call, length)
        print "[+]Start 0x%08X, End 0x%08X" % (emulStart, emulEnd)
        # Initialize the registers
        PrepareEmuRegister(emu, emulStart)
        # Try to emulate and update the IDA database
        try:
            # Emulate
            szDecryptedString = Emulate(emu, emulStart, emulEnd)
            # Only a non-empty result is considered a valid decrypted string
            if 0 < len(szDecryptedString):
                print "[+]Decrypted: \"%s\" at 0x%08X" % (szDecryptedString,
                                                           call)
                # Add Comment and Patch Database
                idc.MakeRptCmt(call, szDecryptedString)
                # If DestBuffer is an address and not a register
                # Make Name and Patch IDB
                if destBuffer != 0 and destBuffer != -1:
                    idc.MakeNameEx(destBuffer, "" + szDecryptedString,
                                   SN_NOCHECK)
                    # Patch decrypted Buffer and convert to String
                    idx = 0
                    for c in szDecryptedString:
                        idc.PatchByte(destBuffer + idx, ord(c))
                        idx += 1
                    # Terminating NUL byte after the last character
                    idc.PatchByte(destBuffer + idx, 0)
                    idc.MakeStr(destBuffer, destBuffer + idx)
        except:
            # Best effort: report the failing range and emulator state, then
            # carry on with the next call.
            print "[+]EmulStart = 0x%08X, EmulEnd = 0x%08X" % (emulStart,
                                                               emulEnd)
            emu.dump_regs()
            e = sys.exc_info()[0]
            print e
        # Blank line between the reports of two calls
        print
def AEG_GetCodeXrefsTo(addr, flow=1):
    '''
    For an address, return the instructions that reference it (code
    segment only).

    @addr : the address whose incoming code references are wanted
    @flow : whether to use normal flow analysis; passed unchanged to
            idautils.CodeRefsTo
    '''
    return idautils.CodeRefsTo(addr, flow)
def get_encrypted_lib_table():
    """Locate the encrypted library table next to the first LoadLibraryA call.

    The first code reference to `LoadLibraryA` is used as an anchor: the
    instructions 22 and 16 bytes before it must both be `lea`. Operand 1 of
    the former, minus 4, is the address of the encrypted table; operand 1 of
    the latter is the start of the API table.

    Side effects:
        Sets the globals `API_TABLE_start_ea` and `ENCRYPTED_LIB_TABLE` (the
        3694 raw bytes of the table). On failure a message is printed and the
        globals are left untouched.
    """
    global API_TABLE_start_ea
    global ENCRYPTED_LIB_TABLE

    LoadLibraryA_ea = idc.get_name_ea_simple("LoadLibraryA")
    LoadLibraryA_ref = None
    if LoadLibraryA_ea != idaapi.BADADDR:
        # Only the first reference is of interest.
        LoadLibraryA_ref = next(
            iter(idautils.CodeRefsTo(LoadLibraryA_ea, 1)), None)

    # Without an anchor the offsets below cannot be computed; previously this
    # case crashed with a TypeError on `None - 22`.
    if LoadLibraryA_ref is None:
        print('Parsing fails...')
        return

    ENCRYPTED_LIB_TABLE_instruction_ea = LoadLibraryA_ref - 22
    API_TABLE_start_instruction_ea = LoadLibraryA_ref - 16

    if 'lea' not in idc.GetDisasm(
            ENCRYPTED_LIB_TABLE_instruction_ea) or 'lea' not in idc.GetDisasm(
                API_TABLE_start_instruction_ea):
        print(hex(ENCRYPTED_LIB_TABLE_instruction_ea))
        print('Parsing fails...')
        return

    ENCRYPTED_LIB_TABLE_ea = idc.get_operand_value(
        ENCRYPTED_LIB_TABLE_instruction_ea, 1)
    # The table starts 4 bytes before the address loaded by the `lea`.
    ENCRYPTED_LIB_TABLE_ea -= 4

    API_TABLE_start_ea = idc.get_operand_value(API_TABLE_start_instruction_ea,
                                               1)

    # 3694 bytes
    ENCRYPTED_LIB_TABLE = idaapi.get_bytes(ENCRYPTED_LIB_TABLE_ea, 3694)
def print_func_args(symname, narg, OS="win", MAXDEPTH=50):
    """Print every call of `symname` with its first `narg` arguments.

    One line per code reference is written to stdout in the form
    `0xADDR name (arg, arg, ...);`. The instruction supplying each argument
    is located with `get_func_arg_ea`. Immediate operands are printed as hex
    values, with the string found at that address added when there is one;
    other operands are printed as their operand text.

    Args:
        symname: Name of the called symbol.
        narg: Number of arguments to print per call.
        OS: With "win", names ending in "W" have their string arguments read
            as Unicode strings.
        MAXDEPTH: Not used by this function.
    """
    sym_ea = idaapi.get_name_ea(idaapi.NT_NONE, symname)

    if OS == "win" and symname[-1] == "W":
        str_type = idaapi.ASCSTR_UNICODE
    else:
        str_type = idaapi.ASCSTR_TERMCHR

    for call_ea in idautils.CodeRefsTo(sym_ea, 0):
        sys.stdout.write('0x%08X %s (' % (call_ea, symname))

        for arg_index in range(narg):
            arg_insn_ea = get_func_arg_ea(call_ea, arg_index)
            # 0xFFFFFFFF: no instruction found for this argument.
            if arg_insn_ea == 0xFFFFFFFF:
                break

            if idc.GetOpType(arg_insn_ea, 0) != idc.o_imm:
                sys.stdout.write('%s' % idc.GetOpnd(arg_insn_ea, 0))
            else:
                value = idc.GetOperandValue(arg_insn_ea, 0)
                text = idc.GetString(value, strtype=str_type)
                if text is None:
                    sys.stdout.write('0x%08X' % value)
                else:
                    sys.stdout.write('(char*)(0x%08X)\"%s\"' % (value, text))

            if arg_index + 1 != narg:
                sys.stdout.write(', ')

        sys.stdout.write(');\n')
def getCallGraph():
    """Build the call graph of the whole database.

    Nodes are function start addresses carrying the function name in their
    `name` attribute; an edge runs from the function containing a code
    reference to the referenced function. Functions without any incoming
    code reference are not added as callees, and references from a function
    to itself are skipped (the function name is printed instead).

    Returns:
        A `networkx.DiGraph`.
    """
    callGraph = nx.DiGraph()
    for func_addr in idautils.Functions(MinEA(), MaxEA()):
        fn = idc.GetFunctionName(func_addr)

        # CodeRefsTo returns a generator; it has to be materialized before
        # its emptiness can be tested (len() on the generator raises
        # TypeError).
        callers = list(idautils.CodeRefsTo(func_addr, 1))

        # avoid if there is no caller
        if not callers:
            continue

        callGraph.add_node(func_addr, name=fn)
        for caller_addr in callers:
            cn = idc.GetFunctionName(caller_addr)

            # avoid circle
            if fn == cn:
                print(fn)
                continue

            cf_addr = idc.GetFunctionAttr(caller_addr, FUNCATTR_START)
            # References from outside any function have no start address.
            # NOTE(review): GetFunctionAttr is documented to return BADADDR
            # in that case rather than None, so both are checked.
            if cf_addr is None or cf_addr == idc.BADADDR:
                continue

            if cf_addr not in callGraph:
                callGraph.add_node(cf_addr, name=cn)
            callGraph.add_edge(cf_addr, func_addr)
    return callGraph
def callback(ea, name, ordinal):
    """Record one library symbol and the code that references it.

    Stores `ea` in `library_addr[name]` and the list of code references to
    `ea` in `library_calls[name]`. The signature suggests it is meant as an
    import-enumeration callback -- confirm against the caller.

    Args:
        ea: Address of the symbol.
        name: Name of the symbol.
        ordinal: Not used.

    Returns:
        True, so that the enumeration continues.
    """
    refs = library_calls[name] = []
    library_addr[name] = ea
    refs.extend(idautils.CodeRefsTo(ea, 0))
    return True  # True -> Continue enumeration
def _recurse(self, ea, path, depth):
    """Collect the callers of `ea` into `self.paths`, recursively.

    `self.paths` maps a slash-separated path string to a list of
    `(name, address)` elements. Each caller found for `ea` is added to the
    list of `path` and then explored under `path/<caller name>`.

    Args:
        ea: Address whose callers are collected.
        path: Key in `self.paths` under which the callers are stored.
        depth: Current recursion depth, compared against `self.mr`.
    """
    # Depth limit reached: leave a placeholder instead of going deeper.
    if depth + 1 >= self.mr:
        self.paths[path] = [("[...]", BADADDR)]
        return
    # for all callers of ea...
    i = 0
    for ref in idautils.CodeRefsTo(ea, False):
        # Per-path limit (`self.mf`) reached: mark the list as truncated.
        # NOTE(review): if this triggers before `path` was created below,
        # `self.paths[path]` may not exist yet -- confirm `self.mf` > 1.
        if i + 1 >= self.mf:
            self.paths[path].append(("...", BADADDR))
            break
        cea = ref
        func = ida_funcs.get_func(cea)
        # Use the start of the containing function when there is one.
        if func:
            cea = func.start_ea
        loc_name = ida_name.get_short_name(cea)
        # Fall back to a synthetic name for unnamed locations.
        if not len(loc_name):
            loc_name = "unkn_%x" % cea
        elem = (loc_name, cea)
        # if path doesn't exist yet
        if path not in self.paths:
            self.paths[path] = [elem]
        # if caller doesn't exist yet
        if elem not in self.paths[path]:
            self.paths[path].append(elem)
            i += 1
        newpath = "%s/%s" % (path, loc_name)
        self._recurse(cea, newpath, depth + 1)
    return
def is_dominant(self, RP):
    """Tell whether this function dominates `RP`.

    A function without any incoming code reference is treated as dominant.
    Otherwise the answer comes from `dominates(start_ea, RP)`.

    Args:
        RP: Passed as second argument to `dominates`.

    Returns:
        True when the function has no incoming code reference; otherwise 1
        if `dominates` returned True and 0 if it did not.
    """
    entry_ea = self.func.start_ea
    if not list(idautils.CodeRefsTo(entry_ea, 0)):
        return True
    return 1 if dominates(entry_ea, RP) == True else 0
def extract_function_calls_to(f):
    """Yield a "calls to" characteristic for every reference to a function.

    Args:
        f (IDA func_t): The function whose callers are wanted.

    Yields:
        `(Characteristic("calls to"), ea)` for each code reference `ea` to
        the start of `f`.
    """
    callee_ea = f.start_ea
    for caller_ea in idautils.CodeRefsTo(callee_ea, True):
        yield Characteristic("calls to"), caller_ea
def main():
    """Find likely `MyCreateThread` call sites around `_beginthreadex`.

    Every code reference to `_beginthreadex` for which
    `getFunctionArgumentCount` reports 7 arguments is printed, and the start
    of the function containing it is handed to `handleCreateThread`.
    """
    target_ea = idc.LocByName('_beginthreadex')
    if target_ea == idc.BADADDR:
        print('Function "_beginthreadex" not found. Returning')
        return

    for call_ea in idautils.CodeRefsTo(target_ea, 1):
        if getFunctionArgumentCount(call_ea) != 7:
            continue
        print('Found likely MyCreateThread: 0x%08x' % call_ea)
        handleCreateThread(idc.GetFunctionAttr(call_ea, idc.FUNCATTR_START))
def function_xrefs(name):
    """Return the names of the functions that reference the symbol `name`.

    The resolved address and its disassembly line are printed first.

    Args:
        name: Symbol name to look up.

    Returns:
        A list with one function name per code reference to the symbol (a
        function referencing it several times appears several times).
    """
    import idc
    import idautils

    wf_addr = idc.get_name_ea_simple(name)
    # Single-argument print: same output as the former print statement, and
    # valid syntax on Python 3 as well.
    print("%s %s" % (hex(wf_addr), idc.generate_disasm_line(wf_addr, 0)))

    return [
        idc.get_func_name(addr) for addr in idautils.CodeRefsTo(wf_addr, 0)
    ]
def instruction_is_referenced(ea):
    """Returns `True` if it appears that there's a non-fall-through reference
    to the instruction at `ea`.

    That is the case when `ea` has a non-flow code reference, has a data
    reference, or is listed in the global `POSSIBLE_CODE_REFS`.
    """
    global POSSIBLE_CODE_REFS
    return (any(True for _ in idautils.CodeRefsTo(ea, False))
            or any(True for _ in idautils.DataRefsTo(ea))
            or ea in POSSIBLE_CODE_REFS)
def is_function_recursive(f):
    """Check whether a function is referenced from inside its own body.

    Args:
        f (IDA func_t): The function to check.

    Returns:
        True if any code reference to the start of `f` is contained in `f`,
        False otherwise.
    """
    return any(
        f.contains(ref) for ref in idautils.CodeRefsTo(f.start_ea, True))
def propagate_dead_code(self, ea, op_map):
    """Mark `ea` as dead code if nothing live leads to it, then spread.

    A predecessor counts as legitimate when it is not already in
    `self.marked_addresses` and `self.dead_br_of_op(ea, x, op_map)` is false
    for it. If `ea` has no legitimate predecessor it is colored, recorded in
    `self.marked_addresses`, and its not-yet-marked successors are examined
    the same way.

    Args:
        ea: Address to examine.
        op_map: Passed through to `self.dead_br_of_op`.
    """
    legit_preds = [
        x for x in idautils.CodeRefsTo(ea, True)
        if x not in self.marked_addresses
        and not self.dead_br_of_op(ea, x, op_map)
    ]
    # The test used to be inverted (`if prevs:`), which marked an address as
    # dead exactly when it still had legitimate predecessors.
    if legit_preds:
        return

    idc.SetColor(ea, idc.CIC_ITEM, 0x0000ff)
    self.marked_addresses[ea] = None

    for succ in list(idautils.CodeRefsFrom(ea, True)):
        # Already-marked successors are skipped so that a cycle made of dead
        # code cannot recurse forever.
        if succ in self.marked_addresses:
            continue
        self.propagate_dead_code(succ, op_map)
def idautils_getcoderefs_to(ea=0x000000004A6811E0):
    """Print every code reference to an address.

    IDA GUI: Right click on any address and click List Cross-references to.
    That shows all locations, in all segments, that refer to the address;
    this function prints the code references among them, one per line, as
    `<decimal>:<hex>`.

    While this code will work, it makes more sense to iterate over an entire
    function and check every location for cross references.

    Args:
        ea: Address to query. Defaults to the address this example was
            originally written for.
    """
    print("Getting all code references to a specific address")
    for ref in idautils.CodeRefsTo(ea, 1):
        print(str(ref) + ':' + str(hex(ref)))
def highlight_anti_debug():
    """Highlight every reference to a set of debugger-detection APIs.

    Each code reference to one of the listed API names is passed to
    `highlight_insn` with `ANITDEBUG_COLOR` and the note
    "Possible Anti-Debugging".
    """
    api_names = (
        "IsDebuggerPresent",
        "CheckRemoteDebuggerPresent",
        "NtQueryInformationProcess",
        "OutputDebugString",
        "QueryPerformanceCounter",
        "GetTickCount",
    )
    for api_name in api_names:
        api_ea = get_name_ea_simple(api_name)
        # Collect the references first, then annotate them.
        call_sites = list(idautils.CodeRefsTo(api_ea, 0))
        for ea in call_sites:
            highlight_insn(ea, ANITDEBUG_COLOR, "Possible Anti-Debugging")
def activate(self, ctx):
    """Run the callback on every call site of the selected functions.

    For each function selected in the chooser, the distinct code references
    to its start are decompiled; for each one that decompiles, the call
    arguments are extracted with `get_args` and handed to `self.callback`
    together with the reference address and the decompiled function.

    Args:
        ctx: Action context; `ctx.chooser_selection` holds function indexes.

    Returns:
        Always 1.
    """
    for pfn_idx in ctx.chooser_selection:
        pfn = ida_funcs.getn_func(pfn_idx)
        if not pfn:
            continue

        # A set removes addresses that are reported more than once.
        for xref in set(idautils.CodeRefsTo(pfn.start_ea, 0)):
            cfunc = idaapi.decompile(xref)
            if not cfunc:
                continue
            xref_args = get_args(cfunc, xref, self.var_prop)
            self.callback(xref, cfunc, xref_args)
    return 1
def main():
    """Decode the data passed at every call of `sub_A35C` and print it.

    First, every call site is scanned with `find_data` / `find_data_size`;
    call sites for which no data address is found (0 or None) are dropped.
    Afterwards each remaining entry is decoded with `decode_data` and printed
    as `0x<call address>: <decoded data>`.
    """
    decoder_ea = idc.LocByName("sub_A35C")

    pending = []
    for call_addr in idautils.CodeRefsTo(decoder_ea, 0):
        data_addr = find_data(call_addr)
        if data_addr is None or data_addr == 0:
            continue
        pending.append((data_addr, find_data_size(call_addr), call_addr))

    # Decoding starts only after all call sites have been collected.
    for data_addr, size, call_addr in pending:
        print('0x%x: %s' % (call_addr, decode_data(data_addr, size)))