def get_strings_per_function(self, start_func):
    """Comment a function with the strings referenced by its instructions.

    Existing comments are cleared through ``self.clear_comments``. Every
    data reference of every head in the function is resolved with
    ``self.get_string_type``; results longer than two characters are kept
    and ``self.string_counter`` is incremented for each. When at least one
    string was kept, a function comment ``STR <count># "<s1>" "<s2>" `` is
    written (newline characters are stripped from each string first).

    Prints ``func_obj return 0`` and does nothing else when
    ``idaapi.get_func`` yields no function for ``start_func``.
    """
    func = idaapi.get_func(start_func)
    if not func:
        print("func_obj return 0")
        return

    self.clear_comments(start_func, func)

    kept = []
    for head in idautils.Heads(start_func, idc.FindFuncEnd(start_func)):
        try:
            # All references of a head are resolved before any is kept, so
            # one StringException discards the whole head.
            resolved = [
                self.get_string_type(ref)
                for ref in idautils.DataRefsFrom(head)
            ]
            for text in resolved:
                if len(text) > 2:
                    kept.append(text)
                    self.string_counter += 1
        except StringException:
            continue

    if kept:
        quoted = ''.join('"' + text.replace('\n', '') + '" ' for text in kept)
        idaapi.set_func_cmt(func, 'STR {}# {}'.format(len(kept), quoted), 1)
def create_pe():
    """Instrument the .text instructions and write/insert the new section.

    Steps, in order:
      1. Locate the bounds of the segment named ``.text``.
      2. Call ``instrument(disasm, ea)`` for every function head that lies
         inside those bounds.
      3. Concatenate ``build_section_data(...)`` for each index of the
         module-level ``ori_op`` list, remembering each piece's offset.
      4. Prefix the result with ``add_dispatch_function(ori_address, offsets)``.
      5. Dump the section to ``INPUT_PE + '_newSectionData'`` and hand it to
         ``insert_section``.
    """
    text_start = text_end = 0
    for seg in Segments():
        if idc.SegName(seg) == ".text":
            text_start = idc.SegStart(seg)
            text_end = idc.SegEnd(seg)

    for func in idautils.Functions():
        for each_step in idautils.Heads(func, idc.FindFuncEnd(func)):
            # Only disassemble heads that will actually be instrumented.
            if text_start <= each_step < text_end:
                instrument(idc.GetDisasm(each_step), each_step)

    section_data = ''
    offsets = []
    for index in range(len(ori_op)):
        # Offsets are relative to the start of the generated functions,
        # i.e. measured before the dispatch function is prepended.
        offsets.append(len(section_data))
        section_data += build_section_data(args0[index], args1[index],
                                           args2[index])

    # add dispatch function
    len_funs = len(section_data)
    section_data = add_dispatch_function(ori_address, offsets) + section_data

    # ``with`` guarantees the handle is closed even if the write fails.
    with open(INPUT_PE + '_newSectionData', 'wb') as section_file:
        section_file.write(section_data)

    insert_section(len(section_data), section_data, len_funs)
def codeify(self, ea=idc.BADADDR):
    """Turn undefined bytes in code segments into functions or code.

    Starting at ``ea`` (default: the first code segment reported by
    ``self.get_start_ea(self.CODE)``, falling back to ``idc.FirstSeg()``),
    walk forward address by address. Inside segments whose type equals
    ``self.CODE``, existing functions are skipped wholesale; everywhere else
    ``idc.MakeFunction`` is tried first and ``idc.MakeCode`` second. The
    totals are reported through ``self.say``.

    :param ea: address to start scanning from.
    """
    func_count = 0
    code_count = 0

    if ea == idc.BADADDR:
        ea = self.get_start_ea(self.CODE)
        if ea == idc.BADADDR:
            ea = idc.FirstSeg()

    self.say("\nLooking for undefined code starting at: %s:0x%X" % (idc.SegName(ea), ea))

    while ea != idc.BADADDR:
        try:
            if idc.GetSegmentAttr(ea, idc.SEGATTR_TYPE) == self.CODE:
                if idc.GetFunctionName(ea) != '':
                    # Already inside a function: jump straight to its end.
                    ea = idc.FindFuncEnd(ea)
                    continue
                elif idc.MakeFunction(ea):
                    func_count += 1
                elif idc.MakeCode(ea):
                    code_count += 1
        except Exception:
            # Best effort: a failing address is skipped. Unlike a bare
            # ``except:``, this still lets Ctrl-C / SystemExit stop the scan.
            pass

        ea = idc.NextAddr(ea)

    self.say("Created %d new functions and %d new code blocks\n" % (func_count, code_count))
def run(self):
    """Count instruction and instruction-group usage for every function.

    Loads the instruction list and groups, then iterates the functions of
    the segment containing ``idc.BeginEA()``. For each function a fresh copy
    of ``self.instructions`` / ``self.groups`` is used as a counter table,
    every head's mnemonic is tallied, and the results are passed to
    ``self.writeInstructionFeatures`` and
    ``self.writeInstructionGroupFeatures``.

    :return: dict mapping function name to its start address.
    """
    self.loadInstructionList()
    self.loadInstructionGroups()
    functionNamesToEA = {}
    ea = idc.BeginEA()

    for funcea in Functions(idc.SegStart(ea), idc.SegEnd(ea)):
        functionInstructions = copy.deepcopy(self.instructions)
        functionGroups = copy.deepcopy(self.groups)
        instructionSum = 0  # was ``sum``, which shadowed the builtin
        allGroupSum = 0
        functionName = idc.GetFunctionName(funcea)
        functionNamesToEA[functionName] = funcea

        # The function end does not change while we only read mnemonics,
        # so query it once instead of once per instruction.
        functionEnd = idc.FindFuncEnd(funcea)
        currentea = funcea
        while currentea != idc.BADADDR and currentea < functionEnd:
            currentInstruction = idc.GetMnem(currentea)
            if currentInstruction in self.instructions:
                functionInstructions[currentInstruction] += 1
                instructionSum += 1
            for group in self.groups:
                # self.groups[group][0] holds the group's mnemonics,
                # functionGroups[group][1] the group's running total.
                if currentInstruction in self.groups[group][0]:
                    functionGroups[group][1] += 1
                    functionGroups[group][0][currentInstruction] += 1
                    allGroupSum += 1
            currentea = idc.NextHead(currentea)

        self.writeInstructionFeatures(self.instructions, instructionSum,
                                      functionInstructions, functionName)
        self.writeInstructionGroupFeatures(self.groups, allGroupSum,
                                           functionGroups, functionName)
    return functionNamesToEA
def run(self, arg):
    """Export all names of the database to two ``.sym`` files.

    ``<idb>.sym`` receives image-base-relative offsets and ``<idb>.raw.sym``
    receives the file offsets from ``idaapi.get_fileregion_offset``. Each
    line is ``"%08X %08X;%s"`` (offset, size, name). The size is the distance
    to ``idc.FindFuncEnd`` when ``idc.GetFunctionFlags`` reports a function
    at the address, otherwise the fixed value 4.

    :param arg: unused.
    """
    print("Running")
    PE = peutils_t()
    print("Image base is %016X" % PE.imagebase)
    print("Exporting functions...")

    basePath = os.path.splitext(idc.GetIdbPath())[0]
    filename = basePath + ".sym"
    rawOffsetsFilename = basePath + ".raw.sym"

    count = 0
    # ``with`` closes both files even if a write or an IDA call raises.
    with open(filename, 'w') as f, open(rawOffsetsFilename, 'w') as rawOffsetsFile:
        for address, name in Names():
            offset = address - PE.imagebase
            rawOffset = idaapi.get_fileregion_offset(address)
            if idc.GetFunctionFlags(address) != -1:
                size = idc.FindFuncEnd(address) - address
            else:
                size = 4
            # Every name is counted, not only those that are functions.
            count += 1
            f.write("%08X %08X;%s\n" % (offset, size, name))
            rawOffsetsFile.write("%08X %08X;%s\n" % (rawOffset, size, name))

    print("%d functions exported" % count)
def find_unusual_xors(functions):
    """Return the functions that contain a non-zeroing, non-cookie XOR.

    A XOR whose two operands differ is considered suspicious. If
    ``is_security_cookie`` does not attribute it to a stack cookie, the
    function is recorded and scanning of that function stops.

    :param functions: iterable of function start addresses.
    :return: list of function start addresses, in input order.
    """
    # TODO find xors in tight loops
    candidate_functions = []
    for fva in functions:
        # Loop-invariant: the scan only reads the database.
        func_end = idc.FindFuncEnd(fva)
        cva = fva
        while cva != idaapi.BADADDR and cva < func_end:
            if (idc.GetMnem(cva) == "xor"
                    and idc.GetOpnd(cva, 0) != idc.GetOpnd(cva, 1)):
                disasm = idc.GetDisasm(cva)
                g_logger.debug(
                    "suspicious XOR instruction at 0x%08X in function 0x%08X: %s",
                    cva, fva, disasm)
                ph = idc.PrevHead(cva)
                nh = idc.NextHead(cva)
                ip = idc.GetDisasm(ph)
                ia = idc.GetDisasm(nh)
                if ip and ia:
                    g_logger.debug("Instructions: %s; %s; %s", ip, disasm, ia)
                if ph or nh:
                    if is_security_cookie(cva, ph, nh):
                        g_logger.debug("XOR related to security cookie: %s",
                                       disasm)
                    else:
                        g_logger.debug("unusual XOR: %s", disasm)
                        candidate_functions.append(fva)
                        # One hit is enough to flag the function.
                        break
            cva = idc.NextHead(cva)
    return candidate_functions
def __init__(self): print "Naming saved register locations...", for ea in idautils.Functions(): mea = ea named_regs = [] last_iteration = False while mea < (ea + (self.INSIZE * self.SEARCH_DEPTH)): mnem = idc.GetMnem(mea) if mnem in ['sw', 'sd']: reg = idc.GetOpnd(mea, 0) dst = idc.GetOpnd(mea, 1) if reg in self.ARCH['savedregs'] and reg not in named_regs and dst.endswith('($sp)') and 'var_' in dst: offset = int(dst.split('var_')[1].split('(')[0], 16) idc.MakeLocal(ea, idc.FindFuncEnd(ea), "[sp-%d]" % offset, "saved_%s" % reg[1:]) named_regs.append(reg) if last_iteration: break elif mnem.startswith('j') or mnem.startswith('b'): last_iteration = True mea += self.INSIZE print "done."
def codeify(self, ea=idc.BADADDR): func_count = 0 code_count = 0 if ea == idc.BADADDR: ea = self.get_start_ea(self.CODE) if ea == idc.BADADDR: ea = idc.FirstSeg() print "\nLooking for undefined code starting at: %s:0x%X" % ( idc.SegName(ea), ea) if self.get_start_ea(self.DATA) == idc.BADADDR: print "WARNING: No data segments defined! I don't know where the code segment ends and the data segment begins." while ea != idc.BADADDR: try: if idc.GetSegmentAttr(ea, idc.SEGATTR_TYPE) == self.CODE: if idc.GetFunctionName(ea) != '': ea = idc.FindFuncEnd(ea) continue else: if idc.MakeFunction(ea): func_count += 1 elif idc.MakeCode(ea): code_count += 1 except: pass ea = idc.NextAddr(ea) print "Created %d new functions and %d new code blocks\n" % ( func_count, code_count)
def make_new_functions_heuristic_push_regs(self, makefunction=False):
    '''
    Scan the whole address range for PUSH instructions outside any function.

    After bytes have been converted to instructions, a PUSH that is not
    inside a defined function is treated as a likely function start. When
    ``makefunction`` is true the code area is converted to a function with
    ``idc.MakeFunction``; progress is printed when ``self.printflag`` is set.

    makefunction -- create functions at the PUSH candidates (default False).
    '''
    EAstart = idc.MinEA()
    EAend = idc.MaxEA()
    ea = EAstart
    while ea < EAend:
        if self.printflag:
            print "EA %08x" % ea
        ea_function_start = idc.GetFunctionAttr(ea, idc.FUNCATTR_START)
        # If ea is inside a defined function, skip to end of function
        if ea_function_start != idc.BADADDR:
            ea = idc.FindFuncEnd(ea)
            continue
        # If current ea is code
        if ida_bytes.isCode(ida_bytes.getFlags(ea)):
            # Looking for prologues that do PUSH {register/s}
            mnem = idc.GetMnem(ea)
            #
            if (mnem == "PUSH"):
                if makefunction:
                    if self.printflag:
                        print "Converting code to function @ %08x" % ea
                    idc.MakeFunction(ea)
                # If a function now exists here, resume after its end;
                # otherwise fall through to the generic "next code" step.
                eanewfunction = idc.FindFuncEnd(ea)
                if eanewfunction != idc.BADADDR:
                    ea = eanewfunction
                    continue
        # Advance to the next code item, or by one byte when there is none.
        nextcode = ida_search.find_code(ea, idc.SEARCH_DOWN)
        if nextcode != idc.BADADDR:
            ea = nextcode
        else:
            ea += 1
def make_funcs_from_prof(self, binprof):
    """Create IDA functions at the addresses named by one callgrind profile.

    Each key of ``binprof`` is converted with ``int()`` and passed to
    ``idc.MakeFunction`` together with ``idc.FindFuncEnd`` of that address.

    :param binprof: mapping whose keys are function addresses.
    :return: number of successful ``idc.MakeFunction`` calls.
    """
    addresses = (int(key) for key in binprof.keys())
    return sum(
        1 for ea in addresses if idc.MakeFunction(ea, idc.FindFuncEnd(ea))
    )
def _process_mod_init_func_for_metaclasses(func, found_metaclass):
    """Emulate a __mod_init_func function and report OSMetaClass calls.

    The function body is run through ``_emulate_arm64``. For every BL whose
    X0, X1 and X3 are all non-zero, X1 must lie in a segment whose name ends
    in ``__TEXT.__cstring`` and X0 must lie in some segment; matching calls
    are reported as ``found_metaclass(X0, <string at X1>, X3, X2 or None)``.
    """
    _log(4, 'Processing function {}', idc.GetFunctionName(func))

    def on_BL(addr, reg):
        # OSMetaClass::OSMetaClass(this, className, superclass, classSize)
        this_ptr, name_ptr, class_size = reg['X0'], reg['X1'], reg['X3']
        if not this_ptr or not name_ptr or not class_size:
            return
        _log(5, 'Have call to {:#x}({:#x}, {:#x}, ?, {:#x})', addr, this_ptr,
             name_ptr, class_size)
        name_is_cstring = idc.SegName(name_ptr).endswith("__TEXT.__cstring")
        if name_is_cstring and idc.SegName(this_ptr):
            found_metaclass(this_ptr, idc.GetString(name_ptr), class_size,
                            reg['X2'] or None)

    _emulate_arm64(func, idc.FindFuncEnd(func), on_BL=on_BL)
def getFunc(self, ea=None, next=False): if ea == None: ea = idaapi.get_screen_ea() if next: ea = idc.NextFunction(ea) if ea == -1: return (0xFFFFFFFFL, 0xFFFFFFFFL) if ea < 0: return (0xFFFFFFFFL, 0xFFFFFFFFL) elif idc.GetFunctionName(ea) == idc.GetFunctionName(idc.PrevAddr(ea)): ea = idc.PrevFunction(ea) return (ea, idc.FindFuncEnd(ea))
def add_func(self, ea):
    """Register a newly created function for processing.

    Drops the stored object version of every address in
    ``[ea, idc.FindFuncEnd(ea))``, queues ``ea`` in
    ``self.addresses_to_process`` and ``self.segment_address_to_process``,
    and records a "Create function" auto comment.

    :param ea: start address of the new function.
    """
    # invalid all addresses in this function (they depend (relatively) on
    # this function now, no on code)
    logger.warning("Warning : deletion of objects not implemented")
    # TODO : implement deletion of objects inside newly created function range
    # TODO : use function chunks to iterate over function code
    func_end = idc.FindFuncEnd(ea)
    # A BADADDR end would make the loop below walk to the top of the address
    # space one byte at a time; skip invalidation in that case.
    if func_end != idc.BADADDR:
        end_index = int(func_end)  # queried once, not once per byte
        ea_index = int(ea)
        while ea_index < end_index:
            self.delete_object_version_for_ea(ea_index)
            ea_index += 1
    # self.update_object_version_from_idb(ea)
    self.addresses_to_process.add(ea)
    self.segment_address_to_process.add(ea)
    self.repo_manager.add_auto_comment(ea, "Create function")
def find_function_strings(func_ea):
    """Collect ``String`` objects for the data references of a function.

    :param func_ea: start address of the function.
    :return: list of ``String(head, ref)`` objects, one per data reference
        that parses; ``None`` when the function end cannot be determined.
    """
    func_end = idc.FindFuncEnd(func_ea)
    if func_end == idaapi.BADADDR:
        return None

    found = []
    for head in idautils.Heads(func_ea, func_end):
        for target in idautils.DataRefsFrom(head):
            try:
                parsed = String(head, target)
            except StringParsingException:
                # Reference does not point at a parseable string.
                continue
            found.append(parsed)
    return found
def get_list_of_function_instr(addr, mode):
    """Return the image-relative addresses of a function's instructions.

    All chunks reported by ``enumerate_function_chunks(addr)`` are walked;
    every head flagged as code contributes its offset from the image base,
    formatted as lowercase hex without a ``0x`` prefix or ``L`` suffix.

    :param addr: function entry point.
    :param mode: currently unused (see TODO).
    :return: list of hex strings.
    :raises Exception: if a head equals BADADDR.
    """
    #TODO follow subcalls MODE_INSTRUMENT_SUBCALLS
    list_of_addr = list()
    image_base = idaapi.get_imagebase(addr)
    for chunk in enumerate_function_chunks(addr):
        for head in idautils.Heads(chunk[0], chunk[1]):
            # The old test compared the integer head with the *string*
            # returned by hex(), so it could never be true.
            if head == idaapi.BADADDR:
                raise Exception("Invalid head for parsing")
            # If the element is an instruction
            if isCode(idc.GetFlags(head)):
                # Same text as str(hex(x)) with "L" and "0x" stripped.
                list_of_addr.append('%x' % (head - image_base))
    # Previously the list was built and then discarded.
    return list_of_addr
def find_shifts(functions):
    """Score functions by how many distinct shift/rotate mnemonics they use.

    :param functions: iterable of function start addresses.
    :return: dict mapping each address to a score in ``[0, 1]``: 0 when none
        of shl/shr/sar/sal/rol/ror occur, 1 when all six occur.
    """
    candidate_functions = {}
    # TODO better to compare number of shifts to overall instruction count?
    # TODO find shifts in tight loops
    shift_mnems = set(["shl", "shr", "sar", "sal", "rol", "ror"])
    shift_mnems_len = len(shift_mnems)
    for fva in functions:
        found_shifts = set([])
        # Loop-invariant: the scan only reads the database.
        func_end = idc.FindFuncEnd(fva)
        cva = fva
        while cva != idaapi.BADADDR and cva < func_end:
            i = idc.GetMnem(cva)
            if i in shift_mnems:
                found_shifts.add(i)
                g_logger.debug("shift instruction: %s va: 0x%x function: 0x%x",
                               idc.GetDisasm(cva), cva, fva)
            cva = idc.NextHead(cva)
        # Fraction of the shift mnemonics that were seen at least once.
        candidate_functions[fva] = 1 - (
            (shift_mnems_len - len(found_shifts)) / float(shift_mnems_len))
    return candidate_functions
def get_list_of_function_instr(addr):
    """Collect a function's instruction addresses and call names.

    All chunks reported by ``enumerate_function_chunks(addr)`` are walked.
    For every head flagged as code, ``get_call_name(head)`` is recorded when
    it is not ``None``, and the head's offset from the image base is recorded
    as lowercase hex without a ``0x`` prefix or ``L`` suffix.

    :param addr: function entry point.
    :return: ``(list_of_addr, list_of_calls)``.
        NOTE(review): the function used to discard both lists and return
        ``None``; the tuple shape is an assumption, so confirm it against
        the callers.
    :raises Exception: if a head equals BADADDR.
    """
    list_of_addr = list()
    list_of_calls = list()
    image_base = idaapi.get_imagebase(addr)
    for chunk in enumerate_function_chunks(addr):
        for head in idautils.Heads(chunk[0], chunk[1]):
            # The old test compared the integer head with the *string*
            # returned by hex(), so it could never be true.
            if head == idaapi.BADADDR:
                raise Exception("Invalid head for parsing")
            # If the element is an instruction
            if idc.isCode(idc.GetFlags(head)):
                call_name = get_call_name(head)
                if call_name is not None:
                    list_of_calls.append(call_name)
                # Same text as str(hex(x)) with "L" and "0x" stripped.
                list_of_addr.append('%x' % (head - image_base))
    return list_of_addr, list_of_calls
def GetFuncInputSurrogate(func, binaryName):
    """Build a serialisable description of one function for a binary.

    :param func: function object exposing ``startEA``.
    :param binaryName: stored under the top-level ``'name'`` key.
    :return: ``{'name': binaryName, 'functions': [function]}`` where the
        function dict carries name, id, start/end addresses, an empty call
        list and one entry per flow-chart block (id, bounds, disassembly
        text and successor block ids).
    """
    function_ea = func.startEA
    function = {
        'name': GetFunctionName(func),
        'id': function_ea,
        # ignore call-graph at this moment
        'call': [],
        'sea': function_ea,
        'see': idc.FindFuncEnd(function_ea),
        'blocks': [],
    }

    # basic bloc content
    for bblock in idaapi.FlowChart(idaapi.get_func(function_ea)):
        listing = ''.join(
            '%s %s \r\n' % (str(head),
                            unicode(idc.GetDisasm(head), errors='replace'))
            for head in idautils.Heads(bblock.startEA, bblock.endEA))
        function['blocks'].append({
            'id': bblock.id,
            'sea': bblock.startEA,
            'eea': bblock.endEA,
            'src': listing,
            # flow chart
            'call': [succ.id for succ in bblock.succs()],
        })

    return {'name': binaryName, 'functions': [function]}
def get_list_of_function_instr(addr):
    '''
    The function returns a list of instructions from a function

    All chunks reported by enumerate_function_chunks(addr) are walked; each
    head flagged as code contributes its offset from the image base,
    formatted as lowercase hex without "0x" prefix or "L" suffix.

    @addr - is function entry point
    @return - list of instruction's addresses (hex strings)
    '''
    list_of_addr = list()
    image_base = idaapi.get_imagebase(addr)
    for chunk in enumerate_function_chunks(addr):
        for head in idautils.Heads(chunk[0], chunk[1]):
            # The old test compared the integer head with the *string*
            # returned by hex(), so it could never be true.
            if head == idaapi.BADADDR:
                raise Exception("Invalid head for parsing")
            # If the element is an instruction
            if idc.isCode(idc.GetFlags(head)):
                # Same text as str(hex(x)) with "L" and "0x" stripped.
                list_of_addr.append('%x' % (head - image_base))
    # The docstring always promised this return value; it was missing.
    return list_of_addr
def find_function_callees(func_ea, maxlvl):
    """Collect functions reachable from ``func_ea`` via code references.

    Work-list traversal: every popped address that has a function name is
    appended to the result. Outgoing code references that leave the
    function's ``[start, end]`` range are queued unless already visited.

    Note that the level counter is incremented once per function whose end
    is known, not per call depth; once it reaches ``maxlvl`` no further
    references are followed.

    :param func_ea: start address of the root function.
    :param maxlvl: limit for the level counter described above.
    :return: list of function addresses, root included when it is named.
    """
    callees = []
    seen = set()
    queue = set([func_ea])
    level = 0

    while queue:
        current = queue.pop()
        seen.add(current)

        if not idc.GetFunctionName(current):
            continue
        callees.append(current)

        end = idc.FindFuncEnd(current)
        if end == idaapi.BADADDR:
            continue

        level += 1
        if level >= maxlvl:
            continue

        outgoing = set()
        for head in idautils.Heads(current, end):
            if ida_bytes.isCode(get_flags(head)):
                # 0 == ALL_XREFS in the original naming
                for target in idautils.CodeRefsFrom(head, 0):
                    if target < current or target > end:
                        outgoing.add(target)

        queue |= outgoing - seen

    return callees
def make_funcs_from_profiles(self, firstprof, secondprof):
    """Create IDA functions at addresses present in both callgrind profiles.

    Every key found in both ``firstprof`` and ``secondprof`` is converted
    with ``int()`` and passed to ``idc.MakeFunction`` together with
    ``idc.FindFuncEnd`` of that address. Keys are processed in the iteration
    order of ``firstprof``.

    :param firstprof: mapping whose keys are function addresses.
    :param secondprof: mapping whose keys are function addresses.
    :return: number of successful ``idc.MakeFunction`` calls.
    """
    # The former second pass over ``secondprof`` could never add anything:
    # a key in both profiles was already collected by the first pass. A set
    # also replaces the repeated linear ``in keys()`` scans.
    second_keys = set(secondprof.keys())
    count = 0
    for key in firstprof.keys():
        if key not in second_keys:
            continue
        ea = int(key)
        if idc.MakeFunction(ea, idc.FindFuncEnd(ea)):
            count += 1
    return count
def main():
    """Redirect the AFL logging call of selected instrumented functions.

    1. Find the functions whose names contain ``afl_maybe_log_fun`` and
       ``afl_maybe_log``; abort with a message if either is missing.
    2. Collect every function larger than 0x28 bytes that has a ``call`` at a
       fixed offset from its start whose target name contains
       ``afl_maybe_log``.
    3. Sort the collected functions by size and, for those selected by the
       size/rank/random rule below, patch the dword following the call's
       first byte.

    Any exception is printed rather than propagated; the elapsed time since
    the module-level ``g_time_start`` is always printed at the end.
    """
    idaapi.msg("alter instrument function\n")
    addr_afl_maybe_log_fun = 0
    addr_afl_maybe_log = 0
    fun_info = []  # tuples of (function start, function size, call address)
    try:
        for func in idautils.Functions():
            fun_name = idc.GetFunctionName(func)
            # ``find(...) > 0`` ignores a match at index 0, i.e. the name
            # must have at least one character before the pattern.
            if fun_name.find('afl_maybe_log_fun') > 0:
                addr_afl_maybe_log_fun = func
            elif fun_name.find('afl_maybe_log') > 0:
                addr_afl_maybe_log = func
            if addr_afl_maybe_log_fun and addr_afl_maybe_log:
                break
        if not addr_afl_maybe_log_fun or not addr_afl_maybe_log:
            print("don't find add_afl_maybe_fun\n")
            return
        print("find add_afl_maybe_fun ok\n")
        # find instrumented function
        for func in idautils.Functions():
            f_end = idc.FindFuncEnd(func)
            if f_end - func <= 0x28:
                continue
            # call __afl_maybe_log
            if idc.__EA64__:  # 64bit
                addr_call = func + 0x1A
            else:  # 32bit
                addr_call = func + 0x08
            mnem = idc.GetMnem(addr_call)
            if mnem != 'call':
                continue
            for to in idautils.CodeRefsFrom(addr_call, False):
                fun_name = idc.GetFunctionName(to)
                # Here a match at index 0 is accepted (``< 0`` means absent).
                if fun_name.find('afl_maybe_log') < 0:
                    continue
                fun_info.append((func, f_end - func, addr_call))
        # Ascending by function size; the loop below walks largest first.
        fun_info.sort(key=lambda x: x[1])
        num = len(fun_info)
        print(num)
        for i in range(num - 1, -1, -1):
            print(hex(fun_info[i][0]))
            # ``and`` binds tighter than ``or``: patch when the function is
            # smaller than 0x200 bytes, OR when it is in the lower 90% by
            # rank AND an 80% random draw succeeds.
            if fun_info[i][
                    1] < 0x200 or i < num * 90.0 / 100.0 and random.randint(
                        0, 99) < 80:
                # remove fun instrumented #or i < num/3
                # NOTE(review): presumably rewrites the rel32 operand of a
                # 5-byte call so it targets addr_afl_maybe_log; confirm.
                idc.PatchDword(fun_info[i][2] + 1,
                               addr_afl_maybe_log - fun_info[i][2] - 5)
            else:
                print(hex(fun_info[i][0]))
        #idc.SaveBase('')
        #idc.Exit(0)
    except Exception as e:
        print(e)
    print('analyse time: ' + str(time.time() - g_time_start) + 's\n')
def create_pe():
    """Find instruction 4-gram patterns, dump samples, and emit the section.

    For every function, a sliding window over the last four mnemonics is
    matched against a handful of fixed patterns; matching addresses are
    appended to the module-level ``addr_to_fix`` list. Every head inside the
    ``.text`` segment is also passed to ``instrument``. Afterwards 1024
    random 3/4-size samples of ``addr_to_fix`` plus the full list are written
    as CSV files under ``crafted\\<INPUT_PE>\\``, and the new section is
    built, written to ``INPUT_PE + '_newSectionData'`` and inserted.
    """
    text_start = text_end = 0
    for seg in Segments():
        if idc.SegName(seg) == ".text":
            text_start = idc.SegStart(seg)
            text_end = idc.SegEnd(seg)
    for func in idautils.Functions():
        # Sliding window of the four most recent mnemonics and their
        # addresses; slot 1 is the oldest, slot 4 the current instruction.
        fourG_1 = ''
        fourG_2 = ''
        fourG_3 = ''
        fourG_4 = ''
        fourG_1_addr = 0
        fourG_2_addr = 0
        fourG_3_addr = 0
        fourG_4_addr = 0
        start_address = func
        end_address = idc.FindFuncEnd(func)
        for each_step in idautils.Heads(start_address, end_address):
            opcode = idc.GetMnem(each_step)
            #traverse 4 Gram
            fourG_1 = fourG_2
            fourG_1_addr = fourG_2_addr
            fourG_2 = fourG_3
            fourG_2_addr = fourG_3_addr
            fourG_3 = fourG_4
            fourG_3_addr = fourG_4_addr
            fourG_4 = opcode
            fourG_4_addr = each_step
            if fourG_1 == 'mov' and fourG_2 == 'push' and fourG_3 == 'pop' and fourG_4 == 'mov':
                print "mov push pop mov", "0x%x" % fourG_1_addr, idc.GetDisasm(
                    fourG_1_addr)
                addr_to_fix.append(['mov push pop mov-mov1', fourG_1_addr])
                addr_to_fix.append(['mov push pop mov-mov2', fourG_4_addr])
            if fourG_1 == 'mov' and fourG_2 == 'mov' and fourG_3 == 'push' and fourG_4 == 'xor':
                print "mov mov push xor", "0x%x" % fourG_1_addr, idc.GetDisasm(
                    fourG_1_addr)
                addr_to_fix.append(['mov mov push xor-mov1', fourG_1_addr])
                addr_to_fix.append(['mov mov push xor-mov2', fourG_2_addr])
                addr_to_fix.append(['mov mov push xor-xor1', fourG_4_addr])
            if fourG_1 == 'mov' and fourG_2 == 'add' and fourG_3 == 'mov' and fourG_4 == 'mov':
                print "mov add mov mov", "0x%x" % fourG_1_addr, idc.GetDisasm(
                    fourG_1_addr)
                addr_to_fix.append(['mov add mov mov-mov1', fourG_1_addr])
                addr_to_fix.append(['mov add mov mov-mov2', fourG_3_addr])
                addr_to_fix.append(['mov add mov mov-mov3', fourG_4_addr])
            # The two patterns below only constrain the oldest two / three
            # window slots.
            if fourG_1 == 'mov' and fourG_2 == 'rep':
                print "mov rep", "0x%x" % fourG_1_addr, idc.GetDisasm(
                    fourG_1_addr)
                addr_to_fix.append(['mov rep-mov1', fourG_1_addr])
            if fourG_1 == 'push' and fourG_2 == 'call' and fourG_3 == 'pop':
                print "push call pop", "0x%x" % fourG_1_addr, idc.GetDisasm(
                    fourG_1_addr)
                addr_to_fix.append(['push call pop-call1', fourG_2_addr])
            op = idc.GetDisasm(each_step)
            if each_step >= text_start and each_step < text_end:
                instrument(op, each_step)
    #print "3 of 4",3*len(addr_to_fix)/4
    name = ['operation', 'address']  # CSV column names
    os.system("mkdir " + "crafted\\" + INPUT_PE)
    for i in range(0, 1024):
        tmp_list = []
        # Python 2 integer division: sample size is floor(3/4 * len).
        tmp_list = random.sample(addr_to_fix, 3 * len(addr_to_fix) / 4)
        df_tmp = pd.DataFrame(columns=name, data=tmp_list)
        df_tmp.to_csv("crafted\\" + INPUT_PE + "\\" + str(i), encoding='gbk')
        '''
        tmp2 = open("crafted\\"+INPUT_PE+"\\"+str(i),'w')
        for each in tmp_list:
            tmp2.write(str(each[0])+','+str(each[1]))
            tmp2.write('\n')
        tmp2.close()
        '''
    df_tmp1 = pd.DataFrame(columns=name, data=addr_to_fix)
    df_tmp1.to_csv("crafted\\" + INPUT_PE + "\\" + INPUT_PE + "_addr_to_fix",
                   encoding='gbk')
    '''
    tmp1 = open("crafted\\" + INPUT_PE + "_addr_to_fix",'w')
    for each in addr_to_fix:
        tmp1.write(str(each[0])+','+str(each[1]))
        tmp1.write('\n')
    tmp1.close()
    '''
    section_data = ''
    offsets = []
    for index in range(len(ori_op)):
        # Offset of each generated piece, measured before the dispatch
        # function is prepended.
        offsets.append(len(section_data))
        section_data += build_section_data(args0[index], args1[index],
                                           args2[index])
    # add dispatch function
    len_funs = len(section_data)
    section_data = add_dispatch_function(ori_address, offsets) + section_data
    section_file = open(INPUT_PE + '_newSectionData', 'wb')
    section_file.write(section_data)
    section_file.close()
    section_size = len(section_data)
    insert_section(len(section_data), section_data, len_funs)
def _get_ida_func_surrogate(func, arch):
    """Build a serialisable description of one IDA function.

    :param func: function object exposing ``startEA``.
    :param arch: processor name; the value ``'arm'`` makes each block's
        start address include the value of the ``T`` register.
    :return: dict with the function's name, id, bounds, API info, comments
        and one entry per flow-chart block (bounds, raw bytes as hex,
        tokenised instructions, referenced data and successor block ids).
    """
    func_surrogate = dict()
    func_surrogate['name'] = idc.GetFunctionName(func.startEA)
    func_surrogate['id'] = func.startEA
    # ignore call-graph at this moment
    func_surrogate['call'] = list()
    func_surrogate['sea'] = func.startEA
    func_surrogate['see'] = idc.FindFuncEnd(func.startEA)
    # api is optional
    func_surrogate['api'] = _get_api(func.startEA)[1]
    func_surrogate['blocks'] = list()

    # comments
    func_surrogate['comments'] = []
    func_surrogate['comments'].extend(get_comments(func.startEA))

    for bb in idaapi.FlowChart(idaapi.get_func(func.startEA)):
        block = dict()
        block['id'] = bb.id
        block['sea'] = bb.startEA
        # ``==`` rather than ``is``: identity comparison against a string
        # literal is False for equal but non-interned strings (for example
        # the result of ``.lower()``), which silently dropped the Thumb bit.
        if arch == 'arm':
            # for arm; the last bit indicates thumb mode.
            block['sea'] += idc.GetReg(bb.startEA, 'T')
        block['eea'] = bb.endEA
        block['name'] = 'loc_' + format(bb.startEA, 'x').upper()
        dat = {}
        block['dat'] = dat
        s = idc.GetManyBytes(bb.startEA, bb.endEA - bb.startEA)
        if s is not None:
            block['bytes'] = "".join("{:02x}".format(ord(c)) for c in s)
        func_surrogate['comments'].extend(get_comments(bb.startEA))

        instructions = list()
        for head in idautils.Heads(bb.startEA, bb.endEA):
            ins = list()
            # e.g. 0x401000L -> "0x401000" with upper-case hex digits
            ins.append(str(hex(head)).rstrip("L").upper().replace("0X", "0x"))
            opr = idc.GetMnem(head)
            if opr == "":
                # Not an instruction; skip it entirely.
                continue
            ins.append(opr)
            for i in range(5):
                opd = idc.GetOpnd(head, i)
                if opd == "":
                    continue
                ins.append(opd)
            instructions.append(ins)

            refs = list(idautils.DataRefsFrom(head))
            for ref in refs:
                # Only the last reference of a head survives, since every
                # iteration writes the same key.
                dat[head] = binascii.hexlify(struct.pack("<Q", idc.Qword(ref)))
        block['src'] = instructions

        # flow chart
        block_calls = list()
        for success_block in bb.succs():
            block_calls.append(success_block.id)
        block['call'] = block_calls
        func_surrogate['blocks'].append(block)
    return func_surrogate
def GetFunBbls(function_ea):
    """
    Get bbls of function.

    Two passes over the heads of the function: the first collects block
    boundaries from function-local code references, the second cuts the head
    sequence at those boundaries and after mnemonics starting with 'j'.

    @function_ea - function address
    @return - bbls of function, a list of [start, end] address lists
    """
    f_start = function_ea
    f_end = idc.FindFuncEnd(function_ea)
    boundaries = set((f_start, ))
    for head in idautils.Heads(f_start, f_end):
        # If the element is an instruction
        if head == idaapi.BADADDR:
            raise Exception("Invalid head for parsing")
        if idc.isCode(idc.GetFlags(head)):
            # Get the references made from the current instruction
            # and keep only the ones local to the function.
            refs = idautils.CodeRefsFrom(head, 0)
            refs_filtered = set()
            for ref in refs:
                if ref >= f_start and ref < f_end:
                    refs_filtered.add(ref)
            refs = refs_filtered
            if refs:
                # If the flow continues also to the next (address-wise)
                # instruction, we add a reference to it.
                # For instance, a conditional jump will not branch
                # if the condition is not met, so we save that
                # reference as well.
                next_head = idc.NextHead(head, f_end)
                if next_head != idaapi.BADADDR and idc.isFlow(
                        idc.GetFlags(next_head)):
                    refs.add(next_head)
                # Update the boundaries found so far.
                boundaries.update(refs)
    #end of for head in idautils.Heads(chunk[0], chunk[1]):
    bbls = []
    bbl = []  # a list of heads
    # NOTE: We can handle if jump xrefs to chunk address space.
    for head in idautils.Heads(f_start, f_end):
        if head in boundaries:
            #print('%d') % head
            if len(bbl) > 0:
                if bbl[0] == head:
                    # The open block already starts here.
                    continue
                # Close the open block at this boundary.
                bbl.append(head)
                bbls.append(bbl)
                bbl = []
            bbl.append(head)
        #elif self.GetInstructionType(head) == self.BRANCH_INSTRUCTION:
        elif idc.GetMnem(head).startswith('j'):
            if len(bbl) > 0 and bbl[0] == head + idc.ItemSize(head):
                continue
            # Close the open block just past the jump and open the next one
            # at that same address.
            bbl.append(head + idc.ItemSize(head))
            bbls.append(bbl)
            bbl = []
            bbl.append(head + idc.ItemSize(head))
        else:
            pass
    # add last basic block
    if len(bbl) and bbl[0] != f_end:
        bbl.append(f_end)
        bbls.append(bbl)
    return bbls
def GetFunEdgesAndBbls(function_ea):
    """
    Get instrumented bbls and the edges between them for one function.

    Returns empty results immediately when IsInstrumentIns(function_ea) is
    false. Otherwise basic blocks and raw edges are derived from the
    function's code references, then both are reduced to blocks whose head
    satisfies IsInstrumentIns; edges into non-instrumented blocks are
    re-targeted through LookForInsChildBbls.

    @function_ea - function address
    @return - tuple (bbls, edges_d, edges_count, SingleBBS, MultiBBS):
              bbls        head -> [head, tail, call_num, mem_num]
              edges_d     head -> list of successor heads
              edges_count number of entries added to edges_d
              SingleBBS   head -> its only predecessor bbl
              MultiBBS    head -> list of predecessor bbls
    """
    bbl = []  # bbl info [head, tail, call_num, mem_num]
    SingleBBS = {}  # head -> pred_bbl
    MultiBBS = {}  # head -> [pred_bbls]
    bbls = {}  # head -> bbl
    bbls2 = {}  # tail -> bbl
    edges_s = set()  # set of (tail, head)
    edges_d = {}  # dict struct. head -> of (head, ..., head)
    edges_count = 0
    edges_s_t = set()  # tmp edges set
    edges_d_t = {}  # tmp edges dict.
    if not IsInstrumentIns(function_ea):
        return bbls, edges_d, edges_count, SingleBBS, MultiBBS
    f_start = function_ea
    f_end = idc.FindFuncEnd(function_ea)
    boundaries = set((f_start, ))  # head of bbl
    for head in idautils.Heads(f_start, f_end):
        # If the element is an instruction
        if head == idaapi.BADADDR:
            raise Exception("Invalid head for parsing")
        if not idc.isCode(idc.GetFlags(head)):
            continue
        # Get the references made from the current instruction
        # and keep only the ones local to the function.
        refs = idautils.CodeRefsFrom(head, 0)
        refs_filtered = set()
        for ref in refs:
            if ref > f_start and ref < f_end:  # can't use ref>=f_start, avoid recursion
                refs_filtered.add(ref)
        refs = refs_filtered
        if refs:
            # If the flow continues also to the next (address-wise)
            # instruction, we add a reference to it.
            # For instance, a conditional jump will not branch
            # if the condition is not met, so we save that
            # reference as well.
            next_head = idc.NextHead(head, f_end)
            if next_head != idaapi.BADADDR and idc.isFlow(
                    idc.GetFlags(next_head)):
                refs.add(next_head)
            # Update the boundaries found so far.
            boundaries.update(refs)
            for r in refs:  # enum all of next ins
                # If the flow could also come from the address
                # previous to the destination of the branching
                # an edge is created.
                if isFlow(idc.GetFlags(r)):
                    prev_head = idc.PrevHead(r, f_start)
                    # NOTE(review): hard-coded 32-bit value, presumably
                    # standing in for BADADDR; confirm for 64-bit databases.
                    if prev_head == 0xffffffffL:
                        #edges_s_t.add((head, r))
                        #raise Exception("invalid reference to previous instruction for", hex(r))
                        pass
                    else:
                        edges_s_t.add((prev_head, r))
                edges_s_t.add((head, r))
    #end of for head in idautils.Heads(chunk[0], chunk[1]):
    last_head = 0
    # NOTE: We can handle if jump xrefs to chunk address space.
    # get bbls. head of bbl is first ins addr, tail of bbl is last ins addr.
    for head in idautils.Heads(f_start, f_end):
        mnem = idc.GetMnem(head)
        if head in boundaries:
            if len(bbl) > 0:
                if bbl[0] == head:
                    continue
                if True:  # IsInstrumentIns(bbl[0]):
                    # Close the open block at the last plain instruction.
                    bbl[1] = last_head
                    bbls[bbl[0]] = bbl
                    bbls2[bbl[1]] = bbl
            bbl = [head, 0, 0, 0]
        #elif self.GetInstructionType(head) == self.BRANCH_INSTRUCTION:
        elif mnem.startswith('j'):
            if len(bbl) > 0 and bbl[0] == head + idc.ItemSize(head):
                continue
            if True:  # IsInstrumentIns(bbl[0]):
                # The jump itself is the tail of the open block.
                bbl[1] = head  # head + idc.ItemSize(head))
                bbls[bbl[0]] = bbl
                bbls2[bbl[1]] = bbl
            bbl = [head + idc.ItemSize(head), 0, 0, 0]
        else:
            last_head = head
        if mnem.startswith('call'):
            bbl[2] += 1
        #if 2 == idc.GetOpType(head, 0): # 2 Memory Reference
        #    bbl[3] += 1
        #if 2 == idc.GetOpType(head, 1): # 2 Memory Reference
        #    bbl[3] += 1
    # add last basic block
    if len(bbl) and bbl[0] != f_end:  # and IsInstrumentIns(bbl[0]):
        bbl[1] = f_end
        bbls[bbl[0]] = bbl
        bbls2[bbl[1]] = bbl
    # edges set -> dict
    # Raw edges are keyed by the source *tail*; translate each to the head
    # of the block owning that tail.
    for e in edges_s_t:
        if e[0] in bbls2:
            bbl_head = bbls2[e[0]][0]
            if bbl_head in edges_d_t:
                edges_d_t[bbl_head].append(e[1])
            else:
                edges_d_t[bbl_head] = [e[1]]
        else:
            print('edge (%x, %x) can not find head bbl.' %
                  (e[0], e[1]))  # a small case. e1 flow e0.
    # revise edges. head bbl and tail bbl of edges must be instrumented.
    for e0 in edges_d_t:
        if not IsInstrumentIns(e0):  # e0 is not instrumented, skip.
            continue
        for e1 in edges_d_t[e0]:
            if IsInstrumentIns(e1):  # e0 and e1 both instrumented, add edge.
                if e0 in edges_d:
                    edges_d[e0].append(e1)
                else:
                    edges_d[e0] = [e1]
                edges_count += 1
            else:
                # e1 is not instrumented, recursively look for instrumented child bbls
                bbls_t = LookForInsChildBbls(e1, edges_d_t, [])
                for b in bbls_t:  # add edge
                    if e0 in edges_d:
                        edges_d[e0].append(b)
                    else:
                        edges_d[e0] = [b]
                    edges_count += 1
    # revise bbls. bbl must be instrumented.
    # Popping while iterating relies on Python 2's keys() returning a list.
    for b in bbls.keys():
        if not IsInstrumentIns(b):
            # if bbls[b][1] in bbls2: # avoid multi del
            #     bbls2.pop(bbls[b][1])
            bbls.pop(b)
    #print('bbls:')
    #i = 0
    #for b in bbls:
    #    i += 1
    #    print('%04d %x, %x' % (i, b, bbls[b][1]))
    #print('edges_d:')
    #i = 0
    #for e0 in edges_d:
    #    for e1 in edges_d[e0]:
    #        i += 1
    #        print('%04d %x, %x' % (i, e0, e1))
    # Build predecessor maps: a head with one predecessor lives in SingleBBS
    # and is moved to MultiBBS when a second predecessor appears.
    for e0 in edges_d:
        if e0 not in bbls:
            print('error:%x have no head' % (e0))  # error
            continue
        for e1 in edges_d[e0]:
            if e1 in MultiBBS:
                MultiBBS[e1].append(bbls[e0])  # add Pred
            elif e1 in SingleBBS:
                MultiBBS[e1] = [SingleBBS[e1], bbls[e0]]  # add Pred
                SingleBBS.pop(e1)  # remove from SingleBBS
            else:
                SingleBBS[e1] = bbls[e0]  # add Pred
    # del bbls which are not instrumented
    return bbls, edges_d, edges_count, SingleBBS, MultiBBS
def GetFuncInputSurrogate(func):
    """Build a serialisable description of one function.

    :param func: function object exposing ``startEA``.
    :return: dict with the function's name, id, bounds, an empty call list
        and one entry per flow-chart block (bounds, name, raw bytes as hex,
        tokenised instructions, referenced data and successor block ids).
        On the ``arm`` processor each block's start address additionally
        includes the value of the ``T`` register.
    """
    arch = idaapi.get_inf_structure().procName.lower()
    function_ea = func.startEA

    function = {
        'name': GetFunctionName(func),
        'id': function_ea,
        # ignore call-graph at this moment
        'call': [],
        'sea': function_ea,
        'see': idc.FindFuncEnd(function_ea),
        'blocks': [],
    }

    # basic bloc content
    for bblock in idaapi.FlowChart(idaapi.get_func(function_ea)):
        block_start = bblock.startEA
        block_end = bblock.endEA
        referenced = {}

        sblock = {'id': bblock.id, 'sea': block_start}
        if arch == 'arm':
            sblock['sea'] += idc.GetReg(block_start, 'T')
        sblock['eea'] = block_end
        sblock['name'] = 'loc_' + format(block_start, 'x').upper()
        sblock['dat'] = referenced

        raw = idc.GetManyBytes(block_start, block_end - block_start)
        if raw is not None:
            sblock['bytes'] = "".join("{:02x}".format(ord(c)) for c in raw)

        listing = []
        for head in idautils.Heads(block_start, block_end):
            mnem = idc.GetMnem(head)
            if mnem == "":
                # Not an instruction; contributes nothing to the block.
                continue
            address_text = str(hex(head)).rstrip("L").upper().replace(
                "0X", "0x")
            operands = [idc.GetOpnd(head, i) for i in range(5)]
            listing.append([address_text, mnem] +
                           [opd for opd in operands if opd != ""])
            for ref in list(idautils.DataRefsFrom(head)):
                referenced[head] = binascii.hexlify(
                    struct.pack("<Q", idc.Qword(ref)))
        sblock['src'] = listing

        # flow chart
        sblock['call'] = [succ.id for succ in bblock.succs()]
        function['blocks'].append(sblock)

    return function
def process_function(arch, func_ea):
    """Export one function's instructions, comments and references.

    Starting from the flow chunk containing ``func_ea``, every chunk is
    walked head by head. Comments, direct/indirect calls, branches and data
    references are recorded in a ``DismantlerDataPacket``; branch targets
    outside the known chunks are queued as new chunks. Chunks reported by
    ``get_chunks`` that were never reached are processed last. The packet and
    a ``FunctionAnalyzer`` built from it are handed to ``instrumentation``.

    arch    -- architecture helper providing process_instruction, is_call,
               is_conditional_branch and is_unconditional_branch.
    func_ea -- start address of the function.
    """
    func_end = idc.FindFuncEnd(func_ea)
    packet = DismantlerDataPacket()
    ida_chunks = get_chunks(func_ea)
    chunks = set()
    # Add to the chunks only the main block, containing the
    # function entry point
    #
    chunk = get_flow_code_from_address(func_ea)
    if chunk:
        chunks.add(chunk)
    # Make "ida_chunks" a set for faster searches within
    ida_chunks = set(ida_chunks)
    # Index the IDA chunks by their start address.
    ida_chunks_idx = dict(zip([c[0] for c in ida_chunks], ida_chunks))
    func = idaapi.get_func(func_ea)
    # Function-level comments are attached to the first head processed.
    comments = [idaapi.get_func_cmt(func, 0), idaapi.get_func_cmt(func, 1)]
    # Copy the list of chunks into a queue to process
    #
    chunks_todo = [c for c in chunks]
    while True:
        # If no chunks left in the queue, exit
        if not chunks_todo:
            if ida_chunks:
                # Fall back to IDA chunks not reached via code flow.
                chunks_todo.extend(ida_chunks)
            else:
                break
        chunk_start, chunk_end = chunks_todo.pop()
        # Mark the matching IDA chunk as handled.
        if ida_chunks_idx.has_key(chunk_start):
            ida_chunks.remove(ida_chunks_idx[chunk_start])
            del ida_chunks_idx[chunk_start]
        for head in idautils.Heads(chunk_start, chunk_end):
            comments.extend((idaapi.get_cmt(head, 0), idaapi.get_cmt(head,
                                                                     1)))
            comment = '\n'.join([c for c in comments if c is not None])
            comment = comment.strip()
            if comment:
                packet.add_comment(head, comment)
            comments = list()
            if idc.isCode(idc.GetFlags(head)):
                instruction = arch.process_instruction(packet, head)
                # if there are other references than
                # flow add them all.
                if list(idautils.CodeRefsFrom(head, 0)):
                    # for each reference, including flow ones
                    for ref_idx, ref in enumerate(
                            idautils.CodeRefsFrom(head, 1)):
                        if arch.is_call(instruction):
                            # This two conditions must remain separated, it's
                            # necessary to enter the enclosing "if" whenever
                            # the instruction is a call, otherwise it will be
                            # added as an unconditional jump in the last else
                            #
                            if ref in list(idautils.CodeRefsFrom(head, 0)):
                                packet.add_direct_call(head, ref)
                        elif ref_idx > 0 and arch.is_conditional_branch(
                                instruction):
                            # The ref_idx is > 0 in order to avoid processing the
                            # normal flow reference which would effectively imply
                            # that the conditional branch is processed twice.
                            # It's done this way instead of changing the loop's head
                            # from CodeRefsFrom(head, 1) to CodeRefsFrom(head, 0) in
                            # order to avoid altering the behavior of other conditions
                            # which rely on it being so.
                            # FIXME
                            # I don't seem to check for the reference here
                            # to point to valid, defined code. I suspect
                            # this could lead to a failure when exporting
                            # if such situation appears. I should test if
                            # it's a likely scenario and probably just add
                            # an isHead() or isCode() to address it.
                            packet.add_conditional_branch_true(head, ref)
                            packet.add_conditional_branch_false(
                                head, idaapi.next_head(head, chunk_end))
                            # If the target is not in our chunk list
                            if not address_in_chunks(ref, chunks):
                                new_chunk = get_flow_code_from_address(ref)
                                # Add the chunk to the chunks to process
                                # and to the set containing all visited
                                # chunks
                                if new_chunk is not None:
                                    chunks_todo.append(new_chunk)
                                    chunks.add(new_chunk)
                        elif arch.is_unconditional_branch(instruction):
                            packet.add_unconditional_branch(head, ref)
                            # If the target is not in our chunk list
                            if not address_in_chunks(ref, chunks):
                                new_chunk = get_flow_code_from_address(ref)
                                # Add the chunk to the chunks to process
                                # and to the set containing all visited
                                # chunks
                                if new_chunk is not None:
                                    chunks_todo.append(new_chunk)
                                    chunks.add(new_chunk)
                #skip = False
                for ref in idautils.DataRefsFrom(head):
                    packet.add_data_reference(head, ref)
                    # Get a data reference from the current reference's
                    # location. For instance, if 'ref' points to a valid
                    # address and such address contains a data reference
                    # to code.
                    target = list(idautils.DataRefsFrom(ref))
                    if target:
                        target = target[0]
                    else:
                        target = None
                    if target is None and arch.is_call(instruction):
                        # A call through a location with no further data
                        # reference is recorded as an imported function.
                        imp_name = idc.Name(ref)
                        imp_module = get_import_module_name(ref)
                        imported_functions.add((ref, imp_name, imp_module))
                        packet.add_indirect_virtual_call(head, ref)
                    elif target is not None and idc.isHead(target):
                        # for calls "routed" through this reference
                        if arch.is_call(instruction):
                            packet.add_indirect_call(head, target)
                        # for unconditional jumps "routed" through this reference
                        elif arch.is_unconditional_branch(instruction):
                            packet.add_unconditional_branch(head, target)
                        # for conditional "routed" through this reference
                        elif arch.is_conditional_branch(instruction):
                            packet.add_conditional_branch_true(head, target)
                            packet.add_conditional_branch_false(
                                head, idaapi.next_head(head, chunk_end))
    f = FunctionAnalyzer(arch, func_ea, packet)
    instrumentation.new_packet(packet)
    instrumentation.new_function(f)
def deobfuscate_function(addr):
    """Locate the branch table used by a ``MOV PC`` dispatcher in a function.

    Starting from the function at ``addr`` the routine walks backwards
    through the instructions feeding the ``MOV PC`` (the ``LDR.W`` that
    loads the target and the PC-relative ``ADD`` that forms the table
    base), reads the table entries, and prints the subroutine boundary
    reported by ``find_subroutine_boundary``.

    Args:
        addr: Address that must be the first chunk start of a function
            (checked against ``idc.FirstFuncFchunk``).

    Returns:
        None. Every outcome, including each early exit, returns ``None``;
        diagnostics are written with ``print``.
    """
    if addr != idc.FirstFuncFchunk(addr):
        print("[DEOBF] Address %X is not the start of a function." % addr)
        return

    # Static data.
    func_start = addr
    func_end = idc.FindFuncEnd(addr)

    # 1. Find MOV PC
    (mov_addr, mov_match) = search_forward(func_start, func_end, regex_mov)
    if mov_addr is None:
        # Functions without the pattern are skipped silently.
        return

    # 2. Find the LDR.W that last wrote the register moved into PC.
    mov_reg = mov_match.group(1)
    ldr_addr = search_register_modifier_backward(mov_addr, func_start, mov_reg)
    if ldr_addr is None:
        print("[DEOBF] No LDR.W was found in %s" % idc.GetFunctionName(
            func_start))
        return

    ldr_match = regex_ldrw.match(idc.GetDisasm(ldr_addr))
    if ldr_match is None:
        print("[DEOBF] Modifier of %s found from %X is not a LDR.W" % (
            mov_reg, mov_addr))
        return

    # 3. Find table offset
    add_addr = search_register_modifier_backward(ldr_addr, func_start,
                                                 ldr_match.group(2))
    if add_addr is None:
        # TODO: Check if belongs to a previously found graph.
        return

    if idc.GetOpnd(add_addr, 1) != 'PC':
        print("[DEOBF] ADD does not use PC at %X" % add_addr)
        return

    add_reg = idc.GetOpnd(add_addr, 0)
    ldr2_addr = search_register_modifier_backward(idc.PrevHead(add_addr),
                                                  func_start, add_reg)
    if ldr2_addr is None:
        # The two earlier searches are guarded the same way; without this
        # check a failed search would be handed to GetOperandValue.
        print("[DEOBF] No modifier of %s was found above %X" % (
            add_reg, add_addr))
        return

    # Operand 1 is the address of a literal (loc_80054 in the author's
    # sample); the literal itself is the PC-relative offset of the table.
    literal_addr = idc.GetOperandValue(ldr2_addr, 1)
    table_offset = idc.Dword(literal_addr)
    # NOTE(review): the "+ 2" presumably models the PC read-ahead relative
    # to the instruction after the ADD - confirm against the target ISA.
    opp_val = table_offset + idc.NextHead(add_addr) + 2  # Address of the table.

    # 4. Read table.
    table = []
    table_addr = opp_val
    # Stop at the end of the containing segment so a table that is not
    # followed by any named address cannot be scanned without limit.
    # SegEnd yields BADADDR for an address outside every segment, which
    # leaves the scan limited only by the name check, as before.
    scan_limit = idc.SegEnd(opp_val)
    while table_addr < scan_limit:
        table_entry = idc.Dword(table_addr)
        if table_entry > 0:
            table.append(table_entry)

        table_addr = table_addr + 4
        # A named address marks the first item after the table.
        if idc.Name(table_addr):
            break

    # - We also have to add the starting block to the table.
    table.append(func_start)

    # 5. Find subroutine boundary
    (sub_start, sub_end) = find_subroutine_boundary(opp_val, table)
    print("Start: %X - End: %X" % (sub_start, sub_end))
def print_deepest_path(self):
    """Write one CSV row per non-library function with its longest found path.

    For every node in ``self.root`` whose function flags carry neither
    ``FUNC_LIB`` nor ``FUNC_THUNK``, a depth-first walk over ``child`` links
    records the longest chain of nodes reached, and the instructions of each
    node's ``block`` on that chain are encoded as space-separated tokens.
    The rows are written to ``<file_store_path>\\test.csv`` with the header
    ``name, start, end, Op``.

    Side effects:
        Rebinds the module-level names ``deepset``, ``path`` and
        ``all_path`` (the walk keeps its state there) and creates or
        overwrites the CSV file.

    Returns:
        None.
    """
    # Declared before any assignment: the original placed these statements
    # after the names were already bound in this function, which Python 2
    # only warns about and Python 3 rejects as a SyntaxError.
    global deepset, path, all_path

    def ext_instruction(addr_start, addr_end):
        """Return the token string for the instructions in [addr_start, addr_end)."""
        tokens = []
        for addr in Heads(addr_start, addr_end):
            mnem = idc.GetMnem(addr)
            op_type1 = idc.GetOpType(addr, 0)
            op_type2 = idc.GetOpType(addr, 1)
            # Operand types 1 and 4 are replaced by the operand text.
            # NOTE(review): the original comment listed Capstone CS_OP_*
            # values (1 = REG, 2 = IMM, 3 = MEM); idc.GetOpType uses IDA's
            # o_* numbering, where 1 is o_reg and 4 is o_displ - confirm
            # which set was intended.
            if op_type1 in (1, 4):
                op_type1 = idc.GetOpnd(addr, 0)
            if op_type2 in (1, 4):
                op_type2 = idc.GetOpnd(addr, 1)

            if mnem == "call":
                callee_ea = LocByName(GetOpnd(addr, 0))
                fflags = idc.get_func_flags(callee_ea)
                if (fflags & idc.FUNC_LIB) or (fflags & idc.FUNC_THUNK):
                    # Library/thunk calls keep the callee name as the token.
                    tokens.append(mnem + '_' + idc.GetOpnd(addr, 0) + '_0')
                    continue

            tokens.append(str(mnem) + '_' + tran(str(op_type1)) +
                          '_' + tran(str(op_type2)))
        # Every token is preceded by one space, exactly as the original
        # incremental concatenation produced.
        return ''.join(' ' + token for token in tokens)

    def print_path_deepest(head):
        """Depth-first walk that keeps the longest chain seen in ``deepset``."""
        global deepset, path, all_path
        if head is None or head in all_path:
            return
        all_path.append(head)
        path.append(head)
        if len(deepset) < len(path):
            deepset = list(path)
        for elem in head.child:
            print_path_deepest(elem)
        # ``head`` is the last entry again once its children are done.
        path.pop()

    deepset = list()
    path = list()
    all_path = list()

    # 'wb' is the mode the Python 2 csv module expects; the ``with`` block
    # closes (and therefore flushes) the file, which the original never did.
    with open(file_store_path + '\\' + 'test' + '.csv', 'wb') as f:
        saveFile = csv.writer(f)
        saveFile.writerow(["name", "start", "end", "Op"])
        for fun_node in self.root:
            name_fun = GetFunctionName(fun_node.addr)
            fflags = idc.get_func_flags(fun_node.addr)
            if (fflags & idc.FUNC_LIB) or (fflags & idc.FUNC_THUNK):
                continue

            # Fresh walk state for each function.
            path = list()
            deepset = list()
            all_path = list()
            print_path_deepest(fun_node)

            fun_addr_end = idc.FindFuncEnd(fun_node.addr)
            row_fun = ''.join(
                ' ' + ext_instruction(bb.block.startEA, bb.block.end_ea)
                for bb in deepset)
            saveFile.writerow(
                [name_fun, hex(fun_node.addr), hex(fun_addr_end), row_fun])