def _collect_data(self, collect_args):
    """Append the names of called library/imported functions.

    Walks the code references leaving ``collect_args["func_item"]`` and, for
    each target that is either found in one of ``self._imported_modules`` or
    flagged ``FUNC_LIB``, appends its name to ``self._lib_calls_list``.
    """
    func_item = collect_args["func_item"]

    for target in list(idautils.CodeRefsFrom(func_item, False)):
        if idc.GetFunctionFlags(target) == -1:
            # Look for the target among the dynamically imported functions.
            wanted = any(
                target in module.get_addresses()
                for module in self._imported_modules
            )
        else:
            # Statically imported library code. Comparing the owning
            # functions keeps references to the function itself out.
            wanted = (
                (idc.GetFunctionFlags(target) & idaapi.FUNC_LIB) != 0
                and idaapi.get_func(target) != idaapi.get_func(func_item)
            )

        # Data is gathered only for library functions or imports.
        if wanted:
            self._lib_calls_list.append(idc.NameEx(func_item, target))
def _collect_data(self, collect_args):
    """Count calls to library/imported functions by name.

    Walks the code references leaving ``collect_args["func_item"]`` and, for
    each target that is either found in one of ``self._imported_modules`` or
    flagged ``FUNC_LIB``, increments its entry in
    ``self._lib_calls_counters``.
    """
    func_item = collect_args["func_item"]
    counters = self._lib_calls_counters

    for target in list(idautils.CodeRefsFrom(func_item, False)):
        if idc.GetFunctionFlags(target) == -1:
            # Look for the target among the dynamically imported functions.
            wanted = any(
                target in module.get_addresses()
                for module in self._imported_modules
            )
        else:
            # Statically imported library code, excluding references whose
            # owning function is the same as func_item's.
            wanted = (
                (idc.GetFunctionFlags(target) & idaapi.FUNC_LIB) != 0
                and idaapi.get_func(target) != idaapi.get_func(func_item)
            )

        if not wanted:
            continue

        name = idc.NameEx(func_item, target)
        if name not in counters:
            counters[name] = 0
        counters[name] += 1
def __init__(self, start_ea, end_ea):
    '''
    Class constructor.

    @start_ea - An address in the head function.
    @end_ea   - An address in the tail function.

    Raises AlleyCatException if end_ea is not part of a function.
    Returns None.
    '''
    global ALLEYCAT_LIMIT
    self.limit = ALLEYCAT_LIMIT
    self.paths = []

    # We work backwards via xrefs, so we start at the end and end at the start
    try:
        start = idaapi.get_func(end_ea).startEA
    except Exception:
        # Report end_ea: the local `end` has not been assigned yet here.
        raise AlleyCatException("Address 0x%X is not part of a function!" % end_ea)

    try:
        end = idaapi.get_func(start_ea).startEA
    except Exception:
        # Best effort: an unresolvable head falls back to BADADDR.
        end = idc.BADADDR

    print("Generating call paths from %s to %s..." % (idc.Name(end), idc.Name(start)))
    self._build_paths(start, end)
def def_functions(s_start):
    """Scan a segment for prologue-like instructions and create functions.

    Walks from ``s_start`` to the segment end. Addresses already inside a
    function are skipped chunk by chunk; elsewhere an instruction that saves
    LR (STM with SP!, PUSH, PUSH.W or STR.W) triggers ``idc.MakeFunction``.
    Returns the number of functions that were successfully created.
    """

    def saves_lr(ea):
        # also add STMFD
        mnem = idc.GetMnem(ea)
        if mnem == "STM" and "SP!" in idc.GetOpnd(ea, 0) and "LR" in idc.GetOpnd(ea, 1):
            return True
        return mnem in ("PUSH", "PUSH.W", "STR.W") and "LR" in idc.GetOpnd(ea, 0)

    created = 0
    cursor = s_start
    seg_end = idc.GetSegmentAttr(s_start, SEGATTR_END)
    print("0x%08x 0x%08x" % (s_start, seg_end))

    while cursor < seg_end:
        print("Testing address 0x%08x" % cursor)

        # Optimization assumes that function chunks are consecutive
        # (no "function-in-function" monkey business).
        if idaapi.get_func(cursor):
            boundary = idc.NextFunction(cursor)
            furthest = cursor
            for chunk in idautils.Chunks(cursor):
                # Only look ahead to chunk ends that lie past the current
                # position and do not jump over the next function.
                if furthest < chunk[1] <= boundary:
                    furthest = chunk[1]
            cursor = cursor + 2 if furthest == cursor else furthest
            continue

        # Data refs to function starts do occur, so code refs are not used
        # as a filter here.
        if not saves_lr(cursor):
            cursor += 2
            continue

        print("Found function at 0x%08x" % cursor)
        idc.MakeFunction(cursor)
        if idaapi.get_func(cursor) is None:
            print("Failed to create function! Undefined instructions?")
            cursor += 2
            continue

        created += 1
        furthest = max([-1] + [chunk[1] for chunk in idautils.Chunks(cursor)])
        # No usable chunk end: step forward by one halfword instead.
        cursor = furthest if furthest != -1 else cursor + 2

    print("finished segment")
    return created
def __init__(self, start=None, end=None, include=None, exclude=None, xrefs=None, noxrefs=None):
    """Find paths from every ``start`` to every ``end`` and graph them.

    Each argument is normalised through ``self._obj2list`` and stored on the
    instance. Omitted arguments behave like an empty list. A
    ``FunctionPathFinder`` is used when the first end address is the start
    of a function, otherwise a ``BlockPathFinder``. Nothing is searched when
    there are no start or end points, or when the first start is not inside
    a function.
    """
    # Fresh lists per call: list defaults in the signature would be shared
    # between all instances.
    start = [] if start is None else start
    end = [] if end is None else end
    include = [] if include is None else include
    exclude = [] if exclude is None else exclude
    xrefs = [] if xrefs is None else xrefs
    noxrefs = [] if noxrefs is None else noxrefs

    self.start = self._obj2list(start)
    self.end = self._obj2list(end)
    self.include = self._obj2list(include)
    self.exclude = self._obj2list(exclude)
    self.xrefs = self._obj2list(xrefs)
    self.noxrefs = self._obj2list(noxrefs)

    # Without at least one source and one destination there is no path to
    # look for (and self.end[0] below would raise IndexError).
    if not self.start or not self.end:
        return

    # NOTE(review): the converted address is not used afterwards; the call
    # is kept in case _obj2ea validates its input - confirm.
    self._obj2ea(self.start[0])

    if not idaapi.get_func(self.start[0]):
        return

    end_func = idaapi.get_func(self.end[0])
    if end_func and end_func.startEA == self.end[0]:
        pfclass = FunctionPathFinder
    else:
        pfclass = BlockPathFinder
    print(pfclass)

    results = []
    for destination in self.end:
        pf = pfclass(destination)
        for source in self.start:
            results += pf.paths_from(source,
                                     exclude=self.exclude,
                                     include=self.include,
                                     xrefs=self.xrefs,
                                     noxrefs=self.noxrefs)
        del pf

    print("RESULTS: %s" % (results,))

    if results:
        pg = PathFinderGraph(results)
        pg.Show()
        del pg
def try_get_function(address):
    """Return the function at ``address``, trying to create one if missing.

    Returns None when no function exists and ``try_mark_as_function`` fails.
    """
    found = idaapi.get_func(address)
    if found:
        return found

    debug("Error: couldn't find function for", hex(address))
    if try_mark_as_function(address):
        return idaapi.get_func(address)
    return None
def is_same_function(ea1, ea2):
    """Return True when both addresses belong to the same function."""
    first = idaapi.get_func(ea1)
    second = idaapi.get_func(ea2)
    # Explicit identity tests on purpose: `None in (first, second)` will not
    # work because of a bug in IDAPython in the way functions are compared.
    if first is None or second is None:
        return False
    return first.startEA == second.startEA
def get_ida_func(ea=None):
    """Return the function containing ``ea``, or None.

    When ``ea`` is None the address from ``idc.ScreenEA()`` is used.
    """
    if ea is None:
        ea = idc.ScreenEA()
    func = idaapi.get_func(ea)
    return func if func else None
def __OKButtonPressed(self):
    """Close the dialog and process every xref target in the entered range.

    Reads the hexadecimal start/end addresses and the structure name from
    the edit fields, creates the structure, then dispatches each xref target
    of every head in the range (and of the end address itself) to
    ``__ProcessFunc`` or ``__ProcessAddress``.
    """
    self.__main_window.close()

    start_addr = int(self.__start_edit.text(), 16)
    end_addr = int(self.__end_edit.text(), 16)
    struc_name = str(self.__name_edit.text())
    struc_id = idc.AddStruc(-1, struc_name)

    # The end address gets the same treatment as the heads before it.
    for addresses in (idautils.Heads(start_addr, end_addr), (end_addr,)):
        for addr in addresses:
            for xref in idautils.XrefsFrom(addr):
                if idaapi.get_func(xref.to):
                    self.__ProcessFunc(xref.to, struc_id)
                else:
                    self.__ProcessAddress(xref.to, struc_id)
def __init__(self, start_ea, end_ea, quiet=False):
    """Resolve both addresses to function starts and hand off to the base.

    @start_ea - An address in the head function.
    @end_ea   - An address in the tail function.
    @quiet    - Forwarded unchanged to the base class constructor.

    Raises AlleyCatException if end_ea is not part of a function.
    """
    # We work backwards via xrefs, so we start at the end and end at the start
    try:
        start = idaapi.get_func(end_ea).startEA
    except Exception:
        raise AlleyCatException("Address 0x%X is not part of a function!" % end_ea)

    try:
        end = idaapi.get_func(start_ea).startEA
    except Exception:
        # Best effort: an unresolvable head falls back to BADADDR.
        end = idc.BADADDR

    super(AlleyCatFunctionPaths, self).__init__(start, end, quiet)
def get_cursor_func_ref():
    """
    Get the function reference under the user cursor.

    Returns BADADDR or a valid function address.
    """

    def is_function_start(ea):
        func = idaapi.get_func(ea)
        return func and func.startEA == ea

    form = idaapi.get_current_tform()
    form_type = idaapi.get_tform_type(form)

    # the hexrays vdui, when one is available for this form
    vdui = idaapi.get_tform_vdui(form)

    if vdui:
        # hexrays view is active
        target = vdui.item.get_ea()
    elif form_type != idaapi.BWN_DISASM:
        # unsupported/unknown view is active
        return idaapi.BADADDR
    else:
        # disassembly view is active
        target = idaapi.get_screen_ea()

        # an operand value that is itself a function start wins
        operand = idc.GetOperandValue(target, idaapi.get_opnum())
        if is_function_start(operand):
            return operand

    # otherwise the cursor address itself must be a function start
    if is_function_start(target):
        return target

    return idaapi.BADADDR
# Rebuild self.funcs as a dict that maps function start addresses to empty
# sets. self.checksums() is called first when self.cksums is falsy. A key is
# (re)set for the function containing each address in self.cksums, and a key
# is added for each function containing an xref to that address unless the
# key is already present.
# NOTE(review): this definition is collapsed onto one line, so whether the
# xref loop is nested under `if func:` cannot be determined here - confirm
# against the properly indented source before restructuring.
def _generate_checksum_xrefs_table(self): self.funcs = {} if not self.cksums: self.checksums() for cksum in self.cksums: func = idaapi.get_func(cksum) if func: self.funcs[func.startEA] = set() for xref in idautils.XrefsTo(cksum): func = idaapi.get_func(xref.frm) if func and not self.funcs.has_key(func.startEA): self.funcs[func.startEA] = set()
def function(cls):
    '''Return the function at the current address, raising `FunctionNotFoundError` if there is none.'''
    res = idaapi.get_func(cls.address())
    if res is not None:
        return res
    qualified = '.'.join((__name__, cls.__name__))
    raise internal.exceptions.FunctionNotFoundError(u"{:s}.function() : Unable to locate the current function.".format(qualified))
def _find_leafs(self):
    """Collect every function that contains no call instruction.

    Each leaf is appended to ``self.functions`` as a ``Function`` record and
    the list is finally sorted by xref count, largest first.
    """
    # Loop through every function
    for func_ea in idautils.Functions():
        func = idaapi.get_func(func_ea)
        if not func:
            continue

        leaf_function = True
        ea = func.startEA

        # Loop through all instructions in this function looking for call
        # instructions; if found, then this is not a leaf. endEA is the
        # first address past the function, so it must not be inspected.
        while ea < func.endEA:
            idaapi.decode_insn(ea)
            if idaapi.is_call_insn(ea):
                leaf_function = False
                break
            ea += self.arch.insn_size

        if leaf_function:
            self.functions.append(
                Function(
                    start=func.startEA,
                    end=func.endEA,
                    leaf=True,
                    loop=self.has_loop(func),
                    argc=self.argp.argc(func),
                )
            )

    # Sort leafs by xref count, largest first
    self.functions.sort(key=lambda f: f.xrefs, reverse=True)
def eFunc(self, address, *args):
    """Emulate the function containing ``address`` and print its result.

    The function's code is copied into a fresh Unicorn instance, stack and
    arguments are set up from ``args``, and emulation runs from the function
    start until ``self.RA`` is reached. Errors are printed, not raised.
    """
    func = get_func(address)
    if func is None:
        # Nothing to emulate; report it the same way emulator errors are.
        print("#ERROR: no function found at %s" % (address,))
        return

    try:
        uc = Uc(self.arch, self.mode)

        # init code: grow the mapping in PAGE_ALIGN steps until it reaches
        # the end of the function
        addr = self._alignAddr(func.startEA)
        size = PAGE_ALIGN
        while addr + size < func.endEA:
            size += PAGE_ALIGN
        uc.mem_map(addr, size)
        code = self._getOriginData(addr, size)
        uc.mem_write(addr, code)

        self._initStackAndArgs(uc, self.RA, *args)
        self._initData(uc)

        # add the invalid memory access hook
        uc.hook_add(UC_HOOK_MEM_READ_UNMAPPED | UC_HOOK_MEM_WRITE_UNMAPPED |
                    UC_HOOK_MEM_FETCH_UNMAPPED, self._hook_mem_invalid)

        # start emulate
        uc.emu_start(func.startEA, self.RA)

        print("Euclation done. Below is the Result:")
        res = uc.reg_read(self.RES_REG)
        print(">>> function result = %d" % res)
    except UcError as e:
        print("#ERROR: %s" % e)
def decoder():
    """Run the external decoder on the current function until it succeeds.

    The loader is started up to 1000 times with the input file path and the
    function start address. On the first zero exit status its output is
    stored as a repeatable function comment.
    """
    loader = "C:\\GitHub\\GHAST_priv8\\py\\PnkDecoder.exe"
    filepath = GetInputFilePath()
    ea = ScreenEA()
    print("Using: %s" % filepath)

    func = idaapi.get_func(ea).startEA

    # screw you non-relocatable PE file.
    for _ in xrange(0, 1000):
        proc = subprocess.Popen([loader, filepath, hex(func)],
                                stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT)
        out = proc.communicate()
        if proc.returncode != 0:
            continue
        comment = "result = %s" % out[0]
        print("Successfully decoded %s" % comment)
        SetFunctionCmt(func, comment, 1)
        break
    else:
        # No attempt exited cleanly.
        print("Failed, please try again!")
# Build and compile the Sibyl settings form.
# The form template gets one checkbox entry per name in AVAILABLE_TESTS, all
# of them initially selected (value has one bit set per test). The
# 'cbFunc' dropdown is pre-selected with the start address of the function
# at idc.ScreenEA().
# NOTE(review): the result of idaapi.get_func() is dereferenced without a
# None check, so this presumably expects the cursor to be inside a
# function - confirm against callers.
# NOTE(review): the raw form template below is a multi-line string whose
# line breaks are not visible in this collapsed view; leave it untouched.
def __init__(self): addr = idc.ScreenEA() func = idaapi.get_func(addr) tests_choice = "\n".join(map(lambda x: "<%s:{r%s}>" % (x, x), AVAILABLE_TESTS)) ida_kernwin.Form.__init__(self, r"""BUTTON YES* Launch BUTTON CANCEL NONE Sibyl Settings {FormChangeCb} Apply on: <One function:{rOneFunc}> <All functions:{rAllFunc}>{cMode}> <Targeted function:{cbFunc}> Testsets to use: %s{cTest}> """ % tests_choice, { 'FormChangeCb': ida_kernwin.Form.FormChangeCb(self.OnFormChange), 'cMode': ida_kernwin.Form.RadGroupControl(("rOneFunc", "rAllFunc")), 'cTest': ida_kernwin.Form.ChkGroupControl(map(lambda x: "r%s" % x, AVAILABLE_TESTS), value=(1 << len(AVAILABLE_TESTS)) - 1), 'cbFunc': ida_kernwin.Form.DropdownListControl( items=self.available_funcs, readonly=False, selval="0x%x" % func.startEA), } ) self.Compile()
def cfunc_from_ea(ea):
    """Decompile the function containing ``ea``; None if there is no function."""
    func = idaapi.get_func(ea)
    return None if func is None else idaapi.decompile(func)
def show_decompiler():
    """Open a decompiler form for the function under the cursor.

    ``decompiler_form`` is reloaded on every call. Any failure is reported
    through ``traceback.print_exc()`` instead of being raised.
    """
    import idc
    import host
    import host.ui
    import traceback
    import sys
    import decompiler_form

    reload(decompiler_form)

    try:
        start = idaapi.get_func(idc.here()).startEA
        print('Decompiling %x' % (start, ))
        decompiler_form.DecompilerForm(start).Show()
    except:
        traceback.print_exc()

    return
def log_stack_chains(chains):
    """Write every chain to ``LOG_PATH/stack_chains`` and print totals.

    String elements are written as they are. Any other element is treated
    as an address whose dword value is rendered as
    ``[address] function+offset``. Chains are separated by a blank line.
    Chains with more than three elements are counted as long chains.
    """
    long_chains = 0

    # `with` closes the log even when formatting an entry raises.
    with open("%s/%s" % (LOG_PATH, "stack_chains"), "wb") as f:
        for c in chains:
            if len(c) > 3:
                long_chains += 1
            for a in c:
                if isinstance(a, str):
                    s = a
                else:
                    # Read the stored value once and reuse it.
                    target = Dword(a)
                    s = "[0x%08x] %s+0x%x" % (
                        a,
                        str(idc.GetFunctionName(target)),
                        target - idaapi.get_func(target).startEA,
                    )
                f.write(s)
                f.write("\n")
            f.write("\n")

    print("%d chains found" % len(chains))
    print("%d long chains" % long_chains)
def ret_addr(ea):
    """Tell whether ``ea`` looks like a return address.

    True when ``ea`` (with an odd low bit cleared) lies inside a function
    and the previous instruction's mnemonic contains "BL".
    """
    # We can't assume Thumb only, so ARM cases are kept as they are and only
    # odd (Thumb) addresses are adjusted.
    ea -= ea % 2

    # The is-in-function check is enough (a segment check would be
    # redundant) if we trust function identification anyway.
    if not idaapi.get_func(ea):
        return False

    # Not every preceding instruction is necessarily a call, but "the"
    # previous one should always be.
    prev = idautils.DecodePreviousInstruction(ea)
    return bool(prev and "BL" in idc.GetMnem(prev.ea))
def get_op(ea, op, stkvars=None):
    '''ea_t -> int -> opt:{int : tinfo_t} -> op_ret'''
    insn = idautils.DecodeInstruction(ea)
    insn.Operands = get_operands(insn)  # for mips_op_hack
    op = mips_op_hack(insn, op)
    operand = insn[op]

    if operand.type == idaapi.o_reg:
        # gpr, XXX sorta MIPS-specific
        return op_ret(op_ty.reg, regs.gpr(operand.reg), 0)

    if operand.type == idaapi.o_idpspec1:
        # fpr, XXX sorta MIPS-specific
        return op_ret(op_ty.reg, regs.fpr(operand.reg), 0)

    if operand.type in [idaapi.o_near, idaapi.o_mem]:
        return op_ret(op_ty.name, idc.Name(operand.addr), 0)

    if idc.isStkvar1(idc.GetFlags(ea)):
        # IDA seems to set this flag even for operands beyond the second,
        # i.e. both of these are true for isStkvar1:
        # .text:10003A84 sd $a1, 0x2E0+var_58($sp)
        # .text:10003A68 addiu $a1, $sp, 0x2E0+var_2D8
        try:
            owner = idaapi.get_func(ea)
            off = idaapi.calc_stkvar_struc_offset(owner, ea, op)
            (name, ti) = stkvars[off]
            return op_ret_for_ti(ti, name, off, off)
        except KeyError:
            raise OperandUnresolvableError('unable to get operand %u at %s' % (op, idc.atoa(ea)))

    if operand.type in [idaapi.o_imm, idaapi.o_displ]:
        return cpu_ida.ida_current_cpu().data.get_op_addrmode(ea, op, insn)

    raise OperandUnresolvableError('unable to get operand %u at %s' % (op, idc.atoa(ea)))
def find_xrefs(self, ea):
    """Return the start addresses of all functions that reference ``ea``."""
    owners = (idaapi.get_func(xref.frm) for xref in XrefsTo(ea))
    return {owner.startEA for owner in owners if owner}
def process_func_for_string(str, f):
    """Locate the function that references a string and report it.

    Searches the database for the quoted string, takes the first xref to it
    and resolves the containing function. The (string, xref, function)
    address triple is printed and, when ``f`` is truthy, also written to it.
    Returns True on success and False when any lookup step fails.
    """
    loc = idaapi.find_binary(0, idc.BADADDR, "\"%s" % str, 16, 0)
    if loc == idc.BADADDR:
        print("String '%s' not found" % str)
        return False

    # Only the first cross reference is considered.
    first = next(iter(idautils.XrefsTo(loc)), None)
    xrEa = 0 if first is None else first.frm
    if xrEa == 0:
        print("No xrefs to string '%s'" % str)
        return False

    fn = idaapi.get_func(xrEa)
    if not fn:
        print("No function at xref to string '%s' (at %x)" % (str, xrEa))
        return False

    fnEa = fn.startEA
    if isThumb(fnEa):
        fnEa += 1

    label = "// %s" % str
    entry = "{0x%x, 0x%x, 0x%x}," % (loc, xrEa, fnEa)

    if f:
        f.write("\t%s\n" % label)
        f.write("\t%s\n" % entry)

    print(label)
    print(entry)
    return True
def FindUnFunction(self, StringEA):
    """Return the first function holding a data reference to ``StringEA``, or None."""
    owners = (idaapi.get_func(ref) for ref in DataRefsTo(StringEA))
    return next((owner for owner in owners if owner), None)
def main():
    """Colour calls, returns and conditional jumps in the current function.

    Calls use the blue colour, returns red and conditional jumps yellow;
    indirect calls and indirect jumps use green instead.
    """
    eip = idaapi.get_screen_ea()
    items = idaapi.func_item_iterator_t()
    items.set(idaapi.get_func(eip))

    more = items.first()
    while more:
        pc = items.current()

        # Items that do not decode to an instruction are skipped.
        if idaapi.decode_insn(items.current()) != 0:
            if inst_is_call(pc):
                color = get_blue()
                if is_indirect(pc):
                    color = get_green()
                idc.SetColor(pc, CIC_ITEM, color)
            elif inst_is_ret(pc):
                idc.SetColor(pc, CIC_ITEM, get_red())
            elif inst_is_jcc(pc):
                color = get_yellow()
                if is_indirect(pc):
                    color = get_green()
                idc.SetColor(pc, CIC_ITEM, color)

        more = items.next_code()
def JumpToTop():
    """Jump to the start of the function under the cursor, if there is one."""
    func = idaapi.get_func(idaapi.get_screen_ea())
    if func:
        idaapi.jumpto(func.startEA)
def JumpToBottom():
    """Jump to the address just before the end of the function under the cursor."""
    func = idaapi.get_func(idaapi.get_screen_ea())
    if func:
        idaapi.jumpto(idaapi.prevaddr(func.endEA))
def Functions(start, end):
    """
    Get a list of functions

    @param start: start address
    @param end: end address

    @return: list of function start addresses between start and end

    @note: The last function that starts before 'end' is included even
    if it extends beyond 'end'.
    """
    found = []

    # A function that already covers 'start' comes first.
    containing = idaapi.get_func(start)
    if containing:
        found.append(containing.startEA)

    following = idaapi.get_next_func(start)
    while following and following.startEA < end:
        found.append(following.startEA)
        following = idaapi.get_next_func(following.startEA)

    return found
# Rebuild the graph: clear it, add a 'START' node, walk self.graph_addrs
# (only the last 200 entries when there are more) linking consecutive
# distinct addresses, then add an 'END' node and return True.
# Nodes are cached per address in node_hash; a new node named after the
# function is only created when idaapi.get_func(addr) finds a function.
# NOTE(review): this definition is collapsed onto one line, so the nesting of
# the AddEdge/prev updates relative to the get_func check cannot be
# determined here - confirm against the properly indented source.
# NOTE(review): the trailing triple quote looks like residue of an enclosing
# string that starts outside this view - confirm before removing it.
def OnRefresh(self): self.Clear() prev = 'START' prev_node = self.AddNode(prev) node = None node_hash = {} #print "Layout %d nodes\n" % (len(function_addrs)) offset = 0 if len(self.graph_addrs) > 200: print(" Truncating graph layout to 200 most recent references") offset = len(self.graph_addrs) - 200 node_count = 0 for addr in self.graph_addrs[offset:]: if addr != prev: #print "%x -> %x" % (prev, fname) if node_hash.has_key(addr): node = node_hash[addr] else: if idaapi.get_func(addr): func_name = idc.GetFunctionName(addr) node = self.AddNode(func_name) node_hash[addr] = node node_count += 1 self.AddEdge(prev_node, node) prev = addr prev_node = node end_node = self.AddNode('END') self.AddEdge(prev_node, end_node) return True """
def extract_reduced_cfg(self, _):
    """Strip spurious basic blocks from the current function's CFG and show it.

    Dead blocks are removed first. Fully spurious blocks with exactly one
    predecessor and one successor are then bypassed, and a non-spurious
    block is merged into its only predecessor when that predecessor ends on
    an address whose result status is OPAQUE.
    """
    # TODO: Make a copy of the CFG before stripping it
    print("Extract reduced CFG")
    function_start = idaapi.get_func(idc.here()).startEA
    cfg = self.functions_cfg[function_start]
    opaque_addrs = {
        addr for addr, result in self.results.items()
        if result.status == self.po.OPAQUE
    }

    cfg.remove_dead_bb()  # Dead basic block removal step

    # Relocation + Merge step
    for idx, bb in cfg.items():
        print("try reduce: %d: 0x%x" % (idx, bb.startEA))

        if not bb.is_full_spurious():
            if bb.nb_preds() == 1:
                only_pred = list(bb.preds())[0]
                print(" One pred! %d, 0x%x" % (only_pred.nb_succs(), only_pred.last()))
                if only_pred.nb_succs() == 1 and only_pred.last() in opaque_addrs:
                    only_pred.concat(bb)
                    cfg.pop(idx)
        elif bb.nb_preds() == bb.nb_succs() == 1:
            # Do relocation: link the predecessor straight to the successor.
            before = list(bb.preds())[0]
            after = list(bb.succs())[0]
            print(" relocation bind %d->%d" % (before.id, after.id))
            before.remove_succ(bb)
            before.add_succ(after)
            after.remove_pred(bb)
            after.add_pred(before)
            cfg.pop(idx)
        else:
            print(" None of all ?")

    cfg.Show()
def OnDblClick(self, node_id):
    """Jump to the address behind a node, asking the user when there are several.

    A node with a single address is jumped to directly. Otherwise a chooser
    lists every address with a description (function name, string contents
    or disassembly) and the selected entry is jumped to.
    """
    eas = self.nodes_ea[node_id]
    if len(eas) == 1:
        jumpto(list(eas)[0])
        return

    rows = []
    for ea in eas:
        func = idaapi.get_func(ea)
        if func is not None:
            text = GetFunctionName(func.startEA)
        else:
            text = GetString(ea)
            if text is not None and text.find(str(self[node_id])) == -1:
                # The node label is not in the default reading; retry with
                # strtype=1.
                text = GetString(ea, strtype=1)
            else:
                text = GetDisasm(ea)
        rows.append(["0x%08x" % ea, repr(text)])

    chooser = CClassXRefsChooser("XRefs to %s" % str(self[node_id]), rows)
    picked = chooser.Show(1)
    if picked > -1:
        jumpto(list(eas)[picked])
def _match_with_check(self, ea):
    """Try to match the function at ``ea`` and apply the best result.

    Returns 1 when a match was applied, 0 when the function was skipped
    (fewer basic blocks than ``minsize``, score below ``threshold``, or the
    mark was not applied) and -1 when retrieval failed or returned nothing.
    """
    fail, skip, succ = -1, 0, 1

    # < minsize
    pfn = idaapi.get_func(ea)
    if idaapi.FlowChart(pfn).size < bai_config['minsize']:
        return skip

    # do match
    targets = None
    try:
        targets = self.mgr.retrieve_by_feature(ea, topk=1)
    except DecompilationFailure as e:
        BinaryAILog.fail(idaapi.get_func_name(ea), str(e))
        return fail
    except BinaryAIException as e:
        idaapi.hide_wait_box()
        # NOTE(review): if fatal() returns instead of raising, `targets`
        # stays None and the function reports failure below - confirm.
        BinaryAILog.fatal(e)

    # Covers both "no answer" and an empty result list.
    if not targets:
        return fail

    best = targets[0]
    if best['score'] < bai_config['threshold'] or \
            not bai_mark.apply_bai_high_score(
                ea, best['function']['name'], best['score']):
        return skip
    return succ
def workaround_Functions(start=idaapi.cvar.inf.minEA, end=idaapi.cvar.inf.maxEA):
    """
    Get a list of functions

    @param start: start address (default: inf.minEA)
    @param end: end address (default: inf.maxEA)

    @return: generator of function start addresses between start and end

    @note: The last function that starts before 'end' is included even
    if it extends beyond 'end'.
    """
    current = idaapi.get_func(start) or idaapi.get_next_func(start)

    while current and current.startEA < end:
        reported = current.startEA
        yield reported

        # Advance head by head for as long as the "next" function is still
        # the one that was just reported.
        probe = reported
        current = idaapi.get_next_func(reported)
        while current and current.startEA == reported:
            probe = idaapi.next_head(probe, end)
            current = idaapi.get_next_func(probe)
def get_upload_func_info(ea):
    """
    Collect the upload record for one function.

    Args:
        ea(ea_t): function address

    Returns:
        func_info(dict): pseudo code, binary identification, platform,
        function name and an MD5 over the bytes of all function chunks;
        None when decompilation raises.
    """
    try:
        failure = idaapi.hexrays_failure_t()
        if idaapi.IDA_SDK_VERSION >= 730:
            cfunc = idaapi.decompile(ea, failure, idaapi.DECOMP_NO_WAIT)
        else:
            cfunc = idaapi.decompile(ea, failure)
        func_info = {}
        func_info['feature'] = str(cfunc)
        func_info['pseudo_code'] = str(cfunc)
    except Exception as e:
        print(str(e))
        return None

    func_info['binary_file'] = idaapi.get_root_filename()
    func_info['binary_sha256'] = idaapi.retrieve_input_file_sha256()
    func_info['binary_offset'] = idaapi.get_fileregion_offset(ea)
    func_info['platform'] = get_platform_info()
    func_info['name'] = idaapi.get_func_name(ea)

    # Hash the bytes of all chunks, in the order Chunks() yields them.
    chunks = idautils.Chunks(idaapi.get_func(ea).start_ea)
    func_bytes = b''.join(
        idaapi.get_bytes(chunk_start, chunk_end - chunk_start)
        for chunk_start, chunk_end in chunks
    )
    func_info['func_bytes'] = hashlib.md5(func_bytes).hexdigest()
    return func_info
def get_func_end(addr):
    """Return the ``endEA`` of the function containing ``addr``."""
    return idaapi.get_func(addr).endEA
def generate(self):
    """Build string, formal, fuzzy and immediate signatures for the database.

    A signature value that occurs for more than one function is dropped and
    remembered as a duplicate, so only unique signatures remain. Returns the
    populated ``RizzoSignatures`` object.
    """
    signatures = RizzoSignatures()

    def keep_if_unique(table, dups, key, func_ea):
        # A second sighting removes the entry and blacklists the key; keys
        # already blacklisted are never added again.
        if key in table:
            del table[key]
            dups.add(key)
        elif key not in dups:
            table[key] = func_ea

    # Generate unique string-based function signatures
    for (ea, string) in self.strings.iteritems():
        # Only generate signatures on reasonably long strings with one xref
        if len(string.value) < 8 or len(string.xrefs) != 1:
            continue
        func = idaapi.get_func(string.xrefs[0])
        if not func:
            continue
        # The same string can appear more than once in an IDB.
        strhash = self.sighash(string.value)
        keep_if_unique(signatures.strings, signatures.stringdups,
                       strhash, func.startEA)

    # Generate formal, fuzzy, and immediate-based function signatures
    for ea in idautils.Functions():
        func = idaapi.get_func(ea)
        if not func:
            continue

        # Generate a signature for each block in this function
        blocks = self.function(func)

        # Function-wide formal and fuzzy signatures are the concatenated
        # per-block signatures.
        formal = self.sighash(''.join(
            [str(formal_part) for (formal_part, _fz, _im, _c) in blocks]))
        fuzzy = self.sighash(''.join(
            [str(fuzzy_part) for (_fm, fuzzy_part, _im, _c) in blocks]))

        # Add this signature to the function dictionary.
        signatures.functions[func.startEA] = (idc.Name(func.startEA), blocks)

        keep_if_unique(signatures.formal, signatures.formaldups,
                       formal, func.startEA)
        keep_if_unique(signatures.fuzzy, signatures.fuzzydups,
                       fuzzy, func.startEA)

        for (_fm, _fz, immediates, _c) in blocks:
            for immediate in immediates:
                keep_if_unique(signatures.immediates,
                               signatures.immediatedups,
                               immediate, func.startEA)

    # These need not be maintained across function calls,
    # and only add to the size of the saved signature file.
    signatures.fuzzydups = set()
    signatures.formaldups = set()
    signatures.stringdups = set()
    signatures.immediatedups = set()

    # DEBUG
    signatures.show()

    return signatures
def _key(cls, ea):
    '''Return the key under which contents data for the function containing `ea` is stored, or `None` if there is no function.'''
    func = idaapi.get_func(ea)
    if not func:
        return None
    return internal.interface.range.start(func)
def get_func_name_offset(ea):
    """Describe ``ea`` as ``<function name>+0x<hex offset>``.

    Returns None when ``ea`` is not inside a function.
    """
    func = idaapi.get_func(ea)
    if not func:
        return None
    # The offset carries a 0x prefix, so it has to be rendered in hex.
    return "%s+0x%x" % (GetFunctionName(ea), ea - func.startEA)
def _get_code_block(self, ea):
    """Return the result of ``idaapi.get_func`` for ``ea``."""
    code_block = idaapi.get_func(ea)
    return code_block
import idautils import idc import struct for func_addr in idautils.Functions(0, 0xffffffff): # Signature is based on the beginning of the decryption function: # 53 push ebx # 8B DA mov ebx, edx # 56 push esi # 33 F6 xor esi, esi # 57 push edi # 8B F9 mov edi, ecx # 39 34 DD C4 DE 06+ cmp dword_1006DEC4[ebx*8], esi if idaapi.get_many_bytes(func_addr, 12) == "538BDA5633F6578BF93934DD".decode('hex'): decoding_func = idaapi.get_func(func_addr) break for addr in idautils.Heads(decoding_func.startEA, decoding_func.endEA): if chr(idaapi.get_byte(addr)) == "\x8A": # 8A 89 B0 D5 04 10 mov cl, key[ecx] # ^ key offset key_offset = struct.unpack("<I", idaapi.get_many_bytes(addr + 2, 4))[0] elif idaapi.get_many_bytes(addr, 1) == "\x6B": # 6B C2 33 imul eax, edx, 51 # ^ key length key_len = idaapi.get_byte(addr + 2) elif idaapi.get_many_bytes(addr, 3) == "\x8B\x04\xDD": # 8B 04 DD C0 DE 06 10 mov eax, packed_strings_list[ebx*8] # ^ address of string list struct_base = struct.unpack("<I", idaapi.get_many_bytes(addr + 3,
def _key(cls, ea):
    '''Return the start address of the function containing `ea`, used as the key for its contents data, or `None` if there is no function.'''
    func = idaapi.get_func(ea)
    if not func:
        return None
    return func.startEA
def _is_repeatable(cls, ea):
    """Return True exactly when ``ea`` does not belong to any function."""
    return idaapi.get_func(ea) is None
def get_callers(name):
    """Yield the start address of each function with a code reference to ``name``.

    References from outside any function are skipped. A function is yielded
    once per reference it contains.
    """
    target = idaapi.get_name_ea(idaapi.BADADDR, name)
    owners = (idaapi.get_func(ref) for ref in idautils.CodeRefsTo(target, True))
    for owner in owners:
        if owner:
            yield owner.startEA
# Load code-coverage traces for the binary open in IDA and colour what they
# reached.
# `input_dir` is either a directory (every *.trace file in it is used) or a
# single trace path. Hits are collected per trace with get_image_hits(); the
# union of hits, the hits shared by all traces and the set of reached
# function start addresses are reported, and the user is asked whether to
# colour the reached functions and basic blocks.
# NOTE(review): this definition is collapsed onto two lines. A bare `return`
# follows the basic-block colouring, and the code after it uses
# `global_hits` / `global_reached_functions`, which are not assigned anywhere
# in view. Whether that trailing part is reachable depends on indentation
# that is not visible here - confirm against the properly indented source.
# NOTE(review): the trailing part removes or renames trace and input files
# on disk; treat it with care if it is ever re-enabled.
def main(): # XXX: Change this to the corresponding directory. input_dir = "/Users/anon/images/ttf/libFontParser" if os.path.isdir(input_dir): traces = filter( lambda x: x.endswith(".trace"), map(lambda x: os.path.join(input_dir, x), os.listdir(input_dir))) else: traces = [input_dir] # Get loaded binary name image_name = idc.GetInputFile().lower() info("IDB binary name '%s'" % image_name) # Get the image base image_base = idaapi.get_imagebase() info("IDB binary base 0x%.16x" % image_base) # Gather tuples of coverage_info = [] for filename in traces: debug("Loading code coverage from '%s'." % filename) # Get all the hits on this .idb file. hits = get_image_hits(filename, image_name, image_base) if not len(hits): debug("No hits could be loaded from image") continue # Save the coverage information. coverage_info.append(CoverageInformation(filename, hits)) if not len(coverage_info): info("No coverage information was present for image '%s'" % image_name) sys.exit() all_hits = set() shared_hits = set.intersection(*[x.hits for x in coverage_info]) reached_functions = set() for element in coverage_info: all_hits.update(element.hits) for hit in element.hits: f = idaapi.get_func(hit) if f: reached_functions.add(f.startEA) info("Covered %d basic blocks in total using %d files" % (len(all_hits), len(coverage_info))) info(" Number of shared basic locks %d" % (len(shared_hits))) info(" Number of reached functions %d" % (len(reached_functions))) if idaapi.askyn_c(1, "Do you want to mark all the FUNCTIONS reached?") == 1: FUNCTION_COLOR = 0xBCF5D1 for function in reached_functions: info("Reached -> %s" % GetFunctionName(function)) SetFunctionColor(function, FUNCTION_COLOR) if idaapi.askyn_c( 0, "Do you want to mark all the BASIC BLOCKS reached?") == 1: BBLOCK_COLOR_1 = 0xA3A9E3 BBLOCK_COLOR_2 = 0xA3D1E3 for hit in all_hits: SetBasicBlockColor(hit, BBLOCK_COLOR_1) for hit in shared_hits: SetBasicBlockColor(hit, BBLOCK_COLOR_2) return trace_to_new = {} for filename in traces: 
info("Loading code coverage from '%s'." % filename) # Get all the hits on this .idb file. hits = get_image_hits(filename, image_name, image_base) reached_functions = set() for e in hits: f = idaapi.get_func(e) if not f: continue reached_functions.add(f.startEA) # Get the elements that are introduced by this new trace. diff_hits = hits - global_hits diff_functions = reached_functions - global_reached_functions trace_to_new[filename] = diff_functions global_hits.update(hits) global_reached_functions.update(reached_functions) info("Image '%s' got %d hits (global) and %d function hits (global)." % (image_name, len(global_hits), len(global_reached_functions))) for trace_name, introduced_functions in trace_to_new.iteritems(): # Get the original file name. file_name = trace_name.replace(".trace", "") # We remove the files that did not introduce any new functions. if not len(introduced_functions): assert os.path.exists(file_name) assert os.path.exists(trace_name) debug("Removing input file '%s'", os.path.basename(file_name)) debug("Removing trace file '%s'", os.path.basename(trace_name)) os.remove(file_name) os.remove(trace_name) continue fileName, fileExtension = os.path.splitext(file_name) fileDir = os.path.dirname(file_name) hash_ = hashlib.sha224(file(file_name).read()).hexdigest() new_file_name = os.path.join(fileDir, hash_ + fileExtension) new_trace_name = new_file_name + ".trace" os.rename(file_name, new_file_name) os.rename(trace_name, new_trace_name) info("Trace '%s' introduced functions:" % new_trace_name) for func in introduced_functions: info(" %s" % GetFunctionName(func)) if idaapi.askyn_c(1, "Do you want to mark all the FUNCTIONS reached?") == 1: FUNCTION_COLOR = 0xBCF5D1 for function in global_reached_functions: SetFunctionColor(function, FUNCTION_COLOR) if idaapi.askyn_c( 0, "Do you want to mark all the BASIC BLOCKS reached?") == 1: BBLOCK_COLOR = 0xf2ddda for hit in global_hits: SetBasicBlockColor(hit, BBLOCK_COLOR)
# Extract features for the function at the screen address and for the whole
# file, add rule matches to them, and load the rule generator views.
# Steps, in order: (re)load rules when either cache is empty; create the IDA
# feature extractor; extract function and basic-block features and their
# rule matches (skipped, with empty function features, when the cursor is not
# inside a function); extract file features and file-scope matches; render
# the preview and feature views.
# Returns True when every step completed. Returns False when a step fails
# (the error is logged) or when ida_kernwin.user_cancelled() reports a
# cancellation between steps.
# Rules whose meta has "capa/subscope-rule" are left out of the matches that
# are added to the displayed features.
# NOTE(review): this definition is collapsed onto two lines and one string
# literal is split across them; the nested try/except blocks are order
# dependent, so the code is left byte-identical.
def load_capa_function_results(self): """ """ if not self.rules_cache or not self.ruleset_cache: # only reload rules if caches are empty if not self.load_capa_rules(): return False else: logger.info( 'Using cached ruleset, click "Reset" to reload rules from disk.' ) if ida_kernwin.user_cancelled(): logger.info("User cancelled analysis.") return False update_wait_box("loading IDA extractor") try: # must use extractor to get function, as capa analysis requires casted object extractor = CapaExplorerFeatureExtractor() except Exception as e: logger.error("Failed to load IDA feature extractor (error: %s)" % e) return False if ida_kernwin.user_cancelled(): logger.info("User cancelled analysis.") return False update_wait_box("extracting function features") try: f = idaapi.get_func(idaapi.get_screen_ea()) if f: f = extractor.get_function(f.start_ea) self.rulegen_current_function = f func_features, bb_features = find_func_features(f, extractor) self.rulegen_func_features_cache = collections.defaultdict( set, copy.copy(func_features)) self.rulegen_bb_features_cache = collections.defaultdict( dict, copy.copy(bb_features)) if ida_kernwin.user_cancelled(): logger.info("User cancelled analysis.") return False update_wait_box("matching function/basic block rule scope") try: # add function and bb rule matches to function features, for display purposes func_matches, bb_matches = find_func_matches( f, self.ruleset_cache, func_features, bb_features) for (name, res) in itertools.chain(func_matches.items(), bb_matches.items()): rule = self.ruleset_cache[name] if rule.meta.get("capa/subscope-rule"): continue for (ea, _) in res: func_features[capa.features.common.MatchedRule( name)].add(ea) except Exception as e: logger.error( "Failed to match function/basic block rule scope (error: %s)" % e) return False else: func_features = {} except UserCancelledError: logger.info("User cancelled analysis.") return False except Exception as e: logger.error("Failed to extract function features (error: 
%s)" % e) return False if ida_kernwin.user_cancelled(): logger.info("User cancelled analysis.") return False update_wait_box("extracting file features") try: file_features = find_file_features(extractor) self.rulegen_file_features_cache = collections.defaultdict( dict, copy.copy(file_features)) if ida_kernwin.user_cancelled(): logger.info("User cancelled analysis.") return False update_wait_box("matching file rule scope") try: # add file matches to file features, for display purposes for (name, res) in find_file_matches(self.ruleset_cache, file_features).items(): rule = self.ruleset_cache[name] if rule.meta.get("capa/subscope-rule"): continue for (ea, _) in res: file_features[capa.features.common.MatchedRule( name)].add(ea) except Exception as e: logger.error("Failed to match file scope rules (error: %s)" % e) return False except Exception as e: logger.error("Failed to extract file features (error: %s)" % e) return False if ida_kernwin.user_cancelled(): logger.info("User cancelled analysis.") return False update_wait_box("rendering views") try: # load preview and feature tree self.view_rulegen_preview.load_preview_meta( f.start_ea if f else None, settings.user.get(CAPA_SETTINGS_RULEGEN_AUTHOR, "<insert_author>"), settings.user.get(CAPA_SETTINGS_RULEGEN_SCOPE, "function"), ) self.view_rulegen_features.load_features(file_features, func_features) # self.view_rulegen_header_label.setText("Function Features (%s)" % trim_function_name(f)) self.set_view_status_label("capa rules directory: %s (%d rules)" % (settings.user[CAPA_SETTINGS_RULE_PATH], len(self.rules_cache))) except Exception as e: logger.error("Failed to render views (error: %s)" % e) return False return True
def update_view_tree_limit_results_to_function(self, ea):
    """Limit the results to the function containing ``ea`` and reset the tree view UI.

    Args:
        ea: address used to look up the containing function via
            ``idaapi.get_func``; the lookup result is passed on unchanged.
    """
    containing_func = idaapi.get_func(ea)
    self.limit_results_to_function(containing_func)
    self.view_tree.reset_ui()
def main(fileName):
    """Export the current IDA database to a JSON file.

    The exported document has four top-level keys:

    * ``names``     - address -> name, from ``idautils.Names()``
    * ``segments``  - list of dicts with start/end/name, r/w/x permission
      flags and a section-semantics value
    * ``functions`` - function name -> dict with bounds, comments, flags and
      basic blocks
    * ``strings``   - string address -> (name, length, type, data xrefs)

    Args:
        fileName: path of the JSON file to write. When None nothing is
            exported and the function returns immediately.
    """
    if fileName is None:
        return

    jsonValue = {}
    jsonValue["names"] = {}
    jsonValue["functions"] = {}
    jsonValue["segments"] = []
    jsonValue["strings"] = {}

    for addr, name in idautils.Names():
        jsonValue["names"][addr] = name

    # Record segment details
    for ea in idautils.Segments():
        cur_seg = {}
        cur_seg["start"] = idc.SegStart(ea)
        cur_seg["end"] = idc.SegEnd(ea)
        cur_seg["name"] = idc.SegName(ea)
        seg = idaapi.getseg(ea)
        cur_seg["r"] = (seg.perm & idaapi.SEGPERM_READ) != 0
        cur_seg["w"] = (seg.perm & idaapi.SEGPERM_WRITE) != 0
        cur_seg["x"] = (seg.perm & idaapi.SEGPERM_EXEC) != 0

        # Code segments are always exported as read-only code; data/bss
        # segments are split by writability; anything else keeps the default.
        cur_seg["semantics"] = DefaultSectionSemantics
        if seg.type == idaapi.SEG_CODE:
            cur_seg["semantics"] = ReadOnlyCodeSectionSemantics
        elif seg.type in (idaapi.SEG_DATA, idaapi.SEG_BSS):
            if cur_seg["w"]:
                cur_seg["semantics"] = ReadWriteDataSectionSemantics
            else:
                cur_seg["semantics"] = ReadOnlyDataSectionSemantics

        # Store the record. Previously it was built and then discarded, so
        # the exported "segments" list was always empty.
        # NOTE(review): assumes the *SectionSemantics constants are JSON
        # serializable (e.g. ints) - confirm at their definition.
        jsonValue["segments"].append(cur_seg)

    # Record function details
    for ea in idautils.Functions():
        cur_func = {}
        cur_func["start"] = ea
        cur_func["end"] = idc.GetFunctionAttr(ea, idc.FUNCATTR_END)
        cur_func["comment"] = linearize_comment(ea, True)
        # NOTE(review): this map is keyed by comment text with the address as
        # value, so identical comments overwrite each other - confirm this is
        # what the importer expects before changing it.
        cur_func["comments"] = {}
        for line_ea in idautils.Heads(ea, cur_func["end"]):
            line_comment = linearize_comment(line_ea)
            if line_comment is not None:
                cur_func["comments"][line_comment] = line_ea

        flags = idc.GetFunctionFlags(ea)
        cur_func["can_return"] = (flags & idc.FUNC_NORET) != idc.FUNC_NORET
        cur_func["thunk"] = False
        f = idaapi.get_func(ea)
        blocks = []
        for block in idaapi.FlowChart(f):
            blocks.append([block.startEA, block.endEA])

            # IDA treats thunks as being part of the function they are thunking to
            # Binary Ninja doesn't so only add the first basic block for all thunks
            if flags & idc.FUNC_THUNK != 0:
                cur_func["thunk"] = True
                break
        cur_func["basic_blocks"] = blocks
        jsonValue["functions"][idc.GetFunctionName(ea)] = cur_func

    # Record string details
    for string in idautils.Strings():
        # Strings without an entry in the exported name map get an empty name.
        name = jsonValue["names"].get(string.ea, "")
        xrefs = list(idautils.DataRefsTo(string.ea))
        jsonValue["strings"][string.ea] = (name, string.length, string.type,
                                           xrefs)

    # TODO: global variable names and types
    # TODO: stack local variable names and types
    # TODO: types and enumerations
    # TODO: non-function comments

    with open(fileName, "wb") as f:
        f.write(json.dumps(jsonValue, indent=4))

    print("Exported idb to {}".format(fileName))
def _event(cls):
    """Coroutine that keeps tag references in sync with address comments.

    Each loop iteration consumes two values sent into the generator:

    1. ``(ea, rpt, new)`` for the "changing" event: the address, the
       repeatable flag and the new comment text.
    2. ``(newea, nrpt, none)`` for the "changed" event that should follow.

    When the second event matches the first (same address and flag, third
    element None) the comment is rewritten in encoded form, written back as
    typed, or its references are removed when it is empty. Otherwise the
    events are treated as out of sync: the old comment and its references
    are deleted and references are created from whatever comment is stored
    at the second event's address.
    """
    while True:
        # cmt_changing event
        ea, rpt, new = (yield)
        old = utils.string.of(idaapi.get_cmt(ea, rpt))
        # `f` is not read again in this function.
        f, o, n = idaapi.get_func(ea), internal.comment.decode(
            old), internal.comment.decode(new)

        # update references before we update the comment
        cls._update_refs(ea, o, n)

        # wait for cmt_changed event
        newea, nrpt, none = (yield)

        # now fix the comment the user typed
        if (newea, nrpt, none) == (ea, rpt, None):
            # `repeatable` is only consumed by the disabled variant below.
            ncmt, repeatable = utils.string.of(idaapi.get_cmt(
                ea, rpt)), cls._is_repeatable(ea)

            if (ncmt or '') != new:
                logging.warn(
                    u"{:s}.event() : Comment from event at address {:#x} is different from database. Expected comment ({!s}) is different from current comment ({!s})."
                    .format('.'.join((__name__, cls.__name__)), ea,
                            utils.string.repr(new), utils.string.repr(ncmt)))

            # Disabled variant that wrote the comment back using the
            # `repeatable` flag computed above instead of the event's `rpt`:
            #
            # delete it if it's the wrong type
            # if nrpt != repeatable:
            #     idaapi.set_cmt(ea, '', nrpt)
            #
            # # write the tag back to the address
            # if internal.comment.check(new): idaapi.set_cmt(ea, utils.string.to(internal.comment.encode(n)), repeatable)
            # # write the comment back if it's non-empty
            # elif new: idaapi.set_cmt(ea, utils.string.to(new), repeatable)
            # # otherwise, remove its reference since it's being deleted
            # else: cls._delete_refs(ea, n)

            if internal.comment.check(new):
                # write the tag back to the address in encoded form
                idaapi.set_cmt(ea,
                               utils.string.to(internal.comment.encode(n)),
                               rpt)
            elif new:
                # write the comment back as typed
                idaapi.set_cmt(ea, utils.string.to(new), rpt)
            else:
                # empty comment: remove its references
                cls._delete_refs(ea, n)

            continue

        # if the changed event doesn't happen in the right order
        logging.fatal(
            u"{:s}.event() : Comment events are out of sync at address {:#x}, updating tags from previous comment. Expected comment ({!s}) is different from current comment ({!s})."
            .format('.'.join((__name__, cls.__name__)), ea,
                    utils.string.repr(o), utils.string.repr(n)))

        # delete the old comment
        cls._delete_refs(ea, o)
        idaapi.set_cmt(ea, '', rpt)
        logging.warn(
            u"{:s}.event() : Deleted comment at address {:#x} was {!s}.".
            format('.'.join((__name__, cls.__name__)), ea,
                   utils.string.repr(o)))

        # new comment
        new = utils.string.of(idaapi.get_cmt(newea, nrpt))
        n = internal.comment.decode(new)
        cls._create_refs(newea, n)

        continue
    # Not reached: the loop above has no break.
    return
def get_function(va):
    '''
    Look up the function that contains `va` and return the address of its
    first instruction (the function's `start_ea`).

    NOTE(review): there is no guard for `idaapi.get_func` finding no
    function - confirm callers only pass addresses inside functions.
    '''
    containing_func = idaapi.get_func(va)
    return containing_func.start_ea
def _event(cls):
    """Coroutine that keeps tag references in sync with function comments.

    Each loop iteration consumes two values sent into the generator:

    1. ``(ea, rpt, new)`` for the "changing" event: an address inside the
       function, the repeatable flag and the new comment text.
    2. ``(newea, nrpt, none)`` for the "changed" event that should follow.

    When the second event matches the first (same address and flag, third
    element None) the function comment is rewritten in encoded form,
    written back as typed, or its references are removed when it is empty.
    Otherwise the events are treated as out of sync: the old comment and its
    references are deleted and references are created from the comment of
    the function at the second event's address.
    """
    while True:
        # cmt_changing event
        ea, rpt, new = (yield)
        fn = idaapi.get_func(ea)
        old = utils.string.of(idaapi.get_func_cmt(fn, rpt))
        o, n = internal.comment.decode(old), internal.comment.decode(new)

        # update references before we update the comment
        cls._update_refs(fn, o, n)

        # wait for cmt_changed event
        newea, nrpt, none = (yield)

        # now we can fix the user's new comment
        if (newea, nrpt, none) == (ea, rpt, None):
            ncmt = utils.string.of(idaapi.get_func_cmt(fn, rpt))

            if (ncmt or '') != new:
                logging.warn(
                    u"{:s}.event() : Comment from event for function {:#x} is different from database. Expected comment ({!s}) is different from current comment ({!s})."
                    .format('.'.join((__name__, cls.__name__)), ea,
                            utils.string.repr(new), utils.string.repr(ncmt)))

            # Disabled variant that always wrote the comment back as
            # repeatable (True) instead of using the event's `rpt`:
            #
            # if it's non-repeatable, then fix it.
            # if not nrpt:
            #     idaapi.set_func_cmt(fn, '', nrpt)
            #
            # # write the tag back to the function
            # if internal.comment.check(new): idaapi.set_func_cmt(fn, utils.string.to(internal.comment.encode(n)), True)
            # # otherwise, write the comment back as long as it's valid
            # elif new: idaapi.set_func_cmt(fn, utils.string.to(new), True)
            # # otherwise, the user has deleted it..so update its refs.
            # else: cls._delete_refs(fn, n)

            # write the tag back to the function
            if internal.comment.check(new):
                idaapi.set_func_cmt(
                    fn, utils.string.to(internal.comment.encode(n)), rpt)
            elif new:
                # write the comment back as typed
                idaapi.set_func_cmt(fn, utils.string.to(new), rpt)
            else:
                # empty comment: remove its references
                cls._delete_refs(fn, n)

            continue

        # if the changed event doesn't happen in the right order
        logging.fatal(
            u"{:s}.event() : Comment events are out of sync for function {:#x}, updating tags from previous comment. Expected comment ({!s}) is different from current comment ({!s})."
            .format('.'.join((__name__, cls.__name__)), ea,
                    utils.string.repr(o), utils.string.repr(n)))

        # delete the old comment
        cls._delete_refs(fn, o)
        idaapi.set_func_cmt(fn, '', rpt)
        logging.warn(
            u"{:s}.event() : Deleted comment for function {:#x} was ({!s})."
            .format('.'.join((__name__, cls.__name__)), ea,
                    utils.string.repr(o)))

        # new comment
        newfn = idaapi.get_func(newea)
        new = utils.string.of(idaapi.get_func_cmt(newfn, nrpt))
        n = internal.comment.decode(new)
        cls._create_refs(newfn, n)

        continue
    # Not reached: the loop above has no break.
    return
def is_func_start(ea):
    """Check whether a function starts exactly at virtual address ``ea``.

    Args:
        ea: virtual address to test.

    Returns:
        bool: True when ``idaapi.get_func(ea)`` yields a function whose
        ``start_ea`` equals ``ea``; False otherwise, including when no
        function is found.
    """
    f = idaapi.get_func(ea)
    # bool() keeps the original truthiness test but guarantees a real
    # boolean; the bare `f and ...` leaked None when no function was found.
    return bool(f and f.start_ea == ea)
def process_function(arch, func_ea):
    """Export one function into a DismantlerDataPacket and register it.

    Starting from the chunk that contains the entry point, every head of
    every reachable chunk is visited. Comments, instructions, code
    references (calls and branches) and data references are recorded in the
    packet. Branch targets outside the known chunks are queued as new
    chunks, and chunks reported by ``get_chunks`` that were not reached are
    processed once the queue runs dry. Finally the packet and a
    ``FunctionAnalyzer`` are handed to ``instrumentation``.

    Args:
        arch: architecture helper providing ``process_instruction``,
            ``is_call``, ``is_conditional_branch`` and
            ``is_unconditional_branch``.
        func_ea: entry address of the function to export.

    NOTE(review): ``imported_functions`` is not defined in this function;
    presumably a module-level set - verify.
    """
    # Not read again in this function.
    func_end = idc.FindFuncEnd(func_ea)

    packet = DismantlerDataPacket()

    ida_chunks = get_chunks(func_ea)
    chunks = set()

    # Add to the chunks only the main block, containing the
    # function entry point
    #
    chunk = get_flow_code_from_address(func_ea)
    if chunk:
        chunks.add(chunk)

    # Make "ida_chunks" a set for faster searches within
    ida_chunks = set(ida_chunks)
    # Index the IDA chunks by their start address (first tuple element).
    ida_chunks_idx = dict(zip([c[0] for c in ida_chunks], ida_chunks))

    func = idaapi.get_func(func_ea)
    # Seed the pending comments with both function comments so they are
    # attached to the first head that gets processed.
    comments = [idaapi.get_func_cmt(func, 0), idaapi.get_func_cmt(func, 1)]

    # Copy the list of chunks into a queue to process
    #
    chunks_todo = [c for c in chunks]

    while True:

        # If no chunks left in the queue, exit
        if not chunks_todo:

            # Fall back to the IDA chunks that have not been visited yet.
            if ida_chunks:
                chunks_todo.extend(ida_chunks)
            else:
                break

        chunk_start, chunk_end = chunks_todo.pop()
        # Mark the chunk as visited so the fallback above will not
        # re-queue it.
        if ida_chunks_idx.has_key(chunk_start):
            ida_chunks.remove(ida_chunks_idx[chunk_start])
            del ida_chunks_idx[chunk_start]

        for head in idautils.Heads(chunk_start, chunk_end):

            comments.extend((idaapi.get_cmt(head, 0), idaapi.get_cmt(head, 1)))
            comment = '\n'.join([c for c in comments if c is not None])
            comment = comment.strip()
            if comment:
                packet.add_comment(head, comment)
            comments = list()

            if idc.isCode(idc.GetFlags(head)):

                instruction = arch.process_instruction(packet, head)

                # if there are other references than
                # flow add them all.
                if list(idautils.CodeRefsFrom(head, 0)):

                    # for each reference, including flow ones
                    for ref_idx, ref in enumerate(
                            idautils.CodeRefsFrom(head, 1)):

                        if arch.is_call(instruction):

                            # This two conditions must remain separated, it's
                            # necessary to enter the enclosing "if" whenever
                            # the instruction is a call, otherwise it will be
                            # added as an uncoditional jump in the last else
                            #
                            if ref in list(idautils.CodeRefsFrom(head, 0)):
                                packet.add_direct_call(head, ref)

                        elif ref_idx > 0 and arch.is_conditional_branch(
                                instruction):
                            # The ref_idx is > 0 in order to avoid processing the
                            # normal flow reference which would effectively imply
                            # that the conditional branch is processed twice.
                            # It's done this way instead of changing the loop's head
                            # from CodeRefsFrom(head, 1) to CodeRefsFrom(head, 0) in
                            # order to avoid altering the behavior of other conditions
                            # which rely on it being so.

                            # FIXME
                            # I don't seem to check for the reference here
                            # to point to valid, defined code. I suspect
                            # this could lead to a failure when exporting
                            # if such situation appears. I should test if
                            # it's a likely scenario and probably just add
                            # an isHead() or isCode() to address it.

                            packet.add_conditional_branch_true(head, ref)
                            packet.add_conditional_branch_false(
                                head, idaapi.next_head(head, chunk_end))

                            # If the target is not in our chunk list
                            if not address_in_chunks(ref, chunks):
                                new_chunk = get_flow_code_from_address(ref)
                                # Add the chunk to the chunks to process
                                # and to the set containing all visited
                                # chunks
                                if new_chunk is not None:
                                    chunks_todo.append(new_chunk)
                                    chunks.add(new_chunk)

                        elif arch.is_unconditional_branch(instruction):
                            packet.add_unconditional_branch(head, ref)

                            # If the target is not in our chunk list
                            if not address_in_chunks(ref, chunks):
                                new_chunk = get_flow_code_from_address(ref)
                                # Add the chunk to the chunks to process
                                # and to the set containing all visited
                                # chunks
                                if new_chunk is not None:
                                    chunks_todo.append(new_chunk)
                                    chunks.add(new_chunk)

                        #skip = False

                # NOTE(review): this loop reads `instruction`, which is only
                # assigned in the isCode branch, so it is nested here -
                # confirm the nesting against the upstream file.
                for ref in idautils.DataRefsFrom(head):
                    packet.add_data_reference(head, ref)

                    # Get a data reference from the current reference's
                    # location. For instance, if 'ref' points to a valid
                    # address and such address contains a data reference
                    # to code.
                    target = list(idautils.DataRefsFrom(ref))
                    if target:
                        target = target[0]
                    else:
                        target = None

                    # A call through a location that itself references
                    # nothing is recorded as an import-style virtual call.
                    if target is None and arch.is_call(instruction):
                        imp_name = idc.Name(ref)
                        imp_module = get_import_module_name(ref)

                        imported_functions.add((ref, imp_name, imp_module))
                        packet.add_indirect_virtual_call(head, ref)

                    elif target is not None and idc.isHead(target):
                        # for calls "routed" through this reference
                        if arch.is_call(instruction):
                            packet.add_indirect_call(head, target)

                        # for unconditional jumps "routed" through this reference
                        elif arch.is_unconditional_branch(instruction):
                            packet.add_unconditional_branch(head, target)

                        # for conditional "routed" through this reference
                        elif arch.is_conditional_branch(instruction):
                            packet.add_conditional_branch_true(head, target)
                            packet.add_conditional_branch_false(
                                head, idaapi.next_head(head, chunk_end))

    f = FunctionAnalyzer(arch, func_ea, packet)

    instrumentation.new_packet(packet)
    instrumentation.new_function(f)
def get_func_start_ea(ea):
    """Return the start address of the function containing ``ea``.

    Returns None when ``idaapi.get_func`` finds no function for ``ea``.
    """
    func = idaapi.get_func(ea)
    if func is None:
        return func
    return func.start_ea
def _current_function(self):
    """Return the start address (``startEA``) of the function at the screen cursor."""
    cursor_ea = ScreenEA()
    enclosing_func = idaapi.get_func(cursor_ea)
    return enclosing_func.startEA
def activate(self, context):
    """Print a C++ class skeleton generated from the vftable at the selection.

    Starting at the selection start (or the screen cursor when nothing is
    selected), consecutive 4-byte little-endian values are read and treated
    as function pointers. Scanning stops at the first value that does not
    point into a function, at unreadable memory, or once
    ``Limits.MAX_FUNCTIONS_PER_VFTABLE`` entries have been collected.

    The first entry is used to derive the class name; the remaining entries
    are emitted as the class members and type forwardings.

    Args:
        context: action context supplied by the caller; not used here.

    Returns:
        Always 1.
    """
    # Read current selection
    is_selected, select_begin, _select_end = idaapi.read_selection()
    if not is_selected:
        select_begin = ScreenEA()

    print("Selection from {:08X}".format(select_begin))

    # Entries are (demangled name, function address, table slot address).
    functionsList = []
    byteAddr = select_begin

    while True:
        raw = idc.get_bytes(byteAddr, 4)

        func = None
        if raw:
            # Little-endian decode of the slot contents.
            addr = sum(
                byte << (8 * index)
                for index, byte in enumerate(bytearray(raw)))
            # Get info about func
            func = idaapi.get_func(addr)

        # Unreadable memory (get_bytes returned None) ends the table the
        # same way as a slot that does not point into a function.
        if not func:
            print("End of vftable. Total entities: {}".format(
                len(functionsList)))
            break

        funcName = idc.get_name(addr, GN_DEMANGLED)
        functionsList.append((funcName, addr, byteAddr))
        byteAddr += 4  # Jump to next addr

        if len(functionsList) >= Limits.MAX_FUNCTIONS_PER_VFTABLE:
            break

    # Generate CXX class by vftable
    if not functionsList:
        print("Unable to locate first method of class at {:08X}".format(
            select_begin))
        return 1

    className = CXXClassUtils.GetClassName(functionsList[0])
    if not className:
        print("Unable to parse class at {:08X}".format(select_begin))
        return 1

    members = functionsList[1:]

    # Collect the pieces and join once instead of growing a string.
    parts = ["// Type forwardings for class {}\n".format(className)]
    for typeForward in CXXClassUtils.GenerateTypeForwardings(members):
        parts.append("{}\n".format(typeForward))

    parts.append("// Class definition {}\n".format(className))
    parts.append("class {} {{\n".format(className))

    # Generate vftable members
    for vftblIndex, (rawName, funcAddr, _rawAddr) in enumerate(members):
        parts.append("\t{} //#{:04} at {:08X} org {}\n".format(
            CXXClassUtils.GetMemberNameWithArgsList(rawName), vftblIndex,
            funcAddr, rawName))

    # Finalize class
    parts.append("}}; //End of {} from {:08X}".format(className,
                                                      select_begin))
    src = "".join(parts)

    print("Class source: \n\n{}".format(src))

    # End of plugin
    return 1
def find_subroutine_boundary(table_addr, table):
    """Compute the address range spanned by the functions a block table refers to.

    Every entry of ``table`` must be the start of a basic block, no block may
    be referenced twice, and every block of every involved function must be
    referenced by the table (a single 4-byte ``BLX`` block is tolerated).
    When more than one function is involved, each function must directly
    border another one.

    Args:
        table_addr: address of the table; only used in diagnostics.
        table: iterable of block start addresses.

    Returns:
        ``(sub_start, sub_end)`` on success, ``(None, None)`` when any of the
        checks fails (a ``[DEOBF]`` diagnostic is printed).
    """
    block_ranges = set()
    # Start addresses of all functions seen, kept to verify the results below.
    func_starts = set()

    # Find all functions and blocks from the table.
    for entry in table:
        owner = idaapi.get_func(entry)
        func_starts.add(owner.start_ea)

        # Find the block that belongs to this table entry.
        flow = idaapi.FlowChart(owner)
        match = next((blk for blk in flow if blk.start_ea == entry), None)
        if match is None:
            print("[DEOBF] Unable to find block of %X" % entry)
            return None, None

        span = (match.start_ea, match.end_ea)
        if span in block_ranges:
            print("[DEOBF] Found duplicate block usage at %X" % entry)
            return None, None
        block_ranges.add(span)

    # Make sure that every block of every function is found and in the table.
    known_starts = set(span[0] for span in block_ranges)
    miss_count = 0
    for func_start in func_starts:
        for blk in idaapi.FlowChart(idaapi.get_func(func_start)):
            if blk.start_ea in known_starts:
                continue

            # Only the first unreferenced 4-byte BLX block is tolerated.
            tolerated = (miss_count == 0
                         and blk.end_ea - blk.start_ea == 4
                         and idc.GetMnem(blk.start_ea) == 'BLX')
            if tolerated:
                print("[DEOBF] Found unimportant block at %X in table %X" %
                      (blk.start_ea, table_addr))
                miss_count += 1
                continue

            print("[DEOBF] Found unused block at %X in table %X" %
                  (blk.start_ea, table_addr))
            return None, None

    # A single function needs no connectivity check.
    if not len(func_starts) > 1:
        only_func = idaapi.get_func(list(func_starts)[0])
        return only_func.start_ea, only_func.end_ea

    # Make sure every function connects.
    sub_start = None
    sub_end = None
    for func_a in func_starts:
        ida_func_a = idaapi.get_func(func_a)

        if sub_start is None or sub_start > ida_func_a.start_ea:
            sub_start = ida_func_a.start_ea
        if sub_end is None or sub_end < ida_func_a.end_ea:
            sub_end = ida_func_a.end_ea

        neighbours = (idaapi.get_func(func_b) for func_b in func_starts)
        touches_other = any(
            ida_func_a.start_ea == other.end_ea
            or ida_func_a.end_ea == other.start_ea
            for other in neighbours)
        if not touches_other:
            print("[DEOBF] Found disconnected function %X in table %X" %
                  (ida_func_a.start_ea, table_addr))
            return None, None

    return sub_start, sub_end
def get_func_start(addr):
    """Return the ``startEA`` of the function that ``idaapi.get_func`` finds for ``addr``."""
    return idaapi.get_func(addr).startEA
def main():
    """Extract CFG, optional DFG and block features for every non-library function.

    Script arguments (``idc.ARGV``):
        1: output directory for the feature files
        2: path of the analysed binary
        3: program name
        4: program version

    For each function in the ``.text`` segment that is not flagged
    ``FUNC_LIB`` the following files are written: ``<name>_cfg.txt``,
    ``<name>_dfg.txt`` (only when ``config.STEP2_GEN_DFG`` is set) and
    ``<name>_fea.csv``. One row per function is appended to
    ``functions_list_fea.csv``. Functions whose lower-cased name was seen
    before are written to the ``temp`` sub-directory instead. Extraction
    timings, grouped by CFG node count, are dumped to JSON files at the end.
    """
    global bin_num, func_num, function_list_file, function_list_fp, functions
    CFG_extract_time = defaultdict(list)
    DFG_extract_time = defaultdict(list)
    FEATURE_extract_time = defaultdict(list)

    fea_path = ""
    print idc.ARGV[1]
    print idc.ARGV[2]
    fea_path_origion = idc.ARGV[1]
    # Directory for functions whose name collides with an earlier one.
    fea_path_temp = idc.ARGV[1] + "\\temp"
    bin_path = idc.ARGV[2]
    binary_file = bin_path.split(os.sep)[-1]
    program = idc.ARGV[3]
    version = idc.ARGV[4]
    # File name without its last extension, dots replaced by underscores.
    fname_prefix = '_'.join(binary_file.split('.')[:-1])
    print fname_prefix

    print "Directory path : ", fea_path_origion
    function_list_file = fea_path_origion + os.sep + "functions_list_fea.csv"
    print(function_list_file)

    # Bounds of the last segment whose name starts with ".text"; both stay 0
    # when no such segment exists.
    textStartEA = 0
    textEndEA = 0
    for seg in idautils.Segments():
        if idc.SegName(seg)[:5] == ".text":
            textStartEA = idc.SegStart(seg)
            textEndEA = idc.SegEnd(seg)

    for func in idautils.Functions(textStartEA, textEndEA):
        # Ignore Library Code
        flags = idc.GetFunctionFlags(func)
        if flags & idc.FUNC_LIB:
            print hex(func), "FUNC_LIB", idc.GetFunctionName(func)
            continue

        # The name is used in file names: cap its length and replace ':'.
        cur_function_name = fname_prefix + '_' + idc.GetFunctionName(func)
        if len(cur_function_name) > 130:
            cur_function_name = cur_function_name[:130]
        if ':' in cur_function_name:
            cur_function_name = '_'.join(cur_function_name.split(':'))
        print cur_function_name

        fea_path = fea_path_origion
        # NOTE(review): the `else` is paired with the name-collision test
        # (record the name only when it is new) - confirm against the
        # upstream file.
        if cur_function_name.lower() in functions:
            fea_path = fea_path_temp
            if not os.path.exists(fea_path):
                os.mkdir(fea_path)
        else:
            functions.append(cur_function_name.lower())

        allblock = idaapi.FlowChart(idaapi.get_func(func))
        cfg_file = fea_path + os.sep + str(
            cur_function_name) + "_cfg.txt"
        cfg_fp = open(cfg_file, 'w')

        t_cfg_start = time.time()
        block_items = []
        DG = nx.DiGraph()
        for idaBlock in allblock:
            # Block addresses are kept as hex text; block_items drops the
            # leading "0x".
            temp_str = str(hex(idaBlock.startEA))
            block_items.append(temp_str[2:])
            DG.add_node(hex(idaBlock.startEA))
            for succ_block in idaBlock.succs():
                DG.add_edge(hex(idaBlock.startEA), hex(succ_block.startEA))
            for pred_block in idaBlock.preds():
                DG.add_edge(hex(pred_block.startEA), hex(idaBlock.startEA))
        CFG_extract_time[str(DG.number_of_nodes())].append(
            str(time.time() - t_cfg_start))

        # Generate CFG
        # One line per node: the node followed by its successors.
        for cfg_node in DG.nodes():
            cfg_str = str(cfg_node)
            for edge in DG.succ[cfg_node]:
                cfg_str = cfg_str + " " + edge
            cfg_str = cfg_str + "\n"
            cfg_fp.write(cfg_str)
        cfg_fp.close()

        if config.STEP2_GEN_DFG:
            # Generate DFG
            t_dfg_start = time.time()
            dfg = dataflow_analysis(func, block_items, DG)
            DFG_extract_time[str(DG.number_of_nodes())].append(
                str(time.time() - t_dfg_start))
            dfg_file = fea_path + os.sep + str(
                cur_function_name) + "_dfg.txt"
            dfg_fp = open(dfg_file, 'w')
            for dfg_node in dfg.nodes():
                dfg_str = dfg_node
                for edge in dfg.succ[dfg_node]:
                    dfg_str = dfg_str + " " + edge
                dfg_str = dfg_str + "\n"
                dfg_fp.write(dfg_str)
            dfg_fp.close()

        # Generate Features
        fea_file = fea_path + os.sep + str(
            cur_function_name) + "_fea.csv"
        func_num += 1
        # NOTE(review): fea_fp is not closed in this function; presumably
        # block_fea closes it - verify.
        fea_fp = open(fea_file, 'w')
        t_feature_start = time.time()
        block_fea(allblock, fea_fp)
        FEATURE_extract_time[str(DG.number_of_nodes())].append(
            str(time.time() - t_feature_start))

        function_str = str(cur_function_name) + "," + str(DG.number_of_nodes()) + "," + \
            str(DG.number_of_edges()) + ","+ \
            str(program) + "," + str(version) + "," + str(bin_path) + ",\n"
        function_list_fp = open(function_list_file, 'a')  # 'a': append mode
        function_list_fp.write(function_str)
        function_list_fp.close()

    # Timing reports are always written to the main output directory.
    binary_name = '_'.join(bin_path.split('\\')[-1].split('.')[:-1])
    with open(
            os.path.join(fea_path_origion,
                         binary_name + "_cfg_extractor_time.json"),
            'w') as fp:
        json.dump(CFG_extract_time, fp, indent=4)
    if config.STEP2_GEN_DFG:
        with open(
                os.path.join(fea_path_origion,
                             binary_name + "_dfg_extractor_time.json"),
                'w') as fp:
            json.dump(DFG_extract_time, fp, indent=4)
    with open(
            os.path.join(fea_path_origion,
                         binary_name + "_mfe_extractor_time.json"),
            'w') as fp:
        json.dump(FEATURE_extract_time, fp, indent=4)
    return
import idaapi from cfg import * ENTRY_FUNC = 0xffffffff8178ffa0 mycg = CallGraph() mycfg = CFG() entry_func = idaapi.get_func(ENTRY_FUNC) visited = set() def traverse(func): func_name = Name(func.startEA) assert func_name if func_name in visited: return visited.add(func_name) myfunc = Function(func.startEA, func_name) f = idaapi.FlowChart(func) for bb in f: mybb = BasicBlock(bb.startEA, bb.endEA - bb.startEA)