def _find_leafs(self):
    '''
    Record every leaf function in self.functions.

    A function is treated as a leaf when none of its instructions is a
    call. Each leaf is stored as a Function object carrying its start and
    end address, the result of self.has_loop() and the argument count
    reported by self.argp.argc(). The collection is finally sorted by the
    xrefs attribute of each entry, largest first.
    '''
    # Loop through every function
    for func_ea in idautils.Functions():
        func = idaapi.get_func(func_ea)
        if not func:
            continue

        start_ea = ida_shims.start_ea(func)
        end_ea = ida_shims.end_ea(func)

        # Loop through all instructions in this function looking for call
        # instructions; if found, then this is not a leaf.
        #
        # The end address is exclusive (the rest of this code base iterates
        # with "ea < end_ea"), so stopping at "<=" would also examine the
        # first instruction located after the function.
        leaf_function = True
        ea = start_ea
        while ea < end_ea:
            # The decoded instruction itself is not needed here; the call
            # is kept because the original code decoded before testing.
            ida_shims.decode_insn(ea)
            if idaapi.is_call_insn(ea):
                leaf_function = False
                break
            ea = ida_shims.next_head(ea)

        if leaf_function:
            self.functions.append(
                Function(start=start_ea,
                         end=end_ea,
                         leaf=True,
                         loop=self.has_loop(func),
                         argc=self.argp.argc(func)))

    # Sort leafs by xref count, largest first
    self.functions.sort(key=lambda f: f.xrefs, reverse=True)
def __init__(self, start_ea, end_ea, quiet=False):
    '''
    Resolve two addresses to function start addresses and hand them to the
    parent class constructor.

    start_ea - Address whose containing function becomes the second value
               passed to the parent; when the address is not inside a
               function, idc.BADADDR is passed instead.
    end_ea   - Address whose containing function becomes the first value
               passed to the parent.
    quiet    - Forwarded unchanged to the parent constructor.

    Raises AlleyCatException when end_ea is not part of a function.
    '''
    # The function lookups are checked explicitly instead of relying on a
    # bare "except:", which would also hide unrelated errors and only works
    # if the address helper raises when given None.
    first_func = idaapi.get_func(end_ea)
    if not first_func:
        raise AlleyCatException("Address 0x%X is not part of a function!" %
                                end_ea)
    start = ida_shims.start_ea(first_func)

    second_func = idaapi.get_func(start_ea)
    if second_func:
        end = ida_shims.start_ea(second_func)
    else:
        end = idc.BADADDR

    # Note the crossover: the function containing end_ea is passed first.
    super(AlleyCatFunctionPaths, self).__init__(start, end, quiet)
def _get_code_block(self, ea):
    '''
    Return the first entry of self.blocks whose address range contains ea
    (start address inclusive, end address exclusive). Returns None when no
    block matches.
    '''
    for candidate in self.blocks:
        lower = ida_shims.start_ea(candidate)
        upper = ida_shims.end_ea(candidate)
        if lower <= ea < upper:
            return candidate

    return None
def has_loop(self, func):
    '''
    A naive method for checking to see if a function contains a loop.
    Works pretty well for simple functions though.

    Returns True when some address inside func references either the
    function's start address or the end address of one of its basic
    blocks from a location at or after that target; otherwise False.
    '''
    func_start_ea = ida_shims.start_ea(func)

    # Candidate targets: the function entry plus the end address of every
    # basic block in the function's flow chart.
    targets = [func_start_ea]
    for block in idaapi.FlowChart(func):
        targets.append(ida_shims.end_ea(block))

    for target in targets:
        for xref in idautils.XrefsTo(target):
            xref_func = idaapi.get_func(xref.frm)

            # A reference may come from an address that belongs to no
            # function; check for that before asking for a start address.
            if not xref_func:
                continue

            if ida_shims.start_ea(xref_func) != func_start_ea:
                continue

            # The reference comes from this same function, at or beyond
            # the address it points to.
            if xref.frm >= target:
                return True

    return False
def _build_function_xrefs(self):
    '''
    For every function in the database, find each location that references
    it and record an IDAProfilerXref under the start address of the
    function containing that location.

    self.functions maps a referencing function's start address to the list
    of IDAProfilerXref entries collected for it. References that do not
    originate inside a function are ignored.
    '''
    for function in idautils.Functions():
        for xref in idautils.XrefsTo(function):
            func = idaapi.get_func(xref.frm)
            if not func:
                continue

            start_ea = ida_shims.start_ea(func)

            # "in" works on both Python 2 and Python 3; dict.has_key()
            # only exists on Python 2.
            if start_ea not in self.functions:
                self.functions[start_ea] = list()

            self.functions[start_ea].append(
                IDAProfilerXref(ea=function,
                                string=ida_shims.get_name(function),
                                xref=xref.frm,
                                type=callable))
def colorize_node(self, ea, color):
    '''
    Apply color to every item in the basic block that contains ea.

    Nothing happens when ea is not inside a function, or when no block of
    the function's flow chart contains it.
    '''
    func = idaapi.get_func(ea)
    if not func:
        return

    for block in idaapi.FlowChart(func):
        first = ida_shims.start_ea(block)
        last = ida_shims.end_ea(block)

        if not (first <= ea < last):
            continue

        # Found the enclosing block: color it from its first address up to
        # (but not including) its end address, then stop searching.
        cursor = first
        while cursor < last:
            idaapi.set_item_color(cursor, color)
            cursor = ida_shims.next_head(cursor)
        break
def argv(self, func):
    '''
    Attempts to identify what types of arguments are passed to a given
    function. Currently unused.

    This is a generator. Nothing is yielded when self.arch.unknown is set.
    Otherwise, for every cross reference to the start of func, it yields a
    list with one slot per entry of self.arch.argv. A slot holds the str
    type when an instruction shortly before the call changes that argument
    register and references a string literal longer than four characters;
    all other slots are None.

    NOTE(review): the same list object is yielded every time and is never
    reset, so values found for one call site stay visible for later ones.
    Confirm whether that is intended before relying on it.
    '''
    args = [None for x in self.arch.argv]

    if self.arch.unknown:
        return

    operand_types = (idaapi.o_reg, idaapi.o_displ, idaapi.o_phrase)

    start_ea = ida_shims.start_ea(func)
    for xref in idautils.XrefsTo(start_ea):
        if idaapi.is_call_insn(xref.frm):
            # The decoded result is not used; the call is kept because the
            # original code decoded the call site before scanning.
            ida_shims.decode_insn(xref.frm)

            # Scan backwards, starting after the call by the architecture's
            # delay slot count and going at most ten instructions before it.
            ea = xref.frm + (self.arch.delay_slot * self.arch.insn_size)
            end_ea = xref.frm - (self.arch.insn_size * 10)

            while ea >= end_ea:
                # Stop at a basic block boundary or at any other call.
                if idaapi.is_basic_block_end(ea) or \
                        (ea != xref.frm and idaapi.is_call_insn(ea)):
                    break

                insn = ida_shims.decode_insn(ea)
                features = ida_shims.get_canon_feature(insn)
                # The operand list does not depend on the operand index, so
                # fetch it once per instruction.
                ops = ida_shims.get_operands(insn)

                for n in range(0, len(self.CHANGE_OPND)):
                    if ops[n].type not in operand_types:
                        continue

                    try:
                        regname = self.arch.registers[ops[n].reg]
                        index = self.arch.argv.index(regname)
                    except ValueError:
                        # Not one of the argument registers.
                        continue

                    if features & self.CHANGE_OPND[n]:
                        # A distinct name is used here on purpose: reusing
                        # "xref" would overwrite the call-site reference
                        # that the loop condition above compares against.
                        for insn_xref in idautils.XrefsFrom(ea):
                            # TODO: Where is this xref type defined?
                            if insn_xref.type == 1:
                                string = ida_shims.get_strlit_contents(
                                    insn_xref.to)
                                if string and len(string) > 4:
                                    args[index] = str
                                break

                ea -= self.arch.insn_size

        yield args
def argc(self, function):
    '''
    Counts the number of arguments used by the specified function.

    Returns 0 when self.arch.unknown is set. Otherwise the function is
    walked one self.arch.insn_size step at a time: an argument register
    that is used before being ruled out counts as an argument (together
    with every argument register that precedes it in self.arch.argv),
    while a register seen as an operand before it was counted rules out
    itself and every later argument register. The walk stops early once
    every argument register has been classified.
    '''
    # Nothing can be determined for an unknown architecture, so bail out
    # before touching the function at all.
    if self.arch.unknown:
        return 0

    operand_types = (idaapi.o_reg, idaapi.o_displ, idaapi.o_phrase)

    argv = set()
    notargv = set()
    ea = ida_shims.start_ea(function)
    end_ea = ida_shims.end_ea(function)

    while ea < end_ea:
        insn = ida_shims.decode_insn(ea)
        features = ida_shims.get_canon_feature(insn)
        # The operand list is the same for every operand index, so fetch
        # it once per instruction rather than once per loop iteration.
        ops = ida_shims.get_operands(insn)

        for n in range(0, len(self.USE_OPND)):
            if ops[n].type not in operand_types:
                continue

            try:
                regname = self.arch.registers[ops[n].reg]
                index = self.arch.argv.index(regname)
            except ValueError:
                # Not one of the argument registers.
                continue

            if features & self.USE_OPND[n] and regname not in notargv:
                argv.update(self.arch.argv[:index + 1])

        for n in range(0, len(self.CHANGE_OPND)):
            if ops[n].type not in operand_types:
                continue

            try:
                regname = self.arch.registers[ops[n].reg]
                index = self.arch.argv.index(regname)
            except ValueError:
                # Not one of the argument registers.
                continue

            # NOTE(review): this loop runs over CHANGE_OPND but never
            # tests the instruction features against it - confirm that is
            # intended.
            if regname not in argv:
                notargv.update(self.arch.argv[index:])

        if argv.union(notargv) == set(self.arch.argv):
            break

        # TODO: Use idc.NextHead(ea) instead...
        ea += self.arch.insn_size

    return len(argv)
def from_function_profiler(arg=None):
    '''
    Show the function profiler chooser limited to the function under the
    cursor.

    arg - Unused; accepted so the function can be invoked with one
          optional argument.

    Any error, including the cursor not being inside a function, is
    reported on standard output instead of being raised.
    '''
    try:
        chooser = IDAFunctionProfilerChooser()
        cur_loc = ida_shims.get_screen_ea()
        func = idaapi.get_func(cur_loc)
        if not func:
            raise Exception("Can't limit profile to just this function, "
                            "because 0x%X is not inside a function!" % cur_loc)

        start_ea = ida_shims.start_ea(func)
        chooser.set_internal_filter(functions=set([start_ea]))
        chooser.show()
    except Exception as e:
        # Written as a call with a single argument so that the line is
        # valid, and prints the same text, on both Python 2 and Python 3.
        print("IDAFunctionProfiler ERROR: %s" % str(e))
def _build_string_xrefs(self):
    '''
    For every string found by idautils.Strings(), find each location that
    references it and record an IDAProfilerXref under the start address of
    the function containing that location.

    self.functions maps a referencing function's start address to the list
    of IDAProfilerXref entries collected for it. References that do not
    originate inside a function are ignored.
    '''
    for string in idautils.Strings():
        key_string = str(string)

        for xref in idautils.XrefsTo(string.ea):
            func = idaapi.get_func(xref.frm)
            if not func:
                continue

            start_ea = ida_shims.start_ea(func)

            # "in" works on both Python 2 and Python 3; dict.has_key()
            # only exists on Python 2.
            if start_ea not in self.functions:
                self.functions[start_ea] = list()

            # Stored under its own name so the loop variable is not
            # rebound while the loop is still running.
            profiler_xref = IDAProfilerXref(ea=string.ea,
                                            string=key_string,
                                            xref=xref.frm,
                                            type=str)
            self.functions[start_ea].append(profiler_xref)
def __init__(self, start_ea, end_ea, quiet=False):
    '''
    Resolve two addresses inside one function to the start addresses of
    their basic blocks and hand those to the parent class constructor.

    start_ea - An address inside the function.
    end_ea   - Another address inside the same function.
    quiet    - Forwarded unchanged to the parent constructor.

    Raises AlleyCatException when either address is not part of a
    function, when the two addresses belong to different functions, or
    when no basic block can be found for one of them.
    '''
    end_func = idaapi.get_func(end_ea)
    start_func = idaapi.get_func(start_ea)

    if not start_func:
        raise AlleyCatException("Address 0x%X is not part of a function!" %
                                start_ea)
    if not end_func:
        raise AlleyCatException("Address 0x%X is not part of a function!" %
                                end_ea)

    # Two addresses are in the same function exactly when their functions
    # start at the same address. Comparing a start address with an end
    # address, as was done before, can never match for a single function
    # and therefore rejected every valid request.
    if ida_shims.start_ea(start_func) != ida_shims.start_ea(end_func):
        raise AlleyCatException("The start and end addresses are not part "
                                "of the same function!")

    self.func = start_func
    self.blocks = [block for block in idaapi.FlowChart(self.func)]

    # Note the crossover: the block containing end_ea is passed to the
    # parent first and the block containing start_ea second.
    end_block = self._get_code_block(start_ea)
    start_block = self._get_code_block(end_ea)

    if not end_block:
        raise AlleyCatException("Failed to find the code block associated "
                                "with address 0x%X" % start_ea)
    if not start_block:
        raise AlleyCatException("Failed to find the code block associated "
                                "with address 0x%X" % end_ea)

    start_block_ea = ida_shims.start_ea(start_block)
    end_block_ea = ida_shims.start_ea(end_block)

    super(AlleyCatCodePaths, self).__init__(
        start_block_ea, end_block_ea, quiet)
def _build_paths(self, start, end=idc.BADADDR):
    '''
    Search backwards from start through cross references and register
    every completed path with self._add_path().

    Each partial path is a list that begins with start and grows by one
    referencing node at a time. Paths are reversed before being passed to
    self._add_path(). The search stops when no partial paths remain, or
    when either the number of finished paths or the number of partial
    paths reaches self.limit.
    '''
    pending = [[start]]

    # Loop while there are still unresolved paths and while all path sizes
    # have not exceeded the limit
    while pending and \
            len(self.paths) < self.limit and \
            len(pending) < self.limit:
        # Always work on the oldest partial path; its last entry is the
        # node whose referrers are looked up next.
        current = pending[0]
        callee = current[-1]

        # Collect the unique nodes that reference the callee, unless this
        # path has already reached the limit.
        callers = set()
        if len(current) < self.limit:
            for xref in idautils.XrefsTo(callee):
                referrer = self._get_code_block(xref.frm)
                if referrer:
                    callers.add(ida_shims.start_ea(referrer))

        if callers:
            # Replace the current path with one extended path per caller.
            base_path = pending.pop(0)
            for caller in callers:
                # Don't want to loop back on ourselves in the same path
                if caller in base_path:
                    continue

                extended = base_path + [caller]
                if caller == end:
                    self._add_path(extended[::-1])
                else:
                    pending.append(extended)
        elif end in current:
            # Nothing references the callee and the path contains the end
            # node: the path is finished.
            pending.pop(0)
            self._add_path(current[::-1])
        else:
            # Nothing references the callee and the end node was never
            # reached: discard the path.
            pending.pop(0)
def generate(self):
    '''
    Build and return a RizzoSignatures object for the current database.

    Four kinds of signatures are generated, each mapping a signature value
    to the start address of the single function it identifies:

        strings    - hash of a string of at least 8 characters that has
                     exactly one cross reference
        formal     - hash of the function's concatenated formal block
                     signatures
        fuzzy      - hash of the function's concatenated fuzzy block
                     signatures
        immediates - immediate values reported for the function's blocks

    A signature value that turns up more than once is removed, so only
    unique values remain. signatures.functions additionally maps every
    function start address to a (name, block signatures) tuple.
    '''
    def record_unique(table, dups, key, value):
        # Keep key in table only while it has been seen exactly once; a
        # second sighting removes it and remembers it in dups for good.
        if key in table:
            del table[key]
            dups.add(key)
        elif key not in dups:
            table[key] = value

    signatures = RizzoSignatures()

    # Generate unique string-based function signatures
    for string in self.strings.values():
        # Only generate signatures on reasonably long strings with one xref
        if len(string.value) >= 8 and len(string.xrefs) == 1:
            func = idaapi.get_func(string.xrefs[0])
            if func:
                # The same string can appear more than once in an IDB, so
                # duplicates are checked for and removed.
                record_unique(signatures.strings,
                              signatures.stringdups,
                              self.sighash(string.value),
                              ida_shims.start_ea(func))

    # Generate formal, fuzzy, and immediate-based function signatures
    for ea in idautils.Functions():
        func = idaapi.get_func(ea)
        if not func:
            continue

        # Generate a signature for each block in this function
        blocks = self.function(func)

        # Build function-wide formal and fuzzy signatures by simply
        # concatenating the individual function block signatures.
        formal = self.sighash(''.join(
            [str(e) for (e, f, i, c) in blocks]))
        fuzzy = self.sighash(''.join(
            [str(f) for (e, f, i, c) in blocks]))

        # Add this signature to the function dictionary.
        start_ea = ida_shims.start_ea(func)
        signatures.functions[start_ea] = (ida_shims.get_name(start_ea),
                                          blocks)

        # Formal and fuzzy signatures must each be unique to one function.
        record_unique(signatures.formal, signatures.formaldups,
                      formal, start_ea)
        record_unique(signatures.fuzzy, signatures.fuzzydups,
                      fuzzy, start_ea)

        # The same goes for every immediate value found in the blocks.
        for (e, f, immediates, c) in blocks:
            for immediate in immediates:
                record_unique(signatures.immediates,
                              signatures.immediatedups,
                              immediate, start_ea)

    # These need not be maintained across function calls,
    # and only add to the size of the saved signature file.
    signatures.fuzzydups = set()
    signatures.formaldups = set()
    signatures.stringdups = set()
    signatures.immediatedups = set()

    # DEBUG
    signatures.show()

    return signatures
def block(self, block):
    '''
    Generate the signatures for a single basic block.

    Returns a tuple:

        (formal signature hash,
         fuzzy signature hash,
         [immediate, values],
         [called, function, names])

    Both hashes are produced by self.sighash() over the concatenated
    signature parts collected from the block's instructions.
    '''
    formal = []
    fuzzy = []
    functions = []
    immediates = []

    ea = ida_shims.start_ea(block)
    while ea < ida_shims.end_ea(block):
        insn = ida_shims.decode_insn(ea)

        # Get a list of all data/code refs from the current instruction
        drefs = list(idautils.DataRefsFrom(ea))
        crefs = list(idautils.CodeRefsFrom(ea, False))

        # Add all instruction mnemonics to the formal block hash
        formal.append(ida_shims.print_insn_mnem(ea))

        # If this is a call instruction, be sure to note the name of the
        # function being called. This is used to apply call-based
        # signatures to functions.
        #
        # For fuzzy signatures, we can't use the actual name or EA of the
        # function, but rather just want to note that a function call was
        # made.
        #
        # Formal signatures already have the call instruction mnemonic,
        # which is more specific than just saying that a call was made.
        if idaapi.is_call_insn(ea):
            for cref in crefs:
                func_name = ida_shims.get_name(cref)
                if func_name:
                    functions.append(func_name)
                    fuzzy.append("funcref")
        # If there are data references from the instruction, check to see
        # if any of them are strings. These are looked up in the
        # pre-generated strings dictionary.
        #
        # String values are easily identifiable, and are used as part of
        # both the fuzzy and the formal signatures.
        #
        # It is more difficult to determine if non-string values are
        # constants or not; for both fuzzy and formal signatures, just use
        # "dataref" to indicate that some data was referenced.
        elif drefs:
            for dref in drefs:
                # "in" works on both Python 2 and Python 3; dict.has_key()
                # only exists on Python 2.
                if dref in self.strings:
                    formal.append(self.strings[dref].value)
                    fuzzy.append(self.strings[dref].value)
                else:
                    formal.append("dataref")
                    fuzzy.append("dataref")
        # If there are no data or code references from the instruction, use
        # every operand as part of the formal signature. (There are no data
        # references left at this point, so only code references need to be
        # ruled out.)
        #
        # Fuzzy signatures are only concerned with interesting immediate
        # values, that is, values of at least 0xFFFF that are not memory
        # addresses and are not displayed as negative values.
        elif not crefs:
            ops = ida_shims.get_operands(insn)
            for n in range(0, len(ops)):
                opnd_text = ida_shims.print_operand(ea, n)
                formal.append(opnd_text)

                if ops[n].type == idaapi.o_imm and \
                        not opnd_text.startswith('-'):
                    if ops[n].value >= 0xFFFF:
                        # Flags of zero are taken to mean that the value
                        # is not an address known to the database.
                        if ida_shims.get_full_flags(ops[n].value) == 0:
                            fuzzy.append(str(ops[n].value))
                            immediates.append(ops[n].value)

        ea = ida_shims.next_head(ea)

    return (self.sighash(''.join(formal)),
            self.sighash(''.join(fuzzy)),
            immediates,
            functions)
def _current_function(self):
    '''
    Look up the function at the current screen address and return the
    result of ida_shims.start_ea() for it.
    '''
    cursor_ea = ida_shims.get_screen_ea()
    return ida_shims.start_ea(idaapi.get_func(cursor_ea))