def get_function_basic_blocks(funcea, func=None):
    """
    Return the non-empty basic blocks of the function at ``funcea``.

    Results are memoised in the module-level ``function_basic_block_cache``
    (keyed by ``funcea``); on a hit the cached list object itself is returned.

    :param funcea: address used as the cache key and stored as ``funcEA`` in
                   every block description.
    :param func:   optional function object; when ``None`` it is looked up
                   with ``idaapi.get_func(funcea)``.
    :return: a list of dicts with the keys ``funcEA``, ``startEA``, ``endEA``
             and ``block_type``, sorted by ``startEA``.
    """
    try:
        return function_basic_block_cache[funcea]
    except KeyError:
        # cache miss: fall through and compute the block list
        pass

    if func is None:
        func = idaapi.get_func(funcea)
    func_start, func_end = func.startEA, func.endEA

    chart = idaapi.qflow_chart_t()
    chart.create("", func, func_start, func_end, 0)

    blocks = []
    for index in range(chart.size()):
        block = chart[index]
        first, last = block.startEA, block.endEA

        # zero-sized blocks are not reported
        if first == last:
            continue

        blocks.append({
            'funcEA': funcea,
            'startEA': first,
            'endEA': last,
            'block_type': chart.calc_block_type(index),
        })

    blocks.sort(key=lambda entry: entry["startEA"])

    function_basic_block_cache[funcea] = blocks
    return blocks
def map_flowchart(function_address):
    """
    Snapshot the node bounds of a function's flowchart.

    -----------------------------------------------------------------------

    The original author notes that walking IDAPython flowcharts repeatedly
    can be slow; this extracts the layout once so callers can work from a
    plain dict instead.

    -----------------------------------------------------------------------

    Output:

        +- flowchart_nodes:
        |    a map keyed by node id, holding a tuple of node bounds
        |
        |      eg: { int(node_id): (startEA, endEA), ... }
        '

    """

    def node_bounds(node):
        return (node.startEA, node.endEA)

    # build the flowchart for the function containing this address
    function = idaapi.get_func(function_address)
    flowchart = idaapi.qflow_chart_t("", function, idaapi.BADADDR, idaapi.BADADDR, 0)

    # record (start, end) for every node id in the chart
    return {
        node_id: node_bounds(flowchart[node_id])
        for node_id in xrange(flowchart.size())
    }
def get(self, address):
    """
    Cached lookup of the flowchart entry covering a given address.

    ``self.cache`` is scanned for an entry whose flowchart bounds contain
    ``address`` (``startEA <= address < endEA``). On a miss, a new flowchart
    is built for the function containing ``address``, stored through
    ``self.set`` and returned.

    :param address: the address to locate.
    :return: a ``(flowchart, index)`` cache entry; freshly created entries
             carry the index 0.
    :raises ValueError: if ``address`` does not fall inside a defined
                        function, so no flowchart can be built for it.
    """

    # cache hit
    for cache_entry in self.cache:
        bounds = cache_entry[0].bounds
        if bounds.startEA <= address < bounds.endEA:
            return cache_entry

    #
    # flow chart is NOT in the cache...
    #

    function = idaapi.get_func(address)

    # refuse to build (and cache) a flowchart for 'no function': such an
    # entry could never satisfy a later lookup and would only occupy a slot
    if function is None:
        raise ValueError("0x%08X does not fall within a defined function" % address)

    flowchart = idaapi.qflow_chart_t("", function, idaapi.BADADDR, idaapi.BADADDR, 0)

    # cache the newly created flowchart, then hand the entry back
    cache_entry = (flowchart, 0)
    self.set(cache_entry)
    return cache_entry
def init_function_converage():
    """
    Create an empty coverage object for every function in the database.

    Returns a dict mapping each address yielded by ``idautils.Functions()``
    to a ``FunctionCoverage`` built from that function's flowchart.
    """

    def blank_coverage(address):
        # one flowchart per function, wrapped in a fresh coverage object
        function = idaapi.get_func(address)
        flowchart = idaapi.qflow_chart_t("", function, idaapi.BADADDR, idaapi.BADADDR, 0)
        return FunctionCoverage(flowchart)

    return {
        address: blank_coverage(address)
        for address in idautils.Functions()
    }
def _refresh_nodes(self):
    """
    Refresh the function nodes against the open database.

    Rebuilds ``self.nodes`` (keyed by node start address) from the
    function's flowchart. Node metadata objects are shared through the
    database-wide ``self._database.nodes`` map: an existing entry is reused,
    otherwise a new ``NodeMetadata`` is created and registered there.
    """
    function_metadata, database = self, self._database

    # dispose of stale information
    function_metadata.nodes = {}

    # get function & flowchart object from database
    function = idaapi.get_func(self.address)
    flowchart = idaapi.qflow_chart_t("", function, idaapi.BADADDR, idaapi.BADADDR, 0)

    #
    # walk the flowchart for this function, collecting information on
    # each of its nodes (basic blocks) and populating the function & node
    # metadata objects.
    #

    for node_id in xrange(flowchart.size()):
        node = flowchart[node_id]
        node_address = node.startEA

        #
        # select the node via address from the database-wide node list
        # (a node may be shared between multiple functions). the metadata
        # object is only constructed on a miss, rather than being built
        # eagerly as a throwaway default for every lookup.
        #

        node_metadata = database.nodes.get(node_address)
        if node_metadata is None:
            node_metadata = NodeMetadata(node)

        #
        # a node's id is unique per flowchart (function), so it is saved
        # keyed by this function's address
        #

        node_metadata.ids[self.address] = node_id

        #
        # establish a relationship between this node (basic block) and
        # this function (as one of its owners/xrefs)
        #

        node_metadata.functions[self.address] = function_metadata
        function_metadata.nodes[node_address] = node_metadata

        # finally, ensure the node exists in the database-wide node list
        database.nodes[node_address] = node_metadata
def _refresh_nodes(self):
    """
    Refresh the function nodes against the open database.

    Rebuilds ``self.nodes`` from the function's flowchart, creating one
    ``NodeMetadata(start, end, node_id)`` per non-empty node and keying it
    by the node's start address.
    """
    function_metadata = self

    # throw away whatever was collected by a previous refresh
    function_metadata.nodes = {}

    # fetch the function and build its flowchart
    function = idaapi.get_func(self.address)
    flowchart = idaapi.qflow_chart_t("", function, idaapi.BADADDR, idaapi.BADADDR, 0)

    for node_id in xrange(flowchart.size()):
        node = flowchart[node_id]

        # NOTE/COMPAT: attribute names differ between the two IDA APIs
        if using_ida7api:
            start_ea, end_ea = node.start_ea, node.end_ea
        else:
            start_ea, end_ea = node.startEA, node.endEA

        #
        # only nodes with a non-zero size are recorded. per the original
        # author's note, a zero-sized node is one that another flowchart /
        # function owns.
        #

        if start_ea != end_ea:
            node_metadata = NodeMetadata(start_ea, end_ea, node_id)

            # link the node (basic block) to this function as its parent
            node_metadata.function = function_metadata
            function_metadata.nodes[start_ea] = node_metadata
def clean_node(func):
    """
    Turn every predecessor-less block of ``func`` (other than block 0) into
    byte data, repeating until a pass finds none.

    Each pass rebuilds the flowchart with predecessor information, asserts
    that block 0 starts at the function start, and for every other block
    with no predecessors calls ``MakeUnknown`` followed by ``MakeData`` over
    the block's byte range.
    """
    while True:
        chart = idaapi.qflow_chart_t("The title", func, 0, 0, idaapi.FC_PREDS)
        assert chart[0].start_ea == func.start_ea

        found_orphan = False
        for index in xrange(1, chart.size()):
            block = chart[index]
            if chart.npred(index) == 0:
                # something changed, so another pass over a fresh chart is needed
                found_orphan = True
                length = block.end_ea - block.start_ea
                MakeUnknown(block.start_ea, length, idaapi.DOUNK_SIMPLE)
                MakeData(block.start_ea, idaapi.FF_BYTE, length, 0)

        if not found_orphan:
            break
def raw_main(p=True):
    """
    Walk the flowchart of the function under the cursor.

    When ``p`` is true, prints each block's bounds and index followed by its
    successor and predecessor edges. Returns ``None``; does nothing if
    ``here()`` is not inside a function.
    """
    func = idaapi.get_func(here())
    if not func:
        return

    chart = idaapi.qflow_chart_t("The title", func, 0, 0, idaapi.FC_PREDS)

    for node in xrange(0, chart.size()):
        block = chart[node]
        if p:
            print("%x - %x [%d]:" % (block.start_ea, block.end_ea, node))

        # outgoing edges
        for slot in xrange(0, chart.nsucc(node)):
            if not p:
                continue
            print("SUCC: %d->%d" % (node, chart.succ(node, slot)))

        # incoming edges
        for slot in xrange(0, chart.npred(node)):
            if not p:
                continue
            print("PRED: %d->%d" % (node, chart.pred(node, slot)))
def process_func(ea):
    """
    Print edge / node / return-block counts for the function containing ``ea``.

    Blocks reported by ``is_ret_block`` are counted as returns; the successor
    counts of all other blocks are summed as edges. The printed line also
    carries the two derived figures ``edges - nodes + 2`` and
    ``edges - nodes + rets``. Nothing is printed when ``ea`` is not inside a
    function.
    """
    func = idaapi.get_func(ea)
    if not func:
        return

    chart = idaapi.qflow_chart_t("The title", func, 0, 0, idaapi.FC_PREDS)

    ret_count = 0
    edge_count = 0
    for index in xrange(0, chart.size()):
        if not chart.is_ret_block(index):
            edge_count += chart.nsucc(index)
        else:
            ret_count += 1

    node_count = chart.size()
    summary = "%x %s edges=%d nodes=%d rets=%d E-N+2=%d E-N+rets=%d" % (
        ea,
        GetFunctionName(ea),
        edge_count,
        node_count,
        ret_count,
        edge_count - node_count + 2,
        edge_count - node_count + ret_count,
    )
    print(summary)
def raw_main(p=True):
    """
    Walk the flowchart of the function under the cursor.

    With ``p`` true, one header line per block (bounds and index) is printed,
    followed by a ``SUCC`` line per successor edge and a ``PRED`` line per
    predecessor edge. Returns ``None``; does nothing if ``here()`` is not
    inside a function.
    """
    current = idaapi.get_func(here())
    if not current:
        return

    graph = idaapi.qflow_chart_t("The title", current, 0, 0, idaapi.FC_PREDS)

    for block_id in range(0, graph.size()):
        block = graph[block_id]
        if p:
            print("%x - %x [%d]:" % (block.start_ea, block.end_ea, block_id))

        # successors first, then predecessors
        for k in range(0, graph.nsucc(block_id)):
            if p:
                target = graph.succ(block_id, k)
                print("SUCC: %d->%d" % (block_id, target))

        for k in range(0, graph.npred(block_id)):
            if p:
                source = graph.pred(block_id, k)
                print("PRED: %d->%d" % (block_id, source))
def _refresh_nodes(self):
    """
    Refresh the function nodes against the open database.

    Rebuilds ``self.nodes`` from the function's flowchart: every node whose
    start and end addresses differ gets a ``NodeMetadata(node)`` object that
    records its flowchart id and its parent function, keyed by the node's
    start address.
    """
    function_metadata = self

    # drop the results of any previous refresh
    function_metadata.nodes = {}

    # fetch the function and build its flowchart
    function = idaapi.get_func(self.address)
    flowchart = idaapi.qflow_chart_t("", function, idaapi.BADADDR, idaapi.BADADDR, 0)

    for node_id in xrange(flowchart.size()):
        node = flowchart[node_id]

        # TODO (carried over from the original): zero-sized nodes are skipped
        if node.startEA != node.endEA:
            node_metadata = NodeMetadata(node)

            # remember the node's id within this function's flowchart so it
            # does not have to be located by walking the chart again
            node_metadata.id = node_id

            # link the node (basic block) to this function as its parent
            node_metadata.function = function_metadata
            function_metadata.nodes[node.startEA] = node_metadata
def raw_main(p=True):
    """
    Append a textual dump of every function's basic-block graph to ``res``.

    Functions are enumerated with ``idaapi.get_next_func`` starting from
    address 0. For each one a separator line is appended, then for each
    block: an optional header (only when ``p`` is true), one ``SUCC`` line
    per successor and one ``PRED`` line per predecessor.

    A block without successors whose last instruction is neither a return
    nor one of the known non-returning forms, and which is an indirect jump
    or a call, additionally gets every block of the function recorded as a
    possible successor.

    :param p: when true, emit the ``start - end [index]`` header per block.
    """
    global res

    # disassembly substrings that mark a block as legitimately terminal
    terminal_markers = (
        'abort',
        'exit',
        'hlt',
        '___stack_chk_fail',
        '___assert_fail',
    )

    f = idaapi.get_next_func(0)
    while f:
        q = idaapi.qflow_chart_t("The title", f, 0, 0, idaapi.FC_PREDS)
        res.append("##############################\n")

        for n in xrange(0, q.size()):
            b = q[n]
            if p:
                res.append("%x - %x [%d]:\n" % (b.startEA, b.endEA, n))

            for ns in xrange(0, q.nsucc(n)):
                res.append("SUCC: %d->%d\n" % (n, q.succ(n, ns)))

            pred_set = set()
            for ns in xrange(0, q.npred(n)):
                res.append("PRED: %d->%d\n" % (n, q.pred(n, ns)))
                pred_set.add(q.pred(n, ns))

            # only blocks with no successors need the fallback handling below
            if q.nsucc(n) != 0:
                continue

            # locate the last instruction head of the block
            last_insn = None
            for h in Heads(b.startEA, b.endEA):
                last_insn = h
            if last_insn is None:
                continue

            insn = DecodeInstruction(last_insn)
            if idaapi.is_ret_insn(insn):
                continue

            disasm_str = GetDisasm(last_insn)
            if any(marker in disasm_str for marker in terminal_markers):
                continue

            #
            # a block ending in an indirect jump means IDA could not
            # determine the successors; a block ending in a call (that is
            # not something like abort) is unusual. both cases are handled
            # the same way: every block of the function is treated as a
            # possible successor.
            #
            if idaapi.is_indirect_jump_insn(insn) or idaapi.is_call_insn(insn):
                for tn in xrange(0, q.size()):
                    res.append("SUCC: %d->%d\n" % (n, tn))
                    if tn not in pred_set:
                        res.append("PRED: %d->%d\n" % (tn, n))

        f = idaapi.get_next_func(f.startEA)
def _refresh_nodes(self):
    """
    Refresh the function nodes against the open database.

    Rebuilds ``self.nodes`` from the function's flowchart (one
    ``NodeMetadata`` per non-empty node, keyed by start address) and appends
    to ``self.edges`` a ``(src, dst)`` pair for every code reference leaving
    the last instruction of a node and landing inside this same function.
    """
    function_metadata = self

    # drop the nodes collected by any previous refresh
    # NOTE(review): `edges` is only appended to in this block and never
    # reset here -- confirm it is cleared elsewhere before a re-refresh.
    function_metadata.nodes = {}

    # NOTE/COMPAT: pick the attribute spelling for this IDA API once
    if using_ida7api:
        start_attr, end_attr = "start_ea", "end_ea"
    else:
        start_attr, end_attr = "startEA", "endEA"

    # fetch the function and build its flowchart
    function = idaapi.get_func(self.address)
    flowchart = idaapi.qflow_chart_t("", function, idaapi.BADADDR, idaapi.BADADDR, 0)

    for node_id in xrange(flowchart.size()):
        node = flowchart[node_id]
        node_start = getattr(node, start_attr)
        node_end = getattr(node, end_attr)

        #
        # zero-sized nodes are skipped. per the original author's note,
        # such a node is owned by another flowchart / function.
        #

        if node_start == node_end:
            continue

        # create the metadata object and link it to this function
        node_metadata = NodeMetadata(node_start, node_end, node_id)
        node_metadata.function = function_metadata
        function_metadata.nodes[node_start] = node_metadata

        #
        # record the edges leaving this node's last instruction whose
        # destination belongs to this function
        #

        edge_src = node_metadata.instructions[-1]
        for edge_dst in idautils.CodeRefsFrom(edge_src, True):
            edge_function = idaapi.get_func(edge_dst)
            if not edge_function:
                continue
            if getattr(edge_function, start_attr) == getattr(function, start_attr):
                function_metadata.edges.append((edge_src, edge_dst))
def _refresh_nodes(self):
    """
    Refresh the function nodes against the open database.

    Rebuilds ``self.nodes`` from the function's flowchart (one
    ``NodeMetadata`` per accepted node, keyed by start address) and appends
    to ``self.edges`` a ``(src, dst)`` pair for every code reference leaving
    the last instruction of a node and landing inside this same function.

    A node is skipped when it is zero-sized or when
    ``idaapi.get_func_chunknum`` reports a non-zero chunk number for its
    start address.
    """
    function_metadata = self

    # drop the nodes collected by any previous refresh
    # NOTE(review): `edges` is only appended to in this block and never
    # reset here -- confirm it is cleared elsewhere before a re-refresh.
    function_metadata.nodes = {}

    # NOTE/COMPAT: pick the attribute spelling for this IDA API once
    if using_ida7api:
        start_attr, end_attr = "start_ea", "end_ea"
    else:
        start_attr, end_attr = "startEA", "endEA"

    # fetch the function and build its flowchart
    function = idaapi.get_func(self.address)
    flowchart = idaapi.qflow_chart_t("", function, idaapi.BADADDR, idaapi.BADADDR, 0)

    for node_id in xrange(flowchart.size()):
        node = flowchart[node_id]
        node_start = getattr(node, start_attr)
        node_end = getattr(node, end_attr)

        #
        # zero-sized nodes are skipped. per the original author's note,
        # such a node is owned by another flowchart / function.
        #

        if node_start == node_end:
            continue

        #
        # nodes outside the entry function chunk are skipped. the original
        # author describes this as an attempt to ignore the try/catch/SEH
        # exception handling blocks that IDA 7 includes in the flowchart,
        # on the grounds that coverage for them is rarely of interest.
        #

        if idaapi.get_func_chunknum(function, node_start):
            continue

        # create the metadata object and link it to this function
        node_metadata = NodeMetadata(node_start, node_end, node_id)
        node_metadata.function = function_metadata
        function_metadata.nodes[node_start] = node_metadata

        #
        # record the edges leaving this node's last instruction whose
        # destination belongs to this function
        #

        edge_src = node_metadata.instructions[-1]
        for edge_dst in idautils.CodeRefsFrom(edge_src, True):
            edge_function = idaapi.get_func(edge_dst)
            if not edge_function:
                continue
            if getattr(edge_function, start_attr) == getattr(function, start_attr):
                function_metadata.edges.append((edge_src, edge_dst))
def build_function_coverage(coverage_blocks):
    """
    Map block based coverage data to database defined basic blocks (nodes).

    -----------------------------------------------------------------------

    This is deliberately kept as one large function: the original author
    flags it as a hot path for large data sets and wanted to limit database
    access, searches and instantiations.

    -----------------------------------------------------------------------

    Input:

        +- coverage_blocks:
        |    an iterable of (offset, size) tuples that define coverage
        '

    -----------------------------------------------------------------------

    Output:

        +- function_map:
        |    a map keyed with a function address, holding function coverage
        |
        |      eg: { functionEA: FunctionCoverage(...) }
        |
        +- orphans:
        |    a list of (offset, size) coverage fragments that could not be
        |    mapped into any defined function / node
        |
        |      eg: [(offset, size), ...]
        '

    """
    function_map, orphans = {}, []

    # TODO (carried over from the original): cache size is hard-coded
    FLOWCHART_CACHE_SIZE = 6
    flowchart_cache = FlowChartCache(FLOWCHART_CACHE_SIZE)

    #
    # work queue of coverage blocks. fragments that spill out of a node are
    # pushed back onto the front so they are handled on the next iteration.
    #

    pending = collections.deque(coverage_blocks)
    while pending:
        address, size = pending.popleft()

        # any failure to obtain a flowchart entry makes the block an orphan
        try:
            flowchart, cached_base = flowchart_cache.get(address)
        except Exception:
            orphans.append((address, size))
            continue

        function_address = flowchart.bounds.startEA

        #
        # scan the flowchart's nodes, starting from the index stored with
        # the cache entry and wrapping around, for the node that contains
        # the start of this coverage block
        #

        node_count = flowchart.size()
        for step in xrange(node_count):
            index = (cached_base + step) % node_count
            bb = flowchart[index]

            # not the node this coverage block starts in, keep looking
            if not (bb.startEA <= address < bb.endEA):
                continue

            # fetch (or lazily create) the coverage item for this function
            function_coverage = function_map.get(function_address)
            if function_coverage is None:
                function_coverage = FunctionCoverage(flowchart)
                function_map[function_address] = function_coverage

            # taint the basic block that was hit
            function_coverage.mark_node(bb.startEA)

            #
            # if the coverage block runs past the end of this node, queue
            # the remainder as a new fragment starting at the node's end
            #

            end_address = address + size
            if end_address > bb.endEA:
                pending.appendleft((bb.endEA, end_address - bb.endEA))

            # store the matched index alongside the flowchart in the cache
            flowchart_cache.set((flowchart, index))
            break

        #
        # no node of the flowchart contained the address: record the block
        # as an orphan for later investigation
        #

        else:
            orphans.append((address, size))

    #
    # finally, give every function that received no coverage an empty
    # coverage item.
    #
    # NOTE: linear sweep, no reason to use the flowchart cache here
    #

    for function_address in idautils.Functions():
        if function_address in function_map:
            continue
        function = idaapi.get_func(function_address)
        flowchart = idaapi.qflow_chart_t("", function, idaapi.BADADDR, idaapi.BADADDR, 0)
        function_map[function_address] = FunctionCoverage(flowchart)

    return (function_map, orphans)