def get_flow_code_from_address(address):
    """Collect the run of flow-connected instructions starting at ``address``.

    Intended for basic blocks that IDA marks as chunks but does not
    attribute to the function under examination: a chunk can be assigned
    to only one function, and walking the flow flags sidesteps that limit.

    Returns ``None`` when ``address`` itself is not code. Otherwise returns
    a ``(start, end)`` tuple where ``end`` is the address of the last
    collected instruction plus one, so that ``Heads(start, end)`` (whose
    upper limit is exclusive) still yields that last instruction.
    """
    if not idc.isCode(idc.GetFlags(address)):
        return None

    visited = [address]
    cursor = address
    while True:
        # Step over the current item to reach the following element.
        cursor += idc.ItemSize(cursor)
        item_flags = idc.GetFlags(cursor)

        # Stop as soon as the next element is not an instruction that
        # execution flows into from its predecessor.
        if not (idc.isCode(item_flags) and idc.isFlow(item_flags)):
            break
        visited.append(cursor)

    # ``visited`` always holds at least the starting instruction here.
    return (min(visited), max(visited) + 1)
def get_flow_code_from_address(address):
    """Return the bounds of the instruction sequence beginning at ``address``.

    Used to gather basic blocks that IDA treats as chunks without
    attributing them to the function being examined (IDA assigns a chunk
    to a single function only).

    The result is ``None`` if ``address`` is not code, else a tuple
    ``(lowest, highest + 1)`` over the collected instruction addresses.
    The ``+ 1`` exists because ``Heads(start, end)`` excludes ``end``; the
    limit must lie past the last instruction for it to be retrieved.
    """
    if not idc.isCode(idc.GetFlags(address)):
        return None

    chunk = [address]

    # Look at the element that follows the current one; keep going while
    # it is an instruction that "flow" goes into.
    candidate = address + idc.ItemSize(address)
    candidate_flags = idc.GetFlags(candidate)
    while idc.isCode(candidate_flags) and idc.isFlow(candidate_flags):
        chunk.append(candidate)
        candidate = candidate + idc.ItemSize(candidate)
        candidate_flags = idc.GetFlags(candidate)

    # At least one instruction (the starting one) is always present.
    return (min(chunk), max(chunk) + 1)
def is_end_of_flow(self, instruction):
    """Return whether ``instruction`` ends the flow.

    The flow ends unless the item immediately following the instruction
    (``instruction.ip`` plus its item size) is code that execution flows
    into.
    """
    following = instruction.ip + idc.ItemSize(instruction.ip)
    following_flags = idc.GetFlags(following)
    return not (idc.isCode(following_flags) and idc.isFlow(following_flags))
def is_end_of_flow(self, instruction):
    """Tell whether the flow stops after ``instruction``.

    Returns ``False`` only when the next item (located at
    ``instruction.ip`` plus the item size) is code with the flow flag
    set; ``True`` in every other case.
    """
    successor = instruction.ip + idc.ItemSize(instruction.ip)
    successor_flags = idc.GetFlags(successor)

    # De Morgan form of "not (code and flow)"; short-circuit order is
    # unchanged: the flow flag is consulted only for code items.
    if not idc.isCode(successor_flags) or not idc.isFlow(successor_flags):
        return True
    return False
def is_conditional_branch(self, instruction):
    """Return whether ``instruction`` is a conditional branch.

    That is the case when the item right after the instruction is code
    that execution flows into, and the instruction's ``itype`` is listed
    in ``self.INSTRUCTIONS_BRANCH``.
    """
    fallthrough = instruction.ip + idc.ItemSize(instruction.ip)
    fallthrough_flags = idc.GetFlags(fallthrough)

    if not idc.isCode(fallthrough_flags):
        return False
    if not idc.isFlow(fallthrough_flags):
        return False
    return instruction.itype in self.INSTRUCTIONS_BRANCH
def is_unconditional_branch(self, instruction):
    """Return whether ``instruction`` is an unconditional branch.

    An instruction qualifies when its ``itype`` is listed in
    ``self.INSTRUCTIONS_BRANCH`` and execution does not fall through to
    the item that follows it, i.e. that item is not code or does not have
    the flow flag set.

    The fall-through test is grouped explicitly. Written as
    ``A and B or C``, Python evaluates ``(A and B) or C``, which would
    report any instruction without a flowed-into successor as an
    unconditional branch even when it is not a branch at all.
    """
    next_addr = instruction.ip + idc.ItemSize(instruction.ip)
    next_addr_flags = idc.GetFlags(next_addr)

    if instruction.itype not in self.INSTRUCTIONS_BRANCH:
        return False

    falls_through = (idc.isCode(next_addr_flags)
                     and idc.isFlow(next_addr_flags))
    return not falls_through
def GetFunEdgesAndBbls(function_ea):
    """
    Get the instrumented basic blocks (bbls) and edges of a function.

    @function_ea - function address
    @return - tuple (bbls, edges_d, edges_count, SingleBBS, MultiBBS):
        bbls        - dict, bbl head -> bbl, where a bbl is the list
                      [head, tail, call_num, mem_num] (mem_num is never
                      updated here and stays 0)
        edges_d     - dict, source bbl head -> list of destination heads
        edges_count - number of entries appended to edges_d
        SingleBBS   - dict, destination head -> its only predecessor bbl
        MultiBBS    - dict, destination head -> list of predecessor bbls

    If IsInstrumentIns(function_ea) is false, all containers are returned
    empty and edges_count is 0.
    """
    bbl = []        # bbl info [head, tail, call_num, mem_num]
    SingleBBS = {}  # head -> pred_bbl
    MultiBBS = {}   # head -> [pred_bbls]
    bbls = {}       # head -> bbl
    bbls2 = {}      # tail -> bbl
    edges_d = {}    # head -> [head, ..., head]
    edges_count = 0
    edges_s_t = set()  # tmp edges set of (source instruction, dest head)
    edges_d_t = {}     # tmp edges dict, before the instrumentation filter

    if not IsInstrumentIns(function_ea):
        return bbls, edges_d, edges_count, SingleBBS, MultiBBS

    f_start = function_ea
    f_end = idc.FindFuncEnd(function_ea)

    boundaries = set((f_start, ))  # heads of bbls

    # Pass 1: find bbl boundaries and raw instruction-level edges.
    for head in idautils.Heads(f_start, f_end):
        if head == idaapi.BADADDR:
            raise Exception("Invalid head for parsing")
        # Only instructions are of interest.
        if not idc.isCode(idc.GetFlags(head)):
            continue

        # Get the references made from the current instruction
        # and keep only the ones local to the function.
        refs = set()
        for ref in idautils.CodeRefsFrom(head, 0):
            # can't use ref >= f_start, avoid recursion
            if ref > f_start and ref < f_end:
                refs.add(ref)
        if not refs:
            continue

        # If the flow continues also to the next (address-wise)
        # instruction, we add a reference to it.
        # For instance, a conditional jump will not branch
        # if the condition is not met, so we save that
        # reference as well.
        next_head = idc.NextHead(head, f_end)
        if (next_head != idaapi.BADADDR
                and idc.isFlow(idc.GetFlags(next_head))):
            refs.add(next_head)

        # Update the boundaries found so far.
        boundaries.update(refs)

        for r in refs:
            # If the flow could also come from the address
            # previous to the destination of the branching
            # an edge is created.
            if idc.isFlow(idc.GetFlags(r)):
                prev_head = idc.PrevHead(r, f_start)
                # Compare against idaapi.BADADDR rather than a
                # hard-coded 32-bit constant so the "no previous head"
                # case is also recognised on 64-bit databases.
                if prev_head != idaapi.BADADDR:
                    edges_s_t.add((prev_head, r))
            edges_s_t.add((head, r))

    last_head = 0
    # NOTE: We can handle if jump xrefs to chunk address space.
    # Pass 2: build the bbls. The head of a bbl is its first instruction
    # address, the tail is its last instruction address.
    for head in idautils.Heads(f_start, f_end):
        mnem = idc.GetMnem(head)
        if head in boundaries:
            if len(bbl) > 0:
                if bbl[0] == head:
                    # NOTE(review): this skips the call counting below
                    # for the first instruction of such a bbl - confirm
                    # that is intended.
                    continue
                # Close the bbl that ends right before this boundary.
                bbl[1] = last_head
                bbls[bbl[0]] = bbl
                bbls2[bbl[1]] = bbl
            bbl = [head, 0, 0, 0]
        elif mnem.startswith('j'):
            if len(bbl) > 0 and bbl[0] == head + idc.ItemSize(head):
                continue
            # A jump terminates the current bbl; the next one starts
            # at the instruction following the jump.
            bbl[1] = head
            bbls[bbl[0]] = bbl
            bbls2[bbl[1]] = bbl
            bbl = [head + idc.ItemSize(head), 0, 0, 0]
        else:
            # NOTE(review): last_head is only refreshed for instructions
            # that are neither a boundary nor a jump, so a bbl made of a
            # single boundary instruction appears to get a stale tail -
            # verify against the expected edge output.
            last_head = head

        if mnem.startswith('call'):
            bbl[2] += 1

    # add last basic block
    if len(bbl) and bbl[0] != f_end:
        bbl[1] = f_end
        bbls[bbl[0]] = bbl
        bbls2[bbl[1]] = bbl

    # edges set -> dict, keyed by the head of the bbl whose tail is the
    # source instruction of the edge.
    for e in edges_s_t:
        if e[0] in bbls2:
            bbl_head = bbls2[e[0]][0]
            edges_d_t.setdefault(bbl_head, []).append(e[1])
        else:
            # a small case. e1 flow e0.
            print('edge (%x, %x) can not find head bbl.' % (e[0], e[1]))

    # revise edges. head bbl and tail bbl of edges must be instrumented.
    for e0 in edges_d_t:
        if not IsInstrumentIns(e0):
            # e0 is not instrumented, skip.
            continue
        for e1 in edges_d_t[e0]:
            if IsInstrumentIns(e1):
                # e0 and e1 are both instrumented, add edge.
                edges_d.setdefault(e0, []).append(e1)
                edges_count += 1
            else:
                # e1 is not instrumented: use whatever bbls
                # LookForInsChildBbls returns for it as destinations.
                bbls_t = LookForInsChildBbls(e1, edges_d_t, [])
                for b in bbls_t:
                    edges_d.setdefault(e0, []).append(b)
                    edges_count += 1

    # revise bbls. bbl must be instrumented. Iterate over a snapshot of
    # the keys because entries are removed inside the loop.
    for b in list(bbls):
        if not IsInstrumentIns(b):
            bbls.pop(b)

    # Build the predecessor maps: a destination with one predecessor
    # lives in SingleBBS and moves to MultiBBS once a second one shows up.
    for e0 in edges_d:
        if e0 not in bbls:
            print('error:%x have no head' % (e0))
            continue
        for e1 in edges_d[e0]:
            if e1 in MultiBBS:
                MultiBBS[e1].append(bbls[e0])
            elif e1 in SingleBBS:
                MultiBBS[e1] = [SingleBBS[e1], bbls[e0]]
                SingleBBS.pop(e1)
            else:
                SingleBBS[e1] = bbls[e0]

    return bbls, edges_d, edges_count, SingleBBS, MultiBBS
def GetFunBbls(function_ea):
    """
    Split a function into basic blocks (bbls).

    @function_ea - function address
    @return - list of bbls; each bbl is a list [start, end] where end is
              the address at which the following bbl starts (or the
              function end for the last bbl)
    """
    func_start = function_ea
    func_end = idc.FindFuncEnd(function_ea)

    # Addresses at which a new bbl begins.
    leaders = set([func_start])

    for ea in idautils.Heads(func_start, func_end):
        if ea == idaapi.BADADDR:
            raise Exception("Invalid head for parsing")
        # Only instructions can produce code references.
        if not idc.isCode(idc.GetFlags(ea)):
            continue

        # Code references from this instruction that stay inside the
        # function.
        local_targets = set(
            target for target in idautils.CodeRefsFrom(ea, 0)
            if target >= func_start and target < func_end)
        if not local_targets:
            continue

        # When execution can also continue to the next (address-wise)
        # instruction - e.g. a conditional jump that is not taken - that
        # instruction starts a bbl as well.
        fallthrough = idc.NextHead(ea, func_end)
        if (fallthrough != idaapi.BADADDR
                and idc.isFlow(idc.GetFlags(fallthrough))):
            local_targets.add(fallthrough)

        leaders.update(local_targets)

    blocks = []
    current = []  # [start] while open, [start, end] once closed

    # NOTE: We can handle if jump xrefs to chunk address space.
    for ea in idautils.Heads(func_start, func_end):
        if ea in leaders:
            if current:
                if current[0] == ea:
                    # Already opened by the preceding jump.
                    continue
                current.append(ea)
                blocks.append(current)
            current = [ea]
        elif idc.GetMnem(ea).startswith('j'):
            after_jump = ea + idc.ItemSize(ea)
            if current and current[0] == after_jump:
                continue
            # A jump closes the current bbl; the next bbl starts right
            # after it.
            current.append(after_jump)
            blocks.append(current)
            current = [after_jump]

    # add last basic block
    if current and current[0] != func_end:
        current.append(func_end)
        blocks.append(current)

    return blocks
def control_flows_to_address(address):
    """Return the result of ``idc.isFlow`` for the flags of ``address``."""
    return idc.isFlow(idc.GetFlags(address))
def is_flow(self):
    """Report whether execution flows here from the previous instruction.

    Evaluates ``idc.isFlow`` on ``self.flags``. For reference, idc.py
    defines: FF_FLOW = idaapi.FF_FLOW  # Exec flow from prev instruction?
    """
    own_flags = self.flags
    return idc.isFlow(own_flags)