def generate_refs(self):
    """Record, per function, the references that reach into the init segments.

    Every item of every function found in ``self.non_init_segm`` is scanned
    for data and code references (ordinary flow included).  When a reference
    lands inside ``self.init_segm`` the function gets an entry in
    ``self.refs`` shaped as ``{'to': set(), 'from': set()}``:

    * ``'from'`` collects ``(item, xref)`` pairs, one per matching reference;
    * ``'to'`` collects every data/code reference that targets the function.
    """
    for seg_start, seg_end in self.non_init_segm:
        for func in idautils.Functions(seg_start, seg_end):
            for item in idautils.FuncItems(func):
                outgoing = chain(idautils.DataRefsFrom(item),
                                 idautils.CodeRefsFrom(item, 1))
                for xref in outgoing:
                    if not self.is_ea_in_segs(xref, self.init_segm):
                        continue

                    # A missing (or falsy) entry is replaced by a fresh one.
                    entry = self.refs.get(func, None)
                    if not entry:
                        entry = {'to': set(), 'from': set()}
                        self.refs[func] = entry

                    entry['from'].add((item, xref))
                    entry['to'].update(
                        chain(idautils.DataRefsTo(func),
                              idautils.CodeRefsTo(func, 1)))
def extract_function_calls_from(f, bb, insn):
    """Yield a "calls from" characteristic for each target of a call instruction.

    The feature is most relevant at the function scope, but it is cheapest to
    collect while instructions are being visited, so it is produced here.
    Nothing is yielded when ``insn`` is not a call.

    args:
        f (IDA func_t)
        bb (IDA BasicBlock)
        insn (IDA insn_t)
    """
    if not idaapi.is_call_insn(insn):
        return

    # Only explicit code references are used; ordinary flow is excluded.
    for target in idautils.CodeRefsFrom(insn.ea, False):
        yield Characteristic("calls from"), target
def goto_ref(addresses, code=False, data=False):
    """Yield the targets referenced from each of ``addresses``.

    ``code`` enables code references (ordinary flow excluded) and ``data``
    enables data references; with both disabled nothing is yielded.  A target
    equal to ``address + 4`` is skipped.
    """
    for address in addresses:
        # Collect everything first so the reference queries are finished
        # before the first target is handed to the consumer.
        found = []
        if code:
            found.extend(idautils.CodeRefsFrom(address, 0))
        if data:
            found.extend(idautils.DataRefsFrom(address))

        for target in found:
            if address + 4 != target:
                yield target
def retrieveExterns(bl, ea_externs):
    """Collect the externals referenced by the instructions of a block.

    args:
        bl: indexable pair; ``bl[0]`` is the first address to visit and
            ``bl[1]`` the (exclusive) end address.
        ea_externs: mapping from address to the value to report for it.

    returns:
        list with, for every visited head, ``ea_externs[ref]`` of the first
        code reference (ordinary flow included) that is a key of
        ``ea_externs``.  Heads without such a reference contribute nothing.
    """
    start = bl[0]
    end = bl[1]

    externs = []
    inst_addr = start
    while inst_addr < end:
        # Explicit first-match search: "no matching reference" is the normal
        # case and is not signalled through a swallowed exception, so real
        # errors from the API are no longer hidden.
        for ref in idautils.CodeRefsFrom(inst_addr, 1):
            if ref in ea_externs:
                externs.append(ea_externs[ref])
                break
        inst_addr = idc.next_head(inst_addr)
    return externs
def propagate_dead_code(self, ea, op_map):
    """Mark ``ea`` as dead when it has no live predecessor, then recurse.

    A predecessor counts as live when it is not yet in
    ``self.marked_addresses`` and ``self.dead_br_of_op(ea, pred, op_map)`` is
    false.  If none is live, the item at ``ea`` is coloured ``0x0000ff``,
    ``ea`` is recorded in ``self.marked_addresses`` and every successor of
    ``ea`` is examined the same way.
    """
    live_preds = []
    for pred in idautils.CodeRefsTo(ea, True):
        if pred not in self.marked_addresses \
                and not self.dead_br_of_op(ea, pred, op_map):
            live_preds.append(pred)

    if live_preds:
        return

    # No legit predecessor is left: this address is dead.
    idc.SetColor(ea, idc.CIC_ITEM, 0x0000ff)
    self.marked_addresses[ea] = None

    # Materialise the successors before recursing.
    successors = list(idautils.CodeRefsFrom(ea, True))
    for succ in successors:
        self.propagate_dead_code(succ, op_map)
def recoverBlock(startEA):
    """Build the Block starting at ``startEA`` by walking instructions linearly.

    The walk continues over instructions whose only code reference is the
    next instruction, and over calls, unless the instruction is an
    unconditional jump.  It stops at:

    * an undecodable location: a ``0xCC`` byte is included in the block,
      anything else ends the block right before it (with a warning);
    * any other instruction: the block ends after it and every non-external
      code reference becomes a successor (none for a ``ret``-like instruction
      with no references).
    """
    block = Block(startEA)
    ea = startEA

    while True:
        decoded = idautils.DecodeInstruction(ea)
        if decoded is None:
            if idc.Byte(ea) == 0xCC:
                block.endEA = ea + 1
            else:
                sys.stdout.write(
                    "WARNING: Couldn't decode insn at: {0:x}. Ending block.\n".format(ea))
                block.endEA = ea
            return block

        next_ea = ea + decoded.size
        targets = list(idautils.CodeRefsFrom(ea, 1))

        # Single fall-through reference (or a call): stay in this block,
        # except for a jump to the very next instruction ("JMP 0").
        straight_line = targets == [next_ea] or isCall(ea)
        if straight_line and not isUnconditionalJump(ea):
            ea = next_ea
            continue

        # Every remaining case closes the block after this instruction.
        block.endEA = next_ea
        for target in targets:
            # do not decode external code refs
            if not isExternalReference(target):
                block.succs.append(target)
        return block
def get_static_successors(inst):
    """Returns the statically known successors of an instruction.

    Generator.  Depending on the kind of ``inst`` it yields the fall-through
    address (``inst.next_ea``), the direct branch target, and/or the code
    references IDA recorded for a recognised switch.  When no category
    matches, nothing is yielded and a debug message is logged.
    """
    # Direct function call. The successor will be the fall-through instruction
    # unless the target of the function call looks like a `noreturn` function.
    if inst.is_direct_function_call():
        called_ea = get_direct_branch_target(inst.ea)
        flags = idc.GetFunctionFlags(called_ea)
        if 0 < flags and (flags & idaapi.FUNC_NORET):
            log.debug("Call to noreturn function {:08x} at {:08x}".format(
                called_ea, inst.ea))
        else:
            yield inst.next_ea  # Not recognised as a `noreturn` function.

    # NOTE(review): this is a separate `if`, not an `elif`.  If `is_call()` is
    # also true for direct function calls, the fall-through is yielded here
    # regardless of the `noreturn` check above -- confirm what `is_call()`
    # covers before changing it.
    if inst.is_call():  # Indirect function call, system call.
        yield inst.next_ea

    elif inst.is_conditional_branch():
        yield inst.next_ea
        yield get_direct_branch_target(inst.ea)

    elif inst.is_direct_jump():
        yield get_direct_branch_target(inst.ea)

    elif inst.is_indirect_jump():
        # Only indirect jumps that IDA recognised as a switch have known
        # targets; the xrefs are queried lazily, only on this path.
        si = idaapi.get_switch_info_ex(inst.ea)
        if si:
            for case_ea in idautils.CodeRefsFrom(inst.ea, True):
                yield case_ea

    elif inst.is_fall_through():
        yield inst.next_ea

    else:
        log.debug("No static successors of {:08x}".format(inst.ea))
def is_basic_block_tight_loop(bb):
    """Tell whether a basic block loops back to its own start.

    True when the last instruction of the block has a code reference
    (ordinary flow included) to ``bb.start_ea``.  A block whose last
    instruction is not located after ``bb.start_ea`` is never a tight loop.

    args:
        bb (IDA BasicBlock)
    """
    last_insn_ea = idc.prev_head(bb.end_ea)
    if bb.start_ea >= last_insn_ea:
        return False
    return any(target == bb.start_ea
               for target in idautils.CodeRefsFrom(last_insn_ea, True))
def analyse_indirect_jump(block, jump_inst, blocks):
    """Analyse an indirect jump and try to determine its targets.

    When IDA has switch information for ``jump_inst``, every code reference
    leaving the jump is treated as a target and the basic block at that
    address gets ``address_is_taken = True``.  Without switch information
    nothing is marked.  ``block`` and ``blocks`` are not read.
    """
    log.info("Analysing indirect jump at {:08x}".format(jump_inst.ea))

    switch_info = idaapi.get_switch_info_ex(jump_inst.ea)
    if not switch_info:
        return

    table_size = switch_info.get_jtable_size()
    log.info(
        "IDA identified a jump table at {:08x} with {} targets".format(
            jump_inst.ea, table_size))

    # A set, so each distinct target is marked once.
    for target_ea in set(idautils.CodeRefsFrom(jump_inst.ea, True)):
        target_block = program.get_basic_block(target_ea)
        target_block.address_is_taken = True
def isElfThunk(ea):
    """Detect an unconditional jump to an external reference in a linked ELF.

    returns:
        ``(True, name)`` where ``name`` is ``getFunctionName`` of the first
        external code reference leaving ``ea``; ``(False, None)`` when the
        binary is not a linked ELF, ``ea`` is not an unconditional jump, or
        no reference is external.
    """
    if isLinkedElf() and isUnconditionalJump(ea):
        for target in idautils.CodeRefsFrom(ea, 0):
            if isExternalReference(target):
                return True, getFunctionName(target)
    return False, None
def extract_insn_cross_section_cflow(f, bb, insn):
    """Yield "cross section flow" when a CALL or JMP leaves its own segment.

    One feature is produced per code reference (ordinary flow excluded) whose
    target is not a known import, resolves to a segment, and lies in a
    different segment than the instruction.

    args:
        f (IDA func_t)
        bb (IDA BasicBlock)
        insn (IDA insn_t)
    """
    for target in idautils.CodeRefsFrom(insn.ea, False):
        # ignore API calls
        if target in get_imports(f.ctx).keys():
            continue

        target_seg = idaapi.getseg(target)
        # handle IDA API bug: the target may not resolve to any segment
        if not target_seg:
            continue

        stays_in_segment = target_seg == idaapi.getseg(insn.ea)
        if not stays_in_segment:
            yield Characteristic("cross section flow"), insn.ea
def check_for_api_call(ctx, insn):
    """Yield ``"<first>.<second>"`` import names for the APIs a call reaches.

    For every code reference of a call instruction the import table is
    consulted directly.  If the target is not an import but belongs to a
    function flagged as a thunk, the data references leaving the target are
    looked up instead.  Non-call instructions yield nothing.
    """
    if not idaapi.is_call_insn(insn):
        return

    for target in idautils.CodeRefsFrom(insn.ea, False):
        info = get_imports(ctx).get(target, ())
        if info:
            yield "%s.%s" % (info[0], info[1])
            continue

        # check if call to thunk
        # TODO: first instruction might not always be the thunk
        func = idaapi.get_func(target)
        if not (func and (func.flags & idaapi.FUNC_THUNK)):
            continue

        for thunk_target in idautils.DataRefsFrom(target):
            # TODO: always data ref for thunk??
            info = get_imports(ctx).get(thunk_target, ())
            if info:
                yield "%s.%s" % (info[0], info[1])
def ShowFunctionsBrowser(mea=None, show_runtime=False, show_string=True, mynav=None):
    """Open a "Code Refs Browser" for the code references leaving an address.

    args:
        mea: address to start from; the current screen address when None.
        show_runtime: stored on the browser as ``show_runtime_functions``.
        show_string: stored on the browser as ``show_string``.
        mynav: stored on the browser as ``mynav``.

    The user is asked for the maximum recursion level; a zero or cancelled
    answer aborts without opening the browser.  Any error is reported on the
    output instead of being raised.
    """
    try:
        ea = idc.ScreenEA() if mea is None else mea

        num = idc.AskLong(3, "Maximum recursion level")
        if not num:
            return

        # NOTE(review): BADADDR is passed as the `flow` argument; being
        # non-zero it presumably just enables ordinary-flow refs -- confirm.
        result = list(idautils.CodeRefsFrom(ea, idc.BADADDR))
        g = FunctionsBrowser("Code Refs Browser %s" % idc.GetFunctionName(ea), ea, result)
        g.max_level = num
        # Honour the caller's choice (this used to be hard-coded to True,
        # silently ignoring the `show_string` parameter).
        g.show_string = show_string
        g.show_runtime_functions = show_runtime
        g.mynav = mynav
        g.Show()
    except Exception:
        # Same output as before ("Error <message>"), valid on Python 2 and 3.
        print("Error %s" % (sys.exc_info()[1],))
def verify_ref(addresses, name, code=False, data=False):
    """Yield each address that references the symbol called ``name``.

    The symbol is resolved with ``locate``; if that gives ``idc.BADADDR``
    nothing is yielded.  ``code`` enables code references (ordinary flow
    included) and ``data`` enables data references.  An address is yielded
    once when one of its references equals the symbol and is not
    ``address + 4``.
    """
    symbol = locate(name)
    if symbol == idc.BADADDR:
        return

    for address in addresses:
        found = []
        if code:
            found.extend(idautils.CodeRefsFrom(address, 1))
        if data:
            found.extend(idautils.DataRefsFrom(address))

        if any(address + 4 != ref and symbol == ref for ref in found):
            yield address
def GetCodeRefsFrom(ea):
    """List code references leaving the function that contains ``ea``.

    The function is resolved by name (``GetFunctionName`` then
    ``LocByName``); every code head in each of its chunks is scanned for code
    references, ordinary flow excluded.  A reference is kept when the start
    of the function it belongs to is neither this function nor already in
    the result.

    returns:
        list of reference target addresses, in discovery order.
    """
    f_start = idc.LocByName(idc.GetFunctionName(ea))

    ret = []
    for astart, aend in idautils.Chunks(f_start):
        for head in idautils.Heads(astart, aend):
            # Only instructions can carry code references.
            if not idc.isCode(idc.GetFlags(head)):
                continue
            for ref in idautils.CodeRefsFrom(head, 0):
                loc = idc.LocByName(idc.GetFunctionName(ref))
                # NOTE(review): the duplicate check looks up `loc` (function
                # start) but `ref` is what gets stored, so references into
                # the middle of a function are not de-duplicated.  Left
                # unchanged because callers may rely on the current result.
                if loc not in ret and loc != f_start:
                    ret.append(ref)
    return ret
def forward_analysis(self):
    """
    Walk forward from self.ea, forking a path at every 'CBZ'/'B' instruction.

    Each active path is advanced head by head until it reaches
    self.ctx_end, turns inactive, or meets a branch.  At a branch the path
    is copied once per code reference that is not already on its route, and
    the original path is retired in favour of those copies.  Paths that
    become inactive are collected in self.dead_paths.  The loop runs until
    self.active_paths is empty.
    :return: None
    """
    self.active_paths = [Path(self.ea, self)]
    while self.active_paths:
        # Changes are staged so the list is not modified while iterating it.
        add = []
        remove = []
        for p in self.active_paths:
            ea = p.route[-1]  # the latest step
            while ea != self.ctx_end:
                ea = idc.NextHead(ea)
                if idc.GetMnem(ea) in ['CBZ', 'B']:
                    # Fork: one successor path per branch destination.
                    for des in idautils.CodeRefsFrom(ea, 1):
                        if des in p.route:  # avoid loop
                            continue
                        successor_path = copy.deepcopy(p)
                        successor_path.route.append(ea)
                        successor_path.add_step(des)
                        if successor_path.active:
                            add.append(successor_path)
                        else:
                            self.dead_paths.append(successor_path)
                    remove.append(
                        p
                    )  # the path is not dead, just gave path to the successors.
                    break
                else:
                    # Ordinary instruction: extend the path in place.
                    p.add_step(ea)
                    if p.active:
                        continue
                    else:
                        remove.append(p)
                        self.dead_paths.append(p)
                        break
            # NOTE(review): a path that reaches ctx_end while still active is
            # neither retired nor moved to dead_paths here -- confirm that
            # add_step() deactivates it, otherwise the outer loop never ends.
        self.active_paths.extend(add)
        for p in remove:
            self.active_paths.remove(p)
def extract_insn_api_features(f, bb, insn):
    """Yield API features for a ``call`` or ``jmp`` instruction.

    Two sources are used:

    * imports reached by the instruction (via ``check_for_api_call``), each
      expanded into its symbol variants;
    * the name of the target function when the first code reference is the
      start of a function flagged as library code; a leading ``_`` is also
      reported stripped.

    args:
        f (IDA func_t)
        bb (IDA BasicBlock)
        insn (IDA insn_t)

    example:
        call dword [0x00473038]
    """
    if insn.get_canon_mnem() not in ("call", "jmp"):
        return

    make_symbols = capa.features.extractors.helpers.generate_symbols
    for api in check_for_api_call(f.ctx, insn):
        dll, _, symbol = api.rpartition(".")
        for name in make_symbols(dll, symbol):
            yield API(name), insn.ea

    # extract IDA/FLIRT recognized API functions
    targets = tuple(idautils.CodeRefsFrom(insn.ea, False))
    if not targets:
        return

    target = targets[0]
    target_func = idaapi.get_func(target)
    is_function_start = target_func and target_func.start_ea == target
    if not is_function_start:
        # not a function (start)
        return

    if not (target_func.flags & idaapi.FUNC_LIB):
        return

    name = idaapi.get_name(target_func.start_ea)
    yield API(name), insn.ea
    if name.startswith("_"):
        # some linkers may prefix linked routines with a `_` to avoid name
        # collisions; report both the mangled and un-mangled forms,
        # e.g. `_fwrite` -> `fwrite`
        # see: https://stackoverflow.com/a/2628384/87207
        yield API(name[1:]), insn.ea
def find_function_callees(func_ea, maxlvl):
    """Collect the functions reachable through code references from ``func_ea``.

    Work-list traversal: each popped address that has a function name is
    added to the result.  While fewer than ``maxlvl`` functions (with a valid
    end) have been expanded, the code references leaving the function's
    address range are queued, unless already visited.

    returns:
        list of function addresses in the order they were processed.
    """
    callees = []
    visited = set()
    pending = set((func_ea, ))
    expanded = 0

    while pending:
        current = pending.pop()
        visited.add(current)

        if not idc.GetFunctionName(current):
            continue
        callees.append(current)

        current_end = idc.FindFuncEnd(current)
        if current_end == idaapi.BADADDR:
            continue

        # The counter grows per expanded function, not per call depth.
        expanded += 1
        if expanded >= maxlvl:
            continue

        discovered = set()
        for line in idautils.Heads(current, current_end):
            if not ida_bytes.isCode(get_flags(line)):
                continue
            # 0: every code xref except ordinary flow
            outside = {
                ref for ref in idautils.CodeRefsFrom(line, 0)
                if ref < current or ref > current_end
            }
            discovered |= outside

        discovered -= visited
        pending |= discovered

    return callees
def process_function(arch, func_ea):
    """Export the function at func_ea into a DismantlerDataPacket.

    Starting from the flow chunk that contains the entry point, every head of
    every reachable chunk is visited.  Comments, instructions, code
    references (calls, conditional and unconditional branches) and data
    references are added to the packet; branch targets outside the known
    chunks queue new chunks.  Chunks reported by get_chunks() that were not
    reached this way are processed once the queue runs dry.  Finally a
    FunctionAnalyzer is built and handed, with the packet, to
    `instrumentation`.

    Imported functions called through a data reference are also recorded in
    `imported_functions` (a name defined outside this function).
    """
    func_end = idc.FindFuncEnd(func_ea)  # not read again in this function

    packet = DismantlerDataPacket()

    ida_chunks = get_chunks(func_ea)
    chunks = set()

    # Add to the chunks only the main block, containing the
    # function entry point
    #
    chunk = get_flow_code_from_address(func_ea)
    if chunk:
        chunks.add(chunk)

    # Make "ida_chunks" a set for faster searches within
    ida_chunks = set(ida_chunks)
    # Index of the IDA chunks by their start address
    ida_chunks_idx = dict(zip([c[0] for c in ida_chunks], ida_chunks))

    func = idaapi.get_func(func_ea)
    # Function comments (both kinds); they are attached to the first head
    # that is processed, see the comment handling below.
    comments = [idaapi.get_func_cmt(func, 0), idaapi.get_func_cmt(func, 1)]

    # Copy the list of chunks into a queue to process
    #
    chunks_todo = [c for c in chunks]

    while True:

        # If no chunks left in the queue, exit
        if not chunks_todo:
            # Fall back to the IDA chunks that have not been visited yet
            if ida_chunks:
                chunks_todo.extend(ida_chunks)
            else:
                break

        chunk_start, chunk_end = chunks_todo.pop()
        # Drop the chunk from the pending IDA chunks so it is not redone
        if ida_chunks_idx.has_key(chunk_start):
            ida_chunks.remove(ida_chunks_idx[chunk_start])
            del ida_chunks_idx[chunk_start]

        for head in idautils.Heads(chunk_start, chunk_end):

            # Gather both comment kinds of this head, merge them with any
            # pending comments and attach the non-empty result to the head.
            comments.extend((idaapi.get_cmt(head, 0), idaapi.get_cmt(head, 1)))
            comment = '\n'.join([c for c in comments if c is not None])
            comment = comment.strip()
            if comment:
                packet.add_comment(head, comment)
            comments = list()

            if idc.isCode(idc.GetFlags(head)):

                instruction = arch.process_instruction(packet, head)

                # if there are other references than
                # flow add them all.
                if list(idautils.CodeRefsFrom(head, 0)):

                    # for each reference, including flow ones
                    for ref_idx, ref in enumerate(
                            idautils.CodeRefsFrom(head, 1)):

                        if arch.is_call(instruction):

                            # These two conditions must remain separated, it's
                            # necessary to enter the enclosing "if" whenever
                            # the instruction is a call, otherwise it will be
                            # added as an unconditional jump in the last else
                            #
                            if ref in list(idautils.CodeRefsFrom(head, 0)):
                                packet.add_direct_call(head, ref)

                        elif ref_idx > 0 and arch.is_conditional_branch(
                                instruction):

                            # The ref_idx is > 0 in order to avoid processing the
                            # normal flow reference which would effectively imply
                            # that the conditional branch is processed twice.
                            # It's done this way instead of changing the loop's head
                            # from CodeRefsFrom(head, 1) to CodeRefsFrom(head, 0) in
                            # order to avoid altering the behavior of other conditions
                            # which rely on it being so.

                            # FIXME
                            # I don't seem to check for the reference here
                            # to point to valid, defined code. I suspect
                            # this could lead to a failure when exporting
                            # if such situation appears. I should test if
                            # it's a likely scenario and probably just add
                            # an isHead() or isCode() to address it.

                            packet.add_conditional_branch_true(head, ref)
                            packet.add_conditional_branch_false(
                                head, idaapi.next_head(head, chunk_end))

                            # If the target is not in our chunk list
                            if not address_in_chunks(ref, chunks):
                                new_chunk = get_flow_code_from_address(ref)
                                # Add the chunk to the chunks to process
                                # and to the set containing all visited
                                # chunks
                                if new_chunk is not None:
                                    chunks_todo.append(new_chunk)
                                    chunks.add(new_chunk)

                        elif arch.is_unconditional_branch(instruction):
                            packet.add_unconditional_branch(head, ref)

                            # If the target is not in our chunk list
                            if not address_in_chunks(ref, chunks):
                                new_chunk = get_flow_code_from_address(ref)
                                # Add the chunk to the chunks to process
                                # and to the set containing all visited
                                # chunks
                                if new_chunk is not None:
                                    chunks_todo.append(new_chunk)
                                    chunks.add(new_chunk)

                #skip = False

                for ref in idautils.DataRefsFrom(head):
                    packet.add_data_reference(head, ref)

                    # Get a data reference from the current reference's
                    # location. For instance, if 'ref' points to a valid
                    # address and such address contains a data reference
                    # to code.
                    target = list(idautils.DataRefsFrom(ref))
                    if target:
                        target = target[0]
                    else:
                        target = None

                    if target is None and arch.is_call(instruction):
                        # Call through a location with no further data
                        # reference: recorded as an imported function.
                        imp_name = idc.Name(ref)
                        imp_module = get_import_module_name(ref)

                        imported_functions.add((ref, imp_name, imp_module))
                        packet.add_indirect_virtual_call(head, ref)

                    elif target is not None and idc.isHead(target):
                        # for calls "routed" through this reference
                        if arch.is_call(instruction):
                            packet.add_indirect_call(head, target)

                        # for unconditional jumps "routed" through this reference
                        elif arch.is_unconditional_branch(instruction):
                            packet.add_unconditional_branch(head, target)

                        # for conditional "routed" through this reference
                        elif arch.is_conditional_branch(instruction):
                            packet.add_conditional_branch_true(head, target)
                            packet.add_conditional_branch_false(
                                head, idaapi.next_head(head, chunk_end))

    f = FunctionAnalyzer(arch, func_ea, packet)

    instrumentation.new_packet(packet)
    instrumentation.new_function(f)
def block(self, block):
    '''
    Compute the signatures of one basic block.

    Every instruction contributes its mnemonic to the formal signature, then:
      - call instructions record the names of their targets and add a
        "funcref" marker to the fuzzy signature;
      - instructions with data references add the referenced string value
        (when known in self.strings) or "dataref" to both signatures;
      - instructions with no references at all add every operand text to the
        formal signature, and interesting immediates to the fuzzy one.

    Returns a tuple:

        (formal block signature hash,
         fuzzy block signature hash,
         [immediate, values],
         [called, function, names])
    '''
    formal_parts = []
    fuzzy_parts = []
    called_names = []
    immediate_values = []

    ea = block.startEA
    while ea < block.endEA:
        idaapi.decode_insn(ea)

        # All data/code references leaving the current instruction
        drefs = list(idautils.DataRefsFrom(ea))
        crefs = list(idautils.CodeRefsFrom(ea, False))

        formal_parts.append(idc.GetMnem(ea))

        if idaapi.is_call_insn(ea):
            # The fuzzy signature only notes that a call was made; the
            # formal one already holds the call mnemonic.
            for cref in crefs:
                func_name = idc.Name(cref)
                if func_name:
                    called_names.append(func_name)
                    fuzzy_parts.append("funcref")

        elif drefs:
            # Known strings are used verbatim; any other data is "dataref".
            for dref in drefs:
                if dref in self.strings:
                    marker = self.strings[dref].value
                else:
                    marker = "dataref"
                formal_parts.append(marker)
                fuzzy_parts.append(marker)

        elif not crefs:
            # No references at all: every operand goes into the formal
            # signature.  The fuzzy signature only takes immediates that are
            # >= 0xFFFF, not shown as negative and for which
            # idaapi.getFlags() returns 0.
            for n in range(0, len(idaapi.cmd.Operands)):
                operand = idaapi.cmd.Operands[n]
                opnd_text = idc.GetOpnd(ea, n)
                formal_parts.append(opnd_text)

                if (operand.type == idaapi.o_imm
                        and not opnd_text.startswith('-')
                        and operand.value >= 0xFFFF
                        and idaapi.getFlags(operand.value) == 0):
                    fuzzy_parts.append(str(operand.value))
                    immediate_values.append(operand.value)

        ea = idc.NextHead(ea)

    formal_hash = self.sighash(''.join(formal_parts))
    fuzzy_hash = self.sighash(''.join(fuzzy_parts))
    return (formal_hash, fuzzy_hash, immediate_values, called_names)
def get_stack_strings(self, functions):
    """
    Finds all the stack strings it can in the given functions.

    Each function is traced with a stack of BranchingTraceState objects: the
    instructions are interpreted one by one by mnemonic, a jump whose target
    lies inside the function pushes a new state for that target, and
    addresses never reached by any state are traced afterwards from a fresh
    state.  The strings collected by the states are added to self.strings.

    Parameters set globally:
        STRING_GAP_TOLERANCE - the gap allowed between string characters.
        MAX_CHARACTER_WIDTH - the maximum character size, in bytes
        ASCII - Whether character values must be 0-127

    :param functions: iterable of objects exposing start_ea and end_ea
    """
    stack_strings = []
    for func in functions:
        state = tracing.BranchingTraceState(func.start_ea)
        state.strs = set()
        states = [state]
        # Every item address of the function; entries are removed as they are
        # visited, what remains has not been traced yet.
        func_eas = []
        ea = state.ea
        while ea < func.end_ea:
            func_eas.append(ea)
            ea += idc.get_item_size(ea)
        while states:
            state = states.pop()
            while state.ea < func.end_ea:
                # Best effort: the address may already have been removed.
                try:
                    func_eas.remove(state.ea)
                except:
                    pass
                state.visited_eas.append(state.ea)
                mnemonic = idc.print_insn_mnem(state.ea)
                if mnemonic in IGNORED_MNEMONICS:
                    pass
                elif "pop" in mnemonic:
                    # Load the tracked stack value (if any) into the register
                    reg = tracing.get_reg_fam(tracing.get_opnd_replacement(state.ea, POS_FIRST))
                    if reg:
                        value = state.stack.get(idc.get_spd(state.ea), None)
                        if value is not None:
                            state.regs[reg[0]] = value
                        else:
                            self.clear_reg_if_needed(reg, state.regs)
                elif "push" in mnemonic:
                    # bug where idc.get_spd was not correctly tracking the pointer,
                    # this case also hasn't really been seen often as part of a stack string
                    # self.set_stack(idc.get_spd(ea), ea, POS_FIRST, regs, stack)
                    pass
                elif "mov" in mnemonic:
                    self.handle_mov(state)
                elif (
                    (
                        "xor" in mnemonic
                        and tracing.get_reg_fam(tracing.get_opnd_replacement(state.ea, POS_FIRST))
                        == tracing.get_reg_fam(tracing.get_opnd_replacement(state.ea, POS_SECOND))
                    )
                    or ("lea" in mnemonic and idc.print_operand(state.ea, POS_SECOND) == "[0]")
                    or (
                        "sub" in mnemonic
                        and tracing.get_opnd_replacement(state.ea, POS_FIRST)
                        == tracing.get_opnd_replacement(state.ea, POS_SECOND)
                    )
                ):
                    # Register-zeroing idioms: record 0 for the register
                    reg = tracing.get_reg_fam(tracing.get_opnd_replacement(state.ea, POS_FIRST))
                    if reg:
                        state.regs[reg[0]] = (0, state.ea)
                elif "loop" in mnemonic or "movsb" in mnemonic:
                    state.regs["rcx"] = (0, state.ea)
                elif mnemonic in JUMPS:
                    # First code reference (ordinary flow excluded), if any
                    try:
                        target = next(idautils.CodeRefsFrom(state.ea, 0))
                    except StopIteration:
                        target = None
                    if target and target not in state.visited_eas:
                        if func.end_ea > target >= func.start_ea:
                            # Branch inside the function: trace the target
                            # later from a state derived from this one, which
                            # shares the same string set.
                            state.visited_eas.append(target)
                            new_state = tracing.BranchingTraceState(target, state)
                            new_state.strs = state.strs
                            states.append(new_state)
                        else:
                            self.report_strings(state.strs, state.stack)
                    # Always follow an unconditional jump
                    if mnemonic == "jmp":
                        break
                elif (
                    "rep" in idc.GetDisasm(state.ea).split(" ")[0]
                    and "scas" not in idc.GetDisasm(state.ea).split(" ")[1]
                ):
                    self.report_strings(state.strs, state.stack)
                elif "lea" in mnemonic:
                    self.handle_lea(state)
                elif "call" in mnemonic:
                    self.handle_call(state)
                elif "ret" in mnemonic:
                    break
                elif (
                    idc.get_operand_type(state.ea, POS_FIRST) == idc.o_reg
                ):  # If we find a target register we were tracking, stop tracking it.
                    self.clear_reg_if_needed(
                        tracing.get_reg_fam(tracing.get_opnd_replacement(state.ea, POS_FIRST)), state.regs
                    )
                state.ea += idc.get_item_size(state.ea)
            self.report_strings(state.strs, state.stack)
            # Nothing left to trace but some addresses were never visited:
            # start over from the first of them with a fresh state.
            if not states and func_eas:
                new_state = tracing.BranchingTraceState(func_eas[0])
                new_state.strs = set()
                states.append(new_state)
            stack_strings.extend(state.strs)
    self.strings.update(stack_strings)
def make_po_pair(ea, alive):
    """Pair the live successor of ``ea`` with its first other successor.

    returns:
        ``(alive, dead)`` where ``dead`` is the first code reference leaving
        ``ea`` (ordinary flow included) that differs from ``alive``.

    raises:
        IndexError: when ``ea`` has no code reference other than ``alive``.
    """
    others = []
    for target in idautils.CodeRefsFrom(ea, True):
        if target != alive:
            others.append(target)
    return alive, others[0]
def main():
    """Redirect part of the instrumentation calls to ``afl_maybe_log``.

    Steps:

    1. locate the functions whose names contain ``afl_maybe_log_fun`` and
       ``afl_maybe_log``; abort with a message when either is missing;
    2. collect every function larger than 0x28 bytes that has a ``call`` at
       the fixed prologue offset (0x1A on 64-bit, 0x08 on 32-bit) with a code
       reference to a function whose name contains ``afl_maybe_log``;
    3. walk the collected functions from largest to smallest and patch the
       dword following the call's first byte so the call targets
       ``afl_maybe_log``: always for functions smaller than 0x200 bytes, and
       with 80% probability for those in the smaller 90% of the list.

    Errors are printed, not raised.  The elapsed time since ``g_time_start``
    is printed at the end.
    """
    idaapi.msg("alter instrument function\n")
    addr_afl_maybe_log_fun = 0
    addr_afl_maybe_log = 0
    fun_info = []
    try:
        for func in idautils.Functions():
            fun_name = idc.GetFunctionName(func)
            # Substring test: `find(...) > 0` missed a match at index 0 and
            # disagreed with the `find(...) < 0` test used further down.
            if 'afl_maybe_log_fun' in fun_name:
                addr_afl_maybe_log_fun = func
            elif 'afl_maybe_log' in fun_name:
                addr_afl_maybe_log = func
            if addr_afl_maybe_log_fun and addr_afl_maybe_log:
                break

        if not addr_afl_maybe_log_fun or not addr_afl_maybe_log:
            print("don't find add_afl_maybe_fun\n")
            return
        print("find add_afl_maybe_fun ok\n")

        # find instrumented function
        for func in idautils.Functions():
            f_end = idc.FindFuncEnd(func)
            if f_end - func <= 0x28:
                continue

            # call __afl_maybe_log
            if idc.__EA64__:  # 64bit
                addr_call = func + 0x1A
            else:  # 32bit
                addr_call = func + 0x08

            if idc.GetMnem(addr_call) != 'call':
                continue

            for to in idautils.CodeRefsFrom(addr_call, False):
                if 'afl_maybe_log' not in idc.GetFunctionName(to):
                    continue
                fun_info.append((func, f_end - func, addr_call))

        # Smallest functions first; the loop below walks from the largest.
        fun_info.sort(key=lambda x: x[1])
        num = len(fun_info)
        print(num)
        for i in range(num - 1, -1, -1):
            func, size, addr_call = fun_info[i]
            print(hex(func))
            # `and` binds tighter than `or`; the parentheses only make the
            # original precedence explicit.
            if size < 0x200 or (i < num * 90.0 / 100.0
                                and random.randint(0, 99) < 80):
                # remove fun instrumented
                # presumably a 5-byte relative call: the operand starts one
                # byte in and is relative to the next instruction -- confirm
                idc.PatchDword(addr_call + 1,
                               addr_afl_maybe_log - addr_call - 5)
            else:
                print(hex(func))

        #idc.SaveBase('')
        #idc.Exit(0)
    except Exception as e:
        print(e)

    print('analyse time: ' + str(time.time() - g_time_start) + 's\n')
def instructionHandler(M, F, B, inst, new_eas):
    """Add the instruction at `inst` to block B and record its references.

    Code references set the call/jump targets (or the external name) of the
    new instruction object, data references set its external data/call name,
    internal call target or data offset.  Newly discovered internal code
    addresses are added to `new_eas`.

    Returns a pair (instruction, flag): the instruction object created by
    addInst (None for alignment and HLT instructions) and a boolean that is
    True for the 0xCC filler, alignment, and calls/jumps to external
    functions that do not return, False otherwise.
    NOTE(review): the flag presumably tells the caller to stop processing the
    block -- confirm against the caller.

    Raises Exception when the instruction cannot be decoded and is not 0xCC.
    """
    insn_t = idautils.DecodeInstruction(inst)
    if not insn_t:
        # handle jumps after noreturn functions
        if idc.Byte(inst) == 0xCC:
            I = addInst(B, inst, [0xCC])
            return I, True
        else:
            raise Exception("Cannot read instruction at: {0:x}".format(inst))

    # check for align instruction
    pf = idc.GetFlags(inst)
    if idaapi.isAlign(pf):
        return None, True

    # skip HLTs -- they are privileged, and are used in ELFs after a noreturn call
    if isHlt(inst):
        return None, False

    DEBUG("\t\tinst: {0}\n".format(idc.GetDisasm(inst)))
    inst_bytes = readInstructionBytes(inst)
    DEBUG("\t\tBytes: {0}\n".format(inst_bytes))

    I = addInst(B, inst, inst_bytes)

    # Jump tables are fully handled by handleJmpTable
    if isJmpTable(inst):
        handleJmpTable(I, F, inst, new_eas)
        return I, False

    # Any indirect call sets the module-wide trampoline flag
    if isIndirectCall(inst):
        global FUNCTIONS_NEED_TRAMPOLINE
        FUNCTIONS_NEED_TRAMPOLINE = True

    #check for code refs from here
    crefs = []
    for cref in idautils.CodeRefsFrom(inst, 0):
        crefs.append(cref)
        fn = getFunctionName(cref)
        if isCall(inst):

            # A call to an ELF thunk is treated as a call to the external
            # function the thunk jumps to
            elfy, fn_replace = isElfThunk(cref)
            if elfy:
                fn = fn_replace

            if isExternalReference(cref) or elfy:
                fn = handleExternalRef(fn)
                I.ext_call_name = fn
                DEBUG("EXTERNAL CALL: {0}\n".format(fn))

                if doesNotReturn(fn):
                    return I, True
            else:
                I.call_target = cref

                if cref not in RECOVERED_EAS:
                    new_eas.add(cref)

                DEBUG("INTERNAL CALL: {0}\n".format(fn))
        elif isUnconditionalJump(inst):
            if isExternalReference(cref):
                fn = handleExternalRef(fn)
                I.ext_call_name = fn
                DEBUG("EXTERNAL JMP: {0}\n".format(fn))

                if doesNotReturn(fn):
                    DEBUG("Nonreturn JMP\n")
                    return I, True
            else:
                DEBUG("INTERNAL JMP: {0:x}\n".format(cref))
                I.true_target = cref

    #true: jump to where we have a code-ref
    #false: continue as we were
    print hex(inst), crefs
    if isConditionalJump(inst):
        I.true_target = crefs[0]
        I.false_target = inst+len(inst_bytes)
        return I, False

    relo_off = findRelocOffset(inst, len(inst_bytes))
    if relo_off != -1:
        I.reloc_offset = relo_off

    for dref in idautils.DataRefsFrom(inst):
        # Already handled above as a code reference
        if dref in crefs:
            continue

        if inValidSegment(dref):
            if isExternalReference(dref):
                fn = getFunctionName(dref)

                fn = handleExternalRef(fn)
                if isExternalData(fn):
                    I.ext_data_name = fn
                    sys.stdout.write("EXTERNAL DATA REF FROM {0:x} to {1}\n".format(inst, fn))
                else:
                    I.ext_call_name = fn
                    sys.stdout.write("EXTERNAL CODE REF FROM {0:x} to {1}\n".format(inst, fn))

            elif isInternalCode(dref):
                DEBUG("\t\tCode Ref from {0:x} to {1:x}\n".format(inst, dref))
                I.call_target = dref
                if dref not in RECOVERED_EAS:
                    new_eas.add(dref)
            else:
                dref_size = idc.ItemSize(dref)
                I.data_offset = handleDataRelocation(M, dref, new_eas)
                DEBUG("\t\tData Ref: {0:x}, size: {1}, offset : {2:x}\n".format(
                    dref, dref_size, I.data_offset))
        else:
            DEBUG("Data not in valid segment {0:x}\n".format(dref))

    # if we have a mov sth, imm with imm that it's likely a fn pointer,
    # we add that pointer to the list of ones to disassemble
    # TODO: use also some other info to assume this
    if insn_t[1].type == idaapi.o_imm and insn_t.itype == idaapi.NN_mov and inValidSegment(insn_t[1].value):
        ref = insn_t[1].value
        if isInternalCode(ref) and ref not in RECOVERED_EAS:
            new_eas.add(ref)

    # A near call whose only code reference is the ordinary-flow one:
    # that reference is taken as the call target.
    if isCall(inst):
        coderefs = [i for i in idautils.CodeRefsFrom(inst, 0)]
        coderefs_normal = [i for i in idautils.CodeRefsFrom(inst, 1)]
        if len(coderefs) == 0 and len(coderefs_normal) == 1 and insn_t[0].type == idaapi.o_near:
            for cref in coderefs_normal:
                I.call_target = cref
                if cref not in RECOVERED_EAS:
                    new_eas.add(cref)

    return I, False
def recoverBlock(F, startEA, need_trampolines):
    """Build the Block starting at startEA, walking instructions linearly.

    While walking, missing cross references for memory operands are added to
    the database, jump tables (recognised by IDA, or guessed from a preceding
    table-address load) extend the successors of the block, and targets of
    guessed tables are added to `need_trampolines`.  The block ends at an
    undecodable location, a rep-prefixed instruction, data in the middle of
    code, a call, an int, or any instruction that does not simply fall
    through to the next one.

    Returns the Block with endEA and succs filled in.
    """
    b = Block(startEA)
    curEA = startEA

    # TODO: link some metadata to any block to keep track
    # of this table, because the indirect jmp
    # may be in a follower block and not directly in
    # the block where the address is loaded
    likelyJmpTable = None

    while True:
        insn_t = idautils.DecodeInstruction(curEA)
        if insn_t is None:
            # A 0xCC byte is kept in the block; anything else ends it before
            if idc.Byte(curEA) == 0xCC:
                b.endEA = curEA+1
                return b
            else:
                sys.stdout.write("WARNING: Couldn't decode insn at: {0:x}. Ending block.\n".format(curEA))
                b.endEA = curEA
                return b

        # check for xrefs
        j = 0  # operand index
        for op in insn_t:
            # if it is a MEM operand
            if op.type == idaapi.o_mem and inValidSegment(op.addr):
                if isCall(curEA):
                    if isInternalCode(op.addr):
                        idaapi.add_cref(curEA, op.addr, idaapi.fl_CN)
                    else:
                        idaapi.add_dref(curEA, op.addr, idaapi.dr_R)
                elif isUnconditionalJump(curEA) or isConditionalJump(curEA):
                    if isInternalCode(op.addr):
                        idaapi.add_cref(curEA, op.addr, idaapi.fl_JN)
                    else:
                        idaapi.add_dref(curEA, op.addr, idaapi.dr_R)
                else:
                    # First operand gets a write reference, others a read one
                    if j == 0:
                        idaapi.add_dref(curEA, op.addr, idaapi.dr_W)
                    else:
                        idaapi.add_dref(curEA, op.addr, idaapi.dr_R)
            j += 1

        nextEA = curEA+insn_t.size

        crefs = idautils.CodeRefsFrom(curEA, 1)

        # get curEA follows
        follows = [cref for cref in crefs]

        if isJmpTable(curEA):
            # this is a jmptable (according to IDA)
            # XXX: we assume jmp tables found by IDA don't overlap
            # with others
            jmpentries = set()
            jmpt = handleJmpTable(None, F, curEA, jmpentries)
            follows = list(jmpentries.union(set(follows)))

            JMPTABLES.add(jmpt)
        elif isIndirectJmp(curEA) and likelyJmpTable is not None:
            # this is an indirect jmp and in the same block there
            # was a mov to take the address of a "likely" jmptable
            for ref in likelyJmpTable.entries():
                need_trampolines.add(ref)
            follows = list(set(likelyJmpTable.entries() + follows))
            JMPTABLES.add(likelyJmpTable)
            likelyJmpTable = None
        elif isLikeLoadJmpTable(curEA):
            # this is an instruction which take the address of a
            # switch table (or something we *think* is a jmp table)
            likelyJmpTable = handleLikeLoadJmpTable(curEA, F)

        if isRepPrefix(curEA):
            sys.stdout.write("Found rep prefix at {0:#x}\n".format(curEA))
            # A rep-prefixed instruction can continue or repeat itself
            b.succs.append(nextEA)
            b.succs.append(curEA)
            b.endEA = nextEA
            return b

        if isDataInst(curEA):
            sys.stdout.write("Found data in middle of code at {0:#x}\n".format(curEA))
            b.endEA = curEA
            return b

        if isCall(curEA):
            sys.stdout.write("Found call\n")
            fcrefs = idautils.CodeRefsFrom(curEA, 0)
            ffollows = [cref for cref in fcrefs]
            # The instruction after the call is a successor unless the first
            # call target is known not to return
            if len(ffollows) == 0 or idaapi.func_does_return(ffollows[0]):
                b.succs.append(nextEA)
            b.endEA = nextEA
            return b

        if isInt(curEA):
            sys.stdout.write("Found int\n")
            b.endEA = nextEA
            b.succs.append(nextEA)
            return b

        if (follows == [nextEA] and not isUnconditionalJump(curEA)) or isCall(curEA):
            # read next instruction
            curEA = nextEA
        # check if we need to make a new block
        elif len(follows) == 0:
            # this is a ret, no follows
            b.endEA = nextEA
            return b
        else:
            # this block has several follow blocks
            b.endEA = nextEA
            for f in follows:
                # do not decode external code refs
                if not isExternalReference(f):
                    b.succs.append(f)
            return b
def crefs_from(self):
    """Code references leaving this line, ordinary flow included.

    Returns whatever ``idautils.CodeRefsFrom`` produces for ``self.ea``.
    """
    include_flow = 1
    return idautils.CodeRefsFrom(self.ea, include_flow)
def GetFunEdgesAndBbls(function_ea):
    """
    Get the instrumented basic blocks (bbls) of a function and their edges.

    The function is scanned twice: first to find basic block boundaries and
    raw edges from the code references of each instruction, then to cut the
    instruction stream into bbls.  Edges and bbls are finally restricted to
    the instrumented ones (per IsInstrumentIns); an edge that ends in a
    non-instrumented bbl is replaced by edges to the instrumented bbls that
    LookForInsChildBbls returns for it.

    @function_ea - function address
    @return - tuple (bbls, edges_d, edges_count, SingleBBS, MultiBBS):
        bbls        - head -> [head, tail, call_num, mem_num]
        edges_d     - head -> list of successor heads
        edges_count - number of edges stored in edges_d
        SingleBBS   - head -> predecessor bbl, for heads with one predecessor
        MultiBBS    - head -> list of predecessor bbls, for the others
      All of them are empty (0 for the count) when the function entry is not
      instrumented.
    """
    bbl = []  # bbl info [head, tail, call_num, mem_num]
    SingleBBS = {}  # head -> pred_bbl
    MultiBBS = {}  # head -> [pred_bbls]
    bbls = {}  # head -> bbl
    bbls2 = {}  # tail -> bbl
    edges_s = set()  # set of (tail, head)
    edges_d = {}  # dict struct. head -> of (head, ..., head)
    edges_count = 0
    edges_s_t = set()  # tmp edges set
    edges_d_t = {}  # tmp edges dict.

    if not IsInstrumentIns(function_ea):
        return bbls, edges_d, edges_count, SingleBBS, MultiBBS

    f_start = function_ea
    f_end = idc.FindFuncEnd(function_ea)

    boundaries = set((f_start, ))  # head of bbl

    # First pass: collect bbl boundaries and raw (source, target) edges
    for head in idautils.Heads(f_start, f_end):
        # If the element is an instruction
        if head == idaapi.BADADDR:
            raise Exception("Invalid head for parsing")
        if not idc.isCode(idc.GetFlags(head)):
            continue

        # Get the references made from the current instruction
        # and keep only the ones local to the function.
        refs = idautils.CodeRefsFrom(head, 0)
        refs_filtered = set()
        for ref in refs:
            if ref > f_start and ref < f_end:  # can't use ref>=f_start, avoid recursion
                refs_filtered.add(ref)
        refs = refs_filtered

        if refs:
            # If the flow continues also to the next (address-wise)
            # instruction, we add a reference to it.
            # For instance, a conditional jump will not branch
            # if the condition is not met, so we save that
            # reference as well.
            next_head = idc.NextHead(head, f_end)
            if next_head != idaapi.BADADDR and idc.isFlow(
                    idc.GetFlags(next_head)):
                refs.add(next_head)

            # Update the boundaries found so far.
            boundaries.update(refs)
            for r in refs:  # enum all of next ins
                # If the flow could also come from the address
                # previous to the destination of the branching
                # an edge is created.
                if isFlow(idc.GetFlags(r)):
                    prev_head = idc.PrevHead(r, f_start)
                    if prev_head == 0xffffffffL:
                        #edges_s_t.add((head, r))
                        #raise Exception("invalid reference to previous instruction for", hex(r))
                        pass
                    else:
                        edges_s_t.add((prev_head, r))
                edges_s_t.add((head, r))

    #end of for head in idautils.Heads(chunk[0], chunk[1]):

    last_head = 0
    # NOTE: We can handle if jump xrefs to chunk address space.

    # get bbls. head of bbl is first ins addr, tail of bbl is last ins addr.
    for head in idautils.Heads(f_start, f_end):
        mnem = idc.GetMnem(head)
        if head in boundaries:
            # A boundary closes the bbl in progress and opens a new one
            if len(bbl) > 0:
                if bbl[0] == head:
                    continue
                if True:  # IsInstrumentIns(bbl[0]):
                    bbl[1] = last_head
                    bbls[bbl[0]] = bbl
                    bbls2[bbl[1]] = bbl
            bbl = [head, 0, 0, 0]
        #elif self.GetInstructionType(head) == self.BRANCH_INSTRUCTION:
        elif mnem.startswith('j'):
            # A jump closes the bbl in progress; the next one starts right
            # after the jump
            if len(bbl) > 0 and bbl[0] == head + idc.ItemSize(head):
                continue
            if True:  # IsInstrumentIns(bbl[0]):
                bbl[1] = head  # head + idc.ItemSize(head))
                bbls[bbl[0]] = bbl
                bbls2[bbl[1]] = bbl
            bbl = [head + idc.ItemSize(head), 0, 0, 0]
        else:
            last_head = head
        if mnem.startswith('call'):
            bbl[2] += 1

        #if 2 == idc.GetOpType(head, 0):  # 2 Memory Reference
        #    bbl[3] += 1
        #if 2 == idc.GetOpType(head, 1):  # 2 Memory Reference
        #    bbl[3] += 1

    # add last basic block
    if len(bbl) and bbl[0] != f_end:  # and IsInstrumentIns(bbl[0]):
        bbl[1] = f_end
        bbls[bbl[0]] = bbl
        bbls2[bbl[1]] = bbl

    # edges set -> dict
    # (the source instruction of each edge is mapped to the head of the bbl
    # it terminates)
    for e in edges_s_t:
        if e[0] in bbls2:
            bbl_head = bbls2[e[0]][0]
            if bbl_head in edges_d_t:
                edges_d_t[bbl_head].append(e[1])
            else:
                edges_d_t[bbl_head] = [e[1]]
        else:
            print('edge (%x, %x) can not find head bbl.' %
                  (e[0], e[1]))  # a small case. e1 flow e0.

    # revise edges. head bbl and tail bbl of edges must be instrumented.
    for e0 in edges_d_t:
        if not IsInstrumentIns(e0):  # e0 is not instrumented, skip.
            continue

        for e1 in edges_d_t[e0]:
            if IsInstrumentIns(e1):  # e0 e1 both instrumented, add edge.
                if e0 in edges_d:
                    edges_d[e0].append(e1)
                else:
                    edges_d[e0] = [e1]
                edges_count += 1
            else:
                # e1 is not instrumented, recursively look for instrumented child bbls
                bbls_t = LookForInsChildBbls(e1, edges_d_t, [])
                for b in bbls_t:  # add edge
                    if e0 in edges_d:
                        edges_d[e0].append(b)
                    else:
                        edges_d[e0] = [b]
                    edges_count += 1

    # revise bbls. bbl must be instrumented.
    for b in bbls.keys():
        if not IsInstrumentIns(b):
            # if bbls[b][1] in bbls2:  # avoid multi del
            #     bbls2.pop(bbls[b][1])
            bbls.pop(b)

    #print('bbls:')
    #i = 0
    #for b in bbls:
    #    i += 1
    #    print('%04d %x, %x' % (i, b, bbls[b][1]))

    #print('edges_d:')
    #i = 0
    #for e0 in edges_d:
    #    for e1 in edges_d[e0]:
    #        i += 1
    #        print('%04d %x, %x' % (i, e0, e1))

    # Predecessor tables: a head with one predecessor lives in SingleBBS and
    # is moved to MultiBBS when a second predecessor shows up.
    for e0 in edges_d:
        if e0 not in bbls:
            print('error:%x have no head' % (e0))  # error
            continue
        for e1 in edges_d[e0]:
            if e1 in MultiBBS:
                MultiBBS[e1].append(bbls[e0])  # add Pred
            elif e1 in SingleBBS:
                MultiBBS[e1] = [SingleBBS[e1], bbls[e0]]  # add Pred
                SingleBBS.pop(e1)  # remove from SingleBBS
            else:
                SingleBBS[e1] = bbls[e0]  # add Pred

    # bbls that are not instrumented were deleted above
    return bbls, edges_d, edges_count, SingleBBS, MultiBBS
def getCodeOutRefs(self, offset):
    """Collect the outgoing code cross-references of a single address.

    Args:
        offset: source address handed to ``idautils.CodeRefsFrom``.

    Returns:
        A list of ``(offset, ref_to)`` tuples, one per target yielded by
        ``idautils.CodeRefsFrom(offset, True)``, in the order yielded.
    """
    out_refs = []
    for target in idautils.CodeRefsFrom(offset, True):
        out_refs.append((offset, target))
    return out_refs
# Walk every head of every function and record each one that carries switch
# information as an (address, number of cases, interesting calls) tuple in
# `switches`.
for f in idautils.Functions():
    func = idaapi.get_func(f)
    for h in idautils.Heads(func.startEA, func.endEA):
        res = idaapi.get_switch_info_ex(h)
        if res is None:
            # No switch information at this head.
            continue

        # number of cases
        num_cases = res.get_jtable_size()
        # Parenthesised single-argument form prints the same text under
        # Python 2 and stays valid syntax under Python 3.
        print('0x%08x: switch (%d cases)' % (h, num_cases))

        # get cases
        # NOTE(review): `xrefs` is bound but never consumed in the visible
        # code; kept in case later code relies on it -- confirm.
        xrefs = idautils.CodeRefsFrom(h, 1)
        interesting_calls = []
        switches.append((h, num_cases, interesting_calls))


# http://dvlabs.tippingpoint.com/blog/2011/05/11/mindshare-extending-ida-custviews
class SwitchViewer(idaapi.simplecustviewer_t):
    """Custom viewer that keeps the collected switch records."""

    def __init__(self, data):
        # data should be a 3-tuple
        #
        # (address, number of cases, list of interesting calls)
        #
        # NOTE(review): the attribute name suggests a collection of such
        # tuples (like `switches` built above) -- confirm against the caller.
        self.switches = data
def index_idb(sdb_path):
    """
    Index the current idb.

    Builds an SDBGen over sdb_path and fills it in three transactions:

    1. every line returned by iter_lines() (address, type, canonicalized
       disassembly text and raw bytes),
    2. every cross reference leaving those lines (code jump, code flow,
       code-to-data, data-to-code and data-to-data),
    3. every non-chunked function whose end lies after its start.

    Finally the lines full-text index is filled and the SDBGen is closed.

    sdb_path -- path handed to SDBGen.
    """
    sdbgen = SDBGen(sdb_path)

    sdbgen.begin_transaction()
    # Index all lines:
    for line_addr in iter_lines():
        # Get line attributes:
        if is_line_code(line_addr):
            line_type = LineTypes.CODE
        else:
            line_type = LineTypes.DATA

        line_text = canonicalize_line_text(idc.GetDisasm(line_addr))
        line_data = idc.GetManyBytes(line_addr, idc.ItemSize(line_addr))

        # Make sure that we don't insert Nones:
        if line_data is None:
            line_data = ""

        # Index the line:
        sdbgen.add_line(line_addr, line_type, line_text, line_data)
    sdbgen.commit_transaction()

    sdbgen.begin_transaction()
    # Index all xrefs:
    for line_addr in iter_lines():
        if is_line_code(line_addr):
            # Line is code:
            # Code xrefs:
            no_flow_crefs = set(idautils.CodeRefsFrom(line_addr, 0))
            all_crefs = set(idautils.CodeRefsFrom(line_addr, 1))
            # Flow xrefs are the ones reported only when flow is requested.
            # The subtraction must go this way round: the no-flow set is a
            # subset of the all set, so the reverse difference is always
            # empty and no CODE_FLOW xref would ever be indexed.
            flow_crefs = all_crefs.difference(no_flow_crefs)

            for nf_cref in no_flow_crefs:
                if not is_line_exists(nf_cref):
                    logger.warning(
                        'Code line: nf_cref = 0x{:x} is nonexistent. '
                        'line_addr = 0x{:x}'.format(nf_cref, line_addr))
                    continue
                sdbgen.add_xref(XrefTypes.CODE_JUMP, line_addr, nf_cref)

            for f_cref in flow_crefs:
                if not is_line_exists(f_cref):
                    logger.warning(
                        'Code line: f_cref = 0x{:x} is nonexistent. '
                        'line_addr = 0x{:x}'.format(f_cref, line_addr))
                    continue
                sdbgen.add_xref(XrefTypes.CODE_FLOW, line_addr, f_cref)

            # Code to Data xrefs:
            for dref in idautils.DataRefsFrom(line_addr):
                if not is_line_exists(dref):
                    logger.warning('Code line: dref = 0x{:x} is nonexistent. '
                                   'line_addr = 0x{:x}'.format(
                                       dref, line_addr))
                    continue
                sdbgen.add_xref(XrefTypes.CODE_TO_DATA, line_addr, dref)
        else:
            # Line is data (Not code):
            for dref in idautils.DataRefsFrom(line_addr):
                if not is_line_exists(dref):
                    logger.warning('Data line: dref = {:x} is nonexistent. '
                                   'line_addr = 0x{:x}'.format(
                                       dref, line_addr))
                    continue

                # The xref type depends on what the data line points at.
                if is_line_code(dref):
                    sdbgen.add_xref(XrefTypes.DATA_TO_CODE, line_addr, dref)
                else:
                    sdbgen.add_xref(XrefTypes.DATA_TO_DATA, line_addr, dref)
    sdbgen.commit_transaction()

    sdbgen.begin_transaction()
    # Index all functions:
    for func_addr in idautils.Functions():
        # We skip chunked functions:
        if is_func_chunked(func_addr):
            logger.warning('Function at 0x{:x} is chunked'.format(func_addr))
            continue

        func_end = idc.GetFunctionAttr(func_addr, idc.FUNCATTR_END)
        # Make sure that start is before end:
        if func_end <= func_addr:
            logger.warning('Function at {:x} has end {:x}'
                           .format(func_addr, func_end))
            continue

        # Every address in [func_addr, func_end) is handed over; xrange keeps
        # this lazy under the Python 2 interpreter this file targets.
        line_addresses = xrange(func_addr, func_end)
        func_name = idc.GetFunctionName(func_addr)
        sdbgen.add_function(func_addr, func_name, line_addresses)
    sdbgen.commit_transaction()

    sdbgen.fill_lines_fts()
    sdbgen.close()