def instructionHandler(M, B, inst, new_eas):
    """Add the instruction at `inst` to block `B` and record its references.

    Args:
        M: module object; only forwarded to addDataReference().
        B: block object the instruction is attached to through addInst().
        inst: effective address of the instruction to process.
        new_eas: collection (used through .add()) that receives addresses
            which still have to be recovered.

    Returns:
        A pair (I, flag).  I is whatever addInst() returned, or None when the
        instruction is a HLT and is skipped.  flag is True for a lone 0xCC
        byte, a call $+5, a call followed by non-code, and external
        calls/jumps for which doesNotReturn() is true; otherwise False.
        Presumably the flag tells the caller that control does not continue
        past this instruction -- confirm against the caller.

    Raises:
        Exception: the instruction cannot be decoded and is not a 0xCC byte.
    """
    insn_t = idautils.DecodeInstruction(inst)
    if not insn_t:
        # handle jumps after noreturn functions
        if idc.Byte(inst) == 0xCC:
            I = addInst(B, inst, [0xCC])
            return I, True
        else:
            raise Exception("Cannot read instruction at: {0:x}".format(inst))

    # skip HLTs -- they are privileged, and are used in ELFs after a noreturn call
    if isHlt(inst):
        return None, False

    DEBUG("\t\tinst: {0}\n".format(idc.GetDisasm(inst)))
    inst_bytes = readInstructionBytes(inst)
    DEBUG("\t\tBytes: {0}\n".format(inst_bytes))

    I = addInst(B, inst, inst_bytes)

    if isJmpTable(inst):
        handleJmpTable(I, inst, new_eas)
        return I, False

    # Code refs from here (flow-to-next excluded), pulled out of the
    # generator because the list is tested for membership and indexed below.
    crefs = list(idautils.CodeRefsFrom(inst, 0))

    is_call = isCall(inst)
    isize = insn_t.size
    next_ea = inst + isize

    had_refs = False

    # this is a call $+5, needs special handling
    if len(crefs) == 0 and is_call and isize == 5:
        selfCallEA = next_ea
        DEBUG("INTERNAL CALL $+5: {0:x}\n".format(selfCallEA))
        DEBUG("LOCAL NORETURN CALL!\n")
        I.local_noreturn = True

        # NOTE(review): when selfCallEA was already recovered, execution
        # falls through to the generic handling below -- confirm intended.
        if selfCallEA not in RECOVERED_EAS:
            DEBUG("Adding new EA: {0:x}\n".format(selfCallEA))
            new_eas.add(selfCallEA)
            I.call_target = selfCallEA
            return I, True

    for cref in crefs:
        had_refs = True
        fn = getFunctionName(cref)
        if is_call:
            elfy, fn_replace = isElfThunk(cref)
            if elfy:
                fn = fn_replace

            if isExternalReference(cref) or elfy:
                fn = handleExternalRef(fn)
                I.ext_call_name = fn
                DEBUG("EXTERNAL CALL: {0}\n".format(fn))

                if doesNotReturn(fn):
                    return I, True
            else:
                I.call_target = cref

                if cref not in RECOVERED_EAS:
                    new_eas.add(cref)

                DEBUG("INTERNAL CALL: {0}\n".format(fn))
        elif isUnconditionalJump(inst):
            if isExternalReference(cref):
                fn = handleExternalRef(fn)
                I.ext_call_name = fn
                DEBUG("EXTERNAL JMP: {0}\n".format(fn))

                if doesNotReturn(fn):
                    DEBUG("Nonreturn JMP\n")
                    return I, True
            else:
                DEBUG("INTERNAL JMP: {0:x}\n".format(cref))
                I.true_target = cref

    # true: jump to where we have a code-ref
    # false: continue as we were
    if isConditionalJump(inst):
        I.true_target = crefs[0]
        I.false_target = inst + len(inst_bytes)
        return I, False

    if is_call and isNotCode(next_ea):
        DEBUG("LOCAL NORETURN CALL!\n")
        I.local_noreturn = True
        return I, True

    relo_off = findRelocOffset(inst, len(inst_bytes))
    if relo_off != -1:
        I.reloc_offset = relo_off

    for dref in idautils.DataRefsFrom(inst):
        had_refs = True
        if dref in crefs:
            continue

        addDataReference(M, I, inst, dref, new_eas)
        DEBUG("instr refering data")
        if isUnconditionalJump(inst):
            # Qualified like every other reference call in this function;
            # the bare name only resolves if idautils was star-imported.
            xdrefs = idautils.DataRefsFrom(dref)
            for xref in xdrefs:
                DEBUG("xref : {0:x}\n".format(xref))
                # check if it refers to some instructions; link control flow
                if isExternalReference(xref):
                    fn = getFunctionName(xref)
                    fn = handleExternalRef(fn)
                    I.ext_call_name = fn
                    DEBUG("EXTERNAL CALL : {0}\n".format(fn))

    if not had_refs and isLinkedElf():
        for op in insn_t.Operands:
            if op.type == idc.o_imm:
                # we have an immediate.. check if it's in a code or data section
                begin_a = op.value
                end_a = begin_a + idc.ItemSize(begin_a)
                if isInData(begin_a, end_a):
                    # add data reference
                    addDataReference(M, I, inst, begin_a, new_eas)
                # elif isInCode(begin_a, end_a):
                #     add code ref

    return I, False
def trace_param(ea, min_ea, op_type, op_val): ''' trace_param: ea, min_ea, op_type, op_val Taking ea as start, this function does basic backtrace of an operand (defined by op_type and op_val) until it finds a data reference which we consider the "source". It stops when ea < min_ea (usually the function start). It does not support arithmetic or complex modifications of the source. This will be improved on future versions. ''' global displ_re, msgsend, var_re ea_call = ea while ea != idc.BADADDR and ea != min_ea: ea = idc.PrevHead(ea, min_ea) if op_type == idaapi.o_reg and op_val == 0 and idaapi.is_call_insn(ea): # We have a BL/BLX that will modify the R0 # we're tracking # return None if idc.GetMnem(ea) in ['LDR', 'MOV']: src_op = 1 dest_op = 0 elif idc.GetMnem(ea) == 'STR': src_op = 0 dest_op = 1 else: continue if idc.GetOpType(ea, dest_op) == op_type and idc.GetOperandValue(ea, dest_op) == op_val: # Found, see where it comes from if idc.GetOpType(ea, src_op) == idc.o_mem: # Got the final reference refs = list(idautils.DataRefsFrom(ea)) if not refs: local_ref = idc.GetOperandValue(ea, src_op) far_ref = idc.Dword(local_ref) else: while len(refs) > 0: far_ref = refs[0] refs = list(idautils.DataRefsFrom(refs[0])) return far_ref elif idc.GetOpType(ea, src_op) == idc.o_displ: if ', [SP' in idc.GetDisasm(ea): if 'arg_' in idc.GetDisasm(ea): # We don't track function arguments return None # We're tracking an stack variable try: var_name = var_re.search(idc.GetDisasm(ea)).group('varname') except: print '%08x: Unable to recognize variable' % ea return None while ea != idc.BADADDR and ea > min_ea: if idc.GetMnem(ea) == 'STR' and var_name in idc.GetDisasm(ea): # New reg to track op_val = idc.GetOperandValue(ea, dest_op) break ea = idc.PrevHead(ea, min_ea) else: # New reg to track if '[LR]' in idc.GetDisasm(ea): # Optimizations use LR as general reg op_val = 14 else: # Got the final reference refs = list(idautils.DataRefsFrom(ea)) if refs: while len(refs) > 0: far_ref = 
refs[0] refs = list(idautils.DataRefsFrom(refs[0])) return far_ref try: op_val = int(displ_re.search(idc.GetDisasm(ea)).group('regnum')) except: print '%08x: Unable to recognize register' % ea return None elif idc.GetOpType(ea, src_op) == idc.o_reg: # Direct reg-reg assignment op_val = idc.GetOperandValue(ea, src_op) else: # We don't track o_phrase or other complex source operands :( return None return None
def instructionHandler(M, B, inst, new_eas):
    """Add the instruction at `inst` to block `B` and record its references.

    Args:
        M: module object; only forwarded to handleDataRelocation().
        B: block object the instruction is attached to through addInst().
        inst: effective address of the instruction to process.
        new_eas: collection (used through .add()) that receives addresses
            which still have to be recovered.

    Returns:
        A pair (I, flag).  I is whatever addInst() returned, or None when the
        instruction is a HLT and is skipped.  flag is True for a lone 0xCC
        byte, a call $+5, a call followed by non-code, and external
        calls/jumps for which doesNotReturn() is true; otherwise False.
        Presumably the flag tells the caller that control does not continue
        past this instruction -- confirm against the caller.

    Raises:
        Exception: the instruction cannot be decoded and is not a 0xCC byte.
    """
    insn_t = idautils.DecodeInstruction(inst)
    if not insn_t:
        # handle jumps after noreturn functions
        if idc.Byte(inst) == 0xCC:
            I = addInst(B, inst, [0xCC])
            return I, True
        else:
            raise Exception("Cannot read instruction at: {0:x}".format(inst))

    # skip HLTs -- they are privileged, and are used in ELFs after a noreturn call
    if isHlt(inst):
        return None, False

    DEBUG("\t\tinst: {0}\n".format(idc.GetDisasm(inst)))
    inst_bytes = readInstructionBytes(inst)
    DEBUG("\t\tBytes: {0}\n".format(inst_bytes))

    I = addInst(B, inst, inst_bytes)

    if isJmpTable(inst):
        handleJmpTable(I, inst, new_eas)
        return I, False

    # Code refs from here (flow-to-next excluded), pulled out of the
    # generator because the list is tested for membership and indexed below.
    crefs = list(idautils.CodeRefsFrom(inst, 0))

    is_call = isCall(inst)
    # Reuse the instruction decoded above instead of decoding it again.
    isize = insn_t.size
    next_ea = inst + isize

    # this is a call $+5, needs special handling
    if len(crefs) == 0 and is_call and isize == 5:
        selfCallEA = next_ea
        DEBUG("INTERNAL CALL $+5: {0:x}\n".format(selfCallEA))
        sys.stdout.write("LOCAL NORETURN CALL!\n")
        I.local_noreturn = True

        # NOTE(review): when selfCallEA was already recovered, execution
        # falls through to the generic handling below -- confirm intended.
        if selfCallEA not in RECOVERED_EAS:
            DEBUG("Adding new EA: {0:x}\n".format(selfCallEA))
            new_eas.add(selfCallEA)
            I.call_target = selfCallEA
            return I, True

    for cref in crefs:
        fn = getFunctionName(cref)
        if is_call:
            elfy, fn_replace = isElfThunk(cref)
            if elfy:
                fn = fn_replace

            if isExternalReference(cref) or elfy:
                fn = handleExternalRef(fn)
                I.ext_call_name = fn
                DEBUG("EXTERNAL CALL: {0}\n".format(fn))

                if doesNotReturn(fn):
                    return I, True
            else:
                I.call_target = cref

                if cref not in RECOVERED_EAS:
                    new_eas.add(cref)

                DEBUG("INTERNAL CALL: {0}\n".format(fn))
        elif isUnconditionalJump(inst):
            if isExternalReference(cref):
                fn = handleExternalRef(fn)
                I.ext_call_name = fn
                DEBUG("EXTERNAL JMP: {0}\n".format(fn))

                if doesNotReturn(fn):
                    DEBUG("Nonreturn JMP\n")
                    return I, True
            else:
                DEBUG("INTERNAL JMP: {0:x}\n".format(cref))
                I.true_target = cref

    # true: jump to where we have a code-ref
    # false: continue as we were
    if isConditionalJump(inst):
        I.true_target = crefs[0]
        I.false_target = inst + len(inst_bytes)
        return I, False

    if is_call and isNotCode(next_ea):
        sys.stdout.write("LOCAL NORETURN CALL!\n")
        I.local_noreturn = True
        return I, True

    relo_off = findRelocOffset(inst, len(inst_bytes))
    if relo_off != -1:
        I.reloc_offset = relo_off

    for dref in idautils.DataRefsFrom(inst):
        # targets already handled as code refs are not data
        if dref in crefs:
            continue

        if inValidSegment(dref):
            if isExternalReference(dref):
                fn = getFunctionName(dref)
                fn = handleExternalRef(fn)
                if isExternalData(fn):
                    I.ext_data_name = fn
                    sys.stdout.write("EXTERNAL DATA REF FROM {0:x} to {1}\n".format(inst, fn))
                else:
                    I.ext_call_name = fn
                    sys.stdout.write("EXTERNAL CODE REF FROM {0:x} to {1}\n".format(inst, fn))
            elif isInternalCode(dref):
                I.call_target = dref
                if dref not in RECOVERED_EAS:
                    new_eas.add(dref)
            else:
                dref_size = idc.ItemSize(dref)
                DEBUG("\t\tData Ref: {0:x}, size: {1}\n".format(
                    dref, dref_size))
                I.data_offset = handleDataRelocation(M, dref, new_eas)
        else:
            DEBUG("Data not in valid segment {0:x}\n".format(dref))

    return I, False
def block(self, block):
    '''
    Compute the signature data of a single basic block.

    Returns a tuple:

        (hash of the formal signature parts,
         hash of the fuzzy signature parts,
         [immediate, values],
         [called, function, names])
    '''
    formal_parts = []
    fuzzy_parts = []
    called_names = []
    imm_values = []

    cur = ida_shims.start_ea(block)
    while cur < ida_shims.end_ea(block):
        insn = ida_shims.decode_insn(cur)

        # All data/code references leaving the current instruction
        data_refs = list(idautils.DataRefsFrom(cur))
        code_refs = list(idautils.CodeRefsFrom(cur, False))

        # Every mnemonic goes into the formal signature
        formal_parts.append(ida_shims.print_insn_mnem(cur))

        if idaapi.is_call_insn(cur):
            # Calls: remember the callee names (used for call-based
            # signatures).  The fuzzy signature only records that a named
            # function was referenced, never which one.
            for target in code_refs:
                callee = ida_shims.get_name(target)
                if callee:
                    called_names.append(callee)
                    fuzzy_parts.append("funcref")
        elif data_refs:
            # Data references: known strings contribute their value to both
            # signatures, anything else is recorded as a generic "dataref".
            for target in data_refs:
                if target in self.strings:
                    formal_parts.append(self.strings[target].value)
                    fuzzy_parts.append(self.strings[target].value)
                else:
                    formal_parts.append("dataref")
                    fuzzy_parts.append("dataref")
        elif not code_refs:
            # No references at all: every operand text is part of the formal
            # signature.  The fuzzy signature only takes immediates that are
            # not printed as negative, are at least 0xFFFF, and for which
            # get_full_flags() of the value is 0.
            ops = ida_shims.get_operands(insn)
            for idx in range(len(ops)):
                text = ida_shims.print_operand(cur, idx)
                formal_parts.append(text)

                if ops[idx].type != idaapi.o_imm or text.startswith('-'):
                    continue
                if ops[idx].value < 0xFFFF:
                    continue
                if ida_shims.get_full_flags(ops[idx].value) != 0:
                    continue

                fuzzy_parts.append(str(ops[idx].value))
                imm_values.append(ops[idx].value)

        cur = ida_shims.next_head(cur)

    formal_hash = self.sighash(''.join(formal_parts))
    fuzzy_hash = self.sighash(''.join(fuzzy_parts))
    return (formal_hash, fuzzy_hash, imm_values, called_names)
# NOTE(review): `addr` is not assigned in the visible part of this script;
# presumably this line is the tail of a loop that starts earlier -- confirm.
print hex(addr), idc.GetDisasm(addr)

# Query all data cross-references TO the data at the given address
#0x100001378L text "UTF-16LE", '\UNC\',0
#0x100001723L lea     r11, aUnc; "\\UNC\\"
ea = here()
print hex(ea), idc.GetDisasm(ea)
for addr in idautils.DataRefsTo(ea):
    print hex(addr), idc.GetDisasm(addr)

# Query all data addresses referenced FROM this address
#0x100001723L lea     r11, aUnc; "\\UNC\\"
#0x100001378L text "UTF-16LE", '\UNC\',0
ea = here()
print hex(ea), idc.GetDisasm(ea)
for addr in idautils.DataRefsFrom(ea):
    print hex(addr), idc.GetDisasm(addr)

# Cross-reference types
#0  = 'Data_Unknown'
#1  = 'Data_Offset'
#2  = 'Data_Write'
#3  = 'Data_Read'
#4  = 'Data_Text'
#5  = 'Data_Informational'
#16 = 'Code_Far_Call'
#17 = 'Code_Near_Call'
#18 = 'Code_Far_Jump'
#19 = 'Code_Near_Jump'
#20 = 'Code_User'
#21 = 'Ordinary_Flow'
def trace_param(ea, min_ea, op_type, op_val): ''' trace_param: ea, min_ea, op_type, op_val Taking ea as start, this function does basic backtrace of an operand (defined by op_type and op_val) until it finds a data reference which we consider the "source". It stops when ea < min_ea (usually the function start). It does not support arithmetic or complex modifications of the source. This will be improved on future versions. ''' global displ_re, msgsend, var_re ea_call = ea while ea != idc.BADADDR and ea != min_ea: ea = idc.PrevHead(ea, min_ea) if op_type == idaapi.o_reg and op_val == 0 and idaapi.is_call_insn(ea): # We have a BL/BLX that will modify the R0 # we're tracking #TODO: resolve more situation return None if idc.GetMnem(ea) in ['LDR', 'MOV']: src_op = 1 dest_op = 0 elif idc.GetMnem(ea) == 'STR': src_op = 0 dest_op = 1 else: continue if idc.GetOpType(ea, dest_op) == op_type and idc.GetOperandValue(ea, dest_op) == op_val: # Found, see where it comes from if idc.GetOpType(ea, src_op) == idc.o_mem or idc.GetOpType(ea, src_op) == idc.o_imm: #add o_imm support # Got the final reference refs = list(idautils.DataRefsFrom(ea)) if not refs: local_ref = idc.GetOperandValue(ea, src_op) far_ref = idc.Dword(local_ref) else: while len(refs) > 0: far_ref = refs[0] refs = list(idautils.DataRefsFrom(refs[0])) #patch by lc if far_ref: return far_ref elif idc.GetOpType(ea, src_op) == idc.o_displ: if ', [SP' in idc.GetDisasm(ea): if 'arg_' in idc.GetDisasm(ea): # We don't track function arguments return None # We're tracking an stack variable try: var_name = var_re.search(idc.GetDisasm(ea)).group('varname') except: print '%08x: Unable to recognize variable' % ea return None while ea != idc.BADADDR and ea > min_ea: if idc.GetMnem(ea) == 'STR' and var_name in idc.GetDisasm(ea): # New reg to track op_val = idc.GetOperandValue(ea, dest_op) break ea = idc.PrevHead(ea, min_ea) else: # New reg to track if '[LR]' in idc.GetDisasm(ea): # Optimizations use LR as general reg op_val = 14 
else: try: op_val = int(displ_re.search(idc.GetDisasm(ea)).group('regnum')) except: print '%08x: Unable to recognize register' % ea return None elif idc.GetOpType(ea, src_op) == idc.o_reg: # Direct reg-reg assignment op_val = idc.GetOperandValue(ea, src_op) else: # We don't track o_phrase or other complex source operands :( return None #register R0-R3 assigned by function parameter if ea <= min_ea and op_type == idc.o_reg and op_val in range(4): f_info = get_func_info(ea) return ['pself', 'selector', f_info['fparam_type'], f_info['sparam_name']][op_val]#fix: error return None def fix_callgraph(msgsend, segname, class_param, sel_param): #class_param == 0, sel_param == 1 ''' fix_callgraph: msgsend, segname, class_param, sel_param Given the msgsend flavour address as a parameter, looks for the parameters (class and selector, identified by class_param and sel_param) and creates a new segment where it places a set of dummy calls named as classname_methodname (we use method instead of selector most of the time). 
''' t1 = time.time() if not msgsend: print 'ERROR: msgSend not found' return total = 0 resolved = 0 call_table = dict() for xref in idautils.XrefsTo(msgsend, idaapi.XREF_ALL): total += 1 ea_call = xref.frm func_start = idc.GetFunctionAttr(ea_call, idc.FUNCATTR_START) if not func_start or func_start == idc.BADADDR: continue ea = ea_call method_name_ea = trace_param(ea, func_start, idc.o_reg, sel_param)#sel_param == 1 if method_name_ea and idc.isASCII(idc.GetFlags(method_name_ea)): method_name = idc.GetString(method_name_ea, -1, idc.ASCSTR_C) if not method_name: method_name = '_unk_method' else: method_name = '_unk_method' class_name_ea = trace_param(ea, func_start, idc.o_reg, class_param)#class_param == 0 if class_name_ea: class_name = idc.Name(class_name_ea) if not class_name: class_name = '_unk_class' else: class_name = '_unk_class' if method_name == '_unk_method' and class_name == '_unk_class': continue # Using this name convention, if the class and method # are identified by IDA, the patched call will point to # the REAL call and not one of our dummy functions # class_name = class_name.replace('_OBJC_CLASS_$_', '') class_name = class_name.replace('_OBJC_METACLASS_$_', '') new_name = '_[' + class_name + '_' + method_name + ']' print '%08x: %s' % (ea_call, new_name) call_table[ea_call] = new_name resolved += 1 print '\nFinal stats:\n\t%d total calls, %d resolved' % (total, resolved) print '\tAnalysis took %.2f seconds' % (time.time() - t1) if resolved == 0: print 'Nothing to patch.' return print 'Adding new segment to store new nullsubs' # segment size = opcode ret (4 bytes) * num_calls seg_size = resolved * 4 seg_start = idc.MaxEA() + 4 idaapi.add_segm(0, seg_start, seg_start + seg_size, segname, 'CODE') print 'Patching database...' 
seg_ptr = seg_start for ea, new_name in call_table.items(): if idc.LocByName(new_name) != idc.BADADDR: offset = idc.LocByName(new_name) - ea else: # create code and name it idc.PatchDword(seg_ptr, 0xE12FFF1E) # BX LR idc.MakeName(seg_ptr, new_name) idc.MakeCode(seg_ptr) idc.MakeFunction(seg_ptr, seg_ptr + 4) idc.MakeRptCmt(seg_ptr, new_name) offset = seg_ptr - ea seg_ptr += 4 # patch the msgsend call if idc.GetReg(ea, "T") == 1: if offset > 0 and offset & 0xFF800000: print 'Offset too far for Thumb (%08x) Stopping [%08x]' % (offset, ea) return off1 = (offset & 0x7FF000) >> 12 off2 = (offset & 0xFFF) / 2 w1 = (0xF000 | off1) w2 = (0xE800 | off2) - 1 idc.PatchWord(ea, w1) idc.PatchWord(ea + 2, w2) else: if offset > 0 and offset & 0xFF000000: print 'Offset too far (%08x) Stopping [%08x]' % (offset, ea) dw = (0xFA000000 | (offset - 8 >> 2)) if dw < 0: dw = dw & 0xFAFFFFFF idc.PatchDword(ea, dw) def make_offsets(segname): ''' change the segment's data value into offset by class name ''' segea = idc.SegByBase(idc.SegByName(segname)) segend = idc.SegEnd(segea) while segea < segend: idc.OpOffset(segea, 0) ptr = idc.Dword(segea) idc.OpOffset(ptr, 0) segea += 4 if __name__ == '__main__': print 'Preparing class references segments' make_offsets('__objc_classrefs') #TODO: what's these two segment means? make_offsets('__objc_superrefs') idaapi.analyze_area(idc.MinEA(), idc.MaxEA()) print 'Fixing callgraph' fix_callgraph(idc.LocByName('_objc_msgSend'), 'msgSend', 0, 1) fix_callgraph(idc.LocByName('_objc_msgSendSuper2'), 'msgSendSuper', 3, 1) idaapi.analyze_area(idc.MinEA(), idc.MaxEA()) print 'Done.'
def _get_ida_func_surrogate(func, arch):
    """Build a plain-dict description ("surrogate") of an IDA function.

    Args:
        func: object exposing ``startEA`` for the function to describe.
        arch: architecture name; when it equals ``'arm'`` the value of the
            ``T`` segment register is added to each block start address.

    Returns:
        dict with the keys ``name``, ``id``, ``call``, ``sea``, ``see``,
        ``api``, ``blocks`` and ``comments``.  Each entry of ``blocks`` is a
        dict with ``id``, ``sea``, ``eea``, ``name``, ``dat``, ``src``,
        ``call`` and, when the bytes could be read, ``bytes``.
    """
    func_surrogate = dict()
    func_surrogate['name'] = idc.GetFunctionName(func.startEA)
    func_surrogate['id'] = func.startEA
    # ignore call-graph at this moment
    func_surrogate['call'] = list()
    func_surrogate['sea'] = func.startEA
    func_surrogate['see'] = idc.FindFuncEnd(func.startEA)
    # api is optional
    func_surrogate['api'] = _get_api(func.startEA)[1]
    func_surrogate['blocks'] = list()

    # comments
    func_surrogate['comments'] = []
    func_surrogate['comments'].extend(get_comments(func.startEA))

    for bb in idaapi.FlowChart(idaapi.get_func(func.startEA)):
        block = dict()
        block['id'] = bb.id
        block['sea'] = bb.startEA
        # Compare by value: `is` on a string literal only works by accident
        # of interning and silently skips the adjustment otherwise.
        if arch == 'arm':
            # for arm; the last bit indicates thumb mode.
            block['sea'] += idc.GetReg(bb.startEA, 'T')
        block['eea'] = bb.endEA
        block['name'] = 'loc_' + format(bb.startEA, 'x').upper()
        dat = {}
        block['dat'] = dat
        s = idc.GetManyBytes(bb.startEA, bb.endEA - bb.startEA)
        if s is not None:
            block['bytes'] = "".join("{:02x}".format(ord(c)) for c in s)

        func_surrogate['comments'].extend(get_comments(bb.startEA))

        instructions = list()
        for head in idautils.Heads(bb.startEA, bb.endEA):
            # each instruction is [address, mnemonic, operand, ...]
            ins = list()
            ins.append(str(hex(head)).rstrip("L").upper().replace("0X", "0x"))
            opr = idc.GetMnem(head)
            if opr == "":
                # not an instruction head
                continue
            ins.append(opr)
            for i in range(5):
                opd = idc.GetOpnd(head, i)
                if opd == "":
                    continue
                ins.append(opd)
            instructions.append(ins)

            # 8 bytes read at each referenced address, hex-encoded; with
            # several references only the last one is kept for this head
            refs = list(idautils.DataRefsFrom(head))
            for ref in refs:
                dat[head] = binascii.hexlify(struct.pack("<Q", idc.Qword(ref)))

        block['src'] = instructions

        # flow chart
        block_calls = list()
        for success_block in bb.succs():
            block_calls.append(success_block.id)
        block['call'] = block_calls
        func_surrogate['blocks'].append(block)
    return func_surrogate
def drefs_from(self):
    """Data references leaving this line, as given by idautils.DataRefsFrom."""
    origin = self.ea
    return idautils.DataRefsFrom(origin)
def track_param(ea, min_ea, op_type, op_val): ''' trace_param: ea, min_ea, op_type, op_val Taking ea as start, this function does basic backtrace of an operand (defined by op_type and op_val) until it finds a data reference which we consider the "source". It stops when ea < min_ea (usually the function start). It does not support arithmetic or complex modifications of the source. This will be improved on future versions. ''' global msgsend, var_re ea_call = ea while ea != idc.BADADDR and ea != min_ea: ea = idc.PrevHead(ea, min_ea) if idc.GetMnem(ea) not in ['lea', 'mov']: continue if idc.GetOpType(ea, 0) == op_type and idc.GetOperandValue(ea, 0) == op_val: if idc.GetOpType(ea, 1) == idc.o_displ: if ', [esp' in idc.GetDisasm(ea) or ', [ebp' in idc.GetDisasm(ea): if 'arg_' in idc.GetDisasm(ea): # We don't track function arguments return None # We only track stack variables try: var_name = var_re.search(idc.GetDisasm(ea)).group('varname') op_type = idc.GetOpType(ea, 1) except: print '%08x: Unable to recognize variable' % ea return None while ea != idc.BADADDR and ea > min_ea: if idc.GetMnem(ea) == 'mov' or idc.GetMnem(ea) == 'lea' and var_name in idc.GetDisasm(ea): # New reg to track op_val = idc.GetOperandValue(ea, 0) break ea = idc.PrevHead(ea, min_ea) elif idc.GetOpType(ea, 1) == idc.o_mem: # Got the final reference refs = list(idautils.DataRefsFrom(ea)) if not refs: local_ref = idc.GetOperandValue(ea, 1) far_ref = idc.Dword(local_ref) else: while len(refs) > 0: far_ref = refs[0] refs = list(idautils.DataRefsFrom(refs[0])) return far_ref elif idc.GetOpType(ea, 1) == idc.o_reg: # Direct reg-reg assignment op_val = idc.GetOperandValue(ea, 1) op_type = idc.GetOpType(ea, 1) else: # We don't track o_phrase or other complex source operands :( return None return None
def dataRefs(self, ea):
    """Return idautils.DataRefsFrom() for the address `ea`."""
    outgoing = idautils.DataRefsFrom(ea)
    return outgoing
def trace_arg_bwd(ea, arg_num):
    """Trace a call argument backwards to the string it was loaded from.

    Starting at `ea`, walks instructions backwards and follows MOV copies
    of the register that holds argument `arg_num` (R0-R3 on ARM32).  When an
    LDR/ADR/ADR.W or a PC-relative ADD into a tracked register has exactly
    one matching reference, and that reference is the address of an entry of
    IDAStrings, that string is returned.

    The walk continues into a predecessor block only while there is exactly
    one predecessor.

    Args:
        ea: address at which the backward walk starts.
        arg_num: zero-based argument index.

    Returns:
        The string as ``str``, or ``""`` when nothing was found.
    """
    ARCH = "ARM32"
    CALL_ARGS = {"ARM32": ["R0", "R1", "R2", "R3"]}

    args = CALL_ARGS[ARCH]
    if len(args) <= arg_num:
        # Argument index beyond the register arguments: track "SP".
        # NOTE(review): no stack offset is used by the tracing below.
        arg_into = "SP"
    else:
        arg_into = args[arg_num]

    def string_at(addr):
        # Linear lookup of the IDAStrings entry located at `addr`.
        for s in IDAStrings:
            if s.ea == addr:
                return str(s)
        return None

    func = idaapi.get_func(ea)
    if func is None:
        # ea is not inside a function: nothing to trace
        return ""
    fc = idaapi.FlowChart(func)

    for block in fc:
        if block.startEA <= ea and block.endEA > ea:
            break
    else:
        # no basic block contains ea
        return ""

    # original sink
    arg_in = set([arg_into])
    # blocks already walked; prevents looping forever around a cycle of
    # single-predecessor blocks
    visited = set([block.startEA])

    while ea >= block.startEA:
        ############ BEGINNING OF TRACING ############
        mnem = idc.GetMnem(ea)

        if mnem == "MOV":
            arg_to = idc.GetOpnd(ea, 0)
            arg_from = idc.GetOpnd(ea, 1)

            # propagate to new register
            if arg_to in arg_in:
                arg_in.add(arg_from)
            # note: if arg_from is in arg_in, but arg_to isn't, we don't add
            # arg_to to the sinks, because we are going backwards, so we
            # know that's not the one that ended up being used.

        elif mnem in ("LDR", "ADR", "ADR.W"):
            if idc.GetOpnd(ea, 0) in arg_in:
                # There should be a data reference to a string here.  We
                # want the xref of type 1 (Data_Offset), as opposed to
                # 3 (Data_Read).
                refs = [r for r in idautils.XrefsFrom(ea) if r.type == 1]
                if len(refs) == 1:
                    # only one data offset reference; if it is a string we
                    # are done
                    found = string_at(refs[0].to)
                    if found is not None:
                        return found

        elif mnem == "ADD":
            arg_to = idc.GetOpnd(ea, 0)
            arg_from = idc.GetOpnd(ea, 1)
            if arg_from == "PC" and arg_to in arg_in:
                # There should be a data reference to a string here.
                drefs = list(idautils.DataRefsFrom(ea))
                if len(drefs) == 1:
                    found = string_at(drefs[0])
                    if found is not None:
                        return found
        ############ END OF TRACING ############

        if ea == block.startEA:
            # block.preds() is not used: it was observed to report no
            # predecessors, so they are derived from every block's
            # successors instead.
            preds = [b for b in fc
                     for s in b.succs()
                     if s.startEA == block.startEA]

            if len(preds) != 1:
                # 0 or multiple predecessor blocks, giving up.
                return ""

            block = preds[0]
            if block.startEA in visited:
                # walked in a circle without finding anything
                return ""
            visited.add(block.startEA)

            prev = idautils.DecodePreviousInstruction(block.endEA)
            if prev is None:
                return ""
            ea = block.endEA - prev.size
        else:
            prev = idautils.DecodePreviousInstruction(ea)
            if prev is None:
                # cannot step further back
                return ""
            ea -= prev.size

    return ""
def process_function(arch, func_ea):
    """Export one function: walk its code chunks and feed a data packet.

    Starting from the chunk containing the entry point, every head of every
    chunk is visited.  Comments, direct/indirect calls, conditional and
    unconditional branches and data references are recorded on a
    DismantlerDataPacket.  Branch targets outside the known chunks are
    queued as new chunks; once the queue is empty, the chunks reported by
    get_chunks() that were not visited yet are processed too.

    The packet and a FunctionAnalyzer built from it are finally handed to
    `instrumentation`.

    Args:
        arch: architecture object providing process_instruction(),
            is_call(), is_conditional_branch() and
            is_unconditional_branch().
        func_ea: address of the function entry point.
    """

    func_end = idc.FindFuncEnd(func_ea)

    packet = DismantlerDataPacket()

    ida_chunks = get_chunks(func_ea)
    chunks = set()

    # Add to the chunks only the main block, containing the
    # function entry point
    #
    chunk = get_flow_code_from_address(func_ea)
    if chunk:
        chunks.add(chunk)

    # Make "ida_chunks" a set for faster searches within
    ida_chunks = set(ida_chunks)
    # chunk start address -> chunk, to find the chunk to drop once visited
    ida_chunks_idx = dict(zip([c[0] for c in ida_chunks], ida_chunks))

    func = idaapi.get_func(func_ea)
    # Function comments; they are attached to the first head processed
    comments = [idaapi.get_func_cmt(func, 0), idaapi.get_func_cmt(func, 1)]

    # Copy the list of chunks into a queue to process
    #
    chunks_todo = [c for c in chunks]

    while True:

        # If no chunks left in the queue, exit
        if not chunks_todo:

            if ida_chunks:
                # fall back to the chunks IDA knows about that have not
                # been visited yet
                chunks_todo.extend(ida_chunks)
            else:
                break

        chunk_start, chunk_end = chunks_todo.pop()
        if ida_chunks_idx.has_key(chunk_start):
            ida_chunks.remove(ida_chunks_idx[chunk_start])
            del ida_chunks_idx[chunk_start]

        for head in idautils.Heads(chunk_start, chunk_end):

            # Merge any pending comments with this head's comments
            comments.extend((idaapi.get_cmt(head, 0), idaapi.get_cmt(head, 1)))
            comment = '\n'.join([c for c in comments if c is not None])
            comment = comment.strip()
            if comment:
                packet.add_comment(head, comment)
            comments = list()

            if idc.isCode(idc.GetFlags(head)):

                instruction = arch.process_instruction(packet, head)

                # if there are other references than
                # flow add them all.
                if list(idautils.CodeRefsFrom(head, 0)):

                    # for each reference, including flow ones
                    for ref_idx, ref in enumerate(
                            idautils.CodeRefsFrom(head, 1)):

                        if arch.is_call(instruction):

                            # This two conditions must remain separated, it's
                            # necessary to enter the enclosing "if" whenever
                            # the instruction is a call, otherwise it will be
                            # added as an uncoditional jump in the last else
                            #
                            if ref in list(idautils.CodeRefsFrom(head, 0)):
                                packet.add_direct_call(head, ref)

                        elif ref_idx > 0 and arch.is_conditional_branch(
                                instruction):

                            # The ref_idx is > 0 in order to avoid processing the
                            # normal flow reference which would effectively imply
                            # that the conditional branch is processed twice.
                            # It's done this way instead of changing the loop's head
                            # from CodeRefsFrom(head, 1) to CodeRefsFrom(head, 0) in
                            # order to avoid altering the behavior of other conditions
                            # which rely on it being so.

                            # FIXME
                            # I don't seem to check for the reference here
                            # to point to valid, defined code. I suspect
                            # this could lead to a failure when exporting
                            # if such situation appears. I should test if
                            # it's a likely scenario and probably just add
                            # an isHead() or isCode() to address it.

                            packet.add_conditional_branch_true(head, ref)
                            packet.add_conditional_branch_false(
                                head, idaapi.next_head(head, chunk_end))

                            # If the target is not in our chunk list
                            if not address_in_chunks(ref, chunks):
                                new_chunk = get_flow_code_from_address(ref)
                                # Add the chunk to the chunks to process
                                # and to the set containing all visited
                                # chunks
                                if new_chunk is not None:
                                    chunks_todo.append(new_chunk)
                                    chunks.add(new_chunk)

                        elif arch.is_unconditional_branch(instruction):
                            packet.add_unconditional_branch(head, ref)

                            # If the target is not in our chunk list
                            if not address_in_chunks(ref, chunks):
                                new_chunk = get_flow_code_from_address(ref)
                                # Add the chunk to the chunks to process
                                # and to the set containing all visited
                                # chunks
                                if new_chunk is not None:
                                    chunks_todo.append(new_chunk)
                                    chunks.add(new_chunk)

                        #skip = False

                for ref in idautils.DataRefsFrom(head):
                    packet.add_data_reference(head, ref)

                    # Get a data reference from the current reference's
                    # location. For instance, if 'ref' points to a valid
                    # address and such address contains a data reference
                    # to code.
                    target = list(idautils.DataRefsFrom(ref))
                    if target:
                        target = target[0]
                    else:
                        target = None

                    if target is None and arch.is_call(instruction):
                        # call through a location with no further data
                        # reference: recorded as an imported function
                        imp_name = idc.Name(ref)
                        imp_module = get_import_module_name(ref)

                        imported_functions.add((ref, imp_name, imp_module))
                        packet.add_indirect_virtual_call(head, ref)

                    # NOTE(review): idc.isHead() is given the address itself
                    # here, not idc.GetFlags(target) -- confirm.
                    elif target is not None and idc.isHead(target):
                        # for calls "routed" through this reference
                        if arch.is_call(instruction):
                            packet.add_indirect_call(head, target)

                        # for unconditional jumps "routed" through this reference
                        elif arch.is_unconditional_branch(instruction):
                            packet.add_unconditional_branch(head, target)

                        # for conditional "routed" through this reference
                        elif arch.is_conditional_branch(instruction):
                            packet.add_conditional_branch_true(head, target)
                            packet.add_conditional_branch_false(
                                head, idaapi.next_head(head, chunk_end))

    f = FunctionAnalyzer(arch, func_ea, packet)

    instrumentation.new_packet(packet)
    instrumentation.new_function(f)
def block(self, block):
    """
    Compute the signature data of a single basic block.

    Returns a tuple:

        (hash of the formal signature parts,
         hash of the fuzzy signature parts,
         [immediate, values],
         [called, function, names])
    """
    formal = []
    fuzzy = []
    functions = []
    immediates = []

    ea = block.start_ea
    insn = idaapi.insn_t()
    while ea < block.end_ea:
        # (Re)fill `insn` with the instruction at ea
        idaapi.decode_insn(insn, ea)

        # Get a list of all data/code references from the current instruction
        drefs = [x for x in idautils.DataRefsFrom(ea)]
        crefs = [x for x in idautils.CodeRefsFrom(ea, False)]

        # Add all instruction mnemonics to the formal block hash
        formal.append(idc.print_insn_mnem(ea))

        # If this is a call instruction, be sure to note the name of the function
        # being called. This is used to apply call-based signatures to functions.
        #
        # For fuzzy signatures, we can't use the actual name or EA of the function,
        # but rather just want to note that a function call was made.
        #
        # Formal signatures already have the call instruction mnemonic, which is more
        # specific than just saying that a call was made.
        if idaapi.is_call_insn(ea):
            for cref in crefs:
                func_name = idc.get_name(cref, ida_name.GN_VISIBLE)
                if not func_name:
                    continue
                functions.append(func_name)
                fuzzy.append('funcref')
        # If there are data references from the instruction, check to see if any of them
        # are strings. These are looked up in the pre-generated strings dictionary.
        #
        # String values are easily identifiable, and are used as part of both the fuzzy
        # and the formal signatures.
        #
        # It is more difficult to determine if non-string values are constants or not;
        # for both fuzzy and formal signatures, just use "dataref" to indicate that some
        # data was referenced.
        elif drefs:
            for dref in drefs:
                if dref in self.strings:
                    formal.append(self.strings[dref].value)
                    fuzzy.append(self.strings[dref].value)
                else:
                    formal.append('dataref')
                    fuzzy.append('dataref')
        # If there are no data or code references from the instruction, use every operand as
        # part of the formal signature.
        #
        # Fuzzy signatures are only concerned with interesting immediate values, that is,
        # values that are at least 0xFFFF, for which get_full_flags() is 0, and that are
        # not displayed as negative values.
        elif not drefs and not crefs:
            # Operands of the instruction decoded above.  A fresh
            # idaapi.insn_t() here would be empty and no immediate would
            # ever be collected.
            ops = insn.ops
            for n in range(0, len(ops)):
                opnd_text = idc.print_operand(ea, n)
                formal.append(opnd_text)

                if ops[n].type != idaapi.o_imm or opnd_text.startswith('-'):
                    continue

                if ops[n].value < 0xFFFF:
                    continue

                if idaapi.get_full_flags(ops[n].value) != 0:
                    continue

                fuzzy.append(str(ops[n].value))
                immediates.append(ops[n].value)

        ea = idc.next_head(ea)

    return self.sighash(''.join(formal)), self.sighash(''.join(fuzzy)), immediates, functions
def objc_msgsend_xref(self, call_ea, objc_self, objc_selector, create_xref=True):
    '''
    Create a code xref from a selector-loading instruction to the
    Objective-C method implementation it resolves to.

    call_ea      : location of the call/jmp to objc_msgSend (not used here)
    objc_self    : ea where the `self` argument is set (not used here)
    objc_selector: ea of the instruction that sets the selector argument
    create_xref  : when False the lookup is done but no xref is added

    The chain followed is: instruction -> __objc_selrefs entry ->
    __objc_methname string -> referencing __objc_const data -> pointer
    into __text.  Returns True when an implementation was found, False as
    soon as one link of the chain is missing.

    Known limitation: `self` is ignored, so when several classes have a
    method of the same name the last implementation found is used.
    '''
    disasm_text = idc.GetDisasm(objc_selector)
    if self.debugflag:
        print(">>> objc_msgsend_xref 0x%08x %s" % (objc_selector, disasm_text))

    # Selector reference used by the instruction (the last match is kept;
    # the implicit assumption is that there is exactly one).
    selref = None
    for candidate in idautils.DataRefsFrom(objc_selector):
        if idc.get_segm_name(candidate) != "__objc_selrefs":
            continue
        selref = candidate
    if not selref:
        return False

    # Method-name string the selector reference points to (same assumption).
    methname = None
    for candidate in idautils.DataRefsFrom(selref):
        if idc.get_segm_name(candidate) != "__objc_methname":
            continue
        methname = candidate
    if not methname:
        return False

    # Inbound references to the name from __objc_const are expected to be
    # __objc2_meth structs; their __text pointer is the implementation.
    # BUG: with several methods of the same name this cannot tell them apart.
    meth_struct_id = ida_struct.get_struc_id("__objc2_meth")
    struct_seen = False
    implementation = None
    for const_ref in idautils.DataRefsTo(methname):
        # multiple may match
        if idc.get_segm_name(const_ref) != "__objc_const":
            continue

        # check the outbound references for the struct id
        for out_ref in idautils.DataRefsFrom(const_ref):
            if out_ref == meth_struct_id:
                struct_seen = True

        # the flag is never reset once set
        # TODO: check against `self` here to make sure it's the proper class
        if not struct_seen:
            continue

        for out_ref in idautils.DataRefsFrom(const_ref):
            # assumption made on function always being in text segment
            if idc.get_segm_name(out_ref) != "__text":
                continue
            # save the method implementation -- this is the function ptr
            if self.debugflag:
                print(
                    "0x%08x checking for the proper method -- %s" %
                    (out_ref,
                     idc.get_name(
                         idc.get_func_attr(
                             out_ref,
                             idc.FUNCATTR_START))))
            implementation = out_ref

    # No method in this binary: no xref is made.
    if not implementation:
        return False

    if self.debugflag:
        print("Found target method 0x%08x" % implementation)

    if create_xref:
        ida_xref.add_cref(objc_selector, implementation, idc.fl_CF)

    return True