def TraceApiCall(code):
    """Trace forward from ``code`` to the ``blrl`` that follows ``mtlr <reg>``.

    ``<reg>`` is the text of operand 0 of the instruction at ``code``.  The
    scan walks down instruction by instruction: first it looks for an
    ``mtlr`` whose operand 1 equals ``<reg>``, then for the next instruction
    whose disassembly contains ``blrl``.

    Known limitation (noted by the original author): a sequence such as

        addi r10, r10, VOS_sprintf@l
        b    addr

    is not handled; the trace would have to follow ``addr`` to find the
    right call.

    :param code: address of the instruction whose operand 0 names the register.
    :return: address of the matching ``blrl`` instruction, or ``None`` when
        no match is found before the end of the containing function.
    """
    print("operand 0 is " + idc.GetOpnd(code, 0))
    print("operand 1 is " + idc.GetOpnd(code, 1))
    print("operand 2 is " + idc.GetOpnd(code, 2))

    reg = idc.GetOpnd(code, 0)
    func_end = idc.GetFunctionAttr(code, idc.FUNCATTR_END)

    ea = code
    found_mtlr = False
    while ea < func_end:
        ea = idc.FindCode(ea, SEARCH_DOWN | SEARCH_NEXT)
        if ea >= func_end:
            # The next instruction is at/after the function end (or the search
            # failed), so do not inspect it: it is not part of this function.
            return None

        if not found_mtlr:
            # Phase 1: search for "mtlr <reg>".
            if idc.GetMnem(ea) == "mtlr" and idc.GetOpnd(ea, 1) == reg:
                print(idc.GetOpnd(ea, 1))
                print("Get the instruct! " + idc.GetDisasm(ea))
                found_mtlr = True
        elif "blrl" in idc.GetDisasm(ea):
            # Phase 2: first "blrl" after the mtlr is the call site.
            # (Two spaces after "from" match the original comma-separated print.)
            print("api call " + idc.GetDisasm(ea) + " from  " + hex(ea))
            print("mnem " + idc.GetMnem(ea))
            return ea

    return None
def parse_strings():
    """Walk every function in the text segment and define the strings it loads.

    Each instruction is tried first with ``parse_str_ptr`` and then with
    ``is_string_patt``.  For the latter, the string address is operand 1 of
    the matching instruction and the length is operand 1 of the instruction
    two steps further down.  String creations that fail are queued and
    attempted once more after all functions were processed.

    :return: number of strings that were created.
    """
    created = 0
    deferred = []

    text_seg = common.get_text_seg()
    if text_seg is None:
        common._debug('Failed to get text segment')
        return created

    # This may be inherently flawed as it will only search for defined
    # functions, and as of IDA Pro 6.95 it fails to autoanalyze many Go
    # functions. It currently works well since (almost) all functions are
    # redefined/found before this is used. Could be worth a strategy rethink
    # later on, or on different architectures.
    for func_ea in idautils.Functions(text_seg.startEA, text_seg.endEA):
        func_name = idc.GetFunctionName(func_ea)
        func_end = next(idautils.Chunks(func_ea))[1]
        if func_end < func_ea:
            common._error('Unable to find good end for the function %s' % func_name)

        common._debug('Found function %s starting/ending @ 0x%x 0x%x' % (func_name, func_ea, func_end))

        cursor = func_ea
        while cursor <= func_end:
            if parse_str_ptr(cursor):
                created += 1
                cursor = idc.FindCode(cursor, idaapi.SEARCH_DOWN)
                continue

            if not is_string_patt(cursor):
                cursor = idc.FindCode(cursor, idaapi.SEARCH_DOWN)
                continue

            seg_name = idc.get_segm_name(cursor)
            if 'rodata' not in seg_name and 'text' not in seg_name:
                common._debug('Should a string be in the %s section?' % seg_name)

            string_addr = idc.GetOperandValue(cursor, 1)
            second_insn = idc.FindCode(cursor, idaapi.SEARCH_DOWN)
            length_insn = idc.FindCode(second_insn, idaapi.SEARCH_DOWN)
            string_len = idc.GetOperandValue(length_insn, 1)

            if string_len > 1:
                if not create_string(string_addr, string_len):
                    # There appears to be something odd that goes on with IDA
                    # making some strings; it always works the second time,
                    # so just force a retry later.
                    deferred.append((cursor, string_addr, string_len))
                elif create_offset(cursor):
                    created += 1

            # Skip the extra mov lines since none of them can be a load.
            cursor = idc.FindCode(length_insn, idaapi.SEARCH_DOWN)

    # Second chance for the strings that could not be created above.
    for instr_addr, string_addr, string_len in deferred:
        if not create_string(string_addr, string_len):
            common._error(
                'Unable to make a string @ 0x%x with length of %d for usage in function @ 0x%x'
                % (string_addr, string_len, instr_addr))
        elif create_offset(instr_addr):
            created += 1

    return created
def is_string_patt(addr):
    """Return True when ``addr`` starts a three-instruction string-load pattern.

    The pattern that is checked:

    1. ``mov``/``lea`` into a register from ``VALID_REGS`` whose source
       operand is a direct memory reference or an immediate, is not inside
       ``[]``, is not a ``loc_`` label and does not end in ``_ptr``;
    2. ``mov [<dest_reg>...], <that register>`` with ``<dest_reg>`` in
       ``VALID_DEST``;
    3. ``mov`` of a positive integer (presumably the string length) into
       ``[<dest_reg>+...]`` or into an operand listed in ``VALID_DEST``.

    :param addr: address of the candidate first instruction.
    :return: True if all three instructions match, otherwise False.
    """
    # --- First instruction: what is loaded, and into which register -------
    src_text = idc.GetOpnd(addr, 1)

    # The original test was "type != 2 or type != 5", which is always true
    # and therefore never filtered on the operand type.  The source must be
    # o_mem (2) or o_imm (5).  Renamed function pointers ("..._ptr") are
    # ignored as well.
    if idc.GetMnem(addr) not in ('mov', 'lea') \
            or idc.GetOpType(addr, 1) not in (2, 5) \
            or src_text[-4:] == '_ptr':
        return False

    # Validate that the string offset actually exists inside the binary.
    if idc.get_segm_name(idc.GetOperandValue(addr, 1)) is None:
        return False

    # Could be unk_, asc_, 'offset ', XXXXh; ignored ones are loc_ or inside [].
    from_reg = idc.GetOpnd(addr, 0)
    looks_like_data_ref = ('offset ' in src_text
                           or 'h' in src_text
                           or src_text[:3] == 'unk')
    if from_reg not in VALID_REGS \
            or '[' in src_text \
            or 'loc_' in src_text \
            or not looks_like_data_ref:
        return False

    # --- Second instruction: store of that register into [dest_reg...] ----
    addr_2 = idc.FindCode(addr, idaapi.SEARCH_DOWN)
    store_dst = idc.GetOpnd(addr_2, 0)
    bracket = store_dst.find('[')
    if bracket < 0:
        return False
    # The three characters following '[' are taken as the register name.
    dest_reg = store_dst[bracket + 1:bracket + 4]
    if idc.GetMnem(addr_2) != 'mov' \
            or dest_reg not in VALID_DEST \
            or idc.GetOpnd(addr_2, 1) != from_reg:
        return False

    # --- Third instruction: store of the length, could be improved --------
    addr_3 = idc.FindCode(addr_2, idaapi.SEARCH_DOWN)
    len_dst = idc.GetOpnd(addr_3, 0)
    len_src = idc.GetOpnd(addr_3, 1)
    if idc.GetMnem(addr_3) != 'mov':
        return False
    if ('[%s+' % dest_reg) not in len_dst and len_dst not in VALID_DEST:
        return False
    if 'offset ' in len_src or 'dword ptr ds' in len_src:
        return False
    # Operand types 1 (register), 2 (memory) and 4 (displacement) cannot be a
    # literal length.  Potentially checking for type 5 alone would suffice.
    if idc.GetOpType(addr_3, 1) in (1, 2, 4):
        return False

    try:
        length = idc.GetOperandValue(addr_3, 1)
    except ValueError:
        return False
    return 0 < length < sys.maxsize
def GetApiParam(caller, n):
    """Return the address of the n-th ``push`` found scanning up from ``caller``.

    Instructions are visited upward starting just before ``caller``; every
    instruction whose mnemonic contains ``push`` is counted.

    :param caller: address to start scanning from (not inspected itself).
    :param n: 1-based index of the push to locate.
    :return: address of the n-th push, or ``None`` when ``n`` is not positive
        or the search runs out of code before n pushes were seen.
    """
    if n <= 0:
        # n == 0 returned None before; negative n used to loop forever.
        return None

    code = caller
    count = 0
    while count < n:
        code = idc.FindCode(code, SEARCH_UP | SEARCH_NEXT)
        if code == idc.BADADDR:
            # No more code above: stop instead of scanning forever.
            return None
        ## print "mnemonic is "+GetMnem(code)
        ## print "disass is "+GetDisasm(code)
        ## print "operand is "+GetOpnd(code,0)
        if "push" in idc.GetMnem(code):
            count += 1
    return code
def GetApiParam(caller, n):
    """Scan up from ``caller`` for the instruction that sets the n-th argument.

    The n-th argument register is taken from ``r3``..``r6``.  The scan
    returns the first instruction above ``caller`` whose operand 0 is that
    register.  For ``r3`` only, it also stops at the first instruction whose
    disassembly contains ``bl`` (the original comment says "blrl or bl").

    NOTE(review): the ``"bl" in disasm`` test is a plain substring match, so
    it also matches any other disassembly text containing "bl" - confirm
    whether that is intended.

    :param caller: address to start scanning from (not inspected itself).
    :param n: 1-based argument index, 1..4.
    :return: address of the matching instruction, or ``None`` when the
        search runs out of code.
    :raises ValueError: if ``n`` is outside 1..4.
    """
    # Must be an ordered sequence: the original used a set literal, which
    # cannot be indexed and raised TypeError on every call.
    arg_regs = ("r3", "r4", "r5", "r6")
    if not 1 <= n <= len(arg_regs):
        raise ValueError("n must be between 1 and %d, got %r" % (len(arg_regs), n))
    reg = arg_regs[n - 1]

    code = caller
    while True:
        code = idc.FindCode(code, SEARCH_UP | SEARCH_NEXT)
        if code == idc.BADADDR:
            # No more code above: stop instead of scanning forever.
            return None
        ## print "mnemonic is "+GetMnem(code)
        ## print "disass is "+GetDisasm(code)
        ## print "operand is "+GetOpnd(code,0)
        if reg == "r3" and "bl" in idc.GetDisasm(code):
            ## blrl or bl
            return code
        if reg == idc.GetOpnd(code, 0):
            return code
def get_goroot():
    """Get the GOROOT path string from the ``runtime_GOROOT`` function.

    The return blocks of ``runtime_GOROOT`` are inspected; the first one
    whose first instruction has a register as operand 0 is used to locate
    the path address and length.  The bytes are read, marked as a string in
    the database, and returned with backslashes replaced by forward slashes.

    :return: the GOROOT path, or ``""`` if ``runtime_GOROOT`` was not found
        or no return block matched.
    :raises Exception: if the address/length could not be determined or the
        path bytes could not be read.
    """
    goroot_path_str = ""

    func_goroot = find_func_by_name("runtime_GOROOT")
    if func_goroot is None:
        _error("Failed to find func contains goroot")
        return goroot_path_str

    goroot_flowchart = idaapi.FlowChart(f=func_goroot)
    ret_cbs = find_ret_cb(goroot_flowchart)

    # runtime.GOROOT() normally has 2 return code blocks:
    # 1. False return
    #       mov     [rsp+28h+arg_0], rax
    #       mov     [rsp+28h+arg_8], rcx
    #       mov     rbp, [rsp+28h+var_8]
    #       add     rsp, 28h
    #       retn
    #
    # 2. True return (the one we need):
    #   (1). goroot string length as ptr
    #       mov     rax, cs:runtime_internal_sys_DefaultGoroot
    #       mov     rcx, cs:qword_D9AB58
    #       mov     [rsp+28h+arg_0], rax
    #       mov     [rsp+28h+arg_8], rcx
    #       mov     rbp, [rsp+28h+var_8]
    #       add     rsp, 28h
    #       retn
    #
    #   (2). goroot string length as instant number
    #       lea     rax, unk_7220B5
    #       mov     [rsp+28h+arg_0], rax
    #       mov     [rsp+28h+arg_8], 0Dh
    #       mov     rbp, [rsp+28h+var_8]
    #       add     rsp, 28h
    #       retn
    for cb_idx in ret_cbs:
        block_start = goroot_flowchart[cb_idx].startEA
        # Op types, see
        # https://www.hex-rays.com/products/ida/support/sdkdoc/ua_8hpp.html#aaf9da6ae7e8b201108fc225adf13b4d9
        #   o_void   = 0  # No Operand
        #   o_reg    = 1  # General Register (al,ax,es,ds...)
        #   o_mem    = 2  # Direct Memory Reference (DATA)
        #   o_phrase = 3  # Memory Ref [Base Reg + Index Reg]
        #   o_displ  = 4  # Memory Reg [Base Reg + Index Reg + Displacement]
        #   o_imm    = 5  # Immediate Value
        #   o_far    = 6  # Immediate Far Address (CODE)
        #   o_near   = 7  # Immediate Near Address (CODE)
        if idc.GetOpType(block_start, 0) != 1:
            continue
        # e.g.: mov rax, cs:runtime_internal_sys_DefaultGoroot

        goroot_path_len = 0
        goroot_path_addr = 0

        goroot_path_addr_val = idc.GetOperandValue(block_start, 1)
        end_addr = goroot_flowchart[cb_idx].endEA

        # Find the goroot path length and how it is encoded (immediate
        # number, or address of the length).
        curr_addr = idc.FindCode(block_start, idaapi.SEARCH_DOWN)
        while curr_addr <= end_addr:
            len_optype = idc.GetOpType(curr_addr, 1)
            if len_optype == 2:
                # Address of length, e.g.: mov rcx, cs:qword_D9AB58
                # Pointer and length are read from two adjacent slots.
                goroot_path_addr = read_mem(goroot_path_addr_val)
                goroot_path_len = read_mem(goroot_path_addr_val + ADDR_SZ)
                break
            elif len_optype == 5:
                # Immediate length, e.g.: mov [rsp+28h+arg_8], 0Dh
                goroot_path_addr = goroot_path_addr_val
                goroot_path_len = idc.GetOperandValue(curr_addr, 1)
                break
            curr_addr = idc.FindCode(curr_addr, idaapi.SEARCH_DOWN)

        if goroot_path_len == 0 or goroot_path_addr == 0:
            raise Exception("Invalid GOROOT Address ang Length")

        # Validate the raw read BEFORE str(): str(None) would be the
        # non-empty string "None" and slip through the check.
        raw_path = idc.GetManyBytes(goroot_path_addr, goroot_path_len)
        if not raw_path:
            raise Exception("Invalid GOROOT")
        goroot_path_str = str(raw_path)

        idc.MakeStr(goroot_path_addr, goroot_path_addr + goroot_path_len)
        idaapi.autoWait()
        break

    if len(goroot_path_str) > 0:
        _info("Go ROOT Path: %s\n" % goroot_path_str)

    return goroot_path_str.replace("\\", "/")
def parse_str_ptr(addr):
    """Detect a string loaded through an adjacent (pointer, length) pair.

    Matches two consecutive ``mov <reg>, <mem>`` instructions whose memory
    operands are exactly ``common.ADDR_SZ`` apart.  The higher address is
    read as the string length and the lower one as the string pointer.  If
    the length is in 2..64 and the pointer lies in a segment whose name
    contains ``rodata`` or ``text``, the string is created, the
    length-loading instruction gets a ``length: N`` comment and the
    pointer-loading instruction gets a data reference to the string.

    :param addr: address of the candidate first ``mov``.
    :return: True if a string was created, otherwise False.
    """
    if idc.GetMnem(addr) != 'mov':
        return False

    # Validate that the string offset actually exists inside the binary.
    if idc.get_segm_name(idc.GetOperandValue(addr, 1)) is None:
        return False

    # Operand types: first must be a register (1), second a memory
    # address (2).
    if idc.GetOpType(addr, 0) != 1 or idc.GetOpType(addr, 1) != 2:
        return False

    # Same mnemonic and operand types for the following instruction.
    addr_2 = idc.FindCode(addr, idaapi.SEARCH_DOWN)
    if idc.GetMnem(addr_2) != 'mov' \
            or idc.GetOpType(addr_2, 0) != 1 \
            or idc.GetOpType(addr_2, 1) != 2:
        return False

    opnd_val_1 = idc.GetOperandValue(addr, 1)
    opnd_val_2 = idc.GetOperandValue(addr_2, 1)
    opnd_diff = opnd_val_1 - opnd_val_2

    # One operand is the address of the string length, the other the address
    # of the string pointer, and they must be side by side.
    if opnd_diff not in (common.ADDR_SZ, -common.ADDR_SZ):
        return False

    if opnd_diff > 0:
        str_len_addr, str_ptr_addr = opnd_val_1, opnd_val_2
    else:
        str_len_addr, str_ptr_addr = opnd_val_2, opnd_val_1

    str_len = common.read_mem(str_len_addr)
    str_ptr = common.read_mem(str_ptr_addr)

    # Maximum accepted string length.
    if str_len > 64:
        return False

    # str_ptr was read from memory and may point anywhere; a missing segment
    # name must not blow up the substring tests below.
    seg_name = idc.get_segm_name(str_ptr)
    if not seg_name or ('rodata' not in seg_name and 'text' not in seg_name):
        return False

    common._debug("------------------------------")
    common._debug("Possible str ptr:")
    common._debug("Code addr: 0x%x , str_ptr_addr: 0x%x , str_len_addr: 0x%x" % (addr, str_ptr_addr, str_len_addr))
    common._debug("str_addr: 0x%x , str_len: 0x%x" % (str_ptr, str_len))

    if str_len <= 1 or not idc.MakeStr(str_ptr, str_ptr + str_len):
        return False
    idaapi.autoWait()

    # opnd_diff > 0 means the first instruction loaded the length and the
    # second one loaded the pointer; otherwise the roles are swapped.
    if opnd_diff > 0:
        len_insn, ptr_insn = addr, addr_2
    else:
        len_insn, ptr_insn = addr_2, addr
    idc.MakeComm(len_insn, "length: %d" % str_len)
    idaapi.add_dref(ptr_insn, str_ptr, idaapi.dr_O)
    idaapi.autoWait()
    return True
# Check whether IDA considers the address a head: idc.isHead(F)
# NOTE(review): the statements below call idc.isCode, not idc.isHead -
# confirm which one this section is meant to demonstrate.
# Sample output:
#   0x100001f77L mov rbx, rsi
#   True
ea = here()
print("%s %s" % (hex(ea), idc.GetDisasm(ea)))
print(idc.isCode(idc.GetFlags(ea)))

# idc.FindCode(ea, flag) finds the next address that is marked as code.
# Handy when looking for the end of a block of data.
# Sample output:
#   0x1000013c0L text "UTF-16LE", '{00000000-0000-0000-0000-000000000000}',0
#   0x1000014f8L xor r11d, r11d
ea = here()
print("%s %s" % (hex(ea), idc.GetDisasm(ea)))
addr = idc.FindCode(ea, SEARCH_DOWN | SEARCH_NEXT)
print("%s %s" % (hex(addr), idc.GetDisasm(addr)))

# idc.FindData(ea, flag) finds the next address that is marked as data.
# Sample output:
#   0x1000020b6L movzx eax, word ptr [r12+2]
#   0x100001cccL db 8 dup(0CCh)
ea = here()
print("%s %s" % (hex(ea), idc.GetDisasm(ea)))
addr = idc.FindData(ea, SEARCH_UP | SEARCH_NEXT)
print("%s %s" % (hex(addr), idc.GetDisasm(addr)))

# idc.FindUnexplored(ea, flag) finds the address of a byte that IDA has not
# identified as either code or data. Such unknown bytes need further
# analysis, by inspection or by script.
ea = here()
print("%s %s" % (hex(ea), idc.GetDisasm(ea)))
addr = idc.FindUnexplored(ea, SEARCH_DOWN)
print("%s %s" % (hex(addr), idc.GetDisasm(addr)))