class IDADebugger(DBG_Hooks, Debugger):
    """Trace-recording debugger front end built on IDA's debugger hooks.

    ``gen_trace`` runs the debuggee with step tracing enabled; every trace
    event is handled by ``dbg_trace``, which appends a ``Traceline`` holding
    the thread id, the address, the normalised disassembly and a snapshot of
    the CPU context to ``self.trace``.
    """

    # Flag registers captured on every architecture.
    _FLAGS = ('cf', 'zf', 'sf', 'of', 'pf', 'af', 'tf', 'df')
    # Registers captured per register width. The names double as context
    # keys and as attribute names on the ``cpu`` accessor.
    _CTX_REGISTERS = {
        32: ('eax', 'ebx', 'edx', 'ecx', 'ebp', 'esp', 'eip', 'edi',
             'esi') + _FLAGS,
        64: ('rax', 'rbx', 'rdx', 'rcx', 'rbp', 'rsp', 'rip', 'rdi', 'rsi',
             'r8', 'r9', 'r10', 'r11', 'r12', 'r13', 'r14',
             'r15') + _FLAGS,
    }
    # Line colors from "executed once" to "executed most often".
    _HEAT_COLORS = (0xffe699, 0xffcc33, 0xe6ac00, 0xb38600)

    def __init__(self, *args):
        """Set up an unhooked debugger with an empty trace and a zeroed context.

        :param args: forwarded unchanged to the base class constructors
        """
        super(IDADebugger, self).__init__(*args)
        self.hooked = False
        self.trace = Trace()
        self._module_name = 'IDADbg'
        self.arch = get_arch_dynamic()
        # init the cpu context with 0; as before, no context is created when
        # the register width is neither 32 nor 64
        registers = self._CTX_REGISTERS.get(self.arch)
        if registers is not None:
            self.ctx = {reg: '0' for reg in registers}
        self.IAT = []
        self.func_args = defaultdict(set)

    # NOTE(review): an earlier note at this spot explained that @property
    # turns a method into an attribute, but no decorator is applied here, so
    # callers have to call module_name() -- confirm this is intended.
    def module_name(self):
        """Return the module name of this debugger ('IDADbg')."""
        return self._module_name

    def convert(self, value):
        """Convert a value into its upper-case hex representation.

        :param value: anything accepted by ``int()``
        :return: hex digits without a '0x' prefix, e.g. 255 -> 'FF'
        """
        return '%X' % int(value)

    def disconv(self, value):
        """Convert a disassembly string to the standardized list form.

        This keeps traces generated by the debugger equivalent to traces
        loaded from file. The mnemonic is separated from the operand text at
        the first run of whitespace; operands separated by ', ' become one
        list element each, with leading zeros and a trailing 'h' removed.

        The previous implementation split on a single space first, which
        made its own ', ' operand handling unreachable.

        :param value: a disasm str
        :return: standardized list, e.g. ['mov', 'eax', '[ebp+8]']
        """
        # disregard comments
        text = value.split(';')[0].lower()
        parts = text.split(None, 1)
        if len(parts) < 2:
            return parts
        mnemonic, operands = parts[0], parts[1].strip()
        if ', ' not in operands:
            return [mnemonic, operands]
        return [mnemonic] + [
            op.lstrip().lstrip('0').rstrip('h')
            for op in operands.split(', ')
        ]

    def trace_init(self):
        """Reset ``self.trace`` to an empty trace for the current register width."""
        if self.arch is None:
            # was a no-op comparison (``==``) instead of an assignment
            self.arch = get_arch_dynamic()
        # reset trace
        self.trace = Trace(reg_size=self.arch)

    def hook_dbg(self):
        """Install the debugger hooks if the debugger can be queried.

        Existing hooks are released first. Errors are printed, not raised.
        """
        if self.hooked:
            # Release any current hooks and keep the flag in sync, so an
            # early return below does not leave ``hooked`` set.
            self.unhook_dbg()
        try:
            # check if ida dbg is present and ready
            if not dbg_can_query():
                return
            # hook IDADebugger
            self.hook()
            self.hooked = True
            self.arch = get_arch_dynamic()
        except Exception as ex:
            print("An Exception was encountered: %s" % ex)

    def get_new_color(self, current_color):
        """Return the next color of the execution heat map for a line.

        :param current_color: the current color of a line
        :return: the following color, the last color if already at the end,
                 or the first color when ``current_color`` is not part of
                 the palette
        """
        colors = self._HEAT_COLORS
        try:
            index = colors.index(current_color)
        except ValueError:
            return colors[0]
        return colors[min(index + 1, len(colors) - 1)]

    # TODO IAT checks
    def gen_trace(self, trace_start=BeginEA(), trace_end=BADADDR):
        """Generate a trace for the loaded binary by single-stepping it.

        :param trace_start: currently unused; execution always starts at
                            ``BeginEA()``
        :param trace_end: currently unused
        :return: the populated trace (filled by ``dbg_trace``)
        """
        vmr = get_vmr()
        self.trace_init()
        # reset color
        for head in Heads(SegStart(ScreenEA()), SegEnd(ScreenEA())):
            SetColor(head, CIC_ITEM, 0xFFFFFF)
        # start exec
        RunTo(BeginEA())
        GetDebuggerEvent(WFNE_SUSP, -1)
        # enable tracing
        EnableTracing(TRACE_STEP, 1)
        GetDebuggerEvent(WFNE_ANY | WFNE_CONT, -1)
        while True:
            event = GetDebuggerEvent(WFNE_ANY, -1)
            addr = GetEventEa()
            # change color of executed line
            SetColor(addr, CIC_ITEM,
                     self.get_new_color(GetColor(addr, CIC_ITEM)))
            # break by exception
            if event <= 1:
                break

        # standardize the difference between ida_trace.txt files and traces
        # generated by the debugger hook: dbg_trace stores the cpu context
        # before the instruction executes, trace files the context after it.
        # Every line therefore takes over the context of its successor.
        last = len(self.trace) - 1
        for position, line in enumerate(self.trace):
            if position < last:
                line.ctx = self.trace[position + 1].ctx
            else:
                line.ctx = defaultdict(lambda: '0')
        # return the trace, for population see dbg_trace() below
        msg('[*] Trace generated!\n')
        if vmr.extract_param:
            vmr.func_args = self.func_args
            for key in self.func_args.keys():
                print('Function %s call args: %s' % (
                    key,
                    ''.join('%s, ' % arg
                            for arg in self.func_args[key]).rstrip(', ')))
        return self.trace

    def unhook_dbg(self):
        """Remove the debugger hooks if they are installed."""
        if self.hooked:
            # unhook IDADebugger
            self.unhook()
            self.hooked = False

    def dbg_process_start(self, pid, tid, ea, name, base, size):
        """Process-start callback; intentionally does nothing."""
        pass

    def dbg_process_exit(self, pid, tid, ea, code):
        """Process-exit callback; intentionally does nothing."""
        pass

    def dbg_library_unload(self, pid, tid, ea, info):
        """Library-unload callback; prints the event and returns 0."""
        print("Library unloaded: pid=%d tid=%d ea=0x%x info=%s" %
              (pid, tid, ea, info))
        return 0

    def dbg_process_attach(self, pid, tid, ea, name, base, size):
        """Process-attach callback; intentionally does nothing."""
        pass

    def dbg_process_detach(self, pid, tid, ea):
        """Process-detach callback; returns 0."""
        return 0

    def dbg_library_load(self, pid, tid, ea, name, base, size):
        """Library-load callback; prints the event."""
        print("Library loaded: pid=%d tid=%d name=%s base=%x" %
              (pid, tid, name, base))

    def dbg_bpt(self, tid, ea):
        """Breakpoint callback.

        return values:
          -1 - to display a breakpoint warning dialog
               if the process is suspended.
           0 - to never display a breakpoint warning dialog.
           1 - to always display a breakpoint warning dialog.
        """
        return 0

    def dbg_suspend_process(self):
        """Process-suspend callback; intentionally does nothing."""
        pass

    def dbg_exception(self, pid, tid, ea, exc_code, exc_can_cont, exc_ea,
                      exc_info):
        """Exception callback; prints the exception details.

        return values:
          -1 - to display an exception warning dialog
               if the process is suspended.
           0 - to never display an exception warning dialog.
           1 - to always display an exception warning dialog.
        """
        print(
            "Exception: pid=%d tid=%d ea=0x%x exc_code=0x%x can_continue=%d exc_ea=0x%x exc_info=%s"
            % (pid, tid, ea, exc_code & BADADDR, exc_can_cont, exc_ea,
               exc_info))
        return 0

    def _collect_call_args(self, call_disasm):
        """Record the operands of the push/mov lines directly preceding a call.

        The trace is walked backwards until the first line that is neither
        a two-element push nor a mov. The walk is bounded by the trace
        length; previously it ran off the start of the trace and the
        resulting IndexError prevented the current line from being recorded.

        :param call_disasm: disassembly text of the call instruction
        """
        key = call_disasm.split('call')[1].strip()
        for back in range(1, len(self.trace) + 1):
            line = self.trace[-back]
            if line.is_push and line.disasm_len == 2:
                operand = line.disasm[1]
            elif line.is_mov:
                operand = line.disasm[2]
            else:
                break
            try:
                # prefer the register value stored with the line ...
                self.func_args[key].add(
                    line.ctx[get_reg(operand, self.arch)])
            except Exception:
                # ... and fall back to the operand text itself
                self.func_args[key].add(operand)

    def dbg_trace(self, tid, ea):
        """Trace-event callback: append the instruction at ``ea`` to the trace.

        :param tid: id of the thread that executed the instruction
        :param ea: address of the instruction
        :return: 0 (log the trace event; 1 would suppress logging)
        """
        vmr = get_vmr()
        try:
            disasm_text = GetDisasm(ea)
            if vmr.extract_param and 'call' in disasm_text:
                self._collect_call_args(disasm_text)
            # TODO mmx xmmx ymmx
            # compute next ctx; 64-bit contexts now use the 64-bit register
            # names throughout (previously 'edi' and cpu.eax/ebx/... were
            # used), matching the keys found in loaded 64-bit traces
            registers = self._CTX_REGISTERS.get(self.arch)
            if registers is not None:
                self.ctx = defaultdict(
                    lambda: '0',
                    {reg: self.convert(getattr(cpu, reg))
                     for reg in registers})
            self.trace.append(
                Traceline(thread_id=tid,
                          addr=ea,
                          disasm=self.disconv(disasm_text),
                          ctx=deepcopy(self.ctx)))
        except Exception as e:
            print(e)
        # return values:
        #   1  - do not log this trace event;
        #   0  - log it
        return 0
def _merge_ctx(context, assignments):
    """Store every 'REG=value' item of ``assignments`` in ``context``."""
    for item in assignments:
        try:
            name, raw = item.split('=')
        except ValueError:
            # not exactly one '=': not a register assignment
            continue
        value = ''.join(c for c in raw.lstrip('0') if c not in '\n\r\t')
        context[name.lower()] = value if value != '' else '0'


def _resolve_ida_addr(location, functions):
    """Translate the ':'-split location column of an IDA trace to an address."""
    if len(location) == 3:
        return int(location[2][4:], 16)
    if len(location) != 2:
        return BADADDR
    segment, target = location
    try:
        # .segment:addr
        return int(target, 16)
    except ValueError:
        pass
    # .segment:func_name+offset / .segment:func_name-offset
    for sign, direction in (('+', 1), ('-', -1)):
        name, found, offset = target.partition(sign)
        if not found:
            continue
        try:
            return functions[segment][name] + direction * int(offset, 16)
        except (KeyError, ValueError):
            continue
    if target.startswith('loc_'):
        # .segment:jmp_location
        return int(target[4:], 16)
    # .segment:func_name
    return functions[segment][target]


def _split_ida_disasm(text):
    """Split IDA disassembly text into [mnemonic, operand, ...]."""
    parts = text.strip().lower().split(None, 1)
    if len(parts) == 2:
        operands = parts.pop().strip()
        if ', ' in operands:
            parts.extend(op.lstrip().lstrip('0').rstrip('h')
                         for op in operands.split(', '))
        else:
            parts.append(operands)
    # remove [ebp+0]
    return [part.replace('[ebp+0]', '[ebp]') for part in parts]


def _olly_traceline(values, name_col, addr_col, context):
    """Build a Traceline from the tab-split fields of an Olly/Immunity line."""
    # these debuggers use thread names, e.g. main; fold the name into a number
    thread_id = sum(ord(c) for c in values[name_col])
    try:
        addr = int(values[addr_col], 16)
    except (ValueError, IndexError):
        addr = BADADDR
    # disasm
    disasm = values[2].lower().rstrip('\r\n').split(' ', 1)
    if len(disasm) > 1 and ',' in disasm[1]:
        operands = disasm.pop(1)
        disasm.extend(op.lstrip('0') for op in operands.split(','))
    disasm = [
        x.split('dword ptr ')[1] if 'dword ptr ' in x else x for x in disasm
    ]
    if len(disasm) == 2 and re.findall(r'.*\[.*[\+\-\*].*[\+\-\*].*\].*',
                                       disasm[1]):
        disasm[1] = ida_offset(disasm[1])
    # context
    if len(values) > 3:
        _merge_ctx(context, values[3].lstrip(' ').rstrip('\r\n').split(','))
    return Traceline(thread_id=thread_id,
                     addr=addr,
                     disasm=disasm,
                     ctx=deepcopy(context))


def load():
    """
    Load a trace from a file chosen by the user.

    Supported are IDAs txt trace files and VMAttacks json files. Further
    OllyDBG and ImmunityDBG traces are supported but have slightly limited
    analysis capabilities. The path is requested through a file dialog; if
    the dialog fails, the first *.txt file found in the current working
    directory is used, and if that yields nothing the user is asked to type
    the path.

    :return: trace object, or None when no file was chosen or the file is
             neither *.txt nor *.json
    :raises Exception: wraps any error raised while parsing the trace
    """
    path = ''
    try:
        fd = QtGui.QFileDialog()
        fd.setFileMode(QtGui.QFileDialog.AnyFile)
        fd.setFilters(["Text files (*.txt)", "JSON files (*.json)"])
        fd.setWindowTitle('Load Trace ...')
        path = fd.selectedFiles()[0] if fd.exec_() else None
    except Exception:
        msg('A Problem occured with the file selector dialog, first *.txt file in the current working directory was choosen!')
        for f in os.listdir(os.getcwd()):
            if f.endswith('txt'):
                path = f
                # stop at the first hit, as the message announces
                break
    if path == '':
        path = asktext(40, '', 'Please provide the full path to the trace file: ')
    if path is None:
        # nothing was selected
        return None

    get_log().log('[TRC] Loaded the trace at %s\n' % path)
    if path.endswith('.txt'):
        with open(path, 'r') as f:
            lines = f.readlines()
    elif path.endswith('.json'):
        with open(path) as f:
            lines = json.load(f)
    else:
        return None

    trace = Trace()
    # segment name -> {function name -> start address}
    functions = {
        SegName(addr): {
            GetFunctionName(ea): ea
            for ea in Functions(SegStart(addr), SegEnd(addr))
        }
        for addr in Segments()
    }
    try:
        # running register context; each trace line gets a snapshot of it
        context = defaultdict(lambda: False)

        # framework json trace
        if isinstance(lines, dict) or path.endswith('.json'):
            get_log().log('[TRC] The trace seems to be a VMAttack trace\n')
            for index in range(len(lines)):
                line = lines[str(index)]
                t = Traceline(thread_id=line[0],
                              addr=line[1],
                              disasm=line[2],
                              ctx=line[3],
                              comment=line[4])
                t.grade = line[5]
                trace.append(t)
        # ida trace via Win32Dbg; columns are tab-separated, so the header
        # keyword may be followed by a tab as well as by a space
        elif lines[0].startswith(('Thread ', 'Thread\t')):
            for i in lines[3:]:
                if i.startswith('Thread'):
                    break
                values = i.split('\t')
                # thread id
                thread_id = int(values[0], 16)
                # addr
                addr = _resolve_ida_addr(values[1].strip(' ').split(':'),
                                         functions)
                # disasm
                disasm = _split_ida_disasm(values[2])
                # context
                _merge_ctx(context, values[3].split())
                trace.append(
                    Traceline(thread_id=thread_id,
                              addr=addr,
                              disasm=disasm,
                              ctx=deepcopy(context)))
        # immunity trace
        elif lines[0].startswith(('Address ', 'Address\t')):
            for i in lines[1:]:
                if 'Run trace closed' in i or 'Process terminated' in i:
                    break
                try:
                    trace.append(
                        _olly_traceline(i.split('\t'), 1, 0, context))
                except Exception:
                    # best effort: lines that do not parse are skipped
                    continue
        # olly trace
        elif lines[1].startswith(('main ', 'main\t')):
            for i in lines[1:]:
                if 'Logging stopped' in i:
                    break
                trace.append(_olly_traceline(i.split('\t'), 0, 1, context))

        # derive the register width from the last context
        last_ctx = trace[-1].ctx
        if 'rax' in last_ctx:
            trace.ctx_reg_size = 64
        elif 'eax' in last_ctx:
            trace.ctx_reg_size = 32
        msg("[*] Trace Loaded!\n")
        return trace
    except Exception as e:
        raise Exception('[*] Exception occured: \n%s\n' % e)
def _merge_ctx(context, assignments):
    """Store every 'REG=value' item of ``assignments`` in ``context``."""
    for item in assignments:
        try:
            name, raw = item.split('=')
        except ValueError:
            # not exactly one '=': not a register assignment
            continue
        value = ''.join(c for c in raw.lstrip('0') if c not in '\n\r\t')
        context[name.lower()] = value if value != '' else '0'


def _resolve_ida_addr(location, functions):
    """Translate the ':'-split location column of an IDA trace to an address."""
    if len(location) == 3:
        return int(location[2][4:], 16)
    if len(location) != 2:
        return BADADDR
    segment, target = location
    try:
        # .segment:addr
        return int(target, 16)
    except ValueError:
        pass
    # .segment:func_name+offset / .segment:func_name-offset
    for sign, direction in (('+', 1), ('-', -1)):
        name, found, offset = target.partition(sign)
        if not found:
            continue
        try:
            return functions[segment][name] + direction * int(offset, 16)
        except (KeyError, ValueError):
            continue
    if target.startswith('loc_'):
        # .segment:jmp_location
        return int(target[4:], 16)
    # .segment:func_name
    return functions[segment][target]


def _split_ida_disasm(text):
    """Split IDA disassembly text into [mnemonic, operand, ...]."""
    parts = text.strip().lower().split(None, 1)
    if len(parts) == 2:
        operands = parts.pop().strip()
        if ', ' in operands:
            parts.extend(op.lstrip().lstrip('0').rstrip('h')
                         for op in operands.split(', '))
        else:
            parts.append(operands)
    # remove [ebp+0]
    return [part.replace('[ebp+0]', '[ebp]') for part in parts]


def _olly_traceline(values, name_col, addr_col, context):
    """Build a Traceline from the tab-split fields of an Olly/Immunity line."""
    # these debuggers use thread names, e.g. main; fold the name into a number
    thread_id = sum(ord(c) for c in values[name_col])
    try:
        addr = int(values[addr_col], 16)
    except (ValueError, IndexError):
        addr = BADADDR
    # disasm
    disasm = values[2].lower().rstrip('\r\n').split(' ', 1)
    if len(disasm) > 1 and ',' in disasm[1]:
        operands = disasm.pop(1)
        disasm.extend(op.lstrip('0') for op in operands.split(','))
    disasm = [
        x.split('dword ptr ')[1] if 'dword ptr ' in x else x for x in disasm
    ]
    if len(disasm) == 2 and re.findall(r'.*\[.*[\+\-\*].*[\+\-\*].*\].*',
                                       disasm[1]):
        disasm[1] = ida_offset(disasm[1])
    # context
    if len(values) > 3:
        _merge_ctx(context, values[3].lstrip(' ').rstrip('\r\n').split(','))
    return Traceline(thread_id=thread_id,
                     addr=addr,
                     disasm=disasm,
                     ctx=deepcopy(context))


def load():
    """
    Load a trace from a file chosen by the user.

    Supported are IDAs txt trace files and VMAttacks json files. Further
    OllyDBG and ImmunityDBG traces are supported but have slightly limited
    analysis capabilities. The path is requested through a file dialog; if
    the dialog fails, the first *.txt file found in the current working
    directory is used, and if that yields nothing the user is asked to type
    the path.

    :return: trace object, or None when no file was chosen or the file is
             neither *.txt nor *.json
    :raises Exception: wraps any error raised while parsing the trace
    """
    path = ''
    try:
        fd = QtGui.QFileDialog()
        fd.setFileMode(QtGui.QFileDialog.AnyFile)
        fd.setFilters(["Text files (*.txt)", "JSON files (*.json)"])
        fd.setWindowTitle('Load Trace ...')
        path = fd.selectedFiles()[0] if fd.exec_() else None
    except Exception:
        msg('A Problem occured with the file selector dialog, first *.txt file in the current working directory was choosen!')
        for f in os.listdir(os.getcwd()):
            if f.endswith('txt'):
                path = f
                # stop at the first hit, as the message announces
                break
    if path == '':
        path = asktext(40, '', 'Please provide the full path to the trace file: ')
    if path is None:
        # nothing was selected
        return None

    get_log().log('[TRC] Loaded the trace at %s\n' % path)
    if path.endswith('.txt'):
        with open(path, 'r') as f:
            lines = f.readlines()
    elif path.endswith('.json'):
        with open(path) as f:
            lines = json.load(f)
    else:
        return None

    trace = Trace()
    # segment name -> {function name -> start address}
    functions = {
        SegName(addr): {
            GetFunctionName(ea): ea
            for ea in Functions(SegStart(addr), SegEnd(addr))
        }
        for addr in Segments()
    }
    try:
        # running register context; each trace line gets a snapshot of it
        context = defaultdict(lambda: False)

        # framework json trace
        if isinstance(lines, dict) or path.endswith('.json'):
            get_log().log('[TRC] The trace seems to be a VMAttack trace\n')
            for index in range(len(lines)):
                line = lines[str(index)]
                t = Traceline(thread_id=line[0],
                              addr=line[1],
                              disasm=line[2],
                              ctx=line[3],
                              comment=line[4])
                t.grade = line[5]
                trace.append(t)
        # ida trace via Win32Dbg; columns are tab-separated, so the header
        # keyword may be followed by a tab as well as by a space
        elif lines[0].startswith(('Thread ', 'Thread\t')):
            for i in lines[3:]:
                if i.startswith('Thread'):
                    break
                values = i.split('\t')
                # thread id
                thread_id = int(values[0], 16)
                # addr
                addr = _resolve_ida_addr(values[1].strip(' ').split(':'),
                                         functions)
                # disasm
                disasm = _split_ida_disasm(values[2])
                # context
                _merge_ctx(context, values[3].split())
                trace.append(
                    Traceline(thread_id=thread_id,
                              addr=addr,
                              disasm=disasm,
                              ctx=deepcopy(context)))
        # immunity trace
        elif lines[0].startswith(('Address ', 'Address\t')):
            for i in lines[1:]:
                if 'Run trace closed' in i or 'Process terminated' in i:
                    break
                try:
                    trace.append(
                        _olly_traceline(i.split('\t'), 1, 0, context))
                except Exception:
                    # best effort: lines that do not parse are skipped
                    continue
        # olly trace
        elif lines[1].startswith(('main ', 'main\t')):
            for i in lines[1:]:
                if 'Logging stopped' in i:
                    break
                trace.append(_olly_traceline(i.split('\t'), 0, 1, context))

        # derive the register width from the last context
        last_ctx = trace[-1].ctx
        if 'rax' in last_ctx:
            trace.ctx_reg_size = 64
        elif 'eax' in last_ctx:
            trace.ctx_reg_size = 32
        msg("[*] Trace Loaded!\n")
        return trace
    except Exception as e:
        raise Exception('[*] Exception occured: \n%s\n' % e)
def follow_virt_reg(trace, **kwargs):
    """
    Follows the virtual registers and extracts the relevant trace lines to clarify how the final result in a virtual
    register came to be and what values(=recursively) it consists of.

    Recognised keyword arguments:
      update        -- optional object; its pbar_update(n) is called to report progress
      manual        -- if true, the register name is requested via AskStr
      real_reg_name -- reg string; defaults to get_reg('rax', trace.ctx_reg_size)
      virt_reg_addr -- the stack addr of the virtual register; looked up with
                       find_virtual_regs when not given

    :param trace: instruction trace
    :return: NOTE(review): the visible body collects lines in `backtrace` but contains no return
             statement -- confirm whether the result is meant to be returned.
    """
    assert(isinstance(trace, Trace))
    update = kwargs.get('update', None)
    manual = kwargs.get('manual', False)

    # determine the register to follow, either interactively or from kwargs
    if manual:
        real_reg_name = AskStr('eax', 'Which register do you want followed?')
        if real_reg_name is None:
            real_reg_name = get_reg('rax', trace.ctx_reg_size)
        else:
            real_reg_name = get_reg(real_reg_name, trace.ctx_reg_size)
    else:
        real_reg_name = kwargs.get('real_reg_name', get_reg('rax', trace.ctx_reg_size))

    virt_reg_addr = kwargs.get('virt_reg_addr', None)
    if virt_reg_addr is None:
        # find_virtual_regs gets a deep copy, so `trace` itself is not handed over
        vr = find_virtual_regs(deepcopy(trace))
        virt_reg_addr = vr[real_reg_name]

    if update is not None:
        update.pbar_update(30)

    backtrace = Trace()    # lines collected as relevant
    watch_addrs = set()    # address strings matched against line.disasm[1]
    reg_vals = set()       # values whose first appearance in a context is tracked

    trace = optimization_const_propagation(trace)
    trace = optimization_stack_addr_propagation(trace)

    if update is not None:
        update.pbar_update(10)
    # reversing the trace makes the backward traversal easier
    trace.reverse()
    # get reg value at pop: first 'pop' (in reversed order) into a register of the same class
    reg = get_reg(real_reg_name, trace.ctx_reg_size)
    for line in trace:
        if len(line.disasm) == 2:
            if line.disasm[0] == 'pop' and get_reg_class(line.disasm[1]) == get_reg_class(reg):
                reg_vals.add(line.ctx[reg])
                break

    watch_addrs.add(virt_reg_addr)
    # NOTE(review): block nesting in this loop was reconstructed from whitespace-collapsed
    # source -- verify against the original layout.
    for line in trace:
        assert isinstance(line,Traceline)
        if line.is_jmp:
            continue
        try:
            # +1 because trace is reversed to get to prev element
            # (for the last element this raises IndexError, which the except below swallows)
            prev = trace[trace.index(line)+1]
            # iterate over a copy because reg_vals is extended inside the loop
            for val in reg_vals.copy():
                if val in line.ctx.values() and val not in prev.ctx.values():
                    backtrace.append(line)
                    # if val suddenly appears in the ctx there should be 2 possibilities:
                    # 1. it was moved from mem, so it was on the stack -> append stack address to be watched out for
                    if line.is_mov and line.is_op2_mem:
                        watch_addrs.add(''.join(c for c in line.disasm[2] if c not in '[]'))
                        #reg_vals.remove(val)
                    # 2. it was computed -> if regs played a role in the computation add them to values to watch out for
                    elif not line.is_mov:
                        if line.disasm_len > 2:
                            if line.is_op1_reg:
                                reg_vals.add(line.ctx[get_reg(line.disasm[1], trace.ctx_reg_size)])
                            if line.is_op1_mem:
                                watch_addrs.add(''.join(c for c in line.disasm[1] if c not in '[]'))
                            if line.is_op2_reg: # not necessarily the case for lea
                                reg_vals.add(line.ctx[get_reg(line.disasm[2], trace.ctx_reg_size)])
                            if line.is_op2_mem:
                                watch_addrs.add(''.join(c for c in line.disasm[2] if c not in '[]'))
                        elif line.disasm_len == 2:
                            # single-operand instruction: always track the accumulator value of prev
                            reg_vals.add(prev.ctx[get_reg('eax', trace.ctx_reg_size)])
                            if line.is_op1_reg:
                                reg_vals.add(line.ctx[get_reg(line.disasm[1], trace.ctx_reg_size)])
                                reg_vals.add(prev.ctx[get_reg(line.disasm[1], trace.ctx_reg_size)])
                            if line.ctx[get_reg('eax', trace.ctx_reg_size)] != prev.ctx[get_reg('eax', trace.ctx_reg_size)]:
                                reg_vals.add(line.ctx[get_reg('eax', trace.ctx_reg_size)])
                                reg_vals.add(prev.ctx[get_reg('eax', trace.ctx_reg_size)])
                            if line.disasm[0].startswith('not'):
                                reg_vals.add(line.ctx[get_reg(line.disasm[1], trace.ctx_reg_size)])
                                reg_vals.add(prev.ctx[get_reg(line.disasm[1], trace.ctx_reg_size)])
                                # also keep both neighbours of the line
                                # (index -1 wraps to the end of the trace when line is the first element)
                                backtrace.append(prev)
                                backtrace.append(trace[trace.index(line)-1])
                                try:
                                    reg_vals.add(prev.ctx[get_reg(prev.disasm[1], trace.ctx_reg_size)])
                                    reg_vals.add(trace[trace.index(line)-1].ctx[get_reg(prev.disasm[1], trace.ctx_reg_size)])
                                except:
                                    pass
        except Exception, e:
            # every error while analysing this line is ignored
            pass
            #print 'reg_vals\n',line, e.message

        if watch_addrs:
            # iterate over a copy because watch_addrs is modified inside the loop
            for addr in watch_addrs.copy():
                try:
                    if line.disasm[1].__contains__(addr):
                        backtrace.append(line)
                        reg_vals.add(line.disasm[2])
                        # name of the first context entry whose value equals the second operand text
                        r = line.ctx.keys()[line.ctx.values().index(line.disasm[2])]
                        # scan from this line onwards (i.e. towards earlier instructions, the trace
                        # being reversed); running past the end raises IndexError, swallowed below
                        for i in range(len(trace)):
                            temp = trace[trace.index(line)+i]
                            if len(temp.disasm) == 3:
                                if temp.disasm[1][-2:] == r[-2:]:
                                    if get_reg_class(r[-2:]) is not None:
                                        # second operand without its first and last character
                                        watch_addrs.add(temp.disasm[2][1:-1])
                                        break
                        if line.is_mov:
                            watch_addrs.remove(addr)
                except Exception, e:
                    #print 'watch_addr\n',line, e.message
                    pass
def follow_virt_reg(trace, **kwargs):
    """
    Follows the virtual registers and extracts the relevant trace lines to clarify how the final result in a virtual
    register came to be and what values(=recursively) it consists of.

    Recognised keyword arguments:
      update        -- optional object; its pbar_update(n) is called to report progress
      manual        -- if true, the register name is requested via AskStr
      real_reg_name -- reg string; defaults to get_reg('rax', trace.ctx_reg_size)
      virt_reg_addr -- the stack addr of the virtual register; looked up with
                       find_virtual_regs when not given

    :param trace: instruction trace
    :return: NOTE(review): the visible body collects lines in `backtrace` but contains no return
             statement -- confirm whether the result is meant to be returned.
    """
    update = kwargs.get('update', None)
    manual = kwargs.get('manual', False)

    # determine the register to follow, either interactively or from kwargs
    if manual:
        real_reg_name = AskStr('eax', 'Which register do you want followed?')
        if real_reg_name is None:
            real_reg_name = get_reg('rax', trace.ctx_reg_size)
        else:
            real_reg_name = get_reg(real_reg_name, trace.ctx_reg_size)
    else:
        real_reg_name = kwargs.get('real_reg_name', get_reg('rax', trace.ctx_reg_size))

    virt_reg_addr = kwargs.get('virt_reg_addr', None)
    if virt_reg_addr is None:
        # find_virtual_regs gets a deep copy, so `trace` itself is not handed over
        vr = find_virtual_regs(deepcopy(trace))
        virt_reg_addr = vr[real_reg_name]

    if update is not None:
        update.pbar_update(30)

    backtrace = Trace()    # lines collected as relevant
    watch_addrs = set()    # address strings matched against line.disasm[1]
    reg_vals = set()       # values (and operand texts) being tracked

    trace = optimization_const_propagation(trace)
    trace = optimization_stack_addr_propagation(trace)

    if update is not None:
        update.pbar_update(10)
    # reversing the trace makes the backward traversal easier
    trace.reverse()
    # get reg value at pop: first 'pop' (in reversed order) into a register of the same class
    reg = get_reg(real_reg_name, trace.ctx_reg_size)
    for line in trace:
        if len(line.disasm) == 2:
            if line.disasm[0] == 'pop' and get_reg_class(line.disasm[1]) == get_reg_class(reg):
                reg_vals.add(line.ctx[reg])
                break

    watch_addrs.add(virt_reg_addr)
    # NOTE(review): block nesting in this loop was reconstructed from whitespace-collapsed
    # source -- verify against the original layout.
    for line in trace:
        assert isinstance(line, Traceline)
        if line.is_jmp:
            continue
        try:
            # +1 because trace is reversed to get to prev element
            # (for the last element this raises IndexError, which the except below swallows)
            prev = trace[trace.index(line) + 1]
            # iterate over a copy because reg_vals is modified inside the loop
            for val in reg_vals.copy():
                if val in line.ctx.values() and val not in prev.ctx.values():
                    backtrace.append(line)
                    # if val suddenly appears in the ctx there should be 2 possibilities:
                    # 1. it was moved from mem, so it was on the stack -> append stack address to be watched out for
                    if line.is_mov and line.is_op2_mem:
                        watch_addrs.add(''.join(c for c in line.disasm[2] if c not in '[]'))
                        # the value is accounted for by the watched address from here on
                        reg_vals.remove(val)
                    # 2. it was computed -> if regs played a role in the computation add them to values to watch out for
                    elif not line.is_mov:
                        # note: the operand texts themselves are added here, not context values
                        if line.is_op1_reg:
                            reg_vals.add(line.disasm[1])
                        if line.is_op1_mem:
                            watch_addrs.add(''.join(c for c in line.disasm[1] if c not in '[]'))
                        if line.is_op2_reg:  # not necessarily the case for lea
                            reg_vals.add(line.disasm[2])
                        if line.is_op2_mem:
                            watch_addrs.add(''.join(c for c in line.disasm[2] if c not in '[]'))
        except Exception, e:
            # every error while analysing this line is ignored
            pass
            #print 'reg_vals\n',line, e.message

        if watch_addrs:
            # iterate over a copy because watch_addrs is modified inside the loop
            for addr in watch_addrs.copy():
                try:
                    if line.disasm[1].__contains__(addr):
                        backtrace.append(line)
                        reg_vals.add(line.disasm[2])
                        # name of the first context entry whose value equals the second operand text
                        r = line.ctx.keys()[line.ctx.values().index(line.disasm[2])]
                        # scan from this line onwards (i.e. towards earlier instructions, the trace
                        # being reversed); running past the end raises IndexError, swallowed below
                        for i in range(len(trace)):
                            temp = trace[trace.index(line) + i]
                            if len(temp.disasm) == 3:
                                if temp.disasm[1][-2:] == r[-2:]:
                                    if get_reg_class(r[-2:]) is not None:
                                        # second operand without its first and last character
                                        watch_addrs.add(temp.disasm[2][1:-1])
                                        break
                        if line.is_mov:
                            watch_addrs.remove(addr)
                except Exception, e:
                    #print 'watch_addr\n',line, e.message
                    pass