def dbg_trace(self, tid, ea):
    """
    Handle a trace event callback.

    When parameter extraction is enabled and the instruction at ``ea`` is a
    call, the already recorded trace is walked backwards and the operands of
    the directly preceding push/mov lines are collected in
    ``self.func_args`` under the call target text. Afterwards the register
    context is re-read from ``cpu`` and a new ``Traceline`` is appended to
    ``self.trace``.

    Any exception raised while handling the event is printed and swallowed.

    :param tid: thread id reported for the trace event
    :param ea: address of the traced instruction
    :return: None
    """
    vmr = get_vmr()
    try:
        if vmr.extract_param:
            call_text = GetDisasm(ea)
            if 'call' in call_text:
                key = call_text.split('call')[1].strip()
                run_var = 0
                while True:
                    # traverse trace backwards and get sequential push and mov params
                    try:
                        line = self.trace[-(run_var + 1)]
                    except IndexError:
                        # Reached the start of the trace. Without this guard
                        # the error would reach the outer handler and the
                        # call line itself would never be recorded.
                        break
                    if line.is_push and line.disasm_len == 2:
                        operand = line.disasm[1]
                    elif line.is_mov:
                        operand = line.disasm[2]
                    else:
                        break
                    try:
                        # prefer the recorded register value of the operand
                        self.func_args[key].add(
                            line.ctx[get_reg(operand, self.arch)])
                    except Exception:
                        # operand is not resolvable as a register: keep its text
                        self.func_args[key].add(operand)
                    run_var += 1

        # TODO mmx xmmx ymmx
        # compute next ctx
        flag_names = ('cf', 'zf', 'sf', 'of', 'pf', 'af', 'tf', 'df')
        reg_map = None
        if self.arch == 32:
            reg_map = [(name, name) for name in
                       ('eax', 'ebx', 'edx', 'ecx', 'ebp', 'esp', 'eip',
                        'edi', 'esi')]
        elif self.arch == 64:
            # (context key, cpu attribute)
            # NOTE(review): most 64-bit keys are still filled from the 32-bit
            # register names (cpu.eax, ...), exactly as before - confirm
            # whether the full-width registers should be read instead.
            reg_map = [('rax', 'eax'), ('rbx', 'ebx'), ('rdx', 'edx'),
                       ('rcx', 'ecx'), ('rbp', 'ebp'), ('rsp', 'esp'),
                       ('rip', 'eip'),
                       # was stored under the 32-bit key 'edi'
                       ('rdi', 'rdi'),
                       ('rsi', 'rsi')]
            reg_map += [(name, name) for name in
                        ('r8', 'r9', 'r10', 'r11', 'r12', 'r13', 'r14',
                         'r15')]
        if reg_map is not None:
            reg_map += [(name, name) for name in flag_names]
            values = {}
            for ctx_key, cpu_attr in reg_map:
                values[ctx_key] = self.convert(getattr(cpu, cpu_attr))
            self.ctx = defaultdict(lambda: '0', values)

        self.trace.append(
            Traceline(thread_id=tid,
                      addr=ea,
                      disasm=self.disconv(GetDisasm(ea)),
                      ctx=deepcopy(self.ctx)))
    except Exception as e:
        # e.message may be empty (or missing); fall back to str(e)
        print(getattr(e, 'message', '') or str(e))
def load():
    """
    Load a trace from file.

    Supported are IDAs txt trace files and VMAttacks json files. Further
    OllyDBG and ImmunityDBG traces are supported but have slightly limited
    analysis capabilities.

    The file is chosen through a Qt file dialog. If the dialog itself fails,
    the first file ending in 'txt' in the current working directory is used,
    and if none is found the user is asked for a path.

    NOTE(review): a second definition of load() appears later in this file;
    if both live in the same module, the later one shadows this one.

    :return: trace object, or None if no file was chosen or the file is
             neither a .txt nor a .json file
    :raises Exception: wraps any error raised while parsing the trace
    """
    path = ''
    try:
        fd = QtGui.QFileDialog()
        fd.setFileMode(QtGui.QFileDialog.AnyFile)
        fd.setFilters(["Text files (*.txt)", "JSON files (*.json)"])
        fd.setWindowTitle('Load Trace ...')
        if fd.exec_():
            path = fd.selectedFiles()[0]
        else:
            path = None
    except Exception:
        msg('A Problem occured with the file selector dialog, first *.txt file in the current working directory was choosen!')
        for f in os.listdir(os.getcwd()):
            if f.endswith('txt'):
                path = f
                # stop at the first match, as the message above states
                break
    if path == '':
        path = asktext(40, '', 'Please provide the full path to the trace file: ')

    # dialog cancelled or no path entered: nothing to load
    if path is None:
        return None

    get_log().log('[TRC] Loaded the trace at %s\n' % path)
    if path.endswith('.txt'):
        with open(path, 'r') as f:
            lines = f.readlines()
    elif path.endswith('.json'):
        with open(path) as f:
            lines = json.load(f)
    else:
        return None

    trace = Trace()
    # segment name -> {function name -> start address}
    functions = {SegName(addr): {GetFunctionName(ea): ea
                                 for ea in Functions(SegStart(addr), SegEnd(addr))}
                 for addr in Segments()}

    try:
        # register values carry over from line to line; each Traceline gets a copy
        context = defaultdict(lambda: False)

        # framework json trace
        if isinstance(lines, dict) or path.endswith('.json'):
            get_log().log('[TRC] The trace seems to be a VMAttack trace\n')
            for index in range(len(lines)):
                line = lines[str(index)]
                t = Traceline(thread_id=line[0], addr=line[1], disasm=line[2],
                              ctx=line[3], comment=line[4])
                t.grade = line[5]
                trace.append(t)

        # ida trace via Win32Dbg
        elif lines[0].startswith('Thread '):
            for i in lines[3:]:
                if i.startswith('Thread'):
                    break
                values = i.split('\t')
                # thread id
                thread_id = int(values[0], 16)

                # addr
                addr = BADADDR
                func_name = values[1].strip(' ').split(':')
                if len(func_name) == 2:
                    try:
                        # .segment:addr
                        addr = int(func_name[1], 16)
                    except Exception:
                        try:
                            # .segment:func_name+offset
                            offset = int(func_name[1].split('+')[1], 16)
                            name = func_name[1].split('+')[0]
                            addr = functions[func_name[0]][name] + offset
                        except Exception:
                            try:
                                # .segment:func_name-offset
                                offset = int(i.split('-')[1].split(' ')[0], 16)
                                name = func_name[1].split('-')[0]
                                addr = functions[func_name[0]][name] - offset
                            except Exception:
                                if not func_name[1].startswith('loc_'):
                                    # .segment:func_name
                                    addr = functions[func_name[0]][func_name[1]]
                                else:
                                    # .segment:jmp_location
                                    addr = int(func_name[1][4:], 16)
                elif len(func_name) == 3:
                    addr = int(func_name[2][4:], 16)

                # disasm
                disasm = values[2].strip(' ').lower()
                disasm = disasm.split(' ')
                # drop empty pieces; build a real list so len()/pop() work
                disasm = [x.lstrip() for x in disasm]
                disasm = [x for x in disasm if x]
                # NOTE(review): elements produced by split(' ') cannot contain
                # ', ', so this branch never fires as written - confirm the
                # intended separator of the split above.
                if len(disasm) > 1 and ', ' in disasm[1]:
                    temp = disasm.pop(1)
                    for elem in temp.split(', '):
                        disasm.append(elem.lstrip().lstrip('0').rstrip('h'))

                # remove [ebp+0]
                # str.replace returns a new string, so the result must be kept
                disasm = [dis.replace('[ebp+0]', '[ebp]') for dis in disasm]

                # context
                ida_ctx = values[3].strip(' ').split(' ')
                for value in ida_ctx:
                    try:
                        a, b = value.split('=')
                    except ValueError:
                        # not a single 'name=value' token
                        continue
                    if len(b) > 1:
                        b = ''.join(c.rstrip('\r\n') for c in b.lstrip('0'))
                    if b == '':
                        b = '0'
                    context[a.lower()] = b

                trace.append(Traceline(thread_id=thread_id, addr=addr,
                                       disasm=disasm, ctx=deepcopy(context)))

        # immunity trace
        elif lines[0].startswith('Address '):
            for i in lines[1:]:
                if 'Run trace closed' in i or 'Process terminated' in i:
                    break
                values = i.split('\t')
                try:
                    # thread_id
                    # immunity uses names, e.g. main
                    thread_id = sum(ord(c) for c in values[1])

                    # addr
                    try:
                        addr = int(values[0], 16)
                    except (ValueError, IndexError):
                        addr = BADADDR

                    # disasm
                    disasm = values[2].lower().rstrip('\r\n')
                    disasm = disasm.split(' ', 1)
                    if len(disasm) > 1 and ',' in disasm[1]:
                        temp = disasm.pop(1)
                        for elem in temp.split(','):
                            disasm.append(elem.lstrip('0'))
                    disasm = [x.split('dword ptr ')[1] if 'dword ptr ' in x else x
                              for x in disasm]
                    if len(disasm) == 2 and len(re.findall(r'.*\[.*[\+\-\*].*[\+\-\*].*\].*', disasm[1])) > 0:
                        disasm[1] = ida_offset(disasm[1])

                    # context
                    if len(values) > 3:
                        olly_ctx = values[3].lstrip(' ').rstrip('\r\n').split(',')
                        for value in olly_ctx:
                            try:
                                a, b = value.split('=')
                            except ValueError:
                                continue
                            if len(b) > 1:
                                b = ''.join(c for c in b.lstrip('0') if c not in '\n\r\t')
                            if b == '':
                                b = '0'
                            context[a.lower()] = b

                    trace.append(Traceline(thread_id=thread_id, addr=addr,
                                           disasm=disasm, ctx=deepcopy(context)))
                except Exception:
                    # best effort: lines that cannot be parsed (e.g. the
                    # 'terminated' / 'entry point' notices) are skipped
                    continue

        # olly trace
        elif lines[1].startswith('main '):
            for i in lines[1:]:
                if 'Logging stopped' in i:
                    break
                values = i.split('\t')
                # thread_id
                # olly uses names, e.g. main
                thread_id = sum(ord(c) for c in values[0])

                # addr
                try:
                    addr = int(values[1], 16)
                except (ValueError, IndexError):
                    addr = BADADDR

                # disasm
                disasm = values[2].lower().rstrip('\r\n')
                disasm = disasm.split(' ', 1)
                if len(disasm) > 1 and ',' in disasm[1]:
                    temp = disasm.pop(1)
                    for elem in temp.split(','):
                        disasm.append(elem.lstrip('0'))
                disasm = [x.split('dword ptr ')[1] if 'dword ptr ' in x else x
                          for x in disasm]
                if len(disasm) == 2 and len(re.findall(r'.*\[.*[\+\-\*].*[\+\-\*].*\].*', disasm[1])) > 0:
                    disasm[1] = ida_offset(disasm[1])

                # context
                if len(values) > 3:
                    olly_ctx = values[3].lstrip(' ').rstrip('\r\n').split(',')
                    for value in olly_ctx:
                        try:
                            a, b = value.split('=')
                        except ValueError:
                            continue
                        if len(b) > 1:
                            b = ''.join(c for c in b.lstrip('0') if c not in '\n\r\t')
                        if b == '':
                            b = '0'
                        context[a.lower()] = b

                trace.append(Traceline(thread_id=thread_id, addr=addr,
                                       disasm=disasm, ctx=deepcopy(context)))

        # derive the register width from the registers seen in the last line
        last_ctx_keys = trace[-1].ctx.keys()
        if 'rax' in last_ctx_keys:
            trace.ctx_reg_size = 64
        elif 'eax' in last_ctx_keys:
            trace.ctx_reg_size = 32
        msg("[*] Trace Loaded!\n")
        return trace
    except Exception as e:
        # e.message may be empty (or missing); fall back to str(e)
        raise Exception('[*] Exception occured: \n%s\n' % (getattr(e, 'message', '') or str(e)))
def load():
    """
    Load a trace from file.

    Supported are IDAs txt trace files and VMAttacks json files. Further
    OllyDBG and ImmunityDBG traces are supported but have slightly limited
    analysis capabilities.

    The file is chosen through a Qt file dialog. If the dialog itself fails,
    the first file ending in 'txt' in the current working directory is used,
    and if none is found the user is asked for a path.

    NOTE(review): an earlier definition of load() appears in this file; if
    both live in the same module, this later one is the one that stays bound.

    :return: trace object, or None if no file was chosen or the file is
             neither a .txt nor a .json file
    :raises Exception: wraps any error raised while parsing the trace
    """
    path = ''
    try:
        fd = QtGui.QFileDialog()
        fd.setFileMode(QtGui.QFileDialog.AnyFile)
        fd.setFilters(["Text files (*.txt)", "JSON files (*.json)"])
        fd.setWindowTitle('Load Trace ...')
        if fd.exec_():
            path = fd.selectedFiles()[0]
        else:
            path = None
    except Exception:
        msg('A Problem occured with the file selector dialog, first *.txt file in the current working directory was choosen!'
            )
        for f in os.listdir(os.getcwd()):
            if f.endswith('txt'):
                path = f
                # stop at the first match, as the message above states
                break
    if path == '':
        path = asktext(40, '',
                       'Please provide the full path to the trace file: ')

    # dialog cancelled or no path entered: nothing to load
    if path is None:
        return None

    get_log().log('[TRC] Loaded the trace at %s\n' % path)
    if path.endswith('.txt'):
        with open(path, 'r') as f:
            lines = f.readlines()
    elif path.endswith('.json'):
        with open(path) as f:
            lines = json.load(f)
    else:
        return None

    trace = Trace()
    # segment name -> {function name -> start address}
    functions = {
        SegName(addr): {
            GetFunctionName(ea): ea
            for ea in Functions(SegStart(addr), SegEnd(addr))
        }
        for addr in Segments()
    }

    try:
        # register values carry over from line to line; each Traceline gets a copy
        context = defaultdict(lambda: False)

        # framework json trace
        if isinstance(lines, dict) or path.endswith('.json'):
            get_log().log('[TRC] The trace seems to be a VMAttack trace\n')
            for index in range(len(lines)):
                line = lines[str(index)]
                t = Traceline(thread_id=line[0],
                              addr=line[1],
                              disasm=line[2],
                              ctx=line[3],
                              comment=line[4])
                t.grade = line[5]
                trace.append(t)

        # ida trace via Win32Dbg
        elif lines[0].startswith('Thread '):
            for i in lines[3:]:
                if i.startswith('Thread'):
                    break
                values = i.split('\t')
                # thread id
                thread_id = int(values[0], 16)

                # addr
                addr = BADADDR
                func_name = values[1].strip(' ').split(':')
                if len(func_name) == 2:
                    try:
                        # .segment:addr
                        addr = int(func_name[1], 16)
                    except Exception:
                        try:
                            # .segment:func_name+offset
                            offset = int(func_name[1].split('+')[1], 16)
                            name = func_name[1].split('+')[0]
                            addr = functions[func_name[0]][name] + offset
                        except Exception:
                            try:
                                # .segment:func_name-offset
                                offset = int(
                                    i.split('-')[1].split(' ')[0], 16)
                                name = func_name[1].split('-')[0]
                                addr = functions[func_name[0]][name] - offset
                            except Exception:
                                if not func_name[1].startswith('loc_'):
                                    # .segment:func_name
                                    addr = functions[func_name[0]][
                                        func_name[1]]
                                else:
                                    # .segment:jmp_location
                                    addr = int(func_name[1][4:], 16)
                elif len(func_name) == 3:
                    addr = int(func_name[2][4:], 16)

                # disasm
                disasm = values[2].strip(' ').lower()
                disasm = disasm.split(' ')
                # drop empty pieces; build a real list so len()/pop() work
                disasm = [x.lstrip() for x in disasm]
                disasm = [x for x in disasm if x]
                # NOTE(review): elements produced by split(' ') cannot contain
                # ', ', so this branch never fires as written - confirm the
                # intended separator of the split above.
                if len(disasm) > 1 and ', ' in disasm[1]:
                    temp = disasm.pop(1)
                    for elem in temp.split(', '):
                        disasm.append(
                            elem.lstrip().lstrip('0').rstrip('h'))

                # remove [ebp+0]
                # str.replace returns a new string, so the result must be kept
                disasm = [dis.replace('[ebp+0]', '[ebp]') for dis in disasm]

                # context
                ida_ctx = values[3].strip(' ').split(' ')
                for value in ida_ctx:
                    try:
                        a, b = value.split('=')
                    except ValueError:
                        # not a single 'name=value' token
                        continue
                    if len(b) > 1:
                        b = ''.join(
                            c.rstrip('\r\n') for c in b.lstrip('0'))
                    if b == '':
                        b = '0'
                    context[a.lower()] = b

                trace.append(
                    Traceline(thread_id=thread_id,
                              addr=addr,
                              disasm=disasm,
                              ctx=deepcopy(context)))

        # immunity trace
        elif lines[0].startswith('Address '):
            for i in lines[1:]:
                if 'Run trace closed' in i or 'Process terminated' in i:
                    break
                values = i.split('\t')
                try:
                    # thread_id
                    # immunity uses names, e.g. main
                    thread_id = sum(ord(c) for c in values[1])

                    # addr
                    try:
                        addr = int(values[0], 16)
                    except (ValueError, IndexError):
                        addr = BADADDR

                    # disasm
                    disasm = values[2].lower().rstrip('\r\n')
                    disasm = disasm.split(' ', 1)
                    if len(disasm) > 1 and ',' in disasm[1]:
                        temp = disasm.pop(1)
                        for elem in temp.split(','):
                            disasm.append(elem.lstrip('0'))
                    disasm = [
                        x.split('dword ptr ')[1]
                        if 'dword ptr ' in x else x for x in disasm
                    ]
                    if len(disasm) == 2 and len(
                            re.findall(r'.*\[.*[\+\-\*].*[\+\-\*].*\].*',
                                       disasm[1])) > 0:
                        disasm[1] = ida_offset(disasm[1])

                    # context
                    if len(values) > 3:
                        olly_ctx = values[3].lstrip(' ').rstrip(
                            '\r\n').split(',')
                        for value in olly_ctx:
                            try:
                                a, b = value.split('=')
                            except ValueError:
                                continue
                            if len(b) > 1:
                                b = ''.join(c for c in b.lstrip('0')
                                            if c not in '\n\r\t')
                            if b == '':
                                b = '0'
                            context[a.lower()] = b

                    trace.append(
                        Traceline(thread_id=thread_id,
                                  addr=addr,
                                  disasm=disasm,
                                  ctx=deepcopy(context)))
                except Exception:
                    # best effort: lines that cannot be parsed (e.g. the
                    # 'terminated' / 'entry point' notices) are skipped
                    continue

        # olly trace
        elif lines[1].startswith('main '):
            for i in lines[1:]:
                if 'Logging stopped' in i:
                    break
                values = i.split('\t')
                # thread_id
                # olly uses names, e.g. main
                thread_id = sum(ord(c) for c in values[0])

                # addr
                try:
                    addr = int(values[1], 16)
                except (ValueError, IndexError):
                    addr = BADADDR

                # disasm
                disasm = values[2].lower().rstrip('\r\n')
                disasm = disasm.split(' ', 1)
                if len(disasm) > 1 and ',' in disasm[1]:
                    temp = disasm.pop(1)
                    for elem in temp.split(','):
                        disasm.append(elem.lstrip('0'))
                disasm = [
                    x.split('dword ptr ')[1] if 'dword ptr ' in x else x
                    for x in disasm
                ]
                if len(disasm) == 2 and len(
                        re.findall(r'.*\[.*[\+\-\*].*[\+\-\*].*\].*',
                                   disasm[1])) > 0:
                    disasm[1] = ida_offset(disasm[1])

                # context
                if len(values) > 3:
                    olly_ctx = values[3].lstrip(' ').rstrip('\r\n').split(
                        ',')
                    for value in olly_ctx:
                        try:
                            a, b = value.split('=')
                        except ValueError:
                            continue
                        if len(b) > 1:
                            b = ''.join(c for c in b.lstrip('0')
                                        if c not in '\n\r\t')
                        if b == '':
                            b = '0'
                        context[a.lower()] = b

                trace.append(
                    Traceline(thread_id=thread_id,
                              addr=addr,
                              disasm=disasm,
                              ctx=deepcopy(context)))

        # derive the register width from the registers seen in the last line
        last_ctx_keys = trace[-1].ctx.keys()
        if 'rax' in last_ctx_keys:
            trace.ctx_reg_size = 64
        elif 'eax' in last_ctx_keys:
            trace.ctx_reg_size = 32
        msg("[*] Trace Loaded!\n")
        return trace
    except Exception as e:
        # e.message may be empty (or missing); fall back to str(e)
        raise Exception('[*] Exception occured: \n%s\n' %
                        (getattr(e, 'message', '') or str(e)))
def create_bb_diff(bb, ctx_reg_size, prev_line_ctx):
    """
    Summarize a basic block as one trace line.

    Addr and thread id are taken from the last line of the block; ctx is
    shown as: before -> after; disasm (and comment) is chosen by heuristic.

    :param bb: non-empty sequence of trace lines forming the basic block
    :param ctx_reg_size: register size passed on to get_reg when a register
                         operand is looked up in the previous context
    :param prev_line_ctx: context of the line preceding the block; its keys
                          decide which registers are compared
    :return: a Traceline whose ctx maps each key to either the unchanged
             value or 'old -> new', and whose disasm/comment are lists
    :raises Exception: if the last line of the block has fewer (or equally
                       many but different) context keys than prev_line_ctx
    """
    first = bb[0]
    last = bb[-1]
    keys_f = prev_line_ctx.keys()
    keys_l = last.ctx.keys()
    context = {}
    disasm = []
    comment = []

    # Compare as sets: the order of dict keys is not a meaningful difference
    # and must not push equal key sets into the error branch.
    known_keys = set(keys_f)
    if known_keys != set(keys_l) and not len(keys_l) > len(keys_f):
        # means keys_l < keys_f and if that happens sth went wrong. Should not be possible by normal execution.
        raise Exception(
            '[*] Keys at the end of basic block %s-%s were LESS than at the beginning!'
            % (first.addr, last.addr))

    for key in keys_f:
        if first.ctx[key] != last.ctx[key]:
            context[key] = first.ctx[key] + ' -> ' + last.ctx[key]
        else:
            context[key] = last.ctx[key]
    # keys that only exist at the end of the block are copied as they are
    for key in set(keys_l) - known_keys:
        context[key] = last.ctx[key]

    last_ctx = prev_line_ctx
    block_len = len(bb)
    for pos, line in enumerate(bb):
        if line.comment is not None:
            comment.append(line.comment)

        mnemonic = line.disasm[0]
        if mnemonic.startswith('mov'):
            # skip a mov if the next line is a mov to the same register class
            if pos + 1 < block_len:
                following = bb[pos + 1]
                try:
                    if following.disasm[0].startswith('mov') and get_reg_class(
                            following.disasm[1]) == get_reg_class(
                                line.disasm[1]):
                        continue
                except Exception:
                    # best effort: treat lookup problems as "no match"
                    pass
            if line.disasm[1].startswith('[') and line.disasm[1].endswith(']'):
                comment.append(line.disasm[1] + '=' + line.disasm[2])
            elif get_reg_class(line.disasm[1]) is not None:
                continue
        elif mnemonic.startswith('j'):
            continue
        elif line.comment is not None and len(
                line.disasm) == 3 and line.disasm[1].startswith('['):
            # extend the comment appended above with mnemonic and source value
            if get_reg_class(line.disasm[2]) is not None:
                comment[-1] = comment[-1] + ' ' + mnemonic + ' ' + last_ctx[
                    get_reg(line.disasm[2], ctx_reg_size)]
            else:
                comment[-1] = comment[-1] + ' ' + mnemonic + ' ' + line.disasm[2]
        elif line.comment is not None and len(
                line.disasm) == 3 and line.disasm[2].startswith('['):
            if get_reg_class(line.disasm[1]) is not None:
                comment[-1] = comment[-1] + ' ' + mnemonic + ' ' + last_ctx[
                    get_reg(line.disasm[1], ctx_reg_size)]
            else:
                comment[-1] = comment[-1] + ' ' + mnemonic + ' ' + line.disasm[1]

        disasm.append(line.disasm)
        # NOTE(review): lines skipped via continue never reach this point, so
        # last_ctx then still refers to an earlier line - confirm intended.
        last_ctx = line.ctx

    result = Traceline(addr=last.addr,
                       thread_id=last.thread_id,
                       ctx=context,
                       disasm=disasm,
                       comment=comment)
    return result