def deobsfucate(self):
    """Follow the decoding stub at the entry point with the emulator.

    Steps performed, in order:

    1. Statically list instructions from the entry point until one whose
       text starts with ``loop``.
    2. Step the emulator until ECX, BL and DL are all non-zero, and store
       that ECX value in ``self.bytesobs``.
    3. Step the emulator until ECX reaches zero again.
    4. List and execute instructions until one whose text starts with
       ``jmp``.

    Only the first 40 bytes at the entry point are disassembled.
    """
    self.initalizesections()
    entry_rva = self.pe.OPTIONAL_HEADER.AddressOfEntryPoint
    entry_va = entry_rva + self.pe.OPTIONAL_HEADER.ImageBase
    stub = self.pe.get_memory_mapped_image()[entry_rva:entry_rva + 40]
    cursor = 0

    def decode_at(position):
        # Decode one instruction of the stub and render it at its virtual address.
        decoded = pydasm.get_instruction(stub[position:], pydasm.MODE_32)
        rendered = pydasm.get_instruction_string(
            decoded, pydasm.FORMAT_INTEL, entry_va + position)
        return decoded, rendered

    self.emu.set_register("EIP", self.entrypoint)
    self.emu.set_register("ECX", 0x00000000)
    self.emu.set_register("BL", 0x00)
    self.emu.set_register("DL", 0x00)

    # Phase 1: static listing up to (and including) the "loop" instruction.
    text = "NOP"
    while not text.startswith("loop"):
        decoded, text = decode_at(cursor)
        cursor += decoded.length
        print(text)

    # Phase 2: run until the three registers cleared above have all been set.
    while True:
        counter = self.emu.get_register("ECX")
        reg_bl = self.emu.get_register("BL")
        reg_dl = self.emu.get_register("DL")
        if counter != 0 and reg_bl != 0 and reg_dl != 0:
            break
        self.emu.execute()
    self.bytesobs = counter

    # Phase 3: run until ECX counts back down to zero.
    while counter != 0:
        self.emu.execute()
        counter = self.emu.get_register("ECX")

    decoded, text = decode_at(cursor)
    print(text)
    cursor += decoded.length
    self.emu.execute()
    # Re-synchronise the static cursor with the emulator's instruction pointer.
    cursor += self.emu.get_register("EIP") - (entry_va + cursor)

    # Phase 4: list and execute in lock step until a "jmp" is reached.
    text = "nop"
    while not text.startswith("jmp"):
        decoded, text = decode_at(cursor)
        self.emu.execute()
        cursor += decoded.length
        print(text)

    # Text following the first "0x" of the jmp instruction; not used further
    # in the visible code.
    jmp_address = text[text.find("0x") + 2:]
def check_inline(pe, export_rva, export_va):
    """Look for an inline hook in the first instructions of an export.

    Recognised patterns are ``jmp``/``call`` with a memory or immediate
    operand as the first instruction, and ``push <imm>`` immediately
    followed by ``ret``.

    @param pe: object providing ``get_data(rva, length)``
    @param export_rva: RVA of the exported function
    @param export_va: virtual address of the exported function
    @return: ``(hook_destination, instruction)``; ``(None, None)`` when the
        bytes cannot be read or decoded, or no pattern matches.
    """
    try:
        code = pe.get_data(export_rva, 24)
    except Exception:
        # Best effort: an unreadable RVA is reported and skipped.  Unlike a
        # bare ``except`` this lets KeyboardInterrupt/SystemExit through.
        print("[!] Cannot read RVA at 0x%x" % export_rva)
        return None, None

    first = pydasm.get_instruction(code, pydasm.MODE_32)
    if not first:
        return None, None

    if first.type in (pydasm.INSTRUCTION_TYPE_JMP, pydasm.INSTRUCTION_TYPE_CALL):
        if first.type == pydasm.INSTRUCTION_TYPE_JMP:
            mnemonic = "jmp"
        else:
            mnemonic = "call"
        if first.op1.type == pydasm.OPERAND_TYPE_MEMORY:
            hook_destination = first.op1.displacement & 0xffffffff
            return hook_destination, "%s [0x%x]" % (mnemonic, hook_destination)
        if first.op1.type == pydasm.OPERAND_TYPE_IMMEDIATE:
            # Relative target: address of the next instruction plus the immediate.
            hook_destination = export_va + first.op1.immediate + first.length
            return hook_destination, "%s 0x%x" % (mnemonic, hook_destination)
        return None, None

    if first.type == pydasm.INSTRUCTION_TYPE_PUSH:
        # The second instruction only matters for push/ret; decoding it here
        # means an undecodable follower no longer hides a jmp/call hook.
        second = pydasm.get_instruction(code[first.length:], pydasm.MODE_32)
        if second and second.type == pydasm.INSTRUCTION_TYPE_RET:
            hook_destination = first.op1.immediate
            return hook_destination, "push dword 0x%x; ret" % hook_destination

    return None, None
def assembly_disassemble():
    """Interactively disassemble a user-chosen number of bytes at the entry point.

    Prompts for a byte count, disassembles that many bytes of ``SUBJECT``
    starting at its entry point, and shows the listing through a
    ``ChocolateLog``.  Bytes that do not decode are emitted as ``db xx``.
    A non-numeric count is reported via ``reportError`` and nothing else
    happens.
    """
    try:
        max_bytes = int(getInput('Number of bytes to disassemble'))
    except ValueError:
        reportError('Please specify only numeric values')
        return

    clog = ChocolateLog()
    OEP = SUBJECT.OPTIONAL_HEADER.AddressOfEntryPoint
    OEP_base = OEP + SUBJECT.OPTIONAL_HEADER.ImageBase
    data = SUBJECT.get_memory_mapped_image()[OEP:]

    # Never walk past the mapped image: a count larger than the remaining
    # data used to end in an IndexError on ``data[offset]``.
    limit = min(max_bytes, len(data))
    offset = 0
    while offset < limit:
        ins = pydasm.get_instruction(data[offset:], pydasm.MODE_32)
        if ins is None:
            # Undecodable byte: emit it raw and resynchronise one byte later.
            asm = 'db %02x' % ord(data[offset])
            step = 1
        else:
            asm = pydasm.get_instruction_string(ins, pydasm.FORMAT_INTEL, OEP_base + offset)
            step = ins.length
        clog.add('%s\t%s' % (__assembly_offset(offset), asm))
        offset += step
    clog.interactiveOutput()
def find_interrupts(trace):
    """Compute the trace index ranges covered by interrupt handling.

    For every ``(a, b)`` pair returned by ``trace.find_interrupts()`` a
    ``(start, end)`` tuple is produced; the list of tuples is returned.
    """
    def scan(index, wanted_op, step):
        # Walk the trace in direction ``step`` until an entry with ``wanted_op``.
        while trace[index].op != wanted_op:
            index += step
        return index

    spans = []
    for first, last in trace.find_interrupts():
        # Most recent instruction before the interrupt entry.
        fault_idx = scan(first, 'IFLO_INSN_BYTES', -1)
        fault_eip = trace[fault_idx].args[0]
        fault_insn = pydasm.get_instruction(
            trace[fault_idx].args[1].decode('hex'), pydasm.MODE_32)

        # First translation-block head at or after the interrupt exit.
        head_idx = scan(last, 'IFLO_TB_HEAD_EIP', 1)
        ret_eip = trace[head_idx].args[0]

        # Default: delete up to the HEAD_EIP (i.e. start a new block)
        end = head_idx
        code = trace[first].args[0]  # presumably the interrupt number -- confirm
        resumes_at_fault = fault_eip == ret_eip

        if code < 32 and resumes_at_fault:
            start = fault_idx
        else:
            start = first

        # Per the original note, only the page-fault case (0xe) needs this.
        if code == 0xe and (resumes_at_fault or not is_branch(fault_insn)):
            # Merge the new TB into this one: extend to its first instruction.
            end = scan(head_idx, "IFLO_INSN_BYTES", 1)

        spans.append((start, end))
    return spans
def find_gadget_ends(start_ea, end_ea):
    """Collect addresses of gadget-ending opcodes between two addresses.

    Every byte pattern in the module-level ``gadget_ends`` is searched with
    ``inp.code_search``.  A hit whose first byte is not 0xFF is accepted as
    is; a 0xFF hit is accepted only when pydasm decodes it as a ``call`` or
    ``jmp`` that passes the register check below.

    @return: list of matching addresses, grouped by pattern in search order.
    """
    gadget_end_addresses = []
    for opcode_byte in gadget_ends:
        ea = start_ea
        while True:
            ea = inp.code_search(ea, opcode_byte)
            # Check for "not found" before the ordering comparison so None
            # is never compared with an integer.
            if ea is None or ea > end_ea:
                break
            if inp.byte_at(ea) != 0xFF:
                gadget_end_addresses.append(ea)
            else:
                # An opcode starting with 0xFF is not necessarily an indirect jmp/call
                bytes_ahead = 10  # TODO should be smaller, probably 3, should check
                headroom = inp.seg_end(ea) - ea
                if 0 < headroom < 10:
                    bytes_ahead = headroom
                ibuf = inp.bytes_at(ea, bytes_ahead)
                if not ibuf:
                    # Nothing to decode: warn and move on instead of handing
                    # an empty/None buffer to pydasm.
                    print("WARNING: GetManyBytes(%.08X, %d) failed " % (ea, bytes_ahead))
                else:
                    instr = pydasm.get_instruction(ibuf, pydasm.MODE_32)
                    # 8 is presumably pydasm's "no register" value -- confirm.
                    if (instr and
                            pydasm.get_mnemonic_string(instr, pydasm.FORMAT_INTEL) in ("call", "jmp") and
                            (instr.op1.reg != 8 or instr.op1.basereg != 8 or instr.op1.indexreg != 8)):
                        gadget_end_addresses.append(ea)
            ea += 1
    return gadget_end_addresses
def modify_entry_instructions(ep_ava, original_instructions, heuristic_decoder_offset, code_cave_address):
    """Rewrite relocated entry instructions so their relative jumps stay valid.

    Each instruction of ``original_instructions`` is decoded.  Those whose
    first byte is a short conditional jump, or one of the listed
    jmp/call opcodes, are re-encoded in their rel32 form with a displacement
    computed by ``update_jump_location``; every other instruction is copied
    unchanged.

    @param ep_ava: virtual address the instructions originally lived at
    @param original_instructions: raw bytes of the preserved instructions
    @param heuristic_decoder_offset: bytes preceding these instructions in
        the code cave
    @param code_cave_address: code cave address as a hexadecimal string
    @return: the rewritten instruction bytes
    """
    updated_instructions = ""  # holds the modified data

    unconditional_jump_opcodes = {
        "eb": "\xe9",  # jmp short
        "e9": "\xe9",  # jmp
        "ea": "\xea",  # jmp far
        "e8": "\xe8",  # call
    }

    conditional_jump_opcodes = {
        "77": "\x0f\x87",  # ja/jnbe
        "73": "\x0f\x83",  # jae/jnb
        "72": "\x0f\x82",  # jb/jnae
        "76": "\x0f\x86",  # jbe/jna
        "74": "\x0f\x84",  # je/jz
        "7f": "\x0f\x8f",  # jg/jnle
        "7d": "\x0f\x8d",  # jge/jnl
        "7c": "\x0f\x8c",  # jl/jnge
        "7e": "\x0f\x8e",  # jle/jng
        "75": "\x0f\x85",  # jne/jnz
        "71": "\x0f\x81",  # jno
        "79": "\x0f\x89",  # jns
        "7b": "\x0f\x8b",  # jnp/jpo
        "70": "\x0f\x80",  # jo
        "7a": "\x0f\x8a",  # jp/jpe
        "78": "\x0f\x88",  # js
    }

    # Loop-invariant part of every instruction's new address.
    cave_base = int(code_cave_address, 16) + heuristic_decoder_offset

    current_offset = 0
    added_bytes = 0
    while current_offset < len(original_instructions):
        # get the asm for each instruction
        i = pydasm.get_instruction(original_instructions[current_offset:], pydasm.MODE_32)
        asm = pydasm.get_instruction_string(i, pydasm.FORMAT_INTEL, ep_ava + current_offset)

        prior_offset = current_offset
        current_offset += i.length

        instruct_bytes = original_instructions[prior_offset:current_offset]
        opcode = binascii.hexlify(instruct_bytes[0])  # first opcode byte as hex

        # New address = cave + preceding stub + offset in the original bytes
        # + bytes gained so far by widening earlier jumps.
        current_address = cave_base + prior_offset + added_bytes

        # "<l" is always a 4-byte little-endian value.  Native "l" is 8 bytes
        # on LP64 platforms and would corrupt the rel32 operand there.
        if opcode in conditional_jump_opcodes:
            new_jmp_loc = update_jump_location(asm, current_address, 6)
            new_instruct_bytes = conditional_jump_opcodes[opcode] + struct.pack("<l", new_jmp_loc)
        elif opcode in unconditional_jump_opcodes:
            new_jmp_loc = update_jump_location(asm, current_address, 5)
            new_instruct_bytes = unconditional_jump_opcodes[opcode] + struct.pack("<l", new_jmp_loc)
        else:
            new_instruct_bytes = instruct_bytes

        updated_instructions += new_instruct_bytes
        # widening short jumps to long jumps adds bytes
        added_bytes += len(new_instruct_bytes) - len(instruct_bytes)

    return updated_instructions
def disassemble(buf):
    """Disassemble ``buf`` into a list of ``(instruction_bytes, text)`` tuples.

    Decoding stops at the end of the buffer, at the first undecodable
    position, or, when the module-level ``__BYTECOUNT__`` is non-zero, once
    the offset reaches that count.  Whatever was decoded so far is returned.
    """
    offset = 0
    output = []

    # pydasm decodes only the first instruction of its input, so slide
    # through the buffer one instruction at a time.
    while offset < len(buf):
        # Value comparison: ``is not 0`` tested object identity, which is
        # not guaranteed for integers.
        if __BYTECOUNT__ != 0 and offset >= __BYTECOUNT__:
            print('[+] Byte count was given, and all is parsed until bytecount')
            return output

        instruction = pydasm.get_instruction(buf[offset:], pydasm.MODE_32)
        if not instruction:
            print('%s %s' % ('[+] Cannot find intructions in the given buffer at offset: ', offset))
            return output

        instruction_string = pydasm.get_instruction_string(instruction, pydasm.FORMAT_INTEL, offset)

        # Slice exactly this instruction's bytes; the previous ``offset + 1``
        # upper bound also captured the first byte of the next instruction.
        end = offset + instruction.length
        output.append((buf[offset:end], instruction_string))
        offset = end

    return output
def dumpasm(data, opcodesize=configopts['asmopcodesize'], fillchar=configopts['asmfillchar']):
    """Print a disassembly listing of ``data`` when shellcode output is enabled.

    Does nothing unless ``configopts['asm4shellcode']`` is set.  Each line
    shows the offset, the instruction length, the hex bytes padded to
    ``opcodesize`` with ``fillchar``, and the Intel-syntax text.  Listing
    stops at the first position pydasm cannot decode.
    """
    if not configopts['asm4shellcode']:
        return

    # Imported lazily so pydasm is only required when the option is on.
    import pydasm

    position = 0
    total = len(data)
    while position < total:
        insn = pydasm.get_instruction(data[position:], pydasm.MODE_32)
        if not insn:
            return

        raw = data[position:position + insn.length]
        hex_column = "".join(["%02x " % ord(ch) for ch in raw])
        padded = hex_column.ljust(opcodesize, fillchar)
        text = pydasm.get_instruction_string(insn, pydasm.FORMAT_INTEL, 0)
        print("[0x%08x] (%02dB) %s %s" % (position, insn.length, padded, text))

        position += insn.length
def disas(tb, until=None):
    """Print the disassembly of a translation block, optionally following on.

    Without ``until`` only ``tb`` is printed.  With ``until`` the chain
    ``tb``, ``tb.next``, ... is printed until ``until(block)`` is true for
    the block just printed.
    """
    def show(block):
        # Decode every IFLO_INSN_BYTES entry first, then print them all.
        decoded = []
        for _, entry in block.body:
            if entry.op == 'IFLO_INSN_BYTES':
                insn = pydasm.get_instruction(entry.args[1].decode('hex'), pydasm.MODE_32)
                decoded.append((entry.args[0], insn))
        for addr, insn in decoded:
            print("%08x %s" % (addr, pydasm.get_instruction_string(insn, pydasm.FORMAT_INTEL, addr)))

    if not until:
        show(tb)
        return

    while True:
        show(tb)
        if until(tb):
            return
        tb = tb.next
def find_gadget_ends(start_ea, end_ea):
    """Collect addresses of gadget-ending opcodes between two addresses.

    Every byte pattern in the module-level ``gadget_ends`` is searched with
    ``inp.code_search``.  A hit whose first byte is not 0xFF is accepted as
    is; a 0xFF hit is accepted only when pydasm decodes it as a ``call`` or
    ``jmp`` that passes the register check below.

    @return: list of matching addresses, grouped by pattern in search order.
    """
    gadget_end_addresses = []
    for opcode_byte in gadget_ends:
        ea = start_ea
        while True:
            ea = inp.code_search(ea, opcode_byte)
            # Check for "not found" before the ordering comparison so None
            # is never compared with an integer.
            if ea is None or ea > end_ea:
                break
            if inp.byte_at(ea) != 0xFF:
                gadget_end_addresses.append(ea)
            else:
                # An opcode starting with 0xFF is not necessarily an indirect jmp/call
                bytes_ahead = 10  # TODO should be smaller, probably 3, should check
                headroom = inp.seg_end(ea) - ea
                if 0 < headroom < 10:
                    bytes_ahead = headroom
                ibuf = inp.bytes_at(ea, bytes_ahead)
                if not ibuf:
                    # Nothing to decode: warn and move on instead of handing
                    # an empty/None buffer to pydasm.
                    print("WARNING: GetManyBytes(%.08X, %d) failed " % (ea, bytes_ahead))
                else:
                    instr = pydasm.get_instruction(ibuf, pydasm.MODE_32)
                    # 8 is presumably pydasm's "no register" value -- confirm.
                    if (instr and
                            pydasm.get_mnemonic_string(instr, pydasm.FORMAT_INTEL) in ("call", "jmp") and
                            (instr.op1.reg != 8 or instr.op1.basereg != 8 or instr.op1.indexreg != 8)):
                        gadget_end_addresses.append(ea)
            ea += 1
    return gadget_end_addresses
def ScanMainTillRet():
    """Scan code linearly from the entry point and report calls until a ``ret``.

    Uses the module-level ``pe``.  For each ``call``:

    * a 10-character operand whose second character is not ``e`` is parsed
      (after stripping brackets) as an address and matched against the
      import table; matches are printed as ``address call dll.name,hint``;
    * any other operand whose first two characters are not ``e`` is taken
      as a direct target and passed to ``ScanFunc``.

    The scan ends at the first ``ret``, at the end of the code window, or at
    the first byte sequence pydasm cannot decode.
    """
    # Entry point RVA from the optional header, and its virtual address.
    ep = pe.OPTIONAL_HEADER.AddressOfEntryPoint
    ep_ava = ep + pe.OPTIONAL_HEADER.ImageBase
    data = pe.get_memory_mapped_image()[ep:ep + pe.OPTIONAL_HEADER.SizeOfCode]
    offset = 0

    while offset < len(data):
        i = pydasm.get_instruction(data[offset:], pydasm.MODE_32)
        if i is None:
            # Undecodable bytes: stop here.  Without this the loop could
            # neither advance nor terminate cleanly.
            return

        mnemonic = i.ptr.mnemonic
        if mnemonic == "ret":
            return

        if mnemonic == "call":
            HexString = pydasm.get_operand_string(i, 0, pydasm.FORMAT_INTEL, ep_ava + offset)
            if len(HexString) == 10 and HexString[1] != "e":
                intAdd = int(HexString.lstrip("[").rstrip("]"), 16)
                for entry in pe.DIRECTORY_ENTRY_IMPORT:
                    for imp in entry.imports:
                        if imp.address == intAdd:
                            # Detailed information for the API function found
                            print(str(hex(ep_ava + offset)) + " " + mnemonic + " " +
                                  entry.dll + "." + imp.name + "," + str(imp.hint))
            elif HexString[0] != "e" and HexString[1] != "e":
                # Operand does not look like a register (eax, [eax], ...).
                ScanFunc(int(HexString, 16))

        offset += i.length
def pydasm_linear(binary, start, startat):
    """Print a linear-sweep disassembly of ``binary`` from offset ``startat``.

    @param binary: open, seekable binary file object
    @param start: unused by this function (kept for callers)
    @param startat: file offset at which disassembly begins

    The remainder of the file is read in one piece.  The previous version
    iterated the file line by line, which splits machine code at every 0x0A
    byte, and then measured the builtin ``buffer`` instead of the data read.
    Output stops after the first undecodable position (printed as ``None``).
    """
    binary.seek(startat)
    code = binary.read()

    offset = 0
    while offset < len(code):
        # NOTE(review): MODE_64 is kept from the original; confirm that the
        # pydasm build in use actually defines it.
        i = pydasm.get_instruction(code[offset:], pydasm.MODE_64)
        print(pydasm.get_instruction_string(i, pydasm.FORMAT_INTEL, 0))
        if not i:
            break
        offset += i.length
def example_pydasm():
    """Print the disassembly of a fixed seven-byte sample.

    Each decoded instruction is printed in Intel syntax.  If a position
    cannot be decoded, the value returned for it is printed once and the
    loop ends.
    """
    code = '\x90\x31\xc9\x31\xca\x31\xcb'
    position = 0
    while position < len(code):
        insn = pydasm.get_instruction(code[position:], pydasm.MODE_32)
        print(pydasm.get_instruction_string(insn, pydasm.FORMAT_INTEL, 0))
        if insn:
            position += insn.length
        else:
            break
def __init__(self, hdr, instruction):
    """Unpack one packed instruction record.

    The record layout comes from ``Instruction.STRUCT_FMT(hdr)``.  Elements
    0-14 are the raw instruction bytes, element 15 the size, elements 16-79
    the symbol.  When ``hdr.is_net_tracker_enabled()`` is true, elements
    80-94 and 95 are stored as ``netidx`` and ``stage``.
    """
    fields = struct.unpack(Instruction.STRUCT_FMT(hdr), instruction)

    raw_bytes = fields[:15]
    self.instr_bytes = raw_bytes
    self.instr = pydasm.get_instruction(''.join(raw_bytes), pydasm.MODE_32)
    self.size = ord(fields[15])
    self.symbol = byte_array_to_str(fields[16:80])

    if not hdr.is_net_tracker_enabled():
        return
    self.netidx = fields[80:95]
    self.stage = fields[95]
def disassemble_range(dbg):
    """Disassemble ``start_address``..``end_address`` and build jump chunks.

    Resets and fills the module-level ``all_jump_addr``, ``bp_to_be_set``,
    ``addr_dict`` and ``addr_cmnt_dict``.  Instructions are grouped into
    chunks; a chunk is handed to ``add_to_json_tree`` when its last
    instruction's mnemonic is in ``all_jump`` or when the address
    ``end_address - 1`` is reached.

    Changes from the previous version:

    * decoding stops at the first undecodable position instead of raising;
    * each instruction is rendered at its own address, so relative targets
      are no longer all computed against ``start_address``;
    * the address string drops a trailing ``L`` whatever its length, and no
      longer loses the last digit of a 7-digit address.

    @param dbg: debugger object providing ``read_process_memory``
    @return: ``DBG_CONTINUE``
    """
    global all_jump_addr    # all jump addresses
    global bp_to_be_set     # breakpoints for control-flow drawing
    global addr_dict        # address -> instruction string
    global addr_cmnt_dict   # address -> comment ("NA" by default)

    print('[+] Disassembling the given address range')
    raw_bin = dbg.read_process_memory(start_address, end_address - start_address)

    all_jump_addr = []
    bp_to_be_set = []
    addr_dict = {}
    addr_cmnt_dict = {start_address: 'NA'}

    chunck = []
    offset = 0
    while offset < len(raw_bin):
        i = pydasm.get_instruction(raw_bin[offset:], pydasm.MODE_32)
        if not i:
            break

        address = start_address + offset
        instruction = pydasm.get_instruction_string(i, pydasm.FORMAT_INTEL, address)
        instruction = instruction.replace('dword', '')

        # [hex address, mnemonic, operands (if any)]
        new = [hex(address).rstrip('L')]
        new.extend(instruction.split(' ', 1))

        addr_dict[address] = instruction
        addr_cmnt_dict[address] = "NA"
        chunck.append(new)

        if address == end_address - 1:
            bp_to_be_set.append(chunck[0][0])
            add_to_json_tree(chunck)

        if new[1] in all_jump:
            # A jump closes the current chunk.
            all_jump_addr.append(address)
            add_to_json_tree(chunck)
            bp_to_be_set.append(chunck[0][0])
            chunck = []

        offset += i.length

    closejs()
    print('[+] Disassembling Done!')
    return DBG_CONTINUE
def getDisasm(raw_bin):
    """Return a one-line disassembly of ``raw_bin``.

    Instructions are joined as ``"<text>; "``.  A position that cannot be
    decoded contributes ``"Unknown;"`` and is skipped by one byte.

    The previous version never advanced after a failure, so it looped
    forever, and it built the string without returning it.
    """
    pieces = []
    offset = 0
    while offset < len(raw_bin):
        try:
            i = pydasm.get_instruction(raw_bin[offset:], pydasm.MODE_32)
            instruction = pydasm.get_instruction_string(i, pydasm.FORMAT_INTEL, 0)
            piece = instruction + '; '
            step = i.length
        except Exception:
            # Best effort: mark the byte as unknown and resynchronise.
            piece = 'Unknown' + ';'
            step = 1
        pieces.append(piece)
        offset += step
    return "".join(pieces)
def get_instruction(self, address):
    """
    Pydasm disassemble utility function wrapper.

    Reads 32 bytes at ``address`` with ``self.read_bytes`` and decodes the
    first instruction.

    @return: the object returned by ``pydasm.get_instruction``, or the
        string ``'Unable to disassemble at <address>'`` when the bytes
        cannot be read.
    """
    try:
        data = self.read_bytes(int(address), 32)
    except Exception:
        # Read failures are reported through the return value.  Unlike a
        # bare ``except`` this does not swallow KeyboardInterrupt/SystemExit.
        return 'Unable to disassemble at %08x' % address

    import pydasm
    return pydasm.get_instruction(data, pydasm.MODE_32)
def opPrint(data, ep_ava):
    """Print ``data`` as hex, then its disassembly based at ``ep_ava``.

    Listing stops with ``Unknown Opcode`` at the first position that cannot
    be decoded.
    """
    print(hexlify(data))

    cursor = 0
    end = len(data)
    while cursor < end:
        insn = pydasm.get_instruction(data[cursor:], pydasm.MODE_32)
        if not insn:
            print("Unknown Opcode")
            return
        print(pydasm.get_instruction_string(insn, pydasm.FORMAT_INTEL, ep_ava + cursor))
        cursor += insn.length
def disassembl(dat):
    """Return a newline-separated disassembly listing built from ``dat``.

    Decoding stops at the first position pydasm cannot decode.  The previous
    version rendered the instruction before checking it, so such a position
    raised ``TypeError`` (``None + "\\n"``) instead of ending the listing.

    NOTE(review): the bytes handed to pydasm are the *hex text* of ``dat``
    (``binascii.hexlify``), not ``dat`` itself.  That looks unintended but is
    kept unchanged here -- confirm with callers before changing it.
    """
    mal = binascii.hexlify(dat)
    lines = []
    offset = 0
    while offset < len(mal):
        i = pydasm.get_instruction(mal[offset:], pydasm.MODE_32)
        if not i:
            break
        lines.append(pydasm.get_instruction_string(i, pydasm.FORMAT_INTEL, 0) + "\n")
        offset += i.length
    return "".join(lines)
def dis(buff):
    """Return the Intel-syntax text of each instruction in ``buff``.

    Texts are rendered relative to their offset in ``buff``.  When a
    position cannot be decoded, the value pydasm returns for it is appended
    as the final element and the list is returned.
    """
    listing = []
    position = 0
    while position < len(buff):
        insn = pydasm.get_instruction(buff[position:], pydasm.MODE_32)
        listing.append(pydasm.get_instruction_string(insn, pydasm.FORMAT_INTEL, position))
        if not insn:
            break
        position += insn.length
    return listing
def peek_next_instruction(self):
    """Return the instruction at the current offset without advancing.

    A truthy cached entry for the offset is returned directly; otherwise the
    instruction is decoded from the stream, wrapped in ``Instruction``,
    cached and returned.
    """
    offset = self.__current_offset__
    cache = self.__decoded_instructions__
    if offset in cache and cache[offset]:
        return cache[offset]

    # pydasm is given a plain string copy of the remaining stream.
    remaining = str(self.__instruction_stream__[offset:])
    decoded = pydasm.get_instruction(remaining, pydasm.MODE_32)
    del remaining

    raw = self.__instruction_stream__[offset:offset + decoded.length]
    wrapped = Instruction(self, decoded, raw, offset)
    cache[offset] = wrapped
    return wrapped
def create(self, func_entry_addr, callback_function):
    """Creates the hook.

    @param func_entry_addr: The address of the function entry to hook
    @type func_entry_addr: int (0<= func_entry_addr < 2**32)
    @param callback_function: Python callback function
    @type callback_function: Python function with parameter
                             (ExecutionContext)
    @return: C{True} on success. C{False} on failure
    @raise ValueError: if C{func_entry_addr} is outside [0, 0xffffffff]
    """
    if not (0 <= func_entry_addr <= 0xffffffff):
        raise ValueError("Invalid function entry address <> [0, 2**32]")

    # The 5-byte jmp must replace whole instructions, so collect
    # instructions until at least 5 bytes are covered.  20 bytes are read
    # because the longest x86 instruction is 15 bytes:
    # add [ds:esi+ecx*2+0x67452301], 0xEFCDAB89
    code = memorymanager.read_addr(func_entry_addr, 20)
    save_code = ""
    while len(save_code) < 5:
        instr = pydasm.get_instruction(code, pydasm.MODE_32)
        if not instr:
            logging.warning("Cannot hook. Failed to disassemble bytes: \n" +
                            binascii.hexlify(code))
            return False
        save_code += code[:instr.length]
        code = code[instr.length:]

    # create trampoline
    if not self.create_trampoline(func_entry_addr,
                                  func_entry_addr + len(save_code),
                                  save_code,
                                  [1],  # check locking
                                  callback_function):
        logging.warning("Failed to create trampoline")
        return False

    # overwrite the original code (write hook)
    tramp_offset = ctypes.addressof(self.trampoline) - (func_entry_addr + 5)
    # rel32 is a signed displacement.  Mask it to its 32-bit two's-complement
    # form so a trampoline located below the hooked function (negative
    # offset) can be packed, and fix the byte order to little-endian.
    hook_code = "\xE9" + struct.pack("<I", tramp_offset & 0xffffffff)
    # Pad the remainder of the last overwritten instruction with NOPs.
    hook_code += "\x90" * (len(save_code) - 5)

    if not memorymanager.write_mem(func_entry_addr, hook_code):
        logging.error("Failed to create hook at address %08x" %
                      func_entry_addr)
        return False

    logging.debug("Successfully hooked target address %08x -> %08x" %
                  (func_entry_addr, ctypes.addressof(self.trampoline)))
    return True
def disassembl(dat):
    """Return a newline-separated disassembly listing built from ``dat``.

    Decoding stops at the first position pydasm cannot decode.  The previous
    version rendered the instruction before checking it, so such a position
    raised ``TypeError`` (``None + "\\n"``) instead of ending the listing.

    NOTE(review): the bytes handed to pydasm are the *hex text* of ``dat``
    (``binascii.hexlify``), not ``dat`` itself.  That looks unintended but is
    kept unchanged here -- confirm with callers before changing it.
    """
    mal = binascii.hexlify(dat)
    lines = []
    offset = 0
    while offset < len(mal):
        i = pydasm.get_instruction(mal[offset:], pydasm.MODE_32)
        if not i:
            break
        lines.append(pydasm.get_instruction_string(i, pydasm.FORMAT_INTEL, 0) + "\n")
        offset += i.length
    return "".join(lines)
def disassemble_range(dbg):
    """Disassemble ``start_address``..``end_address`` and build jump chunks.

    Resets and fills the module-level ``all_jump_addr``, ``bp_to_be_set``,
    ``addr_dict`` and ``addr_cmnt_dict``.  Instructions are grouped into
    chunks; a chunk is handed to ``add_to_json_tree`` when its last
    instruction's mnemonic is in ``all_jump`` or when the address
    ``end_address - 1`` is reached.

    Changes from the previous version:

    * decoding stops at the first undecodable position instead of raising;
    * each instruction is rendered at its own address, so relative targets
      are no longer all computed against ``start_address``;
    * the address string drops a trailing ``L`` whatever its length, and no
      longer loses the last digit of a 7-digit address.

    @param dbg: debugger object providing ``read_process_memory``
    @return: ``DBG_CONTINUE``
    """
    global all_jump_addr    # all jump addresses
    global bp_to_be_set     # breakpoints for control-flow drawing
    global addr_dict        # address -> instruction string
    global addr_cmnt_dict   # address -> comment ("NA" by default)

    print('[+] Disassembling the given address range')
    raw_bin = dbg.read_process_memory(start_address, end_address - start_address)

    all_jump_addr = []
    bp_to_be_set = []
    addr_dict = {}
    addr_cmnt_dict = {start_address: 'NA'}

    chunck = []
    offset = 0
    while offset < len(raw_bin):
        i = pydasm.get_instruction(raw_bin[offset:], pydasm.MODE_32)
        if not i:
            break

        address = start_address + offset
        instruction = pydasm.get_instruction_string(i, pydasm.FORMAT_INTEL, address)
        instruction = instruction.replace('dword', '')

        # [hex address, mnemonic, operands (if any)]
        new = [hex(address).rstrip('L')]
        new.extend(instruction.split(' ', 1))

        addr_dict[address] = instruction
        addr_cmnt_dict[address] = "NA"
        chunck.append(new)

        if address == end_address - 1:
            bp_to_be_set.append(chunck[0][0])
            add_to_json_tree(chunck)

        if new[1] in all_jump:
            # A jump closes the current chunk.
            all_jump_addr.append(address)
            add_to_json_tree(chunck)
            bp_to_be_set.append(chunck[0][0])
            chunck = []

        offset += i.length

    closejs()
    print('[+] Disassembling Done!')
    return DBG_CONTINUE
def preserve_entry_instructions(pe, ep, ep_ava, offset_end):
    """Return the whole instructions covering ``offset_end`` bytes at the entry.

    Instructions starting at RVA ``ep`` are decoded and printed until at
    least ``offset_end`` bytes are covered; the bytes of exactly those
    instructions are returned so no instruction is cut in half.
    """
    # 30 bytes of slack so the last instruction can be decoded completely.
    window = pe.get_memory_mapped_image()[ep:ep + offset_end + 30]
    print("[*] Preserving the following entry instructions (at entry address %s):" % hex(ep_ava))

    consumed = 0
    while consumed < offset_end:
        insn = pydasm.get_instruction(window[consumed:], pydasm.MODE_32)
        text = pydasm.get_instruction_string(insn, pydasm.FORMAT_INTEL, ep_ava + consumed)
        print("\t[+] " + text)
        consumed += insn.length

    # Re-read with the confirmed length to avoid partial instructions.
    return pe.get_memory_mapped_image()[ep:ep + consumed]
def create(self, func_entry_addr, callback_function):
    """Creates the hook.

    @param func_entry_addr: The address of the function entry to hook
    @type func_entry_addr: int (0<= func_entry_addr < 2**32)
    @param callback_function: Python callback function
    @type callback_function: Python function with parameter
                             (ExecutionContext)
    @return: C{True} on success. C{False} on failure
    @raise ValueError: if C{func_entry_addr} is outside [0, 0xffffffff]
    """
    if not (0 <= func_entry_addr <= 0xffffffff):
        raise ValueError("Invalid function entry address <> [0, 2**32]")

    # The 5-byte jmp must replace whole instructions, so collect
    # instructions until at least 5 bytes are covered.  20 bytes are read
    # because the longest x86 instruction is 15 bytes:
    # add [ds:esi+ecx*2+0x67452301], 0xEFCDAB89
    code = memorymanager.read_addr(func_entry_addr, 20)
    save_code = ""
    while len(save_code) < 5:
        instr = pydasm.get_instruction(code, pydasm.MODE_32)
        if not instr:
            logging.warning("Cannot hook. Failed to disassemble bytes: \n" +
                            binascii.hexlify(code))
            return False
        save_code += code[:instr.length]
        code = code[instr.length:]

    # create trampoline
    if not self.create_trampoline(func_entry_addr,
                                  func_entry_addr + len(save_code),
                                  save_code,
                                  [1],  # check locking
                                  callback_function):
        logging.warning("Failed to create trampoline")
        return False

    # overwrite the original code (write hook)
    tramp_offset = ctypes.addressof(self.trampoline) - (func_entry_addr + 5)
    # rel32 is a signed displacement.  Mask it to its 32-bit two's-complement
    # form so a trampoline located below the hooked function (negative
    # offset) can be packed, and fix the byte order to little-endian.
    hook_code = "\xE9" + struct.pack("<I", tramp_offset & 0xffffffff)
    # Pad the remainder of the last overwritten instruction with NOPs.
    hook_code += "\x90" * (len(save_code) - 5)

    if not memorymanager.write_mem(func_entry_addr, hook_code):
        logging.error("Failed to create hook at address %08x" %
                      func_entry_addr)
        return False

    logging.debug("Successfully hooked target address %08x -> %08x" %
                  (func_entry_addr, ctypes.addressof(self.trampoline)))
    return True
def main():
    """Report every 1-, 2-, 3- and 4-byte sequence pydasm rejects.

    All combinations of the values yielded by ``gen_one_byte()`` are tried,
    shortest width first.  A sequence is reported when it does not decode or
    when the decoded length exceeds the sequence length.
    """
    def scan(prefix, depth, width):
        # Extend ``prefix`` one byte at a time; test once ``width`` is reached.
        for one_byte in gen_one_byte():
            if prefix is None:
                candidate = one_byte
            else:
                candidate = prefix + one_byte

            if depth < width:
                scan(candidate, depth + 1, width)
                continue

            decoded = pydasm.get_instruction(candidate, pydasm.MODE_32)
            if not decoded or decoded.length > len(candidate):
                print("%s %s" % (gen_str_from_byte(candidate),
                                 'is INVALID %d byte instruction.' % width))

    for width in (1, 2, 3, 4):
        scan(None, 1, width)
def disassemble_dll(file_path):
    """Return the Intel-syntax text of each instruction in a file.

    The whole file is read as raw bytes and decoded linearly from offset 0
    until the end or the first undecodable position.

    @param file_path: path of the file to read
    @return: list of instruction strings
    """
    # ``with`` closes the handle even if the read fails.
    with open(file_path, "rb") as f:
        buff = f.read()

    instructions = []
    offset = 0
    while offset < len(buff):
        i = pydasm.get_instruction(buff[offset:], pydasm.MODE_32)
        if not i:
            break
        instructions.append(pydasm.get_instruction_string(i, pydasm.FORMAT_INTEL, 0))
        offset += i.length
    return instructions
def set_bp_on_ret(dbg, addr, handler):
    """Set a breakpoint on the first ``ret`` found at or after ``addr``.

    Up to 0x1000 bytes are read and decoded linearly.  The search gives up
    without setting a breakpoint at the first ``jmp``, at the first
    undecodable position, or when the bytes run out.

    @param dbg: debugger providing ``read_process_memory`` and ``bp_set``
    @param addr: address at which to start scanning
    @param handler: breakpoint handler passed to ``dbg.bp_set``
    """
    sizeof_code_to_read = 0x1000
    asm = dbg.read_process_memory(addr, sizeof_code_to_read)

    # Respect a short read instead of decoding past the returned bytes.
    limit = min(sizeof_code_to_read, len(asm))
    i = 0
    while i < limit:
        inst = pydasm.get_instruction(asm[i:], pydasm.MODE_32)
        if not inst:
            # Undecodable bytes previously crashed on ``None.startswith``.
            return
        inststr = pydasm.get_instruction_string(inst, pydasm.FORMAT_INTEL, 0)
        if inststr.startswith("jmp"):
            return
        if inststr.startswith("ret"):
            dbg.bp_set(addr + i, description="", handler=handler)
            return
        i += inst.length
    return
def check_validity(byte, f):
    """Recursively record byte sequences that pydasm rejects.

    ``byte`` is extended with every value from ``gen_one_byte()``.  For each
    extension that does not decode (or decodes to a length longer than the
    sequence), sequences of up to four bytes are extended further first;
    then the sequence is printed and written to ``f``, one per line.
    """
    for suffix in gen_one_byte():
        candidate = byte + suffix
        decoded = pydasm.get_instruction(candidate, pydasm.MODE_32)
        if decoded and decoded.length <= len(candidate):
            # Decodes within the bytes supplied: nothing to report.
            continue

        if len(candidate) <= 4:
            check_validity(candidate, f)

        label = gen_str_from_byte(candidate)
        print("%s %s %s" % (label, 'is INVALID of length', len(label)))
        f.write(label + '\n')
def print_section_info(pe):
    """Print every section of ``pe`` and the first instructions of the last one.

    Only the first 6 bytes of the last section are disassembled, so the
    final instruction may be cut off; decoding stops quietly there instead
    of failing on the missing instruction.
    """
    for section in pe.sections:
        print(section)

    # If you don't have pydasm installed comment the rest of the function out.
    print("The instructions at the beginning of the last section:")

    ep = pe.sections[-1].VirtualAddress
    ep_ava = ep + pe.OPTIONAL_HEADER.ImageBase
    data = pe.get_memory_mapped_image()[ep:ep + 6]
    offset = 0
    while offset < len(data):
        i = pydasm.get_instruction(data[offset:], pydasm.MODE_32)
        if not i:
            break
        print(pydasm.get_instruction_string(i, pydasm.FORMAT_INTEL, ep_ava + offset))
        offset += i.length
def peek_next_instruction(self):
    """Return the instruction at the current offset without advancing.

    A truthy cached entry for the offset is returned directly; otherwise the
    instruction is decoded from the stream, wrapped in ``Instruction``,
    cached and returned.
    """
    offset = self.__current_offset__
    cache = self.__decoded_instructions__
    if offset in cache and cache[offset]:
        return cache[offset]

    # pydasm is given a plain string copy of the remaining stream.
    remaining = str(self.__instruction_stream__[offset:])
    decoded = pydasm.get_instruction(remaining, pydasm.MODE_32)
    del remaining

    raw = self.__instruction_stream__[offset:offset + decoded.length]
    wrapped = Instruction(self, decoded, raw, offset)
    cache[offset] = wrapped
    return wrapped
def getCode(pe):
    """Return the assembly listing of the binary's instructions.

    Takes a PE object and returns a string with the ASM code, one
    instruction per line, starting at the entry point and ending at the end
    of the mapped image or at the first undecodable position.
    """
    header = pe.OPTIONAL_HEADER
    ep = header.AddressOfEntryPoint
    ep_ava = ep + header.ImageBase
    data = pe.get_memory_mapped_image()[ep:]

    lines = []
    offset = 0
    total = len(data)
    while offset < total:
        insn = pydasm.get_instruction(data[offset:], pydasm.MODE_32)
        if insn is None:
            break
        lines.append(pydasm.get_instruction_string(insn, pydasm.FORMAT_INTEL, ep_ava + offset) + "\n")
        offset += int(insn.length)
    return "".join(lines)
def __init__(self, ea, bytes, spd):
    """Build an instruction record from raw bytes at address ``ea``.

    ``bytes`` is decoded with pydasm; if decoding fails the bytes are
    reported with an ``IGNORE:`` line and the module-level ``nop``
    instruction is used instead.  ``spd`` is stored as the stack pointer
    delta.  Use/def sets and operands are then filled in by
    ``_get_use_def`` and ``_store_operands``, followed by
    ``reset_changed``.
    """
    self.addr = ea
    self.bytes = bytes

    # copy whatever we need from the pydasm instruction object
    decoded = pydasm.get_instruction(bytes, pydasm.MODE_32)
    if decoded is None:
        escaped = ''.join(['\\x%02x' % ord(b) for b in bytes])
        print("IGNORE: %s %s" % (hex(ea), escaped))
        decoded = nop

    self.disas = pydasm.get_instruction_string(decoded, pydasm.FORMAT_INTEL, ea)
    self.mnem = pydasm.get_mnemonic_string(decoded, pydasm.FORMAT_INTEL)
    self.type = decoded.type
    self.modrm_off = decoded.modrm_offset
    self.opc_off = decoded.opcode_offset
    self.eflags_r = decoded.eflags_used
    self.eflags_w = decoded.eflags_affected
    self.uses_sib = False
    self.inst_len = decoded.length - decoded.opcode_offset  # prefixes excluded

    self.spd = spd              # stack pointer delta
    self.pos = -1               # instruction position after ordering
    self.raddr = ea             # address after reordering (if changed)
    self.implicit = set()       # registers used implicitly by this instruction
    self.f_entry = False        # whether this is a function entry point
    self.f_exit = decoded.type == pydasm.INSTRUCTION_TYPE_RET
    self.regs = dict()          # bit positions in the instruction per reg
    self.updated = False        # for call instr: whether it was updated
    self.can_change = set()     # registers that can change in an indirect call

    # copies of bytes and regs; initialised by reset_changed
    self.cregs = None
    self.cbytes = None
    self.creg_names = None

    # liveness information
    self.succ = set()           # successor instruction addresses
    self.USE = set()            # regs used (read) by this instruction
    self.DEF = set()            # regs defined (written) by this instruction
    self.IN = set()             # regs live before instruction execution
    self.OUT = set()            # regs live after instruction execution
    self.IN_old = None
    self.OUT_old = None

    # TODO: special case for lea optimization (3 operands)
    self._get_use_def(decoded)
    self._store_operands(decoded)
    self.reset_changed()
def test_get_instruction_string(self):
    """The sample bytes disassemble to the expected concatenated text."""
    code = b'\x90\x31\xc9\x31\xca\x31\xcb'
    expected = 'nop ' + 'xor ecx,ecx' + 'xor edx,ecx' + 'xor ebx,ecx'

    rendered = ''
    position = 0
    while position < len(code):
        decoded = pydasm.get_instruction(code[position:], pydasm.MODE_32)
        rendered += pydasm.get_instruction_string(decoded, pydasm.FORMAT_INTEL, 0)
        if not decoded:
            break
        position += decoded.length

    self.assertEqual(rendered, expected)
def __init__(self, ea, bytes, spd):
    """Build an instruction record from raw bytes at address ``ea``.

    ``bytes`` is decoded with pydasm; if decoding fails the bytes are
    reported with an ``IGNORE:`` line and the module-level ``nop``
    instruction is used instead.  ``spd`` is stored as the stack pointer
    delta.  Use/def sets and operands are then filled in by
    ``_get_use_def`` and ``_store_operands``, followed by
    ``reset_changed``.
    """
    self.addr = ea
    self.bytes = bytes

    # copy whatever we need from the pydasm instruction object
    decoded = pydasm.get_instruction(bytes, pydasm.MODE_32)
    if decoded is None:
        escaped = ''.join(['\\x%02x' % ord(b) for b in bytes])
        print("IGNORE: %s %s" % (hex(ea), escaped))
        decoded = nop

    self.disas = pydasm.get_instruction_string(decoded, pydasm.FORMAT_INTEL, ea)
    self.mnem = pydasm.get_mnemonic_string(decoded, pydasm.FORMAT_INTEL)
    self.type = decoded.type
    self.modrm_off = decoded.modrm_offset
    self.opc_off = decoded.opcode_offset
    self.eflags_r = decoded.eflags_used
    self.eflags_w = decoded.eflags_affected
    self.uses_sib = False
    self.inst_len = decoded.length - decoded.opcode_offset  # prefixes excluded

    self.spd = spd              # stack pointer delta
    self.pos = -1               # instruction position after ordering
    self.raddr = ea             # address after reordering (if changed)
    self.implicit = set()       # registers used implicitly by this instruction
    self.f_entry = False        # whether this is a function entry point
    self.f_exit = decoded.type == pydasm.INSTRUCTION_TYPE_RET
    self.regs = dict()          # bit positions in the instruction per reg
    self.updated = False        # for call instr: whether it was updated
    self.can_change = set()     # registers that can change in an indirect call

    # copies of bytes and regs; initialised by reset_changed
    self.cregs = None
    self.cbytes = None
    self.creg_names = None

    # liveness information
    self.succ = set()           # successor instruction addresses
    self.USE = set()            # regs used (read) by this instruction
    self.DEF = set()            # regs defined (written) by this instruction
    self.IN = set()             # regs live before instruction execution
    self.OUT = set()            # regs live after instruction execution
    self.IN_old = None
    self.OUT_old = None

    # TODO: special case for lea optimization (3 operands)
    self._get_use_def(decoded)
    self._store_operands(decoded)
    self.reset_changed()
def get_disassembly(sel, start=None):
    """Disassemble ``sel`` into ``[hex_bytes, text]`` rows.

    @param sel: raw bytes to disassemble
    @param start: address of the first byte, used when rendering operands;
        ``None`` means 0
    @return: list of ``[hex, asm]`` pairs, where ``hex`` is the
        instruction's bytes as comma-separated upper-case hex

    Decoding stops at the first position pydasm cannot decode (typically a
    selection that ends mid-instruction); previously that raised
    ``AttributeError``.
    """
    from pydasm import get_instruction, get_instruction_string, MODE_32, FORMAT_INTEL

    if start is None:
        start = 0

    r = []
    offset = 0
    # TODO: get the source offset
    while offset < len(sel):
        instruction = get_instruction(sel[offset:], MODE_32)
        if not instruction:
            break
        end = offset + instruction.length
        h = ",".join(["%02X" % ord(c) for c in sel[offset:end]])
        asm = get_instruction_string(instruction, FORMAT_INTEL, offset + start)
        r.append([h, asm])
        offset = end
    return r
def findOptimalTrampolineLength(address, minlen=5, maxlen=12, noisy=False):
    """Return how many bytes at ``address`` a trampoline should cover.

    Whole instructions are accumulated until at least ``minlen`` bytes are
    covered, the running total reaches ``maxlen``, or decoding fails.  The
    total covered so far is returned.  With ``noisy`` the steps are logged
    at debug level.
    """
    if noisy:
        log.debug("Determining optimal tramploine size for address 0x%08x:"%(address))

    # A few bytes of slack so the last instruction can be decoded in full.
    code = _detour.util.read(address, maxlen+5)

    covered = 0
    count = 0
    while covered < maxlen:
        insn = pydasm.get_instruction(code[covered:], pydasm.MODE_32)
        if not insn:
            break
        if noisy:
            log.debug("%d bytes: %s"%(insn.length, pydasm.get_instruction_string(insn, pydasm.FORMAT_INTEL, 0)))
        count += 1
        covered += insn.length
        if covered >= minlen:
            break

    if noisy:
        log.debug("optimal size is %d bytes (%d instructions)"%(covered, count))
    return covered
def get_disassembly(sel, start=None):
    """Return a disassembly listing for the 32-bit x86 bytes in ``sel``.

    Args:
        sel: Raw machine-code bytes to disassemble.
        start: Address of the first byte of ``sel``, used only for the
            rendered instruction text.  ``None`` is treated as 0.

    Returns:
        A list of ``[hex_bytes, asm]`` entries, one per decoded item.
        ``hex_bytes`` is the comma-joined upper-case hex of the consumed
        bytes; ``asm`` is the Intel-syntax string.  When pydasm yields no
        instruction for the current position, that single byte is listed
        as ``db 0xNN`` rather than raising on the missing object.
    """
    from pydasm import get_instruction, get_instruction_string, MODE_32, FORMAT_INTEL

    base = 0 if start is None else start
    listing = []
    offset = 0
    # TODO: get the source offset
    while offset < len(sel):
        insn = get_instruction(sel[offset:], MODE_32)
        if insn:
            size = insn.length
            text = get_instruction_string(insn, FORMAT_INTEL, offset + base)
        else:
            # Nothing decodable here: fall back to a one-byte data entry.
            size = 1
            text = "db 0x%02X" % ord(sel[offset])
        consumed = sel[offset:offset + size]
        listing.append([",".join(["%02X" % ord(ch) for ch in consumed]), text])
        offset += size
    return listing
def pydasm_disasm_recursive(file_name, loadStart, loadEnd, pos, history):
    """Disassemble from ``pos`` and follow jump targets recursively.

    Args:
        file_name: Path of the file holding the code.
        loadStart: File offset added to ``pos`` to find the first byte read.
        loadEnd: Together with ``pos`` gives the number of bytes read
            (``loadEnd - pos``).
        pos: Position at which disassembly starts.
        history: List of positions already visited; it is updated in place.

    Returns:
        The same ``history`` list, containing every visited position.

    Each decoded instruction is printed.  For an instruction accepted by
    ``is_jump`` the first operand is parsed as a hexadecimal location; if
    ``is_new_jump`` accepts it, disassembly recurses there.  Linear
    disassembly stops after an unconditional jump, at the end of the
    buffer, or at the first undecodable byte.
    """
    # Machine code must be read in binary mode; text mode may translate or
    # truncate the data on some platforms.
    with open(file_name, 'rb') as f:
        f.seek(loadStart + pos)
        code = f.read(loadEnd - pos)

    # Iterate through the buffer and disassemble
    offset = 0
    while offset < len(code):
        if (pos + offset) not in history:
            history.append(pos + offset)

        inst = pydasm.get_instruction(code[offset:], pydasm.MODE_32)
        if not inst:
            break

        inst_str = pydasm.get_instruction_string(inst, pydasm.FORMAT_INTEL, 0)
        print(inst_str)

        if is_jump(inst_str):
            jump_loc = pydasm.get_operand_string(inst, 0, pydasm.FORMAT_INTEL, 0)
            # Only the parse is guarded, so a ValueError raised further down
            # the recursion is not misreported as a non-numeric operand.
            try:
                parsed_jump_loc = int(jump_loc, 16)
            except ValueError:
                parsed_jump_loc = None
                print("Did not go to jump because location is not numeric. (" + jump_loc + ")")

            if parsed_jump_loc is not None:
                if not is_new_jump(parsed_jump_loc, history):
                    print("Did not go to jump because location has already been visited. (" + jump_loc + ")")
                else:
                    print("Jumping to " + jump_loc + "!")
                    # The callee records its visits in ``history`` itself and
                    # returns that same list, so the result is not appended
                    # (doing so would nest the list inside itself).
                    pydasm_disasm_recursive(file_name, loadStart, loadEnd, parsed_jump_loc, history)
                    print("Returned from jumping to " + jump_loc + "!")

            if is_unconditional_jump(inst_str):
                return history

        offset += inst.length
    return history
def get_next_from_trace(trace, i):
    """Return the predicted successor list for the instruction at or before ``i``.

    Scans backwards from ``i`` to the nearest ``IFLO_INSN_BYTES`` entry,
    decodes its hex-encoded bytes and asks ``predict_next`` for the
    successors.  If that yields ``[-1]``, scans forward for an
    ``IFLO_JMP_T0`` entry and uses its first argument instead.

    Raises:
        PredictionFailure: if another ``IFLO_INSN_BYTES`` entry is reached
            before an ``IFLO_JMP_T0`` entry is found.
    """
    # Find the most recent instruction
    while True:
        if trace[i].op == 'IFLO_INSN_BYTES':
            break
        i -= 1

    addr, raw_hex = trace[i].args
    decoded = pydasm.get_instruction(raw_hex.decode('hex'), pydasm.MODE_32)
    successors = predict_next(addr, decoded)

    if successors == [-1]:
        # Based on dynamic information
        while trace[i].op != 'IFLO_JMP_T0':
            i += 1
            if trace[i].op == 'IFLO_INSN_BYTES':
                # The next instruction started before any JMP_T0 showed up,
                # so the target cannot be recovered.
                raise PredictionFailure("Missing JMP_T0 before next instruction")
        successors = [trace[i].args[0]]

    return successors
def decode(self, address, code):
    """Disassemble ``code`` as 32-bit x86, treating ``address`` as its base.

    Returns a list of ``(address, length, disassembly, hexdump)`` tuples,
    one per decoded item.  A byte that does not start a valid instruction,
    or whose instruction would run past the end of ``code``, is emitted as
    a one-byte ``db 0xNN`` entry.
    """
    size = len(code)
    decoded = []
    offset = 0
    while offset < size:
        # Hand pydasm a bounded window starting at the current offset.
        instruction = pydasm.get_instruction(code[offset:offset+32],
                                             pydasm.MODE_32)
        current = address + offset

        if instruction and offset + instruction.length <= size:
            # Correctly decoded instruction that fits in the buffer.
            ilen = instruction.length
            disasm = pydasm.get_instruction_string(instruction,
                                                   pydasm.FORMAT_INTEL,
                                                   current)
            hexdump = HexDump.hexadecimal(code[offset:offset+ilen])
        else:
            # Illegal opcode, or opcode longer than the remaining buffer.
            ilen = 1
            hexdump = '%.2X' % ord(code[offset])
            disasm = 'db 0x%s' % hexdump

        decoded.append((current, ilen, disasm, hexdump))
        offset += ilen

    return decoded
def decode(self, address, code):
    """Decode every instruction in ``code`` and return the listing.

    ``address`` is the memory address of the first byte of ``code``.  Each
    list entry is a tuple ``(address, length, disassembly, hexdump)``.
    Positions where pydasm yields no instruction, or where the instruction
    would extend beyond the buffer, become one-byte ``db 0xNN`` entries.
    """
    listing = []
    end = len(code)
    pos = 0
    while pos < end:
        # Try to decode the instruction starting at this position.
        insn = pydasm.get_instruction(code[pos:pos + 32], pydasm.MODE_32)
        # Memory address of the current position.
        va = address + pos

        unusable = not insn or insn.length + pos > end
        if unusable:
            # Fall back to a raw data byte.
            raw = '%.2X' % ord(code[pos])
            entry = (va, 1, 'db 0x%s' % raw, raw)
        else:
            text = pydasm.get_instruction_string(insn, pydasm.FORMAT_INTEL, va)
            entry = (va, insn.length, text,
                     HexDump.hexadecimal(code[pos:pos + insn.length]))

        listing.append(entry)
        # Advance by the length recorded in the entry.
        pos += entry[1]
    return listing
def get_next_from_trace(trace, i):
    """Predict where execution goes after the instruction at or before ``i``.

    The nearest preceding ``IFLO_INSN_BYTES`` entry supplies the address
    and hex-encoded bytes, which are decoded and passed to
    ``predict_next``.  Unless that returns ``[-1]``, its result is
    returned as is; otherwise the first argument of the following
    ``IFLO_JMP_T0`` entry is returned in a one-element list.

    Raises:
        PredictionFailure: when a new ``IFLO_INSN_BYTES`` entry appears
            before any ``IFLO_JMP_T0`` entry.
    """
    # Step back to the most recent instruction entry.
    idx = i
    while trace[idx].op != 'IFLO_INSN_BYTES':
        idx -= 1

    addr, hex_bytes = trace[idx].args
    prediction = predict_next(
        addr, pydasm.get_instruction(hex_bytes.decode('hex'), pydasm.MODE_32))
    if prediction != [-1]:
        return prediction

    # No static answer: take the target from the dynamic trace information.
    while trace[idx].op != 'IFLO_JMP_T0':
        idx += 1
        if trace[idx].op == 'IFLO_INSN_BYTES':
            # We reached the next instruction before finding
            # a JMP_T0 -- utter failure
            raise PredictionFailure(
                "Missing JMP_T0 before next instruction")
    return [trace[idx].args[0]]
def fix_sti(trace, tbdict):
    """Cut the ``IFLO_RESET_INHIBIT_IRQ`` sequences out of ``trace``.

    Every translation block in ``tbdict`` whose ``rbody`` contains an
    ``IFLO_RESET_INHIBIT_IRQ`` op contributes one index range, starting at
    that op and ending two to four entries later depending on what
    follows.  The ranges are deleted from ``trace`` in place, highest
    start first, and the result of ``remake_trace(trace)`` is returned.
    """
    marker = 'IFLO_RESET_INHIBIT_IRQ'
    affected = [
        tb
        for key in tbdict
        for tb in tbdict[key]
        if any(entry.op == marker for entry in tb.rbody)
    ]

    spans = []
    for tb in affected:
        i, _ = first(lambda pair: pair[1].op == marker, tb.body)

        # Locate the instruction entry preceding the marker and decode it.
        j = i
        while trace[j].op != 'IFLO_INSN_BYTES':
            j -= 1
        last_insn = pydasm.get_instruction(trace[j].args[1].decode('hex'),
                                           pydasm.MODE_32)

        assert trace[i].op == 'IFLO_RESET_INHIBIT_IRQ'
        assert trace[i+1].op == 'IFLO_MOVL_T0_0'
        assert trace[i+2].op == 'IFLO_EXIT_TB'

        # Default end of the span; extended when the preceding instruction
        # is not a branch and a TB head entry follows.
        ep = i + 2
        if not is_branch(last_insn):
            if trace[i+3].op == 'IFLO_TB_HEAD_EIP':
                ep = i + 3
            elif trace[i+3].op == 'IFLO_TB_ID' and trace[i+4].op == 'IFLO_TB_HEAD_EIP':
                ep = i + 4
        spans.append((i, ep))

    # Delete from the back so earlier indices remain valid.
    for a, b in sorted(spans, reverse=True):
        del trace[a:b+1]

    return remake_trace(trace)
def _get_instructions(pe): """ Extrae la representación textual de las instrucciones del programa Devuelve el conjunto de instrucciones. Parámetros: pe -- Objeto de pefile con el programa abierto. Excepciones: A implementar """ entry_point = pe.OPTIONAL_HEADER.AddressOfEntryPoint ep_ava = entry_point data = pe.get_memory_mapped_image()[entry_point:entry_point+pe.OPTIONAL_HEADER.SizeOfCode] offset,instructions = 0,set() while offset<len(data): instruction = get_instruction(data[offset:],MODE_32) if instruction==None: break else: instructions.add(get_instruction_string(instruction,FORMAT_INTEL,ep_ava+offset)) offset += instruction.length return list(instructions)
def ScanFunc(IntAddress):
    """Scan the code at ``IntAddress`` and print the imported functions it calls.

    Uses the ``pe`` object from the enclosing scope (it is not a parameter).
    Instructions are decoded until a ``ret``/``retn`` is met or the data is
    exhausted.  For each ``call``:

    * if the operand string is 10 characters long and its second character
      is not ``"e"``, surrounding brackets are stripped, the rest is parsed
      as hex and every import whose address matches is printed;
    * otherwise, if neither of the first two characters is ``"e"``, the
      operand is parsed as hex and scanned recursively.

    Returns nothing; all results are printed.

    NOTE(review): the block structure was reconstructed from a source whose
    indentation was lost; in particular the placement of the final
    ``offset+=1`` is a best guess -- confirm against the original file.
    NOTE(review): ``offset`` is computed relative to ImageBase while ``data``
    starts at the entry point -- verify that indexing ``data`` with it is
    intended.
    """
    print "************************************************"
    print "Trying To scan "+str(hex(IntAddress))
    ep = pe.OPTIONAL_HEADER.AddressOfEntryPoint
    ep_ava = ep+pe.OPTIONAL_HEADER.ImageBase
    # Offset of the requested address relative to the image base.
    offset = IntAddress-pe.OPTIONAL_HEADER.ImageBase
    #print hex(IntAddress)+">"+hex(ep+pe.OPTIONAL_HEADER.ImageBase)
    # SizeOfCode bytes of the mapped image, starting at the entry point.
    data = pe.get_memory_mapped_image()[ep:ep+pe.OPTIONAL_HEADER.SizeOfCode]
    while offset < len(data):
        i = pydasm.get_instruction(data[offset:], pydasm.MODE_32)
        if i!= None:
            #print hex(pe.OPTIONAL_HEADER.ImageBase+offset)," ", i.ptr.mnemonic
            if i.ptr.mnemonic=="ret":
                # A return ends the scan of this function.
                #print str(hex(ep_ava+offset))+ " I am Return Statement from"+str(hex(IntAddress))
                break
            elif i.ptr.mnemonic=="retn":
                #print str(hex(ep_ava+offset))+ " I am Return Statement from"+str(hex(IntAddress))
                break
            elif i.ptr.mnemonic=="call":
                HexString = pydasm.get_operand_string(i,0,pydasm.FORMAT_INTEL, ep_ava+offset)
                # presumably a 10-character operand is a full 32-bit address
                # such as "[0x401000]", and an "e" in the first positions
                # marks a register operand (eax, [eax], ...) -- TODO confirm
                if len(HexString)==10 and HexString[1]!="e" :
                    HexStr = HexString.lstrip("[")
                    HexStrAdd = HexStr.rstrip("]")
                    intAdd=int(HexStrAdd,16)
                    # Report every import whose address equals the call target.
                    for entry in pe.DIRECTORY_ENTRY_IMPORT:
                        for imp in entry.imports:
                            if imp.address==intAdd:
                                print "From Function:",str(hex(IntAddress)),"at",hex(intAdd), i.ptr.mnemonic +" "+ entry.dll+"."+imp.name+","+str(imp.hint)
                else:
                    #print str(hex(ep_ava+offset))+" "+i.ptr.mnemonic+" "+ HexString
                    if(HexString[0]!="e" and HexString[1]!="e"):
                        # print str(hex(ep_ava+offset))
                        # Follow the call target; there is no visited-set, so
                        # recursion depth is not bounded here.
                        ScanFunc(int(HexString,16))
            offset += i.length
            #print str(hex(ep_ava+offset))+" "+ i.ptr.mnemonic
        # NOTE(review): as reconstructed this runs on every iteration, i.e. in
        # addition to the instruction length above -- confirm intent.
        offset+=1
def findBytesToPop(address, maxlen=512, noisy=False):
    """Find the first return instruction at ``address`` and classify it.

    Decodes instructions from the memory read at ``address`` until a
    return is found, ``maxlen`` bytes have been covered, or decoding
    fails.

    Returns:
        A ``(type, num)`` tuple: ``("cdecl", 0)`` for a plain ``ret``,
        ``("stdcall", n)`` for ``retn n`` (``n`` parsed from the
        instruction text as hex), or ``(None, None)`` when no return
        instruction was found.
    """
    if noisy:
        log.debug("Determining bytes to pop for function at address 0x%08x:"%(address))

    code = _detour.util.read(address, maxlen+5)
    convention = None
    to_pop = None
    covered = 0      # bytes consumed so far (also the next decode offset)
    seen = 0         # number of instructions decoded
    while covered < maxlen:
        insn = pydasm.get_instruction(code[covered:], pydasm.MODE_32)
        if not insn:
            break
        text = pydasm.get_instruction_string(insn, pydasm.FORMAT_INTEL, 0)
        if noisy:
            log.debug("%d bytes: %s"%(insn.length, text))
        seen += 1
        covered += insn.length

        if text.strip() == "ret":
            if noisy:
                log.debug("found ret instruction (no bytes to pop)")
            convention, to_pop = "cdecl", 0
            break

        if text.startswith("retn"):
            if noisy:
                log.debug(insn)
            # Everything after the "retn " prefix is the hex byte count.
            to_pop = int(text[5:], 16)
            convention = "stdcall"
            if noisy:
                log.debug("found retn instruction, bytes to pop = %s"%(to_pop))
            break

    if noisy:
        if to_pop is None:
            log.debug("warning, no retn instruction found")
        else:
            log.debug("bytes to pop is %d bytes (found after %d instructions)"%(to_pop, seen))
    return (convention, to_pop)
def is_rep(insn):
    """Report whether an ``IFLO_INSN_BYTES`` entry has a PREFIX_REP flag set.

    The entry's second argument (hex-encoded instruction bytes) is decoded
    with pydasm in 32-bit mode and its ``flags`` are tested against the
    ``0x03000000`` mask.  Returns ``False`` when the bytes do not decode.
    """
    assert insn.op == 'IFLO_INSN_BYTES'
    rep_mask = 0x03000000  # PREFIX_REP
    decoded = pydasm.get_instruction(insn.args[1].decode('hex'), pydasm.MODE_32)
    if decoded:
        return bool(decoded.flags & rep_mask)
    return False