def deobsfucate(self):
    """Step the emulator through the code at the PE entry point.

    The first 40 bytes at the entry point are disassembled and printed
    while ``self.emu`` is driven through them:

    1. print instructions until one whose text starts with ``loop``;
    2. execute until ECX, BL and DL are all non-zero, and store that ECX
       value in ``self.bytesobs``;
    3. execute until ECX reaches zero;
    4. print and execute one more instruction, then re-synchronise the
       disassembly offset with the emulator's EIP;
    5. print and execute instructions until one starts with ``jmp``.

    Nothing is returned by the visible code.
    """
    self.initalizesections()
    entry_rva = self.pe.OPTIONAL_HEADER.AddressOfEntryPoint
    entry_va = entry_rva + self.pe.OPTIONAL_HEADER.ImageBase
    data = self.pe.get_memory_mapped_image()[entry_rva:entry_rva + 40]
    offset = 0

    def decode_at(position):
        # Decode one instruction of `data` and render it at its virtual address.
        insn = pydasm.get_instruction(data[position:], pydasm.MODE_32)
        text = pydasm.get_instruction_string(insn, pydasm.FORMAT_INTEL,
                                             entry_va + position)
        return insn, text

    self.emu.set_register("EIP", self.entrypoint)
    for register in ("ECX", "BL", "DL"):
        self.emu.set_register(register, 0)

    # Phase 1: list instructions up to and including the first "loop".
    instruction = "NOP"
    while not instruction.startswith("loop"):
        decoded, instruction = decode_at(offset)
        offset += decoded.length
        print(instruction)

    # Phase 2: run until the three registers have all been initialised.
    while True:
        ecx = self.emu.get_register("ECX")
        bl = self.emu.get_register("BL")
        dl = self.emu.get_register("DL")
        if ecx != 0 and bl != 0 and dl != 0:
            break
        self.emu.execute()
    self.bytesobs = ecx
    byte_obs = ecx  # not used by the visible code

    # Phase 3: run until the ECX counter is exhausted.
    while ecx != 0:
        self.emu.execute()
        ecx = self.emu.get_register("ECX")

    # Phase 4: one more instruction, then realign offset with the emulator.
    decoded, instruction = decode_at(offset)
    print(instruction)
    offset += decoded.length
    self.emu.execute()
    offset += self.emu.get_register("EIP") - (entry_va + offset)

    # Phase 5: follow execution until a "jmp" is reached.
    instruction = "nop"
    while not instruction.startswith("jmp"):
        decoded, instruction = decode_at(offset)
        self.emu.execute()
        offset += decoded.length
        print(instruction)

    # Text after the first "0x" of the jmp instruction; not used by the
    # visible code.
    marker = instruction.find("0x")
    jmp_address = instruction[marker + 2:]
def example_pydasm():
    """Disassemble a fixed 32-bit code sample, printing one instruction per line."""
    code = '\x90\x31\xc9\x31\xca\x31\xcb'
    position = 0
    size = len(code)
    while position < size:
        insn = pydasm.get_instruction(code[position:], pydasm.MODE_32)
        # The text is printed before the validity check, so a failed decode
        # is printed too and then ends the loop.
        print(pydasm.get_instruction_string(insn, pydasm.FORMAT_INTEL, 0))
        if not insn:
            break
        position += insn.length
def pydasm_linear(binary, start, startat):
    """Linearly disassemble a file object from position *startat*.

    The file is consumed chunk by chunk through iteration (``for line in
    binary``) and each chunk is disassembled on its own, so an instruction
    that straddles two chunks is not decoded as a whole.

    Parameters:
    binary  -- seekable, iterable file object holding machine code.
    start   -- unused; kept for interface compatibility.
    startat -- position passed to ``binary.seek`` before reading.
    """
    binary.seek(startat)
    for line in binary:
        offset = 0
        # Fixed: the original measured and sliced `buffer`, a name never
        # defined here, instead of the chunk being iterated.
        while offset < len(line):
            # NOTE(review): every other user of pydasm in this code base
            # passes MODE_32; confirm that pydasm exposes MODE_64.
            i = pydasm.get_instruction(line[offset:], pydasm.MODE_64)
            print(pydasm.get_instruction_string(i, pydasm.FORMAT_INTEL, 0))
            if not i:
                break
            offset += i.length
def opPrint(data, ep_ava):
    """Print *data* as hex, then its 32-bit disassembly.

    Each instruction is rendered at address ``ep_ava`` plus its offset.
    Decoding stops with "Unknown Opcode" at the first undecodable byte.
    """
    print(hexlify(data))
    cursor = 0
    total = len(data)
    while cursor < total:
        insn = pydasm.get_instruction(data[cursor:], pydasm.MODE_32)
        if not insn:
            print("Unknown Opcode")
            break
        print(pydasm.get_instruction_string(insn, pydasm.FORMAT_INTEL,
                                            ep_ava + cursor))
        cursor += insn.length
def print_section_info(pe):
    """Print every section of *pe* and the first instructions of the last one.

    Only the first 6 bytes of the last section are disassembled. Decoding
    stops with "Unknown Opcode" when an instruction cannot be decoded.

    Parameters:
    pe -- object exposing ``sections``, ``OPTIONAL_HEADER.ImageBase`` and
          ``get_memory_mapped_image()``.
    """
    for section in pe.sections:
        print(section)

    # If you don't have pydasm installed comment the rest of the function out.
    print("The instructions at the beginning of the last section:")
    ep = pe.sections[-1].VirtualAddress
    ep_ava = ep + pe.OPTIONAL_HEADER.ImageBase
    data = pe.get_memory_mapped_image()[ep:ep + 6]
    offset = 0
    while offset < len(data):
        i = pydasm.get_instruction(data[offset:], pydasm.MODE_32)
        if not i:
            # Fixed: the original went on to read i.length on a failed decode.
            print("Unknown Opcode")
            break
        print(pydasm.get_instruction_string(i, pydasm.FORMAT_INTEL,
                                            ep_ava + offset))
        offset += i.length
def set_instruction(self, instruction):
    """Copy the fields of a decoded pydasm instruction onto this object.

    Scalar fields are copied as they are. Each operand whose ``type`` is
    truthy is wrapped in ``PyOperand``; other operand attributes are left
    untouched. ``disasm`` and ``mnemonic`` hold the Intel-format strings
    with trailing spaces removed.
    """
    copied_fields = (
        "length", "type", "mode", "opcode", "modrm", "sib",
        "extindex", "fpuindex", "dispbytes", "immbytes",
        "sectionbytes", "flags",
    )
    for field in copied_fields:
        setattr(self, field, getattr(instruction, field))

    for slot in ("op1", "op2", "op3"):
        operand = getattr(instruction, slot)
        if operand.type:
            setattr(self, slot, PyOperand(operand))

    # Disassembly string of instruction
    full_text = pydasm.get_instruction_string(instruction,
                                              pydasm.FORMAT_INTEL, 0x0)
    self.disasm = full_text.rstrip(" ")
    mnemonic_text = pydasm.get_mnemonic_string(instruction,
                                               pydasm.FORMAT_INTEL)
    self.mnemonic = mnemonic_text.rstrip(" ")
def disas(tb, until=None):
    """Print the disassembly of the instruction bytes recorded in *tb*.

    Only body entries whose ``op`` is 'IFLO_INSN_BYTES' are used;
    ``args[0]`` is the address and ``args[1]`` the hex-encoded bytes.

    Without *until*, only *tb* is printed. With *until*, blocks are printed
    one after another via ``tb.next`` until ``until(block)`` is true for the
    block just printed.
    """
    def dump(block):
        # Decode every instruction of the block first, then print them all.
        decoded = []
        for _, entry in block.body:
            if entry.op == 'IFLO_INSN_BYTES':
                raw = entry.args[1].decode('hex')
                decoded.append(
                    (entry.args[0],
                     pydasm.get_instruction(raw, pydasm.MODE_32)))
        for addr, insn in decoded:
            text = pydasm.get_instruction_string(insn, pydasm.FORMAT_INTEL,
                                                 addr)
            print("%08x %s" % (addr, text))

    if not until:
        dump(tb)
        return

    while True:
        dump(tb)
        if until(tb):
            return
        tb = tb.next
def assembly_disassemble():
    """Disassemble a user-chosen number of bytes from SUBJECT's entry point.

    The byte count is requested with ``getInput``; non-numeric input is
    reported through ``reportError`` and nothing is disassembled. Each
    instruction (or ``db xx`` for an undecodable byte) is added to a
    ``ChocolateLog``, which is shown interactively at the end.
    """
    try:
        max_bytes = int(getInput('Number of bytes to disassemble'))
    except ValueError:
        reportError('Please specify only numeric values')
        return

    clog = ChocolateLog()
    OEP = SUBJECT.OPTIONAL_HEADER.AddressOfEntryPoint
    OEP_base = OEP + SUBJECT.OPTIONAL_HEADER.ImageBase
    data = SUBJECT.get_memory_mapped_image()[OEP:]

    # Fixed: never walk past the mapped image. The original only checked
    # max_bytes, so data[offset] could raise IndexError.
    limit = min(max_bytes, len(data))
    offset = 0
    while offset < limit:
        ins = pydasm.get_instruction(data[offset:], pydasm.MODE_32)
        if ins is None:
            # Undecodable byte: emit it as raw data and move on by one byte.
            asm = 'db %02x' % ord(data[offset])
            step = 1
        else:
            asm = pydasm.get_instruction_string(ins, pydasm.FORMAT_INTEL,
                                                OEP_base + offset)
            step = ins.length
        clog.add('%s\t%s' % (__assembly_offset(offset), asm))
        offset += step

    clog.interactiveOutput()
def modify_entry_instructions(ep_ava, original_instructions, heuristic_decoder_offset, code_cave_address):
    """Rewrite relocated entry instructions so their jumps still resolve.

    Every instruction in *original_instructions* is decoded. Instructions
    whose first byte is a known jump or call opcode are re-encoded in their
    rel32 form, with the target recomputed by ``update_jump_location`` for
    the instruction's new address. All other instructions are copied as is.

    Parameters:
    ep_ava                   -- virtual address the instructions came from.
    original_instructions    -- raw instruction bytes.
    heuristic_decoder_offset -- size of the code that precedes these
                                instructions in the code cave.
    code_cave_address        -- code cave base address as a hex string.

    Returns the rewritten instruction bytes.
    Raises ValueError when an instruction cannot be decoded.
    """
    unconditional_jump_opcodes = {
        "eb": "\xe9",  # jmp short
        "e9": "\xe9",  # jmp
        "ea": "\xea",  # jmp far
        "e8": "\xe8",  # call
    }

    conditional_jump_opcodes = {
        "77": "\x0f\x87",  # ja/jnbe
        "73": "\x0f\x83",  # jae/jnb
        "72": "\x0f\x82",  # jb/jnae
        "76": "\x0f\x86",  # jbe/jna
        "74": "\x0f\x84",  # je/jz
        "7f": "\x0f\x8f",  # jg/jnle
        "7d": "\x0f\x8d",  # jge/jnl
        "7c": "\x0f\x8c",  # jl/jnge
        "7e": "\x0f\x8e",  # jle/jng
        "75": "\x0f\x85",  # jne/jnz
        "71": "\x0f\x81",  # jno
        "79": "\x0f\x89",  # jns
        "7b": "\x0f\x8b",  # jnp/jpo
        "70": "\x0f\x80",  # jo
        "7a": "\x0f\x8a",  # jp/jpe
        "78": "\x0f\x88",  # js
    }

    updated_chunks = []  # holds the modified data, one entry per instruction
    current_offset = 0
    added_bytes = 0
    while current_offset < len(original_instructions):
        # get the asm for each instruction
        i = pydasm.get_instruction(original_instructions[current_offset:],
                                   pydasm.MODE_32)
        if i is None:
            raise ValueError("cannot decode entry instruction at offset %d"
                             % current_offset)
        asm = pydasm.get_instruction_string(i, pydasm.FORMAT_INTEL,
                                            ep_ava + current_offset)

        # increment counters
        prior_offset = current_offset
        current_offset += i.length

        instruct_bytes = original_instructions[prior_offset:current_offset]
        opcode = binascii.hexlify(instruct_bytes[0])  # first opcode byte

        # the current address = the code cave address + the length of the
        # heuristic functions + the decoder functions + the length of the
        # replaced entry instructions + any bytes added by modification
        current_address = (int(code_cave_address, 16)
                           + heuristic_decoder_offset
                           + prior_offset + added_bytes)

        # "<l" is always a 4-byte little-endian value. The original native
        # "l" format packs 8 bytes where C long is 64-bit, which would
        # corrupt the instruction stream.
        if opcode in conditional_jump_opcodes:
            new_jmp_loc = update_jump_location(asm, current_address, 6)
            new_instruct_bytes = (conditional_jump_opcodes[opcode]
                                  + struct.pack("<l", new_jmp_loc))
        elif opcode in unconditional_jump_opcodes:
            new_jmp_loc = update_jump_location(asm, current_address, 5)
            new_instruct_bytes = (unconditional_jump_opcodes[opcode]
                                  + struct.pack("<l", new_jmp_loc))
        else:
            new_instruct_bytes = instruct_bytes

        updated_chunks.append(new_instruct_bytes)
        # turning short jumps into long ones adds bytes
        added_bytes += len(new_instruct_bytes) - len(instruct_bytes)

    return "".join(updated_chunks)
def disassemble(buf):
    """Disassemble *buf* into a list of ``(raw_bytes, text)`` tuples.

    Decoding stops early, returning what was collected so far, when the
    module-level ``__BYTECOUNT__`` is non-zero and has been reached, or
    when the bytes at the current offset cannot be decoded.
    """
    offset = 0
    output = []

    # get_instruction only decodes the first instruction of its input, so
    # the buffer is walked one instruction at a time.
    while offset < len(buf):
        # Fixed: compare by value; `is not 0` tested object identity.
        if __BYTECOUNT__ != 0 and offset >= __BYTECOUNT__:
            print('[+] Byte count was given, and all is parsed until bytecount')
            return output

        instruction = pydasm.get_instruction(buf[offset:], pydasm.MODE_32)
        if not instruction:
            print('%s %s' % ('[+] Cannot find intructions in the given buffer at offset: ', offset))
            return output

        instruction_string = pydasm.get_instruction_string(
            instruction, pydasm.FORMAT_INTEL, offset)

        old_offset = offset
        offset += instruction.length

        # Fixed: the slice used to end at offset + 1, which also included
        # the first byte of the following instruction.
        output.append((buf[old_offset:offset], instruction_string))

    return output
def dumpasm(data, opcodesize=configopts['asmopcodesize'], fillchar=configopts['asmfillchar']):
    """Print a 32-bit disassembly listing of *data*.

    Does nothing unless ``configopts['asm4shellcode']`` is set. Each line
    shows the offset, the instruction length, the opcode bytes padded to
    *opcodesize* with *fillchar*, and the Intel-syntax text. The listing
    stops at the first undecodable instruction.
    """
    if not configopts['asm4shellcode']:
        return

    import pydasm

    position = 0
    while position < len(data):
        insn = pydasm.get_instruction(data[position:], pydasm.MODE_32)
        if not insn:
            break

        raw = data[position:(position + insn.length)]
        opcodes = "".join(["%02x " % (ord(c)) for c in raw])
        text = pydasm.get_instruction_string(insn, pydasm.FORMAT_INTEL, 0)
        print("[0x%08x] (%02dB) %s %s" % (
            position, insn.length, opcodes.ljust(opcodesize, fillchar), text))
        position += insn.length
def __init__(self, instruction, rawinstruction, eip):
    """Capture a decoded pydasm instruction together with its raw bytes.

    Parameters:
    instruction    -- decoded pydasm instruction object.
    rawinstruction -- byte string starting at the instruction; only the
                      first ``instruction.length`` bytes are kept in ``raw``.
    eip            -- address used when rendering ``disasm``.
    """
    for field in ("length", "type", "mode", "opcode", "modrm", "sib",
                  "extindex", "fpuindex", "dispbytes", "immbytes",
                  "sectionbytes", "flags"):
        setattr(self, field, getattr(instruction, field))

    self.eip = eip
    self.raw = rawinstruction[0:self.length]

    mnemonic_text = pydasm.get_mnemonic_string(instruction,
                                               pydasm.FORMAT_INTEL)
    self.mnemonic = mnemonic_text.rstrip(" ")
    disasm_text = pydasm.get_instruction_string(instruction,
                                                pydasm.FORMAT_INTEL, self.eip)
    self.disasm = disasm_text.rstrip(" ")
    self.description = ''

    #
    # In the future we'll use a PyOperand class, for now its to slow
    #
    # An operand whose type is falsy is stored as ''.
    for slot in ("op1", "op2", "op3"):
        operand = getattr(instruction, slot)
        setattr(self, slot, operand if operand.type else '')
def __init__(self, instruction, rawinstruction, eip):
    """Capture a decoded pydasm instruction together with its raw bytes.

    Parameters:
    instruction    -- decoded pydasm instruction object.
    rawinstruction -- byte string starting at the instruction; only the
                      first ``instruction.length`` bytes are kept in ``raw``.
    eip            -- address used when rendering ``disasm``.
    """
    for field in ("length", "type", "mode", "opcode", "modrm", "sib",
                  "extindex", "fpuindex", "dispbytes", "immbytes",
                  "sectionbytes", "flags"):
        setattr(self, field, getattr(instruction, field))

    self.eip = eip
    self.raw = rawinstruction[0:self.length]

    mnemonic_text = pydasm.get_mnemonic_string(instruction,
                                               pydasm.FORMAT_INTEL)
    self.mnemonic = mnemonic_text.rstrip(" ")
    disasm_text = pydasm.get_instruction_string(instruction,
                                                pydasm.FORMAT_INTEL, self.eip)
    self.disasm = disasm_text.rstrip(" ")
    self.description = ''

    #
    # In the future we'll use a PyOperand class, for now its to slow
    #
    # An operand whose type is falsy is stored as ''.
    for slot in ("op1", "op2", "op3"):
        operand = getattr(instruction, slot)
        setattr(self, slot, operand if operand.type else '')
def __str__(self):
    """Return a header line followed by one line per gadget instruction.

    The header shows the start and end addresses and an "(overlapping)"
    marker when ``self.overlap`` is set. Each instruction line shows its
    address, opcode and Intel-syntax text.
    """
    overlap_tag = "(overlapping)" if self.overlap else ""
    header = "gadget @ %.08X:%.08X %s\n" % (
        self.get_start_ea(), self.get_end_ea(), overlap_tag)

    body = '\n'.join([
        "%.08X %.2X %s" % (
            ea, instr.opcode,
            pydasm.get_instruction_string(instr, pydasm.FORMAT_INTEL, 0))
        for ea, instr in self._instrs
    ])
    return header + body
def disassemble_range(dbg):
    """Disassemble [start_address, end_address) and feed the JSON tree.

    Resets and fills these module-level globals:
    all_jump_addr  -- addresses of instructions whose mnemonic is in all_jump
    bp_to_be_set   -- first address of each finished chunk
    addr_dict      -- address -> instruction string
    addr_cmnt_dict -- address -> comment, initialised to "NA"

    Instructions are grouped into chunks. A chunk is passed to
    ``add_to_json_tree`` at a jump instruction, and when the instruction
    sits at ``end_address - 1``. Returns DBG_CONTINUE.
    """
    global all_jump_addr, bp_to_be_set, addr_dict, addr_cmnt_dict

    print('[+] Disassembling the given address range')
    raw_bin = dbg.read_process_memory(start_address,
                                      end_address - start_address)

    all_jump_addr = []                       # including start & end of function
    bp_to_be_set = []                        # bp for control flow drawing
    addr_dict = {}                           # address -> instruction
    addr_cmnt_dict = {start_address: 'NA'}   # address -> comment

    chunck = []
    offset = 0
    while offset < len(raw_bin):
        address = start_address + offset
        i = pydasm.get_instruction(raw_bin[offset:], pydasm.MODE_32)
        if i is None:
            # Fixed: stop cleanly instead of crashing on a failed decode.
            print('[-] Could not decode instruction at 0x%08x' % address)
            break

        # Fixed: render each instruction at its own address. The original
        # always passed start_address.
        instruction = pydasm.get_instruction_string(i, pydasm.FORMAT_INTEL,
                                                    address)
        instruction = instruction.replace('dword', '')

        # Fixed: drop only the Python 2 long suffix. The original removed
        # the last character of any 9-character hex string, digit or not.
        new = [hex(address).rstrip('L')]
        new.extend(instruction.split(' ', 1))

        addr_dict[address] = instruction
        addr_cmnt_dict[address] = "NA"
        chunck.append(new)

        if address == end_address - 1:
            bp_to_be_set.append(chunck[0][0])
            add_to_json_tree(chunck)

        if new[1] in all_jump:
            all_jump_addr.append(address)
            add_to_json_tree(chunck)
            bp_to_be_set.append(chunck[0][0])
            chunck = []

        offset += i.length

    closejs()
    print('[+] Disassembling Done!')
    return DBG_CONTINUE
def getDisasm(raw_bin):
    """Return the disassembly of *raw_bin* as one '; '-separated string.

    Decoding is best effort: when anything goes wrong at the current
    offset, 'Unknown;' is emitted and decoding resumes one byte further.
    """
    pieces = []
    offset = 0
    while offset < len(raw_bin):
        try:
            i = pydasm.get_instruction(raw_bin[offset:], pydasm.MODE_32)
            instruction = pydasm.get_instruction_string(
                i, pydasm.FORMAT_INTEL, 0)
            piece = instruction + '; '
            step = i.length
        except Exception:
            # Fixed: the original never advanced offset here, so one bad
            # byte made the loop spin forever.
            piece = 'Unknown' + ';'
            step = 1
        pieces.append(piece)
        offset += step
    # Fixed: the assembled text was built but never returned.
    return ''.join(pieces)
def disassembl(dat):
    """Return newline-terminated disassembly lines built from *dat*.

    Decoding stops at the first position that cannot be decoded.
    """
    # NOTE(review): the hex *text* of dat is what gets disassembled, not
    # dat itself. That looks unintended, but it is kept because consumers
    # may depend on the current output. Confirm before changing.
    mal = binascii.hexlify(dat)
    lines = []
    offset = 0
    while offset < len(mal):
        i = pydasm.get_instruction(mal[offset:], pydasm.MODE_32)
        if not i:
            # Fixed: check before building the line. The original added
            # "\n" to the rendered text first, which fails on a bad decode.
            break
        lines.append(
            pydasm.get_instruction_string(i, pydasm.FORMAT_INTEL, 0) + "\n")
        offset += i.length
    return "".join(lines)
def dump_simple(self, extra=False):
    """Build a ``SimpleGadget`` summarising this gadget.

    ``red`` is true when neither the start address nor the last byte of
    the gadget belongs to a function known to ``inp``. With *extra*, the
    instruction addresses, the '; '-joined disassembly and the function of
    the last byte are attached through ``set_extra``.
    """
    func_ea = inp.get_func_of(self._start_ea)
    if func_ea:
        red = False
    else:
        red = not inp.get_func_of(self.get_real_end_ea() - 1)

    sg = SimpleGadget(self._start_ea, self.get_real_end_ea(), self.overlap,
                      red, len(self._instrs), func_ea)
    if not extra:
        return sg

    addresses = [a for a, i in self._instrs]
    listing = '; '.join([
        pydasm.get_instruction_string(i, pydasm.FORMAT_INTEL, 0)
        for a, i in self._instrs
    ])
    sg.set_extra(addresses, listing,
                 inp.get_func_of(self.get_real_end_ea() - 1))
    return sg
def dis(buff):
    """Return the disassembly of *buff* as a list of instruction strings.

    Each entry is rendered with the instruction's offset as its address.
    The rendered value is appended before the decode result is checked, so
    a failed decode contributes one final entry and ends the listing.
    """
    listing = []
    position = 0
    while position < len(buff):
        insn = pydasm.get_instruction(buff[position:], pydasm.MODE_32)
        listing.append(
            pydasm.get_instruction_string(insn, pydasm.FORMAT_INTEL, position))
        if not insn:
            break
        position += insn.length
    return listing
def disassembl(dat):
    """Return newline-terminated disassembly lines built from *dat*.

    Decoding stops at the first position that cannot be decoded.
    """
    # NOTE(review): the hex *text* of dat is what gets disassembled, not
    # dat itself. That looks unintended, but it is kept because consumers
    # may depend on the current output. Confirm before changing.
    mal = binascii.hexlify(dat)
    lines = []
    offset = 0
    while offset < len(mal):
        i = pydasm.get_instruction(mal[offset:], pydasm.MODE_32)
        if not i:
            # Fixed: check before building the line. The original added
            # "\n" to the rendered text first, which fails on a bad decode.
            break
        lines.append(
            pydasm.get_instruction_string(i, pydasm.FORMAT_INTEL, 0) + "\n")
        offset += i.length
    return "".join(lines)
def disassemble_range(dbg):
    """Disassemble [start_address, end_address) and feed the JSON tree.

    Resets and fills these module-level globals:
    all_jump_addr  -- addresses of instructions whose mnemonic is in all_jump
    bp_to_be_set   -- first address of each finished chunk
    addr_dict      -- address -> instruction string
    addr_cmnt_dict -- address -> comment, initialised to "NA"

    Instructions are grouped into chunks. A chunk is passed to
    ``add_to_json_tree`` at a jump instruction, and when the instruction
    sits at ``end_address - 1``. Returns DBG_CONTINUE.
    """
    global all_jump_addr, bp_to_be_set, addr_dict, addr_cmnt_dict

    print('[+] Disassembling the given address range')
    raw_bin = dbg.read_process_memory(start_address,
                                      end_address - start_address)

    all_jump_addr = []                       # including start & end of function
    bp_to_be_set = []                        # bp for control flow drawing
    addr_dict = {}                           # address -> instruction
    addr_cmnt_dict = {start_address: 'NA'}   # address -> comment

    chunck = []
    offset = 0
    while offset < len(raw_bin):
        address = start_address + offset
        i = pydasm.get_instruction(raw_bin[offset:], pydasm.MODE_32)
        if i is None:
            # Fixed: stop cleanly instead of crashing on a failed decode.
            print('[-] Could not decode instruction at 0x%08x' % address)
            break

        # Fixed: render each instruction at its own address. The original
        # always passed start_address.
        instruction = pydasm.get_instruction_string(i, pydasm.FORMAT_INTEL,
                                                    address)
        instruction = instruction.replace('dword', '')

        # Fixed: drop only the Python 2 long suffix. The original removed
        # the last character of any 9-character hex string, digit or not.
        new = [hex(address).rstrip('L')]
        new.extend(instruction.split(' ', 1))

        addr_dict[address] = instruction
        addr_cmnt_dict[address] = "NA"
        chunck.append(new)

        if address == end_address - 1:
            bp_to_be_set.append(chunck[0][0])
            add_to_json_tree(chunck)

        if new[1] in all_jump:
            all_jump_addr.append(address)
            add_to_json_tree(chunck)
            bp_to_be_set.append(chunck[0][0])
            chunck = []

        offset += i.length

    closejs()
    print('[+] Disassembling Done!')
    return DBG_CONTINUE
def preserve_entry_instructions(pe, ep, ep_ava, offset_end):
    """Return the whole entry instructions covering at least *offset_end* bytes.

    Instructions starting at *ep* are decoded and printed until their
    combined length reaches *offset_end*, so the returned bytes never end
    in the middle of an instruction.

    Parameters:
    pe         -- object exposing ``get_memory_mapped_image()``.
    ep         -- offset of the entry point inside the mapped image.
    ep_ava     -- virtual address of the entry point (used for display).
    offset_end -- minimum number of bytes that must be preserved.

    Raises ValueError when an instruction cannot be decoded.
    """
    offset = 0
    # Read 30 bytes more than requested so the last instruction is complete.
    original_instructions = pe.get_memory_mapped_image()[ep:ep + offset_end + 30]
    print("[*] Preserving the following entry instructions (at entry address %s):" % hex(ep_ava))
    while offset < offset_end:
        i = pydasm.get_instruction(original_instructions[offset:],
                                   pydasm.MODE_32)
        if i is None:
            # Fixed: fail with a clear error. Returning a partial
            # instruction stream would corrupt the patched entry point.
            raise ValueError("cannot decode entry instruction at offset %d"
                             % offset)
        asm = pydasm.get_instruction_string(i, pydasm.FORMAT_INTEL,
                                            ep_ava + offset)
        print("\t[+] " + asm)
        offset += i.length

    # Cut at the confirmed boundary to avoid partial instructions. The
    # bytes are already in hand, so the image is not mapped a second time.
    return original_instructions[:offset]
def dump_simple(self, extra=False):
    """Build a ``SimpleGadget`` summarising this gadget.

    ``red`` is true when neither the start address nor the last byte of
    the gadget belongs to a function known to ``inp``. With *extra*, the
    instruction addresses, the '; '-joined disassembly and the function of
    the last byte are attached through ``set_extra``.
    """
    func_ea = inp.get_func_of(self._start_ea)
    if func_ea:
        red = False
    else:
        red = not inp.get_func_of(self.get_real_end_ea() - 1)

    sg = SimpleGadget(self._start_ea, self.get_real_end_ea(), self.overlap,
                      red, len(self._instrs), func_ea)
    if not extra:
        return sg

    addresses = [a for a, i in self._instrs]
    listing = '; '.join([
        pydasm.get_instruction_string(i, pydasm.FORMAT_INTEL, 0)
        for a, i in self._instrs
    ])
    sg.set_extra(addresses, listing,
                 inp.get_func_of(self.get_real_end_ea() - 1))
    return sg
def pydasm_disasm(file_name, start, size, linear):
    """Disassemble *size* bytes of *file_name* starting at offset *start*.

    With *linear* true the bytes are decoded one instruction after another
    and printed. Otherwise the work is handed to
    ``pydasm_disasm_recursive``.
    """
    if linear:
        print("Running Pydasm Linear Disassembler starting from " + str(start) + " for " + str(size) + " bytes!")

        # Fixed: machine code must be read in binary mode. Text mode can
        # translate newline bytes or stop early on some platforms.
        with open(file_name, 'rb') as f:
            f.seek(start)
            code = f.read(size)

        # Iterate through the buffer and disassemble
        offset = 0
        while offset < len(code):
            i = pydasm.get_instruction(code[offset:], pydasm.MODE_32)
            print(pydasm.get_instruction_string(i, pydasm.FORMAT_INTEL, 0))
            if not i:
                break
            offset += i.length
    else:
        pydasm_disasm_recursive(file_name, start, start + size, start, [])
        print("Pydasm - RECURSIVE: TO BE IMPLEMENTED!")
def disassemble_dll(file_path):
    """Return the disassembly of the whole file at *file_path*.

    The raw file content is decoded from its first byte. The result is a
    list of Intel-syntax strings, ending at the first undecodable
    instruction.
    """
    # Fixed: the context manager closes the file even when read() raises.
    with open(file_path, "rb") as f:
        buff = f.read()

    instructions = []
    offset = 0
    while offset < len(buff):
        i = pydasm.get_instruction(buff[offset:], pydasm.MODE_32)
        if not i:
            break
        instructions.append(
            pydasm.get_instruction_string(i, pydasm.FORMAT_INTEL, 0))
        offset += i.length
    return instructions
def set_bp_on_ret(dbg, addr, handler):
    """Set a breakpoint on the first ``ret`` found after *addr*.

    Up to 0x1000 bytes are read from the debuggee and scanned one
    instruction at a time. The scan ends without a breakpoint at the first
    ``jmp``, at the first undecodable instruction, or when the bytes run
    out.

    Parameters:
    dbg     -- debugger exposing ``read_process_memory`` and ``bp_set``.
    addr    -- address to start scanning from.
    handler -- callback passed to ``bp_set``.
    """
    sizeof_code_to_read = 0x1000
    asm = dbg.read_process_memory(addr, sizeof_code_to_read)
    # Fixed: never scan past what was actually read.
    limit = min(sizeof_code_to_read, len(asm))
    i = 0
    while i < limit:
        inst = pydasm.get_instruction(asm[i:], pydasm.MODE_32)
        if inst is None:
            # Fixed: a failed decode used to crash on the string test below.
            return
        inststr = pydasm.get_instruction_string(inst, pydasm.FORMAT_INTEL, 0)
        if inststr.startswith("jmp"):
            return
        elif inststr.startswith("ret"):
            dbg.bp_set(addr + i, description="", handler=handler)
            return
        i += inst.length
    return
def getCode(pe):
    """Get the assembly representation of the binary file's instructions.

    Takes a PE object and returns a string with the ASM code, one
    instruction per line, starting at the entry point and ending at the
    first undecodable instruction."""
    entry_rva = pe.OPTIONAL_HEADER.AddressOfEntryPoint
    entry_va = entry_rva + pe.OPTIONAL_HEADER.ImageBase
    data = pe.get_memory_mapped_image()[entry_rva:]

    lines = []
    position = 0
    total = len(data)
    while position < total:
        insn = pydasm.get_instruction(data[position:], pydasm.MODE_32)
        if insn is None:
            break
        text = pydasm.get_instruction_string(insn, pydasm.FORMAT_INTEL,
                                             entry_va + position)
        lines.append(text + "\n")
        position += int(insn.length)
    return "".join(lines)
def __init__(self, ea, bytes, spd):
    """Decode *bytes* as one 32-bit instruction located at address *ea*.

    Parameters:
    ea    -- address of the instruction.
    bytes -- raw instruction bytes.
    spd   -- stack pointer delta at this instruction.

    When the bytes cannot be decoded, a notice is printed and the
    module-level ``nop`` instruction is used in their place.
    """
    self.addr = ea
    self.bytes = bytes

    # copy whatever we need from the pydasm instruction object
    inst = pydasm.get_instruction(bytes, pydasm.MODE_32)
    if inst is None:
        escaped = ''.join(('\\x%02x' % ord(b) for b in bytes))
        print("%s %s %s" % ("IGNORE:", hex(ea), escaped))
        inst = nop

    self.disas = pydasm.get_instruction_string(inst, pydasm.FORMAT_INTEL, ea)
    self.mnem = pydasm.get_mnemonic_string(inst, pydasm.FORMAT_INTEL)
    self.type = inst.type
    self.modrm_off = inst.modrm_offset
    self.opc_off = inst.opcode_offset
    self.eflags_r = inst.eflags_used
    self.eflags_w = inst.eflags_affected
    self.uses_sib = False
    self.inst_len = inst.length - inst.opcode_offset  # no prefixes!!

    self.spd = spd           # stack pointer delta
    self.pos = -1            # instruction position after ordering
    self.raddr = ea          # address after reordering (if changed)
    self.implicit = set()    # registers used implicitly by this instruction
    self.f_entry = False     # whether the instruction is a function entry point
    self.f_exit = inst.type == pydasm.INSTRUCTION_TYPE_RET
    self.regs = dict()       # holds bit positions in the instruction per reg
    self.updated = False     # for call instr, tells whether it was updated
    self.can_change = set()  # registers that can change in a indirect call

    # these copies of bytes and regs are initialized by reset_changed
    self.cregs = None
    self.cbytes = None
    self.creg_names = None

    # liveness information
    self.succ = set()  # list of successor instruction addresses
    self.USE = set()   # regs used (read) by this instruction
    self.DEF = set()   # regs defined (written) by this instruction
    self.IN = set()    # regs that are live before instruction execution
    self.OUT = set()   # regs that are live after instruction execution
    self.IN_old = None
    self.OUT_old = None

    # TODO: special case for lea optimization (3 operands)
    self._get_use_def(inst)
    self._store_operands(inst)
    self.reset_changed()
def get_disassembly(sel, start=None):
    """Disassemble *sel* into a list of ``[hex_bytes, asm]`` pairs.

    Parameters:
    sel   -- raw bytes to disassemble.
    start -- address of the first byte (defaults to 0); used when
             rendering the instructions.

    ``hex_bytes`` is the comma-separated upper-case hex of the instruction
    bytes. A byte that cannot be decoded is reported as ``db 0x..`` and
    decoding resumes at the next byte.
    """
    from pydasm import get_instruction, get_instruction_string, MODE_32, FORMAT_INTEL

    if start is None:
        start = 0
    r = []
    offset = 0
    # TODO: get the source offset
    while offset < len(sel):
        instruction = get_instruction(sel[offset:], MODE_32)
        if instruction is None:
            # Fixed: the original read .length on a failed decode.
            l = 1
            asm = "db 0x%02X" % ord(sel[offset])
        else:
            l = instruction.length
            asm = get_instruction_string(instruction, FORMAT_INTEL,
                                         offset + start)
        h = ",".join(["%02X" % ord(c) for c in sel[offset:offset + l]])
        r.append([h, asm])
        offset += l
    return r
def get_disassembly(sel, start=None):
    """Disassemble *sel* into a list of ``[hex_bytes, asm]`` pairs.

    Parameters:
    sel   -- raw bytes to disassemble.
    start -- address of the first byte (defaults to 0); used when
             rendering the instructions.

    ``hex_bytes`` is the comma-separated upper-case hex of the instruction
    bytes. A byte that cannot be decoded is reported as ``db 0x..`` and
    decoding resumes at the next byte.
    """
    from pydasm import get_instruction, get_instruction_string, MODE_32, FORMAT_INTEL

    if start is None:
        start = 0
    r = []
    offset = 0
    # TODO: get the source offset
    while offset < len(sel):
        instruction = get_instruction(sel[offset:], MODE_32)
        if instruction is None:
            # Fixed: the original read .length on a failed decode.
            l = 1
            asm = "db 0x%02X" % ord(sel[offset])
        else:
            l = instruction.length
            asm = get_instruction_string(instruction, FORMAT_INTEL,
                                         offset + start)
        h = ",".join(["%02X" % ord(c) for c in sel[offset:offset + l]])
        r.append([h, asm])
        offset += l
    return r
def findOptimalTrampolineLength(address, minlen=5, maxlen=12, noisy=False):
    """Return how many bytes at *address* make up whole instructions.

    Instructions are decoded from *address* and their lengths summed until
    the total reaches *minlen*. Decoding also stops when the total reaches
    *maxlen* or an instruction cannot be decoded. With *noisy*, progress
    is written through ``log.debug``.
    """
    if noisy:
        log.debug("Determining optimal tramploine size for address 0x%08x:"%(address))

    code = _detour.util.read(address, maxlen+5)
    covered = 0      # bytes consumed so far; also the read position
    decoded = 0      # number of instructions decoded
    while covered < maxlen:
        insn = pydasm.get_instruction(code[covered:], pydasm.MODE_32)
        if not insn:
            break
        if noisy:
            log.debug("%d bytes: %s"%(insn.length, pydasm.get_instruction_string(insn, pydasm.FORMAT_INTEL, 0)))
        decoded += 1
        covered += insn.length
        if covered >= minlen:
            break

    if noisy:
        log.debug("optimal size is %d bytes (%d instructions)"%(covered, decoded))
    return covered
def test_get_instruction_string(self):
    """The sample bytes must render as nop followed by three xor instructions."""
    code = b'\x90\x31\xc9\x31\xca\x31\xcb'
    expected = (
        'nop '
        'xor ecx,ecx'
        'xor edx,ecx'
        'xor ebx,ecx'
    )

    rendered = ''
    position = 0
    while position < len(code):
        insn = pydasm.get_instruction(code[position:], pydasm.MODE_32)
        rendered = rendered + pydasm.get_instruction_string(
            insn, pydasm.FORMAT_INTEL, 0)
        if not insn:
            break
        position += insn.length

    self.assertEqual(rendered, expected)
def pydasm_disasm_recursive(file_name, loadStart, loadEnd, pos, history):
    """Disassemble from *pos*, following jumps to locations not yet visited.

    Parameters:
    file_name -- file holding the machine code.
    loadStart -- file offset that position 0 corresponds to.
    loadEnd   -- end position; ``loadEnd - pos`` bytes are read.
    pos       -- position to start decoding at.
    history   -- list of visited positions, extended in place.

    Returns *history* (the same list object).
    """
    # Fixed: machine code must be read in binary mode.
    with open(file_name, 'rb') as f:
        f.seek(loadStart + pos)
        code = f.read(loadEnd - pos)

    # Iterate through the buffer and disassemble
    offset = 0
    while offset < len(code):
        if (pos + offset) not in history:
            history.append(pos + offset)

        inst = pydasm.get_instruction(code[offset:], pydasm.MODE_32)
        if not inst:
            break
        inst_str = pydasm.get_instruction_string(inst, pydasm.FORMAT_INTEL, 0)
        print(inst_str)

        if is_jump(inst_str):
            jump_loc = pydasm.get_operand_string(inst, 0,
                                                 pydasm.FORMAT_INTEL, 0)
            try:
                parsed_jump_loc = int(jump_loc, 16)
                if not is_new_jump(parsed_jump_loc, history):
                    print("Did not go to jump because location has already been visited. (" + jump_loc + ")")
                else:
                    print("Jumping to " + jump_loc + "!")
                    # Fixed: the recursive call already extends `history`
                    # in place. The original also appended its return
                    # value, i.e. the history list itself, to history.
                    pydasm_disasm_recursive(file_name, loadStart, loadEnd,
                                            parsed_jump_loc, history)
                    print("Returned from jumping to " + jump_loc + "!")
            except ValueError:
                print("Did not go to jump because location is not numeric. (" + jump_loc + ")")

            if is_unconditional_jump(inst_str):
                return history

        offset += inst.length
    return history
def __init__(self, ea, bytes, spd):
    """Decode *bytes* as one 32-bit instruction located at address *ea*.

    Parameters:
    ea    -- address of the instruction.
    bytes -- raw instruction bytes.
    spd   -- stack pointer delta at this instruction.

    When the bytes cannot be decoded, a notice is printed and the
    module-level ``nop`` instruction is used in their place.
    """
    self.addr = ea
    self.bytes = bytes

    # copy whatever we need from the pydasm instruction object
    inst = pydasm.get_instruction(bytes, pydasm.MODE_32)
    if inst is None:
        escaped = ''.join(('\\x%02x' % ord(b) for b in bytes))
        print("%s %s %s" % ("IGNORE:", hex(ea), escaped))
        inst = nop

    self.disas = pydasm.get_instruction_string(inst, pydasm.FORMAT_INTEL, ea)
    self.mnem = pydasm.get_mnemonic_string(inst, pydasm.FORMAT_INTEL)
    self.type = inst.type
    self.modrm_off = inst.modrm_offset
    self.opc_off = inst.opcode_offset
    self.eflags_r = inst.eflags_used
    self.eflags_w = inst.eflags_affected
    self.uses_sib = False
    self.inst_len = inst.length - inst.opcode_offset  # no prefixes!!

    self.spd = spd           # stack pointer delta
    self.pos = -1            # instruction position after ordering
    self.raddr = ea          # address after reordering (if changed)
    self.implicit = set()    # registers used implicitly by this instruction
    self.f_entry = False     # whether the instruction is a function entry point
    self.f_exit = inst.type == pydasm.INSTRUCTION_TYPE_RET
    self.regs = dict()       # holds bit positions in the instruction per reg
    self.updated = False     # for call instr, tells whether it was updated
    self.can_change = set()  # registers that can change in a indirect call

    # these copies of bytes and regs are initialized by reset_changed
    self.cregs = None
    self.cbytes = None
    self.creg_names = None

    # liveness information
    self.succ = set()  # list of successor instruction addresses
    self.USE = set()   # regs used (read) by this instruction
    self.DEF = set()   # regs defined (written) by this instruction
    self.IN = set()    # regs that are live before instruction execution
    self.OUT = set()   # regs that are live after instruction execution
    self.IN_old = None
    self.OUT_old = None

    # TODO: special case for lea optimization (3 operands)
    self._get_use_def(inst)
    self._store_operands(inst)
    self.reset_changed()
def decode(self, address, code):
    """Decode every instruction in *code*, which is located at *address*.

    Returns a list of ``(address, length, disassembly, hexdump)`` tuples.
    A byte that is not a valid opcode, or whose instruction would run past
    the end of *code*, is reported as a one-byte ``db 0x..`` entry.
    """
    decoded = []
    total = len(code)
    position = 0
    while position < total:
        # At most 32 bytes are handed to the decoder per instruction.
        insn = pydasm.get_instruction(code[position:position + 32],
                                      pydasm.MODE_32)
        current = address + position

        if insn and insn.length + position <= total:
            # Correctly decoded instruction.
            disasm = pydasm.get_instruction_string(insn, pydasm.FORMAT_INTEL,
                                                   current)
            ilen = insn.length
            hexdump = HexDump.hexadecimal(code[position:position + ilen])
        else:
            # Illegal opcode or opcode longer than remaining buffer.
            hexdump = '%.2X' % ord(code[position])
            disasm = 'db 0x%s' % hexdump
            ilen = 1

        decoded.append((current, ilen, disasm, hexdump))
        position += ilen

    return decoded
def decode(self, address, code):
    """Decode every instruction in *code*, which is located at *address*.

    Returns a list of ``(address, length, disassembly, hexdump)`` tuples.
    A byte that is not a valid opcode, or whose instruction would run past
    the end of *code*, is reported as a one-byte ``db 0x..`` entry.
    """
    decoded = []
    total = len(code)
    position = 0
    while position < total:
        # At most 32 bytes are handed to the decoder per instruction.
        insn = pydasm.get_instruction(code[position:position + 32],
                                      pydasm.MODE_32)
        current = address + position

        if insn and insn.length + position <= total:
            # Correctly decoded instruction.
            disasm = pydasm.get_instruction_string(insn, pydasm.FORMAT_INTEL,
                                                   current)
            ilen = insn.length
            hexdump = HexDump.hexadecimal(code[position:position + ilen])
        else:
            # Illegal opcode or opcode longer than remaining buffer.
            hexdump = '%.2X' % ord(code[position])
            disasm = 'db 0x%s' % hexdump
            ilen = 1

        decoded.append((current, ilen, disasm, hexdump))
        position += ilen

    return decoded
def _get_instructions(pe):
    """
    Extract the textual representation of the program's instructions.

    Returns the set of distinct instructions, as a list.

    Parameters:
    pe -- pefile object with the program opened.

    Exceptions:
    To be implemented
    """
    entry_point = pe.OPTIONAL_HEADER.AddressOfEntryPoint
    # Instructions are rendered relative to the entry point RVA; the image
    # base is not added.
    base = entry_point
    image = pe.get_memory_mapped_image()
    data = image[entry_point:entry_point + pe.OPTIONAL_HEADER.SizeOfCode]

    unique = set()
    position = 0
    while position < len(data):
        decoded = get_instruction(data[position:], MODE_32)
        if decoded is None:
            break
        unique.add(get_instruction_string(decoded, FORMAT_INTEL,
                                          base + position))
        position += decoded.length
    return list(unique)
def disassemble(hex_str):
    """Disassemble a hex string with a window that grows one byte at a time.

    The window ``[start:end)`` is decoded after every growth step. When the
    decoded length equals the window size, the pair ``[hex, asm]`` is
    recorded and the window start moves past that instruction. A window
    that cannot be decoded is recorded as ``[hex, 'BAD']``.

    Returns the list of recorded pairs.
    """
    code = str(bytearray.fromhex(hex_str))
    start = 0
    end = 1
    print_data = []
    while end <= len(code):
        window = code[start:end]
        i = pydasm.get_instruction(window, pydasm.MODE_32)
        window_hex = ' '.join([format(c, 'x') for c in bytearray(window)])
        if not i:
            # Fixed: after recording 'BAD' the original still read
            # i.length on the failed decode.
            print_data.append([window_hex, 'BAD'])
        elif i.length == end - start:
            print_data.append([
                window_hex,
                pydasm.get_instruction_string(i, pydasm.FORMAT_INTEL, 0)
            ])
            start += i.length
        end += 1
    return print_data
def findBytesToPop(address, maxlen=512, noisy=False):
    """Scan the function at *address* for its first return instruction.

    Returns ``(convention, bytes_to_pop)``:
    ("cdecl", 0)   -- a plain ``ret`` was found first;
    ("stdcall", n) -- ``retn n`` was found first;
    (None, None)   -- no return was found within *maxlen* bytes, or an
                      instruction could not be decoded.

    With *noisy*, progress is written through ``log.debug``.
    """
    if noisy:
        log.debug("Determining bytes to pop for function at address 0x%08x:"%(address))

    code = _detour.util.read(address, maxlen+5)
    # code = "\xC3"      # ret
    # code = "\xC2\x04"  # retn 4

    convention = None
    to_pop = None
    consumed = 0     # bytes decoded so far; also the read position
    count = 0        # instructions decoded so far
    while consumed < maxlen:
        insn = pydasm.get_instruction(code[consumed:], pydasm.MODE_32)
        if not insn:
            break
        text = pydasm.get_instruction_string(insn, pydasm.FORMAT_INTEL, 0)
        if noisy:
            log.debug("%d bytes: %s"%(insn.length, text))
        count += 1
        consumed += insn.length

        if text.strip() == "ret":
            if noisy:
                log.debug("found ret instruction (no bytes to pop)")
            to_pop = 0
            convention = "cdecl"
            break

        if text.startswith("retn"):
            if noisy:
                log.debug(insn)
            # The operand follows "retn " and is parsed as hexadecimal.
            to_pop = int(text[5:], 16)
            convention = "stdcall"
            if noisy:
                log.debug("found retn instruction, bytes to pop = %s"%(to_pop))
            break

    if noisy:
        if to_pop is None:
            log.debug("warning, no retn instruction found")
        else:
            log.debug("bytes to pop is %d bytes (found after %d instructions)"%(to_pop, count))
    return (convention, to_pop)
print '''Actual Instructions (Disassembly taken from Immunity debugger) 00433441 |. 3956 38 CMP DWORD PTR DS:[ESI+38],EDX 00433444 |. 0F8E 1E010000 JLE 0x00433568 0043344A |> 8BBD E4EBFFFF /MOV EDI,DWORD PTR SS:[EBP-141C] ''' print 'Pydasm Output Below:\n\n' buffer = '\x39\x56\x38\x0F\x8E\x1E\x01\x00\x00\x8B\xBD\xE4\xEB\xFF\xFF' offset = 0 while offset < len(buffer): i = pydasm.get_instruction(buffer[offset:], pydasm.MODE_32) print pydasm.get_instruction_string(i, pydasm.FORMAT_INTEL, 0) if not i: break offset += i.length ''' Above prog. out put will be : cmp [esi+0x38],edx jng 0x124 mov edi,[ebp-0x141c] As well as Libdasm produce the same: debasish@debasish ~/Downloads/libdasm-beta/examples $ hexdump test.bin 0000000 5639 0f38 1e8e 0001 8b00 e4bd ffeb 00ff 000000f
# absolute_offset = memory_address + offset # Illegal opcode or opcode longer than remaining buffer if not instruction or instruction.length+offset>len(PEbin): if bytes: str += '%.2x ' % ord(PEbin[offset]) + ' '*(bytes-1)*2 str += 'db 0x%.2x' % ord(PEbin[offset]) print str offset += 1 continue ilen = instruction.length # Print absolute offset and raw PEbin bytes up to 'bytes' # (not needed, but looks nice). if bytes: for i in range(min(bytes, ilen)): str += '%.2x' % ord(PEbin[offset+i]) str += ' ' for i in range(min(bytes, ilen), bytes*2-ilen): str += ' ' # Print the parsed instruction, format using user-supplied # format. We could of course format the instruction in some # other way by accessing struct INSTRUCTION members directly. str += pydasm.get_instruction_string(instruction, pydasm.FORMAT_INTEL, offset); # str += pydasm.get_instruction_string(instruction, pydasm.FORMAT_ATT, offset); print str offset += ilen
time.sleep(1)

# Disassemble the first 10 bytes at the entry point of tempCave.exe and keep
# the instructions both as a list and keyed by their hex address.
print("[+]Analyzing exe for entry point address...")
pe = pefile.PE("tempCave.exe")
ep = pe.OPTIONAL_HEADER.AddressOfEntryPoint
ep_ava = ep + pe.OPTIONAL_HEADER.ImageBase
data = pe.get_memory_mapped_image()[ep:ep + 10]
offset = 0
save_instr = []
d = {}
while offset < len(data):
    i = pydasm.get_instruction(data[offset:], pydasm.MODE_32)
    print("i: " + str(i))
    if i is None:
        # Fixed: stop at a failed decode instead of crashing on i.length.
        break
    instr = pydasm.get_instruction_string(i, pydasm.FORMAT_INTEL,
                                          ep_ava + offset)
    save_instr.append(instr)
    interim = str(hex(ep_ava + offset))
    d[interim] = instr
    offset += i.length

print("dictionary:")
print(d)
print("[+]Saving initial instructions: ")
print(save_instr)

# Trying to get the physical address of each pe section.
# In optional_headers there is ImageBase. This seems to be the base offset
# of the pe file, so:
#     physical address = virtual address + ImageBase
import sys sys.path.append(r'c:\code\python\public\pyemu') sys.path.append(r'c:\code\python\public\pyemu\lib') import pydasm from PyCPU import * from PyDebug import * from PyEmu import PEPyEmu rawinstruction = "\x66\x89\x45\xF6" instruction = pydasm.get_instruction(rawinstruction, pydasm.MODE_32) pyinstruction = PyInstruction(instruction) disasm = pydasm.get_instruction_string(instruction, pydasm.FORMAT_INTEL, 0).rstrip(" ") #DebugInstruction(pyinstruction) emu = PEPyEmu() emu.cpu.set_debug(1) emu.set_register("EDX", 0xfe) print "EAX: 0x%08x EDX: 0x%08x" % (emu.cpu.EAX, emu.cpu.EDX) print "Executing [%s]..." % disasm, # An oversight in pydasm mnemonic parsing pyinstruction.mnemonic = pyinstruction.mnemonic.split() if pyinstruction.mnemonic[0] in ["rep", "repe", "repne", "lock"]: pyinstruction.mnemonic = pyinstruction.mnemonic[1] else:
def instruction_repr(self, instruction):
    """Render a decoded pydasm instruction as Intel-syntax text.

    The instruction is formatted with a base address of 0.
    """
    syntax = pydasm.FORMAT_INTEL
    base_address = 0
    return pydasm.get_instruction_string(instruction, syntax, base_address)
def Loadpe(path): #load PE pe = pefile.PE(path) print "\n" print "[+] MSHTML.dll version : " , pe.FileInfo[0].StringTable[0].entries['ProductVersion'] fi = open('mshtml.dll_%s.txt'%(pe.FileInfo[0].StringTable[0].entries['ProductVersion']), 'w+') # looking for sections in PE #for section in pe.sections: #if section.Name.find(".text") != -1 : #print "[+] Found .text Section At Base : " , hex(section.VirtualAddress) # looking for kernel32.dll for entry in pe.DIRECTORY_ENTRY_IMPORT: if entry.dll.lower().find("kernel32") != -1 : #print "[+] Found Kernel32.dll" for imp in entry.imports: if imp.name.find("VirtualProtect") != -1 : print '[+] Found VirtualProtect Import At : ', hex(imp.address - pe.OPTIONAL_HEADER.ImageBase) fi.write('[+] Found VirtualProtect Import At %s: \n'%(hex(imp.address - pe.OPTIONAL_HEADER.ImageBase))) ep = pe.OPTIONAL_HEADER.AddressOfEntryPoint ep_ava = ep+pe.OPTIONAL_HEADER.ImageBase text_va = pe.sections[0].VirtualAddress data = pe.get_memory_mapped_image()[text_va:text_va+pe.sections[0].SizeOfRawData] gadg_offset = data.find('\x94\xc3') x = gadg_offset + 2 print "[+] Pivot Offset : %s"%(hex(gadg_offset+text_va)) fi.write("[+] Pivot Offset : %s\n"%(hex(gadg_offset+text_va))) while gadg_offset < x: i = pydasm.get_instruction(data[gadg_offset:], pydasm.MODE_32) print pydasm.get_instruction_string(i, pydasm.FORMAT_INTEL, ep_ava+gadg_offset) fi.write(pydasm.get_instruction_string(i, pydasm.FORMAT_INTEL, ep_ava+gadg_offset)) fi.write('\n') gadg_offset += i.length gadg_offset = data.find('\x83\xc4\x30\x5f\x5e\xc3') x = gadg_offset + 6 print "[+] Gadget I Offset : %s"%(hex(gadg_offset+text_va)) fi.write("[+] Gadget I Offset : %s\n"%(hex(gadg_offset+text_va))) while gadg_offset < x: i = pydasm.get_instruction(data[gadg_offset:], pydasm.MODE_32) print pydasm.get_instruction_string(i, pydasm.FORMAT_INTEL, ep_ava+gadg_offset) fi.write(pydasm.get_instruction_string(i, pydasm.FORMAT_INTEL, ep_ava+gadg_offset)) fi.write('\n') gadg_offset += i.length 
gadg_offset = data.find('\x8b\x06\x5e\x5d\xc2\x04\x00') x = gadg_offset + 7 print "[+] Gadget II Offset : %s"%(hex(gadg_offset+text_va)) fi.write("[+] Gadget II Offset : %s\n"%(hex(gadg_offset+text_va))) while gadg_offset < x: i = pydasm.get_instruction(data[gadg_offset:], pydasm.MODE_32) print pydasm.get_instruction_string(i, pydasm.FORMAT_INTEL, ep_ava+gadg_offset) fi.write(pydasm.get_instruction_string(i, pydasm.FORMAT_INTEL, ep_ava+gadg_offset)) fi.write('\n') gadg_offset += i.length gadg_offset = data.find('\xff\xd0\xc3') x = gadg_offset + 3 print "[+] Gadget III Offset : %s"%(hex(gadg_offset+text_va)) fi.write("[+] Gadget III Offset : %s\n"%(hex(gadg_offset+text_va))) while gadg_offset < x: i = pydasm.get_instruction(data[gadg_offset:], pydasm.MODE_32) print pydasm.get_instruction_string(i, pydasm.FORMAT_INTEL, ep_ava+gadg_offset) fi.write(pydasm.get_instruction_string(i, pydasm.FORMAT_INTEL, ep_ava+gadg_offset)) fi.write('\n') gadg_offset += i.length fi.close()
def x86_disas_func(bin, addr, stop=0, running=0):
    """Print an annotated AT&T-syntax listing starting at `addr`.

    bin     -- binary object; this function uses bin.mem, bin.read() and
               bin.find_func()
    addr    -- address of the first instruction to decode
    stop    -- decode while pc < stop; when 0, decode until a mnemonic
               containing "hlt" or "ret" has been printed
    running -- when false, bytes are sliced from bin.mem; otherwise they are
               fetched with bin.read(pc, 15)

    Returns the address just past the last instruction that was printed.

    NOTE(review): the source was flattened onto one line; the nesting below
    (in particular the second `if instr.immbytes` running for every
    instruction) is reconstructed -- confirm against the original file.
    """
    pc = addr
    mnemonic = ""
    while pc < stop or (stop == 0 and "hlt" not in mnemonic and "ret" not in mnemonic):
        try:
            # 15 bytes are handed to the decoder per instruction
            # (presumably the x86 maximum instruction length).
            if not running:
                instr = pydasm.get_instruction(bin.mem[pc:pc+15], pydasm.MODE_32)
            else:
                instr = pydasm.get_instruction(str(bin.read(pc,15)), pydasm.MODE_32)
        except IndexError: #bad memory address
            break
        if not instr:
            # decoder could not produce an instruction: stop the listing
            break
        mnemonic = pydasm.get_instruction_string(instr, pydasm.FORMAT_ATT, pc)
        # head: address column, mid: colour prefix, tail: annotation,
        # reset: colour reset sequence (empty when colours are disabled)
        head = mid = tail = reset = ""
        if colors.COLORS:
            reset = colors.RESET
        head += "0x%.8x:\t"%pc
        #print "0x%.8x:\t%s"%(pc,mnemonic),
        # Pick a colour by instruction class.
        if int(instr.type) in [pydasm.INSTRUCTION_TYPE_CMP, pydasm.INSTRUCTION_TYPE_CMPS, pydasm.INSTRUCTION_TYPE_TEST]:
            if colors.COLORS:
                mid = colors.PURPLEfgB
        elif instr.type in [pydasm.INSTRUCTION_TYPE_PUSH, pydasm.INSTRUCTION_TYPE_POP]:
            if colors.COLORS:
                mid = colors.GREENfg
        elif instr.type in [pydasm.INSTRUCTION_TYPE_RET]:
            if colors.COLORS:
                mid = colors.YELLOWfgB
        elif instr.type in [pydasm.INSTRUCTION_TYPE_CALL, pydasm.INSTRUCTION_TYPE_JMPC, pydasm.INSTRUCTION_TYPE_JMP]:
            #hilite calls
            if colors.COLORS:
                mid = colors.YELLOWfgB
            #show function name/ annotations
            if instr.immbytes:
                # destination = immediate operand + instruction length + pc
                dest = instr.op1.immediate + instr.length + pc
                f = bin.find_func( dest )
                if f and f.name:
                    tail = "\t###\t%s()"%f.name
        elif instr.op1.reg == pydasm.REGISTER_ESP or instr.op2.reg == pydasm.REGISTER_ESP or instr.op3.reg == pydasm.REGISTER_ESP:
            # any operand referencing ESP
            if colors.COLORS:
                mid = colors.CYANfg
        if instr.immbytes:
            # If an immediate is contained in bin.mem, annotate the line with
            # the result of get_ascii_string() for it. "" is the sentinel for
            # "no such immediate"; op2 is preferred over op1.
            x = ""
            if instr.op2.immediate in bin.mem:
                x = instr.op2.immediate
            elif instr.op1.immediate in bin.mem:
                x = instr.op1.immediate
            if x != "":
                if colors.COLORS:
                    tail = "\t@@@\t%s%r%s"%(colors.REDfg, get_ascii_string(bin, x), reset)
                else:
                    tail = "\t@@@\t%r%s"%(get_ascii_string(bin, x), reset)
        print head + mid + mnemonic +reset + tail
        pc += instr.length
    #print "--"
    return pc
def getinfo(data, fn): """parse data and collect information""" print fn offset = 0 #target specific inits spec.init() while offset < len(data) - 0x10: prefixes = [] opcode_off = offset byte = ord(data[offset]) byte2 = ord(data[offset + 1]) previous = -1 while byte in PREFIX: prefixes.append(byte) opcode_off += 1 byte = ord(data[opcode_off]) byte2 = ord(data[opcode_off + 1]) #target specific alerts, termination, data collection if spec.check(data, opcode_off, fn) == -1: instruction = pydasm.get_instruction(data[offset:], pydasm.MODE_32) print "%08X: %s %s" % ( offset, (" ".join(["%02X" % ord(i) for i in data[offset:offset + min(NUMBYTES, instruction.length)]])).ljust(NUMBYTES * 3), pydasm.get_instruction_string(instruction, pydasm.FORMAT_INTEL, offset)) break #todo: merge double byte in a recursive way if byte == 0x0f: opcode_off += 1 byte = ord(data[opcode_off]) byte2 = ord(data[opcode_off + 1]) if byte in GROUPS0F: previous = d_l(hitmap0F_groups[byte][mid(byte2)], prefixes) else: previous = d_l(hitmap0F[byte], prefixes) if byte in operands0F: size, d = operands0F[byte] if size == 4: op = struct.unpack("L", data[opcode_off + 1:opcode_off + 1 + 4])[0] elif size == 1: op = ord(data[opcode_off + 1]) if op not in d: d[op] = 0 d[op] += 1 elif byte in GROUPS: previous = d_l(hitmap_groups[byte][mid(byte2)], prefixes) else: previous = d_l(hitmap[byte], prefixes) if byte in operands: size, d = operands[byte] if size == 4: op = struct.unpack("L", data[opcode_off + 1:opcode_off + 1 + 4])[0] elif size == 1: op = ord(data[opcode_off + 1]) if op not in d: d[op] = 0 d[op] += 1 instruction = pydasm.get_instruction(data[offset:], pydasm.MODE_32) # optional output current line if opcode/group never marked before if SHOW_NEW and previous == 0: print "%08X: %s %s" % ( offset, (" ".join(["%02X" % ord(i) for i in data[offset:offset + min(NUMBYTES, instruction.length)]])).ljust(NUMBYTES * 3), pydasm.get_instruction_string(instruction, pydasm.FORMAT_INTEL, offset)) offset += 
instruction.length print "final offset %08X" % offset return
def findObsSignature(filename):
    """Look for a decoder-stub signature at the entry point and emulate it.

    The PE at `filename` is parsed, the first 40 bytes at its entry point
    are searched with SearchString() for a byte signature, and the .text and
    .rdata sections are copied into a PEPyEmu instance. When the signature
    is found, the stub is single-stepped in the emulator while being
    disassembled in parallel; the memory at the final EIP is then searched
    for a second signature and "bad code " / "good code " is printed.

    Exits the process with status 2 when `filename` is empty.

    NOTE(review): the source was flattened onto one line; the nesting below
    is reconstructed from the data flow -- confirm against the original.
    """
    if filename:
        pe = pefile.PE(filename)
    else:
        print "[!] Blank filename specified"
        sys.exit(2)
    # Absolute addresses derived from the optional header.
    imagebase = pe.OPTIONAL_HEADER.ImageBase
    codebase = pe.OPTIONAL_HEADER.ImageBase + pe.OPTIONAL_HEADER.BaseOfCode
    database = pe.OPTIONAL_HEADER.ImageBase + pe.OPTIONAL_HEADER.BaseOfData
    entrypoint = pe.OPTIONAL_HEADER.ImageBase + pe.OPTIONAL_HEADER.AddressOfEntryPoint
    print "[*] Image Base Addr: 0x%08x" % (imagebase)
    print "[*] Code Base Addr: 0x%08x" % (codebase)
    print "[*] Data Base Addr: 0x%08x" % (database)
    print "[*] Entry Point Addr: 0x%08x\n" % (entrypoint)
    ep = pe.OPTIONAL_HEADER.AddressOfEntryPoint
    ep_ava = ep+pe.OPTIONAL_HEADER.ImageBase
    # 40-byte window at the entry point used for the signature search and
    # for the parallel disassembly.
    data = pe.get_memory_mapped_image()[ep:ep+40]
    offset=0
    #idata = array.array('B', [0xb9,0x5a,0x01,0x00,0x00,0x43,0x4b,0xbe,0x00,0x10,0x40,0x00,0xb3,0xa6,0x8a,0x16,0x32,0xd3])
    # Stub signature. Presumably 0x2A marks wildcard positions for
    # SearchString -- TODO confirm against SearchString's implementation.
    idata = array.array('B', [0xB9,0x2A,0xBE,0x2A,0xB3,0x2A,0x8A,0x16,0x32,0xD3,0x8A,0x1E,0x88,0x16,0x46,0xE2,0x2A,0xE9])
    signature = idata.tostring()
    #ret= data.find(data,signature,4)
    ret= SearchString(data,signature,5)
    print "Signature " ,ret
    emu = PEPyEmu()
    #emu.debug(1)
    # NOTE(review): textsection/datasection stay unbound (NameError below)
    # when the file has no section named .text / .rdata.
    for section in pe.sections:
        if section.Name.startswith(".text"):
            textsection = section
        elif section.Name.startswith(".rdata"):
            datasection = section
    # Copy both sections into emulator memory one byte at a time.
    for x in range(len(textsection.data)):
        c = textsection.data[x]
        emu.set_memory(codebase + x, int(ord(c)), size=1)
    for x in range(len(datasection.data)):
        c = datasection.data[x]
        emu.set_memory(database + x, int(ord(c)), size=1)
    # Recomputed with the same expressions as above.
    ep = pe.OPTIONAL_HEADER.AddressOfEntryPoint
    ep_ava = ep+pe.OPTIONAL_HEADER.ImageBase
    data = pe.get_memory_mapped_image()[ep:ep+40]
    emu.set_stack_argument(0x8, 0x10, name="arg_0")
    emu.set_stack_argument(0xc, 0x20, name="arg_4")
    if (ret):
        offset=0
        emu.set_register("EIP", entrypoint)
        emu.set_register("ECX", 0x00000000)
        emu.set_register("BL", 0x00)
        emu.set_register("DL", 0x00)
        instruction = "NOP"
        # Disassemble (without executing) up to and including the first
        # `loop` instruction, so `offset` ends just past it.
        # NOTE(review): pydasm.get_instruction may return None, in which case
        # `i.length` raises AttributeError.
        while not instruction.startswith("loop") : #offset < len(data):
            i = pydasm.get_instruction(data[offset:], pydasm.MODE_32)
            instruction=pydasm.get_instruction_string(i, pydasm.FORMAT_INTEL, ep_ava+offset)
            offset+=i.length
            print instruction
            #c = raw_input("emulator> ")
        # Single-step until ECX, BL and DL are all non-zero.
        # NOTE(review): never terminates if that state is never reached.
        while 1 :
            value1 = emu.get_register("ECX")
            value2 = emu.get_register("BL")
            value3 = emu.get_register("DL")
            if(value1 != 0 and value2 !=0 and value3 != 0):
                break;
            emu.execute()
        # ECX value at that point, later used as a byte count.
        byte_obs = value1
        # Keep stepping until ECX reaches zero.
        while value1 !=0:
            emu.execute()
            value1 =emu.get_register("ECX")
        # Decode, print and execute the instruction following the loop.
        i = pydasm.get_instruction(data[offset:], pydasm.MODE_32)
        instruction=pydasm.get_instruction_string(i, pydasm.FORMAT_INTEL, ep_ava+offset)
        print instruction
        offset+=i.length
        emu.execute()
        #offset+=((ep_ava+offset)- emu.get_register("EIP"))
        print "after jump"
        # Re-synchronise the disassembly cursor with the emulator's EIP.
        resu = emu.get_register("EIP")- (ep_ava+offset)
        print " %x " % resu
        print "%x" % emu.get_register("EIP")
        offset+=resu
        instruction = "nop"
        # Step and disassemble in lock-step until a `jmp` is reached.
        while not instruction.startswith("jmp") : #offset < len(data):
            # NOTE(review): tests data[0], not data[offset] -- confirm intent.
            if ord(data[0]) == 0x90:
                print "nop"
            i = pydasm.get_instruction(data[offset:], pydasm.MODE_32)
            print "i " , i
            instruction=pydasm.get_instruction_string(i, pydasm.FORMAT_INTEL, ep_ava+offset)
            emu.execute()
            offset+=i.length
            print instruction
            #c = raw_input("emulator> ")
        # Text after "0x" in the jmp line; jmp_address is not used below.
        ret=instruction.find("0x")
        jmp_address= instruction[ret+2:]
        #print "ret " , ret, hex(jmp_address)
        #emu.execute()
        #emu.execute()
        #print " I am here1 "
        emu.dump_regs()
        diff =abs(byte_obs-(emu.get_register("EIP")-codebase))
        print "diff " , diff
        # Read `diff` bytes of emulator memory starting at the current EIP.
        exec_code =emu.get_memory(emu.get_register("EIP") ,diff )
        print hex(ord(exec_code[0]))
        # Second signature, searched in the memory read above.
        idata = array.array('B', [0x68,0x2A,0x6A,0x01,0x6A,0x00,0xE8])
        signature = idata.tostring()
        ret = SearchString(exec_code,signature,2)
        if ret :
            print "bad code "
        else:
            print "good code "
def collect(self, verb): count = 0 for file in self.files: out = [] try: FILE = open(file, "rb") data = FILE.read() FILE.close() except: continue if data == None or len(data) == 0: out.append("Cannot read %s (maybe empty?)" % file) out.append("") continue try: pe = pefile.PE(data=data, fast_load=True) pe.parse_data_directories( directories=[ pefile.DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_IMPORT'], pefile.DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_EXPORT'], pefile.DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_TLS'], pefile.DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_RESOURCE']]) except: out.append("Cannot parse %s (maybe not PE?)" % file) out.append("") continue # source: https://code.google.com/p/pyew/ def get_filearch(data): if pe.FILE_HEADER.Machine == 0x14C: # IMAGE_FILE_MACHINE_I386 processor="intel" type = 32 return "32 Bits binary" elif pe.FILE_HEADER.Machine == 0x8664: # IMAGE_FILE_MACHINE_AMD64 processor="intel" type = 64 return "64 Bits binary" out.append(("#" * 90) + "\n[%d] File: %s\n" % (count, file) + ("#" * 90)) out.append(header("Meta-data")) out.append("Size\t\t: %d bytes" % len(data)) out.append("Type\t\t: %s" % get_filetype(data)) out.append("Architecture\t: %s" % get_filearch(data)) out.append("MD5\t\t: %s" % hashlib.md5(data).hexdigest()) out.append("SHA1\t\t: %s" % hashlib.sha1(data).hexdigest()) out.append("ssdeep\t\t: %s" % get_ssdeep(file)) out.append("imphash\t\t: %s" % self.get_imphash(pe)) out.append("Date\t\t: %s" % self.get_timestamp(pe)) out.append("Language\t: %s" % self.get_lang(pe)) crc_claimed = pe.OPTIONAL_HEADER.CheckSum crc_actual = pe.generate_checksum() out.append("CRC:\t(Claimed) : 0x%x, (Actual): 0x%x %s" % ( crc_claimed, crc_actual, "[SUSPICIOUS]" if crc_actual != crc_claimed else "")) packers = self.check_packers(pe) if len(packers): out.append("Packers\t\t: %s" % ','.join(packers)) # Alert if the EP section is not in a known good section or if its in the last PE section (ep, name, pos) = self.check_ep_section(pe) ep_ava = 
ep+pe.OPTIONAL_HEADER.ImageBase s = "Entry Point\t: %s %s %d/%d" % (hex(ep_ava), name, pos, len(pe.sections)) if (name not in good_ep_sections) or pos == len(pe.sections): s += " [SUSPICIOUS]" out.append(s) # Dism. the first 100 bytes of the Entry Point s = ("=" * 16 + '\n' + subTitle("Offset | Instructions")) data = pe.get_memory_mapped_image()[ep:ep+100] offset = 0 while offset < len(data): i = pydasm.get_instruction(data[offset:], pydasm.MODE_32) s += "\n%d\t%s" % (offset,pydasm.get_instruction_string(i, pydasm.FORMAT_INTEL, ep_ava+offset)) offset += i.length out.append(s) verinfo = self.check_verinfo(pe) if len(verinfo): out.append(header("Version info")) out.append(verinfo) if sys.modules.has_key('yara'): yarahits = self.check_yara(data) else: yarahits = [] clamhits = self.check_clam(file) if len(yarahits) or len(clamhits): out.append(header("Signature scans")) out.append(yarahits) out.append(clamhits) callbacks = self.check_tls(pe) if len(callbacks): out.append(header("TLS callbacks")) for cb in callbacks: out.append(" 0x%x" % cb) out.append(header("Sections")) out.append("%-10s %-12s %-12s %-10s %-12s %27s" % ("Name", "VirtAddr", "VirtSize", "RawSize", "MD5", "Entropy")) out.append("-" * 90) for sec in pe.sections: s = "%-10s %-12s %-12s %-10s %-12s %-12f" % ( ''.join([c for c in sec.Name if c in string.printable]), hex(sec.VirtualAddress), hex(sec.Misc_VirtualSize), hex(sec.SizeOfRawData), sec.get_hash_md5(), sec.get_entropy()) if sec.SizeOfRawData == 0 or (sec.get_entropy() > 0 and sec.get_entropy() < 1) or sec.get_entropy() > 7: s += "[SUSPICIOUS]" out.append(s) resources = self.check_rsrc(pe) if len(resources): out.append(header("Resource entries")) names_holder = [] for rsrc in resources.keys(): (name,rva,size,type,lang,sublang) = resources[rsrc] names_holder.append(name) names_count = collections.Counter(names_holder) names_common = names_count.most_common() out.append("%-18s %-8s" % ("Resource type", "Total")) out.append("-" * 25) for name,occur in 
names_common: out.append("%-18s : %-8s" % (name,occur)) if verb == True: out.append("-" * 90) out.append("%-18s %-8s %-8s %-12s %-24s Type" % ("Name", "RVA", "Size", "Lang", "Sublang")) out.append("-" * 90) for rsrc in resources.keys(): (name,rva,size,type,lang,sublang) = resources[rsrc] out.append("%-18s %-8s %-8s %-12s %-24s %s" % (name, hex(rva), hex(size), lang, sublang, type)) # source: https://code.google.com/p/pyew/ imports_total = len(pe.DIRECTORY_ENTRY_IMPORT) if imports_total > 0: c = 1 out.append(header("Imports")) for entry in pe.DIRECTORY_ENTRY_IMPORT: out.append("[%s] %s" % (c,entry.dll)) if verb == True: for imp in entry.imports: if (imp.name != None) and (imp.name != ""): out.append("\t%s %s" % (hex(imp.address),imp.name)) c += 1 imports = self.check_imports(pe) if len(imports): ret = [] out.append(header("Suspicious IAT alerts")) for imp in imports: ret.append(imp) c = 1 for i in sorted(set(ret)): out.append("[%s] %s" % (c,i)) c += 1 # Grab the exports info , if available if hasattr(pe,"DIRECTORY_ENTRY_EXPORT"): c = 1 out.append(header("Exports")) for exp in pe.DIRECTORY_ENTRY_EXPORT.symbols: if verb == True: out.append("[%s] %s %s" % (c,hex(exp.address),exp.name)) else: out.append("[%s] %s" % (c,exp.name)) c += 1 out.append("") print '\n'.join(out) count += 1
def graph_function(bin, func): pc = func.start_addr blocks = {} curblock = hex(int(pc)) blocks[curblock] = block(pc) while pc <= func.end_addr: instr = pydasm.get_instruction(bin.mem[pc:pc+15], pydasm.MODE_32) if not instr: break mnemonic = pydasm.get_instruction_string(instr, pydasm.FORMAT_ATT, pc) #add mnemon strings blocks[curblock].code.append("%s: %.30s"%(hex(int(pc)),mnemonic)) if instr.type in [pydasm.INSTRUCTION_TYPE_JMP, pydasm.INSTRUCTION_TYPE_JMPC]: if instr.op1.immediate < 256: branchdest = pc + instr.op1.immediate + instr.length branchdest2 = pc + instr.length #add the branches to the current block blocks[curblock].branch1 = branchdest if instr.type == pydasm.INSTRUCTION_TYPE_JMPC: blocks[curblock].branch2 = branchdest2 print "BLOCK @ %x"%branchdest2 bname = hex(int(branchdest2)) if bname not in blocks: blocks[bname]= block(branchdest2) print "BLOCK @ %x"%branchdest bname = hex(int(branchdest)) if bname not in blocks: blocks[bname]= block(branchdest) print "switching to block %x"%branchdest2 blocks[curblock].end = pc curblock = hex(int(branchdest2)) if curblock != hex(int(pc+instr.length)): if hex(int(pc+instr.length)) in blocks: print "picking up curblock %x"%(pc+instr.length) blocks[curblock].end = pc blocks[curblock].branch1 = pc+instr.length curblock = hex(int(pc+instr.length)) pc += instr.length blocks[curblock].end = pc for b in blocks: if blocks[b].branch1: for c in blocks: if blocks[c].start < blocks[b].branch1 and blocks[c].end >= blocks[b].branch1: print "split A" newblock = blocks[c].split(blocks[b].branch1) print "---------------" blocks[hex(int(newblock.start))] = newblock if blocks[b].branch2: for c in blocks: if blocks[c].start < blocks[b].branch2 and blocks[c].end >= blocks[b].branch2: print "split B" newblock = blocks[c].split(blocks[b].branch2) blocks[hex(int(newblock.start))] = newblock print blocks[c] for j in blocks[c].code: print j o = "digraph function_%x_%s {\n"%(func.start_addr, func.name) for b in blocks: code = 
"\n".join(blocks[b].code) o += " block_%s [shape=box align=left label=\"%r\"];\n"%(b, code) if blocks[b].branch1: o += " block_%s -> block_0x%x;\n"%(b, blocks[b].branch1) if blocks[b].branch2: o += " block_%s -> block_0x%x;\n"%(b, blocks[b].branch2) o += "}\n" open("graphsx/digraph%x-funcs.dot"%func.start_addr,'w').write(o)