def get_insn_disasm(insn):
    """Return the disassembly text for ``insn`` with whitespace cleaned up.

    The text comes from ``idc.GetDisasmEx`` called on ``insn.ea`` with
    ``idc.GENDSM_FORCE_CODE``. Tab characters are first turned into spaces,
    then every match of the module-level ``RE_SPACES`` pattern is replaced
    by a single space.
    """
    raw_text = idc.GetDisasmEx(insn.ea, idc.GENDSM_FORCE_CODE)
    untabbed = raw_text.replace('\t', ' ')
    return RE_SPACES.sub(' ', untabbed)
def refreshitems(self):
    """Rebuild ``self.items`` from the gadget list of the rop engine.

    Nothing happens when the rop engine is not initialised or when the
    gadget list compares equal to ``self.rop_list_cache``. Otherwise
    ``self.items`` is reset and refilled with one display row per gadget
    (``get_display_list(self.idarop.addr_format)``).

    Gadgets whose ``opcodes`` field is the empty string are reconstructed
    from the database: their bytes and per-instruction disassembly are read
    back and a fresh ``Gadget`` replaces the entry in the gadget list. A
    gadget that cannot be read or decoded contributes no row and is left
    untouched in the gadget list.
    """
    rop = self.idarop.rop

    # Pb : rop engine has not been init
    if rop is None:
        return

    gadgets = rop.gadgets

    # No new data present
    if self.rop_list_cache == gadgets:
        return

    self.items = []

    # No data present
    if not gadgets:
        return

    # Large lists take a while to rebuild: tell the user something is going on.
    show_progress = len(gadgets) > 10000
    if show_progress:
        idaapi.show_wait_box("Ida rop : loading rop list ...")

    try:
        for i, g in enumerate(gadgets):

            # Gadget already carries its opcodes: display it as is.
            if g.opcodes != "":
                self.items.append(g.get_display_list(self.idarop.addr_format))
                continue

            # reconstruct disas
            opcodes = idc.GetManyBytes(g.address, g.ret_address - g.address + 1)
            instructions = list()

            # GetManyBytes gives None when the bytes cannot be read; treat the
            # gadget as bad instead of failing later on len(opcodes).
            bad_gadget = opcodes is None

            ea = g.address
            while not bad_gadget and ea <= g.ret_address:
                # Decode the instruction located *at* ea exactly once. A size
                # of 0 means it could not be decoded. Whatever follows the
                # last instruction of the gadget is deliberately not looked
                # at, so trailing undecodable bytes no longer discard a valid
                # gadget.
                insn_size = idaapi.decode_insn(ea)
                if insn_size == 0:
                    # Badly decoded gadget
                    bad_gadget = True
                    break

                instructions.append(
                    idc.GetDisasmEx(ea, idaapi.GENDSM_FORCE_CODE))
                ea += insn_size

            if bad_gadget:
                continue

            h = Gadget(address=g.address,
                       ret_address=g.ret_address,
                       instructions=instructions,
                       opcodes=opcodes,
                       size=len(opcodes))

            # Replace the incomplete entry with the reconstructed one.
            gadgets[i] = h
            self.items.append(h.get_display_list(self.idarop.addr_format))

        # Keeps a reference to the gadget list (not a copy) for the
        # "no new data" comparison above.
        self.rop_list_cache = gadgets
    finally:
        # Always dismiss the wait box, even when the rebuild raised.
        if show_progress:
            idaapi.hide_wait_box()
def data(self):
    """Return an MD5 hex digest of the normalised disassembly at ``self.offset``.

    Every address yielded by ``idautils.FuncItems(self.offset)`` is
    disassembled with ``idc.GENDSM_MULTI_LINE``. Before hashing, each line
    is cut at the first ';', runs of whitespace are collapsed to single
    spaces (leading/trailing whitespace removed) and the text is
    lower-cased.
    """
    digest = hashlib.md5()
    for item_ea in idautils.FuncItems(self.offset):
        text = idc.GetDisasmEx(item_ea, idc.GENDSM_MULTI_LINE)
        # partition() hands back the whole line when there is no ';'
        code_part = text.partition(';')[0]
        # split()/join collapses inner whitespace and trims both ends
        normalised = " ".join(code_part.split()).lower()
        digest.update(normalised)
    return digest.hexdigest()
def search_pointers(self):
    """Find writable function pointers (and pointers to them) in ``self.modules``.

    For every module the search runs in four steps:

    1. Scan the executable segments lying inside the module for the byte
       patterns ``FF 15`` (CALL) and ``FF 25`` (JMP).
    2. Decode each hit; keep those whose first operand is a direct memory
       reference (``o_mem``) located, after subtracting ``self.ptrOffset``,
       in a writable segment. Unless ``self.filterP2P`` is set, the pointer
       address must also pass the charset options (``self.ptr*``).
    3. When ``self.searchP2P`` is set, build a per-module cache mapping
       pointer-sized values found in the module to the addresses holding
       them.
    4. Append to ``self.ptrs`` either one copy of the pointer per cached
       pointer-to-pointer (filtered by charset when ``self.filterP2P`` is
       set) or the pointer itself.

    The search stops when the user cancels (``idaapi.wasBreak()``) or when
    ``self.maxPtrs`` results have been collected. The wait box is always
    dismissed, including when an exception escapes.
    """

    # Charset tags demanded by the enabled ptr* options.
    required_tags = [tag for enabled, tag in (
        (self.ptrNonull, "nonull"),
        (self.ptrUnicode, "unicode"),
        (self.ptrAscii, "ascii"),
        (self.ptrAsciiPrint, "asciiprint"),
        (self.ptrAlphaNum, "alphanum"),
        (self.ptrNum, "numeric"),
        (self.ptrAlpha, "alpha"),
    ) if enabled]

    def passes_charset_filter(charset):
        """Return True when ``charset`` is known and has every required tag."""
        if charset is None:
            return False
        return all(tag in charset for tag in required_tags)

    def iter_matches(start_ea, end_ea, pattern):
        """Lazily yield each address in the range where ``pattern`` is found.

        NOTE: every search starts at the previous position + 1, so a match
              located exactly at ``start_ea`` is not reported.
        """
        ea = start_ea
        while True:
            ea = idaapi.find_binary(ea + 1, end_ea, pattern, 16, idaapi.SEARCH_DOWN)
            if ea == idaapi.BADADDR:
                return
            yield ea

    # HACK: A separate flag is used to track user canceling the search,
    #       because multiple calls to idaapi.wasBreak() do not properly
    #       detect cancellations.
    breakFlag = False

    # Show wait dialog
    idaapi.show_wait_box("Searching writable function pointers...")

    try:
        for m in self.modules:

            module_end = m.addr + m.size

            ###################################################################
            # Locate all of the CALL and JMP instructions in the current module
            # which use an immediate operand.

            # List of call/jmp pointer calls in a given module
            ptr_calls = list()

            # Iterate over segments in the module
            # BUG: Iterating over all loaded segments is more stable than looking up by address
            for n in xrange(idaapi.get_segm_qty()):
                seg = idaapi.getnseg(n)

                # Segment in a selected modules
                if not (seg and seg.startEA >= m.addr and seg.endEA <= module_end):
                    continue

                # Locate executable segments
                # NOTE: Each module may have multiple executable segments
                # TODO: Search for "MOV REG, PTR # CALL REG"
                if not seg.perm & idaapi.SEGPERM_EXEC:
                    continue

                # Search all instances of CALL /2 imm32/64 - FF 15
                # and then all instances of JMP /2 imm32/64 - FF 25
                # TODO: Alternative pointer calls using SIB:
                #       FF 14 E5 11 22 33 44 - call dword/qword ptr [0x44332211]
                #       FF 14 65 11 22 33 44
                #       FF 14 25 11 22 33 44
                #       FF 24 E5 11 22 33 44 - jmp dword/qword ptr [0x44332211]
                #       FF 24 65 11 22 33 44
                #       FF 24 25 11 22 33 44
                for pattern in ("FF 15", "FF 25"):
                    ptr_calls.extend(iter_matches(seg.startEA, seg.endEA, pattern))

            ###################################################################
            # Extract all of the function pointers and make sure they
            # are writable.

            # List of writable function pointer objects in a given module
            ptrs = list()

            for call_ea in ptr_calls:

                # Decode CALL/JMP instruction
                # NOTE: May result in invalid disassembly of split instructions
                if not idaapi.decode_insn(call_ea):
                    continue

                operand = idaapi.cmd.Operands[0]

                # Verify first operand is a direct memory reference
                if operand.type != idaapi.o_mem:
                    continue

                # Get operand address and apply pointer offset
                ptr_ea = operand.addr - self.ptrOffset

                # Locate segment where the pointer is located and make sure
                # a valid writeable segment was found
                ptr_seg = idaapi.getseg(ptr_ea)
                if not (ptr_seg and ptr_seg.perm & idaapi.SEGPERM_WRITE):
                    continue

                # Get pointer charset
                ptr_charset = self.sploiter.get_ptr_charset(ptr_ea)

                # Filter the pointer itself only when the charset options
                # are not meant for the pointers-to-pointers.
                if not self.filterP2P and not passes_charset_filter(ptr_charset):
                    continue

                # Get pointer disassembly
                insn_disas = idc.GetDisasmEx(call_ea, idaapi.GENDSM_FORCE_CODE)

                # Add pointer to the list
                ptrs.append(Ptr(m.file, ptr_ea, self.ptrOffset, ptr_charset,
                                call_ea, insn_disas))

            ###################################################################
            # Cache Pointers to Pointers

            ptr_ea_prefix_cache = dict()

            if self.searchP2P:

                # CACHE: Running repeated searches over the entire memory space is
                #        very expensive. Let's cache all of the addresses containing
                #        bytes corresponding to discovered function pointers in a
                #        single search and simply reference this cache for each
                #        function pointer. Specifically running idaapi.find_binary()
                #        is much more expensive than idaapi.dbg_read_memory().
                #
                # NOTE:  For performance considerations, the cache works on a per
                #        module basis, but could be expanded for the entire memory
                #        space.
                #
                # prefix_offset - how many bytes of discovered function
                #        pointers to cache.
                #
                # Example: For function pointers 0x00401234, 0x00404321, 0x000405678
                #        we are going to use prefix_offset 2, so we will cache all of the
                #        values located at addresses 0x0040XXXX
                if self.sploiter.addr64:
                    pack_format = "<Q"
                    addr_bytes = 8
                    prefix_offset = 6
                else:
                    pack_format = "<I"
                    addr_bytes = 4
                    prefix_offset = 2

                # Set of unique N-byte address prefixes to search in memory
                # (the last prefix_offset bytes of the little-endian pointer).
                ea_prefix_set = set()
                for ptr in ptrs:
                    ptr_bytes = struct.pack(pack_format, ptr.ptr_ea)
                    ea_prefix_set.add(ptr_bytes[-prefix_offset:])

                # Search the module for all bytes corresponding to the prefix
                # and use them as candidates for pointers-to-pointers
                for ea_prefix in ea_prefix_set:

                    # NOTE: Make sure you search using 44 33 22 11 format and not 11223344
                    ea_prefix_str = " ".join(["%02x" % ord(b) for b in ea_prefix])

                    for ea in iter_matches(m.addr, module_end, ea_prefix_str):

                        # The prefix sits at the end of the stored pointer:
                        # step back to the first byte of the candidate.
                        p2p_ea = ea - (addr_bytes - prefix_offset)

                        dbg_mem = read_module_memory(p2p_ea, addr_bytes)

                        # struct.unpack needs exactly addr_bytes bytes; skip
                        # candidates whose memory could not be read in full
                        # instead of aborting the whole search.
                        if dbg_mem is not None and len(dbg_mem) == addr_bytes:
                            ptr_ea_prefix = struct.unpack(pack_format, dbg_mem)[0]
                            ptr_ea_prefix_cache.setdefault(ptr_ea_prefix, set()).add(p2p_ea)

                        # Detect search cancellation, but allow the loop below
                        # to run to create already cached/found function pointers

                        # Canceled
                        if breakFlag or idaapi.wasBreak():
                            breakFlag = True
                            break

                    # Canceled
                    if breakFlag or idaapi.wasBreak():
                        breakFlag = True
                        break

            ###################################################################
            # Locate Pointer to Pointers

            for ptr in ptrs:

                ptr_ea = ptr.ptr_ea

                # Locate pointers-to-pointers for a given function pointer in the cache
                if self.searchP2P and ptr_ea in ptr_ea_prefix_cache:

                    for p2p_ea in ptr_ea_prefix_cache[ptr_ea]:

                        # Apply pointer-to-pointer offset
                        p2p_ea -= self.p2pOffset

                        p2p_charset = self.sploiter.get_ptr_charset(p2p_ea)

                        # Filter the pointer
                        if self.filterP2P and not passes_charset_filter(p2p_charset):
                            continue

                        # Copy existing pointer object to modify it for the particular p2p
                        p2p = copy.copy(ptr)
                        p2p.p2p_ea = p2p_ea
                        p2p.p2p_offset = self.p2pOffset
                        p2p.p2p_charset = p2p_charset

                        # Append p2p specific pointer object to the global list
                        self.ptrs.append(p2p)

                        # Exceeded maximum number of pointers
                        if self.maxPtrs and len(self.ptrs) >= self.maxPtrs:
                            breakFlag = True
                            print("[idasploiter] Maximum number of pointers exceeded.")
                            break

                # Simply append pointer object to the global list
                else:
                    self.ptrs.append(ptr)

                    # Exceeded maximum number of pointers
                    if self.maxPtrs and len(self.ptrs) >= self.maxPtrs:
                        breakFlag = True
                        print("[idasploiter] Maximum number of pointers exceeded.")
                        break

                if breakFlag or idaapi.wasBreak():
                    breakFlag = True
                    break

            # Canceled
            # NOTE: Only works when started from GUI not script.
            if breakFlag or idaapi.wasBreak():
                breakFlag = True
                print("[idasploiter] Canceled.")
                break

        print("[idasploiter] Found %d total pointers." % len(self.ptrs))

    finally:
        # Never leave the modal wait box on screen, even after an error.
        idaapi.hide_wait_box()
def decode_instruction(self, insn, ea, ea_end):
    """Classify one decoded x86 instruction for gadget analysis.

    ``insn`` is the decoded instruction object (its canonical feature
    flags, canonical mnemonic and first two operands are inspected) and
    ``ea`` is the address used to fetch the disassembly text. ``ea_end``
    is accepted but not used here.

    Returns ``None`` when the instruction must be rejected: it is not a
    ``retn``/``jmp``/``call`` and either stops execution flow (``CF_STOP``),
    is listed in ``self.ropBadMnems``, or starts with "j" while
    ``self.ropAllowJcc`` is false.

    Otherwise returns a dict with the keys:

    * ``insn_mnem``          - canonical mnemonic
    * ``insn_disas``         - disassembly without comment, whitespace collapsed
    * ``insn_operations``    - set of tags ("stack", "math", "bit", "one-reg",
                               "one-imm", "one-mem", "reg-to-reg",
                               "imm-to-reg", "mem-to-reg", "reg-to-mem")
    * ``insn_chg_registers`` - set of register names the instruction modifies
    * ``insn_use_registers`` - set of register names the instruction reads
    * ``insn_pivot``         - stack pointer adjustment computed for the
                               instruction (0 when none was determined)
    """

    # Instruction specific characteristics
    insn_chg_registers = set()
    insn_use_registers = set()
    insn_operations = set()
    insn_pivot = 0

    # Instruction feature
    #
    # instruc_t.feature
    #
    # CF_STOP = 0x00001 #  Instruction doesn't pass execution to the next instruction
    # CF_CALL = 0x00002 #  CALL instruction (should make a procedure here)
    # CF_CHG1 = 0x00004 #  The instruction modifies the first operand
    # CF_CHG2 = 0x00008 #  The instruction modifies the second operand
    # CF_CHG3 = 0x00010 #  The instruction modifies the third operand
    # CF_CHG4 = 0x00020 #  The instruction modifies 4 operand
    # CF_CHG5 = 0x00040 #  The instruction modifies 5 operand
    # CF_CHG6 = 0x00080 #  The instruction modifies 6 operand
    # CF_USE1 = 0x00100 #  The instruction uses value of the first operand
    # CF_USE2 = 0x00200 #  The instruction uses value of the second operand
    # CF_USE3 = 0x00400 #  The instruction uses value of the third operand
    # CF_USE4 = 0x00800 #  The instruction uses value of the 4 operand
    # CF_USE5 = 0x01000 #  The instruction uses value of the 5 operand
    # CF_USE6 = 0x02000 #  The instruction uses value of the 6 operand
    # CF_JUMP = 0x04000 #  The instruction passes execution using indirect jump or call (thus needs additional analysis)
    # CF_SHFT = 0x08000 #  Bit-shift instruction (shl,shr...)
    # CF_HLL  = 0x10000 #  Instruction may be present in a high level language function.
    insn_feature = insn.get_canon_feature()

    # Instruction mnemonic name
    insn_mnem = insn.get_canon_mnem()

    # Get instruction operand types
    #
    # op_t.type
    #                    Description                          Data field
    # o_void     =  0 #  No Operand                           ----------
    # o_reg      =  1 #  General Register (al,ax,es,ds...)    reg
    # o_mem      =  2 #  Direct Memory Reference  (DATA)      addr
    # o_phrase   =  3 #  Memory Ref [Base Reg + Index Reg]    phrase
    # o_displ    =  4 #  Memory Reg [Base Reg + Index Reg + Displacement] phrase+addr
    # o_imm      =  5 #  Immediate Value                      value
    # o_far      =  6 #  Immediate Far Address  (CODE)        addr
    # o_near     =  7 #  Immediate Near Address (CODE)        addr
    insn_op1 = insn.Operands[0].type
    insn_op2 = insn.Operands[1].type

    ###############################################################
    # Filter gadget
    ###############################################################

    # Do not filter ROP, JOP, COP, always decode them
    # NOTE: A separate check must be done to check if they are out of place.
    if insn_mnem not in ("retn", "jmp", "call"):

        # Filter gadgets with instructions that don't forward execution to the next address
        if insn_feature & idaapi.CF_STOP:
            return None

        # Filter gadgets with instructions in a bad list
        elif insn_mnem in self.ropBadMnems:
            return None

        # Filter gadgets with jump instructions
        # Note: conditional jumps may still be useful if we can
        #       set flags prior to calling them.
        elif not self.ropAllowJcc and insn_mnem[0] == "j":
            return None

    ###############################################################
    # Get disassembly
    ###############################################################
    # NOTE: GENDSM_FORCE_CODE ensures correct decoding
    #       of split instructions.
    insn_disas = idc.GetDisasmEx(ea, idaapi.GENDSM_FORCE_CODE)
    insn_disas = insn_disas.partition(';')[0]       # Remove comments from disassembly
    insn_disas = ' '.join(insn_disas.split())       # Remove extraneous space from disassembly

    ###############################################################
    # Analyze instruction
    ###############################################################

    # Standalone instruction
    if insn_op1 == idaapi.o_void:

        # Stack pointer adjustment of the operand-less stack instructions.
        # POPA/POPAD moves the stack pointer over all eight saved slots: the
        # saved ESP value is skipped, not left on the stack, so the pivot is
        # the mirror image of PUSHA/PUSHAD.
        # TODO: Determine and test how these instructions affect the stack
        #       in 32-bit and 64-bit modes.
        standalone_pivots = {
            "popa":   8 * 4,
            "popad":  8 * 4,
            "pusha":  -8 * 4,
            "pushad": -8 * 4,
            "popf":   4,
            "popfd":  4,
            "pushf":  -4,
            "pushfd": -4,
            "popfq":  8,    # TODO: Needs testing
            "pushfq": -8,   # TODO: Needs testing
        }

        if insn_mnem in standalone_pivots:
            insn_operations.add("stack")
            insn_pivot += standalone_pivots[insn_mnem]

    # Single operand instruction
    elif insn_op2 == idaapi.o_void:

        # Single operand register
        if insn_op1 == idaapi.o_reg:
            insn_operations.add("one-reg")

            if insn_feature & idaapi.CF_CHG1:
                reg_name = self.get_o_reg_name(insn, 0)
                insn_chg_registers.add(reg_name)

                # Check for stack operation
                if reg_name[1:] == "sp":
                    insn_operations.add("stack")

                    if insn_mnem == "inc":
                        insn_pivot += 1

                    elif insn_mnem == "dec":
                        insn_pivot -= 1

            elif insn_feature & idaapi.CF_USE1:
                reg_name = self.get_o_reg_name(insn, 0)
                insn_use_registers.add(reg_name)

        # Single operand immediate
        elif insn_op1 == idaapi.o_imm:
            insn_operations.add("one-imm")

        # Single operand reference
        # TODO: determine the [reg + ...] value if present
        elif insn_op1 in (idaapi.o_phrase, idaapi.o_displ):
            insn_operations.add("one-mem")

        # PUSH/POP mnemonic with any operand type
        if insn_mnem in ("push", "pop"):
            insn_operations.add("stack")

            # Adjust pivot based on operand size (32bit vs 64bit); other
            # operand sizes leave the pivot untouched.
            operand_dtyp = insn.Operands[0].dtyp
            if operand_dtyp == idaapi.dt_dword:
                slot_size = 4
            elif operand_dtyp == idaapi.dt_qword:
                slot_size = 8
            else:
                slot_size = 0

            if insn_mnem == "pop":
                insn_pivot += slot_size
            else:
                insn_pivot -= slot_size

        # Check for arithmetic operation:
        if insn_mnem in self.insn_arithmetic_ops:
            insn_operations.add("math")

        # Check for bit-wise operations:
        if insn_mnem in self.insn_bit_ops:
            insn_operations.add("bit")

    # Two operand instruction
    else:

        # Check for arithmetic operations
        if insn_mnem in self.insn_arithmetic_ops:
            insn_operations.add("math")

        # Check for bit-wise operations
        if insn_mnem in self.insn_bit_ops:
            insn_operations.add("bit")

        # Two operand instruction with the first operand a register
        if insn_op1 == idaapi.o_reg:

            reg_name = self.get_o_reg_name(insn, 0)

            if insn_feature & idaapi.CF_CHG1:
                insn_chg_registers.add(reg_name)

                # Check for stack operation
                if reg_name[1:] == "sp":
                    insn_operations.add("stack")

                    # Determine stack pivot distance
                    if insn_op2 == idaapi.o_imm:

                        # NOTE: adc and sbb may also be useful, but let the user
                        #       determine their use by locating the operations "stack"
                        if insn_mnem == "add":
                            insn_pivot += insn.Operands[1].value

                        elif insn_mnem == "sub":
                            insn_pivot -= insn.Operands[1].value

                # Check for operations
                if insn_op2 == idaapi.o_reg:
                    insn_operations.add("reg-to-reg")
                elif insn_op2 == idaapi.o_imm:
                    insn_operations.add("imm-to-reg")
                # TODO: determine the [reg + ...] value if present
                elif insn_op2 in (idaapi.o_phrase, idaapi.o_displ):
                    insn_operations.add("mem-to-reg")

            if insn_feature & idaapi.CF_USE1:
                insn_use_registers.add(reg_name)

        # Two operand instruction with the second operand a register
        if insn_op2 == idaapi.o_reg:

            reg_name = self.get_o_reg_name(insn, 1)

            if insn_feature & idaapi.CF_CHG2:
                insn_chg_registers.add(reg_name)

                # Check for stack operation
                if reg_name[1:] == "sp":
                    insn_operations.add("stack")

            if insn_feature & idaapi.CF_USE2:
                insn_use_registers.add(reg_name)

            # Check for operations
            # TODO: determine the [reg + ...] value if present
            if insn_op1 in (idaapi.o_phrase, idaapi.o_displ):
                insn_operations.add("reg-to-mem")

    # Build instruction dictionary (kept separate from the ``insn`` argument
    # so the decoded instruction object is not shadowed).
    decoded = dict()
    decoded["insn_mnem"] = insn_mnem
    decoded["insn_disas"] = insn_disas
    decoded["insn_operations"] = insn_operations
    decoded["insn_chg_registers"] = insn_chg_registers
    decoded["insn_use_registers"] = insn_use_registers
    decoded["insn_pivot"] = insn_pivot

    return decoded
def decode_instruction(self, insn, ea, ea_end):
    """Classify one decoded PowerPC instruction for gadget analysis.

    ``insn`` is the decoded instruction object (canonical feature flags,
    canonical mnemonic and the first three operands are inspected); ``ea``
    is the address used to read the raw opcode and the disassembly text.
    ``ea_end`` is accepted but not used here.

    Returns ``None`` when the instruction is rejected: its feature flags
    are 0; it stops execution flow (``CF_STOP``) and is neither "blr" nor
    "bctrl"; it is listed in ``self.ropBadMnems``; or it is "b" while
    ``self.ropAllowJcc`` is false.

    Otherwise returns a dict with the keys ``insn_mnem``, ``insn_disas``,
    ``insn_operations``, ``insn_chg_registers``, ``insn_use_registers`` and
    ``insn_pivot``.
    """

    # Result accumulators
    changed_regs = set()
    used_regs = set()
    operations = set()
    pivot = 0

    # Canonical feature flags (idaapi.CF_*):
    #   CF_STOP         - instruction doesn't pass execution to the next one
    #   CF_CHG1..CF_CHG3 - instruction modifies operand 1..3
    #   CF_USE1..CF_USE3 - instruction uses the value of operand 1..3
    feature = insn.get_canon_feature()
    if feature == 0:
        return None

    # Instruction mnemonic name
    mnem = insn.get_canon_mnem()

    # BUGBUG: The IDA PowerPC processor module has a bug that causes 'b', 'blr', 'bctr', and 'bctrl'
    #         instructions to decode identically. Look at the raw opcode to tell them apart.
    opcode_mnems = {
        0x4E800020: "blr",
        0x4E800420: "bctr",
        0x4E800421: "bctrl",
    }
    mnem = opcode_mnems.get(idc.Dword(ea), mnem)

    # Operand types (idaapi.o_*):
    #   o_void   - no operand
    #   o_reg    - general register
    #   o_mem    - direct memory reference
    #   o_phrase - memory ref [base reg + index reg]
    #   o_displ  - memory ref [base reg + index reg + displacement]
    #   o_imm    - immediate value
    #   o_far    - immediate far address
    #   o_near   - immediate near address
    op1_type = insn.Operands[0].type
    op2_type = insn.Operands[1].type
    op3_type = insn.Operands[2].type

    memory_ref_types = (idaapi.o_phrase, idaapi.o_displ)

    ###############################################################
    # Filter gadget
    ###############################################################

    if feature & idaapi.CF_STOP:
        # Flow-stopping instructions are dropped, except blr and bctrl.
        if mnem not in ("blr", "bctrl"):
            return None
    elif mnem in self.ropBadMnems:
        # Instruction is on the bad list.
        return None
    elif not self.ropAllowJcc and mnem == "b":
        # Branches are only kept when explicitly allowed.
        # Note: conditional branches may still be useful if we can
        #       set flags prior to calling them.
        return None

    ###############################################################
    # Get disassembly
    ###############################################################
    # NOTE: GENDSM_FORCE_CODE ensures correct decoding
    #       of split instructions.
    disas_text = idc.GetDisasmEx(ea, idaapi.GENDSM_FORCE_CODE)
    # Drop the comment part, then squeeze the remaining whitespace.
    disas_text = ' '.join(disas_text.partition(';')[0].split())

    ###############################################################
    # Analyze instruction
    ###############################################################

    # Arithmetic / bit-wise classification by mnemonic
    if mnem in self.insn_arithmetic_ops:
        operations.add("math")
    if mnem in self.insn_bit_ops:
        operations.add("bit")

    # First operand: record how the register is used.
    if op1_type == idaapi.o_reg:
        first_reg = self.get_o_reg_name(insn, 0)

        if feature & idaapi.CF_USE1:
            used_regs.add(first_reg)

        if feature & idaapi.CF_CHG1:
            changed_regs.add(first_reg)

            # Writing r1 is tagged as a stack operation.
            if first_reg == "r1":
                operations.add("stack")

                # Only add/addi and sub/subi with an immediate third operand
                # contribute to the pivot; other stack writes are left for
                # the user to find through the "stack" tag.
                if op3_type == idaapi.o_imm:
                    if mnem in ("add", "addi"):
                        pivot += insn.Operands[2].value
                    elif mnem in ("sub", "subi"):
                        pivot -= insn.Operands[2].value

            # Source kind, judged from the third operand
            if op3_type == idaapi.o_reg:
                operations.add("reg-to-reg")
            elif op3_type in memory_ref_types:
                operations.add("mem-to-reg")

    # Second operand: a register or displacement operand is always recorded
    # as used, and additionally as changed when CF_CHG2 is set.
    if op2_type in (idaapi.o_reg, idaapi.o_displ):
        second_reg = self.get_o_reg_name(insn, 1)
        used_regs.add(second_reg)

        if feature & idaapi.CF_CHG2:
            changed_regs.add(second_reg)

    # Third operand not used: classify as a two operand instruction.
    if not feature & idaapi.CF_USE3:
        if op2_type == idaapi.o_reg:
            # Two operand register
            operations.add("reg-to-reg")
        elif op2_type == idaapi.o_imm:
            # Two operand immediate
            operations.add("imm-reg")
        elif op2_type in memory_ref_types:
            # Two operand reference
            # Hack: a mnemonic starting with "l" is taken to be a load,
            #       anything else a store.
            if mnem[0] == "l":
                operations.add("mem-reg")
            else:
                operations.add("reg-mem")

    # Third operand: record how the register is used.
    if op3_type == idaapi.o_reg:
        third_reg = self.get_o_reg_name(insn, 2)

        if feature & idaapi.CF_USE3:
            used_regs.add(third_reg)
        if feature & idaapi.CF_CHG3:
            changed_regs.add(third_reg)

    # Build instruction dictionary
    decoded = dict()
    decoded["insn_mnem"] = mnem
    decoded["insn_disas"] = disas_text
    decoded["insn_operations"] = operations
    decoded["insn_chg_registers"] = changed_regs
    decoded["insn_use_registers"] = used_regs
    decoded["insn_pivot"] = pivot

    return decoded