def _build_from(self, address, root, base_address, depth=2):
    """Build gadget recursively.

    Walks backwards from ``address``: for every ``step`` between 1 and
    ``self._max_bytes`` the bytes in ``[address - step, address)`` are
    disassembled, and when they decode to exactly one instruction of
    ``step`` bytes that also passes ``self._is_valid_ins``, a child node is
    attached to ``root`` and the search continues from that instruction's
    start address with ``depth - 1``.

    :param address: address immediately after the bytes to be decoded.
    :param root: tree node that receives the discovered children.
    :param base_address: lowest address the search is allowed to reach.
    :param depth: number of remaining tree levels to explore; 0 stops.
    """
    if depth == 0:
        return

    end_addr = address

    for step in range(1, self._max_bytes + 1):
        start_addr = address - step

        # Larger steps only move further back, so stop at the first address
        # that falls outside the allowed range.
        if start_addr < 0 or start_addr < base_address:
            break

        raw_bytes = self._mem[start_addr:end_addr]

        asm_instr = self._disasm.disassemble(raw_bytes, start_addr)

        # Keep only decodings that consume the whole window, so the
        # instruction ends exactly where its parent begins.
        if not asm_instr or asm_instr.size != step:
            continue

        try:
            ir_instrs = self._ir_trans.translate(asm_instr)
        except Exception:
            # Translation failures just disqualify this candidate. Catching
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            continue

        if self._is_valid_ins(ir_instrs, asm_instr):
            child = GadgetTreeNode(
                DualInstruction(start_addr, asm_instr, ir_instrs))

            root.add_child(child)

            self._build_from(address - step, child, base_address, depth - 1)
def _disassemble_bb(self, start, end, symbols):
    """Disassemble one basic block starting at ``start``.

    Instructions are decoded and translated one at a time and appended to
    the block until ``end`` is reached, decoding fails, or a terminating
    instruction (return, halt, call to a non-returning function, or
    branch) is found.

    :param start: address of the first instruction of the block.
    :param end: exclusive upper bound for the addresses to decode.
    :param symbols: container queried with ``in`` for direct branch targets
        and handed to ``func_is_non_return`` for call targets.
    :returns: the populated ``BasicBlock``; its ``taken_branch``,
        ``not_taken_branch`` and ``direct_branch`` are ``None`` unless the
        block ends in a branch.
    """
    bb = BasicBlock()
    addr = start
    taken, not_taken, direct = None, None, None

    while addr < end:
        try:
            # Never read past ``end`` nor more than one maximum-size
            # instruction.
            data_end = addr + self._arch_info.max_instruction_size
            data_chunk = self._memory[addr:min(data_end, end)]
            asm = self._disasm.disassemble(data_chunk, addr)
        except (DisassemblerError, InvalidAddressError, InvalidDisassemblerData):
            # ``Logger.warn`` is a deprecated alias; ``warning`` is the
            # supported spelling.
            logger.warning("Error while disassembling @ {:#x}".format(addr), exc_info=True)
            break

        ir = self._translator.translate(asm)

        bb.instrs.append(DualInstruction(addr, asm, ir))

        # If it is a RET or HALT instruction, break.
        if self._arch_info.instr_is_ret(asm) or \
           self._arch_info.instr_is_halt(asm):
            bb.is_exit = True
            break

        # If it is a CALL instruction and the callee does not return, break.
        if self._arch_info.instr_is_call(asm):
            target = helper.extract_call_target(asm)

            if target and func_is_non_return(target, symbols):
                bb.is_exit = True
                break

        # If it is a BRANCH instruction, extract target and break.
        if self._arch_info.instr_is_branch(asm):
            target = helper.extract_branch_target(asm)

            if self._arch_info.instr_is_branch_cond(asm):
                taken = target
                not_taken = asm.address + asm.size
            else:
                direct = target

                # Jump to a function?
                if direct in symbols:
                    bb.is_exit = True

            break

        # Update instruction pointer and iterate.
        addr += asm.size

    bb.taken_branch = taken
    bb.not_taken_branch = not_taken
    bb.direct_branch = direct

    return bb
def _disassemble_bb(self, start_address, end_address):
    """Disassemble one basic block starting at ``start_address``.

    Instructions are decoded and translated one at a time until
    ``end_address`` is reached, memory cannot be read, the disassembler
    returns nothing, or the translated instruction ends in a REIL ``RET``
    or in a ``JCC`` that does not come from a ``call``.

    :param start_address: address of the first instruction of the block.
    :param end_address: exclusive upper bound for the addresses to decode.
    :returns: the populated ``BasicBlock``. An empty block is returned when
        ``start_address`` is greater than ``end_address``.
    """
    bb_current = BasicBlock()

    if start_address > end_address:
        return bb_current

    addr = start_address

    taken = None
    not_taken = None
    direct = None

    while addr < end_address:
        start, end = addr, min(addr + self._lookahead_max, end_address)

        try:
            data_chunk = self._mem[start:end]
        except Exception:
            # Catch Exception rather than everything so KeyboardInterrupt
            # and SystemExit are not swallowed.
            # TODO: Log error.
            break

        asm = self._disasm.disassemble(data_chunk, addr)

        if not asm:
            break

        ir = self._ir_trans.translate(asm)

        bb_current.instrs.append(DualInstruction(addr, asm, ir))

        # if there is an 'end' instruction process it accordingly
        if ir[-1].mnemonic == ReilMnemonic.RET:
            break

        # TODO: Manage 'call' instruction properly (without
        # resorting to 'asm.mnemonic == "call"').
        if ir[-1].mnemonic == ReilMnemonic.JCC and asm.mnemonic != "call":
            taken, not_taken, direct = self._extract_branches(
                addr, asm, asm.size, ir)
            break

        # update instruction pointer and iterate
        addr += asm.size

    bb_current.taken_branch = taken
    bb_current.not_taken_branch = not_taken
    bb_current.direct_branch = direct

    return bb_current
def _disassemble_bb(self, start_address, end_address, symbols):
    """Decode a single basic block beginning at ``start_address``.

    The block grows one instruction at a time and is closed when the
    address reaches ``end_address``, when memory or the disassembler yield
    nothing, or when a terminating instruction is met (REIL ``RET``, x86
    ``hlt``, a call whose callee is flagged in ``symbols``, or a ``JCC``
    that is neither a call nor a ``rep``-prefixed instruction).

    :param start_address: address of the first instruction.
    :param end_address: exclusive upper bound for decoding.
    :param symbols: mapping indexed by callee address; element ``[2]`` of
        an entry is tested for truthiness (presumably a "returns" flag --
        confirm against the producer of ``symbols``).
    :returns: the populated ``BasicBlock``.
    """
    block = BasicBlock()
    cursor = start_address
    taken = not_taken = direct = None

    while cursor < end_address:
        try:
            window_end = min(cursor + self._lookahead_max, end_address)
            raw = self._mem[cursor:window_end]
        except Exception:
            # TODO: Log error.
            break

        asm = self._disasm.disassemble(raw, cursor)
        if not asm:
            break

        ir = self._ir_trans.translate(asm)
        block.instrs.append(DualInstruction(cursor, asm, ir))

        # TODO: Process instructions without resorting to
        # asm.mnemonic or asm.prefix.

        # A RET closes the block.
        if ir[-1].mnemonic == ReilMnemonic.RET:
            break

        # So does an x86 hlt.
        if asm.mnemonic == "hlt":
            break

        ends_in_jcc = ir[-1].mnemonic == ReilMnemonic.JCC
        is_call = asm.mnemonic == "call"

        # A call to a callee that does not return closes the block.
        if ends_in_jcc and is_call:
            target = ir[-1].operands[2]

            if isinstance(target, ReilImmediateOperand):
                # NOTE(review): the shift presumably drops the low byte of
                # a REIL address to recover the native one -- confirm.
                callee = target.immediate >> 0x8

                if callee in symbols and not symbols[callee][2]:
                    break

        # Any other JCC is a real branch: record its targets and stop.
        if ends_in_jcc and not is_call and \
                asm.prefix not in ["rep", "repe", "repne", "repz"]:
            taken, not_taken, direct = self._extract_branches(asm, ir)
            break

        # Move on to the next instruction.
        cursor += asm.size

    block.taken_branch = taken
    block.not_taken_branch = not_taken
    block.direct_branch = direct

    return block
def test_equality(self):
    """Check that a basic block compares equal to itself.

    Two blocks are built from the same ``cmp`` instruction followed by a
    ``jne`` with a different target each; only self-equality of each block
    is asserted.
    """
    addr = 0x0804842f

    def parse_at(text, address, size):
        # Parse one instruction and stamp its location metadata.
        instr = self._parser.parse(text)
        instr.address = address
        instr.size = size
        return instr

    def lift(jump_text):
        # Parse the cmp/jne pair first, then translate both, in that order.
        asm_pair = [
            parse_at("cmp DWORD PTR [esp+0x18], 0x41424344", 0x08048425, 8),
            parse_at(jump_text, 0x0804842d, 2),
        ]
        ir_pair = [self._translator.translate(instr) for instr in asm_pair]
        return asm_pair, ir_pair

    asm1, ir1 = lift("jne 0x08048445")
    asm2, ir2 = lift("jne 0x0804844f")

    bb1 = BasicBlock()
    for asm_instr, ir_instrs in zip(asm1, ir1):
        bb1.instrs.append(DualInstruction(addr, asm_instr, ir_instrs))

    bb2 = BasicBlock()
    for asm_instr, ir_instrs in zip(asm2, ir2):
        bb2.instrs.append(DualInstruction(addr, asm_instr, ir_instrs))

    self.assertTrue(bb1 == bb1)
    self.assertTrue(bb2 == bb2)
def _find_x86_candidates(self, start_address, end_address):
    """Finds possible 'RET-ended' gadgets.

    Every address in ``[start_address, end_address]`` whose first byte is
    one of a small set of opcodes is disassembled and translated. When the
    last IR instruction is a ``RET``, or a ``JCC`` whose target operand is
    a register, the instruction becomes the root of a gadget tree that is
    grown backwards with ``self._build_from``.

    :param start_address: first address to scan (inclusive).
    :param end_address: last address to scan (inclusive).
    :returns: flat list of the gadgets built from every root that ended up
        with at least one child.
    """
    # TODO: Make this 'speed improvement' architecture-agnostic
    # Built once, outside the scan loop, because it never changes.
    # NOTE(review): assumes self._mem[addr] yields a one-character string
    # comparable to these literals -- confirm against the memory class.
    op_codes = (
        "\xc3",     # RET
        "\xc2",     # RET imm16
        "\xeb",     # JMP rel8
        "\xe8",     # CALL rel{16,32}
        "\xe9",     # JMP rel{16,32}
        "\xff",     # JMP/CALL r/m{16,32,64}
    )

    roots = []

    # find gadget tail
    for addr in xrange(start_address, end_address + 1):
        if self._mem[addr] not in op_codes:
            continue

        asm_instr = self._disasm.disassemble(
            self._mem[addr:min(addr + 16, end_address + 1)],
            addr)

        if not asm_instr:
            continue

        # restarts ir register numbering
        self._ir_trans.reset()

        try:
            ins_ir = self._ir_trans.translate(asm_instr)
        except Exception:
            # An untranslatable tail is simply not a candidate. Exception
            # (not a bare except) keeps KeyboardInterrupt/SystemExit alive.
            continue

        # build gadget
        last_ir = ins_ir[-1]

        if last_ir and (
                last_ir.mnemonic == ReilMnemonic.RET or
                (last_ir.mnemonic == ReilMnemonic.JCC and
                 isinstance(last_ir.operands[2], ReilRegisterOperand))):
            root = GadgetTreeNode(DualInstruction(addr, asm_instr, ins_ir))

            roots.append(root)

            self._build_from(addr, root, start_address, self._instrs_depth)

    # filter roots with no children
    roots = [r for r in roots if len(r.get_children()) > 0]

    # build gadgets
    root_gadgets = [self._build_gadgets(r) for r in roots]

    # flatten root gadget list
    candidates = [item for l in root_gadgets for item in l]

    return candidates
def _build_from(self, address, root, base_address, depth=2):
    """Build gadget recursively.

    Walks backwards from ``address``: for every ``step`` between 1 and
    ``self._max_bytes`` the bytes in ``[address - step, address)`` are
    disassembled (passing ``architecture_mode`` when the architecture is
    ``ARCH_ARM``). When they decode to exactly one instruction of ``step``
    bytes that passes ``self._is_valid_ins``, a child node is attached to
    ``root`` and the search continues from that instruction's start address
    with ``depth - 1``.

    :param address: address immediately after the bytes to be decoded.
    :param root: tree node that receives the discovered children.
    :param base_address: lowest address the search is allowed to reach.
    :param depth: number of remaining tree levels to explore; 0 stops.
    """
    if depth == 0:
        return

    end_addr = address

    for step in range(1, self._max_bytes + 1):
        start_addr = address - step

        # Larger steps only move further back, so stop at the first address
        # that falls outside the allowed range.
        if start_addr < 0 or start_addr < base_address:
            break

        raw_bytes = self._mem[start_addr:end_addr]

        # TODO: Improve this code.
        if self._architecture == ARCH_ARM:
            try:
                asm_instr = self._disasm.disassemble(
                    raw_bytes,
                    start_addr,
                    architecture_mode=self._architecture_mode)
            except InvalidDisassemblerData:
                continue
        else:
            try:
                asm_instr = self._disasm.disassemble(raw_bytes, start_addr)
            except Exception:
                # Undecodable bytes are treated as "no instruction".
                # Exception (not a bare except) lets KeyboardInterrupt and
                # SystemExit propagate.
                asm_instr = None

        # Keep only decodings that consume the whole window, so the
        # instruction ends exactly where its parent begins.
        if not asm_instr or asm_instr.size != step:
            continue

        try:
            ir_instrs = self._ir_trans.translate(asm_instr)
        except Exception:
            # Translation failures just disqualify this candidate.
            continue

        if self._is_valid_ins(ir_instrs):
            child = GadgetTreeNode(
                DualInstruction(start_addr, asm_instr, ir_instrs))

            root.add_child(child)

            self._build_from(address - step, child, base_address, depth - 1)
def _build_gadgets_rec(self, gadget_tree_root):
    """Build a gadget from a gadget tree.

    Returns one instruction list per leaf of the tree. Each list runs from
    the leaf's instruction up to this node's instruction, which is always
    the last element.
    """
    node = gadget_tree_root.get_root()
    subtrees = gadget_tree_root.get_children()

    tail = DualInstruction(node.address, node.asm_instr, node.ir_instrs)

    # A leaf contributes a single one-instruction path.
    if not subtrees:
        return [[tail]]

    paths = []

    for subtree in subtrees:
        # Every path found below gets this node's instruction appended.
        paths.extend(
            [prefix + [tail] for prefix in self._build_gadgets_rec(subtree)])

    return paths
def _find_arm_candidates(self, start_address, end_address):
    """Finds possible 'RET-ended' gadgets.

    Each address in ``[start_address, end_address]`` is tested against a
    few byte patterns (``bx reg``, ``blx reg``, ``pop {,pc}``) on the next
    up-to-four bytes. Matching addresses are disassembled and translated,
    and each one that succeeds becomes the root of a gadget tree grown
    backwards with ``self._build_from``.

    :param start_address: first address to scan (inclusive).
    :param end_address: last address to scan (inclusive).
    :returns: flat list of the gadgets built from every root that ended up
        with at least one child.
    """
    roots = []

    # From ROPgadget:
    # Compiled once here instead of being re-scanned per address.
    free_jump_gadgets = [
        re.compile("[\x10-\x19\x1e]{1}\xff\x2f\xe1"),  # bx   reg
        re.compile("[\x30-\x39\x3e]{1}\xff\x2f\xe1"),  # blx  reg
        re.compile("[\x00-\xff]{1}\x80\xbd\xe8"),      # pop {,pc}
    ]

    # find gadget tail
    # TODO: Make this 'speed improvement' architecture-agnostic
    # TODO: Add thumb
    # TODO: Little-Endian
    # TODO: Evaluate performance
    gadget_tail_addr = []

    for addr in xrange(start_address, end_address + 1):
        # TODO: Add thumb (+2)
        window = "".join(self._mem[addr:min(addr + 4, end_address + 1)])

        if any(gad.search(window) for gad in free_jump_gadgets):
            gadget_tail_addr.append(addr)

    for addr in gadget_tail_addr:
        try:
            asm_instr = self._disasm.disassemble(
                # TODO: Add thumb (+16)
                self._mem[addr:min(addr + 4, end_address + 1)],
                addr,
                architecture_mode=self._architecture_mode)
        except Exception:
            # Undecodable bytes are treated as "no instruction". Exception
            # (not a bare except) lets KeyboardInterrupt/SystemExit through.
            asm_instr = None

        if not asm_instr:
            continue

        # restarts ir register numbering
        self._ir_trans.reset()

        try:
            ins_ir = self._ir_trans.translate(asm_instr)
        except Exception:
            continue

        root = GadgetTreeNode(DualInstruction(addr, asm_instr, ins_ir))

        roots.append(root)

        self._build_from(addr, root, start_address, self._instrs_depth)

    # filter roots with no children
    roots = [r for r in roots if len(r.get_children()) > 0]

    # build gadgets
    root_gadgets = [self._build_gadgets(r) for r in roots]

    # flatten root gadget list
    candidates = [item for l in root_gadgets for item in l]

    return candidates
def _disassemble_bb(self, start_address, end_address, symbols, explore):
    """Disassemble one basic block starting at ``start_address``.

    The block grows one instruction at a time and is closed when the
    address reaches ``end_address``, when memory or the disassembler yield
    nothing, or when a terminating instruction is met: ``ret``, ``hlt``, a
    ``call``/``bl`` whose callee is flagged in ``symbols``, a ``JCC`` that
    is not a call and not ``rep``-prefixed, or (on ARM) a ``pop``/``ldr``
    that writes ``pc``.

    :param start_address: address of the first instruction.
    :param end_address: exclusive upper bound for decoding.
    :param symbols: mapping indexed by address; element ``[2]`` of an entry
        is tested for truthiness (presumably a "returns" flag -- confirm
        against the producer of ``symbols``).
    :param explore: list that receives the target operand of every
        ``call``/``bl`` found. It is mutated in place.
    :returns: the populated ``BasicBlock``.
    """
    bb = BasicBlock()
    addr = start_address
    taken, not_taken, direct = None, None, None

    while addr < end_address:
        try:
            start, end = addr, min(addr + self._lookahead_max, end_address)
            data_chunk = self._mem[start:end]
        except Exception:
            # TODO: Log error.
            break

        try:
            asm = self._disasm.disassemble(data_chunk, addr)
        except (InvalidDisassemblerData, CapstoneOperandNotSupported):
            break

        if not asm:
            break

        ir = self._ir_trans.translate(asm)

        bb.instrs.append(DualInstruction(addr, asm, ir))

        # TODO: Process instructions without resorting to
        # asm.mnemonic or asm.prefix.

        # If it is a RET or a x86 hlt instruction, break. The former extra
        # "JCC and ret" check could never fire after this one, so it is
        # gone.
        if asm.mnemonic in ("ret", "hlt"):
            bb.is_exit = True
            break

        ends_in_jcc = ir[-1].mnemonic == ReilMnemonic.JCC

        if ends_in_jcc and asm.mnemonic in ("call", "bl"):
            target = ir[-1].operands[2]

            # Add target address to the explore list.
            explore.append(target)

            # If callee does not return, break.
            if isinstance(target, ReilImmediateOperand):
                # NOTE(review): the shift presumably drops the low byte of
                # a REIL address to recover the native one -- confirm.
                callee = target.immediate >> 0x8

                if callee in symbols and not symbols[callee][2]:
                    bb.is_exit = True
                    break

        # If it is a JCC instruction, process it and break.
        if ends_in_jcc and \
                asm.mnemonic not in ("call", "blx", "bl") and \
                asm.prefix not in ["rep", "repe", "repne", "repz"]:
            taken, not_taken, direct = self._extract_branches(asm, ir)

            # jump to a function, take it as an exit bb
            if direct in symbols:
                bb.is_exit = True

            break

        if isinstance(self._arch_info, ArmArchitectureInformation):
            # Process ARM instrs: pop reg, {reg*, pc}
            if asm.mnemonic == "pop" and \
                    ("pc" in str(asm.operands[1]) or
                     "r15" in str(asm.operands[1])):
                bb.is_exit = True
                break

            # Process ARM instrs: ldr pc, *
            if asm.mnemonic == "ldr" and \
                    ("pc" in str(asm.operands[0]) or
                     "r15" in str(asm.operands[0])):
                bb.is_exit = True
                break

        # Update instruction pointer and iterate.
        addr += asm.size

    bb.taken_branch = taken
    bb.not_taken_branch = not_taken
    bb.direct_branch = direct

    return bb