def __init__(self, binary):
    """Set up the x86 toolchain used to recover CFGs from ``binary``.

    The architecture mode is taken from ``binary.architecture_mode`` and is
    used to configure the architecture information, the disassembler and
    the translator. The CFG recoverer is driven by a recursive-descent
    strategy that reads from ``binary.text_section``.
    """
    mode = binary.architecture_mode

    self.__binary = binary
    self.__arch_mode = mode
    self.__arch = X86ArchitectureInformation(mode)
    self.__disassembler = X86Disassembler(architecture_mode=mode)
    self.__translator = X86Translator(architecture_mode=mode)

    strategy = RecursiveDescent(
        self.__disassembler,
        binary.text_section,
        self.__translator,
        self.__arch,
    )
    self.__bb_builder = CFGRecoverer(strategy)
def test_sample_2(self):
    """Recover three functions of ``x86_sample_2`` and check their CFGs.

    ``main`` is expected to yield four basic blocks whose entry block has a
    taken / not-taken pair that both fall through to the same direct
    branch target; the two helper functions are expected to yield a single
    basic block each.
    """
    binary = BinaryFile(get_full_path("/data/bin/x86_sample_2"))
    strategy = RecursiveDescent(
        self._disassembler,
        binary.text_section,
        self._translator,
        self._arch_info,
    )
    recoverer = CFGRecoverer(strategy)

    # Recover "main" function.
    bbs, _ = recoverer.build(0x0804846d, 0x080484a3)

    self.assertEqual(len(bbs), 4)

    cfg = ControlFlowGraph(bbs, name="main")

    self.assertEqual(cfg.start_address, 0x0804846d)
    self.assertEqual(cfg.end_address, 0x080484a3)
    self.assertEqual(len(cfg.basic_blocks), 4)

    # Entry block: conditional branch with two successors.
    bb_entry = cfg.find_basic_block(0x0804846d)
    self.assertEqual(len(bb_entry.branches), 2)
    self.assertEqual(bb_entry.taken_branch, 0x08048491)
    self.assertEqual(bb_entry.not_taken_branch, 0x0804848a)

    # Taken successor: single direct branch.
    bb_taken = cfg.find_basic_block(0x08048491)
    self.assertEqual(len(bb_taken.branches), 1)
    self.assertIsNone(bb_taken.taken_branch)
    self.assertIsNone(bb_taken.not_taken_branch)
    self.assertEqual(bb_taken.direct_branch, 0x08048496)

    # Not-taken successor: single direct branch to the same target.
    bb_not_taken = cfg.find_basic_block(0x0804848a)
    self.assertEqual(len(bb_not_taken.branches), 1)
    self.assertIsNone(bb_not_taken.taken_branch)
    self.assertIsNone(bb_not_taken.not_taken_branch)
    self.assertEqual(bb_not_taken.direct_branch, 0x08048496)

    # Recover "func_1" function.
    bbs, _ = recoverer.build(0x0804843b, 0x8048453)

    self.assertEqual(len(bbs), 1)

    # NOTE(review): the graph is still named "main" here; the name is not
    # asserted on, so it is left untouched.
    cfg = ControlFlowGraph(bbs, name="main")

    self.assertEqual(cfg.start_address, 0x0804843b)
    self.assertEqual(cfg.end_address, 0x8048453)
    self.assertEqual(len(cfg.basic_blocks), 1)

    # Recover "func_2" function.
    bbs, _ = recoverer.build(0x08048454, 0x0804846c)

    self.assertEqual(len(bbs), 1)

    # NOTE(review): same as above, the graph name is kept as "main".
    cfg = ControlFlowGraph(bbs, name="main")

    self.assertEqual(cfg.start_address, 0x08048454)
    self.assertEqual(cfg.end_address, 0x0804846c)
    self.assertEqual(len(cfg.basic_blocks), 1)
def test_sample_1(self):
    """Recover ``main`` from ``x86_sample_1`` and check its CFG bounds.

    The function is expected to be recovered as a single basic block that
    starts at 0x0804840b and ends at 0x08048438.
    """
    binary = BinaryFile(get_full_path("/data/bin/x86_sample_1"))
    strategy = RecursiveDescent(
        self._disassembler,
        binary.text_section,
        self._translator,
        self._arch_info,
    )
    recoverer = CFGRecoverer(strategy)

    # The call targets returned alongside the blocks are not checked here.
    bbs, _ = recoverer.build(0x0804840b, 0x08048438)

    self.assertEqual(len(bbs), 1)

    cfg = ControlFlowGraph(bbs, name="main")

    self.assertEqual(cfg.start_address, 0x0804840b)
    self.assertEqual(cfg.end_address, 0x08048438)
    self.assertEqual(len(cfg.basic_blocks), 1)
def setUp(self):
    """Build the 32-bit x86 fixtures used by every test in the case.

    Creates the architecture information, an empty memory, the SMT solver
    and translator (configured with the architecture's alias mapper and
    register sizes), the disassembler, the IR translator and a CFG
    recoverer that reads code from the memory fixture.
    """
    arch_info = X86ArchitectureInformation(ARCH_X86_MODE_32)

    self._arch_info = arch_info
    self._operand_size = arch_info.operand_size
    self._memory = Memory()
    self._smt_solver = SmtSolver()

    # Configure the SMT translator with the architecture specifics.
    self._smt_translator = SmtTranslator(self._smt_solver, self._operand_size)
    smt_translator = self._smt_translator
    smt_translator.set_arch_alias_mapper(arch_info.alias_mapper)
    smt_translator.set_arch_registers_size(arch_info.registers_size)

    self._disasm = X86Disassembler()
    self._ir_translator = X86Translator()

    strategy = RecursiveDescent(
        self._disasm,
        self._memory,
        self._ir_translator,
        arch_info,
    )
    self._bb_builder = CFGRecoverer(strategy)
def __init__(self, binary, symbols):
    """Set up the x86 toolchain and index ``symbols`` by start address.

    ``symbols`` is iterated as ``(name, start, end)`` triples; each one is
    stored in a lookup table that maps ``start`` to ``(name, end)``. The
    sequence container itself starts out empty.
    """
    mode = binary.architecture_mode

    self.__binary = binary
    self.__arch_mode = mode
    self.__arch = X86ArchitectureInformation(mode)
    self.__disassembler = X86Disassembler(architecture_mode=mode)
    self.__translator = X86Translator(architecture_mode=mode)

    strategy = RecursiveDescent(
        self.__disassembler,
        binary.text_section,
        self.__translator,
        self.__arch,
    )
    self.__bb_builder = CFGRecoverer(strategy)

    self.__container = {}
    self.__symbols = symbols

    # start address -> (symbol name, end address)
    self.__symbols_by_addr = {
        start: (name, end) for name, start, end in symbols
    }
def setUp(self):
    """Prepare the shared 32-bit x86 analysis objects for each test.

    The SMT translator is bound to a fresh solver and told about the
    architecture's alias mapper and register sizes; the CFG recoverer uses
    a recursive-descent strategy over the (initially empty) memory.
    """
    info = X86ArchitectureInformation(ARCH_X86_MODE_32)
    self._arch_info = info
    self._operand_size = info.operand_size

    self._memory = Memory()
    self._smt_solver = SmtSolver()

    self._smt_translator = SmtTranslator(
        self._smt_solver,
        self._operand_size,
    )
    self._smt_translator.set_arch_alias_mapper(info.alias_mapper)
    self._smt_translator.set_arch_registers_size(info.registers_size)

    self._disasm = X86Disassembler()
    self._ir_translator = X86Translator()

    descent = RecursiveDescent(
        self._disasm,
        self._memory,
        self._ir_translator,
        info,
    )
    self._bb_builder = CFGRecoverer(descent)
class CodeAnalyzerTests(unittest.TestCase):
    """Path-satisfiability tests for ``CodeAnalyzer`` on 32-bit x86 code."""

    def setUp(self):
        """Create the architecture, SMT and CFG-recovery fixtures."""
        self._arch_info = X86ArchitectureInformation(ARCH_X86_MODE_32)
        self._operand_size = self._arch_info.operand_size
        self._memory = Memory()
        self._smt_solver = SmtSolver()
        self._smt_translator = SmtTranslator(self._smt_solver,
                                             self._operand_size)
        self._smt_translator.set_arch_alias_mapper(
            self._arch_info.alias_mapper)
        self._smt_translator.set_arch_registers_size(
            self._arch_info.registers_size)
        self._disasm = X86Disassembler()
        self._ir_translator = X86Translator()
        self._bb_builder = CFGRecoverer(
            RecursiveDescent(self._disasm, self._memory,
                             self._ir_translator, self._arch_info))

    def test_check_path_satisfiability(self):
        """Every simple basic-block path from ``start`` to ``end`` is SAT.

        The machine code below is loaded into the memory fixture, its CFG
        is recovered, and each simple path between the two addresses is
        checked with ``CodeAnalyzer.check_path_satisfiability``.
        """
        # Single-argument print(...) behaves the same with or without
        # ``print_function``, so the test also parses on Python 3.
        if VERBOSE:
            print("[+] Test: test_check_path_satisfiability")

        # binary : stack1
        bin_start_address, bin_end_address = 0x08048ec0, 0x8048f02

        # Bytes literals keep bytearray(binary) valid on Python 3, where a
        # text string would require an explicit encoding.
        binary = b"\x55"                          # 0x08048ec0 : push   ebp
        binary += b"\x89\xe5"                     # 0x08048ec1 : mov    ebp,esp
        binary += b"\x83\xec\x60"                 # 0x08048ec3 : sub    esp,0x60
        binary += b"\x8d\x45\xfc"                 # 0x08048ec6 : lea    eax,[ebp-0x4]
        binary += b"\x89\x44\x24\x08"             # 0x08048ec9 : mov    DWORD PTR [esp+0x8],eax
        binary += b"\x8d\x45\xac"                 # 0x08048ecd : lea    eax,[ebp-0x54]
        binary += b"\x89\x44\x24\x04"             # 0x08048ed0 : mov    DWORD PTR [esp+0x4],eax
        binary += b"\xc7\x04\x24\xa8\x5a\x0c\x08" # 0x08048ed4 : mov    DWORD PTR [esp],0x80c5aa8
        binary += b"\xe8\xa0\x0a\x00\x00"         # 0x08048edb : call   8049980 <_IO_printf>
        binary += b"\x8d\x45\xac"                 # 0x08048ee0 : lea    eax,[ebp-0x54]
        binary += b"\x89\x04\x24"                 # 0x08048ee3 : mov    DWORD PTR [esp],eax
        binary += b"\xe8\xc5\x0a\x00\x00"         # 0x08048ee6 : call   80499b0 <_IO_gets>
        binary += b"\x8b\x45\xfc"                 # 0x08048eeb : mov    eax,DWORD PTR [ebp-0x4]
        binary += b"\x3d\x44\x43\x42\x41"         # 0x08048eee : cmp    eax,0x41424344
        binary += b"\x75\x0c"                     # 0x08048ef3 : jne    8048f01 <main+0x41>
        binary += b"\xc7\x04\x24\xc0\x5a\x0c\x08" # 0x08048ef5 : mov    DWORD PTR [esp],0x80c5ac0
        binary += b"\xe8\x4f\x0c\x00\x00"         # 0x08048efc : call   8049b50 <_IO_puts>
        binary += b"\xc9"                         # 0x08048f01 : leave
        binary += b"\xc3"                         # 0x08048f02 : ret

        self._memory.add_vma(bin_start_address, bytearray(binary))

        start = 0x08048ec0
        # start = 0x08048ec6
        # end = 0x08048efc
        end = 0x08048f01

        registers = {
            "eax": GenericRegister("eax", 32, 0xffffd0ec),
            "ecx": GenericRegister("ecx", 32, 0x00000001),
            "edx": GenericRegister("edx", 32, 0xffffd0e4),
            "ebx": GenericRegister("ebx", 32, 0x00000000),
            "esp": GenericRegister("esp", 32, 0xffffd05c),
            "ebp": GenericRegister("ebp", 32, 0x08049580),
            "esi": GenericRegister("esi", 32, 0x00000000),
            "edi": GenericRegister("edi", 32, 0x08049620),
            "eip": GenericRegister("eip", 32, 0x08048ec0),
        }

        flags = {
            "af": GenericFlag("af", 0x0),
            "cf": GenericFlag("cf", 0x0),
            "of": GenericFlag("of", 0x0),
            "pf": GenericFlag("pf", 0x1),
            "sf": GenericFlag("sf", 0x0),
            "zf": GenericFlag("zf", 0x1),
        }

        memory = {}

        # Only the basic blocks are needed; the call targets are ignored.
        bb_list, _ = self._bb_builder.build(bin_start_address,
                                            bin_end_address)
        bb_graph = ControlFlowGraph(bb_list)

        codeAnalyzer = CodeAnalyzer(self._smt_solver, self._smt_translator,
                                    self._arch_info)
        codeAnalyzer.set_context(GenericContext(registers, flags, memory))

        for bb_path in bb_graph.all_simple_bb_paths(start, end):
            if VERBOSE:
                print("[+] Checking path satisfiability :")
                print(" From : %s" % hex(start))
                print(" To : %s" % hex(end))
                print(" Path : %s" % " -> ".join(
                    hex(bb.address) for bb in bb_path))

            is_sat = codeAnalyzer.check_path_satisfiability(bb_path, start,
                                                            verbose=False)

            if VERBOSE:
                print("[+] Satisfiability : %s" % str(is_sat))

            self.assertTrue(is_sat)

            if is_sat and VERBOSE:
                print(codeAnalyzer.get_context())

            if VERBOSE:
                print(":" * 80)
                print("")
class ReilContainerEx(object):
    """Container of REIL sequences that is filled on demand.

    Sequences are stored by base address. When ``fetch`` is asked for a
    base address that is not stored yet, the address is looked up in the
    symbol table given at construction time, the CFG for that symbol is
    recovered and translated, and the resulting sequences are added to
    this container.
    """

    def __init__(self, binary, symbols):
        """Set up the x86 toolchain and index ``symbols`` by start address.

        ``symbols`` is iterated as ``(name, start, end)`` triples.
        """
        self.__binary = binary
        self.__arch_mode = self.__binary.architecture_mode
        self.__arch = X86ArchitectureInformation(self.__arch_mode)
        self.__disassembler = X86Disassembler(architecture_mode=self.__arch_mode)
        self.__translator = X86Translator(architecture_mode=self.__arch_mode)
        self.__bb_builder = CFGRecoverer(RecursiveDescent(self.__disassembler,
                                                          self.__binary.text_section,
                                                          self.__translator,
                                                          self.__arch))

        self.__container = {}
        self.__symbols = symbols

        # start address -> (symbol name, end address)
        self.__symbols_by_addr = {}

        for name, start, end in symbols:
            self.__symbols_by_addr[start] = (name, end)

    # Auxiliary methods
    # ======================================================================== #
    def __translate_cfg(self, cfg, reil_container=None):
        """Translate every instruction of ``cfg`` into ``reil_container``.

        A new ``ReilContainer`` is created only when no container is given.
        The container that received the sequences is returned.
        """
        # Compare against None explicitly: a caller-supplied container must
        # be used even if it happens to evaluate as false (e.g. it is empty).
        if reil_container is None:
            reil_container = ReilContainer()

        asm_instrs = [dual_instr.asm_instr
                      for bb in cfg.basic_blocks
                      for dual_instr in bb]

        return self.__translate(asm_instrs, reil_container)

    def __translate(self, asm_instrs, reil_container):
        """Translate ``asm_instrs`` and chain the resulting sequences.

        Each instruction becomes one ``ReilSequence``; the previous
        sequence's ``next_sequence_address`` is pointed at the new one
        before the new one is added to ``reil_container``.
        """
        asm_instr_last = None
        instr_seq_prev = None

        for asm_instr in asm_instrs:
            instr_seq = ReilSequence()

            for reil_instr in self.__translator.translate(asm_instr):
                instr_seq.append(reil_instr)

            if instr_seq_prev:
                instr_seq_prev.next_sequence_address = instr_seq.address

            reil_container.add(instr_seq)

            instr_seq_prev = instr_seq

        # NOTE(review): asm_instr_last is never assigned above, so this
        # branch never runs and the last sequence keeps whatever
        # next_sequence_address it was created with. Behavior is kept as is;
        # confirm whether the fall-through address was meant to be set.
        if instr_seq_prev:
            if asm_instr_last:
                instr_seq_prev.next_sequence_address = \
                    (asm_instr_last.address + asm_instr_last.size) << 8

        return reil_container

    def add(self, sequence):
        """Store ``sequence`` under its base address.

        Raises ``Exception`` if a sequence is already stored there.
        """
        base_addr, _ = split_address(sequence.address)

        if base_addr in self.__container:
            raise Exception("Invalid sequence")

        self.__container[base_addr] = sequence

    def fetch(self, address):
        """Return the instruction at ``address``, resolving it if needed."""
        base_addr, index = split_address(address)

        if base_addr not in self.__container:
            self.__resolve_address(base_addr)

        return self.__container[base_addr].get(index)

    def get_next_address(self, address):
        """Return the address that follows ``address``.

        Within a sequence the address is incremented by one; at the last
        instruction of a sequence the sequence's ``next_sequence_address``
        is returned. Raises ``Exception`` for an unknown base address.
        """
        base_addr, index = split_address(address)

        if base_addr not in self.__container:
            raise Exception("Invalid address.")

        sequence = self.__container[base_addr]

        if index < len(sequence) - 1:
            return address + 1

        return sequence.next_sequence_address

    def dump(self):
        """Dump every stored sequence in base-address order."""
        for base_addr in sorted(self.__container):
            self.__container[base_addr].dump()

            print("-" * 80)

    def __iter__(self):
        """Yield the instructions of all sequences in base-address order."""
        for addr in sorted(self.__container):
            for instr in self.__container[addr]:
                yield instr

    def __resolve_address(self, address):
        """Recover and translate the symbol that starts at ``address``.

        Raises ``Exception`` if no symbol starts at ``address``.
        """
        if address not in self.__symbols_by_addr:
            # print("Not symbol : {:#010x}".format(address))
            raise Exception("Symbol not found!")

        name, end = self.__symbols_by_addr[address]

        # print("Resolving {:s} @ {:#010x}".format(name, address))

        # build() is unpacked as (basic blocks, call targets) by its other
        # callers; only the basic blocks belong in the CFG.
        bbs, _ = self.__bb_builder.build(address, end)

        cfg = ControlFlowGraph(bbs)

        _ = self.__translate_cfg(cfg, reil_container=self)
class ReilContainerBuilder(object):
    """Build a ``ReilContainer`` holding the REIL code of given functions."""

    def __init__(self, binary):
        """Set up the x86 toolchain for ``binary``.

        The architecture mode comes from ``binary.architecture_mode`` and
        the CFG recoverer reads code from ``binary.text_section``.
        """
        self.__binary = binary
        self.__arch_mode = self.__binary.architecture_mode
        self.__arch = X86ArchitectureInformation(self.__arch_mode)
        self.__disassembler = X86Disassembler(architecture_mode=self.__arch_mode)
        self.__translator = X86Translator(architecture_mode=self.__arch_mode)
        self.__bb_builder = CFGRecoverer(RecursiveDescent(self.__disassembler,
                                                          self.__binary.text_section,
                                                          self.__translator,
                                                          self.__arch))

    def build(self, functions):
        """Translate every function in ``functions`` into one container.

        ``functions`` is iterated as triples whose second and third items
        are the start and end addresses passed to the CFG recoverer; the
        first item is ignored. Returns the populated ``ReilContainer``.
        """
        reil_container = ReilContainer()

        for _name, start, end in functions:
            # Only the basic blocks are needed; call targets are ignored.
            bbs, _call_targets = self.__bb_builder.build(start, end)

            cfg = ControlFlowGraph(bbs)

            reil_container = self.__translate_cfg(cfg, reil_container=reil_container)

        return reil_container

    # Auxiliary methods
    # ======================================================================== #
    def __translate_cfg(self, cfg, reil_container=None):
        """Translate every instruction of ``cfg`` into ``reil_container``.

        A new ``ReilContainer`` is created only when no container is given.
        The container that received the sequences is returned.
        """
        # Compare against None explicitly: a caller-supplied container must
        # be used even if it happens to evaluate as false (e.g. it is empty).
        if reil_container is None:
            reil_container = ReilContainer()

        asm_instrs = [dual_instr.asm_instr
                      for bb in cfg.basic_blocks
                      for dual_instr in bb]

        return self.__translate(asm_instrs, reil_container)

    def __translate(self, asm_instrs, reil_container):
        """Translate ``asm_instrs`` and chain the resulting sequences.

        Each instruction becomes one ``ReilSequence``; the previous
        sequence's ``next_sequence_address`` is pointed at the new one
        before the new one is added to ``reil_container``.
        """
        asm_instr_last = None
        instr_seq_prev = None

        for asm_instr in asm_instrs:
            instr_seq = ReilSequence()

            for reil_instr in self.__translator.translate(asm_instr):
                instr_seq.append(reil_instr)

            if instr_seq_prev:
                instr_seq_prev.next_sequence_address = instr_seq.address

            reil_container.add(instr_seq)

            instr_seq_prev = instr_seq

        # NOTE(review): asm_instr_last is never assigned above, so this
        # branch never runs and the last sequence keeps whatever
        # next_sequence_address it was created with. Behavior is kept as is;
        # confirm whether the fall-through address was meant to be set.
        if instr_seq_prev:
            if asm_instr_last:
                instr_seq_prev.next_sequence_address = \
                    (asm_instr_last.address + asm_instr_last.size) << 8

        return reil_container
class CodeAnalyzerTests(unittest.TestCase):
    """Checks ``CodeAnalyzer`` path satisfiability on a small x86 routine."""

    def setUp(self):
        """Create the architecture, SMT and CFG-recovery fixtures."""
        self._arch_info = X86ArchitectureInformation(ARCH_X86_MODE_32)
        self._operand_size = self._arch_info.operand_size
        self._memory = Memory()
        self._smt_solver = SmtSolver()
        self._smt_translator = SmtTranslator(self._smt_solver, self._operand_size)
        self._smt_translator.set_arch_alias_mapper(self._arch_info.alias_mapper)
        self._smt_translator.set_arch_registers_size(self._arch_info.registers_size)
        self._disasm = X86Disassembler()
        self._ir_translator = X86Translator()
        self._bb_builder = CFGRecoverer(RecursiveDescent(self._disasm,
                                                         self._memory,
                                                         self._ir_translator,
                                                         self._arch_info))

    def test_check_path_satisfiability(self):
        """Assert that each simple path from ``start`` to ``end`` is SAT.

        The routine is written into the memory fixture, its basic blocks
        are recovered, and every simple basic-block path between the two
        addresses is passed to ``check_path_satisfiability``.
        """
        # print(...) with one argument gives the same output whether or not
        # ``print_function`` is active, and it parses on Python 3 as well.
        if VERBOSE:
            print("[+] Test: test_check_path_satisfiability")

        # binary : stack1
        bin_start_address, bin_end_address = 0x08048ec0, 0x8048f02

        # Bytes literals: bytearray() rejects a text string without an
        # encoding on Python 3, while b"" is a plain str on Python 2.
        binary = b"\x55"                          # 0x08048ec0 : push   ebp
        binary += b"\x89\xe5"                     # 0x08048ec1 : mov    ebp,esp
        binary += b"\x83\xec\x60"                 # 0x08048ec3 : sub    esp,0x60
        binary += b"\x8d\x45\xfc"                 # 0x08048ec6 : lea    eax,[ebp-0x4]
        binary += b"\x89\x44\x24\x08"             # 0x08048ec9 : mov    DWORD PTR [esp+0x8],eax
        binary += b"\x8d\x45\xac"                 # 0x08048ecd : lea    eax,[ebp-0x54]
        binary += b"\x89\x44\x24\x04"             # 0x08048ed0 : mov    DWORD PTR [esp+0x4],eax
        binary += b"\xc7\x04\x24\xa8\x5a\x0c\x08" # 0x08048ed4 : mov    DWORD PTR [esp],0x80c5aa8
        binary += b"\xe8\xa0\x0a\x00\x00"         # 0x08048edb : call   8049980 <_IO_printf>
        binary += b"\x8d\x45\xac"                 # 0x08048ee0 : lea    eax,[ebp-0x54]
        binary += b"\x89\x04\x24"                 # 0x08048ee3 : mov    DWORD PTR [esp],eax
        binary += b"\xe8\xc5\x0a\x00\x00"         # 0x08048ee6 : call   80499b0 <_IO_gets>
        binary += b"\x8b\x45\xfc"                 # 0x08048eeb : mov    eax,DWORD PTR [ebp-0x4]
        binary += b"\x3d\x44\x43\x42\x41"         # 0x08048eee : cmp    eax,0x41424344
        binary += b"\x75\x0c"                     # 0x08048ef3 : jne    8048f01 <main+0x41>
        binary += b"\xc7\x04\x24\xc0\x5a\x0c\x08" # 0x08048ef5 : mov    DWORD PTR [esp],0x80c5ac0
        binary += b"\xe8\x4f\x0c\x00\x00"         # 0x08048efc : call   8049b50 <_IO_puts>
        binary += b"\xc9"                         # 0x08048f01 : leave
        binary += b"\xc3"                         # 0x08048f02 : ret

        self._memory.add_vma(bin_start_address, bytearray(binary))

        start = 0x08048ec0
        # start = 0x08048ec6
        # end = 0x08048efc
        end = 0x08048f01

        registers = {
            "eax": GenericRegister("eax", 32, 0xffffd0ec),
            "ecx": GenericRegister("ecx", 32, 0x00000001),
            "edx": GenericRegister("edx", 32, 0xffffd0e4),
            "ebx": GenericRegister("ebx", 32, 0x00000000),
            "esp": GenericRegister("esp", 32, 0xffffd05c),
            "ebp": GenericRegister("ebp", 32, 0x08049580),
            "esi": GenericRegister("esi", 32, 0x00000000),
            "edi": GenericRegister("edi", 32, 0x08049620),
            "eip": GenericRegister("eip", 32, 0x08048ec0),
        }

        flags = {
            "af": GenericFlag("af", 0x0),
            "cf": GenericFlag("cf", 0x0),
            "of": GenericFlag("of", 0x0),
            "pf": GenericFlag("pf", 0x1),
            "sf": GenericFlag("sf", 0x0),
            "zf": GenericFlag("zf", 0x1),
        }

        memory = {}

        # The call targets returned by build() are not used by this test.
        bb_list, _ = self._bb_builder.build(bin_start_address, bin_end_address)
        bb_graph = ControlFlowGraph(bb_list)

        codeAnalyzer = CodeAnalyzer(self._smt_solver, self._smt_translator, self._arch_info)
        codeAnalyzer.set_context(GenericContext(registers, flags, memory))

        for bb_path in bb_graph.all_simple_bb_paths(start, end):
            if VERBOSE:
                path_repr = " -> ".join(hex(bb.address) for bb in bb_path)

                print("[+] Checking path satisfiability :")
                print(" From : %s" % hex(start))
                print(" To : %s" % hex(end))
                print(" Path : %s" % path_repr)

            is_sat = codeAnalyzer.check_path_satisfiability(bb_path, start, verbose=False)

            if VERBOSE:
                print("[+] Satisfiability : %s" % str(is_sat))

            self.assertTrue(is_sat)

            if is_sat and VERBOSE:
                print(codeAnalyzer.get_context())

            if VERBOSE:
                print(":" * 80)
                print("")