def __init__(self, binary):
    """Wire up the x86 disassembly / translation pipeline for ``binary``.

    :param binary: object exposing ``architecture_mode`` and
        ``text_section``; both are read here.
    """
    self.__binary = binary

    # Every x86 component below is parameterised by the same mode.
    mode = binary.architecture_mode
    self.__arch_mode = mode

    self.__arch = X86ArchitectureInformation(mode)
    self.__disassembler = X86Disassembler(mode)
    self.__translator = X86Translator(mode)

    # Basic blocks are recovered by recursive descent over the text section.
    strategy = RecursiveDescent(
        self.__disassembler,
        binary.text_section,
        self.__translator,
        self.__arch,
    )
    self.__bb_builder = CFGRecoverer(strategy)
def test_sample_2(self):
    # Recovers "main", "func_1" and "func_2" from x86_sample_2 and checks
    # the shape of each resulting control flow graph.
    binary = BinaryFile(get_full_path("/data/bin/x86_sample_2"))
    strategy = RecursiveDescent(self._disassembler,
                                binary.text_section,
                                self._translator,
                                self._arch_info)
    recoverer = CFGRecoverer(strategy)

    # Recover "main" function.
    bbs, _ = recoverer.build(0x0804846d, 0x080484a3)

    self.assertEqual(len(bbs), 4)

    cfg = ControlFlowGraph(bbs, name="main")

    self.assertEqual(cfg.start_address, 0x0804846d)
    self.assertEqual(cfg.end_address, 0x080484a3)
    self.assertEqual(len(cfg.basic_blocks), 4)

    # The entry block ends in a conditional branch: two successors.
    bb_entry = cfg.find_basic_block(0x0804846d)
    self.assertEqual(len(bb_entry.branches), 2)
    self.assertEqual(bb_entry.taken_branch, 0x08048491)
    self.assertEqual(bb_entry.not_taken_branch, 0x0804848a)

    # Both successors of the entry block have a single direct branch to
    # the same address.
    bb_taken = cfg.find_basic_block(0x08048491)
    self.assertEqual(len(bb_taken.branches), 1)
    self.assertIsNone(bb_taken.taken_branch)
    self.assertIsNone(bb_taken.not_taken_branch)
    self.assertEqual(bb_taken.direct_branch, 0x08048496)

    bb_not_taken = cfg.find_basic_block(0x0804848a)
    self.assertEqual(len(bb_not_taken.branches), 1)
    self.assertIsNone(bb_not_taken.taken_branch)
    self.assertIsNone(bb_not_taken.not_taken_branch)
    self.assertEqual(bb_not_taken.direct_branch, 0x08048496)

    # Recover "func_1" function.
    bbs, _ = recoverer.build(0x0804843b, 0x8048453)

    self.assertEqual(len(bbs), 1)

    # Label the graph after the function actually recovered (it used to be
    # a copy-pasted "main").
    cfg = ControlFlowGraph(bbs, name="func_1")

    self.assertEqual(cfg.start_address, 0x0804843b)
    self.assertEqual(cfg.end_address, 0x8048453)
    self.assertEqual(len(cfg.basic_blocks), 1)

    # Recover "func_2" function.
    bbs, _ = recoverer.build(0x08048454, 0x0804846c)

    self.assertEqual(len(bbs), 1)

    cfg = ControlFlowGraph(bbs, name="func_2")

    self.assertEqual(cfg.start_address, 0x08048454)
    self.assertEqual(cfg.end_address, 0x0804846c)
    self.assertEqual(len(cfg.basic_blocks), 1)
def test_sample_1(self):
    # Recovers "main" from x86_sample_1 and expects one basic block
    # spanning the whole requested address range.
    start, end = 0x0804840b, 0x08048438

    binary = BinaryFile(get_full_path("/data/bin/x86_sample_1"))
    recoverer = CFGRecoverer(
        RecursiveDescent(self._disassembler,
                         binary.text_section,
                         self._translator,
                         self._arch_info))

    bbs, _ = recoverer.build(start, end)

    self.assertEqual(len(bbs), 1)

    cfg = ControlFlowGraph(bbs, name="main")

    self.assertEqual(cfg.start_address, start)
    self.assertEqual(cfg.end_address, end)
    self.assertEqual(len(cfg.basic_blocks), 1)
def __init__(self, binary, symbols):
    """Wire up the x86 pipeline for ``binary`` and index ``symbols``.

    :param binary: object exposing ``architecture_mode`` and
        ``text_section``; both are read here.
    :param symbols: iterable of ``(name, start, end)`` triples.
    """
    self.__binary = binary

    # Every x86 component below is parameterised by the same mode.
    mode = binary.architecture_mode
    self.__arch_mode = mode

    self.__arch = X86ArchitectureInformation(mode)
    self.__disassembler = X86Disassembler(mode)
    self.__translator = X86Translator(mode)

    strategy = RecursiveDescent(
        self.__disassembler,
        binary.text_section,
        self.__translator,
        self.__arch,
    )
    self.__bb_builder = CFGRecoverer(strategy)

    # Translated sequences, keyed by base address; starts out empty.
    self.__container = {}

    self.__symbols = symbols

    # start address -> (name, end); a later triple with the same start
    # address replaces an earlier one.
    self.__symbols_by_addr = {
        start: (name, end) for name, start, end in symbols
    }
def __init__(self, binary):
    """Create the disassembler, translator and CFG recoverer for ``binary``.

    :param binary: object exposing ``architecture_mode`` and
        ``text_section``; both are read here.
    """
    self.__binary = binary

    # One architecture mode drives all of the x86 helpers.
    arch_mode = binary.architecture_mode
    self.__arch_mode = arch_mode

    arch_info = X86ArchitectureInformation(arch_mode)
    disassembler = X86Disassembler(arch_mode)
    translator = X86Translator(arch_mode)

    self.__arch = arch_info
    self.__disassembler = disassembler
    self.__translator = translator

    # Recursive descent over the text section feeds the CFG recoverer.
    self.__bb_builder = CFGRecoverer(
        RecursiveDescent(disassembler, binary.text_section, translator, arch_info)
    )
def __init__(self, binary, symbols):
    """Create the x86 helpers for ``binary`` and build the symbol index.

    :param binary: object exposing ``architecture_mode`` and
        ``text_section``; both are read here.
    :param symbols: iterable of ``(name, start, end)`` triples.
    """
    self.__binary = binary

    # One architecture mode drives all of the x86 helpers.
    arch_mode = binary.architecture_mode
    self.__arch_mode = arch_mode

    arch_info = X86ArchitectureInformation(arch_mode)
    disassembler = X86Disassembler(arch_mode)
    translator = X86Translator(arch_mode)

    self.__arch = arch_info
    self.__disassembler = disassembler
    self.__translator = translator

    # Recursive descent over the text section feeds the CFG recoverer.
    self.__bb_builder = CFGRecoverer(
        RecursiveDescent(disassembler, binary.text_section, translator, arch_info)
    )

    # Translated sequences, keyed by base address; starts out empty.
    self.__container = {}

    self.__symbols = symbols

    # start address -> (name, end); a later triple with the same start
    # address replaces an earlier one.
    self.__symbols_by_addr = {
        start: (name, end) for name, start, end in symbols
    }
class ReilContainerBuilder(object):
    """Translate functions of an x86 binary into a ``ReilContainer``.

    Each function is recovered as a control flow graph, every assembly
    instruction in it is translated into one ``ReilSequence``, and the
    sequences are chained through ``next_sequence_address``.
    """

    def __init__(self, binary):
        """Set up the x86 disassembly / translation pipeline.

        :param binary: object exposing ``architecture_mode`` and
            ``text_section``; both are read here.
        """
        self.__binary = binary
        self.__arch_mode = self.__binary.architecture_mode
        self.__arch = X86ArchitectureInformation(self.__arch_mode)
        self.__disassembler = X86Disassembler(self.__arch_mode)
        self.__translator = X86Translator(self.__arch_mode)
        self.__bb_builder = CFGRecoverer(
            RecursiveDescent(self.__disassembler,
                             self.__binary.text_section,
                             self.__translator,
                             self.__arch))

    def build(self, functions):
        """Translate every function in ``functions`` into one container.

        :param functions: iterable of ``(name, start, end)`` triples; the
            name is ignored.
        :returns: the ``ReilContainer`` holding all translated sequences.
        """
        reil_container = ReilContainer()

        for _, start, end in functions:
            # build() returns a pair; only the basic blocks are needed.
            bbs, _ = self.__bb_builder.build(start, end)

            cfg = ControlFlowGraph(bbs)

            reil_container = self.__translate_cfg(
                cfg, reil_container=reil_container)

        return reil_container

    # Auxiliary methods
    # ======================================================================== #
    def __translate_cfg(self, cfg, reil_container=None):
        """Translate all instructions of ``cfg`` into ``reil_container``.

        :param cfg: object whose ``basic_blocks`` are iterables of
            assembly instructions.
        :param reil_container: container to add the sequences to; a new
            ``ReilContainer`` is created only when this is ``None``.
        :returns: the container the sequences were added to.
        """
        # Compare against None explicitly: a caller-supplied container that
        # happens to be falsy (e.g. empty) must still be the one filled in.
        if reil_container is None:
            reil_container = ReilContainer()

        asm_instrs = []

        for bb in cfg.basic_blocks:
            asm_instrs.extend(bb)

        return self.__translate(asm_instrs, reil_container)

    def __translate(self, asm_instrs, reil_container):
        """Translate ``asm_instrs`` and chain the resulting sequences.

        :param asm_instrs: assembly instructions, in the order they should
            be linked.
        :param reil_container: object with an ``add(sequence)`` method.
        :returns: ``reil_container``.
        """
        asm_instr_last = None
        instr_seq_prev = None

        for asm_instr in asm_instrs:
            instr_seq = ReilSequence()

            for reil_instr in self.__translator.translate(asm_instr):
                instr_seq.append(reil_instr)

            # Link the previous sequence to the one just built.
            if instr_seq_prev:
                instr_seq_prev.next_sequence_address = instr_seq.address

            reil_container.add(instr_seq)

            instr_seq_prev = instr_seq
            # Remember the instruction behind the last sequence; without
            # this the fall-through link below could never be set.
            asm_instr_last = asm_instr

        # The last sequence falls through to the address right after its
        # assembly instruction, shifted left by 8 bits as in the original
        # expression.
        if instr_seq_prev and asm_instr_last is not None:
            instr_seq_prev.next_sequence_address = (
                asm_instr_last.address + asm_instr_last.size) << 8

        return reil_container
class ReilContainerEx(object):
    """REIL container that translates functions on demand.

    Sequences are stored by base address. When ``fetch`` is asked for an
    address that is not stored yet, the address is looked up in the symbol
    table and the function starting there is recovered, translated and
    added to this container.
    """

    def __init__(self, binary, symbols):
        """Set up the x86 pipeline and index the symbols by start address.

        :param binary: object exposing ``architecture_mode`` and
            ``text_section``; both are read here.
        :param symbols: iterable of ``(name, start, end)`` triples.
        """
        self.__binary = binary
        self.__arch_mode = self.__binary.architecture_mode
        self.__arch = X86ArchitectureInformation(self.__arch_mode)
        self.__disassembler = X86Disassembler(self.__arch_mode)
        self.__translator = X86Translator(self.__arch_mode)
        self.__bb_builder = CFGRecoverer(
            RecursiveDescent(self.__disassembler,
                             self.__binary.text_section,
                             self.__translator,
                             self.__arch))

        # base address -> translated sequence
        self.__container = {}

        self.__symbols = symbols

        # start address -> (name, end)
        self.__symbols_by_addr = {}

        for name, start, end in symbols:
            self.__symbols_by_addr[start] = (name, end)

    # Auxiliary methods
    # ======================================================================== #
    def __translate_cfg(self, cfg, reil_container=None):
        """Translate all instructions of ``cfg`` into ``reil_container``.

        :param cfg: object whose ``basic_blocks`` are iterables of
            assembly instructions.
        :param reil_container: container to add the sequences to; a new
            ``ReilContainer`` is created only when this is ``None``.
        :returns: the container the sequences were added to.
        """
        # Compare against None explicitly: a caller-supplied container that
        # happens to be falsy (e.g. empty) must still be the one filled in.
        if reil_container is None:
            reil_container = ReilContainer()

        asm_instrs = []

        for bb in cfg.basic_blocks:
            asm_instrs.extend(bb)

        return self.__translate(asm_instrs, reil_container)

    def __translate(self, asm_instrs, reil_container):
        """Translate ``asm_instrs`` and chain the resulting sequences.

        :param asm_instrs: assembly instructions, in the order they should
            be linked.
        :param reil_container: object with an ``add(sequence)`` method.
        :returns: ``reil_container``.
        """
        asm_instr_last = None
        instr_seq_prev = None

        for asm_instr in asm_instrs:
            instr_seq = ReilSequence()

            for reil_instr in self.__translator.translate(asm_instr):
                instr_seq.append(reil_instr)

            # Link the previous sequence to the one just built.
            if instr_seq_prev:
                instr_seq_prev.next_sequence_address = instr_seq.address

            reil_container.add(instr_seq)

            instr_seq_prev = instr_seq
            # Remember the instruction behind the last sequence; without
            # this the fall-through link below could never be set.
            asm_instr_last = asm_instr

        # The last sequence falls through to the address right after its
        # assembly instruction, shifted left by 8 bits as in the original
        # expression.
        if instr_seq_prev and asm_instr_last is not None:
            instr_seq_prev.next_sequence_address = (
                asm_instr_last.address + asm_instr_last.size) << 8

        return reil_container

    def add(self, sequence):
        """Store ``sequence`` under the base part of its address.

        :raises Exception: if a sequence is already stored for that base
            address.
        """
        base_addr, _ = split_address(sequence.address)

        if base_addr in self.__container:
            raise Exception("Invalid sequence")

        self.__container[base_addr] = sequence

    def fetch(self, address):
        """Return the instruction at ``address``, translating on demand.

        :raises Exception: if the base address is neither stored nor the
            start address of a known symbol.
        """
        base_addr, index = split_address(address)

        if base_addr not in self.__container:
            self.__resolve_address(base_addr)

        return self.__container[base_addr].get(index)

    def get_next_address(self, address):
        """Return the address that follows ``address``.

        :raises Exception: if no sequence is stored for the base address.
        """
        base_addr, index = split_address(address)

        if base_addr not in self.__container:
            raise Exception("Invalid address.")

        sequence = self.__container[base_addr]

        # Inside a sequence the next instruction is one index further on;
        # from the last instruction, follow the link to the next sequence.
        if index < len(sequence) - 1:
            return address + 1

        return sequence.next_sequence_address

    def dump(self):
        """Dump every stored sequence in ascending base address order."""
        for base_addr in sorted(self.__container):
            self.__container[base_addr].dump()

            print("-" * 80)

    def __iter__(self):
        """Yield every stored instruction in ascending base address order."""
        for addr in sorted(self.__container):
            for instr in self.__container[addr]:
                yield instr

    def __resolve_address(self, address):
        """Translate the function starting at ``address`` into this container.

        :raises Exception: if ``address`` is not the start of a known symbol.
        """
        if address not in self.__symbols_by_addr:
            raise Exception("Symbol not found!")

        _, end = self.__symbols_by_addr[address]

        # build() returns a pair; only the basic blocks belong in the CFG.
        bbs, _ = self.__bb_builder.build(address, end)

        cfg = ControlFlowGraph(bbs)

        self.__translate_cfg(cfg, reil_container=self)
class ReilContainerBuilder(object):
    """Translate functions of an x86 binary into a ``ReilContainer``.

    Each function is recovered as a control flow graph, every assembly
    instruction in it is translated into one ``ReilSequence``, and the
    sequences are chained through ``next_sequence_address``.
    """

    def __init__(self, binary):
        """Set up the x86 disassembly / translation pipeline.

        :param binary: object exposing ``architecture_mode`` and
            ``text_section``; both are read here.
        """
        self.__binary = binary
        self.__arch_mode = self.__binary.architecture_mode
        self.__arch = X86ArchitectureInformation(self.__arch_mode)
        self.__disassembler = X86Disassembler(self.__arch_mode)
        self.__translator = X86Translator(self.__arch_mode)
        self.__bb_builder = CFGRecoverer(
            RecursiveDescent(self.__disassembler,
                             self.__binary.text_section,
                             self.__translator,
                             self.__arch))

    def build(self, functions):
        """Translate every function in ``functions`` into one container.

        :param functions: iterable of ``(name, start, end)`` triples; the
            name is ignored.
        :returns: the ``ReilContainer`` holding all translated sequences.
        """
        reil_container = ReilContainer()

        for _, start, end in functions:
            # build() returns a pair; only the basic blocks are needed.
            bbs, _ = self.__bb_builder.build(start, end)

            cfg = ControlFlowGraph(bbs)

            reil_container = self.__translate_cfg(
                cfg, reil_container=reil_container)

        return reil_container

    # Auxiliary methods
    # ======================================================================== #
    def __translate_cfg(self, cfg, reil_container=None):
        """Translate all instructions of ``cfg`` into ``reil_container``.

        :param cfg: object whose ``basic_blocks`` are iterables of
            assembly instructions.
        :param reil_container: container to add the sequences to; a new
            ``ReilContainer`` is created only when this is ``None``.
        :returns: the container the sequences were added to.
        """
        # Compare against None explicitly: a caller-supplied container that
        # happens to be falsy (e.g. empty) must still be the one filled in.
        if reil_container is None:
            reil_container = ReilContainer()

        asm_instrs = []

        for bb in cfg.basic_blocks:
            asm_instrs.extend(bb)

        return self.__translate(asm_instrs, reil_container)

    def __translate(self, asm_instrs, reil_container):
        """Translate ``asm_instrs`` and chain the resulting sequences.

        :param asm_instrs: assembly instructions, in the order they should
            be linked.
        :param reil_container: object with an ``add(sequence)`` method.
        :returns: ``reil_container``.
        """
        asm_instr_last = None
        instr_seq_prev = None

        for asm_instr in asm_instrs:
            instr_seq = ReilSequence()

            for reil_instr in self.__translator.translate(asm_instr):
                instr_seq.append(reil_instr)

            # Link the previous sequence to the one just built.
            if instr_seq_prev:
                instr_seq_prev.next_sequence_address = instr_seq.address

            reil_container.add(instr_seq)

            instr_seq_prev = instr_seq
            # Remember the instruction behind the last sequence; without
            # this the fall-through link below could never be set.
            asm_instr_last = asm_instr

        # The last sequence falls through to the address right after its
        # assembly instruction, shifted left by 8 bits as in the original
        # expression.
        if instr_seq_prev and asm_instr_last is not None:
            instr_seq_prev.next_sequence_address = (
                asm_instr_last.address + asm_instr_last.size) << 8

        return reil_container
class ReilContainerEx(object):
    """REIL container that translates functions on demand.

    Sequences are stored by base address. When ``fetch`` is asked for an
    address that is not stored yet, the address is looked up in the symbol
    table and the function starting there is recovered, translated and
    added to this container.
    """

    def __init__(self, binary, symbols):
        """Set up the x86 pipeline and index the symbols by start address.

        :param binary: object exposing ``architecture_mode`` and
            ``text_section``; both are read here.
        :param symbols: iterable of ``(name, start, end)`` triples.
        """
        self.__binary = binary
        self.__arch_mode = self.__binary.architecture_mode
        self.__arch = X86ArchitectureInformation(self.__arch_mode)
        self.__disassembler = X86Disassembler(self.__arch_mode)
        self.__translator = X86Translator(self.__arch_mode)
        self.__bb_builder = CFGRecoverer(
            RecursiveDescent(self.__disassembler,
                             self.__binary.text_section,
                             self.__translator,
                             self.__arch))

        # base address -> translated sequence
        self.__container = {}

        self.__symbols = symbols

        # start address -> (name, end)
        self.__symbols_by_addr = {}

        for name, start, end in symbols:
            self.__symbols_by_addr[start] = (name, end)

    # Auxiliary methods
    # ======================================================================== #
    def __translate_cfg(self, cfg, reil_container=None):
        """Translate all instructions of ``cfg`` into ``reil_container``.

        :param cfg: object whose ``basic_blocks`` are iterables of
            assembly instructions.
        :param reil_container: container to add the sequences to; a new
            ``ReilContainer`` is created only when this is ``None``.
        :returns: the container the sequences were added to.
        """
        # Compare against None explicitly: a caller-supplied container that
        # happens to be falsy (e.g. empty) must still be the one filled in.
        if reil_container is None:
            reil_container = ReilContainer()

        asm_instrs = []

        for bb in cfg.basic_blocks:
            asm_instrs.extend(bb)

        return self.__translate(asm_instrs, reil_container)

    def __translate(self, asm_instrs, reil_container):
        """Translate ``asm_instrs`` and chain the resulting sequences.

        :param asm_instrs: assembly instructions, in the order they should
            be linked.
        :param reil_container: object with an ``add(sequence)`` method.
        :returns: ``reil_container``.
        """
        asm_instr_last = None
        instr_seq_prev = None

        for asm_instr in asm_instrs:
            instr_seq = ReilSequence()

            for reil_instr in self.__translator.translate(asm_instr):
                instr_seq.append(reil_instr)

            # Link the previous sequence to the one just built.
            if instr_seq_prev:
                instr_seq_prev.next_sequence_address = instr_seq.address

            reil_container.add(instr_seq)

            instr_seq_prev = instr_seq
            # Remember the instruction behind the last sequence; without
            # this the fall-through link below could never be set.
            asm_instr_last = asm_instr

        # The last sequence falls through to the address right after its
        # assembly instruction, shifted left by 8 bits as in the original
        # expression.
        if instr_seq_prev and asm_instr_last is not None:
            instr_seq_prev.next_sequence_address = (
                asm_instr_last.address + asm_instr_last.size) << 8

        return reil_container

    def add(self, sequence):
        """Store ``sequence`` under the base part of its address.

        :raises Exception: if a sequence is already stored for that base
            address.
        """
        base_addr, _ = split_address(sequence.address)

        if base_addr in self.__container:
            raise Exception("Invalid sequence")

        self.__container[base_addr] = sequence

    def fetch(self, address):
        """Return the instruction at ``address``, translating on demand.

        :raises Exception: if the base address is neither stored nor the
            start address of a known symbol.
        """
        base_addr, index = split_address(address)

        if base_addr not in self.__container:
            self.__resolve_address(base_addr)

        return self.__container[base_addr].get(index)

    def get_next_address(self, address):
        """Return the address that follows ``address``.

        :raises Exception: if no sequence is stored for the base address.
        """
        base_addr, index = split_address(address)

        if base_addr not in self.__container:
            raise Exception("Invalid address.")

        sequence = self.__container[base_addr]

        # Inside a sequence the next instruction is one index further on;
        # from the last instruction, follow the link to the next sequence.
        if index < len(sequence) - 1:
            return address + 1

        return sequence.next_sequence_address

    def dump(self):
        """Dump every stored sequence in ascending base address order."""
        for base_addr in sorted(self.__container):
            self.__container[base_addr].dump()

            print("-" * 80)

    def __iter__(self):
        """Yield every stored instruction in ascending base address order."""
        for addr in sorted(self.__container):
            for instr in self.__container[addr]:
                yield instr

    def __resolve_address(self, address):
        """Translate the function starting at ``address`` into this container.

        :raises Exception: if ``address`` is not the start of a known symbol.
        """
        if address not in self.__symbols_by_addr:
            raise Exception("Symbol not found!")

        _, end = self.__symbols_by_addr[address]

        # build() returns a pair; only the basic blocks belong in the CFG.
        bbs, _ = self.__bb_builder.build(address, end)

        cfg = ControlFlowGraph(bbs)

        self.__translate_cfg(cfg, reil_container=self)