class BARF(object):
    """Binary Analysis Framework.

    Opens an executable and wires together the disassembler, the REIL
    translator and emulator, the SMT solver/translator and the analysis
    modules (basic blocks, code analyzer, gadget tooling) built on them.
    """

    def __init__(self, filename):
        """Initialize every component to ``None`` and open ``filename``.

        :param filename: name of an executable file
        :type filename: str
        """
        logger.info("[+] BARF: Initializing...")

        self.code_analyzer = None
        self.ir_translator = None
        self.binary = None
        self.smt_solver = None
        self.gadget_classifier = None
        self.gadget_verifier = None
        self.arch_info = None
        self.gadget_finder = None
        self.text_section = None
        self.disassembler = None
        self.smt_translator = None
        self.ir_emulator = None
        self.bb_builder = None

        self.open(filename)

    def _load(self):
        """Set up architecture, core modules and analysis modules, in order."""
        # setup architecture
        self._setup_arch()

        # set up core modules
        self._setup_core_modules()

        # setup analysis modules
        self._setup_analysis_modules()

    def _setup_arch(self):
        """Set up architecture.

        Anything that is not reported as x86 by the binary is treated as ARM.
        """
        # set up architecture information
        self.arch_info = None

        if self.binary.architecture == arch.ARCH_X86:
            self._setup_x86_arch()
        else:
            # TODO: add arch in the binary file class
            self._setup_arm_arch()

    def _setup_arm_arch(self):
        """Set up ARM architecture (always in Thumb mode)."""
        arch_mode = arch.ARCH_ARM_MODE_THUMB

        self.arch_info = ArmArchitectureInformation(arch_mode)
        self.disassembler = ArmDisassembler(architecture_mode=arch_mode)
        self.ir_translator = ArmTranslator(architecture_mode=arch_mode)

    def _setup_x86_arch(self):
        """Set up x86 architecture using the mode reported by the binary."""
        arch_mode = self.binary.architecture_mode

        # Set up architecture information
        self.arch_info = X86ArchitectureInformation(arch_mode)
        self.disassembler = X86Disassembler(architecture_mode=arch_mode)
        self.ir_translator = X86Translator(architecture_mode=arch_mode)

    def _setup_core_modules(self):
        """Set up core modules (REIL emulator, SMT solver and translator).

        :raises Exception: if ``SMT_SOLVER`` is set to an unknown solver name
        """
        self.ir_emulator = None
        self.smt_solver = None
        self.smt_translator = None

        if self.arch_info:
            # Set REIL emulator.
            self.ir_emulator = ReilEmulator(self.arch_info)

            # Set SMT Solver.
            if SMT_SOLVER == "Z3":
                self.smt_solver = Z3Solver()
            elif SMT_SOLVER == "CVC4":
                self.smt_solver = CVC4Solver()
            elif SMT_SOLVER is not None:
                raise Exception("Invalid SMT solver.")

            # Set SMT translator (only when a solver was configured).
            if self.smt_solver:
                self.smt_translator = SmtTranslator(self.smt_solver, self.arch_info.address_size)

                self.smt_translator.set_arch_alias_mapper(self.arch_info.alias_mapper)
                self.smt_translator.set_arch_registers_size(self.arch_info.registers_size)

    def _setup_analysis_modules(self):
        """Set up analysis modules."""
        # Basic blocks.
        self.bb_builder = BasicBlockBuilder(self.disassembler, self.text_section, self.ir_translator,
                                            self.arch_info)

        # Code analyzer.
        self.code_analyzer = CodeAnalyzer(self.smt_solver, self.smt_translator, self.arch_info)

        # Gadgets.
        self.gadget_classifier = GadgetClassifier(self.ir_emulator, self.arch_info)
        self.gadget_finder = GadgetFinder(self.disassembler, self.text_section, self.ir_translator,
                                          self.binary.architecture, self.binary.architecture_mode)
        self.gadget_verifier = GadgetVerifier(self.code_analyzer, self.arch_info)

    # ======================================================================== #

    def open(self, filename):
        """Open a file for analysis.

        Does nothing when ``filename`` is empty or ``None``.

        :param filename: name of an executable file
        :type filename: str
        """
        if filename:
            self.binary = BinaryFile(filename)
            self.text_section = self.binary.text_section

            self._load()

    def translate(self, ea_start=None, ea_end=None):
        """Translate to REIL instructions.

        :param ea_start: start address (defaults to the binary's start)
        :type ea_start: int
        :param ea_end: end address (defaults to the binary's end)
        :type ea_end: int

        :returns: a generator of tuples of the form (address, assembler
            instruction, translation returned by the IR translator)
        """
        start_addr = ea_start if ea_start else self.binary.ea_start
        end_addr = ea_end if ea_end else self.binary.ea_end

        self.ir_translator.reset()

        for addr, asm, _ in self.disassemble(start_addr, end_addr):
            yield addr, asm, self.ir_translator.translate(asm)

    def disassemble(self, ea_start=None, ea_end=None):
        """Disassemble assembler instructions.

        Stops early when the disassembler returns a falsy result.

        :param ea_start: start address (defaults to the binary's start)
        :type ea_start: int
        :param ea_end: end address (defaults to the binary's end)
        :type ea_end: int

        :returns: a generator of tuples of the form (address, assembler
            instruction, instruction size)
        :rtype: (int, Instruction, int)
        """
        curr_addr = ea_start if ea_start else self.binary.ea_start
        end_addr = ea_end if ea_end else self.binary.ea_end

        while curr_addr < end_addr:
            # Hand the disassembler a window of at most 16 bytes, clipped to
            # the end of the binary.
            start, end = curr_addr, min(curr_addr + 16, self.binary.ea_end + 1)
            asm = self.disassembler.disassemble(self.text_section[start:end], curr_addr)

            if not asm:
                return

            yield curr_addr, asm, asm.size

            # update instruction pointer
            curr_addr += asm.size

    def recover_cfg(self, ea_start=None, ea_end=None, symbols=None):
        """Recover CFG.

        :param ea_start: start address (defaults to the binary's start)
        :type ea_start: int
        :param ea_end: end address (defaults to the binary's end)
        :type ea_end: int
        :param symbols: symbol information forwarded to the basic block builder

        :returns: a graph where each node is a basic block
        :rtype: BasicBlockGraph
        """
        start_addr = ea_start if ea_start else self.binary.ea_start
        end_addr = ea_end if ea_end else self.binary.ea_end

        bb_list = self.bb_builder.build(start_addr, end_addr, symbols)

        return BasicBlockGraph(bb_list)

    def recover_bbs(self, ea_start=None, ea_end=None):
        """Recover basic blocks.

        :param ea_start: start address (defaults to the binary's start)
        :type ea_start: int
        :param ea_end: end address (defaults to the binary's end)
        :type ea_end: int

        :returns: a list of basic blocks
        :rtype: list
        """
        start_addr = ea_start if ea_start else self.binary.ea_start
        end_addr = ea_end if ea_end else self.binary.ea_end

        return self.bb_builder.build(start_addr, end_addr)

    def emulate_full(self, context, ea_start=None, ea_end=None):
        """Emulate REIL instructions.

        Disassembles and translates the requested range into a
        ``ReilContainer`` and runs it on the REIL emulator.

        :param context: processor context; optional ``'registers'`` and
            ``'memory'`` mappings are loaded into the emulator
        :type context: dict
        :param ea_start: start address (defaults to the binary's start)
        :type ea_start: int
        :param ea_end: end address (defaults to the binary's end)
        :type ea_end: int

        :returns: a context with the emulator registers; ``'memory'`` is
            always returned empty
        :rtype: dict
        """
        # Imported here, as in the original code, rather than at module level.
        from core.reil.reil import ReilContainer
        from core.reil.reil import ReilSequence

        start_addr = ea_start if ea_start else self.binary.ea_start
        end_addr = ea_end if ea_end else self.binary.ea_end

        # load registers
        if 'registers' in context:
            for reg, val in context['registers'].items():
                self.ir_emulator.registers[reg] = val

        # load memory
        if 'memory' in context:
            for addr, val in context['memory'].items():
                # Floor division keeps the size an int under true division.
                self.ir_emulator.memory.write(addr, 32 // 8, val)

        # Create ReilContainer: one sequence per assembler instruction, each
        # linked to the address of the sequence that follows it.
        instr_container = ReilContainer()

        asm_instr_last = None
        instr_seq_prev = None

        for _, asm_instr, _ in self.disassemble(ea_start, ea_end):
            instr_seq = ReilSequence()

            for reil_instr in self.ir_translator.translate(asm_instr):
                instr_seq.append(reil_instr)

            if instr_seq_prev:
                instr_seq_prev.next_sequence_address = instr_seq.address

            instr_container.add(instr_seq)

            instr_seq_prev = instr_seq

        # NOTE(review): asm_instr_last is never reassigned inside the loop
        # above, so this fix-up never runs and the last sequence keeps the
        # next_sequence_address it was created with. Confirm how the emulator
        # treats that before wiring it up.
        if instr_seq_prev:
            if asm_instr_last:
                instr_seq_prev.next_sequence_address = (asm_instr_last.address + asm_instr_last.size) << 8

        self.ir_emulator.execute(instr_container, start_addr << 8, end=end_addr << 8)

        context_out = {}

        # save registers
        context_out['registers'] = {}

        for reg, val in self.ir_emulator.registers.items():
            context_out['registers'][reg] = val

        # save memory
        context_out['memory'] = {}

        return context_out

    def emulate_full_ex(self, context, instr_container, ea_start=None, ea_end=None):
        """Emulate REIL instructions from an instruction container.

        :param context: processor context; optional ``'registers'`` and
            ``'memory'`` mappings are loaded into the emulator
        :type context: dict
        :param instr_container: container handed to the emulator as is
        :param ea_start: start address (defaults to the binary's start)
        :type ea_start: int
        :param ea_end: end address (defaults to the binary's end)
        :type ea_end: int

        :returns: a context with the emulator registers; ``'memory'`` is
            always returned empty
        :rtype: dict
        """
        start_addr = ea_start if ea_start else self.binary.ea_start
        end_addr = ea_end if ea_end else self.binary.ea_end

        # load registers
        if 'registers' in context:
            for reg, val in context['registers'].items():
                self.ir_emulator.registers[reg] = val

        # load memory
        if 'memory' in context:
            for addr, val in context['memory'].items():
                # Floor division keeps the size an int under true division.
                self.ir_emulator.memory.write(addr, 32 // 8, val)

        self.ir_emulator.execute(instr_container, start_addr << 8, end=end_addr << 8)

        context_out = {}

        # save registers
        context_out['registers'] = {}

        for reg, val in self.ir_emulator.registers.items():
            context_out['registers'][reg] = val

        # save memory
        context_out['memory'] = {}

        return context_out
class BARF(object):
    """Binary Analysis Framework."""

    def __init__(self, filename):
        """Reset every component and open ``filename`` for analysis.

        :param filename: name of an executable file
        :type filename: str
        """
        logger.info("[+] BARF: Initializing...")

        # Binary and architecture.
        self.binary = None
        self.text_section = None
        self.arch_info = None

        # Core modules.
        self.disassembler = None
        self.ir_translator = None
        self.ir_emulator = None
        self.smt_solver = None
        self.smt_translator = None

        # Analysis modules.
        self.bb_builder = None
        self.code_analyzer = None
        self.gadget_classifier = None
        self.gadget_finder = None
        self.gadget_verifier = None

        self.open(filename)

    def _load(self):
        """Run the three set-up stages in dependency order."""
        self._setup_arch()
        self._setup_core_modules()
        self._setup_analysis_modules()

    def _setup_arch(self):
        """Pick the architecture set-up routine for the opened binary."""
        self.arch_info = None

        is_x86 = self.binary.architecture == arch.ARCH_X86

        # TODO: add arch in the binary file class
        setup = self._setup_x86_arch if is_x86 else self._setup_arm_arch
        setup()

    def _setup_arm_arch(self):
        """Create the ARM architecture information, disassembler and translator."""
        mode = ARCH_ARM_MODE_32

        self.arch_info = ArmArchitectureInformation(mode)
        self.disassembler = ArmDisassembler(architecture_mode=mode)
        self.ir_translator = ArmTranslator(architecture_mode=mode)

    def _setup_x86_arch(self):
        """Create the x86 architecture information, disassembler and translator."""
        mode = self.binary.architecture_mode

        self.arch_info = X86ArchitectureInformation(mode)
        self.disassembler = X86Disassembler(architecture_mode=mode)
        self.ir_translator = X86Translator(architecture_mode=mode)

    def _setup_core_modules(self):
        """Create and configure the REIL emulator and the SMT solver/translator.

        :raises Exception: if ``SMT_SOLVER`` is neither "Z3" nor "CVC4"
        """
        self.ir_emulator = None
        self.smt_solver = None
        self.smt_translator = None

        info = self.arch_info

        # Without architecture information there is nothing to configure.
        if not info:
            return

        self.ir_emulator = ReilEmulator(info.address_size)

        if SMT_SOLVER == "Z3":
            self.smt_solver = Z3Solver()
        elif SMT_SOLVER == "CVC4":
            self.smt_solver = CVC4Solver()
        else:
            raise Exception("Invalid SMT solver.")

        self.smt_translator = SmtTranslator(self.smt_solver, info.address_size)

        emulator = self.ir_emulator
        emulator.set_arch_registers(info.registers_gp_all)
        emulator.set_arch_registers_size(info.registers_size)
        emulator.set_reg_access_mapper(info.alias_mapper)

        translator = self.smt_translator
        translator.set_reg_access_mapper(info.alias_mapper)
        translator.set_arch_registers_size(info.registers_size)

    def _setup_analysis_modules(self):
        """Create the basic block builder, code analyzer and gadget modules."""
        binary = self.binary

        # Basic blocks.
        self.bb_builder = BasicBlockBuilder(
            self.disassembler,
            self.text_section,
            self.ir_translator)

        # Code analyzer.
        self.code_analyzer = CodeAnalyzer(self.smt_solver, self.smt_translator)

        # Gadgets.
        self.gadget_classifier = GadgetClassifier(self.ir_emulator, self.arch_info)
        self.gadget_finder = GadgetFinder(
            self.disassembler,
            self.text_section,
            self.ir_translator,
            binary.architecture,
            binary.architecture_mode)
        self.gadget_verifier = GadgetVerifier(self.code_analyzer, self.arch_info)

    # ======================================================================== #

    def open(self, filename):
        """Load ``filename`` and set up all modules for it.

        An empty or ``None`` filename leaves the instance untouched.

        :param filename: name of an executable file
        :type filename: str
        """
        if not filename:
            return

        self.binary = BinaryFile(filename)
        self.text_section = self.binary.text_section

        self._load()

    def translate(self, ea_start=None, ea_end=None):
        """Lazily translate a range of the binary to REIL.

        :param ea_start: start address (defaults to the binary's start)
        :type ea_start: int
        :param ea_end: end address (defaults to the binary's end)
        :type ea_end: int

        :returns: a generator of (address, assembler instruction,
            translation returned by the IR translator) tuples
        """
        first = ea_start or self.binary.ea_start
        last = ea_end or self.binary.ea_end

        translator = self.ir_translator
        translator.reset()

        for address, instruction, _ in self.disassemble(first, last):
            yield address, instruction, translator.translate(instruction)

    def disassemble(self, ea_start=None, ea_end=None):
        """Lazily disassemble a range of the binary.

        Iteration ends at the end address or as soon as the disassembler
        returns a falsy result.

        :param ea_start: start address (defaults to the binary's start)
        :type ea_start: int
        :param ea_end: end address (defaults to the binary's end)
        :type ea_end: int

        :returns: a generator of (address, assembler instruction,
            instruction size) tuples
        :rtype: (int, Instruction, int)
        """
        address = ea_start or self.binary.ea_start
        limit = ea_end or self.binary.ea_end

        while address < limit:
            # At most 16 bytes per attempt, clipped to the end of the binary.
            window_end = min(address + 16, self.binary.ea_end + 1)
            instruction = self.disassembler.disassemble(
                self.text_section[address:window_end], address)

            if not instruction:
                break

            yield address, instruction, instruction.size

            address += instruction.size

    def recover_cfg(self, ea_start=None, ea_end=None):
        """Build the control flow graph of a range of the binary.

        :param ea_start: start address (defaults to the binary's start)
        :type ea_start: int
        :param ea_end: end address (defaults to the binary's end)
        :type ea_end: int

        :returns: a graph where each node is a basic block
        :rtype: BasicBlockGraph
        """
        first = ea_start or self.binary.ea_start
        last = ea_end or self.binary.ea_end

        return BasicBlockGraph(self.bb_builder.build(first, last))

    def recover_bbs(self, ea_start=None, ea_end=None):
        """Build the basic blocks of a range of the binary.

        :param ea_start: start address (defaults to the binary's start)
        :type ea_start: int
        :param ea_end: end address (defaults to the binary's end)
        :type ea_end: int

        :returns: a list of basic blocks
        :rtype: list
        """
        first = ea_start or self.binary.ea_start
        last = ea_end or self.binary.ea_end

        return self.bb_builder.build(first, last)

    def emulate_full(self, context, ea_start=None, ea_end=None):
        """Translate a range of the binary and run it on the REIL emulator.

        :param context: processor context; optional ``'registers'`` and
            ``'memory'`` mappings are loaded into the emulator first
        :type context: dict
        :param ea_start: start address (defaults to the binary's start)
        :type ea_start: int
        :param ea_end: end address (defaults to the binary's end)
        :type ea_end: int

        :returns: a context holding the emulator registers after execution;
            its ``'memory'`` entry is always empty
        :rtype: dict
        """
        first = ea_start or self.binary.ea_start
        last = ea_end or self.binary.ea_end

        emulator = self.ir_emulator

        # Seed the emulator with the caller's registers...
        if 'registers' in context:
            for name, value in context['registers'].items():
                emulator.registers[name] = value

        # ...and memory cells.
        if 'memory' in context:
            for address, value in context['memory'].items():
                emulator.memory.write(address, 32, value)

        translated = []
        for _, _, reil in self.translate(ea_start, ea_end):
            translated.append(reil)

        emulator.execute(translated, first << 8, end_address=last << 8)

        return {
            'registers': dict(emulator.registers.items()),
            'memory': {},
        }
class BARF(object):
    """Binary Analysis Framework.

    Opens an executable and wires together the disassembler, the REIL
    translator and emulator, the SMT solver/translator and the analysis
    modules (CFG recovery, code analyzer, gadget tooling) built on them.
    """

    def __init__(self, filename):
        """Initialize every component to ``None`` and open ``filename``.

        :param filename: name of an executable file
        :type filename: str
        """
        logger.info("[+] BARF: Initializing...")

        self.code_analyzer = None
        self.ir_translator = None
        self.binary = None
        self.smt_solver = None
        self.gadget_classifier = None
        self.gadget_verifier = None
        self.arch_info = None
        self.gadget_finder = None
        self.text_section = None
        self.disassembler = None
        self.smt_translator = None
        self.ir_emulator = None
        self.bb_builder = None

        self.open(filename)

    def _load(self, arch_mode=None):
        """Set up architecture, core modules and analysis modules, in order.

        :param arch_mode: architecture mode; ``None`` selects the default
        :type arch_mode: int
        """
        # setup architecture
        self._setup_arch(arch_mode=arch_mode)

        # set up core modules
        self._setup_core_modules()

        # setup analysis modules
        self._setup_analysis_modules()

    def _setup_arch(self, arch_mode=None):
        """Set up architecture.

        Anything that is not reported as x86 by the binary is treated as ARM.

        :param arch_mode: architecture mode; ``None`` selects the default
        :type arch_mode: int
        """
        # set up architecture information
        self.arch_info = None

        if self.binary.architecture == arch.ARCH_X86:
            self._setup_x86_arch(arch_mode)
        else:
            # TODO: add arch to the binary file class.
            self._setup_arm_arch(arch_mode)

    def _setup_arm_arch(self, arch_mode=None):
        """Set up ARM architecture.

        :param arch_mode: architecture mode; defaults to Thumb
        :type arch_mode: int
        """
        if arch_mode is None:
            arch_mode = arch.ARCH_ARM_MODE_THUMB

        self.arch_info = ArmArchitectureInformation(arch_mode)
        self.disassembler = ArmDisassembler(architecture_mode=arch_mode)
        self.ir_translator = ArmTranslator(architecture_mode=arch_mode)

    def _setup_x86_arch(self, arch_mode=None):
        """Set up x86 architecture.

        :param arch_mode: architecture mode; defaults to the binary's mode
        :type arch_mode: int
        """
        if arch_mode is None:
            arch_mode = self.binary.architecture_mode

        # Set up architecture information
        self.arch_info = X86ArchitectureInformation(arch_mode)
        self.disassembler = X86Disassembler(architecture_mode=arch_mode)
        self.ir_translator = X86Translator(architecture_mode=arch_mode)

    def _setup_core_modules(self):
        """Set up core modules (REIL emulator, SMT solver and translator).

        A configured solver that is not installed only produces a warning;
        the solver and translator then stay ``None``.

        :raises Exception: if ``SMT_SOLVER`` is set to an unknown solver name
        """
        self.ir_emulator = None
        self.smt_solver = None
        self.smt_translator = None

        if self.arch_info:
            # Set REIL emulator.
            self.ir_emulator = ReilEmulator(self.arch_info)

            # Set SMT Solver.
            if SMT_SOLVER == "Z3":
                if _check_solver_installation("z3"):
                    self.smt_solver = Z3Solver()
                else:
                    logger.warning("z3 solver is not installed. Run 'barf-install-solvers.sh' to install it.")
            elif SMT_SOLVER == "CVC4":
                if _check_solver_installation("cvc4"):
                    self.smt_solver = CVC4Solver()
                else:
                    logger.warning("cvc4 solver is not installed. Run 'barf-install-solvers.sh' to install it.")
            elif SMT_SOLVER is not None:
                raise Exception("Invalid SMT solver.")

            # Set SMT translator (only when a solver is available).
            if self.smt_solver:
                self.smt_translator = SmtTranslator(self.smt_solver, self.arch_info.address_size)

                self.smt_translator.set_arch_alias_mapper(self.arch_info.alias_mapper)
                self.smt_translator.set_arch_registers_size(self.arch_info.registers_size)

    def _setup_analysis_modules(self):
        """Set up analysis modules.

        The code analyzer and the gadget verifier are only created when the
        SMT translator (and hence a solver) is available.
        """
        # Basic block.
        self.bb_builder = CFGRecoverer(RecursiveDescent(self.disassembler, self.text_section,
                                                        self.ir_translator, self.arch_info))

        # Code analyzer.
        self.code_analyzer = None

        if self.smt_translator:
            self.code_analyzer = CodeAnalyzer(self.smt_solver, self.smt_translator, self.arch_info)

        # Gadgets classifier.
        self.gadget_classifier = GadgetClassifier(self.ir_emulator, self.arch_info)

        # Gadgets finder.
        self.gadget_finder = GadgetFinder(self.disassembler, self.text_section, self.ir_translator,
                                          self.binary.architecture, self.binary.architecture_mode)

        # Gadget verifier.
        self.gadget_verifier = None

        if self.code_analyzer:
            self.gadget_verifier = GadgetVerifier(self.code_analyzer, self.arch_info)

    # ======================================================================== #

    def open(self, filename):
        """Open a file for analysis.

        Does nothing when ``filename`` is empty or ``None``.

        :param filename: name of an executable file
        :type filename: str
        """
        if filename:
            self.binary = BinaryFile(filename)
            self.text_section = self.binary.text_section

            self._load()

    def load_architecture(self, name, arch_info, disassembler, translator):
        """Install externally built architecture components.

        Only the analysis modules are rebuilt afterwards; ``name`` is
        accepted but not used by this method.

        :param name: architecture name (unused)
        :param arch_info: architecture information object
        :param disassembler: disassembler to use
        :param translator: REIL translator to use
        """
        # Set up architecture information
        self.arch_info = arch_info
        self.disassembler = disassembler
        self.ir_translator = translator

        # setup analysis modules
        self._setup_analysis_modules()

    def translate(self, ea_start=None, ea_end=None, arch_mode=None):
        """Translate to REIL instructions.

        :param ea_start: start address (defaults to the binary's start)
        :type ea_start: int
        :param ea_end: end address (defaults to the binary's end)
        :type ea_end: int
        :param arch_mode: architecture mode
        :type arch_mode: int

        :returns: a generator of tuples of the form (address, assembler
            instruction, translation returned by the IR translator)
        """
        start_addr = ea_start if ea_start else self.binary.ea_start
        end_addr = ea_end if ea_end else self.binary.ea_end

        self.ir_translator.reset()

        for addr, asm, _ in self.disassemble(ea_start=start_addr, ea_end=end_addr, arch_mode=arch_mode):
            yield addr, asm, self.ir_translator.translate(asm)

    def disassemble(self, ea_start=None, ea_end=None, arch_mode=None):
        """Disassemble assembler instructions.

        Stops early when the disassembler returns a falsy result.

        :param ea_start: start address (defaults to the binary's start)
        :type ea_start: int
        :param ea_end: end address (defaults to the binary's end)
        :type ea_end: int
        :param arch_mode: architecture mode (defaults to the binary's mode)
        :type arch_mode: int

        :returns: a generator of tuples of the form (address, assembler
            instruction, instruction size)
        :rtype: (int, Instruction, int)
        """
        if arch_mode is None:
            arch_mode = self.binary.architecture_mode

        curr_addr = ea_start if ea_start else self.binary.ea_start
        end_addr = ea_end if ea_end else self.binary.ea_end

        while curr_addr < end_addr:
            # Hand the disassembler a window of at most 16 bytes, clipped to
            # the end of the binary.
            start, end = curr_addr, min(curr_addr + 16, self.binary.ea_end + 1)
            asm = self.disassembler.disassemble(self.text_section[start:end], curr_addr,
                                                architecture_mode=arch_mode)

            if not asm:
                return

            yield curr_addr, asm, asm.size

            # update instruction pointer
            curr_addr += asm.size

    def recover_cfg(self, ea_start=None, ea_end=None, symbols=None, callback=None, arch_mode=None):
        """Recover CFG.

        All modules are reloaded for ``arch_mode`` before the recovery.

        :param ea_start: start address (defaults to the binary's start)
        :type ea_start: int
        :param ea_end: end address (defaults to the binary's end)
        :type ea_end: int
        :param symbols: mapping of address to a (name, size) sequence
        :type symbols: dict
        :param callback: called as ``callback(address, name, size)`` before
            the basic blocks are recovered
        :param arch_mode: architecture mode (defaults to the binary's mode)
        :type arch_mode: int

        :returns: a CFG
        """
        # Set architecture in case it wasn't already set.
        if arch_mode is None:
            arch_mode = self.binary.architecture_mode

        # Reload modules.
        self._load(arch_mode=arch_mode)

        cfg, _ = self._recover_cfg(start=ea_start, end=ea_end, symbols=symbols, callback=callback)

        return cfg

    def recover_cfg_all(self, entries, symbols=None, callback=None, arch_mode=None):
        """Recover CFG for all functions from an entry point and/or symbol table.

        Starting from ``entries``, every call target reported while
        recovering a CFG is queued and recovered in turn.

        :param entries: start addresses to recover from
        :param symbols: mapping of address to a (name, size) sequence
        :type symbols: dict
        :param callback: called as ``callback(address, name, size)`` for
            every recovered function
        :param arch_mode: architecture mode (defaults to the binary's mode)
        :type arch_mode: int

        :returns: a list of CFGs
        :rtype: list
        """
        # Set architecture in case it wasn't already set.
        if arch_mode is None:
            arch_mode = self.binary.architecture_mode

        # Reload modules.
        self._load(arch_mode=arch_mode)

        # Set symbols.
        symbols = {} if not symbols else symbols

        # Recover the CFGs.
        cfgs = []
        addrs_processed = set()

        # Work on a private list so any iterable of entries is accepted.
        calls = list(entries)

        while len(calls) > 0:
            start, calls = calls[0], calls[1:]

            cfg, calls_tmp = self._recover_cfg(start=start, symbols=symbols, callback=callback)

            addrs_processed.add(start)

            cfgs.append(cfg)

            for addr in sorted(calls_tmp):
                if addr not in addrs_processed and addr not in calls:
                    calls.append(addr)

        return cfgs

    def _recover_cfg(self, start=None, end=None, symbols=None, callback=None):
        """Recover the CFG of a single function.

        :param start: start address (defaults to the binary's start)
        :type start: int
        :param end: end address (defaults to the binary's end)
        :type end: int
        :param symbols: mapping of address to a (name, size) sequence
        :type symbols: dict
        :param callback: called as ``callback(address, name, size)`` before
            the basic blocks are recovered

        :returns: the CFG and the call targets reported by the builder
        :rtype: tuple
        """
        # Compute start and end address first, so that the default start
        # address also gets a name instead of failing inside format().
        start_addr = start if start else self.binary.ea_start
        end_addr = end if end else self.binary.ea_end

        # Retrieve symbol name in case it is available.
        if symbols and start_addr in symbols:
            name = symbols[start_addr][0]
            size = symbols[start_addr][1] - 1 if symbols[start_addr][1] != 0 else 0
        else:
            name = "sub_{:x}".format(start_addr)
            size = 0

        # Set callback.
        if callback:
            callback(start_addr, name, size)

        # Recover basic blocks.
        bbs, calls = self.bb_builder.build(start_addr, end_addr, symbols)

        # Build CFG.
        cfg = ControlFlowGraph(bbs, name=name)

        return cfg, calls

    def recover_bbs(self, ea_start=None, ea_end=None):
        """Recover basic blocks.

        :param ea_start: start address (defaults to the binary's start)
        :type ea_start: int
        :param ea_end: end address (defaults to the binary's end)
        :type ea_end: int

        :returns: whatever the basic block builder returns for the range
        """
        start_addr = ea_start if ea_start else self.binary.ea_start
        end_addr = ea_end if ea_end else self.binary.ea_end

        # NOTE(review): _recover_cfg() unpacks the result of build() as a
        # (basic blocks, calls) pair and passes a symbols argument; this call
        # does neither. Confirm the intended return value with callers.
        bb_list = self.bb_builder.build(start_addr, end_addr)

        return bb_list

    def emulate_full(self, context, ea_start=None, ea_end=None, arch_mode=None):
        """Emulate REIL instructions.

        Instructions are disassembled, translated and single-stepped one at
        a time, following the addresses produced by the emulator, until the
        end address is reached.

        :param context: processor context (register and/or memory)
        :type context: dict
        :param ea_start: start address (defaults to the binary's start)
        :type ea_start: int
        :param ea_end: end address (defaults to the binary's end)
        :type ea_end: int
        :param arch_mode: architecture mode; when given, all modules are
            reloaded for it first
        :type arch_mode: int

        :returns: a context with the emulator registers; ``'memory'`` is
            always returned empty
        :rtype: dict
        """

        def _translate_asm_instruction(asm_instr):
            """Wrap the REIL translation of one instruction in a container."""
            reil_translator = self.ir_translator

            # Create ReilContainer
            instr_container = ReilContainer()
            instr_seq = ReilSequence()

            for reil_instr in reil_translator.translate(asm_instr):
                instr_seq.append(reil_instr)

            instr_container.add(instr_seq)

            return instr_container

        def _process_asm_instruction(reil_emulator, asm_instr):
            """Emulate one instruction and return the next native address."""
            instr_container = _translate_asm_instruction(asm_instr)

            ip = asm_instr.address << 8 | 0x0
            next_addr = None

            while ip:
                # Fetch instruction.
                try:
                    reil_instr = instr_container.fetch(ip)
                except ReilContainerInvalidAddressError:
                    # The address is outside this instruction's container:
                    # continue from the corresponding native address.
                    next_addr = split_address(ip)[0]
                    break

                next_ip = reil_emulator.single_step(reil_instr)

                # Update instruction pointer.
                ip = next_ip if next_ip else instr_container.get_next_address(ip)

            # No address outside the container was reached: fall through.
            if next_addr is None:
                next_addr = asm_instr.address + asm_instr.size

            return next_addr

        if arch_mode is not None:
            # Reload modules.
            self._load(arch_mode=arch_mode)

        start_addr = ea_start if ea_start else self.binary.ea_start
        end_addr = ea_end if ea_end else self.binary.ea_end

        # Load registers
        for reg, val in context.get('registers', {}).items():
            self.ir_emulator.registers[reg] = val

        # Load memory
        for addr, val in context.get('memory', {}).items():
            self.ir_emulator.memory.write(addr, 4, val)

        next_addr = start_addr

        while next_addr != end_addr:
            start, end = next_addr, next_addr + self.arch_info.max_instruction_size

            asm_instr = self.disassembler.disassemble(self.text_section[start:end], next_addr)

            next_addr = _process_asm_instruction(self.ir_emulator, asm_instr)

        context_out = {
            'registers': {},
            'memory': {}
        }

        # save registers
        for reg, val in self.ir_emulator.registers.items():
            context_out['registers'][reg] = val

        return context_out

    def emulate_full_ex(self, context, instr_container, ea_start=None, ea_end=None, arch_mode=None):
        """Emulate REIL instructions from an instruction container.

        :param context: processor context (register and/or memory)
        :type context: dict
        :param instr_container: instruction container
        :type instr_container: ReilContainer
        :param ea_start: start address (defaults to the binary's start)
        :type ea_start: int
        :param ea_end: end address (defaults to the binary's end)
        :type ea_end: int
        :param arch_mode: architecture mode; when given, all modules are
            reloaded for it first
        :type arch_mode: int

        :returns: a context with the emulator registers; ``'memory'`` is
            always returned empty
        :rtype: dict
        """
        if arch_mode is not None:
            # Reload modules.
            self._load(arch_mode=arch_mode)

        start_addr = ea_start if ea_start else self.binary.ea_start
        end_addr = ea_end if ea_end else self.binary.ea_end

        # Load registers
        for reg, val in context.get('registers', {}).items():
            self.ir_emulator.registers[reg] = val

        # Load memory
        for addr, val in context.get('memory', {}).items():
            self.ir_emulator.memory.write(addr, 4, val)

        self.ir_emulator.execute(instr_container, start=start_addr << 8, end=end_addr << 8)

        context_out = {
            'registers': {},
            'memory': {}
        }

        # save registers
        for reg, val in self.ir_emulator.registers.items():
            context_out['registers'][reg] = val

        return context_out
class BARF(object):
    """Binary Analysis Framework.

    Opens an executable and wires together the disassembler, the REIL
    translator and emulator, the SMT solver/translator and the analysis
    modules (CFG recovery, code analyzer, gadget tooling) built on them.
    """

    def __init__(self, filename):
        """Initialize every component to ``None`` and open ``filename``.

        :param filename: name of an executable file
        :type filename: str
        """
        logger.info("[+] BARF: Initializing...")

        self.code_analyzer = None
        self.ir_translator = None
        self.binary = None
        self.smt_solver = None
        self.gadget_classifier = None
        self.gadget_verifier = None
        self.arch_info = None
        self.gadget_finder = None
        self.text_section = None
        self.disassembler = None
        self.smt_translator = None
        self.ir_emulator = None
        self.bb_builder = None

        self.open(filename)

    def _load(self, arch_mode=None):
        """Set up architecture, core modules and analysis modules, in order.

        :param arch_mode: architecture mode; ``None`` selects the default
        :type arch_mode: int
        """
        # setup architecture
        self._setup_arch(arch_mode=arch_mode)

        # set up core modules
        self._setup_core_modules()

        # setup analysis modules
        self._setup_analysis_modules()

    def _setup_arch(self, arch_mode=None):
        """Set up architecture.

        Anything that is not reported as x86 by the binary is treated as ARM.

        :param arch_mode: architecture mode; ``None`` selects the default
        :type arch_mode: int
        """
        # set up architecture information
        self.arch_info = None

        if self.binary.architecture == arch.ARCH_X86:
            self._setup_x86_arch(arch_mode)
        else:
            # TODO: add arch to the binary file class.
            self._setup_arm_arch(arch_mode)

    def _setup_arm_arch(self, arch_mode=None):
        """Set up ARM architecture.

        :param arch_mode: architecture mode; defaults to Thumb
        :type arch_mode: int
        """
        if arch_mode is None:
            arch_mode = arch.ARCH_ARM_MODE_THUMB

        self.arch_info = ArmArchitectureInformation(arch_mode)
        self.disassembler = ArmDisassembler(architecture_mode=arch_mode)
        self.ir_translator = ArmTranslator(architecture_mode=arch_mode)

    def _setup_x86_arch(self, arch_mode=None):
        """Set up x86 architecture.

        :param arch_mode: architecture mode; defaults to the binary's mode
        :type arch_mode: int
        """
        if arch_mode is None:
            arch_mode = self.binary.architecture_mode

        # Set up architecture information
        self.arch_info = X86ArchitectureInformation(arch_mode)
        self.disassembler = X86Disassembler(architecture_mode=arch_mode)
        self.ir_translator = X86Translator(architecture_mode=arch_mode)

    def _setup_core_modules(self):
        """Set up core modules (REIL emulator, SMT solver and translator).

        :raises Exception: if ``SMT_SOLVER`` is set to an unknown solver name
        """
        self.ir_emulator = None
        self.smt_solver = None
        self.smt_translator = None

        if self.arch_info:
            # Set REIL emulator.
            self.ir_emulator = ReilEmulator(self.arch_info)

            # Set SMT Solver.
            if SMT_SOLVER == "Z3":
                self.smt_solver = Z3Solver()
            elif SMT_SOLVER == "CVC4":
                self.smt_solver = CVC4Solver()
            elif SMT_SOLVER is not None:
                raise Exception("Invalid SMT solver.")

            # Set SMT translator (only when a solver was configured).
            if self.smt_solver:
                self.smt_translator = SmtTranslator(
                    self.smt_solver, self.arch_info.address_size)

                self.smt_translator.set_arch_alias_mapper(
                    self.arch_info.alias_mapper)
                self.smt_translator.set_arch_registers_size(
                    self.arch_info.registers_size)

    def _setup_analysis_modules(self):
        """Set up analysis modules."""
        # Basic block.
        self.bb_builder = CFGRecoverer(
            RecursiveDescent(self.disassembler, self.text_section,
                             self.ir_translator, self.arch_info))

        # Code analyzer.
        self.code_analyzer = CodeAnalyzer(self.smt_solver,
                                          self.smt_translator, self.arch_info)

        # Gadgets.
        self.gadget_classifier = GadgetClassifier(self.ir_emulator,
                                                  self.arch_info)
        self.gadget_finder = GadgetFinder(self.disassembler,
                                          self.text_section,
                                          self.ir_translator,
                                          self.binary.architecture,
                                          self.binary.architecture_mode)
        self.gadget_verifier = GadgetVerifier(self.code_analyzer,
                                              self.arch_info)

    # ======================================================================== #

    def open(self, filename):
        """Open a file for analysis.

        Does nothing when ``filename`` is empty or ``None``.

        :param filename: name of an executable file
        :type filename: str
        """
        if filename:
            self.binary = BinaryFile(filename)
            self.text_section = self.binary.text_section

            self._load()

    def load_architecture(self, name, arch_info, disassembler, translator):
        """Install externally built architecture components.

        Only the analysis modules are rebuilt afterwards; ``name`` is
        accepted but not used by this method.

        :param name: architecture name (unused)
        :param arch_info: architecture information object
        :param disassembler: disassembler to use
        :param translator: REIL translator to use
        """
        # Set up architecture information
        self.arch_info = arch_info
        self.disassembler = disassembler
        self.ir_translator = translator

        # setup analysis modules
        self._setup_analysis_modules()

    def translate(self, ea_start=None, ea_end=None, arch_mode=None):
        """Translate to REIL instructions.

        :param ea_start: start address (defaults to the binary's start)
        :type ea_start: int
        :param ea_end: end address (defaults to the binary's end)
        :type ea_end: int
        :param arch_mode: architecture mode
        :type arch_mode: int

        :returns: a generator of tuples of the form (address, assembler
            instruction, translation returned by the IR translator)
        """
        start_addr = ea_start if ea_start else self.binary.ea_start
        end_addr = ea_end if ea_end else self.binary.ea_end

        self.ir_translator.reset()

        for addr, asm, _ in self.disassemble(ea_start=start_addr,
                                             ea_end=end_addr,
                                             arch_mode=arch_mode):
            yield addr, asm, self.ir_translator.translate(asm)

    def disassemble(self, ea_start=None, ea_end=None, arch_mode=None):
        """Disassemble assembler instructions.

        Stops early when the disassembler returns a falsy result.

        :param ea_start: start address (defaults to the binary's start)
        :type ea_start: int
        :param ea_end: end address (defaults to the binary's end)
        :type ea_end: int
        :param arch_mode: architecture mode (defaults to the binary's mode)
        :type arch_mode: int

        :returns: a generator of tuples of the form (address, assembler
            instruction, instruction size)
        :rtype: (int, Instruction, int)
        """
        if arch_mode is None:
            arch_mode = self.binary.architecture_mode

        curr_addr = ea_start if ea_start else self.binary.ea_start
        end_addr = ea_end if ea_end else self.binary.ea_end

        while curr_addr < end_addr:
            # Hand the disassembler a window of at most 16 bytes, clipped to
            # the end of the binary.
            start, end = curr_addr, min(curr_addr + 16,
                                        self.binary.ea_end + 1)
            asm = self.disassembler.disassemble(self.text_section[start:end],
                                                curr_addr,
                                                architecture_mode=arch_mode)

            if not asm:
                return

            yield curr_addr, asm, asm.size

            # update instruction pointer
            curr_addr += asm.size

    def recover_cfg(self, ea_start=None, ea_end=None, symbols=None,
                    callback=None, arch_mode=None):
        """Recover CFG.

        All modules are reloaded for ``arch_mode`` before the recovery.

        :param ea_start: start address (defaults to the binary's start)
        :type ea_start: int
        :param ea_end: end address (defaults to the binary's end)
        :type ea_end: int
        :param symbols: mapping of address to a (name, size) sequence
        :type symbols: dict
        :param callback: called as ``callback(address, name, size)`` before
            the basic blocks are recovered
        :param arch_mode: architecture mode (defaults to the binary's mode)
        :type arch_mode: int

        :returns: a CFG
        """
        # Set architecture in case it wasn't already set.
        if arch_mode is None:
            arch_mode = self.binary.architecture_mode

        # Reload modules.
        self._load(arch_mode=arch_mode)

        cfg, _ = self._recover_cfg(start=ea_start, end=ea_end,
                                   symbols=symbols, callback=callback)

        return cfg

    def recover_cfg_all(self, entries, symbols=None, callback=None,
                        arch_mode=None):
        """Recover CFG for all functions from an entry point and/or symbol table.

        Starting from ``entries``, every call target reported while
        recovering a CFG is queued and recovered in turn.

        :param entries: start addresses to recover from
        :param symbols: mapping of address to a (name, size) sequence
        :type symbols: dict
        :param callback: called as ``callback(address, name, size)`` for
            every recovered function
        :param arch_mode: architecture mode (defaults to the binary's mode)
        :type arch_mode: int

        :returns: a list of CFGs
        :rtype: list
        """
        # Set architecture in case it wasn't already set.
        if arch_mode is None:
            arch_mode = self.binary.architecture_mode

        # Reload modules.
        self._load(arch_mode=arch_mode)

        # Set symbols.
        symbols = {} if not symbols else symbols

        # Recover the CFGs.
        cfgs = []
        addrs_processed = set()

        # Work on a private list so any iterable of entries is accepted.
        calls = list(entries)

        while len(calls) > 0:
            start, calls = calls[0], calls[1:]

            cfg, calls_tmp = self._recover_cfg(start=start, symbols=symbols,
                                               callback=callback)

            addrs_processed.add(start)

            cfgs.append(cfg)

            for addr in sorted(calls_tmp):
                if addr not in addrs_processed and addr not in calls:
                    calls.append(addr)

        return cfgs

    def _recover_cfg(self, start=None, end=None, symbols=None,
                     callback=None):
        """Recover the CFG of a single function.

        :param start: start address (defaults to the binary's start)
        :type start: int
        :param end: end address (defaults to the binary's end)
        :type end: int
        :param symbols: mapping of address to a (name, size) sequence
        :type symbols: dict
        :param callback: called as ``callback(address, name, size)`` before
            the basic blocks are recovered

        :returns: the CFG and the call targets reported by the builder
        :rtype: tuple
        """
        # Compute start and end address first, so that the default start
        # address also gets a name instead of failing inside format().
        start_addr = start if start else self.binary.ea_start
        end_addr = end if end else self.binary.ea_end

        # Retrieve symbol name in case it is available.
        if symbols and start_addr in symbols:
            name = symbols[start_addr][0]
            size = symbols[start_addr][1] - 1 \
                if symbols[start_addr][1] != 0 else 0
        else:
            name = "sub_{:x}".format(start_addr)
            size = 0

        # Set callback.
        if callback:
            callback(start_addr, name, size)

        # Recover basic blocks.
        bbs, calls = self.bb_builder.build(start_addr, end_addr, symbols)

        # Build CFG.
        cfg = ControlFlowGraph(bbs, name=name)

        return cfg, calls

    def recover_bbs(self, ea_start=None, ea_end=None):
        """Recover basic blocks.

        :param ea_start: start address (defaults to the binary's start)
        :type ea_start: int
        :param ea_end: end address (defaults to the binary's end)
        :type ea_end: int

        :returns: whatever the basic block builder returns for the range
        """
        start_addr = ea_start if ea_start else self.binary.ea_start
        end_addr = ea_end if ea_end else self.binary.ea_end

        # NOTE(review): _recover_cfg() unpacks the result of build() as a
        # (basic blocks, calls) pair and passes a symbols argument; this call
        # does neither. Confirm the intended return value with callers.
        bb_list = self.bb_builder.build(start_addr, end_addr)

        return bb_list

    def emulate_full(self, context, ea_start=None, ea_end=None,
                     arch_mode=None):
        """Emulate REIL instructions.

        Instructions are disassembled, translated and single-stepped one at
        a time, following the addresses produced by the emulator, until the
        end address is reached.

        :param context: processor context (register and/or memory)
        :type context: dict
        :param ea_start: start address (defaults to the binary's start)
        :type ea_start: int
        :param ea_end: end address (defaults to the binary's end)
        :type ea_end: int
        :param arch_mode: architecture mode; when given, all modules are
            reloaded for it first
        :type arch_mode: int

        :returns: a context with the emulator registers; ``'memory'`` is
            always returned empty
        :rtype: dict
        """

        def _translate_asm_instruction(asm_instr):
            """Wrap the REIL translation of one instruction in a container."""
            reil_translator = self.ir_translator

            # Create ReilContainer
            instr_container = ReilContainer()
            instr_seq = ReilSequence()

            for reil_instr in reil_translator.translate(asm_instr):
                instr_seq.append(reil_instr)

            instr_container.add(instr_seq)

            return instr_container

        def _process_asm_instruction(reil_emulator, asm_instr):
            """Emulate one instruction and return the next native address."""
            instr_container = _translate_asm_instruction(asm_instr)

            ip = asm_instr.address << 8 | 0x0
            next_addr = None

            while ip:
                # Fetch instruction.
                try:
                    reil_instr = instr_container.fetch(ip)
                except ReilContainerInvalidAddressError:
                    # The address is outside this instruction's container:
                    # continue from the corresponding native address.
                    next_addr = split_address(ip)[0]
                    break

                next_ip = reil_emulator.single_step(reil_instr)

                # Update instruction pointer.
                ip = next_ip if next_ip \
                    else instr_container.get_next_address(ip)

            # No address outside the container was reached: fall through.
            if next_addr is None:
                next_addr = asm_instr.address + asm_instr.size

            return next_addr

        if arch_mode is not None:
            # Reload modules.
            self._load(arch_mode=arch_mode)

        start_addr = ea_start if ea_start else self.binary.ea_start
        end_addr = ea_end if ea_end else self.binary.ea_end

        # Load registers
        for reg, val in context.get('registers', {}).items():
            self.ir_emulator.registers[reg] = val

        # Load memory
        for addr, val in context.get('memory', {}).items():
            self.ir_emulator.memory.write(addr, 4, val)

        next_addr = start_addr

        while next_addr != end_addr:
            start = next_addr
            end = next_addr + self.arch_info.max_instruction_size

            asm_instr = self.disassembler.disassemble(
                self.text_section[start:end], next_addr)

            next_addr = _process_asm_instruction(self.ir_emulator, asm_instr)

        context_out = {'registers': {}, 'memory': {}}

        # save registers
        for reg, val in self.ir_emulator.registers.items():
            context_out['registers'][reg] = val

        return context_out

    def emulate_full_ex(self, context, instr_container, ea_start=None,
                        ea_end=None, arch_mode=None):
        """Emulate REIL instructions from an instruction container.

        :param context: processor context (register and/or memory)
        :type context: dict
        :param instr_container: instruction container
        :type instr_container: ReilContainer
        :param ea_start: start address (defaults to the binary's start)
        :type ea_start: int
        :param ea_end: end address (defaults to the binary's end)
        :type ea_end: int
        :param arch_mode: architecture mode; when given, all modules are
            reloaded for it first
        :type arch_mode: int

        :returns: a context with the emulator registers; ``'memory'`` is
            always returned empty
        :rtype: dict
        """
        if arch_mode is not None:
            # Reload modules.
            self._load(arch_mode=arch_mode)

        start_addr = ea_start if ea_start else self.binary.ea_start
        end_addr = ea_end if ea_end else self.binary.ea_end

        # Load registers
        for reg, val in context.get('registers', {}).items():
            self.ir_emulator.registers[reg] = val

        # Load memory
        for addr, val in context.get('memory', {}).items():
            self.ir_emulator.memory.write(addr, 4, val)

        self.ir_emulator.execute(instr_container, start=start_addr << 8,
                                 end=end_addr << 8)

        context_out = {'registers': {}, 'memory': {}}

        # save registers
        for reg, val in self.ir_emulator.registers.items():
            context_out['registers'][reg] = val

        return context_out
class BARF(object):
    """Binary Analysis Framework.

    Ties a binary file to a disassembler, a REIL translator and emulator, an
    SMT solver/translator and the analysis modules built on top of them.
    """

    def __init__(self, filename):
        """Create the framework and open *filename* if one is given.

        :param filename: name of an executable file
        :type filename: str
        """
        if verbose:
            print("[+] BARF: Initializing...")

        # Define every attribute up front so an instance created without a
        # file exposes ``None`` instead of raising AttributeError later.
        self.binary = None
        self.text_section = None
        self.arch_info = None
        self.disassembler = None
        self.ir_emulator = None
        self.ir_translator = None
        self.smt_solver = None
        self.smt_translator = None
        self.bb_builder = None
        self.code_analyzer = None
        self.gadget_classifier = None
        self.gadget_finder = None
        self.gadget_verifier = None

        self.open(filename)

    def _load(self):
        """Set up architecture, core modules and analysis modules, in order."""
        # setup architecture
        self._setup_arch()

        # set up core modules
        self._setup_core_modules()

        # setup analysis modules
        self._setup_analysis_modules()

    def _setup_arch(self):
        """Set up architecture.

        ``arch_info`` stays ``None`` when the binary is not x86.
        """
        # set up architecture information
        self.arch_info = None

        if self.binary.architecture == arch.ARCH_X86:
            self._setup_x86_arch()

    def _setup_x86_arch(self):
        """Set up x86 architecture.
        """
        # set up architecture information
        self.arch_info = X86ArchitectureInformation(self.binary.architecture_mode)

    def _setup_core_modules(self):
        """Set up core modules.

        All modules are reset to ``None`` first and only created when
        architecture information is available.

        :raises Exception: if ``SMT_SOLVER`` is neither "Z3" nor "CVC4"
        """
        self.disassembler = None
        self.ir_emulator = None
        self.ir_translator = None
        self.smt_solver = None
        self.smt_translator = None

        if self.arch_info:
            self.disassembler = X86Disassembler(architecture_mode=self.arch_info.architecture_mode)
            self.ir_emulator = ReilEmulator(self.arch_info.address_size)
            self.ir_translator = X86Translator(architecture_mode=self.arch_info.architecture_mode)

            if SMT_SOLVER == "Z3":
                self.smt_solver = Z3Solver()
            elif SMT_SOLVER == "CVC4":
                self.smt_solver = CVC4Solver()
            else:
                raise Exception("Invalid SMT solver.")

            self.smt_translator = SmtTranslator(self.smt_solver, self.arch_info.address_size)

            # Make emulator and SMT translator aware of the register layout.
            self.ir_emulator.set_arch_registers(self.arch_info.registers_gp)
            self.ir_emulator.set_arch_registers_size(self.arch_info.register_size)
            self.ir_emulator.set_reg_access_mapper(self.arch_info.register_access_mapper())

            self.smt_translator.set_reg_access_mapper(self.arch_info.register_access_mapper())
            self.smt_translator.set_arch_registers_size(self.arch_info.register_size)

    def _setup_analysis_modules(self):
        """Set up analysis modules.
        """
        ## basic block
        self.bb_builder = BasicBlockBuilder(self.disassembler, self.text_section, self.ir_translator)

        ## code analyzer
        self.code_analyzer = CodeAnalyzer(self.smt_solver, self.smt_translator)

        # TODO: This should not be part of the framework, but something that
        # it is built upon.
        ## gadget
        self.gadget_classifier = GadgetClassifier(self.ir_emulator, self.arch_info)
        self.gadget_finder = GadgetFinder(self.disassembler, self.text_section, self.ir_translator)
        self.gadget_verifier = GadgetVerifier(self.code_analyzer, self.arch_info)

    # ======================================================================== #

    def open(self, filename):
        """Open a file for analysis.

        Nothing happens when *filename* is falsy.

        :param filename: name of an executable file
        :type filename: str
        """
        if filename:
            self.binary = BinaryFile(filename)
            self.text_section = self.binary.text_section

            self._load()

    def translate(self, ea_start=None, ea_end=None):
        """Translate to REIL instructions.

        This is a generator; the translator is reset when iteration starts.

        :param ea_start: start address (falls back to the binary's start)
        :type ea_start: int
        :param ea_end: end address (falls back to the binary's end)
        :type ea_end: int

        :returns: tuples of the form (address, assembler instruction,
            REIL translation of that instruction)
        :rtype: generator
        """
        start_addr = ea_start if ea_start else self.binary.ea_start
        end_addr = ea_end if ea_end else self.binary.ea_end

        self.ir_translator.reset()

        for addr, asm, _ in self.disassemble(start_addr, end_addr):
            yield addr, asm, self.ir_translator.translate(asm)

    def disassemble(self, ea_start=None, ea_end=None):
        """Disassemble assembler instructions.

        This is a generator; it stops early as soon as the disassembler
        returns a falsy instruction.

        :param ea_start: start address (falls back to the binary's start)
        :type ea_start: int
        :param ea_end: end address (falls back to the binary's end)
        :type ea_end: int

        :returns: tuples of the form (address, assembler instruction,
            instruction size)
        :rtype: generator
        """
        curr_addr = ea_start if ea_start else self.binary.ea_start
        end_addr = ea_end if ea_end else self.binary.ea_end

        while curr_addr < end_addr:
            # disassemble instruction; hand over at most 16 bytes, clamped to
            # the end of the binary
            start, end = curr_addr, min(curr_addr + 16, self.binary.ea_end + 1)
            asm, size = self.disassembler.disassemble(self.text_section[start:end], curr_addr)

            if not asm:
                return

            yield curr_addr, asm, size

            # update instruction pointer
            curr_addr += size

    def recover_cfg(self, ea_start=None, ea_end=None, mode=None):
        """Recover CFG

        :param ea_start: start address (falls back to the binary's start)
        :type ea_start: int
        :param ea_end: end address (falls back to the binary's end)
        :type ea_end: int
        :param mode: accepted but not used by this implementation

        :returns: a graph where each node is a basic block
        :rtype: BasicBlockGraph
        """
        start_addr = ea_start if ea_start else self.binary.ea_start
        end_addr = ea_end if ea_end else self.binary.ea_end

        bb_list = self.bb_builder.build(start_addr, end_addr)
        bb_graph = BasicBlockGraph(bb_list)

        return bb_graph

    def recover_bbs(self, ea_start=None, ea_end=None, mode=None):
        """Recover basic blocks.

        :param ea_start: start address (falls back to the binary's start)
        :type ea_start: int
        :param ea_end: end address (falls back to the binary's end)
        :type ea_end: int
        :param mode: accepted but not used by this implementation

        :returns: a list of basic blocks
        :rtype: list
        """
        start_addr = ea_start if ea_start else self.binary.ea_start
        end_addr = ea_end if ea_end else self.binary.ea_end

        bb_list = self.bb_builder.build(start_addr, end_addr)

        return bb_list

    def emulate_full(self, context, ea_start=None, ea_end=None):
        """Emulate REIL instructions.

        :param context: processor context; may hold a 'registers' mapping
            and/or a 'memory' mapping (address -> value)
        :type context: dict
        :param ea_start: start address (falls back to the binary's start)
        :type ea_start: int
        :param ea_end: end address (falls back to the binary's end)
        :type ea_end: int

        :returns: a context holding the emulator registers after execution;
            its 'memory' entry is always left empty
        :rtype: dict
        """
        start_addr = ea_start if ea_start else self.binary.ea_start
        end_addr = ea_end if ea_end else self.binary.ea_end

        # load registers
        if 'registers' in context:
            for reg, val in context['registers'].items():
                self.ir_emulator.registers[reg] = val

        # load memory (each value is written with a size argument of 32)
        if 'memory' in context:
            for addr, val in context['memory'].items():
                self.ir_emulator.get_memory().write(addr, 32, val)

        # Use the resolved addresses from here on: the raw ``ea_start`` and
        # ``ea_end`` may be None, and shifting None raises TypeError.
        instrs = [reil for _, _, reil in self.translate(start_addr, end_addr)]

        self.ir_emulator.execute(instrs, start_addr << 8, end_address=end_addr << 8)

        context_out = {}

        # save registers
        context_out['registers'] = {}

        for reg, val in self.ir_emulator.registers.items():
            context_out['registers'][reg] = val

        # save memory (not populated by this implementation)
        context_out['memory'] = {}

        return context_out