# https://github.com/angr/angr-doc/pull/122/commits/c478f26f12411f567669530385d146194ef58031 # We are using angr's CFGAccurate to generate a CFG from the given binary which asks for a specific user input. # As angr itself cannot display CFGs (e.g. as png-files), we are using [angrutils'](https://github.com/axt/angr-utils) function plot_cfg. # The various parameters of CFGAccurate are described in the [docs](docs/analyses/cfg_accurate.md) # and in the [api](http://angr.io/api-doc/angr.html#angr.analyses.cfg_accurate.CFGAccurate). import angr #from angrutils import plot_cfg # CFG very slow with this. # https://docs.angr.io/built-in-analyses/cfg # The CFG analysis does not distinguish between code from different binary objects. # This means that by default, it will try to analyze control flow through loaded shared libraries. # This is almost never intended behavior, since this will extend the analysis time to several days, probably. proj = angr.Project("C:\\Windows\\System32\\notepad.exe", load_options={'auto_load_libs': False}) #proj = angr.Project("C:\\Windows\\System32\\normaliz.dll", load_options={'auto_load_libs': False}) print(proj.loader) print(proj.loader.main_object) print("main_object", dir(proj.loader.main_object)) print("") print("Arch=", proj.arch) print("Entry=", proj.entry) print("Filename=", proj.filename) # exit(0) cfg_fast = proj.analyses.CFGFast() print("cfg_fast:", dir(cfg_fast))
# coding: utf-8 # # This file solves the problem `nobranch` from 9447 CTF 2015. It got the first blood solution! # It takes a VERY long time to run! I took a well-deserved nap while it was solving :) # import angr, claripy p = angr.Project('nobranch') all_blocks = [] mainaddr = 0x400400 outaddr = 0x616050 shouldbe = 'HMQhQLi6VqgeOj78AbiaqquK3noeJt' def main(): state = p.factory.blank_state( addr=mainaddr, add_options={angr.options.LAZY_SOLVES }) # set up the initial state at the start of main state.memory.store(state.regs.rsp, claripy.BVV(0x4141414141414141, 64), endness='Iend_LE') # set fake return address state.memory.store( state.regs.rsp + 8, state.regs.rsp + 64, endness='Iend_LE' ) # I can't remember if I even need this... better safe than sorry state.memory.store(state.regs.rsp + 16, claripy.BVV(0, 64), endness='Iend_LE') # see above
# IDAngr glue: build an angr project for the binary currently loaded in IDA,
# backing symbolic memory with the live IDA debugger memory.
# NOTE: Python 2 code (print statements).
from mem import SimSymbolicIdaMemory
import angr
import idaapi
import idc
import claripy

print
print "########### IDAngr ###########"
print " usage: sm = StateManager()"
print
print " >> creating angr project..."
# Rebase the angr project to IDA's image base so addresses line up.
project = angr.Project(idaapi.get_input_file_path(),
                       main_opts={'custom_base_addr': idaapi.get_imagebase()},
                       load_options={"auto_load_libs": False})
print " >> done."


def StateShot():
    # Snapshot the current IDA debugger state into a fresh angr blank state.
    # NOTE(review): this definition continues past the visible chunk (it ends
    # mid `try:` here).
    global project
    idc.RefreshDebuggerMemory()
    # Memory plugin that lazily reads concrete bytes from IDA on demand.
    mem = SimSymbolicIdaMemory(memory_backer=project.loader.memory,
                               permissions_backer=None,
                               memory_id="mem")
    state = project.factory.blank_state(plugins={"memory": mem})
    # Copy general-purpose registers, ordered by register offset;
    # sp/bp/ip aliases are skipped (their canonical names are handled instead).
    for reg in sorted(project.arch.registers,
                      key=lambda x: project.arch.registers.get(x)[1]):
        if reg in ("sp", "bp", "ip"):
            continue
        try:
#! /usr/bin/env python
import angr
from angrutils import plot_func_graph


def analyze(b, addr, name=None):
    """Recover an accurate CFG starting at `addr` and plot every function's
    transition graph found in the knowledge base.

    :param b:    the angr.Project to analyze
    :param addr: address where CFG recovery starts
    :param name: label kept for interface compatibility (currently unused)
    """
    start_state = b.factory.blank_state(addr=addr)
    start_state.stack_push(0x0)  # fake return address so the entry block can return
    # CFGAccurate populates b.kb.functions as a side effect; the return value
    # itself is not needed below.
    cfg = b.analyses.CFGAccurate(fail_fast=True, starts=[addr],
                                 initial_state=start_state,
                                 context_sensitivity_level=2,
                                 keep_state=True, call_depth=100,
                                 normalize=True)
    for func in b.kb.functions.values():
        try:
            ri = b.analyses.RegionIdentifier(func)
            plot_func_graph(b, func.transition_graph, "%s" % (func.name),
                            asminst=True, vexinst=False,
                            structure=ri.region, color_depth=True)
        except Exception:
            # Plotting is best-effort per function; skip failures.
            # (Was a bare `except:`, which also swallowed SystemExit and
            # KeyboardInterrupt — narrowed so real interrupts propagate.)
            pass


if __name__ == "__main__":
    proj = angr.Project("../samples/1.6.26-libjsound.so",
                        load_options={'auto_load_libs': False,
                                      'main_opts': {'custom_base_addr': 0}})
    main = proj.loader.main_object.get_symbol(
        "Java_com_sun_media_sound_MixerSequencer_nAddControllerEventCallback")
    analyze(proj, main.rebased_addr, "libjsound")
# Test setup for angr-cli's context-view / watches plugins, driven against the
# bundled `morph` example binary.
import nose
import os
import angr
import claripy
import angrcli.plugins.ContextView.context_view
from angrcli.interaction.explore import ExploreInteractive
import angrcli.plugins.watches
from angrcli.plugins.ContextView.colors import Color

# Disable ANSI colors so textual output comparisons are stable.
Color.disable_colors = True

morph_location = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                              "..", "example", "morph")
proj = angr.Project(morph_location, load_options={'auto_load_libs': False})


class NotVeryRand(angr.SimProcedure):
    # Deterministic stand-in for rand(): cycles through `return_values`,
    # persisting the cursor in state.globals across calls.
    def run(self, return_values=None):
        rand_idx = self.state.globals.get('rand_idx', 0) % len(return_values)
        out = return_values[rand_idx]
        self.state.globals['rand_idx'] = rand_idx + 1
        return out


# 0x17 fully symbolic bytes used as argv[1].
argv = claripy.BVS('argv1', 8 * 0x17)
state = proj.factory.entry_state(args=[proj.filename, argv])
# Watch: the concrete value of argv[1] under the state's current constraints.
state.watches.add_watch(lambda state: state.solver.eval(argv, cast_to=bytes),
                        "argv[1]")
def fire(self, return_loader=False, **kwargs):  #pylint:disable=arguments-differ
    """Create (once) and return the angr Project for the target binary.

    Retrieves the binary (and, when a scout is available, its mapped
    libraries) into a temp dir, then builds an angr.Project pinned to the
    addresses observed in the target.

    :param return_loader: if True, return a cle.Loader instead of the Project
    :param kwargs: passed through to angr.Project / cle.Loader; a nested
                   'load_options' dict is flattened into the loader args
    :return: the cle.Loader when return_loader is True, else the angr.Project
    """
    if self.project is None:
        # NOTE: tmpdir is never cleaned up here; its lifetime is tied to the
        # process (the loader keeps the files open).
        tmpdir = tempfile.mkdtemp()
        self.target.retrieve_into(self.target.target_path, tmpdir)
        the_binary = os.path.join(tmpdir,
                                  os.path.basename(self.target.target_path))

        # preload the binary to decide if it supports setting library options
        # or base addresses
        cle_args = dict(kwargs)
        cle_args.update(cle_args.pop('load_options', {}))
        cle_args.pop('use_sim_procedures', None)  # TODO do something less hacky than this
        preload_kwargs = dict(cle_args)
        preload_kwargs['auto_load_libs'] = False
        preloader = cle.Loader(the_binary, **preload_kwargs)

        if self.scout_bow is not None:
            # Use the scout's memory map to fetch and pin every mapped library.
            _, _, _, self._mem_mapping = self.scout_bow.fire()
            target_libs = [lib for lib in self._mem_mapping
                           if lib.startswith("/")]
            the_libs = []
            for target_lib in target_libs:
                local_lib = os.path.join(tmpdir, os.path.basename(target_lib))
                self.target.retrieve_into(target_lib, tmpdir)
                the_libs.append(local_lib)
            lib_opts = {os.path.basename(lib): {'base_addr': libaddr}
                        for lib, libaddr in self._mem_mapping.items()}
            # Pin PIEs at the default Linux ASLR-disabled base.
            bin_opts = {"base_addr": 0x555555554000} if preloader.main_object.pic else {}
        else:
            # BUGFIX: was `the_libs = {}` — the other branch builds a *list*,
            # and the value is passed to preload_libs, which expects a list.
            the_libs = []
            lib_opts = {}
            bin_opts = {}
            self._mem_mapping = {}

        if return_loader:
            return cle.Loader(the_binary, preload_libs=the_libs,
                              lib_opts=lib_opts, main_opts=bin_opts,
                              **cle_args)
        self.project = angr.Project(the_binary, preload_libs=the_libs,
                                    lib_opts=lib_opts, main_opts=bin_opts,
                                    **kwargs)

        if self.static_simproc:
            self._apply_simprocedures()

    if return_loader:
        return self.project.loader
    return self.project
def run_cgc(binary_name):
    # Load a CGC qualifier-event binary from the private binaries repo and run
    # the function-categorization analysis on it.
    # NOTE(review): `categorization` is unused in the visible chunk — the
    # function presumably continues past this view.
    binary_path = os.path.join(test_location, '..', 'binaries-private',
                               'cgc_qualifier_event', 'cgc', binary_name)
    project = angr.Project(binary_path, auto_load_libs=False)
    categorization = project.analyses.FunctionCategorizationAnalysis()
def test_function_rename(self):
    """End-to-end BinSync round-trip: user_1 renames `main` via the
    decompiler's rename dialog and pushes it; user_2 connects to the same
    sync repo, pulls, and must observe the new name."""
    binpath = os.path.join(common.test_location, "x86_64", "fauxware")
    new_function_name = "leet_main"
    user_1 = "user_1"
    user_2 = "user_2"

    with tempfile.TemporaryDirectory() as sync_dir_path:
        # ====== USER 1 ======
        # setup GUI
        main = MainWindow(show=False)
        main.workspace.instance.project.am_obj = angr.Project(
            binpath, auto_load_libs=False)
        main.workspace.instance.project.am_event()
        main.workspace.instance.join_all_jobs()  # wait for CFG etc. to finish
        func = main.workspace.instance.project.kb.functions['main']
        self.assertIsNotNone(func)

        # find the binsync plugin
        # noinspection PyTypeChecker
        binsync_plugin = next(
            iter([
                p for p in main.workspace.plugins.active_plugins
                if "Binsync" in str(p)
            ]))  # type: BinsyncPlugin

        # configure, and connect
        config = SyncConfig(main.workspace.instance, binsync_plugin.controller)
        config._user_edit.setText("")
        config._repo_edit.setText("")
        QTest.keyClicks(config._user_edit, user_1)
        QTest.keyClicks(config._repo_edit, sync_dir_path)
        # always init for first user
        QTest.mouseClick(config._initrepo_checkbox, Qt.MouseButton.LeftButton)
        QTest.mouseClick(config._ok_button, Qt.MouseButton.LeftButton)

        self.assertTrue(binsync_plugin.controller.sync.connected)
        self.assertEqual(binsync_plugin.controller.sync.client.master_user,
                         user_1)

        # trigger a function rename in decompilation
        disasm_view = main.workspace._get_or_create_disassembly_view()
        disasm_view._t_flow_graph_visible = True
        disasm_view.display_function(func)
        disasm_view.decompile_current_function()
        main.workspace.instance.join_all_jobs()
        pseudocode_view = main.workspace._get_or_create_pseudocode_view()
        # Locate the CFunction node of the decompiled function in the code map.
        for _, item in pseudocode_view.codegen.map_pos_to_node.items():
            if isinstance(
                    item.obj,
                    angr.analyses.decompiler.structured_codegen.c.CFunction):
                func_node = item.obj
                break
        else:
            self.fail("The CFunction instance is not found.")
        rnode = RenameNode(code_view=pseudocode_view, node=func_node)
        rnode._name_box.setText("")
        QTest.keyClicks(rnode._name_box, new_function_name)
        QTest.mouseClick(rnode._ok_button, Qt.MouseButton.LeftButton)

        self.assertEqual(func.name, new_function_name)

        # assure a new commit makes it to the repo
        time.sleep(10)
        # reset the repo so the second user can connect
        os.remove(sync_dir_path + "/.git/binsync.lock")

        # ====== USER 2 ======
        # setup GUI
        main = MainWindow(show=False)
        main.workspace.instance.project.am_obj = angr.Project(
            binpath, auto_load_libs=False)
        main.workspace.instance.project.am_event()
        main.workspace.instance.join_all_jobs()
        func = main.workspace.instance.project.kb.functions['main']
        self.assertIsNotNone(func)

        # find the binsync plugin
        # noinspection PyTypeChecker
        binsync_plugin = next(
            iter([
                p for p in main.workspace.plugins.active_plugins
                if "Binsync" in str(p)
            ]))  # type: BinsyncPlugin

        # configure, and connect (no repo init this time — it already exists)
        config = SyncConfig(main.workspace.instance, binsync_plugin.controller)
        config._user_edit.setText("")
        config._repo_edit.setText("")
        QTest.keyClicks(config._user_edit, user_2)
        QTest.keyClicks(config._repo_edit, sync_dir_path)
        QTest.mouseClick(config._ok_button, Qt.MouseButton.LeftButton)

        self.assertTrue(binsync_plugin.controller.sync.connected)
        self.assertEqual(binsync_plugin.controller.sync.client.master_user,
                         user_2)
        self.assertIn(
            user_1,
            [u.name for u in binsync_plugin.controller.sync.users()])

        # pull down the changes
        # TODO: this could be more GUI based
        sync_menu = SyncMenu(binsync_plugin.controller, [func])
        sync_menu._do_action("Sync", user_1, func)

        # get the current decompilation of the function
        func_code = binsync_plugin.controller.decompile_function(func)

        self.assertEqual(func_code.cfunc.name, new_function_name)
        self.assertEqual(func.name, new_function_name)

    common.app.exit(0)
#!/usr/bin/env python3 import time import angr import claripy before = time.time() binary = "./dist/tellme" proj = angr.Project(binary) start = claripy.BVV(b"flag{") flag = claripy.BVS("flag", 16 * 8) end = claripy.BVV(b"}\n") flagsym = claripy.Concat(start, flag, end) opts = angr.options.unicorn.union({"ZERO_FILL_UNCONSTRAINED_REGISTERS"}) state = proj.factory.full_init_state(args=[binary], add_options=opts, stdin=flagsym) for c in flag.chop(8): state.solver.add(c != 0) state.solver.add(c != ord("\n")) state.solver.add(c >= ord(" ")) state.solver.add(c <= ord("~")) simman = proj.factory.simulation_manager(state) #simman.explore(find=lambda s: b"You got it!" in s.posix.dumps(1)) simman.explore(find=0x004014aa, avoid=[0x004014b8]) for s in simman.found: print(s.solver.eval(flagsym, cast_to=bytes))
# Symbolically execute the .init_array constructor (Init0) of a stripped
# libcms shared object to reconstruct its call tree.
import os  # BUGFIX: `os` was used below but never imported (NameError)

from .func.jni_onload import JniOnLoad
import angr
from .project import Project
from .called_by_init_if_need import CalledByInitIfNeed
from src.func import jni_onload
from . import am_graph
from .func.init_0 import Init0

so_path = os.path.abspath("./out/libcms_removed_rubbilish1.so")
if not os.path.exists(so_path):
    exit(f"{so_path} not exist")

# p.arch = <Arch ARMEL (LE)>; base_addr=0 so file offsets match addresses.
p = angr.Project(so_path, load_options={"auto_load_libs": False},
                 main_opts={"base_addr": 0})
proj = Project(p)
func_called_by_init_if_need = CalledByInitIfNeed()

# try default jni_on_load (disabled)
# jni_onload = JniOnLoad(p)
# jni_onload.initial()
# jni_onload.symbol_execute()

init_0 = Init0(p)
init_0.initial()
init_0_root = init_0.symbol_execute2()
init_0.trim_node_tree(init_0_root)
import angr, simuvex, claripy

# Symbolically explore RamG_patch3.exe from 0x400781 until the success
# address 0x40274d is reached, then dump the stdin that gets there.
# (Legacy angr API: factory.path / angr.surveyors.)
start = 0x400781

p = angr.Project("RamG_patch3.exe", load_options={'auto_load_libs': False})
state = p.factory.blank_state(addr=start)
path = p.factory.path(state=state)
ex = angr.surveyors.Explorer(p, start=path, find=(0x40274d, ))
r = ex.run()

# BUGFIX: the original used a Python 2 print statement and called the
# non-existent `posix.dump(0)`; `posix.dumps(fd)` returns the concretized
# contents of fd 0 (stdin).
print(r.found[0].state.posix.dumps(0))
#!/usr/bin/env python
import angr, logging

# Set up symbolic execution of the bomb lab's phase_2, entering just after
# the sscanf call with a symbolic input string placed at str_ptr.
proj = angr.Project('./bomb', load_options={'auto_load_libs': False})

logging.basicConfig()
logging.getLogger('angr.surveyors.explorer').setLevel(logging.DEBUG)

bomb_explode = 0x8048b3b  # address of explode_bomb(), to be avoided

# Start analysis at the phase_2 function after the sscanf.
# BUGFIX: the state was bound to `state` but every following line referenced
# `initial_state`, raising NameError; unified on `initial_state`.
initial_state = proj.factory.blank_state(addr=0x8048977)

# Where our input string comes from
str_ptr = 0x800000

# Load our input string, and make sure there is no null byte inside.
# NOTE(review): `encrypted` is never defined in this script (it appears to be
# copied from a sibling solve script) — it must be defined before this point.
content = initial_state.memory.load(str_ptr, len(encrypted))
for i in xrange(0, len(content), 8):
    initial_state.add_constraints(content[i + 7:i] != 0)

# Make sure the input string ends with a null byte
zero = initial_state.memory.load(str_ptr + len(encrypted), 1)
initial_state.add_constraints(zero == 0)

# Push the str_ptr onto stack
initial_state.stack_push(initial_state.se.BVV(str_ptr, 32))
# Push a return address
initial_state.stack_push(initial_state.se.BVV(0, 32))
def main():
    """Solve onlyone.exe: find the input string whose encryption equals the
    contents of the 'encrypted' file.

    :return: the solved input as bytes
    """
    # Load the project
    p = angr.Project("onlyone.exe", use_sim_procedures=True)
    # Hook the malloc - we cannot automatically use SimProcedures for it,
    # which will be fixed soon
    p.hook(0x2398, angr.SIM_PROCEDURES['libc']['malloc'])
    # Hook the decrypt function merely because we don't support pow/sqrt/floor
    p.hook(0x401038, decrypt, length=5)

    # This is the content in 'encrypted' file; our input string should be
    # encrypted to this string.
    # BUGFIX: "...".decode('hex') is Python-2-only; use bytes.fromhex.
    encrypted = bytes.fromhex("253e315126363a2e551c")

    # Create the initial state starting from the target function
    initial_state = p.factory.blank_state(addr=0x401000)

    # Where our input string comes from
    str_ptr = 0x800000
    # Load our input string, and make sure there is no null byte inside
    content = initial_state.memory.load(str_ptr, len(encrypted))
    for i in range(0, len(content), 8):
        initial_state.add_constraints(content[i + 7:i] != 0)
    # Make sure the input string ends with a null byte
    zero = initial_state.memory.load(str_ptr + len(encrypted), 1)
    initial_state.add_constraints(zero == 0)

    # Push the str_ptr onto stack
    initial_state.stack_push(initial_state.solver.BVV(str_ptr, 32))
    # Push a return address
    initial_state.stack_push(initial_state.solver.BVV(0, 32))

    # Call explorer to execute the function. Veritesting is important since
    # we want to avoid unnecessary branching.
    ex = angr.surveyors.Explorer(p, start=initial_state, find=(0x4010c9, ),
                                 enable_veritesting=True)
    print("Executing...")
    angr.surveyors.explorer.l.setLevel(logging.DEBUG)
    angr.surveyors.surveyor.l.setLevel(logging.DEBUG)
    r = ex.run()
    if r.found:
        final_state = r.found[0]
    else:
        final_state = r.errored[0].previous_run.initial_state

    # Load the final encrypted string, add constraints to make the string be
    # equal to the expected ciphertext.
    buf_ptr = final_state.memory.load(final_state.regs.ebp - 0x18, 4,
                                      endness=p.arch.memory_endness)
    for i in range(0, len(encrypted)):
        # BUGFIX: bytes indexing yields an int in Python 3, so no ord() needed.
        final_state.add_constraints(
            final_state.memory.load(buf_ptr + i, 1) == encrypted[i])

    # Our input - solve it!
    input_string = final_state.memory.load(str_ptr, 10)
    print("Solving...")
    candidates = final_state.solver.eval_upto(input_string, 2)
    assert len(candidates) == 1
    # BUGFIX: hex(...)[2:-1].decode('hex') relied on Python 2 longs (trailing
    # 'L'); convert the 10-byte model value directly instead.
    return candidates[0].to_bytes(10, "big")
# NOTE(review): this chunk starts mid-script — the two lines below appear to
# be the body of an "are we inside an r2/rizin instance?" guard whose header
# is outside the visible region.
print(colored("only callable inside a r2-instance!", "red", attrs=["bold"]))
exit(0)

isX86 = isArchitectureX86(rzproj)
print("isX86: {}".format(isX86))

# get Offsets, we only need start_offset for callable
_, _, start_offset = getOffsets(rzproj)

# get parameters: the expected result is passed as a hex string on argv[1]
expected_result = int(sys.argv[1], 16)

# get binary name and create angr project
binaryname = getBinaryName(rzproj)
proj = angr.Project(binaryname, load_options={"auto_load_libs": False})

# setup hooks
hook_variables = getHooks(rzproj)
if len(hook_variables) != 0:
    for hook in hook_variables:
        # 0=address, 1=patch_length, 2=instructions
        proj.hook(hook[0], make_hook(hook[2]), length=hook[1])
        print(
            colored(
                "setup Hook: {}, addr: {}, patchlength: {}, instr: {}".format(
                    hook[3], hex(hook[0]), hook[1], hook[2]), "green"))

# create call function
callstate = proj.factory.callable(start_offset)
# -*- coding: utf-8 -*-
# Solve the `yakisoba` crackme: explore from the entry point until the
# program prints "Correct!", then recover the stdin that got there.
import angr

project = angr.Project("../file/yakisoba")
entry = project.factory.entry_state()
manager = project.factory.simulation_manager(entry)

# Steer the search by what the program writes to stdout (fd 1).
manager.explore(
    find=lambda st: b"Correct!" in st.posix.dumps(1),
    avoid=lambda st: b"Wrong!" in st.posix.dumps(1),
)

solution = manager.found[0]
print(solution.posix.dumps(0))  # the winning stdin
print(solution.posix.dumps(1))  # the program's stdout
def __init__(self, binary_path, testcase, timeout=None):
    """
    :param binary_path: path to the binary which the testcase applies to
                        (a list/tuple of paths for multi-CB CGC targets)
    :param testcase: string representing the contents of the testcase
    :param timeout: millisecond timeout (int or str)
    """
    # NOTE(review): `basestring`/`long` below make this Python-2-only code.
    self.binary_path = binary_path
    self.testcase = testcase
    self.timeout = None  # replaced below only when a valid timeout was given

    if isinstance(binary_path, basestring):
        self.is_multicb = False
        self.binaries = [binary_path]
    elif isinstance(binary_path, (list, tuple)):
        self.is_multicb = True
        self.binaries = binary_path
    else:
        raise ValueError(
            "Was expecting either a string or a list/tuple for binary_path! "
            "It's {} instead.".format(type(binary_path)))

    # Normalize timeout to a string (as expected by the showmap command line).
    if timeout is not None:
        if isinstance(timeout, (int, long)):
            self.timeout = str(timeout)
        elif isinstance(timeout, (str)):
            self.timeout = timeout
        else:
            raise ValueError("timeout param must be of type int or str")

    # will be set by showmap's return code
    self.causes_crash = False

    Fuzzer._perform_env_checks()
    self.base = Fuzzer._get_base()
    l.debug("got base dir %s", self.base)

    # unfortunately here is some code reuse between Fuzzer and Minimizer (and Showmap!)
    # Load the binary only to learn its OS/arch so the right AFL build is used.
    p = angr.Project(self.binaries[0])
    tracer_id = 'cgc' if p.loader.main_object.os == 'cgc' else p.arch.qemu_name
    if self.is_multicb:
        tracer_id = 'multi-{}'.format(tracer_id)

    self.showmap_path = os.path.join(shellphish_afl.afl_dir(tracer_id),
                                     "afl-showmap")
    self.afl_path_var = shellphish_afl.afl_path_var(tracer_id)

    l.debug("showmap_path: %s", self.showmap_path)
    l.debug("afl_path_var: %s", self.afl_path_var)

    os.environ['AFL_PATH'] = self.afl_path_var

    # create temp
    self.work_dir = tempfile.mkdtemp(prefix='showmap-', dir='/tmp/')

    # flag for work directory removal
    self._removed = False

    self.input_testcase = os.path.join(self.work_dir, 'testcase')
    self.output = os.path.join(self.work_dir, 'out')

    l.debug("input_testcase: %s", self.input_testcase)
    l.debug("output: %s", self.output)

    # populate contents of input testcase
    with open(self.input_testcase, 'w') as f:
        f.write(testcase)
# Build a normalized CFG (with data references) for a HAL binary named on the
# command line; later parts of the script derive I/O strings from it.
import os
import sys
import angr
import json

# Input binary and output JSON path, both keyed by argv[1].
BIN = os.path.join(os.path.dirname(__file__), "../hal/bins", sys.argv[1])
RES = os.path.join(os.path.dirname(__file__), "../results", sys.argv[1],
                   "strings.json")

project = angr.Project(BIN)
cfg = project.analyses.CFG(normalize=True, collect_data_references=True,
                           show_progressbar=True)

# (disabled) variable recovery / categorization / IOStrings pipeline:
#all_variable_recoveries = {
#    f: project.analyses.VariableRecoveryFast(f)
#    for f in cfg.kb.functions.values() if not f.is_simprocedure
#}
#
#categorization = project.analyses.FunctionCategorizationAnalysis()
#tag_manager = categorization.function_tag_manager
#
#input_functions = tag_manager.input_functions()
#output_functions = tag_manager.output_functions()
#
#io_strings = {
#    f: project.analyses.IOStrings(f, cfg, input_functions, output_functions)
#    for f in cfg.kb.functions.values() if not f.is_simprocedure
#}

# Accumulators filled further down in the original script and dumped to RES.
all_outs = {}
all_ins = {}
#!/usr/bin/env python3 import os import time import angr import claripy bvs = claripy.BVS('foo', 8) test_location = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../../binaries') state = angr.Project(os.path.join(test_location, 'tests', 'x86_64', 'fauxware'), main_opts={ 'base_addr': 0x400000 }, auto_load_libs=True).factory.full_init_state( add_options={angr.options.REVERSE_MEMORY_NAME_MAP}) def cycle(s): s = s.copy() s.memory.store(0x400000, bvs) return s def main(): s = cycle(state) for _ in range(20000): s = cycle(s)
def test_tail_call_optimization_detection_armel():
    # GitHub issue #1286: with detect_tail_calls=True, CFGFast must not treat
    # addresses inside existing functions as function starts, must recognize
    # tail-call targets as functions, and must attach the correct return
    # addresses to tail-called functions' return blocks.
    path = os.path.join(test_location, 'armel',
                        'Nucleo_read_hyperterminal-stripped.elf')
    proj = angr.Project(path, auto_load_libs=False)
    cfg = proj.analyses.CFGFast(resolve_indirect_jumps=True,
                                force_complete_scan=False,
                                normalize=True,
                                symbols=False,
                                detect_tail_calls=True)
    all_func_addrs = set(cfg.functions.keys())
    # Mid-function addresses must NOT be reported as function starts.
    nose.tools.assert_not_in(0x80010b5, all_func_addrs,
                             "0x80010b5 is inside Reset_Handler().")
    nose.tools.assert_not_in(0x8003ef9, all_func_addrs,
                             "0x8003ef9 is inside memcpy().")
    nose.tools.assert_not_in(0x8008419, all_func_addrs,
                             "0x8008419 is inside __mulsf3().")

    # Functions that are jumped to from tail-calls
    tail_call_funcs = [
        0x8002bc1, 0x80046c1, 0x8000281, 0x8001bdb, 0x8002839, 0x80037ad,
        0x8002c09, 0x8004165, 0x8004be1, 0x8002eb1
    ]
    for member in tail_call_funcs:
        nose.tools.assert_in(member, all_func_addrs)

    # also test for tailcall return addresses

    # mapping of return blocks to return addrs that are the actual callers of
    # certain tail-calls endpoints
    tail_call_return_addrs = {
        0x8002bd9: [0x800275f],  # 0x8002bc1
        0x80046d7: [0x800275f],  # 0x80046c1
        0x80046ed: [0x800275f],  # 0x80046c1
        0x8001be7: [0x800068d, 0x8000695],  # 0x8001bdb ??
        0x800284d: [0x800028b, 0x80006e1, 0x80006e7],  # 0x8002839
        0x80037f5: [0x800270b, 0x8002733, 0x8002759, 0x800098f,
                    0x8000997],  # 0x80037ad
        0x80037ef: [0x800270b, 0x8002733, 0x8002759, 0x800098f,
                    0x8000997],  # 0x80037ad
        0x8002cc9: [
            0x8002d3b, 0x8002b99, 0x8002e9f, 0x80041ad, 0x8004c87, 0x8004d35,
            0x8002efb, 0x8002be9, 0x80046eb, 0x800464f, 0x8002a09, 0x800325f,
            0x80047c1
        ],  # 0x8002c09
        0x8004183: [0x8002713],  # 0x8004165
        0x8004c31: [0x8002713],  # 0x8004be1
        0x8004c69: [0x8002713],  # 0x8004be1
        0x8002ef1: [0x800273b]
    }  # 0x8002eb1

    # check all expected return addrs are present
    for returning_block_addr, expected_return_addrs in tail_call_return_addrs.items(
    ):
        returning_block = cfg.model.get_any_node(returning_block_addr)
        return_block_addrs = [
            rb.addr for rb in cfg.model.get_successors(returning_block)
        ]
        msg = "%x: unequal sizes of expected_addrs [%d] and return_block_addrs [%d]" % \
              (returning_block_addr, len(expected_return_addrs), len(return_block_addrs))
        nose.tools.assert_equal(len(return_block_addrs),
                                len(expected_return_addrs), msg)
        for expected_addr in expected_return_addrs:
            msg = "expected retaddr %x not found for returning_block %x" % \
                  (expected_addr, returning_block_addr)
            nose.tools.assert_in(expected_addr, return_block_addrs, msg)
# Render the control-flow graph of a binary given on the command line,
# using angrutils' plot_cfg.
import angr
import sys
from angrutils import plot_cfg

project = angr.Project(sys.argv[1], load_options={"auto_load_libs": False})
graph = project.analyses.CFGFast()
# Drop import stubs and path terminators to keep the rendered graph readable.
plot_cfg(graph, sys.argv[1], remove_imports=True, remove_path_terminator=True)
def __init__(self,
             binary_path,
             work_dir,
             afl_count=1,
             library_path=None,
             time_limit=None,
             memory="8G",
             target_opts=None,
             extra_opts=None,
             create_dictionary=False,
             seeds=None,
             crash_mode=False,
             never_resume=False,
             qemu=True,
             stuck_callback=None,
             force_interval=None,
             job_dir=None,
             timeout=None):
    '''
    :param binary_path: path to the binary to fuzz. List or tuple for multi-CB.
    :param work_dir: the work directory which contains fuzzing jobs, our job directory will go here
    :param afl_count: number of AFL jobs total to spin up for the binary
    :param library_path: library path to use, if none is specified a default is chosen
    :param timelimit: amount of time to fuzz for, has no effect besides returning True when calling timed_out
    :param seeds: list of inputs to seed fuzzing with
    :param target_opts: extra options to pass to the target
    :param extra_opts: extra options to pass to AFL when starting up
    :param crash_mode: if set to True AFL is set to crash explorer mode, and seed will be expected to be a crashing input
    :param never_resume: never resume an old fuzzing run, even if it's possible
    :param qemu: Utilize QEMU for instrumentation of binary.
    :param memory: AFL child process memory limit (default: "8G")
    :param stuck_callback: the callback to call when afl has no pending fav's
    :param job_dir: a job directory to override the work_dir/binary_name path
    :param timeout: timeout for individual runs within AFL
    '''
    self.binary_path = binary_path
    self.work_dir = work_dir
    self.afl_count = afl_count
    self.time_limit = time_limit
    self.library_path = library_path
    self.target_opts = [] if target_opts is None else target_opts
    self.crash_mode = crash_mode
    self.memory = memory
    self.qemu = qemu
    self.force_interval = force_interval
    self.timeout = timeout

    Fuzzer._perform_env_checks()

    # The job is keyed by the (first) binary's basename.
    if isinstance(binary_path, str):
        self.is_multicb = False
        self.binary_id = os.path.basename(binary_path)
    elif isinstance(binary_path, (list, tuple)):
        self.is_multicb = True
        self.binary_id = os.path.basename(binary_path[0])
    else:
        raise ValueError(
            "Was expecting either a string or a list/tuple for binary_path! It's {} instead."
            .format(type(binary_path)))

    # sanity check crash mode
    if self.crash_mode:
        if seeds is None:
            raise ValueError(
                "Seeds must be specified if using the fuzzer in crash mode"
            )
        l.info("AFL will be started in crash mode")

    self.seeds = [b"fuzz"] if seeds is None or len(seeds) == 0 else seeds

    self.job_dir = os.path.join(self.work_dir,
                                self.binary_id) if not job_dir else job_dir
    self.in_dir = os.path.join(self.job_dir, "input")
    self.out_dir = os.path.join(self.job_dir, "sync")

    # sanity check extra opts
    self.extra_opts = extra_opts
    if self.extra_opts is not None:
        if not isinstance(self.extra_opts, list):
            raise ValueError(
                "extra_opts must be a list of command line arguments")

    # base of the fuzzer package
    self.base = Fuzzer._get_base()

    self.start_time = int(time.time())
    # create_dict script
    self.create_dict_path = os.path.join(self.base, "bin", "create_dict.py")
    # afl dictionary
    self.dictionary = None
    # processes spun up
    self.procs = []
    # start the fuzzer ids at 0
    self.fuzz_id = 0
    # test if we're resuming an old run: an existing, non-empty sync dir
    self.resuming = bool(os.listdir(self.out_dir)) if os.path.isdir(
        self.out_dir) else False
    # has the fuzzer been turned on?
    self._on = False

    if never_resume and self.resuming:
        l.info("could resume, but starting over upon request")
        shutil.rmtree(self.job_dir)
        self.resuming = False

    if self.is_multicb:
        # Where cgc/setup's Dockerfile checks it out
        # NOTE: 'afl/fakeforksrv' serves as 'qemu', as far as AFL is concerned
        # Will actually invoke 'fakeforksrv/multicb-qemu'
        # This QEMU cannot run standalone (always speaks the forkserver "protocol"),
        # but 'fakeforksrv/run_via_fakeforksrv' allows it.
        # XXX: There is no driller/angr support, and probably will never be.
        self.afl_path = shellphish_afl.afl_bin('multi-cgc')
        self.afl_path_var = shellphish_afl.afl_path_var('multi-cgc')
        self.qemu_name = 'TODO'
    else:
        # Load the binary only to learn its OS/arch so the matching AFL and
        # QEMU builds are selected.
        p = angr.Project(binary_path)

        self.os = p.loader.main_object.os

        self.afl_dir = shellphish_afl.afl_dir(self.os)

        # the path to AFL capable of calling driller
        self.afl_path = shellphish_afl.afl_bin(self.os)

        if self.os == 'cgc':
            self.afl_path_var = shellphish_afl.afl_path_var('cgc')
        else:
            self.afl_path_var = shellphish_afl.afl_path_var(
                p.arch.qemu_name)
            # set up libraries
            self._export_library_path(p)

        # the name of the qemu port used to run these binaries
        self.qemu_name = p.arch.qemu_name

    self.qemu_dir = self.afl_path_var

    l.debug("self.start_time: %r", self.start_time)
    l.debug("self.afl_path: %s", self.afl_path)
    l.debug("self.afl_path_var: %s", self.afl_path_var)
    l.debug("self.qemu_dir: %s", self.qemu_dir)
    l.debug("self.binary_id: %s", self.binary_id)
    l.debug("self.work_dir: %s", self.work_dir)
    l.debug("self.resuming: %s", self.resuming)

    # if we're resuming an old run set the input_directory to a '-'
    if self.resuming:
        l.info("[%s] resuming old fuzzing run", self.binary_id)
        self.in_dir = "-"
    else:
        # create the work directory and input directory
        try:
            os.makedirs(self.in_dir)
        except OSError:
            l.warning("unable to create in_dir \"%s\"", self.in_dir)

        # populate the input directory
        self._initialize_seeds()

    # look for a dictionary
    dictionary_file = os.path.join(self.job_dir, "%s.dict" % self.binary_id)
    if os.path.isfile(dictionary_file):
        self.dictionary = dictionary_file
    # if a dictionary doesn't exist and we aren't resuming a run, create a dict
    elif not self.resuming:
        # call out to another process to create the dictionary so we can
        # limit it's memory
        if create_dictionary:
            if self._create_dict(dictionary_file):
                self.dictionary = dictionary_file
                l.warning("done making dictionary")
            else:
                # no luck creating a dictionary
                l.warning("[%s] unable to create dictionary", self.binary_id)

    # Periodic heartbeat timer; interval defaults to 30s unless forced.
    if self.force_interval is None:
        l.warning("not forced")
        self._timer = InfiniteTimer(30, self._timer_callback)
    else:
        l.warning("forced")
        self._timer = InfiniteTimer(self.force_interval,
                                    self._timer_callback)

    self._stuck_callback = stuck_callback

    # set environment variable for the AFL_PATH
    os.environ['AFL_PATH'] = self.afl_path_var
def test():
    """Run every pafish evasion check twice and tally the outcomes.

    Each check in CHECK_TABLE is symbolically executed on two projects of
    the same binary: a plain one ("unaided") and one instrumented with
    anti-evasion hooks ("extended").  For every explored path the check's
    return value in EAX is classified as detected (non-zero / "true") or
    not detected (zero / "false"), and a LaTeX table row per check is
    accumulated and printed at the end.

    Relies on module-level names defined elsewhere in this file:
    CHECK_TABLE, UNAIDED_SKIP, testing, angr_antievasion, colored,
    json, logging.  (Python 2 source: print statements.)
    """
    logging.getLogger('angr_antievasion').setLevel(logging.INFO)
    logging.getLogger('testing.utilities').setLevel(logging.INFO)
    logging.getLogger('angr.procedures').setLevel(logging.DEBUG)

    # Two identical projects of the same binary: one left untouched,
    # one instrumented with anti-evasion hooks further below.
    proj_unaided = angr.Project('./pafish.exe', load_options={
        'auto_load_libs': True,
        'use_system_libs': False,
        'case_insensitive': True,
        'custom_ld_path': '../../windows_dlls',
        'except_missing_libs': True,
    })
    proj_extended = angr.Project('./pafish.exe', load_options={
        'auto_load_libs': True,
        'use_system_libs': False,
        'case_insensitive': True,
        'custom_ld_path': '../../windows_dlls',
        'except_missing_libs': True,
    })

    # stub out imports
    proj_unaided.analyses.CalleeCleanupFinder(hook_all=True)
    proj_extended.analyses.CalleeCleanupFinder(hook_all=True)

    # Alternative, expensive but exhaustive way to stub out all imports
    # for obj in proj.loader.all_pe_objects:
    #     # stub out all imports (by stubbing out each module exports)
    #     export_addrs = [x.rebased_addr for x in obj._exports.values() if x.forwarder is None]
    #     proj.analyses.CalleeCleanupFinder(starts=export_addrs, hook_all=True)

    # setup testing utilities
    # symbols for which no SimProcedure is available and/or is better to use the actual implementation
    no_sim_syms = [
        '_vsnprintf', 'mbstowcs', 'wcsstr', 'toupper', 'tolower',
        'lstrcmpiA', 'lstrcmpiW'
    ]

    # snprintf is (ab)used by pafish: angr stub returns an empty string so it's useless
    # we use the concrete implementation for the extended, and an unconstrained stub for the unaided
    testing.setup(proj_unaided, cdecl_stub=['_vsnprintf'],
                  stdcall_stub=['IsWow64Process'])
    testing.setup(proj_extended, unhook=no_sim_syms)

    # anti-evasion hooks
    angr_antievasion.hook_all(proj_extended)

    # return addresses for the check call state configuration
    ret_addr_unaided = proj_unaided.loader.extern_object.allocate()
    ret_addr_extended = proj_extended.loader.extern_object.allocate()

    # import IPython; IPython.embed()

    if 'CHECK_TABLE' not in globals():
        # load it from json file
        with open('pafish.exe_checks.json', 'r') as jfile:
            global CHECK_TABLE
            CHECK_TABLE = json.load(jfile)

    latex_table = []

    for check_name, check_addr in CHECK_TABLE:
        print '\n### {} check @ {} ###'.format(check_name, hex(check_addr))

        # Call states begin directly at the check function, returning to a
        # freshly allocated extern address we can use as the explore target.
        call_state_unaided = proj_unaided.factory.call_state(
            check_addr, ret_addr=ret_addr_unaided)
        call_state_extended = proj_extended.factory.call_state(
            check_addr, ret_addr=ret_addr_extended)

        simgr_unaided = proj_unaided.factory.simulation_manager(
            call_state_unaided, save_unconstrained=True)
        simgr_extended = proj_extended.factory.simulation_manager(
            call_state_extended, save_unconstrained=True)

        print '! Unaided exploration !'
        unaided_total = 0
        unaided_false = 0
        unaided_true = 0
        if check_name in UNAIDED_SKIP:
            print 'SKIPPED'
        else:
            angr_antievasion.rdtsc_monkey_unpatch()  # monkey patch is global so we need to patch and unpatch for each check
            while len(simgr_unaided.active) > 0:
                if check_name in UNAIDED_SKIP:
                    break
                simgr_unaided.explore(find=ret_addr_unaided)
                print simgr_unaided
                for sim in simgr_unaided.found:
                    # EAX holds the check's return value; 0 means "not detected".
                    ret = sim.state.regs.eax
                    ret_str = colored(ret, 'red')
                    if not sim.state.solver.symbolic(
                            ret) and sim.state.solver.eval(ret) == 0:
                        ret_str = colored(ret, 'cyan')
                        unaided_false += 1
                    else:
                        if sim.state.solver.symbolic(ret):
                            unaided_false += 1  # symbolic means undetermined so add to false too
                            # NOTE(review): symbolic results are counted in both
                            # tallies, so the total gets an extra bump here to
                            # keep true+false == total -- confirm against upstream.
                            unaided_total += 1
                        unaided_true += 1
                    print sim, "returned {}".format(ret_str)
                # import IPython; IPython.embed()
                unaided_total += len(simgr_unaided.found)

        print '\n! Instrumented exploration !'
        extended_total = 0
        extended_false = 0
        extended_true = 0
        angr_antievasion.rdtsc_monkey_patch()  # monkey patch is global so we need to patch and unpatch for each check
        while len(simgr_extended.active) > 0:
            simgr_extended.explore(find=ret_addr_extended)
            print simgr_extended
            for sim in simgr_extended.found:
                ret = sim.state.regs.eax
                ret_str = colored(ret, 'red')
                if not sim.state.solver.symbolic(ret) and sim.state.solver.eval(
                        ret) == 0:
                    ret_str = colored(ret, 'cyan')
                    extended_false += 1
                else:
                    if sim.state.solver.symbolic(ret):
                        extended_false += 1  # symbolic means undetermined so add to false too
                        # NOTE(review): see the matching note in the unaided tally.
                        extended_total += 1
                    extended_true += 1
                print sim, "returned {}".format(ret_str)
            # import IPython; IPython.embed()
            extended_total += len(simgr_extended.found)

        # Drop into an interactive shell after each check for inspection.
        import IPython
        IPython.embed()

        latex_table.append("{} & {} & {} & {} && {} & {} & {}".format(
            check_name, unaided_total, unaided_true, unaided_false,
            extended_total, extended_true, extended_false))

    for line in latex_table:
        print line
print "mem[%#x]: " % i + str(e) continue if v > 0: print "mem[%#x] = %#x" % (i, v) # for i in range(0x1002000000018, 0x1002000000018 + 0x20, 8): # v = state.solver.eval(state.memory.load(i, 8)) # if v > 0: # print "mem[%#x] = %#x" % (i, v) if _exit: exit() rebased_addr = lambda x: proj.loader.find_symbol(x).rebased_addr ELF_FILE = "./bin/simhost-STABLE-1_3_1-test" proj = angr.Project(ELF_FILE, load_options={'auto_load_libs': False}) start_addr = rebased_addr('tcp_input') print "[*] analysis start: %#x" % start_addr state = proj.factory.blank_state(addr=start_addr) ### load preprocessed data from preprocess import Info, Symbol INFO_FILE = ELF_FILE + ".info" try: with open(INFO_FILE) as f: info = pickle.load(f) except IOError as e: print e print "[!] run `./preprocess.py %s` first" % (ELF_FILE) exit()
#!/usr/bin/env python ''' We'd like to understand the stack layout of the main function by performing generating a VFG on it. ''' from pprint import pprint from collections import defaultdict import angr # create the project project = angr.Project("fauxware") # WRITEME: generate a CFG first so we have access to all functions cfg = project.analyses.CFG() # WRITEME: get the address of the main function main_func = project.kb.functions.function(name='main') # WRITEME: run VFG on it # Here is the suggested parameter setup # context_sensitivity_level: 3 # interfunction_level: 3 vfg = project.analyses.VFG(start=main_func.addr, context_sensitivity_level=3, interfunction_level=3) print("VFG analysis is over. We have some nodes now:") pprint(vfg.graph.nodes()) # WRITEME: get the input state to the very last basic block # the very last basic block in the main function is 0x80486e8
# cek = [0x5B,0x43,0x6D,0x67,0x1C,0x38,0x10,0x33,0x14,0x52,0x33,0x7A,0x27,0x1B,0x3D,0x3D,0x40,0x6A,0x0F,0x68,0x60,0x0C,0x6E,0x5C,0x19,0x58,0x3D,0x46,0x5C,0x79,0x67,0x6F,0x5E,0x51,0x49,0x65,0x6B,0x7D,0x18,0x6B,0x7C,0x78,0x58,0x74,0x4B,0x27,0x27,0x50,0x1D,0x58,0x2E,0x6F,0x45,0x10,0x3A,0x5A,0x1C,0x7D,0x20,0x53,0x64,0x4B,0x31,0x20,0x46,0x74,0x38,0x5A,0x9,0x0D,0x59,0x38,0x11,0x24,0x18,0x46,0x15,0x1B,0x44,0x66,0x5A,0x15,0x1E,0x66,0x39,0x5C,0x6D,0x1A,0x0C,0x29,0x5C,0x4B,0x31,0x71,0x10,0x5B,0x1,0x7B,0x16,0x61,0x6,0x9,0x0B,0x1B,0x7E,0x42,0x2B,0x10,0x1E,0x55,0x5E,0x62,0x2,0x74,0x2A,0x0B,0x15,0x75,0x79,0x1C,0x59,0x5D,0x18,0x79,0x40,0x5,0x2E,0x29,0x1B,0x3,0x3C,0x2,0x23,0x41,0x2F,0x2E,0x6,0x9,0x77,0x65,0x3E,0x2C,0x6E,0x6D,0x43,0x7,0x9,0x76,0x4,0x2D,0x53,0x43,0x35,0x6F,0x54,0x10,0x1C,0x62,0x0,0x3E,0x12,0x2D,0x68,0x5E,0x5,0x57,0x49,0x59,0x57,0x40,0x62,0x10,0x62,0x14,0x78,0x59,0x61,0x62,0x6E,0x2,0x7C,0x30,0x4C,0x4C,0x49,0x18,0x4,0x1B,0x51,0x4B,0x1C,0x4E,0x19,0x4E,0x2E,0x2D,0x42,0x3,0x10,0x46,0x33,0x46,0x4C,0x7,0x41,0x6,0x38,0x6A,0x13,0x46,0x71,0x12,0x18,0x26,0x3F,0x42,0x5D,0x47,0x39,0x11,0x11,0x58,0x3A,0x7D,0x22,0x35,0x5E,0x45,0x34,0x48,0x59,0x37,0x68,0x7A,0x49,0x78,0x28,0x39,0x52,0x48,0x56,0x0B,0x1C,0x4C,0x75,0x7A,0x17,0x17,0x0B,0x47,0x3D,0x6F,0x3C,0x3A,0x20,0x31,0x51,0x43,0x18,0x64,0x20,0x27,0x6E,0x7A,0x11,0x4C,0x18,0x69,0x67,0x4F,0x6C,0x31,0x2B,0x23,0x39,0x71,0x49,0x75,0x66,0x6C,0x51,0x3A,0x62,0x3C,0x5A,0x68,0x14,0x42,0x72,0x27,0x0E,0x24,0x71,0x4B,0x72,0x25,0x20,0x0A,0x5A,0x34,0x37,0x70,0x48,0x2B,0x30,0x8,0x74,0x60,0x65,0x47,0x65,0x5,0x51,0x74,0x65,0x2F,0x43,0x6E,0x4,0x76,0x1F,0x5C,0x26,0x43,0x0F,0x6E,0x53,0x29,0x40,0x66,0x35,0x23,0x4,0x42,0x5,0x1B,0x23,0x65,0x55,0x9,0x1A,0x0,0x31,0x6A,0x15,0x22,0x18,0x78,0x65,0x64,0x49,0x3E,0x53,0x4F,0x47,0x49,0x0F,0x18,0x55,0x2C,0x21,0x49,0x1,0x17,0x2F,0x6E,0x63,0x7B,0x5B,0x3B,0x32,0x2,0x6A,0x13,0x66,0x26,0x3B,0x18,0x24,0x39,0x2C,0x3F,0x66,0x74,0x63,0x26,0x1A,0x10,0x8,0x43,0x71,0x38,0x6D,0x29,0x1E,0x2D,0x41,0x56,0x38,0x40,0x4A,0x37,0x5F,0x57,0
x29,0x49,0x41,0x74,0x4A,0x1B,0x9,0x69,0x59,0x3A,0x2,0x1B,0x9,0x6B,0x71,0x4C,0x55,0x3C,0x5B,0x6A,0x7B,0x32,0x74,0x52,0x0E,0x33,0x0F,0x18,0x7E,0x5A,0x4D,0x20,0x55,0x4A,0x34,0x0E,0x7A,0x66,0x42,0x3B,0x5E,0x68,0x5F,0x4A,0x28,0x16,0x38,0x4,0x55,0x54,0x1A,0x5,0x1B,0x52,0x2,0x40,0x79,0x56,0x6D,0x23,0x7,0x73,0x1,0x67,0x58,0x51,0x0F,0x54,0x23,0x2D,0x11,0x48,0x15,0x4E,0x55,0x3F,0x3C,0x24,0x28,0x10,0x2A,0x4E,0x26,0x38,0x2F,0x0C,0x4E,0x74,0x5B,0x6C,0x2A,0x61,0x6,0x0C,0x12,0x42,0x23,0x4,0x63,0x38,0x3B,0x55,0x1F,0x67,0x26,0x38,0x4F,0x1,0x26,0x29,0x2B,0x4A,0x1C,0x0E,0x2F,0x7B,0x30,0x26,0x3D,0x6B,0x3,0x3F,0x6C,0x28,0x7C,0x62,0x6C,0x44,0x66,0x2B,0x54,0x50,0x0F,0x6C,0x38,0x45,0x6A,0x41,0x41,0x0B,0x73,0x78,0x60,0x2F,0x12,0x40,0x53,0x15,0x8,0x1C,0x1D,0x2B,0x0,0x5E,0x30,0x49,0x3E,0x64,0x0B,0x19,0x72,0x1,0x11,0x79,0x29,0x0A,0x17,0x2,0x6,0x4C,0x29,0x58,0x23,0x55,0x32,0x38,0x7E,0x65,0x5,0x26,0x37,0x5B,0x13,0x7,0x16,0x5,0x0D,0x0A,0x6D,0x59,0x73,0x71,0x64,0x74,0x48,0x42,0x57,0x48,0x20,0x25,0x73,0x30,0x1B,0x22,0x23,0x0B,0x70,0x3E,0x4C,0x31,0x7D,0x41,0x36,0x46,0x58,0x7C,0x2C,0x4,0x0A,0x71,0x3,0x34,0x3B,0x4A,0x1A,0x47,0x10,0x3A,0x67,0x1F,0x51,0x6F,0x44,0x70,0x73,0x32,0x6B,0x22,0x13,0x64,0x77,0x72,0x6,0x17,0x66,0x31,0x38,0x2F,0x0E,0x54,0x50,0x61,0x7E,0x74,0x74,0x25,0x0B,0x48,0x7C,0x5F,0x0C,0x25,0x3F,0x75,0x51,0x11,0x19,0x3F,0x44,0x27,0x25,0x79,0x64,0x6E,0x68,0x41,0x37,0x50,0x9,0x1A,0x4F,0x5D,0x72,0x71,0x52,0x6E,0x17,0x0C,0x25,0x5D,0x26,0x21,0x0F,0x6A,0x3E,0x2E,0x3B,0x4B,0x1A,0x0B,0x54,0x6,0x38,0x60,0x25,0x55,0x3C,0x3,0x2C,0x6D,0x77,0x5A,0x3A,0x73,0x17,0x79,0x2B,0x4A,0x54,0x33,0x69,0x46,0x62,0x77,0x0D,0x50,0x73,0x2C,0x12,0x5E,0x3A,0x5D,0x3F,0x64,0x0F,0x40,0x2E,0x64,0x59,0x6E,0x1D,0x67,0x1A,0x16,0x1C,0x0B,0x14,0x10,0x5F,0x17,0x3,0x28,0x45,0x54,0x23,0x5,0x5D,0x69,0x79,0x0C,0x72,0x5,0x1E,0x19,0x51,0x4E,0x5,0x23,0x6A,0x0F,0x35,0x5B,0x7B,0x23,0x5C,0x19,0x2F,0x51,0x53,0x7A,0x75,0x58,0x18,0x5B,0x3A,0x2,0x42,0x53,0x15,0x42,0x1B,0x0,0x1F,0x21,0x61,0x66,0x25,0x1,0x39,0x63,0x4D,0x1,0x75,0x51,0x4A,0x5F,0x8,0
x4B,0x12,0x56,0x8,0x0F,0x3E,0x11,0x40,0x68,0x8,0x7C,0x19,0x0E,0x4F,0x21,0x1E,0x7D,0x12,0x3A,0x4B,0x5A,0x51,0x65,0x18,0x7E,0x1B,0x7B,0x66] # cek2 = [0x53,0x62,0x2,0x59,0x51,0x73,0x30,0x40,0x6,0x15,0x6A,0x55,0x29,0x9,0x4E,0x6A,0x76,0x2C,0x61,0x5B,0x12,0x79,0x6D,0x72,0x1E,0x2F,0x3F,0x34,0x6E] import angr import claripy FLAG_LEN = 28 STDIN_FD = 0 base_addr = 0x8049000 proj = angr.Project("./soal",main_opts={'base_addr':base_addr}) flag_chars = [claripy.BVS('flag_%d' % i, 8) for i in range(FLAG_LEN)] flag = claripy.Concat( *flag_chars + [claripy.BVV(b'\n')]) # Add \n for scanf() to accept the input state = proj.factory.full_init_state( args=['./soal'], add_options=angr.options.unicorn, stdin=flag, ) # Add constraints that all characters are printable for k in flag_chars: state.solver.add(k >= ord('!')) state.solver.add(k <= ord('~')) simgr = proj.factory.simulation_manager(state) find_addr = 0x080490C5 # SUCCESS avoid_addr = 0x080490DD # FAILURE simgr.explore(find=find_addr, avoid=avoid_addr)
def main(argv):
    """Solve the challenge binary given as argv[1].

    Recreates the program state right after its scanf("%8s %8s") call by
    planting two symbolic 8-byte strings at fake heap addresses and
    rewriting the malloc'd global pointers to reference them, then
    explores until stdout contains the success message and prints the
    concrete passwords.  Raises if no solving state is found.
    """
    project = angr.Project(argv[1])

    # Begin just past the scanf call instead of at the entry point.
    resume_addr = 0x08048699
    state = project.factory.blank_state(addr=resume_addr)

    # One symbolic 8-byte bitvector per "%8s" conversion.
    sym_pw0 = claripy.BVS('password0', 8 * 8)
    sym_pw1 = claripy.BVS('password1', 8 * 8)

    # (unused "heap" slot we control, global pointer to redirect, symbol).
    # Note: stores of the pointer values use the architecture's endianness
    # (little-endian on x86); the symbolic data itself is stored as-is.
    plan = (
        (8000, 0xabcc8a4, sym_pw0),
        (9000, 0xabcc8ac, sym_pw1),
    )
    for fake_heap, pointer_addr, symbol in plan:
        state.memory.store(pointer_addr, fake_heap,
                           endness=project.arch.memory_endness)
        state.memory.store(fake_heap, symbol)

    simgr = project.factory.simgr(state)

    # Success/failure predicates keyed on the program's stdout so far.
    def reached_goal(st):
        return b'Good Job.' in st.posix.dumps(sys.stdout.fileno())

    def reached_failure(st):
        return b'Try again.' in st.posix.dumps(sys.stdout.fileno())

    simgr.explore(find=reached_goal, avoid=reached_failure)

    if not simgr.found:
        raise Exception('Could not find the solution')

    winner = simgr.found[0]
    answers = [winner.solver.eval(sym, cast_to=bytes).decode()
               for sym in (sym_pw0, sym_pw1)]
    print("%s %s" % (answers[0], answers[1]))
def main():
    """Deflatten OLLVM control-flow-flattened functions in a binary.

    For each target address given on the command line: build a CFG with
    angr, recover the function's supergraph, identify the prologue /
    dispatcher / return blocks, symbolically execute each relevant block
    to discover its real successors, then patch the on-disk binary so the
    original (unflattened) control flow is restored.

    Relies on module-level helpers defined elsewhere in this file:
    get_relevant_nop_nodes, symbolic_execution, fill_nop,
    patch_instruction, ins_j_jmp_hex_x86, ins_b_jmp_hex_arm,
    ins_b_jmp_hex_arm64, and the ARCH_X86/ARCH_ARM/ARCH_ARM64 sets,
    plus the argparse/sys/angr/claripy/am_graph/defaultdict imports.
    """
    parser = argparse.ArgumentParser(description="deflat control flow script")
    parser.add_argument("-f", "--file", help="binary to analyze")
    parser.add_argument(
        "--addr", nargs="+",
        help="address of target function in hex format.Two or more addresses splited by space is allowed")
    args = parser.parse_args()

    if args.file is None or args.addr is None:
        parser.print_help()
        sys.exit(0)

    for i, addr in enumerate(args.addr):
        filename = args.file
        start = int(addr, 16)
        try:
            project = angr.Project(filename,
                                   load_options={'auto_load_libs': False})
            # do normalize to avoid overlapping blocks, disable force_complete_scan to avoid possible "wrong" blocks
            cfg = project.analyses.CFGFast(normalize=True,
                                           force_complete_scan=False)
            target_function = cfg.functions.get(start)
            # A super transition graph is a graph that looks like IDA Pro's CFG
            supergraph = am_graph.to_supergraph(
                target_function.transition_graph)

            # Page-align the mapped base: file offsets below are computed as
            # virtual address minus this base.
            base_addr = project.loader.main_object.mapped_base >> 12 << 12

            # get prologue_node and retn_node
            # (prologue: no incoming edges; retn: no outgoing edges)
            prologue_node = None
            for node in supergraph.nodes():
                if supergraph.in_degree(node) == 0:
                    prologue_node = node
                if supergraph.out_degree(node) == 0:
                    retn_node = node

            if prologue_node is None or prologue_node.addr != start:
                print("Something must be wrong...")
                sys.exit(-1)

            # The prologue's single successor is the main dispatcher; the
            # dispatcher's other predecessor is the pre-dispatcher block.
            main_dispatcher_node = list(
                supergraph.successors(prologue_node))[0]
            for node in supergraph.predecessors(main_dispatcher_node):
                if node.addr != prologue_node.addr:
                    pre_dispatcher_node = node
                    break

            relevant_nodes, nop_nodes = get_relevant_nop_nodes(
                supergraph, pre_dispatcher_node, prologue_node, retn_node)
            print('*******************relevant blocks************************')
            print('prologue: %#x' % start)
            print('main_dispatcher: %#x' % main_dispatcher_node.addr)
            print('pre_dispatcher: %#x' % pre_dispatcher_node.addr)
            print('retn: %#x' % retn_node.addr)
            relevant_block_addrs = [node.addr for node in relevant_nodes]
            print('relevant_blocks:',
                  [hex(addr) for addr in relevant_block_addrs])

            print('*******************symbolic execution*********************')
            # relevants aliases relevant_nodes: prologue is executed too,
            # but the return block is excluded from symbolic execution.
            relevants = relevant_nodes
            relevants.append(prologue_node)
            relevants_without_retn = list(relevants)
            relevants.append(retn_node)
            relevant_block_addrs.extend([prologue_node.addr, retn_node.addr])

            # flow maps each relevant block to its recovered successor
            # addresses; patch_instrs records the conditional-select
            # instruction (cmov/movcc/cset) to rewrite per block.
            flow = defaultdict(list)
            patch_instrs = {}
            for relevant in relevants_without_retn:
                print('-------------------dse %#x---------------------' %
                      relevant.addr)
                block = project.factory.block(relevant.addr,
                                              size=relevant.size)
                has_branches = False
                hook_addrs = set([])
                for ins in block.capstone.insns:
                    if project.arch.name in ARCH_X86:
                        if ins.insn.mnemonic.startswith('cmov'):
                            # only record the first one
                            if relevant not in patch_instrs:
                                patch_instrs[relevant] = ins
                                has_branches = True
                        elif ins.insn.mnemonic.startswith('call'):
                            hook_addrs.add(ins.insn.address)
                    elif project.arch.name in ARCH_ARM:
                        # conditional mov (movne, moveq, ...) but not plain mov
                        if ins.insn.mnemonic != 'mov' and ins.insn.mnemonic.startswith('mov'):
                            if relevant not in patch_instrs:
                                patch_instrs[relevant] = ins
                                has_branches = True
                        elif ins.insn.mnemonic in {'bl', 'blx'}:
                            hook_addrs.add(ins.insn.address)
                    elif project.arch.name in ARCH_ARM64:
                        if ins.insn.mnemonic.startswith('cset'):
                            if relevant not in patch_instrs:
                                patch_instrs[relevant] = ins
                                has_branches = True
                        elif ins.insn.mnemonic in {'bl', 'blr'}:
                            hook_addrs.add(ins.insn.address)

                if has_branches:
                    # Conditional block: run once with the branch predicate
                    # forced to 1 and once forced to 0 to find both successors.
                    flow[relevant].append(
                        symbolic_execution(project, relevant_block_addrs,
                                           relevant.addr, hook_addrs,
                                           claripy.BVV(1, 1), True))
                    flow[relevant].append(
                        symbolic_execution(project, relevant_block_addrs,
                                           relevant.addr, hook_addrs,
                                           claripy.BVV(0, 1), True))
                else:
                    flow[relevant].append(symbolic_execution(
                        project, relevant_block_addrs, relevant.addr,
                        hook_addrs))

            print('************************flow******************************')
            for k, v in flow.items():
                print('%#x: ' % k.addr, [hex(child) for child in v])
            print('%#x: ' % retn_node.addr, [])

            print('************************patch*****************************')
            with open(filename, 'rb') as origin:
                # Attention: can't transform to str by calling decode() directly. so use bytearray instead.
                origin_data = bytearray(origin.read())
                origin_data_len = len(origin_data)

            # NOTE(review): recovery_file is the input path itself, so the
            # binary is patched in place -- confirm this is intended
            # (related scripts usually write to a separate *_recovered file).
            recovery_file = filename
            recovery = open(recovery_file, 'wb')

            # patch irrelevant blocks
            for nop_node in nop_nodes:
                fill_nop(origin_data, nop_node.addr - base_addr,
                         nop_node.size, project.arch)

            # remove unnecessary control flows
            for parent, childs in flow.items():
                if len(childs) == 1:
                    # Unconditional block: rewrite its last instruction as a
                    # direct jump to the single real successor.
                    parent_block = project.factory.block(parent.addr,
                                                         size=parent.size)
                    last_instr = parent_block.capstone.insns[-1]
                    file_offset = last_instr.address - base_addr
                    # patch the last instruction to jmp
                    if project.arch.name in ARCH_X86:
                        fill_nop(origin_data, file_offset, last_instr.size,
                                 project.arch)
                        patch_value = ins_j_jmp_hex_x86(last_instr.address,
                                                        childs[0], 'jmp')
                    elif project.arch.name in ARCH_ARM:
                        patch_value = ins_b_jmp_hex_arm(last_instr.address,
                                                        childs[0], 'b')
                        if project.arch.memory_endness == "Iend_BE":
                            patch_value = patch_value[::-1]
                    elif project.arch.name in ARCH_ARM64:
                        # FIXME: For aarch64/arm64, the last instruction of prologue seems useful in some cases, so patch the next instruction instead.
                        if parent.addr == start:
                            file_offset += 4
                            patch_value = ins_b_jmp_hex_arm64(
                                last_instr.address + 4, childs[0], 'b')
                        else:
                            patch_value = ins_b_jmp_hex_arm64(
                                last_instr.address, childs[0], 'b')
                        if project.arch.memory_endness == "Iend_BE":
                            patch_value = patch_value[::-1]
                    patch_instruction(origin_data, file_offset, patch_value)
                else:
                    # Conditional block: turn the recorded conditional-select
                    # instruction into a conditional branch + unconditional
                    # branch pair covering both successors.
                    instr = patch_instrs[parent]
                    file_offset = instr.address - base_addr
                    # patch instructions starting from `cmovx` to the end of block
                    fill_nop(origin_data, file_offset,
                             parent.addr + parent.size - base_addr - file_offset,
                             project.arch)
                    if project.arch.name in ARCH_X86:
                        # patch the cmovx instruction to jx instruction
                        patch_value = ins_j_jmp_hex_x86(
                            instr.address, childs[0],
                            instr.mnemonic[len('cmov'):])
                        patch_instruction(origin_data, file_offset,
                                          patch_value)

                        file_offset += 6
                        # patch the next instruction to jmp instrcution
                        patch_value = ins_j_jmp_hex_x86(instr.address + 6,
                                                        childs[1], 'jmp')
                        patch_instruction(origin_data, file_offset,
                                          patch_value)
                    elif project.arch.name in ARCH_ARM:
                        # patch the movx instruction to bx instruction
                        bx_cond = 'b' + instr.mnemonic[len('mov'):]
                        patch_value = ins_b_jmp_hex_arm(instr.address,
                                                        childs[0], bx_cond)
                        if project.arch.memory_endness == 'Iend_BE':
                            patch_value = patch_value[::-1]
                        patch_instruction(origin_data, file_offset,
                                          patch_value)

                        file_offset += 4
                        # patch the next instruction to b instrcution
                        patch_value = ins_b_jmp_hex_arm(instr.address + 4,
                                                        childs[1], 'b')
                        if project.arch.memory_endness == 'Iend_BE':
                            patch_value = patch_value[::-1]
                        patch_instruction(origin_data, file_offset,
                                          patch_value)
                    elif project.arch.name in ARCH_ARM64:
                        # patch the cset.xx instruction to bx instruction
                        bx_cond = instr.op_str.split(',')[-1].strip()
                        patch_value = ins_b_jmp_hex_arm64(instr.address,
                                                          childs[0], bx_cond)
                        if project.arch.memory_endness == 'Iend_BE':
                            patch_value = patch_value[::-1]
                        patch_instruction(origin_data, file_offset,
                                          patch_value)

                        file_offset += 4
                        # patch the next instruction to b instruction
                        patch_value = ins_b_jmp_hex_arm64(instr.address + 4,
                                                          childs[1], 'b')
                        if project.arch.memory_endness == 'Iend_BE':
                            patch_value = patch_value[::-1]
                        patch_instruction(origin_data, file_offset,
                                          patch_value)

            assert len(origin_data) == origin_data_len, "Error: size of data changed!!!"
            recovery.write(origin_data)
            recovery.close()
            print('Successful! The recovered file: %s' % recovery_file)
        except Exception as e:
            print(e)
def __init__(self, file_path=None): # self.__init_proj(file_path) self.proj = angr.Project(file_path)
def test_amd64():
    """Regression test: CFG recovery on the x86_64 fauxware binary.

    Builds a CFG, then checks the recovered function set, main()'s blocks,
    call sites, call targets/returns, and selected transition-graph edges
    against hard-coded expectations.  (Python 2 source: note the `L`
    long-literal suffixes and nose assertions.)
    """
    logging.getLogger('angr.analyses.cfg').setLevel(logging.DEBUG)

    fauxware_amd64 = angr.Project(test_location + "/x86_64/fauxware")

    # Function entry points the CFG should discover (below 0x500000).
    EXPECTED_FUNCTIONS = {
        0x4004e0, 0x400510, 0x400520, 0x400530, 0x400540, 0x400550, 0x400560,
        0x400570, 0x400580, 0x4005ac, 0x400640, 0x400664, 0x4006ed, 0x4006fd,
        0x40071d, 0x4007e0, 0x400880
    }
    # Basic-block start addresses inside main().
    EXPECTED_BLOCKS = {
        0x40071D, 0x40073E, 0x400754, 0x40076A, 0x400774, 0x40078A, 0x4007A0,
        0x4007B3, 0x4007C7, 0x4007C9, 0x4007BD, 0x4007D3
    }
    # Blocks in main() that end with a call.
    EXPECTED_CALLSITES = {
        0x40071D, 0x40073E, 0x400754, 0x40076A, 0x400774, 0x40078A, 0x4007A0,
        0x4007BD, 0x4007C9
    }
    # NOTE(review): this is a set literal, so the duplicated values collapse;
    # only the distinct call targets are actually compared.
    EXPECTED_CALLSITE_TARGETS = {
        4195600L, 4195632L, 4195632L, 4195600L, 4195632L, 4195632L, 4195940L,
        4196077L, 4196093L
    }
    # Fall-through (return-to) addresses of the call sites; None presumably
    # corresponds to the call into the non-returning rejected() path.
    EXPECTED_CALLSITE_RETURNS = {
        0x40073e, 0x400754, 0x40076a, 0x400774, 0x40078a, 0x4007a0, 0x4007b3,
        0x4007c7, None
    }

    fauxware_amd64.analyses.CFG()
    nose.tools.assert_equal(
        set([k for k in fauxware_amd64.kb.functions.keys() if k < 0x500000]),
        EXPECTED_FUNCTIONS)

    main = fauxware_amd64.kb.functions.function(name='main')
    nose.tools.assert_equal(main.startpoint.addr, 0x40071D)
    nose.tools.assert_equal(set(main.block_addrs), EXPECTED_BLOCKS)
    nose.tools.assert_equal([0x4007D3], [bl.addr for bl in main.endpoints])
    nose.tools.assert_equal(set(main.get_call_sites()), EXPECTED_CALLSITES)
    nose.tools.assert_equal(
        set(map(main.get_call_target, main.get_call_sites())),
        EXPECTED_CALLSITE_TARGETS)
    nose.tools.assert_equal(
        set(map(main.get_call_return, main.get_call_sites())),
        EXPECTED_CALLSITE_RETURNS)
    nose.tools.assert_true(main.has_return)

    # rejected() must be detected as non-returning.
    rejected = fauxware_amd64.kb.functions.function(name='rejected')
    nose.tools.assert_equal(rejected.returning, False)

    # transition graph
    main_g = main.transition_graph
    main_g_edges_ = main_g.edges(data=True)

    # Convert nodes those edges from blocks to addresses
    main_g_edges = []
    for src_node, dst_node, data in main_g_edges_:
        main_g_edges.append((src_node.addr, dst_node.addr, data))

    nose.tools.assert_true((0x40071d, 0x400510, {
        'type': 'call'
    }) in main_g_edges)
    nose.tools.assert_true((0x40071d, 0x40073e, {
        'type': 'fake_return',
        'confirmed': True
    }) in main_g_edges)
    nose.tools.assert_true((0x40073e, 0x400530, {
        'type': 'call'
    }) in main_g_edges)
    nose.tools.assert_true((0x40073e, 0x400754, {
        'type': 'fake_return',
        'confirmed': True
    }) in main_g_edges)

    # rejected() does not return
    nose.tools.assert_true((0x4007c9, 0x4006fd, {
        'type': 'call'
    }) in main_g_edges)
    nose.tools.assert_true((0x4007c9, 0x4007d3, {
        'type': 'fake_return'
    }) in main_g_edges)
def get_possible_flags():
    """Solve stage 1 of whitehat_crypto400 and enumerate passphrase candidates.

    Symbolically executes the binary with an 8-byte symbolic argv[1]
    through four explore() milestones, then constrains the derived buffer
    at 0x6C4B20 to printable bytes and enumerates its possible values two
    bytes at a time.

    :returns: a tuple of 4-tuples; each 4-tuple holds one candidate
        2-byte chunk per byte pair, to be brute-forced by the caller.

    Relies on module-level imports: angr, claripy, itertools.
    """
    # load the binary
    print('[*] loading the binary')
    p = angr.Project("whitehat_crypto400")

    # this is a statically-linked binary, and it's easer for angr if we use Python
    # summaries for the libc functions
    p.hook(0x4018B0, angr.SIM_PROCEDURES['glibc']['__libc_start_main']())
    p.hook(0x422690, angr.SIM_PROCEDURES['libc']['memcpy']())
    p.hook(0x408F10, angr.SIM_PROCEDURES['libc']['puts']())

    # this is some anti-debugging initialization. It doesn't do much against angr,
    # but wastes time
    p.hook(0x401438, angr.SIM_PROCEDURES['stubs']['ReturnUnconstrained']())

    # from playing with the binary, we can easily see that it requires strings of
    # length 8, so we'll hook the strlen calls and make sure we pass an 8-byte
    # string
    def hook_length(state):
        # force strlen's result (rax) to 8
        state.regs.rax = 8

    p.hook(0x40168e, hook_length, length=5)
    p.hook(0x4016BE, hook_length, length=5)

    # here, we create the initial state to start execution. argv[1] is our 8-byte
    # string, and we add an angr option to gracefully handle unsupported syscalls
    arg1 = claripy.BVS('arg1', 8 * 8)
    initial_state = p.factory.entry_state(
        args=["crypto400", arg1],
        add_options={"BYPASS_UNSUPPORTED_SYSCALL"})

    # and let's add a constraint that none of the string's bytes can be null
    for b in arg1.chop(8):
        initial_state.add_constraints(b != 0)

    # Simulation managers are a basic building block of the symbolic execution engine.
    # They track a group of states as the binary is executed, and allows for easier
    # management, pruning, and so forth of those states
    sm = p.factory.simulation_manager(initial_state)

    # here, we get to stage 2 using the simulation manager's explore() functionality.
    # This executes until at least one path reaches the specified address, and can
    # discard paths that hit certain other addresses.
    # Each unstash() call moves the found states back to active so the next
    # explore() continues from them.
    print('[*] executing')
    sm.explore(find=0x4016A3).unstash(from_stash='found', to_stash='active')
    sm.explore(find=0x4016B7, avoid=[0x4017D6, 0x401699, 0x40167D]).unstash(
        from_stash='found', to_stash='active')
    sm.explore(find=0x4017CF, avoid=[0x4017D6, 0x401699, 0x40167D]).unstash(
        from_stash='found', to_stash='active')
    sm.explore(find=0x401825, avoid=[0x401811])

    # now, we're at stage 2. stage 2 is too complex for a SAT solver to solve, but
    # stage1 has narrowed down the keyspace enough to brute-force the rest, so
    # let's get the possible values for the passphrase and brute-force the rest.
    s = sm.found[0]

    # to reduce the keyspace further, let's assume the bytes are printable
    for i in range(8):
        b = s.memory.load(0x6C4B20 + i, 1)
        s.add_constraints(b >= 0x21, b <= 0x7e)

    # now get the possible values. One caveat is that getting all possible values
    # for all 8 bytes pushes a lot of complexity to the SAT solver, and it chokes.
    # To avoid this, we're going to get the solutions to 2 bytes at a time, and
    # brute force the combinations.
    possible_values = [
        s.solver.eval_upto(s.memory.load(0x6C4B20 + i, 2), 65536,
                           cast_to=bytes) for i in range(0, 8, 2)
    ]
    possibilities = tuple(itertools.product(*possible_values))
    return possibilities