def main():
    """Dump the static CFG, run the target in the sandbox and dump the VM CFG.

    Steps, in order:
      1. disassemble '300.bin' from 0x401550 and write the resulting graph to
         "cfg_before.dot";
      2. parse the command line (a single ``filename`` argument on top of the
         sandbox options) and force ``mimic_env``;
      3. run the file in a Sandbox_Linux_x86_64 with a breakpoint on the entry
         point handled by ``stop``;
      4. call ``interpret()`` and write the global ``cfg`` to "vm_graph.dot".
    """
    global cfg, block, data

    # Paint the cfg_before image from disassembly.
    # Container.from_stream() reads the stream while building the container,
    # so the handle is closed as soon as the container exists.
    with open('300.bin', 'rb') as fdesc:
        cont = Container.from_stream(fdesc)
    adr = 0x401550
    machine = Machine(cont.arch)
    mdis = machine.dis_engine(cont.bin_stream)
    blocks = mdis.dis_multibloc(adr)
    with open("cfg_before.dot", "w") as dot_file:
        dot_file.write(blocks.dot())

    # Get filename
    parser = Sandbox_Linux_x86_64.parser(description="300.bin")
    parser.add_argument("filename", help="filename")
    options = parser.parse_args()
    options.mimic_env = True

    # Start Sandbox: the breakpoint on the entry point hands control to `stop`
    sb = Sandbox_Linux_x86_64(options.filename, options, globals())
    sb.jitter.init_run(sb.entry_point)
    sb.jitter.add_breakpoint(sb.entry_point, stop)
    sb.run()

    # Get bytecode
    interpret()

    # Paint cfg (the module-level `cfg` is expected to be set by now)
    with open("vm_graph.dot", "w") as dot_file:
        dot_file.write(cfg.dot())
def run(self):
    """Guess the function addresses of the target binary and print them.

    The architecture is taken from the command line when given, otherwise it
    is guessed with ArchHeuristic. Heuristics named in ``enable_heuristic`` /
    ``disable_heuristic`` are added to / removed from the FuncHeuristic
    defaults before guessing. Each address is printed in hexadecimal, zero
    padded according to the size of the architecture's program counter.

    Raises ValueError when no architecture is given and none can be guessed.
    """
    # Architecture
    if self.args.architecture:
        architecture = self.args.architecture
    else:
        with open(self.args.filename, "rb") as fdesc:
            architecture = ArchHeuristic(fdesc).guess()
        if not architecture:
            raise ValueError(
                "Unable to recognize the architecture, please specify it")
        if self.args.verbose:
            print("Guessed architecture: %s" % architecture)

    # The stream is read when the container is built; do not leak the handle
    with open(self.args.filename, "rb") as fdesc:
        cont = Container.from_stream(fdesc)
    machine = Machine(architecture)
    # Number of hexadecimal digits of an address (4 bits per digit)
    addr_size = machine.ira().pc.size // 4
    fh = FuncHeuristic(cont, machine)

    # Enable / disable heuristics
    for name in self.args.enable_heuristic:
        heur = fh.name2heuristic(name)
        if heur not in fh.heuristics:
            fh.heuristics.append(heur)
    for name in self.args.disable_heuristic:
        heur = fh.name2heuristic(name)
        # Disabling a heuristic that is not enabled is a no-op, not an error
        if heur in fh.heuristics:
            fh.heuristics.remove(heur)

    if self.args.verbose:
        print("Heuristics to run: %s" % ", ".join(fh.heuristic_names))

    # Launch guess
    fmt = "0x{:0%dx}" % addr_size
    for addr in fh.guess():
        print(fmt.format(addr))
def run(self):
    """Print the addresses FuncHeuristic believes to be function starts.

    Uses ``self.args.architecture`` when set, else asks ArchHeuristic to guess
    it from the file. The heuristics listed in ``self.args.enable_heuristic``
    are appended (once) and those in ``self.args.disable_heuristic`` removed
    before the guess is launched.

    Raises ValueError if the architecture is neither given nor recognized.
    """
    filename = self.args.filename
    verbose = self.args.verbose

    # Architecture
    architecture = self.args.architecture
    if not architecture:
        with open(filename, "rb") as fdesc:
            architecture = ArchHeuristic(fdesc).guess()
        if not architecture:
            raise ValueError("Unable to recognize the architecture, please specify it")
        if verbose:
            print("Guessed architecture: %s" % architecture)

    # Close the binary once the container has been built from it
    with open(filename, "rb") as fdesc:
        cont = Container.from_stream(fdesc)
    machine = Machine(architecture)
    # One hexadecimal digit per 4 bits of the program counter
    addr_size = machine.ira().pc.size // 4
    fh = FuncHeuristic(cont, machine)

    # Enable / disable heuristics
    for name in self.args.enable_heuristic:
        heur = fh.name2heuristic(name)
        if heur not in fh.heuristics:
            fh.heuristics.append(heur)
    for name in self.args.disable_heuristic:
        heur = fh.name2heuristic(name)
        # list.remove() raises ValueError on a missing item: skip heuristics
        # that are already disabled instead of crashing
        if heur in fh.heuristics:
            fh.heuristics.remove(heur)

    if verbose:
        print("Heuristics to run: %s" % ", ".join(fh.heuristic_names))

    # Launch guess
    fmt = "0x{:0%dx}" % addr_size
    for addr in fh.guess():
        print(fmt.format(addr))
def main():
    """Rebuild the flag from the character-check blocks of an ARM Thumb binary.

    The binary named on the command line is disassembled from 0x614 and its
    CFG written to "cfg.dot". Blocks starting with LDR whose second to last
    instruction is CMP are matched; for each, the position (4th instruction)
    and the compared character (CMP operand) are read as hexadecimal
    immediates. The characters, ordered by position, form the flag.
    """
    # Setup Machine for arm, get filename
    machine = Machine('armtl')
    parser = ArgumentParser("Description")
    parser.add_argument('filename', help='filename')
    args = parser.parse_args()

    # Setup disassembly stream in container, get blocks and draw the graph.
    # The stream is read when the container is built, so close it right away.
    with open(args.filename, "rb") as fdesc:
        cont = Container.from_stream(fdesc)
    mdis = machine.dis_engine(cont.bin_stream)
    blocks = mdis.dis_multibloc(0x614)
    with open("cfg.dot", "w") as dot_file:
        dot_file.write(blocks.dot())

    # Create a template for matching blocks in the control flow graph
    # Requirement 1) Don't get block 0xdf8, it can't disassemble
    # Requirement 2) Get ones that start with LDR
    # Requirement 3) Get ones where the second to last instruction is CMP
    # No restrictions for ingoing and outgoing edges
    mblock = MatchGraphJoker(
        name='mblock',
        restrict_in=False,
        restrict_out=False,
        filt=lambda block: (block.label.offset != 0xdf8 and
                            "LDR" in block.lines[0].name and
                            "CMP" in block.lines[-2].name))

    # Basic block matcher
    nblock = MatchGraphJoker(name="next", restrict_in=False,
                             restrict_out=False)

    # Now it should match the blocks we want with the checks
    matcher = nblock >> mblock

    # position in the flag -> character code
    flag_storage = {}

    # Loop through matching template blocks
    for sol in matcher.match(blocks):
        lines = sol[mblock].lines
        # Position line and character check line
        pline = lines[3]
        cline = lines[-2]
        try:
            # Transform character and position to integer
            pos = int(pline.arg2str(pline.args[1]), 16)
            c = int(cline.arg2str(cline.args[1]), 16)
        except ValueError:
            # The F at the beginning is a NULL check
            continue
        # If its NULL, ignore
        if c != 0:
            flag_storage[pos] = c

    # Print Flag
    flag = "".join(
        chr(flag_storage[pos]) for pos in sorted(flag_storage)
    ).replace("F", "I")
    print("F" + flag)
def container_guess(archinfo):
    """Return the architecture announced by the container, if there is one
    @archinfo: ArchHeuristic instance

    The result maps the architecture name to 1; it is empty when the
    container type is unknown or the container carries no architecture.
    """
    container = Container.from_stream(archinfo.stream)
    recognized = (not isinstance(container, ContainerUnknown)
                  and container.arch)
    if recognized:
        return {container.arch: 1}
    return {}
def test_learn(args):
    """Learn a test class for every C sample and check it on that sample.

    The samples are rebuilt with ``make``. For each ``<name>.c`` found below
    the current directory, ``sibyl learn`` is run on ``<name>`` with the miasm
    tracer, the class ``Test<name>`` is loaded from its standard output and
    launched on the function ``<name>`` through TestLauncher. The outcome is
    logged; build products are removed at the end.

    @args: parsed command line arguments (not used by this function)
    """
    machine = Machine("x86_64")

    # Compil tests
    log_info("Remove old files")
    os.system("make clean")
    log_info("Compile C files")
    status = os.system("make")
    assert status == 0

    # Find test names (file names without the ".c" extension)
    c_files = []
    for _, _, files in os.walk("."):
        c_files += [x[:-2] for x in files if x.endswith(".c")]

    for c_file in c_files:
        # The stream is read when the container is built: close the binary
        # immediately instead of leaking one handle per sample
        with open(c_file, "rb") as fdesc:
            cont = Container.from_stream(fdesc)

        func_name = c_file
        main_addr = cont.symbol_pool["main"].offset
        func_addr = cont.symbol_pool[func_name].offset

        log_info("Learning " + func_name + " over " + func_name + ".c")
        cmd = [
            "sibyl", "learn", "-t", "miasm", "-m", hex(main_addr),
            func_name, c_file
        ]
        sibyl = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
        stdout, _ = sibyl.communicate()
        assert sibyl.returncode == 0

        log_info("Testing generated class")

        # NOTE: executes the code emitted by the learner; trusted tool output
        mod = imp.new_module("testclass")
        exec(stdout, mod.__dict__)
        classTest = getattr(mod, "Test" + c_file)
        tl = TestLauncher(c_file, machine, ABI_AMD64_SYSTEMV, [classTest],
                          "gcc")

        tl.run(func_addr)
        if tl.possible_funcs:
            log_success("Generated class recognize the function " + func_name)
        else:
            log_error("Generated class failed to recognize the function " +
                      func_name)

    log_info("Remove old files")
    os.system("make clean")
def run(self):
    """Guess the function addresses of the target binary and print them.

    The binary is loaded at ``self.args.mapping_base``. The architecture is
    taken from the command line or guessed with ArchHeuristic. When an IDA or
    Ghidra path is configured, that single heuristic replaces the defaults;
    ``enable_heuristic`` / ``disable_heuristic`` are applied afterwards.

    Raises ValueError when no architecture is given and none can be guessed.
    """
    # Mapping base, accepted in any base understood by int(x, 0)
    map_addr = int(self.args.mapping_base, 0)

    # Architecture
    if self.args.architecture:
        architecture = self.args.architecture
    else:
        with open(self.args.filename, "rb") as fdesc:
            architecture = ArchHeuristic(fdesc).guess()
        if not architecture:
            raise ValueError("Unable to recognize the architecture, please specify it")
        if self.args.verbose:
            print("Guessed architecture: %s" % architecture)

    # The stream is read when the container is built; do not leak the handle
    with open(self.args.filename, "rb") as fdesc:
        cont = Container.from_stream(fdesc, addr=map_addr)
    machine = Machine(architecture)
    # Number of hexadecimal digits of an address (4 bits per digit)
    addr_size = machine.ira().pc.size // 4
    fh = FuncHeuristic(cont, machine, self.args.filename)

    # Default: force only IDA or GHIDRA if available
    if config.idaq64_path:
        fh.heuristics = [ida_funcs]
    elif config.ghidra_headless_path:
        fh.heuristics = [ghidra_funcs]

    # Enable / disable heuristics
    for name in self.args.enable_heuristic:
        heur = fh.name2heuristic(name)
        if heur not in fh.heuristics:
            fh.heuristics.append(heur)
    for name in self.args.disable_heuristic:
        heur = fh.name2heuristic(name)
        # The defaults may have been replaced above, so the heuristic to
        # disable is not necessarily present: skip it instead of crashing
        if heur in fh.heuristics:
            fh.heuristics.remove(heur)

    if self.args.verbose:
        print("Heuristics to run: %s" % ", ".join(fh.heuristic_names))

    # Launch guess
    fmt = "0x{:0%dx}" % addr_size
    for addr in fh.guess():
        print(fmt.format(addr))
def __init__(self, filename, verbose=False):
    """Load binary file.

    Parses the file into a container and keeps its byte stream, entry point
    and a Machine built for the container's architecture, then resets the
    analysis state and configures logging.

    Args:
        filename (str): path to a file to be analyzed
        verbose (bool): affects log verbosity
    """
    # The stream is read when the container is built, so the file is closed
    # as soon as parsing is done instead of staying open with the object
    with open(filename, "rb") as fdesc:
        self._container = Container.from_stream(fdesc)
    self.bin_stream = self._container.bin_stream
    self.entry_point = self._container.entry_point
    self.machine = Machine(self._container.arch)
    self.fn = {}
    self.interval = interval()
    self.deep = 0
    self.offset = 0
    self._set_logging(verbose)
    self._logger.info("PE loaded")
def run(self):
    """Create a test for the requested function and output it.

    The function address comes from ``self.args.address`` or, when absent,
    from the symbol ``self.args.functionname`` of ``self.args.program``. The
    generated test is written to ``self.args.output`` when set, else printed.

    Raises ValueError when ``main`` is given with a tracer other than miasm,
    or when the function symbol cannot be found in the program.
    """
    # Currently only AMD64 SYSTEMV ABI is supported by the learning module
    abi = ABI_AMD64_SYSTEMV

    # Currently only x86_64 is supported by the learning module
    machine = "x86_64"

    if self.args.trace != "miasm" and self.args.main is not None:
        raise ValueError("Main argument is only used by miasm tracer")

    main = int(self.args.main, 0) if self.args.main else None

    # If function address is not set then use the symbol address
    if self.args.address is None:
        # The stream is read when the container is built; close it at once
        with open(self.args.program, "rb") as fdesc:
            cont = Container.from_stream(fdesc)
        try:
            address = cont.symbol_pool[self.args.functionname].offset
        except KeyError:
            raise ValueError("Symbol %s does not exists in %s" %
                             (self.args.functionname, self.args.program))
    else:
        address = int(self.args.address, 0)

    testcreator = TestCreator(self.args.functionname, address,
                              self.args.program, self.args.headerfile,
                              AVAILABLE_TRACER[self.args.trace],
                              AVAILABLE_GENERATOR[self.args.generator],
                              main, abi, machine)

    # Verbosity count -> logger level; other values keep the current level
    if self.args.verbose == 0:
        testcreator.logger.setLevel(logging.WARN)
    elif self.args.verbose == 1:
        testcreator.logger.setLevel(logging.INFO)
    elif self.args.verbose == 2:
        testcreator.logger.setLevel(logging.DEBUG)

    createdTest = testcreator.create_test()

    if self.args.output:
        # Close (and therefore flush) the output file deterministically
        with open(self.args.output, "w+") as out_file:
            out_file.write(createdTest)
    else:
        print(createdTest)
def run(self):
    """Learn a test for the requested function and emit its source.

    Resolves the function address (explicit ``address`` argument, else the
    ``functionname`` symbol of ``program``), builds a TestCreator with the
    selected tracer, generator and ``avoid_null`` setting, then writes the
    created test to ``output`` if given or prints it otherwise.

    Raises ValueError when ``main`` is given with a tracer other than miasm,
    or when the function symbol cannot be found in the program.
    """
    args = self.args

    # Currently only AMD64 SYSTEMV ABI is supported by the learning module
    abi = ABI_AMD64_SYSTEMV

    # Currently only x86_64 is supported by the learning module
    machine = "x86_64"

    if args.trace != "miasm" and args.main is not None:
        raise ValueError("Main argument is only used by miasm tracer")

    main = int(args.main, 0) if args.main else None

    # If function address is not set then use the symbol address
    if args.address is None:
        # The container is fully built from the stream inside the `with`
        # block, so the file does not need to stay open afterwards
        with open(args.program, "rb") as fdesc:
            cont = Container.from_stream(fdesc)
        try:
            address = cont.symbol_pool[args.functionname].offset
        except KeyError:
            raise ValueError("Symbol %s does not exists in %s" %
                             (args.functionname, args.program))
    else:
        address = int(args.address, 0)

    testcreator = TestCreator(args.functionname, address,
                              args.program, args.headerfile,
                              AVAILABLE_TRACER[args.trace],
                              AVAILABLE_GENERATOR[args.generator],
                              main, abi, machine, args.avoid_null)

    # Verbosity count -> logger level; other values keep the current level
    if args.verbose == 0:
        testcreator.logger.setLevel(logging.WARN)
    elif args.verbose == 1:
        testcreator.logger.setLevel(logging.INFO)
    elif args.verbose == 2:
        testcreator.logger.setLevel(logging.DEBUG)

    createdTest = testcreator.create_test()

    if args.output:
        # Close (and therefore flush) the output file deterministically
        with open(args.output, "w+") as out_file:
            out_file.write(createdTest)
    else:
        print(createdTest)
def load_vm(self, filename, map_addr):
    """Load a binary into the jitter's VM and initialise registers and stack.

    @filename: path of the binary to load
    @map_addr: value passed as `addr` to Container.from_stream
    """
    # The stream is read when the container is built: close the file as soon
    # as loading is done instead of leaking the handle
    with open(filename, "rb") as fdesc:
        self.ctr = Container.from_stream(fdesc, vm=self.jitter.vm,
                                         addr=map_addr)
    self.jitter.cpu.init_regs()
    self.jitter.init_stack()
parser.add_argument("-p", "--passthrough", help="Reg-exp for passthrough files", default="^$") parser.add_argument("-f", "--flags", help="Flags") parser.add_argument("-v", "--verbose", action="store_true", help="Activate verbose syscalls") args = parser.parse_args() if args.verbose: syscall.log.setLevel(logging.DEBUG) # Get corresponding interpreter and reloc address cont_target_tmp = Container.from_stream(open(args.target)) ld_path = str(cont_target_tmp.executable.getsectionbyname( ".interp").content).strip("\x00") if cont_target_tmp.executable.Ehdr.type in [elf_csts.ET_REL, elf_csts.ET_DYN]: elf_base_addr = 0x40000000 elif cont_target_tmp.executable.Ehdr.type == elf_csts.ET_EXEC: elf_base_addr = 0 # Not relocatable else: raise ValueError("Unsuported type %d" % cont_target_tmp.executable.Ehdr.type) # Instanciate a jitter machine = Machine(cont_target_tmp.arch) jitter = machine.jitter(args.jitter) jitter.init_stack()
import os from miasm2.analysis.binary import Container from miasm2.analysis.machine import Machine from miasm2.core.graph import DiGraphSimplifier, MatchGraphJoker container = Container.from_stream(open('dump2.bin')) bin_stream = container.bin_stream #machine name = container.arch machine = Machine(container.arch) #fireup disasm engine mdis = machine.dis_engine(bin_stream) #Return an AsmCFG instance containing disassembled blocks #https://github.com/cea-sec/miasm/pull/309 blocks = mdis.dis_multibloc(container.entry_point) #open('AsmCFG_input.dot','w+').write(blocks.dot()) ''' for head in blocks.heads(): for child in blocks.reachable_sons(head): print child ''' filter_block = lambda block: (len(block.lines)==2 and \ block.lines[0].name == 'PUSH' and \ block.lines[1].name == 'MOV') #parent joker node for the first block in MatchGraph / defining a filter for
parser.add_argument('-j', "--calldontmodstack", action="store_true", help="Consider stack high is not modified in subcalls") args = parser.parse_args() if args.verbose: log_asmblock.setLevel(logging.DEBUG) log.info('Load binary') if args.rawbinary: cont = Container.fallback_container(open(args.filename, "rb").read(), vm=None, addr=args.base_address) else: with open(args.filename, "rb") as fdesc: cont = Container.from_stream(fdesc, addr=args.base_address) default_addr = cont.entry_point bs = cont.bin_stream e = cont.executable log.info('ok') log.info("import machine...") # Use the guessed architecture or the specified one arch = args.architecture if args.architecture else cont.arch if not arch: print "Architecture recognition fail. Please specify it in arguments" exit(-1) # Instance the arch-dependent machine machine = Machine(arch)
from argparse import ArgumentParser from miasm2.analysis.binary import Container from miasm2.analysis.machine import Machine from miasm2.jitter.llvmconvert import LLVMType, LLVMContext_IRCompilation, LLVMFunction_IRCompilation from llvmlite import ir as llvm_ir from miasm2.expression.simplifications import expr_simp_high_to_explicit parser = ArgumentParser("LLVM export example") parser.add_argument("target", help="Target binary") parser.add_argument("addr", help="Target address") parser.add_argument("--architecture", "-a", help="Force architecture") args = parser.parse_args() # This part focus on obtaining an IRCFG to transform # cont = Container.from_stream(open(args.target)) machine = Machine(args.architecture if args.architecture else cont.arch) ir = machine.ir(cont.loc_db) dis = machine.dis_engine(cont.bin_stream, loc_db=cont.loc_db) asmcfg = dis.dis_multiblock(int(args.addr, 0)) ircfg = ir.new_ircfg_from_asmcfg(asmcfg) ircfg.simplify(expr_simp_high_to_explicit) ###################################################### # Instantiate a context and the function to fill context = LLVMContext_IRCompilation() context.ir_arch = ir func = LLVMFunction_IRCompilation(context, name="test") func.ret_type = llvm_ir.VoidType() func.init_fc()
print 'IN', [str(x) for x in irb_in_nodes[label]] print 'OUT', [str(x) for x in irb_out_nodes[label]] print '*' * 20, 'interblock', '*' * 20 inter_block_flow(ir_arch, ircfg, flow_graph, irblock_0.loc_key, irb_in_nodes, irb_out_nodes) # from graph_qt import graph_qt # graph_qt(flow_graph) open('data.dot', 'w').write(flow_graph.dot()) ad = int(args.addr, 16) print 'disasm...' cont = Container.from_stream(open(args.filename)) machine = Machine("x86_32") mdis = machine.dis_engine(cont.bin_stream, loc_db=cont.loc_db) mdis.follow_call = True asmcfg = mdis.dis_multiblock(ad) print 'ok' print 'generating dataflow graph for:' ir_arch_analysis = machine.ira(mdis.loc_db) ircfg = ir_arch_analysis.new_ircfg_from_asmcfg(asmcfg) for irblock in ircfg.blocks.values(): print irblock if args.symb:
if options.verbose: log_asmbloc.setLevel(logging.DEBUG) log.info("import machine...") machine = Machine(options.machine) mn, dis_engine, ira = machine.mn, machine.dis_engine, machine.ira log.info('ok') if options.bw != None: options.bw = int(options.bw) if options.funcswd != None: options.funcswd = int(options.funcswd) log.info('Load binary') with open(fname) as fdesc: cont = Container.from_stream(fdesc, addr=options.shiftoffset) default_addr = cont.entry_point bs = cont.bin_stream e = cont.executable log.info('ok') mdis = dis_engine(bs) # configure disasm engine mdis.dontdis_retcall = options.dontdis_retcall mdis.blocs_wd = options.bw mdis.dont_dis_nulstart_bloc = not options.dis_nulstart_bloc todo = [] addrs = [int(a, 16) for a in args[1:]]
help="Use implicit tracking", action="store_true") parser.add_argument("--unfollow-mem", help="Stop on memory statements", action="store_true") parser.add_argument("--unfollow-call", help="Stop on call statements", action="store_true") parser.add_argument("--do-not-simplify", help="Do not simplify expressions", action="store_true") args = parser.parse_args() # Get architecture with open(args.filename) as fstream: cont = Container.from_stream(fstream) arch = args.architecture if args.architecture else cont.arch machine = Machine(arch) # Check elements elements = set() regs = machine.mn.regs.all_regs_ids_byname for element in args.element: try: elements.add(regs[element.upper()]) except KeyError: raise ValueError("Unknown element '%s'" % element) mdis = machine.dis_engine(cont.bin_stream, dont_dis_nulstart_bloc=True) ir_arch = machine.ira(mdis.symbol_pool)
p = log.progress("emulating step%d" % x) for i in range((len(bytecode) * 8) - 1): if i % 100 == 0: p.status("%d/%d" % (i, (len(bytecode) * 8) - 1)) sb.call(0x5DD9, i) # VMDecodeInstruction # with open("all_steps", "w") as f: # import json # json.dump(lt, f) print(lt) parser = Sandbox_Linux_x86_64.parser(description="ELF sandboxer") parser.add_argument("filename", help="ELF Filename") options = parser.parse_args() sb = Sandbox_Linux_x86_64(options.filename, options, globals()) machine = Machine('x86_64') cont = Container.from_stream(open(options.filename)) mdis = machine.dis_engine(cont.bin_stream) sb.jitter.add_breakpoint(0x2C87F, GetMem) # break on return of VMDecodeInstruction sb.jitter.add_breakpoint( 0x21B0, Malloc ) # Break on call to _malloc because for some reason MIASM can't find the symbol sb.jitter.add_breakpoint( 0x4A9C, GetBytecode ) # break on return of VMDecodeBytecode to return the good value that could not be loaded because of global variable not set emulate(sb, 0, step0, lookup_table_step0) emulate(sb, 1, step1, lookup_table_step1) emulate(sb, 2, step2, lookup_table_step2) emulate(sb, -1, step1 + step2, lookup_table_stepX)
import sys
from miasm2.arch.x86.disasm import dis_x86_32
from miasm2.core.asmbloc import bloc2graph
from miasm2.analysis.binary import Container
from pdb import pm

# Usage: <script> <binary> <hexadecimal address>
if len(sys.argv) != 3:
    print('Example:')
    print("%s samples/box_upx.exe 0x407570" % sys.argv[0])
    sys.exit(0)

addr = int(sys.argv[2], 16)

# The stream is read when the container is built, so the binary is closed
# right after loading instead of staying open for the rest of the script
with open(sys.argv[1], "rb") as fdesc:
    cont = Container.from_stream(fdesc)
mdis = dis_x86_32(cont.bin_stream)
# Inform the engine to avoid disassembling null instructions
mdis.dont_dis_nulstart_bloc = True
blocs = mdis.dis_multibloc(addr)

# Dump the graph of the disassembled blocks
graph = bloc2graph(blocs)
with open('graph.txt', 'w') as graph_file:
    graph_file.write(graph)
def load_vm(self, filename, map_addr):
    """Build the container for `filename` inside the jitter's VM.

    The container is created with the jitter's VM and `map_addr` as `addr`,
    stored in `self.ctr`, then CPU registers and the stack are initialised.

    @filename: path of the binary to load
    @map_addr: value forwarded as `addr` to Container.from_stream
    """
    # Use a context manager so the file handle is released once the
    # container has been built from the stream
    with open(filename, "rb") as fdesc:
        self.ctr = Container.from_stream(
            fdesc, vm=self.jitter.vm, addr=map_addr)
    self.jitter.cpu.init_regs()
    self.jitter.init_stack()
# First stage sample name (also its SHA-1) first_stage_fn = "0413f832d8161187172aef7a769586515f969479" # ChaCha decryption function address for this particular sample decrypt_func_addr = 0x400830 # Memory address of the initialization vector iv_addr = 0x614000 # Memory address of the key key_addr = 0x614020 # Arbitrary memory address to map the encrypted file in memory in_addr = 0x40000000 # Create new instance of x86_64 sandbox to emulate the decryption function sb = Sandbox_Linux_x86_64(first_stage_fn, options, globals()) with open(first_stage_fn, "rb") as first_stage, open(options.in_filename) as enc_bin: cont = Container.from_stream(first_stage) in_data = enc_bin.read() in_size = len(in_data) # Allocate memory to store the output out_addr = linobjs.heap.alloc(sb.jitter, in_size) # Map the encrypted file in memory sb.jitter.vm.add_memory_page(in_addr, PAGE_READ | PAGE_WRITE, in_data) # Call the decryption function with the good arguments sb.call(decrypt_func_addr, key_addr, 1, iv_addr, in_addr, out_addr, in_size) # Get the decrypted data from memory out_bin = sb.jitter.vm.get_mem(out_addr, in_size) with open(options.out_filename, "wb") as dec_bin: dec_bin.write(out_bin)
from miasm2.analysis.data_flow import dead_simp from miasm2.expression.simplifications import expr_simp parser = ArgumentParser("Constant expression propagation") parser.add_argument('filename', help="File to analyze") parser.add_argument('address', help="Starting address for disassembly engine") parser.add_argument('-s', "--simplify", action="store_true", help="Apply simplifications rules (liveness, graph simplification, ...)") args = parser.parse_args() machine = Machine("x86_32") cont = Container.from_stream(open(args.filename)) ira, dis_engine = machine.ira, machine.dis_engine mdis = dis_engine(cont.bin_stream) ir_arch = ira(mdis.symbol_pool) addr = int(args.address, 0) asmcfg = mdis.dis_multiblock(addr) for block in asmcfg.blocks: ir_arch.add_block(block) init_infos = ir_arch.arch.regs.regs_init cst_propag_link = propagate_cst_expr(ir_arch, addr, init_infos) if args.simplify:
if options.verbose: log_asmbloc.setLevel(logging.DEBUG) log.info("import machine...") machine = Machine(options.machine) mn, dis_engine, ira = machine.mn, machine.dis_engine, machine.ira log.info('ok') if options.bw != None: options.bw = int(options.bw) if options.funcswd != None: options.funcswd = int(options.funcswd) log.info('Load binary') with open(fname) as fdesc: cont = Container.from_stream(fdesc, addr=options.shiftoffset) default_addr = cont.entry_point bs = cont.bin_stream e = cont.executable log.info('ok') mdis = dis_engine(bs) # configure disasm engine mdis.dontdis_retcall = options.dontdis_retcall mdis.blocs_wd = options.bw mdis.dont_dis_nulstart_bloc = not options.dis_nulstart_bloc todo = [] addrs = [int(a, 16) for a in args[1:]]
action="store_true") parser.add_argument("--unfollow-call", help="Stop on call statements", action="store_true") parser.add_argument("--do-not-simplify", help="Do not simplify expressions", action="store_true") parser.add_argument("--rename-args", help="Rename common arguments (@32[ESP_init] -> Arg1)", action="store_true") parser.add_argument("--json", help="Output solution in JSON", action="store_true") args = parser.parse_args() # Get architecture with open(args.filename) as fstream: cont = Container.from_stream(fstream) arch = args.architecture if args.architecture else cont.arch machine = Machine(arch) # Check elements elements = set() regs = machine.mn.regs.all_regs_ids_byname for element in args.element: try: elements.add(regs[element]) except KeyError: raise ValueError("Unknown element '%s'" % element) mdis = machine.dis_engine(cont.bin_stream, dont_dis_nulstart_bloc=True) ir_arch = machine.ira(mdis.symbol_pool)
def test_learn(args):
    """Learn every C sample with each tracer and test the generated class.

    @args: parsed arguments; `pin_tracer` additionally enables the PIN tracer

    Return True if at least one learning or recognition step failed.
    """
    machine = Machine("x86_64")

    # Rebuild the samples from scratch
    log_info("Remove old files")
    os.system("make clean")
    log_info("Compile C files")
    status = os.system("make")
    assert status == 0

    # Test names are the C file names without their ".c" extension
    c_files = [
        entry[:-2]
        for _root, _dirs, entries in os.walk(".")
        for entry in entries
        if entry.endswith(".c")
    ]

    # Tracer name -> command line builder
    to_invoke = {"Miasm": invoke_miasm}
    if args.pin_tracer:
        to_invoke["PIN"] = invoke_pin

    # Learn + test
    fail = False
    for filename in c_files:
        if filename in unsupported:
            log_error("Skip %s (unsupported)" % filename)
            continue

        with open(filename) as fdesc:
            cont = Container.from_stream(fdesc)

        # The function under test is named after its file
        func_name = filename
        func_addr = cont.loc_db.get_name_offset(func_name)
        header_filename = "%s.h" % filename

        for name, cb in to_invoke.iteritems():
            log_info("Learning %s over %s with %s" %
                     (func_name, filename, name))
            cmdline = cb(filename, func_name, header_filename, cont)
            print(" ".join(cmdline))

            sibyl = subprocess.Popen(cmdline, env=os.environ,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE)
            stdout, stderr = sibyl.communicate()

            if sibyl.returncode == 0:
                log_info("Testing generated class")

                # Load the class emitted by the learner on its standard output
                mod = imp.new_module("testclass")
                exec(stdout, mod.__dict__)
                classTest = getattr(mod, "TESTS")[0]
                tl = TestLauncher(filename, machine, ABI_AMD64_SYSTEMV,
                                  [classTest], config.jit_engine)

                possible_funcs = tl.run(func_addr)
                if tl.possible_funcs and possible_funcs == [filename]:
                    log_success(
                        "Generated class recognize the function '%s'" %
                        func_name)
                else:
                    log_error(
                        "Generated class failed to recognize the function '%s'"
                        % func_name)
                    fail = True
            else:
                log_error("Failed to learn with error:")
                print(stderr)
                fail = True

    # Clean
    log_info("Remove old files")
    os.system("make clean")

    return fail
elif addr.is_int(): addr = int(addr.arg) states_todo.add((addr, symbexec.symbols.copy(), tuple(conds))) elif addr.is_loc(): states_todo.add((addr, symbexec.symbols.copy(), tuple(conds))) else: raise ValueError("Unsupported destination") if __name__ == '__main__': translator_smt2 = Translator.to_language("smt2") addr = int(options.address, 16) cont = Container.from_stream(open(args[0])) mdis = machine.dis_engine(cont.bin_stream, loc_db=cont.loc_db) ir_arch = machine.ir(mdis.loc_db) ircfg = ir_arch.new_ircfg() symbexec = SymbolicExecutionEngine(ir_arch) asmcfg, loc_db = parse_asm.parse_txt(machine.mn, 32, ''' init: PUSH argv PUSH argc PUSH ret_addr ''', loc_db=mdis.loc_db)
"""Example of the Sandbox.call API: directly call one function of an ELF"""

from miasm2.analysis.sandbox import Sandbox_Linux_arml
from miasm2.analysis.binary import Container
from miasm2.os_dep.linux_stdlib import linobjs
from miasm2.core.utils import hexdump

# Size of the buffer handed to the called function
BUFFER_SIZE = 0x64

# Command line: sandbox options plus the ELF to load
parser = Sandbox_Linux_arml.parser(description="ELF sandboxer")
parser.add_argument("filename", help="ELF Filename")
options = parser.parse_args()

sb = Sandbox_Linux_arml(options.filename, options, globals())

# Resolve the address of the function to call from the ELF symbols
with open(options.filename, "rb") as binary:
    cont = Container.from_stream(binary)
addr_to_call = cont.loc_db.get_location_offset(
    cont.loc_db.get_name_location("md5_starts")
)

# Calling md5_starts(malloc(0x64))
buffer_addr = linobjs.heap.alloc(sb.jitter, BUFFER_SIZE)
sb.call(addr_to_call, buffer_addr)

# Show what the function wrote in the buffer
hexdump(sb.jitter.vm.get_mem(buffer_addr, BUFFER_SIZE))
parser.add_argument('-o', "--shiftoffset", default=None, type=lambda x: int(x, 0), help="Shift input binary by an offset") parser.add_argument('-a', "--try-disasm-all", action="store_true", help="Try to disassemble the whole binary") parser.add_argument('-i', "--image", action="store_true", help="Display image representation of disasm") args = parser.parse_args() if args.verbose: log_asmbloc.setLevel(logging.DEBUG) log.info('Load binary') with open(args.filename) as fdesc: cont = Container.from_stream(fdesc, addr=args.shiftoffset) default_addr = cont.entry_point bs = cont.bin_stream e = cont.executable log.info('ok') log.info("import machine...") # Use the guessed architecture or the specified one arch = args.architecture if args.architecture else cont.arch if not arch: print "Architecture recognition fail. Please specify it in arguments" exit(-1) # Instance the arch-dependent machine machine = Machine(arch)
def test_learn(args):
    """Run the learn-then-recognize round trip on every compiled C sample.

    @args: parsed arguments; the PIN tracer is used too if `pin_tracer` is set

    Each sample is learned with every selected tracer; the first class of the
    generated `TESTS` list must recognize the sample function, and only it.
    Return True as soon as any step failed for any sample, else False.
    """
    machine = Machine("x86_64")

    # Compile the tests
    log_info("Remove old files")
    os.system("make clean")
    log_info("Compile C files")
    make_status = os.system("make")
    assert make_status == 0

    # Find test names
    c_files = []
    for _cur_dir, _sub_dirs, names in os.walk("."):
        for fname in names:
            if fname.endswith(".c"):
                c_files.append(fname[:-2])

    # Ways to invoke
    to_invoke = {}
    to_invoke["Miasm"] = invoke_miasm
    if args.pin_tracer:
        to_invoke["PIN"] = invoke_pin

    # Learn + test
    fail = False
    for filename in c_files:
        if filename in unsupported:
            log_error("Skip %s (unsupported)" % filename)
            continue

        with open(filename) as fdesc:
            cont = Container.from_stream(fdesc)

        func_name = filename
        func_addr = cont.loc_db.get_name_offset(func_name)
        header_filename = "%s.h" % filename

        for tracer_name, build_cmdline in to_invoke.iteritems():
            log_info("Learning %s over %s with %s" % (func_name, filename,
                                                      tracer_name))
            cmdline = build_cmdline(filename, func_name, header_filename,
                                    cont)
            print(" ".join(cmdline))

            learner = subprocess.Popen(cmdline, env=os.environ,
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.PIPE)
            stdout, stderr = learner.communicate()
            if learner.returncode != 0:
                log_error("Failed to learn with error:")
                print(stderr)
                fail = True
                continue

            log_info("Testing generated class")

            # The learner prints the generated module on its standard output
            mod = imp.new_module("testclass")
            exec(stdout, mod.__dict__)
            classTest = getattr(mod, "TESTS")[0]
            tl = TestLauncher(filename, machine, ABI_AMD64_SYSTEMV,
                              [classTest], config.jit_engine)

            possible_funcs = tl.run(func_addr)
            recognized = tl.possible_funcs and possible_funcs == [filename]
            if not recognized:
                log_error("Generated class failed to recognize the function "
                          "'%s'" % func_name)
                fail = True
            else:
                log_success("Generated class recognize the function "
                            "'%s'" % func_name)

    # Clean
    log_info("Remove old files")
    os.system("make clean")

    return fail
"The defuse is dumped after simplifications if -s option is specified") args = parser.parse_args() if args.verbose: log_asmblock.setLevel(logging.DEBUG) log.info('Load binary') if args.rawbinary: shift = args.shiftoffset if args.shiftoffset is not None else 0 cont = Container.fallback_container(open(args.filename).read(), None, addr=shift) else: with open(args.filename) as fdesc: cont = Container.from_stream(fdesc, addr=args.shiftoffset) default_addr = cont.entry_point bs = cont.bin_stream e = cont.executable log.info('ok') log.info("import machine...") # Use the guessed architecture or the specified one arch = args.architecture if args.architecture else cont.arch if not arch: print "Architecture recognition fail. Please specify it in arguments" exit(-1) # Instance the arch-dependent machine machine = Machine(arch)
def run(self):
    """Launch search

    Tests every candidate address (given on the command line, or guessed with
    FuncHeuristic) against the selected test cases, using one worker process
    per CPU (a single in-process worker with ``monoproc``). Matches are
    printed as they arrive in "human" output format, or as one JSON document
    at the end in "JSON" output format.

    Raises ValueError when the architecture cannot be recognized, when no ABI
    exists for it or when the requested ABI name is unknown, and RuntimeError
    when addresses are left in the queue once the workers are done.
    """
    # Import multiprocessing only when required
    from multiprocessing import cpu_count, Queue, Process

    # Parse args
    self.map_addr = int(self.args.mapping_base, 0)
    if self.args.monoproc:
        # Single worker, executed without spawning a process
        cpu_count = lambda: 1
        Process = FakeProcess

    # Architecture
    if self.args.architecture:
        architecture = self.args.architecture
    else:
        with open(self.args.filename, "rb") as fdesc:
            architecture = ArchHeuristic(fdesc).guess()
        if not architecture:
            raise ValueError(
                "Unable to recognize the architecture, please specify it")
        if self.args.verbose > 0:
            print("Guessed architecture: %s" % architecture)

    self.machine = Machine(architecture)
    if not self.args.address:
        if self.args.verbose > 0:
            print("No function address provided, start guessing")

        # The stream is read when the container is built; do not keep the
        # file open while the workers run
        with open(self.args.filename, "rb") as fdesc:
            cont = Container.from_stream(fdesc)
        fh = FuncHeuristic(cont, self.machine)
        addresses = list(fh.guess())
        if self.args.verbose > 0:
            print("Found %d addresses" % len(addresses))
    else:
        addresses = [int(addr, 0) for addr in self.args.address]

    # Select ABI
    if self.args.abi is None:
        candidates = set(abicls for abicls in ABIS
                         if architecture in abicls.arch)
        if not candidates:
            raise ValueError("No ABI for architecture %s" % architecture)
        if len(candidates) > 1:
            print("Please specify the ABI:")
            print("\t" + "\n\t".join(cand.__name__ for cand in candidates))
            exit(0)
        abicls = candidates.pop()
    else:
        # for/else: the `else` clause runs only when no name matched
        for abicls in ABIS:
            if self.args.abi == abicls.__name__:
                break
        else:
            raise ValueError("Unknown ABI name: %s" % self.args.abi)
    self.abicls = abicls

    # Select Test set
    self.tests = []
    for tname, tcases in config.available_tests.iteritems():
        if "all" in self.args.tests or tname in self.args.tests:
            self.tests += tcases
    if self.args.verbose > 0:
        print("Found %d test cases" % len(self.tests))

    # Prepare multiprocess
    cpu_c = cpu_count()
    addr_queue = Queue()
    msg_queue = Queue()
    processes = []

    # Add tasks
    for address in addresses:
        addr_queue.put(address)

    # Add poison pill (one per worker, queued after all the addresses)
    for _ in xrange(cpu_c):
        addr_queue.put(None)

    # Launch workers
    for _ in xrange(cpu_c):
        p = Process(target=self.do_test, args=(addr_queue, msg_queue))
        processes.append(p)
        p.start()
    addr_queue.close()

    # Get results
    nb_poison = 0
    results = {}  # address -> possible functions
    while nb_poison < cpu_c:
        msg = msg_queue.get()

        # Poison pill: counted until one has been received per worker
        if msg is None:
            nb_poison += 1
            continue

        # Save result
        results[msg.address] = msg.results

        # Display status if needed
        if self.args.verbose > 0:
            sys.stdout.write("\r%d / %d" % (len(results), len(addresses)))
            sys.stdout.flush()
        if msg.results and self.args.output_format == "human":
            # "\r" overwrites the progress counter written just above
            prefix = ""
            if self.args.verbose > 0:
                prefix = "\r"
            print(prefix + "0x%08x : %s" % (msg.address,
                                           ",".join(msg.results)))

    # End connexions
    msg_queue.close()
    msg_queue.join_thread()

    addr_queue.join_thread()
    for p in processes:
        p.join()

    if not addr_queue.empty():
        raise RuntimeError("An error occured: queue is not empty")

    # Print final results
    if self.args.output_format == "JSON":
        # Expand results to always have the same key, and address as int
        print(json.dumps({
            "information": {
                "total_count": len(addresses),
                "test_cases": len(self.tests)
            },
            "results": [{
                "address": addr,
                "functions": result
            } for addr, result in results.iteritems()],
        }))