def run(self):
    """Guess the function addresses of the target binary and print them.

    The architecture is taken from ``self.args.architecture`` when it is
    set; otherwise it is guessed from the file with ``ArchHeuristic``.
    Heuristics named in ``self.args.enable_heuristic`` are added to the
    ``FuncHeuristic`` instance and those named in
    ``self.args.disable_heuristic`` are removed from it. Every address
    returned by ``fh.guess()`` is printed as zero-padded hexadecimal.

    Raises:
        ValueError: no architecture was given and none could be guessed.
    """
    # Architecture
    architecture = self.args.architecture
    if not architecture:
        with open(self.args.filename, "rb") as fdesc:
            architecture = ArchHeuristic(fdesc).guess()
        if not architecture:
            raise ValueError("Unable to recognize the architecture, please specify it")
        if self.args.verbose:
            print("Guessed architecture: %s" % architecture)

    # Close the file as soon as the container is built instead of leaking
    # the handle; binary mode keeps the bytes untouched on every platform.
    with open(self.args.filename, "rb") as fdesc:
        cont = Container.from_stream(fdesc)
    machine = Machine(architecture)
    # pc.size / 4 is used below as the zero-padded width of each address
    addr_size = machine.ira().pc.size // 4
    fh = FuncHeuristic(cont, machine)

    # Enable / disable heuristics
    for name in self.args.enable_heuristic:
        heur = fh.name2heuristic(name)
        if heur not in fh.heuristics:
            fh.heuristics.append(heur)
    for name in self.args.disable_heuristic:
        heur = fh.name2heuristic(name)
        # Disabling a heuristic that is not enabled is a no-op, not a
        # ValueError raised by list.remove
        if heur in fh.heuristics:
            fh.heuristics.remove(heur)

    if self.args.verbose:
        print("Heuristics to run: %s" % ", ".join(fh.heuristic_names))

    # Launch guess
    fmt = "0x{:0%dx}" % addr_size
    for addr in fh.guess():
        print(fmt.format(addr))
def run(self):
    """Guess the function addresses of the target binary and print them.

    The architecture is taken from ``self.args.architecture`` when it is
    set; otherwise it is guessed from the file with ``ArchHeuristic``.
    Heuristics named in ``self.args.enable_heuristic`` are added to the
    ``FuncHeuristic`` instance and those named in
    ``self.args.disable_heuristic`` are removed from it. Every address
    returned by ``fh.guess()`` is printed as zero-padded hexadecimal.

    Raises:
        ValueError: no architecture was given and none could be guessed.
    """
    # Architecture
    architecture = self.args.architecture
    if not architecture:
        with open(self.args.filename, "rb") as fdesc:
            architecture = ArchHeuristic(fdesc).guess()
        if not architecture:
            raise ValueError(
                "Unable to recognize the architecture, please specify it")
        if self.args.verbose:
            print("Guessed architecture: %s" % architecture)

    # Close the file as soon as the container is built instead of leaking
    # the handle; binary mode keeps the bytes untouched on every platform.
    with open(self.args.filename, "rb") as fdesc:
        cont = Container.from_stream(fdesc)
    machine = Machine(architecture)
    # pc.size / 4 is used below as the zero-padded width of each address
    addr_size = machine.ira().pc.size // 4
    fh = FuncHeuristic(cont, machine)

    # Enable / disable heuristics
    for name in self.args.enable_heuristic:
        heur = fh.name2heuristic(name)
        if heur not in fh.heuristics:
            fh.heuristics.append(heur)
    for name in self.args.disable_heuristic:
        heur = fh.name2heuristic(name)
        # Disabling a heuristic that is not enabled is a no-op, not a
        # ValueError raised by list.remove
        if heur in fh.heuristics:
            fh.heuristics.remove(heur)

    if self.args.verbose:
        print("Heuristics to run: %s" % ", ".join(fh.heuristic_names))

    # Launch guess
    fmt = "0x{:0%dx}" % addr_size
    for addr in fh.guess():
        print(fmt.format(addr))
def symexec(handler):
    """Symbolically execute a handler's bytes and return the engine.

    ``handler.bytes_without_jmp`` is disassembled as x86_32 from offset 0,
    the resulting block is added to an IR analysis and emulated block by
    block until the destination equals the offset just past the bytes.
    Emulation is abandoned, with a message naming ``handler.name``, once
    more than 1000 further blocks have been emulated.

    Returns the ``SymbolicExecutionEngine`` holding the resulting state.
    """
    code = handler.bytes_without_jmp
    stop_offset = len(code)

    machine = Machine("x86_32")
    container = Container.from_string(code)
    disasm = machine.dis_engine(container.bin_stream,
                                symbol_pool=container.symbol_pool)
    # Do not disassemble at the offset just past the handler's bytes
    disasm.dont_dis = [stop_offset]
    first_block = disasm.dis_block(0)

    analysis = machine.ira(disasm.symbol_pool)
    analysis.add_block(first_block)

    engine = SymbolicExecutionEngine(analysis, symbols_init)
    position = engine.emul_ir_block(0)
    iterations = 0
    # execute to end
    while position != ExprInt(stop_offset, 32):
        position = engine.emul_ir_block(position)
        iterations += 1
        if iterations > 1000:
            print('[!] to many loop at %s' % handler.name)
            break
    return engine
def main():
    """Dump the CFG of '300.bin', run it in a sandbox and dump the VM graph.

    Steps, in order:
      1. disassemble '300.bin' from 0x401550 and write 'cfg_before.dot';
      2. parse the command line (one positional ``filename``);
      3. run the file in a ``Sandbox_Linux_x86_64`` with the ``stop``
         breakpoint set on the entry point;
      4. call ``interpret()`` and write the module-level ``cfg`` graph to
         'vm_graph.dot'.
    """
    global cfg
    global block
    global data

    # Paint the cfg_before image from disassembly
    # (files are closed explicitly instead of being left to the GC)
    with open('300.bin', 'rb') as fdesc:
        cont = Container.from_stream(fdesc)
    bin_stream = cont.bin_stream
    adr = 0x401550
    machine = Machine(cont.arch)
    mdis = machine.dis_engine(bin_stream)
    blocks = mdis.dis_multibloc(adr)
    with open("cfg_before.dot", "w") as fdesc:
        fdesc.write(blocks.dot())

    # Get filename
    parser = Sandbox_Linux_x86_64.parser(description="300.bin")
    parser.add_argument("filename", help="filename")
    options = parser.parse_args()
    options.mimic_env = True

    # Start Sandbox
    sb = Sandbox_Linux_x86_64(options.filename, options, globals())
    sb.jitter.init_run(sb.entry_point)
    sb.jitter.add_breakpoint(sb.entry_point, stop)
    # NOTE(review): this rebinding is not read again inside main; kept in
    # case constructing the Machine matters to code outside this view.
    machine = Machine("x86_64")
    sb.run()

    # Get bytecode
    interpret()

    # Paint cfg
    with open("vm_graph.dot", "w") as fdesc:
        fdesc.write(cfg.dot())
def test(data):
    """Disassemble *data* as x86_64 and print the C view of its accesses.

    Two C structures (``human`` and ``ll_human``) are declared, *data* is
    loaded at address 0, disassembled and added to an IR analysis whose
    graph is written to 'graph_irflow.dot'. RDI is then bound to an
    identifier typed as a pointer to ``ll_human``, and for every expression
    yielded by ``get_funcs_arg0`` the expression, its types and its C
    access strings are printed.
    """
    # Digest C informations
    text = """ struct human { unsigned short age; unsigned int height; char name[50]; }; struct ll_human { struct ll_human* next; struct human human; }; """
    my_types = CTypeAMD64_unk()
    types_mngr = CTypesManagerNotPacked(my_types.types)
    types_mngr.add_c_decl(text)

    # Analyze binary
    cont = Container.fallback_container(data, None, addr=0)
    machine = Machine("x86_64")
    dis_engine, ira = machine.dis_engine, machine.ira
    mdis = dis_engine(cont.bin_stream, symbol_pool=cont.symbol_pool)
    addr_head = 0
    blocks = mdis.dis_multibloc(addr_head)
    lbl_head = mdis.symbol_pool.getby_offset(addr_head)
    ir_arch_a = ira(mdis.symbol_pool)
    for block in blocks:
        ir_arch_a.add_bloc(block)

    # Close the dot file explicitly instead of leaking the handle
    with open('graph_irflow.dot', 'w') as fdesc:
        fdesc.write(ir_arch_a.graph.dot())

    # Main function's first argument's type is "struct ll_human*"
    void_ptr = types_mngr.void_ptr
    ll_human = types_mngr.get_type(('ll_human',))
    ptr_llhuman = ObjCPtr('noname', ll_human, void_ptr.align, void_ptr.size)
    arg0 = ExprId('ptr', 64)
    ctx = {ir_arch_a.arch.regs.RDI: arg0}
    expr_types = {arg0.name: ptr_llhuman}
    mychandler = MyCHandler(types_mngr, expr_types)

    for expr in get_funcs_arg0(ctx, ir_arch_a, lbl_head):
        # Single-argument print calls produce the same output as the
        # former multi-argument print statements
        print("Access: %s" % (expr,))
        for target_type in mychandler.expr_to_types(expr):
            print('\tType: %s' % (target_type,))
        for c_str in mychandler.expr_to_c(expr):
            print("\tC access: %s" % (c_str,))
        print("")
def main():
    """Recover and print a flag by pattern-matching blocks of an ARM binary.

    The file named on the command line is disassembled as 'armtl' from
    0x614 and its CFG is written to 'cfg.dot'. Blocks matching the
    ``mblock`` template are then inspected: the second argument of the
    fourth line is read as a position and the second argument of the
    second-to-last line as a character code. Non-zero characters are
    stored by position and finally joined in position order.
    """
    # Setup Machine for arm, get filename
    machine = Machine('armtl')
    parser = ArgumentParser("Description")
    parser.add_argument('filename', help='filename')
    args = parser.parse_args()

    # Setup disassembly stream in container, get blocks and draw the graph
    # (both files are closed explicitly instead of being leaked)
    with open(args.filename, "rb") as fdesc:
        cont = Container.from_stream(fdesc)
    bin_stream = cont.bin_stream
    mdis = machine.dis_engine(bin_stream)
    blocks = mdis.dis_multibloc(0x614)
    with open("cfg.dot", "w") as fdesc:
        fdesc.write(blocks.dot())

    # Create a template for matching blocks in the control flow graph
    # Requirement 1) Don't get block 0xdf8, it can't disassemble
    # Requirement 2) Get ones that start with LDR
    # Requirement 3) Get ones where the second to last instruction is CMP
    # No restrictions for in going and out going edges
    mblock = MatchGraphJoker(
        name='mblock',
        restrict_in=False,
        restrict_out=False,
        filt=lambda block: (block.label.offset != 0xdf8
                            and "LDR" in block.lines[0].name
                            and "CMP" in block.lines[-2].name))

    # Basic block matcher
    nblock = MatchGraphJoker(name="next", restrict_in=False,
                             restrict_out=False)

    # Now it should match the blocks we want with the checks
    matcher = nblock >> mblock

    flag_storage = {}
    # Loop through matching template blocks
    for sol in matcher.match(blocks):
        try:
            # Grab position line
            pline = sol[mblock].lines[3]
            # Grab character check line
            cline = sol[mblock].lines[-2]
            # Transform character and position to integer
            pos = int(pline.arg2str(pline.args[1]), 16)
            c = int(cline.arg2str(cline.args[1]), 16)
            # If its NULL, ignore
            if c != 0:
                flag_storage[pos] = c
        except ValueError:
            # The F at the beginning is a NULL check
            pass

    # Print Flag
    flag = "".join(
        chr(flag_storage[key]) for key in sorted(flag_storage)
    ).replace("F", "I")
    print("F" + flag)
def container_guess(archinfo):
    """Use the architecture provided by the container, if any

    @archinfo: ArchHeuristic instance

    Returns ``{arch: 1}`` when the container built from
    ``archinfo.stream`` reports an architecture, an empty dict otherwise.
    """
    container = Container.from_stream(archinfo.stream)
    recognized = not isinstance(container, ContainerUnknown) and container.arch
    if recognized:
        return {container.arch: 1}
    return {}
def test_learn(args):
    """Compile the C tests, learn each one with sibyl and check the result.

    For every '.c' file found under the current directory, the binary of
    the same name (without the suffix) is loaded, ``sibyl learn`` is run on
    the function of that name, the generated source is executed in a fresh
    module and its ``Test<name>`` class is run through ``TestLauncher``.
    Success or failure is reported with ``log_success`` / ``log_error``.

    *args* is not used by this function.
    """
    machine = Machine("x86_64")

    # Compil tests
    log_info("Remove old files")
    os.system("make clean")
    log_info("Compile C files")
    status = os.system("make")
    assert status == 0

    # Find test names
    c_files = []
    for _cur_dir, _sub_dir, files in os.walk("."):
        c_files += [x[:-2] for x in files if x.endswith(".c")]

    for c_file in c_files:
        # Close each binary once loaded instead of leaking one handle per
        # test
        with open(c_file, "rb") as fdesc:
            cont = Container.from_stream(fdesc)

        func_name = c_file
        main_addr = cont.symbol_pool["main"].offset
        func_addr = cont.symbol_pool[func_name].offset

        log_info("Learning " + func_name + " over " + func_name + ".c")
        cmd = [
            "sibyl", "learn", "-t", "miasm", "-m", hex(main_addr),
            func_name, c_file
        ]
        sibyl = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
        stdout, _stderr = sibyl.communicate()
        assert sibyl.returncode == 0

        log_info("Testing generated class")
        mod = imp.new_module("testclass")
        # NOTE: executes the source printed by the sibyl subprocess
        exec(stdout, mod.__dict__)
        classTest = getattr(mod, "Test" + c_file)
        tl = TestLauncher(c_file, machine, ABI_AMD64_SYSTEMV, [classTest],
                          "gcc")
        # Only tl.possible_funcs is checked; the return value is not needed
        tl.run(func_addr)
        if tl.possible_funcs:
            log_success("Generated class recognize the function " + func_name)
        else:
            log_error("Generated class failed to recognize the function " +
                      func_name)

    log_info("Remove old files")
    os.system("make clean")
def __init__(self, filename, verbose=False):
    """Load binary file.

    Args:
        filename (str): path to a file to be analyzed
        verbose (bool): affects log verbosity
    """
    # Close the file once the container is built instead of leaking the
    # handle; binary mode keeps the bytes untouched on every platform.
    with open(filename, "rb") as fdesc:
        self._container = Container.from_stream(fdesc)
    self.bin_stream = self._container.bin_stream
    self.entry_point = self._container.entry_point
    self.machine = Machine(self._container.arch)
    self.fn = {}
    self.interval = interval()
    self.deep = 0
    self.offset = 0
    self._set_logging(verbose)
    self._logger.info("PE loaded")
def run(self):
    """Guess the function addresses of the target binary and print them.

    The binary is loaded at ``self.args.mapping_base``. The architecture is
    taken from ``self.args.architecture`` when it is set; otherwise it is
    guessed from the file with ``ArchHeuristic``. When ``config`` provides
    an IDA or Ghidra path, only the matching heuristic is kept by default.
    Heuristics named in ``self.args.enable_heuristic`` /
    ``self.args.disable_heuristic`` are then added / removed. Every address
    returned by ``fh.guess()`` is printed as zero-padded hexadecimal.

    Raises:
        ValueError: no architecture was given and none could be guessed.
    """
    map_addr = int(self.args.mapping_base, 0)

    # Architecture
    architecture = self.args.architecture
    if not architecture:
        with open(self.args.filename, "rb") as fdesc:
            architecture = ArchHeuristic(fdesc).guess()
        if not architecture:
            raise ValueError("Unable to recognize the architecture, please specify it")
        if self.args.verbose:
            print("Guessed architecture: %s" % architecture)

    # Close the file as soon as the container is built instead of leaking
    # the handle; binary mode keeps the bytes untouched on every platform.
    with open(self.args.filename, "rb") as fdesc:
        cont = Container.from_stream(fdesc, addr=map_addr)
    machine = Machine(architecture)
    # pc.size / 4 is used below as the zero-padded width of each address
    addr_size = machine.ira().pc.size // 4
    fh = FuncHeuristic(cont, machine, self.args.filename)

    # Default: force only IDA or GHIDRA if available
    if config.idaq64_path:
        fh.heuristics = [ida_funcs]
    elif config.ghidra_headless_path:
        fh.heuristics = [ghidra_funcs]

    # Enable / disable heuristics
    for name in self.args.enable_heuristic:
        heur = fh.name2heuristic(name)
        if heur not in fh.heuristics:
            fh.heuristics.append(heur)
    for name in self.args.disable_heuristic:
        heur = fh.name2heuristic(name)
        # The default list above may not contain the heuristic; disabling
        # an absent one is a no-op, not a ValueError raised by list.remove
        if heur in fh.heuristics:
            fh.heuristics.remove(heur)

    if self.args.verbose:
        print("Heuristics to run: %s" % ", ".join(fh.heuristic_names))

    # Launch guess
    fmt = "0x{:0%dx}" % addr_size
    for addr in fh.guess():
        print(fmt.format(addr))
def run(self):
    """Create a test for the requested function and output its source.

    The function address is ``self.args.address`` when given, otherwise
    the offset of the symbol ``self.args.functionname`` in
    ``self.args.program``. The generated test is written to
    ``self.args.output`` when set, and printed otherwise.

    Raises:
        ValueError: ``main`` is given with a tracer other than "miasm", or
            the symbol is not found in the program.
    """
    # Currently only AMD64 SYSTEMV ABI is supported by the learning module
    abi = ABI_AMD64_SYSTEMV

    # Currently only x86_64 is supported by the learning module
    machine = "x86_64"

    if self.args.trace != "miasm" and self.args.main is not None:
        raise ValueError("Main argument is only used by miasm tracer")

    main = int(self.args.main, 0) if self.args.main else None

    # If function address is not set then use the symbol address
    if self.args.address is None:
        # Close the program once loaded instead of leaking the handle
        with open(self.args.program, "rb") as fdesc:
            cont = Container.from_stream(fdesc)
        try:
            address = cont.symbol_pool[self.args.functionname].offset
        except KeyError:
            raise ValueError("Symbol %s does not exists in %s" %
                             (self.args.functionname, self.args.program))
    else:
        address = int(self.args.address, 0)

    testcreator = TestCreator(self.args.functionname, address,
                              self.args.program, self.args.headerfile,
                              AVAILABLE_TRACER[self.args.trace],
                              AVAILABLE_GENERATOR[self.args.generator],
                              main, abi, machine, self.args.avoid_null)

    # Any other verbosity value leaves the logger level untouched
    if self.args.verbose == 0:
        testcreator.logger.setLevel(logging.WARN)
    elif self.args.verbose == 1:
        testcreator.logger.setLevel(logging.INFO)
    elif self.args.verbose == 2:
        testcreator.logger.setLevel(logging.DEBUG)

    createdTest = testcreator.create_test()

    if self.args.output:
        # Flush and close the output file deterministically
        with open(self.args.output, "w+") as fdesc:
            fdesc.write(createdTest)
    else:
        print(createdTest)
def run(self):
    """Create a test for the requested function and output its source.

    The function address is ``self.args.address`` when given, otherwise
    the offset of the symbol ``self.args.functionname`` in
    ``self.args.program``. The generated test is written to
    ``self.args.output`` when set, and printed otherwise.

    Raises:
        ValueError: ``main`` is given with a tracer other than "miasm", or
            the symbol is not found in the program.
    """
    # Currently only AMD64 SYSTEMV ABI is supported by the learning module
    abi = ABI_AMD64_SYSTEMV

    # Currently only x86_64 is supported by the learning module
    machine = "x86_64"

    if self.args.trace != "miasm" and self.args.main is not None:
        raise ValueError("Main argument is only used by miasm tracer")

    main = int(self.args.main, 0) if self.args.main else None

    # If function address is not set then use the symbol address
    if self.args.address is None:
        # Close the program once loaded instead of leaking the handle
        with open(self.args.program, "rb") as fdesc:
            cont = Container.from_stream(fdesc)
        try:
            address = cont.symbol_pool[self.args.functionname].offset
        except KeyError:
            raise ValueError("Symbol %s does not exists in %s" %
                             (self.args.functionname, self.args.program))
    else:
        address = int(self.args.address, 0)

    testcreator = TestCreator(self.args.functionname, address,
                              self.args.program, self.args.headerfile,
                              AVAILABLE_TRACER[self.args.trace],
                              AVAILABLE_GENERATOR[self.args.generator],
                              main, abi, machine)

    # Any other verbosity value leaves the logger level untouched
    if self.args.verbose == 0:
        testcreator.logger.setLevel(logging.WARN)
    elif self.args.verbose == 1:
        testcreator.logger.setLevel(logging.INFO)
    elif self.args.verbose == 2:
        testcreator.logger.setLevel(logging.DEBUG)

    createdTest = testcreator.create_test()

    if self.args.output:
        # Flush and close the output file deterministically
        with open(self.args.output, "w+") as fdesc:
            fdesc.write(createdTest)
    else:
        print(createdTest)
# Example script: assemble one x86_32 instruction, disassemble it back into a
# block, translate the block to IR and symbolically execute it.
from miasm2.analysis.binary import Container
from miasm2.analysis.machine import Machine
from miasm2.ir.symbexec import SymbolicExecutionEngine
from miasm2.core.locationdb import LocationDB

# Address at which the block is disassembled and emulation starts
START_ADDR = 0
machine = Machine("x86_32")
loc_db = LocationDB()

# Assemble and disassemble a MOV
## Ensure that attributes 'offset' and 'l' are set
line = machine.mn.fromstring("MOV EAX, EBX", loc_db, 32)
# Keep the first entry of the list returned by asm()
asm = machine.mn.asm(line)[0]

# Get back block
cont = Container.from_string(asm, loc_db=loc_db)
mdis = machine.dis_engine(cont.bin_stream, loc_db=loc_db)
# presumably limits the disassembled block to a single line -- TODO confirm
mdis.lines_wd = 1
asm_block = mdis.dis_block(START_ADDR)

# Translate ASM -> IR
ira = machine.ira(mdis.loc_db)
ircfg = ira.new_ircfg()
ira.add_asmblock_to_ircfg(asm_block, ircfg)

# Instantiate a Symbolic Execution engine with default value for registers
symb = SymbolicExecutionEngine(ira)

# Emulate one IR basic block
## Emulation of several basic blocks can be done through .emul_ir_blocks
cur_addr = symb.run_at(ircfg, START_ADDR)
# Script: disassemble 'dump2.bin' from its entry point and define the filter
# used to match two-line PUSH / MOV blocks.
import os
from miasm2.analysis.binary import Container
from miasm2.analysis.machine import Machine
from miasm2.core.graph import DiGraphSimplifier, MatchGraphJoker

# Close the input once the container is built instead of leaking the handle
with open('dump2.bin', 'rb') as fdesc:
    container = Container.from_stream(fdesc)
bin_stream = container.bin_stream

#machine name = container.arch
machine = Machine(container.arch)

#fireup disasm engine
mdis = machine.dis_engine(bin_stream)

#Return an AsmCFG instance containing disassembled blocks
#https://github.com/cea-sec/miasm/pull/309
blocks = mdis.dis_multibloc(container.entry_point)

#open('AsmCFG_input.dot','w+').write(blocks.dot())

# Disabled debugging snippet, kept as an inert string
'''
for head in blocks.heads():
    for child in blocks.reachable_sons(head):
        print child
'''

# True for a block made of exactly two lines: a PUSH followed by a MOV
filter_block = lambda block: (len(block.lines) == 2 and
                              block.lines[0].name == 'PUSH' and
                              block.lines[1].name == 'MOV')

#parent joker node for the first block in MatchGraph / defining a filter for
parser.add_argument("-p", "--passthrough",
                    help="Reg-exp for passthrough files", default="^$")
parser.add_argument("-f", "--flags", help="Flags")
parser.add_argument("-v", "--verbose", action="store_true",
                    help="Activate verbose syscalls")
args = parser.parse_args()

if args.verbose:
    syscall.log.setLevel(logging.DEBUG)

# Get corresponding interpreter and reloc address
# (the target is closed once parsed instead of leaking the handle)
with open(args.target, "rb") as fdesc:
    cont_target_tmp = Container.from_stream(fdesc)
# The interpreter path is the content of the ".interp" section, without its
# NUL bytes at either end
ld_path = str(cont_target_tmp.executable.getsectionbyname(
    ".interp").content).strip("\x00")
if cont_target_tmp.executable.Ehdr.type in [elf_csts.ET_REL,
                                            elf_csts.ET_DYN]:
    elf_base_addr = 0x40000000
elif cont_target_tmp.executable.Ehdr.type == elf_csts.ET_EXEC:
    elf_base_addr = 0  # Not relocatable
else:
    raise ValueError("Unsuported type %d" %
                     cont_target_tmp.executable.Ehdr.type)

# Instantiate a jitter
machine = Machine(cont_target_tmp.arch)
jitter = machine.jitter(args.jitter)
jitter.init_stack()
# Example script: build an IRCFG from a binary and prepare an LLVM function
# to fill from it.
from argparse import ArgumentParser
from miasm2.analysis.binary import Container
from miasm2.analysis.machine import Machine
from miasm2.jitter.llvmconvert import LLVMType, LLVMContext_IRCompilation, LLVMFunction_IRCompilation
from llvmlite import ir as llvm_ir
from miasm2.expression.simplifications import expr_simp_high_to_explicit

parser = ArgumentParser("LLVM export example")
parser.add_argument("target", help="Target binary")
parser.add_argument("addr", help="Target address")
parser.add_argument("--architecture", "-a", help="Force architecture")
args = parser.parse_args()

# This part focus on obtaining an IRCFG to transform #
# (the target is closed once parsed instead of leaking the handle)
with open(args.target, "rb") as fdesc:
    cont = Container.from_stream(fdesc)
# Use the forced architecture when given, else the container's one
machine = Machine(args.architecture if args.architecture else cont.arch)
ir = machine.ir(cont.loc_db)
dis = machine.dis_engine(cont.bin_stream, loc_db=cont.loc_db)
asmcfg = dis.dis_multiblock(int(args.addr, 0))
ircfg = ir.new_ircfg_from_asmcfg(asmcfg)
ircfg.simplify(expr_simp_high_to_explicit)
######################################################

# Instantiate a context and the function to fill
context = LLVMContext_IRCompilation()
context.ir_arch = ir

func = LLVMFunction_IRCompilation(context, name="test")
func.ret_type = llvm_ir.VoidType()
func.init_fc()
"Use only with --propagexpr option. " "WARNING: not reliable, may fail.") parser.add_argument('-e', "--loadint", action="store_true", help="Load integers from binary in fixed memory lookup.") parser.add_argument('-j', "--calldontmodstack", action="store_true", help="Consider stack high is not modified in subcalls") args = parser.parse_args() if args.verbose: log_asmblock.setLevel(logging.DEBUG) log.info('Load binary') if args.rawbinary: cont = Container.fallback_container(open(args.filename, "rb").read(), vm=None, addr=args.base_address) else: with open(args.filename, "rb") as fdesc: cont = Container.from_stream(fdesc, addr=args.base_address) default_addr = cont.entry_point bs = cont.bin_stream e = cont.executable log.info('ok') log.info("import machine...") # Use the guessed architecture or the specified one arch = args.architecture if args.architecture else cont.arch if not arch: print "Architecture recognition fail. Please specify it in arguments" exit(-1)
# First stage sample name (also its SHA-1)
first_stage_fn = "0413f832d8161187172aef7a769586515f969479"
# ChaCha decryption function address for this particular sample
decrypt_func_addr = 0x400830
# Memory address of the initialization vector
iv_addr = 0x614000
# Memory address of the key
key_addr = 0x614020
# Arbitrary memory address to map the encrypted file in memory
in_addr = 0x40000000

# Create new instance of x86_64 sandbox to emulate the decryption function
sb = Sandbox_Linux_x86_64(first_stage_fn, options, globals())

# The encrypted input is raw bytes: read it in binary mode so that no
# newline translation can alter it
with open(first_stage_fn, "rb") as first_stage, \
        open(options.in_filename, "rb") as enc_bin:
    # NOTE(review): cont is not used in the visible part of the script
    cont = Container.from_stream(first_stage)
    in_data = enc_bin.read()
in_size = len(in_data)

# Allocate memory to store the output
out_addr = linobjs.heap.alloc(sb.jitter, in_size)
# Map the encrypted file in memory
sb.jitter.vm.add_memory_page(in_addr, PAGE_READ | PAGE_WRITE, in_data)

# Call the decryption function with the good arguments
sb.call(decrypt_func_addr, key_addr, 1, iv_addr, in_addr, out_addr, in_size)

# Get the decrypted data from memory
out_bin = sb.jitter.vm.get_mem(out_addr, in_size)
with open(options.out_filename, "wb") as dec_bin:
    dec_bin.write(out_bin)
print 'IN', [str(x) for x in irb_in_nodes[label]] print 'OUT', [str(x) for x in irb_out_nodes[label]] print '*' * 20, 'interblock', '*' * 20 inter_block_flow(ir_arch, ircfg, flow_graph, irblock_0.loc_key, irb_in_nodes, irb_out_nodes) # from graph_qt import graph_qt # graph_qt(flow_graph) open('data.dot', 'w').write(flow_graph.dot()) ad = int(args.addr, 16) print 'disasm...' cont = Container.from_stream(open(args.filename)) machine = Machine("x86_32") mdis = machine.dis_engine(cont.bin_stream, loc_db=cont.loc_db) mdis.follow_call = True asmcfg = mdis.dis_multiblock(ad) print 'ok' print 'generating dataflow graph for:' ir_arch_analysis = machine.ira(mdis.loc_db) ircfg = ir_arch_analysis.new_ircfg_from_asmcfg(asmcfg) for irblock in ircfg.blocks.values(): print irblock if args.symb:
def test_learn(args):
    """Learn every compiled C test with each tracer and check the result.

    The tests are rebuilt with ``make``. Each '.c' file found under the
    current directory gives a test name (the file name without its
    suffix); unsupported names are skipped. For each remaining test and
    each tracer (Miasm, plus PIN when ``args.pin_tracer`` is set), the
    learning command is run, its output is executed in a fresh module and
    the first entry of ``TESTS`` is run through ``TestLauncher``.

    Returns True if any learning or recognition step failed, else False.
    """
    machine = Machine("x86_64")

    # Rebuild the C tests from scratch
    log_info("Remove old files")
    os.system("make clean")
    log_info("Compile C files")
    build_status = os.system("make")
    assert build_status == 0

    # Test names are the C file names without their ".c" suffix
    test_names = [
        entry[:-2]
        for _root, _dirs, entries in os.walk(".")
        for entry in entries
        if entry.endswith(".c")
    ]

    # Tracers to learn with
    tracers = {"Miasm": invoke_miasm}
    if args.pin_tracer:
        tracers["PIN"] = invoke_pin

    # Learn + test
    failed = False
    for test_name in test_names:
        if test_name in unsupported:
            log_error("Skip %s (unsupported)" % test_name)
            continue

        with open(test_name) as stream:
            container = Container.from_stream(stream)

        # The function under test has the same name as its file
        target_addr = container.loc_db.get_name_offset(test_name)
        header = "%s.h" % test_name

        for tracer_name, build_cmdline in tracers.iteritems():
            log_info("Learning %s over %s with %s" % (test_name, test_name,
                                                      tracer_name))
            cmdline = build_cmdline(test_name, test_name, header, container)
            print(" ".join(cmdline))

            learner = subprocess.Popen(cmdline, env=os.environ,
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.PIPE)
            generated, errors = learner.communicate()
            if learner.returncode != 0:
                log_error("Failed to learn with error:")
                print(errors)
                failed = True
                continue

            log_info("Testing generated class")
            module = imp.new_module("testclass")
            exec(generated, module.__dict__)
            test_class = getattr(module, "TESTS")[0]
            launcher = TestLauncher(test_name, machine, ABI_AMD64_SYSTEMV,
                                    [test_class], config.jit_engine)

            matches = launcher.run(target_addr)
            if launcher.possible_funcs and matches == [test_name]:
                log_success(
                    "Generated class recognize the function '%s'" % test_name)
            else:
                log_error(
                    "Generated class failed to recognize the function '%s'"
                    % test_name)
                failed = True

    # Clean
    log_info("Remove old files")
    os.system("make clean")

    return failed
help="Use implicit tracking", action="store_true") parser.add_argument("--unfollow-mem", help="Stop on memory statements", action="store_true") parser.add_argument("--unfollow-call", help="Stop on call statements", action="store_true") parser.add_argument("--do-not-simplify", help="Do not simplify expressions", action="store_true") args = parser.parse_args() # Get architecture with open(args.filename) as fstream: cont = Container.from_stream(fstream) arch = args.architecture if args.architecture else cont.arch machine = Machine(arch) # Check elements elements = set() regs = machine.mn.regs.all_regs_ids_byname for element in args.element: try: elements.add(regs[element.upper()]) except KeyError: raise ValueError("Unknown element '%s'" % element) mdis = machine.dis_engine(cont.bin_stream, dont_dis_nulstart_bloc=True) ir_arch = machine.ira(mdis.symbol_pool)
help="Display image representation of disasm") parser.add_argument('-c', "--rawbinary", default=False, action="store_true", help="Don't interpret input as ELF/PE/...") parser.add_argument('-d', "--defuse", action="store_true", help="Dump the def-use graph in file 'defuse.dot'." "The defuse is dumped after simplifications if -s option is specified") args = parser.parse_args() if args.verbose: log_asmblock.setLevel(logging.DEBUG) log.info('Load binary') if args.rawbinary: shift = args.shiftoffset if args.shiftoffset is not None else 0 cont = Container.fallback_container(open(args.filename).read(), None, addr=shift) else: with open(args.filename) as fdesc: cont = Container.from_stream(fdesc, addr=args.shiftoffset) default_addr = cont.entry_point bs = cont.bin_stream e = cont.executable log.info('ok') log.info("import machine...") # Use the guessed architecture or the specified one arch = args.architecture if args.architecture else cont.arch if not arch: print "Architecture recognition fail. Please specify it in arguments" exit(-1)
def from_bytecode(self, bytecode):
    """Disassemble *bytecode* from offset 0 and store the result in ``self.blks``.

    The bytes are wrapped in a ``Container`` and disassembled with the
    disassembly engine of ``self.machine``.
    """
    stream = Container.from_string(bytecode).bin_stream
    disassembler = self.machine.dis_engine(stream)
    self.blks = disassembler.dis_multibloc(0)
action="store_true") parser.add_argument("--unfollow-call", help="Stop on call statements", action="store_true") parser.add_argument("--do-not-simplify", help="Do not simplify expressions", action="store_true") parser.add_argument("--rename-args", help="Rename common arguments (@32[ESP_init] -> Arg1)", action="store_true") parser.add_argument("--json", help="Output solution in JSON", action="store_true") args = parser.parse_args() # Get architecture with open(args.filename) as fstream: cont = Container.from_stream(fstream) arch = args.architecture if args.architecture else cont.arch machine = Machine(arch) # Check elements elements = set() regs = machine.mn.regs.all_regs_ids_byname for element in args.element: try: elements.add(regs[element]) except KeyError: raise ValueError("Unknown element '%s'" % element) mdis = machine.dis_engine(cont.bin_stream, dont_dis_nulstart_bloc=True) ir_arch = machine.ira(mdis.symbol_pool)
# Example script: disassemble an x86_32 binary from a given address and write
# the resulting block graph to 'graph.txt'.
import sys
from miasm2.arch.x86.disasm import dis_x86_32
from miasm2.core.asmbloc import bloc2graph
from miasm2.analysis.binary import Container
from pdb import pm

if len(sys.argv) != 3:
    print('Example:')
    print("%s samples/box_upx.exe 0x407570" % sys.argv[0])
    sys.exit(0)

# The address argument is read as hexadecimal
addr = int(sys.argv[2], 16)
# Close the input once the container is built instead of leaking the handle
with open(sys.argv[1], "rb") as fdesc:
    cont = Container.from_stream(fdesc)
mdis = dis_x86_32(cont.bin_stream)
# Inform the engine to avoid disassembling null instructions
mdis.dont_dis_nulstart_bloc = True
blocs = mdis.dis_multibloc(addr)

graph = bloc2graph(blocs)
# Flush and close the output file deterministically
with open('graph.txt', 'w') as fdesc:
    fdesc.write(graph)
from miasm2.analysis.data_flow import dead_simp from miasm2.expression.simplifications import expr_simp parser = ArgumentParser("Constant expression propagation") parser.add_argument('filename', help="File to analyze") parser.add_argument('address', help="Starting address for disassembly engine") parser.add_argument('-s', "--simplify", action="store_true", help="Apply simplifications rules (liveness, graph simplification, ...)") args = parser.parse_args() machine = Machine("x86_32") cont = Container.from_stream(open(args.filename)) ira, dis_engine = machine.ira, machine.dis_engine mdis = dis_engine(cont.bin_stream) ir_arch = ira(mdis.symbol_pool) addr = int(args.address, 0) asmcfg = mdis.dis_multiblock(addr) for block in asmcfg.blocks: ir_arch.add_block(block) init_infos = ir_arch.arch.regs.regs_init cst_propag_link = propagate_cst_expr(ir_arch, addr, init_infos) if args.simplify:
def load_vm(self, filename, map_addr):
    """Load *filename* into the jitter's VM and reset registers and stack.

    The container is built with ``vm=self.jitter.vm`` and ``addr=map_addr``
    and stored in ``self.ctr``; the CPU registers and the stack of
    ``self.jitter`` are then initialised.
    """
    # Close the file once the container is built instead of leaking the
    # handle; binary mode keeps the bytes untouched on every platform.
    with open(filename, "rb") as fdesc:
        self.ctr = Container.from_stream(fdesc, vm=self.jitter.vm,
                                         addr=map_addr)
    self.jitter.cpu.init_regs()
    self.jitter.init_stack()
"""This example illustrate the Sandbox.call API, for direct call of a given function""" from miasm2.analysis.sandbox import Sandbox_Linux_arml from miasm2.analysis.binary import Container from miasm2.os_dep.linux_stdlib import linobjs from miasm2.core.utils import hexdump # Parse arguments parser = Sandbox_Linux_arml.parser(description="ELF sandboxer") parser.add_argument("filename", help="ELF Filename") options = parser.parse_args() sb = Sandbox_Linux_arml(options.filename, options, globals()) with open(options.filename, "rb") as fdesc: cont = Container.from_stream(fdesc) loc_key = cont.loc_db.get_name_location("md5_starts") addr_to_call = cont.loc_db.get_location_offset(loc_key) # Calling md5_starts(malloc(0x64)) addr = linobjs.heap.alloc(sb.jitter, 0x64) sb.call(addr_to_call, addr) hexdump(sb.jitter.vm.get_mem(addr, 0x64))
}; struct ll_human { struct ll_human* next; struct human human; }; """ base_types = CTypeAMD64_unk() types_ast = CAstTypes() types_ast.add_c_decl(text) types_mngr = CTypesManagerNotPacked(types_ast, base_types) # Analyze binary cont = Container.fallback_container(data, None, addr=0) machine = Machine("x86_64") dis_engine, ira = machine.dis_engine, machine.ira mdis = dis_engine(cont.bin_stream, symbol_pool=cont.symbol_pool) addr_head = 0 asmcfg = mdis.dis_multiblock(addr_head) lbl_head = mdis.symbol_pool.getby_offset(addr_head) ir_arch_a = ira(mdis.symbol_pool) for block in asmcfg.blocks: ir_arch_a.add_block(block) open('graph_irflow.dot', 'w').write(ir_arch_a.graph.dot())
# Regression test script for the x86_32 disassembly engine, run on the bytes
# of 'samples/simple_test.bin'.
from pdb import pm

from miasm2.arch.x86.disasm import dis_x86_32
from miasm2.analysis.binary import Container
from miasm2.core.asmblock import AsmCFG, AsmConstraint, AsmBlock, \
    AsmLabel, AsmBlockBad, AsmConstraintTo, AsmConstraintNext, \
    bbl_simplifier
from miasm2.core.graph import DiGraphSimplifier, MatchGraphJoker
from miasm2.expression.expression import ExprId

# Initial data: from 'samples/simple_test.bin'
data = "5589e583ec10837d08007509c745fc01100000eb73837d08017709c745fc02100000eb64837d08057709c745fc03100000eb55837d080774138b450801c083f80e7509c745fc04100000eb3c8b450801c083f80e7509c745fc05100000eb298b450883e03085c07409c745fc06100000eb16837d08427509c745fc07100000eb07c745fc081000008b45fcc9c3".decode("hex")
cont = Container.from_string(data)

# Test Disasm engine
mdis = dis_x86_32(cont.bin_stream)
## Disassembly of one block
first_block = mdis.dis_bloc(0)
assert len(first_block.lines) == 5
print first_block

## Disassembly of several block, with cache
# Nothing is returned here; presumably offset 0 is already recorded in
# mdis.job_done by the dis_bloc call above -- TODO confirm
blocks = mdis.dis_multibloc(0)
assert len(blocks) == 0

## Test cache
# Once job_done is cleared, the same call returns the 17 blocks
mdis.job_done.clear()
blocks = mdis.dis_multibloc(0)
assert len(blocks) == 17
## Equality between assembly lines is not yet implemented
assert len(blocks.heads()) == 1
# Regression test script for the x86_32 disassembly engine, run on the bytes
# of 'samples/simple_test.bin'.
from pdb import pm

from miasm2.arch.x86.disasm import dis_x86_32
from miasm2.analysis.binary import Container
from miasm2.core.asmblock import AsmCFG, AsmConstraint, AsmBlock, \
    AsmLabel, AsmBlockBad, AsmConstraintTo, AsmConstraintNext, \
    bbl_simplifier
from miasm2.core.graph import DiGraphSimplifier, MatchGraphJoker
from miasm2.expression.expression import ExprId

# Initial data: from 'samples/simple_test.bin'
data = "5589e583ec10837d08007509c745fc01100000eb73837d08017709c745fc02100000eb64837d08057709c745fc03100000eb55837d080774138b450801c083f80e7509c745fc04100000eb3c8b450801c083f80e7509c745fc05100000eb298b450883e03085c07409c745fc06100000eb16837d08427509c745fc07100000eb07c745fc081000008b45fcc9c3".decode(
    "hex")
cont = Container.from_string(data)

# Test Disasm engine
mdis = dis_x86_32(cont.bin_stream)
## Disassembly of one block
first_block = mdis.dis_block(0)
assert len(first_block.lines) == 5
print first_block

## Test redisassemble blocks
# Disassembling the same offset again must give a block with as many lines
first_block_bis = mdis.dis_block(0)
assert len(first_block.lines) == len(first_block_bis.lines)
print first_block_bis

## Disassembly of several block, with cache
blocks = mdis.dis_multiblock(0)
assert len(blocks) == 17
def test_learn(args):
    """Learn every compiled C test with each tracer and check the result.

    The tests are rebuilt with ``make``. Each '.c' file found under the
    current directory gives a test name (the file name without its
    suffix); unsupported names are skipped. For each remaining test and
    each tracer (Miasm, plus PIN when ``args.pin_tracer`` is set), the
    learning command is run, its output is executed in a fresh module and
    the first entry of ``TESTS`` is run through ``TestLauncher``.

    Returns True if any learning or recognition step failed, else False.
    """
    machine = Machine("x86_64")

    # Rebuild the C tests from scratch
    log_info("Remove old files")
    os.system("make clean")
    log_info("Compile C files")
    build_status = os.system("make")
    assert build_status == 0

    # Test names are the C file names without their ".c" suffix
    test_names = [
        entry[:-2]
        for _root, _dirs, entries in os.walk(".")
        for entry in entries
        if entry.endswith(".c")
    ]

    # Tracers to learn with
    tracers = {"Miasm": invoke_miasm}
    if args.pin_tracer:
        tracers["PIN"] = invoke_pin

    # Learn + test
    failed = False
    for test_name in test_names:
        if test_name in unsupported:
            log_error("Skip %s (unsupported)" % test_name)
            continue

        with open(test_name) as stream:
            container = Container.from_stream(stream)

        # The function under test has the same name as its file
        target_addr = container.loc_db.get_name_offset(test_name)
        header = "%s.h" % test_name

        for tracer_name, build_cmdline in tracers.iteritems():
            log_info("Learning %s over %s with %s" % (test_name, test_name,
                                                      tracer_name))
            cmdline = build_cmdline(test_name, test_name, header, container)
            print(" ".join(cmdline))

            learner = subprocess.Popen(cmdline, env=os.environ,
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.PIPE)
            generated, errors = learner.communicate()
            if learner.returncode != 0:
                log_error("Failed to learn with error:")
                print(errors)
                failed = True
                continue

            log_info("Testing generated class")
            module = imp.new_module("testclass")
            exec(generated, module.__dict__)
            test_class = getattr(module, "TESTS")[0]
            launcher = TestLauncher(test_name, machine, ABI_AMD64_SYSTEMV,
                                    [test_class], config.jit_engine)

            matches = launcher.run(target_addr)
            if launcher.possible_funcs and matches == [test_name]:
                log_success(
                    "Generated class recognize the function '%s'" % test_name)
            else:
                log_error(
                    "Generated class failed to recognize the function '%s'"
                    % test_name)
                failed = True

    # Clean
    log_info("Remove old files")
    os.system("make clean")

    return failed
}; struct ll_human { struct ll_human* next; struct human human; }; """ base_types = CTypeAMD64_unk() types_ast = CAstTypes() types_ast.add_c_decl(text) types_mngr = CTypesManagerNotPacked(types_ast, base_types) # Analyze binary cont = Container.fallback_container(data, None, addr=0) machine = Machine("x86_64") dis_engine, ira = machine.dis_engine, machine.ira mdis = dis_engine(cont.bin_stream, symbol_pool=cont.symbol_pool) addr_head = 0 blocks = mdis.dis_multiblock(addr_head) lbl_head = mdis.symbol_pool.getby_offset(addr_head) ir_arch_a = ira(mdis.symbol_pool) for block in blocks: ir_arch_a.add_block(block) open('graph_irflow.dot', 'w').write(ir_arch_a.graph.dot())
# Update next blocks to process in the disassembly engine cur_bloc.bto.clear() cur_bloc.add_cst(loc_key, AsmConstraint.c_next) # Prepare a tiny shellcode shellcode = ''.join([ "\xe8\x00\x00\x00\x00", # CALL $ "X", # POP EAX "\xc3", # RET ]) # Instantiate a x86 32 bit architecture machine = Machine("x86_32") cont = Container.from_string(shellcode) mdis = machine.dis_engine(cont.bin_stream, loc_db=cont.loc_db) print "Without callback:\n" asmcfg = mdis.dis_multiblock(0) print "\n".join(str(block) for block in asmcfg.blocks) # Enable callback mdis.dis_block_callback = cb_x86_callpop print "=" * 40 print "With callback:\n" asmcfg_after = mdis.dis_multiblock(0) print "\n".join(str(block) for block in asmcfg_after.blocks) # Ensure the callback has been called
elif addr.is_int(): addr = int(addr.arg) states_todo.add((addr, symbexec.symbols.copy(), tuple(conds))) elif addr.is_loc(): states_todo.add((addr, symbexec.symbols.copy(), tuple(conds))) else: raise ValueError("Unsupported destination") if __name__ == '__main__': translator_smt2 = Translator.to_language("smt2") addr = int(options.address, 16) cont = Container.from_stream(open(args[0])) mdis = machine.dis_engine(cont.bin_stream, loc_db=cont.loc_db) ir_arch = machine.ir(mdis.loc_db) ircfg = ir_arch.new_ircfg() symbexec = SymbolicExecutionEngine(ir_arch) asmcfg, loc_db = parse_asm.parse_txt(machine.mn, 32, ''' init: PUSH argv PUSH argc PUSH ret_addr ''', loc_db=mdis.loc_db)
# Disassemble a short x86_32 byte string and write its control flow graph to
# 'str_cfg.dot'.
from miasm2.analysis.binary import Container
from miasm2.analysis.machine import Machine

# The Container will provide a *bin_stream*, bytes source for the disasm engine
cont = Container.from_string(
    "\x83\xf8\x10\x74\x07\x89\xc6\x0f\x47\xc3\xeb\x08\x89\xc8\xe8\x31\x33\x22\x11\x40\xc3"
)

# Instantiate a x86 32 bit architecture
machine = Machine("x86_32")

# Instantiate a disassembler engine, using the previous bin_stream and its
# associated location DB.
mdis = machine.dis_engine(cont.bin_stream, loc_db=cont.loc_db)

# Run a recursive traversal disassembling from address 0
asmcfg = mdis.dis_multiblock(0)

# Display each basic block. A parenthesized single argument prints the same
# text with the Python 2 print statement and the print() function.
for block in asmcfg.blocks:
    print(block)

# Output control flow graph in a dot file. The context manager closes (and
# flushes) the file deterministically instead of relying on garbage
# collection of the temporary file object.
with open('str_cfg.dot', 'w') as dot_file:
    dot_file.write(asmcfg.dot())
# Remaining command-line options, then load the input file into a Container
# and instantiate the architecture-specific Machine.
parser.add_argument('-o', "--shiftoffset", default=None,
                    type=lambda x: int(x, 0),
                    help="Shift input binary by an offset")
parser.add_argument('-a', "--try-disasm-all", action="store_true",
                    help="Try to disassemble the whole binary")
parser.add_argument('-i', "--image", action="store_true",
                    help="Display image representation of disasm")
args = parser.parse_args()

if args.verbose:
    log_asmbloc.setLevel(logging.DEBUG)

log.info('Load binary')
# Binary mode: the file is handed to the container as raw bytes, so no
# platform newline translation may be applied while reading it.
with open(args.filename, "rb") as fdesc:
    cont = Container.from_stream(fdesc, addr=args.shiftoffset)

default_addr = cont.entry_point
bs = cont.bin_stream
e = cont.executable
log.info('ok')

log.info("import machine...")
# Use the guessed architecture or the specified one
arch = args.architecture if args.architecture else cont.arch
if not arch:
    print("Architecture recognition fail. Please specify it in arguments")
    # Raise SystemExit directly: the exit() helper is injected by the `site`
    # module for interactive use and is not guaranteed to exist in programs.
    raise SystemExit(-1)

# Instance the arch-dependent machine
machine = Machine(arch)
"--try-disasm-all", action="store_true", help="Try to disassemble the whole binary") parser.add_argument('-i', "--image", action="store_true", help="Display image representation of disasm") args = parser.parse_args() if args.verbose: log_asmbloc.setLevel(logging.DEBUG) log.info('Load binary') with open(args.filename) as fdesc: cont = Container.from_stream(fdesc, addr=args.shiftoffset) default_addr = cont.entry_point bs = cont.bin_stream e = cont.executable log.info('ok') log.info("import machine...") # Use the guessed architecture or the specified one arch = args.architecture if args.architecture else cont.arch if not arch: print "Architecture recognition fail. Please specify it in arguments" exit(-1) # Instance the arch-dependent machine machine = Machine(arch)
'-d', "--defuse", action="store_true", help="Dump the def-use graph in file 'defuse.dot'." "The defuse is dumped after simplifications if -s option is specified") args = parser.parse_args() if args.verbose: log_asmblock.setLevel(logging.DEBUG) log.info('Load binary') if args.rawbinary: shift = args.shiftoffset if args.shiftoffset is not None else 0 cont = Container.fallback_container(open(args.filename).read(), None, addr=shift) else: with open(args.filename) as fdesc: cont = Container.from_stream(fdesc, addr=args.shiftoffset) default_addr = cont.entry_point bs = cont.bin_stream e = cont.executable log.info('ok') log.info("import machine...") # Use the guessed architecture or the specified one arch = args.architecture if args.architecture else cont.arch if not arch: print "Architecture recognition fail. Please specify it in arguments"
# Configure logging, instantiate the machine, load the binary and prepare the
# disassembly engine from the parsed command-line options.
if options.verbose:
    log_asmbloc.setLevel(logging.DEBUG)

log.info("import machine...")
machine = Machine(options.machine)
mn, dis_engine, ira = machine.mn, machine.dis_engine, machine.ira
log.info('ok')

# Convert the optional numeric options to int. `is not None` is the identity
# test for "option not supplied".
if options.bw is not None:
    options.bw = int(options.bw)
if options.funcswd is not None:
    options.funcswd = int(options.funcswd)

log.info('Load binary')
# Binary mode: the file is handed to the container as raw bytes, so no
# platform newline translation may be applied while reading it.
with open(fname, "rb") as fdesc:
    cont = Container.from_stream(fdesc, addr=options.shiftoffset)

default_addr = cont.entry_point
bs = cont.bin_stream
e = cont.executable
log.info('ok')

mdis = dis_engine(bs)
# configure disasm engine
mdis.dontdis_retcall = options.dontdis_retcall
mdis.blocs_wd = options.bw
mdis.dont_dis_nulstart_bloc = not options.dis_nulstart_bloc

todo = []
# Every positional argument after the first is read as a hexadecimal address.
addrs = [int(a, 16) for a in args[1:]]