def patch_xrefs(find_addr, patch_addr, args, ip='localhost', port=4455, conn=None):
    """
    Patches xrefs with certain arguments

    :param find_addr: address of function whose xrefs are to be replaced
    :param patch_addr: the new target of the xref call
    :param args: dictionary mapping number of argument to its required value
    :param ip: optional, IP of the computer running rpyc server in IDA
    :param port: optional, port of the computer running rpyc server in IDA
    :param conn: optional, already established connection to running rpyc server in IDA
    :return: None
    """
    # Only a connection opened here is closed here; a caller-supplied one is
    # left untouched.
    close_conn = False
    if not conn:
        close_conn = True
        conn = rpyc.classic.connect(ip, port)

    try:
        file_name = conn.modules.idaapi.get_input_file_path()
        idautils = conn.root.getmodule("idautils")

        # While the patch is active, every open() below resolves to the
        # builtin exposed by the rpyc connection.
        with mock.patch("builtins.open", conn.builtins.open):
            # Close the input handle as soon as the container is built
            # instead of leaking it.
            with open(file_name, 'rb') as fstream:
                cont = Container.from_stream(fstream)
            machine = Machine(cont.arch)
            mdis = machine.dis_engine(cont.bin_stream)
            exectbl = cont.executable

            for r in idautils.XrefsTo(find_addr):
                scn_args = conn.modules.idaapi.get_arg_addrs(r.frm)
                if scn_args is None:
                    print("Couldn't find args of %x" % r.frm)
                    continue
                if compare_args(args, scn_args, conn):
                    patch_xref(r.frm, patch_addr, mdis, machine.mn, exectbl)

            # Write the (possibly patched) executable back over the input file
            with open(file_name, 'wb') as fl:
                fl.write(bytes(exectbl))
    finally:
        # Release the connection even when patching fails half-way
        if close_conn:
            conn.close()
def main(file_path: Path, start_addr: int, oracle_path: Path) -> None:
    """Symbolically execute one basic block and print simplified expressions.

    The block at ``start_addr`` of the binary ``file_path`` is disassembled,
    lifted to Miasm IR and symbolically executed. Every modified value that is
    not an integer, identifier or location expression is printed before and
    after being passed to ``Simplifier(oracle_path).simplify``.

    :param file_path: path of the binary to analyse
    :param start_addr: address of the basic block to process
    :param oracle_path: path handed to the ``Simplifier`` constructor
    """
    # symbol table
    loc_db = LocationDB()

    # open the binary for analysis; the handle is closed once the container
    # has been built
    with open(file_path, 'rb') as fstream:
        container = Container.from_stream(fstream, loc_db)

    # cpu abstraction
    machine = Machine(container.arch)

    # init disassemble engine
    mdis = machine.dis_engine(container.bin_stream, loc_db=loc_db)

    # initialize intermediate representation
    lifter = machine.lifter_model_call(mdis.loc_db)

    # disassemble the function at address
    asm_block = mdis.dis_block(start_addr)

    # lift to Miasm IR
    ira_cfg = lifter.new_ircfg()
    lifter.add_asmblock_to_ircfg(asm_block, ira_cfg)

    # init symbolic execution engine
    sb = SymbolicExecutionEngine(lifter)

    # symbolically execute basic block
    sb.run_block_at(ira_cfg, start_addr)

    # initialize simplifier
    simplifier = Simplifier(oracle_path)

    # only the assigned values are inspected; destinations are ignored
    for _, v in sb.modified():
        # trivial expressions are skipped
        if v.is_int() or v.is_id() or v.is_loc():
            continue

        print(f"before: {v}")
        simplified = simplifier.simplify(v)
        print(f"simplified: {simplified}")
        print("\n\n")
def emotet_control_flow_unflatten(func_addr, filename):
    """Try to undo control-flow flattening of one function in a binary.

    The function at ``func_addr`` in ``filename`` is disassembled and lifted to
    IR. If ``get_state_register`` returns a falsy value, a message is printed
    and nothing else happens. Otherwise the offsets returned by
    ``resolve_offsets`` are sorted by their first element and passed, together
    with ``filename``, to ``fix_func_cfg``.

    :param func_addr: address of the function to process
    :param filename: path of the binary containing the function
    :return: None
    """
    with open(filename, 'rb') as binary:
        container = Container.from_stream(binary)

    arch_machine = Machine(container.arch)
    disassembler = arch_machine.dis_engine(container.bin_stream,
                                           loc_db=container.loc_db)
    func_cfg = disassembler.dis_multiblock(func_addr)

    lifter = arch_machine.ira(disassembler.loc_db)
    func_ircfg = lifter.new_ircfg_from_asmcfg(func_cfg)

    entry_loc = get_loc_key_at(container.loc_db, func_addr)
    state_reg = get_state_register(func_cfg, entry_loc)

    if state_reg:
        patches = resolve_offsets(state_reg, func_cfg, func_ircfg, lifter)
        # in-place ordering by the first tuple member
        patches.sort(key=lambda entry: entry[0])
        fix_func_cfg(filename, patches)
        print("[+] Function was deobfuscated!")
    else:
        print("[-] Function was not obfuscated")
class Block:
    """A raw x86_64 code snippet together with its Miasm CFG and IR.

    The constructor disassembles the snippet, builds two IR graphs from it
    (``orginal_ircfg`` via ``machine.ira`` and ``ircfg`` via
    ``IRADelModCallStack``) and runs the simplification passes on both.
    """

    def __init__(self, raw, address):
        """Disassemble ``raw`` as if loaded at ``address`` and simplify its IR.

        :param raw: bytes of the snippet
        :param address: address passed to the container as ``addr`` and used
            as the disassembly entry point
        """
        self.address = address
        self.machine = Machine('x86_64')

        # One extra 0xC3 byte (x86 `ret`) is appended to the snippet,
        # presumably so that disassembly always terminates.
        padded = raw + b'\xC3'
        self.cont = Container.fallback_container(padded, vm=None, addr=address)

        self.mdis = self.machine.dis_engine(self.cont.bin_stream,
                                            loc_db=self.cont.loc_db)
        self.asmcfg = self.mdis.dis_multiblock(self.address)
        self.head = self.asmcfg.getby_offset(self.address).loc_key

        # IR produced by the stock lifter, simplified right away
        self.orignal_ira = self.machine.ira(self.mdis.loc_db)
        self.orginal_ircfg = self.orignal_ira.new_ircfg_from_asmcfg(self.asmcfg)
        self.common_simplifier = IRCFGSimplifierCommon(self.orignal_ira)
        self.common_simplifier.simplify(self.orginal_ircfg, self.head)

        # IR produced by the customised lifters, simplified by simplify()
        self.custom_ira1 = IRADelModCallStack(self.mdis.loc_db)
        self.custom_ira2 = IRAOutRegs(self.mdis.loc_db)
        self.ircfg = self.custom_ira1.new_ircfg_from_asmcfg(self.asmcfg)
        self.simplify()

    def simplify(self):
        """Run the common and the SSA simplification passes on ``self.ircfg``.

        Between the two passes every leaf block gets one additional
        assignment block in which each register reported by
        ``custom_ira1.get_out_regs`` is assigned to itself.
        """
        IRCFGSimplifierCommon(self.custom_ira1).simplify(self.ircfg, self.head)

        for leaf in self.ircfg.leaves():
            leaf_block = self.ircfg.blocks.get(leaf)
            if leaf_block is not None:
                out_regs = {
                    reg: reg
                    for reg in self.custom_ira1.get_out_regs(leaf_block)
                }
                assignments = list(leaf_block)
                # the new assignment reuses the instruction of the last one
                assignments.append(AssignBlock(out_regs, assignments[-1].instr))
                self.ircfg.blocks[leaf] = IRBlock(leaf_block.loc_key, assignments)

        CustomIRCFGSimplifierSSA(self.custom_ira2).simplify(self.ircfg, self.head)
# Build the container; `cont` keeps being used after the stream is closed
with open(args.filename, "rb") as fstream:
    cont = Container.from_stream(fstream)

# An architecture given on the command line overrides the container's one
arch = args.architecture or cont.arch
machine = Machine(arch)

# Check elements: every requested name must be a register of the architecture
regs = machine.mn.regs.all_regs_ids_byname
elements = set()
for element in args.element:
    try:
        elements.add(regs[element])
    except KeyError:
        raise ValueError("Unknown element '%s'" % element)

mdis = machine.dis_engine(cont.bin_stream, dont_dis_nulstart_bloc=True)
ir_arch = machine.ira(mdis.loc_db)

# Common argument forms
init_ctx = {}
if args.rename_args and arch == "x86_32":
    # StdCall example: the 32-bit slots at ESP_init+4 .. ESP_init+16 are
    # named arg0 .. arg3
    for i in range(4):
        e_mem = ExprMem(ExprId("ESP_init", 32) + ExprInt(4 * (i + 1), 32), 32)
        init_ctx[e_mem] = ExprId("arg%d" % i, 32)

# Disassemble the targeted function
asmcfg = mdis.dis_multiblock(int(args.func_addr, 0))
from miasm.jitter.llvmconvert import LLVMType, LLVMContext_IRCompilation, LLVMFunction_IRCompilation
from llvmlite import ir as llvm_ir
from miasm.expression.simplifications import expr_simp_high_to_explicit
from miasm.core.locationdb import LocationDB

# Command line: target binary, address to lift, optional forced architecture
parser = ArgumentParser("LLVM export example")
parser.add_argument("target", help="Target binary")
parser.add_argument("addr", help="Target address")
parser.add_argument("--architecture", "-a", help="Force architecture")
args = parser.parse_args()

loc_db = LocationDB()
# This part focus on obtaining an IRCFG to transform #
# The input handle is closed as soon as the container has been built.
with open(args.target, 'rb') as fstream:
    cont = Container.from_stream(fstream, loc_db)
machine = Machine(args.architecture if args.architecture else cont.arch)
lifter = machine.lifter(loc_db)
dis = machine.dis_engine(cont.bin_stream, loc_db=loc_db)
# The address is parsed with base 0, so "0x..." and decimal are both accepted
asmcfg = dis.dis_multiblock(int(args.addr, 0))
ircfg = lifter.new_ircfg_from_asmcfg(asmcfg)
ircfg.simplify(expr_simp_high_to_explicit)
######################################################

# Instantiate a context and the function to fill
context = LLVMContext_IRCompilation()
context.lifter = lifter

func = LLVMFunction_IRCompilation(context, name="test")
func.ret_type = llvm_ir.VoidType()
func.init_fc()

# Here, as an example, we arbitrarily represent registers with global
# variables. Locals allocas are used for the computation during the function,
from miasm.arch.x86.arch import mn_x86
from miasm.core.locationdb import LocationDB
from miasm.analysis.binary import Container
from miasm.analysis.machine import Machine
from miasm.ir.symbexec import SymbolicExecutionEngine
from miasm.analysis.dse import DSEEngine
from miasm.expression.expression import *
from miasm.ir.translators.z3_ir import Z3Mem, TranslatorZ3

# Single location database shared by the container, the disassembler and the
# lifter, so that the loc keys found in the CFG and in the IR refer to the
# same locations.
loc_db = LocationDB()

# Machine code samples, as bytes literals. The first sample is overridden by
# the assignment right below it; only the second one is disassembled.
s = b'\x8dI\x04\x8d[\x01\x80\xf9\x01t\x05\x8d[\xff\xeb\x03\x8d[\x01\x89\xd8\xc3'
s = b'\x55\x8b\xec\x83\xec\x08\xc7\x45\xf8\xcc\xcc\xcc\xcc\xc7\x45\xfc\xcc\xcc\xcc\xcc\xc7\x45\xfc\x03\x00\x00\x00\xc7\x45\xf8\x05\x00\x00\x00\x83\x7d\xfc\x05\x7e\x07\x8b\x45\xfc\xeb\x09\xeb\x05\x8b\x45\xf8\xeb\x02\x33\xc0\x8b\xe5\x5d\xc3'

c = Container.from_string(s, loc_db=loc_db)
machine = Machine('x86_32')
mdis = machine.dis_engine(c.bin_stream, loc_db=loc_db)

# Recursive disassembly starting at offset 0 of the sample
asmcfg = mdis.dis_multiblock(0)
for block in asmcfg.blocks:
    print(block.to_string(asmcfg.loc_db))

ira = machine.ira(loc_db)
ircfg = ira.new_ircfg_from_asmcfg(asmcfg)
# print(ircfg)
# ircfg = ira.new_ircfg(asmcfg)
# print(loc_db._offset_to_loc_key.keys()[0])
sb = SymbolicExecutionEngine(ira)
# symbolic_pc = sb.run_at(ircfg, loc_db._offset_to_loc_key.keys()[0])
# for index, info in enumerate(sb.info_ids):
#     print('###### step', index+1)
#     print('\t', info[0])
#     for reg in info[1]:
#         print('\t\t', reg, ':', info[1][reg])
# Container is superclass of ContainerELF, ContainerPE, etc. # Is container the analog of Binja's BinaryView? # # container.arch = 'x86_64' container = Container.from_string(data) # list all symbols / addresses available from the container ldb = container.loc_db for k in ldb.loc_keys: offset = ldb.get_location_offset(k) names = [x.decode() for x in ldb.get_location_names(k)] #print('%08X:' % offset) #for name in names: # print('\t"%s"' % name) # disassemble the given symbol machine = Machine(container.arch) disassembler = machine.dis_engine(container.bin_stream, loc_db=container.loc_db) sym_offs = resolve_symbol(container, sym_name) #print('%s is located at: 0x%X' % (sym_name, sym_offs)) # miasm.core.asmblock.AsmCFG cfg = disassembler.dis_multiblock(offset=sym_offs) print('```mermaid') print(cfg_to_mermaid(cfg)) print('```')
from __future__ import print_function
from miasm.analysis.binary import Container
from miasm.analysis.machine import Machine

# The Container will provide a *bin_stream*, bytes source for the disasm engine
cont = Container.from_string(b"\x83\xf8\x10\x74\x07\x89\xc6\x0f\x47\xc3\xeb\x08\x89\xc8\xe8\x31\x33\x22\x11\x40\xc3")

# Instantiate a x86 32 bit architecture
machine = Machine("x86_32")

# Instantiate a disassembler engine, using the previous bin_stream and its
# associated location DB.
mdis = machine.dis_engine(cont.bin_stream, loc_db=cont.loc_db)

# Run a recursive traversal disassembling from address 0
asmcfg = mdis.dis_multiblock(0)

# Display each basic blocks
for block in asmcfg.blocks:
    print(block)

# Output control flow graph in a dot file; the context manager makes sure the
# file is flushed and closed
with open('str_cfg.dot', 'w') as dot_file:
    dot_file.write(asmcfg.dot())
from miasm.core.utils import decode_hex
from miasm.analysis.machine import Machine
from miasm.analysis.binary import Container
from miasm.core.asmblock import AsmCFG, AsmConstraint, AsmBlock, \
    AsmBlockBad, AsmConstraintTo, AsmConstraintNext, \
    bbl_simplifier
from miasm.core.graph import DiGraphSimplifier, MatchGraphJoker
from miasm.expression.expression import ExprId

# Initial data: from 'samples/simple_test.bin'
data = decode_hex("5589e583ec10837d08007509c745fc01100000eb73837d08017709c745fc02100000eb64837d08057709c745fc03100000eb55837d080774138b450801c083f80e7509c745fc04100000eb3c8b450801c083f80e7509c745fc05100000eb298b450883e03085c07409c745fc06100000eb16837d08427509c745fc07100000eb07c745fc081000008b45fcc9c3")
cont = Container.from_string(data)

# Test Disasm engine
machine = Machine("x86_32")
mdis = machine.dis_engine(cont.bin_stream, loc_db=cont.loc_db)

## Disassembly of one block
# The block starting at offset 0 is expected to hold exactly 5 instructions
first_block = mdis.dis_block(0)
assert len(first_block.lines) == 5
print(first_block)

## Test redisassemble asmcfg
# Disassembling the same offset again must give a block of the same length
first_block_bis = mdis.dis_block(0)
assert len(first_block.lines) == len(first_block_bis.lines)
print(first_block_bis)

## Disassembly of several block, with cache
# The recursive disassembly from offset 0 is expected to yield 17 blocks
asmcfg = mdis.dis_multiblock(0)
assert len(asmcfg) == 17

## Test redisassemble asmcfg
# Parse the input binary into a container
with open(args.filename, "rb") as fstream:
    cont = Container.from_stream(fstream)

# Prefer the architecture forced by the user, else the one of the container
arch = args.architecture or cont.arch
machine = Machine(arch)

# Check elements: translate each requested register name into its expression
# and reject names the architecture does not know
regs = machine.mn.regs.all_regs_ids_byname
elements = set()
for element in args.element:
    try:
        elements.add(regs[element])
    except KeyError:
        raise ValueError("Unknown element '%s'" % element)

mdis = machine.dis_engine(cont.bin_stream, dont_dis_nulstart_bloc=True)
ir_arch = machine.ira(mdis.loc_db)

# Common argument forms
init_ctx = {}
if args.rename_args and arch == "x86_32":
    # StdCall example: ESP_init + 4, + 8, + 12, + 16 become arg0 .. arg3
    for i in range(4):
        e_mem = ExprMem(ExprId("ESP_init", 32) + ExprInt(4 * (i + 1), 32), 32)
        init_ctx[e_mem] = ExprId("arg%d" % i, 32)

# Disassemble the targeted function
asmcfg = mdis.dis_multiblock(int(args.func_addr, 0))

# Generate IR
# Location database shared by the lifter and the disassembler
loc_db = LocationDB()
ira = mn.ira(loc_db)

# create an empty ircfg
ircfg = ira.new_ircfg()

# Binary path and offset of the target function
offset = 0x1150
fname = "bin/simple_test.bin"

# Get Miasm's binary stream
# The sample is machine code: read it in binary mode and close the handle
# once its content is in memory.
with open(fname, "rb") as fdesc:
    bin_file = fdesc.read()
bin_stream = bin_stream_str(bin_file)

# Disassemble blocks of the function at 'offset'
# The disassembler gets the lifter's loc_db so that the loc keys of the CFG
# and of the IR graph refer to the same locations.
mdis = mn.dis_engine(bin_stream, loc_db=loc_db)
disasm = mdis.dis_multiblock(offset)
ircfg = ira.new_ircfg_from_asmcfg(disasm)
entry_points = {mdis.loc_db.get_offset_location(offset)}

# Create target IR object and add all basic blocks to it
# NOTE(review): `ir` is not used in the visible part of the script, and
# `symbol_pool` looks like the pre-loc_db attribute name - confirm it still
# exists in the Miasm version in use.
ir = ir_a_x86_64(mdis.symbol_pool)
for bbl in disasm.blocks:
    print(bbl.to_string(disasm.loc_db))
    ira.add_asmblock_to_ircfg(bbl, ircfg)

# Propagate constants starting from the architecture's initial register values
init_infos = ira.arch.regs.regs_init
propagate_cst_expr(ira, ircfg, offset, init_infos)
ircfg.simplify(expr_simp)
from miasm.analysis.binary import Container
from miasm.analysis.machine import Machine
from miasm.arch.x86.lifter_model_call import LifterModelCall_x86_32
from miasm.core.asmblock import AsmCFG
from miasm.core.locationdb import LocationDB
from miasm.expression.simplifications import expr_simp

loc_db = LocationDB()
# The input handle is closed as soon as the container has been built
with open("crackme0x02", "rb") as fstream:
    cont = Container.from_stream(fstream, loc_db)
machine = Machine(cont.arch)

# --- Disassemble --- #
mdis = machine.dis_engine(cont.bin_stream, loc_db=loc_db, follow_call=True)

# Disassemble at the block that checks the key
entry_addr = 0x804842B
asmcfg = mdis.dis_multiblock(entry_addr)

# Write Graphviz; the context manager flushes and closes the file
with open("cfg.dot", "w") as dot_file:
    dot_file.write(asmcfg.dot())

# --- Get IR --- #
lifter = LifterModelCall_x86_32(loc_db)
ircfg = lifter.new_ircfg()
# Only the first block yielded by asmcfg.blocks is lifted
first_block = list(asmcfg.blocks)[0]
lifter.add_asmblock_to_ircfg(first_block, ircfg)

# --- Symbolic execution --- #
# NOTE(review): `fdesc` is not closed anywhere in this fragment.
fdesc = open("bytecode", 'rb')
loc_db = LocationDB()
raw = fdesc.read()

# The Container will provide a *bin_stream*, bytes source for the disasm engine
# It will provide a view from a PE or an ELF.
# cont = Container.from_stream(fdesc, loc_db)

# The Machine provides the tools (disassembler, etc.) to work with an
# architecture. The name is hard-coded here, not detected from a container.
machine = Machine("spacez")
# code.interact(local=locals())

# Instantiate a disassembler engine directly on the raw bytes read above
# (no Container / bin_stream is built in this fragment), with the location DB
# created above.
mdis = machine.dis_engine(raw, loc_db=loc_db)

# Run a recursive traversal disassembling from the entry point
# (do not follow sub functions by default)
addr = 0
asmcfg = mdis.dis_multiblock(addr)

# Lift the disassembled CFG to IR
ira = machine.ira(loc_db)
ircfg = ira.new_ircfg_from_asmcfg(asmcfg)

# `regs_init` is defined outside this fragment
state = regs_init
# propagate_cst_expr(ira, ircfg, 0, state)
# simp = IRCFGSimplifierCommon(ira)
# Location key of the entry offset
loc = loc_db.get_offset_location(addr)
# simp.simplify(ircfg, loc)
def get_graph(self):
    """Build the Miasm IR graph of ``self.function`` in the requested form.

    Reads ``self.simplify``, ``self.dontmodstack``, ``self.loadmemint`` and
    ``self.type_graph`` as options, ``self.data`` as the source of the bytes
    and ``self.function.start`` as the entry address.

    :return: ``MiasmIRGraph(self.add_names(graph))`` where ``graph`` is the
        plain IR graph (``TYPE_GRAPH_IR``), the SSA graph
        (``TYPE_GRAPH_IRSSA``) or the graph converted back from SSA
        (``TYPE_GRAPH_IRSSAUNSSA``)
    :raises ValueError: if ``self.type_graph`` is none of the values above
    """
    simplify = self.simplify
    dontmodstack = self.dontmodstack
    loadmemint = self.loadmemint
    type_graph = self.type_graph

    # Assemble one contiguous image from all segments. The pieces are kept as
    # bytes and joined once: this works on Python 2 and Python 3 (assuming
    # self.data.read returns bytes there - TODO confirm) and avoids repeated
    # re-allocation of a growing string.
    chunks = []
    for s in self.data.segments:
        chunks.append(self.data.read(s.start, len(s)))
        # add padding between each segment: zero bytes up to the next
        # 0x1000 boundary after the segment end
        if s.end != self.data.end:
            chunks.append(b'\x00' * (((s.end | 0xfff) + 1) - s.end))
    bin_str = b"".join(chunks)

    bs = bin_stream_str(input_str=bin_str, base_address=self.data.start)
    machine = Machine(archs[self.data.arch.name])
    mdis = machine.dis_engine(bs)

    asmcfg = mdis.dis_multiblock(self.function.start)
    entry_points = {mdis.loc_db.get_offset_location(self.function.start)}

    class IRADelModCallStack(machine.ira):
        """Lifter that can drop stack pointer updates from call effects."""

        def call_effects(self, addr, instr):
            assignblks, extra = super(IRADelModCallStack,
                                      self).call_effects(addr, instr)
            if not dontmodstack:
                return assignblks, extra
            # Rebuild every assignment block without the stack pointer
            out = []
            for assignblk in assignblks:
                dct = dict(assignblk)
                dct = {
                    dst: src
                    for (dst, src) in viewitems(dct) if dst != self.sp
                }
                out.append(AssignBlock(dct, assignblk.instr))
            return out, extra

    ir_arch = IRADelModCallStack(mdis.loc_db)
    ircfg = ir_arch.new_ircfg_from_asmcfg(asmcfg)

    # Apply expr_simp to both sides of every assignment of every block
    for irb in list(viewvalues(ircfg.blocks)):
        irs = []
        for assignblk in irb:
            new_assignblk = {
                expr_simp(dst): expr_simp(src)
                for dst, src in viewitems(assignblk)
            }
            irs.append(AssignBlock(new_assignblk, instr=assignblk.instr))
        ircfg.blocks[irb.loc_key] = IRBlock(irb.loc_key, irs)

    head = list(entry_points)[0]

    if simplify:
        ircfg_simplifier = IRCFGSimplifierCommon(ir_arch)
        ircfg_simplifier.simplify(ircfg, head)

    if type_graph == TYPE_GRAPH_IR:
        return MiasmIRGraph(self.add_names(ircfg))

    # NOTE(review): this class is defined but not instantiated in this method.
    class IRAOutRegs(machine.ira):
        def get_out_regs(self, block):
            regs_todo = super(IRAOutRegs, self).get_out_regs(block)
            out = {}
            for assignblk in block:
                for dst in assignblk:
                    reg = self.ssa_var.get(dst, None)
                    if reg is None:
                        continue
                    if reg in regs_todo:
                        out[reg] = dst
            return set(viewvalues(out))

    # Add dummy dependency to uncover out regs affectation
    for loc in ircfg.leaves():
        irblock = ircfg.blocks.get(loc)
        if irblock is None:
            continue
        regs = {}
        for reg in ir_arch.get_out_regs(irblock):
            regs[reg] = reg
        assignblks = list(irblock)
        new_assiblk = AssignBlock(regs, assignblks[-1].instr)
        assignblks.append(new_assiblk)
        new_irblock = IRBlock(irblock.loc_key, assignblks)
        ircfg.blocks[loc] = new_irblock

    class CustomIRCFGSimplifierSSA(IRCFGSimplifierSSA):
        """SSA simplifier honouring the ``loadmemint`` and graph type options."""

        def do_simplify(self, ssa, head):
            modified = super(CustomIRCFGSimplifierSSA,
                             self).do_simplify(ssa, head)
            if loadmemint:
                modified |= load_from_int(ssa.graph, bs, is_addr_ro_variable)
            return modified

        def simplify(self, ircfg, head):
            ssa = self.ircfg_to_ssa(ircfg, head)
            ssa = self.do_simplify_loop(ssa, head)

            if type_graph == TYPE_GRAPH_IRSSA:
                ret = ssa.graph
            elif type_graph == TYPE_GRAPH_IRSSAUNSSA:
                ircfg = self.ssa_to_unssa(ssa, head)
                ircfg_simplifier = IRCFGSimplifierCommon(self.ir_arch)
                ircfg_simplifier.simplify(ircfg, head)
                ret = ircfg
            else:
                raise ValueError("Unknown option")
            return ret

    # dirty patch to synchronize nodes and blocks lists in ircfg
    nodes_to_del = [
        node for node in ircfg.nodes() if node not in ircfg.blocks
    ]
    for node in nodes_to_del:
        ircfg.del_node(node)

    head = list(entry_points)[0]
    simplifier = CustomIRCFGSimplifierSSA(ir_arch)
    ircfg = simplifier.simplify(ircfg, head)
    return MiasmIRGraph(self.add_names(ircfg))
bclist.append(bc) i += 1 print("\n>>>>>> MAX No. of VM Nesting Levels =", len(bclist), " <<<<<<") machine = Machine("vmv") addr = 0x0 res_list = [] for i in range(len(bclist)): print("\nLength of VM(%d) bytecode = 0x%x" % (i + 1, len(bclist[i]) // 4)) opcodes = defaultdict() loc_db = LocationDB() mdis = machine.dis_engine(bclist[i], loc_db=loc_db) mdis.dis_block_callback = get_opcodes asmcfg = mdis.dis_multiblock(addr) #asmcfg_dc0 = mdis.dis_multiblock(0xdc0) res_list.append([loc_db, asmcfg]) save_ircfg(asmcfg, "output/vmv_asmcfg%s.dot" % str(i + 1)) #save_ircfg(asmcfg_dc0, "output/vmv_asmcfg_dc0_%s.dot"%str(i+1)) arch_vmv.nest_level += 1 if len(opcodes) == 24: add_new_opcodes(opcodes, i + 1) #print(res_list) nl = 4 # saving the simplified IR for a specific level loc_db = res_list[nl][0]