def test_ClassDef(self):
    # Exercise SymbolicExecutionEngine on an x86-32 IR architecture: memory
    # reads overlapping known/unknown cells, func_read hooks, state updates
    # (apply_change / eval_updt_*), partial overwrites, symbol deletion and
    # accesses that wrap around the end of the address space.
    from miasm2.expression.expression import ExprInt, ExprId, ExprMem, \
        ExprCompose, ExprAssign
    from miasm2.arch.x86.sem import ir_x86_32
    from miasm2.core.locationdb import LocationDB
    from miasm2.ir.symbexec import SymbolicExecutionEngine
    from miasm2.ir.ir import AssignBlock

    loc_db = LocationDB()
    ira = ir_x86_32(loc_db)
    ircfg = ira.new_ircfg()

    id_x = ExprId('x', 32)
    id_a = ExprId('a', 32)
    id_b = ExprId('b', 32)
    id_c = ExprId('c', 32)
    id_d = ExprId('d', 32)
    id_e = ExprId('e', 64)

    # Initial state: four single bytes at 0x4..0x7, 32 bit cells holding
    # symbols or constants, and two cells addressed through id_c
    sb = SymbolicExecutionEngine(
        ira,
        {
            ExprMem(ExprInt(0x4, 32), 8): ExprInt(0x44, 8),
            ExprMem(ExprInt(0x5, 32), 8): ExprInt(0x33, 8),
            ExprMem(ExprInt(0x6, 32), 8): ExprInt(0x22, 8),
            ExprMem(ExprInt(0x7, 32), 8): ExprInt(0x11, 8),
            ExprMem(ExprInt(0x20, 32), 32): id_x,
            ExprMem(ExprInt(0x40, 32), 32): id_x,
            ExprMem(ExprInt(0x44, 32), 32): id_a,
            ExprMem(ExprInt(0x54, 32), 32): ExprInt(0x11223344, 32),
            ExprMem(id_a, 32): ExprInt(0x11223344, 32),
            id_a: ExprInt(0, 32),
            id_b: ExprInt(0, 32),
            ExprMem(id_c, 32): ExprMem(id_d + ExprInt(0x4, 32), 32),
            ExprMem(id_c + ExprInt(0x4, 32), 32): ExprMem(id_d + ExprInt(0x8, 32), 32),
        })

    self.assertEqual(sb.eval_expr(ExprInt(1, 32) - ExprInt(1, 32)), ExprInt(0, 32))

    ## Test with unknown mem + integer
    ## (32 bit reads sliding over the four known bytes at 0x4..0x7)
    self.assertEqual(sb.eval_expr(ExprMem(ExprInt(0, 32), 32)), ExprMem(ExprInt(0, 32), 32))
    self.assertEqual(
        sb.eval_expr(ExprMem(ExprInt(1, 32), 32)),
        ExprCompose(ExprMem(ExprInt(1, 32), 24), ExprInt(0x44, 8)))
    self.assertEqual(
        sb.eval_expr(ExprMem(ExprInt(2, 32), 32)),
        ExprCompose(ExprMem(ExprInt(2, 32), 16), ExprInt(0x3344, 16)))
    self.assertEqual(
        sb.eval_expr(ExprMem(ExprInt(3, 32), 32)),
        ExprCompose(ExprMem(ExprInt(3, 32), 8), ExprInt(0x223344, 24)))
    self.assertEqual(sb.eval_expr(ExprMem(ExprInt(4, 32), 32)), ExprInt(0x11223344, 32))
    self.assertEqual(
        sb.eval_expr(ExprMem(ExprInt(5, 32), 32)),
        ExprCompose(ExprInt(0x112233, 24), ExprMem(ExprInt(8, 32), 8)))
    self.assertEqual(
        sb.eval_expr(ExprMem(ExprInt(6, 32), 32)),
        ExprCompose(ExprInt(0x1122, 16), ExprMem(ExprInt(8, 32), 16)))
    self.assertEqual(
        sb.eval_expr(ExprMem(ExprInt(7, 32), 32)),
        ExprCompose(ExprInt(0x11, 8), ExprMem(ExprInt(8, 32), 24)))
    self.assertEqual(sb.eval_expr(ExprMem(ExprInt(8, 32), 32)), ExprMem(ExprInt(8, 32), 32))

    ## Test with unknown mem + integer
    ## (same sliding reads, over the single 32 bit constant stored at 0x54)
    self.assertEqual(sb.eval_expr(ExprMem(ExprInt(0x50, 32), 32)), ExprMem(ExprInt(0x50, 32), 32))
    self.assertEqual(
        sb.eval_expr(ExprMem(ExprInt(0x51, 32), 32)),
        ExprCompose(ExprMem(ExprInt(0x51, 32), 24), ExprInt(0x44, 8)))
    self.assertEqual(
        sb.eval_expr(ExprMem(ExprInt(0x52, 32), 32)),
        ExprCompose(ExprMem(ExprInt(0x52, 32), 16), ExprInt(0x3344, 16)))
    self.assertEqual(
        sb.eval_expr(ExprMem(ExprInt(0x53, 32), 32)),
        ExprCompose(ExprMem(ExprInt(0x53, 32), 8), ExprInt(0x223344, 24)))
    self.assertEqual(sb.eval_expr(ExprMem(ExprInt(0x54, 32), 32)), ExprInt(0x11223344, 32))
    self.assertEqual(
        sb.eval_expr(ExprMem(ExprInt(0x55, 32), 32)),
        ExprCompose(ExprInt(0x112233, 24), ExprMem(ExprInt(0x58, 32), 8)))
    self.assertEqual(
        sb.eval_expr(ExprMem(ExprInt(0x56, 32), 32)),
        ExprCompose(ExprInt(0x1122, 16), ExprMem(ExprInt(0x58, 32), 16)))
    self.assertEqual(
        sb.eval_expr(ExprMem(ExprInt(0x57, 32), 32)),
        ExprCompose(ExprInt(0x11, 8), ExprMem(ExprInt(0x58, 32), 24)))
    self.assertEqual(sb.eval_expr(ExprMem(ExprInt(0x58, 32), 32)), ExprMem(ExprInt(0x58, 32), 32))

    ## Test with unknown mem + id
    ## (sliding reads over the symbolic value id_x stored at 0x20)
    self.assertEqual(sb.eval_expr(ExprMem(ExprInt(0x1D, 32), 32)),
                     ExprCompose(ExprMem(ExprInt(0x1D, 32), 24), id_x[:8]))
    self.assertEqual(
        sb.eval_expr(ExprMem(ExprInt(0x1E, 32), 32)),
        ExprCompose(ExprMem(ExprInt(0x1E, 32), 16), id_x[:16]))
    self.assertEqual(sb.eval_expr(ExprMem(ExprInt(0x1F, 32), 32)),
                     ExprCompose(ExprMem(ExprInt(0x1F, 32), 8), id_x[:24]))
    self.assertEqual(sb.eval_expr(ExprMem(ExprInt(0x20, 32), 32)), id_x)
    self.assertEqual(sb.eval_expr(ExprMem(ExprInt(0x21, 32), 32)),
                     ExprCompose(id_x[8:], ExprMem(ExprInt(0x24, 32), 8)))
    self.assertEqual(
        sb.eval_expr(ExprMem(ExprInt(0x22, 32), 32)),
        ExprCompose(id_x[16:], ExprMem(ExprInt(0x24, 32), 16)))
    self.assertEqual(
        sb.eval_expr(ExprMem(ExprInt(0x23, 32), 32)),
        ExprCompose(id_x[24:], ExprMem(ExprInt(0x24, 32), 24)))
    self.assertEqual(sb.eval_expr(ExprMem(ExprInt(0x24, 32), 32)), ExprMem(ExprInt(0x24, 32), 32))

    ## Partial read
    self.assertEqual(sb.eval_expr(ExprMem(ExprInt(4, 32), 8)), ExprInt(0x44, 8))
    self.assertEqual(sb.eval_expr(ExprMem(ExprInt(0x20, 32), 8)), id_x[:8])
    self.assertEqual(sb.eval_expr(ExprMem(ExprInt(0x23, 32), 8)), id_x[24:])

    ## Merge
    ## (reads spanning the two adjacent cells at 0x40 and 0x44)
    self.assertEqual(sb.eval_expr(ExprMem(ExprInt(0x40, 32), 64)), ExprCompose(id_x, id_a))
    self.assertEqual(sb.eval_expr(ExprMem(ExprInt(0x42, 32), 32)),
                     ExprCompose(id_x[16:], id_a[:16]))

    # Merge memory
    self.assertEqual(sb.eval_expr(ExprMem(ExprInt(0x100, 32), 32)), ExprMem(ExprInt(0x100, 32), 32))
    self.assertEqual(sb.eval_expr(ExprMem(id_c + ExprInt(0x2, 32), 32)),
                     ExprMem(id_d + ExprInt(0x6, 32), 32))

    ## Func read
    def custom_func_read(mem):
        # Hook: only the 32 bit read at 0x1000 is redirected to id_x
        if mem == ExprMem(ExprInt(0x1000, 32), 32):
            return id_x
        return mem

    sb.func_read = custom_func_read

    ## Unmodified read
    self.assertEqual(sb.eval_expr(ExprMem(ExprInt(4, 32), 8)), ExprInt(0x44, 8))

    ## Modified read
    self.assertEqual(sb.eval_expr(ExprMem(ExprInt(0x1000, 32), 32)), id_x)

    ## Apply_change / eval_ir / apply_expr

    ## x = a (with a = 0x0)
    assignblk = AssignBlock({id_x: id_a})
    sb.eval_updt_assignblk(assignblk)
    self.assertEqual(sb.eval_expr(id_x), ExprInt(0, 32))

    ## x = a (without replacing 'a' with 0x0)
    sb.apply_change(id_x, id_a)
    self.assertEqual(sb.eval_expr(id_x), id_a)

    ## x = a (with a = 0x0)
    self.assertEqual(sb.eval_updt_expr(assignblk.dst2ExprAssign(id_x)), ExprInt(0, 32))
    self.assertEqual(sb.eval_expr(id_x), ExprInt(0, 32))
    self.assertEqual(sb.eval_updt_expr(id_x), ExprInt(0, 32))

    sb.dump()

    ## state
    ## (collect every expression read by the modified (dst, src) pairs)
    reads = set()
    for dst, src in sb.modified():
        reads.update(ExprAssign(dst, src).get_r())

    self.assertEqual(
        reads,
        set([
            id_x,
            id_a,
            ExprMem(id_d + ExprInt(0x4, 32), 32),
            ExprMem(id_d + ExprInt(0x8, 32), 32),
        ]))

    # Erase low id_x byte with 0xFF
    sb.apply_change(ExprMem(ExprInt(0x20, 32), 8), ExprInt(0xFF, 8))
    state = dict(sb.modified(ids=False))
    self.assertEqual(state[ExprMem(ExprInt(0x20, 32), 8)], ExprInt(0xFF, 8))
    self.assertEqual(state[ExprMem(ExprInt(0x21, 32), 24)], id_x[8:32])

    # Erase high id_x byte with 0xEE
    sb.apply_change(ExprMem(ExprInt(0x23, 32), 8), ExprInt(0xEE, 8))
    state = dict(sb.modified(ids=False))
    self.assertEqual(state[ExprMem(ExprInt(0x20, 32), 8)], ExprInt(0xFF, 8))
    self.assertEqual(state[ExprMem(ExprInt(0x21, 32), 16)], id_x[8:24])
    self.assertEqual(state[ExprMem(ExprInt(0x23, 32), 8)], ExprInt(0xEE, 8))

    self.assertEqual(
        sb.eval_expr(ExprMem(ExprInt(0x22, 32), 32)),
        ExprCompose(id_x[16:24], ExprInt(0xEE, 8), ExprMem(ExprInt(0x24, 32), 16)))

    # Erase low byte of 0x11223344 with 0xFF at 0x54
    sb.apply_change(ExprMem(ExprInt(0x54, 32), 8), ExprInt(0xFF, 8))

    # Overwrite the byte at offset +1 of the 0x11223344 stored at id_a with
    # 0xFF; the two bytes above it must remain available as 0x1122
    sb.apply_change(ExprMem(id_a + ExprInt(0x1, 32), 8), ExprInt(0xFF, 8))
    state = dict(sb.modified(ids=False))
    self.assertEqual(state[ExprMem(id_a + ExprInt(0x1, 32), 8)], ExprInt(0xFF, 8))
    self.assertEqual(state[ExprMem(id_a + ExprInt(0x2, 32), 16)], ExprInt(0x1122, 16))

    # Write uint32_t at 0xFFFFFFFE
    # (the access wraps: its upper 16 bits are then read back at address 0)
    sb.apply_change(ExprMem(ExprInt(0xFFFFFFFE, 32), 32), ExprInt(0x11223344, 32))
    self.assertEqual(sb.eval_expr(ExprMem(ExprInt(0, 32), 16)), ExprInt(0x1122, 16))

    # Revert memory to original value at 0x42
    sb.apply_change(ExprMem(ExprInt(0x42, 32), 32), ExprMem(ExprInt(0x42, 32), 32))
    self.assertEqual(sb.eval_expr(ExprMem(ExprInt(0x42, 32), 32)), ExprMem(ExprInt(0x42, 32), 32))

    # Revert memory to original value at c + 0x2
    sb.apply_change(ExprMem(id_c + ExprInt(0x2, 32), 32), ExprMem(id_c + ExprInt(0x2, 32), 32))
    self.assertEqual(sb.eval_expr(ExprMem(id_c + ExprInt(0x2, 32), 32)),
                     ExprMem(id_c + ExprInt(0x2, 32), 32))

    # Test del symbol
    del sb.symbols[id_a]
    sb.dump()
    del sb.symbols[ExprMem(id_a, 8)]
    print "*" * 40, 'Orig:'
    sb.dump()

    sb_cp = sb.symbols.copy()
    print "*" * 40, 'Copy:'
    sb_cp.dump()

    # Add symbol at address limit
    sb.apply_change(ExprMem(ExprInt(0xFFFFFFFE, 32), 32), id_c)
    sb.dump()
    found = False
    for dst, src in sb.symbols.iteritems():
        if dst == ExprMem(ExprInt(0xFFFFFFFE, 32), 32) and src == id_c:
            found = True
    assert found

    # Add symbol at address limit
    sb.apply_change(ExprMem(ExprInt(0x7FFFFFFE, 32), 32), id_c)
    sb.dump()
    found = False
    for dst, src in sb.symbols.iteritems():
        if dst == ExprMem(ExprInt(0x7FFFFFFE, 32), 32) and src == id_c:
            found = True
    assert found

    # Add truncated symbol at address limit
    sb.apply_change(ExprMem(ExprInt(0xFFFFFFFC, 32), 64), id_e)
    # Revert parts of memory
    sb.apply_change(ExprMem(ExprInt(0xFFFFFFFC, 32), 16), ExprMem(ExprInt(0xFFFFFFFC, 32), 16))
    sb.apply_change(ExprMem(ExprInt(0x2, 32), 16), ExprMem(ExprInt(0x2, 32), 16))
    sb.dump()
    # What is left of id_e must be exposed as one 32 bit cell at 0xFFFFFFFE
    found = False
    for dst, src in sb.symbols.iteritems():
        if dst == ExprMem(ExprInt(0xFFFFFFFE, 32), 32) and src == id_e[16:48]:
            found = True
    assert found

    sb_empty = SymbolicExecutionEngine(ira)
    sb_empty.dump()

    # Test memory full
    print 'full'
    arch_addr8 = ir_x86_32(loc_db)
    ircfg = arch_addr8.new_ircfg()
    # Hack to obtain tiny address space
    arch_addr8.addrsize = 5
    sb_addr8 = SymbolicExecutionEngine(arch_addr8)
    sb_addr8.dump()
    # Fulfill memory
    sb_addr8.apply_change(ExprMem(ExprInt(0, 5), 256), ExprInt(0, 256))
    sb_addr8.dump()
    variables = sb_addr8.symbols.items()
    assert variables == [(ExprMem(ExprInt(0, 5), 256), ExprInt(0, 256))]

    print sb_addr8.symbols.symbols_mem

    # Same 256 bit write at a non-zero base: it must replace the previous
    # cell and be stored as a single entry
    sb_addr8.apply_change(ExprMem(ExprInt(0x5, 5), 256), ExprInt(0x123, 256))
    sb_addr8.dump()
    variables = sb_addr8.symbols.items()
    assert variables == [(ExprMem(ExprInt(0x5, 5), 256), ExprInt(0x123, 256))]
    print sb_addr8.symbols.symbols_mem

    print 'dump'
    sb_addr8.symbols.symbols_mem.dump()

    sb.dump()
    # Deleting a range that is not entirely known must raise KeyError
    try:
        del sb.symbols.symbols_mem[ExprMem(ExprInt(0xFFFFFFFF, 32), 32)]
    except KeyError:
        # ok
        pass
    else:
        raise RuntimeError("Should raise error!")

    del sb.symbols.symbols_mem[ExprMem(ExprInt(0xFFFFFFFF, 32), 16)]
    sb.dump()
    self.assertEqual(
        sb.eval_expr(ExprMem(ExprInt(0xFFFFFFFE, 32), 32)),
        ExprCompose(id_e[16:24], ExprMem(ExprInt(0xFFFFFFFF, 32), 16), id_e[40:48]))
    # delete_partial accepts a range that is only partly known
    sb.symbols.symbols_mem.delete_partial(
        ExprMem(ExprInt(0xFFFFFFFF, 32), 32))

    self.assertEqual(
        sb.eval_expr(ExprMem(ExprInt(0xFFFFFFFE, 32), 32)),
        ExprCompose(id_e[16:24], ExprMem(ExprInt(0xFFFFFFFF, 32), 24)))

    sb.dump()

    assert ExprMem(ExprInt(0xFFFFFFFE, 32), 8) in sb.symbols
    assert ExprMem(ExprInt(0xFFFFFFFE, 32), 32) not in sb.symbols
    assert sb.symbols.symbols_mem.contains_partial(
        ExprMem(ExprInt(0xFFFFFFFE, 32), 32))
    assert not sb.symbols.symbols_mem.contains_partial(
        ExprMem(ExprInt(0xFFFFFFFF, 32), 8))

    assert sb_addr8.symbols.keys() == [ExprMem(ExprInt(0x5, 5), 256)]
def analyse_function(): # Get settings settings = TypePropagationForm() ret = settings.Execute() if not ret: return end = None if settings.cScope.value == 0: addr = settings.functionAddr.value else: addr = settings.startAddr.value if settings.cScope.value == 2: end = settings.endAddr # Init machine = guess_machine(addr=addr) mn, dis_engine, ira = machine.mn, machine.dis_engine, machine.ira bs = bin_stream_ida() mdis = dis_engine(bs, dont_dis_nulstart_bloc=True) if end is not None: mdis.dont_dis = [end] iraCallStackFixer = get_ira_call_fixer(ira) ir_arch = iraCallStackFixer(mdis.loc_db) asmcfg = mdis.dis_multiblock(addr) # Generate IR ircfg = ir_arch.new_ircfg_from_asmcfg(asmcfg) cst_propag_link = {} if settings.cUnalias.value: init_infos = {ir_arch.sp: ir_arch.arch.regs.regs_init[ir_arch.sp]} cst_propag_link = propagate_cst_expr(ir_arch, ircfg, addr, init_infos) types_mngr = get_types_mngr(settings.headerFile.value, settings.arch.value) mychandler = MyCHandler(types_mngr, {}) infos_types = {} infos_types_raw = [] if settings.cTypeFile.value: infos_types_raw = open(settings.typeFile.value).read().split('\n') else: infos_types_raw = settings.strTypesInfo.value.split('\n') for line in infos_types_raw: if not line: continue expr_str, ctype_str = line.split(':') expr_str, ctype_str = expr_str.strip(), ctype_str.strip() expr = str_to_expr(expr_str) ast = mychandler.types_mngr.types_ast.parse_c_type(ctype_str) ctype = mychandler.types_mngr.types_ast.ast_parse_declaration( ast.ext[0]) objc = types_mngr.get_objc(ctype) print '=' * 20 print expr, objc infos_types[expr] = set([objc]) # Add fake head lbl_real_start = ir_arch.loc_db.get_offset_location(addr) lbl_head = ir_arch.loc_db.get_or_create_name_location("start") first_block = asmcfg.label2block(lbl_real_start) assignblk_head = AssignBlock([ ExprAff(ir_arch.IRDst, ExprLoc(lbl_real_start, ir_arch.IRDst.size)), ExprAff(ir_arch.sp, ir_arch.arch.regs.regs_init[ir_arch.sp]) ], first_block.lines[0]) irb_head = 
IRBlock(lbl_head, [assignblk_head]) ircfg.blocks[lbl_head] = irb_head ircfg.add_uniq_edge(lbl_head, lbl_real_start) state = TypePropagationEngine.StateEngine(infos_types) states = {lbl_head: state} todo = set([lbl_head]) done = set() while todo: lbl = todo.pop() state = states[lbl] if (lbl, state) in done: continue done.add((lbl, state)) if lbl not in ircfg.blocks: continue symbexec_engine = TypePropagationEngine(ir_arch, types_mngr, state) addr = symbexec_engine.run_block_at(ircfg, lbl) symbexec_engine.del_mem_above_stack(ir_arch.sp) sons = ircfg.successors(lbl) for son in sons: add_state(ircfg, todo, states, son, symbexec_engine.get_state()) for lbl, state in states.iteritems(): if lbl not in ircfg.blocks: continue symbexec_engine = CTypeEngineFixer(ir_arch, types_mngr, state, cst_propag_link) addr = symbexec_engine.run_block_at(ircfg, lbl) symbexec_engine.del_mem_above_stack(ir_arch.sp)
def build_graph(start_addr, type_graph, simplify=False, dontmodstack=True, loadint=False, verbose=False): machine = guess_machine(addr=start_addr) dis_engine, ira = machine.dis_engine, machine.ira class IRADelModCallStack(ira): def call_effects(self, addr, instr): assignblks, extra = super(IRADelModCallStack, self).call_effects(addr, instr) if not dontmodstack: return assignblks, extra out = [] for assignblk in assignblks: dct = dict(assignblk) dct = { dst: src for (dst, src) in dct.iteritems() if dst != self.sp } out.append(AssignBlock(dct, assignblk.instr)) return out, extra if verbose: print "Arch", dis_engine fname = idc.GetInputFile() if verbose: print fname bs = bin_stream_ida() mdis = dis_engine(bs) ir_arch = IRADelModCallStack(mdis.loc_db) # populate symbols with ida names for addr, name in idautils.Names(): if name is None: continue if (mdis.loc_db.get_offset_location(addr) or mdis.loc_db.get_name_location(name)): # Symbol alias continue mdis.loc_db.add_location(name, addr) if verbose: print "start disasm" if verbose: print hex(start_addr) asmcfg = mdis.dis_multiblock(start_addr) entry_points = set([mdis.loc_db.get_offset_location(start_addr)]) if verbose: print "generating graph" open('asm_flow.dot', 'w').write(asmcfg.dot()) print "generating IR... %x" % start_addr ircfg = ir_arch.new_ircfg_from_asmcfg(asmcfg) if verbose: print "IR ok... 
%x" % start_addr for irb in ircfg.blocks.itervalues(): irs = [] for assignblk in irb: new_assignblk = { expr_simp(dst): expr_simp(src) for dst, src in assignblk.iteritems() } irs.append(AssignBlock(new_assignblk, instr=assignblk.instr)) ircfg.blocks[irb.loc_key] = IRBlock(irb.loc_key, irs) if verbose: out = ircfg.dot() open(os.path.join(tempfile.gettempdir(), 'graph.dot'), 'wb').write(out) title = "Miasm IR graph" if simplify: dead_simp(ir_arch, ircfg) ircfg.simplify(expr_simp) modified = True while modified: modified = False modified |= dead_simp(ir_arch, ircfg) modified |= remove_empty_assignblks(ircfg) modified |= merge_blocks(ircfg, entry_points) title += " (simplified)" if type_graph == TYPE_GRAPH_IR: graph = GraphMiasmIR(ircfg, title, None) graph.Show() return head = list(entry_points)[0] class IRAOutRegs(ira): def get_out_regs(self, block): regs_todo = super(IRAOutRegs, self).get_out_regs(block) out = {} for assignblk in block: for dst in assignblk: reg = self.ssa_var.get(dst, None) if reg is None: continue if reg in regs_todo: out[reg] = dst return set(out.values()) # Add dummy dependency to uncover out regs affectation for loc in ircfg.leaves(): irblock = ircfg.blocks.get(loc) if irblock is None: continue regs = {} for reg in ir_arch.get_out_regs(irblock): regs[reg] = reg assignblks = list(irblock) new_assiblk = AssignBlock(regs, assignblks[-1].instr) assignblks.append(new_assiblk) new_irblock = IRBlock(irblock.loc_key, assignblks) ircfg.blocks[loc] = new_irblock ir_arch = IRAOutRegs(mdis.loc_db) ir_arch.ssa_var = {} modified = True ssa_forbidden_regs = set( [ir_arch.pc, ir_arch.IRDst, ir_arch.arch.regs.exception_flags]) head = list(entry_points)[0] heads = set([head]) all_ssa_vars = {} propagate_expr = PropagateExpr() ssa = SSADiGraph(ircfg) ssa.immutable_ids.update(ssa_forbidden_regs) ssa.ssa_variable_to_expr.update(all_ssa_vars) ssa.transform(head) all_ssa_vars.update(ssa.ssa_variable_to_expr) ir_arch.ssa_var.update(ssa.ssa_variable_to_expr) if 
simplify: while modified: ssa = SSADiGraph(ircfg) ssa.immutable_ids.update(ssa_forbidden_regs) ssa.ssa_variable_to_expr.update(all_ssa_vars) ssa.transform(head) all_ssa_vars.update(ssa.ssa_variable_to_expr) ir_arch.ssa_var.update(ssa.ssa_variable_to_expr) while modified: modified = False modified |= propagate_expr.propagate(ssa, head) modified |= ircfg.simplify(expr_simp) simp_modified = True while simp_modified: simp_modified = False simp_modified |= dead_simp(ir_arch, ircfg) simp_modified |= remove_empty_assignblks(ircfg) simp_modified |= load_from_int(ircfg, bs, is_addr_ro_variable) modified |= simp_modified ssa = SSADiGraph(ircfg) ssa.immutable_ids.update(ssa_forbidden_regs) ssa.ssa_variable_to_expr.update(all_ssa_vars) ssa.transform(head) all_ssa_vars.update(ssa.ssa_variable_to_expr) if type_graph == TYPE_GRAPH_IRSSA: graph = GraphMiasmIR(ssa.graph, title, None) graph.Show() return if type_graph == TYPE_GRAPH_IRSSAUNSSA: cfg_liveness = DiGraphLivenessSSA(ssa.graph) cfg_liveness.init_var_info(ir_arch) cfg_liveness.compute_liveness() UnSSADiGraph(ssa, head, cfg_liveness) if simplify: modified = True while modified: modified = False modified |= ssa.graph.simplify(expr_simp) simp_modified = True while simp_modified: simp_modified = False simp_modified |= dead_simp(ir_arch, ssa.graph) simp_modified |= remove_empty_assignblks(ssa.graph) simp_modified |= merge_blocks(ssa.graph, heads) modified |= simp_modified graph = GraphMiasmIR(ssa.graph, title, None) graph.Show()
def launch_depgraph(): global graphs, comments, sol_nb, settings, addr, ir_arch # Init machine = guess_machine() mn, dis_engine, ira = machine.mn, machine.dis_engine, machine.ira bs = bin_stream_ida() mdis = dis_engine(bs, dont_dis_nulstart_bloc=True) ir_arch = ira(mdis.symbol_pool) # Populate symbols with ida names for ad, name in idautils.Names(): if name is None: continue mdis.symbol_pool.add_location(name, ad) # Get the current function addr = idc.ScreenEA() func = ida_funcs.get_func(addr) asmcfg = mdis.dis_multiblock(func.startEA) # Generate IR for block in asmcfg.blocks: ir_arch.add_block(block) # Get settings settings = depGraphSettingsForm(ir_arch) settings.Execute() loc_key, elements, line_nb = settings.loc_key, settings.elements, settings.line_nb # Simplify affectations for irb in ir_arch.blocks.values(): irs = [] offset = ir_arch.symbol_pool.loc_key_to_offset(irb.loc_key) fix_stack = offset is not None and settings.unalias_stack for assignblk in irb: if fix_stack: stk_high = m2_expr.ExprInt(idc.GetSpd(assignblk.instr.offset), ir_arch.sp.size) fix_dct = { ir_arch.sp: mn.regs.regs_init[ir_arch.sp] + stk_high } new_assignblk = {} for dst, src in assignblk.iteritems(): if fix_stack: src = src.replace_expr(fix_dct) if dst != ir_arch.sp: dst = dst.replace_expr(fix_dct) dst, src = expr_simp(dst), expr_simp(src) new_assignblk[dst] = src irs.append(AssignBlock(new_assignblk, instr=assignblk.instr)) ir_arch.blocks[irb.loc_key] = IRBlock(irb.loc_key, irs) # Get dependency graphs dg = settings.depgraph graphs = dg.get(loc_key, elements, line_nb, set([ir_arch.symbol_pool.getby_offset(func.startEA)])) # Display the result comments = {} sol_nb = 0 # Register and launch ida_kernwin.add_hotkey("Shift-N", next_element) treat_element()
if reg is None: continue if reg in regs_todo: out[reg] = dst return set(out.values()) # Add dummy dependency to uncover out regs assignment for loc in ircfg_a.leaves(): irblock = ircfg_a.blocks.get(loc) if irblock is None: continue regs = {} for reg in ir_arch_a.get_out_regs(irblock): regs[reg] = reg assignblks = list(irblock) new_assiblk = AssignBlock(regs, assignblks[-1].instr) assignblks.append(new_assiblk) new_irblock = IRBlock(irblock.loc_key, assignblks) ircfg_a.blocks[loc] = new_irblock ir_arch_a = IRAOutRegs(mdis.loc_db) def is_addr_ro_variable(bs, addr, size): """ Return True if address at @addr is a read-only variable. WARNING: Quick & Dirty @addr: integer representing the address of the variable @size: size in bits
def mn_do_store(ir, instr, arg1, arg2, arg3=None):
    """Translate a store instruction (mnemonic starting with 'ST') to IR.

    The third letter of the mnemonic gives the access width ('B': 8 bits,
    'H': 16, 'W': 32). The following letters select the variant: 'B' or 'R'
    byte-swap the stored value, 'U' also writes the address back, 'C' or '.'
    make the store depend on `reserve`; 'X' and 'Z' are accepted as is.

    @arg1: value to store (its low `size` bits are used)
    @arg2: ExprMem destination when @arg3 is None, else first address term
    @arg3: optional second address term
    Returns a tuple (list of assignments, list of extra IRBlock).
    Raises RuntimeError for 'STS...' mnemonics, which are not implemented.
    """
    mnemonic = instr.name
    assert mnemonic[0:2] == 'ST'

    width_letter = mnemonic[2]
    if width_letter == 'S':
        raise RuntimeError("STSWI, and STSWX need implementing")
    size = {'B': 8, 'H': 16, 'W': 32}[width_letter]

    # Decode the variant letters
    byte_reversed = False
    update_base = False
    conditional = False
    for letter in mnemonic[3:]:
        if letter in ('B', 'R'):
            byte_reversed = True
        elif letter == 'U':
            update_base = True
        elif letter in ('C', '.'):
            conditional = True
        elif letter not in ('X', 'Z'):
            # 'X' and 'Z' need nothing more here; any other letter is unknown
            assert False

    # Effective address
    if arg3 is not None:
        address = arg2 + arg3
    else:
        assert isinstance(arg2, ExprMem)
        address = arg2.arg

    dest = ExprMem(address, size)
    value = arg1[0:size]
    if byte_reversed:
        value = byte_swap(value)

    ret = [ExprAff(dest, value)]
    if update_base:
        # Write the address back to the first term of the address expression
        base = arg2.arg.args[0] if arg3 is None else arg2
        ret.append(ExprAff(base, address))

    if not conditional:
        return ret, []

    # Conditional store: the store is moved to a dedicated block, reached
    # only when `reserve` is set
    loc_do = ExprLoc(ir.loc_db.add_location(), ir.IRDst.size)
    loc_dont = ExprLoc(ir.loc_db.add_location(), ir.IRDst.size)
    loc_next = ExprLoc(ir.get_next_loc_key(instr), ir.IRDst.size)
    flags = [
        ExprAff(CR0_LT, ExprInt(0, 1)),
        ExprAff(CR0_GT, ExprInt(0, 1)),
        ExprAff(CR0_SO, XER_SO)
    ]
    do_store = ret + flags + [
        ExprAff(CR0_EQ, ExprInt(1, 1)),
        ExprAff(ir.IRDst, loc_next)
    ]
    dont_store = flags + [
        ExprAff(CR0_EQ, ExprInt(0, 1)),
        ExprAff(ir.IRDst, loc_next)
    ]
    additional_ir = [
        IRBlock(loc_do, [AssignBlock(do_store)]),
        IRBlock(loc_dont, [AssignBlock(dont_store)])
    ]
    dispatch = [
        ExprAff(reserve, ExprInt(0, 1)),
        ExprAff(ir.IRDst, ExprCond(reserve, loc_do, loc_dont))
    ]
    return dispatch, additional_ir
def remove_phi(ssa, head):
    """
    Naive Phi elimination: every Phi destination and its SSA sources are
    replaced by one fresh 'varN' variable, then the Phi assignment is deleted.
    A source which is not an SSA variable is instead assigned to the new
    variable in the first assignblock of @head.
    Note: The _ssa_variable_to_expr must be up to date

    @ssa: a SSADiGraph instance
    @head: the loc_key of the graph head
    """
    phivar2var = {}
    all_ssa_vars = ssa._ssa_variable_to_expr

    # Retrieve Phi nodes: one (loc_key, assignblock index) entry per Phi
    phi_positions = [
        (irblock.loc_key, index)
        for irblock in ssa.graph.blocks.itervalues()
        for index, assignblk in enumerate(irblock)
        for _, src in assignblk.iteritems()
        if src.is_op('Phi')
    ]

    for phi_loc, phi_index in phi_positions:
        phi_dct = get_assignblk(ssa.graph, phi_loc, phi_index)
        phi_item = next(
            (item for item in phi_dct.iteritems() if item[1].is_op('Phi')),
            None
        )
        if phi_item is None:
            raise RuntimeError('Cannot find phi?')
        var, node_src = phi_item

        # Create new variable
        new_var = ExprId('var%d' % len(phivar2var), var.size)
        phivar2var[var] = new_var

        # Place var init for non ssa variables
        phi_sources = set(node_src.args)
        non_ssa_sources = set()
        for phi_source in list(phi_sources):
            if phi_source in all_ssa_vars.union(phivar2var.values()):
                continue
            head_dct = get_assignblk(ssa.graph, head, 0)
            head_dct[new_var] = phi_source
            ssa.graph.blocks[head] = set_assignblk(ssa.graph, head, 0, head_dct)
            non_ssa_sources.add(phi_source)
        phi_sources -= non_ssa_sources

        # Replace variables: the Phi destination and its remaining sources
        to_replace_dct = dict.fromkeys(set([var]) | phi_sources, new_var)
        for loc in ssa.graph.blocks:
            rewritten = []
            for assignblk in ssa.graph.blocks[loc]:
                replaced = {}
                for dst, src in assignblk.iteritems():
                    new_dst = dst.replace_expr(to_replace_dct)
                    replaced[new_dst] = src.replace_expr(to_replace_dct)
                rewritten.append(AssignBlock(replaced, assignblk.instr))
            ssa.graph.blocks[loc] = IRBlock(loc, rewritten)

        # Remove phi (its destination is now new_var)
        phi_dct = get_assignblk(ssa.graph, phi_loc, phi_index)
        del phi_dct[new_var]
        ssa.graph.blocks[phi_loc] = set_assignblk(
            ssa.graph, phi_loc, phi_index, phi_dct)
def analyse_function(): # Init machine = guess_machine() mn, dis_engine, ira = machine.mn, machine.dis_engine, machine.ira bs = bin_stream_ida() mdis = dis_engine(bs, dont_dis_nulstart_bloc=True) iraCallStackFixer = get_ira_call_fixer(ira) ir_arch = iraCallStackFixer(mdis.symbol_pool) # Get the current function func = ida_funcs.get_func(idc.ScreenEA()) addr = func.startEA blocks = mdis.dis_multiblock(addr) # Generate IR for block in blocks: ir_arch.add_block(block) # Get settings settings = TypePropagationForm(ir_arch) ret = settings.Execute() if not ret: return cst_propag_link = {} if settings.cUnalias.value: init_infos = {ir_arch.sp: ir_arch.arch.regs.regs_init[ir_arch.sp]} cst_propag_link = propagate_cst_expr(ir_arch, addr, init_infos) types_mngr = get_types_mngr(settings.headerFile.value, settings.arch.value) mychandler = MyCHandler(types_mngr, {}) infos_types = {} for line in settings.strTypesInfo.value.split('\n'): if not line: continue expr_str, ctype_str = line.split(':') expr_str, ctype_str = expr_str.strip(), ctype_str.strip() expr = str_to_expr(expr_str) ast = mychandler.types_mngr.types_ast.parse_c_type(ctype_str) ctype = mychandler.types_mngr.types_ast.ast_parse_declaration( ast.ext[0]) objc = types_mngr.get_objc(ctype) print '=' * 20 print expr, objc infos_types[expr] = set([objc]) # Add fake head lbl_real_start = ir_arch.symbol_pool.getby_offset(addr) lbl_head = ir_arch.symbol_pool.getby_name_create("start") first_block = blocks.label2block(lbl_real_start) assignblk_head = AssignBlock([ ExprAff(ir_arch.IRDst, ExprId(lbl_real_start, ir_arch.IRDst.size)), ExprAff(ir_arch.sp, ir_arch.arch.regs.regs_init[ir_arch.sp]) ], first_block.lines[0]) irb_head = IRBlock(lbl_head, [assignblk_head]) ir_arch.blocks[lbl_head] = irb_head ir_arch.graph.add_uniq_edge(lbl_head, lbl_real_start) state = TypePropagationEngine.StateEngine(infos_types) states = {lbl_head: state} todo = set([lbl_head]) done = set() while todo: lbl = todo.pop() state = states[lbl] if (lbl, 
state) in done: continue done.add((lbl, state)) if lbl not in ir_arch.blocks: continue symbexec_engine = TypePropagationEngine(ir_arch, types_mngr, state) addr = symbexec_engine.emul_ir_block(lbl) symbexec_engine.del_mem_above_stack(ir_arch.sp) ir_arch._graph = None sons = ir_arch.graph.successors(lbl) for son in sons: add_state(ir_arch, todo, states, son, symbexec_engine.get_state()) for lbl, state in states.iteritems(): if lbl not in ir_arch.blocks: continue symbexec_engine = CTypeEngineFixer(ir_arch, types_mngr, state, cst_propag_link) addr = symbexec_engine.emul_ir_block(lbl) symbexec_engine.del_mem_above_stack(ir_arch.sp)
def as_assignblock(self):
    """Return the current state as an AssignBlock"""
    # Map each destination reported by modified() to its current value
    state = {}
    for dst in self.modified():
        state[dst] = self.symbols[dst]
    return AssignBlock(state)
def do_it_block(self, loc, index, block, assignments, gen_pc_updt):
    """Generate the IR blocks of an IT instruction and of the instructions
    it makes conditional.

    @loc: loc_key of the IR block being built
    @index: index of the IT instruction in @block.lines
    @block: the asm block being lifted
    @assignments: assignblocks already generated for @loc; the jump to the
        block that follows the IT instruction is appended to this list
    @gen_pc_updt: forwarded to add_instr_to_current_state

    For each hint, one line of @block is consumed: a block holding a
    conditional jump is emitted, followed by the blocks of the instruction.
    Flag assignments (zf, nf, of, cf) are dropped from these, unless the
    instruction is CMP, CMN or TST.

    Returns (index of the last consumed line, list of lists of IRBlock).
    Raises NotImplementedError if the conditional instructions are not all
    in @block, or if one of them requires a block split.
    """

    def drop_flag_updates(assignblk):
        # Copy of @assignblk without the zf/nf/of/cf destinations
        return AssignBlock(
            {
                dst: src for (dst, src) in assignblk.iteritems()
                if dst not in [zf, nf, of, cf]
            },
            assignblk.instr
        )

    instr = block.lines[index]
    it_hints, it_cond = self.parse_itt(instr)
    cond_num = cond_dct_inv[it_cond.name]
    cond_eq = tab_cond[cond_num]

    # The hints consume lines index + 1 .. index + len(it_hints): the last
    # one must still be a valid index of block.lines
    if index + len(it_hints) >= len(block.lines):
        raise NotImplementedError("Split IT block non supported yet")

    ir_blocks_all = []

    # Gen dummy irblock for IT instr
    loc_next = self.get_next_loc_key(instr)
    dst = ExprAssign(self.IRDst, ExprLoc(loc_next, 32))
    dst_blk = AssignBlock([dst], instr)
    assignments.append(dst_blk)
    irblock = IRBlock(loc, assignments)
    ir_blocks_all.append([irblock])

    loc = loc_next
    assignments = []
    for hint in it_hints:
        irblocks = []
        index += 1
        instr = block.lines[index]

        # Add conditional jump to current irblock
        loc_do = self.loc_db.add_location()
        loc_next = self.get_next_loc_key(instr)

        if hint:
            local_cond = ~cond_eq
        else:
            local_cond = cond_eq
        dst = ExprAssign(
            self.IRDst,
            ExprCond(local_cond, ExprLoc(loc_do, 32), ExprLoc(loc_next, 32)))
        dst_blk = AssignBlock([dst], instr)
        assignments.append(dst_blk)
        irblock = IRBlock(loc, assignments)

        irblocks.append(irblock)

        it_instr_irblocks = []
        assignments = []
        loc = loc_do

        split = self.add_instr_to_current_state(instr, block, assignments,
                                                it_instr_irblocks, gen_pc_updt)
        if split:
            raise NotImplementedError(
                "Unsupported instr in IT block (%s)" % instr)

        if it_instr_irblocks:
            assert len(it_instr_irblocks) == 1
            it_instr_irblocks = it_instr_irblocks.pop()
        # Remove flags assignment if instr != [CMP, CMN, TST]
        if instr.name not in ["CMP", "CMN", "TST"]:
            # Fix assignments
            assignments = [
                drop_flag_updates(assignment) for assignment in assignments
            ]
            # Fix extra irblocks: filter each assignblock of each block
            # (and not a leftover variable of the loop above)
            it_instr_irblocks = [
                IRBlock(
                    extra_irblock.loc_key,
                    [drop_flag_updates(assignment)
                     for assignment in extra_irblock]
                )
                for extra_irblock in it_instr_irblocks
            ]

        irblocks += it_instr_irblocks
        dst = ExprAssign(self.IRDst, ExprLoc(loc_next, 32))
        dst_blk = AssignBlock([dst], instr)
        assignments.append(dst_blk)
        irblock = IRBlock(loc, assignments)
        irblocks.append(irblock)
        loc = loc_next
        assignments = []
        ir_blocks_all.append(irblocks)
    return index, ir_blocks_all
from miasm2.expression.expression import *
from miasm2.ir.ir import AssignBlock
from miasm2.expression.simplifications import expr_simp

# Expressions shared by the checks below
id_a = ExprId("a")
id_b = ExprId("b")
int0 = ExprInt(0, id_a.size)

# Test AssignBlock
## Constructors
## (from a list of ExprAff, and from a {dst: src} dictionary)
assignblk1 = AssignBlock([ExprAff(id_a, id_b)])
assignblk2 = AssignBlock({id_a: id_b})

## Equality
## (same content compares equal, whichever constructor form was used)
assignblk1_bis = AssignBlock([ExprAff(id_a, id_b)])
assert assignblk1 == assignblk1_bis
assert assignblk1 == assignblk2

## Immutability
## (item assignment and deletion are both expected to raise RuntimeError;
## the `else` branches fail the test when no exception was raised)
try:
    assignblk1[id_a] = id_a
except RuntimeError:
    pass
else:
    raise RuntimeError("An error was expected")
try:
    del assignblk1[id_a]
except RuntimeError:
    pass
else:
    raise RuntimeError("An error was expected")
for irb in ir_arch.blocks.values(): fix_stack = irb.label.offset is not None and settings.unalias_stack for i, assignblk in enumerate(irb.irs): if fix_stack: stk_high = m2_expr.ExprInt(GetSpd(irb.irs[i].instr.offset), ir_arch.sp.size) fix_dct = {ir_arch.sp: mn.regs.regs_init[ir_arch.sp] + stk_high} new_assignblk = {} for dst, src in assignblk.iteritems(): if fix_stack: src = src.replace_expr(fix_dct) if dst != ir_arch.sp: dst = dst.replace_expr(fix_dct) dst, src = expr_simp(dst), expr_simp(src) new_assignblk[dst] = src irb.irs[i] = AssignBlock(new_assignblk, instr=assignblk.instr) # Get dependency graphs dg = settings.depgraph graphs = dg.get(label, elements, line_nb, set([ir_arch.symbol_pool.getby_offset(func.startEA)])) # Display the result comments = {} sol_nb = 0 def clean_lines(): "Remove previous comments" global comments for offset in comments: SetColor(offset, CIC_ITEM, 0xffffff)
def mn_do_store(ir, instr, arg1, arg2, arg3=None):
    """Build the IR of a store instruction (mnemonic starting with 'ST').

    The character following 'ST' selects the access width ('B': 8 bits,
    'H': 16 bits, 'W': 32 bits) or, for 'S', delegates to mn_do_stswi.
    The remaining characters of the mnemonic are flags:
    'B' / 'R' -> the stored value is byte swapped
    'U'       -> the address operand is also assigned the computed address
    'C' / '.' -> conditional store, driven by the `reserve` flag
    'X' / 'Z' -> nothing more to do here

    @ir: IR builder, used for label generation and for IRDst
    @instr: instruction being translated
    @arg1: source expression; its low `size` bits are stored
    @arg2: ExprMem destination when @arg3 is None, else first address term
    @arg3: (optional) second address term, added to @arg2

    Return a tuple (list of ExprAff, list of additional IRBlock)
    """
    assert instr.name[0:2] == 'ST'

    width_code = instr.name[2]
    if width_code == 'S':
        return mn_do_stswi(ir, instr, arg1, arg2, arg3)

    size = {'B': 8, 'H': 16, 'W': 32}[width_code]

    # Decode the trailing flag characters of the mnemonic
    byte_reversed = False
    with_update = False
    conditional = False
    for suffix in instr.name[3:]:
        if suffix in ('B', 'R'):
            byte_reversed = True
        elif suffix == 'U':
            with_update = True
        elif suffix in ('C', '.'):
            conditional = True
        elif suffix not in ('X', 'Z'):
            # 'X' and 'Z' are taken care of earlier; anything else is
            # unexpected
            assert False

    # Effective address
    if arg3 is not None:
        address = arg2 + arg3
    else:
        assert isinstance(arg2, ExprMem)
        address = arg2.arg

    dest = ExprMem(address, size)
    src = arg1[0:size]
    if byte_reversed:
        src = byte_swap(src)

    ret = [ExprAff(dest, src)]

    if with_update:
        # NOTE(review): a commented-out earlier form assigned to
        # arg2.arg.args[0] in the one-operand case; confirm that writing to
        # arg2.arg is what is intended here.
        updated = arg2.arg if arg3 is None else arg2
        ret.append(ExprAff(updated, address))

    additional_ir = []
    if conditional:
        dst_size = ir.IRDst.size
        lbl_do = ExprId(ir.gen_label(), dst_size)
        lbl_dont = ExprId(ir.gen_label(), dst_size)
        lbl_next = ExprId(ir.get_next_label(instr), dst_size)

        flags = [
            ExprAff(CR0_LT, ExprInt(0, 1)),
            ExprAff(CR0_GT, ExprInt(0, 1)),
            ExprAff(CR0_SO, XER_SO),
        ]
        # Block doing the store: store, flags, CR0_EQ set, then fall through
        do_assigns = ret + flags + [
            ExprAff(CR0_EQ, ExprInt(1, 1)),
            ExprAff(ir.IRDst, lbl_next),
        ]
        # Block skipping the store: flags, CR0_EQ cleared, then fall through
        dont_assigns = flags + [
            ExprAff(CR0_EQ, ExprInt(0, 1)),
            ExprAff(ir.IRDst, lbl_next),
        ]
        additional_ir = [
            IRBlock(lbl_do.name, [AssignBlock(do_assigns)]),
            IRBlock(lbl_dont.name, [AssignBlock(dont_assigns)]),
        ]
        # The instruction itself only clears `reserve` and dispatches on its
        # previous value
        ret = [
            ExprAff(reserve, ExprInt(0, 1)),
            ExprAff(ir.IRDst, ExprCond(reserve, lbl_do, lbl_dont)),
        ]

    return ret, additional_ir
def build_graph(verbose=False, simplify=False, ssa=False, ssa_simplify=False):
    """Disassemble from the address returned by idc.ScreenEA() and show its
    IR graph in a GraphMiasmIR view.

    @verbose: print progress messages and dump 'asm_flow.dot' (current
        directory) and 'graph.dot' (temporary directory)
    @simplify: run dead_simp / remove_empty_assignblks / merge_blocks until
        a fixed point before building the view
    @ssa: display the SSA form of the IR graph
    @ssa_simplify: display the SSA form after expression propagation and
        simplification (takes precedence over @ssa for the displayed view)

    Raise RuntimeError if @ssa is set and there is not exactly one entry
    point.
    """
    start_addr = idc.ScreenEA()

    machine = guess_machine(addr=start_addr)
    dis_engine, ira = machine.dis_engine, machine.ira

    if verbose:
        print("Arch %s" % (dis_engine,))

    fname = idc.GetInputFile()
    if verbose:
        print(fname)

    bs = bin_stream_ida()
    mdis = dis_engine(bs)
    ir_arch = ira(mdis.loc_db)

    # populate symbols with ida names
    for addr, name in idautils.Names():
        if name is None:
            continue
        if (mdis.loc_db.get_offset_location(addr) or
                mdis.loc_db.get_name_location(name)):
            # Symbol alias
            continue
        mdis.loc_db.add_location(name, addr)

    if verbose:
        print("start disasm")
        # Report the disassembly start, not the last symbol address left in
        # `addr` by the loop above (which is unbound when there is no symbol)
        print(hex(start_addr))

    asmcfg = mdis.dis_multiblock(start_addr)

    entry_points = set([mdis.loc_db.get_offset_location(start_addr)])
    if verbose:
        print("generating graph")
        with open('asm_flow.dot', 'w') as asm_dot:
            asm_dot.write(asmcfg.dot())

        print("generating IR... %x" % start_addr)

    ircfg = ir_arch.new_ircfg_from_asmcfg(asmcfg)

    if verbose:
        print("IR ok... %x" % start_addr)

    # Simplify every destination / source expression of the IR
    for irb in ircfg.blocks.itervalues():
        irs = []
        for assignblk in irb:
            new_assignblk = {
                expr_simp(dst): expr_simp(src)
                for dst, src in assignblk.iteritems()
            }
            irs.append(AssignBlock(new_assignblk, instr=assignblk.instr))
        ircfg.blocks[irb.loc_key] = IRBlock(irb.loc_key, irs)

    if verbose:
        out = ircfg.dot()
        dot_path = os.path.join(tempfile.gettempdir(), 'graph.dot')
        with open(dot_path, 'wb') as ir_dot:
            ir_dot.write(out)
    title = "Miasm IR graph"

    if simplify:
        dead_simp(ir_arch, ircfg)

        ircfg.simplify(expr_simp)
        modified = True
        while modified:
            modified = False
            modified |= dead_simp(ir_arch, ircfg)
            modified |= remove_empty_assignblks(ircfg)
            modified |= merge_blocks(ircfg, entry_points)
        title += " (simplified)"

    graph = GraphMiasmIR(ircfg, title, None)

    if ssa:
        if len(entry_points) != 1:
            raise RuntimeError("Your graph should have only one head")
        head = list(entry_points)[0]
        ssa = SSADiGraph(ircfg)
        ssa.transform(head)
        title += " (SSA)"
        graph = GraphMiasmIR(ssa.graph, title, None)

    if ssa_simplify:
        class IRAOutRegs(ira):
            def get_out_regs(self, block):
                # Name the class explicitly: super(self.__class__, self)
                # recurses forever as soon as this class is subclassed
                regs_todo = super(IRAOutRegs, self).get_out_regs(block)
                out = {}
                for assignblk in block:
                    for dst in assignblk:
                        reg = self.ssa_var.get(dst, None)
                        if reg is None:
                            continue
                        if reg in regs_todo:
                            out[reg] = dst
                return set(out.values())

        # Add dummy dependency to uncover out regs affectation
        for loc in ircfg.leaves():
            irblock = ircfg.blocks.get(loc)
            if irblock is None:
                continue
            regs = {}
            for reg in ir_arch.get_out_regs(irblock):
                regs[reg] = reg
            assignblks = list(irblock)
            new_assiblk = AssignBlock(regs, assignblks[-1].instr)
            assignblks.append(new_assiblk)
            new_irblock = IRBlock(irblock.loc_key, assignblks)
            ircfg.blocks[loc] = new_irblock

        ir_arch = IRAOutRegs(mdis.loc_db)

        def is_addr_ro_variable(bs, addr, size):
            """
            Return True if address at @addr is a read-only variable.
            WARNING: Quick & Dirty

            @addr: integer representing the address of the variable
            @size: size in bits

            """
            try:
                bs.getbytes(addr, size // 8)
            except IOError:
                return False
            return True

        ir_arch.ssa_var = {}
        modified = True
        ssa_forbidden_regs = set([
            ir_arch.pc,
            ir_arch.IRDst,
            ir_arch.arch.regs.exception_flags,
        ])

        head = list(entry_points)[0]
        heads = set([head])
        all_ssa_vars = set()

        propagate_expr = PropagateExpr()

        while modified:
            ssa = SSADiGraph(ircfg)
            ssa.immutable_ids.update(ssa_forbidden_regs)

            ssa.transform(head)

            all_ssa_vars.update(ssa._ssa_variable_to_expr)

            ssa_regs = [reg for reg in ssa.expressions if reg.is_id()]
            ssa_forbidden_regs.update(ssa_regs)

            ir_arch.ssa_var.update(ssa._ssa_variable_to_expr)

            # Propagate / simplify until nothing changes any more
            while modified:
                modified = False
                modified |= propagate_expr.propagate(ssa, head)
                modified |= ircfg.simplify(expr_simp)
                simp_modified = True
                while simp_modified:
                    simp_modified = False
                    simp_modified |= dead_simp(ir_arch, ircfg)
                    simp_modified |= remove_empty_assignblks(ircfg)
                    simp_modified |= merge_blocks(ircfg, heads)
                    simp_modified |= load_from_int(
                        ircfg, bs, is_addr_ro_variable
                    )
                    modified |= simp_modified

        # Final SSA transformation on the simplified graph
        merge_blocks(ircfg, heads)
        ssa = SSADiGraph(ircfg)
        ssa.immutable_ids.update(ssa_forbidden_regs)
        ssa.transform(head)
        all_ssa_vars.update(ssa._ssa_variable_to_expr)
        ssa._ssa_variable_to_expr = all_ssa_vars
        dead_simp(ir_arch, ssa.graph)

        title += " (SSA Simplified)"
        graph = GraphMiasmIR(ssa.graph, title, None)

    graph.Show()
def build_graph(verbose=False, simplify=False):
    """Disassemble from the address returned by idc.ScreenEA() and show its
    IR graph in a GraphMiasmIR view.

    @verbose: print progress messages and dump 'asm_flow.dot' (current
        directory) and 'graph.dot' (temporary directory)
    @simplify: run dead code removal, empty assignblock removal, jump block
        removal and block merging until a fixed point before display
    """
    start_addr = idc.ScreenEA()

    machine = guess_machine(addr=start_addr)
    dis_engine, ira = machine.dis_engine, machine.ira

    if verbose:
        print("Arch %s" % (dis_engine,))

    fname = idc.GetInputFile()
    if verbose:
        print(fname)

    bs = bin_stream_ida()
    mdis = dis_engine(bs)
    ir_arch = ira(mdis.loc_db)

    # populate symbols with ida names
    for addr, name in idautils.Names():
        if name is None:
            continue
        if (mdis.loc_db.get_offset_location(addr) or
                mdis.loc_db.get_name_location(name)):
            # Symbol alias
            continue
        mdis.loc_db.add_location(name, addr)

    if verbose:
        print("start disasm")
        # Report the disassembly start, not the last symbol address left in
        # `addr` by the loop above (which is unbound when there is no symbol)
        print(hex(start_addr))

    asmcfg = mdis.dis_multiblock(start_addr)

    if verbose:
        print("generating graph")
        with open('asm_flow.dot', 'w') as asm_dot:
            asm_dot.write(asmcfg.dot())

        print("generating IR... %x" % start_addr)

    ircfg = ir_arch.new_ircfg_from_asmcfg(asmcfg)

    if verbose:
        print("IR ok... %x" % start_addr)

    # Simplify every destination / source expression of the IR
    for irb in ircfg.blocks.itervalues():
        irs = []
        for assignblk in irb:
            new_assignblk = {
                expr_simp(dst): expr_simp(src)
                for dst, src in assignblk.iteritems()
            }
            irs.append(AssignBlock(new_assignblk, instr=assignblk.instr))
        ircfg.blocks[irb.loc_key] = IRBlock(irb.loc_key, irs)

    if verbose:
        out = ircfg.dot()
        dot_path = os.path.join(tempfile.gettempdir(), 'graph.dot')
        with open(dot_path, 'wb') as ir_dot:
            ir_dot.write(out)

    title = "Miasm IR graph"

    if simplify:
        dead_simp(ir_arch, ircfg)

        ircfg.simplify(expr_simp)
        modified = True
        while modified:
            modified = False
            modified |= dead_simp(ir_arch, ircfg)
            modified |= ircfg.remove_empty_assignblks()
            modified |= ircfg.remove_jmp_blocks()
            modified |= ircfg.merge_blocks()
        title += " (simplified)"

    g = GraphMiasmIR(ircfg, title, None)

    g.Show()
# Translate every disassembled block to IR
print("generating IR... %x" % addr)

for block in blocks:
    print('ADD')
    print(block)
    ir_arch.add_bloc(block)

print("IR ok... %x" % addr)

# Simplify every destination / source expression of the IR
for irb in ir_arch.blocks.itervalues():
    irs = []
    for assignblk in irb.irs:
        new_assignblk = {
            expr_simp(dst): expr_simp(src)
            for dst, src in assignblk.iteritems()
        }
        irs.append(AssignBlock(new_assignblk, instr=assignblk.instr))
    ir_arch.blocks[irb.label] = IRBlock(irb.label, irs)

# Dump the IR graph; the context manager closes the file deterministically
out = ir_arch.graph.dot()
with open(os.path.join(tempfile.gettempdir(), 'graph.dot'), 'wb') as dot_file:
    dot_file.write(out)

# dead_simp(ir_arch)

g = GraphMiasmIR(ir_arch, "Miasm IR graph", None)

g.cmd_a = g.AddCommand("cmd a", "x")
g.cmd_b = g.AddCommand("cmd b", "y")

g.Show()
def build_graph(verbose=False, simplify=False):
    """Disassemble from the address returned by idc.ScreenEA() and show its
    IR graph in a GraphMiasmIR view.

    @verbose: print progress messages (including every block added to the
        IR) and dump 'asm_flow.dot' (current directory) and 'graph.dot'
        (temporary directory)
    @simplify: run dead code removal, empty assignblock removal, jump block
        removal and block merging until a fixed point before display
    """
    machine = guess_machine()
    dis_engine, ira = machine.dis_engine, machine.ira

    if verbose:
        print("Arch %s" % (dis_engine,))

    fname = idc.GetInputFile()
    if verbose:
        print(fname)

    bs = bin_stream_ida()
    mdis = dis_engine(bs)
    ir_arch = ira(mdis.symbol_pool)

    # populate symbols with ida names
    for addr, name in idautils.Names():
        if name is None:
            continue
        mdis.symbol_pool.add_label(name, addr)

    if verbose:
        print("start disasm")
    addr = idc.ScreenEA()
    if verbose:
        print(hex(addr))

    blocks = mdis.dis_multiblock(addr)

    if verbose:
        print("generating graph")
        with open('asm_flow.dot', 'w') as asm_dot:
            asm_dot.write(blocks.dot())

        print("generating IR... %x" % addr)

    for block in blocks:
        if verbose:
            print('ADD')
            print(block)
        ir_arch.add_block(block)

    if verbose:
        print("IR ok... %x" % addr)

    # Simplify every destination / source expression of the IR
    for irb in ir_arch.blocks.itervalues():
        irs = []
        for assignblk in irb.irs:
            new_assignblk = {
                expr_simp(dst): expr_simp(src)
                for dst, src in assignblk.iteritems()
            }
            irs.append(AssignBlock(new_assignblk, instr=assignblk.instr))
        ir_arch.blocks[irb.label] = IRBlock(irb.label, irs)

    if verbose:
        out = ir_arch.graph.dot()
        dot_path = os.path.join(tempfile.gettempdir(), 'graph.dot')
        with open(dot_path, 'wb') as ir_dot:
            ir_dot.write(out)

    title = "Miasm IR graph"

    if simplify:
        dead_simp(ir_arch)

        ir_arch.simplify(expr_simp)
        modified = True
        while modified:
            modified = False
            modified |= dead_simp(ir_arch)
            modified |= ir_arch.remove_empty_assignblks()
            modified |= ir_arch.remove_jmp_blocks()
            modified |= ir_arch.merge_blocks()
        title += " (simplified)"

    g = GraphMiasmIR(ir_arch, title, None)

    g.cmd_a = g.AddCommand("cmd a", "x")
    g.cmd_b = g.AddCommand("cmd b", "y")

    g.Show()
ctype_str) ctype = mychandler.type_analyzer.types_mngr.types_ast.ast_parse_declaration( ast.ext[0]) objc = types_mngr.get_objc(ctype) print '=' * 20 print expr, objc infos_types[expr] = objc # Add fake head lbl_real_start = ir_arch.symbol_pool.getby_offset(addr) lbl_head = ir_arch.symbol_pool.getby_name_create("start") first_block = blocks.label2block(lbl_real_start) assignblk_head = AssignBlock([ ExprAff(ir_arch.IRDst, ExprId(lbl_real_start, ir_arch.IRDst.size)), ExprAff(ir_arch.sp, ir_arch.arch.regs.regs_init[ir_arch.sp]) ], first_block.lines[0]) irb_head = IRBlock(lbl_head, [assignblk_head]) ir_arch.blocks[lbl_head] = irb_head ir_arch.graph.add_uniq_edge(lbl_head, lbl_real_start) class Engine(SymbExecCType): def __init__(self, state): mychandler = MyCHandler(types_mngr, state.infos_types) super(Engine, self).__init__(ir_arch, state.symbols, state.infos_types, mychandler) def add_state(todo, states, addr, state): addr = ir_arch.get_label(addr) if addr not in states: states[addr] = state
def sanitize_memory_accesses(self, memories, c_handler):
    """Modify memory accesses to consider only access on "full final element"

    Example:
    struct T{
        int a;
        int b;
        int *c;
    }

    @8[T + 2] = X -> @32[T] = 00 X 00 00
    @32[T + 2] = WW XX YY ZZ -> @32[T] = 00 00 WW XX, @32[T + 4] = YY ZZ 00 00

    @memories: AssignBlock (ExprMem -> ExprInt)
    @c_handler: CHandler with argument types

    Return sanitized access, filled memory cases {Full access -> [offset filled]}
    """

    def byte_access(base_ptr, index):
        """Return (offset, 8 bit ExprMem) for the byte @index of @base_ptr"""
        offset = ExprInt(index, base_ptr.size)
        return offset, ExprMem(expr_simp(base_ptr + offset), 8)

    # Pass 1: split each write byte by byte and record the fields touched
    fields = set()
    atomic_values = {}
    for dst, value in memories.iteritems():
        assert isinstance(dst, ExprMem)
        assert isinstance(value, ExprInt)
        base_ptr = dst.arg
        raw_value = int(value)
        for byte_idx in xrange(dst.size / 8):
            _, mem_access = byte_access(base_ptr, byte_idx)

            # Keep the value of this single byte
            atomic_values[mem_access] = ExprInt(
                (raw_value >> (byte_idx * 8)) & 0xFF, 8
            )

            # Byte access -> C field access -> Expr access on the full field
            info_C = c_handler.expr_to_c(mem_access)
            assert len(info_C) == 1
            fields.add(expr_simp(c_handler.c_to_expr(info_C[0])))

    # Pass 2: rebuild the value of each full field, from its highest byte
    # down to its lowest one
    filled_memory = {}
    out = {}
    for field in fields:
        assert isinstance(field, ExprMem)
        base_ptr = field.arg
        accumulator = 0
        for byte_idx in reversed(xrange(field.size / 8)):
            offset, mem_access = byte_access(base_ptr, byte_idx)
            if mem_access in atomic_values:
                byte_value = atomic_values[mem_access]
            else:
                # Byte never written: complete with 0 and remember the hole
                byte_value = ExprInt(0, 8)
                filled_memory.setdefault(field, []).append(offset)
            accumulator = (accumulator << 8) + int(byte_value)

        out[field] = ExprInt(accumulator, field.size)

    out = AssignBlock(out)
    if memories != out:
        self.logger.debug("SANITIZE: %s", memories)
        self.logger.debug("OUT SANITIZE: %s", out)

    return out, filled_memory
def propagate(self, ssa, head):
    """Replace uses of SSA definitions by the source of their definition.

    A definition is a propagation candidate unless its source contains a
    call (expr_has_call), its source is a 'Phi' operation, or its
    destination is a memory access. Each use is rewritten only when
    self.propagation_allowed() accepts the (definition, use) pair.

    @ssa: SSA object; the blocks of ssa.graph are replaced in place
    @head: not used by this method

    Return True if at least one expression has been modified
    """
    defuse = SSADefUse.from_ssa(ssa)

    # Collect the candidate definitions
    to_replace = {}
    node_to_reg = {}
    for node in defuse.nodes():
        _, _, reg = node
        src = defuse.get_node_target(node)
        if expr_has_call(src) or src.is_op('Phi') or reg.is_mem():
            continue
        to_replace[reg] = src
        node_to_reg[node] = reg

    modified = False
    for node in node_to_reg:
        _, _, reg_a = node
        for successor in defuse.successors(node):
            if not self.propagation_allowed(ssa, to_replace, node, successor):
                continue

            loc_b, index_b, _ = successor
            block = ssa.graph.blocks[loc_b]
            replace = {reg_a: to_replace[reg_a]}

            # Rebuild the assignblock holding the use
            assignblks = list(block)
            assignblk = block[index_b]
            out = {}
            for dst, src in assignblk.iteritems():
                if src.is_op('Phi'):
                    # Phi sources are left untouched
                    out[dst] = src
                    continue

                # Memory accesses: only the pointer is rewritten
                if src.is_mem():
                    new_src = ExprMem(src.ptr.replace_expr(replace), src.size)
                else:
                    new_src = src.replace_expr(replace)

                if dst.is_id():
                    new_dst = dst
                elif dst.is_mem():
                    new_dst = ExprMem(dst.ptr.replace_expr(replace), dst.size)
                else:
                    new_dst = dst.replace_expr(replace)
                    # Keep the original destination if the replacement is
                    # neither an identifier nor a memory access
                    if not (new_dst.is_id() or new_dst.is_mem()):
                        new_dst = dst

                if src != new_src or dst != new_dst:
                    modified = True
                out[new_dst] = new_src

            assignblks[index_b] = AssignBlock(out, assignblk.instr)
            ssa.graph.blocks[block.loc_key] = IRBlock(block.loc_key, assignblks)

    return modified
def test_ClassDef(self):
    # Exercise SymbolicExecutionEngine: memory lookups over overlapping
    # accesses, then apply_expr / eval_ir / apply_change
    from miasm2.expression.expression import (
        ExprInt32, ExprId, ExprMem, ExprCompose, ExprAff
    )
    from miasm2.arch.x86.sem import ir_x86_32
    from miasm2.ir.symbexec import SymbolicExecutionEngine
    from miasm2.ir.ir import AssignBlock

    # Addresses
    addrX = ExprInt32(-1)
    addr0 = ExprInt32(0)
    addr1 = ExprInt32(1)
    addr8 = ExprInt32(8)
    addr9 = ExprInt32(9)
    addr20 = ExprInt32(20)
    addr40 = ExprInt32(40)
    addr50 = ExprInt32(50)

    # Memory accesses
    mem0 = ExprMem(addr0)
    mem1 = ExprMem(addr1, 8)
    mem8 = ExprMem(addr8)
    mem9 = ExprMem(addr9)
    mem20 = ExprMem(addr20)
    mem40v = ExprMem(addr40, 8)
    mem40w = ExprMem(addr40, 16)
    mem50v = ExprMem(addr50, 8)
    mem50w = ExprMem(addr50, 16)

    # Identifiers
    id_x = ExprId('x')
    id_y = ExprId('y', 8)
    id_a = ExprId('a')
    id_eax = ExprId('eax_init')

    ir_arch = ir_x86_32()
    init_state = {
        mem0: id_x,
        mem1: id_y,
        mem9: id_x,
        mem40w: id_x[:16],
        mem50v: id_y,
        id_a: addr0,
        id_eax: addr0,
    }
    engine = SymbolicExecutionEngine(ir_arch, init_state)

    # Memory lookups
    self.assertEqual(engine.find_mem_by_addr(addr0), mem0)
    self.assertEqual(engine.find_mem_by_addr(addrX), None)
    self.assertEqual(engine.eval_expr(ExprMem(addr1 - addr1)), id_x)
    self.assertEqual(engine.eval_expr(ExprMem(addr1, 8)), id_y)
    self.assertEqual(
        engine.eval_expr(ExprMem(addr1 + addr1)),
        ExprCompose(id_x[16:32], ExprMem(ExprInt32(4), 16)))
    self.assertEqual(
        engine.eval_expr(mem8),
        ExprCompose(id_x[0:24], ExprMem(ExprInt32(11), 8)))
    self.assertEqual(engine.eval_expr(mem40v), id_x[:8])
    self.assertEqual(
        engine.eval_expr(mem50w),
        ExprCompose(id_y, ExprMem(ExprInt32(51), 8)))
    self.assertEqual(engine.eval_expr(mem20), mem20)
    engine.func_read = lambda x: x
    self.assertEqual(engine.eval_expr(mem20), mem20)
    self.assertEqual(set(engine.modified()), set(engine.symbols))
    self.assertRaises(
        KeyError, engine.symbols.__getitem__, ExprMem(ExprInt32(100)))

    self.assertEqual(engine.apply_expr(id_eax), addr0)
    self.assertEqual(engine.apply_expr(ExprAff(id_eax, addr9)), addr9)
    self.assertEqual(engine.apply_expr(id_eax), addr9)

    # apply_change / eval_ir / apply_expr

    ## x = a (with a = 0x0)
    assignblk = AssignBlock()
    assignblk[id_x] = id_a
    engine.eval_ir(assignblk)
    self.assertEqual(engine.apply_expr(id_x), addr0)

    ## x = a (without replacing 'a' with 0x0)
    engine.apply_change(id_x, id_a)
    self.assertEqual(engine.apply_expr(id_x), id_a)

    ## x = a (with a = 0x0)
    self.assertEqual(engine.apply_expr(assignblk.dst2ExprAff(id_x)), addr0)
    self.assertEqual(engine.apply_expr(id_x), addr0)
def do_it_block(self, loc, index, block, assignments, gen_pc_updt):
    """Translate an IT instruction and the instructions it covers.

    The IT instruction itself becomes an IRBlock jumping to the next
    instruction. Each covered instruction then yields a list of IRBlocks:
    a conditional jump (to the instruction body or past it), followed by
    the body ending with a jump to the next instruction.

    @loc: location of the IRBlock under construction
    @index: index of the IT instruction in @block.lines
    @block: object whose `lines` attribute holds the instructions
    @assignments: AssignBlocks already gathered for the IRBlock under
        construction; the jump of the IT instruction is appended to it
    @gen_pc_updt: forwarded to add_instr_to_current_state

    Return (index of the last covered instruction, list of IRBlock lists)
    Raise NotImplementedError if the covered instructions do not all
    belong to @block, or if one of them requires a block split.
    """
    instr = block.lines[index]
    it_hints, it_cond = self.parse_itt(instr)
    cond_num = cond_dct_inv[it_cond.name]
    cond_eq = tab_cond[cond_num]

    # The loop below reads block.lines[index + 1] up to
    # block.lines[index + len(it_hints)]: the last of these indexes must be
    # strictly lower than the number of lines
    if index + len(it_hints) >= len(block.lines):
        raise NotImplementedError("Splitted IT block non supported yet")

    ir_blocks_all = []

    # Gen dummy irblock for IT instr
    loc_next = self.get_next_loc_key(instr)
    # Locations are wrapped in ExprLoc, as for the conditional jump below
    dst = ExprAff(self.IRDst, ExprLoc(loc_next, 32))
    dst_blk = AssignBlock([dst], instr)
    assignments.append(dst_blk)
    irblock = IRBlock(loc, assignments)
    ir_blocks_all.append([irblock])

    loc = loc_next
    assignments = []
    for hint in it_hints:
        irblocks = []
        index += 1
        instr = block.lines[index]

        # Add conditional jump to current irblock
        loc_do = self.loc_db.add_location()
        loc_next = self.get_next_loc_key(instr)

        # A truthy hint selects the negated condition
        if hint:
            local_cond = ~cond_eq
        else:
            local_cond = cond_eq
        dst = ExprAff(
            self.IRDst,
            ExprCond(local_cond, ExprLoc(loc_do, 32), ExprLoc(loc_next, 32))
        )
        dst_blk = AssignBlock([dst], instr)
        assignments.append(dst_blk)
        irblock = IRBlock(loc, assignments)

        irblocks.append(irblock)

        # Body of the covered instruction, in its own IRBlock
        assignments = []
        loc = loc_do
        split = self.add_instr_to_current_state(
            instr, block, assignments, irblocks, gen_pc_updt
        )
        if split:
            raise NotImplementedError(
                "Unsupported instr in IT block (%s)" % instr
            )

        dst = ExprAff(self.IRDst, ExprLoc(loc_next, 32))
        dst_blk = AssignBlock([dst], instr)
        assignments.append(dst_blk)
        irblock = IRBlock(loc, assignments)
        irblocks.append(irblock)
        loc = loc_next
        assignments = []
        ir_blocks_all.append(irblocks)
    return index, ir_blocks_all
def sanitize_memory_accesses(self, memories, c_handler, expr_type_from_C):
    """Modify memory accesses to consider only access on "full final element"

    Example:
    struct T{
        int a;
        int b;
        int *c;
    }

    @8[T + 2] = X -> @32[T] = 00 X 00 00
    @32[T + 2] = WW XX YY ZZ -> @32[T] = 00 00 WW XX, @32[T + 4] = YY ZZ 00 00

    @memories: AssignBlock
    @c_handler: CHandler with argument types
    @expr_type_from_C: Name -> ObjC dict, for C -> Expr generation

    Return sanitized access, filled memory cases {Full access -> [offset filled]}
    """
    # Pass 1: split each write byte by byte and record the fields touched
    fields = set()
    atomic_values = {}
    for dst, value in memories.iteritems():
        assert isinstance(dst, ExprMem)
        base_ptr = dst.ptr
        for byte_idx in xrange(dst.size / 8):
            byte_offset = ExprInt(byte_idx, base_ptr.size)
            byte_mem = ExprMem(expr_simp(base_ptr + byte_offset), 8)

            # Keep the value of this single byte
            low_bit = byte_idx * 8
            atomic_values[byte_mem] = expr_simp(value[low_bit:low_bit + 8])

            # Byte access -> C field access -> Expr access on the full field
            c_accesses = list(c_handler.expr_to_c(byte_mem))
            assert len(c_accesses) == 1
            c_access = c_accesses[0]
            if "__PAD__" in c_access:
                # This is a field used for padding, ignore it
                continue
            fields.add(
                expr_simp(c_handler.c_to_expr(c_access, expr_type_from_C))
            )

    # Pass 2: rebuild the value of each full field
    filled_memory = {}
    out = {}
    for field in fields:
        assert isinstance(field, ExprMem)
        base_ptr = field.ptr
        # Bytes are visited from the highest offset to the lowest one
        high_to_low = []
        for byte_idx in reversed(xrange(field.size / 8)):
            byte_offset = ExprInt(byte_idx, base_ptr.size)
            byte_mem = ExprMem(expr_simp(base_ptr + byte_offset), 8)
            if byte_mem in atomic_values:
                byte_value = atomic_values[byte_mem]
            else:
                # Byte never written: complete with 0 and remember the hole
                byte_value = ExprInt(0, 8)
                filled_memory.setdefault(field, []).append(byte_offset)
            high_to_low.append(byte_value)

        # ExprCompose receives the bytes starting from the lowest offset
        out[field] = expr_simp(ExprCompose(*high_to_low[::-1]))

    out = AssignBlock(out)
    if memories != out:
        self.logger.debug("SANITIZE: %s", memories)
        self.logger.debug("OUT SANITIZE: %s", out)

    return out, filled_memory