def offset_to_ptr(base, offset):
    """
    Build the canonized expression designating @base + @offset
    @base: symbolic base address
    @offset: relative offset integer to the @base address
    """
    if base.is_id(INTERNAL_INTBASE_NAME):
        # @base is the identifier named INTERNAL_INTBASE_NAME: only the
        # offset is kept, as an integer of the same size as @base
        return ExprInt(offset, base.size).canonize()
    if offset == 0:
        # Nothing to add
        return base.canonize()
    return (base + ExprInt(offset, base.size)).canonize()
def simp_test_signext_inf(expr_s, expr):
    """A.signExt() <s int => A <s int[:]

    Simplify a signed "lower than" / "lower or equal" comparison between a
    sign extended expression and an integer:
    - if the integer fits in the signed range of A, compare A directly with
      the truncated integer;
    - if the integer is greater than every signed value A can take, the
      comparison is always true (1);
    - if the integer is lower than every signed value A can take, the
      comparison is always false (0).

    @expr_s: expression simplifier, applied to the truncated integer
    @expr: expression to simplify; returned unchanged if it does not match
    """
    if not (expr.is_op(TOK_INF_SIGNED) or expr.is_op(TOK_INF_EQUAL_SIGNED)):
        return expr
    arg, cst = expr.args
    if not (arg.is_op() and arg.op.startswith("signExt")):
        return expr
    if not cst.is_int():
        return expr
    base = arg.args[0]
    # Value of the constant as converted by mod_size2int for its own size
    # (presumably the signed interpretation -- confirm against mod_size2int)
    tmp = int(mod_size2int[cst.size](int(cst)))

    # Signed range of the non-extended operand: [lower, upper[
    lower = -(1 << (base.size - 1))
    upper = 1 << (base.size - 1)

    if lower <= tmp < upper:
        # Can trunc integer
        return ExprOp(expr.op, base, expr_s(cst[:base.size]))
    if tmp >= upper:
        # Every value of A is strictly below the constant: always true
        return ExprInt(1, 1)
    # tmp < lower: every value of A is strictly above the constant, so
    # neither "<s" nor "<=s" can hold
    return ExprInt(0, 1)
def test_advck3(self):
    """Test ADVCK3 execution"""

    # ADVCK3 R0,Rn,Rm
    # Each case: (initial R1, initial R2, expected R0)
    cases = (
        (1, 2, 0),
        (1, 0xFFFFFFFF, 1),
    )
    for r1_value, r2_value, r0_expected in cases:
        exec_instruction("ADVCK3 R0, R1, R2",
                         [(ExprId("R1", 32), ExprInt(r1_value, 32)),
                          (ExprId("R2", 32), ExprInt(r2_value, 32))],
                         [(ExprId("R0", 32), ExprInt(r0_expected, 32))])
def test_bnez(self):
    """Test BNEZ execution"""

    # BNEZ Rn,disp8.align2
    # Each case: (assembly, initial R1, expected PC, extra keyword arguments)
    cases = (
        ("BNEZ R1, 0x10", 0, 0x2, {}),
        ("BNEZ R1, 0x10", 1, 0x20, {"offset": 0x10}),
        ("BNEZ R1, 0x80", 0, 0x2, {}),
    )
    for asm, r1_value, pc_expected, extra in cases:
        exec_instruction(asm,
                         [(ExprId("R1", 32), ExprInt(r1_value, 32))],
                         [(ExprId("PC", 32), ExprInt(pc_expected, 32))],
                         **extra)
def test_bsr(self):
    """Test BSR execution"""

    # BSR disp12.align2
    initial = [(ExprId("PC", 32), ExprInt(2, 32))]
    expected = [(ExprId("PC", 32), ExprInt(0xFFFFF800, 32)),
                (ExprId("LP", 32), ExprInt(2, 32))]
    exec_instruction("BSR 0x800", initial, expected, index=0)

    # BSR disp24.align2
    initial = [(ExprId("PC", 32), ExprInt(4, 32))]
    expected = [(ExprId("PC", 32), ExprInt(0x101014, 32)),
                (ExprId("LP", 32), ExprInt(4, 32))]
    exec_instruction("BSR 0x101015", initial, expected, index=1)
def test_add3(self):
    """Test ADD3 execution"""

    # ADD3 Rl,Rn,Rm
    initial = [(ExprId("R2", 32), ExprInt(0x40, 32)),
               (ExprId("R3", 32), ExprInt(0x2, 32))]
    expected = [(ExprId("R1", 32), ExprInt(0x42, 32))]
    exec_instruction("ADD3 R1, R2, R3", initial, expected)

    # ADD3 Rn,SP,imm7.align4
    initial = [(ExprId("SP", 32), ExprInt(0x20, 32))]
    expected = [(ExprId("R1", 32), ExprInt(0x28, 32))]
    exec_instruction("ADD3 R1, SP, 0x8", initial, expected)

    # ADD3 Rn,Rm,imm16
    initial = [(ExprId("R5", 32), ExprInt(0x20, 32))]
    expected = [(ExprId("R7", 32), ExprInt(-31880, 32))]
    exec_instruction("ADD3 R7, R5, -31912", initial, expected)
def test(self):
    """Simple symbolic execution examples"""

    def exec_instruction(hex_asm, init_values):
        """Symbolically execute an instruction

        @hex_asm: hexadecimal string of the encoded instruction
        @init_values: iterable of (expression, value) pairs stored in the
                      engine symbols before the state is displayed
        """
        print("Hex:", hex_asm)

        # Disassemble an instruction
        mn = mn_mep.dis(decode_hex(hex_asm), "b")
        print("Dis:", mn)

        loc_db = LocationDB()

        # Get the IR (only the first of the two returned items is displayed)
        lifter = ir_mepb(loc_db)
        iir, _extra_iir = lifter.get_ir(mn)
        print("\nInternal representation:", iir)

        # Symbolic execution
        engine = SymbolicExecutionEngine(ir_a_mepb(loc_db), regs_init)

        # Assign register values before symbolic evaluation
        for target, value in init_values:
            engine.symbols[target] = value

        print("\nModified registers:", list(engine.modified(mems=False)))
        print("Modified memories:", list(engine.modified()))

        print("\nFinal registers:")
        engine.dump(mems=False)

        print("\nFinal mems:")
        engine.dump()

    # (hexadecimal encoding, initial values) for each example
    examples = [
        ("6108", [(ExprId("R1", 32), ExprInt(0x40, 32))]),
        ("08a2", [(ExprId("R8", 32), ExprInt(0x40, 32)),
                  (ExprId("R10", 32), ExprInt(0x41, 32))]),
        ("0948", [(ExprId("R4", 32), ExprInt(0x41, 32)),
                  (ExprId("R9", 32), ExprInt(0x28, 32)),
                  (ExprMem(ExprInt(0x41, 32), 8), ExprInt(0, 8))]),
    ]
    for encoded, values in examples:
        print("-" * 49)  # Tests separation
        exec_instruction(encoded, values)
def ccmp(ir, instr, arg1, arg2, arg3, arg4):
    """Conditional compare.

    When the condition named by @arg4 holds, the flags are those of the
    comparison @arg1 - @arg2; otherwise they are taken from the bits of
    @arg3.

    @ir, @instr: unused here
    @arg1: first operand
    @arg2: second operand; an integer is resized to the size of @arg1
    @arg3: expression providing the alternative flag values (bits 0 to 3)
    @arg4: condition operand; its `name` is looked up in cond2expr
    Return the list of flag assignments and an empty list.
    """
    e = []
    if arg2.is_int():
        arg2 = ExprInt(int(arg2), arg1.size)

    # NOTE(review): N is read from bit 0 and V from bit 3 of @arg3. The A64
    # #nzcv immediate is usually described with N as the most significant
    # bit; confirm how @arg3 is decoded before relying on this order.
    default_nf = arg3[0:1]
    default_zf = arg3[1:2]
    default_cf = arg3[2:3]
    default_of = arg3[3:4]

    cond_expr = cond2expr[arg4.name]
    res = arg1 - arg2

    # N must reflect the sign (most significant bit) of the comparison
    # result; using the current `nf` here would leave the flag unchanged
    # whenever the condition holds.
    new_nf = res[res.size - 1:res.size]
    new_zf = update_flag_zf(res)[0].src
    new_cf = update_flag_sub_cf(arg1, arg2)[0].src
    new_of = update_flag_sub_of(arg1, arg2)[0].src

    e.append(ExprAssign(nf, ExprCond(cond_expr, new_nf, default_nf)))
    e.append(ExprAssign(zf, ExprCond(cond_expr, new_zf, default_zf)))
    e.append(ExprAssign(cf, ExprCond(cond_expr, new_cf, default_cf)))
    e.append(ExprAssign(of, ExprCond(cond_expr, new_of, default_of)))
    return e, []
def test_jmp(self):
    """Test JMP execution"""

    # JMP Rm
    initial = [(ExprId("R1", 32), ExprInt(0x101015, 32))]
    expected = [(ExprId("PC", 32), ExprInt(0x101015, 32))]
    exec_instruction("JMP R1", initial, expected)

    # JMP target24.align2
    # Each case: (initial PC, expected PC, instruction offset)
    cases = (
        (0, 0x2806, 0x42),
        (0xB0000000, 0xB0002806, 0xB0000000),
    )
    for pc_initial, pc_expected, instr_offset in cases:
        exec_instruction("JMP 0x2807",
                         [(ExprId("PC", 32), ExprInt(pc_initial, 32))],
                         [(ExprId("PC", 32), ExprInt(pc_expected, 32))],
                         offset=instr_offset)
def asm_ast_to_expr(self, arg, loc_db):
    """Convert the parsed assembly AST node @arg into an expression.

    @arg: AST node (AstId, AstOp, AstInt or AstMem)
    @loc_db: location database used to resolve or create named locations
    Return the expression, or None when the node (or one of its children)
    cannot be converted.
    """
    result = None
    if isinstance(arg, AstId):
        name = arg.name
        if isinstance(name, ExprId):
            # Already an expression
            result = name
        elif name not in gpregs.str:
            # Not a known register name: handle it as a named location
            loc_key = loc_db.get_or_create_name_location(name.encode())
            result = ExprLoc(loc_key, 32)
    elif isinstance(arg, AstOp):
        # Every operand is converted before looking for a failure
        operands = []
        for child in arg.args:
            operands.append(self.asm_ast_to_expr(child, loc_db))
        if None not in operands:
            result = ExprOp(arg.op, *operands)
    elif isinstance(arg, AstInt):
        result = ExprInt(arg.value, 32)
    elif isinstance(arg, AstMem):
        ptr = self.asm_ast_to_expr(arg.ptr, loc_db)
        if ptr is not None:
            result = ExprMem(ptr, arg.size)
    return result
def simp_slice_of_ext(_, expr):
    """
    Simplify a slice taken on a zero extension:
    C.zeroExt(X)[A:B] => 0 if A >= size(C)
    C.zeroExt(X)[A:B] => C[A:B] if B <= size(C)
    A.zeroExt(X)[0:Y] => A.zeroExt(Y)

    Any other expression is returned unchanged.
    """
    sliced = expr.arg
    if not (sliced.is_op() and sliced.op.startswith("zeroExt")):
        return expr

    source = sliced.args[0]
    width = source.size

    if expr.start >= width:
        # The slice only covers bits added by the extension
        return ExprInt(0, expr.size)
    if expr.stop <= width:
        # The slice only covers bits of the original expression
        return source[expr.start:expr.stop]
    # The slice overlaps both parts: only a slice starting at bit 0 can be
    # rewritten as a shorter extension
    return source.zeroExtend(expr.stop) if expr.start == 0 else expr
def extend_arg(dst, arg):
    """Expand an extended / shifted operand @arg to the size of @dst.

    @dst: destination expression; only its size is used
    @arg: operand; returned unchanged unless it is an ExprOp with two
          arguments (register, shift amount)
    Extension operators (SXT*/UXT*) are rewritten as a left shift of the
    extended register; shift operators are kept. The shift amount is zero
    extended and masked with @dst.size - 1.
    Raise NotImplementedError on an unknown operator.
    """
    if not isinstance(arg, ExprOp):
        return arg

    op, (reg, shift) = arg.op, arg.args

    # operator -> (number of low bits kept, or None for the whole register;
    #              True for a sign extension, False for a zero extension)
    extensions = {
        "SXTB": (8, True),
        "SXTH": (16, True),
        "SXTW": (32, True),
        "SXTX": (None, True),
        "UXTB": (8, False),
        "UXTH": (16, False),
        "UXTW": (32, False),
        "UXTX": (None, False),
    }

    if op in extensions:
        kept_bits, signed = extensions[op]
        source = reg if kept_bits is None else reg[:kept_bits]
        if signed:
            base = source.signExtend(dst.size)
        else:
            base = source.zeroExtend(dst.size)
        op = "<<"
    elif op in ('<<', '>>', '<<a', 'a>>', '<<<', '>>>'):
        base = reg.zeroExtend(dst.size)
    else:
        raise NotImplementedError('Unknown shifter operator')

    amount = shift.zeroExtend(dst.size) & ExprInt(dst.size - 1, dst.size)
    return ExprOp(op, base, amount)
def test_beqi(self):
    """Test BEQI execution"""

    # BEQI Rn,imm4,disp17.align2
    # Each case: (assembly, initial R1, expected PC, extra keyword arguments)
    cases = (
        ("BEQI R1, 0x8, 0x28", 0, 0x4, {}),
        ("BEQI R1, 0x1, 0x28", 1, 0x38, {"offset": 0x10}),
        ("BEQI R1, 0x6, 0x10000", 6, 0xFFFF0010, {"offset": 0x10}),
    )
    for asm, r1_value, pc_expected, extra in cases:
        exec_instruction(asm,
                         [(ExprId("R1", 32), ExprInt(r1_value, 32))],
                         [(ExprId("PC", 32), ExprInt(pc_expected, 32))],
                         **extra)
def downcast_expression(self, state: SynthesisState) -> SynthesisState:
    """
    Mutation to downcast a subexpression.

    The mutation randomly selects a subexpression of the state's AST and
    draws a value from `self.sizes_casting`, drawing again as long as the
    subexpression's size minus one is greater than the drawn value.
    Afterward, the subexpression is replaced in the AST by its bitwise AND
    with that value. Finally, the state is cleaned up.

    Example (from the original description): the random subexpression
    x + y from (x + y) + z is downcasted to a byte value, leading to
    ((x + y) & 0xff) + z.

    Args:
        state (SynthesisState): State to mutate.

    Returns:
        SynthesisState: Mutated state (the same object, updated in place).
    """
    # pick the subexpression to rewrite
    target = choice(get_subexpressions(state.expr_ast))

    # draw a value, and draw again while target.size - 1 exceeds it
    while True:
        mask = choice(self.sizes_casting)
        if not (target.size - 1 > mask):
            break

    # AND the subexpression with the drawn value and substitute it in the AST
    masked = target & ExprInt(mask, target.size)
    state.expr_ast = state.expr_ast.replace_expr({target: masked})

    # clean state
    state.cleanup()
    return state
def test_bgei(self):
    """Test BGEI execution"""

    # BGEI Rn,imm4,disp17.align2
    # Each case: (initial R1, expected PC)
    cases = (
        (0x10, 0xFFFF0010),
        (0x01, 0x14),
        (0x05, 0xFFFF0010),
    )
    for r1_value, pc_expected in cases:
        exec_instruction("BGEI R1, 0x5, 0x10000",
                         [(ExprId("R1", 32), ExprInt(r1_value, 32))],
                         [(ExprId("PC", 32), ExprInt(pc_expected, 32))],
                         offset=0x10)
def handle(self, cur_addr):
    """Process the destinations reachable from the current symbolic state.

    The symbolic value of IRDst is evaluated and its possible values are
    enumerated. For each candidate destination, the associated constraints
    (plus the concrete bytes of the memory they read) are gathered. The
    constraints of the candidate equal to @cur_addr are kept as the current
    path constraint; for the other candidates accepted by
    `produce_solution`, the solver is queried and any model found is
    passed to `handle_solution`.

    @cur_addr: destination to compare candidates with (converted with
               canonize_to_exprloc before comparison)
    Raise RuntimeError if the memory to inject exceeds MAX_MEMORY_INJECT,
    TypeError if an injected memory byte does not evaluate to an integer.
    """
    cur_addr = canonize_to_exprloc(self.ir_arch.loc_db, cur_addr)
    symb_pc = self.eval_expr(self.ir_arch.IRDst)
    possibilities = possible_values(symb_pc)
    cur_path_constraint = set()  # path_constraint for the concrete path
    if len(possibilities) == 1:
        # Single candidate: it has to be the current address
        dst = next(iter(possibilities)).value
        dst = canonize_to_exprloc(self.ir_arch.loc_db, dst)
        assert dst == cur_addr
    else:
        for possibility in possibilities:
            target_addr = canonize_to_exprloc(self.ir_arch.loc_db, possibility.value)
            path_constraint = set()  # Set of ExprAssign for the possible path

            # Get constraint associated to the possible path
            memory_to_add = ModularIntervals(symb_pc.size)
            for cons in possibility.constraints:
                eaff = cons.to_constraint()
                # eaff.get_r(mem_read=True) is not enough
                # ExprAssign consider a Memory access in dst as a write
                mem = eaff.dst.get_r(mem_read=True)
                mem.update(eaff.src.get_r(mem_read=True))
                for expr in mem:
                    if expr.is_mem():
                        addr_range = expr_range(expr.ptr)
                        # At upper bounds, add the size of the memory access
                        # if addr (- [a, b], then @size[addr] reachables
                        # values are in @8[a, b + size[
                        for start, stop in addr_range:
                            stop += expr.size // 8 - 1
                            full_range = ModularIntervals(
                                symb_pc.size,
                                [(start, stop)])
                            memory_to_add.update(full_range)
                path_constraint.add(eaff)

            if memory_to_add.length > self.MAX_MEMORY_INJECT:
                # TODO re-concretize the constraint or z3-try
                raise RuntimeError("Not implemented: too long memory area")

            # Inject memory: every byte read by the constraints is evaluated
            # and added to the path constraint as an assignment
            for start, stop in memory_to_add:
                for address in range(start, stop + 1):
                    expr_mem = ExprMem(
                        ExprInt(address, self.ir_arch.pc.size),
                        8)
                    value = self.eval_expr(expr_mem)
                    if not value.is_int():
                        raise TypeError("Rely on a symbolic memory case, " \
                                        "address 0x%x" % address)
                    path_constraint.add(ExprAssign(expr_mem, value))

            if target_addr == cur_addr:
                # Add path constraint
                cur_path_constraint = path_constraint

            elif self.produce_solution(target_addr):
                # Looking for a new solution
                # The constraints are added between push() and pop() so
                # they are discarded once the check is done
                self.cur_solver.push()
                for cons in path_constraint:
                    trans = self.z3_trans.from_expr(cons)
                    trans = z3.simplify(trans)
                    self.cur_solver.add(trans)

                result = self.cur_solver.check()
                if result == z3.sat:
                    model = self.cur_solver.model()
                    self.handle_solution(model, target_addr)
                self.cur_solver.pop()

    self.handle_correct_destination(cur_addr, cur_path_constraint)
def callback(self, _):
    """Called before each instruction

    Check the synchronization with the concrete execution, run the handler
    or instrumentation registered for the current address, handle the
    current destination, then symbolically execute the IR found at the
    current address.

    @_: unused argument
    Always return True.
    """
    # Assert synchronization with concrete execution
    self._check_state()

    # Call callbacks associated to the current address
    cur_addr = self.jitter.pc
    if isinstance(cur_addr, LocKey):
        # Work on the offset of the location rather than on the LocKey
        lbl = self.ir_arch.loc_db.loc_key_to_label(cur_addr)
        cur_addr = lbl.offset

    if cur_addr in self.handler:
        # A handler replaces the whole processing of this address
        self.handler[cur_addr](self)
        return True

    if cur_addr in self.instrumentation:
        # An instrumentation runs in addition to the normal processing
        self.instrumentation[cur_addr](self)

    # Handle current address
    self.handle(ExprInt(cur_addr, self.ir_arch.IRDst.size))

    # Avoid memory issue in ExpressionSimplifier
    if len(self.symb.expr_simp.cache) > 100000:
        self.symb.expr_simp.cache.clear()

    # Get IR blocks
    if cur_addr in self.addr_to_cacheblocks:
        # Reuse the IR blocks already built for this address
        self.ircfg.blocks.clear()
        self.ircfg.blocks.update(self.addr_to_cacheblocks[cur_addr])
    else:

        ## Reset cache structures
        self.ircfg.blocks.clear()  # emptied in place rather than rebound

        ## Update current state
        asm_block = self.mdis.dis_block(cur_addr)
        self.ir_arch.add_asmblock_to_ircfg(asm_block, self.ircfg)
        # Store a copy, as self.ircfg.blocks is cleared on later calls
        self.addr_to_cacheblocks[cur_addr] = dict(self.ircfg.blocks)

    # Emulate the current instruction
    self.symb.reset_modified()

    # Is the symbolic execution going (potentially) to jump on a lbl_gen?
    if len(self.ircfg.blocks) == 1:
        self.symb.run_at(self.ircfg, cur_addr)
    else:
        # Emulation could get stuck in generated IR blocks
        # But the concrete execution callback is not precise enough to
        # obtain the full IR blocks path
        # -> Use a fully concrete execution to get back path

        # Update the concrete execution
        self._update_state_from_concrete_symb(self.symb_concrete, cpu=True, mem=True)
        while True:
            next_addr_concrete = self.symb_concrete.run_block_at(
                self.ircfg, cur_addr)
            self.symb.run_block_at(self.ircfg, cur_addr)

            if not (isinstance(next_addr_concrete, ExprLoc) and
                    self.ir_arch.loc_db.get_location_offset(
                        next_addr_concrete.loc_key) is None):
                # Not a lbl_gen (a location without offset), exit
                break

            # Call handle with lbl_gen state
            self.handle(next_addr_concrete)
            cur_addr = next_addr_concrete

    # At this stage, symbolic engine is one instruction after the concrete
    # engine
    return True
def svc(arg1):
    # NOTE(review): `exception_flags` and `interrupt_num` are read (`.size`)
    # in the very statements that assign them, which plain Python rejects
    # with UnboundLocalError. Presumably this body is consumed by a
    # semantic-building decorator that is not visible here -- confirm.
    # exception_flags receives the EXCEPT_INT_XX constant, at its own size
    exception_flags = ExprInt(EXCEPT_INT_XX, exception_flags.size)
    # interrupt_num receives the integer value of @arg1, at its own size
    interrupt_num = ExprInt(int(arg1), interrupt_num.size)
def adrp(arg1, arg2):
    # @arg1 receives PC with its 12 low bits cleared, plus @arg2.
    # NOTE(review): assigning to a parameter has no visible effect in plain
    # Python; presumably this body is consumed by a semantic-building
    # decorator that is not visible here -- confirm.
    arg1 = (PC & ExprInt(0xfffffffffffff000, 64)) + arg2
def tbnz(arg1, arg2, arg3):
    # NOTE(review): `ir` and `instr` are not parameters; presumably they are
    # provided by a semantic-building decorator that is not visible here --
    # confirm.
    # Mask with a single bit set, at position @arg2
    bitmask = ExprInt(1, arg1.size) << arg2
    # Destination: @arg3 when the selected bit of @arg1 is set, otherwise
    # the location following the instruction
    dst = arg3 if arg1 & bitmask else ExprLoc(ir.get_next_loc_key(instr), 64)
    # Both PC and the IR destination receive the selected destination
    PC = dst
    ir.IRDst = dst
def sdiv(arg1, arg2, arg3):
    # NOTE(review): `exception_flags.size` is read in the statement that
    # assigns `exception_flags`; presumably this body is consumed by a
    # semantic-building decorator that is not visible here -- confirm.
    if arg3:
        # Non-zero divisor: @arg1 receives the 'sdiv' operation of @arg2
        # by @arg3
        arg1 = ExprOp('sdiv', arg2, arg3)
    else:
        # Zero divisor: flag the EXCEPT_DIV_BY_ZERO exception instead
        exception_flags = ExprInt(EXCEPT_DIV_BY_ZERO, exception_flags.size)
def update_flag_sub_cf(op1, op2):
    "Compute CF in @op1 - @op2"
    # CF is assigned the "FLAG_SUB_CF" operation of the operands, inverted
    raw_flag = ExprOp("FLAG_SUB_CF", op1, op2)
    inverted = raw_flag ^ ExprInt(1, 1)
    return [ExprAssign(cf, inverted)]
def bl(arg1):
    # NOTE(review): `ir` and `instr` are not parameters; presumably they are
    # provided by a semantic-building decorator that is not visible here --
    # confirm.
    # Both PC and the IR destination receive the target @arg1
    PC = arg1
    ir.IRDst = arg1
    # LR receives the instruction offset plus `instr.l` (presumably the
    # instruction length, i.e. the address of the next instruction)
    LR = ExprInt(instr.offset + instr.l, 64)
def decode(self, v):
    """Decode the raw field value @v into `self.expr`.

    @v is masked with `self.lmask`, incremented by one and stored as a
    32 bit integer expression. Always return True.
    """
    masked = v & self.lmask
    self.expr = ExprInt(masked + 1, 32)
    return True
import warnings

from future.utils import viewitems, viewvalues
from miasm.expression.expression import ExprId, ExprLoc, ExprInt, \
    ExprMem, ExprCond, LocKey, is_expr
from miasm.ir.ir import IRBlock, AssignBlock
from miasm.ir.translators.C import TranslatorC
from miasm.core.asmblock import AsmBlockBad
from miasm.expression.simplifications import expr_simp_high_to_explicit

# C translator built without location database (loc_db=None)
TRANSLATOR_NO_SYMBOL = TranslatorC(loc_db=None)

# Maps a size in bits to the translation, by TRANSLATOR_NO_SYMBOL, of the
# `mask` of an integer expression of that size
SIZE_TO_MASK = {
    size: TRANSLATOR_NO_SYMBOL.from_expr(ExprInt(0, size).mask)
    for size in (1, 2, 3, 7, 8, 16, 32, 64)
}


class Attributes(object):

    """
    Store an irblock attributes

    @log_mn: value stored in `log_mn` (presumably enables mnemonic logging)
    @log_regs: value stored in `log_regs` (presumably enables register
               logging)
    """

    def __init__(self, log_mn=False, log_regs=False):
        # Flags all start as False; `instr` starts as None
        self.mem_read = False
        self.mem_write = False
        self.set_exception = False
        self.log_mn = log_mn
        self.log_regs = log_regs
        self.instr = None
def dst_to_c(self, src):
    """Translate Expr @src into C code

    A @src which is not an expression is first wrapped into an integer
    expression of the size of PC.
    """
    expr = src if is_expr(src) else ExprInt(src, self.PC.size)
    return self.id_to_c(expr)
def lsl(arg1, arg2, arg3):
    # @arg1 receives @arg2 shifted left by @arg3; the shift amount is masked
    # with (size of @arg3) - 1.
    # NOTE(review): assigning to a parameter has no visible effect in plain
    # Python; presumably this body is consumed by a semantic-building
    # decorator that is not visible here -- confirm.
    arg1 = arg2 << (arg3 & ExprInt(arg3.size - 1, arg3.size))
def decode(self, v):
    """Decode the raw field value @v into `self.expr`.

    @v is masked with `self.lmask`, passed to `cpu.sign_ext(..., 9, 32)`
    and stored as a 32 bit integer expression. Always return True.
    """
    masked = v & self.lmask
    self.expr = ExprInt(cpu.sign_ext(masked, 9, 32), 32)
    return True
def lsr(arg1, arg2, arg3):
    # @arg1 receives @arg2 shifted right (`>>`) by @arg3; the shift amount
    # is masked with (size of @arg3) - 1.
    # NOTE(review): assigning to a parameter has no visible effect in plain
    # Python; presumably this body is consumed by a semantic-building
    # decorator that is not visible here -- confirm.
    arg1 = arg2 >> (arg3 & ExprInt(arg3.size - 1, arg3.size))
def decode(self, v):
    """Decode the raw field value @v into `self.expr`.

    The stored 32 bit integer expression is @v minus the integer value of
    `self.parent.epos.expr`, plus one. Always return True.
    """
    epos_value = int(self.parent.epos.expr)
    self.expr = ExprInt(v - epos_value + 1, 32)
    return True
def asr(arg1, arg2, arg3):
    # @arg1 receives the 'a>>' operation of @arg2 by @arg3; the shift amount
    # is masked with (size of @arg3) - 1.
    # NOTE(review): assigning to a parameter has no visible effect in plain
    # Python; presumably this body is consumed by a semantic-building
    # decorator that is not visible here -- confirm.
    arg1 = ExprOp('a>>', arg2, (arg3 & ExprInt(arg3.size - 1, arg3.size)))