def test_Variables_Identifier(self):
    """Check that a Variables_Identifier result expands back to its input.

    For several expressions, the identified variables are substituted back
    into the produced equation (in reverse order of ``vars``) and the
    result is compared with the expression that was analysed.
    """
    import miasm2.expression.expression as m2_expr
    from miasm2.expression.expression_helper import Variables_Identifier

    def expand(identifier):
        """Return ``identifier.equation`` with every variable replaced.

        Replacement is forced in the variable dependency order, i.e. the
        reverse of ``identifier.vars``.
        """
        rebuilt = identifier.equation
        for name, value in reversed(identifier.vars.items()):
            rebuilt = rebuilt.replace_expr({name: value})
        return rebuilt

    # Build a complex expression
    cst = m2_expr.ExprInt(0x100, 16)
    eax = m2_expr.ExprId("EAX")
    ebx = m2_expr.ExprId("EBX")
    ax = eax[0:16]
    low_high = m2_expr.ExprCompose(ax, (eax + ebx)[16:32])
    mem16 = m2_expr.ExprMem((eax + ebx) ^ (eax), size=16)
    mixed = mem16 | ax | mem16 | cst
    exprf = low_high - low_high + m2_expr.ExprCompose(mixed, cst)

    # Identify variables, exercise __str__, then test the result
    vi = Variables_Identifier(exprf)
    print(vi)
    self.assertEqual(exprf, expand(vi))

    # Same check with a custom variable prefix
    vi = Variables_Identifier(exprf, var_prefix="prefix_v")
    print(vi)
    self.assertEqual(exprf, expand(vi))

    # Identify on an expression already containing identifiers
    vi = Variables_Identifier(exprf)
    vi2 = Variables_Identifier(vi.equation)
    self.assertEqual(vi.equation, expand(vi2))

    # Corner case: each sub var depends on itself
    mem1 = m2_expr.ExprMem(ebx, size=32)
    mem2 = m2_expr.ExprMem(mem1, size=32)
    cst2 = m2_expr.ExprInt(-1, 32)
    expr_mini = ((eax ^ mem2 ^ cst2) & (mem2 ^ (eax + mem2)))[31:32]

    vi = Variables_Identifier(expr_mini)
    vi2 = Variables_Identifier(vi.equation)
    self.assertEqual(vi.equation, expand(vi2))
def ast_id2expr(a):
    """Return a 32-bit ``m2_expr.ExprId`` built from ``a``."""
    identifier = m2_expr.ExprId(a, 32)
    return identifier
def tbnz(arg1, arg2, arg3):
    # Pick a destination from one bit of arg1: the mask is the constant 1
    # (of arg1's size) shifted left by arg2. The destination is arg3 when
    # `arg1 & bitmask` holds, otherwise a 64-bit ExprId built from
    # ir.get_next_loc_key(instr). It is then assigned to PC and ir.IRDst.
    # NOTE(review): `ir` and `instr` are not parameters and PC is a plain
    # assignment; presumably this body is rewritten by a semantic-builder
    # decorator that is not visible here -- confirm before restructuring.
    bitmask = m2_expr.ExprInt(1, arg1.size) << arg2
    dst = arg3 if arg1 & bitmask else m2_expr.ExprId(
        ir.get_next_loc_key(instr), 64)
    PC = dst
    ir.IRDst = dst
log_to_c_h = logging.getLogger("ir_helper") console_handler = logging.StreamHandler() console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) log_to_c_h.addHandler(console_handler) log_to_c_h.setLevel(logging.WARN) # Miasm to C translator translator = Translator.to_language("C") prefetch_id = [] prefetch_id_size = {} for size in [8, 16, 32, 64]: prefetch_id_size[size] = [] for i in xrange(20): name = 'pfmem%.2d_%d' % (size, i) c = m2_expr.ExprId(name, size) globals()[name] = c prefetch_id.append(c) prefetch_id_size[size].append(c) def init_arch_C(arch): arch.id2Cid = {} for x in arch.regs.all_regs_ids + prefetch_id: arch.id2Cid[x] = m2_expr.ExprId('((vm_cpu_t*)jitcpu->cpu)->' + str(x), x.size) arch.id2newCid = {} for x in arch.regs.all_regs_ids + prefetch_id: arch.id2newCid[x] = m2_expr.ExprId(
# in Miasm2 engine thanks to : # - Conditions computation in ExprOp # - Simplifications to catch known condition forms # # Conditions currently supported : # <u, <s, == # # Authors : Fabrice DESCLAUX (CEA/DAM), Camille MOUGEY (CEA/DAM) # ################################################################################ import miasm2.expression.expression as m2_expr # Jokers for expression matching jok1 = m2_expr.ExprId("jok1") jok2 = m2_expr.ExprId("jok2") jok3 = m2_expr.ExprId("jok3") jok_small = m2_expr.ExprId("jok_small", 1) # Constructors def __ExprOp_cond(op, arg1, arg2): "Return an ExprOp standing for arg1 op arg2 with size to 1" ec = m2_expr.ExprOp(op, arg1, arg2) return ec def ExprOp_inf_signed(arg1, arg2): "Return an ExprOp standing for arg1 <s arg2"
def __init__(self, ir_arch):
    """Initialise the parent class with ``ir_arch`` and create two ids.

    ``delay_slot_dst`` and ``delay_slot_set`` are ``ExprId`` objects named
    ``branch_dst_irdst`` and ``branch_dst_set`` (no explicit size given).
    """
    super(mipsCGen, self).__init__(ir_arch)
    branch_dst = m2_expr.ExprId("branch_dst_irdst")
    branch_set = m2_expr.ExprId("branch_dst_set")
    self.delay_slot_dst = branch_dst
    self.delay_slot_set = branch_set
def init_arch_C(self):
    """Build the register -> C identifier table and cache the C form of PC.

    Each entry of ``self.ir_arch.arch.regs.all_regs_ids`` is mapped to an
    ``ExprId`` named ``mycpu-><reg>`` with the register's size. Afterwards
    ``self.C_PC`` is set to ``self.id_to_c(self.PC)``.
    """
    self.id_to_c_id = {}
    table = self.id_to_c_id
    registers = self.ir_arch.arch.regs.all_regs_ids
    for reg in registers:
        c_name = 'mycpu->%s' % reg
        table[reg] = m2_expr.ExprId(c_name, reg.size)
    self.C_PC = self.id_to_c(self.PC)
def tbz(arg1, arg2, arg3):
    # Pick a destination from one bit of arg1: the mask is the constant 1
    # (of arg1's size) shifted left by arg2. When `arg1 & bitmask` holds the
    # destination is a 64-bit ExprId built from ir.get_next_label(instr),
    # otherwise it is arg3. It is then assigned to PC and ir.IRDst.
    # NOTE(review): `ir` and `instr` are not parameters and PC is a plain
    # assignment; presumably this body is rewritten by a semantic-builder
    # decorator that is not visible here -- confirm before restructuring.
    bitmask = m2_expr.ExprInt(1, arg1.size) << arg2
    dst = m2_expr.ExprId(
        ir.get_next_label(instr), 64) if arg1 & bitmask else arg3
    PC = dst
    ir.IRDst = dst
# Match the expected form ## isinstance(expr, m2_expr.ExprOp) is not needed: simplifications are ## attached to expression types if expr.op == "+" and \ len(expr.args) == 3 and \ expr.args.count(expr.args[0]) == len(expr.args): # Effective simplification return m2_expr.ExprOp("*", expr.args[0], m2_expr.ExprInt(3, expr.args[0].size)) else: # Do not simplify return expr a = m2_expr.ExprId('a') base_expr = a + a + a print "Without adding the simplification:" print "\t%s = %s" % (base_expr, expr_simp(base_expr)) # Enable pass expr_simp.enable_passes({m2_expr.ExprOp: [simp_add_mul]}) print "After adding the simplification:" print "\t%s = %s" % (base_expr, expr_simp(base_expr)) # Automatic fail assert (expr_simp(base_expr) == m2_expr.ExprOp("*", a, m2_expr.ExprInt(3, a.size)))
def __init__(self, symbol_pool=None):
    """Initialise the base ``ir`` for ``mn_mips32`` with attribute 'b'.

    ``pc`` and ``sp`` come from ``mn_mips32.getpc()`` / ``getsp()`` and
    ``IRDst`` is a 32-bit ``ExprId`` named ``IRDst``.
    """
    ir.__init__(self, mn_mips32, 'b', symbol_pool)
    self.pc, self.sp = mn_mips32.getpc(), mn_mips32.getsp()
    ir_dst = m2_expr.ExprId('IRDst', 32)
    self.IRDst = ir_dst
def parse_txt(mnemo, attrib, txt, symbol_pool=None, gen_label_index=0):
    """Parse assembly text into per-section lists of basic blocks.

    The text is processed line by line: empty lines and ``;`` comments are
    dropped, labels are registered in ``symbol_pool``, directives either
    switch the current section, emit raw data or are ignored, and every
    other line is parsed as an instruction with ``mnemo.fromstring``.
    The collected lines of each section are then grouped into blocks.

    Args:
        mnemo: architecture object; ``fromstring``, ``alignment`` and
            ``regs.all_regs_ids`` are used here.
        attrib: passed unchanged to ``mnemo.fromstring``.
        txt: assembly source, split on ``'\\n'``.
        symbol_pool: label pool; a new ``asmbloc.asm_symbol_pool()`` is
            created when None.
        gen_label_index: not used by this function.

    Returns:
        ``(blocs_sections, symbol_pool)`` where ``blocs_sections`` holds
        three block lists, for the text, data and bss lines in that order.

    Raises:
        ValueError: on a directive that is not recognised.
    """
    if symbol_pool is None:
        symbol_pool = asmbloc.asm_symbol_pool()

    lines_text = []
    lines_data = []
    lines_bss = []

    C_NEXT = asmbloc.asm_constraint.c_next
    C_TO = asmbloc.asm_constraint.c_to

    # Lines are appended to the list of the currently selected section
    lines = lines_text
    # parse each line
    for line in txt.split('\n'):
        # empty
        if re.match(r'\s*$', line):
            continue
        # comment
        if re.match(r'\s*;\S*', line):
            continue
        # labels to forget
        r = re.match(r'\s*\.LF[BE]\d\s*:', line)
        if r:
            continue
        # label beginning with .L
        r = re.match(r'\s*(\.L\S+)\s*:', line)
        if r:
            l = r.groups()[0]
            l = symbol_pool.getby_name_create(l)
            lines.append(l)
            continue
        # directive
        if re.match(r'\s*\.', line):
            r = re.match(r'\s*\.(\S+)', line)
            directive = r.groups()[0]
            if directive == 'text':
                lines = lines_text
                continue
            if directive == 'data':
                lines = lines_data
                continue
            if directive == 'bss':
                lines = lines_bss
                continue
            if directive in ['string', 'ascii']:
                # XXX HACK
                line = line.replace(r'\n', '\n').replace(r'\r', '\r')
                # The literal is delimited by double quotes on both sides.
                # Searching the closing delimiter as a single quote only
                # worked through the -1 "not found" slice bound and broke on
                # apostrophes or trailing characters.
                raw = line[line.find(r'"') + 1:line.rfind(r'"')]
                raw = raw.decode('string_escape')
                if directive == 'string':
                    raw += "\x00"
                lines.append(asmbloc.asm_raw(raw))
                continue
            if directive == 'ustring':
                # XXX HACK
                line = line.replace(r'\n', '\n').replace(r'\r', '\r')
                # Same double-quote delimiters as for 'string' / 'ascii'
                raw = line[line.find(r'"') + 1:line.rfind(r'"')] + "\x00"
                raw = raw.decode('string_escape')
                # Interleave a NUL byte after every character
                raw = "".join([string + '\x00' for string in raw])
                lines.append(asmbloc.asm_raw(raw))
                continue
            if directive in declarator:
                data_raw = line[r.end():].split(' ', 1)[1]
                data_raw = data_raw.split(',')
                size = declarator[directive]
                data_int = []

                # parser
                base_expr = gen_base_expr()[2]
                my_var_parser = parse_ast(
                    lambda x: m2_expr.ExprId(x, size),
                    lambda x: m2_expr.ExprInt_fromsize(size, x))
                base_expr.setParseAction(my_var_parser)

                for b in data_raw:
                    b = b.strip()
                    x = base_expr.parseString(b)[0]
                    data_int.append(x.canonize())

                raw = data_int
                x = asmbloc.asm_raw(raw)
                x.element_size = size
                lines.append(x)
                continue
            if directive == 'comm':
                # TODO
                continue
            if directive == 'split':  # custom command
                # A raw element carrying a `split` attribute ends the block
                x = asmbloc.asm_raw()
                x.split = True
                lines.append(x)
                continue
            if directive == 'dontsplit':  # custom command
                lines.append(asmbloc.asm_raw())
                continue
            if directive == "align":
                align_value = int(line[r.end():])
                lines.append(DirectiveAlign(align_value))
                continue
            # Directives that are accepted and ignored ('align' is already
            # handled above, so its entry here is never reached)
            if directive in [
                    'file', 'intel_syntax', 'globl', 'local', 'type', 'size',
                    'align', 'ident', 'section'
            ]:
                continue
            if directive[0:4] == 'cfi_':
                continue

            raise ValueError("unknown directive %s" % str(directive))

        # label
        r = re.match(r'\s*(\S+)\s*:', line)
        if r:
            l = r.groups()[0]
            l = symbol_pool.getby_name_create(l)
            lines.append(l)
            continue

        # code
        if ';' in line:
            line = line[:line.find(';')]
        line = line.strip(' ').strip('\t')
        instr = mnemo.fromstring(line, attrib)

        # replace orphan asm_label with labels from symbol_pool
        replace_orphan_labels(instr, symbol_pool)

        if instr.dstflow():
            instr.dstflow2label(symbol_pool)
        lines.append(instr)

    asmbloc.log_asmbloc.info("___pre asm oki___")
    # make blocs
    # NOTE(review): the nesting of this state machine was reconstructed from
    # a whitespace-collapsed source; verify it against the reference file.

    blocs_sections = []
    bloc_num = 0
    b = None
    for lines in [lines_text, lines_data, lines_bss]:
        # state 0: no current bloc, state 1: inside a bloc
        state = 0
        i = 0
        blocs = []
        blocs_sections.append(blocs)
        bloc_to_nlink = None
        block_may_link = False
        while i < len(lines):
            # no current bloc
            if state == 0:
                if not isinstance(lines[i], asmbloc.asm_label):
                    # Insert a generated label; it is consumed on the next
                    # iteration by the branch below
                    l = guess_next_new_label(symbol_pool)
                    lines[i:i] = [l]
                else:
                    l = lines[i]
                    b = asmbloc.asm_bloc(l, alignment=mnemo.alignment)
                    b.bloc_num = bloc_num
                    bloc_num += 1
                    blocs.append(b)
                    state = 1
                    i += 1
                    if bloc_to_nlink:
                        bloc_to_nlink.addto(
                            asmbloc.asm_constraint(b.label, C_NEXT))
                        bloc_to_nlink = None

            # in bloc
            elif state == 1:
                if isinstance(lines[i], asmbloc.asm_raw):
                    if hasattr(lines[i], 'split'):
                        state = 0
                        block_may_link = False
                        i += 1
                    else:
                        state = 1
                        block_may_link = True
                        b.addline(lines[i])
                        i += 1
                elif isinstance(lines[i], DirectiveAlign):
                    b.alignment = lines[i].alignment
                    i += 1
                # asmbloc.asm_label
                elif isinstance(lines[i], asmbloc.asm_label):
                    # The label itself is handled in state 0 (i unchanged)
                    if block_may_link:
                        b.addto(asmbloc.asm_constraint(lines[i], C_NEXT))
                        block_may_link = False
                    state = 0
                # instruction
                else:
                    b.addline(lines[i])
                    if lines[i].dstflow():
                        for x in lines[i].getdstflow(symbol_pool):
                            if not isinstance(x, m2_expr.ExprId):
                                continue
                            if x in mnemo.regs.all_regs_ids:
                                continue
                            b.addto(asmbloc.asm_constraint(x, C_TO))

                        # TODO XXX redo this really

                        if not lines[i].breakflow() and i + 1 < len(lines):
                            if isinstance(lines[i + 1], asmbloc.asm_label):
                                l = lines[i + 1]
                            else:
                                l = guess_next_new_label(symbol_pool)
                                lines[i + 1:i + 1] = [l]
                        else:
                            state = 0

                        if lines[i].splitflow():
                            bloc_to_nlink = b
                    if not lines[i].breakflow() or lines[i].splitflow():
                        block_may_link = True
                    else:
                        block_may_link = False

                    i += 1

    for block in blocs_sections[0]:
        asmbloc.log_asmbloc.info(block)

    return blocs_sections, symbol_pool
@sb.parse
def test(Arg1, Arg2, Arg3):
    "Test docstring"
    # This body is handed to the `sb.parse` decorator. It exercises plain
    # assignments, `mem32[...]` reads and writes, a conditional expression,
    # operator calls written as a string literal applied to arguments, and
    # an `if` statement.
    # NOTE(review): `mem32` and `i32` are not defined in the visible code;
    # presumably they are provided by the decorator -- confirm.
    Arg1 = Arg2
    mem32[Arg1] = Arg2
    mem32[Arg2] = Arg3 + i32(4) - mem32[Arg1]
    Arg3 = Arg3 if Arg2 else i32(0)
    tmpvar = 'myop' (i32(2))
    Arg2 = ('myopsize%d' % Arg1.size)(tmpvar, Arg1)
    if not Arg1:
        Arg2 = Arg3

# Driver: call the decorated function on three identifiers and dump the result
a = m2_expr.ExprId('A')
b = m2_expr.ExprId('B')
c = m2_expr.ExprId('C')
ir = IR()
instr = None
res = test(ir, instr, a, b, c)

print "[+] Returned:"
print res
print "[+] DocString:", test.__doc__

print "[+] Cur instr:"
# res[0] is iterated as the statements of the current instruction
for statement in res[0]:
    print statement

print "[+] Blocks:"
def blr(arg1):
    # Assign arg1 to PC and ir.IRDst, and assign to LR a 64-bit ExprId built
    # from ir.get_next_label(instr).
    # NOTE(review): `ir` and `instr` are not parameters, and PC / LR are
    # plain assignments; presumably this body is rewritten by a
    # semantic-builder decorator that is not visible here -- confirm.
    PC = arg1
    ir.IRDst = arg1
    LR = m2_expr.ExprId(ir.get_next_label(instr), 64)
"Test docstring" Arg1 = Arg2 mem32[Arg1] = Arg2 mem32[Arg2] = Arg3 + i32(4) - mem32[Arg1] Arg3 = Arg3 if Arg2 else i32(0) tmpvar = 'myop' (i32(2)) Arg2 = ('myopsize%d' % Arg1.size)(tmpvar, Arg1) alias = Arg1[:24] if not Arg1: Arg2 = Arg3 else: alias = {i16(4), i8(5)} a = m2_expr.ExprId('A', 32) b = m2_expr.ExprId('B', 32) c = m2_expr.ExprId('C', 32) ir = IR() instr = Instr() res = test(ir, instr, a, b, c) print "[+] Returned:" print res print "[+] DocString:", test.__doc__ print "[+] Cur instr:" for statement in res[0]: print statement print "[+] Blocks:"
def __init__(self, symbol_pool=None):
    """Initialise the base ``ir`` for ``mn_aarch64`` with attribute "b".

    ``pc`` and ``sp`` are bound to ``PC`` and ``SP``; ``IRDst`` is a 64-bit
    ``ExprId`` named ``IRDst``.
    """
    ir.__init__(self, mn_aarch64, "b", symbol_pool)
    self.pc, self.sp = PC, SP
    ir_dst = m2_expr.ExprId('IRDst', 64)
    self.IRDst = ir_dst
def __init__(self, symbol_pool=None):
    """Initialise ``IntermediateRepresentation`` for ``mn_mips32`` ('b').

    ``pc`` and ``sp`` come from ``mn_mips32.getpc()`` / ``getsp()`` and
    ``IRDst`` is a 32-bit ``ExprId`` named ``IRDst``.
    """
    IntermediateRepresentation.__init__(self, mn_mips32, 'b', symbol_pool)
    self.pc, self.sp = mn_mips32.getpc(), mn_mips32.getsp()
    ir_dst = m2_expr.ExprId('IRDst', 32)
    self.IRDst = ir_dst
def prepare_symbexec(self, jitter, return_addr):
    """Set up the symbolic execution engine that mirrors `jitter`.

    Installs `self.callback` as the jitter callback, creates a disassembler
    and a symbolic engine, copies the jitter memory into the symbolic state,
    creates one symbolic identifier per prototype argument and initialises
    the C handler and the read/write tracking sets.

    `return_addr` is only stored in `self.return_addr` here.
    """
    # Activate callback on each instr
    jitter.jit.set_options(max_exec_per_call=1, jit_maxline=1)
    #jitter.jit.log_mn = True
    #jitter.jit.log_regs = True
    jitter.exec_cb = self.callback

    # Disassembler
    self.mdis = self.machine.dis_engine(bin_stream_vm(jitter.vm), lines_wd=1)

    # Symbexec engine
    ## Prepare the symbexec engine
    self.symb_ir = self.machine.ir()
    self.symb = EmulatedSymbExecWithModif(jitter.cpu, jitter.vm,
                                          self.symb_ir, {})
    self.symb.enable_emulated_simplifications()

    ## Update registers value
    self.symb.reset_regs()
    self.symb.update_engine_from_cpu()

    ## Load the memory as ExprMem
    self.symb.func_read = None
    self.symb.func_write = None
    for base_addr, mem_segment in jitter.vm.get_all_memory().iteritems():
        # Split into 8 bytes chunk for get_mem_overlapping
        for start in xrange(0, mem_segment["size"], 8):
            # The last chunk of a segment may be shorter than 8 bytes, hence
            # the min(); the ExprMem size is expressed in bits (8 * bytes)
            expr_mem = m2_expr.ExprMem(m2_expr.ExprInt(base_addr + start,
                                                       size=64),
                                       size=8 * min(8, mem_segment["size"] - start))
            # Its initialisation, self.symb.apply_change is not necessary
            self.symb.symbols[expr_mem] = self.symb._func_read(expr_mem)

    ## Save the initial state
    self.symbols_init = self.symb.symbols.copy()

    ## Save the returning address
    self.return_addr = return_addr

    # Inject argument
    self.init_values = {}
    struct_expr_types = {}
    self.args_symbols = []
    for i, param_name in enumerate(self.prototype.args_order):
        cur_arg_abi = self.get_arg_n(i)
        # Symbolic identifier named "arg<index>_<name>", same size as the
        # ABI location of the argument
        cur_arg = m2_expr.ExprId("arg%d_%s" % (i, param_name),
                                 size=cur_arg_abi.size)
        self.init_values[cur_arg] = self.symb.eval_expr(cur_arg_abi)
        arg_type = self.prototype.args[param_name]
        if objc_is_dereferenceable(arg_type):
            # Convert the argument to symbol to track access based on it
            self.symb.apply_change(cur_arg_abi, cur_arg)
        # NOTE(review): the two statements below are assumed to run for every
        # argument, not only dereferenceable ones; the nesting was
        # reconstructed from a whitespace-collapsed source -- confirm.
        struct_expr_types[cur_arg.name] = arg_type
        self.args_symbols.append(cur_arg)

    # Init Expr <-> C conversion
    # Strict access is deliberately not enforced (example: memcpy(struct))
    self.c_handler = CHandler(self.types, struct_expr_types,
                              enforce_strict_access=False)

    # Init output structures
    self.memories_read = set()
    self.memories_write = set()
def cbz(arg1, arg2):
    # Destination is a 64-bit ExprId built from ir.get_next_label(instr) when
    # arg1 holds, otherwise arg2. It is then assigned to PC and ir.IRDst.
    # NOTE(review): `ir` and `instr` are not parameters and PC is a plain
    # assignment; presumably this body is rewritten by a semantic-builder
    # decorator that is not visible here -- confirm before restructuring.
    dst = m2_expr.ExprId(ir.get_next_label(instr), 64) if arg1 else arg2
    PC = dst
    ir.IRDst = dst
def __init__(self, loc_db=None):
    """Initialise the parent class for ``mn_ppc`` with attribute 'b'.

    ``pc`` and ``sp`` come from ``mn_ppc.getpc()`` / ``getsp()``, ``IRDst``
    is a 32-bit ``ExprId`` named ``IRDst`` and ``addrsize`` is 32.
    """
    super(ir_ppc32b, self).__init__(mn_ppc, 'b', loc_db)
    self.pc, self.sp = mn_ppc.getpc(), mn_ppc.getsp()
    ir_dst = expr.ExprId('IRDst', 32)
    self.IRDst = ir_dst
    self.addrsize = 32
def cbnz(arg1, arg2):
    # Destination is arg2 when arg1 holds, otherwise a 64-bit ExprId built
    # from ir.get_next_label(instr). It is then assigned to PC and ir.IRDst.
    # NOTE(review): `ir` and `instr` are not parameters and PC is a plain
    # assignment; presumably this body is rewritten by a semantic-builder
    # decorator that is not visible here -- confirm before restructuring.
    dst = arg2 if arg1 else m2_expr.ExprId(ir.get_next_label(instr), 64)
    PC = dst
    ir.IRDst = dst
def remove_jmp_blocks(self):
    """
    Remove irblock with only IRDst set, by linking its parent destination to
    the block destination.

    A block is a candidate when it has exactly one assignment block, that
    block assigns only ``self.IRDst``, the block has exactly one successor
    in ``self.graph`` and the assigned value satisfies ``expr_is_label``.

    Returns True when at least one parent destination was rewritten.
    """

    # Find candidates
    jmp_blocks = set()
    for block in self.blocks.itervalues():
        if len(block.irs) != 1:
            continue
        assignblk = block.irs[0]
        if len(assignblk) > 1:
            continue
        assert set(assignblk.keys()) == set([self.IRDst])
        if len(self.graph.successors(block.label)) != 1:
            continue
        if not expr_is_label(assignblk[self.IRDst]):
            continue
        jmp_blocks.add(block)

    # Remove them, relink graph
    modified = False
    for block in jmp_blocks:
        dst_label = block.dst.name
        parents = self.graph.predecessors(block.label)
        for lbl in parents:
            parent = self.blocks.get(lbl, None)
            if parent is None:
                continue
            dst = parent.dst
            if dst.is_id(block.label):
                # Direct jump to the removed block
                dst = m2_expr.ExprId(dst_label, dst.size)

                self.graph.discard_edge(lbl, block.label)
                self.graph.discard_edge(block.label, dst_label)

                self.graph.add_uniq_edge(lbl, dst_label)
                modified = True
            elif dst.is_cond():
                # Conditional jump: patch whichever side targets the block
                src1, src2 = dst.src1, dst.src2
                if src1.is_id(block.label):
                    dst = m2_expr.ExprCond(
                        dst.cond,
                        m2_expr.ExprId(dst_label, dst.size),
                        dst.src2)
                    self.graph.discard_edge(lbl, block.label)
                    self.graph.discard_edge(block.label, dst_label)
                    self.graph.add_uniq_edge(lbl, dst_label)
                    modified = True
                if src2.is_id(block.label):
                    dst = m2_expr.ExprCond(
                        dst.cond,
                        dst.src1,
                        m2_expr.ExprId(dst_label, dst.size))
                    self.graph.discard_edge(lbl, block.label)
                    self.graph.discard_edge(block.label, dst_label)
                    self.graph.add_uniq_edge(lbl, dst_label)
                    modified = True
                if dst.src1 == dst.src2:
                    # Both sides now agree: collapse to the rewritten
                    # source. `src1` holds the value from before the
                    # rewrite and may still name the removed block.
                    dst = dst.src1
            else:
                continue
            parent.dst = dst

    # Remove unlinked useless nodes
    for block in jmp_blocks:
        if (len(self.graph.predecessors(block.label)) == 0 and
                len(self.graph.successors(block.label)) == 0):
            self.graph.del_node(block.label)
    return modified
def tbnz(arg1, arg2, arg3):
    # Pick a destination from one bit of arg1: the mask is
    # m2_expr.ExprInt_from(arg1, 1) shifted left by arg2. The destination is
    # arg3 when `arg1 & bitmask` holds, otherwise a 64-bit ExprId built from
    # ir.get_next_label(instr). It is then assigned to PC and ir.IRDst.
    # NOTE(review): `ir` and `instr` are not parameters and PC is a plain
    # assignment; presumably this body is rewritten by a semantic-builder
    # decorator that is not visible here -- confirm before restructuring.
    bitmask = m2_expr.ExprInt_from(arg1, 1) << arg2
    dst = arg3 if arg1 & bitmask else m2_expr.ExprId(
        ir.get_next_label(instr), 64)
    PC = dst
    ir.IRDst = dst
def __init__(self, loc_db=None):
    """Initialise ``IntermediateRepresentation`` for ``mn_mips32`` ('b').

    ``addrsize`` is set to 32 before the parent initialiser runs. ``pc``
    and ``sp`` come from ``mn_mips32.getpc()`` / ``getsp()`` and ``IRDst``
    is a 32-bit ``ExprId`` named ``IRDst``.
    """
    # Kept ahead of the parent initialiser, as in the original ordering
    self.addrsize = 32
    IntermediateRepresentation.__init__(self, mn_mips32, 'b', loc_db)
    self.pc, self.sp = mn_mips32.getpc(), mn_mips32.getsp()
    ir_dst = m2_expr.ExprId('IRDst', 32)
    self.IRDst = ir_dst
def b_ne(arg1):
    # Destination is a 64-bit ExprId built from ir.get_next_label(instr) when
    # `zf` holds, otherwise arg1. It is then assigned to PC and ir.IRDst.
    # NOTE(review): `zf`, `ir` and `instr` are free names and PC is a plain
    # assignment; presumably this body is rewritten by a semantic-builder
    # decorator that is not visible here -- confirm before restructuring.
    dst = m2_expr.ExprId(ir.get_next_label(instr), 64) if zf else arg1
    PC = dst
    ir.IRDst = dst
def gen_reg(reg_name, sz=32):
    """Gen reg expr and parser

    Returns a ``(reg, reginfo)`` pair: an ``ExprId`` named ``reg_name`` of
    size ``sz``, and the ``reg_info`` built from that single name and
    expression.
    """
    reg = m2_expr.ExprId(reg_name, sz)
    names, exprs = [reg_name], [reg]
    return reg, reg_info(names, exprs)
def b_eq(arg1):
    # Destination is arg1 when `zf` holds, otherwise a 64-bit ExprId built
    # from ir.get_next_label(instr). It is then assigned to PC and ir.IRDst.
    # NOTE(review): `zf`, `ir` and `instr` are free names and PC is a plain
    # assignment; presumably this body is rewritten by a semantic-builder
    # decorator that is not visible here -- confirm before restructuring.
    dst = arg1 if zf else m2_expr.ExprId(ir.get_next_label(instr), 64)
    PC = dst
    ir.IRDst = dst
def __init__(self, loc_db=None):
    """Initialise ``IntermediateRepresentation`` for ``mn_aarch64`` ("b").

    ``pc`` and ``sp`` are bound to ``PC`` and ``SP``; ``IRDst`` is a 64-bit
    ``ExprId`` named ``IRDst``.
    """
    IntermediateRepresentation.__init__(self, mn_aarch64, "b", loc_db)
    self.pc, self.sp = PC, SP
    ir_dst = m2_expr.ExprId('IRDst', 64)
    self.IRDst = ir_dst
def b_lt(arg1):
    # The condition is looked up as cond2expr['LT']. Destination is arg1 when
    # it holds, otherwise a 64-bit ExprId built from
    # ir.get_next_label(instr). It is then assigned to PC and ir.IRDst.
    # NOTE(review): `cond2expr`, `ir` and `instr` are free names and PC is a
    # plain assignment; presumably this body is rewritten by a
    # semantic-builder decorator that is not visible here -- confirm.
    cond = cond2expr['LT']
    dst = arg1 if cond else m2_expr.ExprId(ir.get_next_label(instr), 64)
    PC = dst
    ir.IRDst = dst
def __init__(self, symbol_pool=None):
    """Initialise ``IntermediateRepresentation`` for ``mn_aarch64`` ("l").

    ``pc`` and ``sp`` are bound to ``PC`` and ``SP``, ``IRDst`` is a 64-bit
    ``ExprId`` named ``IRDst`` and ``addrsize`` is 64.
    """
    IntermediateRepresentation.__init__(self, mn_aarch64, "l", symbol_pool)
    self.pc, self.sp = PC, SP
    ir_dst = m2_expr.ExprId('IRDst', 64)
    self.IRDst = ir_dst
    self.addrsize = 64
# Match the expected form ## isinstance(expr, m2_expr.ExprOp) is not needed: simplifications are ## attached to expression types if expr.op == "+" and \ len(expr.args) == 3 and \ expr.args.count(expr.args[0]) == len(expr.args): # Effective simplification return m2_expr.ExprOp("*", expr.args[0], m2_expr.ExprInt(3, expr.args[0].size)) else: # Do not simplify return expr a = m2_expr.ExprId('a', 32) base_expr = a + a + a print "Without adding the simplification:" print "\t%s = %s" % (base_expr, expr_simp(base_expr)) # Enable pass expr_simp.enable_passes({m2_expr.ExprOp: [simp_add_mul]}) print "After adding the simplification:" print "\t%s = %s" % (base_expr, expr_simp(base_expr)) # Automatic fail assert (expr_simp(base_expr) == m2_expr.ExprOp("*", a, m2_expr.ExprInt(3, a.size)))