Exemple #1
0
def is_local_variable(expr, ir_arch_a, mn):
    """Tell whether @expr is a memory access below the initial stack pointer.

    @expr: expression to inspect
    @ir_arch_a: object whose `sp` attribute is the stack pointer register
    @mn: object exposing `regs.regs_init`, indexed with that stack pointer

    Returns True when @expr is a memory expression whose pointer, once the
    initial stack pointer value is subtracted and the result simplified, is
    an integer that is signed-lower than zero. Returns None otherwise.
    """
    if expr.is_mem():
        pointer = expr.ptr
        sp_init = mn.regs.regs_init[ir_arch_a.sp]
        offset = expr_simp(pointer - sp_init)
        if offset.is_int():
            zero = ExprInt(0, offset.size)
            if int(expr_simp(expr_is_signed_lower(offset, zero))):
                return True
    return None
Exemple #2
0
def emul_symb(ir_arch, ircfg, mdis, states_todo, states_done):
    """Symbolically execute blocks until @states_todo is empty.

    A state is a tuple `(addr, symbols, conds)`. Each popped state that is
    not already in @states_done is recorded there, its block is fetched with
    `get_block` and executed by a fresh SymbolicExecutionEngine seeded with a
    copy of the state's symbols. The resulting destination decides which new
    states are pushed to @states_todo.

    @ir_arch: forwarded to SymbolicExecutionEngine and `get_block`
    @ircfg: forwarded to `get_block`
    @mdis: forwarded to `get_block`
    @states_todo: work list of states; consumed with pop(), extended with add()
    @states_done: collection of already explored states (membership + add())

    Raises ValueError when a destination is neither an integer, a location,
    a condition between two such destinations, nor the module-level
    `ret_addr`.
    """
    while states_todo:
        addr, symbols, conds = states_todo.pop()
        print('*' * 40, "addr", addr, '*' * 40)
        state = (addr, symbols, conds)
        if state in states_done:
            print('Known state, skipping', addr)
            continue
        states_done.add(state)

        symbexec = SymbolicExecutionEngine(ir_arch)
        symbexec.symbols = symbols.copy()
        # The program counter of the previous block must not leak in
        if ir_arch.pc in symbexec.symbols:
            del symbexec.symbols[ir_arch.pc]
        irblock = get_block(ir_arch, ircfg, mdis, addr)

        print('Run block:')
        print(irblock)
        addr = symbexec.eval_updt_irblock(irblock)
        print('Final state:')
        symbexec.dump(mems=False)

        assert addr is not None
        if isinstance(addr, ExprCond):
            # Create 2 states, each including complementary conditions
            cond_group_a = {addr.cond: ExprInt(0, addr.cond.size)}
            cond_group_b = {addr.cond: ExprInt(1, addr.cond.size)}
            addr_a = expr_simp(
                symbexec.eval_expr(addr.replace_expr(cond_group_a), {}))
            addr_b = expr_simp(
                symbexec.eval_expr(addr.replace_expr(cond_group_b), {}))
            # BOTH destinations must be an integer or a location. The
            # parentheses are required: `and` binds tighter than `or`.
            if not ((addr_a.is_int() or addr_a.is_loc()) and
                    (addr_b.is_int() or addr_b.is_loc())):
                print(str(addr_a), str(addr_b))
                raise ValueError("Unsupported condition")
            if isinstance(addr_a, ExprInt):
                addr_a = int(addr_a.arg)
            if isinstance(addr_b, ExprInt):
                addr_b = int(addr_b.arg)
            states_todo.add(
                (addr_a, symbexec.symbols.copy(),
                 tuple(list(conds) + list(viewitems(cond_group_a)))))
            states_todo.add(
                (addr_b, symbexec.symbols.copy(),
                 tuple(list(conds) + list(viewitems(cond_group_b)))))
        elif addr == ret_addr:
            print('Return address reached')
            continue
        elif addr.is_int():
            addr = int(addr.arg)
            states_todo.add((addr, symbexec.symbols.copy(), tuple(conds)))
        elif addr.is_loc():
            states_todo.add((addr, symbexec.symbols.copy(), tuple(conds)))
        else:
            raise ValueError("Unsupported destination")
Exemple #3
0
    def evaluate_expression(expr: Expr, inputs_array: List[int]) -> int:
        """
        Evaluates an expression for an array of random values.

        Each input variable p0, p1, ..., pn is associated with an
        entry in the array of inputs [i0, i1, ..., in]. In the given
        expression, we replace p0 with i0, p1 with i1 etc. and evaluate
        the expression. As a result, the expression results in a
        final constant in form of ExprInt.

        Variables whose name is not exactly "p" followed by one or more
        digits are left untouched.

        Args:
            expr: Expression to evaluate
            inputs_array: List of random values.

        Returns:
            Int that is the return value of the evaluated expression.
        """
        # dictionary of replacements
        replacements = {}
        # walk over unique variables in the expression
        for v in get_unique_variables(expr):
            # Only names of the exact form p<digits> carry an index. A prefix
            # match would also accept e.g. "pc" or "p" and crash in int().
            if not re.fullmatch(r"p[0-9]+", v.name):
                continue
            # the index is everything after the leading "p"
            index = int(v.name[1:])
            # insert into replacements dictionary
            replacements[v] = ExprInt(inputs_array[index], v.size)

        return int(expr_simp(expr.replace_expr(replacements)))
Exemple #4
0
 def _get_strings_from_dse(self, dse):
     """Gather byte sequences from the memory modified in @dse.

     Every modified memory entry whose address and content both evaluate
     to integers is kept, ordered by address. Entries that directly follow
     one another are concatenated (little endian); each completed run is
     handed to `self._update_strings_from_sequence` together with the
     result set, which is finally returned.
     """
     def little_endian(content):
         # Serialize an integer expression on its own byte width
         return int(content).to_bytes(content.size // 8, "little")

     writes = SortedList(key=lambda entry: int(entry[0]))
     for mem_expr, _ in dse.symb.modified(ids=False, mems=True):
         try:
             content = dse.eval_expr(mem_expr)
             location = dse.eval_expr(mem_expr.ptr)
         except RuntimeError:
             continue
         if location.is_int() and content.is_int():
             writes.add((location, content))

     expected_next = None
     pending = b""
     found = set()
     for location, content in writes:
         if expected_next == location:
             # Contiguous with the previous write: extend the current run
             pending += little_endian(content)
         else:
             # Gap: flush the current run and start a new one
             self._update_strings_from_sequence(pending, found)
             pending = little_endian(content)
         expected_next = expr_simp(
             location + ExprInt(content.size // 8, location.size))
     self._update_strings_from_sequence(pending, found)
     return found
Exemple #5
0
    def _expr_str_to_equiv_class(self, expr_str: str) -> Tuple[str, Expr]:
        """
        Maps a Miasm IR expression, given as a string, to its
        equivalence class.

        Used as part of the parallel computation in `gen_oracle_map`.

        Args:
            expr_str: String containing a Miasm IR expression from the
                      pre-computed library.

        Returns:
            Tuple of equivalence class and expression (the expression is
            the simplified one, converted by the AST translator).
        """
        ast_translator = AbstractSyntaxTreeTranslator()
        # NOTE(review): eval() runs arbitrary code -- presumably expr_str
        # only ever comes from the trusted pre-computed library; confirm.
        parsed = eval(expr_str)
        # simplify, then transform into abstract syntax tree
        tree = ast_translator.from_expr(expr_simp(parsed))
        # output behavior decides the equivalence class
        behavior = self.get_outputs(tree)
        return (self.determine_equiv_class(tree, behavior), tree)
Exemple #6
0
def emul_symb(ir_arch, ircfg, mdis, states_todo, states_done):
    """Explore the states of @states_todo with symbolic execution.

    States are `(addr, symbols, conds)` tuples. A popped state already in
    @states_done is skipped; otherwise it is added there, the block at its
    address is obtained through `get_block` and run in a new
    SymbolicExecutionEngine initialised with a copy of its symbols. The
    destination computed for the block determines the successor states
    added to @states_todo.

    @ir_arch: forwarded to SymbolicExecutionEngine and `get_block`
    @ircfg: forwarded to `get_block`
    @mdis: forwarded to `get_block`
    @states_todo: pending states (pop() / add())
    @states_done: explored states (membership test / add())

    Raises ValueError for a destination that is not an integer, a location,
    a condition between two such destinations, or the module-level
    `ret_addr`.
    """
    while states_todo:
        addr, symbols, conds = states_todo.pop()
        print('*' * 40, "addr", addr, '*' * 40)
        if (addr, symbols, conds) in states_done:
            print('Known state, skipping', addr)
            continue
        states_done.add((addr, symbols, conds))
        symbexec = SymbolicExecutionEngine(ir_arch)
        symbexec.symbols = symbols.copy()
        # Drop the stale program counter inherited from the previous block
        if ir_arch.pc in symbexec.symbols:
            del symbexec.symbols[ir_arch.pc]
        irblock = get_block(ir_arch, ircfg, mdis, addr)

        print('Run block:')
        print(irblock)
        addr = symbexec.eval_updt_irblock(irblock)
        print('Final state:')
        symbexec.dump(mems=False)

        assert addr is not None
        if isinstance(addr, ExprCond):
            # Create 2 states, each including complementary conditions
            cond_group_a = {addr.cond: ExprInt(0, addr.cond.size)}
            cond_group_b = {addr.cond: ExprInt(1, addr.cond.size)}
            addr_a = expr_simp(symbexec.eval_expr(addr.replace_expr(cond_group_a), {}))
            addr_b = expr_simp(symbexec.eval_expr(addr.replace_expr(cond_group_b), {}))
            # Each side has to be an integer or a location; without the
            # explicit grouping `and` would bind tighter than `or`.
            a_supported = addr_a.is_int() or addr_a.is_loc()
            b_supported = addr_b.is_int() or addr_b.is_loc()
            if not (a_supported and b_supported):
                print(str(addr_a), str(addr_b))
                raise ValueError("Unsupported condition")
            if isinstance(addr_a, ExprInt):
                addr_a = int(addr_a.arg)
            if isinstance(addr_b, ExprInt):
                addr_b = int(addr_b.arg)
            states_todo.add((addr_a, symbexec.symbols.copy(), tuple(list(conds) + list(viewitems(cond_group_a)))))
            states_todo.add((addr_b, symbexec.symbols.copy(), tuple(list(conds) + list(viewitems(cond_group_b)))))
        elif addr == ret_addr:
            print('Return address reached')
            continue
        elif addr.is_int():
            addr = int(addr.arg)
            states_todo.add((addr, symbexec.symbols.copy(), tuple(conds)))
        elif addr.is_loc():
            states_todo.add((addr, symbexec.symbols.copy(), tuple(conds)))
        else:
            raise ValueError("Unsupported destination")
Exemple #7
0
def arm_guess_jump_table(
    mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis, loc_db):
    """Guess the targets of a jump table used by @cur_bloc.

    The block is lifted to IR. For every IR block whose program counter is
    assigned from a memory read at `<something> + <integer>`, the integer is
    taken as the table base and 32 bit entries are read from @pool_bin until
    a read fails or an entry is too far from the base (heuristic).

    @mnemo, @attrib: forwarded to `get_ira`
    @pool_bin: object providing `getbytes(offset, length)`
    @cur_bloc: assembly block; receives one AsmConstraintTo per target
    @offsets_to_dis: collection receiving every guessed target (add())
    @loc_db: location database used to create the target locations

    Raises NotImplementedError when the address expression does not match
    the expected pattern.
    """
    ira = get_ira(mnemo, attrib)

    jra = ExprId('jra')
    jrb = ExprId('jrb')

    sp = LocationDB()
    ir_arch = ira(sp)
    ircfg = ira.new_ircfg()
    ir_arch.add_asmblock_to_ircfg(cur_bloc, ircfg)

    max_table_entry = 10000
    max_diff_addr = 0x100000  # heuristic

    for irblock in viewvalues(ircfg.blocks):
        # Keep the last source assigned to the program counter
        pc_val = None
        for exprs in irblock:
            for e in exprs:
                if e.dst == ir_arch.pc:
                    pc_val = e.src
        if pc_val is None:
            continue
        if not isinstance(pc_val, ExprMem):
            continue
        assert pc_val.size == 32
        print(pc_val)
        ad = pc_val.arg
        ad = expr_simp(ad)
        print(ad)
        res = match_expr(ad, jra + jrb, {jra, jrb})
        if res is False:
            raise NotImplementedError('not fully functional')
        print(res)
        if not isinstance(res[jrb], ExprInt):
            raise NotImplementedError('not fully functional')
        base_ad = int(res[jrb])
        print(base_ad)

        addrs = set()
        # Indexes 0..max_table_entry inclusive, as the original counter did
        for i in range(max_table_entry + 1):
            try:
                entry = upck32(pool_bin.getbytes(base_ad + 4 * i, 4))
            except Exception:
                # Unreadable entry: end of the table. KeyboardInterrupt and
                # SystemExit are deliberately not caught.
                break
            if abs(entry - base_ad) > max_diff_addr:
                break
            addrs.add(entry)
        print([hex(x) for x in addrs])

        for target in addrs:
            offsets_to_dis.add(target)
            target_loc = loc_db.get_or_create_offset_location(target)
            cur_bloc.addto(AsmConstraintTo(target_loc))
Exemple #8
0
def arm_guess_jump_table(dis_engine, cur_block, offsets_to_dis):
    """Guess the targets of a jump table used by @cur_block.

    The block is lifted to IR. For every IR block whose program counter is
    assigned from a memory read at `<something> + <integer>`, the integer is
    taken as the table base and 32 bit entries are read from the engine's
    binary stream until a read fails or an entry is too far from the base
    (heuristic).

    @dis_engine: disassembly engine; its `arch`, `attrib`, `loc_db` and
                 `bin_stream` attributes are used
    @cur_block: assembly block; receives one AsmConstraintTo per target
    @offsets_to_dis: collection receiving every guessed target (add())

    Raises NotImplementedError when the address expression does not match
    the expected pattern.
    """
    arch = dis_engine.arch
    loc_db = dis_engine.loc_db
    ira = get_ira(arch, dis_engine.attrib)

    jra = ExprId('jra')
    jrb = ExprId('jrb')

    ir_arch = ira(loc_db)
    ircfg = ira.new_ircfg()
    ir_arch.add_asmblock_to_ircfg(cur_block, ircfg)

    max_table_entry = 10000
    max_diff_addr = 0x100000  # heuristic

    for irblock in viewvalues(ircfg.blocks):
        # Keep the last source assigned to the program counter
        pc_val = None
        for exprs in irblock:
            for e in exprs:
                if e.dst == ir_arch.pc:
                    pc_val = e.src
        if pc_val is None:
            continue
        if not isinstance(pc_val, ExprMem):
            continue
        assert pc_val.size == 32
        print(pc_val)
        ad = pc_val.arg
        ad = expr_simp(ad)
        print(ad)
        res = match_expr(ad, jra + jrb, {jra, jrb})
        if res is False:
            raise NotImplementedError('not fully functional')
        print(res)
        if not isinstance(res[jrb], ExprInt):
            raise NotImplementedError('not fully functional')
        base_ad = int(res[jrb])
        print(base_ad)

        addrs = set()
        # Indexes 0..max_table_entry inclusive, as the original counter did
        for i in range(max_table_entry + 1):
            try:
                entry = upck32(
                    dis_engine.bin_stream.getbytes(base_ad + 4 * i, 4))
            except Exception:
                # Unreadable entry: end of the table. KeyboardInterrupt and
                # SystemExit are deliberately not caught.
                break
            if abs(entry - base_ad) > max_diff_addr:
                break
            addrs.add(entry)
        print([hex(x) for x in addrs])

        for target in addrs:
            offsets_to_dis.add(target)
            target_loc = loc_db.get_or_create_offset_location(target)
            cur_block.addto(AsmConstraintTo(target_loc))
Exemple #9
0
 def propag_expr_cst(self, expr):
     """Replace identifiers of @expr by their constant value
     @expr: Expression to update

     Each identifier read by @expr is evaluated; those whose value is
     accepted by `is_expr_cst` are substituted, and the simplified
     result is returned."""
     substitutions = {}
     for candidate in expr.get_r(mem_read=True):
         # Only ExprId can be safely propagated
         if candidate.is_id():
             resolved = self.eval_expr(candidate)
             if self.is_expr_cst(self.ir_arch, resolved):
                 substitutions[candidate] = resolved
     return expr_simp(expr.replace_expr(substitutions))
Exemple #10
0
 def propag_expr_cst(self, expr):
     """Substitute constant-valued identifiers in @expr
     @expr: Expression to update

     Returns the simplified expression obtained after replacing every
     identifier read by @expr whose evaluated value satisfies
     `is_expr_cst`."""
     constants = {}
     for read_element in expr.get_r(mem_read=True):
         # Only ExprId can be safely propagated
         if read_element.is_id():
             evaluated = self.eval_expr(read_element)
             if self.is_expr_cst(self.ir_arch, evaluated):
                 constants[read_element] = evaluated
     propagated = expr.replace_expr(constants)
     return expr_simp(propagated)
Exemple #11
0
def get_assignblock_for_state(ircfg, ir_arch, symbols_init, state_register,
                              state):
    """Collect the IR blocks reached when @state_register equals @state.

    For each node of @ircfg the destination of its block is computed
    (directly for single-assignblock blocks, through symbolic execution
    otherwise). When that destination is a condition of the form
    `<state register> == <state>`, the block targeted by the matching
    branch is appended to the result.

    @ircfg: IR control flow graph (nodes(), get_block(), loc_db)
    @ir_arch: forwarded to SymbolicExecutionEngine
    @symbols_init: initial symbols; also indexed with @state_register
    @state_register: register holding the state value
    @state: state value to look for

    Returns the list of referenced blocks. Terminates the process through
    sys.exit() when a node has no IR block.
    """
    referenced_blocks = []

    for cfgnode in ircfg.nodes():
        irblock = ircfg.get_block(cfgnode)
        if not irblock:
            print('[!] Could not get IRBLOCK!')
            sys.exit()
        if len(irblock.assignblks) == 1:
            _next_addr = irblock.dst
        else:
            _symbolic_engine = SymbolicExecutionEngine(ir_arch, symbols_init)
            _next_addr = _symbolic_engine.run_block_at(
                ircfg, get_address(ircfg.loc_db, cfgnode))
            if _next_addr is None:
                continue
            _next_addr = expr_simp(_next_addr)

        if isinstance(_next_addr, ExprCond) and \
                isinstance(_next_addr.cond, ExprOp) and \
                _next_addr.cond.op == '==':
            args = _next_addr.cond

            # Descend through the first operands until an operation whose
            # first operand is an identifier (or a non-operation) is found
            while not isinstance(args.args[0], ExprId):
                if hasattr(args, 'args'):
                    args = args.args[0]

                    if not isinstance(args, ExprOp):
                        break

            if hasattr(args, 'args') and \
                    args.args[0] in (state_register, symbols_init[state_register]) and \
                    args.args[1] == state:

                block = ircfg.get_block(cfgnode)
                # Exact comparison: `in ('CC_S>')` was a substring test on a
                # plain string, not a membership test on a tuple.
                if hasattr(block.dst.cond, 'op') and \
                        block.dst.cond.op == 'CC_S>':
                    dst = get_address(ircfg.loc_db, block.dst.src2.loc_key)
                    next_block = ircfg.get_block(dst)
                    dst = get_address(ircfg.loc_db,
                                      next_block.dst.src1.loc_key)
                else:
                    dst = get_address(ircfg.loc_db, block.dst.src1.loc_key)

                referenced_block = ircfg.get_block(dst)
                referenced_blocks.append(referenced_block)
    return referenced_blocks
Exemple #12
0
    def gen_from_expression(expr: Expr, variables: List[Expr], num_samples: int) -> SynthesisOracle:
        """
        Creates a SynthesisOracle by sampling a given expression.

        `num_samples` independent I/O pairs are computed. For each of them:

        1. One random value per variable is drawn and wrapped into a Miasm
           integer of the variable's size.
        2. Every variable of the expression is replaced by its value and
           the result is simplified (constant propagation).
        3. The tuple of input values is mapped to the resulting integer
           expression.

        Args:
            expr (Expr): Expression representing a function f(x0, ..., xi).
            variables (List[Expr]): List of variables contained in `expr`.
            num_samples (int): Number of I/O samples to evaluate.

        Returns:
            SynthesisOracle: Generated SynthesisOracle instance.
        """
        synthesis_map = {}

        for _ in range(num_samples):
            # one (variable, random constant) pair per variable, in order
            assignments = [
                (v, ExprInt(get_rand_input(), v.size)) for v in variables
            ]
            # substitute and constant-propagate to obtain the output
            output = expr_simp(expr.replace_expr(dict(assignments)))
            # output should be an ExprInt
            assert output.is_int()
            # the inputs, as a tuple, identify the sample
            sample_inputs = tuple(pair[1] for pair in assignments)
            synthesis_map[sample_inputs] = output

        return SynthesisOracle(synthesis_map)
Exemple #13
0
    def elements(self):
        """Return a one-element set holding the element selected in `cbReg`.

        A selection listed in `stk_args` yields the simplified memory
        expression of that stack argument and forces stack unaliasing.
        Any other non-empty selection is looked up among the architecture
        registers by name (None when unknown). An empty selection raises
        ValueError.
        """
        selected = self.cbReg.value
        if selected in self.stk_args:
            instr = self.ircfg.blocks[self.loc_key][self.line_nb].instr
            arg_index = self.stk_args[selected]
            sp_delta = m2_expr.ExprInt(idc.get_spd(instr.offset), ir_arch.sp.size)
            arg_offset = m2_expr.ExprInt(
                self.ira.sp.size // 8 * arg_index, ir_arch.sp.size)
            stack_slot = m2_expr.ExprMem(
                self.mn.regs.regs_init[ir_arch.sp] + sp_delta + arg_offset,
                self.ira.sp.size)
            stack_slot = expr_simp(stack_slot)
            # Force stack unaliasing
            self.stk_unalias_force = True
            return set([stack_slot])

        if not selected:
            raise ValueError("Unknown element '%s'!" % selected)

        register = self.ira.arch.regs.all_regs_ids_byname.get(selected, None)
        return set([register])
Exemple #14
0
    def eval_updt_irblock(self, irb, step=False):
        """
        Symbolic execution of the @irb on the current state
        @irb: irblock instance
        @step: display intermediate steps (not read by this method's body)

        While executing, the C representation and type of every expression
        involved in an assignment block are gathered per instruction offset,
        then written as comments through idc.set_cmt and printed.
        Returns the evaluation of the lifter's IRDst after the last block.
        """

        # Instruction offset -> set of comment strings to attach to it
        offset2cmt = {}
        for index, assignblk in enumerate(irb):
            if set(assignblk) == set([self.lifter.IRDst, self.lifter.pc]):
                # Don't display on jxx
                # (such a block is skipped entirely, state update included)
                continue
            instr = assignblk.instr
            tmp_r = assignblk.get_r()
            tmp_w = assignblk.get_w()

            # Expressions to annotate for this assignment block
            todo = set()

            # Replace PC with value to match IR args
            pc_fixed = {
                self.lifter.pc:
                m2_expr.ExprInt(instr.offset + instr.l, self.lifter.pc.size)
            }
            # NOTE: `inputs` aliases `tmp_r`, so the update below extends the
            # read set in place with the written memory expressions
            inputs = tmp_r
            inputs.update(arg for arg in tmp_w if arg.is_mem())
            for arg in inputs:
                arg = expr_simp(arg.replace_expr(pc_fixed))
                # Written non-memory destinations are not annotated
                if arg in tmp_w and not arg.is_mem():
                    continue
                todo.add(arg)

            for expr in todo:
                if expr.is_int():
                    continue
                for c_str, c_type in self.chandler.expr_to_c_and_types(
                        expr, self.symbols):
                    # `expr` is rebound here: the comment shows the linked
                    # expression from cst_propag_link when one is recorded
                    # for this (block, index), the expression itself otherwise
                    expr = self.cst_propag_link.get((irb.loc_key, index),
                                                    {}).get(expr, expr)
                    offset2cmt.setdefault(instr.offset, set()).add(
                        "\n%s: %s\n%s" % (expr, c_str, c_type))
            # Apply the assignment block to the symbolic state
            self.eval_updt_assignblk(assignblk)
        # Emit the gathered comments, one per instruction offset
        for offset, value in viewitems(offset2cmt):
            idc.set_cmt(offset, '\n'.join(value), 0)
            print("%x\n" % offset, '\n'.join(value))

        return self.eval_expr(self.lifter.IRDst)
Exemple #15
0
    def elements(self):
        """Return a one-element set holding the element selected in `cbReg`.

        A selection listed in `stk_args` yields the simplified memory
        expression of that stack argument and forces stack unaliasing.
        Any other non-empty selection is looked up among the architecture
        registers by name (None when unknown). An empty selection raises
        ValueError.
        """
        selected = self.cbReg.value
        if selected in self.stk_args:
            instr = self.ircfg.blocks[self.loc_key][self.line_nb].instr
            arg_index = self.stk_args[selected]
            sp_delta = m2_expr.ExprInt(idc.GetSpd(instr.offset), ir_arch.sp.size)
            arg_offset = m2_expr.ExprInt(
                self.ira.sp.size // 8 * arg_index, ir_arch.sp.size)
            stack_slot = m2_expr.ExprMem(
                mn.regs.regs_init[ir_arch.sp] + sp_delta + arg_offset,
                self.ira.sp.size)
            stack_slot = expr_simp(stack_slot)
            # Force stack unaliasing
            self.stk_unalias_force = True
            return set([stack_slot])

        if not selected:
            raise ValueError("Unknown element '%s'!" % selected)

        register = self.ira.arch.regs.all_regs_ids_byname.get(selected, None)
        return set([register])
Exemple #16
0
    def resolve_args_with_symbols(self, symbols=None):
        """Return the arguments with their locations replaced by values.

        @symbols: location database used to resolve the locations found in
                  the arguments; a new empty LocationDB when None

        For each location of each argument:
        - a location named b'$' is replaced by `get_asm_offset`;
        - a location named b'_' is replaced by `get_asm_next_offset`;
        - a location with a known offset is replaced by that offset as an
          integer of the location's size.
        Every argument is then simplified.

        Raises ValueError when a location has no known offset.
        """
        if symbols is None:
            symbols = LocationDB()
        args_out = []
        for expr in self.args:
            fixed_expr = {}
            for exprloc in m2_expr.get_expr_locs(expr):
                loc_key = exprloc.loc_key
                names = symbols.get_location_names(loc_key)
                # special symbols
                if b'$' in names:
                    fixed_expr[exprloc] = self.get_asm_offset(exprloc)
                    continue
                if b'_' in names:
                    fixed_expr[exprloc] = self.get_asm_next_offset(exprloc)
                    continue
                offset = symbols.get_location_offset(loc_key)
                if offset is not None:
                    # Fix symbol with its offset
                    fixed_expr[exprloc] = m2_expr.ExprInt(
                        offset, exprloc.size)
                    continue
                # No offset is known from here on. The former second lookup
                # of the same offset could only return None again, so its
                # success branch was unreachable and has been removed.
                if not names:
                    raise ValueError('Unresolved symbol: %r' % exprloc)
                raise ValueError(
                    'The offset of loc_key "%s" cannot be determined' %
                    names)

            expr = expr.replace_expr(fixed_expr)
            expr = expr_simp(expr)
            args_out.append(expr)
        return args_out
Exemple #17
0
    def resolve_args_with_symbols(self, symbols=None):
        """Resolve the locations used by the arguments and simplify them.

        @symbols: location database used for the resolution; defaults to a
                  new empty LocationDB

        Locations named b'$' and b'_' are replaced through `get_asm_offset`
        and `get_asm_next_offset` respectively. Any other location is
        replaced by its offset, as an integer of the location's size.

        Returns the list of resolved, simplified arguments.
        Raises ValueError when the offset of a location is unknown.
        """
        if symbols is None:
            symbols = LocationDB()
        args_out = []
        for expr in self.args:
            fixed_expr = {}
            for exprloc in m2_expr.get_expr_locs(expr):
                loc_key = exprloc.loc_key
                names = symbols.get_location_names(loc_key)
                # special symbols
                if b'$' in names:
                    fixed_expr[exprloc] = self.get_asm_offset(exprloc)
                    continue
                if b'_' in names:
                    fixed_expr[exprloc] = self.get_asm_next_offset(exprloc)
                    continue
                offset = symbols.get_location_offset(loc_key)
                if offset is not None:
                    # Fix symbol with its offset
                    fixed_expr[exprloc] = m2_expr.ExprInt(offset, exprloc.size)
                    continue
                # The offset is unknown here. Looking it up a second time, as
                # was done before, always gave None, leaving the fallback
                # that followed unreachable.
                if not names:
                    raise ValueError('Unresolved symbol: %r' % exprloc)
                raise ValueError(
                    'The offset of loc_key "%s" cannot be determined' % names
                )

            expr = expr.replace_expr(fixed_expr)
            expr = expr_simp(expr)
            args_out.append(expr)
        return args_out
Exemple #18
0
    def eval_updt_irblock(self, irb, step=False):
        """
        Symbolic execution of the @irb on the current state
        @irb: irblock instance
        @step: display intermediate steps (not read by this method's body)

        While executing, the C representation and type of every expression
        involved in an assignment block are gathered per instruction offset,
        then written as comments through idc.MakeComm and printed.
        Returns the evaluation of the architecture's IRDst after the last
        block.
        """

        # Instruction offset -> set of comment strings to attach to it
        offset2cmt = {}
        for index, assignblk in enumerate(irb):
            if set(assignblk) == set([self.ir_arch.IRDst, self.ir_arch.pc]):
                # Don't display on jxx
                # (such a block is skipped entirely, state update included)
                continue
            instr = assignblk.instr
            tmp_r = assignblk.get_r()
            tmp_w = assignblk.get_w()

            # Expressions to annotate for this assignment block
            todo = set()

            # Replace PC with value to match IR args
            pc_fixed = {self.ir_arch.pc: m2_expr.ExprInt(instr.offset + instr.l, self.ir_arch.pc.size)}
            # NOTE: `inputs` aliases `tmp_r`, so the update below extends the
            # read set in place with the written memory expressions
            inputs = tmp_r
            inputs.update(arg for arg in tmp_w if arg.is_mem())
            for arg in inputs:
                arg = expr_simp(arg.replace_expr(pc_fixed))
                # Written non-memory destinations are not annotated
                if arg in tmp_w and not arg.is_mem():
                    continue
                todo.add(arg)

            for expr in todo:
                if expr.is_int():
                    continue
                for c_str, c_type in self.chandler.expr_to_c_and_types(expr, self.symbols):
                    # `expr` is rebound here: the comment shows the linked
                    # expression from cst_propag_link when one is recorded
                    # for this (block, index), the expression itself otherwise
                    expr = self.cst_propag_link.get((irb.loc_key, index), {}).get(expr, expr)
                    offset2cmt.setdefault(instr.offset, set()).add(
                        "\n%s: %s\n%s" % (expr, c_str, c_type)
                    )
            # Apply the assignment block to the symbolic state
            self.eval_updt_assignblk(assignblk)
        # Emit the gathered comments, one per instruction offset
        for offset, value in viewitems(offset2cmt):
            idc.MakeComm(offset, '\n'.join(value))
            print("%x\n" % offset, '\n'.join(value))

        return self.eval_expr(self.ir_arch.IRDst)
Exemple #19
0
    def dis(cls, bs_o, mode_o=None, offset=0):
        """Disassemble the instruction located at @offset in @bs_o.

        @bs_o: bin_stream instance; anything else is wrapped with
               bin_stream_str
        @mode_o: mode forwarded to `cls.pre_dis`
        @offset: offset of the instruction in @bs_o

        Returns the decoded instruction. When several candidates decode,
        the first one whose candidate is flagged as an alias is returned.

        Raises Disasm_Exception when no candidate can be guessed or decoded
        and NotImplementedError when several decodings remain and none of
        them is an alias. The stream's atomic mode is left on every path,
        including when an unexpected exception propagates.
        """
        if not isinstance(bs_o, bin_stream):
            bs_o = bin_stream_str(bs_o)

        offset_o = offset
        out = []
        out_c = []
        alias = False

        bs_o.enter_atomic_mode()
        try:
            pre_dis_info, bs, mode, offset, prefix_len = cls.pre_dis(
                bs_o, mode_o, offset)
            candidates = cls.guess_mnemo(bs, mode, pre_dis_info, offset)
            if not candidates:
                raise Disasm_Exception('cannot disasm (guess) at %X' % offset)

            if hasattr(bs, 'getlen'):
                bs_l = bs.getlen()
            else:
                bs_l = len(bs)

            for c in candidates:
                # Arguments need matching format specifiers, otherwise the
                # logging module reports a formatting error
                log.debug("%s %s %s", "*" * 40, mode, c.mode)
                log.debug(c.fields)

                c = cls.all_mn_inst[c][0]

                c.reset_class()
                c.mode = mode

                if not c.add_pre_dis_info(pre_dis_info):
                    continue

                todo = {}
                getok = True
                fname_values = dict(pre_dis_info)
                offset_b = offset * 8

                # Read the raw bits of every field present in the encoding
                total_l = 0
                for i, f in enumerate(c.fields_order):
                    if f.flen is not None:
                        bit_len = f.flen(mode, fname_values)
                    else:
                        bit_len = f.l
                    if bit_len is not None:
                        total_l += bit_len
                        f.l = bit_len
                        f.is_present = True
                        log.debug("FIELD %s %s %s %s", f.__class__, f.fname,
                                  offset_b, bit_len)
                        if bs_l * 8 - offset_b < bit_len:
                            # Not enough bits left in the stream
                            getok = False
                            break
                        bv = cls.getbits(bs, mode, offset_b, bit_len)
                        offset_b += bit_len
                        if f.fname not in fname_values:
                            fname_values[f.fname] = bv
                        todo[i] = bv
                    else:
                        f.is_present = False
                        todo[i] = None

                if not getok:
                    continue

                c.l = prefix_len + total_l // 8
                # `ret` must be reset for each candidate: without a present
                # field to decode it would otherwise be unbound, or carry the
                # outcome of the previous candidate
                ret = True
                for i in c.to_decode:
                    f = c.fields_order[i]
                    if f.is_present:
                        ret = f.decode(todo[i])
                        if not ret:
                            log.debug("cannot decode %r", f)
                            break

                if not ret:
                    continue
                for a in c.args:
                    a.expr = expr_simp(a.expr)

                c.b = cls.getbytes(bs, offset_o, c.l)
                c.offset = offset_o
                c = c.post_dis()
                if c is None:
                    continue
                c_args = [a.expr for a in c.args]
                instr = cls.instruction(c.name, mode, c_args,
                                        additional_info=c.additional_info())
                instr.l = prefix_len + total_l // 8
                instr.b = cls.getbytes(bs, offset_o, instr.l)
                instr.offset = offset_o
                instr.get_info(c)
                if c.alias:
                    alias = True
                out.append(instr)
                out_c.append(c)
        finally:
            bs_o.leave_atomic_mode()

        if not out:
            raise Disasm_Exception('cannot disasm at %X' % offset_o)
        if len(out) != 1:
            if not alias:
                log.warning('dis multiple args ret default')

            for i, o in enumerate(out_c):
                if o.alias:
                    return out[i]
            raise NotImplementedError(
                'Multiple disas: \n' +
                "\n".join(str(x) for x in out)
            )
        return out[0]
Exemple #20
0
    def _is_suitable_simplification_candidate(self, expr: Expr,
                                              simplified: Expr) -> bool:
        """
        Checks if a simplification candidate is suitable.

        This check ensures the semantical correctness of the simplification.

        We skip the simplification candidate

        1. If the simplification candidate contains any unification variable
           (a variable named "p" followed by digits). In this case, not every
           variable of the simplification candidate can be matched to a
           terminal expression in the original one.

        2. If the tree depth of the original expression is smaller or equal to
           the simplified one. In this case, simplification could make
           expressions even more complex.

        3. If Miasm's expression simplification results in the same expression for
           the original and the simplified one. In this case, the lookup in the
           simplification oracle is not required.

        4. If the original expression is not semantically equivalent to the
           simplified one. Since this query is computationally expensive, we,
           by default, set a small timeout and only skip the candidate if the
           SMT solver finds a counterexample in the provided time. If the
           solver was not able to prove the equivalence within the provided
           time, we still accept it.

           The user has the possibility to enforce the SMT-based equivalence check
           to be successful by setting the `enforce_equivalence` flag and
           (optionally) increasing the `solver_timeout`.

        Args:
            expr: Original expression.
            simplified: Simplified expression candidate.

        Returns:
            True if the candidate is suitable, False if the simplification
            should be skipped.
        """
        # contains placeholder variables (exactly p<digits>; a prefix match
        # would also reject ordinary variables that merely start with "p")
        if any(
                re.fullmatch(r"p[0-9]+", v.name)
                for v in get_unique_variables(simplified)
        ):
            logger.debug(
                f"{expr} <==> {simplified} (incorrect variable replacement)")
            return False
        # checks if original is smaller to simplified
        if len(expr.graph().nodes()) <= len(simplified.graph().nodes()):
            return False
        # same normalized expression
        if expr_simp(expr) == expr_simp(simplified):
            return False
        # query the (expensive) SMT solver once and reuse its verdict
        verdict = self.check_semantical_equivalence(expr, simplified)
        # SMT solver proves non-equivalence or timeouts
        if self.enforce_equivalence and verdict != z3.unsat:
            logger.debug(
                f"{expr} <==> {simplified} (not semantically equivalent)")
            return False
        # SMT solver finds a counter example
        if verdict == z3.sat:
            logger.debug(
                f"{expr} <==> {simplified} (not semantically equivalent, counterexample found)"
            )
            return False
        return True
Exemple #21
0
    (b0, expr_is_signed_lower_or_equal, int_1, int_m1),

    (b1, expr_is_signed_greater_or_equal, int_m1, int_m1),
    (b1, expr_is_signed_lower_or_equal, int_m1, int_m1),


    (b1, expr_is_signed_greater, int_m1, int_m2),
    (b1, expr_is_signed_lower, int_m2, int_m1),

    (b0, expr_is_signed_greater, int_m2, int_m1),
    (b0, expr_is_signed_lower, int_m1, int_m2),

    (b1, expr_is_signed_greater_or_equal, int_m1, int_m2),
    (b1, expr_is_signed_lower_or_equal, int_m2, int_m1),

    (b0, expr_is_signed_greater_or_equal, int_m2, int_m1),
    (b0, expr_is_signed_lower_or_equal, int_m1, int_m2),

    # eq/neq
    (b1, expr_is_equal, int_1, int_1),
    (b1, expr_is_not_equal, int_0, int_1),

    (b0, expr_is_equal, int_1, int_0),
    (b0, expr_is_not_equal, int_0, int_0),


]

# Every entry of `tests` is (expected result, comparison builder, lhs, rhs):
# the comparison built from the operands must simplify to the expected value.
for result, func, arg1, arg2 in tests:
    comparison = func(arg1, arg2)
    assert result == expr_simp(comparison)
Exemple #22
0
    def dis(cls, bs_o, mode_o = None, offset=0):
        """Disassemble one instruction from @bs_o starting at @offset.

        Every mnemonic candidate returned by `cls.guess_mnemo` is tried: its
        fields are sliced out of the bit stream, decoded, and the resulting
        arguments are simplified. Candidates that do not fit (not enough
        bits, a field that fails to decode, `post_dis` returning None) are
        dropped.

        Args:
            bs_o: a `bin_stream` instance, or raw data that is wrapped into a
                `bin_stream_str`.
            mode_o: disassembly mode handed to `cls.pre_dis`.
            offset: offset of the instruction in @bs_o.

        Returns:
            The decoded instruction. When several candidates decode, the
            first one flagged as an alias is returned.

        Raises:
            Disasm_Exception: no candidate was guessed, or none decoded.
            NotImplementedError: several candidates decoded and none of them
                is an alias.
        """
        if not isinstance(bs_o, bin_stream):
            bs_o = bin_stream_str(bs_o)

        # Keep the caller's offset: `pre_dis` may return an updated one.
        offset_o = offset
        out = []
        out_c = []
        alias = False

        bs_o.enter_atomic_mode()
        # The atomic mode is left on every exit path, including errors.
        try:
            pre_dis_info, bs, mode, offset, prefix_len = cls.pre_dis(
                bs_o, mode_o, offset)
            candidates = cls.guess_mnemo(bs, mode, pre_dis_info, offset)
            if not candidates:
                raise Disasm_Exception('cannot disasm (guess) at %X' % offset)

            if hasattr(bs, 'getlen'):
                bs_l = bs.getlen()
            else:
                bs_l = len(bs)

            for c in candidates:
                # Arguments need placeholders, otherwise logging fails to
                # format the record.
                log.debug("%s %s %s", "*" * 40, mode, c.mode)
                log.debug(c.fields)

                c = cls.all_mn_inst[c][0]

                c.reset_class()
                c.mode = mode

                if not c.add_pre_dis_info(pre_dis_info):
                    continue

                # Field index -> raw bits (None when the field is absent)
                todo = {}
                getok = True
                fname_values = dict(pre_dis_info)
                offset_b = offset * 8

                total_l = 0
                for i, f in enumerate(c.fields_order):
                    if f.flen is not None:
                        l = f.flen(mode, fname_values)
                    else:
                        l = f.l
                    if l is not None:
                        total_l += l
                        f.l = l
                        f.is_present = True
                        log.debug("FIELD %s %s %s %s", f.__class__, f.fname,
                                  offset_b, l)
                        # Not enough bits left in the stream for this field
                        if bs_l * 8 - offset_b < l:
                            getok = False
                            break
                        bv = cls.getbits(bs, mode, offset_b, l)
                        offset_b += l
                        # The first value seen for a field name wins
                        if f.fname not in fname_values:
                            fname_values[f.fname] = bv
                        todo[i] = bv
                    else:
                        f.is_present = False
                        todo[i] = None

                if not getok:
                    continue

                c.l = prefix_len + total_l // 8
                # Reset for each candidate: without this, a candidate with no
                # field to decode would reuse the previous candidate's result
                # (or hit an unbound name on the first candidate).
                ret = True
                for i in c.to_decode:
                    f = c.fields_order[i]
                    if f.is_present:
                        ret = f.decode(todo[i])
                        if not ret:
                            log.debug("cannot decode %r", f)
                            break

                if not ret:
                    continue
                for a in c.args:
                    a.expr = expr_simp(a.expr)

                c.b = cls.getbytes(bs, offset_o, c.l)
                c.offset = offset_o
                c = c.post_dis()
                if c is None:
                    continue
                c_args = [a.expr for a in c.args]
                instr = cls.instruction(c.name, mode, c_args,
                                        additional_info=c.additional_info())
                instr.l = prefix_len + total_l // 8
                instr.b = cls.getbytes(bs, offset_o, instr.l)
                instr.offset = offset_o
                instr.get_info(c)
                if c.alias:
                    alias = True
                out.append(instr)
                out_c.append(c)
        finally:
            bs_o.leave_atomic_mode()

        if not out:
            raise Disasm_Exception('cannot disasm at %X' % offset_o)
        if len(out) != 1:
            if not alias:
                log.warning('dis multiple args ret default')

            # Prefer the first candidate flagged as an alias
            for i, o in enumerate(out_c):
                if o.alias:
                    return out[i]
            raise NotImplementedError(
                'Multiple disas: \n' +
                "\n".join(str(x) for x in out)
            )
        return out[0]
Exemple #23
0
    def simplify(self, expr: Expr) -> Expr:
        """
        Simplify an expression with the help of the pre-computed oracle.

        The expression is turned into an abstract syntax tree (AST) which is
        rewritten until a fixpoint is reached:

        1. The subtrees of the AST are visited in the order produced by
           `get_subexpressions`; subtrees rejected by `_skip_subtree` are
           ignored.

        2. A visited subtree is unified (its terminal nodes are replaced
           with placeholder variables) and its equivalence class is
           determined. If the oracle knows this class, a suitable simpler
           member of the class is searched.

        3. When such a candidate exists, it is stored in a dictionary under
           a fresh global placeholder variable, the subtree is replaced by
           that variable in the AST and the walk restarts on the new AST.

        4. Once a full walk leaves the AST unchanged, the global placeholder
           variables are replaced by the simplified subtrees they stand for.

        Args:
            expr: Expression to simplify

        Returns:
            Simplified expression
        """
        # work on the AST representation of the input expression
        ast = self._translator_ast.from_expr(expr)
        # global placeholder variable -> simplified subtree it stands for
        global_unification_dict: Dict[Expr, Expr] = {}
        # index of the next global placeholder variable
        global_ctr = 0

        logger.info(f"initial ast: {ast}")

        # iterate until a complete walk no longer modifies the ast
        fixpoint_reached = False
        while not fixpoint_reached:
            before = ast.copy()

            for subtree in get_subexpressions(ast):
                # nothing to do for subtrees that can be skipped
                if self._skip_subtree(subtree):
                    continue

                # unify the subtree and look up its equivalence class
                unification_dict = gen_unification_dict(subtree)
                unified_subtree = subtree.replace_expr(unification_dict)
                equiv_class = self.determine_equivalence_class(
                    unified_subtree)

                # the oracle has no entry for this equivalence class
                if not self.oracle.contains_equiv_class(equiv_class):
                    continue

                # look for a simpler member of the equivalence class
                success, simplified = self._find_suitable_simplification(
                    equiv_class, subtree, unification_dict)
                if not success:
                    continue

                # remember the simplification behind a fresh placeholder
                global_variable = self._gen_global_variable_replacement(
                    global_ctr, subtree.size)
                global_ctr += 1
                global_unification_dict[global_variable] = simplified

                # substitute the placeholder and restart on the new ast
                ast = ast.replace_expr({subtree: global_variable})
                break

            fixpoint_reached = before == ast

        # put the simplified subtrees back in place of the placeholders
        ast = self._reverse_global_unification(ast, global_unification_dict)

        return expr_simp(ast)
Exemple #24
0
def m2expr_to_r2esil(iir, loc_db):
    """Convert a miasm expression to a radare2 ESIL

    The conversion is recursive and dispatches on the type of @iir.

    Args:
        iir: miasm expression (or an already converted string, which is
            returned unchanged).
        loc_db: location database used to resolve `ExprLoc` to an offset.

    Returns:
        The ESIL representation. This is a string for every case except
        `ExprLoc`, where the value of `loc_db.get_location_offset` is
        returned as is. Unsupported inputs yield the "TODO_Cond" /
        "TODO_UNK" markers.
    """

    if isinstance(iir, ExprId):
        return iir.name.lower()

    if isinstance(iir, ExprLoc):
        return loc_db.get_location_offset(iir.loc_key)

    if isinstance(iir, ExprInt):
        return hex(iir.arg)

    if isinstance(iir, ExprMem):
        # Access size in bytes; floor division keeps an integer on Python 3
        ret = "%s,[%d]" % (m2expr_to_r2esil(iir.arg, loc_db), iir.size // 8)
        return ret.lower()

    elif isAssignation(iir):
        if not isinstance(iir.dst, ExprMem):
            esil_dst = m2expr_to_r2esil(iir.dst, loc_db)
            return "%s,%s,=" % (m2expr_to_r2esil(iir.src, loc_db), esil_dst)
        else:
            # Memory write: the destination is the address expression
            esrc = m2expr_to_r2esil(iir.src, loc_db)
            edst = m2expr_to_r2esil(iir.dst.arg, loc_db)
            return "%s,%s,=[]" % (esrc, edst)

    elif isinstance(iir, ExprOp):
        if len(iir.args) == 2:
            arg_1 = m2expr_to_r2esil(iir.args[1], loc_db)
            arg_0 = m2expr_to_r2esil(iir.args[0], loc_db)
            if iir.op == "FLAG_SIGN_SUB":
                # Subtract, then shift the most significant bit down
                shift = iir.args[1].size - 1
                return "%s,%s,-,%d,>>" % (arg_1, arg_0, shift)
            return "%s,%s,%s" % (arg_1, arg_0, iir.op)
        elif iir.op == "parity":
            arg = m2expr_to_r2esil(iir.args[0], loc_db)
            return "%s,1,&,?{,0,}{,1,}" % arg
        elif iir.op.startswith("signExt_") and isinstance(iir.args[0], ExprMem):
            argsize = iir.args[0].size
            bits = int(iir.op.split("_")[1])
            # Sign bit of the source value
            test = 1 << (argsize - 1)
            # `-` binds tighter than `^`: bits set in the destination width
            # but not in the source width
            mask = (2**bits - 1) ^ (2**argsize - 1)
            tmp = m2expr_to_r2esil(iir.args[0], loc_db)
            sign_extension = "%s,0x%x,&,1,?{,%s,0x%x,+,}{,%s,}"
            return sign_extension % (tmp, test, tmp, mask, tmp)
        elif iir.op.startswith("zeroExt_"):
            return m2expr_to_r2esil(iir.args[0], loc_db)
        elif iir.op == "CC_EQ":
            return m2expr_to_r2esil(iir.args[0], loc_db)
        else:
            return "%s,0,%s" % (m2expr_to_r2esil(iir.args[0], loc_db), iir.op)

    elif isinstance(iir, ExprCompose):

        esil_strings = []
        for start, expr in iir.iter_args():
            stop = start + expr.size
            # Mask covering bits [start, stop)
            mask = (2**stop - 1) - (2**start - 1)
            esil_tmp = "%s,%s,&" % (m2expr_to_r2esil(expr, loc_db), hex(mask))
            esil_strings.append(esil_tmp)

        parts = esil_strings
        if len(parts) == 2:
            ret_string = "%s,%s,+" % (parts[0], parts[1])
            return ret_string
        else:
            # `range` instead of the Python 2 only `xrange`
            tmp_list = [",".join(parts[i:i+2])
                        for i in range(0, len(parts), 2)]
            ret_string = ",+,".join(tmp_list)
            return ret_string

    elif isinstance(iir, ExprSlice):

        mask = (2**iir.stop - 1) - (2**iir.start - 1)
        return "%s,%s,&" % (m2expr_to_r2esil(iir.arg, loc_db), hex(mask))

    elif isinstance(iir, ExprCond):

        if isinstance(iir.cond, ExprSlice):

            # Attempt to evaluate the expression
            result = expr_simp(iir.cond)

            if isinstance(result, ExprInt):
                if result.arg != 0:
                    tmp_src = iir.src1
                else:
                    tmp_src = iir.src2
            else:
                tmp = m2expr_to_r2esil(iir.cond, loc_db)
                # NOTE(review): src1/src2 are formatted as miasm expressions
                # here, not converted to ESIL, and the template differs from
                # the "?{,..,}{,..,}" one used below -- confirm this is
                # intended.
                esil_string = "%s,?{,%s,},?{,%s,}" % (tmp, iir.src1, iir.src2)
                return esil_string

            return m2expr_to_r2esil(tmp_src, loc_db)

        elif (isinstance(iir.cond, ExprOp) or isinstance(iir.cond, ExprId) or
                isinstance(iir.cond, ExprCond)):
            condition = m2expr_to_r2esil(iir.cond, loc_db)
            if_clause = m2expr_to_r2esil(iir.src1, loc_db)
            then_clause = m2expr_to_r2esil(iir.src2, loc_db)
            return "%s,?{,%s,}{,%s,}" % (condition, if_clause, then_clause)

        elif isinstance(iir.cond, ExprInt):
            # Constant condition: only one side is reachable
            if int(iir.cond.arg):
                return m2expr_to_r2esil(iir.src1, loc_db)
            else:
                return m2expr_to_r2esil(iir.src2, loc_db)

        return "TODO_Cond"  # GV: use a r2m2 exception ?

    elif isinstance(iir, str):
        return iir

    else:
        # Written through `sys.stderr.write` so that the message is emitted
        # identically on Python 2 and Python 3.
        sys.stderr.write("Unknown type: %s %s\n" % (type(iir), iir))
        return "TODO_UNK"
Exemple #25
0
from __future__ import print_function
from miasm.expression.expression import *
from miasm.expression.simplifications import expr_simp

print("""
Simple expression simplification demo
""")


# Two 32-bit identifiers used as operands of the demo expressions
a = ExprId('eax', 32)
b = ExprId('ebx', 32)

# Expressions handed to expr_simp below
exprs = [
    a + b - a,
    ExprInt(0x12, 32) + ExprInt(0x30, 32) - a,
    ExprCompose(a[:8], a[8:16]),
]

separator = '*' * 40
# Show each expression next to its simplified form
for e in exprs:
    print(separator)
    print('original expression:', e)
    print("simplified:", expr_simp(e))
Exemple #26
0
    (ExprCond(ExprOp(TOK_INF_SIGNED, a8.zeroExtend(32), ExprInt(-1, 32)), a,
              b), b),
    (ExprCond(ExprOp(TOK_INF_EQUAL_SIGNED, a8.zeroExtend(32), ExprInt(-1, 32)),
              a, b), b),
    (a8.zeroExtend(32)[2:5], a8[2:5]),
    (ExprCond(a + b, a, b), ExprCond(ExprOp(TOK_EQUAL, a, -b), b, a)),
    (ExprCond(a + i1, a, b), ExprCond(ExprOp(TOK_EQUAL, a, im1), b, a)),
    (ExprCond(ExprOp(TOK_EQUAL, a, i1), bi1, bi0), ExprOp(TOK_EQUAL, a, i1)),
    (ExprCond(ExprOp(TOK_INF_SIGNED, a, i1), bi1,
              bi0), ExprOp(TOK_INF_SIGNED, a, i1)),
    (ExprOp(TOK_INF_EQUAL_UNSIGNED, a, i0), ExprOp(TOK_EQUAL, a, i0)),
]

# Every pair of `to_test` is (input, expected): both sides go through
# expr_simp and must end up equal.
for e_input, e_check in to_test:
    print("#" * 80)
    e_check = expr_simp(e_check)
    e_new = expr_simp(e_input)
    print("original: ", str(e_input), "new: ", str(e_new))
    rez = e_new == e_check
    if rez:
        continue
    raise ValueError('bug in expr_simp simp(%s) is %s and should be %s' %
                     (e_input, e_new, e_check))

# Test conds

to_test = [
    (((a - b) ^ ((a ^ b) & ((a - b) ^ a))).msb(), ExprOp_inf_signed(a, b)),
    ((((a - b) ^ ((a ^ b) & ((a - b) ^ a))) ^ a ^ b).msb(),
     ExprOp_inf_unsigned(a, b)),
    (ExprOp_inf_unsigned(ExprInt(-1, 32), ExprInt(3, 32)), ExprInt(0, 1)),
    (ExprOp_inf_signed(ExprInt(-1, 32), ExprInt(3, 32)), ExprInt(1, 1)),
Exemple #27
0
def resolve_offsets(state_register, asmcfg, ircfg, ir_arch):
    """Collect the patches resolving the state-register driven control flow.

    Every node of @ircfg is visited; destinations discovered on the way are
    appended to the work list. Blocks made of a single assignment block are
    matched against CMOVNZ / CMOVZ patterns on @state_register. Other blocks
    are symbolically executed and the resulting value of @state_register (or
    the next address) decides which patch is recorded.

    Args:
        state_register: expression of the register holding the state.
        asmcfg: assembly CFG of the function.
        ircfg: IR CFG of the function.
        ir_arch: IR architecture used for symbolic execution.

    Returns:
        List of the collected patches. The ones added directly here are
        `(source offset, destination address, patch kind)` tuples; the kinds
        (CNDP0..CNDP3, STDP) are constants defined outside this function.

    Side effects:
        Enables the `ignore_call_results` pass on `expr_simp`, prints
        diagnostics and calls `sys.exit()` when an IR block is missing.
    """
    patches = set()
    # Work list: it grows while being iterated
    nodes_to_walk = list(ircfg.nodes())

    # Initial symbolic state: each register maps to its init value
    symbols_init = dict()
    for i, r in enumerate(all_regs_ids):
        symbols_init[r] = all_regs_ids_init[i]

    expr_simp.enable_passes({ExprOp: [ignore_call_results]})

    for node in nodes_to_walk:
        irblock = ircfg.get_block(node)
        if not irblock:
            print('[-] Could not get IRBLOCK!')
            sys.exit()

        if len(irblock.assignblks) == 1:
            if irblock.assignblks[0].instr.name == "CMOVNZ" and \
                    irblock.assignblks[0].instr.args[0] == state_register:
                temp_reg1 = irblock.assignblks[0].instr.args[0]
                temp_reg2 = irblock.assignblks[0].instr.args[1]

                state1 = None
                state2 = None

                # Look for the immediates moved into both CMOV operands in
                # the first predecessor
                previous_block = ircfg.get_block(ircfg.predecessors(node)[0])
                for line in previous_block.assignblks:
                    if line.instr.name == 'MOV' and \
                            line.instr.args[0] in (temp_reg1, temp_reg2) and isinstance(line.instr.args[1], ExprInt):
                        if line.instr.args[0] == state_register:
                            state1 = line.instr.args[1]
                        else:
                            state2 = line.instr.args[1]
                    if state1 and state2:
                        break

                # compiler shenanigans. state missing is not initialised in current bblk. search function for it
                if not state1:
                    state1 = scan_function_for_state(asmcfg, state_register,
                                                     temp_reg1)
                elif not state2:
                    state2 = scan_function_for_state(asmcfg, state_register,
                                                     temp_reg2)

                # NOTE(review): blocks1 is looked up with state2 and blocks2
                # with state1, the opposite of the CMOVZ branch -- presumably
                # because the condition is inverted; confirm.
                blocks1 = get_assignblock_for_state(ircfg, ir_arch,
                                                    symbols_init,
                                                    state_register, state2)
                blocks2 = get_assignblock_for_state(ircfg, ir_arch,
                                                    symbols_init,
                                                    state_register, state1)

                dst1 = get_address(ircfg.loc_db, blocks1[0].loc_key)
                src1 = irblock.assignblks[0].instr.offset
                patches.add((src1, dst1, CNDP1))

                dst2 = get_address(ircfg.loc_db, blocks2[0].loc_key)
                src2 = src1
                patches.add((src2, dst2, CNDP0))

            elif irblock.assignblks[0].instr.name == "CMOVZ":
                state1 = None
                state2 = None

                temp_reg1 = irblock.assignblks[0].instr.args[0]
                temp_reg2 = irblock.assignblks[0].instr.args[1]

                if temp_reg1 == state_register:
                    previous_block = ircfg.get_block(
                        ircfg.predecessors(node)[0])

                    for line in previous_block.assignblks:
                        if line.instr.name == 'MOV' and \
                                line.instr.args[0] in (temp_reg1, temp_reg2):
                            if line.instr.args[0] == state_register:
                                state1 = line.instr.args[1]
                            else:
                                state2 = line.instr.args[1]

                    if state1 and state2:
                        blocks1 = get_assignblock_for_state(
                            ircfg, ir_arch, symbols_init, state_register,
                            state1)
                        blocks2 = get_assignblock_for_state(
                            ircfg, ir_arch, symbols_init, state_register,
                            state2)

                        dst1 = get_address(ircfg.loc_db, blocks1[0].loc_key)
                        src1 = irblock.assignblks[0].instr.offset
                        patches.add((src1, dst1, CNDP1))

                        dst2 = get_address(ircfg.loc_db, blocks2[0].loc_key)
                        src2 = src1
                        patches.add((src2, dst2, CNDP0))

                    else:
                        # One state was not found in the predecessor: search
                        # the whole function for an immediate write to the
                        # corresponding register
                        found_state = state1 if state1 else state2
                        missing_state = state1 if not state1 else state2
                        subject_reg = temp_reg1 if not state1 else temp_reg2

                        def get_imm_write_for_reg(asmcfg, subject_reg):
                            # First `MOV subject_reg, <immediate>` found in
                            # the assembly CFG, None if there is none
                            for node in asmcfg.nodes():
                                asmblock = asmcfg.loc_key_to_block(node)
                                for line in asmblock.lines:
                                    if line.name == 'MOV' and line.args[0] == subject_reg and \
                                            isinstance(line.args[1], ExprInt):
                                        return line.args[1]
                            return None

                        missing_state = get_imm_write_for_reg(
                            asmcfg, subject_reg)
                        if not missing_state:
                            print(
                                "[-] Something went wrong. could not find mising state!"
                            )
                            continue

                        state1 = state1 if state1 == found_state else missing_state
                        state2 = missing_state if state1 == found_state else state2

                        blocks1 = get_assignblock_for_state(
                            ircfg, ir_arch, symbols_init, state_register,
                            state1)
                        blocks2 = get_assignblock_for_state(
                            ircfg, ir_arch, symbols_init, state_register,
                            state2)

                        dst1 = get_address(ircfg.loc_db, blocks1[0].loc_key)
                        src1 = irblock.assignblks[0].instr.offset
                        patches.add((src1, dst1, CNDP1))

                        dst2 = get_address(ircfg.loc_db, blocks2[0].loc_key)
                        src2 = src1
                        patches.add((src2, dst2, CNDP0))

                else:
                    # The CMOVZ does not target the state register: the state
                    # is looked up in the first successor instead
                    next_block = ircfg.get_block(ircfg.successors(node)[0])
                    for line in next_block.assignblks:
                        if line.instr.name == 'MOV' and line.instr.args[
                                0] == state_register:
                            state1 = line.instr.args[1]
                            break

                    if state1:
                        blocks1 = get_assignblock_for_state(
                            ircfg, ir_arch, symbols_init, state_register,
                            state1)
                        # Offset of the last JMP of the successor; stays None
                        # when the successor has no JMP
                        src = None
                        for assignblk in next_block.assignblks:
                            if assignblk.instr.name == 'JMP':
                                src = assignblk.instr.offset

                        dst_block = ircfg.get_block(blocks1[0].loc_key)
                        if isinstance(dst_block.dst, ExprCond) and len(
                                dst_block.assignblks):
                            # One-element tuple: the former `in ('CC_S>')`
                            # was a substring test against a plain string
                            if hasattr(dst_block.dst.cond,
                                       'op') and dst_block.dst.cond.op in (
                                           'CC_S>',):
                                dst = get_address(ircfg.loc_db,
                                                  dst_block.dst.src2.loc_key)
                                next_block = ircfg.get_block(dst)
                                dst = get_address(ircfg.loc_db,
                                                  next_block.dst.src1.loc_key)
                            else:
                                dst = get_address(ircfg.loc_db,
                                                  dst_block.dst.src1.loc_key)
                        else:
                            dst = get_address(ircfg.loc_db, blocks1[0].loc_key)

                        patches.add((src, dst, STDP))

        else:
            # Symbolically execute the block from the initial state
            symbolic_engine = SymbolicExecutionEngine(ir_arch, symbols_init)
            next_addr = symbolic_engine.run_block_at(
                ircfg, get_address(ircfg.loc_db, node))
            next_addr = expr_simp(next_addr)

            updated_state = symbolic_engine.symbols[state_register]

            if isinstance(updated_state, ExprOp):
                updated_state = expr_simp(updated_state)

            if updated_state != symbols_init[state_register] and \
                isinstance(updated_state, ExprOp):

                irblock = ircfg.get_block(node)
                if not irblock:
                    print('[-] Could not get IRBLOCK!')
                    sys.exit()

                if len(irblock.assignblks) > 3:
                    neg_inst = False
                    nb_assignblks = len(irblock.assignblks)
                    for i in range(nb_assignblks):
                        if irblock.assignblks[i].instr.name == 'NEG':
                            neg_inst = True
                        # SBB / AND / ADD sequence; the bound check avoids an
                        # IndexError when SBB is one of the last two
                        # instructions
                        if i + 2 < nb_assignblks and \
                                irblock.assignblks[i].instr.name == 'SBB' and \
                                irblock.assignblks[i + 1].instr.name == 'AND' and \
                                irblock.assignblks[i + 2].instr.name == 'ADD':

                            expr = symbolic_engine.symbols[
                                state_register].copy()

                            # NOTE(review): when neither branch below
                            # matches, state1/state2 keep the values of an
                            # earlier node (or are unbound) -- confirm.
                            if neg_inst:
                                state1 = expr_simp(
                                    expr.replace_expr(
                                        {EAX_init: ExprInt(0, 32)}))
                                state2 = expr_simp(
                                    expr.replace_expr(
                                        {EAX_init: ExprInt(1, 32)}))

                            elif irblock.assignblks[i-1].instr.name == 'CMP' and \
                                irblock.assignblks[i-2].instr.name == 'ADD' and \
                                    isinstance(irblock.assignblks[i-2].instr.args[1], ExprInt):
                                # Destination register and immediate of the
                                # ADD preceding the CMP
                                add_dst = irblock.assignblks[i - 2].instr.args[0]
                                imm = irblock.assignblks[i - 2].instr.args[1]

                                state1 = expr_simp(
                                    expr.replace_expr({
                                        EAX_init: imm
                                    }).replace_expr({
                                        symbolic_engine.symbols[add_dst].args[0]:
                                        imm
                                    }))
                                state2 = expr_simp(
                                    expr.replace_expr({
                                        EAX_init: ExprInt(-1, 32)
                                    }).replace_expr({
                                        symbolic_engine.symbols[add_dst].args[0]:
                                        imm
                                    }))

                            blocks1 = get_assignblock_for_state(
                                ircfg, ir_arch, symbols_init, state_register,
                                state1)
                            blocks2 = get_assignblock_for_state(
                                ircfg, ir_arch, symbols_init, state_register,
                                state2)

                            process_blocks_for_patches(node, blocks1, ircfg,
                                                       patches, nodes_to_walk,
                                                       state1, True)
                            process_blocks_for_patches(node, blocks2, ircfg,
                                                       patches, nodes_to_walk,
                                                       state2, False)
                            break

            elif updated_state != symbols_init[state_register] and \
                isinstance(updated_state, ExprInt) and \
                updated_state._get_int() > 0xff:

                # The block sets the state register to a constant above 0xff
                referenced_blocks = get_assignblock_for_state(
                    ircfg, ir_arch, symbols_init, state_register,
                    updated_state)
                process_blocks_for_patches(node, referenced_blocks, ircfg,
                                           patches, nodes_to_walk)

            elif isinstance(next_addr, ExprCond):
                if not hasattr(next_addr.cond, 'args'):
                    # Resolve both destinations to loc_keys and queue them
                    if isinstance(next_addr.src1, ExprLoc):
                        dest1 = next_addr.src1.loc_key
                    else:
                        dest1 = get_loc_key_at(ircfg.loc_db,
                                               next_addr.src1._get_int())

                    if isinstance(next_addr.src2, ExprLoc):
                        dest2 = next_addr.src2.loc_key
                    else:
                        dest2 = get_loc_key_at(ircfg.loc_db,
                                               next_addr.src2._get_int())

                    if dest1 not in nodes_to_walk:
                        nodes_to_walk.append(dest1)

                    if dest2 not in nodes_to_walk:
                        nodes_to_walk.append(dest2)

                    dst2block = ircfg.get_block(dest2)
                    if dst2block.assignblks[0].instr.name == 'CMP' and \
                        dst2block.assignblks[0].instr.args[0] == state_register and \
                            len(ircfg.get_block(node).assignblks) > 1:

                        # Walk up the first predecessors until one starts
                        # with a CMP
                        ref_block = node
                        while True:
                            irblock = ircfg.get_block(
                                ircfg.predecessors(ref_block)[0])
                            # NOTE(review): the second test reads dst2block,
                            # not irblock, so it is always true here --
                            # possibly a copy/paste slip; confirm.
                            if irblock.assignblks[0].instr.name == 'CMP' and \
                                    dst2block.assignblks[0].instr.args[0] == state_register:
                                break
                            ref_block = ircfg.predecessors(ref_block)[0]

                        asmblock = asmcfg.loc_key_to_block(node)
                        for line in asmblock.lines:
                            if line.name == 'JZ':
                                patches.add(
                                    (line.offset,
                                     get_address(asmcfg.loc_db,
                                                 ref_block), CNDP2))
                                true_block = ircfg.get_block(
                                    ircfg.get_block(node).dst.src2.loc_key)
                                symbolic_engine.run_block_at(
                                    ircfg, true_block.loc_key)

                                if isinstance(
                                        symbolic_engine.
                                        symbols[state_register], ExprInt):
                                    referenced_block = get_assignblock_for_state(
                                        ircfg, ir_arch, symbols_init,
                                        state_register, symbolic_engine.
                                        symbols[state_register])[0]
                                    patches.add(
                                        (line.offset,
                                         get_address(ircfg.loc_db,
                                                     referenced_block.loc_key),
                                         CNDP3))
                                break

            elif isinstance(next_addr, ExprInt):
                # Plain jump to a constant address: queue the destination
                dest = get_loc_key_at(ircfg.loc_db, next_addr._get_int())
                if dest not in nodes_to_walk:
                    nodes_to_walk.append(
                        get_loc_key_at(ircfg.loc_db, next_addr._get_int()))
    return list(patches)
Exemple #28
0
def launch_depgraph():
    """Compute the dependency graphs of the function under the IDA cursor.

    The function is disassembled, lifted to IR and its assignments are
    simplified (optionally rewriting the stack pointer relative to its
    initial value) before the dependency graph chosen in the settings form
    is queried. Results are stored in module globals, the "Shift-N" hotkey
    is registered and the first element is treated.
    """
    global graphs, comments, sol_nb, settings, addr, ir_arch, ircfg

    # Function containing the current cursor position
    addr = idc.get_screen_ea()
    func = ida_funcs.get_func(addr)

    # Architecture dependent engines
    machine = guess_machine(addr=func.start_ea)
    mn = machine.mn
    dis_engine = machine.dis_engine
    ira = machine.ira

    bs = bin_stream_ida()
    mdis = dis_engine(bs, dont_dis_nulstart_bloc=True)
    ir_arch = ira(mdis.loc_db)

    # Make the IDA names known to the location database
    for ad, name in idautils.Names():
        if name is not None:
            mdis.loc_db.add_location(name, ad)

    # Disassemble, then lift to IR
    asmcfg = mdis.dis_multiblock(func.start_ea)
    ircfg = ir_arch.new_ircfg_from_asmcfg(asmcfg)

    # Ask the user for the analysis settings
    settings = depGraphSettingsForm(ir_arch, ircfg, mn)
    settings.Execute()

    loc_key = settings.loc_key
    elements = settings.elements
    line_nb = settings.line_nb

    # Rebuild every IR block with simplified assignments
    for irb in list(viewvalues(ircfg.blocks)):
        offset = ir_arch.loc_db.get_location_offset(irb.loc_key)
        fix_stack = offset is not None and settings.unalias_stack
        irs = []
        for assignblk in irb:
            if fix_stack:
                # Express the stack pointer from its initial value plus the
                # stack delta IDA reports for this instruction
                stk_high = m2_expr.ExprInt(
                    idc.get_spd(assignblk.instr.offset), ir_arch.sp.size)
                fix_dct = {
                    ir_arch.sp: mn.regs.regs_init[ir_arch.sp] + stk_high
                }

            new_assignblk = {}
            for dst, src in viewitems(assignblk):
                if fix_stack:
                    src = src.replace_expr(fix_dct)
                    # An assignment to the stack pointer itself keeps its
                    # destination untouched
                    if dst != ir_arch.sp:
                        dst = dst.replace_expr(fix_dct)
                simplified_dst = expr_simp(dst)
                simplified_src = expr_simp(src)
                new_assignblk[simplified_dst] = simplified_src
            irs.append(AssignBlock(new_assignblk, instr=assignblk.instr))
        ircfg.blocks[irb.loc_key] = IRBlock(irb.loc_key, irs)

    # Query the dependency graphs, starting from the function entry
    dg = settings.depgraph
    heads = {ir_arch.loc_db.get_offset_location(func.start_ea)}
    graphs = dg.get(loc_key, elements, line_nb, heads)

    # State used to display the results
    comments = {}
    sol_nb = 0

    # Register the hotkey and show the first result
    ida_kernwin.add_hotkey("Shift-N", next_element)
    treat_element()
Exemple #29
0
def build_graph(start_addr, type_graph, simplify=False, dontmodstack=True, loadint=False, verbose=False):
    """Disassemble from @start_addr, lift the result to Miasm IR and display it.

    @start_addr: address given to the disassembler as the entry point
    @type_graph: TYPE_GRAPH_IR shows the plain IR graph; TYPE_GRAPH_IRSSA and
        TYPE_GRAPH_IRSSAUNSSA go through the SSA simplifier first. Any other
        value ends in ValueError("Unknown option").
    @simplify: run IRCFGSimplifierCommon on the IR graph before going further
    @dontmodstack: remove the stack pointer assignment from call effects
    @loadint: also apply load_from_int (with is_addr_ro_variable) at each
        SSA simplification step
    @verbose: print progress and dump 'asm_flow.dot' (current directory) and
        'graph.dot' (temporary directory)

    'final.dot' is always written in the current directory when an SSA graph
    type is requested. The function returns None; the graph is shown through
    GraphMiasmIR.
    """
    machine = guess_machine(addr=start_addr)
    dis_engine, ira = machine.dis_engine, machine.ira

    class IRADelModCallStack(ira):
        """IR analysis whose call effects may skip the stack pointer update"""

        def call_effects(self, addr, instr):
            assignblks, extra = super(IRADelModCallStack, self).call_effects(addr, instr)
            if not dontmodstack:
                return assignblks, extra
            # Rebuild each assignment block without its stack pointer entry
            out = []
            for assignblk in assignblks:
                dct = {
                    dst: src
                    for (dst, src) in viewitems(dict(assignblk))
                    if dst != self.sp
                }
                out.append(AssignBlock(dct, assignblk.instr))
            return out, extra

    if verbose:
        print("Arch", dis_engine)

    fname = idc.get_root_filename()
    if verbose:
        print(fname)

    bs = bin_stream_ida()
    mdis = dis_engine(bs)
    ir_arch = IRADelModCallStack(mdis.loc_db)

    # Populate symbols with ida names
    for addr, name in idautils.Names():
        if name is None:
            continue
        if (mdis.loc_db.get_offset_location(addr) or
                mdis.loc_db.get_name_location(name)):
            # Symbol alias
            continue
        mdis.loc_db.add_location(name, addr)

    if verbose:
        print("start disasm")
        print(hex(start_addr))

    asmcfg = mdis.dis_multiblock(start_addr)
    # Single entry point of the graph, used as head by the simplifiers
    head = mdis.loc_db.get_offset_location(start_addr)
    if verbose:
        print("generating graph")
        with open('asm_flow.dot', 'w') as fdesc:
            fdesc.write(asmcfg.dot())
        print("generating IR... %x" % start_addr)

    ircfg = ir_arch.new_ircfg_from_asmcfg(asmcfg)

    if verbose:
        print("IR ok... %x" % start_addr)

    # Replace every IR block by a copy whose expressions went through expr_simp
    for irb in list(viewvalues(ircfg.blocks)):
        irs = []
        for assignblk in irb:
            new_assignblk = {
                expr_simp(dst): expr_simp(src)
                for dst, src in viewitems(assignblk)
            }
            irs.append(AssignBlock(new_assignblk, instr=assignblk.instr))
        ircfg.blocks[irb.loc_key] = IRBlock(irb.loc_key, irs)

    if verbose:
        # Text mode, like the other dot dumps of this function
        dot_path = os.path.join(tempfile.gettempdir(), 'graph.dot')
        with open(dot_path, 'w') as fdesc:
            fdesc.write(ircfg.dot())
    title = "Miasm IR graph"

    if simplify:
        ircfg_simplifier = IRCFGSimplifierCommon(ir_arch)
        ircfg_simplifier.simplify(ircfg, head)
        title += " (simplified)"

    if type_graph == TYPE_GRAPH_IR:
        graph = GraphMiasmIR(ircfg, title, None)
        graph.Show()
        return

    # NOTE(review): this class is never instantiated in this function
    # (ir_arch is an IRADelModCallStack); kept as found.
    class IRAOutRegs(ira):
        def get_out_regs(self, block):
            regs_todo = super(IRAOutRegs, self).get_out_regs(block)
            out = {}
            for assignblk in block:
                for dst in assignblk:
                    reg = self.ssa_var.get(dst, None)
                    if reg is None:
                        continue
                    if reg in regs_todo:
                        out[reg] = dst
            return set(viewvalues(out))

    # Add dummy dependency to uncover out regs affectation
    for loc in ircfg.leaves():
        irblock = ircfg.blocks.get(loc)
        if irblock is None:
            continue
        regs = {}
        for reg in ir_arch.get_out_regs(irblock):
            regs[reg] = reg
        assignblks = list(irblock)
        # The extra "reg = reg" block reuses the last instruction of the leaf
        new_assiblk = AssignBlock(regs, assignblks[-1].instr)
        assignblks.append(new_assiblk)
        new_irblock = IRBlock(irblock.loc_key, assignblks)
        ircfg.blocks[loc] = new_irblock

    class CustomIRCFGSimplifierSSA(IRCFGSimplifierSSA):
        """SSA simplifier returning the graph flavour selected by type_graph"""

        def do_simplify(self, ssa, head):
            modified = super(CustomIRCFGSimplifierSSA, self).do_simplify(ssa, head)
            if loadint:
                modified |= load_from_int(ssa.graph, bs, is_addr_ro_variable)
            return modified

        def simplify(self, ircfg, head):
            ssa = self.ircfg_to_ssa(ircfg, head)
            ssa = self.do_simplify_loop(ssa, head)

            if type_graph == TYPE_GRAPH_IRSSA:
                ret = ssa.graph
            elif type_graph == TYPE_GRAPH_IRSSAUNSSA:
                ircfg = self.ssa_to_unssa(ssa, head)
                ircfg_simplifier = IRCFGSimplifierCommon(self.ir_arch)
                ircfg_simplifier.simplify(ircfg, head)
                ret = ircfg
            else:
                raise ValueError("Unknown option")
            return ret

    simplifier = CustomIRCFGSimplifierSSA(ir_arch)
    ircfg = simplifier.simplify(ircfg, head)
    with open('final.dot', 'w') as fdesc:
        fdesc.write(ircfg.dot())

    graph = GraphMiasmIR(ircfg, title, None)
    graph.Show()
Exemple #30
0
def build_graph(start_addr, type_graph, simplify=False, dontmodstack=True, loadint=False, verbose=False):
    """Build the Miasm IR graph of the code reached from @start_addr and show it.

    @start_addr: entry address passed to the disassembly engine
    @type_graph: one of TYPE_GRAPH_IR (plain IR, shown right after the
        optional simplification), TYPE_GRAPH_IRSSA or TYPE_GRAPH_IRSSAUNSSA
        (both processed by the SSA simplifier). Another value makes the SSA
        simplifier raise ValueError("Unknown option").
    @simplify: apply IRCFGSimplifierCommon to the IR graph first
    @dontmodstack: strip the stack pointer assignment out of call effects
    @loadint: call load_from_int (with is_addr_ro_variable) during each SSA
        simplification step
    @verbose: print progress messages and write 'asm_flow.dot' (current
        directory) and 'graph.dot' (temporary directory)

    For the SSA graph types, 'final.dot' is written in the current directory.
    Nothing is returned; the result is displayed with GraphMiasmIR.
    """
    machine = guess_machine(addr=start_addr)
    dis_engine, ira = machine.dis_engine, machine.ira

    class IRADelModCallStack(ira):
        """IR analysis able to hide the stack pointer change done by calls"""

        def call_effects(self, addr, instr):
            assignblks, extra = super(IRADelModCallStack, self).call_effects(addr, instr)
            if not dontmodstack:
                return assignblks, extra
            # Same blocks, minus the assignment whose destination is sp
            out = []
            for assignblk in assignblks:
                dct = {
                    dst: src
                    for (dst, src) in viewitems(dict(assignblk))
                    if dst != self.sp
                }
                out.append(AssignBlock(dct, assignblk.instr))
            return out, extra

    if verbose:
        print("Arch", dis_engine)

    fname = idc.GetInputFile()
    if verbose:
        print(fname)

    bs = bin_stream_ida()
    mdis = dis_engine(bs)
    ir_arch = IRADelModCallStack(mdis.loc_db)

    # Populate symbols with ida names
    for addr, name in idautils.Names():
        if name is None:
            continue
        if (mdis.loc_db.get_offset_location(addr) or
                mdis.loc_db.get_name_location(name)):
            # Symbol alias
            continue
        mdis.loc_db.add_location(name, addr)

    if verbose:
        print("start disasm")
        print(hex(start_addr))

    asmcfg = mdis.dis_multiblock(start_addr)
    # Location of the entry point: head of the graph for every simplifier
    head = mdis.loc_db.get_offset_location(start_addr)
    if verbose:
        print("generating graph")
        with open('asm_flow.dot', 'w') as dot_file:
            dot_file.write(asmcfg.dot())
        print("generating IR... %x" % start_addr)

    ircfg = ir_arch.new_ircfg_from_asmcfg(asmcfg)

    if verbose:
        print("IR ok... %x" % start_addr)

    # Rebuild each IR block with destinations and sources passed to expr_simp
    for irb in list(viewvalues(ircfg.blocks)):
        irs = []
        for assignblk in irb:
            new_assignblk = {
                expr_simp(dst): expr_simp(src)
                for dst, src in viewitems(assignblk)
            }
            irs.append(AssignBlock(new_assignblk, instr=assignblk.instr))
        ircfg.blocks[irb.loc_key] = IRBlock(irb.loc_key, irs)

    if verbose:
        # Opened in text mode to match the other dot dumps of this function
        graph_path = os.path.join(tempfile.gettempdir(), 'graph.dot')
        with open(graph_path, 'w') as dot_file:
            dot_file.write(ircfg.dot())
    title = "Miasm IR graph"

    if simplify:
        ircfg_simplifier = IRCFGSimplifierCommon(ir_arch)
        ircfg_simplifier.simplify(ircfg, head)
        title += " (simplified)"

    if type_graph == TYPE_GRAPH_IR:
        graph = GraphMiasmIR(ircfg, title, None)
        graph.Show()
        return

    # NOTE(review): not instantiated anywhere in this function (ir_arch is an
    # IRADelModCallStack); left in place.
    class IRAOutRegs(ira):
        def get_out_regs(self, block):
            regs_todo = super(IRAOutRegs, self).get_out_regs(block)
            out = {}
            for assignblk in block:
                for dst in assignblk:
                    reg = self.ssa_var.get(dst, None)
                    if reg is None:
                        continue
                    if reg in regs_todo:
                        out[reg] = dst
            return set(viewvalues(out))

    # Add dummy dependency to uncover out regs affectation
    for loc in ircfg.leaves():
        irblock = ircfg.blocks.get(loc)
        if irblock is None:
            continue
        regs = {}
        for reg in ir_arch.get_out_regs(irblock):
            regs[reg] = reg
        assignblks = list(irblock)
        # Attach the "reg = reg" block to the leaf's last instruction
        new_assiblk = AssignBlock(regs, assignblks[-1].instr)
        assignblks.append(new_assiblk)
        new_irblock = IRBlock(irblock.loc_key, assignblks)
        ircfg.blocks[loc] = new_irblock

    class CustomIRCFGSimplifierSSA(IRCFGSimplifierSSA):
        """SSA simplifier whose output depends on the requested type_graph"""

        def do_simplify(self, ssa, head):
            modified = super(CustomIRCFGSimplifierSSA, self).do_simplify(ssa, head)
            if loadint:
                modified |= load_from_int(ssa.graph, bs, is_addr_ro_variable)
            return modified

        def simplify(self, ircfg, head):
            ssa = self.ircfg_to_ssa(ircfg, head)
            ssa = self.do_simplify_loop(ssa, head)

            if type_graph == TYPE_GRAPH_IRSSA:
                ret = ssa.graph
            elif type_graph == TYPE_GRAPH_IRSSAUNSSA:
                ircfg = self.ssa_to_unssa(ssa, head)
                ircfg_simplifier = IRCFGSimplifierCommon(self.ir_arch)
                ircfg_simplifier.simplify(ircfg, head)
                ret = ircfg
            else:
                raise ValueError("Unknown option")
            return ret

    simplifier = CustomIRCFGSimplifierSSA(ir_arch)
    ircfg = simplifier.simplify(ircfg, head)
    with open('final.dot', 'w') as dot_file:
        dot_file.write(ircfg.dot())

    graph = GraphMiasmIR(ircfg, title, None)
    graph.Show()
Exemple #31
0
    "Naive Simplification: a + a + a == a * 3"

    # Match the expected form
    ## isinstance(expr, m2_expr.ExprOp) is not needed: simplifications are
    ## attached to expression types
    if expr.op == "+" and \
            len(expr.args) == 3 and \
            expr.args.count(expr.args[0]) == len(expr.args):

        # Effective simplification
        return m2_expr.ExprOp("*", expr.args[0],
                              m2_expr.ExprInt(3, expr.args[0].size))
    else:
        # Do not simplify
        return expr

# Demo: build a + a + a on a 32-bit identifier and print what expr_simp
# returns for it, first before and then after registering simp_add_mul.
a = m2_expr.ExprId('a', 32)
base_expr = a + a + a
print("Without adding the simplification:")
print("\t%s = %s" % (base_expr, expr_simp(base_expr)))

# Enable pass: register simp_add_mul for expressions of type ExprOp
expr_simp.enable_passes({m2_expr.ExprOp: [simp_add_mul]})

print("After adding the simplification:")
print("\t%s = %s" % (base_expr, expr_simp(base_expr)))

# Automatic fail: abort unless the sum is now simplified into a * 3
assert(expr_simp(base_expr) == m2_expr.ExprOp("*", a,
                                              m2_expr.ExprInt(3, a.size)))
Exemple #32
0
    def get_graph(self):
        """Lift self.function to Miasm IR and wrap the result in a MiasmIRGraph.

        Options are read from the instance:
        - self.simplify: run IRCFGSimplifierCommon on the IR graph
        - self.dontmodstack: drop the stack pointer assignment from call effects
        - self.loadmemint: apply load_from_int during SSA simplification
        - self.type_graph: TYPE_GRAPH_IR returns the IR graph directly;
          TYPE_GRAPH_IRSSA / TYPE_GRAPH_IRSSAUNSSA go through the SSA
          simplifier; any other value raises ValueError("Unknown option")

        The binary content is rebuilt from self.data.segments and disassembled
        from self.function.start.
        """
        simplify = self.simplify
        dontmodstack = self.dontmodstack
        loadmemint = self.loadmemint
        type_graph = self.type_graph

        # Collect the pieces first and join once instead of growing a string
        chunks = []
        for s in self.data.segments:
            chunks.append(self.data.read(s.start, len(s)))
            # add padding between each segment (up to the next 0x1000 boundary)
            if s.end != self.data.end:
                chunks.append('\x00' * (((s.end | 0xfff) + 1) - s.end))
        bin_str = "".join(chunks)

        bs = bin_stream_str(input_str=bin_str, base_address=self.data.start)
        machine = Machine(archs[self.data.arch.name])
        mdis = machine.dis_engine(bs)

        asmcfg = mdis.dis_multiblock(self.function.start)
        # Single entry point, used as head by the simplifiers
        head = mdis.loc_db.get_offset_location(self.function.start)

        class IRADelModCallStack(machine.ira):
            """IR analysis whose call effects may skip the stack pointer update"""

            def call_effects(self, addr, instr):
                assignblks, extra = super(IRADelModCallStack,
                                          self).call_effects(addr, instr)
                if not dontmodstack:
                    return assignblks, extra
                # Rebuild each assignment block without its sp entry
                out = []
                for assignblk in assignblks:
                    dct = {
                        dst: src
                        for (dst, src) in viewitems(dict(assignblk))
                        if dst != self.sp
                    }
                    out.append(AssignBlock(dct, assignblk.instr))
                return out, extra

        ir_arch = IRADelModCallStack(mdis.loc_db)
        ircfg = ir_arch.new_ircfg_from_asmcfg(asmcfg)

        # Replace every IR block by a copy simplified with expr_simp
        for irb in list(viewvalues(ircfg.blocks)):
            irs = []
            for assignblk in irb:
                new_assignblk = {
                    expr_simp(dst): expr_simp(src)
                    for dst, src in viewitems(assignblk)
                }
                irs.append(AssignBlock(new_assignblk, instr=assignblk.instr))
            ircfg.blocks[irb.loc_key] = IRBlock(irb.loc_key, irs)

        if simplify:
            ircfg_simplifier = IRCFGSimplifierCommon(ir_arch)
            ircfg_simplifier.simplify(ircfg, head)

        if type_graph == TYPE_GRAPH_IR:
            return MiasmIRGraph(self.add_names(ircfg))

        # NOTE(review): this class is never instantiated in this method
        # (ir_arch is an IRADelModCallStack); kept as found.
        class IRAOutRegs(machine.ira):
            def get_out_regs(self, block):
                regs_todo = super(IRAOutRegs, self).get_out_regs(block)
                out = {}
                for assignblk in block:
                    for dst in assignblk:
                        reg = self.ssa_var.get(dst, None)
                        if reg is None:
                            continue
                        if reg in regs_todo:
                            out[reg] = dst
                return set(viewvalues(out))

        # Add dummy dependency to uncover out regs affectation
        for loc in ircfg.leaves():
            irblock = ircfg.blocks.get(loc)
            if irblock is None:
                continue
            regs = {}
            for reg in ir_arch.get_out_regs(irblock):
                regs[reg] = reg
            assignblks = list(irblock)
            # The extra "reg = reg" block reuses the leaf's last instruction
            new_assiblk = AssignBlock(regs, assignblks[-1].instr)
            assignblks.append(new_assiblk)
            new_irblock = IRBlock(irblock.loc_key, assignblks)
            ircfg.blocks[loc] = new_irblock

        class CustomIRCFGSimplifierSSA(IRCFGSimplifierSSA):
            """SSA simplifier returning the graph flavour set by type_graph"""

            def do_simplify(self, ssa, head):
                modified = super(CustomIRCFGSimplifierSSA,
                                 self).do_simplify(ssa, head)
                if loadmemint:
                    modified |= load_from_int(ssa.graph, bs,
                                              is_addr_ro_variable)
                return modified

            def simplify(self, ircfg, head):
                ssa = self.ircfg_to_ssa(ircfg, head)
                ssa = self.do_simplify_loop(ssa, head)

                if type_graph == TYPE_GRAPH_IRSSA:
                    ret = ssa.graph
                elif type_graph == TYPE_GRAPH_IRSSAUNSSA:
                    ircfg = self.ssa_to_unssa(ssa, head)
                    ircfg_simplifier = IRCFGSimplifierCommon(self.ir_arch)
                    ircfg_simplifier.simplify(ircfg, head)
                    ret = ircfg
                else:
                    raise ValueError("Unknown option")
                return ret

        # dirty patch to synchronize nodes and blocks lists in ircfg
        nodes_to_del = [
            node for node in ircfg.nodes() if node not in ircfg.blocks
        ]
        for node in nodes_to_del:
            ircfg.del_node(node)

        simplifier = CustomIRCFGSimplifierSSA(ir_arch)
        ircfg = simplifier.simplify(ircfg, head)

        return MiasmIRGraph(self.add_names(ircfg))
Exemple #33
0
def launch_depgraph():
    """Compute dependency graphs for the function under the IDA cursor.

    The function containing the current screen address is disassembled and
    lifted to IR, the user settings are read from depGraphSettingsForm, the
    IR is simplified (optionally rewriting the stack pointer from IDA's
    stack delta when settings.unalias_stack is set), and the dependency
    graphs are stored in the module-level `graphs`. Finally the "Shift-N"
    hotkey is bound to next_element and treat_element() is called.

    Module-level names written: graphs, comments, sol_nb, settings, addr,
    ir_arch, ircfg.

    If no function contains the current address, a message is printed and
    nothing else is done.
    """
    global graphs, comments, sol_nb, settings, addr, ir_arch, ircfg
    # Get the current function
    addr = idc.ScreenEA()
    func = ida_funcs.get_func(addr)
    if func is None:
        # get_func gives None when the address is outside any function
        print("No function found at 0x%x" % addr)
        return
    func_start = func.startEA

    # Init
    machine = guess_machine(addr=func_start)
    mn, dis_engine, ira = machine.mn, machine.dis_engine, machine.ira

    bs = bin_stream_ida()
    mdis = dis_engine(bs, dont_dis_nulstart_bloc=True)
    ir_arch = ira(mdis.loc_db)

    # Populate symbols with ida names
    for ad, name in idautils.Names():
        if name is None:
            continue
        mdis.loc_db.add_location(name, ad)

    asmcfg = mdis.dis_multiblock(func_start)

    # Generate IR
    ircfg = ir_arch.new_ircfg_from_asmcfg(asmcfg)

    # Get settings
    settings = depGraphSettingsForm(ir_arch, ircfg)
    settings.Execute()

    loc_key, elements, line_nb = settings.loc_key, settings.elements, settings.line_nb
    # Simplify assignments
    for irb in list(viewvalues(ircfg.blocks)):
        irs = []
        offset = ir_arch.loc_db.get_location_offset(irb.loc_key)
        # Stack rewriting only applies to blocks that have an offset
        fix_stack = offset is not None and settings.unalias_stack
        for assignblk in irb:
            if fix_stack:
                # Express sp as its initial value plus IDA's stack delta
                stk_high = m2_expr.ExprInt(idc.GetSpd(assignblk.instr.offset), ir_arch.sp.size)
                fix_dct = {ir_arch.sp: mn.regs.regs_init[ir_arch.sp] + stk_high}

            new_assignblk = {}
            for dst, src in viewitems(assignblk):
                if fix_stack:
                    src = src.replace_expr(fix_dct)
                    # Keep sp itself as a destination, only rewrite its uses
                    if dst != ir_arch.sp:
                        dst = dst.replace_expr(fix_dct)
                dst, src = expr_simp(dst), expr_simp(src)
                new_assignblk[dst] = src
            irs.append(AssignBlock(new_assignblk, instr=assignblk.instr))
        ircfg.blocks[irb.loc_key] = IRBlock(irb.loc_key, irs)

    # Get dependency graphs
    dg = settings.depgraph
    graphs = dg.get(loc_key, elements, line_nb,
                    {ir_arch.loc_db.get_offset_location(func_start)})

    # Display the result
    comments = {}
    sol_nb = 0

    # Register and launch
    ida_kernwin.add_hotkey("Shift-N", next_element)
    treat_element()
Exemple #34
0
def test(left, right):
    """Launch tests on left OP right"""
    global size, mask

    for left_i in left:
        left_i = ModularIntervals(size, left_i)
        left_values = list(interval_elements(left_i))

        # Check operations without other arguments
        ## Check NEG
        result = -left_i
        for x in left_values:
            rez = (-x) & mask
            assert rez in result

        # Check operations on intervals
        for right_i in right:
            right_i = ModularIntervals(size, right_i)
            right_values = list(interval_elements(right_i))

            # Check operations available only on integer
            if len(right_values) == 1:
                # Check mod
                value = right_values[0]
                # Avoid division by zero
                if value != 0:
                    result = left_i % value
                    for x in left_values:
                        rez = (x % value) & mask
                        assert rez in result

            # Check ADD
            result = left_i + right_i
            for x in left_values:
                for y in right_values:
                    rez = (x + y) & mask
                    assert rez in result

            # Check OR
            result = left_i | right_i
            for x in left_values:
                for y in right_values:
                    rez = (x | y) & mask
                    assert rez in result

            # Check AND
            result = left_i & right_i
            for x in left_values:
                for y in right_values:
                    rez = (x & y) & mask
                    assert rez in result

            # Check XOR
            result = left_i ^ right_i
            for x in left_values:
                for y in right_values:
                    rez = (x ^ y) & mask
                    assert rez in result

            # Check MUL
            result = left_i * right_i
            for x in left_values:
                for y in right_values:
                    rez = (x * y) & mask
                    assert rez in result

            # Check >>
            result = left_i >> right_i
            for x in left_values:
                for y in right_values:
                    rez = (x >> y) & mask
                    assert rez in result

            # Check <<
            result = left_i << right_i
            for x in left_values:
                for y in right_values:
                    rez = (x << y) & mask
                    assert rez in result

            # Check a>>
            result = left_i.arithmetic_shift_right(right_i)
            for x in left_values:
                x = ExprInt(x, size)
                for y in right_values:
                    y = ExprInt(y, size)
                    rez = int(expr_simp(ExprOp('a>>', x, y)))
                    assert rez in result

            # Check >>>
            result = left_i.rotation_right(right_i)
            for x in left_values:
                x = ExprInt(x, size)
                for y in right_values:
                    y = ExprInt(y, size)
                    rez = int(expr_simp(ExprOp('>>>', x, y)))
                    assert rez in result

            # Check <<<
            result = left_i.rotation_left(right_i)
            for x in left_values:
                x = ExprInt(x, size)
                for y in right_values:
                    y = ExprInt(y, size)
                    rez = int(expr_simp(ExprOp('<<<', x, y)))
                    assert rez in result
Exemple #35
0
    # Match the expected form
    ## isinstance(expr, m2_expr.ExprOp) is not needed: simplifications are
    ## attached to expression types
    if expr.op == "+" and \
            len(expr.args) == 3 and \
            expr.args.count(expr.args[0]) == len(expr.args):

        # Effective simplification
        return m2_expr.ExprOp("*", expr.args[0],
                              m2_expr.ExprInt(3, expr.args[0].size))
    else:
        # Do not simplify
        return expr


# Demo: print how expr_simp renders a + a + a (a being a 32-bit identifier)
# before and after simp_add_mul is registered as an ExprOp pass.
a = m2_expr.ExprId('a', 32)
base_expr = a + a + a
print("Without adding the simplification:")
print("\t%s = %s" % (base_expr, expr_simp(base_expr)))

# Enable pass: simp_add_mul is attached to the ExprOp expression type
expr_simp.enable_passes({m2_expr.ExprOp: [simp_add_mul]})

print("After adding the simplification:")
print("\t%s = %s" % (base_expr, expr_simp(base_expr)))

# Automatic fail: stop here unless the sum now simplifies into a * 3
assert (expr_simp(base_expr) == m2_expr.ExprOp("*", a,
                                               m2_expr.ExprInt(3, a.size)))
Exemple #36
0
    def fromstring(cls, text, loc_db, mode = None):
        """Build an instruction from its textual form @text.

        @text: instruction string; its first whitespace-delimited word is the
            mnemonic name, the rest holds the comma-separated arguments
        @loc_db: location database forwarded to the argument parsers
        @mode: forwarded to cls.get_cls_instance and cls.instruction

        Returns cls.instruction(...) built from the first candidate whose
        arguments all parse and consume the whole argument string.

        Raises ValueError when @text holds no name, when the name is not in
        cls.all_mn_name, or when no candidate can parse the arguments.
        Raises NotImplementedError when an argument parses but leaves its
        expression unset.
        """
        global total_scans
        name_match = re.search(r'(\S+)', text)
        if name_match is None:
            # Empty or whitespace-only input
            raise ValueError('cannot find name', text)
        name = name_match.group(1)

        if name not in cls.all_mn_name:
            raise ValueError('unknown name', name)
        clist = list(cls.all_mn_name[name])
        out = []
        out_args = []
        # Scan cache shared by all candidates:
        # (argument index, characters already consumed) -> {parser: result}
        parsers = defaultdict(dict)

        for cc in clist:
            for c in cls.get_cls_instance(cc, mode):
                args_expr = []
                args_str = text[len(name):].strip(' ')

                cannot_parse = False
                len_o = len(args_str)

                for i, f in enumerate(c.args):
                    start_i = len_o - len(args_str)
                    if isinstance(f.parser, tuple):
                        parser = f.parser
                    else:
                        parser = (f.parser,)
                    for p in parser:
                        if p in parsers[(i, start_i)]:
                            # Already scanned at this position
                            continue
                        try:
                            total_scans += 1
                            v, start, stop = next(p.scanString(args_str))
                        except StopIteration:
                            v, start, stop = [None], None, None
                        # Only a match at the very beginning is usable
                        if start != 0:
                            v, start, stop = [None], None, None
                        if v != [None]:
                            v = f.asm_ast_to_expr(v[0], loc_db)
                        if v is None:
                            v, start, stop = [None], None, None
                        parsers[(i, start_i)][p] = v, start, stop
                    start, stop = f.fromstring(args_str, loc_db, parsers[(i, start_i)])
                    if start != 0:
                        log.debug("cannot fromstring %r", args_str)
                        cannot_parse = True
                        break
                    if f.expr is None:
                        raise NotImplementedError('not fully functional')
                    f.expr = expr_simp(f.expr)
                    args_expr.append(f.expr)
                    # Drop the parsed argument and one separating comma
                    args_str = args_str[stop:].strip(' ')
                    if args_str.startswith(','):
                        args_str = args_str[1:]
                    args_str = args_str.strip(' ')
                # Leftover text means this candidate does not fit
                if args_str:
                    cannot_parse = True
                if cannot_parse:
                    continue

                out.append(c)
                out_args.append(args_expr)
                break

        if len(out) == 0:
            raise ValueError('cannot fromstring %r' % text)
        if len(out) != 1:
            log.debug('fromstring multiple args ret default')
        # Several candidates may fit: the first one is used
        c = out[0]
        c_args = out_args[0]

        instr = cls.instruction(c.name, mode, c_args,
                                additional_info=c.additional_info())
        return instr
Exemple #37
0
    for c_str, ctype in mychandler.expr_to_c_and_types(expr):
        print(c_str, ctype)
        computed.add((str(ctype), c_str))
    assert computed == result


    for out_type, out_str in computed:
        parsed_expr = mychandler.c_to_expr(out_str)
        parsed_type = mychandler.c_to_type(out_str)
        print("Access expr:", parsed_expr)
        print("Access type:", parsed_type)

        ast = parse_access(out_str)
        access_c = ast_get_c_access_expr(ast, c_context)
        print("Generated access:", access_c)

        parsed_expr_bis, parsed_type_bis = mychandler.exprc2expr.get_expr(access_c, c_context)
        assert parsed_expr_bis is not None
        assert parsed_expr == parsed_expr_bis
        assert parsed_type == parsed_type_bis

        parsed_expr_3, parsed_type_3 = mychandler.c_to_expr_and_type(out_str)
        assert parsed_expr_3 is not None
        assert parsed_expr == parsed_expr_3
        assert parsed_type == parsed_type_3

        expr_new1 = expr_simp(parsed_expr)
        expr_new2 = expr_simp(expr)
        print("\t", expr_new1)
        assert expr_new1 == expr_new2
Exemple #38
0
def test(left, right):
    """Launch tests on left OP right"""
    global size, mask

    for left_i in left:
        left_i = ModularIntervals(size, left_i)
        left_values = list(interval_elements(left_i))

        # Check operations without other arguments
        ## Check NEG
        result = - left_i
        for x in left_values:
            rez = (- x) & mask
            assert rez in result

        # Check operations on intervals
        for right_i in right:
            right_i = ModularIntervals(size, right_i)
            right_values = list(interval_elements(right_i))

            # Check operations available only on integer
            if len(right_values) == 1:
                # Check mod
                value = right_values[0]
                # Avoid division by zero
                if value != 0:
                    result = left_i % value
                    for x in left_values:
                        rez = (x % value) & mask
                        assert rez in result

            # Check ADD
            result = left_i + right_i
            for x in left_values:
                for y in right_values:
                    rez = (x + y) & mask
                    assert rez in result

            # Check OR
            result = left_i | right_i
            for x in left_values:
                for y in right_values:
                    rez = (x | y) & mask
                    assert rez in result

            # Check AND
            result = left_i & right_i
            for x in left_values:
                for y in right_values:
                    rez = (x & y) & mask
                    assert rez in result

            # Check XOR
            result = left_i ^ right_i
            for x in left_values:
                for y in right_values:
                    rez = (x ^ y) & mask
                    assert rez in result

            # Check MUL
            result = left_i * right_i
            for x in left_values:
                for y in right_values:
                    rez = (x * y) & mask
                    assert rez in result

            # Check >>
            result = left_i >> right_i
            for x in left_values:
                for y in right_values:
                    rez = (x >> y) & mask
                    assert rez in result

            # Check <<
            result = left_i << right_i
            for x in left_values:
                for y in right_values:
                    rez = (x << y) & mask
                    assert rez in result

            # Check a>>
            result = left_i.arithmetic_shift_right(right_i)
            for x in left_values:
                x = ExprInt(x, size)
                for y in right_values:
                    y = ExprInt(y, size)
                    rez = int(expr_simp(ExprOp('a>>', x, y)))
                    assert rez in result

            # Check >>>
            result = left_i.rotation_right(right_i)
            for x in left_values:
                x = ExprInt(x, size)
                for y in right_values:
                    y = ExprInt(y, size)
                    rez = int(expr_simp(ExprOp('>>>', x, y)))
                    assert rez in result

            # Check <<<
            result = left_i.rotation_left(right_i)
            for x in left_values:
                x = ExprInt(x, size)
                for y in right_values:
                    y = ExprInt(y, size)
                    rez = int(expr_simp(ExprOp('<<<', x, y)))
                    assert rez in result
Exemple #39
0
# Lift only the first block of asmcfg into a fresh, empty IR graph
ircfg = lifter.new_ircfg()

first_block = list(asmcfg.blocks)[0]
lifter.add_asmblock_to_ircfg(first_block, ircfg)

# --- Symbolic execution --- #

from miasm.ir.symbexec import SymbolicExecutionEngine
from miasm.expression.expression import *

# regs_init is given as second argument
# (presumably the initial symbolic state -- TODO confirm)
symb = SymbolicExecutionEngine(lifter, machine.mn.regs.regs_init)

# irDst contains the offset of next IR basic block to execute
irDst = symb.run_at(ircfg, entry_addr, step=False)
print("IR Dest = ", irDst)

# Provide symbolic context to irDst: the 32-bit memory access at
# EBP_init - 4 is replaced by the identifier "flag" before re-evaluation
expr_flag = ExprId("flag", 32)
result = symb.eval_expr(
    expr_simp(
        irDst.replace_expr(
            {
                expr_simp(
                    ExprMem(machine.mn.regs.EBP_init - ExprInt(0x4, 32), 32)):
                expr_flag,
            })))
print("IR Dest Semantics = ", result)

# Dump the final state of symbolic execution
# symb.dump()
Exemple #40
0
    # Collect every (C type string, C source string) pair the handler
    # produces for `expr`. `computed` exposes `.add`, so it is presumably a
    # set created earlier in the enclosing function -- TODO confirm.
    for c_str, ctype in mychandler.expr_to_c_and_types(expr):
        print(c_str, ctype)
        computed.add((str(ctype), c_str))
    # The generated pairs must match the expected value exactly.
    assert computed == result

    # Round-trip each generated C string back to an expression and a type
    # through three different entry points; all of them must agree.
    # NOTE: `out_type` is unpacked but not used inside this loop.
    for out_type, out_str in computed:
        # Path 1: dedicated expression and type parsers.
        parsed_expr = mychandler.c_to_expr(out_str)
        parsed_type = mychandler.c_to_type(out_str)
        print("Access expr:", parsed_expr)
        print("Access type:", parsed_type)

        # Path 2: parse the C string into an AST, turn it into an access
        # object within `c_context`, then resolve it via exprc2expr.
        ast = parse_access(out_str)
        access_c = ast_get_c_access_expr(ast, c_context)
        print("Generated access:", access_c)

        parsed_expr_bis, parsed_type_bis = mychandler.exprc2expr.get_expr(
            access_c, c_context)
        assert parsed_expr_bis is not None
        assert parsed_expr == parsed_expr_bis
        assert parsed_type == parsed_type_bis

        # Path 3: combined expression-and-type parser.
        parsed_expr_3, parsed_type_3 = mychandler.c_to_expr_and_type(out_str)
        assert parsed_expr_3 is not None
        assert parsed_expr == parsed_expr_3
        assert parsed_type == parsed_type_3

        # After simplification, the re-parsed expression must equal the
        # simplified original expression.
        expr_new1 = expr_simp(parsed_expr)
        expr_new2 = expr_simp(expr)
        print("\t", expr_new1)
        assert expr_new1 == expr_new2
Exemple #41
0
    def fromstring(cls, text, loc_db, mode=None):
        """Build an instruction from the assembly line *text*.

        The first whitespace-delimited token of *text* is taken as the
        mnemonic name. Every candidate registered under that name in
        ``cls.all_mn_name`` is instantiated through
        ``cls.get_cls_instance(candidate, mode)`` and its arguments are
        parsed, in order, against the remaining operand text. The first
        instance of each candidate that consumes the whole operand text is
        retained; the first retained one overall is used for the result.

        :param text: assembly source line (mnemonic followed by operands)
        :param loc_db: passed through to the arguments' ``asm_ast_to_expr``
            and ``fromstring`` methods
        :param mode: passed through to ``cls.get_cls_instance`` and
            ``cls.instruction``
        :return: ``cls.instruction(name, mode, args, additional_info=...)``
            for the selected candidate
        :raises ValueError: if *text* contains no mnemonic, if the mnemonic
            is unknown, or if no candidate parses the operands
        :raises NotImplementedError: if an argument reports a successful
            parse but leaves its ``expr`` attribute set to None
        """
        global total_scans
        # Keep the match object: re.search returns None when the text holds
        # no non-blank token, and the end offset is needed further down.
        name_match = re.search(r'(\S+)', text)
        if name_match is None:
            raise ValueError('cannot find name', text)
        name = name_match.group(1)

        if name not in cls.all_mn_name:
            raise ValueError('unknown name', name)
        clist = list(cls.all_mn_name[name])
        out = []
        out_args = []
        # Scan results cached per (argument index, offset in the operand
        # text) and then per parser, so a parser is not run again at the
        # same position for another candidate.
        parsers = defaultdict(dict)

        # Operand text starts where the mnemonic match ends; using the match
        # end (rather than len(name)) stays correct when the mnemonic is
        # preceded by whitespace.
        operands = text[name_match.end():].strip(' ')
        len_o = len(operands)

        for cc in clist:
            for c in cls.get_cls_instance(cc, mode):
                args_expr = []
                args_str = operands
                cannot_parse = False

                for i, f in enumerate(c.args):
                    start_i = len_o - len(args_str)
                    position_cache = parsers[(i, start_i)]
                    if isinstance(f.parser, tuple):
                        parser = f.parser
                    else:
                        parser = (f.parser,)
                    for p in parser:
                        if p in position_cache:
                            continue
                        try:
                            total_scans += 1
                            v, start, stop = next(p.scanString(args_str))
                        except StopIteration:
                            v, start, stop = [None], None, None
                        # Only a match anchored at the very beginning of the
                        # remaining operand text is acceptable.
                        if start != 0:
                            v, start, stop = [None], None, None
                        if v != [None]:
                            v = f.asm_ast_to_expr(v[0], loc_db)
                        if v is None:
                            v, start, stop = [None], None, None
                        position_cache[p] = v, start, stop
                    start, stop = f.fromstring(args_str, loc_db, position_cache)
                    if start != 0:
                        log.debug("cannot fromstring %r", args_str)
                        cannot_parse = True
                        break
                    if f.expr is None:
                        raise NotImplementedError('not fully functional')
                    f.expr = expr_simp(f.expr)
                    args_expr.append(f.expr)
                    # Drop the consumed operand and one optional separating
                    # comma before parsing the next argument.
                    args_str = args_str[stop:].strip(' ')
                    if args_str.startswith(','):
                        args_str = args_str[1:]
                    args_str = args_str.strip(' ')

                # Leftover operand text means this instance does not match.
                if cannot_parse or args_str:
                    continue

                out.append(c)
                out_args.append(args_expr)
                # One matching instance per candidate is enough.
                break

        if not out:
            raise ValueError('cannot fromstring %r' % text)
        if len(out) != 1:
            log.debug('fromstring multiple args ret default')
        c = out[0]
        c_args = out_args[0]

        instr = cls.instruction(c.name, mode, c_args,
                                additional_info=c.additional_info())
        return instr
Exemple #42
0
    # greater lesser, neg
    (b1, expr_is_signed_greater, int_1, int_m1),
    (b1, expr_is_signed_lower, int_m1, int_1),
    (b0, expr_is_signed_greater, int_m1, int_1),
    (b0, expr_is_signed_lower, int_1, int_m1),
    (b1, expr_is_signed_greater_or_equal, int_1, int_m1),
    (b1, expr_is_signed_lower_or_equal, int_m1, int_1),
    (b0, expr_is_signed_greater_or_equal, int_m1, int_1),
    (b0, expr_is_signed_lower_or_equal, int_1, int_m1),
    (b1, expr_is_signed_greater_or_equal, int_m1, int_m1),
    (b1, expr_is_signed_lower_or_equal, int_m1, int_m1),
    (b1, expr_is_signed_greater, int_m1, int_m2),
    (b1, expr_is_signed_lower, int_m2, int_m1),
    (b0, expr_is_signed_greater, int_m2, int_m1),
    (b0, expr_is_signed_lower, int_m1, int_m2),
    (b1, expr_is_signed_greater_or_equal, int_m1, int_m2),
    (b1, expr_is_signed_lower_or_equal, int_m2, int_m1),
    (b0, expr_is_signed_greater_or_equal, int_m2, int_m1),
    (b0, expr_is_signed_lower_or_equal, int_m1, int_m2),

    # eq/neq
    (b1, expr_is_equal, int_1, int_1),
    (b1, expr_is_not_equal, int_0, int_1),
    (b0, expr_is_equal, int_1, int_0),
    (b0, expr_is_not_equal, int_0, int_0),
]

# Each entry gives the expected simplified value, the expression builder to
# call, and its two operands.
for result, func, arg1, arg2 in tests:
    simplified = expr_simp(func(arg1, arg2))
    assert result == simplified