Ejemplo n.º 1
0
 def _process_cmov(self, cur_bloc, last_instruction):
     """
     Replaces a conditional move at the end of cur_bloc with explicit control flow: the last line of cur_bloc is
     dropped and two freshly created blocks are added, one holding a conditional branch and one holding a plain
     move followed by a jump to the original successor
     :param cur_bloc: block whose last line is removed and whose successors are rewritten; it needs at least one
         outgoing constraint, the first one yielded by iteration is used as the join point
     :param last_instruction: instruction whose first two args are used as destination and source of the move;
         presumably a CMOVcc - everything after the first len("CMOV") characters of its name is reused as the
         condition suffix of the generated branch
     """
     mode = self.mode
     loc_db = self.loc_db
     # allocation order matters for the generated loc_keys: move block first, test block second
     mov_block = AsmBlock(loc_db.add_location())
     test_block = AsmBlock(loc_db.add_location())

     operands = last_instruction.args
     mov_line = create_mov_instruction(mode, operands[0], operands[1])
     join_loc_key = next(iter(cur_bloc.bto)).loc_key
     mov_block.lines.extend([
         mov_line,
         create_jump_instruction(mode, ExprLoc(join_loc_key, mode)),
     ])

     jcc_name = "J" + last_instruction.name[len("CMOV"):]

     # drop the last line of the block; keep the block non-empty
     cur_bloc.lines.pop()
     if not cur_bloc.lines:
         cur_bloc.lines = [create_nop(mode)]

     test_block.lines.append(
         create_cond_branch_instruction(mode, jcc_name, ExprLoc(mov_block.loc_key, mode)))

     # cur_bloc -> test_block -> (taken) mov_block -> join, (not taken) join
     mov_block.bto = {AsmConstraintTo(join_loc_key)}
     test_block.bto = {AsmConstraintNext(join_loc_key), AsmConstraintTo(mov_block.loc_key)}
     cur_bloc.bto = {AsmConstraintNext(test_block.loc_key)}

     for fresh_block in (mov_block, test_block):
         self.add_block(fresh_block)
Ejemplo n.º 2
0
    def _insert_flat_block(self, source_flat_block, symb_exec,
                           flat_block_to_loc_key):
        """
        Copies source_flat_block into out_asmcfg and sets its successors according to flat_block_to_loc_key
        :param flat_block_to_loc_key: dictionary mapping flat_blocks to respective loc_keys; successors which
            have no entry yet get a fresh loc_key and are added to the dictionary in place
        :param symb_exec: instance of current symbolic execution engine
        :param source_flat_block: flat_block to be inserted
        :return: set of successor flat_blocks which had no loc_key assigned before this call
        :raises Exception: if the loc_key reserved for the copy is already associated to a block in out_asmcfg
        """
        # we're not using redirect_successors after copying to avoid executing the same loops multiple times
        source_block = self.asmcfg.loc_key_to_block(
            source_flat_block.block_loc_key)
        tobe_processed = {}
        new_flat_blocks = set()
        new_block_loc_key = flat_block_to_loc_key[source_flat_block]
        if self.out_asmcfg.loc_key_to_block(new_block_loc_key) is not None:
            raise Exception("Target loc_key is already associated to a block")
        new_block = AsmBlock(new_block_loc_key)

        # copy instructions
        for ln in source_block.lines:
            tmp_ln = instruction_x86(ln.name, ln.mode,
                                     [i.copy() for i in ln.args],
                                     ln.additional_info)
            tmp_ln.b = ln.b
            tmp_ln.l = ln.l
            tmp_ln.offset = ln.offset
            new_block.addline(tmp_ln)

        constraints = source_block.bto
        # try to simplify the destination if it's a primary flattening block
        if not self.flat_loops[source_block.loc_key].is_default:
            logger.debug("current block is a part of primary loc_keys")
            simplified_target = symb_exec.eval_expr(self.ircfg.IRDst)
            if isinstance(simplified_target, ExprInt):
                simplified_target = self.asmcfg.loc_db.get_offset_location(
                    int(simplified_target))
            elif isinstance(simplified_target, ExprLoc):
                simplified_target = simplified_target.loc_key
            else:
                # there's probably a(n) (series of) unknown instruction(s) causing an implicit conditional assignment
                # such as CMOV or SBB->AND->ADD, prepend comparison + cond jump if it happens to be common, or add it to
                # ExtendedAsmCFG.extended_discovery and split flow on the final instruction

                # it's also possible that it's not related to any cff loop at all
                addr = self.asmcfg.loc_db.get_location_offset(
                    source_flat_block.block_loc_key)
                addr = hex(addr) if addr else addr
                logger.warning(
                    "Couldn't simplify loc_key %s at %s, continuing" %
                    (str(source_flat_block.block_loc_key), addr))
                logger.warning("the simplified target is %s of instance %s" %
                               (simplified_target, type(simplified_target)))
                simplified_target = None
            if simplified_target:
                constraints = {AsmConstraintTo(simplified_target)}
                mode = self.asmcfg.mode

                # remove redundant comparison
                dp = DependencyGraph(self.ircfg, True)
                block_loc_key = source_block.loc_key
                res = next(
                    dp.get(block_loc_key, {self.ircfg.IRDst}, None,
                           {block_loc_key}))
                # translate IR line numbers to indices of the copied block with an offset measured once, before
                # anything is removed; recomputing it from the shrinking list made every node sharing a line with
                # (or preceding) an already removed CMP point one instruction too early
                line_shift = (len(self.ircfg.blocks[block_loc_key]) -
                              len(new_block.lines))
                candidate_inds = {depnode.line_nb - line_shift
                                  for depnode in res.relevant_nodes}
                # highest index first so that a removal never shifts the candidates still to be checked
                for ind in sorted(candidate_inds, reverse=True):
                    if not 0 <= ind < len(new_block.lines):
                        # the node has no counterpart in the copied block; a negative index must not wrap around
                        continue
                    if new_block.lines[ind].name == "CMP":
                        new_block.lines.pop(ind)

                # the block now ends with a direct jump to the resolved target
                new_block.lines[-1] = create_jump_instruction(
                    mode, ExprLoc(simplified_target, mode))

        # copy constraints
        new_bto = set()
        for constraint in constraints:
            if not self.asmcfg.loc_key_to_block(constraint.loc_key):
                logger.debug("Skipping bad constraint %s" % constraint.loc_key)
                continue
            flat_block = self.flat_loops.get_block(constraint.loc_key,
                                                   symb_exec,
                                                   source_flat_block)
            if flat_block not in flat_block_to_loc_key:
                # first time this successor is seen: reserve a loc_key for it and report it to the caller
                new_flat_blocks.add(flat_block)
                new_loc_key = self.out_asmcfg.loc_db.add_location()
                tobe_processed[constraint.loc_key] = (new_loc_key, flat_block)
                flat_block_to_loc_key[flat_block] = new_loc_key
            else:
                new_loc_key = flat_block_to_loc_key[flat_block]
            new_bto.add(AsmConstraint(new_loc_key, constraint.c_t))
        new_block.bto = new_bto
        new_block.alignment = source_block.alignment

        # change jmp targets
        if new_block.lines:
            for ind, arg in enumerate(list(new_block.lines[-1].args)):
                if isinstance(arg, ExprLoc):
                    if not self.asmcfg.loc_key_to_block(arg.loc_key):
                        logger.debug("Skipping bad constraint %s" %
                                     arg.loc_key)
                        continue
                    # prefer the loc_key reserved while copying the constraints above
                    new_target, flat_block = tobe_processed.get(
                        arg.loc_key, (None, None))
                    if not new_target:
                        flat_block = self.flat_loops.get_block(
                            arg.loc_key, symb_exec, source_flat_block)
                        new_target = flat_block_to_loc_key.get(flat_block)
                    # None in case of irrelevant calls
                    logger.debug("new target: %s" % new_target)
                    if new_target:
                        new_block.lines[-1].args[ind] = ExprLoc(
                            new_target, arg.size)

        self.out_asmcfg.add_block(new_block)
        return new_flat_blocks
Ejemplo n.º 3
0
    def _process_jmp_table(self, cur_bloc, mn, attrib, loc_db, pool_bin, offsets_to_dis):
        """
        Tries to resolve the indirect jump of cur_bloc as a jump table and to replace it by a chain of
        compare-and-branch blocks, one per resolved destination range.

        The possible values of the jump table control variable are enumerated with a z3 solver constrained by
        the IRDst equation of the single predecessor; every value is turned into a destination address by
        symbolically evaluating the IRDst equation of cur_bloc.

        On success new blocks are added to this graph, cur_bloc.lines is replaced by the instructions IRDst does
        not depend on (or a single nop), cur_bloc gets a c_next constraint to the first comparison block and its
        loc_key is recorded in self.jmp_table_loc_keys.

        :param cur_bloc: block to be processed; it is added to this graph right away, even if the method gives up
        :param mn: forwarded to get_ira together with attrib to obtain the IR analysis class
        :param attrib: forwarded to get_ira
        :param loc_db: location database used for offset lookups and for creating new locations
        :param pool_bin: forwarded to MySymbolicExecutionEngine
        :param offsets_to_dis: collection supporting add(); every resolved destination address is added to it
        :return: None; the method returns early without rewriting anything when cur_bloc doesn't have exactly one
            predecessor, no single control variable is found, the predecessor can't be symbolically evaluated,
            no comparison register candidate exists or the solver finds no solution
        :raises RuntimeError: if 500 control variable values were enumerated and the solver still had solutions
        """
        # TODO add support for jump tables with "AND cntrl_var, range" boundary check; such jmp tables were present only
        #   in library functions in Stantinko samples
        # add current block to the asmcfg to make it accessible in the ircfg edges, add_block is called anyway right
        # after this callback, it will notice that the block has been already added
        self.add_block(cur_bloc)
        dst_address = loc_db.get_location_offset(cur_bloc.loc_key)

        logger.info("Possible jump table addr: 0x%x" % dst_address)

        ira = get_ira(mn, attrib)

        ir_arch = ira(loc_db)

        ircfg = ir_arch.new_ircfg_from_asmcfg(self)

        # the previous blocks should have exactly 1 predecessor dictating range
        predecessors = self.predecessors(cur_bloc.loc_key)
        if len(predecessors) != 1:
            logger.info("Expected exactly one predecessor")
            return
        predecessor = ircfg.blocks[predecessors.pop()]

        irdst_block = ircfg.blocks[cur_bloc.loc_key]
        # a differing length means the IR block doesn't map 1:1 to the asm lines; walk the IR successors until
        # the IR block whose destination is a memory dereference is found
        if len(irdst_block.assignblks) != len(cur_bloc.lines):
            processed = set()
            todo = {irdst_block.loc_key}
            while not irdst_block.dst.is_mem():
                loc_key = todo.pop()
                if loc_key in processed:
                    continue
                processed.add(loc_key)
                irdst_block = ircfg.blocks[loc_key]
                todo.update(ircfg.successors(loc_key))

        # we shouldn't stumble upon crashing segm and call operators even though implicit is required to process
        # initial IRDst(mentioned operators cause crashes of the engine behind implicit) since we operate only on the
        # 2 crucial basic blocks. The predecessor contains range of the jump table, we use it to determine constructs
        # of the jump table and track back base code segment address assignment to target the msvc compiler and x64
        # architecture, other compilers use directly RIP related addressing to get the address.

        # get real predecessor
        asm_block = self.loc_key_to_block(predecessor.loc_key)
        # same length check as above; walk the IR successors until the IR block which directly precedes cur_bloc
        if len(predecessor.assignblks) != len(asm_block.lines):
            processed = set()
            todo = {predecessor.loc_key}
            while cur_bloc.loc_key not in ircfg.successors(predecessor.loc_key):
                loc_key = todo.pop()
                if loc_key in processed:
                    continue
                processed.add(loc_key)
                predecessor = ircfg.blocks[loc_key]
                todo.update(ircfg.successors(loc_key))

        # get jump_table_control_variable from predecessor
        dg = DependencyGraph(ircfg, implicit=True, apply_simp=True, follow_mem=True, follow_call=False)
        jtcdg = JTCVariableDependencyGraph(predecessor.loc_key,
                                           ircfg, implicit=True, apply_simp=True, follow_mem=False, follow_call=False)

        dependency_result_iter = iter(jtcdg.get(irdst_block.loc_key, {ircfg.IRDst}, len(predecessor.assignblks),
                                                {predecessor.loc_key}))
        # only the first solution is used
        solution_predecessor = next(dependency_result_iter)
        # jump table control variable
        jtc_var = jtcdg.jtc_var
        if not jtc_var:
            logger.info("couldn't determine single jump table control variable")
            return
        # get symbolic execution engine to be used in both predecessor and jmp table block
        symb_exec_both = MySymbolicExecutionEngine(pool_bin, jtc_var, ir_arch)
        try:
            # symbolically evaluate lines influencing IRDst of the predecessor leading to jtc_var
            for line_nb in sorted({node.line_nb for node in solution_predecessor.relevant_nodes
                                   if node.loc_key == predecessor.loc_key}):
                assign_blk = predecessor.assignblks[line_nb]
                symb_exec_both.eval_updt_assignblk(assign_blk)
        except (KeyError, TypeError):
            logger.error(
                "Couldn't symbolically eval predecessor of 0x%x" % loc_db.get_location_offset(cur_bloc.loc_key))
            # stantinko contains illegal unreachable dereferences prior jmp tables, such as
            # xor     eax, eax; movsx   eax, byte ptr [eax]
            return
        # get symbolic execution engine supporting binary memory dereference
        # NOTE(review): the argument order differs from the call above (pool_bin, jtc_var, ir_arch) - confirm
        # against the signature of MySymbolicExecutionEngine
        symb_exec_minimal = MySymbolicExecutionEngine(pool_bin, ir_arch, symb_exec_both.symbols.copy())
        predecessor_irdst_equation = symb_exec_both.symbols[ircfg.IRDst]

        # get equation whose solutions solve the indirect jump
        irdst_block = ircfg.blocks[cur_bloc.loc_key]
        # same walk as the first one, this time every visited IR block is symbolically executed on the way
        if len(irdst_block.assignblks) != len(cur_bloc.lines):
            processed = set()
            todo = {irdst_block.loc_key}
            while not irdst_block.dst.is_mem():
                symb_exec_both.eval_updt_irblock(irdst_block)
                loc_key = todo.pop()
                if loc_key in processed:
                    continue
                processed.add(loc_key)
                irdst_block = ircfg.blocks[loc_key]
                todo.update(ircfg.successors(loc_key))

        irdst_equation = symb_exec_both.eval_updt_irblock(irdst_block)
        # filled by the visitor callback below
        sizes = set()
        # prevent mem processing via raw arrays by using var ID instead
        # we also want to set a maximum boundary so slices don't cause the sat solver generate a huge number of results
        visitor = ExprVisitorCallbackTopToBottom(lambda x: self._eliminate_jtc_var_slice_cb(x, sizes, jtc_var))
        irdst_equation = visitor.visit(irdst_equation)
        predecessor_irdst_equation = visitor.visit(predecessor_irdst_equation)
        # the smallest collected size greater than 1 limits the solver range, the full variable size otherwise
        size_boundary = jtc_var.size
        sizes = sorted(filter(lambda x: x > 1, sizes))
        if sizes:
            size_boundary = sizes[0]
        jtc_var_id = ExprId("jtc_var", jtc_var.size)
        irdst_equation = irdst_equation.replace_expr({jtc_var: jtc_var_id})
        predecessor_irdst_equation = predecessor_irdst_equation.replace_expr({jtc_var: jtc_var_id})
        # track possible CS base address dependency, ignore control variable from predecessor
        eliminated_jtc_var_equation = irdst_equation.replace_expr({jtc_var_id: ExprInt(0, jtc_var_id.size)})
        evaluated_ejtc_var_equation = symb_exec_both.eval_expr(eliminated_jtc_var_equation)
        if not evaluated_ejtc_var_equation.is_int():
            # we need to determine code base
            dependencies = dg._follow_apply_cb(evaluated_ejtc_var_equation)
            expr_deps = {fexpr.element for fexpr in dependencies if fexpr.follow}
            dg_base = DependencyGraph(ircfg, implicit=False, apply_simp=True, follow_mem=True, follow_call=False)
            # the remaining dependencies are tracked back from cur_bloc to the first head of the graph
            dependency_result_iter = iter(dg_base.get(cur_bloc.loc_key, expr_deps, len(cur_bloc.lines),
                                                      {self.heads()[0]}))
            solution = next(dependency_result_iter)
            code_base_dict = {expr: solution.emul(ir_arch)[expr] for expr in expr_deps}
            irdst_equation = irdst_equation.replace_expr(code_base_dict)
            predecessor_irdst_equation = predecessor_irdst_equation.replace_expr(code_base_dict)

        # we need backward slice of the jump table destination dependencies to retain the other independent assignments
        # during cmp chain assembling
        dependency_result = dg.get(cur_bloc.loc_key, {ircfg.IRDst}, len(cur_bloc.lines), {cur_bloc.loc_key})
        dependent_line_nbs = {}
        for solution in dependency_result:
            dependent_line_nbs.setdefault(solution.loc_key, set()).update(
                {dn.line_nb for dn in solution.relevant_nodes})
        # instructions IRDst doesn't depend on are kept and also evaluated by the minimal engine
        cur_bloc_new_lines = []
        for loc_key, lines in dependent_line_nbs.items():
            for line_nb, assignblk in enumerate(ircfg.blocks[loc_key].assignblks):
                if line_nb not in lines:
                    symb_exec_minimal.eval_assignblk(assignblk)
                    cur_bloc_new_lines.append(assignblk.instr)
        # pick the operand the generated CMP instructions will compare against
        comparison_reg_id = None
        comparison_reg_value = None
        if jtc_var not in symb_exec_minimal.symbols.symbols_id:
            comparison_reg_id = jtc_var
            comparison_reg_value = jtc_var
        else:
            # look for a memory or register symbol (instruction pointer, flags and IRDst excluded) whose value
            # contains jtc_var and evaluates to an integer once jtc_var is replaced by 0
            for symbol, comparison_reg_value in symb_exec_minimal.symbols.symbols_id.items():
                if jtc_var in comparison_reg_value and (symbol.is_mem() or
                                                        (symbol.is_id() and symbol.name not in
                                                         ["RIP", "EIP", "zf", "nf", "pf", "of", "cf", "af", "df",
                                                          ircfg.IRDst.name])):
                    replaced_jtcv = comparison_reg_value.replace_expr({jtc_var: ExprInt(0, jtc_var.size)})
                    if isinstance(symb_exec_minimal.eval_expr(replaced_jtcv), ExprInt):
                        comparison_reg_id = symbol
                        break
        if not comparison_reg_id or not comparison_reg_value:
            logger.debug("Couldn't find any candidate for comparison register at 0x%x" %
                         loc_db.get_location_offset(cur_bloc.loc_key))
            return

        # imported only at this point, i.e. only when a candidate jump table reached the solving stage
        from miasm.ir.translators import Translator
        import z3
        translator = Translator.to_language("z3")
        solver = z3.Solver()

        logger.debug("predecessor_irdst_equation: %s" % str(predecessor_irdst_equation))
        logger.debug(("dst_address: 0x%x" % dst_address))
        logger.debug(("jump_table_control_variable: %s" % str(jtc_var)))
        # the predecessor has to branch to cur_bloc and the control variable has to stay within the boundary
        solver.add(translator.from_expr(predecessor_irdst_equation) == dst_address)
        translated_jtc_var = translator.from_expr(jtc_var_id)
        solver.add(translated_jtc_var >= 0)
        solver.add(translated_jtc_var < 2 ** (size_boundary - 1) - 1)

        if solver.check() != z3.sat:
            logger.debug("Couldn't find at least one jump table control variable")
            return

        dbg_destinations = set()
        # next_loc_key keeps the loc_key of the first comparison block, new_block_loc_key advances along the chain
        next_loc_key = new_block_loc_key = loc_db.add_location()

        logger.debug("comparison_reg_id: %s" % str(comparison_reg_id))
        # destination address -> interval of comparison register values leading to it
        dst_ranges = {}
        counter = 0
        # enumerate solutions one by one, each found value is excluded from the next query; capped at 500
        while counter < 500:
            val = solver.model()[translated_jtc_var].as_long()
            final_irdst_equation = irdst_equation.replace_expr({jtc_var_id: ExprInt(val, jtc_var_id.size)})
            final_dst = int(symb_exec_both.eval_expr(final_irdst_equation))
            cmp_reg_val = comparison_reg_value.replace_expr({jtc_var: ExprInt(val, jtc_var.size)})
            cmp_reg_val = int(symb_exec_minimal.eval_expr(cmp_reg_val))

            dst_ranges[final_dst] = dst_ranges.get(final_dst, interval()).union([(cmp_reg_val, cmp_reg_val)])
            dbg_destinations.add(final_dst)
            offsets_to_dis.add(final_dst)

            solver.add(translated_jtc_var != translator.from_expr(ExprInt(val, jtc_var_id.size)))
            if solver.check() != z3.sat:
                break
            counter += 1

        if counter == 500:
            raise RuntimeError("Interrupted; there might be a broken slice")

        # emit one comparison block per single value and two (lower and upper boundary check) per range; every
        # block falls through to the loc_key reserved for the next one
        for dst, interv in dst_ranges.items():
            cond_target_loc_key = loc_db.get_or_create_offset_location(dst)
            for lower, upper in interv:
                lower = ExprInt(lower, self.mode)
                upper = ExprInt(upper, self.mode)
                new_asm_block = AsmBlock(new_block_loc_key)
                new_block_loc_key = loc_db.add_location()
                if lower == upper:
                    new_asm_block.lines = create_cmp_j_instructions(self.mode, comparison_reg_id, lower,
                                                                    ExprLoc(cond_target_loc_key, self.mode), "JZ")
                    new_asm_block.add_cst(cond_target_loc_key, "c_to")
                    new_asm_block.add_cst(new_block_loc_key, "c_next")
                else:
                    upper_check_loc_key = loc_db.add_location()
                    # lower boundary check
                    new_asm_block.lines = create_cmp_j_instructions(self.mode, comparison_reg_id, lower,
                                                                    ExprLoc(new_block_loc_key, self.mode), "JB")
                    new_asm_block.add_cst(new_block_loc_key, "c_to")
                    new_asm_block.add_cst(upper_check_loc_key, "c_next")
                    # upper boundary check
                    upper_check_block = AsmBlock(upper_check_loc_key)
                    upper_check_block.lines = create_cmp_j_instructions(self.mode, comparison_reg_id, upper,
                                                                        ExprLoc(cond_target_loc_key, self.mode), "JBE")
                    upper_check_block.add_cst(cond_target_loc_key, "c_to")
                    upper_check_block.add_cst(new_block_loc_key, "c_next")
                    self.add_block(upper_check_block)
                self.add_block(new_asm_block)
        # trigger last jump unconditionally
        # new_asm_block and cond_target_loc_key still hold the values of the final loop iteration
        new_asm_block.bto = {AsmConstraintTo(cond_target_loc_key)}
        new_asm_block.lines = [create_jump_instruction(self.mode, ExprLoc(cond_target_loc_key, self.mode))]

        # the indirect jump is gone; the block continues with the first comparison block
        cur_bloc.lines = cur_bloc_new_lines
        cur_bloc.add_cst(next_loc_key, "c_next")
        if not cur_bloc.lines:
            cur_bloc.lines = [create_nop(self.mode)]
        self.jmp_table_loc_keys.add(cur_bloc.loc_key)
        logger.debug("destinations: %s" % pformat([hex(i or 0) for i in dbg_destinations]))
        logger.debug("blocks: %d" % counter)
Ejemplo n.º 4
0
dg.enable_passes([remove_useless_blocks])
asmcfg = dg(asmcfg)

### Only two nodes should remain in asmcfg
assert len(asmcfg) == 2
assert first_block.loc_key in asmcfg.nodes()
assert last_block_loc_key in asmcfg.nodes()

## Graph the final output
# the context manager closes the file even if asmcfg.dot() or the write fails,
# instead of leaving the handle to be reclaimed by the garbage collector
with open("graph2.dot", "w") as dot_file:
    dot_file.write(asmcfg.dot())

# Test helper methods
## loc_key_to_block should always be updated
assert asmcfg.loc_key_to_block(first_block.loc_key) == first_block
testlabel = mdis.loc_db.get_or_create_name_location("testlabel")
my_block = AsmBlock(testlabel)
asmcfg.add_block(my_block)
assert len(asmcfg) == 3
assert asmcfg.loc_key_to_block(first_block.loc_key) == first_block
assert asmcfg.loc_key_to_block(my_block.loc_key) == my_block

## Bad asmcfg
### No bad block is expected at this point
assert len(list(asmcfg.get_bad_blocks())) == 0
assert len(list(asmcfg.get_bad_blocks_predecessors())) == 0
### Add a bad block, not linked
testlabel_bad = mdis.loc_db.get_or_create_name_location("testlabel_bad")
my_bad_block = AsmBlockBad(testlabel_bad)
asmcfg.add_block(my_bad_block)
assert list(asmcfg.get_bad_blocks()) == [my_bad_block]
assert len(list(asmcfg.get_bad_blocks_predecessors())) == 0
### Link the bad block and update edges