예제 #1
0
 def _handle_Load(self, expr_idx: int, expr: Load, stmt_idx: int,
                  stmt: Statement, block: Block):
     """
     Fold a double dereference of a constant address, ``*(*(const_addr))``, into a single load.

     The rewrite only happens when all of the following hold:

     - the address of ``expr`` is itself a Load whose width equals the architecture's bit width;
     - that inner Load reads from a Const address;
     - the constant address passes ``_addr_belongs_to_got`` or ``_addr_belongs_to_ro_region``;
     - the word read from the loader's memory at that address is not None and passes
       ``_addr_belongs_to_object``.

     :param expr_idx:    Index of the expression (not used by this handler).
     :param expr:        The Load expression being visited.
     :param stmt_idx:    Index of the enclosing statement (not used by this handler).
     :param stmt:        The enclosing statement (not used by this handler).
     :param block:       The enclosing block (not used by this handler).
     :return:            A new Load that reads from the resolved constant address, or None when ``expr`` is
                         left untouched.
     """
     inner = expr.addr
     if not isinstance(inner, Load) or inner.bits != self._project.arch.bits:
         return None

     const_addr = inner.addr
     if not isinstance(const_addr, Const):
         return None

     # *(*(const_addr))
     # does it belong to a read-only section/segment?
     if not (self._addr_belongs_to_got(const_addr.value)
             or self._addr_belongs_to_ro_region(const_addr.value)):
         return None

     w = self._project.loader.memory.unpack_word(
         const_addr.value,
         const_addr.bits // self._project.arch.byte_width,
         endness=self._project.arch.memory_endness)
     if w is None or not self._addr_belongs_to_object(w):
         return None

     # nice! replace it with a load from that address.
     # The new constant stands in for the value produced by the inner load, so it must carry that load's
     # width in bits. The previous code passed ``expr.addr.size`` here, which is not a bit width.
     return Load(expr.idx,
                 Const(None, None, w, inner.bits, **const_addr.tags),
                 expr.size,
                 expr.endness,
                 variable=expr.variable,
                 variable_offset=expr.variable_offset,
                 guard=expr.guard,
                 alt=expr.alt,
                 **expr.tags)
예제 #2
0
    def _handle_Load(self, expr_idx: int, expr: Load, stmt_idx: int,
                     stmt: Statement, block: Optional[Block]):
        """
        Process the address operand of a Load and rebuild the Load if the operand changed.

        The address is passed to ``self._handle_expr``. When that call yields a different object, a copy of
        ``expr`` is made and its ``addr`` is pointed at the new object; ``expr`` itself is not modified.

        :param expr_idx:    Index of the expression (not used by this handler).
        :param expr:        The Load expression being visited.
        :param stmt_idx:    Index of the enclosing statement, forwarded to ``_handle_expr``.
        :param stmt:        The enclosing statement, forwarded to ``_handle_expr``.
        :param block:       The enclosing block (may be None), forwarded to ``_handle_expr``.
        :return:            The updated copy of ``expr``, or None when the address handler returned None or
                            the very same address object.
        """
        new_addr = self._handle_expr(0, expr.addr, stmt_idx, stmt, block)

        # nothing to do if the address was left as it is
        if new_addr is None or new_addr is expr.addr:
            return None

        rewritten = expr.copy()
        rewritten.addr = new_addr
        return rewritten
예제 #3
0
파일: ail_simplifier.py 프로젝트: mk-z/angr
    def _unify_local_variables(self) -> bool:
        """
        Find variables that are definitely equivalent and then eliminate the unnecessary copies.

        Every equivalence reported by the Propagator analysis is examined. For the supported shapes (listed
        inside the loop), the definition of the register on the right-hand side is looked up among the
        definitions used at the equivalence's code location. Each use of that definition is then handed to
        ``_replace_expr_and_update_block`` so that it refers to the left-hand side of the equivalence instead.

        :return:    True if at least one replacement was reported as successful, False otherwise.
        """

        simplified = False

        prop = self.project.analyses.Propagator(func=self.func,
                                                func_graph=self.func_graph)
        if not prop.equivalence:
            return simplified

        # map block addresses to blocks so that use locations can be resolved to blocks
        addr2block: Dict[int, Block] = {
            block.addr: block
            for block in self.func_graph.nodes()
        }

        for eq in prop.equivalence:
            # Acceptable equivalence classes:
            #
            # stack variable == register
            # register variable == register
            # stack variable == Conv(register, M->N)
            #
            if isinstance(eq.atom0, SimStackVariable):
                if isinstance(eq.atom1, Register):
                    # stack_var == register
                    reg = eq.atom1
                elif isinstance(eq.atom1, Convert) and isinstance(
                        eq.atom1.operand, Register):
                    # stack_var == Conv(register, M->N)
                    reg = eq.atom1.operand
                else:
                    continue

            elif isinstance(eq.atom0, Register):
                if isinstance(eq.atom1, Register):
                    # register == register
                    reg = eq.atom1
                else:
                    continue

            else:
                continue

            # find the definition of this register: the first register definition used at the equivalence
            # location whose offset matches
            defs = self._reaching_definitions.all_uses.get_uses_by_location(
                eq.codeloc)
            the_def = next(
                (def_ for def_ in defs
                 if isinstance(def_.atom, atoms.Register)
                 and def_.atom.reg_offset == reg.reg_offset),
                None,
            )

            if the_def is None:
                continue
            if isinstance(the_def.codeloc, ExternalCodeLocation):
                continue

            # find all uses of this definition.
            # Iterate over a snapshot: replacing an expression may update the recorded uses, and mutating a
            # set while iterating over it raises a RuntimeError.
            all_uses: Set[CodeLocation] = set(
                self._reaching_definitions.all_uses.get_uses(the_def))

            # TODO: We can only replace all these uses with the stack variable if the stack variable isn't
            # TODO: re-assigned of a new value. Perform this check.

            # replace all uses
            for u in all_uses:
                if u == eq.codeloc:
                    # skip the very initial assignment location
                    continue
                old_block = addr2block.get(u.block_addr, None)
                if old_block is None:
                    # the use is not in any block of the function graph
                    continue

                # if there is an updated block, use it
                the_block = self.blocks.get(old_block, old_block)
                stmt: Statement = the_block.statements[u.stmt_idx]

                # a fresh replacement expression is built for every use
                if isinstance(eq.atom0, SimStackVariable):
                    # create the memory loading expression
                    dst = Load(None,
                               StackBaseOffset(None, self.project.arch.bits,
                                               eq.atom0.offset),
                               eq.atom0.size,
                               endness=self.project.arch.memory_endness)
                elif isinstance(eq.atom0, Register):
                    dst = eq.atom0
                else:
                    raise RuntimeError("Unsupported atom0 type %s." %
                                       type(eq.atom0))

                r = self._replace_expr_and_update_block(
                    the_block, u.stmt_idx, stmt, the_def, eq.atom1, dst)
                simplified |= r

        return simplified
예제 #4
0
    def _unify_local_variables(self) -> bool:
        """
        Detect variables that always hold the same value and rewrite uses so that redundant copies disappear.

        Equivalences from the propagation result are grouped by their right-hand side (``atom1``); only
        right-hand sides that take part in exactly one equivalence are processed. For each of them, the
        register definition used at the equivalence's code location is located, and every other use of that
        definition is rewritten through ``_replace_expr_and_update_block``. Blocks returned by successful
        rewrites are recorded in ``self.blocks``.

        :return:    True when at least one use was rewritten, False otherwise.
        """

        changed = False

        propagation = self._compute_propagation()
        if not propagation.equivalence:
            return changed

        # blocks of the function graph, keyed by (address, index)
        blocks_by_key: Dict[Tuple[int, int], Block] = {
            (node.addr, node.idx): node
            for node in self.func_graph.nodes()
        }

        eqs_by_atom1: Dict[Any, Set[Equivalence]] = defaultdict(set)
        for equivalence in propagation.equivalence:
            eqs_by_atom1[equivalence.atom1].add(equivalence)

        for candidates in eqs_by_atom1.values():
            if len(candidates) > 1:
                # the same right-hand side appears in several equivalences
                continue

            eq = next(iter(candidates))
            lhs, rhs = eq.atom0, eq.atom1

            # Supported shapes:
            #
            #   stack variable    == register
            #   stack variable    == Conv(register, M->N)
            #   register variable == register
            #
            if isinstance(lhs, SimStackVariable):
                if isinstance(rhs, Register):
                    reg = rhs
                elif isinstance(rhs, Convert) and isinstance(
                        rhs.operand, Register):
                    reg = rhs.operand
                else:
                    continue
            elif isinstance(lhs, Register) and isinstance(rhs, Register):
                reg = rhs
            else:
                continue

            # locate the definition of the register among the definitions used at the equivalence location
            rd = self._compute_reaching_definitions()
            the_def = next(
                (candidate
                 for candidate in rd.all_uses.get_uses_by_location(eq.codeloc)
                 if isinstance(candidate.atom, atoms.Register)
                 and candidate.atom.reg_offset == reg.reg_offset),
                None,
            )

            if the_def is None or isinstance(the_def.codeloc,
                                             ExternalCodeLocation):
                continue

            # take a snapshot of the uses: rewriting expressions may touch the underlying set
            use_locations: Set[CodeLocation] = set(
                rd.all_uses.get_uses(the_def))

            # TODO: We can only replace all these uses with the stack variable if the stack variable isn't
            # TODO: re-assigned of a new value. Perform this check.

            for use_loc in use_locations:
                if use_loc == eq.codeloc:
                    # the assignment that established the equivalence stays as it is
                    continue

                original_block = blocks_by_key.get(
                    (use_loc.block_addr, use_loc.block_idx), None)
                if original_block is None:
                    continue

                # prefer the already-updated version of the block, if any
                current_block = self.blocks.get(original_block,
                                                original_block)
                stmt: Statement = current_block.statements[use_loc.stmt_idx]

                if isinstance(lhs, SimStackVariable):
                    # the replacement is a load from the stack slot
                    stack_addr = StackBaseOffset(None, self.project.arch.bits,
                                                 lhs.offset)
                    dst = Load(None,
                               stack_addr,
                               lhs.size,
                               endness=self.project.arch.memory_endness)
                elif isinstance(lhs, Register):
                    dst = lhs
                else:
                    raise RuntimeError("Unsupported atom0 type %s." %
                                       type(lhs))

                replaced, rewritten_block = self._replace_expr_and_update_block(
                    current_block, use_loc.stmt_idx, stmt, the_def, use_loc,
                    rhs, dst)
                if replaced:
                    self.blocks[original_block] = rewritten_block
                changed |= replaced

        # no need to clear cache at the end of this function
        return changed
예제 #5
0
    def _unify_local_variables(self) -> bool:
        """
        Find variables that are definitely equivalent and then eliminate unnecessary copies.

        Equivalences from the propagation result are grouped by their ``atom1`` and visited in the order of
        their code locations. Only an ``atom1`` that takes part in exactly one equivalence is processed. For
        each supported shape (listed inside the loop), the definition to eliminate is looked up in the
        reaching-definition result, its uses are collected, and every use is rewritten through
        ``_replace_expr_and_update_block``. Blocks returned by successful rewrites are stored in
        ``self.blocks``.

        When the located definition sits at an ExternalCodeLocation (treated here as a function argument) and
        every use could be rewritten, the code location of the equivalence is added to
        ``self._assignments_to_remove``.

        :return:    True if at least one use was rewritten (``self._clear_cache()`` is called in that case),
                    False otherwise.
        """

        simplified = False

        prop = self._compute_propagation()
        if not prop.equivalence:
            return simplified

        # index the blocks of the function graph by (address, index) so that use locations can be mapped back
        # to blocks
        addr_and_idx_to_block: Dict[Tuple[int, int], Block] = {}
        for block in self.func_graph.nodes():
            addr_and_idx_to_block[(block.addr, block.idx)] = block

        # group equivalences by atom1; atom_by_loc collects (code location, atom1) pairs that only serve to
        # establish the iteration order below
        equivalences: Dict[Any, Set[Equivalence]] = defaultdict(set)
        atom_by_loc = set()
        for eq in prop.equivalence:
            equivalences[eq.atom1].add(eq)
            atom_by_loc.add((eq.codeloc, eq.atom1))

        # sort keys to ensure a reproducible result
        sorted_loc_and_atoms = sorted(atom_by_loc, key=lambda x: x[0])

        for _, atom in sorted_loc_and_atoms:
            eqs = equivalences[atom]
            if len(eqs) > 1:
                # this atom1 appears in more than one equivalence; skip it
                continue

            eq = next(iter(eqs))

            # Acceptable equivalence classes:
            #
            # stack variable == register
            # register variable == register
            # stack variable == Conv(register, M->N)
            # global variable == register
            #
            # Equivalence is generally created at assignment sites. Therefore, eq.atom0 is the definition and
            # eq.atom1 is the use.
            #
            # This section picks `to_replace` (the register whose occurrences will be rewritten) and records in
            # `to_replace_is_def` whether that register is the defined side (atom0) or the used side (atom1).
            the_def = None
            if isinstance(eq.atom0, SimMemoryVariable
                          ):  # covers both Stack and Global variables
                if isinstance(eq.atom1, Register):
                    # stack_var == register or global_var == register
                    to_replace = eq.atom1
                    to_replace_is_def = False
                elif isinstance(eq.atom1, Convert) and isinstance(
                        eq.atom1.operand, Register):
                    # stack_var == Conv(register, M->N)
                    to_replace = eq.atom1.operand
                    to_replace_is_def = False
                else:
                    continue

            elif isinstance(eq.atom0, Register):
                if isinstance(eq.atom1, Register):
                    # register == register
                    # when the defined register is an artificial one, it is the one that gets replaced;
                    # otherwise the used register is replaced
                    if self.project.arch.is_artificial_register(
                            eq.atom0.reg_offset, eq.atom0.size):
                        to_replace = eq.atom0
                        to_replace_is_def = True
                    else:
                        to_replace = eq.atom1
                        to_replace_is_def = False
                else:
                    continue

            else:
                continue

            # find the definition of this register
            rd = self._compute_reaching_definitions()
            if to_replace_is_def:
                # find defs
                # collect the definitions created at the equivalence location that match `to_replace`
                defs = []
                for def_ in rd.all_definitions:
                    if def_.codeloc == eq.codeloc:
                        if isinstance(to_replace, SimStackVariable):
                            if isinstance(def_.atom, atoms.MemoryLocation) \
                                    and isinstance(def_.atom.addr, atoms.SpOffset):
                                if to_replace.offset == def_.atom.addr.offset:
                                    defs.append(def_)
                        elif isinstance(to_replace, Register):
                            if isinstance(def_.atom, atoms.Register) \
                                    and to_replace.reg_offset == def_.atom.reg_offset:
                                defs.append(def_)
                if len(defs) != 1:
                    # require exactly one matching definition
                    continue
                the_def = defs[0]
            else:
                # find uses
                # despite its name, `defs` holds the definitions that are used at the equivalence location
                defs = rd.all_uses.get_uses_by_location(eq.codeloc)
                if len(defs) != 1:
                    # there are multiple defs for this register - we do not support replacing all of them
                    continue
                for def_ in defs:
                    def_: Definition
                    if isinstance(
                            def_.atom, atoms.Register
                    ) and def_.atom.reg_offset == to_replace.reg_offset:
                        # found it!
                        the_def = def_
                        break
            if the_def is None:
                continue

            # Both branches below produce:
            #   all_uses_with_def         - set of (definition, (use location, used expression)) tuples
            #   replace_with              - the expression that takes the place of each used expression
            #   remove_initial_assignment - whether the assignment at eq.codeloc may be dropped afterwards
            if isinstance(the_def.codeloc, ExternalCodeLocation):
                # this is a function argument. we enter a slightly different logic and try to eliminate copies of this
                # argument if
                # (a) the on-stack copy of it has never been modified in this function
                # (b) the function argument register has never been updated.
                #     TODO: we may loosen requirement (b) once we have real register versioning in AIL.
                defs = [
                    def_ for def_ in rd.all_definitions
                    if def_.codeloc == eq.codeloc
                ]
                all_uses_with_def = None
                replace_with = None
                remove_initial_assignment = None

                if defs and len(defs) == 1:
                    stackvar_def = defs[0]
                    if isinstance(stackvar_def.atom, atoms.MemoryLocation) \
                            and isinstance(stackvar_def.atom.addr, SpOffset):
                        # found the stack variable
                        # Make sure there is no other write to this location
                        if any((def_ != stackvar_def
                                and def_.atom == stackvar_def.atom)
                               for def_ in rd.all_definitions
                               if isinstance(def_.atom, atoms.MemoryLocation)):
                            continue

                        # Make sure the register is never updated across this function
                        if any((def_ != the_def and def_.atom == the_def.atom)
                               for def_ in rd.all_definitions
                               if isinstance(def_.atom, atoms.Register)):
                            continue

                        # find all its uses
                        all_stackvar_uses: Set[Tuple[CodeLocation, Any]] = set(
                            rd.all_uses.get_uses_with_expr(stackvar_def))
                        all_uses_with_def = set()

                        # give up on this equivalence if any use reads the stack variable with a size that
                        # differs from the size of its definition
                        should_abort = False
                        for use in all_stackvar_uses:
                            used_expr = use[1]
                            if used_expr is not None and used_expr.size != stackvar_def.size:
                                should_abort = True
                                break
                            all_uses_with_def.add((stackvar_def, use))
                        if should_abort:
                            continue

                        #to_replace = Load(None, StackBaseOffset(None, self.project.arch.bits, eq.atom0.offset),
                        #                  eq.atom0.size, endness=self.project.arch.memory_endness)
                        replace_with = eq.atom1
                        remove_initial_assignment = True

                # all_uses_with_def is still None when the checks above did not identify a stack variable
                if all_uses_with_def is None:
                    continue

            else:
                # build the replacement expression; a fresh atom index is drawn from the AIL manager when one
                # is available, otherwise the index is None
                if isinstance(eq.atom0, SimStackVariable):
                    # create the memory loading expression
                    new_idx = None if self._ail_manager is None else next(
                        self._ail_manager.atom_ctr)
                    replace_with = Load(
                        new_idx,
                        StackBaseOffset(None, self.project.arch.bits,
                                        eq.atom0.offset),
                        eq.atom0.size,
                        endness=self.project.arch.memory_endness)
                elif isinstance(eq.atom0, SimMemoryVariable) and isinstance(
                        eq.atom0.addr, int):
                    # create the memory loading expression
                    new_idx = None if self._ail_manager is None else next(
                        self._ail_manager.atom_ctr)
                    replace_with = Load(
                        new_idx,
                        Const(None, None, eq.atom0.addr,
                              self.project.arch.bits),
                        eq.atom0.size,
                        endness=self.project.arch.memory_endness)
                elif isinstance(eq.atom0, Register):
                    if isinstance(eq.atom1, Register):
                        # mirror the choice of `to_replace` made earlier: the replacement is the other register
                        if self.project.arch.is_artificial_register(
                                eq.atom0.reg_offset, eq.atom0.size):
                            replace_with = eq.atom1
                        else:
                            replace_with = eq.atom0
                    else:
                        raise RuntimeError("Unsupported atom1 type %s." %
                                           type(eq.atom1))
                else:
                    raise RuntimeError("Unsupported atom0 type %s." %
                                       type(eq.atom0))

                to_replace_def = the_def

                # find all uses of this definition
                # we make a copy of the set since we may touch the set (uses) when replacing expressions
                all_uses: Set[Tuple[CodeLocation, Any]] = set(
                    rd.all_uses.get_uses_with_expr(to_replace_def))
                # make sure none of these uses are phi nodes (depends on more than one def)
                all_uses_with_unique_def = set()
                for use_and_expr in all_uses:
                    use_loc, used_expr = use_and_expr
                    defs_and_exprs = rd.all_uses.get_uses_by_location(
                        use_loc, exprs=True)
                    # definitions that reach this location through the very same used expression
                    filtered_defs = {
                        def_
                        for def_, expr_ in defs_and_exprs if expr_ == used_expr
                    }
                    if len(filtered_defs) == 1:
                        all_uses_with_unique_def.add(use_and_expr)
                    else:
                        # optimization: break early
                        break

                if len(all_uses) != len(all_uses_with_unique_def):
                    # only when all uses are determined by the same definition will we continue with the simplification
                    continue

                all_uses_with_def = set((to_replace_def, use_and_expr)
                                        for use_and_expr in all_uses)

                remove_initial_assignment = False  # expression folding will take care of it

            if not all_uses_with_def:
                # definitions without uses may simply be our data-flow analysis being incorrect. do not remove them.
                continue

            # TODO: We can only replace all these uses with the stack variable if the stack variable isn't
            # TODO: re-assigned of a new value. Perform this check.

            # replace all uses
            all_uses_replaced = True
            for def_, use_and_expr in all_uses_with_def:
                u, used_expr = use_and_expr
                if u == eq.codeloc:
                    # skip the very initial assignment location
                    continue
                old_block = addr_and_idx_to_block.get(
                    (u.block_addr, u.block_idx), None)
                if old_block is None:
                    # the use location does not map to a block of the function graph
                    continue

                # if there is an updated block, use it
                the_block = self.blocks.get(old_block, old_block)
                stmt: Statement = the_block.statements[u.stmt_idx]

                # every use gets its own copy of the replacement expression
                replace_with_copy = replace_with.copy()
                if to_replace.size != replace_with_copy.size:
                    # sizes differ: wrap the replacement in a Convert from its own bit width to the bit width
                    # of the replaced atom. The fourth argument is passed as False.
                    # NOTE(review): presumably this is the signedness flag - confirm against Convert's signature.
                    new_idx = None if self._ail_manager is None else next(
                        self._ail_manager.atom_ctr)
                    replace_with_copy = Convert(
                        new_idx,
                        replace_with_copy.bits,
                        to_replace.bits,
                        False,
                        replace_with_copy,
                    )

                r, new_block = self._replace_expr_and_update_block(
                    the_block, u.stmt_idx, stmt, def_, u, used_expr,
                    replace_with_copy)
                if r:
                    # record the rewritten block under the original block as key
                    self.blocks[old_block] = new_block
                else:
                    # failed to replace a use - we need to keep the initial assignment!
                    all_uses_replaced = False
                simplified |= r

            if all_uses_replaced and remove_initial_assignment:
                # the initial statement can be removed
                self._assignments_to_remove.add(eq.codeloc)

        # cached analysis results are discarded once anything has been rewritten
        if simplified:
            self._clear_cache()
        return simplified
예제 #6
0
    def _unify_local_variables(self) -> bool:
        """
        Detect variables that always hold the same value and rewrite their uses so that redundant copies go away.

        Equivalences from the propagation result are grouped by their right-hand side (``atom1``); only
        right-hand sides that take part in exactly one equivalence are processed. Two situations are handled
        once the register definition used at the equivalence location has been found:

        - The definition sits at an ExternalCodeLocation (treated as a function argument). If the single
          definition created at the equivalence location is a stack slot that is written nowhere else, and the
          register is defined nowhere else, loads from the stack slot are rewritten to the right-hand side. The
          copying assignment is scheduled for removal when every use could be rewritten.
        - Otherwise, uses of the register definition are rewritten to the left-hand side of the equivalence.

        :return:    True when at least one use was rewritten, False otherwise.
        """

        changed = False

        propagation = self._compute_propagation()
        if not propagation.equivalence:
            return changed

        # blocks of the function graph, keyed by (address, index)
        blocks_by_key: Dict[Tuple[int, int], Block] = {
            (node.addr, node.idx): node
            for node in self.func_graph.nodes()
        }

        eqs_by_atom1: Dict[Any, Set[Equivalence]] = defaultdict(set)
        for equivalence in propagation.equivalence:
            eqs_by_atom1[equivalence.atom1].add(equivalence)

        for candidates in eqs_by_atom1.values():
            if len(candidates) > 1:
                # the same right-hand side appears in several equivalences
                continue

            eq = next(iter(candidates))
            lhs, rhs = eq.atom0, eq.atom1

            # Supported shapes:
            #
            #   stack variable    == register
            #   stack variable    == Conv(register, M->N)
            #   register variable == register
            #
            if isinstance(lhs, SimStackVariable):
                if isinstance(rhs, Register):
                    reg = rhs
                elif isinstance(rhs, Convert) and isinstance(
                        rhs.operand, Register):
                    reg = rhs.operand
                else:
                    continue
            elif isinstance(lhs, Register) and isinstance(rhs, Register):
                reg = rhs
            else:
                continue

            # locate the definition of the register among the definitions used at the equivalence location
            rd = self._compute_reaching_definitions()
            the_def = next(
                (candidate
                 for candidate in rd.all_uses.get_uses_by_location(eq.codeloc)
                 if isinstance(candidate.atom, atoms.Register)
                 and candidate.atom.reg_offset == reg.reg_offset),
                None,
            )
            if the_def is None:
                continue

            if isinstance(the_def.codeloc, ExternalCodeLocation):
                # This is a function argument. Copies of it are only eliminated if
                # (a) the on-stack copy of it has never been modified in this function
                # (b) the function argument register has never been updated.
                #     TODO: we may loosen requirement (b) once we have real register versioning in AIL.
                defs_at_eq = [
                    candidate for candidate in rd.all_definitions
                    if candidate.codeloc == eq.codeloc
                ]
                if len(defs_at_eq) != 1:
                    continue

                stackvar_def = defs_at_eq[0]
                is_stack_slot = isinstance(
                    stackvar_def.atom, atoms.MemoryLocation) and isinstance(
                        stackvar_def.atom.addr, SpOffset)
                if not is_stack_slot:
                    continue

                # (a) no other definition writes to the same memory location
                if any(
                        isinstance(other.atom, atoms.MemoryLocation)
                        and other != stackvar_def
                        and other.atom == stackvar_def.atom
                        for other in rd.all_definitions):
                    continue

                # (b) no other definition writes to the same register
                if any(
                        isinstance(other.atom, atoms.Register)
                        and other != the_def and other.atom == the_def.atom
                        for other in rd.all_definitions):
                    continue

                # pair every use of the stack slot with its definition
                stack_uses: Set[CodeLocation] = set(
                    rd.all_uses.get_uses(stackvar_def))
                uses_with_def = {(stackvar_def, use) for use in stack_uses}

                stack_addr = StackBaseOffset(None, self.project.arch.bits,
                                             lhs.offset)
                to_replace = Load(None,
                                  stack_addr,
                                  lhs.size,
                                  endness=self.project.arch.memory_endness)
                replace_with = rhs
                drop_initial_assignment = True

            else:
                # take a snapshot of the uses: rewriting expressions may touch the underlying set
                reg_uses: Set[CodeLocation] = set(
                    rd.all_uses.get_uses(the_def))
                uses_with_def = {(the_def, use) for use in reg_uses}

                drop_initial_assignment = False  # expression folding will take care of it
                if isinstance(lhs, SimStackVariable):
                    # the replacement is a load from the stack slot
                    stack_addr = StackBaseOffset(None, self.project.arch.bits,
                                                 lhs.offset)
                    replace_with = Load(
                        None,
                        stack_addr,
                        lhs.size,
                        endness=self.project.arch.memory_endness)
                elif isinstance(lhs, Register):
                    replace_with = lhs
                else:
                    raise RuntimeError("Unsupported atom0 type %s." %
                                       type(lhs))
                to_replace = rhs

            # TODO: We can only replace all these uses with the stack variable if the stack variable isn't
            # TODO: re-assigned of a new value. Perform this check.

            every_use_rewritten = True
            for def_, use_loc in uses_with_def:
                if use_loc == eq.codeloc:
                    # the assignment that established the equivalence stays as it is
                    continue

                original_block = blocks_by_key.get(
                    (use_loc.block_addr, use_loc.block_idx), None)
                if original_block is None:
                    continue

                # prefer the already-updated version of the block, if any
                current_block = self.blocks.get(original_block,
                                                original_block)
                stmt: Statement = current_block.statements[use_loc.stmt_idx]

                replaced, rewritten_block = self._replace_expr_and_update_block(
                    current_block, use_loc.stmt_idx, stmt, def_, use_loc,
                    to_replace, replace_with)
                if replaced:
                    self.blocks[original_block] = rewritten_block
                else:
                    # a use could not be rewritten, so the initial assignment has to stay
                    every_use_rewritten = False
                changed |= replaced

            if every_use_rewritten and drop_initial_assignment:
                # the initial statement can be removed
                self._assignments_to_remove.add(eq.codeloc)

        # no need to clear cache at the end of this function
        return changed