def _handle_Load(self, expr_idx: int, expr: Load, stmt_idx: int, stmt: Statement, block: Optional[Block]): addr = self._handle_expr(0, expr.addr, stmt_idx, stmt, block) if addr is not None and addr is not expr.addr: new_expr = expr.copy() new_expr.addr = addr return new_expr return None
def _unify_local_variables(self) -> bool: """ Find variables that are definitely equivalent and then eliminate unnecessary copies. """ simplified = False prop = self._compute_propagation() if not prop.equivalence: return simplified addr_and_idx_to_block: Dict[Tuple[int, int], Block] = {} for block in self.func_graph.nodes(): addr_and_idx_to_block[(block.addr, block.idx)] = block equivalences: Dict[Any, Set[Equivalence]] = defaultdict(set) atom_by_loc = set() for eq in prop.equivalence: equivalences[eq.atom1].add(eq) atom_by_loc.add((eq.codeloc, eq.atom1)) # sort keys to ensure a reproducible result sorted_loc_and_atoms = sorted(atom_by_loc, key=lambda x: x[0]) for _, atom in sorted_loc_and_atoms: eqs = equivalences[atom] if len(eqs) > 1: continue eq = next(iter(eqs)) # Acceptable equivalence classes: # # stack variable == register # register variable == register # stack variable == Conv(register, M->N) # global variable == register # # Equivalence is generally created at assignment sites. Therefore, eq.atom0 is the definition and # eq.atom1 is the use. the_def = None if isinstance(eq.atom0, SimMemoryVariable ): # covers both Stack and Global variables if isinstance(eq.atom1, Register): # stack_var == register or global_var == register to_replace = eq.atom1 to_replace_is_def = False elif isinstance(eq.atom1, Convert) and isinstance( eq.atom1.operand, Register): # stack_var == Conv(register, M->N) to_replace = eq.atom1.operand to_replace_is_def = False else: continue elif isinstance(eq.atom0, Register): if isinstance(eq.atom1, Register): # register == register if self.project.arch.is_artificial_register( eq.atom0.reg_offset, eq.atom0.size): to_replace = eq.atom0 to_replace_is_def = True else: to_replace = eq.atom1 to_replace_is_def = False else: continue else: continue # find the definition of this register rd = self._compute_reaching_definitions() if to_replace_is_def: # find defs defs = [] for def_ in rd.all_definitions: if def_.codeloc == eq.codeloc: if isinstance(to_replace, SimStackVariable): if isinstance(def_.atom, atoms.MemoryLocation) \ and isinstance(def_.atom.addr, atoms.SpOffset): if to_replace.offset == def_.atom.addr.offset: defs.append(def_) elif isinstance(to_replace, Register): if isinstance(def_.atom, atoms.Register) \ and to_replace.reg_offset == def_.atom.reg_offset: defs.append(def_) if len(defs) != 1: continue the_def = defs[0] else: # find uses defs = rd.all_uses.get_uses_by_location(eq.codeloc) if len(defs) != 1: # there are multiple defs for this register - we do not support replacing all of them continue for def_ in defs: def_: Definition if isinstance( def_.atom, atoms.Register ) and def_.atom.reg_offset == to_replace.reg_offset: # found it! the_def = def_ break if the_def is None: continue if isinstance(the_def.codeloc, ExternalCodeLocation): # this is a function argument. we enter a slightly different logic and try to eliminate copies of this # argument if # (a) the on-stack copy of it has never been modified in this function # (b) the function argument register has never been updated. # TODO: we may loosen requirement (b) once we have real register versioning in AIL. defs = [ def_ for def_ in rd.all_definitions if def_.codeloc == eq.codeloc ] all_uses_with_def = None replace_with = None remove_initial_assignment = None if defs and len(defs) == 1: stackvar_def = defs[0] if isinstance(stackvar_def.atom, atoms.MemoryLocation) \ and isinstance(stackvar_def.atom.addr, SpOffset): # found the stack variable # Make sure there is no other write to this location if any((def_ != stackvar_def and def_.atom == stackvar_def.atom) for def_ in rd.all_definitions if isinstance(def_.atom, atoms.MemoryLocation)): continue # Make sure the register is never updated across this function if any((def_ != the_def and def_.atom == the_def.atom) for def_ in rd.all_definitions if isinstance(def_.atom, atoms.Register)): continue # find all its uses all_stackvar_uses: Set[Tuple[CodeLocation, Any]] = set( rd.all_uses.get_uses_with_expr(stackvar_def)) all_uses_with_def = set() should_abort = False for use in all_stackvar_uses: used_expr = use[1] if used_expr is not None and used_expr.size != stackvar_def.size: should_abort = True break all_uses_with_def.add((stackvar_def, use)) if should_abort: continue #to_replace = Load(None, StackBaseOffset(None, self.project.arch.bits, eq.atom0.offset), # eq.atom0.size, endness=self.project.arch.memory_endness) replace_with = eq.atom1 remove_initial_assignment = True if all_uses_with_def is None: continue else: if isinstance(eq.atom0, SimStackVariable): # create the memory loading expression new_idx = None if self._ail_manager is None else next( self._ail_manager.atom_ctr) replace_with = Load( new_idx, StackBaseOffset(None, self.project.arch.bits, eq.atom0.offset), eq.atom0.size, endness=self.project.arch.memory_endness) elif isinstance(eq.atom0, SimMemoryVariable) and isinstance( eq.atom0.addr, int): # create the memory loading expression new_idx = None if self._ail_manager is None else next( self._ail_manager.atom_ctr) replace_with = Load( new_idx, Const(None, None, eq.atom0.addr, self.project.arch.bits), eq.atom0.size, endness=self.project.arch.memory_endness) elif isinstance(eq.atom0, Register): if isinstance(eq.atom1, Register): if self.project.arch.is_artificial_register( eq.atom0.reg_offset, eq.atom0.size): replace_with = eq.atom1 else: replace_with = eq.atom0 else: raise RuntimeError("Unsupported atom1 type %s." % type(eq.atom1)) else: raise RuntimeError("Unsupported atom0 type %s." % type(eq.atom0)) to_replace_def = the_def # find all uses of this definition # we make a copy of the set since we may touch the set (uses) when replacing expressions all_uses: Set[Tuple[CodeLocation, Any]] = set( rd.all_uses.get_uses_with_expr(to_replace_def)) # make sure none of these uses are phi nodes (depends on more than one def) all_uses_with_unique_def = set() for use_and_expr in all_uses: use_loc, used_expr = use_and_expr defs_and_exprs = rd.all_uses.get_uses_by_location( use_loc, exprs=True) filtered_defs = { def_ for def_, expr_ in defs_and_exprs if expr_ == used_expr } if len(filtered_defs) == 1: all_uses_with_unique_def.add(use_and_expr) else: # optimization: break early break if len(all_uses) != len(all_uses_with_unique_def): # only when all uses are determined by the same definition will we continue with the simplification continue all_uses_with_def = set((to_replace_def, use_and_expr) for use_and_expr in all_uses) remove_initial_assignment = False # expression folding will take care of it if not all_uses_with_def: # definitions without uses may simply be our data-flow analysis being incorrect. do not remove them. continue # TODO: We can only replace all these uses with the stack variable if the stack variable isn't # TODO: re-assigned of a new value. Perform this check. # replace all uses all_uses_replaced = True for def_, use_and_expr in all_uses_with_def: u, used_expr = use_and_expr if u == eq.codeloc: # skip the very initial assignment location continue old_block = addr_and_idx_to_block.get( (u.block_addr, u.block_idx), None) if old_block is None: continue # if there is an updated block, use it the_block = self.blocks.get(old_block, old_block) stmt: Statement = the_block.statements[u.stmt_idx] replace_with_copy = replace_with.copy() if to_replace.size != replace_with_copy.size: new_idx = None if self._ail_manager is None else next( self._ail_manager.atom_ctr) replace_with_copy = Convert( new_idx, replace_with_copy.bits, to_replace.bits, False, replace_with_copy, ) r, new_block = self._replace_expr_and_update_block( the_block, u.stmt_idx, stmt, def_, u, used_expr, replace_with_copy) if r: self.blocks[old_block] = new_block else: # failed to replace a use - we need to keep the initial assignment! all_uses_replaced = False simplified |= r if all_uses_replaced and remove_initial_assignment: # the initial statement can be removed self._assignments_to_remove.add(eq.codeloc) if simplified: self._clear_cache() return simplified