def get_unique_variables(expr: Expr) -> List[Expr]:
    """
    Collect every distinct variable that occurs in an expression.

    The result is ordered by the string form of each variable, so
    repeated calls on the same expression return the same list.

    Args:
        expr: Expression to inspect.

    Returns:
        The variables of `expr`, without duplicates, sorted by `str()`.
    """
    found = set()

    def collect(node: Expr) -> Expr:
        """
        Visitor callback that remembers `node` if it is a variable.

        Args:
            node: Expression currently being visited.

        Returns:
            `node`, unchanged.
        """
        if node.is_id():
            found.add(node)
        return node

    # the callback is invoked for the nodes of `expr`; its return value
    # leaves every node as it is
    expr.visit(collect)

    ordered = list(found)
    ordered.sort(key=str)
    return ordered
def get_subexpressions(expr: Expr) -> Iterator[Expr]:
    """
    Iterate over all subexpressions of an expression.

    Every node handed to the `visit` callback is recorded; the recorded
    sequence is then returned back to front.

    NOTE(review): assuming `visit` reports children before their parents,
    the reversed order starts with `expr` itself and descends towards the
    leaves -- confirm against Miasm's `Expr.visit`.

    Args:
        expr: Expression whose subexpressions are enumerated.

    Returns:
        Iterator over the recorded expressions in reverse visiting order.
    """
    visited = []

    def record(node: Expr) -> Expr:
        """
        Visitor callback that appends `node` to the list of visited nodes.

        Args:
            node: Expression currently being visited.

        Returns:
            `node`, unchanged.
        """
        visited.append(node)
        return node

    expr.visit(record)
    return reversed(visited)
def simple_unwrap_expr(expr: Expr, loc_db: LocationDB):
    """
    Resolve an expression to a plain value, using -1 as "unknown".

    Args:
        expr: Expression to resolve.
        loc_db: Location database used to look up the offset of a
            location expression.

    Returns:
        `int(expr)` if `expr` is an integer expression; the offset stored
        in `loc_db` for `expr.loc_key` if `expr` is a location expression
        (-1 if that lookup yields None); -1 for anything else.
    """
    if expr.is_int():
        return int(expr)

    if expr.is_loc():
        offset = loc_db.get_location_offset(expr.loc_key)
        # a location without a known offset is reported like any other
        # unresolvable expression
        return -1 if offset is None else offset

    return -1
def synthesize_from_expression(self, expr: Expr, num_samples: int) -> Tuple[Expr, float]:
    """
    Synthesizes an expression with the same I/O behavior as `expr`.

    `expr` is treated as a function f(x0, ..., xi). It is first unified,
    i.e. rewritten with the replacements from `gen_unification_dict`
    (to remove memory etc.). On the unified expression, a synthesis oracle,
    a grammar and a mutator are built, and an iterated local search is run.
    The unification is reversed on the simplified search result, which is
    finally zero-extended if it is narrower than the grammar size.

    Example:
        @64[rax] + rbx - rbx is unified to p0 + p1 - p1. A synthesized
        expression with the same I/O behavior is p0. Re-applying the
        original terminals yields @64[rax].

    Args:
        expr (Expr): Expression representing a function f(x0, ..., xi)
            in Miasm IR.
        num_samples (int): Number of I/O samples for the synthesis oracle.

    Returns:
        Tuple[Expr, float]: Synthesized expression and its score.
    """
    # unify: swap the original terminals for placeholder variables
    unification_dict = gen_unification_dict(expr)
    unified = expr.replace_expr(unification_dict)

    # ingredients of the search, all derived from the unified expression
    placeholders = get_unique_variables(unified)
    oracle = SynthesisOracle.gen_from_expression(unified, placeholders, num_samples)
    grammar = Grammar(unified.size, placeholders)
    mutator = Mutator.gen_from_expression(unified, grammar)

    # stochastic search
    best_state, best_score = self.iterated_local_search(mutator, oracle)

    # map the placeholders back to the original terminals
    synthesized = reverse_unification(
        best_state.get_expr_simplified(), unification_dict)

    # the result may be narrower than the grammar size; widen it if so
    if synthesized.size < grammar.size:
        synthesized = synthesized.zeroExtend(grammar.size)

    return synthesized, best_score
def _skip_subtree(expr: Expr) -> bool:
    """
    Tells whether an expression is a terminal whose subtree is skipped.

    The expression counts as terminal if it is a variable/register
    (`is_id`), an integer (`is_int`) or a location label (`is_loc`).
    Memory expressions are not tested here and therefore never skipped.

    Args:
        expr: Expression to test.

    Returns:
        True if `expr` is one of the terminal kinds listed above.
    """
    if expr.is_id() or expr.is_int():
        return True
    return expr.is_loc()  # type: ignore
def evaluate_expression(expr: Expr, inputs_array: List[int]) -> int:
    """
    Evaluates an expression for an array of concrete input values.

    Each placeholder variable p0, p1, ..., pn is associated with the entry
    of the same index in `inputs_array`: p0 is replaced with
    inputs_array[0], p1 with inputs_array[1] etc. (as an ExprInt of the
    variable's size). The expression is then simplified and converted
    to a Python int.

    Only variables named exactly "p" followed by at least one digit are
    treated as placeholders. Any other variable (e.g. "pc" or "p") is left
    untouched instead of being parsed as an index.

    Args:
        expr: Expression to evaluate.
        inputs_array: List of input values, indexed by placeholder number.

    Returns:
        Int that is the value of the evaluated expression. The conversion
        assumes the simplified expression is a constant.

    Raises:
        IndexError: If a placeholder index is not covered by `inputs_array`.
    """
    # compiled once, outside the loop; fullmatch rejects names that merely
    # start with "p"
    placeholder_pattern = re.compile(r"p[0-9]+")

    # dictionary of replacements
    replacements = {}

    # walk over unique variables in the expression
    for v in get_unique_variables(expr):
        # skip everything that is not a placeholder
        if placeholder_pattern.fullmatch(v.name) is None:
            continue
        # the digits after the leading "p" are the index into the inputs
        index = int(v.name[1:])
        replacements[v] = ExprInt(inputs_array[index], v.size)

    return int(expr_simp(expr.replace_expr(replacements)))
def _reverse_global_unification(
        self, expr: Expr, unification_dict: Dict[Expr, Expr]) -> Expr:
    """
    Iteratively reverses the global unifications of an expression.

    Unification variables can themselves be part of other unification
    rules, so a single replacement pass may introduce further unification
    variables. The replacement is therefore repeated until no variable
    whose name starts with the global variable prefix remains.

    If a pass leaves the expression unchanged (a prefixed variable has no
    rule in `unification_dict`), the loop stops and the expression is
    returned as is; repeating the replacement could never make progress.

    Example:
        Given: {r0: x + r1, r1: y} and expression r0 + r1.
        We first transform it into (x + r1) + y and then to (x + y) + y.

    Args:
        expr: Expression to reverse unification for.
        unification_dict: Dictionary of expressions containing unifications.

    Returns:
        Expression with reversed unification.
    """
    prefix = self._global_variable_prefix

    # while there is any unification variable remaining in the expression
    while any(v.name.startswith(prefix) for v in get_unique_variables(expr)):
        replaced = expr.replace_expr(unification_dict)
        # fixed point reached: the remaining prefixed variables are not
        # covered by the dictionary, so stop instead of looping forever
        if replaced == expr:
            break
        expr = replaced

    return expr
def get_unification_candidates(expr: Expr) -> List[Expr]:
    """
    Collect all unification candidates of an expression.

    A candidate is a memory access, a variable/register or a location
    label. Integers are not collected. The result is ordered by the
    string form of each candidate, so repeated calls on the same
    expression return the same list.

    Args:
        expr: Expression to inspect.

    Returns:
        The candidates of `expr`, without duplicates, sorted by `str()`.
    """
    candidates = set()

    def collect(node: Expr) -> Expr:
        """
        Visitor callback that remembers `node` if it is a candidate.

        Args:
            node: Expression currently being visited.

        Returns:
            `node`, unchanged.
        """
        # memory, registers and location IDs are all treated alike
        if node.is_mem() or node.is_id() or node.is_loc():
            candidates.add(node)
        return node

    expr.visit(collect)

    ordered = list(candidates)
    ordered.sort(key=str)
    return ordered
def __init__(self, expr: Expr, replacements: Dict[Expr, Expr] = None):  # type: ignore
    """
    Initializes a SynthesisState instance.

    Args:
        expr (Expr): Expression in Miasm IR with unique variables/leaves.
        replacements (Dict[Expr, Expr], optional): Dictionary of variable
            replacements. If omitted, a fresh empty dictionary is created
            for this instance. A dictionary that is passed in is stored
            as is (not copied).
    """
    # A `{}` default would be created once and then stored on every
    # instance built without `replacements`, so they would all share
    # (and mutate) the same dictionary.
    if replacements is None:
        replacements = {}

    # expression as given
    self.expr_ast: Expr = expr
    # expression with the replacements applied
    self._expr: Expr = expr.replace_expr(replacements)
    self.replacements: Dict[Expr, Expr] = replacements
def add_to_set(e: Expr) -> Expr:
    """
    Visitor callback that collects memory, variables and labels.

    The expression is added to the set `results` of the enclosing scope
    if it is a memory access, a variable/register or a location label.

    Args:
        e: Expression to inspect.

    Returns:
        `e`, unchanged.
    """
    # memory, registers and location IDs are all treated alike
    if e.is_mem() or e.is_id() or e.is_loc():
        results.add(e)
    return e
def add_to_set(e: Expr) -> Expr:
    """
    Visitor callback that collects variables.

    The expression is added to the set `l` of the enclosing scope if it
    is a variable; anything else is passed through untouched.

    Args:
        e: Expression to inspect.

    Returns:
        `e`, unchanged.
    """
    if not e.is_id():
        return e
    l.add(e)
    return e
def gen_from_expression(expr: Expr, variables: List[Expr], num_samples: int) -> SynthesisOracle:
    """
    Builds a SynthesisOracle instance from a given expression.

    `num_samples` I/O pairs are evaluated, each as follows:

    1. One random value is drawn per variable and wrapped into an
       ExprInt of the variable's size.
    2. All variables in the expression are replaced by their value and
       the result is simplified.
    3. The tuple of input values is mapped to the simplified result.

    Samples that happen to draw the same inputs share one map entry.

    Args:
        expr (Expr): Expression representing a function f(x0, ..., xi).
        variables (List[Expr]): List of variables contained in `expr`.
        num_samples (int): Number of I/O samples to evaluate.

    Returns:
        SynthesisOracle: Oracle built from the input/output map.
    """
    synthesis_map = {}

    for _ in range(num_samples):
        # one random constant per variable, in the order of `variables`
        inputs = [ExprInt(get_rand_input(), v.size) for v in variables]
        replacements = dict(zip(variables, inputs))

        # with every variable replaced, simplification should fold the
        # expression into a single constant
        result = expr_simp(expr.replace_expr(replacements))
        assert result.is_int()

        synthesis_map[tuple(inputs)] = result

    return SynthesisOracle(synthesis_map)
def reverse_unification(expr: Expr, unification_dict: Dict[Expr, Expr]) -> Expr:
    """
    Undoes the unification of an expression.

    The unification dictionary is inverted with `invert_dict` and the
    inverted mapping is applied to the expression, so that each unified
    variable is replaced by the expression it stands for.

    Example:
        Given: {x: p0, y: p1} and expression p0 + p1.
        The inverted dictionary is {p0: x, p1: y}.
        The expression becomes x + y.

    Args:
        expr: Expression to reverse unification for.
        unification_dict: Dictionary of expressions containing unifications.

    Returns:
        Expression with reversed unification.
    """
    inverted = invert_dict(unification_dict)
    return expr.replace_expr(inverted)
def get_vars(cond: Expr, vars=None):
    """
    Appends an expression to a list if it is a variable.

    Args:
        cond: Expression to inspect.
        vars: List that collects the variables. If omitted, a fresh list
            is used for this call only, so nothing accumulates between
            calls; pass a list to actually collect variables.

    Returns:
        `cond`, unchanged.
    """
    # A `[]` default would be created once and shared by every call
    # that omits `vars`, growing without bound.
    if vars is None:
        vars = []
    if cond.is_id():
        vars.append(cond)
    return cond
def _is_suitable_simplification_candidate(self, expr: Expr, simplified: Expr) -> bool:
    """
    Checks if a simplification candidate is suitable.

    The candidate is rejected

    1. If it still contains a placeholder variable (a variable named "p"
       followed by digits). In this case, not every variable of the
       candidate could be matched to a terminal expression in the
       original one.

    2. If the expression graph of the original has at most as many nodes
       as the graph of the candidate. In this case, the candidate would
       not make the expression any smaller.

    3. If Miasm's expression simplification yields the same expression
       for the original and the candidate. In this case, the candidate
       offers nothing beyond the built-in simplification.

    4. If the semantic equivalence check fails. The check is run once.
       By default, the candidate is rejected only if the solver finds a
       counterexample (`z3.sat`); any other outcome is accepted. If
       `enforce_equivalence` is set, every outcome other than a proof
       of equivalence (`z3.unsat`) rejects the candidate.

    Args:
        expr: Original expression.
        simplified: Simplified expression candidate.

    Returns:
        True if `simplified` is a suitable replacement for `expr`,
        False if the candidate has to be skipped.
    """
    # contains placeholder variables; fullmatch so that ordinary variables
    # which merely start with "p" are not mistaken for placeholders
    if any(
            re.fullmatch(r"p[0-9]+", v.name)
            for v in get_unique_variables(simplified)
    ):
        logger.debug(
            f"{expr} <==> {simplified} (incorrect variable replacement)")
        return False

    # original is not larger than the candidate
    if len(expr.graph().nodes()) <= len(simplified.graph().nodes()):
        return False

    # same normalized expression
    if expr_simp(expr) == expr_simp(simplified):
        return False

    # the solver query is expensive: run it once and reuse the verdict
    solver_result = self.check_semantical_equivalence(expr, simplified)

    # equivalence is enforced, but the solver did not prove it
    if self.enforce_equivalence and solver_result != z3.unsat:
        logger.debug(
            f"{expr} <==> {simplified} (not semantically equivalent)")
        return False

    # SMT solver finds a counter example
    if solver_result == z3.sat:
        logger.debug(
            f"{expr} <==> {simplified} (not semantically equivalent, counterexample found)"
        )
        return False

    return True