def get_se_info(sucs, reg_records: set, mem_records: set, init_state=None):
    """Collect simplified constraints and register/memory formulas.

    Constraints are read from the first entry of ``sucs.all_successors``.
    Register formulas are read from the first flat successor or, when there
    is no flat successor, from the first unconstrained successor.

    :param sucs: Successors object exposing ``all_successors``,
        ``flat_successors`` and ``unconstrained_successors``.
    :param reg_records: Names of the registers whose formulas are wanted.
    :param mem_records: Memory records handed to
        ``FormulaExtractor.get_writing_mem_formulas``.
    :param init_state: State handed to the memory formula extraction; memory
        formulas are skipped when it is ``None``.
    :return: ``(formulas, constraints, read_from)``. ``formulas`` holds
        ``(reg_name, simplified_ast)`` pairs followed by whatever the memory
        extraction returns, ``constraints`` holds the simplified constraints,
        and ``read_from`` is the union of ``get_expression_input`` over every
        simplified constraint and register formula.
    """
    read_from = set()

    constraints = []
    for raw_constraint in sucs.all_successors[0].solver.constraints:
        simplified = claripy.simplify(raw_constraint)
        read_from.update(get_expression_input(simplified))
        constraints.append(simplified)

    # pick the stash that register formulas are read from
    if len(sucs.flat_successors) > 0:
        reg_states = sucs.flat_successors
    elif len(sucs.unconstrained_successors) > 0:
        # sometimes the next instruction address is hard to decide, then
        # next states are in unconstrained stash
        reg_states = sucs.unconstrained_successors
    else:
        reg_states = None

    # read formulas from registers
    formulas = []
    if reg_states is not None:
        for reg_name in reg_records:
            reg_formula = claripy.simplify(
                getattr(reg_states[0].regs, reg_name))
            read_from.update(get_expression_input(reg_formula))
            formulas.append((reg_name, reg_formula))

    # read formulas from memory
    if init_state is not None and len(mem_records) > 0:
        mem_formulas = FormulaExtractor.get_writing_mem_formulas(
            init_state, sucs.all_successors[0], mem_records)
        formulas.extend(mem_formulas)

    return formulas, constraints, read_from
def _make_ites(self, seq):
    """Pair up nodes whose reaching conditions are complementary.

    Repeatedly scans ``seq.nodes`` for two ``CodeNode`` objects where the
    negation of the first reaching condition simplifies to the second one,
    and hands each such pair to ``self._make_ite``. The scan is restarted
    after every restructuring and ends once a full pass finds no pair.

    :param seq: Sequence object whose ``nodes`` are examined.
    :return: None
    """
    # search for a == ^a pairs
    rescan = True
    while rescan:
        rescan = False
        for idx_0, first in enumerate(seq.nodes):
            if type(first) is not CodeNode:
                continue
            cond_0 = first.reaching_condition
            if cond_0 is None or claripy.is_true(cond_0) or claripy.is_false(cond_0):
                continue

            # only nodes that come after `first` are candidates
            for idx_1 in range(idx_0 + 1, len(seq.nodes)):
                second = seq.nodes[idx_1]
                if type(second) is not CodeNode or first is second:
                    continue
                cond_1 = second.reaching_condition
                if cond_1 is None:
                    continue
                complementary = claripy.simplify(claripy.Not(cond_0) == cond_1)
                if claripy.is_true(complementary):
                    # first and second should be structured using an if-then-else
                    self._make_ite(seq, first, second)
                    rescan = True
                    break

            if rescan:
                break
def _merge_same_conditioned_nodes(self, seq):
    """Merge adjacent nodes that share the same reaching condition.

    Walks ``seq.nodes`` pairwise. When a ``CodeNode`` and the node right
    after it both have a reaching condition and the two conditions simplify
    to being equal, the pair is replaced by one ``CodeNode`` wrapping
    ``self._merge_nodes(...)`` of their inner nodes. The merged node is then
    compared against its new neighbour before moving on.

    :param seq: Sequence object whose ``nodes`` list is rebuilt on a merge.
    :return: None
    """
    # search for nodes with the same reaching condition and then merge them into one sequence node
    pos = 0
    while pos < len(seq.nodes) - 1:
        current = seq.nodes[pos]

        same_condition = False
        if type(current) is CodeNode:
            current_cond = current.reaching_condition
            if current_cond is not None:
                following = seq.nodes[pos + 1]
                following_cond = following.reaching_condition
                if following_cond is not None:
                    same_condition = claripy.is_true(
                        claripy.simplify(current_cond == following_cond))

        if not same_condition:
            pos += 1
            continue

        # current and following should be put into the same sequence node;
        # pos is left untouched so the merged node is examined again
        merged = CodeNode(
            self._merge_nodes(current.node, following.node),
            current.reaching_condition,
        )
        seq.nodes = seq.nodes[:pos] + [merged] + seq.nodes[pos + 2:]
def update_constraint_rep(self, state):
    """
    Used to check if a constraint has been updated.

    Stores the simplified form of every constraint of ``state.solver`` in
    ``self.last_constraints``.
    """
    snapshot = []
    for constraint in state.solver.constraints:
        snapshot.append(claripy.simplify(constraint))
    self.last_constraints = snapshot
def _make_ites(self, seq):
    """Build if-then-else structures, then wrap the remaining conditional nodes.

    Phase 1 looks for two ``CodeNode`` objects in ``seq.nodes`` whose reaching
    conditions are complementary (``Not(a) == b`` simplifies to true) and
    hands each pair to ``self._make_ite``. After every restructuring the scan
    starts over, and it stops once a complete pass finds no pair.

    Previously the ``else: break`` of the outer ``for`` always fired (the
    inner ``break`` never left the outer loop), so the ``while`` loop ran a
    single pass and kept walking ``seq.nodes`` after ``_make_ite`` had been
    called on it.

    NOTE(review): the rescan assumes ``_make_ite`` replaces the matched pair
    so the same two nodes cannot match again -- confirm against ``_make_ite``.

    Phase 2 replaces every node whose reaching condition is set and not
    trivially true with a ``ConditionNode`` guarding it.

    :param seq: Sequence object whose ``nodes`` are restructured in place.
    :return: None
    """
    # search for a == ^a pairs
    while True:
        restructured = False
        for node_0 in seq.nodes:
            if type(node_0) is not CodeNode:
                continue
            rcond_0 = node_0.reaching_condition
            if rcond_0 is None:
                continue
            for node_1 in seq.nodes:
                if type(node_1) is not CodeNode:
                    continue
                if node_0 is node_1:
                    continue
                rcond_1 = node_1.reaching_condition
                if rcond_1 is None:
                    continue
                cond_ = claripy.simplify(claripy.Not(rcond_0) == rcond_1)
                if claripy.is_true(cond_):
                    # node_0 and node_1 should be structured using an if-then-else
                    self._make_ite(seq, node_0, node_1)
                    restructured = True
                    break
            if restructured:
                # seq may have been rewritten; abandon this pass and rescan
                break
        if not restructured:
            break

    # make all conditionally-reachable nodes a ConditionNode
    for i, node in enumerate(seq.nodes):
        if node.reaching_condition is None or claripy.is_true(node.reaching_condition):
            continue
        seq.nodes[i] = ConditionNode(node.addr, None, node.reaching_condition, node, None)
def simplify(self, *args):
    """Simplify the solver state or a single expression.

    With no arguments the call is forwarded to ``self._solver.simplify()``.
    Otherwise only the first argument is considered: a ``claripy.Base``
    instance is run through ``claripy.simplify``, anything else is returned
    unchanged.
    """
    if not args:
        return self._solver.simplify()

    expr = args[0]
    if isinstance(expr, claripy.Base):
        return claripy.simplify(expr)
    return expr
def _recover_reaching_conditions(self):
    """Compute a reaching condition for every node of the region graph.

    First, for each node with more than one entry in ``graph[node]``, the
    predicate of every outgoing edge is obtained from
    ``self._extract_predicate`` and remembered per edge and per predicate.
    Then the nodes are visited in topological order; a node's reaching
    condition is the disjunction, over its predecessors, of the
    predecessor's reaching condition AND-ed with the edge condition (both
    defaulting to ``claripy.true``). Nodes without predecessors get no entry.

    Results are stored in ``self._reaching_conditions`` and
    ``self._predicate_mapping``.
    """
    graph = self._region.graph

    # traverse the graph to recover the condition for each edge
    edge_conditions = {}
    predicate_mapping = {}
    for src in graph.nodes():
        successors = graph[src]
        if len(successors) <= 1:
            continue
        for dst in successors:
            predicate = self._extract_predicate(src, dst)
            edge_conditions[(src, dst)] = predicate
            predicate_mapping[predicate] = dst

    # recover the reaching condition for each node
    reaching_conditions = {}
    for node in networkx.topological_sort(graph):
        combined = None
        for pred in graph.predecessors(node):
            pred_condition = reaching_conditions.get(pred, claripy.true)
            edge_condition = edge_conditions.get((pred, node), claripy.true)
            via_pred = claripy.And(pred_condition, edge_condition)
            if combined is None:
                combined = via_pred
            else:
                combined = claripy.Or(via_pred, combined)

        if combined is not None:
            reaching_conditions[node] = claripy.simplify(combined)

    self._reaching_conditions = reaching_conditions
    self._predicate_mapping = predicate_mapping
def merge_path_to_same_lib_call_together(self):
    """Merge the formulas of all paths that lead to the same library call.

    For every address in ``self.all_lib_calls`` the formulas and constraints
    of each path are obtained from ``self.fe.get_formulas_from``.

    * When at least one formula was found, the formulas are grouped by
      variable name, merged with ``merge_fe_formulas`` and stored as
      ``(v_name, formula, constraint)``; ``constraint`` is ``None`` when no
      constraints remain or the simplified disjunction is the constant True.
    * Otherwise a single ``(None, all_paths, constraint)`` entry is stored,
      where ``constraint`` is the simplified disjunction of all path
      constraints, or ``BoolV(True)`` when there are none.

    A library call whose merge raises is logged and left out of the result.

    :return: dict mapping library-call address to its list of entries.
    """
    merged_by_call = {}
    for libcall_addr in self.all_lib_calls.keys():
        try:
            entries = merged_by_call[libcall_addr] = []
            paths = self.all_lib_calls[libcall_addr]

            formulas_by_name = {}
            every_constraint = []
            for path in paths:
                path_formulas, path_cons, _inputs = self.fe.get_formulas_from(path)
                if path_cons:
                    every_constraint.extend(path_cons)
                for v_name, ast in path_formulas:
                    formulas_by_name.setdefault(v_name, []).append((ast, path_cons))

            if formulas_by_name:
                # here is a libcall with arguments
                for v_name, fc_list in formulas_by_name.items():
                    merged_f, merged_cons = merge_fe_formulas(
                        fc_list, self.fe.ptr_size)
                    merged_f = claripy.simplify(merged_f)
                    if len(merged_cons) == 0:
                        # no constraints is equivalent to always True
                        merged_cons = None
                    else:
                        merged_cons = claripy.simplify(claripy.Or(*merged_cons))
                        if merged_cons.depth == 1 and merged_cons.args == (True, ):
                            merged_cons = None
                    entries.append((v_name, merged_f, merged_cons))
            else:
                # here is a libcall with no argument
                if len(every_constraint) == 0:
                    path_condition = claripy.BoolV(True)
                else:
                    path_condition = claripy.simplify(claripy.Or(*every_constraint))
                entries.append((None, paths, path_condition))
        except Exception:
            log.error(
                'meets error in merge_path_to_same_lib_call_together base_entry=0x%x'
                % self.base_entry)
            # if this lib call meets error, we skip merging formulas of it
            merged_by_call.pop(libcall_addr, None)
    return merged_by_call
def _simplify_condition(self, cond):
    """Simplify a condition, preferring a concrete claripy result.

    When ``claripy.simplify`` yields a non-symbolic value that value is
    returned. Otherwise the result of
    ``self._revert_short_circuit_conditions(cond)`` is returned, or ``cond``
    itself when that helper returns ``None``.
    """
    concrete_candidate = claripy.simplify(cond)
    if concrete_candidate.symbolic:
        reverted = self._revert_short_circuit_conditions(cond)
        return cond if reverted is None else reverted
    return concrete_candidate
def _simplify_trivial_cases(cond):
    """Drop trivially-true operands from an ``And`` condition.

    :param cond: Condition whose ``op`` and ``args`` are inspected.
    :return: ``claripy.And`` over the operands that do not simplify to true,
        or ``None`` when ``cond`` is not an ``And``.
    """
    if cond.op != "And":
        return None

    kept = [
        operand for operand in cond.args
        if not claripy.is_true(claripy.simplify(operand))
    ]
    return claripy.And(*kept)
def simplify_condition(cond):
    """Simplify ``cond`` with the in-house rewrites.

    Z3's simplification may yield weird and unreadable results, so its output
    is only used when it is a concrete (non-symbolic) value. In every other
    case double negations are folded and short-circuit conditions are
    reverted, each step being skipped when its helper returns ``None``.
    """
    z3_result = claripy.simplify(cond)
    if not z3_result.symbolic:
        return z3_result

    folded = ConditionProcessor._fold_double_negations(cond)
    if folded is not None:
        cond = folded

    reverted = ConditionProcessor._revert_short_circuit_conditions(cond)
    if reverted is not None:
        cond = reverted

    return cond
def get_relative_symbol(self, expr):
    """
    Record the symbols that ``expr`` uses as a memory pointer.

    ``expr`` should be concretized to a memory address. The symbols used in
    it may be of 2 types:
        1. a memory address pointer
        2. an offset relative to that pointer (hopefully small, usually a
           constant)
    Only the simplest ``a +/- b`` expression is analysed, since there it is
    easy to decide which operand is the pointer, and this should cover
    almost all cases. Objects without a ``depth`` attribute are ignored.

    :param expr: The expression used as a memory address.
    :return: None; findings are added to ``self.mem_ptr_symbols`` and
             ``self.mem_ptr_symbols_str``.
    """
    if not hasattr(expr, 'depth'):
        return

    if expr.depth == 1 and expr.symbolic:
        # expr itself is a symbolic value, and it is used as a mem ptr
        self.mem_ptr_symbols.add(expr)
        return

    if not (expr.depth == 2 and expr.symbolic):
        # It could be a very complex formula. 'If' condition is often in it,
        # so the whole formula is treated as an input, since it is usually
        # not being simplified.
        # It must be simplified first; claripy's simplify seems to merely
        # sort the AST in an order.
        whole = claripy.simplify(expr)
        self.mem_ptr_symbols.add(whole)
        self.mem_ptr_symbols_str.add(str(whole))
        return

    if expr.op not in {'__add__', '__sub__'}:
        log.warning(
            'Unsolvable expression for concretizing memory address %s'
            % str(expr))
        return

    if len(expr.args) != 2:
        log.warning(
            'Unknown why add and sub operation with a single argument, %s (%s, %s), treat it as a single parameter without the operation.'
            % (str(expr), str(expr.op), str(expr.args)))
        self.get_relative_symbol(expr.args[0])
        return

    lhs, rhs = expr.args[0], expr.args[1]
    if lhs.symbolic and not rhs.symbolic:
        # rhs should be a BVV, and its value should be relatively small (an offset)
        pointer = lhs
    elif not lhs.symbolic and rhs.symbolic:
        pointer = rhs
    else:
        log.warning(
            'Unsolvable expression for concretizing memory address %s'
            % str(expr))
        return

    self.mem_ptr_symbols.add(pointer)
    self.mem_ptr_symbols_str.add(str(pointer))
def is_swval_constant(self, state):
    '''
    Checks if the form of the guard condition is simple comparison with a
    constant value.

    Only the last guard of ``state.guards`` is considered. The result is
    truthy when the simplified guard is an ``__eq__`` between a ``BVV`` and a
    ``BVS`` whose name starts with ``self.swvar_name()``.
    '''
    # Assume the last guard condition alone can determine the switch value.
    last_guard = list(state.guards)[-1:]
    ast = claripy.simplify(reduce(claripy.And, last_guard, claripy.true))
    if ast.op != '__eq__':
        return False

    lhs, rhs = ast.args
    # try "constant == variable" first, then "variable == constant"
    for constant, variable in ((lhs, rhs), (rhs, lhs)):
        if constant.op == 'BVV':
            return variable.op == 'BVS' and variable.args[0].startswith(
                self.swvar_name())
    return False
def _make_ites(self, seq):
    """Build if-then-else structures, then wrap the remaining conditional nodes.

    Phase 1 looks for two ``CodeNode`` objects in ``seq.nodes`` whose reaching
    conditions are complementary (``Not(a) == b`` simplifies to true) and
    hands each pair to ``self._make_ite``. After every restructuring the scan
    starts over, and it stops once a complete pass finds no pair.

    Previously the ``else: break`` of the outer ``for`` always fired (the
    inner ``break`` never left the outer loop), so the ``while`` loop ran a
    single pass and kept walking ``seq.nodes`` after ``_make_ite`` had been
    called on it.

    NOTE(review): the rescan assumes ``_make_ite`` replaces the matched pair
    so the same two nodes cannot match again -- confirm against ``_make_ite``.

    Phase 2 rewrites every node whose reaching condition is set and not
    trivially true: a node wrapping a ``ConditionalBreakNode`` gets the
    reaching condition AND-ed into the break condition, any other node is
    guarded by a ``ConditionNode``.

    :param seq: Sequence object whose ``nodes`` are restructured in place.
    :return: None
    """
    # search for a == ^a pairs
    while True:
        restructured = False
        for node_0 in seq.nodes:
            if type(node_0) is not CodeNode:
                continue
            rcond_0 = node_0.reaching_condition
            if rcond_0 is None:
                continue
            for node_1 in seq.nodes:
                if type(node_1) is not CodeNode:
                    continue
                if node_0 is node_1:
                    continue
                rcond_1 = node_1.reaching_condition
                if rcond_1 is None:
                    continue
                cond_ = claripy.simplify(claripy.Not(rcond_0) == rcond_1)
                if claripy.is_true(cond_):
                    # node_0 and node_1 should be structured using an if-then-else
                    self._make_ite(seq, node_0, node_1)
                    restructured = True
                    break
            if restructured:
                # seq may have been rewritten; abandon this pass and rescan
                break
        if not restructured:
            break

    # make all conditionally-reachable nodes ConditionNodes
    for i, node in enumerate(seq.nodes):
        if node.reaching_condition is None or claripy.is_true(node.reaching_condition):
            continue
        if isinstance(node.node, ConditionalBreakNode):
            # Put conditions together and simplify them
            cond = claripy.And(
                node.reaching_condition,
                self._bool_variable_from_ail_condition(node.node.condition))
            new_node = CodeNode(
                ConditionalBreakNode(node.node.addr, cond, node.node.target),
                None)
        else:
            new_node = ConditionNode(node.addr, None, node.reaching_condition,
                                     node, None)
        seq.nodes[i] = new_node
def simplify_condition_deprecated(cond):
    """Simplify ``cond`` with the full chain of in-house rewrites.

    Z3's simplification may yield weird and unreadable results, so its output
    is only used when it is a concrete (non-symbolic) value. Otherwise the
    rewrites below are applied in order; a rewrite that returns ``None``
    leaves the condition as it was.
    """
    z3_result = claripy.simplify(cond)
    if not z3_result.symbolic:
        return z3_result

    def _apply(rewrite, current):
        # keep the current condition when the rewrite has nothing to offer
        rewritten = rewrite(current)
        return current if rewritten is None else rewritten

    cond = _apply(ConditionProcessor._fold_double_negations, cond)
    cond = _apply(ConditionProcessor._revert_short_circuit_conditions, cond)
    cond = _apply(ConditionProcessor._extract_common_subexpressions, cond)
    # NOTE: ConditionProcessor._remove_redundant_terms is deliberately not
    # applied here; it was disabled in this routine.

    # in the end, use claripy's simplification to handle really easy cases again
    cond = _apply(ConditionProcessor._simplify_trivial_cases, cond)
    return cond
def _make_ites(self, seq):
    """Build if-then-else structures, then wrap the remaining conditional nodes.

    Phase 1 looks for two ``CodeNode`` objects in ``seq.nodes`` whose reaching
    conditions are complementary (``Not(a) == b`` simplifies to true) and
    hands each pair to ``self._make_ite``. After every restructuring the scan
    starts over, and it stops once a complete pass finds no pair.

    Previously the ``else: break`` of the outer ``for`` always fired (the
    inner ``break`` never left the outer loop), so the ``while`` loop ran a
    single pass and kept walking ``seq.nodes`` after ``_make_ite`` had been
    called on it.

    NOTE(review): the rescan assumes ``_make_ite`` replaces the matched pair
    so the same two nodes cannot match again -- confirm against ``_make_ite``.

    Phase 2 rewrites every node whose reaching condition is set and not
    trivially true: a node wrapping a ``ConditionalBreakNode`` gets the
    reaching condition AND-ed into the break condition, any other node is
    guarded by a ``ConditionNode``.

    :param seq: Sequence object whose ``nodes`` are restructured in place.
    :return: None
    """
    # search for a == ^a pairs
    while True:
        restructured = False
        for node_0 in seq.nodes:
            if type(node_0) is not CodeNode:
                continue
            rcond_0 = node_0.reaching_condition
            if rcond_0 is None:
                continue
            for node_1 in seq.nodes:
                if type(node_1) is not CodeNode:
                    continue
                if node_0 is node_1:
                    continue
                rcond_1 = node_1.reaching_condition
                if rcond_1 is None:
                    continue
                cond_ = claripy.simplify(claripy.Not(rcond_0) == rcond_1)
                if claripy.is_true(cond_):
                    # node_0 and node_1 should be structured using an if-then-else
                    self._make_ite(seq, node_0, node_1)
                    restructured = True
                    break
            if restructured:
                # seq may have been rewritten; abandon this pass and rescan
                break
        if not restructured:
            break

    # make all conditionally-reachable nodes ConditionNodes
    for i, node in enumerate(seq.nodes):
        if node.reaching_condition is None or claripy.is_true(node.reaching_condition):
            continue
        if isinstance(node.node, ConditionalBreakNode):
            # Put conditions together and simplify them
            cond = claripy.And(node.reaching_condition,
                               self._bool_variable_from_ail_condition(node.node.condition))
            new_node = CodeNode(ConditionalBreakNode(node.node.addr, cond, node.node.target), None)
        else:
            new_node = ConditionNode(node.addr, None, node.reaching_condition, node, None)
        seq.nodes[i] = new_node
def _claripy_simplify(self, *args):  #pylint:disable=no-self-use
    """Run ``claripy.simplify`` on the first positional argument.

    Any further positional arguments are ignored.
    """
    expr = args[0]
    return claripy.simplify(expr)
def gen_chains(self, state=None):
    """Step ``state`` until it is successful or no fallback state is left.

    Each iteration steps the current state and looks at the number of flat
    successors:

    * exactly one: continue with it;
    * none: fall back to a state picked by ``choose_a_previous_path``;
    * several: continue with the one picked by ``choose_a_successor_state``
      and keep the others for later.

    After a fallback or a branch, the simplified constraints of the chosen
    state are compared with ``self.last_constraints``; when they differ and
    the ``is_printable`` check passes, the input character at
    ``self.last_char_checked`` is evaluated and pinned to that value.

    :param state: State to start from; ``self.state`` is used when falsy.
    :return: ``('success', state)`` once ``self.is_successful(state)`` holds,
        or ``('no_states', None)`` when there is nothing left to fall back on.
    """
    # NOTE(review): this initial value is overwritten by self.gc(self.states)
    # inside the loop before it is ever read.
    states = self.states if self.states else []
    if not state:
        state = self.state
    while True:
        if self.is_successful(state):
            return ('success', state)
        states = self.gc(self.states)
        try:
            # w()/log() are output helpers defined elsewhere -- presumably
            # progress tracing; confirm against their definitions.
            w("<%d+" % len(states))
            my_succ = state.step().flat_successors  # succ.successors for symbolic
            nsucc = len(my_succ)
            w(str(nsucc))
            w(">")
            if nsucc == 1:
                # single successor: just follow it, no constraint bookkeeping
                w(".")
                state = my_succ.pop()
                self.state = state
                continue
            if nsucc == 0:
                # No active successors.
                if not states:
                    return ('no_states', None)
                log("<< %d(" % len(states))
                state, states = self.choose_a_previous_path(states)
                self.update_checked_idx()
                w("%d)" % self.last_char_checked)
                self.state, self.states = state, states
            elif nsucc > 1:
                w("{")
                arg = self.get_args(state)
                w(repr(arg))
                w(",")
                # continue with one successor, remember the rest in `states`
                state, ss = self.choose_a_successor_state(my_succ)
                self.update_checked_idx()
                arg = self.get_args(state)
                w(repr(arg))
                w("}")
                states.extend(ss)
                self.state, self.states = state, states
            # were there any new chars?
            w("[")
            current_constraints = [
                claripy.simplify(c) for c in state.solver.constraints
            ]
            if not self.identical_constraints(current_constraints,
                                              self.last_constraints):
                # TODO: get the last variable checked.
                self.last_constraints = current_constraints
                # were there any constraints?
                # NOTE(review): the check reads index last_char_checked + 1
                # while the value below is taken from index last_char_checked
                # -- confirm this offset is intended.
                if self.is_printable(self.arg1a[self.last_char_checked + 1]):
                    # log("adding: %s at %d" % (chr(m), self.last_char_checked))
                    # now concretize
                    # TODO: save the state with opposite constraints after
                    # checking unsat
                    val = state.solver.eval(
                        self.arg1a[self.last_char_checked])
                    w("@%d: %s" % (self.last_char_checked, chr(val)))
                    # check if an equality operator is involved
                    c = self.arg1a[self.last_char_checked]
                    if Quick_Fix and state.solver.max(
                            c) != state.solver.min(c):
                        # the character can still take other values: keep a
                        # copy constrained to "not this value" for later
                        not_state = state.copy()
                        not_state.add_constraints(c != val)
                        self.extra_states.append(not_state)
                    state.add_constraints(c == val)
                    # refresh self.last_constraints after adding c == val
                    self.update_constraint_rep(state)
                    log("]")
                else:
                    # the constraint added was not one on the input character
                    # hence we ignore.
                    w("x]")
        except angr.errors.SimUnsatError as ue:
            # unsatisfiable: fall back to a previously saved state, if any
            log('unsat.. %s' % str(ue))
            if not states:
                return ('no_states', None)
            state, states = self.choose_a_previous_path(states)
            self.state, self.states = state, states
            self.update_checked_idx()
def assert_correct(a, b):
    """Assert that the simplified ``a`` is identical to ``b`` for the Z3 backend."""
    simplified = claripy.simplify(a)
    is_identical = claripy.backends.z3.identical(simplified, b)
    nose.tools.assert_true(is_identical)