def _find_sources_of_taint(self):
    """
    Find source of taint in a Linux binary (recv, and reads)

    Every PLT entry whose name contains 'recv' or 'read' is looked up in the
    CFG; the predecessors of that node are treated as the call sites. For
    each call site, the call site itself and the predecessors of the node at
    its function address are recorded.

    :return: a dict keyed by function address; each value is a list of unique
             (basic block address, tuple of argument register names) pairs
    """
    # LIMITATION: this part only works for linux binaries so far
    p = self._current_p
    cfg = self._current_cfg

    def arg_regs(call_addr):
        # register names for the arguments of the call found at call_addr
        count = len(get_any_arguments_call(p, call_addr))
        return tuple(
            p.arch.register_names[ordered_argument_regs[p.arch.name][idx]]
            for idx in xrange(count))

    # basic blocks that jump to a recv/read-like PLT stub
    call_blocks = []
    for sym_name, plt_addr in p.loader.main_object.plt.items():
        if 'recv' in sym_name or 'read' in sym_name:
            plt_node = cfg.get_any_node(plt_addr)
            if plt_node:
                call_blocks.extend(pred.addr for pred in plt_node.predecessors)

    sources = {}
    for block_addr in call_blocks:
        try:
            call_node = cfg.get_any_node(block_addr)
            func_addr = call_node.function_address
            sources.setdefault(func_addr, [])
            sources[func_addr].append((call_node.addr, arg_regs(call_node.addr)))

            # we go one level back
            entry_node = cfg.get_any_node(func_addr)
            for caller in entry_node.predecessors:
                caller_regs = arg_regs(caller.addr)
                sources.setdefault(caller.function_address, [])
                sources[caller.function_address].append(
                    (caller.addr, caller_regs))
        except Exception as e:
            # a failure on one call site must not prevent processing the others
            log.debug(str(e))

    # drop duplicated entries
    for key, entries in sources.items():
        sources[key] = list(set(entries))
    return sources
def get_args_call(p, no):
    """
    Gets the arguments of function call

    For every final state of the node, each argument-setting instruction is
    resolved to a concrete value when possible: either through the state's
    scratch temps (when the instruction data is a temporary) or directly from
    a single-constant VEX Const expression.

    :param p: angr project
    :param no: CFG Accurate node of the call site
    :return: a dict mapping each state in no.final_states to a list of
             (ins_arg.offset, concrete value) pairs, one per resolved argument
    """
    ins_args = get_ord_arguments_call(p, no.addr)
    if not ins_args:
        ins_args = get_any_arguments_call(p, no.addr)

    vals = {}
    for state in no.final_states:
        vals[state] = []
        for ins_arg in ins_args:
            data = ins_arg.data
            # get the values of the arguments
            if hasattr(data, 'tmp') and data.tmp in state.scratch.temps:
                val = state.scratch.temps[data.tmp].args[0]
                # Exact type check on purpose (bool is excluded). The value is
                # tested by type rather than truthiness so that a concrete
                # argument equal to 0 is still reported.
                if type(val) in (int, long):
                    vals[state].append((ins_arg.offset, val))
            elif isinstance(data, pyvex.expr.Const) and len(data.constants) == 1:
                vals[state].append((ins_arg.offset, data.constants[0].value))
            else:
                print("Cant' get the value for function call")
    return vals
def _backward_taint(self, current_path, *_, **__):
    """
    Check function used during the backward taint run.

    On the first invocation (taint not applied yet) it taints either the
    memory locations in self._taint_locs or, when there are none, the
    argument registers of the call made by the first CFG predecessor of the
    current block. On every invocation it then:

    * sets self._found_recv when the current block is a call whose first
      successor looks like a recv/read function;
    * steps a copy of the path and, if the current block is one of
      self._bb_sinks, sets self._sink_dep_args (and self._sink_bound_to_recv
      when a recv was already seen) as soon as one sink register is, or points
      to, tainted data, then stops the taint run.

    Errors are deliberately not fatal (best effort); they are logged at debug
    level. TimeOutException is re-raised so that the handler wrapped around
    the taint run in backward_tainter can see it.

    :param current_path: angr current path
    :return: None
    """
    try:
        p = self._p
        cfg = self._cfg
        addr = current_path.active[0].addr
        bl = p.factory.block(addr)

        if not self._ct.taint_applied:
            if self._taint_locs:
                for mem_addr in self._taint_locs:
                    self._ct.apply_taint(current_path, mem_addr, 'intial_taint',
                                         bit_size=self._ct.taint_buf_size)
            else:
                no = cfg.get_any_node(current_path.active[0].addr)
                if not no:
                    return
                preds = no.predecessors
                if not preds:
                    return
                pred = preds[0]
                nargs = len(get_any_arguments_call(p, pred.addr))
                for i in xrange(nargs):
                    off = ordered_agument_regs[p.arch.name][i]
                    reg = p.arch.register_names[off]
                    t_addr = getattr(current_path.active[0].regs, reg)
                    self._ct.apply_taint(current_path, t_addr, 'intial_taint',
                                         bit_size=self._ct.taint_buf_size)

        # check sink
        if bl.vex.jumpkind == 'Ijk_Call':
            try:
                succ = cfg.get_any_node(addr).successors[0]
                # the symbol lookup is only attempted when the node name does
                # not already match
                if (succ.name and ('recv' in succ.name or 'read' in succ.name)) or \
                        'recv' in p.loader.find_symbol(succ.addr).name:
                    # FIXME: should I check if tainted args
                    self._found_recv = True
            except TimeOutException:
                raise
            except Exception as e:
                # missing CFG node, no successor, or no symbol for the callee
                log.debug("_backward_taint: callee check skipped: %s", str(e))

        next_path = current_path.copy(copy_states=True).step()
        sink = [x for x in self._bb_sinks if x[0] == addr]
        if sink:
            for curr_sink in sink:
                for reg_name in curr_sink[1]:
                    m_addr = getattr(next_path.active[0].regs, reg_name)
                    if self._ct.is_or_points_to_tainted_data(m_addr, next_path):
                        self._sink_dep_args = True
                        if self._found_recv:
                            self._sink_bound_to_recv = True
                        self._ct.stop_run()
                        break
    except TimeOutException:
        # do not swallow the analysis timeout
        raise
    except Exception as e:
        log.debug("_backward_taint: step ignored: %s", str(e))
def _forward_taint(self, current_path, *_, **__):
    """
    Implements the forward taint logic

    When the current block is one of self._bb_sources and has not been
    processed yet, the registers listed for that source are tainted. Then, if
    taint has been applied and the current block is a call to a function whose
    name matches one of CMP_SUCCS, the argument registers of the call are
    inspected on a stepped copy of the path: if any of them is, or points to,
    tainted data, self._sink_bound_to_recv is set and the taint run stopped.

    Errors are deliberately ignored (best effort), but only Exception
    subclasses are caught, so KeyboardInterrupt/SystemExit still propagate.

    :param current_path: angr current path
    :return: None
    """
    try:
        p = self._p
        addr = current_path.active[0].addr
        bl = p.factory.block(addr)
        cfg = self._cfg

        source = [x for x in self._bb_sources if x[0] == addr]
        if source and addr not in self._taint_applied_sources:
            self._taint_applied_sources.append(addr)
            self.apply_ret_taint = True
            # only the first matching source entry is used
            for reg in source[0][1]:
                t_addr = getattr(current_path.active[0].regs, reg)
                self._ct.apply_taint(current_path, t_addr, 'intial_taint',
                                     bit_size=self._ct.taint_buf_size)

        # check sink
        if bl.vex.jumpkind == 'Ijk_Call' and self._ct.taint_applied:
            try:
                next_path = current_path.copy(copy_states=True).step()
                no = cfg.get_any_node(addr)
                succ = no.successors[0]

                is_cmp = bool(succ.name) and any(x in succ.name for x in CMP_SUCCS)
                if not is_cmp:
                    # fall back on the loader symbol, looked up only once
                    sym_name = p.loader.find_symbol(succ.addr).name
                    is_cmp = any(x in sym_name for x in CMP_SUCCS)

                if is_cmp:
                    nargs = len(get_any_arguments_call(p, no.addr))
                    for i in xrange(nargs):
                        off = ordered_argument_regs[p.arch.name][i]
                        reg = p.arch.register_names[off]
                        if self._ct.is_or_points_to_tainted_data(
                                getattr(next_path.active[0].regs, reg), next_path):
                            self._sink_bound_to_recv = True
                            self._ct.stop_run()
            except Exception:
                # missing CFG node, successor or symbol: skip the sink check
                pass
    except Exception:
        # best effort: a failing check must not abort the taint run
        pass
def _find_taint_callers(self, current_path, *_, **__):
    """
    Finds tainted callers

    When the block at the first active state ends with a call, a copy of the
    path is stepped and every argument register of the call is checked; each
    register that is, or points to, tainted data is recorded in
    self._tainted_callsites as (call block address, register name).

    :param current_path: angr current path
    :return: None
    """
    state = current_path.active[0]
    proj = self._current_p

    # only call sites are of interest
    if proj.factory.block(state.addr).vex.jumpkind != 'Ijk_Call':
        return

    stepped = current_path.copy(copy_states=True).step()
    n_args = len(get_any_arguments_call(proj, state.addr))

    for reg_off in ordered_argument_regs[proj.arch.name][:n_args]:
        value = getattr(stepped.active[0].regs, proj.arch.register_names[reg_off])
        if not self._core_taint.is_or_points_to_tainted_data(value, stepped):
            continue
        callsite = (state.addr, proj.arch.register_names[reg_off])
        self._tainted_callsites.append(callsite)
def backward_tainter(self, function_addr):
    """
    Run the backward taint analysis starting from a function and, when the
    sink depends on the function arguments, continue in its callers.

    A work list of (function address, sinks, depth) entries is processed in
    FIFO order. For each entry a fresh CoreTaint run is started with
    self._backward_taint as check function. Entries whose depth reached
    MAX_DEPTH_BACKWARD are discarded without being analyzed.

    :param function_addr: address of the function the analysis starts from
    :return: True as soon as a run sets self._sink_bound_to_recv, False when
             the work list is exhausted
    """
    min_lvl = MAX_DEPTH_BACKWARD
    to_analyze = [(function_addr, self._bb_sinks, 0)]
    p = self._p
    cfg = self._cfg
    self.backward_analysis_completed = False

    # ITERATE HERE!
    while to_analyze:
        self._sink_bound_to_recv = False
        self._sink_dep_args = False

        # Remove the entry *before* any `continue`: leaving it at the head of
        # the list made the loop spin forever once the depth limit was hit.
        faddr, self._bb_sinks, curr_lvl = to_analyze.pop(0)

        if min_lvl >= curr_lvl:
            min_lvl = curr_lvl
        if curr_lvl >= MAX_DEPTH_BACKWARD:
            continue

        white_calls = self._has_interesting_calls_backward(faddr)
        self._ct = coretaint.CoreTaint(p, interfunction_level=0, smart_call=True,
                                       only_tracker=True, follow_unsat=True,
                                       shuffle_sat=True, white_calls=white_calls,
                                       exploration_strategy=self._exploration_strategy,
                                       try_thumb=True,
                                       taint_returns_unfollowed_calls=True,
                                       taint_arguments_unfollowed_calls=True,
                                       exit_on_decode_error=True, force_paths=True,
                                       allow_untaint=False)

        s = self._prepare_state(faddr)
        summarized_f = self._prepare_function_summaries()

        self._ct.set_alarm(TIMEOUT_TAINT, n_tries=TIMEOUT_TRIES)
        try:
            # to trigger it refer to httpd 0x16410. Switch case is mostly UNSAT!
            self._ct.run(s, (), (), summarized_f=summarized_f, force_thumb=False,
                         use_smart_concretization=False,
                         check_func=self._backward_taint, init_bss=False)
        except TimeOutException:
            log.warning("Timeout Triggered")
        except Exception as e:
            log.warning("Exception: %s" % str(e))
        finally:
            # always disarm the alarm, whatever ended the run
            self._ct.unset_alarm()

        if self._sink_bound_to_recv:
            return True
        elif not self._taint_locs and self._sink_dep_args:
            # consider the callers
            no = cfg.get_any_node(faddr)
            if not no:
                continue

            # group the call sites (new sinks) by the function containing them
            functions = {}
            for pred in no.predecessors:
                callee_args = len(get_any_arguments_call(p, pred.addr))
                curr_sink = (pred.addr, tuple(
                    [p.arch.register_names[ordered_agument_regs[p.arch.name][i]]
                     for i in xrange(callee_args)]))
                functions.setdefault(pred.function_address, []).append(curr_sink)

            for caller_addr, caller_sinks in functions.items():
                to_analyze.append((caller_addr, caller_sinks, curr_lvl + 1))

    # NOTE(review): backward_analysis_completed is never set to True in this
    # function (it is False from the start), so this branch has no effect.
    # Kept as is because the intended condition cannot be told from here.
    if min_lvl < MAX_DEPTH_BACKWARD:
        self.backward_analysis_completed = False
    return False