def _find_sources_of_taint(p, cfg):
    """
    Find sources of taint.

    A source is a basic block that calls a PLT entry whose name contains one
    of the data-receiving method names (``read``/``recv``).  In addition, the
    analysis goes one level back: every predecessor of the entry node of the
    function containing such a call is recorded as a source too.

    LIMITATION: the lookup goes through the PLT of the main object, so this
    only works for Linux binaries so far.

    :param p: angr project
    :param cfg: angr CFG (accessed through ``cfg.model``)
    :return: dict mapping a function address to a de-duplicated list of
             ``(basic block address, tuple of argument register names)``
    """
    # Substrings identifying the PLT entries treated as sources of taint.
    # NOTE: these must be separate list items; a single 'read, recv' string
    # never matches any PLT name.
    source_methods = ['read', 'recv']

    sources = {}

    # PLT stubs of the methods we are looking for
    plt_addrs = [
        addr for name, addr in p.loader.main_object.plt.items()
        if any(s_m in name for s_m in source_methods)
    ]

    # basic blocks calling those stubs
    bb_call = []
    for plt_addr in plt_addrs:
        no = cfg.model.get_any_node(plt_addr)
        if no:
            bb_call.extend(pred.addr for pred in no.predecessors)

    # for each calling basic block (and the callers of its function) discover
    # the registers providing input to the methods
    for b_block in bb_call:
        try:
            no = cfg.model.get_any_node(b_block)
            faddr = no.function_address
            entries = sources.setdefault(faddr, [])
            regs = arg_reg_names(p, get_arity(p, no.addr))
            entries.append((no.addr, tuple(regs)))

            # we go one level back
            n_f = cfg.model.get_any_node(faddr)
            for pred in n_f.predecessors:
                regs = arg_reg_names(p, get_arity(p, pred.addr))
                sources.setdefault(pred.function_address, []).append(
                    (pred.addr, tuple(regs)))
        except Exception as e:
            # best effort: a block that cannot be resolved is skipped
            log.error(
                f"BBF: Error encountered when discovering input registers: {e}"
            )

    for k in sources:
        sources[k] = list(set(sources[k]))

    return sources
def _save_info_preamble(self, current_path, core_taint):
    """
    Save address of function calls that have tainted parameters.
    This is useful to find wrappers to role functions.

    The address of the current block is appended to ``self._tainted_calls``
    when the block is a function entry point and at least one of its argument
    registers is, or points to, tainted data.

    :param current_path: angr current path
    :param core_taint: core taint engine
    :return: None
    :raises TimeOutException: re-raised untouched; any other error makes the
                              function return silently (best effort)
    """
    try:
        no = self._cfg.get_any_node(current_path.active[0].addr)
        # only function entry points are of interest
        if not no or no.function_address != no.addr:
            return

        preds = no.predecessors
        if len(preds) == 1:
            # check if the current address is just a normal basic block
            # that angr mistook for a function because it contains a
            # function preamble
            if self._p.factory.block(preds[0].addr).vex.jumpkind != 'Ijk_Call':
                return

        if preds:
            arity = get_arity(self._p, preds[0].addr)
            # `range` (not the Python-2-only `xrange`): with `xrange` the
            # NameError was swallowed below and nothing was ever recorded.
            for narg in range(arity):
                dst_reg = ordered_argument_regs[self._p.arch.name][narg]
                dst_addr = getattr(current_path.active[0].regs,
                                   self._p.arch.register_names[dst_reg])
                if core_taint.is_or_points_to_tainted_data(dst_addr, current_path):
                    self._tainted_calls.append(no.addr)
                    break
    except TimeOutException:
        raise
    except Exception:
        return
def find_memcpy_like(p, cfg=None):
    """
    Finds all the memcpy-like functions in a given binary (Linux and binary blob)

    Functions whose name contains ``memcpy`` are always reported.  When a CFG
    is given, a function is additionally reported when it takes two or three
    arguments, has at most 8 blocks, and contains a loop that both performs an
    equality comparison and has an ``Iop_Add64`` involving the constant 1.

    :param p: angr project
    :param cfg: angr cfg (optional; without it only the name match is used)
    :return: memcpy-like functions (list of addresses)
    """
    memcpy_like = [
        f.addr for f in p.kb.functions.values() if 'memcpy' in f.name
    ]
    if cfg is None:
        return memcpy_like

    for fun in cfg.functions.values():
        # call sites of the function; unresolved functions are skipped
        try:
            no = cfg.model.get_any_node(fun.addr)
            call_sites = list(no.predecessors)
        except Exception:
            continue
        if not call_sites:
            continue

        nargs = get_arity(p, call_sites[0].addr)
        if nargs > 3 or nargs < 2:
            continue

        for loop in networkx.simple_cycles(fun.graph):
            vex_blocks = [p.factory.block(l.addr).vex for l in loop]

            # CMPNE or CMPEQ
            has_cmp = any(
                'cmpeq' in op.lower() or 'cmpne' in op.lower()
                for vex in vex_blocks for op in vex.operations
            )
            if not has_cmp:
                continue

            # INCREMENT
            wr_tmp = [
                st for vex in vex_blocks for st in vex.statements
                if st.tag == 'Ist_WrTmp'
            ]
            add_constants = [
                w.constants for w in wr_tmp
                if hasattr(w, 'data') and hasattr(w.data, 'op')
                and w.data.op == 'Iop_Add64'
            ]
            if not add_constants:
                continue

            values = [c.value for consts in add_constants for c in consts]
            # using BootStomp thresholds
            if 1 in values and sum(1 for _ in fun.blocks) <= 8:
                memcpy_like.append(fun.addr)
                # one matching loop is enough: the result is de-duplicated
                break

    return list(set(memcpy_like))
def backward_tainter(self, function_addr):
    """
    Implements the backward taint core functionality

    Starting from ``function_addr``, runs the taint analysis and, when the
    sink depends on the function arguments, continues with the callers of the
    function, up to ``MAX_DEPTH_BACKWARD`` levels.

    :param function_addr: function address to start the analysis
    :return: True if a sink was found to be bound to a recv, False otherwise
    """
    min_lvl = MAX_DEPTH_BACKWARD
    to_analyze = [(function_addr, self._bb_sinks, 0)]
    p = self._p
    cfg = self._cfg
    self._backward_analysis_completed = False

    # ITERATE HERE!
    while to_analyze:
        self._sink_bound_to_recv = False
        self._sink_dep_args = False

        # Pop the work item BEFORE the depth check: skipping an item that
        # was still at the head of the list made this loop spin forever.
        faddr, self._bb_sinks, curr_lvl = to_analyze.pop(0)

        min_lvl = min(min_lvl, curr_lvl)
        if curr_lvl >= MAX_DEPTH_BACKWARD:
            continue

        white_calls = self._has_interesting_calls_backward(faddr)

        # run the taint analysis with the parameters
        self.run_coretaint(p, white_calls, faddr, self._backward_taint)

        if self._sink_bound_to_recv:
            return True

        if not self._taint_locs and self._sink_dep_args:
            # consider the callers
            no = cfg.model.get_any_node(faddr)
            if not no:
                continue

            # group the call sites (new sinks) by calling function
            functions = {}
            for pred in no.predecessors:
                curr_sink = (pred.addr,
                             tuple(arg_reg_names(p, get_arity(p, pred.addr))))
                functions.setdefault(pred.function_address, []).append(curr_sink)

            for caller_addr, caller_sinks in functions.items():
                to_analyze.append((caller_addr, caller_sinks, curr_lvl + 1))

    # NOTE(review): the flag is False on every path (it is initialised to
    # False above as well) -- kept as in the original; confirm the intent.
    if min_lvl < MAX_DEPTH_BACKWARD:
        self._backward_analysis_completed = False

    return False
def _forward_taint(self, current_path, *_, **__):
    """
    Implements the forward taint logic

    On a source basic block (listed in ``self._bb_sources``) the content of
    the source's registers is tainted, once per source address.  On a call
    block, once taint has been applied, the callee is checked against
    ``CMP_SUCCS``: if one of the call arguments is, or points to, tainted data
    the sink is flagged as bound to a recv and the taint run is stopped.

    Errors are deliberately swallowed (best effort).

    :param current_path: angr current path
    :return: None
    """
    try:
        p = self._p
        addr = current_path.active[0].addr
        bl = p.factory.block(addr)
        cfg = self._cfg

        source = [x for x in self._bb_sources if x[0] == addr]
        if source and addr not in self._taint_applied_sources:
            self._taint_applied_sources.append(addr)
            self.apply_ret_taint = True
            for reg in source[0][1]:
                t_addr = getattr(current_path.active[0].regs, reg)
                size = min(
                    self._ct.estimate_mem_buf_size(current_path.active[0], t_addr),
                    self._ct.taint_buf_size)
                self._ct.apply_taint(current_path, t_addr, 'initial_taint',
                                     bit_size=size)

        # check sink
        if bl.vex.jumpkind == 'Ijk_Call' and self._ct.taint_applied:
            try:
                next_path = current_path.copy(deep=True).step()
                no = cfg.model.get_any_node(addr)
                if not no or not no.successors:
                    return
                succ = no.successors[0]

                # the callee is identified by node name or, failing that,
                # by the loader symbol at its address
                is_cmp = bool(succ.name) and any(x in succ.name for x in CMP_SUCCS)
                if not is_cmp:
                    symbol = p.loader.find_symbol(succ.addr)
                    is_cmp = symbol is not None and \
                        any(x in symbol.name for x in CMP_SUCCS)

                if is_cmp:
                    nargs = get_arity(p, no.addr)
                    for i in range(nargs):
                        reg_name = arg_reg_name(p, i)
                        reg_val = getattr(next_path.active[0].regs, reg_name)
                        if self._ct.is_or_points_to_tainted_data(reg_val, next_path):
                            self._sink_bound_to_recv = True
                            self._ct.stop_run()
            except Exception:
                pass
    except Exception:
        pass
def _find_taint_callers(self, current_path, *_, **__):
    """
    Record the call sites that pass tainted data to their callee.

    For a block ending in a call, every argument register that is, or points
    to, tainted data adds ``(call site address, register name)`` to
    ``self._tainted_callsites``.

    :param current_path: angr current path
    :return: None
    """
    state = current_path.active[0]
    project = self._current_p

    # only call sites are of interest
    if project.factory.block(state.addr).vex.jumpkind != 'Ijk_Call':
        return

    stepped = current_path.copy(deep=True).step()
    for idx in range(get_arity(project, state.addr)):
        value = getattr(stepped.active[0].regs, arg_reg_name(project, idx))
        if not self._core_taint.is_or_points_to_tainted_data(value, stepped):
            continue
        self._tainted_callsites.append((state.addr, arg_reg_name(project, idx)))
def _check_key_usage(self, current_path, *_, **__):
    """
    Runs every cpf on the current path to check whether the role of the
    binary can be inferred with the current info

    On the first invocation (``self._set_f_vals`` set and no values saved yet)
    the values of the argument registers of the function where the taint
    analysis starts are stored in ``self._f_arg_vals``.

    :param current_path: current path given by the taint analysis
    :return: None
    """
    # retrieve and save the values of arguments of the function where we
    # start the taint analysis
    if not self._f_arg_vals and self._set_f_vals:
        self._set_f_vals = False
        arch = self._current_p.arch
        arity = max(get_arity(self._current_p, self._current_f_addr),
                    DEF_ROLE_ARITY)
        # `range`, not the Python-2-only `xrange`
        for narg in range(arity):
            dst_reg = ordered_argument_regs[arch.name][narg]
            dst_cnt = getattr(current_path.active[0].regs,
                              arch.register_names[dst_reg])
            self._f_arg_vals.append(dst_cnt)

    current_bin = self._current_bin
    for pl in self._cpfs[current_bin]:
        log.debug("Entering cpf %s", pl.name)
        try:
            found, role = pl.run(self._current_data_key,
                                 self._current_key_addr,
                                 self._current_par_name,
                                 self._core_taint, current_path,
                                 self._f_arg_vals)
            if found:
                log.debug("Using cpf %s", pl.name)
                self._current_role = role
                self._cpf_used = pl
                # NOTE(review): `pl` is compared against the Semantic and
                # SetterGetter names themselves; confirm this matches how
                # the cpfs are stored in `self._cpfs`.
                if pl not in (semantic.Semantic, setter_getter.SetterGetter):
                    self._core_taint.stop_run()
                    break
        except Exception:
            # best effort: a failing cpf must not prevent the others from
            # running
            pass
def _backward_taint(self, current_path, *_, **__):
    """
    Implements the backward taint logic

    If no taint has been applied yet, taints either the memory locations in
    ``self._taint_locs`` or, when there are none, the argument registers of
    the current function (arity taken from its first caller).  On call blocks
    it remembers whether a recv/read-like callee was met and checks whether
    the registers of a matching sink (``self._bb_sinks``) are, or point to,
    tainted data.

    Errors are deliberately swallowed (best effort).

    :param current_path: angr current path
    :return: None
    """
    try:
        p = self._p
        addr = current_path.active[0].addr
        bl = p.factory.block(addr)
        cfg = self._cfg

        if not self._ct.taint_applied:
            if self._taint_locs:
                for mem_addr in self._taint_locs:
                    size = min(
                        self._ct.estimate_mem_buf_size(current_path.active[0], mem_addr),
                        self._ct.taint_buf_size)
                    # same label as the register case below (the original
                    # spelled it 'intial_taint' here only)
                    self._ct.apply_taint(current_path, mem_addr, 'initial_taint',
                                         bit_size=size)
            else:
                no = cfg.model.get_any_node(current_path.active[0].addr)
                if not no:
                    return
                preds = no.predecessors
                if not preds:
                    return
                nargs = get_arity(p, preds[0].addr)
                for i in range(nargs):
                    reg_name = arg_reg_name(p, i)
                    t_addr = getattr(current_path.active[0].regs, reg_name)
                    size = min(
                        self._ct.estimate_mem_buf_size(current_path.active[0], t_addr),
                        self._ct.taint_buf_size)
                    self._ct.apply_taint(current_path, t_addr, 'initial_taint',
                                         bit_size=size)

        # check sink
        if bl.vex.jumpkind == 'Ijk_Call':
            try:
                no = self._cfg.model.get_any_node(addr)
                succ = no.successors[0]
                # NOTE(review): the symbol-name fallback only looks for
                # 'recv' while the node name also accepts 'read'; confirm.
                if (succ.name and ('recv' in succ.name or 'read' in succ.name)) or \
                        'recv' in p.loader.find_symbol(succ.addr).name:
                    self._found_recv = True
            except Exception:
                # unresolved callee: it is simply not a recv
                pass

            next_path = current_path.copy(deep=True).step()
            for curr_sink in (x for x in self._bb_sinks if x[0] == addr):
                for reg_name in curr_sink[1]:
                    m_addr = getattr(next_path.active[0].regs, reg_name)
                    if self._ct.is_or_points_to_tainted_data(m_addr, next_path):
                        self._sink_dep_args = True
                        if self._found_recv:
                            self._sink_bound_to_recv = True
                        self._ct.stop_run()
                        break
    except Exception:
        pass
def _glbl_data_key_setter(self, current_path, data_key, key_addr, core_taint, reg_name, par_vals):
    """
    Check whether a data key is copied to a global structure. This strategy
    is usually used by binaries that talk to themselves.

    The current block must call (through the PLT) one of ``CPY_FUNS``.  The
    binary is considered a setter when, among the call arguments, one is
    tainted, one is a concrete address in ``.bss``/``.data`` and one holds the
    same content as one of the function arguments in ``par_vals``.  In that
    case the role information is stored in ``self._role_info[key_addr]``.

    :param current_path: angr current path
    :param data_key: data key value
    :param key_addr: data key address
    :param core_taint: core taint engine
    :param reg_name: parameter name
    :param par_vals: function argument values
    :return: ``(True, Role.SETTER)`` when the pattern is found,
             ``(False, Role.UNKNOWN)`` otherwise
    :raises TimeOutException: re-raised untouched; other errors are logged
    """
    p = self._p
    cfg = self._cfg
    plt = p.loader.main_bin.reverse_plt
    globl = False
    tainted = False
    arg_copied = False

    try:
        next_path = current_path.copy(copy_states=True).step()
        current_addr = current_path.active[0].addr
        bl = p.factory.block(current_addr)
        if bl.vex.jumpkind != 'Ijk_Call':
            return False, Role.UNKNOWN

        no = cfg.get_any_node(current_addr)
        succ = no.successors[0]
        if succ.addr not in plt:
            return False, Role.UNKNOWN

        # an unnamed node: the named callee is looked up one successor further
        if not succ.name:
            succ = succ.successors[0]

        if succ.name in CPY_FUNS:
            arity = get_arity(p, current_addr)
            # `range`, not the Python-2-only `xrange`
            for narg in range(arity):
                dst_reg = ordered_argument_regs[p.arch.name][narg]
                dst_reg_cnt = getattr(next_path.active[0].regs,
                                      p.arch.register_names[dst_reg])
                cnt_buff = current_path.active[0].memory.load(dst_reg_cnt)

                if core_taint.is_or_points_to_tainted_data(dst_reg_cnt, next_path):
                    tainted = True
                elif dst_reg_cnt.concrete and any(
                        sec.min_addr <= dst_reg_cnt.args[0] <= sec.max_addr
                        for sec in p.loader.main_bin.sections
                        if sec.name in ('.bss', '.data')):
                    globl = True
                elif any(str(cnt_buff) == str(current_path.active[0].memory.load(val))
                         for val in par_vals):
                    arg_copied = True

            if arg_copied and tainted and globl:
                current_function = no.function_address
                addrs = list(current_path.active[0].history.bbl_addrs)

                # last time the current function was entered
                try:
                    pos_call = len(addrs) - 1 - addrs[::-1].index(current_function)
                except ValueError:
                    pos_call = 0

                assert pos_call > 0, 'semantic.run: unable to find the calling block'
                caller_block = addrs[pos_call - 1]

                cno = self._cfg.get_any_node(caller_block)
                assert cno, 'semantic.run: could not get a node :('

                function_x_ref = cno.function_address
                par_id = ordered_argument_regs[p.arch.name].index(
                    p.arch.registers[reg_name][0])

                info = {
                    RoleInfo.ROLE: Role.SETTER,
                    RoleInfo.DATAKEY: data_key,
                    RoleInfo.X_REF_FUN: function_x_ref,
                    RoleInfo.CALLER_BB: caller_block,
                    RoleInfo.ROLE_FUN: current_function,
                    RoleInfo.ROLE_INS: cno.addr,
                    RoleInfo.ROLE_INS_IDX: len(bl.vex.statements),
                    RoleInfo.COMM_BUFF: None,
                    RoleInfo.PAR_N: par_id,
                    RoleInfo.CPF: self._name
                }

                if key_addr not in self._role_info:
                    self._role_info[key_addr] = []
                if info not in self._role_info[key_addr]:
                    self._role_info[key_addr].append(info)
                return True, Role.SETTER

    except TimeOutException:
        raise
    except Exception as e:
        self._log.debug("Semantic cpf. Error: " + str(e))

    # every non-matching path (including a logged error) yields a tuple
    return False, Role.UNKNOWN
def _indirect_access_search(self, current_path, data_key, key_addr, core_taint, reg_name):
    """
    Checks whether tainted data is used indirectly (through a loop and memory
    comparison) to retrieve, or set, data into a structure.

    The current block must call a memcmp/strcmp-like function from within a
    loop (its address already appears in the path history), and the second
    argument of the comparison must either hold tainted content or be the
    concrete address of the data key.

    :param current_path: angr current path
    :param data_key: data key value
    :param key_addr: data key address
    :param core_taint: core taint engine
    :param reg_name: register name
    :return: ``(True, role)`` with role SETTER or GETTER when the pattern is
             found (the role info is stored in ``self._role_info[key_addr]``),
             ``(False, Role.UNKNOWN)`` otherwise
    """
    current_addr = current_path.active[0].addr
    next_path = current_path.copy(copy_states=True)
    next_path.step()
    try:
        next_state = next_path.active[0]
    except TimeOutException:
        raise
    except Exception:
        return False, Role.UNKNOWN

    if self._p.factory.block(current_addr).vex.jumpkind == 'Ijk_Call':
        # there must be a loop, a strcmp-like function and the data_key has
        # to be used as key
        history_bbs = list(current_path.active[0].history.bbl_addrs)
        no = self._cfg.get_any_node(current_addr)

        if no and no.successors:
            for succ in no.successors:
                if not (current_addr in history_bbs and self._is_memcmp_succ(succ)):
                    continue

                # we are calling a strcmp-like function within a loop.
                if not are_parameters_in_registers(self._p):
                    raise Exception("implement me")

                dst_reg = ordered_argument_regs[self._p.arch.name][1]
                dst_addr = getattr(next_state.regs,
                                   self._p.arch.register_names[dst_reg])
                dst_cnt = current_path.active[0].memory.load(dst_addr)

                # the compared buffer is tainted, or it is the data key
                # itself (the original compared the address with itself
                # instead of with key_addr)
                uses_key = core_taint.is_tainted(dst_cnt) or \
                    (dst_addr.concrete and dst_addr.args[0] == key_addr)
                if not uses_key:
                    continue

                # yup! they are looking for some data indexed by the key.
                # Understand if for setting or getting
                current_function = no.function_address
                try:
                    pos_call = len(history_bbs) - 1 - \
                        history_bbs[::-1].index(current_function)
                except ValueError:
                    pos_call = 0

                assert pos_call > 0, 'semantic.run: unable to find the calling block'
                caller_block = history_bbs[pos_call - 1]

                # Heuristic: if the function's arity is greater than two, we
                # assume that the third parameter is the content to store in
                # the shared buffer, making the function itself a setter.
                # FIXME: (limitation) improve this heuristic. One should
                # perform a def-use analysis to see whether the base pointer
                # used as first argument in the strcmp is used to return a
                # value, or to set a value
                nargs = get_arity(self._p, caller_block)
                candidate_role = Role.SETTER if nargs > 2 else Role.GETTER

                caller_node = self._cfg.get_any_node(caller_block)
                assert caller_node, 'semantic.run: could not get a node :('

                function_x_ref = caller_node.function_address
                par_id = ordered_argument_regs[self._p.arch.name].index(
                    self._p.arch.registers[reg_name][0])
                block = self._p.factory.block(caller_node.addr)

                # if the role function is wrapped, report the wrapper and its
                # caller instead
                is_wrapped, wrapp_addr = self._is_wrapped(history_bbs, current_path)
                if is_wrapped:
                    last_index = len(history_bbs) - 1 - \
                        history_bbs[::-1].index(wrapp_addr)
                    if last_index > 0:
                        current_function = wrapp_addr
                        caller_block = history_bbs[last_index - 1]
                        cno = self._cfg.get_any_node(caller_block)
                        function_x_ref = cno.function_address if cno else None

                info = {
                    RoleInfo.ROLE: candidate_role,
                    RoleInfo.DATAKEY: data_key,
                    RoleInfo.X_REF_FUN: function_x_ref,
                    RoleInfo.CALLER_BB: caller_block,
                    RoleInfo.ROLE_FUN: current_function,
                    RoleInfo.ROLE_INS: caller_node.addr,
                    RoleInfo.ROLE_INS_IDX: len(block.vex.statements),
                    RoleInfo.COMM_BUFF: None,
                    RoleInfo.PAR_N: par_id,
                    RoleInfo.CPF: self._name
                }

                if key_addr not in self._role_info:
                    self._role_info[key_addr] = []
                if info not in self._role_info[key_addr]:
                    self._role_info[key_addr].append(info)
                return True, candidate_role

    return False, Role.UNKNOWN
def _check_sink(self, current_path, guards_info, *_, **__):
    """
    Checks whether the taint propagation analysis led to a sink, and performs
    the necessary actions.

    In order, for the current basic block:
      * untaints a variable that the guards show to be bounded;
      * applies the initial taint when the block is the caller block of the
        current role (or when the next block is the configured source
        address);
      * reports a 'sink' alert when the block is a sink reached by tainted
        data;
      * on a call with no active but some unconstrained successor, reports a
        'sink' alert for tainted address-like arguments of a tainted jump
        target, then taints the unconstrained state;
      * reports a 'loop' alert when a back-jump inside the same function is
        guarded by a condition whose two operands both mention the taint
        buffer.

    :param current_path: angr current path
    :param guards_info: guards (ITE) information
    :return: None
    :raises TimeOutException: re-raised untouched; any other error is logged
    """
    try:
        current_state = current_path.active[0]
        current_addr = current_state.addr
        cfg = self._current_cfg

        # count every block this callback is invoked on
        self._visited_bb += 1

        next_path = current_path.copy(copy_states=True).step()
        info = self._current_role_info

        # check constant comparisons and untaint if necessary
        bounded, var = self._is_any_taint_var_bounded(guards_info)
        if bounded:
            self._ct.do_recursive_untaint(var, current_path)

        # If the taint is not applied yet, apply it
        if not self._ct.taint_applied and current_addr == info[
                RoleInfo.CALLER_BB]:
            next_state = next_path.active[0]
            self._apply_taint(current_addr,
                              current_path,
                              next_state,
                              taint_key=True)

        # optional, configuration-driven source address (hex string under the
        # 'eg_souce_addr' key): taint when the next block is that address
        try:
            if len(next_path.active) and self._config['eg_souce_addr']:
                if next_path.active[0].addr == int(
                        self._config['eg_souce_addr'], 16):
                    next_state = next_path.active[0]
                    self._apply_taint(current_addr,
                                      current_path,
                                      next_state,
                                      taint_key=True)
        except TimeOutException as to:
            raise to
        except:
            pass

        if self._is_sink_and_tainted(current_path):
            delta_t = time.time() - self._analysis_starting_time
            self._raised_alert = True
            name_bin = self._ct.p.loader.main_object.binary
            self._report_alert_fun(
                'sink',
                name_bin,
                current_path,
                current_addr,
                self._current_role_info[RoleInfo.DATAKEY],
                pl_name=self._current_cpf_name,
                report_time=delta_t)

        # tainted call address and tainted parameters
        bl = self._current_p.factory.block(current_addr)
        if not len(next_path.active) and len(
                next_path.unconstrained) and bl.vex.jumpkind == 'Ijk_Call':
            # the operand string of the last instruction is used as the name
            # of the register holding the jump target
            cap = bl.capstone.insns[-1]
            vb = bl.vex
            reg_jump = cap.insn.op_str
            val_jump_reg = getattr(next_path.unconstrained[0].regs, reg_jump)
            if not hasattr(vb.next, 'tmp'):
                return
            val_jump_tmp = next_path.unconstrained[0].scratch.temps[
                vb.next.tmp]

            # skip jump targets whose taint was applied by this analysis
            if not self.is_tainted_by_us(
                    val_jump_reg) and not self.is_tainted_by_us(
                        val_jump_tmp):
                if self._ct.is_or_points_to_tainted_data(
                        val_jump_reg, next_path, unconstrained=True):
                    nargs = get_arity(self._current_p,
                                      current_path.active[0].addr)
                    for ord_reg in ordered_argument_regs[
                            self._current_p.arch.name][:nargs]:
                        reg_name = self._current_p.arch.register_names[
                            ord_reg]
                        # the register holding the jump target is not an
                        # argument to check
                        if reg_name == reg_jump:
                            continue

                        reg_val = getattr(next_path.unconstrained[0].regs,
                                          reg_name)
                        if self._ct.is_or_points_to_tainted_data(
                                reg_val, next_path, unconstrained=True
                        ) and self.is_address(reg_val):
                            delta_t = time.time(
                            ) - self._analysis_starting_time
                            self._raised_alert = True
                            name_bin = self._ct.p.loader.main_object.binary
                            self._report_alert_fun(
                                'sink',
                                name_bin,
                                current_path,
                                current_addr,
                                self._current_role_info[RoleInfo.DATAKEY],
                                pl_name=self._current_cpf_name,
                                report_time=delta_t)

                    # remember both jump values (checked by is_tainted_by_us
                    # above on later visits) and taint the unconstrained state
                    next_state = next_path.unconstrained[0]
                    hash_val = self.bv_to_hash(val_jump_tmp)
                    self._taint_names_applied.append(hash_val)
                    hash_val = self.bv_to_hash(val_jump_reg)
                    self._taint_names_applied.append(hash_val)
                    self._apply_taint(current_addr, current_path, next_state)

        # eventually if we are in a loop guarded by a tainted variable
        next_active = next_path.active
        if len(next_active) > 1:
            history_addrs = [t for t in current_state.history.bbl_addrs]
            # successors that were already visited on this path
            seen_addr = [
                a.addr for a in next_active if a.addr in history_addrs
            ]

            if len(seen_addr) == 0:
                return

            # a back-jump targets a lower address than the current block
            back_jumps = [a for a in seen_addr if a < current_addr]
            if len(back_jumps) == 0:
                return

            bj = back_jumps[0]
            node_s = cfg.get_any_node(bj)
            node_f = cfg.get_any_node(current_addr)

            if not node_s or not node_f:
                return

            fun_s = node_s.function_address
            fun_f = node_f.function_address

            # the jump must stay within one function
            if fun_s != fun_f:
                return

            # every resolvable block visited since the first visit of the
            # jump target must belong to that same function
            idx_s = history_addrs.index(bj)
            for a in history_addrs[idx_s:]:
                n = cfg.get_any_node(a)
                if not n:
                    continue

                if n.function_address != fun_s:
                    return

            # if we have a back-jump satisfying all the conditions
            cond_guard = [g for g in next_active[0].guards][-1]

            # both operands of the last guard must mention the taint buffer
            if hasattr(cond_guard, 'args') and len(cond_guard.args) == 2 and \
                    self._ct.taint_buf in str(cond_guard.args[0]) and \
                    self._ct.taint_buf in str(cond_guard.args[1]):
                delta_t = time.time() - self._analysis_starting_time
                self._raised_alert = True
                name_bin = self._ct.p.loader.main_object.binary
                self._report_alert_fun('loop',
                                       name_bin,
                                       current_path,
                                       current_addr,
                                       cond_guard,
                                       pl_name=self._current_cpf_name,
                                       report_time=delta_t)
    except TimeOutException as to:
        raise to
    except Exception as e:
        log.error("Something went terribly wrong: %s" % str(e))