def _inet_pton(self, ct, caller_path, plt_path):
    """
    Function summary standing in for inet_pton.

    The textual address passed as second argument is remembered in
    ``inet_pton_convesions`` under a fresh counter value, and that counter
    value (not the real binary address) is written to the destination buffer
    so the address can be recognised later on.

    :param ct: core taint engine
    :param caller_path: angr path leading to the inet_pton call
    :param plt_path: angr path leading to the plt entry of inet_pton
    :return: None
    """
    project = ct.p
    plt_state = plt_path.active[0]

    # second argument: pointer to the textual address
    src_ptr = getattr(plt_state.regs, arg_reg_name(project, 1))
    ip_text = get_string(project, src_ptr.args[0], extended=True)

    # this is not the conversion inet_pton really performs: the counter is
    # only a token that lets us map the stored value back to the string
    token = self._pton_counter
    inet_pton_convesions[token] = ip_text
    placeholder = BVV(token, project.arch.bits)
    self._pton_counter = token + 1

    # third argument: destination buffer receiving the token
    dst_ptr = getattr(plt_state.regs, arg_reg_name(project, 2))
    plt_state.memory.store(dst_ptr, placeholder)

    # instead of a link register, we hook an unconstrained sim procedure
    # NOTE(review): the rest of the function reads plt_path.active[0]; confirm
    # that plt_path itself exposes .addr
    self._p.hook(plt_path.addr, ReturnUnconstrained())
    caller_path.step().step()
def _check_recv(self, current_path, *_, **__):
    """
    Taint-analysis callback looking for the function receiving tainted data.

    On first use it taints the buffer passed as second argument and the
    current data key. Afterwards, whenever a sink block is reached, it checks
    whether both of the first two arguments (or the words they point to) are
    tainted and, if so, flags the data as read from a socket and stops the run.

    :param current_path: angr current path
    :return: None
    """
    if not self._ct.taint_applied:
        # the buffer is the second argument of the call we are sitting on
        stepped = current_path.copy(deep=True).step()
        recv_buf = getattr(stepped.active[0].regs, arg_reg_name(self._p, 1))
        self._ct.apply_taint(current_path, recv_buf, "buffer_recv")
        self._ct.apply_taint(current_path, self._current_key_addr, "key_str")

    if current_path.active[0].addr not in self._sink_addrs:
        return

    stepped = current_path.copy(deep=True).step()
    sink_state = stepped.active[0]
    first = getattr(sink_state.regs, arg_reg_name(self._p, 0))
    second = getattr(sink_state.regs, arg_reg_name(self._p, 1))

    # either the argument values themselves are tainted ...
    hit = self._ct.is_tainted(first) and self._ct.is_tainted(second)
    if not hit:
        # ... or the memory words they point to are
        hit = (self._ct.is_tainted(sink_state.memory.load(first, self._p.arch.bytes)) and
               self._ct.is_tainted(sink_state.memory.load(second, self._p.arch.bytes)))

    if hit:
        self._read_from_socket = True
        self._ct.stop_run()
        self._stop_run = True
def sprintf(p, core_taint, plt_path, *_, **__):
    """
    sprintf function summary.

    Counts the ``%s`` directives in the format string (second argument) and
    reports whether any of the matching variadic arguments is tainted, or
    points to tainted data.

    :param p: angr project
    :param core_taint: core taint engine
    :param plt_path: path to the plt entry
    :return: True if one of the ``%s`` arguments is tainted, False otherwise
    :raises Exception: when arguments are not passed through registers
    """
    state = plt_path.active[0]
    if not are_parameters_in_registers(p):
        raise Exception("implement me")

    fmt_ptr = getattr(state.regs, arg_reg_name(p, 1))
    fmt = get_string(p, fmt_ptr.args[0], extended=True)
    n_strings = fmt.count('%s')

    # variadic arguments start right after destination and format string
    for slot in range(2, 2 + n_strings):
        reg_name = p.arch.register_names[ordered_argument_regs[p.arch.name][slot]]
        value = getattr(state.regs, reg_name)
        if core_taint.is_tainted(value, path=plt_path):
            return True
        if core_taint.is_tainted(core_taint.safe_load(plt_path, value), path=plt_path):
            return True
    return False
def _save_info_preamble(self, current_path, core_taint):
    """
    Save address of function calls that have tainted parameters.
    This is useful to find wrappers to role functions.

    The address of the current block is appended to ``self._tainted_calls``
    when the CFG reports it as a function entry and at least one of its
    argument registers holds, or points to, tainted data.

    :param current_path: angr current path
    :param core_taint: core taint engine
    :return: None
    :raises TimeOutException: re-raised untouched; any other ``Exception`` is
        swallowed because this bookkeeping is best effort.
    """
    try:
        no = self._cfg.model.get_any_node(current_path.active[0].addr)
        if not no or no.function_address != no.addr:
            return

        # fetch the predecessor list once instead of on every use
        preds = no.predecessors
        if not preds:
            return

        if len(preds) == 1:
            # check if the current address is just a normal basic block
            # that angr mistaken for a function because it contains a function preamble
            if self._p.factory.block(preds[0].addr).vex.jumpkind != 'Ijk_Call':
                return

        # the arity is estimated from the first predecessor (the call site)
        arity = get_arity(self._p, preds[0].addr)
        for narg in range(arity):
            dst_addr = getattr(current_path.active[0].regs, arg_reg_name(self._p, narg))
            if core_taint.is_or_points_to_tainted_data(dst_addr, current_path):
                self._tainted_calls.append(no.addr)
                break
    except TimeOutException:
        raise
    except Exception:
        # was a bare ``except``: KeyboardInterrupt/SystemExit must not be
        # swallowed by a best-effort bookkeeping step
        return
def _find_taint_callers(self, current_path, *_, **__):
    """
    Collect call sites that pass tainted arguments.

    For every block ending in a call, each argument register that holds, or
    points to, tainted data adds a ``(call-site address, register name)``
    entry to ``self._tainted_callsites``.

    :param current_path: angr current path
    :return: None
    """
    state = current_path.active[0]
    project = self._current_p

    if project.factory.block(state.addr).vex.jumpkind != 'Ijk_Call':
        return

    stepped = current_path.copy(deep=True).step()
    for index in range(get_arity(project, state.addr)):
        value = getattr(stepped.active[0].regs, arg_reg_name(project, index))
        if not self._core_taint.is_or_points_to_tainted_data(value, stepped):
            continue
        self._tainted_callsites.append((state.addr, arg_reg_name(project, index)))
def _check_getter_sink(self, current_path, *_, **__):
    """
    Sink check for the getter role.

    The sink here is a memcmp-like function: if both of its first two
    parameters are tainted, the data key is compared against some content
    read from a file. In that case the run is stopped and
    ``self._read_from_file`` is set.

    :param current_path: angr current path
    :return: None
    """
    if current_path.active[0].addr != self._sink_addr:
        return

    stepped = current_path.copy(deep=True).step()
    lhs = getattr(stepped.active[0].regs, arg_reg_name(self._p, 0))
    rhs = getattr(stepped.active[0].regs, arg_reg_name(self._p, 1))

    if not self._ct.is_tainted(lhs):
        return
    if not self._ct.is_tainted(rhs):
        return

    self._read_from_file = True
    self._ct.stop_run()
    self._stop_run = True
def _htons(self, ct, caller_path, plt_path):
    """
    htons summary.

    htons is modelled as the identity: its argument is copied to the return
    register and, when concrete, remembered in ``self._last_port``. The blocks
    following the call are then saved in ``self._sink_addrs`` so the matching
    ip can be collected once execution reaches one of them.

    :param ct: core taint engine
    :param caller_path: angr path leading to the htons
    :param plt_path: angr path leading to the plt entry of htons
    :return: None
    """
    from collections import deque

    p = ct.p
    plt_state = plt_path.active[0]

    # move port from argument to return register
    port_n = getattr(plt_state.regs, arg_reg_name(p, 0))
    setattr(plt_state.regs, ret_reg_name(p), port_n)
    if port_n.concrete:
        self._last_port = port_n.args[0]

    # now we have to find the ip, we assume the code complies to the
    # form: address.sin_port = htons( PORT );
    # first get the node
    htons_caller_node = self._cfg.model.get_any_node(caller_path.active[0].addr)
    sections = p.loader.main_object.sections

    # Explicit worklist + visited set. The previous code appended to the
    # successor list while iterating over it, which never terminates when the
    # non-.text successors form a cycle and may alter a list owned by the CFG.
    # A missing caller node is treated as "no successor found".
    pending = deque(htons_caller_node.successors if htons_caller_node else ())
    visited = set()  # assumes CFG nodes are hashable (graph nodes) - TODO confirm
    next_block_addrs = []
    while pending:
        succ = pending.popleft()
        if succ in visited:
            continue
        visited.add(succ)

        region = sections.find_region_containing(succ.addr)
        if region and region.name == '.text':
            # as the CFG has a fixed context sensitivity and there might be multiple calls to
            # htons, we might have multiple successors after the htons call. At this stage
            # we do not know which corresponds to the current running path. We save them all and
            # we keep symbolically executing the binary; when we encounter one of these blocks,
            # we stop again and collect the port
            next_block_addrs.append(succ.addr)
        else:
            # not in .text (e.g. a stub): keep following its successors
            pending.extend(succ.successors)

    if next_block_addrs:
        self._sink_addrs = next_block_addrs
    else:
        # we couldn't match the port with a valid ip
        self._last_port = None

    # instead of a link register, we hook an unconstrained sim procedure
    # NOTE(review): the rest of the function reads plt_path.active[0]; confirm
    # that plt_path itself exposes .addr
    self._p.hook(plt_path.addr, ReturnUnconstrained())
    caller_path.step().step()
def _forward_taint(self, current_path, *_, **__):
    """
    Implements the forward taint logic.

    When the current block is one of ``self._bb_sources`` (and was not handled
    before) the registers listed for that source are tainted. Afterwards, if
    the block ends in a call to a function whose name contains one of
    ``CMP_SUCCS`` and any of its arguments holds, or points to, tainted data,
    ``self._sink_bound_to_recv`` is set and the run is stopped.

    The whole step is best effort: failures are logged at debug level and
    otherwise ignored.

    :param current_path: angr current path
    :return: None
    """
    try:
        p = self._p
        addr = current_path.active[0].addr
        bl = p.factory.block(addr)

        # apply the taint the first time a source block is reached
        source = next((x for x in self._bb_sources if x[0] == addr), None)
        if source is not None and addr not in self._taint_applied_sources:
            self._taint_applied_sources.append(addr)
            self.apply_ret_taint = True
            for reg in source[1]:
                t_addr = getattr(current_path.active[0].regs, reg)
                size = min(self._ct.estimate_mem_buf_size(current_path.active[0], t_addr),
                           self._ct.taint_buf_size)
                self._ct.apply_taint(current_path, t_addr, 'initial_taint', bit_size=size)

        # check sink
        if bl.vex.jumpkind == 'Ijk_Call' and self._ct.taint_applied:
            try:
                next_path = current_path.copy(deep=True).step()
                no = self._cfg.model.get_any_node(addr)
                succs = no.successors if no else []
                if succs:
                    succ = succs[0]

                    # prefer the CFG name, fall back to the loader symbol;
                    # a missing symbol simply means "not a comparison"
                    is_cmp = bool(succ.name) and any(x in succ.name for x in CMP_SUCCS)
                    if not is_cmp:
                        symbol = p.loader.find_symbol(succ.addr)
                        sym_name = symbol.name if symbol is not None else None
                        is_cmp = bool(sym_name) and any(x in sym_name for x in CMP_SUCCS)

                    if is_cmp:
                        nargs = get_arity(p, no.addr)
                        for i in range(nargs):
                            reg_name = arg_reg_name(p, i)
                            if self._ct.is_or_points_to_tainted_data(
                                    getattr(next_path.active[0].regs, reg_name), next_path):
                                self._sink_bound_to_recv = True
                                self._ct.stop_run()
            except Exception:
                # was a bare ``except``; KeyboardInterrupt/SystemExit now propagate
                import logging
                logging.getLogger(__name__).debug("_forward_taint: sink check skipped",
                                                  exc_info=True)
    except Exception:
        # NOTE(review): if the analysis timeout is delivered as an Exception
        # subclass it is still swallowed here, exactly as before - confirm
        # whether it should be re-raised
        import logging
        logging.getLogger(__name__).debug("_forward_taint: step skipped", exc_info=True)
def _save_file_name(self, current_path, guards_info, *_, **__):
    """
    Remember the file name passed to the open function, apply the taint and
    delegate to the currently installed sink check.

    The taint is applied only once (while ``self._ct.taint_applied`` is
    false); ``self._check_func`` is invoked on every call.

    :param current_path: angr current path
    :param guards_info: guards info
    :return: None
    """
    if not self._ct.taint_applied:
        # the file name is the first argument of the call we are sitting on
        stepped = current_path.copy(deep=True).step()
        name_ptr = getattr(stepped.active[0].regs, arg_reg_name(self._p, 0))

        # the actual string can only be recovered from a concrete pointer
        if name_ptr.concrete:
            self._last_file_name = get_string(self._p, name_ptr.args[0], extended=True)

        self._ct.apply_taint(current_path, name_ptr, "filename")
        self._ct.apply_taint(current_path, self._current_key_addr, "key_str")

    self._check_func(current_path, guards_info, *_, **__)
def _check_key_usage(self, current_path, *_, **__):
    """
    Runs every cpf on the current path to check whether the role of the
    binary can be inferred with the current info.

    :param current_path: current path given by the taint analysis
    :return: None
    """
    # retrieve and save, once, the argument values of the function where the
    # taint analysis starts
    if not self._f_arg_vals and self._set_f_vals:
        self._set_f_vals = False
        n_args = max(get_arity(self._current_p, self._current_f_addr), DEF_ROLE_ARITY)
        for index in range(n_args):
            value = getattr(current_path.active[0].regs, arg_reg_name(self._current_p, index))
            self._f_arg_vals.append(value)

    for pl in self._cpfs[self._current_bin]:
        try:
            found, role = pl.run(self._current_data_key, self._current_key_addr,
                                 self._current_par_name, self._core_taint,
                                 current_path, self._f_arg_vals)
            if not found:
                continue

            log.debug(f"Using cpf {pl.name} with role {role}")
            self._current_role = role
            self._cpf_used = pl

            # a match from the semantic / setter-getter cpfs does not end the
            # search; any other match stops the taint run
            if pl not in (semantic.Semantic, setter_getter.SetterGetter):
                self._core_taint.stop_run()
                break
        except Exception as e:
            log.warning(f"Exception CPF {pl.name}: {e}")
def _backward_taint(self, current_path, *_, **__):
    """
    Implements the backward taint logic.

    On first use the taint is applied either to the memory locations in
    ``self._taint_locs`` or, when there are none, to every argument register
    of the current function (arity estimated from the first CFG predecessor).
    For blocks ending in a call it then records whether a recv/read-like
    function is being called and whether any register listed in
    ``self._bb_sinks`` for this block holds, or points to, tainted data.

    The whole step is best effort: failures are logged at debug level and
    otherwise ignored.

    :param current_path: angr current path
    :return: None
    """
    try:
        p = self._p
        addr = current_path.active[0].addr
        bl = p.factory.block(addr)
        cfg = self._cfg

        if not self._ct.taint_applied:
            if self._taint_locs:
                for mem_addr in self._taint_locs:
                    size = min(self._ct.estimate_mem_buf_size(current_path.active[0], mem_addr),
                               self._ct.taint_buf_size)
                    # NOTE(review): label is spelled 'intial_taint' here but
                    # 'initial_taint' below; kept as is - confirm nothing
                    # matches on it before unifying
                    self._ct.apply_taint(current_path, mem_addr, 'intial_taint', bit_size=size)
            else:
                no = cfg.model.get_any_node(current_path.active[0].addr)
                if not no:
                    return
                preds = no.predecessors
                if not preds:
                    return
                nargs = get_arity(p, preds[0].addr)
                for i in range(nargs):
                    reg_name = arg_reg_name(p, i)
                    t_addr = getattr(current_path.active[0].regs, reg_name)
                    size = min(self._ct.estimate_mem_buf_size(current_path.active[0], t_addr),
                               self._ct.taint_buf_size)
                    self._ct.apply_taint(current_path, t_addr, 'initial_taint', bit_size=size)

        # check sink
        if bl.vex.jumpkind == 'Ijk_Call':
            try:
                no = cfg.model.get_any_node(addr)
                succs = no.successors if no else []
                if succs:
                    succ = succs[0]
                    if succ.name and ('recv' in succ.name or 'read' in succ.name):
                        self._found_recv = True
                    else:
                        # fall back to the loader symbol; a missing symbol
                        # simply means "not a recv"
                        symbol = p.loader.find_symbol(succ.addr)
                        sym_name = symbol.name if symbol is not None else None
                        if sym_name and 'recv' in sym_name:
                            self._found_recv = True
            except Exception:
                # was a bare ``except``; KeyboardInterrupt/SystemExit now propagate
                import logging
                logging.getLogger(__name__).debug("_backward_taint: recv lookup skipped",
                                                  exc_info=True)

            next_path = current_path.copy(deep=True).step()
            for curr_sink in (x for x in self._bb_sinks if x[0] == addr):
                for reg_name in curr_sink[1]:
                    m_addr = getattr(next_path.active[0].regs, reg_name)
                    if self._ct.is_or_points_to_tainted_data(m_addr, next_path):
                        self._sink_dep_args = True
                        if self._found_recv:
                            self._sink_bound_to_recv = True
                            self._ct.stop_run()
                        # one tainted register is enough for this sink entry
                        break
    except Exception:
        # NOTE(review): if the analysis timeout is delivered as an Exception
        # subclass it is still swallowed here, exactly as before - confirm
        # whether it should be re-raised
        import logging
        logging.getLogger(__name__).debug("_backward_taint: step skipped", exc_info=True)
def _indirect_access_search(self, current_path, data_key, key_addr, core_taint, reg_name, next_path):
    """
    Checks whether tainted data is used indirectly (through a loop and memory comparison) to retrieve, or set,
    data into a structure.

    A match requires the current block to end in a call to a memcmp-like successor
    (``self._is_memcmp_succ``), the current block to already appear in the path history
    (i.e. we are looping), and the second argument of the call to be concrete or to point
    to tainted content. On a match a role-info record is stored in ``self._role_info[key_addr]``.

    :param current_path: angr current path
    :param data_key: data key value
    :param key_addr: data key address
    :param core_taint: core taint engine
    :param reg_name: register name
    :param next_path: path obtained by stepping current_path; argument registers are read from it
    :return: tuple (found, role); (False, Role.UNKNOWN) when nothing matched
    :raises TimeOutException: re-raised untouched
    :raises Exception: when arguments are not passed through registers ("implement me")
    :raises AssertionError: when the calling block, or its CFG node, cannot be found
    """
    # NOTE(review): block nesting below was reconstructed from a whitespace-collapsed
    # copy of this method - double check against version control
    current_addr = current_path.active[0].addr
    try:
        # fails when the stepped path has no active state: nothing to inspect
        next_state = next_path.active[0]
    except TimeOutException:
        raise
    except Exception:
        return False, Role.UNKNOWN

    if self._p.factory.block(current_addr).vex.jumpkind == 'Ijk_Call':
        # there must be a loop, a strcmp-like function and the data_key has to be used as key
        history_bbs = [x for x in current_path.active[0].history.bbl_addrs]
        no = self._cfg.model.get_any_node(current_addr)
        if no and no.successors:
            for succ in no.successors:
                # being already in the history means this call site was visited before (loop)
                if current_addr in history_bbs and self._is_memcmp_succ(succ):
                    # we are calling a strcmp-like function within a loop.
                    if not are_parameters_in_registers(self._p):
                        raise Exception("implement me")

                    # second argument of the comparison, and the word it points to
                    dst_addr = getattr(next_state.regs, arg_reg_name(self._p, 1))
                    dst_cnt = current_path.active[0].memory.load(dst_addr, self._p.arch.bytes)
                    # eq = int(dst_addr.args[0]) == dst_addr
                    concrete = dst_addr.concrete
                    if core_taint.is_tainted(dst_cnt) or concrete:
                        # yup! they are looking for some data indexed by the key. Understand if for setting
                        # or getting
                        current_function = no.function_address
                        try:
                            # index of the last occurrence of the function entry in the history
                            pos_call = len(history_bbs) - 1 - history_bbs[::-1].index(current_function)
                        except TimeOutException:
                            raise
                        except:
                            pos_call = 0
                        assert pos_call > 0, 'semantic.run: unable to find the calling block'

                        # the block executed right before the function entry is its caller
                        caller_block = history_bbs[pos_call - 1]

                        # Heuristic: if the function's arity is greater than two, we assume
                        # that the third parameter is the content to store in the shared buffer, making
                        # the function itself a setter.
                        # FIXME: (limitation) improve this heuristic. One should perform a def-use analysis to
                        # see whether the base pointer used as first argument in the strcmp is used to return a
                        # value, or to set a value
                        nargs = get_arity(self._p, caller_block)
                        if nargs > 2:
                            candidate_role = Role.SETTER
                        else:
                            candidate_role = Role.GETTER

                        no = self._cfg.model.get_any_node(caller_block)
                        assert no, 'semantic.run: could not get a node :('

                        function_x_ref = no.function_address
                        block = self._p.factory.block(no.addr)

                        # when the role function is wrapped, report the wrapper and its caller instead
                        is_wrapped, wrapp_addr = self._is_wrapped(history_bbs, current_path)
                        if is_wrapped:
                            last_index = len(history_bbs) - 1 - history_bbs[::-1].index(wrapp_addr)
                            if last_index > 0:
                                current_function = wrapp_addr
                                caller_block = history_bbs[last_index - 1]
                                cno = self._cfg.model.get_any_node(caller_block)
                                function_x_ref = None
                                if cno:
                                    function_x_ref = cno.function_address

                        info = {
                            RoleInfo.ROLE: candidate_role,
                            RoleInfo.DATAKEY: data_key,
                            RoleInfo.X_REF_FUN: function_x_ref,
                            RoleInfo.CALLER_BB: caller_block,
                            RoleInfo.ROLE_FUN: current_function,
                            RoleInfo.ROLE_INS: no.addr,
                            RoleInfo.ROLE_INS_IDX: len(block.vex.statements),
                            RoleInfo.COMM_BUFF: None,
                            RoleInfo.PAR_N: arg_reg_id(self._p, reg_name),
                            RoleInfo.CPF: self._name
                        }

                        # store each distinct record once per key address
                        if key_addr not in self._role_info:
                            self._role_info[key_addr] = []
                        if info not in self._role_info[key_addr]:
                            self._role_info[key_addr].append(info)
                        return True, candidate_role

    return False, Role.UNKNOWN
def _glbl_data_key_setter(self, current_path, data_key, key_addr, core_taint, reg_name, par_vals, next_path):
    """
    Check whether a data key is copied to a global structure. This strategy
    is usually used by binaries that talk to themselves.

    The current block must call, through a plt stub, one of ``CPY_FUNS``.
    The call is reported as a setter when, among its arguments, one is
    tainted, one is a concrete address inside ``.bss``/``.data`` and one
    points to the same content as one of ``par_vals``.

    :param current_path: angr current path
    :param data_key: data key value
    :param key_addr: data key address
    :param core_taint: core taint engine
    :param reg_name: parameter name
    :param par_vals: function argument values
    :param next_path: path obtained by stepping current_path
    :return: tuple (found, role); (False, Role.UNKNOWN) when nothing matched
    """
    project = self._p
    saw_global = saw_tainted = saw_param_copy = False

    try:
        call_site = current_path.active[0].addr
        block = project.factory.block(call_site)
        if block.vex.jumpkind != 'Ijk_Call':
            return False, Role.UNKNOWN

        node = self._cfg.model.get_any_node(call_site)
        callee = node.successors[0]
        if not self._p.loader.find_plt_stub_name(callee.addr):
            return False, Role.UNKNOWN
        if not callee.name:
            # unnamed stub: the named function is one hop further
            callee = callee.successors[0]
        if callee.name not in CPY_FUNS:
            return False, Role.UNKNOWN

        for index in range(get_arity(project, call_site)):
            arg_val = getattr(next_path.active[0].regs, arg_reg_name(project, index))
            pointee = current_path.active[0].memory.load(arg_val, project.arch.bytes)

            if core_taint.is_or_points_to_tainted_data(arg_val, next_path):
                saw_tainted = True
                continue

            if arg_val.concrete:
                in_global = [sec.min_addr <= arg_val.args[0] <= sec.max_addr
                             for sec in project.loader.main_object.sections
                             if sec.name in ('.bss', '.data')]
                if any(in_global):
                    saw_global = True
                    continue

            same_content = [str(pointee) == str(current_path.active[0].memory.load(val, project.arch.bytes))
                            for val in par_vals]
            if any(same_content):
                saw_param_copy = True

        if not (saw_param_copy and saw_tainted and saw_global):
            return False, Role.UNKNOWN

        role_function = node.function_address
        history = list(current_path.active[0].history.bbl_addrs)
        try:
            # index of the last occurrence of the function entry in the history
            entry_pos = len(history) - 1 - history[::-1].index(role_function)
        except TimeOutException:
            raise
        except:
            entry_pos = 0
        assert entry_pos > 0, 'semantic.run: unable to find the calling block'

        # the block executed right before the function entry is its caller
        caller_block = history[entry_pos - 1]
        caller_node = self._cfg.model.get_any_node(caller_block)
        assert caller_node, 'semantic.run: could not get a node :('

        info = {
            RoleInfo.ROLE: Role.SETTER,
            RoleInfo.DATAKEY: data_key,
            RoleInfo.X_REF_FUN: caller_node.function_address,
            RoleInfo.CALLER_BB: caller_block,
            RoleInfo.ROLE_FUN: role_function,
            RoleInfo.ROLE_INS: caller_node.addr,
            RoleInfo.ROLE_INS_IDX: len(block.vex.statements),
            RoleInfo.COMM_BUFF: None,
            RoleInfo.PAR_N: arg_reg_id(project, reg_name),
            RoleInfo.CPF: self._name,
        }

        # store each distinct record once per key address
        if key_addr not in self._role_info:
            self._role_info[key_addr] = []
        if info not in self._role_info[key_addr]:
            self._role_info[key_addr].append(info)
        return True, Role.SETTER

    except TimeOutException:
        raise
    except Exception as e:
        self._log.debug(f"Semantic cpf. Error: {str(e)}")

    return False, Role.UNKNOWN
def run(self, data_key, key_addr, reg_name, core_taint, current_path, *kargs, **kwargs):
    """
    Run this CPF.

    Looks at the function called from the current block and decides whether
    the data key is sent over a socket (setter), compared against data
    received from a socket (getter) or handed directly to a receiving
    function (getter). On success a role-info record is stored in
    ``self._role_info[key_addr]``.

    :param data_key: data key
    :param key_addr: data key address
    :param reg_name: register name where the address is stored
    :param core_taint: core taint engine
    :param current_path: angr current path
    :param kargs: kargs
    :param kwargs: kwargs
    :return: tuple (found, role); (False, Role.UNKNOWN) when no role was inferred
    :raises Exception: when arguments are not passed through registers
    """
    project = self._p
    cfg = self._cfg
    self._current_key_addr = key_addr

    callee_path = current_path
    callee_addr = current_path.active[0].addr

    # we jump into the GoT if we have an extern call
    stepped = current_path.copy(deep=True).step()
    if stepped.active and self._p.loader.find_plt_stub_name(stepped.active[0].addr):
        callee_path = stepped.copy(deep=True).step()
        callee_addr = callee_path.active[0].addr

    if not are_parameters_in_registers(project):
        raise Exception("socket.run: Implement me")

    callee_node = cfg.model.get_any_node(callee_addr)
    call_site = current_path.active[0].addr
    role = Role.UNKNOWN
    x_ref_fun = cfg.model.get_any_node(call_site)

    def arg_value(index):
        # value of the index-th argument register at the callee
        return getattr(callee_path.active[0].regs, arg_reg_name(project, index))

    if callee_node and callee_node.name and '+' not in callee_node.name:
        lowered = str(callee_node.name).lower()

        if lowered in M_SET_KEYWORD:
            # setter: the key is the second argument of a sending function
            value = arg_value(1)
            pointee = core_taint.safe_load(callee_path, value)
            if core_taint.is_tainted(value) or core_taint.is_tainted(pointee):
                self._log.debug("tainted data is sent over socket.. looking for binding values")
                role = Role.SETTER
                self._find_binding(current_path)

        elif lowered in CMP_KEYWORD:
            # getter are more complicated. We have to understand whether the data key is
            # compared against some content retrieved from socket. Check both args.
            value_0 = arg_value(0)
            pointee_0 = core_taint.safe_load(callee_path, value_0)
            hit_0 = core_taint.is_tainted(value_0) or core_taint.is_tainted(pointee_0, path=callee_path)

            value_1 = arg_value(1)
            pointee_1 = core_taint.safe_load(callee_path, value_1)
            hit_1 = core_taint.is_tainted(value_1) or core_taint.is_tainted(pointee_1, path=callee_path)

            if hit_0 or hit_1:
                self._log.debug("tainted data used in a memcmp-like function.. looking for recv")
                self._stop_run = False
                self._sink_addrs = [call_site]

                # if the data key is passed to a memcmp-like function
                # we have to make sure that the content it is compared against is retrieved
                # from socket. We have to find the recv.
                self._find_recv(current_path)

                if self._read_from_socket:
                    role = Role.GETTER
                    self._stop_run = False
                    self._find_binding(current_path)

        elif M_GET_KEYWORD in lowered:
            # the data key is passed directly to a getter function
            self._read_from_socket = True
            value = arg_value(1)
            hit = core_taint.is_tainted(value)
            if not hit:
                hit = core_taint.is_tainted(core_taint.safe_load(callee_path, value), path=callee_path)

            if hit:
                self._log.debug("tainted data is received from socket.. looking for binding values")
                role = Role.GETTER
                self._find_binding(current_path)

    if role == Role.UNKNOWN:
        return False, Role.UNKNOWN

    # if the data key contains the ":%s", ":5d" and so forth, we remove it
    data_key = data_key.split(":%")[0]
    self._data_keys.append(data_key)
    self._roles.append(role)

    info = {
        RoleInfo.ROLE: role,
        RoleInfo.DATAKEY: data_key,
        RoleInfo.X_REF_FUN: x_ref_fun,
        RoleInfo.CALLER_BB: call_site,
        RoleInfo.ROLE_FUN: callee_addr,
        RoleInfo.ROLE_INS: callee_addr,
        RoleInfo.ROLE_INS_IDX: None,
        RoleInfo.COMM_BUFF: None,
        RoleInfo.PAR_N: arg_reg_id(project, reg_name),
        RoleInfo.CPF: self._name,
    }

    # store each distinct record once per key address
    if key_addr not in self._role_info:
        self._role_info[key_addr] = []
    if info not in self._role_info[key_addr]:
        self._role_info[key_addr].append(info)
    return True, role
def run(self, data_key, key_addr, reg_name, core_taint, current_path, *kargs, **kwargs):
    """
    Run this CPF.

    Looks at the function called from the current block and decides whether
    the data key is written to a file (setter) or compared against content
    read from a file (getter). On success a role-info record is stored in
    ``self._role_info[key_addr]`` and the file name found is remembered in
    ``self._name_files``.

    :param data_key: data key
    :param key_addr: data key address
    :param reg_name: register name where the address is stored
    :param core_taint: core taint engine
    :param current_path: angr current path
    :param kargs: kargs
    :param kwargs: kwargs
    :return: tuple (found, role); (False, Role.UNKNOWN) when no role was inferred
    :raises Exception: when arguments are not passed through registers
    """
    p = self._p
    cfg = self._cfg
    self._current_key_addr = key_addr
    path_copy = current_path

    # Fixed: this used to store the state object itself instead of its
    # address, so the CFG lookup below (and ROLE_FUN / ROLE_INS) received a
    # state whenever the call was not an extern call.
    addr = current_path.active[0].addr

    next_path = current_path.copy(deep=True).step()
    # we jump into the GoT if we have an extern call
    if next_path.active and self._p.loader.find_plt_stub_name(next_path.active[0].addr):
        path_copy = next_path.copy(deep=True).step()
        addr = path_copy.active[0].addr

    # Fixed: go through cfg.model like the x-ref lookup further down
    node = cfg.model.get_any_node(addr)

    if not are_parameters_in_registers(p):
        raise Exception("file.run: Implement me")

    block_caller_role_function = current_path.active[0].addr

    if node and node.name and '+' not in node.name:
        candidate_role = Role.UNKNOWN
        callee_name = str(node.name).lower()

        # Setter
        if callee_name in M_SET_KEYWORD:
            # check whether if the data_key is passed.
            # NOTE(review): reg_name (a parameter) is rebound here and in the
            # getter branch, and the rebound value is what ends up in PAR_N -
            # kept as is, confirm this is intended
            reg_name = arg_reg_name(p, 1)
            reg_cnt = getattr(path_copy.active[0].regs, reg_name)
            reg_cnt_loaded = core_taint.safe_load(path_copy, reg_cnt)
            tainted = core_taint.is_tainted(reg_cnt) or \
                core_taint.is_tainted(reg_cnt_loaded, path=path_copy)
            if tainted:
                candidate_role = Role.SETTER
                # we got to find the filename
                self._sink_addr = block_caller_role_function
                self._stop_run = False
                self._find_file_name(current_path, self._check_setter_sink)

        # getter
        # getter are more complicated. We have to understand whether the data_key is compared against
        # some content taken from a file
        if callee_name in CMP_KEYWORD:
            # check whether if the data_key is passed. We have to check both args
            reg_name = arg_reg_name(p, 0)
            reg_cnt_0 = getattr(path_copy.active[0].regs, reg_name)
            reg_cnt_loaded_0 = core_taint.safe_load(path_copy, reg_cnt_0)
            tainted_0 = core_taint.is_tainted(reg_cnt_0) or \
                core_taint.is_tainted(reg_cnt_loaded_0, path=path_copy)

            reg_name = arg_reg_name(p, 1)
            reg_cnt_1 = getattr(path_copy.active[0].regs, reg_name)
            reg_cnt_loaded_1 = core_taint.safe_load(path_copy, reg_cnt_1)
            tainted_1 = core_taint.is_tainted(reg_cnt_1) or \
                core_taint.is_tainted(reg_cnt_loaded_1)

            if tainted_0 or tainted_1:
                self._sink_addr = block_caller_role_function
                self._stop_run = False
                # as in this case we do not know yet whether the data_key comes from a file
                # (we only found a strcmp), we start looking for the open only if it is within
                # two hops from the strcmp.
                self._find_file_name(current_path, self._check_getter_sink)
                if self._read_from_file:
                    candidate_role = Role.GETTER

        if candidate_role != Role.UNKNOWN:
            self._data_keys.append(data_key)
            self._roles.append(candidate_role)
            x_ref_fun = cfg.model.get_any_node(block_caller_role_function)

            # if the data_key contains the ":%s", ":5d" and so forth, we remove it
            data_key = data_key.split(":%")[0]
            if data_key:
                info = {
                    RoleInfo.ROLE: candidate_role,
                    RoleInfo.DATAKEY: data_key,
                    RoleInfo.X_REF_FUN: x_ref_fun,
                    RoleInfo.CALLER_BB: block_caller_role_function,
                    RoleInfo.ROLE_FUN: addr,
                    RoleInfo.ROLE_INS: addr,
                    RoleInfo.ROLE_INS_IDX: None,
                    RoleInfo.COMM_BUFF: None,
                    RoleInfo.PAR_N: arg_reg_id(p, reg_name),
                    RoleInfo.CPF: self._name,
                }

                # store each distinct record once per key address
                if key_addr not in self._role_info:
                    self._role_info[key_addr] = []
                if info not in self._role_info[key_addr]:
                    self._role_info[key_addr].append(info)
                if self._last_file_name not in self._name_files:
                    self._name_files.append(self._last_file_name)
            return True, candidate_role

    return False, Role.UNKNOWN
def _check_sink(self, current_path, guards_info, *_, **__):
    """
    Checks whether the taint propagation analysis lead to a sink, and performs the necessary actions

    For every visited block, in this order: untaint variables bounded by a constant
    comparison, apply the initial taint when the caller block of the role function (or the
    configured ``eg_source_addr``) is reached, report a tainted sink, report an indirect call
    whose target and parameters are tainted, and report a loop whose guard is tainted.
    Alerts go through ``self._report_alert_fun`` and set ``self._raised_alert``.

    :param current_path: angr current path
    :param guards_info: guards (ITE) information
    :return: None
    :raises TimeOutException: re-raised untouched; any other exception is logged and swallowed
    """
    # NOTE(review): block nesting below was reconstructed from a whitespace-collapsed
    # copy of this method - double check against version control
    try:
        current_state = current_path.active[0]
        current_addr = current_state.addr
        cfg = self._current_cfg
        self._visited_bb += 1

        # work on a stepped deep copy so current_path itself is not advanced
        next_path = current_path.copy(deep=True).step()
        info = self._current_role_info

        # check constant comparisons and untaint if necessary
        bounded, var = self._is_any_taint_var_bounded(guards_info)
        if bounded:
            self._ct.do_recursive_untaint(var, current_path)

        # If the taint is not applied yet, apply it
        if not self._ct.taint_applied and current_addr == info[RoleInfo.CALLER_BB]:
            next_state = next_path.active[0]
            self._apply_taint(current_addr, current_path, next_state, taint_key=True,
                              data_key_reg=arg_reg_name(self._current_p, info[RoleInfo.PAR_N]))

        try:
            # optional extra taint source given in the configuration as a hex string
            if 'eg_source_addr' in self._config and len(next_path.active) and self._config['eg_source_addr']:
                if next_path.active[0].addr == int(self._config['eg_source_addr'], 16):
                    next_state = next_path.active[0]
                    self._apply_taint(current_addr, current_path, next_state, taint_key=True)
        except TimeOutException as to:
            raise to
        except Exception as e:
            pass

        if self._is_sink_and_tainted(current_path, next_path):
            delta_t = time.time() - self._analysis_starting_time
            self._raised_alert = True
            name_bin = self._ct.p.loader.main_object.binary
            log.info("Found a tainted sink! Reporting alert!")
            self._report_alert_fun('sink', name_bin, current_path, current_addr,
                                   self._current_role_info[RoleInfo.DATAKEY],
                                   pl_name=self._current_cpf_name, report_time=delta_t)

        # tainted call address and tainted parameters
        bl = self._current_p.factory.block(current_addr)
        # a call that leaves no active state but an unconstrained one: the target is not concrete
        if not len(next_path.active) and len(next_path.unconstrained) and bl.vex.jumpkind == 'Ijk_Call':
            cap = bl.capstone.insns[-1]
            vb = bl.vex
            # operand string of the last instruction, used as a register name below
            # (assumes the call operand is a bare register name - TODO confirm)
            reg_jump = cap.insn.op_str
            val_jump_reg = getattr(next_path.unconstrained[0].regs, reg_jump)
            if not hasattr(vb.next, 'tmp'):
                return
            val_jump_tmp = next_path.unconstrained[0].scratch.temps[vb.next.tmp]

            # skip jump targets carrying a taint this method applied itself (see the end of this branch)
            if not self.is_tainted_by_us(val_jump_reg) and not self.is_tainted_by_us(val_jump_tmp):
                if self._ct.is_or_points_to_tainted_data(val_jump_reg, next_path, unconstrained=True):
                    nargs = get_arity(self._current_p, current_path.active[0].addr)
                    for reg_name in arg_reg_names(self._current_p, nargs):
                        # the register holding the call target is not a parameter
                        if reg_name == reg_jump:
                            continue

                        reg_val = getattr(next_path.unconstrained[0].regs, reg_name)
                        if self._ct.is_or_points_to_tainted_data(reg_val, next_path, unconstrained=True) \
                                and self.is_address(reg_val):
                            delta_t = time.time() - self._analysis_starting_time
                            self._raised_alert = True
                            name_bin = self._ct.p.loader.main_object.binary
                            log.info("Found a tainted call with tainted parameters! Reporting alert!")
                            self._report_alert_fun('sink', name_bin, current_path, current_addr,
                                                   self._current_role_info[RoleInfo.DATAKEY],
                                                   pl_name=self._current_cpf_name, report_time=delta_t)

                    # remember the jump target values, then apply the taint on the unconstrained state
                    next_state = next_path.unconstrained[0]
                    hash_val = self.bv_to_hash(val_jump_tmp)
                    self._taint_names_applied.append(hash_val)
                    hash_val = self.bv_to_hash(val_jump_reg)
                    self._taint_names_applied.append(hash_val)
                    self._apply_taint(current_addr, current_path, next_state)

        # eventually if we are in a loop guarded by a tainted variable
        next_active = next_path.active
        if len(next_active) > 1:
            history_addrs = [t for t in current_state.history.bbl_addrs]
            # successors that were already executed on this path
            seen_addr = [a.addr for a in next_active if a.addr in history_addrs]

            if len(seen_addr) == 0:
                return

            # only jumps to a lower address are considered
            back_jumps = [a for a in seen_addr if a < current_addr]
            if len(back_jumps) == 0:
                return

            bj = back_jumps[0]
            node_s = cfg.model.get_any_node(bj)
            node_f = cfg.model.get_any_node(current_addr)

            if not node_s or not node_f:
                return

            fun_s = node_s.function_address
            fun_f = node_f.function_address

            # jump source and target must belong to the same function
            if fun_s != fun_f:
                return

            # every known block executed since the jump target must belong to that function too
            idx_s = history_addrs.index(bj)
            for a in history_addrs[idx_s:]:
                n = cfg.model.get_any_node(a)
                if not n:
                    continue

                if n.function_address != fun_s:
                    return

            # if we have a back-jump satisfying all the conditions
            cond_guard = [g for g in next_active[0].history.jump_guards][-1]

            # one report per leaf of the guard whose string form contains the taint buffer name
            for node in cond_guard.recursive_leaf_asts:
                if self._ct._taint_buf in str(node):
                    log.info("Found a loop guarded by a tainted variable. Reporting Alert!")
                    delta_t = time.time() - self._analysis_starting_time
                    self._raised_alert = True
                    name_bin = self._ct.p.loader.main_object.binary
                    self._report_alert_fun('loop', name_bin, current_path, current_addr, cond_guard,
                                           pl_name=self._current_cpf_name, report_time=delta_t)

        # clean up the copied state to prevent possible memory leaks
        # NOTE(review): every early ``return`` above, and any exception, skips this cleanup
        # because it is not in a ``finally`` - confirm whether that is acceptable
        for state in next_path.active + next_path.unconstrained:
            state.history.trim()
            state.downsize()
            state.release_plugin('solver')

    except TimeOutException as to:
        raise to
    except Exception as e:
        log.error(f"Something went terribly wrong: {str(e)}")