def sprintf(p, core_taint, plt_path, *_, **__):
    """
    sprintf function summary

    Reads the format string referenced by the register returned by
    arg_reg_name(p, 1), counts its '%s' specifiers and checks whether any of
    the matching variadic arguments (register value, or the memory it points
    to) is tainted.

    Only variadic arguments held in argument registers are inspected: when the
    format string has more '%s' specifiers than there are argument registers
    left, the remaining arguments are skipped instead of indexing past the end
    of the register list.

    :param p: angr project
    :param core_taint: core taint engine
    :param plt_path: path to the plt entry
    :return: True if one of the inspected '%s' arguments is tainted, False otherwise
    :raises Exception: if the architecture does not pass parameters in registers
    """
    plt_state = plt_path.active[0]
    if not are_parameters_in_registers(p):
        raise Exception("implement me")

    frmt_str = getattr(plt_state.regs, arg_reg_name(p, 1))
    str_val = get_string(p, frmt_str.args[0], extended=True)
    n_vargs = str_val.count('%s')

    arg_regs = ordered_argument_regs[p.arch.name]
    # never index past the last argument register
    last_arg = min(2 + n_vargs, len(arg_regs))
    for i in range(2, last_arg):
        name = p.arch.register_names[arg_regs[i]]
        reg = getattr(plt_state.regs, name)
        if (core_taint.is_tainted(reg, path=plt_path) or
                core_taint.is_tainted(core_taint.safe_load(plt_path, reg), path=plt_path)):
            return True
    return False
def strcpy(p, core_taint, plt_path, size_con=None):
    """
    strcpy function summary

    The call is reported when the source pointer register itself is tainted.
    Otherwise the contents referenced by the source and destination registers
    are loaded and the result is
    ``tainted and size_con >= (src.cardinality - 1) >= (dst.cardinality - 1)``.

    :param p: angr project
    :param core_taint: core taint engine
    :param plt_path: path to the plt entry
    :param size_con: value compared against the source cardinality
                     (NOTE(review): defaults to None and is compared as-is; confirm what callers pass)
    :return: True if the source register is tainted, otherwise the value of the expression above
    :raises Exception: if the architecture does not pass parameters in registers
    """
    plt_state = plt_path.active[0]
    if not are_parameters_in_registers(p):
        raise Exception("implement me")

    reg_names = p.arch.register_names

    # second argument register: the source pointer
    src_off = ordered_argument_regs[p.arch.name][1]
    src_ptr = getattr(plt_state.regs, reg_names[src_off])
    if core_taint.is_tainted(src_ptr, path=plt_path):
        return True

    # first argument register: the destination pointer
    dst_off = ordered_argument_regs[p.arch.name][0]
    dst_ptr = getattr(plt_state.regs, reg_names[dst_off])

    # compare the sizes of the two buffers
    src = core_taint.safe_load(plt_path, src_ptr)
    dst = core_taint.safe_load(plt_path, dst_ptr)
    src_is_tainted = core_taint.is_tainted(src, path=plt_path)

    # equal sizes are reported too, as the analysis might be under-constrained
    return src_is_tainted and size_con >= (src.cardinality - 1) >= (dst.cardinality - 1)
def _run_def_use(self, caller_node, role_function_info):
    """
    Run def-use analysis to find the data-key

    A reaching-definition analysis is run on the function containing
    caller_node and observed right after the node's last instruction. For
    every definition of the argument register selected by
    role_function_info[RoleInfo.PAR_N] whose value is a plain integer that
    get_string() resolves to a non-empty string, a copy of role_function_info
    (completed with data key, x-ref function, caller block and CPF name) is
    stored in self._role_info under that integer, and self._roles,
    self._data_keys and self._name_funs are extended.

    :param caller_node: node calling an environment function
    :param role_function_info: role function info
    :return: None
    :raises TimeOutException: re-raised if it occurs while reading the observed results
    :raises Exception: if the architecture does not pass parameters in registers
    """
    if not self._normalized_cfg:
        self._normalized_cfg = self._p.analyses.CFG(normalize=True)

    p = self._p
    cfg = self._normalized_cfg
    c = caller_node

    # Reaching definition analysis
    fun = cfg.functions.function(caller_node.function_address)
    t = (caller_node.instruction_addrs[-1], angr.analyses.reaching_definitions.OP_AFTER)
    rd = p.analyses.ReachingDefinitions(func=fun, observation_points=[t, ], init_func=True)

    try:
        results = rd.observed_results[t]
    except TimeOutException:
        raise
    except Exception:
        # best effort: without observed results there is nothing to record.
        # KeyboardInterrupt / SystemExit are deliberately not swallowed here.
        return

    if not are_parameters_in_registers(p):
        raise Exception("Envirnoment cpf: Parameters not in registers, implement me")

    idx = role_function_info[RoleInfo.PAR_N]
    reg_off = ordered_argument_regs[p.arch.name][idx]

    for r_def in results.register_definitions.get_objects_by_offset(reg_off):
        for val in r_def.data.data:
            if type(val) == angr.analyses.reaching_definitions.undefined.Undefined:
                continue
            if type(val) not in (int, long):
                print("Data value is not what expected. Check me...")
                continue

            c_string = get_string(p, val)
            if not c_string:
                continue

            if val not in self._role_info:
                self._role_info[val] = []

            new_role_info = dict(role_function_info)
            new_role_info[RoleInfo.DATAKEY] = c_string
            new_role_info[RoleInfo.X_REF_FUN] = c.function_address
            new_role_info[RoleInfo.CALLER_BB] = c.addr
            new_role_info[RoleInfo.CPF] = self._name

            if new_role_info not in self._role_info[val]:
                self._role_info[val].append(new_role_info)

            # the role of the first recorded entry is reused for the new data key
            role = self._roles[0]
            self._roles.append(role)
            self._data_keys.append(c_string)
            self._name_funs.append(c.successors[0].name)
def system(p, core_taint, plt_path, *_, **__):
    """
    system function summary

    Reports the call when the first argument register is tainted. Otherwise
    the memory it points to is walked one byte at a time: the walk stops with
    True at the first tainted location and with False at the first zero byte.

    :param p: angr project
    :param core_taint: core taint engine
    :param plt_path: path to the plt entry
    :return: True if the argument register or the memory walked from it is tainted, False otherwise
    :raises Exception: if the architecture does not pass parameters in registers
    """
    plt_state = plt_path.active[0]
    if not are_parameters_in_registers(p):
        raise Exception("implement me")

    first_arg_off = ordered_argument_regs[p.arch.name][0]
    cmd_ptr = getattr(plt_state.regs, p.arch.register_names[first_arg_off])

    if core_taint.is_tainted(cmd_ptr, path=plt_path):
        return True

    offset = 0
    while True:
        location = cmd_ptr.args[0] + offset
        if core_taint.is_tainted(core_taint.safe_load(plt_path, location), path=plt_path):
            return True
        # a zero byte ends the walk
        if plt_state.memory.load(location, 1).args[0] == 0:
            return False
        offset += 1
def find_ref_http_strings(n, keywords):
    """
    Finds HTTP related strings

    For every keyword, the strings returned by get_addrs_similar_string() are
    looked up among the CFG memory data (direct references) and through
    get_indirect_str_refs() (indirect references). For each reference that
    belongs to a call block, the argument register carrying the string is
    resolved and a role-info entry is added to the node.

    :param n: BDG node
    :param keywords: keywords to look for
    :return: None
    """
    cfg = n.cfg
    p = n.p

    # collect every reference to the strings we are interested in
    for keyword in keywords:
        for match in get_addrs_similar_string(p, keyword):
            current_string = match[0]
            str_addr = match[1]

            direct_refs = [entry for entry in cfg.memory_data.items() if entry[0] == str_addr]
            indirect_refs = get_indirect_str_refs(p, cfg, [str_addr])

            for a, s in direct_refs + indirect_refs:
                # only references sitting in a call block are of interest
                if not s.irsb or not BinaryDependencyGraph.is_call(s):
                    continue

                for (irsb_addr, stmt_idx, insn_addr) in list(s.refs):
                    if not are_parameters_in_registers(p):
                        log.error("_find_str_xref_in_call: arch doesn t use registers to set function parameters."
                                  "Implement me!")
                        continue

                    reg_used = get_reg_used(p, cfg, irsb_addr, stmt_idx, a, [str_addr])
                    if not reg_used:
                        continue

                    reg_offset = p.arch.registers[reg_used][0]
                    par_n = ordered_argument_regs[p.arch.name].index(reg_offset)

                    # among the functions whose block range covers the call block, keep the
                    # one with the lowest address: this filters out sub-functions (angr's mistakes)
                    covering = [f for f in cfg.functions.values()
                                if min(f.block_addrs) <= s.irsb_addr <= max(f.block_addrs)]
                    x_ref_fun = min(covering, key=lambda fn: fn.addr)

                    info = {
                        RoleInfo.ROLE: n.role,
                        RoleInfo.DATAKEY: current_string,
                        RoleInfo.CPF: None,
                        RoleInfo.X_REF_FUN: x_ref_fun.addr,
                        RoleInfo.CALLER_BB: s.irsb_addr,
                        RoleInfo.ROLE_FUN: None,
                        RoleInfo.ROLE_INS: None,
                        RoleInfo.ROLE_INS_IDX: None,
                        RoleInfo.COMM_BUFF: None,
                        RoleInfo.PAR_N: par_n
                    }
                    n.add_role_info(s.address, info)
def fwrite(p, core_taint, plt_path, *_, **__):
    """
    fwrite function summary

    Checks the first argument register: first its value, then the memory it
    points to.

    :param p: angr project
    :param core_taint: core taint engine
    :param plt_path: path to the plt entry
    :return: result of the taint check on the register, or on the pointed memory if the register is clean
    :raises Exception: if the architecture does not pass parameters in registers
    """
    plt_state = plt_path.active[0]
    if not are_parameters_in_registers(p):
        raise Exception("implement me")

    first_arg_off = ordered_argument_regs[p.arch.name][0]
    buf_ptr = getattr(plt_state.regs, p.arch.register_names[first_arg_off])

    ptr_tainted = core_taint.is_tainted(buf_ptr, path=plt_path)
    if ptr_tainted:
        return ptr_tainted
    return core_taint.is_tainted(core_taint.safe_load(plt_path, buf_ptr), path=plt_path)
def _apply_taint(self, addr, current_path, next_state, taint_key=False):
    """
    Applies the taint to the role function call

    Each argument register of the call at addr is tainted, except:
    registers holding the current seed address when taint_key is False, and
    registers holding a concrete value below the main object's lowest address.
    When the seed address is tainted, the taint size is the bit length of the
    string found at that address.

    :param addr: address of the role function
    :param current_path: current angr's path
    :param next_state: state at the entry of the function
    :param taint_key: taint the used data key
    :return: None
    :raises Exception: if the architecture does not pass parameters in registers
    """

    def is_arg_key(arg):
        # True when arg is a concrete integer equal to the current seed address
        if not hasattr(arg, 'args'):
            return False
        first = arg.args[0]
        return type(first) in (int, long) and first == self._current_seed_addr

    p = self._current_p

    # prefer the ordered arguments, fall back to any argument found
    ins_args = get_ord_arguments_call(p, addr) or get_any_arguments_call(p, addr)

    if not are_parameters_in_registers(p):
        raise Exception("Parameters not in registers: Implement me")

    for stmt in ins_args:
        reg_name = p.arch.register_names[stmt.offset]
        val_arg = getattr(next_state.regs, reg_name)
        size = None

        if is_arg_key(val_arg):
            if not taint_key:
                continue
            # size (in bits) of the string stored at the key address
            n_bytes = p.loader.memory.read_bytes(val_arg.args[0], STR_LEN)
            size = len(get_mem_string(n_bytes)) * 8

        # skip concrete values lying below the main object
        if val_arg.concrete and val_arg.args[0] < p.loader.main_object.min_addr:
            continue

        log.info('taint applied to %s:%s' % (reg_name, str(val_arg)))
        self._ct.apply_taint(current_path, val_arg, reg_name, size)
def run(self, data_key, key_addr, reg_name, core_taint, current_path, *kargs, **kwargs):
    """
    Run this CPF

    Checks whether the block at the current address is the entry of a function
    whose name contains the set/get keyword and whether the register reg_name
    (or the memory it points to) is tainted. If so, the role is recorded in
    self._role_info[key_addr] and in the name/data-key/role lists.

    :param data_key: data key value
    :param key_addr: data key address
    :param reg_name: name of the register checked for taint
    :param core_taint: core taint engine
    :param current_path: angr current path
    :param kargs: unused
    :param kwargs: unused
    :return: (True, role) if a role was found, (False, Role.UNKNOWN) otherwise
    :raises Exception: if the architecture does not pass parameters in registers
    """
    p = self._p
    cfg = self._cfg
    path_copy = current_path.copy(copy_states=True)
    addr = current_path.active[0].addr
    node = cfg.get_any_node(addr)

    # the node may be missing (it is tested below) and may have no predecessors:
    # in both cases there is no previous node to fall back to
    predecessors = node.predecessors if node else None
    prev_node = predecessors[0] if predecessors else None

    par_n = ordered_argument_regs[p.arch.name].index(p.arch.registers[reg_name][0])

    # '+' in the name marks a block inside a function rather than its entry
    if node and node.name and '+' not in node.name:
        if not are_parameters_in_registers(p):
            raise Exception("nvram.run: Parameters not in registers, implement me")

        reg_cnt = getattr(path_copy.active[0].regs, reg_name)
        tainted = core_taint.is_tainted(reg_cnt)
        if not tainted:
            tainted = core_taint.is_tainted(core_taint.safe_load(path_copy, reg_cnt), path=path_copy)

        name_lower = str(node.name).lower()
        is_getter = M_GET_KEYWORD in name_lower

        if (M_SET_KEYWORD in name_lower or is_getter) and tainted:
            history_bbs = current_path.active[0].history.bbl_addrs
            assert len(history_bbs) >= 1, "nvram.run: what's the caller? :("

            block_caller_role_function = history_bbs[-1]
            no = self._cfg.get_any_node(block_caller_role_function)
            assert no, "nvram.run: Impossible to find the function address, this is bad.."

            role = Role.GETTER if is_getter else Role.SETTER
            self._name_funs.append(node.name)
            self._data_keys.append(data_key)
            self._roles.append(role)

            # if the previous block (and not this one) is a PLT entry, attribute the role to it
            plt = self._p.loader.main_bin.reverse_plt
            if addr not in plt and prev_node is not None and prev_node.addr in plt:
                addr = prev_node.addr
                no = prev_node
                block_caller_role_function = current_path.active[0].history.bbl_addrs[-2]

            info = {
                RoleInfo.ROLE: role,
                RoleInfo.DATAKEY: data_key,
                RoleInfo.X_REF_FUN: no.function_address,
                RoleInfo.CALLER_BB: block_caller_role_function,
                RoleInfo.ROLE_FUN: addr,
                RoleInfo.ROLE_INS: addr,
                RoleInfo.ROLE_INS_IDX: None,
                RoleInfo.COMM_BUFF: None,
                RoleInfo.PAR_N: par_n,
                RoleInfo.CPF: self._name
            }
            if key_addr not in self._role_info:
                self._role_info[key_addr] = []
            if info not in self._role_info[key_addr]:
                self._role_info[key_addr].append(info)
            return True, role

    return False, Role.UNKNOWN
def run(self, data_key, key_addr, reg_name, core_taint, current_path, *kargs, **kwargs):
    """
    Run this CPF

    The function reached from the current block is classified by name:
    a name in M_SET_KEYWORD with a tainted second argument yields a setter;
    a name in CMP_KEYWORD with either of the first two arguments tainted
    yields a getter, provided the search started by _find_recv() sets
    self._read_from_socket; a name containing M_GET_KEYWORD with a tainted
    second argument yields a getter. When a role is found it is recorded in
    self._role_info[key_addr].

    :param data_key: data key
    :param key_addr: data key address
    :param reg_name: register name where the address is stored
    :param core_taint: core taint engine
    :param current_path: angr current path
    :param kargs: kargs
    :param kwargs: kwargs
    :return: (True, role) if a role was found, (False, Role.UNKNOWN) otherwise
    :raises Exception: if the architecture does not pass parameters in registers
    """
    p = self._p
    cfg = self._cfg
    self._current_key_addr = key_addr

    path_copy = current_path
    addr = current_path.active[0].addr

    # if stepping once lands on a PLT entry, step again to follow the extern call
    next_path = current_path.copy(copy_states=True).step()
    if len(next_path.active) > 0:
        if next_path.active[0].addr in self._p.loader.main_bin.reverse_plt:
            path_copy = next_path.copy(copy_states=True).step()
            addr = path_copy.active[0].addr

    if not are_parameters_in_registers(p):
        raise Exception("socket.run: Implement me")

    def arg_is_tainted(arg_idx):
        # taint check on the given argument register, then on the memory it points to
        arg_name = p.arch.register_names[ordered_argument_regs[p.arch.name][arg_idx]]
        arg_val = getattr(path_copy.active[0].regs, arg_name)
        result = core_taint.is_tainted(arg_val)
        if not result:
            result = core_taint.is_tainted(core_taint.safe_load(path_copy, arg_val), path=path_copy)
        return result

    node = cfg.get_any_node(addr)
    block_caller_role_function = current_path.active[0].addr
    par_n = ordered_argument_regs[p.arch.name].index(p.arch.registers[reg_name][0])
    candidate_role = Role.UNKNOWN
    x_ref_fun = cfg.get_any_node(block_caller_role_function)

    if not (node and node.name and '+' not in node.name):
        return False, Role.UNKNOWN

    fun_name = str(node.name).lower()

    if fun_name in M_SET_KEYWORD:
        # setter: the data is sent over the socket
        if arg_is_tainted(1):
            self._log.debug("tainted data is sent over socket.. looking for binding values")
            candidate_role = Role.SETTER
            self._find_binding(current_path)

    elif fun_name in CMP_KEYWORD:
        # getter through a comparison: the data key may be either operand
        first_tainted = arg_is_tainted(0)
        second_tainted = arg_is_tainted(1)
        if first_tainted | second_tainted:
            self._log.debug("tainted data used in a memcmp-like function.. looking for recv")
            self._stop_run = False
            self._sink_addrs = [block_caller_role_function]

            # the other operand must come from the socket: look for the recv
            self._find_recv(current_path)

            if self._read_from_socket:
                candidate_role = Role.GETTER
                self._stop_run = False
                self._find_binding(current_path)

    elif M_GET_KEYWORD in fun_name:
        # the data key is handed directly to a getter function
        self._read_from_socket = True
        if arg_is_tainted(1):
            self._log.debug("tainted data is received from socket.. looking for binding values")
            candidate_role = Role.GETTER
            self._find_binding(current_path)

    if candidate_role == Role.UNKNOWN:
        return False, Role.UNKNOWN

    # drop a trailing format such as ":%s" from the data key
    data_key = data_key.split(":%")[0]
    self._data_keys.append(data_key)
    self._roles.append(candidate_role)

    info = {
        RoleInfo.ROLE: candidate_role,
        RoleInfo.DATAKEY: data_key,
        RoleInfo.X_REF_FUN: x_ref_fun,
        RoleInfo.CALLER_BB: block_caller_role_function,
        RoleInfo.ROLE_FUN: addr,
        RoleInfo.ROLE_INS: addr,
        RoleInfo.ROLE_INS_IDX: None,
        RoleInfo.COMM_BUFF: None,
        RoleInfo.PAR_N: par_n,
        RoleInfo.CPF: self._name
    }

    recorded = self._role_info.setdefault(key_addr, [])
    if info not in recorded:
        recorded.append(info)
    return True, candidate_role
def _indirect_access_search(self, current_path, data_key, key_addr, core_taint, reg_name):
    """
    Checks whether tainted data is used indirectly (through a loop and memory comparison) to retrieve, or set, data
    into a structure.

    The current block must end with a call to a memcmp-like successor and must already appear in the path
    history (i.e. it is executed within a loop). When the second argument of that call is tainted, the role is
    guessed from the arity of the calling block and recorded in self._role_info[key_addr].

    :param current_path: angr current path
    :param data_key: data key value
    :param key_addr: data key address
    :param core_taint: core taint engine
    :param reg_name: register name
    :return: (True, role) if a role was found, (False, Role.UNKNOWN) otherwise
    """
    current_addr = current_path.active[0].addr

    # state right after the current block; without it there is nothing to inspect
    next_path = current_path.copy(copy_states=True)
    next_path.step()
    try:
        next_state = next_path.active[0]
    except TimeOutException:
        raise
    except Exception:
        return False, Role.UNKNOWN

    if self._p.factory.block(current_addr).vex.jumpkind == 'Ijk_Call':
        # there must be a loop, a strcmp-like function and the data_key has to be used as key
        history_bbs = [x for x in current_path.active[0].history.bbl_addrs]
        no = self._cfg.get_any_node(current_addr)

        if no and no.successors:
            for succ in no.successors:
                if current_addr in history_bbs and self._is_memcmp_succ(succ):
                    # we are calling a strcmp-like function within a loop.
                    if not are_parameters_in_registers(self._p):
                        raise Exception("implement me")

                    # second argument of the comparison and the memory it points to
                    dst_reg = ordered_argument_regs[self._p.arch.name][1]
                    dst_addr = getattr(next_state.regs, self._p.arch.register_names[dst_reg])
                    dst_cnt = current_path.active[0].memory.load(dst_addr)

                    # NOTE(review): the second operand compares dst_addr.args[0] with dst_addr itself;
                    # a comparison against the key address may have been intended - confirm.
                    if core_taint.is_tainted(dst_cnt) or (dst_addr.concrete and dst_addr.args[0] == dst_addr):
                        # yup! they are looking for some data indexed by the key. Understand if for setting
                        # or getting
                        current_function = no.function_address

                        # position of the last occurrence of the current function in the history
                        try:
                            pos_call = len(history_bbs) - 1 - history_bbs[::-1].index(current_function)
                        except TimeOutException:
                            raise
                        except:
                            pos_call = 0

                        assert pos_call > 0, 'semantic.run: unable to find the calling block'
                        caller_block = history_bbs[pos_call - 1]

                        # Heuristic: if the function's arity is greater than two, we assume
                        # that the third parameter is the content to store in the shared buffer, making
                        # the function itself a setter.
                        # FIXME: (limitation) improve this heuristic. One should perform a def-use analysis to
                        # see whether the base pointer used as first argument in the strcmp is used to return a
                        # value, or to set a value
                        nargs = get_arity(self._p, caller_block)
                        if nargs > 2:
                            candidate_role = Role.SETTER
                        else:
                            candidate_role = Role.GETTER

                        no = self._cfg.get_any_node(caller_block)
                        assert no, 'semantic.run: could not get a node :('

                        function_x_ref = no.function_address
                        par_id = ordered_argument_regs[self._p.arch.name].index(self._p.arch.registers[reg_name][0])
                        block = self._p.factory.block(no.addr)

                        # if the function is wrapped, report the wrapper and the block calling it instead
                        is_wrapped, wrapp_addr = self._is_wrapped(history_bbs, current_path)
                        if is_wrapped:
                            last_index = len(history_bbs) - 1 - history_bbs[::-1].index(wrapp_addr)
                            if last_index > 0:
                                current_function = wrapp_addr
                                caller_block = history_bbs[last_index - 1]
                                cno = self._cfg.get_any_node(caller_block)
                                function_x_ref = None
                                if cno:
                                    function_x_ref = cno.function_address

                        info = {
                            RoleInfo.ROLE: candidate_role,
                            RoleInfo.DATAKEY: data_key,
                            RoleInfo.X_REF_FUN: function_x_ref,
                            RoleInfo.CALLER_BB: caller_block,
                            RoleInfo.ROLE_FUN: current_function,
                            RoleInfo.ROLE_INS: no.addr,
                            RoleInfo.ROLE_INS_IDX: len(block.vex.statements),
                            RoleInfo.COMM_BUFF: None,
                            RoleInfo.PAR_N: par_id,
                            RoleInfo.CPF: self._name
                        }
                        if key_addr not in self._role_info:
                            self._role_info[key_addr] = []
                        if info not in self._role_info[key_addr]:
                            self._role_info[key_addr].append(info)
                        return True, candidate_role

    return False, Role.UNKNOWN
def run(self, data_key, key_addr, reg_name, core_taint, current_path, *kargs, **kwargs):
    """
    Run this CPF

    The function reached from the current block is classified by name:
    a name in M_SET_KEYWORD with a tainted second argument yields a setter;
    a name in CMP_KEYWORD with either of the first two arguments tainted
    yields a getter, provided the file-name search sets self._read_from_file.
    When a role is found and the cleaned data key is not empty, the role is
    recorded in self._role_info[key_addr] and the last file name is remembered.

    :param data_key: data key
    :param key_addr: data key address
    :param reg_name: register name where the address is stored
    :param core_taint: core taint engine
    :param current_path: angr current path
    :param kargs: kargs
    :param kwargs: kwargs
    :return: (True, role) if a role was recorded, (False, Role.UNKNOWN) otherwise
    :raises Exception: if the architecture does not pass parameters in registers
    """
    p = self._p
    cfg = self._cfg
    self._current_key_addr = key_addr

    path_copy = current_path
    # address of the current block (the CFG is queried by address, not by state)
    addr = current_path.active[0].addr

    next_path = current_path.copy(copy_states=True).step()
    # we jump into the GoT if we have an extern call; stepping may leave no active state
    if len(next_path.active) > 0 and \
            next_path.active[0].addr in self._p.loader.main_bin.reverse_plt:
        path_copy = next_path.copy(copy_states=True).step()
        addr = path_copy.active[0].addr

    node = cfg.get_any_node(addr)
    par_n = ordered_argument_regs[p.arch.name].index(p.arch.registers[reg_name][0])

    if not are_parameters_in_registers(p):
        raise Exception("file.run: Implement me")

    def arg_is_tainted(arg_idx):
        # taint check on the given argument register, then on the memory it points to
        arg_name = p.arch.register_names[ordered_argument_regs[p.arch.name][arg_idx]]
        arg_val = getattr(path_copy.active[0].regs, arg_name)
        result = core_taint.is_tainted(arg_val)
        if not result:
            result = core_taint.is_tainted(core_taint.safe_load(path_copy, arg_val), path=path_copy)
        return result

    block_caller_role_function = current_path.active[0].addr

    if node and node.name and '+' not in node.name:
        candidate_role = Role.UNKNOWN
        fun_name = str(node.name).lower()

        # Setter
        if fun_name in M_SET_KEYWORD:
            # check whether the data_key is passed
            if arg_is_tainted(1):
                candidate_role = Role.SETTER
                # we got to find the filename
                self._sink_addr = block_caller_role_function
                self._stop_run = False
                self._find_file_name(current_path, self._check_setter_sink)

        # Getter
        # getters are more complicated. We have to understand whether the data_key is compared against
        # some content taken from a file
        if fun_name in CMP_KEYWORD:
            # the data_key may be either operand: check both args
            tainted_0 = arg_is_tainted(0)
            tainted_1 = arg_is_tainted(1)
            if tainted_0 | tainted_1:
                self._sink_addr = block_caller_role_function
                self._stop_run = False

                # as in this case we do not know yet whether the data_key comes from a file
                # (we only found a strcmp), we start looking for the open only if it is within two hops
                # from the strcmp.
                self._find_file_name(current_path, self._check_getter_sink)

                if self._read_from_file:
                    candidate_role = Role.GETTER

        if candidate_role != Role.UNKNOWN:
            self._data_keys.append(data_key)
            self._roles.append(candidate_role)

            x_ref_fun = cfg.get_any_node(block_caller_role_function)
            # if the data_key contains the ":%s", ":5d" and so forth, we remove it
            data_key = data_key.split(":%")[0]

            if data_key:
                info = {
                    RoleInfo.ROLE: candidate_role,
                    RoleInfo.DATAKEY: data_key,
                    RoleInfo.X_REF_FUN: x_ref_fun,
                    RoleInfo.CALLER_BB: block_caller_role_function,
                    RoleInfo.ROLE_FUN: addr,
                    RoleInfo.ROLE_INS: addr,
                    RoleInfo.ROLE_INS_IDX: None,
                    RoleInfo.COMM_BUFF: None,
                    RoleInfo.PAR_N: par_n,
                    RoleInfo.CPF: self._name
                }
                if key_addr not in self._role_info:
                    self._role_info[key_addr] = []
                if info not in self._role_info[key_addr]:
                    self._role_info[key_addr].append(info)
                if self._last_file_name not in self._name_files:
                    self._name_files.append(self._last_file_name)
                return True, candidate_role

    return False, Role.UNKNOWN
def run(self, data_key, key_addr, reg_name, core_taint, current_path, *kargs, **kwargs):
    """
    Runs this CPF.

    Checks whether the block at the current address is the entry of a function
    that SetterGetter.is_setter() or SetterGetter.is_getter() accepts by name,
    and whether the register reg_name (or the memory it points to) is tainted.
    If so, the role is recorded in self._role_info[key_addr] and in the
    name/data-key/role lists.

    :param data_key: data key value
    :param key_addr: data key address
    :param reg_name: register name
    :param core_taint: core taint engine
    :param current_path: angr current path
    :param kargs: unused
    :param kwargs: unused
    :return: True, and the role if the role for the current binary was found, False and Unknown otherwise.
    :raises Exception: if the architecture does not pass parameters in registers
    """
    p = self._p
    cfg = self._cfg
    path_copy = current_path.copy(copy_states=True)
    addr = current_path.active[0].addr
    node = cfg.get_any_node(addr)

    # the node may be missing (it is tested below) and may have no predecessors:
    # in both cases there is no previous node to fall back to
    predecessors = node.predecessors if node else None
    prev_node = predecessors[0] if predecessors else None

    par_n = ordered_argument_regs[p.arch.name].index(p.arch.registers[reg_name][0])

    # we check that + is not in the name as we want to avoid considering every single block
    # within a function starting with a get/set keyword
    if node and node.name and '+' not in node.name:
        if not are_parameters_in_registers(p):
            raise Exception("setter_getter.run: Implement me")

        reg_cnt = getattr(path_copy.active[0].regs, reg_name)
        tainted = core_taint.is_tainted(reg_cnt)
        if not tainted:
            tainted = core_taint.is_tainted(core_taint.safe_load(path_copy, reg_cnt), path=path_copy)

        fun_name = str(node.name)
        if (SetterGetter.is_setter(fun_name) or SetterGetter.is_getter(fun_name)) and tainted:
            history_bbs = current_path.active[0].history.bbl_addrs
            assert len(history_bbs) >= 1, "setter_getter.run: what's the caller? :("

            block_caller_role_function = history_bbs[-1]
            no = self._cfg.get_any_node(block_caller_role_function)
            assert no, "setter_getter.run: Impossible to find the function address, this is bad.."

            role = Role.GETTER if SetterGetter.is_getter(fun_name) else Role.SETTER
            self._name_funs.append(node.name)
            self._data_keys.append(data_key)
            self._roles.append(role)

            # if the previous block (and not this one) is a PLT entry, attribute the role to it
            plt = self._p.loader.main_bin.reverse_plt
            if addr not in plt and prev_node is not None and prev_node.addr in plt:
                addr = prev_node.addr
                no = prev_node
                block_caller_role_function = current_path.active[0].history.bbl_addrs[-2]

            info = {
                RoleInfo.ROLE: role,
                RoleInfo.DATAKEY: data_key,
                RoleInfo.CPF: self._name,
                RoleInfo.X_REF_FUN: no.function_address,
                RoleInfo.CALLER_BB: block_caller_role_function,
                RoleInfo.ROLE_FUN: addr,
                RoleInfo.ROLE_INS: addr,
                RoleInfo.ROLE_INS_IDX: None,
                RoleInfo.COMM_BUFF: None,
                RoleInfo.PAR_N: par_n
            }
            if key_addr not in self._role_info:
                self._role_info[key_addr] = []
            if info not in self._role_info[key_addr]:
                self._role_info[key_addr].append(info)
            return True, role

    return False, Role.UNKNOWN