def _get_role(self, no, key_addr, reg_name):
    """
    Infer the role of the current binary (setter or getter) by tainting the
    key passed at a call site and following where it leads.

    :param no: node containing the call to a set or getter function
    :param key_addr: address of the keyword used to infer the role
    :param reg_name: register containing the key_addr
    :return: None when the block at no.addr is not a call, Role.UNKNOWN when
             parameters travel in registers and reg_name is not one of the
             argument registers, self._current_role otherwise
    """
    project = self._current_p

    call_block = project.factory.block(no.addr)
    if not BinaryDependencyGraph.is_call(call_block):
        return None

    # reset the per-call bookkeeping used while detecting the role
    self._cpf_used = None
    call_addr = no.addr
    self._candidate_role_function = no.successors[0].addr

    # under-constrained taint engine; untainting is disabled because we only
    # want to observe where the key data is leading to
    engine_options = dict(
        interfunction_level=2,
        smart_call=False,
        follow_unsat=True,
        try_thumb=True,
        exit_on_decode_error=True,
        force_paths=True,
        allow_untaint=False,
        logger_obj=log,
    )
    self._core_taint = coretaint.CoreTaint(project, **engine_options)

    # bail out when the register holding the key is not a parameter register
    if are_parameters_in_registers(project):
        reg_offset = project.arch.registers[reg_name][0]
        if reg_offset not in ordered_argument_regs[project.arch.name]:
            return Role.UNKNOWN

    self._current_par_name = reg_name
    self._current_key_addr = key_addr
    self._current_f_addr = call_addr

    initial_state = self._prepare_state(key_addr, call_addr)
    summaries = self._prepare_function_summaries()

    self._f_arg_vals = []
    self._set_f_vals = True

    self._core_taint.set_alarm(TIMEOUT_TAINT, n_tries=TIMEOUT_TRIES)
    try:
        self._core_taint.run(
            initial_state, (), (),
            summarized_f=summaries,
            force_thumb=False,
            check_func=self._check_key_usage,
            init_bss=False,
        )
    except TimeOutException:
        log.warning("Timeout Triggered")
    except Exception as err:
        log.warning("Exception: %s" % str(err))

    self._core_taint.unset_alarm()
    return self._current_role
def _find_binding(self):
    """
    Attempt to find the port and IP used to send data to other binaries.

    Heuristic: look for calls to htons to retrieve the port, then look in the
    nearby memory to retrieve the IP address. Both port and IP are expected
    to be set in the same sock_addr struct.

    For every CFG node whose name contains 'htons' and whose single
    predecessor is a PLT stub, a taint run is started from the function of
    each block calling that stub, with self._check_ip_address as checker.

    :return: None
    """
    cfg = self._cfg
    p = self._p

    # look for the htons import and the blocks calling it
    for node in cfg.nodes():
        if not (node.name and 'htons' in node.name):
            continue

        pred = node.predecessors
        # only handle the simple case: one predecessor, which is a PLT stub
        if len(pred) != 1 or pred[0].addr not in p.loader.main_bin.reverse_plt:
            continue

        for caller_block in pred[0].predecessors:
            self._sink_addrs = []
            self._last_port = None

            self._ct = coretaint.CoreTaint(
                p, smart_call=False, interfunction_level=0,
                follow_unsat=True, try_thumb=True,
                exit_on_decode_error=True, force_paths=True,
                taint_returns_unfollowed_calls=True,
                taint_arguments_unfollowed_calls=True,
                allow_untaint=False)

            s = self._get_initial_state(caller_block.function_address)

            # register the summaries for htons and inet_pton
            summarized_f = {}
            for a in get_dyn_sym_addrs(p, ['htons']):
                summarized_f[a] = self._htons
            for a in get_dyn_sym_addrs(p, ['inet_pton']):
                summarized_f[a] = self._inet_pton

            # arm the timeout right before the guarded run so that it can
            # only fire where TimeOutException is handled
            self._ct.set_alarm(TIMEOUT_TAINT, n_tries=TIMEOUT_TRIES)
            try:
                self._ct.run(s, (), (), summarized_f=summarized_f,
                             force_thumb=False,
                             check_func=self._check_ip_address)
            except TimeOutException:
                self._log.warning("Hard timeout triggered")
            except Exception as e:
                self._log.error(
                    "Something went terribly wrong: %s" % str(e))
            finally:
                # never leave a pending alarm behind for the next iteration
                self._ct.unset_alarm()
def forward_tainter(self, function_addr):
    """
    Implements the forward taint core functionality

    :param function_addr: function address to start the analysis
    :return: True if self._sink_bound_to_recv is set once the taint run of a
             queued function ends, False otherwise
    """
    to_analyze = [(function_addr, self._bb_sinks)]
    p = self._p

    while to_analyze:
        # reset the per-function results before each run
        self._taint_applied_sources = []
        self._sink_bound_to_recv = False
        self._sink_dep_args = False

        faddr, self._bb_sinks = to_analyze.pop(0)

        white_calls = list(self._has_interesting_calls_frontward(faddr))
        self._ct = coretaint.CoreTaint(
            p, interfunction_level=0, smart_call=True, only_tracker=True,
            follow_unsat=True, shuffle_sat=True, white_calls=white_calls,
            exploration_strategy=self._exploration_strategy,
            try_thumb=True,
            taint_returns_unfollowed_calls=True,
            taint_arguments_unfollowed_calls=True,
            exit_on_decode_error=True, force_paths=True,
            allow_untaint=False)

        s = self._prepare_state(faddr)
        summarized_f = self._prepare_function_summaries()

        self._ct.set_alarm(TIMEOUT_TAINT, n_tries=TIMEOUT_TRIES)
        try:
            self._ct.run(s, (), (), summarized_f=summarized_f,
                         force_thumb=False, use_smart_concretization=False,
                         check_func=self._forward_taint, init_bss=False)
        except TimeOutException:
            log.warning("Timeout Triggered")
        except Exception as e:
            log.warning("Exception: %s" % str(e))
        finally:
            # cancel the alarm whatever way the run ended
            self._ct.unset_alarm()

        if self._sink_bound_to_recv:
            return True

    return False
def _vuln_analysis(self, bdg_node, seed_addr, info, max_size):
    """
    Run the analysis for children (i.e, slave binaries) and accumulate the
    per-binary statistics in self._stats.

    :param bdg_node: BDG node
    :param seed_addr: address of the seed of taint
    :param info: binary's info
    :param max_size: value stored in self._current_max_size for this run
    :return: None
    """
    # context shared with the checker callbacks
    self._current_p = bdg_node.p
    self._current_cfg = bdg_node.cfg
    data_key = info[RoleInfo.DATAKEY]
    self._current_cpf_name = bdg_node.find_cpf_data_key(data_key).name
    self._current_seed_addr = seed_addr
    self._current_role_info = info
    self._taint_names_applied = []
    self._visited_bb = 0
    self._current_max_size = max_size

    started_at = time.time()

    # first time we see this binary: create its counters, all set to zero
    if bdg_node.bin not in self._stats:
        counter_names = ('n_paths', 'ana_time', 'visited_bb', 'n_runs', 'to')
        self._stats[bdg_node.bin] = dict.fromkeys(counter_names, 0)

    # under-constrained taint engine
    engine_options = dict(
        interfunction_level=1,
        smart_call=True,
        follow_unsat=True,
        black_calls=(info[RoleInfo.ROLE_FUN],),
        try_thumb=True,
        shuffle_sat=True,
        exit_on_decode_error=True,
        force_paths=True,
        taint_returns_unfollowed_calls=True,
        allow_untaint=True,
        logger_obj=log,
    )
    self._ct = coretaint.CoreTaint(self._current_p, **engine_options)

    summaries = self._get_function_summaries()
    initial_state = self._get_initial_state(info[RoleInfo.X_REF_FUN])
    self._find_sink_addresses()

    self._ct.set_alarm(TIMEOUT_TAINT, n_tries=TIMEOUT_TRIES)
    try:
        self._ct.run(
            initial_state, (), (),
            summarized_f=summaries,
            force_thumb=False,
            check_func=self._check_sink,
            init_bss=False,
        )
    except TimeOutException:
        log.warning("Hard timeout triggered")
    except Exception as err:
        log.error("Something went terribly wrong: %s" % str(err))
    self._ct.unset_alarm()

    # stats
    bin_stats = self._stats[bdg_node.bin]
    if self._ct.triggered_to():
        bin_stats['to'] += 1
    bin_stats['visited_bb'] += self._visited_bb
    bin_stats['n_paths'] += self._ct.n_paths
    bin_stats['ana_time'] += (time.time() - started_at)
    bin_stats['n_runs'] += 1
def _find_tainted_callers(self, key_addr, f_addr):
    """
    Run a taint pass seeded with the key at key_addr and return the recorded
    call sites that belong to the function at f_addr.

    self.callsites is reset before the run and read afterwards; it is
    presumably filled by self._find_taint_callers (the checker passed to the
    taint engine) -- confirm against that method.

    :param key_addr: address of the keyword used to seed the taint
    :param f_addr: address of the function the call sites must belong to
    :return: list of the unique entries of self.callsites whose first element
             is an address located in the function at f_addr
    """
    p = self._current_p
    self.callsites = []

    # prepare the under-constrained-based initial state
    # we do not allow untaint as we just want to see where the key string is
    # leading to
    self._core_taint = coretaint.CoreTaint(
        p, interfunction_level=0, smart_call=False, follow_unsat=True,
        try_thumb=True, exit_on_decode_error=True, force_paths=True,
        allow_untaint=False, logger_obj=log)

    self._current_key_addr = key_addr
    s = self._prepare_state(key_addr, f_addr)
    summarized_f = self._prepare_function_summaries()

    self._core_taint.set_alarm(TIMEOUT_TAINT, n_tries=TIMEOUT_TRIES)
    try:
        self._core_taint.run(s, (), (), summarized_f=summarized_f,
                             force_thumb=False,
                             check_func=self._find_taint_callers,
                             init_bss=False)
    except TimeOutException:
        log.warning("Timeout Triggered")
    except Exception as e:
        log.warning("Exception: %s" % str(e))
    finally:
        # cancel the alarm whatever way the run ended
        self._core_taint.unset_alarm()

    callsites = []
    for cs in self.callsites:
        # best effort: a malformed entry is skipped, but interpreter-level
        # exceptions (KeyboardInterrupt, SystemExit) are no longer swallowed
        try:
            node = self._current_cfg.get_any_node(cs[0])
            if node is None:
                # address not covered by the CFG
                continue
            in_function = node.function_address == f_addr
        except Exception as e:
            log.debug("Skipping callsite %s: %s" % (str(cs), str(e)))
            continue

        if in_function and cs not in callsites:
            callsites.append(cs)

    return callsites
def _find_binding(self, current_path):
    """
    Attempts to find the port and ip using to send the data to other
    binaries.

    Heuristic: starting from the blocks calling htons we retrieve the port,
    then we look in the nearby memory to retrieve the IP address, since both
    port and ip are set in the same sock_addr struct.

    :param current_path: angr current path
    :return: None
    """
    project = self._p

    # engine configuration shared by every run below
    engine_options = dict(
        smart_call=False,
        interfunction_level=0,
        follow_unsat=True,
        try_thumb=True,
        exit_on_decode_error=True,
        force_paths=True,
        taint_returns_unfollowed_calls=True,
        taint_arguments_unfollowed_calls=True,
        allow_untaint=False,
    )

    for htons_caller in self._get_caller_blocks(current_path, 'htons'):
        self._sink_addrs = []
        self._last_port = None
        self._ct = coretaint.CoreTaint(project, **engine_options)

        # summaries for htons and inet_pton
        summaries = {}
        summaries.update(
            (sym_addr, self._htons)
            for sym_addr in get_dyn_sym_addrs(project, ['htons']))
        summaries.update(
            (sym_addr, self._inet_pton)
            for sym_addr in get_dyn_sym_addrs(project, ['inet_pton']))

        self._ct.set_alarm(TIMEOUT_TAINT, n_tries=TIMEOUT_TRIES)
        try:
            caller_node = self._cfg.get_any_node(htons_caller)
            initial_state = self._get_initial_state(
                caller_node.function_address)
            self._ct.run(
                initial_state, (), (),
                summarized_f=summaries,
                force_thumb=False,
                check_func=self._check_ip_address,
            )
        except TimeOutException:
            self._log.warning("Hard timeout triggered")
        except Exception as err:
            self._log.error(
                "Find binding: Something went terribly wrong: %s" % str(err))

        self._ct.restore_signal_handler()
def run_taint_analysis(self, f_addr, source_addr, name_source, args_to_taint):
    """
    Run one taint analysis starting at f_addr and update the safe/unsafe
    call counters of the instance.

    :param f_addr: address of the function the analysis starts from
    :param source_addr: address of the taint source; it is black-listed so
                        that the engine does not follow it
    :param name_source: unused by this method, kept for the callers
    :param args_to_taint: stored in self.args_to_taint for the checker
    :return: the value of self.buff_can_reach_sink once the run is over
    """
    p = self.p

    # reset the per-run results
    self.buff_can_reach_sink = False
    self.addr_alert_recorded = []
    self.addr_safe_recorded = []
    self.taint_names_applied = []

    self.ct = coretaint.CoreTaint(
        p, interfunction_level=1, smart_call=True, follow_unsat=True,
        only_follow_near_calls=True, try_thumb=True,
        black_calls=[source_addr], exit_on_decode_error=True,
        force_paths=True, taint_arguments_unfollowed_calls=True,
        taint_returns_unfollowed_calls=True, allow_untaint=True)

    summarized_f = get_function_summaries(p)
    s = get_initial_state(p, f_addr, self.ct)

    self.current_source_addr = source_addr
    self.current_starting_addr = f_addr
    self.args_to_taint = args_to_taint

    self.ct.set_alarm(self.adaptive_to, n_tries=3)
    try:
        self.ct.run(s, (), (), summarized_f=summarized_f,
                    force_thumb=False, check_func=self.check_func,
                    init_bss=False)
    except Exception as e:
        # parenthesised form: same output with the Python 2 print statement,
        # and valid syntax for the Python 3 print function
        print("Exception: %s" % str(e))
    finally:
        # cancel the alarm whatever way the run ended
        self.ct.unset_alarm()

    # count every recorded address once
    n_safe = len(set(self.addr_safe_recorded))
    n_unsafe = len(set(self.addr_alert_recorded))
    self.total_safe_calls += n_safe
    self.current_bin_safe_calls += n_safe
    self.total_unsafe_calls += n_unsafe
    self.current_bin_unsafe_calls += n_unsafe

    return self.buff_can_reach_sink
def _find_recv(self):
    """
    Run a taint pass, with self._check_recv as checker, from every block
    calling a function whose name contains M_GET_KEYWORD.

    For each matching CFG node the starting blocks are:
      * the node itself, when it lies in the '.text' section;
      * otherwise the predecessors of its single predecessor, when that
        predecessor is a PLT stub.
    A matching node satisfying neither case is skipped.

    :return: None
    """
    cfg = self._cfg
    p = self._p

    for node in cfg.nodes():
        if not (node.name and M_GET_KEYWORD in node.name):
            continue

        # computed per node, so that the callers of a previous node are never
        # analysed a second time
        caller_blocks = []
        region = p.loader.main_object.sections.find_region_containing(
            node.addr)
        if region and region.name == '.text':
            caller_blocks = [node]
        else:
            pred = node.predecessors
            if len(pred) == 1 and \
                    pred[0].addr in p.loader.main_bin.reverse_plt:
                caller_blocks = pred[0].predecessors

        for caller_block in caller_blocks:
            self._ct = coretaint.CoreTaint(
                p, smart_call=False, not_follow_any_calls=True,
                follow_unsat=True, try_thumb=True,
                exit_on_decode_error=True, force_paths=True,
                taint_returns_unfollowed_calls=True,
                taint_arguments_unfollowed_calls=True,
                allow_untaint=False)

            s = self._get_initial_state(caller_block.addr)

            self._ct.set_alarm(TIMEOUT_TAINT, n_tries=TIMEOUT_TRIES)
            try:
                self._ct.run(s, (), (), force_thumb=False,
                             check_func=self._check_recv)
            except TimeOutException:
                self._log.warning("Hard timeout triggered")
            except Exception as e:
                self._log.error(
                    "Something went terribly wrong: %s" % str(e))
            finally:
                # never leave a pending alarm behind for the next iteration
                self._ct.unset_alarm()
def frontward_tainter(self, function_addr):
    """
    Implements the frontward taint core functionality

    :param function_addr: function address to start the analysis
    :return: True if self._sink_bound_to_recv is set once the taint run of a
             queued function ends, False otherwise
    """
    to_analyze = [(function_addr, self._bb_sinks)]
    p = self._p

    while to_analyze:
        # reset the per-function results before each run
        self._taint_applied_sources = []
        self._sink_bound_to_recv = False
        self._sink_dep_args = False

        faddr, self._bb_sinks = to_analyze.pop(0)

        white_calls = list(self._has_interesting_calls_frontward(faddr))
        self._ct = coretaint.CoreTaint(
            p, interfunction_level=0, smart_call=True, only_tracker=True,
            follow_unsat=True, shuffle_sat=True, white_calls=white_calls,
            exploration_strategy=self._exploration_strategy,
            try_thumb=True,
            taint_returns_unfollowed_calls=True,
            taint_arguments_unfollowed_calls=True,
            exit_on_decode_error=True, force_paths=True,
            allow_untaint=False)

        s = self._prepare_state(faddr)
        summarized_f = self._prepare_function_summaries()

        self._ct.set_alarm(TIMEOUT_TAINT, n_tries=TIMEOUT_TRIES)
        try:
            # to trigger it refer to httpd 0x16410. Switch case is mostly
            # UNSAT!
            self._ct.run(s, (), (), summarized_f=summarized_f,
                         force_thumb=False, use_smart_concretization=False,
                         check_func=self._frontward_taint, init_bss=False)
        except TimeOutException:
            log.warning("Timeout Triggered")
        except Exception as e:
            log.warning("Exception: %s" % str(e))
        finally:
            # cancel the alarm whatever way the run ended
            self._ct.unset_alarm()

        if self._sink_bound_to_recv:
            return True

    return False
def _find_recv(self, current_path):
    """
    Start a taint run, with self._check_recv as checker, from each block
    returned by self._get_caller_blocks for M_GET_KEYWORD.

    self._read_from_socket is reset to False before the runs.

    :param current_path: angr current path
    :return: None
    """
    project = self._p
    self._read_from_socket = False

    # engine configuration shared by every run below
    engine_options = dict(
        smart_call=False,
        not_follow_any_calls=True,
        follow_unsat=True,
        try_thumb=True,
        exit_on_decode_error=True,
        force_paths=True,
        taint_returns_unfollowed_calls=True,
        taint_arguments_unfollowed_calls=True,
        allow_untaint=False,
    )

    for recv_caller in self._get_caller_blocks(current_path, M_GET_KEYWORD):
        self._ct = coretaint.CoreTaint(project, **engine_options)
        self._ct.set_alarm(TIMEOUT_TAINT, n_tries=TIMEOUT_TRIES)

        try:
            initial_state = self._get_initial_state(recv_caller)
            self._ct.run(
                initial_state, (), (),
                force_thumb=False,
                check_func=self._check_recv,
            )
        except TimeOutException:
            self._log.warning("Hard timeout triggered")
        except Exception as err:
            self._log.error(
                "Find recv: Something went terribly wrong: %s" % str(err))

        self._ct.restore_signal_handler()
def _find_file_name(self, check_func):
    """
    Run a taint pass, with self._save_file_name as checker, from every block
    calling fopen.

    A CFG node is considered when its name contains 'fopen' and its single
    predecessor is a PLT stub; the runs start from the predecessors of that
    stub.

    :param check_func: checker function, stored in self._check_func
    :return: None
    """
    cfg = self._cfg
    p = self._p
    self._check_func = check_func

    # look for the fopen import and the blocks calling it
    for node in cfg.nodes():
        if not (node.name and 'fopen' in node.name):
            continue

        pred = node.predecessors
        # only handle the simple case: one predecessor, which is a PLT stub
        if len(pred) != 1 or pred[0].addr not in p.loader.main_bin.reverse_plt:
            continue

        for caller_block in pred[0].predecessors:
            self._ct = coretaint.CoreTaint(
                p, not_follow_any_calls=True, smart_call=False,
                follow_unsat=True, try_thumb=True,
                exit_on_decode_error=True, force_paths=True,
                taint_returns_unfollowed_calls=True,
                taint_arguments_unfollowed_calls=True,
                allow_untaint=False)

            self._ct.set_alarm(TIMEOUT_TAINT, n_tries=TIMEOUT_TRIES)
            try:
                # the state is prepared inside the guarded region: the alarm
                # is already armed, so a timeout firing here must be handled
                s = self._get_initial_state(caller_block.addr)
                self._ct.run(s, (), (), force_thumb=False,
                             check_func=self._save_file_name)
            except TimeOutException:
                log.warning("Hard timeout triggered")
            except Exception as e:
                log.error("Something went terribly wrong: %s" % str(e))
            finally:
                # never leave a pending alarm behind for the next iteration
                self._ct.unset_alarm()
def _find_file_name(self, current_path, check_func):
    """
    Find the filename by starting a taint run, with self._save_file_name as
    checker, from each block returned by self._get_caller_blocks for 'fopen'.

    :param current_path: angr current path.
    :param check_func: checker function, stored in self._check_func
    :return: None
    """
    project = self._p
    self._check_func = check_func

    # engine configuration shared by every run below
    engine_options = dict(
        not_follow_any_calls=True,
        smart_call=False,
        follow_unsat=True,
        try_thumb=True,
        exit_on_decode_error=True,
        force_paths=True,
        taint_returns_unfollowed_calls=True,
        taint_arguments_unfollowed_calls=True,
        allow_untaint=False,
    )

    for fopen_caller in self._get_caller_blocks(current_path, 'fopen'):
        self._ct = coretaint.CoreTaint(project, **engine_options)
        self._ct.set_alarm(TIMEOUT_TAINT, n_tries=TIMEOUT_TRIES)

        try:
            initial_state = self._get_initial_state(fopen_caller)
            self._ct.run(
                initial_state, (), (),
                force_thumb=False,
                check_func=self._save_file_name,
            )
        except TimeOutException:
            self._log.warning("Hard timeout triggered")
        except Exception as err:
            self._log.error(
                "file.py: Something went terribly wrong: %s" % str(err))

        self._ct.restore_signal_handler()
def backward_tainter(self, function_addr):
    """
    Implements the backward taint core functionality.

    Starting from function_addr, each queued function is analysed with
    self._backward_taint as checker. When a run neither binds the sink to a
    recv nor leaves any self._taint_locs, but the sink depends on the
    function arguments, the callers of the function are queued one level
    deeper. Entries at depth MAX_DEPTH_BACKWARD or more are dropped.

    :param function_addr: function address to start the analysis
    :return: True as soon as self._sink_bound_to_recv is set after a run,
             False once the queue is exhausted
    """
    min_lvl = MAX_DEPTH_BACKWARD
    to_analyze = [(function_addr, self._bb_sinks, 0)]
    p = self._p
    cfg = self._cfg
    self.backward_analysis_completed = False

    while to_analyze:
        self._sink_bound_to_recv = False
        self._sink_dep_args = False

        # dequeue BEFORE any `continue`: otherwise an entry at the depth
        # limit is never removed and the loop spins forever on it
        faddr, self._bb_sinks, curr_lvl = to_analyze.pop(0)

        min_lvl = min(min_lvl, curr_lvl)
        if curr_lvl >= MAX_DEPTH_BACKWARD:
            continue

        white_calls = self._has_interesting_calls_backward(faddr)
        self._ct = coretaint.CoreTaint(
            p, interfunction_level=0, smart_call=True, only_tracker=True,
            follow_unsat=True, shuffle_sat=True, white_calls=white_calls,
            exploration_strategy=self._exploration_strategy,
            try_thumb=True,
            taint_returns_unfollowed_calls=True,
            taint_arguments_unfollowed_calls=True,
            exit_on_decode_error=True, force_paths=True,
            allow_untaint=False)

        s = self._prepare_state(faddr)
        summarized_f = self._prepare_function_summaries()

        self._ct.set_alarm(TIMEOUT_TAINT, n_tries=TIMEOUT_TRIES)
        try:
            # to trigger it refer to httpd 0x16410. Switch case is mostly
            # UNSAT!
            self._ct.run(s, (), (), summarized_f=summarized_f,
                         force_thumb=False, use_smart_concretization=False,
                         check_func=self._backward_taint, init_bss=False)
        except TimeOutException:
            log.warning("Timeout Triggered")
        except Exception as e:
            log.warning("Exception: %s" % str(e))
        finally:
            # cancel the alarm whatever way the run ended
            self._ct.unset_alarm()

        if self._sink_bound_to_recv:
            return True
        elif not self._taint_locs and self._sink_dep_args:
            # consider the callers
            no = cfg.get_any_node(faddr)
            if not no:
                continue

            # calling function address -> sinks (call block address and the
            # names of the argument registers of the call) found in it
            functions = {}
            arg_regs = ordered_agument_regs[p.arch.name]
            for pred in no.predecessors:
                callee_args = len(get_any_arguments_call(p, pred.addr))
                reg_names = tuple(
                    [p.arch.register_names[arg_regs[i]]
                     for i in range(callee_args)])
                functions.setdefault(pred.function_address, []).append(
                    (pred.addr, reg_names))

            for caller_addr, caller_sinks in functions.items():
                to_analyze.append((caller_addr, caller_sinks, curr_lvl + 1))

    # NOTE(review): the flag is already False at this point and is never set
    # to True in this method -- confirm the intended condition and value
    if min_lvl < MAX_DEPTH_BACKWARD:
        self.backward_analysis_completed = False

    return False