def python_check_connectivity(simulation): # Warning! depends on python Hassell -- may be really slow! NTF = hsa_topo.generate_NTF(simulation.topology.live_switches) TTF = hsa_topo.generate_TTF(simulation.topology.live_links) paths = hsa.find_reachability(NTF, TTF, simulation.topology.access_links) # Paths is: in_port -> [p_node1, p_node2] # Where p_node is a hash: # "hdr" -> foo # "port" -> foo # "visits" -> foo connected_pairs = set() for in_port, p_nodes in paths.iteritems(): for p_node in p_nodes: connected_pairs.add((in_port, p_node["port"])) all_pairs = InvariantChecker._get_all_pairs(simulation) remaining_pairs = all_pairs - connected_pairs partitioned_pairs = check_partitions(simulation.topology.switches, simulation.topology.live_links, simulation.topology.access_links) if len(partitioned_pairs) != 0: log.info("Partitioned pairs! %s" % str(partitioned_pairs)) remaining_pairs -= partitioned_pairs # TODO(cs): don't print results here if len(remaining_pairs) > 0: msg.fail("Not all %d pairs are connected! (%d missing)" % (len(all_pairs),len(remaining_pairs))) log.info("remaining_pairs: %s" % (str(remaining_pairs))) else: msg.success("Fully connected!") return list(remaining_pairs)
def check_connectivity(simulation): ''' Return any pairs that couldn't reach each other ''' # Always check liveness down_controllers = InvariantChecker.check_liveness(simulation) if down_controllers != []: return down_controllers # Effectively, run compute physical omega, ignore concrete values of headers, and # check that all pairs can reach eachother physical_omega = InvariantChecker.compute_physical_omega(simulation.topology.live_switches, simulation.topology.live_links, simulation.topology.access_links) connected_pairs = set() # Omegas are { original port -> [(final hs1, final port1), (final hs2, final port2)...] } for start_port, final_location_list in physical_omega.iteritems(): for _, final_port in final_location_list: connected_pairs.add((start_port, final_port)) # TODO(cs): translate HSA port numbers to ofp_phy_ports in the # headerspace/ module instead of computing uniq_port_id here access_links = simulation.topology.access_links all_pairs = [ (get_uniq_port_id(l1.switch, l1.switch_port),get_uniq_port_id(l2.switch, l2.switch_port)) for l1 in access_links for l2 in access_links if l1 != l2 ] all_pairs = set(all_pairs) remaining_pairs = all_pairs - connected_pairs # TODO(cs): don't print results here if len(remaining_pairs) > 0: msg.fail("Not all %d pairs are connected! (%d missing)" % (len(all_pairs),len(remaining_pairs))) log.info("remaining_pairs: %s" % (str(remaining_pairs))) else: msg.success("Fully connected!") return list(remaining_pairs)
def _check_connectivity_msg(unconnected_pairs): if len(unconnected_pairs) == 0: msg.success("Fully connected!") else: msg.fail("Found %d unconnected pair%s: %s" % (len(unconnected_pairs), "" if len(unconnected_pairs) == 1 else "s", unconnected_pairs))
def check_connectivity(simulation): ''' Return any pairs that couldn't reach each other ''' # Dynamic imports to allow this method to be serialized from sts.headerspace.config_parser.openflow_parser import get_uniq_port_id from sts.util.console import msg # Always check liveness if there is a single controllers if len(simulation.controller_manager.controllers) == 1: down_controllers = InvariantChecker.check_liveness(simulation) if down_controllers != []: return down_controllers # Effectively, run compute physical omega, ignore concrete values of headers, and # check that all pairs can reach eachother physical_omega = InvariantChecker.compute_physical_omega(simulation.topology.live_switches, simulation.topology.live_links, simulation.topology.access_links) connected_pairs = set() # Omegas are { original port -> [(final hs1, final port1), (final hs2, final port2)...] } for start_port, final_location_list in physical_omega.iteritems(): for _, final_port in final_location_list: connected_pairs.add((start_port, final_port)) all_pairs = InvariantChecker._get_all_pairs(simulation) remaining_pairs = all_pairs - connected_pairs # TODO(cs): don't print results here if len(remaining_pairs) > 0: msg.fail("Not all %d pairs are connected! (%d missing)" % (len(all_pairs),len(remaining_pairs))) log.info("remaining_pairs: %s" % (str(remaining_pairs))) else: msg.success("Fully connected!") return list(remaining_pairs)
def next_step(self): time.sleep(0.05) self.logical_time += 1 self._forwarded_this_step = 0 if len(self.event_list) == 0: msg.fail("No more events to inject") return next_event = self.event_list.pop(0) if type(next_event) in InteractiveReplayer.supported_input_events: msg.replay_event_success("Injecting %r" % next_event) next_event.proceed(self.simulation) else: # type(next_event) in InteractiveReplayer.supported_internal_events if type(next_event) == ControlMessageReceive and is_flow_mod( next_event): msg.mcs_event("Injecting %r" % next_event) else: msg.replay_event_success("Injecting %r" % next_event) next_event.manually_inject(self.simulation) if self.show_flow_tables_each_step: for switch in self.simulation.topology.switches: print "Switch %s" % switch.dpid switch.show_flow_table() print "\nAdvanced to step %d\n" % self.logical_time
def check_connectivity(simulation): ''' Return any pairs that couldn't reach each other ''' # Always check liveness down_controllers = InvariantChecker.check_liveness(simulation) if down_controllers != []: return down_controllers # Effectively, run compute physical omega, ignore concrete values of headers, and # check that all pairs can reach eachother physical_omega = InvariantChecker.compute_physical_omega( simulation.topology.live_switches, simulation.topology.live_links, simulation.topology.access_links) connected_pairs = set() # Omegas are { original port -> [(final hs1, final port1), (final hs2, final port2)...] } for start_port, final_location_list in physical_omega.iteritems(): for _, final_port in final_location_list: connected_pairs.add((start_port, final_port)) # TODO(cs): translate HSA port numbers to ofp_phy_ports in the # headerspace/ module instead of computing uniq_port_id here access_links = simulation.topology.access_links all_pairs = [(get_uniq_port_id(l1.switch, l1.switch_port), get_uniq_port_id(l2.switch, l2.switch_port)) for l1 in access_links for l2 in access_links if l1 != l2] all_pairs = set(all_pairs) remaining_pairs = all_pairs - connected_pairs # TODO(cs): don't print results here if len(remaining_pairs) > 0: msg.fail("Not all %d pairs are connected! (%d missing)" % (len(all_pairs), len(remaining_pairs))) log.info("remaining_pairs: %s" % (str(remaining_pairs))) else: msg.success("Fully connected!") return list(remaining_pairs)
def simulate(self, check_reproducability=True): self._runtime_stats.set_dag_stats(self.dag) # inject domain knowledge into the dag self.dag.mark_invalid_input_sequences() self.dag = self.dag.filter_unsupported_input_types() if len(self.dag) == 0: raise RuntimeError("No supported input types?") if check_reproducability: # First, run through without pruning to verify that the violation exists self._runtime_stats.record_replay_start() for i in range(0, self.no_violation_verification_runs): violations = self.replay(self.dag) if violations != []: break self._runtime_stats.set_initial_verification_runs_needed(i) self._runtime_stats.record_replay_end() if violations == []: msg.fail("Unable to reproduce correctness violation!") sys.exit(5) self.log("Violation reproduced successfully! Proceeding with pruning") Replayer.total_replays = 0 Replayer.total_inputs_replayed = 0 self._runtime_stats.record_prune_start() # TODO(cs): Better than a boolean flag: check if # log(len(self.dag)) > number of input types to try if self.optimized_filtering: self._optimize_event_dag() precompute_cache = PrecomputeCache() (dag, total_inputs_pruned) = self._ddmin(self.dag, 2, precompute_cache=precompute_cache) # Make sure to track the final iteration size self._track_iteration_size(total_inputs_pruned) self.dag = dag self._runtime_stats.record_prune_end() self._dump_runtime_stats() self.log("Final MCS (%d elements):" % len(self.dag.input_events)) for i in self.dag.input_events: self.log(" - %s" % str(i)) if self.mcs_trace_path is not None: self._dump_mcs_trace() self.log("=== Total replays: %d ===" % Replayer.total_replays) return self.simulation
def do_invariant_check(): if self.log_invariant_checks: self._log_input_event(CheckInvariants(invariant_check=self.invariant_check, fail_on_error=self.halt_on_violation)) controllers_with_violations = self.invariant_check(self.simulation) if controllers_with_violations != []: msg.fail("The following controllers had correctness violations!: %s" % str(controllers_with_violations)) self._log_input_event(InvariantViolation(controllers_with_violations)) if self.halt_on_violation: return True else: msg.interactive("No correctness violations!")
def track(self, violations, logical_time): # First, untrack violations that expire for v in self.violation2time.keys(): if v not in violations: msg.success("Violation %s turns out to be transient!" % v) del self.violation2time[v] # Now, track violations observed this round for v in violations: if v not in self.violation2time.keys(): self.violation2time[v] = (logical_time, logical_time) else: start_time = self.violation2time[v][0] end_time = logical_time self.violation2time[v] = (start_time, end_time) msg.fail("Violation encountered again after %d steps: %s" % (end_time - start_time, v))
def proceed(self, simulation): try: violations = self.invariant_check(simulation) except NameError as e: raise ValueError('''Closures are unsupported for invariant check ''' '''functions.\n Use dynamic imports inside of your ''' '''invariant check code and define all globals ''' '''locally.\n NameError: %s''' % str(e)) if violations != []: msg.fail("The following controllers had correctness violations!: %s" % str(violations)) if self.fail_on_error: msg.fail("Exiting: fail_on_error=True") exit(5) else: msg.interactive("No correctness violations!") return True
def do_invariant_check(): if self.log_invariant_checks: self._log_input_event( CheckInvariants(invariant_check=self.invariant_check, fail_on_error=self.halt_on_violation)) controllers_with_violations = self.invariant_check( self.simulation) if controllers_with_violations != []: msg.fail( "The following controllers had correctness violations!: %s" % str(controllers_with_violations)) self._log_input_event( InvariantViolation(controllers_with_violations)) if self.halt_on_violation: return True else: msg.interactive("No correctness violations!")
def next_step(self): time.sleep(0.05) self.logical_time += 1 self._forwarded_this_step = 0 if len(self.event_list) == 0: msg.fail("No more events to inject") else: next_event = self.event_list.pop(0) if type(next_event) in InteractiveReplayer.supported_input_events: msg.replay_event_success("Injecting %r" % next_event) next_event.proceed(self.simulation) else: # type(next_event) in InteractiveReplayer.supported_internal_events if type(next_event) == ControlMessageReceive and is_flow_mod(next_event): msg.mcs_event("Injecting %r" % next_event) else: msg.replay_event_success("Injecting %r" % next_event) next_event.manually_inject(self.simulation) if self.show_flow_tables_each_step: for switch in self.simulation.topology.switches: print "Switch %s" % switch.dpid switch.show_flow_table() print "\nAdvanced to step %d\n" % self.logical_time
def __init__(self, simulation_cfg, superlog_path_or_dag, invariant_check_name=None, transform_dag=None, end_wait_seconds=0.5, mcs_trace_path=None, extra_log=None, runtime_stats_path=None, wait_on_deterministic_values=False, no_violation_verification_runs=1, optimized_filtering=False, forker=LocalForker(), replay_final_trace=True, strict_assertion_checking=False, **kwargs): super(MCSFinder, self).__init__(simulation_cfg) # number of subsequences delta debugging has examined so far, for # distingushing runtime stats from different intermediate runs. self.subsequence_id = 0 self.mcs_log_tracker = None self.replay_log_tracker = None self.mcs_trace_path = mcs_trace_path self.sync_callback = None self._log = logging.getLogger("mcs_finder") if invariant_check_name is None: raise ValueError("Must specify invariant check") if invariant_check_name not in name_to_invariant_check: raise ValueError('''Unknown invariant check %s.\n''' '''Invariant check name must be defined in config.invariant_checks''', invariant_check_name) self.invariant_check = name_to_invariant_check[invariant_check_name] if type(superlog_path_or_dag) == str: self.superlog_path = superlog_path_or_dag # The dag is codefied as a list, where each element has # a list of its dependents self.dag = EventDag(log_parser.parse_path(self.superlog_path)) else: self.dag = superlog_path_or_dag last_invariant_violation = self.dag.get_last_invariant_violation() if last_invariant_violation is None: raise ValueError("No invariant violation found in dag...") violations = last_invariant_violation.violations if len(violations) > 1: self.bug_signature = None while self.bug_signature is None: msg.interactive("\n------------------------------------------\n") msg.interactive("Multiple violations detected! Choose one for MCS Finding:") for i, violation in enumerate(violations): msg.interactive(" [%d] %s" % (i+1, violation)) violation_index = msg.raw_input("> ") if re.match("^\d+$", violation_index) is None or\ int(violation_index) < 1 or\ int(violation_index) > len(violations): msg.fail("Must provide an integer between 1 and %d!" % len(violations)) continue self.bug_signature = violations[int(violation_index)-1] else: self.bug_signature = violations[0] msg.success("\nBug signature to match is %s. Proceeding with MCS finding!\n" % self.bug_signature) self.transform_dag = transform_dag # A second log with just our MCS progress log messages self._extra_log = extra_log self.kwargs = kwargs self.end_wait_seconds = end_wait_seconds self.wait_on_deterministic_values = wait_on_deterministic_values # `no' means "number" self.no_violation_verification_runs = no_violation_verification_runs self._runtime_stats = RuntimeStats(self.subsequence_id, runtime_stats_path=runtime_stats_path) # Whether to try alternate trace splitting techiques besides splitting by time. self.optimized_filtering = optimized_filtering self.forker = forker self.replay_final_trace = replay_final_trace self.strict_assertion_checking = strict_assertion_checking
def _check_connectivity_msg(unconnected_pairs): if len(unconnected_pairs) == 0: msg.success("Fully connected!") else: msg.fail("Found %d unconnected pair%s: %s" % (len(unconnected_pairs), "" if len(unconnected_pairs)==1 else "s", unconnected_pairs))
def __init__(self, simulation_cfg, superlog_path_or_dag, invariant_check_name="", bug_signature="", transform_dag=None, mcs_trace_path=None, extra_log=None, runtime_stats_path=None, max_replays_per_subsequence=1, optimized_filtering=False, forker=LocalForker(), replay_final_trace=True, strict_assertion_checking=False, no_violation_verification_runs=None, **kwargs): ''' Note that you may pass in any keyword argument for Replayer to MCSFinder, except 'bug_signature' and 'invariant_check_name' ''' super(MCSFinder, self).__init__(simulation_cfg) # number of subsequences delta debugging has examined so far, for # distingushing runtime stats from different intermediate runs. self.subsequence_id = 0 self.mcs_log_tracker = None self.replay_log_tracker = None self.mcs_trace_path = mcs_trace_path self.sync_callback = None self._log = logging.getLogger("mcs_finder") if invariant_check_name == "": raise ValueError("Must specify invariant check") if invariant_check_name not in name_to_invariant_check: raise ValueError('''Unknown invariant check %s.\n''' '''Invariant check name must be defined in config.invariant_checks''', invariant_check_name) self.invariant_check_name = invariant_check_name self.invariant_check = name_to_invariant_check[invariant_check_name] if type(superlog_path_or_dag) == str: self.superlog_path = superlog_path_or_dag # The dag is codefied as a list, where each element has # a list of its dependents self.dag = EventDag(log_parser.parse_path(self.superlog_path)) else: self.dag = superlog_path_or_dag if self.simulation_cfg.ignore_interposition: filtered_events = [e for e in self.dag.events if type(e) not in all_internal_events] self.dag = EventDag(filtered_events) last_invariant_violation = self.dag.get_last_invariant_violation() if last_invariant_violation is None: raise ValueError("No invariant violation found in dag...") violations = last_invariant_violation.violations self.bug_signature = bug_signature if len(violations) > 1: while self.bug_signature == "": msg.interactive("\n------------------------------------------\n") msg.interactive("Multiple violations detected! Choose one for MCS Finding:") for i, violation in enumerate(violations): msg.interactive(" [%d] %s" % (i+1, violation)) violation_index = msg.raw_input("> ") if re.match("^\d+$", violation_index) is None or\ int(violation_index) < 1 or\ int(violation_index) > len(violations): msg.fail("Must provide an integer between 1 and %d!" % len(violations)) continue self.bug_signature = violations[int(violation_index)-1] if self.bug_signature == "": self.bug_signature = violations[0] msg.success("\nBug signature to match is %s. Proceeding with MCS finding!\n" % self.bug_signature) self.transform_dag = transform_dag # A second log with just our MCS progress log messages self._extra_log = extra_log self.kwargs = kwargs unknown_kwargs = [ k for k in kwargs.keys() if k not in Replayer.kwargs ] if unknown_kwargs != []: raise ValueError("Unknown kwargs %s" % str(unknown_kwargs)) if no_violation_verification_runs is not None: raise ValueError('''no_violation_verification_runs parameter is deprecated. ''' '''Use max_replays_per_subsequence.''') self.max_replays_per_subsequence = max_replays_per_subsequence self._runtime_stats = RuntimeStats(self.subsequence_id, runtime_stats_path=runtime_stats_path) # Whether to try alternate trace splitting techiques besides splitting by time. self.optimized_filtering = optimized_filtering self.forker = forker self.replay_final_trace = replay_final_trace self.strict_assertion_checking = strict_assertion_checking
def __init__(self, simulation_cfg, superlog_path_or_dag, invariant_check_name=None, transform_dag=None, end_wait_seconds=0.5, mcs_trace_path=None, extra_log=None, runtime_stats_path=None, wait_on_deterministic_values=False, no_violation_verification_runs=1, optimized_filtering=False, forker=LocalForker(), replay_final_trace=True, strict_assertion_checking=False, delay_flow_mods=False, **kwargs): super(MCSFinder, self).__init__(simulation_cfg) # number of subsequences delta debugging has examined so far, for # distingushing runtime stats from different intermediate runs. self.subsequence_id = 0 self.mcs_log_tracker = None self.replay_log_tracker = None self.mcs_trace_path = mcs_trace_path self.sync_callback = None self._log = logging.getLogger("mcs_finder") if invariant_check_name is None: raise ValueError("Must specify invariant check") if invariant_check_name not in name_to_invariant_check: raise ValueError( '''Unknown invariant check %s.\n''' '''Invariant check name must be defined in config.invariant_checks''', invariant_check_name) self.invariant_check = name_to_invariant_check[invariant_check_name] if type(superlog_path_or_dag) == str: self.superlog_path = superlog_path_or_dag # The dag is codefied as a list, where each element has # a list of its dependents self.dag = EventDag(log_parser.parse_path(self.superlog_path)) else: self.dag = superlog_path_or_dag last_invariant_violation = self.dag.get_last_invariant_violation() if last_invariant_violation is None: raise ValueError("No invariant violation found in dag...") violations = last_invariant_violation.violations if len(violations) > 1: self.bug_signature = None while self.bug_signature is None: msg.interactive( "\n------------------------------------------\n") msg.interactive( "Multiple violations detected! Choose one for MCS Finding:" ) for i, violation in enumerate(violations): msg.interactive(" [%d] %s" % (i + 1, violation)) violation_index = msg.raw_input("> ") if re.match("^\d+$", violation_index) is None or\ int(violation_index) < 1 or\ int(violation_index) > len(violations): msg.fail("Must provide an integer between 1 and %d!" % len(violations)) continue self.bug_signature = violations[int(violation_index) - 1] else: self.bug_signature = violations[0] msg.success( "\nBug signature to match is %s. Proceeding with MCS finding!\n" % self.bug_signature) self.transform_dag = transform_dag # A second log with just our MCS progress log messages self._extra_log = extra_log self.kwargs = kwargs self.end_wait_seconds = end_wait_seconds self.wait_on_deterministic_values = wait_on_deterministic_values # `no' means "number" self.no_violation_verification_runs = no_violation_verification_runs self._runtime_stats = RuntimeStats( self.subsequence_id, runtime_stats_path=runtime_stats_path) # Whether to try alternate trace splitting techiques besides splitting by time. self.optimized_filtering = optimized_filtering self.forker = forker self.replay_final_trace = replay_final_trace self.strict_assertion_checking = strict_assertion_checking self.delay_flow_mods = delay_flow_mods
def replay(self, new_dag, label, ignore_runtime_stats=False): # Run the simulation forward if self.transform_dag: new_dag = self.transform_dag(new_dag) self._runtime_stats.record_replay_stats(len(new_dag.input_events)) # N.B. this function is run as a child process. def play_forward(results_dir, subsequence_id): # TODO(cs): need to serialize the parameters to Replayer rather than # wrapping them in a closure... otherwise, can't use RemoteForker # TODO(aw): MCSFinder needs to configure Simulation to always let DataplaneEvents pass through create_clean_python_dir(results_dir) # Copy stdout and stderr to a file "replay.out" tee = Tee(open(os.path.join(results_dir, "replay.out"), "w")) tee.tee_stdout() tee.tee_stderr() # Set up replayer. input_logger = InputLogger() replayer = Replayer( self.simulation_cfg, new_dag, wait_on_deterministic_values=self.wait_on_deterministic_values, input_logger=input_logger, allow_unexpected_messages=False, pass_through_whitelisted_messages=True, delay_flow_mods=self.delay_flow_mods, **self.kwargs) replayer.init_results(results_dir) self._runtime_stats = RuntimeStats(subsequence_id) violations = [] simulation = None try: simulation = replayer.simulate() self._track_new_internal_events(simulation, replayer) # Wait a bit in case the bug takes awhile to happen self.log("Sleeping %d seconds after run" % self.end_wait_seconds) time.sleep(self.end_wait_seconds) violations = self.invariant_check(simulation) if violations != []: input_logger.log_input_event( InvariantViolation(violations)) except SystemExit: # One of the invariant checks bailed early. Oddly, this is not an # error for us, it just means that there were no violations... # [this logic is arguably broken] # Return no violations, and let Forker handle system exit for us. violations = [] finally: input_logger.close(replayer, self.simulation_cfg, skip_mcs_cfg=True) if simulation is not None: simulation.clean_up() tee.close() if self.strict_assertion_checking: test_serialize_response(violations, self._runtime_stats.client_dict()) timed_out_internal = [ e.label for e in new_dag.events if e.timed_out ] return (violations, self._runtime_stats.client_dict(), timed_out_internal) # TODO(cs): once play_forward() is no longer a closure, register it only once self.forker.register_task("play_forward", play_forward) results_dir = self.replay_log_tracker.get_replay_logger_dir(label) self.subsequence_id += 1 (violations, client_runtime_stats, timed_out_internal) = self.forker.fork("play_forward", results_dir, self.subsequence_id) new_dag.set_events_as_timed_out(timed_out_internal) bug_found = False if violations != []: msg.fail("Violations: %s" % str(violations)) if self.bug_signature in violations: bug_found = True else: msg.fail("Bug does not match initial violation fingerprint!") else: msg.success("No correctness violations!") if not ignore_runtime_stats: self._runtime_stats.merge_client_dict(client_runtime_stats) return bug_found
def simulate(self, check_reproducability=True): self._runtime_stats.set_dag_stats(self.dag) # apply domain knowledge: treat failure/recovery pairs atomically, and # filter event types we don't want to include in the MCS # (e.g. CheckInvariants) self.dag.mark_invalid_input_sequences() # TODO(cs): invoke dag.filter_unsupported_input_types() if len(self.dag) == 0: raise RuntimeError("No supported input types?") if check_reproducability: # First, run through without pruning to verify that the violation exists self._runtime_stats.record_replay_start() for i in range(0, self.no_violation_verification_runs): bug_found = self.replay(self.dag, "reproducibility", ignore_runtime_stats=True) if bug_found: break self._runtime_stats.set_initial_verification_runs_needed(i) self._runtime_stats.record_replay_end() if not bug_found: msg.fail("Unable to reproduce correctness violation!") sys.exit(5) self.log( "Violation reproduced successfully! Proceeding with pruning") self._runtime_stats.record_prune_start() # Run optimizations. # TODO(cs): Better than a boolean flag: check if # log(len(self.dag)) > number of input types to try if self.optimized_filtering: self._optimize_event_dag() precompute_cache = PrecomputeCache() # Invoke delta debugging (dag, total_inputs_pruned) = self._ddmin(self.dag, 2, precompute_cache=precompute_cache) # Make sure to track the final iteration size self._track_iteration_size(total_inputs_pruned) self.dag = dag self.log("=== Total replays: %d ===" % self._runtime_stats.total_replays) self._runtime_stats.record_prune_end() self.mcs_log_tracker.dump_runtime_stats() self.log("Final MCS (%d elements):" % len(self.dag.input_events)) for i in self.dag.input_events: self.log(" - %s" % str(i)) if self.replay_final_trace: # Replaying the final trace achieves two goals: # - verifies that the MCS indeed ends in the violation # - allows us to prune internal events that time out bug_found = self.replay(self.dag, "final_mcs_trace", ignore_runtime_stats=True) if not bug_found: self.log( '''Warning: MCS did not result in violation. Trying replay ''' '''again without timed out events.''') # TODO(cs): always replay the MCS without timeouts, since the console # output will be significantly cleaner? no_timeouts = self.dag.filter_timeouts() bug_found = self.replay(no_timeouts, "final_mcs_no_timed_out_events", ignore_runtime_stats=True) if not bug_found: self.log( '''Warning! Final MCS did not result in violation, even ''' ''' after ignoring timed out internal events. ''' ''' See tools/visualization/visualize1D.html for debugging''' ) # N.B. dumping the MCS trace must occur after the final replay trace, # since we need to infer which events will time out for events.trace.notimeouts if self.mcs_trace_path is not None: self.mcs_log_tracker.dump_mcs_trace(self.dag, self) return ExitCode(0)
def replay(self, new_dag, label, ignore_runtime_stats=False): # Run the simulation forward if self.transform_dag: new_dag = self.transform_dag(new_dag) self._runtime_stats.record_replay_stats(len(new_dag.input_events)) # N.B. this function is run as a child process. def play_forward(results_dir, subsequence_id): # TODO(cs): need to serialize the parameters to Replayer rather than # wrapping them in a closure... otherwise, can't use RemoteForker # TODO(aw): MCSFinder needs to configure Simulation to always let DataplaneEvents pass through create_clean_python_dir(results_dir) # Copy stdout and stderr to a file "replay.out" tee = Tee(open(os.path.join(results_dir, "replay.out"), "w")) tee.tee_stdout() tee.tee_stderr() # Set up replayer. input_logger = InputLogger() replayer = Replayer(self.simulation_cfg, new_dag, wait_on_deterministic_values=self.wait_on_deterministic_values, input_logger=input_logger, allow_unexpected_messages=False, pass_through_whitelisted_messages=True, **self.kwargs) replayer.init_results(results_dir) self._runtime_stats = RuntimeStats(subsequence_id) violations = [] simulation = None try: simulation = replayer.simulate() self._track_new_internal_events(simulation, replayer) # Wait a bit in case the bug takes awhile to happen self.log("Sleeping %d seconds after run" % self.end_wait_seconds) time.sleep(self.end_wait_seconds) violations = self.invariant_check(simulation) if violations != []: input_logger.log_input_event(InvariantViolation(violations)) except SystemExit: # One of the invariant checks bailed early. Oddly, this is not an # error for us, it just means that there were no violations... # [this logic is arguably broken] # Return no violations, and let Forker handle system exit for us. violations = [] finally: input_logger.close(replayer, self.simulation_cfg, skip_mcs_cfg=True) if simulation is not None: simulation.clean_up() tee.close() if self.strict_assertion_checking: test_serialize_response(violations, self._runtime_stats.client_dict()) timed_out_internal = [ e.label for e in new_dag.events if e.timed_out ] return (violations, self._runtime_stats.client_dict(), timed_out_internal) # TODO(cs): once play_forward() is no longer a closure, register it only once self.forker.register_task("play_forward", play_forward) results_dir = self.replay_log_tracker.get_replay_logger_dir(label) self.subsequence_id += 1 (violations, client_runtime_stats, timed_out_internal) = self.forker.fork("play_forward", results_dir, self.subsequence_id) new_dag.set_events_as_timed_out(timed_out_internal) bug_found = False if violations != []: msg.fail("Violations: %s" % str(violations)) if self.bug_signature in violations: bug_found = True else: msg.fail("Bug does not match initial violation fingerprint!") else: msg.interactive("No correctness violations!") if not ignore_runtime_stats: self._runtime_stats.merge_client_dict(client_runtime_stats) return bug_found
def simulate(self, check_reproducability=True): self._runtime_stats.set_dag_stats(self.dag) # apply domain knowledge: treat failure/recovery pairs atomically, and # filter event types we don't want to include in the MCS # (e.g. CheckInvariants) self.dag.mark_invalid_input_sequences() self.dag = self.dag.filter_unsupported_input_types() if len(self.dag) == 0: raise RuntimeError("No supported input types?") if check_reproducability: # First, run through without pruning to verify that the violation exists self._runtime_stats.record_replay_start() for i in range(0, self.no_violation_verification_runs): bug_found = self.replay(self.dag, "reproducibility", ignore_runtime_stats=True) if bug_found: break self._runtime_stats.set_initial_verification_runs_needed(i) self._runtime_stats.record_replay_end() if not bug_found: msg.fail("Unable to reproduce correctness violation!") sys.exit(5) self.log("Violation reproduced successfully! Proceeding with pruning") self._runtime_stats.record_prune_start() # Run optimizations. # TODO(cs): Better than a boolean flag: check if # log(len(self.dag)) > number of input types to try if self.optimized_filtering: self._optimize_event_dag() precompute_cache = PrecomputeCache() # Invoke delta debugging (dag, total_inputs_pruned) = self._ddmin(self.dag, 2, precompute_cache=precompute_cache) # Make sure to track the final iteration size self._track_iteration_size(total_inputs_pruned) self.dag = dag self.log("=== Total replays: %d ===" % self._runtime_stats.total_replays) self._runtime_stats.record_prune_end() self.mcs_log_tracker.dump_runtime_stats() self.log("Final MCS (%d elements):" % len(self.dag.input_events)) for i in self.dag.input_events: self.log(" - %s" % str(i)) if self.replay_final_trace: # Replaying the final trace achieves two goals: # - verifies that the MCS indeed ends in the violation # - allows us to prune internal events that time out bug_found = self.replay(self.dag, "final_mcs_trace", ignore_runtime_stats=True) if not bug_found: self.log('''Warning: MCS did not result in violation. Trying replay ''' '''again without timed out events.''') # TODO(cs): always replay the MCS without timeouts, since the console # output will be significantly cleaner? no_timeouts = self.dag.filter_timeouts() bug_found = self.replay(no_timeouts, "final_mcs_no_timed_out_events", ignore_runtime_stats=True) if not bug_found: self.log('''Warning! Final MCS did not result in violation, even ''' ''' after ignoring timed out internal events. ''' ''' See tools/visualization/visualize1D.html for debugging''') # N.B. dumping the MCS trace must occur after the final replay trace, # since we need to infer which events will time out for events.trace.notimeouts if self.mcs_trace_path is not None: self.mcs_log_tracker.dump_mcs_trace(self.dag, self) return ExitCode(0)