class FuzzTester (EventMixin): """ This is part of a testing framework for controller applications. It acts as a replacement for pox.topology. Given a set of event handlers (registered by a controller application), it will inject intelligently chosen mock events (and observe their responses?) """ def __init__(self, fuzzer_params="fuzzer_params.cfg", interactive=True, check_interval=35, trace_interval=10, random_seed=0.0, delay=0.1, dataplane_trace=None, control_socket=None): self.interactive = interactive self.check_interval = check_interval self.trace_interval = trace_interval # Format of trace file is a pickled array of DataplaneEvent objects self.dataplane_trace = None if dataplane_trace: self.dataplane_trace = pickle.load(file(dataplane_trace)) self.running = False self.panel = None self.switch_impls = [] self.delay = delay self._load_fuzzer_params(fuzzer_params) # Logical time (round #) for the simulation execution self.logical_time = 0 # Metatdata for simulated failures # sts.debugger_entities.Link objects self.cut_links = set() # SwitchOutDpEvent objects self.dropped_dp_events = [] # SwitchImpl objects self.failed_switches = set() # topology.Controller objects self.failed_controllers = set() # ? self.cancelled_timeouts = set() # Statistics to print on exit self.packets_sent = 0 # Make execution deterministic to allow the user to easily replay self.seed = random_seed self.random = random.Random(self.seed) self.traffic_generator = TrafficGenerator(self.random) self.invariant_checker = InvariantChecker(control_socket) # TODO: future feature: log all events, and allow user to (interactively) # replay the execution # self.replay_logger = ReplayLogger() # Write the seed out to the replay log as the first 4 bytes # TODO: future feature: allow the user to interactively choose the order # events occur for each round, whether to delay, drop packets, fail nodes, # etc. # self.failure_lvl = [ # NOTHING, # Everything is handled by the random number generator # CRASH, # The user only controls node crashes and restarts # DROP, # The user also controls message dropping # DELAY, # The user also controls message delays # EVERYTHING # The user controls everything, including message ordering # ] # TODO: need a mechanism for signaling when the distributed controller handshake has completed def _load_fuzzer_params(self, fuzzer_params): if os.path.exists(fuzzer_params): # TODO: more pythonic way to read lines (currently on a plane...) # TODO: even better: there is probably a library for parsing config files for line in file(fuzzer_params).read().splitlines(): if line == "[fuzzer]": # TODO: handle more directives other than [fuzzer] continue (kw, eq, val) = line.split() val = float(val) setattr(self, kw, val) else: # TODO: default values in case fuzzer_config is not present / missing directives raise IOError("Could not find logging config file: %s" % fuzzer_params) def type_check_dataplane_trace(self): if self.dataplane_trace is not None: for dp_event in self.dataplane_trace: if dp_event.interface not in self.interface2host: raise RuntimeError("Dataplane trace does not type check (%s)" % str(dp_event.interface)) def simulate(self, panel, switch_impls, network_links, hosts, access_links, steps=None): """ Start the fuzzer loop! """ log.debug("Starting fuzz loop") self.panel = panel self.switch_impls = set(switch_impls) self.dataplane_links = set(network_links) self.hosts = hosts self.interface2host = {} for host in hosts: for interface in host.interfaces: self.interface2host[interface] = host self.access_links = set(access_links) self.type_check_dataplane_trace() self.loop(steps) def loop(self, steps=None): self.running = True end_time = self.logical_time + steps if steps else sys.maxint while self.running and self.logical_time < end_time: self.logical_time += 1 self.trigger_events() msg.event("Round %d completed." % self.logical_time) if self.interactive: # TODO: print out the state of the network at each timestep? Take a # verbose flag.. self.invariant_check_prompt() self.dataplane_trace_prompt() answer = msg.raw_input('Continue to next round? [Yn]').strip() if answer != '' and answer.lower() != 'y': self.stop() break else: # not self.interactive if (self.logical_time % self.check_interval) == 0: # Time to run correspondence! # spawn a thread for running correspondence. Make sure the controller doesn't # think we've gone idle though: send OFP_ECHO_REQUESTS every few seconds # TODO: this is a HACK def do_correspondence(): any_policy_violations = self.invariant_checker.check_correspondence(self.live_switches, self.live_links, self.access_links) if any_policy_violations: msg.fail("There were policy-violations!") else: msg.interactive("No policy-violations!") thread = threading.Thread(target=do_correspondence) thread.start() while thread.isAlive(): for switch in self.live_switches: # connection -> deferred io worker -> io worker switch.send(of.ofp_echo_request().pack()) thread.join(2.0) if self.dataplane_trace and (self.logical_time % self.trace_interval) == 0: self.inject_trace_event() time.sleep(self.delay) def stop(self): self.running = False def invariant_check_prompt(self): answer = msg.raw_input('Check Invariants? [Ny]') if answer != '' and answer.lower() != 'n': msg.interactive("Which one?") msg.interactive(" 'l' - loops") msg.interactive(" 'b' - blackholes") msg.interactive(" 'r' - routing consistency") msg.interactive(" 'c' - connectivity") msg.interactive(" 'o' - omega") answer = msg.raw_input(" ") result = None if answer.lower() == 'l': result = self.invariant_checker.check_loops() elif answer.lower() == 'b': result = self.invariant_checker.check_blackholes() elif answer.lower() == 'r': result = self.invariant_checker.check_routing_consistency() elif answer.lower() == 'c': result = self.invariant_checker.check_connectivity() elif answer.lower() == 'o': result = self.invariant_checker.check_correspondence(self.live_switches, self.live_links, self.access_links) else: log.warn("Unknown input...") if result is None: return else: msg.interactive("Result: %s" % str(result)) def dataplane_trace_prompt(self): if self.dataplane_trace: while True: answer = msg.raw_input('Feed in next dataplane event? [Ny]') if answer != '' and answer.lower() != 'n': self.inject_trace_event() else: break # ============================================ # # Bookkeeping methods # # ============================================ # @property def live_switches(self): """ Return the switch_impls which are currently up """ return self.switch_impls - self.failed_switches @property def live_links(self): return self.dataplane_links - self.cut_links # ============================================ # # Methods to trigger events # # ============================================ # def trigger_events(self): self.check_dataplane() self.check_controlplane() self.check_switch_crashes() self.check_timeouts() self.fuzz_traffic() def check_dataplane(self): ''' Decide whether to delay, drop, or deliver packets ''' for dp_event in set(self.panel.get_buffered_dp_events()): if self.random.random() < self.dataplane_delay_rate: msg.event("Delaying dataplane event") # (Monkey patch on a delay counter) if not hasattr(dp_event, "delayed_rounds"): dp_event.delayed_rounds = 0 dp_event.delayed_rounds += 1 elif self.random.random() < self.dataplane_drop_rate: msg.event("Dropping dataplane event") # Drop the message self.panel.drop_dp_event(dp_event) self.dropped_dp_events.append(dp_event) else: (next_hop, next_port) = self.panel.get_connected_port(dp_event.switch, dp_event.port) if type(dp_event.node) == Host or type(next_hop) == Host: # TODO: model access link failures: self.panel.permit_dp_event(dp_event) else: link = Link(dp_event.switch, dp_event.port, next_hop, next_port) if not link in self.cut_links: msg.event("Forwarding dataplane event") # Forward the message self.panel.permit_dp_event(dp_event) def check_controlplane(self): ''' Decide whether to delay or deliver packets ''' def check_deliver(switch_impl, type, give_permission): if self.random.random() < self.controlplane_delay_rate: log.debug("Delaying control plane %s for %s" % (type, str(switch_impl))) else: log.debug("Giving permission for control plane %s for %s" % (type, str(switch_impl))) give_permission() for switch_impl in self.live_switches: # Check reads # TODO: shouldn't be sticking our hands into switch_impl._connection for c in switch_impl.connections: if c.io_worker.has_pending_receives(): check_deliver(switch_impl, "receive", c.io_worker.permit_receive) # Check writes for c in switch_impl.connections: if c.io_worker.has_pending_sends(): check_deliver(switch_impl, "send", c.io_worker.permit_send) def check_switch_crashes(self): ''' Decide whether to crash or restart switches, links and controllers ''' def crash_switches(): crashed_this_round = set() for switch_impl in self.live_switches: if self.random.random() < self.switch_failure_rate: msg.event("Crashing switch_impl %s" % str(switch_impl)) switch_impl.fail() crashed_this_round.add(switch_impl) self.failed_switches.add(switch_impl) return crashed_this_round def restart_switches(crashed_this_round): for switch_impl in set(self.failed_switches): if switch_impl in crashed_this_round: continue if self.random.random() < self.switch_recovery_rate: msg.event("Rebooting switch_impl %s" % str(switch_impl)) switch_impl.recover() self.failed_switches.remove(switch_impl) def sever_links(): # TODO: model administratively down links? (OFPPC_PORT_DOWN) cut_this_round = set() for link in self.live_links: if self.random.random() < self.link_failure_rate: msg.event("Cutting link %s" % str(link)) self.cut_links.add(link) link.start_switch_impl.take_port_down(link.start_port) cut_this_round.add(link) return cut_this_round def repair_links(cut_this_round): for link in set(self.cut_links): if link in cut_this_round: continue if self.random.random() < self.link_recovery_rate: msg.event("Restoring link %s" % str(link)) link.start_switch_impl.bring_port_up(link.start_port) self.cut_links.remove(link) crashed_this_round = crash_switches() restart_switches(crashed_this_round) cut_this_round = sever_links() repair_links(cut_this_round) def check_timeouts(self): # Interpose on timeouts pass def inject_trace_event(self): ''' Precondition: --trace is set ''' if len(self.dataplane_trace) == 0: log.warn("No more trace inputs to inject!") return else: log.info("Injecting trace input") dp_event = self.dataplane_trace.pop(0) host = self.interface2host[dp_event.interface] if not host: log.warn("Host %s not present" % str(host)) return host.send(dp_event.interface, dp_event.packet) def fuzz_traffic(self): if not self.dataplane_trace: # randomly generate messages from switches for switch_impl in self.live_switches: if self.random.random() < self.traffic_generation_rate: if len(switch_impl.ports) > 0: msg.event("injecting a random packet") traffic_type = "icmp_ping" # Generates a packet, and feeds it to the switch_impl self.traffic_generator.generate(traffic_type, switch_impl)