def on_start(self):
    """Finish building the RideD middleware (if enabled) and start the sink."""
    # TODO: probably run this in the background?
    if self.rided is None:
        # Multicast disabled: nothing to construct, just run the parent start-up.
        super(RideDEventSink, self).on_start()
        return

    if not BUILD_RIDED_IN_INIT:
        # __init__ stashed the RideD constructor kwargs in a dict; build it now.
        assert isinstance(self.rided, dict)
        self.rided = RideD(**self.rided)
    assert isinstance(self.rided, RideD)

    # Rather than periodically update the topology, which in our experiments would
    # result in perfectly routing around all the failures due to 0-latency control
    # plane, we just update it once for now...
    self.timed_call(self.maintenance_interval, self.__class__.__maintain_topology, repeat=False)
    # self.timed_call(self.maintenance_interval, self.__class__.__maintain_topology, repeat=True)

    super(RideDEventSink, self).on_start()
def setUp(self):
    """Build a fixed campus topology, four hand-crafted MDMTs, and a RideD instance wired for the alerting tests."""
    # Our test topology is a basic campus network topology (constructed with the campus_topo_gen.py script) with:
    # 4 core, 2 buildings per core, 2 hosts/building, and 2 inter-building links;
    # TODO: see example diagram to visualize the relevant parts
    topo_file = os.path.join(os.path.split(__file__)[0], 'test_topo.json')
    self.topology = NetworkxSdnTopology(topo_file)
    # single server in the test topology acts as the multicast root / edge server
    self.root = self.topology.get_servers()[0]
    # self.topology.draw()

    # set up some manual MDMTs by just building networkx Graphs using collections of links
    # (each nx.Graph is constructed directly from an iterable of edge tuples)
    self.ntrees = 4
    self.mdmts = [
        # tree1
        nx.Graph(((self.root, 'c0'), ('c0', 'c1'), ('c1', 'b0'), ('b0', 'h0-b0'),
                  ('c0', 'c2'), ('c2', 'b1'), ('b1', 'h0-b1'), ('b1', 'h1-b1'),
                  ('c2', 'b3'), ('b3', 'h0-b3'))),
        # tree2
        nx.Graph(((self.root, 'c3'), ('c3', 'c2'), ('c2', 'b1'), ('b1', 'h0-b1'),
                  ('b1', 'h1-b1'), ('b1', 'b0'), ('b0', 'h0-b0'), ('b0', 'c1'),
                  ('c1', 'b5'), ('b5', 'b3'), ('b3', 'h0-b3'))),
        # tree3
        nx.Graph(((self.root, 'c0'), ('c0', 'c1'), ('c1', 'b0'), ('b0', 'h0-b0'),
                  ('b0', 'b1'), ('b1', 'h0-b1'), ('b1', 'h1-b1'),
                  (self.root, 'c3'), ('c3', 'c2'), ('c2', 'b3'), ('b3', 'h0-b3'))),
        # tree4
        nx.Graph(((self.root, 'c0'), ('c0', 'c1'), ('c1', 'b0'), ('b0', 'h0-b0'),
                  ('c2', 'b1'), ('b1', 'h0-b1'), ('b1', 'h1-b1'),
                  (self.root, 'c3'), ('c3', 'c2'), ('c2', 'b3'), ('b3', 'h0-b3')))
    ]
    # self.topology.draw_multicast_trees(self.mdmts[2:3])

    # one synthetic multicast address per tree: 'tree1'..'tree4'
    mdmt_addresses = ['tree%d' % (d + 1) for d in range(self.ntrees)]
    self.rided = RideD(topology_mgr=self.topology, ntrees=self.ntrees, dpid=self.root,
                       addresses=mdmt_addresses, tree_choosing_heuristic=RideD.MAX_LINK_IMPORTANCE,
                       # This test callback notifies us of subscribers reached and ensures the right MDMT was selected
                       alert_sending_callback=self.__send_alert_test_callback)

    # XXX: manually set the MDMTs to avoid calling RideD.update(), which will try to run SDN operations in addition
    # to creating the MDMTs using the construction algorithms
    self.rided.mdmts[ALERT_TOPIC] = self.mdmts
    for mdmt, addr in zip(self.mdmts, mdmt_addresses):
        self.rided.set_address_for_mdmt(mdmt, addr)

    # set up manual publisher routes (routes must be registered before publications are notified)
    self.publishers = ['h1-b5', 'h1-b1']
    self.publisher_routes = [['h1-b5', 'b5', 'c1', 'c0', self.root],
                             ['h1-b1', 'b1', 'c2', 'c3', self.root]]
    for pub_route in self.publisher_routes:
        self.rided.set_publisher_route(pub_route[0], pub_route)
    for pub in self.publishers:
        self.rided.notify_publication(pub)

    # register the subscribers
    self.subscribers = ['h0-b0', 'h0-b1', 'h1-b1', 'h0-b3']
    for sub in self.subscribers:
        self.rided.add_subscriber(sub, ALERT_TOPIC)

    # We expect the MDMTs to be selected (via 'importance' policy) in this order for the following tests...
    # (the last attempt is a tie among three trees, hence the 3-tuple)
    self.expected_mdmts = [('tree4', ), ('tree2', ), ('tree3', ), ('tree1', ), ('tree2', ),
                           ('tree1', 'tree3', 'tree4')]
    # ... based on these subscribers being reached during each attempt.
    self.subs_reached_at_attempt = [
        ('h0-b1', 'h1-b1'),              # 0
        tuple(), tuple(), tuple(),       # 1-3 no responses...
        ('h0-b3', ),                     # 4
        ('h0-b0', )                      # 5 ; all done!
    ]
    # NOTES about the test cases:
    # NOTE: we only do these tests for 'importance' since the others will have a tie between tree3/4
    # we should choose tree2 second due to update about subs reached...
    # because AlertContext tracks trees tried we should use tree3 third
    # furthermore, we should lastly try tree1 even though it had lowest importance!
    # then, we should try tree2 as the highest current importance after a notification since we've tried all of them
    # finally, since we have a tie among all the others
    self.attempt_num = 0
    self.alert = self.rided._make_new_alert(ALERT_MSG, ALERT_TOPIC)
def run_experiment(self):
    """Check what percentage of subscribers are still reachable from the server after the failure model has
    been applied by removing the failed_nodes and failed_links from each tree as well as a copy of the overall
    topology (for the purpose of establishing an upper bound oracle heuristic).

    We also explore the use of an intelligent multicast tree-choosing heuristic that picks the tree
    with the most overlap with the paths each publisher's sensor data packet arrived on.

    :rtype dict:
    """
    # IDEA: we can determine the reachability by the following:
    # for each topology, remove the failed nodes and links,
    # determine all reachable nodes in topology,
    # consider only those that are subscribers,
    # record % reachable by any/all topologies.
    # We also have different methods of choosing which tree to use
    # and two non-multicast comparison heuristics.
    # NOTE: the reachability from the whole topology (oracle) gives us an
    # upper bound on how well the edge server could possibly do,
    # even without using multicast.

    subscribers = set(self.subscribers)
    result = dict()
    heuristic = self.get_mcast_heuristic_name()
    # we'll record reachability for various choices of trees
    result[heuristic] = dict()
    failed_topology = self.get_failed_topology(self.topo.topo, self.failed_nodes, self.failed_links)

    # start up and configure RideD middleware for building/choosing trees
    # We need to specify dummy addresses that won't actually be used for anything.
    addresses = ["10.0.0.%d" % d for d in range(self.ntrees)]
    rided = RideD(self.topo, self.server, addresses, self.ntrees,
                  construction_algorithm=self.tree_construction_algorithm[0],
                  const_args=self.tree_construction_algorithm[1:])
    # HACK: since we never made an actual API for the controller, we just do this manually...
    for s in subscribers:
        rided.add_subscriber(s, PUBLICATION_TOPIC)

    # Build up the Successfully Traversed Topology (STT) from each publisher
    # by determining which path the packet would take in the functioning
    # topology and add its edges to the STT only if that path is
    # functioning in the failed topology.
    # BIG OH: O(T) + O(S), where S = |STT|

    # XXX: because the RideC implementation requires an actual SDN controller adapter, we just repeat the logic
    # for computing 'redirection' routes (publisher-->edge after cloud failure) here...
    if self.reroute_policy == 'shortest':
        pub_routes = {pub: self.topo.get_path(pub, self.server, weight=DISTANCE_METRIC)
                      for pub in self.publishers}
    else:
        if self.reroute_policy != 'disjoint':
            # FIX: the '%s' placeholder previously had no argument, so the log line
            # printed a literal '%s' and never named the unrecognized policy.
            log.error("unknown reroute_policy '%s'; defaulting to 'disjoint'...", self.reroute_policy)
        pub_routes = {p[0]: p for p in self.topo.get_multi_source_disjoint_paths(
            self.publishers, self.server, weight=DISTANCE_METRIC)}
        assert list(sorted(pub_routes.keys())) == list(sorted(self.publishers)), \
            "not all hosts accounted for in disjoint paths: %s" % pub_routes.values()

    # Determine which publishers successfully reached the edge to build the STT in Ride-D and report pub_rate
    pub_rate = 0
    for pub in self.publishers:
        path = pub_routes[pub]
        rided.set_publisher_route(pub, path)
        # a publication arrives only if it isn't randomly dropped AND its whole route survived the failures
        if random.random() >= self.error_rate and nx.is_simple_path(failed_topology, path):
            rided.notify_publication(pub)
            pub_rate += 1
    pub_rate /= float(len(self.publishers))
    result['pub_rate'] = pub_rate

    # build and get multicast trees
    trees = rided.build_mdmts()[PUBLICATION_TOPIC]
    # XXX: rather than use the install_mdmts API, which would try to install flow rules, we just set them directly
    rided.mdmts[PUBLICATION_TOPIC] = trees

    # record which heuristic we used
    for tree in trees:
        tree.graph['heuristic'] = self.get_mcast_heuristic_name()
        # sanity check that the returned trees reach all destinations
        assert all(nx.has_path(tree, self.server, sub) for sub in subscribers)

    # ORACLE
    # First, use a copy of whole topology as the 'oracle' heuristic,
    # which sees what subscribers are even reachable by ANY path.
    reach = self.get_oracle_reachability(subscribers, self.server, failed_topology)
    result['oracle'] = reach

    # UNICAST
    # Second, get the reachability for the 'unicast' heuristic,
    # which sees what subscribers are reachable on the failed topology
    # via the path they'd normally be reached on the original topology
    paths = [nx.shortest_path(self.topo.topo, self.server, s, weight=DISTANCE_METRIC)
             for s in subscribers]
    # record the cost of the paths whether they would succeed or not
    unicast_cost = sum(self.topo.topo[u][v].get(COST_METRIC, 1) for p in paths
                       for u, v in zip(p, p[1:]))
    # now filter only paths that are still functioning and record the reachability
    paths = [p for p in paths if nx.is_simple_path(failed_topology, p)]
    result['unicast'] = len(paths) / float(len(subscribers))
    # TODO: disjoint unicast paths comparison!

    # ALL TREES' REACHABILITIES: all, min, max, mean, stdev
    # Next, check all the redundant multicast trees together to get their respective (and aggregate) reachabilities
    topos_to_check = [self.get_failed_topology(t, self.failed_nodes, self.failed_links)
                      for t in trees]
    reaches = self.get_reachability(self.server, subscribers, topos_to_check)
    heuristic = trees[0].graph['heuristic']  # we assume all trees from same heuristic
    result[heuristic]['all'] = reaches[-1]
    reaches = reaches[:-1]
    result[heuristic]['max'] = max(reaches)
    result[heuristic]['min'] = min(reaches)
    result[heuristic]['mean'] = np.mean(reaches)
    result[heuristic]['stdev'] = np.std(reaches)

    # CHOSEN
    # Finally, check the tree chosen by the edge server heuristic(s)
    # for having the best estimated chance of data delivery
    choices = dict()
    for method in RideD.MDMT_SELECTION_POLICIES:
        alert_ctx = rided._make_new_alert("dummy msg", PUBLICATION_TOPIC)
        choices[method] = rided.get_best_mdmt(alert_ctx, method)
    for choice_method, best_tree in choices.items():
        best_tree_idx = trees.index(best_tree)
        reach = reaches[best_tree_idx]
        result[heuristic]['%s-chosen' % choice_method] = reach

    ### RECORDING METRICS ###
    # Record the distance to the subscribers in terms of # hops
    # TODO: make this latency instead?
    nhops = []
    for t in trees:
        for s in subscribers:
            nhops.append(len(nx.shortest_path(t, s, self.server)) - 1)
    result['nhops'] = dict(mean=np.mean(nhops), stdev=np.std(nhops),
                           min=min(nhops), max=max(nhops))

    # Record the pair-wise overlap between the trees
    # NOTE: this includes each tree's overlap with itself (i.e. its own size).
    tree_edges = [set(t.edges()) for t in trees]
    overlap = [len(t1.intersection(t2)) for t1 in tree_edges for t2 in tree_edges]
    result['overlap'] = sum(overlap)

    # TODO: try to get this working on topos > 20?
    # the ILP will need some work if we're going to get even the relaxed version running on large topologies
    # overlap_lower_bound = ilp_redundant_multicast(self.topo.topo, server, subscribers, len(trees), get_lower_bound=True)
    # result['overlap_lower_bound'] = overlap_lower_bound

    # Record the average size of the trees
    costs = [sum(e[2].get(COST_METRIC, 1) for e in t.edges(data=True)) for t in trees]
    result['cost'] = dict(mean=np.mean(costs), stdev=np.std(costs),
                          min=min(costs), max=max(costs), unicast=unicast_cost)

    return result
def __init__(self, broker,
             # RideD parameters
             # TODO: subclass RideD in order to avoid code repetition here for extracting parameters?
             dpid, addresses=None, topology_mgr='onos', ntrees=2,
             tree_choosing_heuristic='importance', tree_construction_algorithm=('red-blue',),
             max_retries=None,
             # XXX: rather than running a separate service that would intercept incoming publications matching the
             # specified flow for use in the STT, we simply wait for seismic picks and use them as if they're
             # incoming packets. This ignores other potential packets from those hosts, but this will have to do
             # for now since running such a separated service would require more systems programming than this...
             subscriptions=(SEISMIC_PICK_TOPIC,
                            # RideD gathers the publisher routes from RideC via events when they change
                            PUBLISHER_ROUTE_TOPIC,
                            ),
             maintenance_interval=10, multicast=True, dst_port=DEFAULT_COAP_PORT,
             topics_to_sink=(SEISMIC_ALERT_TOPIC,), **kwargs):
    """
    See also the parameters for RideD constructor!

    :param broker: event broker passed through to the parent event sink
    :param ntrees: # MDMTs to build (passed to RideD constructor); note that setting this to 0 disables multicast!
    :param addresses: iterable of network addresses (i.e. tuples of (str[ipv4_src_addr], udp_src_port)) that
           can be used to register multicast trees and send alert packets through them
           ****NOTE: we use udp_src_port rather than the expected dst_port because this allows the clients to
           respond to this port# and have the response routed via the proper MDMT
    :param dst_port: port number to send events to (NOTE: we expect all subscribers to listen on the same port OR
           for you to configure the flow rules to convert this port to the expected one before delivery to subscriber)
    :param topics_to_sink: a SensedEvent whose topic matches one in this list will be resiliently multicast
           delivered; others will be ignored
    :param maintenance_interval: seconds between running topology updates and reconstructing MDMTs if necessary,
           accounting for topology changes or new/removed subscribers
    :param multicast: if True (default unless ntrees==0), build RideD for using multicast; otherwise, subscribers
           are alerting one at a time (async) using unicast
    :param kwargs: passed through to the parent event sink constructor
    """
    super(RideDEventSink, self).__init__(broker, topics_to_sink=topics_to_sink,
                                         subscriptions=subscriptions, **kwargs)

    # Catalogue active subscribers' host addresses (indexed by topic with value being a set of subscribers)
    self.subscribers = dict()
    self.dst_port = dst_port
    self.maintenance_interval = maintenance_interval

    # If we need to do anything with the server right away or expect some logic to be called
    # that will not directly check whether the server is running currently, we should wait
    # for a CoapServerRunning event before accessing the actual server.
    # NOTE: make sure to do this here, not on_start, as we currently only send the ready notification once!
    ev = CoapServer.CoapServerRunning(None)
    self.subscribe(ev, callback=self.__class__.__on_coap_ready)

    # Store parameters for RideD resilient multicast middleware; we'll actually build it later since it takes a while...
    # ntrees == 0 (or any falsy value) forces unicast regardless of the 'multicast' flag.
    self.use_multicast = multicast if ntrees else False
    if self.use_multicast and addresses is None:
        raise NotImplementedError("you must specify the multicast 'addresses' parameter if multicast is enabled!")

    # If we aren't doing multicast, we can create a single CoapClient without a specified src_port/address and this
    # will be filled in for us...
    # COAPTHON-SPECIFIC: unclear that we'd be able to do this in all future versions...
    if not self.use_multicast:
        self.rided = None
        srv_ip = '10.0.0.1'
        self._coap_clients = {'unicast': CoapClient(server_hostname=srv_ip, server_port=self.dst_port,
                                                    confirmable_messages=not self.use_multicast)}

    # Configure RideD and necessary CoapClient instances...
    # Use a single client for EACH MDMT to connect with each server.  We do this so that we can specify the source
    # port and have the 'server' (remote subscribers) respond to this port# and therefore route responses along the
    # same path.  Hence, we need to ensure addresses contains some useable addresses or we'll get null exceptions!
    else:
        # cmd-line specified addresses might convert them to a list of lists, so make them tuples for hashing!
        addresses = [tuple(address) for address in addresses]

        # This callback is essentially the CoAP implementation for RideD: it uses CoAPthon to send a request to the
        # given address through a 'helper client' and register a callback for receiving subscriber responses and
        # notifying RideD of them.
        # NOTE: a different CoAP message is created for each alert re-try since they're sent as non-CONfirmable!
        do_send_cb = self.__sendto

        # We may opt to build RideD in on_start() instead depending on what resources we want available first...
        # until then, self.rided holds the kwargs dict that on_start() will expand into the RideD constructor.
        self.rided = dict(topology_mgr=topology_mgr, dpid=dpid, addresses=addresses, ntrees=ntrees,
                          tree_choosing_heuristic=tree_choosing_heuristic,
                          tree_construction_algorithm=tree_construction_algorithm,
                          alert_sending_callback=do_send_cb, max_retries=max_retries)
        if BUILD_RIDED_IN_INIT:
            self.rided = RideD(**self.rided)

        # NOTE: we store CoapClient instances in a dict so that we can index them by MDMT address for easily
        # accessing the proper instance for the chosen MDMT
        self._coap_clients = dict()
        for address in addresses:
            dst_ip, src_port = address
            self._coap_clients[address] = CoapClient(server_hostname=dst_ip, server_port=self.dst_port,
                                                     src_port=src_port,
                                                     confirmable_messages=not self.use_multicast)

    # Need to track outstanding alerts as we can only have a single one for each topic at a time
    # since they're updates: index them by topic --> AlertContext
    self._outstanding_alerts = dict()

    # Use thread locks to prevent simultaneous write access to data structures due to e.g.
    # handling multiple simultaneous subscription registrations.
    self.__subscriber_lock = Lock()