class TestMdmtSelection(unittest.TestCase):
    """Tests the RideD algorithms but NOT the SDN mechanisms.

    The fixture hand-builds four MDMTs, two publisher routes and four
    subscribers, then checks which MDMT RideD selects, both directly (via
    get_best_mdmt) and through the alert sending/retransmission path, where a
    test callback stands in for actually sending packets.
    """

    def setUp(self):
        """Build the topology, the manual MDMTs and a RideD instance wired to the test callback."""
        # Our test topology is a basic campus network topology (constructed with the campus_topo_gen.py script) with:
        # 4 core, 2 buildings per core, 2 hosts/building, and 2 inter-building links;
        # TODO: see example diagram to visualize the relevant parts
        topo_file = os.path.join(os.path.split(__file__)[0], 'test_topo.json')
        self.topology = NetworkxSdnTopology(topo_file)
        self.root = self.topology.get_servers()[0]
        # self.topology.draw()

        # set up some manual MDMTs by just building networkx Graphs using collections of links
        self.ntrees = 4
        self.mdmts = [
            # tree1
            nx.Graph((
                (self.root, 'c0'), ('c0', 'c1'), ('c1', 'b0'), ('b0', 'h0-b0'),
                ('c0', 'c2'), ('c2', 'b1'), ('b1', 'h0-b1'), ('b1', 'h1-b1'),
                ('c2', 'b3'), ('b3', 'h0-b3'))),
            # tree2
            nx.Graph((
                (self.root, 'c3'), ('c3', 'c2'), ('c2', 'b1'), ('b1', 'h0-b1'), ('b1', 'h1-b1'),
                ('b1', 'b0'), ('b0', 'h0-b0'), ('b0', 'c1'), ('c1', 'b5'), ('b5', 'b3'),
                ('b3', 'h0-b3'))),
            # tree3
            nx.Graph((
                (self.root, 'c0'), ('c0', 'c1'), ('c1', 'b0'), ('b0', 'h0-b0'),
                ('b0', 'b1'), ('b1', 'h0-b1'), ('b1', 'h1-b1'),
                (self.root, 'c3'), ('c3', 'c2'), ('c2', 'b3'), ('b3', 'h0-b3'))),
            # tree4
            nx.Graph((
                (self.root, 'c0'), ('c0', 'c1'), ('c1', 'b0'), ('b0', 'h0-b0'),
                ('c2', 'b1'), ('b1', 'h0-b1'), ('b1', 'h1-b1'),
                (self.root, 'c3'), ('c3', 'c2'), ('c2', 'b3'), ('b3', 'h0-b3'))),
        ]
        # self.topology.draw_multicast_trees(self.mdmts[2:3])
        mdmt_addresses = ['tree%d' % (d + 1) for d in range(self.ntrees)]

        self.rided = RideD(
            topology_mgr=self.topology,
            ntrees=self.ntrees,
            dpid=self.root,
            addresses=mdmt_addresses,
            tree_choosing_heuristic=RideD.MAX_LINK_IMPORTANCE,
            # This test callback notifies us of subscribers reached and ensures the right MDMT was selected
            alert_sending_callback=self.__send_alert_test_callback)

        # XXX: manually set the MDMTs to avoid calling RideD.update(), which will try to run SDN operations in addition
        # to creating the MDMTs using the construction algorithms
        self.rided.mdmts[ALERT_TOPIC] = self.mdmts
        for mdmt, addr in zip(self.mdmts, mdmt_addresses):
            self.rided.set_address_for_mdmt(mdmt, addr)

        # set up manual publisher routes
        self.publishers = ['h1-b5', 'h1-b1']
        self.publisher_routes = [
            ['h1-b5', 'b5', 'c1', 'c0', self.root],
            ['h1-b1', 'b1', 'c2', 'c3', self.root],
        ]
        for pub_route in self.publisher_routes:
            # the first hop of each route is the publisher itself
            self.rided.set_publisher_route(pub_route[0], pub_route)
        for pub in self.publishers:
            self.rided.notify_publication(pub)

        # register the subscribers
        self.subscribers = ['h0-b0', 'h0-b1', 'h1-b1', 'h0-b3']
        for sub in self.subscribers:
            self.rided.add_subscriber(sub, ALERT_TOPIC)

        # We expect the MDMTs to be selected (via 'importance' policy) in this order for the following tests...
        self.expected_mdmts = [
            ('tree4',),
            ('tree2',),
            ('tree3',),
            ('tree1',),
            ('tree2',),
            ('tree1', 'tree3', 'tree4'),
        ]
        # ... based on these subscribers being reached during each attempt.
        self.subs_reached_at_attempt = [
            ('h0-b1', 'h1-b1'),  # 0
            tuple(), tuple(), tuple(),  # 1-3 no responses...
            ('h0-b3',),  # 4
            ('h0-b0',),  # 5 ; all done!
        ]
        # NOTES about the test cases:
        # NOTE: we only do these tests for 'importance' since the others will have a tie between tree3/4
        # we should choose tree2 second due to update about subs reached...
        # because AlertContext tracks trees tried we should use tree3 third
        # furthermore, we should lastly try tree1 even though it had lowest importance!
        # then, we should try tree2 as the highest current importance after a notification since we've tried all of them
        # finally, since we have a tie among all the others, any of tree1/tree3/tree4 is accepted for the last attempt

        # index into expected_mdmts / subs_reached_at_attempt; advanced by the alert-sending callback
        self.attempt_num = 0
        self.alert = self.rided._make_new_alert(ALERT_MSG, ALERT_TOPIC)

    def test_basic_mdmt_selection(self):
        """Tests MDMT-selection (without alerting context) for the default policy by manually assigning MDMTs,
        publisher routes, notifying RideD about a few publications and verifying that the selected MDMT is the one
        expected given this information."""

        mdmt = self.rided.get_best_mdmt(self.alert, heuristic=self.rided.MAX_LINK_IMPORTANCE)
        self.assertIn(self.rided.get_address_for_mdmt(mdmt), self.expected_mdmts[0])

        mdmt = self.rided.get_best_mdmt(self.alert, heuristic=self.rided.MAX_OVERLAPPING_LINKS)
        self.assertEqual(self.rided.get_address_for_mdmt(mdmt), 'tree4')

        mdmt = self.rided.get_best_mdmt(self.alert, heuristic=self.rided.MIN_MISSING_LINKS)
        self.assertEqual(self.rided.get_address_for_mdmt(mdmt), 'tree4')

        # TODO: ENHANCE: additional test...
        # Now, if we reset the STT and change the publisher routes we should get different MDMTs
        # self.rided.stt_mgr.reset()
        # self.rided.set_publisher_route('h1-b5', ['h1-b5', 'b5', 'c1', 'c0', self.root])
        #
        # for pub in self.publishers:
        #     self.rided.notify_publication(pub)
        #
        # mdmt = self.rided.get_best_mdmt(ALERT_TOPIC, heuristic=self.rided.MAX_OVERLAPPING_LINKS)
        # self.assertEqual(self.rided.get_address_for_mdmt(mdmt), 'tree4')
        #
        # mdmt = self.rided.get_best_mdmt(ALERT_TOPIC, heuristic=self.rided.MAX_LINK_IMPORTANCE)
        # self.assertEqual(self.rided.get_address_for_mdmt(mdmt), 'tree4')
        #
        # mdmt = self.rided.get_best_mdmt(ALERT_TOPIC, heuristic=self.rided.MIN_MISSING_LINKS)
        # self.assertEqual(self.rided.get_address_for_mdmt(mdmt), 'tree4')

    def test_mdmt_selection_with_context(self):
        """Tests MDMT-selection WITH alerting context in a similar manner to the basic tests.
        Here we use an AlertContext object to change the MDMT choice based on claiming that some subscribers
        have already been alerted."""

        # NOTE: we're actually using the _do_send_alert method instead of manually recording and doing notifications.
        # The callback used to actually 'send the alert packet' (no network operations) will handle notifying subs.
        for attempt_num, _ in enumerate(self.subs_reached_at_attempt):
            mdmt = self.rided._do_send_alert(self.alert)
            self.assertIn(self.rided.get_address_for_mdmt(mdmt), self.expected_mdmts[attempt_num])

    #### TEST ACTUAL send_alert(...) API ######

    def test_send_alert(self):
        """
        Tests the main send_alert API that exercises everything previously tested along with the retransmit capability.
        This uses a custom testing callback instead of opening a socket and test servers to receive alerts.
        """

        expected_num_attempts = len(self.subs_reached_at_attempt)

        # Send the alert and ensure it took the right # retries
        alert = self.rided.send_alert(ALERT_MSG, ALERT_TOPIC, timeout=TIMEOUT,
                                      max_retries=expected_num_attempts + 1)
        # wait long enough for every expected (re)transmission to have happened
        sleep((expected_num_attempts + 1) * TIMEOUT)

        self.assertFalse(alert.active)
        self.assertEqual(self.attempt_num, expected_num_attempts)
        self.assertEqual(len(alert.subscribers_reached), len(self.subscribers))  # not all subs reached????

    def test_cancel_alert(self):
        """Ensure that cancelling alerts works properly by cancelling it before it finishes and verify that some
        subscribers remain unreached."""

        expected_num_attempts = len(self.subs_reached_at_attempt)

        alert = self.rided.send_alert(ALERT_MSG, ALERT_TOPIC, timeout=TIMEOUT,
                                      max_retries=expected_num_attempts + 1)
        # instead of waiting for it to finish, cancel the alert right before the last one gets sent
        sleep((expected_num_attempts - 1.5) * TIMEOUT)
        self.rided.cancel_alert(alert)
        sleep(TIMEOUT)

        # Now we should note that the last alert message wasn't sent!
        self.assertFalse(alert.active)
        self.assertEqual(self.attempt_num, expected_num_attempts - 1)
        self.assertEqual(len(alert.subscribers_reached), len(self.subscribers) - 1)

    def test_send_alert_unsuccessfully(self):
        """Ensure an alert stops retrying once max_retries is exhausted, leaving one subscriber unreached."""

        expected_num_attempts = len(self.subs_reached_at_attempt)

        # since we set max_retries to be less than the number required this alert should stop early despite not
        # reaching all subs
        alert = self.rided.send_alert(ALERT_MSG, ALERT_TOPIC, timeout=TIMEOUT,
                                      max_retries=expected_num_attempts - 2)
        sleep((expected_num_attempts + 1) * TIMEOUT)

        self.assertFalse(alert.active)
        self.assertEqual(self.attempt_num, expected_num_attempts - 1)
        self.assertEqual(len(alert.subscribers_reached), len(self.subscribers) - 1)  # not all subs reached????

    def __send_alert_test_callback(self, alert, mdmt):
        """
        Custom callback to handle verifying that the expected MDMT was used in between each attempt and notifies RideD
        of which subscribers were reached.
        :param alert:
        :type alert: RideD.AlertContext
        :param mdmt: the MDMT that RideD selected for this attempt
        :return:
        """

        self.assertTrue(alert.active, "__send_alert_test_callback should not fire if alert isn't active!")

        expected_mdmt = self.expected_mdmts[self.attempt_num]
        # Report the address (the value actually compared) rather than the raw graph object, so a failure message
        # shows which tree was selected.
        selected_address = self.rided.get_address_for_mdmt(mdmt)
        self.assertIn(selected_address, expected_mdmt,
                      "incorrect MDMT selected for attempt %d: expected one of %s but got %s"
                      % (self.attempt_num, expected_mdmt, selected_address))

        for s in self.subs_reached_at_attempt[self.attempt_num]:
            # XXX: because this callback is fired while the alert's thread_lock is acquired, we have to do this
            # from inside another thread so that it will run after this callback returns.  Otherwise, deadlock!
            # self.rided.notify_alert_response(s, alert, mdmt)
            Thread(target=self.rided.notify_alert_response, args=(s, alert, mdmt)).start()

        self.attempt_num += 1

    # ENHANCE: test_send_alert_multi_threaded????
    # ENHANCE: test_send_alert_network_socket

    ## helper functions

    def _build_mdmts(self, subscribers=None):
        """Build MDMTs via RideD's construction algorithms, store them on the RideD instance and return them."""
        mdmts = self.rided.build_mdmts(subscribers=subscribers)
        self.rided.mdmts = mdmts
        return mdmts
def run_experiment(self):
    """Check what percentage of subscribers are still reachable from the server after the failure model has been
    applied by removing the failed_nodes and failed_links from each tree as well as a copy of the overall topology
    (for the purpose of establishing an upper bound oracle heuristic).

    We also explore the use of an intelligent multicast tree-choosing heuristic that picks the tree with the most
    overlap with the paths each publisher's sensor data packet arrived on.

    The returned dict contains the keys 'pub_rate', 'oracle', 'unicast', 'nhops', 'overlap' and 'cost', plus one
    entry keyed by the multicast heuristic name holding the per-tree reachability statistics
    ('all', 'max', 'min', 'mean', 'stdev' and one '<policy>-chosen' entry per MDMT selection policy).

    :rtype dict:
    """

    # IDEA: we can determine the reachability by the following:
    # for each topology, remove the failed nodes and links,
    # determine all reachable nodes in topology,
    # consider only those that are subscribers,
    # record % reachable by any/all topologies.
    # We also have different methods of choosing which tree to use
    # and two non-multicast comparison heuristics.
    # NOTE: the reachability from the whole topology (oracle) gives us an
    # upper bound on how well the edge server could possibly do,
    # even without using multicast.

    subscribers = set(self.subscribers)
    result = dict()
    heuristic = self.get_mcast_heuristic_name()
    # we'll record reachability for various choices of trees
    result[heuristic] = dict()
    failed_topology = self.get_failed_topology(self.topo.topo, self.failed_nodes, self.failed_links)

    # start up and configure RideD middleware for building/choosing trees
    # We need to specify dummy addresses that won't actually be used for anything.
    addresses = ["10.0.0.%d" % d for d in range(self.ntrees)]
    rided = RideD(self.topo, self.server, addresses, self.ntrees,
                  construction_algorithm=self.tree_construction_algorithm[0],
                  const_args=self.tree_construction_algorithm[1:])

    # HACK: since we never made an actual API for the controller, we just do this manually...
    for s in subscribers:
        rided.add_subscriber(s, PUBLICATION_TOPIC)

    # Build up the Successfully Traversed Topology (STT) from each publisher
    # by determining which path the packet would take in the functioning
    # topology and add its edges to the STT only if that path is
    # functioning in the failed topology.
    # BIG OH: O(T) + O(S), where S = |STT|

    # XXX: because the RideC implementation requires an actual SDN controller adapter, we just repeat the logic
    # for computing 'redirection' routes (publisher-->edge after cloud failure) here...
    if self.reroute_policy == 'shortest':
        pub_routes = {pub: self.topo.get_path(pub, self.server, weight=DISTANCE_METRIC)
                      for pub in self.publishers}
    else:
        if self.reroute_policy != 'disjoint':
            # Pass the policy as the logging argument so the '%s' placeholder is actually filled in.
            log.error("unknown reroute_policy '%s'; defaulting to 'disjoint'...", self.reroute_policy)
        disjoint_paths = self.topo.get_multi_source_disjoint_paths(self.publishers, self.server,
                                                                   weight=DISTANCE_METRIC)
        # each path starts at its publisher, so key the routes by the first hop
        pub_routes = {p[0]: p for p in disjoint_paths}
        assert list(sorted(pub_routes.keys())) == list(sorted(self.publishers)), \
            "not all hosts accounted for in disjoint paths: %s" % pub_routes.values()

    # Determine which publishers successfully reached the edge to build the STT in Ride-D and report pub_rate
    pub_rate = 0
    for pub in self.publishers:
        path = pub_routes[pub]
        rided.set_publisher_route(pub, path)
        # a publication only counts if it survives the random error draw AND its route is intact after failures
        if random.random() >= self.error_rate and nx.is_simple_path(failed_topology, path):
            rided.notify_publication(pub)
            pub_rate += 1
    pub_rate /= float(len(self.publishers))
    result['pub_rate'] = pub_rate

    # build and get multicast trees
    trees = rided.build_mdmts()[PUBLICATION_TOPIC]
    # XXX: rather than use the install_mdmts API, which would try to install flow rules, we just set them directly
    rided.mdmts[PUBLICATION_TOPIC] = trees
    # record which heuristic we used
    for tree in trees:
        tree.graph['heuristic'] = self.get_mcast_heuristic_name()
        # sanity check that the returned trees reach all destinations
        assert all(nx.has_path(tree, self.server, sub) for sub in subscribers)

    # ORACLE
    # First, use a copy of whole topology as the 'oracle' heuristic,
    # which sees what subscribers are even reachable by ANY path.
    reach = self.get_oracle_reachability(subscribers, self.server, failed_topology)
    result['oracle'] = reach

    # UNICAST
    # Second, get the reachability for the 'unicast' heuristic,
    # which sees what subscribers are reachable on the failed topology
    # via the path they'd normally be reached on the original topology
    paths = [nx.shortest_path(self.topo.topo, self.server, s, weight=DISTANCE_METRIC) for s in subscribers]
    # record the cost of the paths whether they would succeed or not
    unicast_cost = sum(self.topo.topo[u][v].get(COST_METRIC, 1)
                       for p in paths
                       for u, v in zip(p, p[1:]))
    # now filter only paths that are still functioning and record the reachability
    paths = [p for p in paths if nx.is_simple_path(failed_topology, p)]
    result['unicast'] = len(paths) / float(len(subscribers))

    # TODO: disjoint unicast paths comparison!

    # ALL TREES' REACHABILITIES: all, min, max, mean, stdev
    # Next, check all the redundant multicast trees together to get their respective (and aggregate) reachabilities
    topos_to_check = [self.get_failed_topology(t, self.failed_nodes, self.failed_links) for t in trees]
    reaches = self.get_reachability(self.server, subscribers, topos_to_check)
    heuristic = trees[0].graph['heuristic']  # we assume all trees from same heuristic
    # the last entry is recorded as the aggregate ('all'); the remaining entries are indexed per tree below
    result[heuristic]['all'] = reaches[-1]
    reaches = reaches[:-1]
    result[heuristic]['max'] = max(reaches)
    result[heuristic]['min'] = min(reaches)
    result[heuristic]['mean'] = np.mean(reaches)
    result[heuristic]['stdev'] = np.std(reaches)

    # CHOSEN
    # Finally, check the tree chosen by the edge server heuristic(s)
    # for having the best estimated chance of data delivery
    choices = dict()
    for method in RideD.MDMT_SELECTION_POLICIES:
        # a fresh alert context per policy
        alert_ctx = rided._make_new_alert("dummy msg", PUBLICATION_TOPIC)
        choices[method] = rided.get_best_mdmt(alert_ctx, method)

    for choice_method, best_tree in choices.items():
        best_tree_idx = trees.index(best_tree)
        reach = reaches[best_tree_idx]
        result[heuristic]['%s-chosen' % choice_method] = reach

    ### RECORDING METRICS ###

    # Record the distance to the subscribers in terms of # hops
    # TODO: make this latency instead?
    nhops = []
    for t in trees:
        for s in subscribers:
            nhops.append(len(nx.shortest_path(t, s, self.server)) - 1)
    result['nhops'] = dict(mean=np.mean(nhops), stdev=np.std(nhops), min=min(nhops), max=max(nhops))

    # Record the pair-wise overlap between the trees
    # NOTE(review): this sums over every ordered pair, including each tree with itself, so the total counts every
    # tree's own edges once and each distinct pair twice -- confirm that this is the intended metric.
    tree_edges = [set(t.edges()) for t in trees]
    overlap = [len(t1.intersection(t2)) for t1 in tree_edges for t2 in tree_edges]
    result['overlap'] = sum(overlap)

    # TODO: try to get this working on topos > 20?
    # the ILP will need some work if we're going to get even the relaxed version running on large topologies
    # overlap_lower_bound = ilp_redundant_multicast(self.topo.topo, server, subscribers, len(trees), get_lower_bound=True)
    # result['overlap_lower_bound'] = overlap_lower_bound

    # Record the average size of the trees
    costs = [sum(e[2].get(COST_METRIC, 1) for e in t.edges(data=True)) for t in trees]
    result['cost'] = dict(mean=np.mean(costs), stdev=np.std(costs), min=min(costs), max=max(costs),
                          unicast=unicast_cost)

    return result