def exposed_get_fourid_neighbor_xhost(self, type, src):
    """Return an 'RE AD:<ad> IP:<fourid> AD:<ad> HID:<hid>' address string.

    type -- '6RD', '4ID' or 'SDN'; selects which pair of gateway getters is used.
    src  -- truthy/falsy selector for which end is described; it is inverted
            when the calling host (self._host) is one of CLIENTS.

    The AD is fetched from one gateway and the fourid from the other; the
    trailing 'AD:.. HID:..' part comes from the CA host when src is truthy
    and from the CB host otherwise.
    """
    printtime("<<<< GET 4ID NEIGHBORS >>>>")

    gateway_getters = {
        '6RD': (self.exposed_get_sixrd_A, self.exposed_get_sixrd_B),
        '4ID': (self.exposed_get_fourid_GA, self.exposed_get_fourid_GB),
        'SDN': (self.exposed_get_sdn_A, self.exposed_get_sdn_B),
    }
    getters = gateway_getters.get(type)
    if getters:
        # For any other type gA/gB stay unbound, exactly as in the original
        # chain of independent `if` statements.
        gA = getters[0]()
        gB = getters[1]()

    print("GETTING FROM: %s" % self.exposed_get_fourid_CA())
    ca_ad = rpc(self.exposed_get_fourid_CA(), 'get_ad', ())
    ca_hid = rpc(self.exposed_get_fourid_CA(), 'get_hid', ())
    ca_suffix = 'AD:%s HID:%s' % (ca_ad, ca_hid)

    print("GETTING FROM: %s" % self.exposed_get_fourid_CB())
    cb_ad = rpc(self.exposed_get_fourid_CB(), 'get_ad', ())
    cb_hid = rpc(self.exposed_get_fourid_CB(), 'get_hid', ())
    cb_suffix = 'AD:%s HID:%s' % (cb_ad, cb_hid)

    if self._host in CLIENTS:
        src = not src

    if src:
        ad_source, fourid_source, suffix = gB, gA, ca_suffix
    else:
        ad_source, fourid_source, suffix = gA, gB, cb_suffix

    print("GETTING FROM: %s" % ad_source)
    ad = rpc(ad_source, 'get_ad', ())
    print("GETTING FROM: %s" % fourid_source)
    fourid = rpc(fourid_source, 'get_fourid', ())
    return 'RE AD:%s IP:%s %s' % (ad, fourid, suffix)
# exposed_get_commands(self, host=None): build the list of command strings a node should run.
#   host -- node asking for commands; falls back to self._host when None.
# Visible behaviour:
#   * logs the check-in when 'master' is in PRINT_VERB;
#   * when FOURID_EXPERIMENT is set: prints TYPE, starts the list with
#     self.exposed_get_xianet(host=host), appends a wait-for-neighbors command and a
#     gather_fourid_stats command for TYPE, and returns the list;
#   * otherwise a host in BACKBONES gets only its xianet command;
#   * a host in CLIENTS (while NEW_EXP_LOCK is not held) gets the gather-stats /
#     soft-restart / xianet / wait / gather-xstats sequence, plus a webserver or
#     browser-stats command when APP_EXPERIMENT is set.
# The strings are Python source that reference `self`, `rpc`, `my_name`, `MASTER_SERVER`, etc.
# NOTE(review): the original indentation is lost here; whether the gather_fourid_stats
# append sits inside `if CLIENTS:` and whether the final `return cmd` sits inside
# `if not NEW_EXP_LOCK.locked():` cannot be determined from this line -- confirm
# against the upstream file before restructuring.
def exposed_get_commands(self, host = None): host = self._host if host == None else host if 'master' in PRINT_VERB: printtime('MASTER: %s checked in for commands' % host) if FOURID_EXPERIMENT: print TYPE cmd = [self.exposed_get_xianet(host=host)] if CLIENTS: cmd += ["self.exposed_wait_for_neighbors(rpc(MASTER_SERVER, 'get_experiment_nodes', ()), 'waiting for xianet to start on all nodes')"] cmd += ["self.exposed_gather_fourid_stats('%s')" % TYPE] return cmd if host in BACKBONES: return [self.exposed_get_xianet(host = host)] elif host in CLIENTS: if not NEW_EXP_LOCK.locked(): cmd = ["my_backbone = self.exposed_gather_stats()[1]"] cmd += ["self.exposed_wait_for_neighbors([my_backbone], 'waiting for backbone: %s' % my_backbone)"] cmd += ["rpc(my_backbone, 'soft_restart', (my_name, ))"] cmd += ["xianetcmd = rpc(MASTER_SERVER, 'get_xianet', (my_backbone, ))"] cmd += ["printtime(xianetcmd)"] cmd += ["exec(xianetcmd)"] cmd += ["self.exposed_wait_for_neighbors(['localhost'], 'waiting for xianet to start')"] cmd += ["my_neighbor = rpc(MASTER_SERVER, 'get_neighbor_host', ())"] cmd += ["printtime(my_neighbor)"] cmd += ["self.exposed_wait_for_neighbors([my_backbone], 'waiting for backbone: %s' % my_backbone)"] cmd += ["self.exposed_wait_for_neighbors([my_neighbor], 'waiting for neighbor: %s' % my_neighbor)"] cmd += ["self.exposed_gather_xstats()"] if APP_EXPERIMENT: if host == CLIENTS[0]: cmd += ["check_output('%s')" % WEBSERVER_CMD] else: cmd += ["self.exposed_gather_browser_stats('tunneling')"] return cmd
def exposed_xstats(self, xping, xhops):
    """Store the xping/xhops result reported by the calling client.

    The entry is written to STATSD[tuple(CLIENTS)] under 'AB' when the
    caller is CLIENTS[0] and under 'BA' otherwise, as the tuple
    (caller, other client, xping, xhops).
    """
    experiment = tuple(CLIENTS)
    if self._host == CLIENTS[0]:
        direction, peer = 'AB', CLIENTS[1]
    else:
        direction, peer = 'BA', CLIENTS[0]
    STATSD[experiment][direction] = (self._host, peer, xping, xhops)
    if 'xstats' in PRINT_VERB:
        printtime('%s:\t %s\n' % (experiment, STATSD[experiment]))
# exposed_run_commands(self): fetch this node's command list from the master and run it.
# Calls rpc(MASTER_SERVER, 'get_commands', ()), logs the received list, then logs and
# exec()s each command string in order, inside this method's own scope.
# NOTE(review): the command strings built by exposed_get_commands assign and later read
# names such as my_backbone / xianetcmd / my_neighbor and refer to `self`, so they depend
# on all sharing this function's namespace -- do not rename locals or move the exec.
# NOTE(review): exec() runs whatever the master returns; this trusts the master completely.
# NOTE(review): 'stopping local processes' is only logged; nothing is stopped in this block.
def exposed_run_commands(self): printtime('stopping local processes') printtime('requesting commands!') commands = rpc(MASTER_SERVER, 'get_commands', ()) printtime('commands received!') printtime('commands: %s' % commands) for command in commands: printtime(command) exec(command)
def exposed_gather_stats(self):
    """Ping the candidate backbones, traceroute the first result and report it.

    Returns a two-element list: a human-readable summary string and the
    name of the backbone taken from the first entry of multi_ping's output.
    """
    printtime('<<<<GATHER STATS>>>>')
    candidates = rpc(MASTER_SERVER, 'get_backbone', ())
    first_result = multi_ping(candidates)[0]
    latency = first_result[0]
    my_backbone = first_result[1]
    hops = traceroute(my_backbone)
    report = (my_backbone, latency, hops)
    rpc(MASTER_SERVER, 'stats', report)
    return ['Sent stats: (%s, %s, %s)' % report, my_backbone]
def run(self):
    """Hard-restart every backbone through the local RPC server.

    Each call is retried, one second apart, until it stops raising; every
    failure is logged with printtime.
    """
    for backbone in BACKBONES:
        restarted = False
        while not restarted:
            try:
                rpc('localhost', 'hard_restart', (backbone,))
            except Exception as e:
                printtime('%s' % e)
                time.sleep(1)
            else:
                restarted = True
def exposed_error(self, msg, host):
    """Handle an error report: log it, drop a bad non-backbone host, start a new experiment.

    msg  -- error text to log.
    host -- host the error concerns; self._host is used when it is None.

    Nothing beyond logging happens when SINGLE_EXPERIMENT is set.
    """
    printtime('<<<< %s (error!): %s >>>>' % (host, msg))
    if not SINGLE_EXPERIMENT:
        if host == None:
            host = self._host
        if host not in BACKBONES:
            printtime('<<<< Remvoing bad host: %s from NAMES >>>>' % host)
            # Keep this misbehaving host out of further experiments.
            NAMES.remove(HOSTNAME_LOOKUP[host])
        self.exposed_new_exp(host=host)
def exposed_error(self, msg, host):
    """Handle an error report: log it, drop a bad non-backbone host, start a new experiment.

    msg  -- error text to log.
    host -- host the error concerns; self._host is used when it is None.

    Nothing beyond logging happens when SINGLE_EXPERIMENT is set.
    """
    printtime("<<<< %s (error!): %s >>>>" % (host, msg))
    if not SINGLE_EXPERIMENT:
        if host == None:
            host = self._host
        if host not in BACKBONES:
            printtime("<<<< Remvoing bad host: %s from NAMES >>>>" % host)
            # Keep this misbehaving host out of further experiments.
            NAMES.remove(HOSTNAME_LOOKUP[host])
        self.exposed_new_exp(host=host)
def exposed_xstats(self, xping, xhops):
    """Store the xping/xhops result reported by the calling client.

    The entry is written to STATSD[tuple(CLIENTS)] under "AB" when the
    caller is CLIENTS[0] and under "BA" otherwise, as the tuple
    (caller, other client, xping, xhops).
    """
    experiment = tuple(CLIENTS)
    if self._host == CLIENTS[0]:
        direction, peer = "AB", CLIENTS[1]
    else:
        direction, peer = "BA", CLIENTS[0]
    STATSD[experiment][direction] = (self._host, peer, xping, xhops)
    if "xstats" in PRINT_VERB:
        printtime("%s:\t %s\n" % (experiment, STATSD[experiment]))
def run(self):
    """Hard-restart every backbone through the local RPC server.

    Each call is retried, one second apart, until it stops raising; every
    failure is logged with printtime.
    """
    for backbone in BACKBONES:
        restarted = False
        while not restarted:
            try:
                rpc("localhost", "hard_restart", (backbone,))
            except Exception as e:
                printtime("%s" % e)
                time.sleep(1)
            else:
                restarted = True
def run(self):
    """Trigger 'run_commands' on the local RPC server, retrying on failure.

    The attempt is repeated once per second for as long as FINISH_EVENT is
    set; the loop ends after the first call that does not raise.
    """
    while FINISH_EVENT.isSet():
        try:
            rpc('localhost', 'run_commands', ())
        except Exception as e:
            printtime('%s' % e)
            # rpc(MASTER_SERVER, 'error', ('command broke?', my_name))
            time.sleep(1)
            continue
        break
def exposed_get_neighbor_xhost(self):
    """Return the 'RE AD:<ad> HID:<hid>' address of the caller's peer client.

    The peer is the first entry of CLIENTS that differs from the calling
    host (self._host). Returns None when the caller is not in CLIENTS.
    Raises IndexError when CLIENTS contains no other host.

    The AD/HID lookups are retried until they succeed. Unlike the previous
    bare ``except: pass`` loop, a failed attempt is logged and followed by a
    one-second pause (matching the other retry loops in this file), and
    KeyboardInterrupt/SystemExit are no longer swallowed.
    """
    printtime("<<<< GET NEIGHBORS >>>>")
    if self._host not in CLIENTS:
        return None
    neighbor = [client for client in CLIENTS if client != self._host][0]
    while True:
        try:
            return "RE AD:%s HID:%s" % (rpc(neighbor, "get_ad", ()), rpc(neighbor, "get_hid", ()))
        except Exception as e:
            # The neighbor is probably not up yet: report and back off
            # instead of spinning at full speed.
            printtime("%s" % e)
            time.sleep(1)
def run(self):
    """Hard-restart every node listed in NODES through the local RPC server.

    NODES is walked as a sequence of groups, each a sequence of nodes. A
    failing call is logged and retried after one second until it succeeds.
    """
    for group in NODES:
        for node in group:
            pending = True
            while pending:
                try:
                    rpc('localhost', 'hard_restart', (node,))
                except Exception as e:
                    printtime('%s' % e)
                    time.sleep(1)
                else:
                    pending = False
def exposed_wait_for_neighbors(self, neighbors, msg):
    """Block until a neighbor answers 'get_hid', and return that answer.

    neighbors -- hosts to poll, tried in order, cycling until one responds.
    msg       -- text logged after each failed attempt.

    Returns the HID of the first neighbor that responds, or None when
    *neighbors* is empty (previously an empty list spun forever at full
    CPU because the inner loop never ran and never slept).

    NOTE(review): the function returns on the FIRST successful response, it
    does not wait for every listed neighbor; one caller's message says
    'waiting for xianet to start on all nodes' -- confirm which is intended.
    """
    if not neighbors:
        return None
    while True:
        for neighbor in neighbors:
            try:
                printtime('waiting on: %s' % neighbor)
                return rpc(neighbor, 'get_hid', ())
            except Exception:
                # Only ordinary errors are retried; Ctrl-C / SystemExit
                # now propagate instead of being swallowed.
                printtime(msg)
                time.sleep(1)
def exposed_get_neighbor_xhost(self):
    """Return the 'RE AD:<ad> HID:<hid>' address of the caller's peer client.

    The peer is the first entry of CLIENTS that differs from the calling
    host (self._host). Returns None when the caller is not in CLIENTS.
    Raises IndexError when CLIENTS contains no other host.

    The AD/HID lookups are retried until they succeed. Unlike the previous
    bare ``except: pass`` loop, a failed attempt is logged and followed by a
    one-second pause (matching the other retry loops in this file), and
    KeyboardInterrupt/SystemExit are no longer swallowed.
    """
    printtime("<<<< GET NEIGHBORS >>>>")
    if self._host not in CLIENTS:
        return None
    neighbor = [client for client in CLIENTS if client != self._host][0]
    while True:
        try:
            return 'RE AD:%s HID:%s' % (rpc(neighbor, 'get_ad', ()), rpc(neighbor, 'get_hid', ()))
        except Exception as e:
            # The neighbor is probably not up yet: report and back off
            # instead of spinning at full speed.
            printtime('%s' % e)
            time.sleep(1)
# xping(neighbor, tryUntilSuccess=False, src=None): measure latency to `neighbor` with the xping binary.
#   neighbor        -- destination string, passed quoted on the command line.
#   tryUntilSuccess -- when false, a failure of the second (long) xping run is not retried.
#   src             -- optional source string, added as -s "<src>".
# Visible steps: a `-t 5` probe is repeated until its output can be parsed; then a
# `-t 30 -i XPING_INTERVAL -c XPING_COUNT` run is made; every 'time=' value in its output is
# collected, the five smallest are averaged, and the result is formatted with "%.3f".
# A parsing failure yields -1 instead of an average.
# NOTE(review): the first probe's bare `except: pass` retries forever without any delay.
# NOTE(review): if check_output fails on the very first long run with tryUntilSuccess false,
# `out` still holds the short probe's output when it is printed and parsed.
# NOTE(review): indentation is lost; whether `stat = "%.3f" % stat` belongs to the except
# branch or runs unconditionally cannot be determined here. The comparison
# `stat == '='` looks like a leftover from the commented-out summary-line parser -- confirm.
def xping(neighbor,tryUntilSuccess=False,src=None): xpingcmd = '/home/cmu_xia/fedora-bin/xia-core/bin/xping' if src: xpingcmd = '%s -s "%s"' % (xpingcmd, src) while True: s = '%s -t 5 "%s"' % (xpingcmd, neighbor) while True: try: printtime(s) out = check_output(s) printtime(out) stat = "%.3f" % float(out[0].split("\n")[-2].split('=')[1].split('/')[1]) break except: pass s = '%s -t 30 -i %s -c %s "%s"' % (xpingcmd, XPING_INTERVAL, XPING_COUNT, neighbor) while True: try: printtime(s) out = check_output(s) break except: if not tryUntilSuccess: break printtime(out) try: l = sorted([float(x.split('time=')[1].split(' ')[0]) for x in out[0].split('\n') if 'time=' in x])[:5] print l stat = sum(l) / len(l) #stat = "%.3f" % float(out[0].split("\n")[-2].split('=')[1].split('/')[1]) except: stat = -1 stat = "%.3f" % stat stat = -1 if stat == '=' else stat return stat
def exposed_fidstats(self, type, neighbor, ping, hops):
    """Record one measurement for the current experiment.

    type     -- label such as '6RD-AB'; the part before the first '-' must
                be '6RD', '4ID' or 'SDN' (ValueError otherwise) and selects
                the slot inside STATSD[tuple(CLIENTS)].
    neighbor, ping, hops -- stored together with the calling host.
    """
    experiment = tuple(CLIENTS)
    # Original author's sketch of the collected data (illustrative only; the
    # code below stores each tuple under its `type` label):
    #   (DC, NYC, UMassD, UNC, MIT) -->
    #     [['6RD', ('6RD-AB', 'KC', 'DC', '40.000', 4),
    #              ('6RD-GG', 'Houston', 'NYC', '40.243', 14),
    #              ('6RD-BA', 'DC', 'KC', '40.000', 4),
    #              ('6RD-AG', 'KC', 'Houston', '30.000', 1),
    #              ('6RD-BG', 'DC', 'NYC', '10.000', 1)],
    #      ['4ID', ...], ['SDN', ...]]
    family = type.split('-')[0]
    slot = ['6RD', '4ID', 'SDN'].index(family)
    STATSD[experiment][slot][type] = (self._host, neighbor, ping, hops)
    if 'stats' in PRINT_VERB:
        printtime('%s:\t %s\n' % (experiment, STATSD[experiment]))
def run(self):
    """Periodically benchmark the nodes the master asks about and report the results.

    While FINISH_EVENT is set: fetch the node list from the master, skip
    nodes for which check_neighbor returns "", run `ab` against the rest,
    parse the integer transfer rate from the "Transfer rate" line and send
    (node, rate) to the master. Any exception in a round is printed and the
    round is abandoned; rounds are DISCOVERY_PERIOD seconds apart.
    """
    while FINISH_EVENT.isSet():
        try:
            printtime('<<<<DISCOVERY>>>>')
            for node in rpc(MASTER_SERVER, 'get_nodes_to_check', ()):
                if check_neighbor(node) != "":
                    bench_cmd = 'ab http://%s:%d/test | grep "Transfer rate"' % (node, HTTP_PORT)
                    out = check_output(bench_cmd)[0]
                    printtime(out)
                    rate_text = out.split(':')[1].split('[')[0].strip()
                    stats = (node, int(float(rate_text)))
                    rpc(MASTER_SERVER, 'discovery_stats', (stats,))
        except Exception as e:
            print(e)
        time.sleep(DISCOVERY_PERIOD)
def exposed_get_proxy_address(self, type):
    """Return the 'RE AD:.. IP:.. AD:.. HID:.. SID:<PROXY_SID>' proxy address string.

    The leading AD comes from the gateway host of BACKBONES[0] for *type*,
    the IP (fourid) from the gateway host of the calling host, and the
    second AD/HID from the calling host itself. Any failure is retried
    after one second, indefinitely.
    """
    printtime("<<<< GET PROXY ADDR >>>>")
    me = CLIENTS[0]  # unused below; the lookup is kept so an empty CLIENTS still raises here
    neighbor = BACKBONES[0]
    while True:
        try:
            neighbor_gateway = self.exposed_get_gateway_host(type, neighbor)
            gad = "AD:%s" % rpc(neighbor_gateway, "get_ad", ())
            ad = "AD:%s" % rpc(self._host, "get_ad", ())
            hid = "HID:%s" % rpc(self._host, "get_hid", ())
            own_gateway = self.exposed_get_gateway_host(type, self._host)
            fourid = "IP:%s" % rpc(own_gateway, "get_fourid", ())
            fields = (gad, fourid, ad, hid, PROXY_SID)
            return "RE %s %s %s %s SID:%s" % fields
        except:
            time.sleep(1)
            print("Trying to get proxy addres...")
def exposed_fidstats(self, type, neighbor, ping, hops):
    """Record one measurement for the current experiment.

    type     -- label such as "6RD-AB"; the part before the first "-" must
                be "6RD", "4ID" or "SDN" (ValueError otherwise) and selects
                the slot inside STATSD[tuple(CLIENTS)].
    neighbor, ping, hops -- stored together with the calling host.
    """
    experiment = tuple(CLIENTS)
    # Original author's sketch of the collected data (illustrative only; the
    # code below stores each tuple under its `type` label):
    #   (DC, NYC, UMassD, UNC, MIT) -->
    #     [['6RD', ('6RD-AB', 'KC', 'DC', '40.000', 4),
    #              ('6RD-GG', 'Houston', 'NYC', '40.243', 14),
    #              ('6RD-BA', 'DC', 'KC', '40.000', 4),
    #              ('6RD-AG', 'KC', 'Houston', '30.000', 1),
    #              ('6RD-BG', 'DC', 'NYC', '10.000', 1)],
    #      ['4ID', ...], ['SDN', ...]]
    family = type.split("-")[0]
    slot = ["6RD", "4ID", "SDN"].index(family)
    STATSD[experiment][slot][type] = (self._host, neighbor, ping, hops)
    if "stats" in PRINT_VERB:
        printtime("%s:\t %s\n" % (experiment, STATSD[experiment]))
def exposed_gather_xstats(self):
    """Measure xping latency and xtraceroute hops to the neighbor and report them.

    The neighbor address is obtained from the master ('get_neighbor_xhost');
    the results are sent back with 'xstats'. Returns a summary string.
    """
    printtime('<<<<GATHER XSTATS>>>>')

    target = rpc(MASTER_SERVER, 'get_neighbor_xhost', ())
    printtime('neighbor: %s' % target)

    latency = xping(target)
    printtime('xlatency: %s' % latency)

    time.sleep(2)

    hop_count = xtraceroute(target)
    printtime('xhops: %s' % hop_count)

    rpc(MASTER_SERVER, 'xstats', (latency, hop_count))
    summary = (target, latency, hop_count)
    return 'Sent xstats: (%s, %s, %s)' % summary
def exposed_get_proxy_address(self, type):
    """Return the 'RE AD:.. IP:.. AD:.. HID:.. SID:<PROXY_SID>' proxy address string.

    The leading AD comes from the gateway host of BACKBONES[0] for *type*,
    the IP (fourid) from the gateway host of the calling host, and the
    second AD/HID from the calling host itself. Any failure is retried
    after one second, indefinitely.
    """
    printtime("<<<< GET PROXY ADDR >>>>")
    me = CLIENTS[0]  # unused below; the lookup is kept so an empty CLIENTS still raises here
    neighbor = BACKBONES[0]
    while True:
        try:
            neighbor_gateway = self.exposed_get_gateway_host(type, neighbor)
            gad = 'AD:%s' % rpc(neighbor_gateway, 'get_ad', ())
            ad = 'AD:%s' % rpc(self._host, 'get_ad', ())
            hid = 'HID:%s' % rpc(self._host, 'get_hid', ())
            own_gateway = self.exposed_get_gateway_host(type, self._host)
            fourid = 'IP:%s' % rpc(own_gateway, 'get_fourid', ())
            fields = (gad, fourid, ad, hid, PROXY_SID)
            return 'RE %s %s %s %s SID:%s' % fields
        except:
            time.sleep(1)
            print("Trying to get proxy addres...")
def exposed_browser_stats(self, latency, type):
    """Store a browser latency result and start a new experiment when appropriate.

    latency -- value reported by the caller.
    type    -- 'tunneling', or one of '6RD' / '4ID' / 'SDN'.

    For 'tunneling' the result is stored under 'browser' and a new
    experiment is started immediately. For the other types it is stored in
    that type's slot, and a new experiment is started once the slot holds
    six entries.
    """
    print("<<<< Browser Stats >>>>")
    experiment = tuple(CLIENTS)

    if type == 'tunneling':
        STATSD[experiment]['browser'] = (self._host, CLIENTS[0], latency)
        if 'stats' in PRINT_VERB:
            printtime('%s:\t %s\n' % (experiment, STATSD[experiment]))
        self.exposed_new_exp()
        return

    print(type)
    slot = ['6RD', '4ID', 'SDN'].index(type)
    STATSD[experiment][slot]['browser'] = (self._host, CLIENTS[0], latency)
    if 'stats' in PRINT_VERB:
        printtime('%s:\t %s\n' % (experiment, STATSD[experiment]))
    # Older check, kept for reference:
    #   l = STATSD[cur_exp]
    #   if len(l[0]) == 6 and len(l[1]) == 6 and len(l[2]) == 6:
    collected = len(STATSD[experiment][slot])
    if collected == 6:
        print("<<< GOT ALL STATS!! >>>")
        self.exposed_new_exp()
def exposed_get_neighbor_webserver(self, type):
    """Return the 'RE .. SID:<WEBSERVER_SID>' address of the webserver to contact.

    type -- "tunneling" (caller must be in CLIENTS; the webserver is the
            other client) or one of "6RD" / "4ID" / "SDN" (the webserver is
            BACKBONES[0], reached through gateway hosts).

    Raises ValueError for any other combination. Previously `neighbor` was
    left unbound in that case and the resulting NameError was swallowed by
    a bare ``except: pass``, so the call spun forever at full CPU.
    Raises IndexError when "tunneling" is requested and CLIENTS holds no
    other host.

    RPC lookups are retried until they succeed; each failure is logged and
    followed by a one-second pause. KeyboardInterrupt/SystemExit propagate.
    """
    printtime("<<<< GET WEBSERVER >>>>")
    if type == "tunneling" and self._host in CLIENTS:
        neighbor = [client for client in CLIENTS if client != self._host][0]
    elif type in ("6RD", "4ID", "SDN"):
        neighbor = BACKBONES[0]
    else:
        raise ValueError("no webserver neighbor for type %r requested by %s" % (type, self._host))

    while True:
        try:
            ad = "AD:%s" % rpc(neighbor, "get_ad", ())
            hid = "HID:%s" % rpc(neighbor, "get_hid", ())
            if type == "tunneling":
                # Tunneling: the neighbor's own AD leads and the IP slot is blank.
                lead_ad = ad
                fourid = "IP:%s" % BLANK_FOURID
            else:
                lead_ad = "AD:%s" % rpc(self.exposed_get_gateway_host(type, self._host), "get_ad", ())
                fourid = "IP:%s" % rpc(self.exposed_get_gateway_host(type, neighbor), "get_fourid", ())
            return "RE %s %s %s %s SID:%s" % (lead_ad, fourid, ad, hid, WEBSERVER_SID)
        except Exception as e:
            printtime("%s" % e)
            time.sleep(1)
def exposed_browser_stats(self, latency, type):
    """Store a browser latency result and start a new experiment when appropriate.

    latency -- value reported by the caller.
    type    -- "tunneling", or one of "6RD" / "4ID" / "SDN".

    For "tunneling" the result is stored under "browser" and a new
    experiment is started immediately. For the other types it is stored in
    that type's slot, and a new experiment is started once the slot holds
    six entries.
    """
    print("<<<< Browser Stats >>>>")
    experiment = tuple(CLIENTS)

    if type == "tunneling":
        STATSD[experiment]["browser"] = (self._host, CLIENTS[0], latency)
        if "stats" in PRINT_VERB:
            printtime("%s:\t %s\n" % (experiment, STATSD[experiment]))
        self.exposed_new_exp()
        return

    print(type)
    slot = ["6RD", "4ID", "SDN"].index(type)
    STATSD[experiment][slot]["browser"] = (self._host, CLIENTS[0], latency)
    if "stats" in PRINT_VERB:
        printtime("%s:\t %s\n" % (experiment, STATSD[experiment]))
    # Older check, kept for reference:
    #   l = STATSD[cur_exp]
    #   if len(l[0]) == 6 and len(l[1]) == 6 and len(l[2]) == 6:
    collected = len(STATSD[experiment][slot])
    if collected == 6:
        print("<<< GOT ALL STATS!! >>>")
        self.exposed_new_exp()
def exposed_get_fourid_neighbor_xhost(self, type, src):
    """Return an "RE AD:<ad> IP:<fourid> AD:<ad> HID:<hid>" address string.

    type -- "6RD", "4ID" or "SDN"; selects which pair of gateway getters is used.
    src  -- truthy/falsy selector for which end is described; it is inverted
            when the calling host (self._host) is one of CLIENTS.

    The AD is fetched from one gateway and the fourid from the other; the
    trailing "AD:.. HID:.." part comes from the CA host when src is truthy
    and from the CB host otherwise.
    """
    printtime("<<<< GET 4ID NEIGHBORS >>>>")

    gateway_getters = {
        "6RD": (self.exposed_get_sixrd_A, self.exposed_get_sixrd_B),
        "4ID": (self.exposed_get_fourid_GA, self.exposed_get_fourid_GB),
        "SDN": (self.exposed_get_sdn_A, self.exposed_get_sdn_B),
    }
    getters = gateway_getters.get(type)
    if getters:
        # For any other type gA/gB stay unbound, exactly as in the original
        # chain of independent `if` statements.
        gA = getters[0]()
        gB = getters[1]()

    print("GETTING FROM: %s" % self.exposed_get_fourid_CA())
    ca_ad = rpc(self.exposed_get_fourid_CA(), "get_ad", ())
    ca_hid = rpc(self.exposed_get_fourid_CA(), "get_hid", ())
    ca_suffix = "AD:%s HID:%s" % (ca_ad, ca_hid)

    print("GETTING FROM: %s" % self.exposed_get_fourid_CB())
    cb_ad = rpc(self.exposed_get_fourid_CB(), "get_ad", ())
    cb_hid = rpc(self.exposed_get_fourid_CB(), "get_hid", ())
    cb_suffix = "AD:%s HID:%s" % (cb_ad, cb_hid)

    if self._host in CLIENTS:
        src = not src

    if src:
        ad_source, fourid_source, suffix = gB, gA, ca_suffix
    else:
        ad_source, fourid_source, suffix = gA, gB, cb_suffix

    print("GETTING FROM: %s" % ad_source)
    ad = rpc(ad_source, "get_ad", ())
    print("GETTING FROM: %s" % fourid_source)
    fourid = rpc(fourid_source, "get_fourid", ())
    return "RE AD:%s IP:%s %s" % (ad, fourid, suffix)
# exposed_get_commands(self, host=None): build the list of command strings a node should run.
#   host -- node asking for commands; falls back to self._host when None.
# Visible behaviour:
#   * logs the check-in when "master" is in PRINT_VERB;
#   * when FOURID_EXPERIMENT is set: prints TYPE, starts the list with
#     self.exposed_get_xianet(host=host), appends a wait-for-neighbors command and a
#     gather_fourid_stats command for TYPE, and returns the list;
#   * otherwise a host in BACKBONES gets only its xianet command;
#   * a host in CLIENTS (while NEW_EXP_LOCK is not held) gets the gather-stats /
#     soft-restart / xianet / wait / gather-xstats sequence, plus a webserver or
#     browser-stats command when APP_EXPERIMENT is set.
# The strings are Python source that reference `self`, `rpc`, `my_name`, `MASTER_SERVER`, etc.
# NOTE(review): the original indentation is lost here; whether the gather_fourid_stats
# append sits inside `if CLIENTS:` and whether the final `return cmd` sits inside
# `if not NEW_EXP_LOCK.locked():` cannot be determined from this line -- confirm
# against the upstream file before restructuring.
def exposed_get_commands(self, host=None): host = self._host if host == None else host if "master" in PRINT_VERB: printtime("MASTER: %s checked in for commands" % host) if FOURID_EXPERIMENT: print TYPE cmd = [self.exposed_get_xianet(host=host)] if CLIENTS: cmd += [ "self.exposed_wait_for_neighbors(rpc(MASTER_SERVER, 'get_experiment_nodes', ()), 'waiting for xianet to start on all nodes')" ] cmd += ["self.exposed_gather_fourid_stats('%s')" % TYPE] return cmd if host in BACKBONES: return [self.exposed_get_xianet(host=host)] elif host in CLIENTS: if not NEW_EXP_LOCK.locked(): cmd = ["my_backbone = self.exposed_gather_stats()[1]"] cmd += ["self.exposed_wait_for_neighbors([my_backbone], 'waiting for backbone: %s' % my_backbone)"] cmd += ["rpc(my_backbone, 'soft_restart', (my_name, ))"] cmd += ["xianetcmd = rpc(MASTER_SERVER, 'get_xianet', (my_backbone, ))"] cmd += ["printtime(xianetcmd)"] cmd += ["exec(xianetcmd)"] cmd += ["self.exposed_wait_for_neighbors(['localhost'], 'waiting for xianet to start')"] cmd += ["my_neighbor = rpc(MASTER_SERVER, 'get_neighbor_host', ())"] cmd += ["printtime(my_neighbor)"] cmd += ["self.exposed_wait_for_neighbors([my_backbone], 'waiting for backbone: %s' % my_backbone)"] cmd += ["self.exposed_wait_for_neighbors([my_neighbor], 'waiting for neighbor: %s' % my_neighbor)"] cmd += ["self.exposed_gather_xstats()"] if APP_EXPERIMENT: if host == CLIENTS[0]: cmd += ["check_output('%s')" % WEBSERVER_CMD] else: cmd += ["self.exposed_gather_browser_stats('tunneling')"] return cmd
def exposed_get_neighbor_webserver(self, type):
    """Return the 'RE .. SID:<WEBSERVER_SID>' address of the webserver to contact.

    type -- "tunneling" (caller must be in CLIENTS; the webserver is the
            other client) or one of '6RD' / '4ID' / 'SDN' (the webserver is
            BACKBONES[0], reached through gateway hosts).

    Raises ValueError for any other combination. Previously `neighbor` was
    left unbound in that case and the resulting NameError was swallowed by
    a bare ``except: pass``, so the call spun forever at full CPU.
    Raises IndexError when "tunneling" is requested and CLIENTS holds no
    other host.

    RPC lookups are retried until they succeed; each failure is logged and
    followed by a one-second pause. KeyboardInterrupt/SystemExit propagate.
    """
    printtime("<<<< GET WEBSERVER >>>>")
    if type == "tunneling" and self._host in CLIENTS:
        neighbor = [client for client in CLIENTS if client != self._host][0]
    elif type in ('6RD', '4ID', 'SDN'):
        neighbor = BACKBONES[0]
    else:
        raise ValueError('no webserver neighbor for type %r requested by %s' % (type, self._host))

    while True:
        try:
            ad = 'AD:%s' % rpc(neighbor, 'get_ad', ())
            hid = 'HID:%s' % rpc(neighbor, 'get_hid', ())
            if type == "tunneling":
                # Tunneling: the neighbor's own AD leads and the IP slot is blank.
                lead_ad = ad
                fourid = 'IP:%s' % BLANK_FOURID
            else:
                lead_ad = 'AD:%s' % rpc(self.exposed_get_gateway_host(type, self._host), 'get_ad', ())
                fourid = 'IP:%s' % rpc(self.exposed_get_gateway_host(type, neighbor), 'get_fourid', ())
            return 'RE %s %s %s %s SID:%s' % (lead_ad, fourid, ad, hid, WEBSERVER_SID)
        except Exception as e:
            printtime('%s' % e)
            time.sleep(1)
class Runner(threading.Thread):
    """Thread that hard-restarts every backbone and then starts a new experiment."""

    @staticmethod
    def _call_local_until_ok(method, args):
        # Invoke `method` on the local RPC server, logging each failure and
        # retrying one second later until a call completes without raising.
        while True:
            try:
                rpc('localhost', method, args)
                return
            except Exception as e:
                printtime('%s' % e)
                time.sleep(1)

    def run(self):
        """Restart each entry of BACKBONES in order, then trigger 'new_exp'."""
        for backbone in BACKBONES:
            self._call_local_until_ok('hard_restart', (backbone,))
        self._call_local_until_ok('new_exp', ())
# exposed_gather_fourid_stats(self, type): run this node's share of a 6RD/4ID/SDN measurement.
#   type -- experiment type string ('6RD', '4ID' or 'SDN' per the list used below).
# The node list `fidn` comes from the master ('get_fourid_nodes'); per the inline comment its
# order is [CA, 6RDA, GA, SDNA, CB, 6RDB, GB, SDNB]. What happens depends on where my_name
# appears in that list:
#   * not present: return immediately;
#   * fidn[0] or fidn[4] (CA / CB): ping + traceroute the gateway returned by
#     'get_gateway_host' and report it as '<type>-AG' / '<type>-BG'; then xping between the
#     SRC/DST addresses from 'get_fourid_neighbor_xhost' and report it as '<type>-AB' / '<type>-BA'
#     with xhops fixed at -1; CB then gathers browser stats, CA runs the webserver and polls
#     'check_done_type' once per second;
#   * fidn[1..3] (6RDA / GA / SDNA): for the index whose type matches, ping + traceroute the
#     partner node fidn[i+4] and report it as '<type>-GG'.
# All results are sent to the master with the 'fidstats' RPC.
# NOTE(review): indentation is lost on this line; the exact nesting of the CB/CA branch and of
# the wait loop cannot be confirmed from here -- check the upstream file before refactoring.
def exposed_gather_fourid_stats(self, type): printtime('<<<<GATHER FOURIDSTATS>>>>') # [CA, 6RDA, GA, SDNA, CB, 6RDB, GB, SDNB] fidn = rpc(MASTER_SERVER, 'get_fourid_nodes', ()) print my_name, fidn if my_name not in fidn: return elif my_name in [fidn[0], fidn[4]]: #CA, CB printtime('<<<<<NODE STARTING 4IDSTAT>>>>') i = fidn.index(my_name) gpair = 'AG' if i == 0 else 'BG' npair = 'AB' if i == 0 else 'BA' neighbor = fidn[4] if i == 0 else fidn[0] #for type in ['SDN', '4ID', '6RD']: printtime('<<<<NODE STARTING %s STATS>>>>' % type) gateway = rpc(MASTER_SERVER,'get_gateway_host',(type,my_name)) # DST = rpc(MASTER_SERVER,'get_gateway_xhost',(type,)) # xlatency = xping(DST) # xhops = xtraceroute(DST) # DST_h = rpc(MASTER_SERVER,'get_gateway_host',(type,my_name)) latency = multi_ping([gateway])[0][0] hops = traceroute(gateway, gateway=True) # rpc(MASTER_SERVER, 'fidstats', ('%s-%s' % (type, gpair), gateway, xlatency, xhops)) rpc(MASTER_SERVER, 'fidstats', ('%s-%s' % (type, gpair), gateway, latency, hops)) SRC = rpc(MASTER_SERVER,'get_fourid_neighbor_xhost',(type,True)) DST = rpc(MASTER_SERVER,'get_fourid_neighbor_xhost',(type,False)) xlatency = xping(DST, src=SRC) xhops = -1 rpc(MASTER_SERVER, 'fidstats', ('%s-%s' % (type, npair), neighbor, xlatency, xhops)) if my_name == fidn[4]: #CB self.exposed_gather_browser_stats(type) elif my_name == fidn[0]: #CA self.exposed_run_webserver(type) while not rpc(MASTER_SERVER, 'check_done_type', (type,)): print "Waiting for browser test to complete..." time.sleep(1) elif my_name in [fidn[1], fidn[2], fidn[3]]: # 6RDA, GA, SDNA indices = [i for i,x in enumerate(fidn) if x == my_name] for i in indices: t = ['6RD', '4ID', 'SDN'][i-1] if t == type: partner_node = fidn[i+4] latency = multi_ping([partner_node])[0][0] hops = traceroute(partner_node) rpc(MASTER_SERVER, 'fidstats', ('%s-GG' % type, partner_node, latency, hops))
# xtraceroute(neighbor, tryUntilSuccess=True, src=None): run the xtraceroute binary against `neighbor`.
#   neighbor        -- destination string, passed quoted on the command line.
#   tryUntilSuccess -- loop condition: when true the command is re-run until check_output
#                      stops raising; when false the command is never run at all.
#   src             -- optional source string, added as -s "<src>".
# Each attempt logs the command and its output; exceptions are logged and retried immediately.
# NOTE(review): as visible here the function ends after the loop without a return statement
# and `out` is never used, yet callers assign its result (e.g. xhops = xtraceroute(neighbor)).
# The hop-count parsing may have been cut off from this view -- confirm against the upstream file.
def xtraceroute(neighbor,tryUntilSuccess=True, src=None): cmd = '/home/cmu_xia/fedora-bin/xia-core/bin/xtraceroute -t 30' if src: cmd = '%s -s "%s"' % (cmd, src) cmd = '%s "%s"' % (cmd, neighbor) while tryUntilSuccess: try: printtime(cmd) out = check_output(cmd) printtime(out) break except Exception, e: printtime('%s' % e) pass
def exposed_stats(self, backbone_name, ping, hops):
    """Store the calling client's backbone measurement and validate the experiment.

    The result is stored under 'backbone-A' when the caller is CLIENTS[0]
    and 'backbone-B' otherwise. A new experiment is started when both
    clients picked the same backbone, or when the caller reports that it
    cannot reach its backbone (ping '-1.000' or hops -1).
    """
    experiment = tuple(CLIENTS)
    if self._host == CLIENTS[0]:
        own_side, other_side = 'A', 'B'
    else:
        own_side, other_side = 'B', 'A'
    own_key = 'backbone-%s' % own_side
    STATSD[experiment][own_key] = (self._host, backbone_name, ping, hops)

    # Both clients landing on the same backbone node is not handled.
    if len(STATSD[experiment]) >= 2:
        my_bb = STATSD[experiment][own_key][1]
        n_bb = STATSD[experiment]['backbone-%s' % other_side][1]
        if my_bb == n_bb:
            printtime("<<<< Experiment went to the same backbone node >>>>")
            self.exposed_new_exp()

    unreachable = ping == '-1.000' or hops == -1
    if unreachable:
        printtime("<<<< NODE can't see backbone >>>>")
        self.exposed_new_exp()

    if 'stats' in PRINT_VERB:
        printtime('%s:\t %s\n' % (experiment, STATSD[experiment]))
def exposed_stats(self, backbone_name, ping, hops):
    """Store the calling client's backbone measurement and validate the experiment.

    The result is stored under "backbone-A" when the caller is CLIENTS[0]
    and "backbone-B" otherwise. A new experiment is started when both
    clients picked the same backbone, or when the caller reports that it
    cannot reach its backbone (ping "-1.000" or hops -1).
    """
    experiment = tuple(CLIENTS)
    if self._host == CLIENTS[0]:
        own_side, other_side = "A", "B"
    else:
        own_side, other_side = "B", "A"
    own_key = "backbone-%s" % own_side
    STATSD[experiment][own_key] = (self._host, backbone_name, ping, hops)

    # Both clients landing on the same backbone node is not handled.
    if len(STATSD[experiment]) >= 2:
        my_bb = STATSD[experiment][own_key][1]
        n_bb = STATSD[experiment]["backbone-%s" % other_side][1]
        if my_bb == n_bb:
            printtime("<<<< Experiment went to the same backbone node >>>>")
            self.exposed_new_exp()

    unreachable = ping == "-1.000" or hops == -1
    if unreachable:
        printtime("<<<< NODE can't see backbone >>>>")
        self.exposed_new_exp()

    if "stats" in PRINT_VERB:
        printtime("%s:\t %s\n" % (experiment, STATSD[experiment]))
def print_write(self, s):
    """Write *s* to self.out with a timestamp prefix; also log it when self.host is in PRINT_VERB."""
    echo_to_console = self.host in PRINT_VERB
    if echo_to_console:
        printtime("%s: %s" % (self.host, s))
    stamped_line = "%s: %s\n" % (stime(), s)
    self.out.write(stamped_line)
if line == "": type += 1 continue NODES[type].append(NAME_LOOKUP[line]) for t in NODES: for node in t: IP_LOOKUP[socket.gethostbyname(node)] = node IP_LOOKUP['127.0.0.1'] = socket.gethostbyaddr('127.0.0.1') PRINT_VERB.append('stats') PRINT_VERB.append('master') for t in NODES: [PRINT_VERB.append(node) for node in t] printtime(('Threaded heartbeat server listening on port %d\n' 'press Ctrl-C to stop\n') % RPC_PORT) FINISHED_EVENT.set() printer = Printer() printer.start() runner = Runner() runner.start() decision = Decision() decision.start() stats = Stats() stats.start() try:
def exposed_discovery_stats(self, stats):
    """Store a discovery result reported by the calling host.

    stats -- indexable pair; stats[0] is used as the key and stats[1] as
             the value inside DSTATSD[self._host] (created on first use).
    """
    per_host = DSTATSD.setdefault(self._host, {})
    per_host[stats[0]] = stats[1]
    if 'stats' in PRINT_VERB:
        printtime('STATSD: %s\t %s' % (self._host, per_host))
def exposed_error(self, msg, host):
    """Log an error reported for *host*; no other action is taken."""
    report = '<<<< %s (error (not doing anything about it)!): %s >>>>' % (host, msg)
    printtime(report)
def exposed_new_exp(self, host=None): global CLIENTS, NUMEXP, NEW_EXP_TIMER, PRINT_VERB, NEW_EXP_LOCK, FOURID_G_INDEX, TYPE if SINGLE_EXPERIMENT and NUMEXP == 2: return if NEW_EXP_LOCK.locked(): return NEW_EXP_LOCK.acquire() print "<<< lock aquired >>>" # some host errored that's not currently in the experiment if host and host not in CLIENTS and host not in BACKBONES: NEW_EXP_LOCK.release() print "<<< nope: lock released >>>" return try: cur_exp = tuple(CLIENTS) if len(STATSD[cur_exp]) != 4 and not FOURID_EXPERIMENT: printtime("<<<< TIMEOUT!! (%s) >>>>" % cur_exp) if len(STATSD[cur_exp]) != 3 and FOURID_EXPERIMENT: printtime("<<<< TIMEOUT!! (%s) >>>>" % cur_exp) except: pass # pick new test clients while True: if not FOURID_EXPERIMENT: while True: new_clients = sample(NAMES, 2) new_clients = [NAME_LOOKUP[client] for client in new_clients] if new_clients[0] not in CLIENTS and new_clients[1] not in CLIENTS: break if SAME_TEST_NODES: new_clients = [NAME_LOOKUP["planetlab1.tsuniv.edu"], NAME_LOOKUP["planetlab5.cs.cornell.edu"]] new_clients = sorted(new_clients) else: while True: while True: new_clients = [NAME_LOOKUP[choice(NAMES)]] if new_clients[0] not in CLIENTS: break if SAME_TEST_NODES: new_clients = [NAME_LOOKUP["DC"]] print new_clients[0] IP_LOOKUP[socket.gethostbyname(new_clients[0])] = new_clients[0] self.exposed_hard_restart(new_clients[0], setup=True) i = 0 while new_clients[0] in NODE_WATCHERS and i < 30: try: rpc(new_clients[0], "get_hello", ()) break except: time.sleep(1) i += 1 if new_clients[0] not in NODE_WATCHERS or i >= 30: # client crashed continue i = 0 while i < 5: # only try five times since some nodes have ping issue and we don't want to lock try: print "Trying Ping" test_clients = [NAME_LOOKUP[c] for c in sample(NAMES, 50)] if new_clients[0] in test_clients: continue pings = rpc(new_clients[0], "get_ping", (test_clients,)) print pings if float(pings[3][0]) < FOURID_MAX_PING_TIME: break except Exception, e: print "Ping didn't work" print e # 
print pings time.sleep(1) i += 1 if i < 5: # if we successfully got our topo break else: # take down client print "need to try new ping client" NODE_WATCHERS[new_clients[0]].clearGoOn() while new_clients[0] in NODE_WATCHERS: print "waiting for node to go down" time.sleep(1) print "Done Ping" ping_hosts = [p[1] for p in pings] new_clients += ping_hosts[:4] if SAME_TEST_NODES: new_clients = [new_clients[0]] + ["NYC", "Atlanta", "Cleveland", "Houston"] print new_clients new_clients = [new_clients[0]] + [NAME_LOOKUP[c] for c in new_clients[1:]] print "building topo" self.build_topo(new_clients) print "done topo" NODE_WATCHERS[new_clients[0]].clearGoOn() while new_clients[0] in NODE_WATCHERS: print "waiting for node to go down" time.sleep(1) # Make sure we haven't done this experiment before if tuple(new_clients) not in STATSD: self.init_statsd(new_clients) break
self.init_statsd(new_clients) break CLIENTS = new_clients for client in CLIENTS: while True: try: IP_LOOKUP[socket.gethostbyname(client)] = client break except: print "Error doing Client lookup in new exp: %s" % client time.sleep(1) [PRINT_VERB.append(c) for c in CLIENTS] printtime('<<<< new experiment (%s): %s >>>>' % (NUMEXP, CLIENTS)) FOURID_G_INDEX = randint(1,4) printtime("<<<<<<<<RANDINT: %s>>>>>>>>" % FOURID_G_INDEX) TYPE = choice(['6RD','4ID','SDN']) for host in NODE_WATCHERS: self.exposed_hard_restart(host) for host in CLIENTS: self.exposed_hard_restart(host) printtime('<<< FINISHED RELAUNCHING HOSTS >>>') NUMEXP += 1
def exposed_new_exp(self, host=None): global CLIENTS, NUMEXP, NEW_EXP_TIMER, PRINT_VERB, NEW_EXP_LOCK, FOURID_G_INDEX, TYPE if SINGLE_EXPERIMENT and NUMEXP == 2: return if NEW_EXP_LOCK.locked(): return NEW_EXP_LOCK.acquire() print "<<< lock aquired >>>" # some host errored that's not currently in the experiment if host and host not in CLIENTS and host not in BACKBONES: NEW_EXP_LOCK.release() print "<<< nope: lock released >>>" return try: cur_exp = tuple(CLIENTS) if len(STATSD[cur_exp]) != 4 and not FOURID_EXPERIMENT: printtime("<<<< TIMEOUT!! (%s) >>>>" % cur_exp) if len(STATSD[cur_exp]) != 3 and FOURID_EXPERIMENT: printtime("<<<< TIMEOUT!! (%s) >>>>" % cur_exp) except: pass # pick new test clients while True: if not FOURID_EXPERIMENT: while True: new_clients = sample(NAMES, 2) new_clients = [NAME_LOOKUP[client] for client in new_clients] if new_clients[0] not in CLIENTS and new_clients[1] not in CLIENTS: break if SAME_TEST_NODES: new_clients = [NAME_LOOKUP['planetlab1.tsuniv.edu'],NAME_LOOKUP['planetlab5.cs.cornell.edu']] new_clients = sorted(new_clients) else: while True: while True: new_clients = [NAME_LOOKUP[choice(NAMES)]] if new_clients[0] not in CLIENTS: break if SAME_TEST_NODES: new_clients = [NAME_LOOKUP['DC']] print new_clients[0] IP_LOOKUP[socket.gethostbyname(new_clients[0])] = new_clients[0] self.exposed_hard_restart(new_clients[0], setup=True) i = 0 while new_clients[0] in NODE_WATCHERS and i < 30: try: rpc(new_clients[0],'get_hello',()) break except: time.sleep(1) i += 1 if new_clients[0] not in NODE_WATCHERS or i >= 30: # client crashed continue i = 0 while i < 5: # only try five times since some nodes have ping issue and we don't want to lock try: print "Trying Ping" test_clients = [NAME_LOOKUP[c] for c in sample(NAMES, 50)] if new_clients[0] in test_clients: continue pings = rpc(new_clients[0], 'get_ping', (test_clients, )) print pings if float(pings[3][0]) < FOURID_MAX_PING_TIME: break except Exception, e: print "Ping didn't work" print e #print 
pings time.sleep(1) i += 1 if i < 5: # if we successfully got our topo break else: # take down client print 'need to try new ping client' NODE_WATCHERS[new_clients[0]].clearGoOn() while new_clients[0] in NODE_WATCHERS: print "waiting for node to go down"; time.sleep(1) print "Done Ping" ping_hosts = [p[1] for p in pings] new_clients += ping_hosts[:4] if SAME_TEST_NODES: new_clients = [new_clients[0]]+['NYC','Atlanta','Cleveland', 'Houston'] print new_clients new_clients = [new_clients[0]]+[NAME_LOOKUP[c] for c in new_clients[1:]] print "building topo" self.build_topo(new_clients) print "done topo" NODE_WATCHERS[new_clients[0]].clearGoOn() while new_clients[0] in NODE_WATCHERS: print "waiting for node to go down"; time.sleep(1) # Make sure we haven't done this experiment before if tuple(new_clients) not in STATSD: self.init_statsd(new_clients) break
def print_write(self, s):
    """Write *s* to self.out with a timestamp prefix; also log it when self.host is in PRINT_VERB."""
    echo_to_console = self.host in PRINT_VERB
    if echo_to_console:
        printtime('%s: %s' % (self.host, s))
    stamped_line = '%s: %s\n' % (stime(), s)
    self.out.write(stamped_line)
nodes_to_check = rpc(MASTER_SERVER, 'get_nodes_to_check', ()) for node in nodes_to_check: if check_neighbor(node) == "": continue cmd = 'ab http://%s:%d/test | grep "Transfer rate"' % (node, HTTP_PORT) out = check_output(cmd)[0] printtime(out) stats = (node, int(float(out.split(':')[1].split('[')[0].strip()))) rpc(MASTER_SERVER, 'discovery_stats', (stats,)) except Exception, e: print e pass time.sleep(DISCOVERY_PERIOD) if __name__ == '__main__': printtime(('RPC server listening on port %d\n' 'press Ctrl-C to stop\n') % RPC_PORT) FINISH_EVENT.set() mapper = Mapper() mapper.start() discovery = Discovery() discovery.start() start_httpd() try: t = ThreadPoolServer(MyService, port = RPC_PORT) t.start() except Exception, e: printtime('%s' % e)
def check_neighbor(neighbor):
    """Return the result of the 'check_httpd' RPC on *neighbor*, or "" if anything goes wrong."""
    try:
        printtime('checking on: %s' % neighbor)
        status = rpc(neighbor, 'check_httpd', ())
    except:
        status = ""
    return status
def start_httpd(conf_file = HTTPD_CONF_TOP):
    """Copy *conf_file* over HTTPD_CONF with sudo, then run the HTTP_START command."""
    printtime('Starting httpd')
    install_conf_cmd = 'sudo cp %s %s' % (conf_file, HTTPD_CONF)
    check_output(install_conf_cmd)
    check_output(HTTP_START)
self.init_statsd(new_clients) break CLIENTS = new_clients for client in CLIENTS: while True: try: IP_LOOKUP[socket.gethostbyname(client)] = client break except: print "Error doing Client lookup in new exp: %s" % client time.sleep(1) [PRINT_VERB.append(c) for c in CLIENTS] printtime("<<<< new experiment (%s): %s >>>>" % (NUMEXP, CLIENTS)) FOURID_G_INDEX = randint(1, 4) printtime("<<<<<<<<RANDINT: %s>>>>>>>>" % FOURID_G_INDEX) TYPE = choice(["6RD", "4ID", "SDN"]) for host in NODE_WATCHERS: self.exposed_hard_restart(host) for host in CLIENTS: self.exposed_hard_restart(host) printtime("<<< FINISHED RELAUNCHING HOSTS >>>") NUMEXP += 1 if NEW_EXP_TIMER: NEW_EXP_TIMER.cancel()