def run(self, message_bus, timeout):
    """
    sends the verb to all nodes, waits for at most timeout;
    returns True if all nodes behaved as expected,
    and False otherwise - including in case of KeyboardInterrupt
    """
    nodes = [
        Node(cmc_name, message_bus)
        for cmc_name in self.selector.cmc_names()
    ]
    jobs = [
        Job(self.get_and_show_verb(node, self.verb), critical=True)
        for node in nodes
    ]
    display = Display(nodes, message_bus)
    scheduler = Scheduler(Job(display.run(), forever=True, critical=True),
                          *jobs,
                          timeout=timeout,
                          critical=False)
    try:
        if scheduler.run():
            return True
        else:
            scheduler.debrief()
            print(f"rhubarbe-{self.verb} failed: {scheduler.why()}")
            return False
    except KeyboardInterrupt:
        print(f"rhubarbe-{self.verb} : keyboard interrupt - exiting")
        return False
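
# A minimal, self-contained sketch of the pattern used above, assuming only
# asynciojobs is installed: a "forever" job (a ticker standing in for the
# Display) runs alongside critical worker jobs under a global timeout, and
# debrief() is called on failure. The names `tick` and `work` are
# illustrative, not part of the original code.
import asyncio
from asynciojobs import Scheduler, Job

async def tick():
    # stands in for Display.run(): runs until the scheduler shuts it down
    while True:
        await asyncio.sleep(0.2)

async def work(i):
    await asyncio.sleep(0.1 * i)
    return i

workers = [Job(work(i), critical=True) for i in range(3)]
sketch = Scheduler(Job(tick(), forever=True, critical=True),
                   *workers, timeout=5, critical=False)
if not sketch.run():
    sketch.debrief()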
def _test_exc_non_critical(self, verbose):
    print("verbose = {}".format(verbose))
    a1, a2 = SLJ(1), J(co_exception(0.5), label='non critical boom')
    sched = Scheduler(a1, a2, verbose=verbose)
    self.assertTrue(sched.orchestrate())
    print(sep + 'debrief()')
    sched.debrief()
def run_one_job(self, job, *, details=False, expected=True):
    print(job)
    scheduler = Scheduler(job, verbose=True)
    orchestration = scheduler.run()
    scheduler.list(details=details)
    if not orchestration:
        scheduler.debrief()
    self.assertTrue(orchestration)
    if expected:
        self.assertEqual(job.result(), 0)
    else:
        self.assertNotEqual(job.result(), 0)
def _test_window(self, total, window):
    atom = .1
    tolerance = 8  # tolerated deviation, in percent of the overall time
    s = Scheduler()
    jobs = [PrintJob("{}-th {}s job".format(i, atom),
                     sleep=atom, scheduler=s)
            for i in range(1, total + 1)]
    import time
    beg = time.time()
    ok = s.orchestrate(jobs_window=window)
    ok or s.debrief(details=True)
    end = time.time()
    duration = end - beg
    # estimate the global time
    # unwindowed: the overall duration is atom
    # otherwise a multiple of it (assuming total = k * window)
    expected = atom if not window else (total / window) * atom
    print('overall expected {} - measured {}'
          .format(expected, duration))
    distortion = duration / expected
    time_ok = 1 - tolerance / 100 <= distortion <= 1 + tolerance / 100
    if not time_ok:
        print("_test_window - window = {} :"
              " wrong execution time {} - not within {}% of {}"
              .format(window, end - beg, tolerance, expected))
    self.assertTrue(time_ok)
    self.assertTrue(ok)
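
# Hedged illustration of the timing model checked above: total jobs of equal
# duration `atom`, run through a window of size w (with total = k*w), take
# about (total/w)*atom overall - e.g. 4 jobs of 0.1s with window=2 finish in
# roughly 0.2s instead of ~0.1s unwindowed.
from asynciojobs import Scheduler, PrintJob

demo = Scheduler()
for i in range(4):
    PrintJob(f"job {i}", sleep=0.1, scheduler=demo)
demo.orchestrate(jobs_window=2)   # ~0.2s overall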
def test_simple(self):
    """a simple topology, that should work"""
    jobs = (SLJ(0.1), SLJ(0.2), SLJ(0.3), SLJ(0.4), SLJ(0.5),
            J(sl(0.6)), J(sl(0.7)))
    a1, a2, a3, a4, a5, a6, a7 = jobs
    a4.requires(a1, a2, a3)
    a5.requires(a4)
    a6.requires(a4)
    a7.requires(a5)
    a7.requires(a6)
    sched = Scheduler(*jobs)
    list_sep(sched, sep + "LIST BEFORE")
    self.assertTrue(sched.rain_check())
    self.assertTrue(sched.orchestrate(loop=asyncio.get_event_loop()))
    for j in jobs:
        self.assertFalse(j.raised_exception())
    list_sep(sched, sep + "LIST AFTER")
    print(sep + "DEBRIEF")
    sched.debrief()
def main(self, reset, timeout):
    mainjob = Job(self.run(reset), critical=True)
    displayjob = Job(self.display.run(), forever=True, critical=True)
    scheduler = Scheduler(mainjob, displayjob,
                          timeout=timeout, critical=False)
    try:
        is_ok = scheduler.run()
        if not is_ok:
            scheduler.debrief()
            self.display.set_goodbye(
                f"rhubarbe-save failed: {scheduler.why()}")
            return 1
        return 0 if mainjob.result() else 1
    except KeyboardInterrupt:
        self.display.set_goodbye("rhubarbe-save : keyboard interrupt, bye")
        return 1
    finally:
        self.cleanup()
def test_commands_verbose(self):
    dummy_path = "tests/dummy-10"
    dummy_file = Path(dummy_path).name
    scheduler = Scheduler()
    Sequence(
        SshJob(
            node=self.gateway(),
            verbose=True,
            commands=[
                Run("hostname"),
                RunScript("tests/script-with-args.sh", "arg1", "arg2"),
                RunString("for i in $(seq 3); do echo verbose$i; done"),
                Push(localpaths=dummy_path, remotepath="."),
                Pull(remotepaths=dummy_file, localpath=dummy_path + ".loop"),
            ]),
        SshJob(
            node=LocalNode(),
            critical=True,
            commands=Run("diff {x} {x}.loop".format(x=dummy_path),
                         verbose=True)),
        scheduler=scheduler)
    ok = scheduler.run()
    ok or scheduler.debrief()
    self.assertTrue(ok)
def global_check_image(self, _image, check_strings):
    # on the remaining nodes: check image marker
    self.print(f"checking {len(self.nodes)} nodes"
               f" against {check_strings} in /etc/rhubarbe-image")
    grep_pattern = "|".join(check_strings)
    check_command = (
        f"tail -1 /etc/rhubarbe-image | egrep -q '{grep_pattern}'")
    jobs = [
        SshJob(node=silent_sshnode(node, verbose=self.verbose),
               command=check_command,
               critical=False)
        for node in self.nodes
    ]
    scheduler = Scheduler(Job(self.display.run(), forever=True),
                          *jobs,
                          critical=False,
                          timeout=self.wait_timeout)
    if not scheduler.run():
        self.verbose and scheduler.debrief()    # pylint: disable=w0106
    # exclude nodes that have not behaved
    for node, job in zip(self.nodes, jobs):
        if not job.is_done() or job.raised_exception():
            self.verbose_msg(
                f"checking {grep_pattern}:"
                f" something went badly wrong with {node}")
            message = None
            if exc := job.raised_exception():
                message = f"OOPS {type(exc)} {exc}"
            self.mark_and_exclude(node, Reason.CANT_CHECK_IMAGE, message)
            continue
        if not job.result() == 0:
            explanation = (f"wrong image found on {node}"
                           f" - looking for {grep_pattern}")
            self.verbose_msg(explanation)
            self.mark_and_exclude(node, Reason.DID_NOT_LOAD, explanation)
            continue
        self.print(f"node {node} checked out OK")
def global_wait_ssh(self):
    # wait for nodes to be ssh-reachable
    self.print(f"waiting for {len(self.nodes)} nodes"
               f" (timeout={self.wait_timeout})")
    sshs = [SshWaiter(node, verbose=self.verbose) for node in self.nodes]
    jobs = [
        Job(ssh.wait_for(self.backoff), critical=False)
        for ssh in sshs
    ]
    scheduler = Scheduler(Job(self.display.run(), forever=True),
                          *jobs,
                          critical=False,
                          timeout=self.wait_timeout)
    if not scheduler.run():
        self.verbose and scheduler.debrief()    # pylint: disable=w0106
    # exclude nodes that have not behaved
    for node, job in zip(self.nodes, jobs):
        self.verbose_msg(
            f"node {node.id} wait_ssh_job -> done={job.is_done()}",
            f"exc={job.raised_exception()}")
        if exc := job.raised_exception():
            message = f"OOPS {type(exc)} {exc}"
            self.mark_and_exclude(node, Reason.WONT_SSH, message)
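
# Minimal sketch of the post-run triage idiom used in the two methods above
# (names illustrative): non-critical jobs never abort the scheduler, so after
# run() each one is inspected with raised_exception() and is_done() to decide
# which nodes misbehaved.
for job in jobs:
    if (exc := job.raised_exception()) is not None:
        print(f"job failed with {type(exc).__name__}: {exc}")
    elif not job.is_done():
        print("job still pending - presumably hit the timeout")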
def run(*,
        # the pieces to use
        slice, hss, epc, enb, phones,
        e3372_ues, oai_ues, gnuradios,
        e3372_ue_xterms, oai_ue_xterms, gnuradio_xterms,
        # boolean flags
        load_nodes, skip_reset_usb, oscillo,
        # the images to load
        image_gw, image_enb, image_oai_ue,
        image_e3372_ue, image_gnuradio,
        # miscell
        n_rb, verbose, dry_run):
    """
    ##########
    # 3 methods to get nodes ready
    # (*) load images
    # (*) reset nodes that are known to have the right image
    # (*) do nothing, proceed to experiment

    expects e.g.
    * slice : something like [email protected]
    * hss : 04
    * epc : 03
    * enb : 23
    * phones : list of indices of phones to use
    * e3372_ues : list of nodes to use as UEs using e3372
    * oai_ues : list of nodes to use as UEs using OAI
    * gnuradios : list of nodes to load with a gnuradio image
    * image_* : the names of the images to load on the various nodes

    Plus
    * load_nodes : whether to load images or not - in which case
      image_gw, image_enb and image_* are used to tell the image names
    * skip_reset_usb : the USRP board will be reset as well unless this is set
    """
    # what argparse knows as a slice actually is a gateway (user + host)
    gwuser, gwhost = r2lab_parse_slice(slice)
    gwnode = SshNode(hostname=gwhost, username=gwuser,
                     formatter=TimeColonFormatter(verbose=verbose),
                     debug=verbose)

    hostnames = hssname, epcname, enbname = [
        r2lab_hostname(x) for x in (hss, epc, enb)
    ]
    optional_ids = (e3372_ues + oai_ues + gnuradios
                    + e3372_ue_xterms + oai_ue_xterms + gnuradio_xterms)

    hssnode, epcnode, enbnode = [
        SshNode(gateway=gwnode, hostname=hostname, username='******',
                formatter=TimeColonFormatter(verbose=verbose),
                debug=verbose)
        for hostname in hostnames
    ]

    sched = Scheduler(verbose=verbose)

    ########## preparation
    job_check_for_lease = SshJob(
        node=gwnode,
        command=["rhubarbe", "leases", "--check"],
        label="check we have a current lease",
        scheduler=sched,
    )

    # turn off all nodes
    turn_off_command = ["rhubarbe", "off", "-a"]
    # except our 3 nodes and the optional ones
    turn_off_command += [
        "~{}".format(x) for x in [hss, epc, enb] + optional_ids
    ]
    # only do the turn-off thing if load_nodes is set
    if load_nodes:
        job_off_nodes = SshJob(
            node=gwnode,
            # switch off all nodes but the ones we use
            command=turn_off_command,
            label="turn off unused nodes",
            required=job_check_for_lease,
            scheduler=sched,
        )

    # actually run this on the gateway, not on the macphone:
    # the ssh keys are stored in the gateway and we do not yet have
    # the tools to leverage such remote keys
    job_stop_phones = [
        SshJob(
            node=gwnode,
            command=RunScript(
                # script
                find_local_embedded_script("faraday.sh"),
                # arguments
                "macphone{}".format(id),
                "r2lab-embedded/shell/macphone.sh", "phone-off",
                # options
                includes=includes),
            label="put phone{} in airplane mode".format(id),
            required=job_check_for_lease,
            scheduler=sched,
        ) for id in phones
    ]

    ########## prepare the image-loading phase
    # this will be a dict of items imagename -> ids
    to_load = defaultdict(list)
    to_load[image_gw] += [hss, epc]
    to_load[image_enb] += [enb]
    if e3372_ues:
        to_load[image_e3372_ue] += e3372_ues
    if e3372_ue_xterms:
        to_load[image_e3372_ue] += e3372_ue_xterms
    if oai_ues:
        to_load[image_oai_ue] += oai_ues
    if oai_ue_xterms:
        to_load[image_oai_ue] += oai_ue_xterms
    if gnuradios:
        to_load[image_gnuradio] += gnuradios
    if gnuradio_xterms:
        to_load[image_gnuradio] += gnuradio_xterms

    prep_job_by_node = {}
    for image, nodes in to_load.items():
        commands = []
        if load_nodes:
            commands.append(Run("rhubarbe", "usrpoff", *nodes))
            commands.append(Run("rhubarbe", "load", "-i", image, *nodes))
            commands.append(Run("rhubarbe", "usrpon", *nodes))
        # always do this
        commands.append(Run("rhubarbe", "wait", "-t", 120, *nodes))
        job = SshJob(
            node=gwnode,
            commands=commands,
            label="Prepare node(s) {}".format(nodes),
            required=job_check_for_lease,
            scheduler=sched,
        )
        for node in nodes:
            prep_job_by_node[node] = job

    # start services
    job_service_hss = SshJob(
        node=hssnode,
        command=RunScript(find_local_embedded_script("oai-hss.sh"),
                          "run-hss", epc,
                          includes=includes),
        label="start HSS service",
        required=prep_job_by_node[hss],
        scheduler=sched,
    )

    delay = 15
    job_service_epc = SshJob(
        node=epcnode,
        commands=[
            Run("echo giving HSS a headstart of {delay}s to warm up;"
                " sleep {delay}".format(delay=delay)),
            RunScript(find_local_embedded_script("oai-epc.sh"),
                      "run-epc", hss,
                      includes=includes),
        ],
        label="start EPC services",
        required=prep_job_by_node[epc],
        scheduler=sched,
    )

    ########## enodeb
    job_warm_enb = SshJob(
        node=enbnode,
        commands=[
            RunScript(find_local_embedded_script("oai-enb.sh"),
                      "warm-enb", epc, n_rb, not skip_reset_usb,
                      includes=includes),
        ],
        label="Warm eNB",
        required=prep_job_by_node[enb],
        scheduler=sched,
    )

    enb_requirements = (job_warm_enb, job_service_hss, job_service_epc)

    # wait for everything to be ready, and add an extra grace delay
    grace = 30 if load_nodes else 10
    grace_delay = SshJob(
        node=LocalNode(formatter=TimeColonFormatter()),
        command="echo Allowing grace of {grace} seconds; sleep {grace}"
                .format(grace=grace),
        required=enb_requirements,
        scheduler=sched,
    )

    # start services
    job_service_enb = SshJob(
        node=enbnode,
        # run-enb expects the id of the epc as a parameter
        # n_rb is the number of resource blocks for DL,
        # set to either 25 or 50
        commands=[
            RunScript(find_local_embedded_script("oai-enb.sh"),
                      "run-enb", oscillo,
                      includes=includes, x11=oscillo),
        ],
        label="start softmodem on eNB",
        required=grace_delay,
        scheduler=sched,
    )

    ########## run the experiment per se
    # manage phone(s)
    # this starts at the same time as the eNB, but some
    # headstart is needed so that the eNB actually is ready to serve
    delay = 12
    msg = "wait for {delay}s for enodeb to start up".format(delay=delay)
    wait_command = "echo {msg}; sleep {delay}".format(msg=msg, delay=delay)

    job_start_phones = [
        SshJob(
            node=gwnode,
            commands=[
                Run(wait_command),
                RunScript(find_local_embedded_script("faraday.sh"),
                          "macphone{}".format(id),
                          "r2lab-embedded/shell/macphone.sh", "phone-on",
                          includes=includes),
                RunScript(find_local_embedded_script("faraday.sh"),
                          "macphone{}".format(id),
                          "r2lab-embedded/shell/macphone.sh",
                          "phone-start-app",
                          includes=includes),
            ],
            label="start Nexus phone and speedtest app",
            required=grace_delay,
            scheduler=sched,
        ) for id in phones
    ]

    job_ping_phones_from_epc = [
        SshJob(
            node=epcnode,
            commands=[
                Run("sleep 10"),
                Run("ping -c 100 -s 100 -i .05 172.16.0.{ip}"
                    " &> /root/ping-phone".format(ip=id + 1)),
            ],
            label="ping Nexus phone from EPC",
            critical=False,
            required=job_start_phones,
        ) for id in phones
    ]

    ########## xterm nodes
    colors = ["wheat", "gray", "white", "darkolivegreen"]
    xterms = e3372_ue_xterms + oai_ue_xterms + gnuradio_xterms
    for xterm, color in zip(xterms, itertools.cycle(colors)):
        xterm_node = SshNode(gateway=gwnode, hostname=r2lab_hostname(xterm),
                             username='******',
                             formatter=TimeColonFormatter(verbose=verbose),
                             debug=verbose)
        SshJob(
            node=xterm_node,
            command=Run("xterm -fn -*-fixed-medium-*-*-*-20-*-*-*-*-*-*-*"
                        " -bg {} -geometry 90x10".format(color),
                        x11=True),
            label="xterm on node {}".format(xterm_node.hostname),
            required=prep_job_by_node[xterm],
            scheduler=sched,
            # don't set forever; if we do, then these xterms get killed
            # when all other tasks have completed
            # forever = True,
        )

    # remove dangling requirements - if any -
    # should not be needed but won't hurt either
    sched.sanitize()

    print(20 * "*", "nodes usage summary")
    if load_nodes:
        for image, nodes in to_load.items():
            for node in nodes:
                print("node {node} : {image}".format(node=node, image=image))
    else:
        print("NODES ARE USED AS IS (no image loaded, no reset)")
    print(10 * "*", "phones usage summary")
    if phones:
        for phone in phones:
            print("Using phone{phone}".format(phone=phone))
    else:
        print("No phone involved")

    sched.rain_check()
    # update the .dot and .png file for illustration purposes
    if verbose or dry_run:
        sched.list()
        name = "scenario-load" if load_nodes else "scenario"
        sched.export_as_dotfile("{name}.dot".format(name=name))
        os.system("dot -Tpng {name}.dot -o {name}.png".format(name=name))
        print("(Over)wrote {name}.png".format(name=name))

    if dry_run:
        return False

    if verbose:
        input('OK ? - press control C to abort ? ')

    if not sched.orchestrate():
        print("RUN KO : {}".format(sched.why()))
        sched.debrief()
        return False
    else:
        print("RUN OK")
        return True
def collect(run_name, slice, hss, epc, enb, verbose):
    """
    retrieves all relevant logs under a common name;
    otherwise, same signature as run() for convenience

    the retrieved stuff will be 3 compressed tars named
    <run_name>-(hss|epc|enb).tgz

    xxx - todo - it would make sense to also unwrap them all
    in a single place locally, like what "logs.sh unwrap" does
    """
    gwuser, gwhost = r2lab_parse_slice(slice)
    gwnode = SshNode(hostname=gwhost, username=gwuser,
                     formatter=TimeColonFormatter(verbose=verbose),
                     debug=verbose)

    functions = "hss", "epc", "enb"
    hostnames = hssname, epcname, enbname = [
        r2lab_hostname(x) for x in (hss, epc, enb)
    ]
    nodes = hssnode, epcnode, enbnode = [
        SshNode(gateway=gwnode, hostname=hostname, username='******',
                formatter=TimeColonFormatter(verbose=verbose),
                debug=verbose)
        for hostname in hostnames
    ]

    # first run a 'capture' function remotely to gather all the relevant
    # info into a single tar named <run_name>.tgz
    capturers = [
        SshJob(
            node=node,
            command=RunScript(find_local_embedded_script("oai-common.sh"),
                              "capture-{}".format(function), run_name,
                              includes=[find_local_embedded_script(
                                  "oai-{}.sh".format(function))]),
            label="capturer on {}".format(function),
            # capture-enb will run oai-as-enb and thus requires oai-enb.sh
        ) for (node, function) in zip(nodes, functions)
    ]

    collectors = [
        SshJob(
            node=node,
            command=Pull(remotepaths=["{}-{}.tgz".format(run_name, function)],
                         localpath="."),
            label="collector on {}".format(function),
            required=capturer,
        ) for (node, function, capturer) in zip(nodes, functions, capturers)
    ]

    sched = Scheduler(verbose=verbose)
    sched.update(capturers)
    sched.update(collectors)

    if verbose:
        sched.list()

    if not sched.orchestrate():
        print("KO")
        sched.debrief()
        return
    print("OK")
    if os.path.exists(run_name):
        print("local directory {} already exists = NOT UNWRAPPED !"
              .format(run_name))
        return
    os.mkdir(run_name)
    local_tars = ["{run_name}-{ext}.tgz".format(run_name=run_name, ext=ext)
                  for ext in ['hss', 'epc', 'enb']]
    for tar in local_tars:
        print("Untaring {} in {}".format(tar, run_name))
        os.system("tar -C {} -xzf {}".format(run_name, tar))
def wait(*argv):
    usage = """
    Wait for selected nodes to be reachable by ssh

    Returns 0 if all nodes indeed are reachable
    """
    the_config = Config()
    default_timeout = the_config.value('nodes', 'wait_default_timeout')
    default_backoff = the_config.value('networking', 'ssh_backoff')

    parser = ArgumentParser(usage=usage)
    parser.add_argument("-c", "--curses", action='store_true', default=False,
                        help="use curses to provide term-based animation")
    parser.add_argument("-t", "--timeout", action='store',
                        default=default_timeout, type=float,
                        help="specify global timeout for the whole process,"
                             " default={}".format(default_timeout))
    parser.add_argument("-b", "--backoff", action='store',
                        default=default_backoff, type=float,
                        help="specify the average backoff between"
                             " attempts to ssh connect,"
                             " default={}".format(default_backoff))
    # really don't write anything
    parser.add_argument("-s", "--silent", action='store_true', default=False)
    parser.add_argument("-v", "--verbose", action='store_true', default=False)
    add_selector_arguments(parser)

    args = parser.parse_args(argv)

    # --curses implies --verbose, otherwise nothing shows up
    if args.curses:
        args.verbose = True

    selector = selected_selector(args)
    message_bus = asyncio.Queue()

    if args.verbose:
        message_bus.put_nowait({'selected_nodes': selector})
    from rhubarbe.logger import logger
    logger.info("wait: backoff is {} and global timeout is {}"
                .format(args.backoff, args.timeout))

    nodes = [Node(cmc_name, message_bus)
             for cmc_name in selector.cmc_names()]
    sshs = [SshProxy(node, verbose=args.verbose) for node in nodes]
    jobs = [Job(ssh.wait_for(args.backoff)) for ssh in sshs]

    display_class = Display if not args.curses else DisplayCurses
    display = display_class(nodes, message_bus)

    # have the display class run forever until the other ones are done
    scheduler = Scheduler(Job(display.run(), forever=True), *jobs)
    try:
        orchestration = scheduler.orchestrate(timeout=args.timeout)
        if orchestration:
            return 0
        if args.verbose:
            scheduler.debrief()
        return 1
    except KeyboardInterrupt:
        print("rhubarbe-wait : keyboard interrupt - exiting")
        # xxx
        return 1
    finally:
        display.epilogue()
        if not args.silent:
            for ssh in sshs:
                print("{}:ssh {}".format(ssh.node,
                                         "OK" if ssh.status else "KO"))
def collect(run_name, slicename, cn, ran, oai_ues, verbose, dry_run):
    """
    retrieves all relevant logs under a common name;
    otherwise, same signature as run() for convenience

    the retrieved stuff will be made of
    * one pcap file for the CN
    * compressed tgz files, one per node, gathering logs, configs and data
    * for convenience the tgz files are unwrapped in run_name/id0
    """
    # the local dir to store incoming raw files - mostly tar files
    local_path = Path(f"{run_name}")
    if not local_path.exists():
        print(f"Creating directory {local_path}")
        local_path.mkdir()

    gwuser, gwhost = r2lab_parse_slice(slicename)
    gwnode = SshNode(hostname=gwhost, username=gwuser,
                     formatter=TimeColonFormatter(verbose=verbose),
                     debug=verbose)

    functions = ["cn", "ran"]
    hostnames = [r2lab_hostname(x) for x in (cn, ran)]

    node_cn, node_ran = nodes = [
        SshNode(gateway=gwnode, hostname=hostname, username='******',
                formatter=TimeColonFormatter(verbose=verbose),
                debug=verbose)
        for hostname in hostnames
    ]
    # default to an empty list so the chain() below also works without UEs
    nodes_ue = []
    if oai_ues:
        hostnames_ue = [r2lab_hostname(x) for x in oai_ues]
        nodes_ue = [
            SshNode(gateway=gwnode, hostname=hostname, username='******',
                    formatter=TimeColonFormatter(verbose=verbose),
                    debug=verbose)
            for hostname in hostnames_ue
        ]

    # all nodes involved are managed in the same way
    # node: an SshNode instance
    # id: the fit number
    # function: a string like 'cn' or 'ran' or 'oai-ue'
    local_nodedirs_tars = []

    scheduler = Scheduler(verbose=verbose)
    for (node, id, function) in zip(
            chain(nodes, nodes_ue),
            chain([cn, ran], oai_ues),
            chain(functions, cycle(["oai-ue"]))):
        # nodes on 2 digits
        id0 = f"{id:02d}"
        # node-dependent collect dir
        node_dir = local_path / id0
        node_dir.exists() or node_dir.mkdir()
        local_tar = f"{local_path}/{function}-{id0}.tgz"
        SshJob(
            node=node,
            commands=[
                # first run a 'capture-all' function remotely
                # to gather all the relevant files and commands remotely
                RunScript(
                    find_local_embedded_script(f"mosaic-{function}.sh"),
                    "capture-all", f"{run_name}-{function}",
                    includes=INCLUDES),
                # and retrieve it locally
                Pull(
                    remotepaths=f"{run_name}-{function}.tgz",
                    localpath=local_tar),
            ],
            scheduler=scheduler)
        local_nodedirs_tars.append((node_dir, local_tar))

    # retrieve tcpdump on the CN
    SshJob(
        node=node_cn,
        commands=[
            tcpdump_cn_service.stop_command(),
            Pull(remotepaths=[tcpdump_cn_pcap], localpath=local_path),
        ],
        scheduler=scheduler
    )

    print(10*'*', 'See collect scheduler in',
          scheduler.export_as_pngfile("cefore-collect"))
    if verbose:
        scheduler.list()
    if dry_run:
        return

    if not scheduler.run():
        print("KO")
        scheduler.debrief()
        return

    # unwrap
    for node_dir, tar in local_nodedirs_tars:
        print(f"Untaring {tar} in {node_dir}/")
        os.system(f"tar -C {node_dir} -xzf {tar}")
def main() -> bool:
    parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
    parser.add_argument("-U", "--url", default=default_topurl, dest='topurl',
                        help="url to reach the nbhosting server")
    parser.add_argument("-i", "--indices", default=[0], action=IntsRanges,
                        help="(cumulative) ranges of indices in the list"
                             " of known notebooks"
                             " - run nbhtest with -l to see the list")
    parser.add_argument("-u", "--users", default=[1], action=IntsRanges,
                        help="(cumulative) ranges of student indices;"
                             " e.g. -u 101-400 -u 501-600")
    parser.add_argument("-b", "--base", default='student',
                        help="basename for the students' names")
    parser.add_argument("-p", "--period", default=default_period, type=float,
                        help="delay between 2 triggers of nbhtest")
    parser.add_argument("-s", "--sleep", default=default_sleep_internal,
                        type=float,
                        help="delay in seconds to sleep between actions"
                             " inside nbhtest")
    parser.add_argument("-g", "--go", default=default_go_between_notebooks,
                        type=float,
                        help="go/wait duration between 2 consecutive"
                             " notebooks")
    parser.add_argument("-c", "--cut", default=False, action='store_true',
                        help="just load the urls,"
                             " don't do any further processing")
    parser.add_argument("-w", "--window", default=default_window, type=int,
                        help="window depth for spawning the nbhtest"
                             " instances")
    parser.add_argument("--idle", default=None,
                        help="monitor idle setting")
    parser.add_argument("-n", "--dry-run", action='store_true')
    parser.add_argument("coursedirs", default=[default_course_gitdir],
                        nargs='*',
                        help="a list of git repos where to fetch notebooks")
    signature = "".join(sys.argv[1:])
    args = parser.parse_args()

    local = LocalNode(formatter=TerminalFormatter(
        custom_format="%H-%M-%S:@line@", verbose=True))

    if args.idle is not None:
        hostname = urlparse(args.topurl).netloc
        command = f"ssh root@{hostname} nbh test-set-monitor-idle {args.idle}"
        os.system(command)

    scheduler = Scheduler()
    for user in args.users:
        student_name = f"{args.base}-{user:04d}"
        for coursedir in args.coursedirs:
            command = (f"nbhtest.py -U {args.topurl} -u {student_name} "
                       f"-s {args.sleep} -g {args.go} ")
            if args.cut:
                command += "-c "
            for index in args.indices:
                command += f"{coursedir}:{index} "
            command += " &"
            if args.dry_run:
                print("dry-run:", command)
            else:
                # schedule this command to run
                _job = SshJob(scheduler=scheduler,
                              node=local,
                              commands=[command, f"sleep {args.period}"])

    if args.dry_run:
        return True

    scheduler.jobs_window = args.window
    overall = scheduler.orchestrate()
    if not overall:
        scheduler.debrief()
    untagged = Path("artefacts")
    tagged = Path(f"artefacts{signature}")
    if tagged.exists():
        print(f"NOT RENAMING because {tagged} exists; command to run is")
        print(f"mv {untagged} {tagged}")
    else:
        print(f"renaming {untagged} into {tagged}")
        untagged.rename(tagged)
    print("nbhtests DONE")
    return overall
def main() -> bool:
    parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
    parser.add_argument("-U", "--url", default=default_topurl, dest='topurl',
                        help="url to reach the nbhosting server")
    parser.add_argument("-c", "--course-gitdir",
                        default=default_course_gitdir,
                        help="location of a git repo where to fetch"
                             " notebooks; needed in order to generate"
                             " relevant URLs")
    parser.add_argument("-i", "--indices", default=[0], action=IntsRanges,
                        help="(cumulative) ranges of indices in the list"
                             " of known notebooks"
                             " - run nbhtest with -l to see the list")
    parser.add_argument("-u", "--users", default=[1], action=IntsRanges,
                        help="(cumulative) ranges of student indices;"
                             " e.g. -u 101-400 -u 501-600")
    parser.add_argument("-m", "--random", action='store_true',
                        help="if set, a random notebook index is used"
                             " for each student")
    parser.add_argument("-b", "--base", default='student',
                        help="basename for the students' names")
    parser.add_argument("-p", "--period", default=20, type=float,
                        help="delay between 2 triggers of nbhtest")
    parser.add_argument("-s", "--sleep", default=default_sleep_internal,
                        type=float,
                        help="delay in seconds to sleep between actions"
                             " inside nbhtest")
    parser.add_argument("-w", "--window", default=default_window, type=int,
                        help="window depth for spawning the nbhtest"
                             " instances")
    parser.add_argument("-n", "--dry-run", action='store_true')
    args = parser.parse_args()

    course_gitdir = args.course_gitdir
    course, notebooks = list_notebooks(course_gitdir)

    # in random mode: what are the choices that we randomize on
    if args.random:
        if len(args.indices) > 1:
            choices = args.indices
        else:
            choices = list(range(len(notebooks)))

    local = LocalNode(
        formatter=TerminalFormatter(format="%H-%M-%S:@line@", verbose=True))

    scheduler = Scheduler()
    jobs = []
    for user in args.users:
        student_name = "{}-{:04d}".format(args.base, user)
        if args.random:
            indices = [random.choice(choices)]
        else:
            indices = args.indices
        for index in indices:
            command = "nbhtest.py -U {} -c {} -i {} -u {} -s {} &"\
                .format(args.topurl, course_gitdir, index,
                        student_name, args.sleep)
            if args.dry_run:
                print("dry-run:", command)
            else:
                # schedule this command to run
                job = Sequence(
                    SshJob(scheduler=scheduler, node=local, command=command),
                    Job(asyncio.sleep(args.period))
                )
                jobs.append(job)

    if args.dry_run:
        return True

    overall = scheduler.orchestrate(jobs_window=args.window)
    if not overall:
        scheduler.debrief()
    print("nbhtests DONE")
    return overall
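
# A hedged sketch of the pacing idiom shared by the two drivers above:
# each Sequence chains the spawned command with a plain asyncio.sleep job,
# so consecutive spawns are separated by `period` seconds while the window
# still caps how many sequences run at once. Names are illustrative.
import asyncio
from asynciojobs import Scheduler, Sequence, Job, PrintJob

pacer = Scheduler()
period = 1
for i in range(3):
    Sequence(PrintJob(f"spawn {i}"),
             Job(asyncio.sleep(period)),   # per-iteration pacing delay
             scheduler=pacer)
pacer.orchestrate(jobs_window=2)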
# the command we want to run in node1 is as simple as it gets
ping = SshJob(
    node=node1,
    required=(init_node_01, init_node_02),
    command=Run(
        'ping', '-c', '20', '10.0.0.2', '-I', wireless_interface,
        # verbose=True,
    ),
    scheduler=scheduler,
)

##########
# run the scheduler
ok = scheduler.orchestrate()
# give details if it failed
ok or scheduler.debrief()

success = ok and ping.result() == 0

# producing a dot file for illustration
scheduler.export_as_dotfile("B1.dot")

# return something useful to your OS
exit(0 if success else 1)
def main(nodename1, nodename2, *, verbose=True):
    # show ssh outputs on stdout as they appear,
    # together with the corresponding hostname
    formatter = ColonFormatter(verbose=verbose)

    ########## declare the needed ssh connections
    # our main ssh connection
    gateway = SshNode(hostname=gwname, username=slice,
                      formatter=formatter)
    # the ssh connections to each of the 2 nodes
    node1, node2 = [
        SshNode(
            hostname=nodename, username="******",
            # this is how we create a 2-hop
            # ssh connection behind a gateway
            gateway=gateway,
            formatter=formatter, debug=verbose)
        for nodename in (nodename1, nodename2)
    ]

    ##########
    job_warmup = SshJob(
        node=gateway,
        # with just Run()
        # you can run a command already available on the remote
        command=[
            Run("rhubarbe leases --check"),
            Run("rhubarbe on", nodename1, nodename2),
            Run("rhubarbe wait", nodename1, nodename2),
        ])

    job_prep_send = SshJob(
        node=node1,
        command=[
            # an example of a compound job
            # with RunScript, we run a command whose source is local here
            RunScript("demo.sh", "prepare-sender"),
            Run("ip address show control"),
        ],
        # run this only once this job is done
        required=job_warmup,
    )
    job_prep_recv = SshJob(
        node=node2,
        command=RunScript("demo.sh", "prepare-receiver"),
        required=job_warmup,
    )

    job_run_send = SshJob(
        node=node1,
        command=[
            RunScript("demo.sh", "run-sender"),
            Pull("PREP", "PREP-SEND"),
            Pull("RUN", "RUN-SEND"),
        ],
        # start when both nodes are ready
        required=(job_prep_send, job_prep_recv),
    )
    job_run_recv = SshJob(
        node=node2,
        command=[
            RunScript("demo.sh", "run-receiver"),
            Pull("PREP", "PREP-RECV"),
            Pull("RUN", "RUN-RECV"),
        ],
        required=(job_prep_send, job_prep_recv),
    )

    scheduler = Scheduler(job_warmup,
                          job_prep_send, job_prep_recv,
                          job_run_send, job_run_recv,
                          verbose=verbose)

    scheduler.export_as_dotfile('demo.dot')
    print("# produce a .png file with the following command")
    print("# install dot with e.g. brew install graphviz on macos")
    print("dot -Tpng demo.dot -o demo.png")
    print(20 * '=')

    ok = scheduler.orchestrate()
    if not ok:
        scheduler.debrief()
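
# Note: since other schedulers in this collection also use
# export_as_pngfile(), the dot/png step above can be done in one call when
# graphviz is installed, instead of printing the dot command:
# scheduler.export_as_pngfile('demo')   # writes demo.png and returns its name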
def main(self, *test_argv):  # pylint: disable=r0915,r0912,r0914,c0111
    self.parser = parser = argparse.ArgumentParser()
    # scope - on what hosts
    parser.add_argument(
        "-s", "--script", action='store_true', default=False,
        help=f"""If this flag is present, the first element of the remote
        command is assumed to be either the name of a local script, or,
        if this is not found, the body of a local script, that will be
        copied over before being executed remotely. In this case it should
        be executable.

        On the remote boxes it will be installed
        and run in the {default_remote_workdir} directory.
        """)
    parser.add_argument(
        "-i", "--includes", dest='includes', default=[], action='append',
        help="""for script mode only: a list of local files that are
        pushed remotely together with the local script,
        and in the same location; useful when you want to
        run remotely a shell script that sources other files;
        remember that on the remote end all files (scripts and includes)
        end up in the same location""")
    parser.add_argument(
        "-t", "--target", dest='targets', action='append', default=[],
        help="""
        specify targets (additive); at least one is required;
        each target can be either
        * a space-separated list of hostnames
        * the name of a file containing hostnames
        * the name of a directory containing files named after hostnames;
        see e.g. the --mark option
        """)
    parser.add_argument(
        "-x", "--exclude", dest='excludes', action='append', default=[],
        help="""
        like --target, but for specifying exclusions;
        no wildcard mechanism is supported here for now;
        also the order in which --target and --exclude options
        are mentioned does not matter;
        use --dry-run to only check the list of applicable hosts
        """)
    # global settings
    parser.add_argument(
        "-w", "--window", type=int, default=0,
        help="""
        specify how many connections can run simultaneously;
        default is no limit
        """)
    parser.add_argument(
        "-c", "--connect-timeout", dest='timeout',
        type=float, default=default_timeout,
        help=f"specify connection timeout, default is {default_timeout}s")
    # ssh settings
    parser.add_argument(
        "-l", "--login", default=default_username,
        help=f"remote user name - default is {default_username}")
    parser.add_argument(
        "-k", "--key", dest='keys', default=None, action='append', type=str,
        help="""
        The default is for apssh to locate an ssh-agent
        through the SSH_AUTH_SOCK environment variable.
        If this cannot be found, or has an empty set of keys,
        then the user should specify private key file(s) - additive
        """)
    parser.add_argument(
        "-K", "--ok-if-no-key", default=False, action='store_true',
        help="""
        When no key can be found, apssh won't even bother
        to try and connect. With this option it proceeds
        even with no key available.
        """)
    parser.add_argument(
        "-g", "--gateway", default=None,
        help="""
        specify a gateway for 2-hops ssh
        - either hostname or username@hostname
        """)
    # how to store results
    # terminal
    parser.add_argument(
        "-r", "--raw-format", default=False, action='store_true',
        help="""
        produce raw result; incoming lines are shown as-is without hostname
        """)
    parser.add_argument(
        "-tc", "--time-colon-format", default=False, action='store_true',
        help="equivalent to --format '@time@:@host@:@line@'")
    parser.add_argument(
        "-f", "--format", default=None, action='store',
        help="""specify output format, which may include
        * `strftime` formats like e.g. %%H-%%M,
        and one of the following:
        * @user@ for the remote username,
        * @host@ for the target hostname,
        * @line@ for the actual line output
          (which contains the actual newline)
        * @time@ is a shorthand for %%H-%%M-%%S""")
    # filesystem
    parser.add_argument(
        "-o", "--out-dir", default=None,
        help="specify directory where to store results")
    parser.add_argument(
        "-d", "--date-time", default=None, action='store_true',
        help="use a date-based directory to store results")
    parser.add_argument(
        "-m", "--mark", default=False, action='store_true',
        help="""
        available with the -d and -o options only.

        When specified, then for all nodes there will be a file created
        in the output subdir, named either
        0ok/<hostname> for successful nodes,
        or 1failed/<hostname> for the other ones.

        This mark file will contain a single line with the returned code,
        or 'None' if the node was not reachable at all
        """)
    # usual stuff
    parser.add_argument(
        "-n", "--dry-run", default=False, action='store_true',
        help="only show details on selected hostnames")
    parser.add_argument(
        "-v", "--verbose", action='store_true', default=False)
    parser.add_argument(
        "-D", "--debug", action='store_true', default=False)
    parser.add_argument(
        "-V", "--version", action='store_true', default=False)

    # the commands to run
    parser.add_argument(
        "commands", nargs=argparse.REMAINDER, type=str,
        help="""
        command to run remotely.

        If the -s or --script option is provided, the first argument
        here should denote a (typically script) file **that must exist**
        on the local filesystem. This script is then copied over
        to the remote system and serves as the command for remote execution
        """)

    if test_argv:
        args = self.parsed_args = parser.parse_args(test_argv)
    else:
        args = self.parsed_args = parser.parse_args()

    # helpers
    if args.version:
        print(f"apssh version {apssh_version}")
        exit(0)

    # manual check for REMAINDER
    if not args.commands:
        print("You must provide a command to be run remotely")
        parser.print_help()
        exit(1)

    # load keys
    self.loaded_private_keys = load_private_keys(
        self.parsed_args.keys, args.verbose or args.debug)
    if not self.loaded_private_keys and not args.ok_if_no_key:
        print("Could not find any usable key - exiting")
        exit(1)

    # initialize a gateway proxy if --gateway is specified
    gateway = None
    if args.gateway:
        gwuser, gwhost = self.user_host(args.gateway)
        gateway = SshProxy(hostname=gwhost, username=gwuser,
                           keys=self.loaded_private_keys,
                           formatter=self.get_formatter(),
                           timeout=self.parsed_args.timeout,
                           debug=self.parsed_args.debug)

    proxies = self.create_proxies(gateway)
    if args.verbose:
        print_stderr(f"apssh is working on {len(proxies)} nodes")

    window = self.parsed_args.window

    # populate the scheduler
    scheduler = Scheduler(verbose=args.verbose)
    if not args.script:
        command_class = Run
        extra_kwds_args = {}
    else:
        # try RunScript
        command_class = RunScript
        extra_kwds_args = {'includes': args.includes}
        # but if the filename is not found then use RunString
        script = args.commands[0]
        if not Path(script).exists():
            if args.verbose:
                print("Warning: file not found '{}'\n"
                      "=> using RunString instead".format(script))
            command_class = RunString

    for proxy in proxies:
        scheduler.add(
            SshJob(node=proxy,
                   critical=False,
                   command=command_class(*args.commands,
                                         **extra_kwds_args)))

    # pylint: disable=w0106
    scheduler.jobs_window = window
    if not scheduler.run():
        scheduler.debrief()
    results = [job.result() for job in scheduler.jobs]

    ##########
    # print on stdout the name of the output directory
    # useful mostly with -d :
    subdir = self.get_formatter().run_name \
        if isinstance(self.get_formatter(), SubdirFormatter) \
        else None
    if subdir:
        print(subdir)

    # details on the individual retcodes - a bit hacky
    if self.parsed_args.debug:
        for proxy, result in zip(proxies, results):
            print(f"PROXY {proxy.hostname} -> {result}")

    # marks
    names = {0: '0ok', None: '1failed'}
    if subdir and self.parsed_args.mark:
        # do we need to create the subdirs
        need_ok = [s for s in results if s == 0]
        if need_ok:
            os.makedirs(f"{subdir}/{names[0]}", exist_ok=True)
        need_fail = [s for s in results if s != 0]
        if need_fail:
            os.makedirs(f"{subdir}/{names[None]}", exist_ok=True)

        for proxy, result in zip(proxies, results):
            prefix = names[0] if result == 0 else names[None]
            mark_path = Path(subdir) / prefix / proxy.hostname
            with mark_path.open("w") as mark:
                mark.write(f"{result}\n")

    # xxx - when in gateway mode, the gateway proxy never gets
    # disconnected, which probably is just fine

    # return 0 only if all hosts have returned 0
    # otherwise, return 1
    failures = [r for r in results if r != 0]
    overall = 0 if not failures else 1
    return overall
def run(self, verbose, no_load, no_save):
    """
    can skip the load or save phases
    """
    print("Using node {} through gateway {}"
          .format(self.node, self.gateway))
    print("In order to produce {} from {}"
          .format(self.to_image, self.from_image))
    print("The following scripts will be run:")
    for i, script in enumerate(self.scripts, 1):
        print("{:03d}:{}".format(i, " ".join(script)))

    items = []
    if no_load:
        items.append("skip load")
    if no_save:
        items.append("skip save")
    if items:
        print("WARNING: using fast-track mode {}"
              .format(' & '.join(items)))

    self.locate_companion_shell()
    if verbose:
        print("Located companion in {}".format(self.companion))

    if verbose:
        print("Preparing tar of input shell scripts .. ", end="")
    tarfile = self.prepare_tar(self.to_image)
    if verbose:
        print("Done in {}".format(tarfile))

    keys = load_agent_keys()
    if verbose:
        print("We have found {} keys in the ssh agent".format(len(keys)))

    #################### the 2 nodes we need to talk to
    gateway_proxy = None
    gwuser, gwname = self.user_host(self.gateway)
    gateway_proxy = None if not gwuser else SshNode(
        hostname=gwname,
        username=gwuser,
        keys=keys,
        formatter=ColonFormatter(verbose=verbose),
    )

    # really not sure it makes sense to use a username other than root
    username, nodename = self.user_host(self.node)
    node_proxy = SshNode(
        gateway=gateway_proxy,
        hostname=nodename,
        username=username,
        keys=keys,
        formatter=ColonFormatter(verbose=verbose),
    )

    banner = 20 * '='

    # now that node_proxy is initialized, we need to
    # have a valid gateway_proxy for when we run all this
    # from inside the gateway
    if gateway_proxy is None:
        print("WARNING: build-image is designed to be run on your laptop")
        # best-effort, not even tested....
        gateway_proxy = LocalNode()

    #################### the little pieces
    sequence = Sequence(
        PrintJob("Checking for a valid lease"),
        # bail out if we don't have a valid lease
        SshJob(node=gateway_proxy,
               command="rhubarbe leases --check",
               critical=True),
        PrintJob("loading image {}".format(self.from_image)
                 if not no_load else "fast-track: skipping image load",
                 banner=banner,
                 # label="welcome message",
                 ),
        SshJob(
            node=gateway_proxy,
            commands=[
                Run("rhubarbe", "load", "-i", self.from_image, nodename)
                if not no_load else None,
                Run("rhubarbe", "wait", "-v", "-t", "240", nodename),
            ],
            # label="load and wait image {}".format(self.from_image),
        ),
        SshJob(
            node=node_proxy,
            commands=[
                Run("rm", "-rf",
                    "/etc/rhubarbe-history/{}".format(self.to_image)),
                Run("mkdir", "-p", "/etc/rhubarbe-history"),
                Push(localpaths=tarfile,
                     remotepath="/etc/rhubarbe-history"),
                RunScript(self.companion, nodename,
                          self.from_image, self.to_image),
                Pull(localpath="{}/logs/".format(self.to_image),
                     remotepaths="/etc/rhubarbe-history/{}/logs/"
                                 .format(self.to_image),
                     recurse=True),
            ],
            label="set up and run scripts in /etc/rhubarbe-history/{}"
                  .format(self.to_image)),
    )

    # avoid creating an SshJob with void commands
    if self.extra_logs:
        sequence.append(
            SshJob(
                node=node_proxy,
                label="collecting extra logs",
                critical=False,
                commands=[
                    Pull(localpath="{}/logs/".format(self.to_image),
                         remotepaths=extra_log, recurse=True)
                    for extra_log in self.extra_logs
                ],
            ))

    # creating these as critical = True means the whole
    # scenario will fail if these are not found
    for binary in self.expected_binaries:
        check_with = "ls" if os.path.isabs(binary) else "type -p"
        sequence.append(
            Sequence(
                PrintJob(
                    "Checking for expected binaries",
                    # label="message about checking",
                ),
                SshJob(
                    node=node_proxy,
                    command=[check_with, binary],
                    # label="Checking for {}".format(binary),
                )))

    # xxx some flag
    if no_save:
        sequence.append(
            PrintJob("fast-track: skipping image save", banner=banner))
    else:
        sequence.append(
            Sequence(
                PrintJob("saving image {} ...".format(self.to_image),
                         banner=banner),
                # make sure we capture all the logs and all that
                # mostly to test RunString
                SshJob(
                    node=node_proxy,
                    command=RunString("sync ; sleep $1; sync; sleep $1", 1),
                    # label='sync',
                ),
                SshJob(
                    node=gateway_proxy,
                    command=Run("rhubarbe", "save", "-o",
                                self.to_image, nodename),
                    # label="save image {}".format(self.to_image),
                ),
                SshJob(
                    node=gateway_proxy,
                    command="rhubarbe images -d",
                    # label="list current images",
                ),
            ))

    sched = Scheduler(sequence, verbose=verbose)
    # sanitizing for the cases where some pieces are left out
    sched.sanitize()

    print(20 * '+', "before run")
    sched.list(details=verbose)
    print(20 * 'x')

    if sched.orchestrate():
        if verbose:
            print(20 * '+', "after run")
            sched.list()
            print(20 * 'x')
        print("image {} OK".format(self.to_image))
        return True
    else:
        print("Something went wrong with image {}".format(self.to_image))
        print(20 * '+', "after run - KO")
        sched.debrief()
        print(20 * 'x')
        return False
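
# sanitize() is also what lets the optional pieces above be skipped safely:
# when a job is left out of the scheduler, any requirement still pointing at
# it gets dropped. A hedged micro-example, assuming only asynciojobs:
from asynciojobs import Scheduler, PrintJob

a, b = PrintJob("a"), PrintJob("b")
b.requires(a)
s = Scheduler(b)    # a is deliberately not added
s.sanitize()        # drops b's dangling requirement on a
s.run()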
def run(*, gateway, slicename,
        nodes, node_epc, node_enb, quectel_nodes, phones,
        verbose, dry_run,
        load_images, epc_image, enb_image, quectel_image):
    """
    Launch the latest OAICI EPC and eNB Docker images on R2lab

    Arguments:
        slicename: the Unix login name (slice name) to enter the gateway
        quectel_nodes: list of indices of quectel UE nodes to use
        phones: list of indices of phones to use
        nodes: a list of node ids to run the scenario on;
            strings or ints are OK
        node_epc: the node id on which to run the EPC
        node_enb: the node id for the enb,
            which is connected to the B210/eNB-duplexer
    """
    quectel_ids = quectel_nodes[:]
    quectel = len(quectel_ids) > 0

    faraday = SshNode(hostname=default_gateway, username=slicename,
                      verbose=verbose, formatter=TimeColonFormatter())

    epc = SshNode(gateway=faraday, hostname=fitname(node_epc),
                  username="******",
                  verbose=verbose, formatter=TimeColonFormatter())

    node_index = {
        id: SshNode(gateway=faraday, hostname=fitname(id),
                    username="******",
                    formatter=TimeColonFormatter(), verbose=verbose)
        for id in nodes
    }
    nodes_quectel_index = {
        id: SshNode(gateway=faraday, hostname=fitname(id),
                    username="******",
                    formatter=TimeColonFormatter(), verbose=verbose)
        for id in quectel_nodes
    }
    allnodes = nodes + quectel_nodes

    fit_epc = fitname(node_epc)
    fit_enb = fitname(node_enb)

    # the global scheduler
    scheduler = Scheduler(verbose=verbose)

    ##########
    check_lease = SshJob(
        scheduler=scheduler,
        node=faraday,
        critical=True,
        verbose=verbose,
        command=Run("rhubarbe leases --check"),
    )

    green_light = check_lease

    if load_images:
        green_light = [
            SshJob(
                scheduler=scheduler,
                required=check_lease,
                node=faraday,
                critical=True,
                verbose=verbose,
                label=f"Load image {epc_image} on {fit_epc}",
                commands=[
                    Run(f"rhubarbe load {node_epc} -i {epc_image}"),
                    Run(f"rhubarbe wait {node_epc}"),
                    RunScript("oaici.sh", "init-epc", node_epc, node_enb),
                ]),
            SshJob(
                scheduler=scheduler,
                required=check_lease,
                node=faraday,
                critical=True,
                verbose=verbose,
                label=f"Load image {enb_image} on {fit_enb}",
                commands=[
                    # if the usrp is on, load could be problematic...
                    Run(f"rhubarbe usrpoff {node_enb}"),
                    Run(f"rhubarbe load {node_enb} -i {enb_image}"),
                    Run(f"rhubarbe wait {node_enb}"),
                    # ensure a reset of the USRP on the eNB node
                    Run(f"rhubarbe usrpon {node_enb}"),
                    RunScript("oaici.sh", "init-enb", node_enb, node_epc),
                ],
            ),
            SshJob(
                scheduler=scheduler,
                required=check_lease,
                node=faraday,
                critical=False,
                verbose=verbose,
                label="turning off unused nodes",
                command=[
                    Run("rhubarbe bye --all "
                        + "".join(f"~{x} " for x in allnodes))
                ])
        ]

    if quectel:
        prepare_quectel = SshJob(
            scheduler=scheduler,
            required=check_lease,
            node=faraday,
            critical=True,
            verbose=verbose,
            label=f"Load image {quectel_image} on quectel UE nodes",
            commands=[
                Run("rhubarbe", "usrpoff", *quectel_ids),
                Run("rhubarbe", "load", *quectel_ids, "-i", quectel_image),
                Run("rhubarbe", "wait", *quectel_ids),
                Run("rhubarbe", "usrpon", *quectel_ids),
            ],
        )

    ##########
    # prepare the Quectel UE nodes
    if quectel:
        # wait 30s for the Quectel modules to show up
        wait_quectel_ready = PrintJob(
            "Let the Quectel modules show up",
            scheduler=scheduler,
            required=prepare_quectel,
            sleep=30,
            label="sleep 30s for the Quectel modules to show up")

        # run the Quectel Connection Manager as a service
        # on each Quectel UE node
        quectelCM_service = Service(
            command="quectel-CM -s oai.ipv4 -4",
            service_id="QuectelCM",
            verbose=verbose,
        )
        init_quectel_nodes = [
            SshJob(
                scheduler=scheduler,
                required=wait_quectel_ready,
                node=node,
                critical=True,
                verbose=verbose,
                label=f"Init Quectel UE on fit node {id}",
                commands=[
                    RunScript(find_local_embedded_script("nodes.sh"),
                              "check-quectel-on", includes=INCLUDES),
                    quectelCM_service.start_command(),
                ],
            ) for id, node in nodes_quectel_index.items()
        ]

        # wait 20s for the Quectel Connection Manager to start up
        wait_quectelCM_ready = PrintJob(
            "Let QuectelCM start up",
            scheduler=scheduler,
            required=init_quectel_nodes,
            sleep=20,
            label="sleep 20s for the Quectel Connection Manager(s)"
                  " to start up")

        detach_quectel_nodes = [
            SshJob(
                scheduler=scheduler,
                required=wait_quectelCM_ready,
                node=node,
                critical=True,
                verbose=verbose,
                label=f"Detach Quectel UE on fit node {id}",
                command=RunScript(find_local_embedded_script("nodes.sh"),
                                  "quectel-detach", includes=INCLUDES),
            ) for id, node in nodes_quectel_index.items()
        ]

    ##########
    # launch the EPC
    start_epc = SshJob(
        scheduler=scheduler,
        required=green_light,
        node=faraday,
        critical=True,
        verbose=verbose,
        label=f"Launch EPC on {fit_epc}",
        commands=[
            RunScript("oaici.sh", "start-epc", node_epc),
        ],
    )

    # launch the eNB
    if quectel:
        req = (start_epc, detach_quectel_nodes)
    else:
        req = start_epc
    start_enb = SshJob(
        scheduler=scheduler,
        required=req,
        node=faraday,
        critical=True,
        verbose=verbose,
        label=f"Launch eNB on {fit_enb}",
        commands=[
            RunScript("oaici.sh", "start-enb", node_enb),
        ],
    )
    wait_ran_ready = PrintJob(
        "Let the eNB start up",
        scheduler=scheduler,
        required=start_enb,
        sleep=50,
        label="sleep 50s for the eNB to start up")

    ########## test phone(s) connectivity

    sleeps_ran = (0, 10)
    phone_msgs = [f"wait again for {sleep}s before waking up phone{id}"
                  for sleep, id in zip(sleeps_ran, phones)]
    wait_commands = [f"echo {msg}; sleep {sleep}"
                     for msg, sleep in zip(phone_msgs, sleeps_ran)]
    sleeps_phone = (10, 10)
    phone2_msgs = [f"wait for {sleep}s for phone{id} before starting tests"
                   for sleep, id in zip(sleeps_phone, phones)]
    wait2_commands = [f"echo {msg}; sleep {sleep}"
                      for msg, sleep in zip(phone2_msgs, sleeps_phone)]

    job_start_phones = [
        SshJob(
            node=faraday,
            commands=[
                Run(wait_command),
                RunScript(find_local_embedded_script("faraday.sh"),
                          f"macphone{id}",
                          "r2lab-embedded/shell/macphone.sh", "phone-on",
                          includes=INCLUDES),
                Run(wait2_command),
                RunScript(find_local_embedded_script("faraday.sh"),
                          f"macphone{id}",
                          "r2lab-embedded/shell/macphone.sh",
                          "phone-check-cx",
                          includes=INCLUDES),
                RunScript(find_local_embedded_script("faraday.sh"),
                          f"macphone{id}",
                          "r2lab-embedded/shell/macphone.sh",
                          "phone-start-app",
                          includes=INCLUDES),
            ],
            label=f"turn off airplane mode on phone {id}",
            required=wait_ran_ready,
            scheduler=scheduler)
        for id, wait_command, wait2_command in zip(phones, wait_commands,
                                                   wait2_commands)
    ]

    if quectel:
        job_attach_quectel = [
            SshJob(
                scheduler=scheduler,
                required=(job_start_phones, wait_ran_ready,
                          detach_quectel_nodes),
                node=node,
                critical=True,
                verbose=verbose,
                label=f"Attach Quectel UE on fit node {id}",
                command=RunScript(find_local_embedded_script("nodes.sh"),
                                  "quectel-attach", includes=INCLUDES),
            ) for id, node in nodes_quectel_index.items()
        ]

        # wait 30s for the Quectel connection to set up
        wait_quectel_cx_ready = PrintJob(
            "Let the Quectel connection(s) set up",
            scheduler=scheduler,
            required=job_attach_quectel,
            sleep=30,
            label="sleep 30s for the Quectel connection(s) to set up")

        test_quectel_cx = [
            SshJob(
                scheduler=scheduler,
                required=wait_quectel_cx_ready,
                node=node,
                verbose=verbose,
                label=f"Check the Quectel cx on fit node {id}",
                command=RunScript(find_local_embedded_script("nodes.sh"),
                                  "check-quectel-cx", includes=INCLUDES),
            ) for id, node in nodes_quectel_index.items()
        ]

    ##########
    # update the .dot and .png file for illustration purposes
    scheduler.check_cycles()
    name = "deploy-oaici"
    print(10 * '*', 'See main scheduler in',
          scheduler.export_as_pngfile(name))

    # orchestrate the scheduler jobs
    if verbose:
        scheduler.list()
    if dry_run:
        return True

    if not scheduler.orchestrate():
        print(f"RUN KO : {scheduler.why()}")
        scheduler.debrief()
        return False
    print(f"RUN OK, you can now log on the EPC node {fit_epc}"
          f" and the eNB node {fit_enb} to check the logs")
    print(80 * '*')
def run(*, # pylint: disable=r0912, r0914, r0915 # the pieces to use slicename, cn, ran, phones, e3372_ues, oai_ues, gnuradios, e3372_ue_xterms, gnuradio_xterms, ns3, # boolean flags load_nodes, reset_usb, oscillo, tcp_streaming, # the images to load image_cn, image_ran, image_oai_ue, image_e3372_ue, image_gnuradio, image_T_tracer, image_ns3, # miscell n_rb, nodes_left_alone, T_tracer, publisher_ip, verbose, dry_run): """ ########## # 3 methods to get nodes ready # (*) load images # (*) reset nodes that are known to have the right image # (*) do nothing, proceed to experiment expects e.g. * slicename : s.t like [email protected] * cn : 7 * ran : 23 * ns3 : 32 * phones: list of indices of phones to use * e3372_ues : list of nodes to use as a UE using e3372 * oai_ues : list of nodes to use as a UE using OAI * gnuradios : list of nodes to load with a gnuradio image * T_tracer : list of nodes to load with a tracer image * image_* : the name of the images to load on the various nodes Plus * load_nodes: whether to load images or not - in which case image_cn, image_ran and image_* are used to tell the image names * reset_usb : the USRP board will be reset when this is set * tcp_streaming : set up TCP streaming scenario * publisher_ip : IP address of the publisher """ # what argparse knows as a slice actually is about the gateway (user + host) gwuser, gwhost = r2lab_parse_slice(slicename) gwnode = SshNode(hostname=gwhost, username=gwuser, formatter=TimeColonFormatter(verbose=verbose), debug=verbose) hostnames = [r2lab_hostname(x) for x in (cn, ran)] cnnode, rannode = [ SshNode(gateway=gwnode, hostname=hostname, username='******', formatter=TimeColonFormatter(verbose=verbose), debug=verbose) for hostname in hostnames ] scheduler = Scheduler(verbose=verbose, label="CORE EXP") ########## prepare the image-loading phase # focus on the experiment, and use # prepare_testbed_scheduler later on to prepare testbed # all we need to do at this point is compute a mapping dict # image -> list-of-nodes images_to_load = defaultdict(list) images_to_load[image_cn] += [cn] images_to_load[image_ran] += [ran] if e3372_ues: images_to_load[image_e3372_ue] += e3372_ues if e3372_ue_xterms: images_to_load[image_e3372_ue] += e3372_ue_xterms if oai_ues: images_to_load[image_oai_ue] += oai_ues if gnuradios: images_to_load[image_gnuradio] += gnuradios if gnuradio_xterms: images_to_load[image_gnuradio] += gnuradio_xterms if T_tracer: images_to_load[image_T_tracer] += T_tracer if ns3: images_to_load[image_ns3] += [ns3] # start core network job_start_cn = SshJob( node=cnnode, commands=[ RunScript(find_local_embedded_script("nodes.sh"), "git-pull-r2lab", includes=INCLUDES), RunScript(find_local_embedded_script("mosaic-cn.sh"), "journal --vacuum-time=1s", includes=INCLUDES), RunScript(find_local_embedded_script("mosaic-cn.sh"), "configure", includes=INCLUDES), RunScript(find_local_embedded_script("mosaic-cn.sh"), "start", includes=INCLUDES), tcpdump_cn_service.start_command(), ], label="start CN service", scheduler=scheduler, ) # prepare enodeb reset_option = "-u" if reset_usb else "" job_warm_ran = SshJob( node=rannode, commands=[ RunScript(find_local_embedded_script("nodes.sh"), "git-pull-r2lab", includes=INCLUDES), RunScript(find_local_embedded_script("mosaic-ran.sh"), "journal --vacuum-time=1s", includes=INCLUDES), RunScript(find_local_embedded_script("mosaic-ran.sh"), "warm-up", reset_option, includes=INCLUDES), RunScript(find_local_embedded_script("mosaic-ran.sh"), "configure -b", n_rb, cn, includes=INCLUDES), ], 
label="Configure eNB", scheduler=scheduler, ) ran_requirements = [job_start_cn, job_warm_ran] ### if oai_ues: # prepare OAI UEs for ue in oai_ues: ue_node = SshNode(gateway=gwnode, hostname=r2lab_hostname(ue), username='******', formatter=TimeColonFormatter(verbose=verbose), debug=verbose) job_warm_ues = [ SshJob( node=ue_node, commands=[ RunScript(find_local_embedded_script("nodes.sh"), "git-pull-r2lab", includes=INCLUDES), RunScript(find_local_embedded_script("mosaic-oai-ue.sh"), "journal --vacuum-time=1s", includes=INCLUDES), RunScript(find_local_embedded_script("mosaic-oai-ue.sh"), "warm-up", reset_option, includes=INCLUDES), RunScript(find_local_embedded_script("mosaic-oai-ue.sh"), "configure -b", n_rb, includes=INCLUDES), ], label=f"Configure OAI UE on fit{ue:02d}", scheduler=scheduler) ] ran_requirements.append(job_warm_ues) ### if not load_nodes and phones: job_turn_off_phones = SshJob( node=gwnode, commands=[ RunScript(find_local_embedded_script("faraday.sh"), f"macphone{phone} phone-off") for phone in phones], scheduler=scheduler, ) ran_requirements.append(job_turn_off_phones) # wait for everything to be ready, and add an extra grace delay grace = 5 grace_delay = PrintJob( f"Allowing grace of {grace} seconds", sleep=grace, required=ran_requirements, scheduler=scheduler, label=f"settle for {grace}s", ) # optionally start T_tracer if T_tracer: job_start_T_tracer = SshJob( # pylint: disable=w0612 node=SshNode( gateway=gwnode, hostname=r2lab_hostname(T_tracer[0]), username='******', formatter=TimeColonFormatter(verbose=verbose), debug=verbose), commands=[ Run(f"/root/trace {ran}", x11=True), ], label="start T_tracer service", required=ran_requirements, scheduler=scheduler, ) # ran_requirements.append(job_start_T_tracer) # start services graphical_option = "-x" if oscillo else "" graphical_message = "graphical" if oscillo else "regular" tracer_option = " -T" if T_tracer else "" # we use a Python variable for consistency # although it not used down the road _job_service_ran = SshJob( node=rannode, commands=[ RunScript(find_local_embedded_script("mosaic-ran.sh"), "start", graphical_option, tracer_option, includes=INCLUDES, x11=oscillo, ), ], label=f"start {graphical_message} softmodem on eNB", required=grace_delay, scheduler=scheduler, ) ########## run experiment per se # Manage phone(s) and OAI UE(s) # this starts at the same time as the eNB, but some # headstart is needed so that eNB actually is ready to serve sleeps = [20, 30] phone_msgs = [f"wait for {sleep}s for eNB to start up before waking up phone{id}" for sleep, id in zip(sleeps, phones)] wait_commands = [f"echo {msg}; sleep {sleep}" for msg, sleep in zip(phone_msgs, sleeps)] job_start_phones = [ SshJob( node=gwnode, commands=[ Run(wait_command), RunScript(find_local_embedded_script("faraday.sh"), f"macphone{id}", "r2lab-embedded/shell/macphone.sh", "phone-on", includes=INCLUDES), RunScript(find_local_embedded_script("faraday.sh"), f"macphone{id}", "r2lab-embedded/shell/macphone.sh", "phone-start-app", includes=INCLUDES), ], label=f"turn off airplace mode on phone {id}", required=grace_delay, scheduler=scheduler) for id, wait_command in zip(phones, wait_commands)] if oai_ues: delay = 25 for ue in oai_ues: msg = f"wait for {delay}s for eNB to start up before running UE on node fit{ue:02d}" wait_command = f"echo {msg}; sleep {delay}" ue_node = SshNode(gateway=gwnode, hostname=r2lab_hostname(ue), username='******', formatter=TimeColonFormatter(verbose=verbose), debug=verbose) job_start_ues = [ SshJob( node=ue_node, commands=[ 
Run(wait_command), RunScript(find_local_embedded_script("mosaic-oai-ue.sh"), "start", includes=INCLUDES), ], label=f"Start OAI UE on fit{ue:02d}", required=grace_delay, scheduler=scheduler) ] delay += 20 for ue in oai_ues: environ = {'USER': '******'} cefnet_ue_service = Service("cefnetd", service_id="cefnet", environ=environ) ue_node = SshNode(gateway=gwnode, hostname=r2lab_hostname(ue), username='******', formatter=TimeColonFormatter(verbose=verbose), debug=verbose) msg = f"Wait 60s and then ping faraday gateway from UE on fit{ue:02d}" ue_commands = f"echo {msg}; sleep 60; ping -c 5 -I oip1 faraday.inria.fr" if tcp_streaming: # TCP streaming scenario if load_nodes: ue_commands += "sysctl -w net.ipv4.ip_forward=1;" ue_commands += "ip route add 10.1.1.0/24 via 192.168.2.32 dev data" job_setup_ue = [ SshJob( node=ue_node, commands=[ Run(ue_commands,label="test UE link and set up routing for TCP streaming"), ], label=f"ping faraday gateway from UE on fit{ue:02d} and set up routing for the TCP streaming scenario", critical=True, required=job_start_ues, scheduler=scheduler) ] else: # Cefore streaming scenario if load_nodes: ue_commands += "sysctl -w net.ipv4.ip_forward=1;" ue_commands += f"ip route add {publisher_ip}/32 dev oip1;" ue_commands += "ip route add 10.1.1.0/24 via 192.168.2.32 dev data;" ue_commands += "iptables -t nat -A POSTROUTING -s 10.1.1.2/32 -j SNAT --to-source 172.16.0.2;" ue_commands += "iptables -t nat -A PREROUTING -d 172.16.0.2 -j DNAT --to-destination 10.1.1.2;" ue_commands += "iptables -A FORWARD -d 10.1.1.2/32 -i oip1 -j ACCEPT;" ue_commands += f"iptables -A FORWARD -d {publisher_ip}/32 -i data -j ACCEPT;" ue_commands += "ip rule del from all to 172.16.0.2 lookup 201;" ue_commands += "ip rule del from 172.16.0.2 lookup 201;" ue_commands += "ip rule add from 10.1.1.2 lookup lte prio 32760;" ue_commands += "ip rule add from all to 172.16.0.2 lookup lte prio 32761;" ue_commands += "ip rule add from all fwmark 0x1 lookup lte prio 32762;" ue_commands += "ip route add table lte 10.1.1.0/24 via 192.168.2.32 dev data;" # ue_commands += "killall cefnetd || true" job_setup_ue = [ SshJob( node=ue_node, commands=[ Run(ue_commands,label="test UE link and set up routing for Cefore streaming"), cefnet_ue_service.start_command(), ], label=f"ping faraday gateway from fit{ue:02d} UE and set up routing for the Cefore streaming scenario", critical=True,#old cefnetd not killed when running new one... 
required=job_start_ues, scheduler=scheduler) ] if ns3: ns3_node = SshNode(gateway=gwnode, hostname=r2lab_hostname(ns3), username='******', formatter=TimeColonFormatter(verbose=verbose), debug=verbose) msg = f"Wait for the UE node to be ready before running the streaming scenario with ns-3 on fit{ns3}" if load_nodes: job_prepare_ns3_node = [ SshJob( node=ns3_node, commands=[ Run("turn-on-data"), Run("ifconfig data promisc up"), Run("ip route add default via 192.168.2.6 dev data || true"), Run("sysctl -w net.ipv4.ip_forward=1"), ], label=f"setup routing on ns-3 fit{ns3:02d} node", # ip route may already be there so the ip route command may fail critical=True, required=job_setup_ue, scheduler=scheduler) ] ns3_requirements = job_prepare_ns3_node else: ns3_requirements = job_setup_ue if not tcp_streaming: environ = {'USER': '******'} cefnet_ns3_service = Service("cefnetd", service_id="cefnet", environ=environ) job_start_cefnet_on_cn = [ SshJob( node=cnnode, commands=[ Run(f"echo 'ccn:/streaming tcp {publisher_ip}:80' > /usr/local/cefore/cefnetd.conf"), # Run("killall cefnetd || true"),# not done by default with service.start_command() cefnet_ns3_service.start_command(), ], label=f"Start Cefnet on EPC running at fit{cn:02d}", critical=True,#old cefnetd not killed when running new one... required=ns3_requirements, scheduler=scheduler, ) ] # ditto _job_ping_phones_from_cn = [ SshJob( node=cnnode, commands=[ Run("sleep 20"), Run(f"ping -c 100 -s 100 -i .05 172.16.0.{id+1} &> /root/ping-phone{id}"), ], label=f"ping phone {id} from core network", critical=False, required=job_start_phones, scheduler=scheduler) for id in phones] ########## xterm nodes colors = ("wheat", "gray", "white", "darkolivegreen") xterms = e3372_ue_xterms + gnuradio_xterms for xterm, color in zip(xterms, cycle(colors)): xterm_node = SshNode( gateway=gwnode, hostname=r2lab_hostname(xterm), username='******', formatter=TimeColonFormatter(verbose=verbose), debug=verbose) SshJob( node=xterm_node, command=Run(f"xterm -fn -*-fixed-medium-*-*-*-20-*-*-*-*-*-*-*", f" -bg {color} -geometry 90x10", x11=True), label=f"xterm on node {xterm_node.hostname}", scheduler=scheduler, # don't set forever; if we do, then these xterms get killed # when all other tasks have completed # forever = True, ) # remove dangling requirements - if any # should not be needed but won't hurt either scheduler.sanitize() ########## print(10*"*", "nodes usage summary") if load_nodes: for image, nodes in images_to_load.items(): for node in nodes: print(f"node fit{node:02d} : {image}") else: print("NODES ARE USED AS IS (no image loaded, no reset)") print(10*"*", "phones usage summary") if phones: for phone in phones: print(f"Using phone{phone}") else: print("No phone involved") if nodes_left_alone: print(f"Ignore following fit nodes: {nodes_left_alone}") print(f"Publisher IP is {publisher_ip}") if tcp_streaming: print("Run streaming scenario with TCP") else: print("Run streaming scenario with Cefore") # wrap scheduler into global scheduler that prepares the testbed scheduler = prepare_testbed_scheduler( gwnode, load_nodes, scheduler, images_to_load, nodes_left_alone) scheduler.check_cycles() # Update the .dot and .png file for illustration purposes name = "cefore-load" if load_nodes else "cefore" print(10*'*', 'See main scheduler in', scheduler.export_as_pngfile(name)) if verbose: scheduler.list() if dry_run: return True if verbose: input('OK ? - press control C to abort ? 
') if not scheduler.orchestrate(): print(f"RUN KO : {scheduler.why()}") scheduler.debrief() return False print("RUN OK") print(80*'*') if tcp_streaming: # TCP streaming scenario print(f"Now it's time to run the ns-3 script on node fit{ns3:02d}") print(f"root@fit{ns3:02d}:~# /root/NS3/source/ns-3-dce/waf --run dce-tcp-test") print("Then, run iperf on the publisher host:") print("yourlogin@publisher:~# iperf -s -P 1 -p 80") print(f"Log file will be available on fit{ns3:02d} at:") print(" /root/NS3/source/ns-3-dce/files-4/var/log/56884/stdout") else: # Cefore streaming scenario print("Now, if not already done, copy cefnetd and cefputfile binaries on your publisher host") print("login@your_host:r2lab-demos/cefore# scp bin/cefnetd yourlogin@publisher_node:/usr/local/sbin") print("login@your_host:r2lab-demos/cefore# scp bin/cefputfile yourlogin@publisher_node:/usr/local/bin") print(f"After that, run on the ns-3 fit{ns3:02d} node the following command:") print(f"root@fit{ns3:02d}:~# /root/NS3/source/ns-3-dce/waf --run dce-cefore-test ") print("Then, run on the publisher host:") print("yourlogin@publisher:~# cefnetd") print("yourlogin@publisher:~# cefputfile ccn:/streaming/test -f ./[file-name] -r [1 <= streaming_rate <= 32 (Mbps)]") print(f"Log file will be available on fit{ns3:02d} at:") print(" /root/NS3/source/ns-3-dce/files-3/tmp/cefgetstream-thuputLog-126230400110000") print(80*'*') return True
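# A minimal, self-contained sketch (not part of the scenario above) of the
# fan-in pattern this function relies on: several preparation jobs all gate
# one PrintJob used as a grace delay, which in turn gates the job that
# starts the service. All names here (fake_step, prep_cn, ...) are
# illustrative only.
from asynciojobs import Scheduler, Job, PrintJob

async def fake_step(label):
    # stand-in for a real preparation step
    print(f"step {label} done")

sketch = Scheduler()
prep_cn = Job(fake_step("CN"), scheduler=sketch)
prep_ran = Job(fake_step("RAN"), scheduler=sketch)
grace = PrintJob("settling", sleep=5,
                 required=[prep_cn, prep_ran], scheduler=sketch)
start_service = Job(fake_step("softmodem"), required=grace, scheduler=sketch)
sketch.run()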
def one_run(*, protocol, interference, run_name=default_run_name, slicename=default_slicename, tx_power, phy_rate, antenna_mask, channel, load_images=False, node_ids=DEFAULT_NODE_IDS, src_ids=DEFAULT_SRC_IDS, dest_ids=DEFAULT_DEST_IDS, scrambler_id=DEFAULT_SCRAMBLER_ID, tshark=False, map=False, warmup=False, route_sampling=False, iperf=False, verbose_ssh=False, verbose_jobs=False, dry_run=False, run_number=None): """ Performs data acquisition on all nodes with the following settings Arguments: tx_power: in dBm, a string like 5, 10 or 14. Corresponds to the transmission power. phy_rate: a string among 1, 54. Corresponds to the wifi rate. antenna_mask: a string among 1, 3, 7. channel: a string like e.g. 1 or 40. Corresponds to the channel. protocol: a string among batman, olsr. Corresponds to the routing protocol. interference: in amplitude percentage, a string like 15 or 20. Corresponds to the power of the noise generated in the spectrum. Can be either None or "None" to mean no interference. run_name: the name for a subdirectory where all data will be kept; successive runs should use the same name for further visualization slicename: the Unix login name (slice name) to enter the gateway load_images: a boolean specifying whether nodes should be re-imaged first node_ids: a list of node ids to run the scenario against; strings or ints are OK; tshark: a boolean specifying whether we should format/parse the .pcap. map: a boolean specifying whether we should fetch/parse the route tables of the nodes. warmup: a boolean specifying whether we should run a ping before the experiment, to be certain of the stabilisation of the network. src_ids: a list of nodes from which the pings will be launched; strings or ints are OK. ping_messages: the number of ping packets that will be generated """ # set default for the nodes parameter node_ids = ([int(id) for id in node_ids] if node_ids is not None else DEFAULT_NODE_IDS) src_ids = ([int(id) for id in src_ids] if src_ids is not None else DEFAULT_SRC_IDS) dest_ids = ([int(id) for id in dest_ids] if dest_ids is not None else DEFAULT_NODE_IDS) # all nodes - i.e.
including sources and destinations - # need to run the protocol node_ids = list(set(node_ids).union(set(src_ids).union(set(dest_ids)))) if interference == "None": interference = None # open result dir no matter what run_root = naming_scheme( run_name=run_name, protocol=protocol, interference=interference, autocreate=True) ref_time = apssh_time() trace = run_root / f"trace-{ref_time}" try: with trace.open('w') as feed: def log_line(line): time_line(line, file=feed) load_msg = f"{'WITH' if load_images else 'NO'} image loading" interference_msg = (f"interference={interference} " f"from scrambler={scrambler_id}") nodes = " ".join(str(n) for n in node_ids) srcs = " ".join(str(n) for n in src_ids) dests = " ".join(str(n) for n in dest_ids) ping_labels = [ f"PING {s} ➡︎ {d}" for s in src_ids # and on the destination for d in dest_ids if d != s ] log_line(f"output in {run_root}") log_line(f"trace in {trace}") log_line(f"protocol={protocol}") log_line(f"{load_msg}") log_line(f"{interference_msg}") log_line("----") log_line(f"Selected nodes : {nodes}") log_line(f"Sources : {srcs}") log_line(f"Destinations : {dests}") for label in ping_labels: log_line(f"{label}") log_line("----") for feature in ('warmup', 'tshark', 'map', 'route_sampling', 'iperf'): log_line(f"Feature {feature}: {locals()[feature]}") except Exception as exc: print(f"Cannot write into {trace} - aborting this run") print(f"Found exception {type(exc)} - {exc}") return False # # dry-run mode # just display a one-liner with parameters # prelude = "" if not dry_run else "dry_run:" with trace.open() as feed: print(f"**************** {ref_time} one_run #{run_number}:") for line in feed: print(prelude, line, sep='', end='') if dry_run: return True # the nodes involved faraday = SshNode(hostname=default_gateway, username=slicename, formatter=TimeColonFormatter(), verbose=verbose_ssh) # a python dictionary that allows retrieving a node object # from an id node_index = { id: SshNode(gateway=faraday, hostname=fitname(id), username="******", formatter=TimeColonFormatter(), verbose=verbose_ssh) for id in node_ids } # extracts for sources and destinations src_index = {id: node for (id, node) in node_index.items() if id in src_ids} dest_index = {id: node for (id, node) in node_index.items() if id in dest_ids} if interference: node_scrambler = SshNode( gateway=faraday, hostname=fitname(scrambler_id), username="******", formatter=TimeColonFormatter(), verbose=verbose_ssh) # the global scheduler scheduler = Scheduler(verbose=verbose_jobs) ########## check_lease = SshJob( scheduler=scheduler, node=faraday, verbose=verbose_jobs, label="rhubarbe check lease", command=Run("rhubarbe leases --check", label="rlease"), ) # load images if requested green_light = check_lease # at some point we did not load the scrambler if interference was None # and that was a way to run faster loads with no interference # but now we always load the scrambler node with gnuradio # this is because when we do runs.py -i None 15 30 ... # then the first call to one_run is with interference being None # but it is still important to load the scrambler if load_images: # copy node_ids load_ids = node_ids[:] load_ids.append(scrambler_id) # the nodes that we **do not** use should be turned off # so if we have selected e.g.
nodes 10 12 and 15, we will do # rhubarbe off -a ~10 ~12 ~15, meaning all nodes except 10, 12 and 15 negated_node_ids = [f"~{id}" for id in load_ids] # we can do these three things in parallel ready_jobs = [ SshJob(node=faraday, required=green_light, scheduler=scheduler, verbose=verbose_jobs, command=Run("rhubarbe", "off", "-a", *negated_node_ids, label="turn off unused nodes")), SshJob(node=faraday, required=green_light, scheduler=scheduler, verbose=verbose_jobs, label="load batman image", command=Run("rhubarbe", "load", "-i", "batman-olsr", *node_ids, label=f"load ubuntu on {node_ids}")), SshJob( node=faraday, required=green_light, scheduler=scheduler, verbose=verbose_jobs, label="load gnuradio image", command=Run("rhubarbe", "load", "-i", "batman-olsr-gnuradio", scrambler_id, label=f"load gnuradio on {scrambler_id}")), ] # replace green_light in this case green_light = SshJob( node=faraday, required=ready_jobs, scheduler=scheduler, verbose=verbose_jobs, label="wait for nodes to come up", command=Run("rhubarbe", "wait", *load_ids)) ########## # setting up the wireless interface on all nodes # # provide node-utilities with the ranges/units it expects frequency = channel_frequency[int(channel)] # tx_power_in_mBm not in dBm tx_power_driver = tx_power * 100 #just in case somme services failed in the previous experiment reset_failed_services_job = [ SshJob( node=node, verbose=verbose_jobs, label="reset failed services", command=Run("systemctl reset-failed", label="reset-failed services")) for id, node in node_index.items() ] reset_failed_services = Scheduler( *reset_failed_services_job, scheduler=scheduler, required=green_light, verbose=verbose_jobs, label="Reset failed services") init_wireless_sshjobs = [ SshJob( node=node, verbose=verbose_jobs, label=f"init {id}", command=RunScript( "node-utilities.sh", f"init-ad-hoc-network-{WIRELESS_DRIVER}", WIRELESS_DRIVER, "foobar", frequency, phy_rate, antenna_mask, tx_power_driver, label="init add-hoc network"), ) for id, node in node_index.items()] init_wireless_jobs = Scheduler( *init_wireless_sshjobs, scheduler=scheduler, required=green_light, verbose=verbose_jobs, label="Initialisation of wireless chips") if interference: # Run uhd_siggen with the chosen power init_scrambler_job = SshJob( scheduler=scheduler, required=green_light, forever=True, node=node_scrambler, verbose=verbose_jobs, #TODO : If exit-signal patch is done add exit-signal=["TERM"] # to this run object and call uhd_siggen directly commands=[RunScript("node-utilities.sh", "init-scrambler", label="init scrambler"), Run(f"systemd-run --unit=uhd_siggen -t ", f"uhd_siggen -a usrp -f {frequency}M", f"--sine --amplitude 0.{interference}", label="systemctl start uhd_siggen") ] ) green_light = [init_wireless_jobs, reset_failed_services] # then install and run batman on fit nodes run_protocol_job = [ SshJob( # scheduler=scheduler, node=node, label=f"init and run {protocol} on fit node {id}", verbose=verbose_jobs, # CAREFUL : These ones use sytemd-run # with the ----service-type=forking option! command=RunScript("node-utilities.sh", f"run-{protocol}", label=f"run {protocol}"), ) for id, node in node_index.items()] run_protocol = Scheduler( *run_protocol_job, scheduler=scheduler, required=green_light, verbose=verbose_jobs, label="init and run routing protocols") green_light = run_protocol # after that, run tcpdump on fit nodes, this job never ends... 
if tshark: run_tcpdump_job = [ SshJob( # scheduler=scheduler_monitoring, node=node, forever=True, label=f"run tcpdump on fit node {id}", verbose=verbose_jobs, command=[ Run("systemd-run -t --unit=tcpdump", f"tcpdump -U -i moni-{WIRELESS_DRIVER}", f"-y ieee802_11_radio -w /tmp/fit{id}.pcap", label=f"tcpdump {id}") ] ) for id, node in node_index.items() ] run_tcpdump = Scheduler( *run_tcpdump_job, scheduler=scheduler, required=green_light, forever=True, verbose=verbose_jobs, label="Monitoring - tcpdumps") # let the wireless network settle settle_scheduler = Scheduler( scheduler=scheduler, required=green_light, ) if warmup: # warmup pings don't need to be sequential, so let's # do all the nodes at the same time # on a given node though, we'll ping the other ends sequentially # see the graph for more warmup_jobs = [ SshJob( node=node_s, verbose=verbose_jobs, commands=[ RunScript("node-utilities.sh", "my-ping", f"10.0.0.{d}", warmup_ping_timeout, warmup_ping_interval, warmup_ping_size, warmup_ping_messages, f"warmup {s} ➡︎ {d}", label=f"warmup {s} ➡︎ {d}") for d in dest_index.keys() if s != d ] ) # for each selected experiment nodes for s, node_s in src_index.items() ] warmup_scheduler = Scheduler( *warmup_jobs, scheduler=settle_scheduler, verbose=verbose_jobs, label="Warmup pings") settle_wireless_job2 = PrintJob( "Let the wireless network settle after warmup", sleep=settle_delay_shorter, scheduler=settle_scheduler, required=warmup_scheduler, label=f"settling-warmup for {settle_delay_shorter} sec") # this is a little cheating; could have gone before the block above # but produces a nicer graphical output # we could avoid this if asynciojobs offered a means # to specify entry and exit jobs in a scheduler settle_wireless_job = PrintJob( "Let the wireless network settle", sleep=settle_delay_long, scheduler=settle_scheduler, label=f"settling for {settle_delay_long} sec") green_light = settle_scheduler if iperf: iperf_service_jobs = [ SshJob( node=node_d, verbose=verbose_jobs, forever=True, commands=[ Run("systemd-run -t --unit=iperf", "iperf -s -p 1234 -u", label=f"iperf serv on {d}"), ], ) for d, node_d in dest_index.items() ] iperf_serv_sched = Scheduler( *iperf_service_jobs, verbose=verbose_jobs, label="Iperf Servers", # for a nicer graphical output # otherwise the exit arrow # from scheduler 'iperf mode' # to job 'settling for 60s' # gets to start from this box forever=True, ) iperf_cli = [ SshJob( node=node_s, verbose=verbose_jobs, commands=[ Run("sleep 7", label=""), Run("iperf", f"-c 10.0.0.{d} -p 1234", f"-u -b {phy_rate}M -t 60", f"-l 1024 > IPERF-{s:02d}-{d:02d}", label=f"run iperf {s} ➡︎ {d}") ] ) for s, node_s in src_index.items() for d, node_d in dest_index.items() if s != d ] iperf_cli_sched = Scheduler( Sequence(*iperf_cli), verbose=verbose_jobs, label="Iperf Clients") iperf_stop = [ SshJob(node=node_d, verbose=verbose_jobs, label=f"Stop iperf on {d}", command=Run("systemctl stop iperf")) for d, node_d in dest_index.items() ] iperf_stop_sched = Scheduler( *iperf_stop, required=iperf_cli_sched, verbose=verbose_jobs, label="Iperf server stop") iperf_fetch = [ SshJob(node=node_s, verbose=verbose_jobs, command=Pull( remotepaths=[f"IPERF-{s:02d}-{d:02d}"], localpath=str(run_root), label=f"fetch iperf {s} ➡︎ {d}") ) for s, node_s in src_index.items() for d, node_d in dest_index.items() if s != d ] iperf_fetch_sched = Scheduler( *iperf_fetch, required=iperf_stop_sched, verbose=verbose_jobs, label="Iperf fetch report") iperf_jobs = [iperf_serv_sched, iperf_cli_sched, iperf_stop_sched,
iperf_fetch_sched] iperf_sched = Scheduler( *iperf_jobs, scheduler=scheduler, required=green_light, verbose=verbose_jobs, label="Iperf Module") settle_wireless_job_iperf = PrintJob( "Let the wireless network settle", sleep=settle_delay_shorter, scheduler=scheduler, required=iperf_sched, label=f"settling-iperf for {settle_delay_shorter} sec") green_light = settle_wireless_job_iperf # snapshot the routing tables on all nodes if map: map_jobs = [ SshJob( node=node, label=f"Generating ROUTE file for proto {protocol} on node {id}", verbose=verbose_jobs, commands=[ RunScript("node-utilities.sh", f"route-{protocol}", f"> ROUTE-TABLE-{id:02d}", label="get route table"), Pull(remotepaths=[f"ROUTE-TABLE-{id:02d}"], localpath=str(run_root), label="") ], ) for id, node in node_index.items() ] map_scheduler = Scheduler( *map_jobs, scheduler=scheduler, required=green_light, verbose=verbose_jobs, label="Snapshotting route files") green_light = map_scheduler if route_sampling: route_sampling_jobs = [ SshJob( node=node, label=f"Route sampling service for proto {protocol} on node {id}", verbose=False, forever=True, commands=[ Push(localpaths=["route-sample-service.sh"], remotepath=".", label=""), Run("chmod +x route-sample-service.sh", label=""), Run("systemd-run -t --unit=route-sample", "/root/route-sample-service.sh", "route-sample", f"ROUTE-TABLE-{id:02d}-SAMPLED", protocol, label="start route-sampling"), ], ) for id, node in node_index.items() ] route_sampling_scheduler = Scheduler( *route_sampling_jobs, scheduler=scheduler, verbose=False, forever=True, label="Route Sampling services launch", required=green_light) ########## # create all the ping jobs, i.e. max*(max-1)/2 # this again is a python list comprehension # see the 2 for instructions at the bottom # # notice that these SshJob instances are not yet added # to the scheduler, we will add them later on # depending on the sequential/parallel strategy pings_job = [ SshJob( node=node_s, verbose=verbose_jobs, commands=[ Run(f"echo actual ping {s} ➡︎ {d} using {protocol}", label=f"ping {s} ➡︎ {d}"), RunScript("node-utilities.sh", "my-ping", f"10.0.0.{d}", ping_timeout, ping_interval, ping_size, ping_messages, f"actual {s} ➡︎ {d}", ">", f"PING-{s:02d}-{d:02d}", label=""), Pull(remotepaths=[f"PING-{s:02d}-{d:02d}"], localpath=str(run_root), label=""), ], ) # for each selected experiment nodes for s, node_s in src_index.items() for d, node_d in dest_index.items() if s != d ] pings = Scheduler( scheduler=scheduler, label="PINGS", verbose=verbose_jobs, required=green_light) # stop the routing protocols on fit nodes stop_protocol_job = [ SshJob( # scheduler=scheduler, node=node, # required=pings, label=f"kill routing protocol on {id}", verbose=verbose_jobs, command=RunScript("node-utilities.sh", f"kill-{protocol}", label=f"kill-{protocol}"), ) for id, node in node_index.items() ] stop_protocol = Scheduler( *stop_protocol_job, scheduler=scheduler, required=pings, label="Stop routing protocols", ) if tshark: retrieve_tcpdump_job = [ SshJob( # scheduler=scheduler, node=nodei, # required=pings, label=f"retrieve pcap trace from fit{i:02d}", verbose=verbose_jobs, commands=[ Run("systemctl stop tcpdump", label="stop tcpdump"), #Run("systemctl reset-failed tcpdump"), #RunScript("node-utilities.sh", "kill-tcpdump", # label="kill-tcpdump"), Run( f"echo retrieving pcap trace and result-{i}.txt from fit{i:02d}", label=""), Pull(remotepaths=[f"/tmp/fit{i}.pcap"], localpath=str(run_root), label=""), ], ) for i, nodei in node_index.items() ]
retrieve_tcpdump = Scheduler( *retrieve_tcpdump_job, scheduler=scheduler, required=pings, label="Retrieve tcpdump", ) if route_sampling: retrieve_sampling_job = [ SshJob( # scheduler=scheduler, node=nodei, # required=pings, label=f"retrieve sampling trace from fit{i:02d}", verbose=verbose_jobs, commands=[ # RunScript("node-utilities.sh", "kill-route-sample", protocol, # label = "kill route sample"), #RunScript("route-sample-service.sh", "kill-route-sample", # label="kill route sample"), Run("systemctl stop route-sample", label="stop route-sample"), Run( f"echo retrieving sampling trace from fit{i:02d}", label=""), Pull(remotepaths=[f"ROUTE-TABLE-{i:02d}-SAMPLED"], localpath=str(run_root), label=""), ], ) for i, nodei in node_index.items() ] retrieve_sampling = Scheduler( *retrieve_sampling_job, scheduler=scheduler, required=pings, verbose=verbose_jobs, label="Stop & retrieve route sampling", ) if tshark: parse_pcaps_job = [ SshJob( # scheduler=scheduler, node=LocalNode(), # required=retrieve_tcpdump, label=f"parse pcap trace {run_root}/fit{i}.pcap", verbose=verbose_jobs, #commands = [RunScript("parsepcap.sh", run_root, i)] command=Run("tshark", "-2", "-r", f"{run_root}/fit{i}.pcap", "-R", f"'(ip.dst==10.0.0.{i} && icmp) && radiotap.dbm_antsignal'", "-Tfields", "-e", "'ip.src'", "-e", "'ip.dst'", "-e", "'radiotap.dbm_antsignal'", ">", f"{run_root}/result-{i}.txt", label=f"parsing pcap from {i}"), ) for i in node_ids ] parse_pcaps = Scheduler( *parse_pcaps_job, scheduler=scheduler, required=retrieve_tcpdump, label="Parse pcap", ) if interference: kill_uhd_siggen = SshJob( scheduler=scheduler, node=node_scrambler, required=pings, label=f"killing uhd_siggen on the scrambler node {scrambler_id}", verbose=verbose_jobs, commands=[Run("systemctl", "stop", "uhd_siggen"), #Run("systemctl reset-failed tcpdump"), ], ) kill_2_uhd_siggen = SshJob( scheduler=scheduler, node=faraday, required=kill_uhd_siggen, label=f"turning off usrp on the scrambler node {scrambler_id}", verbose=verbose_jobs, command=Run("rhubarbe", "usrpoff", scrambler_id), ) pings.add(Sequence(*pings_job)) # for running sequentially we impose no limit on the scheduler # that will be limited anyway by the very structure # of the required graph # safety check scheduler.export_as_pngfile(run_root / "experiment-graph") if dry_run: scheduler.list() return True # if not in dry-run mode, let's proceed to the actual experiment ok = scheduler.run() # jobs_window=jobs_window) # close all ssh connections close_ssh_in_scheduler(scheduler) # give details if it failed if not ok: scheduler.debrief() scheduler.export_as_pngfile("debug") if ok and map: time_line("Creation of MAP files") post_processor = ProcessRoutes(run_root, src_ids, node_ids) post_processor.run() if ok and route_sampling: time_line("Creation of ROUTE SAMPLING files") post_processor = ProcessRoutes(run_root, src_ids, node_ids) post_processor.run_sampled() # data acquisition is done, let's aggregate results # i.e. compute averages #if ok and tshark: #post_processor = Aggregator(run_root, node_ids, antenna_mask) #post_processor.run() time_line("one_run done") return ok
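# Sketch of the 'green_light' idiom that structures one_run above: each
# optional phase lives in a nested Scheduler, and the green_light variable
# is re-bound to whatever phase ran last, so the next phase waits on the
# phases that were actually enabled. Phase names here are illustrative only.
from asynciojobs import Scheduler, PrintJob

main = Scheduler()
green_light = PrintJob("lease checked", sleep=.1, scheduler=main)
for phase in ("load images", "init wireless", "run protocol"):
    nested = Scheduler(PrintJob(f"doing {phase}", sleep=.1),
                       scheduler=main, required=green_light, label=phase)
    # the next phase now waits on this one
    green_light = nested
main.run()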
def run(*, gateway, slicename, disaggregated_cn, operator_version, nodes, node_master, node_enb, quectel_nodes, phones, flexran, drone, verbose, dry_run, load_images, master_image, worker_image, quectel_image): """ Install K8S on R2lab Arguments: slicename: the Unix login name (slice name) to enter the gateway quectel_nodes: list of indices of quectel UE nodes to use phones: list of indices of phones to use nodes: a list of node ids to run the scenario on; strings or ints are OK; node_master: the master node id, must be part of selected nodes node_enb: the node id for the enb, which is connected to usrp/duplexer disaggregated_cn: Boolean; True for the disaggregated CN scenario. False for all-in-one CN. operator_version: str, either "none" or "v1" or "v2". """ if operator_version == "none": only_kube5g = True else: only_kube5g = False if node_master not in nodes: print(f"master node {node_master} must be part of selected fit nodes {nodes}") exit(1) if node_enb not in nodes: print(f"eNB worker node {node_enb} must be part of selected fit nodes {nodes}") exit(1) # Check if the browser can be automatically run to display the Drone app if drone: run_browser = True if platform == "linux": cmd_open = "xdg-open" elif platform == "darwin": cmd_open = "open" else: run_browser = False if run_browser: print(f"**** Will run the browser with command {cmd_open}") else: print(f"**** Will not be able to run the browser as platform is {platform}") worker_ids = nodes[:] worker_ids.remove(node_master) quectel_ids = quectel_nodes[:] quectel = len(quectel_ids) > 0 faraday = SshNode(hostname=default_gateway, username=slicename, verbose=verbose, formatter=TimeColonFormatter()) master = SshNode(gateway=faraday, hostname=fitname(node_master), username="******", verbose=verbose, formatter=TimeColonFormatter()) node_index = { id: SshNode(gateway=faraday, hostname=fitname(id), username="******",formatter=TimeColonFormatter(), verbose=verbose) for id in nodes } nodes_quectel_index = { id: SshNode(gateway=faraday, hostname=fitname(id), username="******",formatter=TimeColonFormatter(), verbose=verbose) for id in quectel_nodes } worker_index = dict(node_index) del worker_index[node_master] fit_master = fitname(node_master) fit_enb = fitname(node_enb) # the global scheduler scheduler = Scheduler(verbose=verbose) ########## check_lease = SshJob( scheduler=scheduler, node = faraday, critical = True, verbose=verbose, command = Run("rhubarbe leases --check"), ) green_light = check_lease if load_images: green_light = [ SshJob( scheduler=scheduler, required=check_lease, node=faraday, critical=True, verbose=verbose, label = f"Load image {master_image} on master {fit_master}", commands=[ Run(f"rhubarbe load {node_master} -i {master_image}"), Run(f"rhubarbe wait {node_master}"), ] ), SshJob( scheduler=scheduler, required=check_lease, node=faraday, critical=True, verbose=verbose, label = f"Load image {worker_image} on worker nodes", commands=[ Run(f"rhubarbe usrpoff {node_enb}"), # if usrp is on, load could be problematic... 
Run("rhubarbe", "load", *worker_ids, "-i", worker_image), Run("rhubarbe", "wait", *worker_ids), Run(f"rhubarbe usrpon {node_enb}"), # ensure a reset of the USRP on the enB node ], ), SshJob( scheduler=scheduler, required=check_lease, node=faraday, critical=False, verbose=verbose, label="turning off unused nodes", command=[ Run("rhubarbe bye --all " + "".join(f"~{x} " for x in nodes)) ] ) ] if quectel: prepare_quectel = SshJob( scheduler=scheduler, required=green_light, node=faraday, critical=True, verbose=verbose, label = f"Load image {quectel_image} on quectel UE nodes", commands=[ Run("rhubarbe", "usrpoff", *quectel_ids), Run("rhubarbe", "load", *quectel_ids, "-i", quectel_image), Run("rhubarbe", "wait", *quectel_ids), Run("rhubarbe", "usrpon", *quectel_ids), ], ), ########## if quectel: # wait 30s for Quectel modules show up wait_quectel_ready = PrintJob( "Let Quectel modules show up", scheduler=scheduler, required=prepare_quectel, sleep=30, label="sleep 30s for the Quectel modules to show up" ) # run the Quectel Connection Manager as a service on each Quectel UE node quectelCM_service = Service( command="quectel-CM -s oai.ipv4 -4", service_id="QuectelCM", verbose=verbose, ) init_quectel_nodes = [ SshJob( scheduler=scheduler, required=wait_quectel_ready, node=node, critical=True, verbose=verbose, label=f"Init Quectel UE on fit node {id}", commands = [ RunScript(find_local_embedded_script("nodes.sh"), "check-quectel-on", includes=INCLUDES), quectelCM_service.start_command(), ], ) for id, node in nodes_quectel_index.items() ] # wait 20s for Quectel Connection Manager to start up wait_quectelCM_ready = PrintJob( "Let QuectelCM start up", scheduler=scheduler, required=init_quectel_nodes, sleep=20, label="Sleep 20s for the Quectel Connection Manager(s) to start up" ) detach_quectel_nodes = [ SshJob( scheduler=scheduler, required=wait_quectelCM_ready, node=node, critical=True, verbose=verbose, label=f"Detach Quectel UE on fit node {id}", command = RunScript(find_local_embedded_script("nodes.sh"), "quectel-detach", includes=INCLUDES), ) for id, node in nodes_quectel_index.items() ] ########## # Initialize k8s on the master node init_master = SshJob( scheduler=scheduler, required=green_light, node=master, critical=True, verbose=verbose, label = f"Install and launch k8s on the master {node_master}", commands = [ Run("swapoff -a"), Run("hostnamectl set-hostname master-node"), Run("kubeadm init --pod-network-cidr=10.244.0.0/16 > /tmp/join_msg.txt"), Run("tail -2 /tmp/join_msg.txt > /tmp/join_msg"), Run("mkdir -p $HOME/.kube"), Run("cp -i /etc/kubernetes/admin.conf $HOME/.kube/config"), Run("chown $(id -u):$(id -g) $HOME/.kube/config"), Run("kubectl apply -f https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml"), Run("kubectl get pods --all-namespaces"), ], ) init_workers = [ SshJob( scheduler=scheduler, required=init_master, node=node, critical=True, verbose=verbose, label=f"Init k8s on fit node {id} and join the cluster", commands = [ Run("swapoff -a"), Run("increase-control-mtu"), Run(f"scp -o 'StrictHostKeyChecking no' {fit_master}:/tmp/join_msg /tmp/join_msg"), Run("chmod a+x /tmp/join_msg"), Run("/tmp/join_msg"), ], ) for id, node in worker_index.items() ] # wait 10s for K8S nodes setup wait_k8nodes_ready = PrintJob( "Let k8s set up", scheduler=scheduler, required=init_workers, sleep=10, label="sleep 10s for the k8s nodes to settle" ) init_kube5g = SshJob( scheduler=scheduler, required = wait_k8nodes_ready, node = master, verbose=verbose, label = f"Add 
oai:ran label to oai-ran pod on {node_enb} and start 5GOperator pod", commands = [ Run("kubectl get nodes"), # add label to the eNB node to help k8s scheduler selects the right fit node Run(f"kubectl label nodes fit{node_enb} oai=ran"), Run("kubectl get nodes -Loai"), ## retrieve the kube5g operator #Run("git clone -b develop [email protected]:mosaic5g/kube5g.git"), # install a few dependencies Run("apt install -y python3-pip"), Run("pip3 install --upgrade pip"), Run("pip3 install ruamel.yaml==0.16.12 colorlog==4.6.2"), Run("sed -i 's/oairan:v1-1.0-1/oairan:v1-1.0-3/g' /root/kube5g/common/config-manager/conf_global_default.yaml"), # specify the R2lab specific configuration Run("cd /root/kube5g/common/config-manager; ./conf-manager.py -s conf_short_r2lab.yaml"), # apply the R2lab CRD Run("cd /root/kube5g/openshift/kube5g-operator; ./k5goperator.sh -n"), # start the kube5g operator pod Run("cd /root/kube5g/openshift/kube5g-operator; ./k5goperator.sh container start"), Run("kubectl get pods"), ], ) # wait 30s for K8S 5G Operator setup wait_k8_5GOp_ready = PrintJob( "Let 5G Operator set up", scheduler=scheduler, required=init_kube5g, sleep=30, label="wait 30s for the 5G Operator pod to settle" ) if only_kube5g: finish = SshJob( scheduler=scheduler, required = wait_k8_5GOp_ready, node = master, verbose=verbose, label = f"showing nodes and pods before leaving", commands = [ Run("kubectl get nodes -Loai"), Run("kubectl get pods"), ], ) else: if disaggregated_cn: cn_type="disaggregated-cn" # setup_time = 120 setup_time = 200 else: cn_type="all-in-one" # setup_time = 60 setup_time = 140 if flexran: flexran_opt="flexran" else: flexran_opt="" run_kube5g = SshJob( scheduler=scheduler, required = wait_k8_5GOp_ready, node = master, verbose=verbose, label = f"deploy {operator_version} {cn_type} {flexran_opt} pods", commands = [ Run("kubectl get nodes -Loai"), Run(f"cd /root/kube5g/openshift/kube5g-operator; ./k5goperator.sh deploy {operator_version} {cn_type} {flexran_opt}"), Run("kubectl get pods"), ], ) # Coffee Break -- wait 1 or 2mn for K8S 5G pods setup wait_k8_5Gpods_ready = PrintJob( "Let all 5G pods set up", scheduler=scheduler, required=run_kube5g, sleep=setup_time, label=f"waiting {setup_time}s for all 5G pods to settle" ) check_kube5g = SshJob( scheduler=scheduler, required = wait_k8_5Gpods_ready, node = master, verbose=verbose, label = "Check which pods are deployed", commands = [ Run("kubectl get nodes -Loai"), Run("kubectl get pods"), ], ) if drone: # the place where runtime variables get stored env = Variables() # # Define and run all the services to launch the Drone app locally on a firefox browser # drone_service = Service( command=f"python /root/mosaic5g/store/sdk/frontend/drone/drone.py --port=8088 --tasks --address=192.168.3.{node_enb}", service_id="drone_app", verbose=verbose, ) k8s_port9999_fwd_service = Service( command=Deferred("kubectl port-forward {{flexran_pod}} 9999:9999 --address 0.0.0.0", env), service_id="k8s-port9999-fwd", verbose=verbose, # somehow this is required for kubectl to run properly environ={'KUBECONFIG': '/etc/kubernetes/admin.conf'} ) # can't use a Service instance on the local box if it's not a Linux # and we have macs... 
local_port_fwd = (f"ssh -f -N -4" f" -L9999:192.168.3.{node_master}:9999" f" -L8088:192.168.3.{node_enb}:8088" f" -o ExitOnForwardFailure=yes" f" {slicename}@faraday.inria.fr") browser_service = Service( command=f"sleep 10; {cmd_open} http://127.0.0.1:8088/", service_id="drone_browser", verbose=verbose, ) run_drone=SshJob( scheduler=scheduler, required=check_kube5g, node=worker_index[node_enb], verbose=verbose, label=f"Run the drone app on worker node {node_enb} as a service", commands=[ drone_service.start_command(), ], ) get_flexran_podname=SshJob( scheduler=scheduler, required=check_kube5g, node=master, verbose=verbose, label=f"Retrieve the name of the FlexRAN pod", commands=[ # xxx here Run("kubectl get --no-headers=true pods -l app=flexran -o custom-columns=:metadata.name", capture=Capture('flexran_pod', env)), ], ) run_k8s_port9999_fwd=SshJob( scheduler=scheduler, required=get_flexran_podname, node=master, verbose=verbose, label=f"Run port forwarding on the master node as a service", commands=[ k8s_port9999_fwd_service.start_command(), ], ) # On the local machine, impossible to use Services as the latter uses systemd-run, only available on Linux run_local_ports_fwd = SshJob( scheduler=scheduler, required = check_kube5g, node = LocalNode(), verbose=verbose, label = f"Forward local ports 8088 and 9999", command=Run(local_port_fwd + "&", ignore_outputs=True), ) if run_browser: run_local_browser = SshJob( scheduler=scheduler, required = (run_drone, run_k8s_port9999_fwd, run_local_ports_fwd), node = LocalNode(), verbose=verbose, label = f"Run the browser on the local node in background", command=browser_service.command+"&", ) phones_requirements=run_local_browser else: phones_requirements=run_k8s_port9999_fwd else: phones_requirements=check_kube5g ########## Test phone(s) connectivity sleeps_ran = (20, 25) phone_msgs = [f"wait for {sleep}s for eNB to start up before waking up phone{id}" for sleep, id in zip(sleeps_ran, phones)] wait_commands = [f"echo {msg}; sleep {sleep}" for msg, sleep in zip(phone_msgs, sleeps_ran)] sleeps_phone = (15, 20) phone2_msgs = [f"wait for {sleep}s for phone{id} before starting tests" for sleep, id in zip(sleeps_phone, phones)] wait2_commands = [f"echo {msg}; sleep {sleep}" for msg, sleep in zip(phone2_msgs, sleeps_phone)] job_start_phones = [ SshJob( node=faraday, commands=[ Run(wait_command), RunScript(find_local_embedded_script("faraday.sh"), f"macphone{id}", "r2lab-embedded/shell/macphone.sh", "phone-on", includes=INCLUDES), Run(wait2_command), RunScript(find_local_embedded_script("faraday.sh"), f"macphone{id}", "r2lab-embedded/shell/macphone.sh", "phone-check-cx", includes=INCLUDES), RunScript(find_local_embedded_script("faraday.sh"), f"macphone{id}", "r2lab-embedded/shell/macphone.sh", "phone-start-app", includes=INCLUDES), ], label=f"turn off airplane mode on phone {id}", required=phones_requirements, scheduler=scheduler) for id, wait_command, wait2_command in zip(phones, wait_commands, wait2_commands) ] if quectel: # wait 60s for Quectel connection(s) to set up wait_before_attach_quectel = PrintJob( "Wait again 30s before attaching Quectel device(s)", scheduler=scheduler, required=(job_start_phones,check_kube5g,detach_quectel_nodes), sleep=30, label="Sleep 30s before attaching Quectel device(s)" ) job_attach_quectel = [ SshJob( scheduler=scheduler, required=wait_before_attach_quectel, node=node, critical=True, verbose=verbose, label=f"Attach Quectel UE on fit node {id}", command = RunScript(find_local_embedded_script("nodes.sh"), "quectel-attach", 
includes=INCLUDES), ) for id, node in nodes_quectel_index.items() ] # wait 30s for Quectel connection(s) to set up wait_quectel_cx_ready = PrintJob( "Let the Quectel connection(s) set up", scheduler=scheduler, required=job_attach_quectel, sleep=30, label="Sleep 30s for the Quectel connection(s) to set up" ) test_quectel_cx = [ SshJob( scheduler=scheduler, required=wait_quectel_cx_ready, node=node, critical=False, verbose=verbose, label=f"Check the Quectel cx on fit node {id}", command = RunScript(find_local_embedded_script("nodes.sh"), "check-quectel-cx", includes=INCLUDES), ) for id, node in nodes_quectel_index.items() ] ########## # Update the .dot and .png file for illustration purposes scheduler.check_cycles() name = "deploy-kube5g" print(10*'*', 'See main scheduler in', scheduler.export_as_pngfile(name)) # orchestration scheduler jobs if verbose: scheduler.list() if dry_run: return True if not scheduler.orchestrate(): print(f"RUN KO : {scheduler.why()}") scheduler.debrief() return False print(f"RUN OK, you can now log into master node {fit_master} to manually change the scenario") print(80*'*')
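# Hedged sketch of the apssh Capture/Deferred mechanism used in the drone
# section above: one job captures the output of a Run into a Variables
# environment, and a later Service command references it as {{...}},
# expanded at run time. Hostname and variable name below are made up.
from asynciojobs import Scheduler
from apssh import (SshNode, SshJob, Run, Service,
                   Capture, Deferred, Variables)

env = Variables()
box = SshNode(hostname="example.host", username="root")   # hypothetical host
grab = SshJob(node=box,
              command=Run("hostname", capture=Capture('boxname', env)))
echo_service = Service(command=Deferred("echo captured {{boxname}}", env),
                       service_id="echo_demo")
use = SshJob(node=box, required=grab,
             command=echo_service.start_command())
Scheduler(grab, use).run()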
def run_scenario(slicename=gateway_username, load_images=load_images, node_ids=node_ids, verbose_mode=verbose_mode, node_sender=node_sender): """ Performs L2BM experimentation Arguments: slicename: the Unix login name (slice name) to enter the gateway load_images: a boolean specifying whether nodes should be re-imaged first, else nodes will be reset to allow reconfiguration node_ids: a list of node ids to run the scenario on; strings or ints are OK; node_sender: the sender node id, must be part of selected nodes """ if node_sender not in node_ids: print("sender node {} must be part of selected fit nodes {}".format(node_sender, node_ids)) exit(1) faraday = SshNode(hostname=default_gateway, username=slicename, verbose=verbose_mode, formatter=TimeColonFormatter()) node_ovs = SshNode(gateway=faraday, hostname=fitname(node_sender), username="******", verbose=verbose_mode, formatter=TimeColonFormatter()) node_index = { id: SshNode(gateway=faraday, hostname=fitname(id), username="******",formatter=TimeColonFormatter(), verbose=verbose_mode) for id in node_ids } receiver_index = dict(node_index) del receiver_index[node_sender] fit_sender = fitname(node_sender) ip_sender = "10.0.0.{}".format(node_sender) # the global scheduler scheduler = Scheduler(verbose=verbose_mode) ########## check_lease = SshJob( scheduler=scheduler, node = faraday, critical = True, verbose=verbose_mode, command = Run("rhubarbe leases --check"), ) if load_images: green_light = SshJob( scheduler=scheduler, required=check_lease, node=faraday, critical=True, verbose=verbose_mode, commands=[ Run("rhubarbe", "load", "-i", fit_image, *node_ids), Run("rhubarbe", "wait", *node_ids) ] ) else: # reset nodes if images are already loaded green_light = SshJob( scheduler=scheduler, required=check_lease, node=faraday, critical=True, verbose=verbose_mode, commands=[ Run("rhubarbe", "reset", *node_ids), Run("rhubarbe", "wait", *node_ids) ] ) ########## # setting up the wireless interface on all nodes init_nodes = [ SshJob( scheduler=scheduler, required=green_light, node=node, critical=True, verbose=verbose_mode, label="init fit node {}".format(id), command=RunScript( "l2bm-setup.sh", "init-ad-hoc-network", wireless_driver, ssid, frequency) ) for id, node in node_index.items() ] # test Wi-Fi ad hoc connectivity between receivers and the sender ping = [ SshJob( scheduler=scheduler, required=init_nodes, node=node, verbose=verbose_mode, label="ping sender from receiver {}".format(id), command=RunScript( "l2bm-setup.sh", "my-ping", ip_sender, 20) ) for id, node in receiver_index.items() ] # Setting up OVS and libfluid on the sender node ovs_setup = SshJob( scheduler=scheduler, required=ping, node=node_ovs, critical=True, verbose=verbose_mode, command=RunScript("l2bm-setup.sh", "ovs-setup") ) # we need to wait for OVS and libfluid controller setup wait_ovs_job = PrintJob( "Let the OVS and Libfluid settle", scheduler=scheduler, required=ping, sleep=60, label="settling ovs and libfluid" ) iperf_sender = SshJob( scheduler=scheduler, required = wait_ovs_job, node = node_ovs, verbose=verbose_mode, command = RunScript("l2bm-setup.sh", "iperf_sender") ) # Run an iperf receiver at each receiving nodes iperf_receivers = [ SshJob( scheduler=scheduler, required=wait_ovs_job, node=node, verbose=verbose_mode, label="run iperf on receiver {}".format(id), command = RunScript("l2bm-setup.sh", "iperf_receiver") ) for id, node in receiver_index.items() ] ########## # orchestration scheduler jobs ok = scheduler.orchestrate() # give details if it failed if not ok: 
scheduler.debrief()
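# Sketch of what 'critical' means in the jobs above: an exception in a
# critical job aborts orchestration, whereas a failing non-critical job
# (like the connectivity pings) is only reported by debrief(). Minimal,
# self-contained example with made-up coroutines:
from asynciojobs import Scheduler, Job

async def boom():
    raise RuntimeError("transient failure")

async def fine():
    return 0

s = Scheduler(Job(boom(), critical=False, label="allowed to fail"),
              Job(fine(), label="real work"))
ok = s.orchestrate()     # True: the only failure was non-critical
ok or s.debrief()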
def one_run(gwhost, gwuser, keys, sendername, receivername, packets, size, period, formatter, verbose=False): """ gwhost, gwuser, keys: where to reach the testbed gateway sendername, receivername : hostnames for the test nodes packets, size, period : details of the traffic to send formatter: how to report results """ # we keep all 'environment' data for one run in a dedicated subdir # using this name scheme to store results locally # xxx inherited from the NEPI version - unused for now dataname = os.path.join("csi-{}-{}-{}-{}-{}".format( receivername, sendername, packets, size, period)) # we have reused the shell script from the NEPI version as-is auxiliary_script = "./angle-measure.sh" # the proxy to enter faraday r2lab_gateway = SshNode( hostname=gwhost, username=gwuser, keys=keys, formatter=formatter, ) # the sender node sender = SshNode( # specifying the gateway attribute means this node will be reached # through the ssh connection to the gateway gateway=r2lab_gateway, # hostname needs to make sense in the context of the gateway; so e.g. 'fit01' is fine hostname=sendername, # from the gateway we enter the R2lab nodes as root username='******', formatter=formatter, ) # the receiver node - ditto receiver = SshNode( hostname=receivername, username='******', gateway=r2lab_gateway, formatter=formatter, ) # one initialization job per node init_sender = SshJob( # on what node to run the command node=sender, # the command to run; being a RunScript, the first argument # is expected to be a **LOCAL** script that gets pushed remotely # before being run # a simple Run is more suitable to issue standard Unix commands for instance command=RunScript(auxiliary_script, "init-sender", 64, "HT20"), # for convenience purposes label="init-sender") init_receiver = SshJob(node=receiver, command=RunScript(auxiliary_script, "init-receiver", 64, "HT20"), label="init-receiver") # ditto for actually running the experiment run_sender = SshJob(node=sender, command=RunScript(auxiliary_script, "run-sender", packets, size, period), label="run-sender") # run the sender only once both nodes are ready run_sender.requires(init_sender, init_receiver) run_receiver = SshJob(node=receiver, commands=[ RunScript(auxiliary_script, "run-receiver", packets, size, period), Pull(remotepaths='rawdata', localpath=dataname), ], label="run-receiver") # ditto run_receiver.requires(init_sender, init_receiver) # print a one-liner for that receiver, sender couple summary = "{} ==> {} - {} packets of {} bytes, each {}us"\ .format(sendername, receivername, packets, size, period) print(10 * '-', summary) # create a Scheduler object that will orchestrate this scenario e = Scheduler(init_sender, init_receiver, run_sender, run_receiver, verbose=verbose) print(20 * '*', "before run") e.list(details=verbose) print(20 * '*') if e.orchestrate(timeout=3 * 60): print("========== experiment OK") else: print("!!!!!!!!!! orchestration KO") e.debrief()
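# The function above wires dependencies with requires() on the job objects,
# while other scenarios in this file use the required= keyword; the two
# styles are equivalent. A tiny illustrative sketch:
from asynciojobs import Scheduler, Job

async def step(n):
    print(f"step {n}")

j1, j2, j3 = Job(step(1)), Job(step(2)), Job(step(3))
j3.requires(j1, j2)      # j3 starts only once j1 and j2 are both done
Scheduler(j1, j2, j3).run()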
def run( *, # pylint: disable=r0912, r0914, r0915 # the pieces to use slicename, cn, ran, phones, e3372_ues, oai_ues, gnuradios, e3372_ue_xterms, gnuradio_xterms, # boolean flags load_nodes, reset_usb, oscillo, # the images to load image_cn, image_ran, image_oai_ue, image_e3372_ue, image_gnuradio, image_T_tracer, # miscell n_rb, nodes_left_alone, T_tracer, verbose, dry_run): """ ########## # 3 methods to get nodes ready # (*) load images # (*) reset nodes that are known to have the right image # (*) do nothing, proceed to experiment expects e.g. * slicename : s.t like [email protected] * cn : 7 * ran : 23 * phones: list of indices of phones to use * e3372_ues : list of nodes to use as a UE using e3372 * oai_ues : list of nodes to use as a UE using OAI * gnuradios : list of nodes to load with a gnuradio image * T_tracer : list of nodes to load with a tracer image * image_* : the name of the images to load on the various nodes Plus * load_nodes: whether to load images or not - in which case image_cn, image_ran and image_* are used to tell the image names * reset_usb : the USRP board will be reset when this is set """ # what argparse knows as a slice actually is about the gateway (user + host) gwuser, gwhost = r2lab_parse_slice(slicename) gwnode = SshNode(hostname=gwhost, username=gwuser, formatter=TimeColonFormatter(verbose=verbose), debug=verbose) hostnames = [r2lab_hostname(x) for x in (cn, ran)] cnnode, rannode = [ SshNode(gateway=gwnode, hostname=hostname, username='******', formatter=TimeColonFormatter(verbose=verbose), debug=verbose) for hostname in hostnames ] scheduler = Scheduler(verbose=verbose, label="CORE EXP") ########## prepare the image-loading phase # focus on the experiment, and use # prepare_testbed_scheduler later on to prepare testbed # all we need to do at this point is compute a mapping dict # image -> list-of-nodes images_to_load = defaultdict(list) images_to_load[image_cn] += [cn] images_to_load[image_ran] += [ran] if e3372_ues: images_to_load[image_e3372_ue] += e3372_ues if e3372_ue_xterms: images_to_load[image_e3372_ue] += e3372_ue_xterms if oai_ues: images_to_load[image_oai_ue] += oai_ues if gnuradios: images_to_load[image_gnuradio] += gnuradios if gnuradio_xterms: images_to_load[image_gnuradio] += gnuradio_xterms if T_tracer: images_to_load[image_T_tracer] += T_tracer # start core network job_start_cn = SshJob( node=cnnode, commands=[ RunScript(find_local_embedded_script("nodes.sh"), "git-pull-r2lab", includes=INCLUDES), RunScript(find_local_embedded_script("mosaic-cn.sh"), "journal --vacuum-time=1s", includes=INCLUDES), RunScript(find_local_embedded_script("mosaic-cn.sh"), "configure", includes=INCLUDES), RunScript(find_local_embedded_script("mosaic-cn.sh"), "start", includes=INCLUDES), tcpdump_cn_service.start_command(), ], label="start CN service", scheduler=scheduler, ) # prepare enodeb reset_option = "-u" if reset_usb else "" job_warm_ran = SshJob( node=rannode, commands=[ RunScript(find_local_embedded_script("nodes.sh"), "git-pull-r2lab", includes=INCLUDES), RunScript(find_local_embedded_script("mosaic-ran.sh"), "journal --vacuum-time=1s", includes=INCLUDES), RunScript(find_local_embedded_script("mosaic-ran.sh"), "warm-up", reset_option, includes=INCLUDES), RunScript(find_local_embedded_script("mosaic-ran.sh"), "configure -b", n_rb, cn, includes=INCLUDES), ], label="Configure eNB", scheduler=scheduler, ) ran_requirements = [job_start_cn, job_warm_ran] ### if oai_ues: # prepare OAI UEs for ue in oai_ues: ue_node = SshNode(gateway=gwnode, 
hostname=r2lab_hostname(ue), username='******', formatter=TimeColonFormatter(verbose=verbose), debug=verbose) job_warm_ues = [ SshJob(node=ue_node, commands=[ RunScript(find_local_embedded_script("nodes.sh"), "git-pull-r2lab", includes=INCLUDES), RunScript( find_local_embedded_script("mosaic-oai-ue.sh"), "journal --vacuum-time=1s", includes=INCLUDES), RunScript( find_local_embedded_script("mosaic-oai-ue.sh"), "warm-up", reset_option, includes=INCLUDES), RunScript( find_local_embedded_script("mosaic-oai-ue.sh"), "configure -b", n_rb, includes=INCLUDES), ], label=f"Configure OAI UE on fit{ue}", scheduler=scheduler) ] ran_requirements.append(job_warm_ues) ### if not load_nodes and phones: job_turn_off_phones = SshJob( node=gwnode, commands=[ RunScript(find_local_embedded_script("faraday.sh"), f"macphone{phone} phone-off") for phone in phones ], scheduler=scheduler, ) ran_requirements.append(job_turn_off_phones) # wait for everything to be ready, and add an extra grace delay grace = 5 grace_delay = PrintJob( f"Allowing grace of {grace} seconds", sleep=grace, required=ran_requirements, scheduler=scheduler, label=f"settle for {grace}s", ) # optionally start T_tracer if T_tracer: job_start_T_tracer = SshJob( # pylint: disable=w0612 node=SshNode(gateway=gwnode, hostname=r2lab_hostname(T_tracer[0]), username='******', formatter=TimeColonFormatter(verbose=verbose), debug=verbose), commands=[ Run(f"/root/trace {ran}", x11=True), ], label="start T_tracer service", required=ran_requirements, scheduler=scheduler, ) # ran_requirements.append(job_start_T_tracer) # start services graphical_option = "-x" if oscillo else "" graphical_message = "graphical" if oscillo else "regular" tracer_option = " -T" if T_tracer else "" # we use a Python variable for consistency # although it is not used down the road _job_service_ran = SshJob( node=rannode, commands=[ RunScript( find_local_embedded_script("mosaic-ran.sh"), "start", graphical_option, tracer_option, includes=INCLUDES, x11=oscillo, ), ], label=f"start {graphical_message} softmodem on eNB", required=grace_delay, scheduler=scheduler, ) ########## run experiment per se # Manage phone(s) and OAI UE(s) # this starts at the same time as the eNB, but some # headstart is needed so that eNB actually is ready to serve sleeps = [20, 30] phone_msgs = [ f"wait for {sleep}s for eNB to start up before waking up phone{id}" for sleep, id in zip(sleeps, phones) ] wait_commands = [ f"echo {msg}; sleep {sleep}" for msg, sleep in zip(phone_msgs, sleeps) ] job_start_phones = [ SshJob(node=gwnode, commands=[ Run(wait_command), RunScript(find_local_embedded_script("faraday.sh"), f"macphone{id}", "r2lab-embedded/shell/macphone.sh", "phone-on", includes=INCLUDES), RunScript(find_local_embedded_script("faraday.sh"), f"macphone{id}", "r2lab-embedded/shell/macphone.sh", "phone-start-app", includes=INCLUDES), ], label=f"turn off airplane mode on phone {id}", required=grace_delay, scheduler=scheduler) for id, wait_command in zip(phones, wait_commands) ] if oai_ues: delay = 25 for ue in oai_ues: msg = f"wait for {delay}s for eNB to start up before running UE on node fit{ue}" wait_command = f"echo {msg}; sleep {delay}" ue_node = SshNode(gateway=gwnode, hostname=r2lab_hostname(ue), username='******', formatter=TimeColonFormatter(verbose=verbose), debug=verbose) job_start_ues = [ SshJob(node=ue_node, commands=[ Run(wait_command), RunScript( find_local_embedded_script("mosaic-oai-ue.sh"), "start", includes=INCLUDES), ], label=f"Start OAI UE on fit{ue}", required=grace_delay, scheduler=scheduler)
] delay += 20 for ue in oai_ues: ue_node = SshNode(gateway=gwnode, hostname=r2lab_hostname(ue), username='******', formatter=TimeColonFormatter(verbose=verbose), debug=verbose) msg = f"Wait 60s and then ping faraday gateway from UE on fit{ue}" _job_ping_gw_from_ue = [ SshJob(node=ue_node, commands=[ Run(f"echo {msg}; sleep 60"), Run(f"ping -c 5 -I oip1 faraday.inria.fr"), ], label=f"ping faraday gateway from UE on fit{ue}", critical=False, required=job_start_ues, scheduler=scheduler) ] # ditto _job_ping_phones_from_cn = [ SshJob( node=cnnode, commands=[ Run("sleep 20"), Run(f"ping -c 100 -s 100 -i .05 172.16.0.{id+1} &> /root/ping-phone{id}" ), ], label=f"ping phone {id} from core network", critical=False, required=job_start_phones, scheduler=scheduler) for id in phones ] ########## xterm nodes colors = ("wheat", "gray", "white", "darkolivegreen") xterms = e3372_ue_xterms + gnuradio_xterms for xterm, color in zip(xterms, cycle(colors)): xterm_node = SshNode(gateway=gwnode, hostname=r2lab_hostname(xterm), username='******', formatter=TimeColonFormatter(verbose=verbose), debug=verbose) SshJob( node=xterm_node, command=Run(f"xterm -fn -*-fixed-medium-*-*-*-20-*-*-*-*-*-*-*", f" -bg {color} -geometry 90x10", x11=True), label=f"xterm on node {xterm_node.hostname}", scheduler=scheduler, # don't set forever; if we do, then these xterms get killed # when all other tasks have completed # forever = True, ) # remove dangling requirements - if any # should not be needed but won't hurt either scheduler.sanitize() ########## print(10 * "*", "nodes usage summary") if load_nodes: for image, nodes in images_to_load.items(): for node in nodes: print(f"node {node} : {image}") else: print("NODES ARE USED AS IS (no image loaded, no reset)") print(10 * "*", "phones usage summary") if phones: for phone in phones: print(f"Using phone{phone}") else: print("No phone involved") if nodes_left_alone: print(f"Ignore following fit nodes: {nodes_left_alone}") # wrap scheduler into global scheduler that prepares the testbed scheduler = prepare_testbed_scheduler(gwnode, load_nodes, scheduler, images_to_load, nodes_left_alone) scheduler.check_cycles() # Update the .dot and .png file for illustration purposes name = "mosaic-load" if load_nodes else "mosaic" print(10 * '*', 'See main scheduler in', scheduler.export_as_pngfile(name)) if verbose: scheduler.list() if dry_run: return True if verbose: input('OK ? - press control C to abort ? ') if not scheduler.orchestrate(): print(f"RUN KO : {scheduler.why()}") scheduler.debrief() return False print("RUN OK") return True
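# Sketch of the image -> nodes mapping consumed by prepare_testbed_scheduler
# in the function above: a defaultdict(list) lets several roles accumulate
# nodes under the same image. Image names and node numbers are illustrative.
from collections import defaultdict

images_to_load = defaultdict(list)
images_to_load["mosaic-cn"] += [7]        # core network
images_to_load["mosaic-ran"] += [23]      # eNB
images_to_load["mosaic-ue"] += [6, 9]     # two OAI UEs sharing one image
for image, nodes in images_to_load.items():
    print(f"would load image {image} on nodes {nodes}")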
def one_run(tx_power, phy_rate, antenna_mask, channel, interference, protocol, *, run_name=default_run_name, slicename=default_slicename, load_images=False, node_ids=None, verbose_ssh=False, verbose_jobs=False, dry_run=False, tshark=False, map=False, warmup=False, exp=default_exp, dest=default_node_ids, ping_number=default_ping_number, route_sampling=False): """ Performs data acquisition on all nodes with the following settings Arguments: tx_power: in dBm, a string like 5, 10 or 14. Corresponds to the transmission power. phy_rate: a string among 1, 54. Corresponds to the wifi rate. antenna_mask: a string among 1, 3, 7. channel: a string like e.g. 1 or 40. Corresponds to the channel. protocol: a string among batman, olsr. Corresponds to the routing protocol. interference: in dBm, a string like 60 or 50. Corresponds to the power of the noise generated in the room. run_name: the name for a subdirectory where all data will be kept; successive runs should use the same name for further visualization slicename: the Unix login name (slice name) to enter the gateway load_images: a boolean specifying whether nodes should be re-imaged first node_ids: a list of node ids to run the scenario against; strings or ints are OK; defaults to the nodes [1, 4, 5, 12, 19, 22, 27, 31, 33, 37] tshark: a boolean specifying whether we should format/parse the .pcap. map: a boolean specifying whether we should fetch/parse the route tables of the nodes. warmup: a boolean specifying whether we should run a ping before the experiment, to be certain of the stabilisation of the network. exp: a list of nodes from which the pings will be launched; strings or ints are OK; defaults to node [1] ping_number: the number of pings that will be generated """ # set default for the nodes parameter node_ids = [int(id) for id in node_ids ] if node_ids is not None else default_node_ids exp_ids = [int(id) for id in exp] if exp is not None else default_exp dest_ids = [int(id) for id in dest] if dest is not None else default_node_ids # # dry-run mode # just display a one-liner with parameters # if dry_run: print("************************************") print("\n") run_root = naming_scheme(protocol, run_name, tx_power, phy_rate, antenna_mask, channel, interference, autocreate=False) load_msg = "" if not load_images else " LOAD" nodes = " ".join(str(n) for n in node_ids) exps = " ".join(str(n) for n in exp) pingst = [ "PING{}-->{}".format(e, j) for e in exp_ids # and on the destination for j in node_ids if e != j #and not #(j in exp_ids and j < e) ] print( "dry-run:{protocol} {run_name}{load_msg} -" " t{tx_power} r{phy_rate} a{antenna_mask} ch{channel} I{interference}-" "nodes {nodes}" " exp {exps}".format(**locals())) print( "\nNodes from which the experiment will be launched : \n{}\nList of pings generated:\n" .format(exps)) print(pingst) print("\n") if warmup: print("Will do warmup pings\n") if tshark: print( "Will format data using tshark and will aggregate the RSSI into one RSSI.txt file" ) if map: print( "Will fetch the routing tables of the nodes (once stabilised) and will aggregate the results\n" ) if route_sampling: print("Will launch route sampling services on nodes") #print("Test creation of ROUTES files") #post_processor= ProcessRoutes(run_root, exp_ids, node_ids) #post_processor.run() #print("\nList of tracepaths generated:\n{}".format(tracepathst)) # in dry-run mode we are done ### # create the logs directory based on input parameters run_root = naming_scheme(protocol, run_name, tx_power, phy_rate, antenna_mask, channel, interference,
                             autocreate=False)
    if run_root.is_dir():
        purgedir(run_root)
    run_root = naming_scheme(protocol, run_name, tx_power, phy_rate,
                             antenna_mask, channel, interference,
                             autocreate=True)

    exp_info_file_name = run_root / "info.txt"
    with exp_info_file_name.open("w") as info_file:
        info_file.write("Selected nodes : \n")
        info_file.write(" ".join(str(node) for node in node_ids))
        info_file.write("\nSources : \n")
        info_file.write(" ".join(str(src) for src in exp_ids))
        info_file.write("\nDestinations : \n")
        info_file.write(" ".join(str(dest) for dest in dest_ids) + "\n")

    # the nodes involved
    faraday = SshNode(hostname=default_gateway, username=slicename,
                      formatter=TimeColonFormatter(), verbose=verbose_ssh)

    # this is a python dictionary that allows to retrieve a node object
    # from an id
    node_index = {
        id: SshNode(gateway=faraday, hostname=fitname(id),
                    username="******",
                    formatter=TimeColonFormatter(), verbose=verbose_ssh)
        for id in node_ids
    }
    if interference != "None":
        node_scrambler = SshNode(gateway=faraday,
                                 hostname=fitname(scrambler_id),
                                 username="******",
                                 formatter=TimeColonFormatter(),
                                 verbose=verbose_ssh)
    # the global scheduler
    scheduler = Scheduler(verbose=verbose_jobs)
    # if tshark:
    #     scheduler_monitoring = Scheduler(verbose=verbose_jobs)
    # if interference != "None":
    #     scheduler_interferences = Scheduler(verbose=verbose_jobs)

    ##########
    check_lease = SshJob(
        scheduler=scheduler,
        node=faraday,
        verbose=verbose_jobs,
        critical=True,
        label="rhubarbe check lease",
        command=Run("rhubarbe leases --check", label="rlease"),
        # keep_connection = True
    )

    # load images if requested
    green_light = check_lease
    if load_images:
        # the nodes that we **do not** use should be turned off
        # so if we have selected e.g.
        # nodes 10 12 and 15, we will do
        # rhubarbe off -a ~10 ~12 ~15, meaning all nodes except 10, 12 and 15
        negated_node_ids = ["~{}".format(id) for id in node_ids]
        # add the id of the scrambler to the list, to load the gnuradio image
        negated_node_ids.append("~{}".format(scrambler_id))
        load_ids = ([int(id) for id in node_ids]
                    if node_ids is not None else default_node_ids)
        load_ids.append(scrambler_id)
        # replace green_light in this case
        # we use a modified gnuradio image where uhd_siggen handles
        # SIGTERM so that it can finish properly
        green_light = SshJob(
            node=faraday,
            required=check_lease,
            # critical=True,
            scheduler=scheduler,
            verbose=verbose_jobs,
            label="rhubarbe load/wait on nodes {}".format(load_ids),
            commands=[
                Run("rhubarbe", "off", "-a", *negated_node_ids,
                    label="roff {}".format(negated_node_ids)),
                Run("rhubarbe", "load", *node_ids,
                    label="rload {}".format(node_ids)),
                Run("rhubarbe", "load", "-i", "gnuradio_batman", scrambler_id,
                    label="load gnuradio batman on {}".format(scrambler_id)),
                Run("rhubarbe", "wait", *load_ids, label="rwait")
            ],
            # keep_connection = True
        )

    ##########
    # setting up the wireless interface on all nodes
    #
    # this is a python feature known as a list comprehension
    # we just create as many SshJob instances as we have
    # (id, SshNode) couples in node_index
    # and gather them all in init_wireless_jobs
    # they all depend on green_light
    #
    # provide node-utilities with the ranges/units it expects
    frequency = channel_frequency[int(channel)]
    # the driver expects tx_power in mBm, not dBm
    # (int() guards against tx_power being passed as a string)
    tx_power_driver = int(tx_power) * 100

    init_wireless_sshjobs = [
        SshJob(
            # scheduler=scheduler,
            # required=green_light,
            node=node,
            verbose=verbose_jobs,
            label="init {}".format(id),
            command=RunScript(
                "node-utilities.sh",
                "init-ad-hoc-network-{}".format(wireless_driver),
                wireless_driver, "foobar", frequency, phy_rate,
                antenna_mask, tx_power_driver,
                label="init ad-hoc network"),
            # keep_connection = True
        )
        for id, node in node_index.items()]
    init_wireless_jobs = Scheduler(
        *init_wireless_sshjobs,
        scheduler=scheduler,
        required=green_light,
        # critical=True,
        verbose=verbose_jobs,
        label="Initialisation of wireless chips")

    green_light_prot = init_wireless_jobs
    if interference != "None":
        # run uhd_siggen with the chosen power
        frequency_str = "{}G".format(frequency / 1000)
        init_scrambler_job = [
            SshJob(
                forever=True,
                node=node_scrambler,
                verbose=verbose_jobs,
                label="init scrambler on node {}".format(scrambler_id),
                command=RunScript("node-utilities.sh", "init-scrambler",
                                  interference, frequency_str,
                                  label="init scrambler"),
                # keep_connection = True
            )
        ]
        init_scrambler = Scheduler(
            *init_scrambler_job,
            scheduler=scheduler,
            required=green_light,
            # forever=True,
            # critical=True,
            verbose=verbose_jobs,
            label="Running interference")

    # then install and run the routing protocol (batman or olsr) on fit nodes
    run_protocol_job = [
        SshJob(
            # scheduler=scheduler,
            node=node,
            # required=green_light_prot,
            label="init and run {} on fit node {}".format(protocol, i),
            verbose=verbose_jobs,
            command=RunScript("node-utilities.sh",
                              "run-{}".format(protocol),
                              label="run {}".format(protocol)),
            # keep_connection = True
        )
        for i, node in node_index.items()]

    run_protocol = Scheduler(
        *run_protocol_job,
        scheduler=scheduler,
        required=green_light_prot,
        # critical=True,
        verbose=verbose_jobs,
        label="init and run routing protocols")

    # after that, run tcpdump on fit nodes - this job never ends...
    if tshark:
        run_tcpdump_job = [
            SshJob(
                # scheduler=scheduler_monitoring,
                node=node,
                forever=True,
                label="run tcpdump on fit node {}".format(i),
                verbose=verbose_jobs,
                commands=[
                    RunScript("node-utilities.sh", "run-tcpdump",
                              wireless_driver, i,
                              label="run tcpdump")
                ],
                # keep_connection = True
            )
            for i, node in node_index.items()]
        run_tcpdump = Scheduler(
            *run_tcpdump_job,
            scheduler=scheduler,
            required=run_protocol,
            forever=True,
            # critical=True,
            verbose=verbose_jobs,
            label="Monitoring (tcpdump) jobs")

    # let the wireless network settle
    settle_wireless_job = PrintJob(
        "Let the wireless network settle",
        sleep=settle_delay,
        scheduler=scheduler,
        required=run_protocol,
        label="settling for {} sec".format(settle_delay))

    green_light_experiment = settle_wireless_job

    if warmup:
        warmup_pings_job = [
            SshJob(
                node=nodei,
                # required=green_light_experiment,
                label="warmup ping {} -> {}".format(i, j),
                verbose=verbose_jobs,
                commands=[
                    Run("echo {} '->' {}".format(i, j),
                        label="ping {} '->' {}".format(i, j)),
                    RunScript("node-utilities.sh", "my-ping",
                              "10.0.0.{}".format(j),
                              ping_timeout, ping_interval,
                              ping_size, ping_number,
                              label="")
                ],
                # keep_connection = True
            )
            # for each selected experiment node
            for e in exp_ids
            # loop on the sources (to get the correct sshnodes)
            for i, nodei in node_index.items()
            # and on the destinations
            for j, nodej in node_index.items()
            # keep only sources among the selected experiment nodes,
            # skip self-destinations, and skip couples already done
            if (i == e) and e != j and not (j in exp_ids and j < e)
        ]
        warmup_pings = Scheduler(
            Sequence(*warmup_pings_job),
            scheduler=scheduler,
            required=green_light_experiment,
            # critical=True,
            verbose=verbose_jobs,
            label="Warmup ping")
        settle_wireless_job2 = PrintJob(
            "Let the wireless network settle",
            sleep=settle_delay / 2,
            scheduler=scheduler,
            required=warmup_pings,
            label="settling-warmup for {} sec".format(settle_delay / 2))

        green_light_experiment = settle_wireless_job2

    ##########
    # create all the tracepath jobs from the first node in the list
    #
    if map:
        routes_job = [
            SshJob(
                node=nodei,
                # scheduler=scheduler,
                # required=green_light_experiment,
                label="Generating ROUTE file for prot {} on node {}"
                      .format(protocol, i),
                verbose=verbose_jobs,
                commands=[
                    RunScript("node-utilities.sh",
                              "route-{}".format(protocol),
                              ">", "ROUTE-TABLE-{:02d}".format(i),
                              label="get route table"),
                    Pull(remotepaths="ROUTE-TABLE-{:02d}".format(i),
                         localpath=str(run_root), label="")
                ],
                # keep_connection = True
            )
            for i, nodei in node_index.items()]
        routes = Scheduler(
            *routes_job,
            scheduler=scheduler,
            required=green_light_experiment,
            # critical=True,
            verbose=verbose_jobs,
            label="Snapshotting route files")
        green_light_experiment = routes

    if route_sampling:
        routes_sampling_job2 = [
            SshJob(
                node=nodei,
                label="Route sampling service for prot {} on node {}"
                      .format(protocol, i),
                verbose=False,
                # forever=True,
                commands=[
                    Push(localpaths=["route_sample_service.sh"],
                         remotepath=".", label=""),
                    Run("source", "route_sample_service.sh;",
                        "route-sample",
                        "ROUTE-TABLE-{:02d}-SAMPLED".format(i),
                        "{}".format(protocol),
                        label="run route sampling service"),
                ],
                # keep_connection = True
            )
            for i, nodei in node_index.items()]
        routes_sampling_job = [
            SshJob(
                node=nodei,
                label="Route sampling service for prot {} on node {}"
                      .format(protocol, i),
                verbose=False,
                forever=True,
                # critical=True,
                # required=green_light_experiment,
                # scheduler=scheduler,
                commands=[
                    RunScript("route_sample_service.sh",
"route-sample", "ROUTE-TABLE-{:02d}-SAMPLED".format(i), "{}".format(protocol), label="run route sampling service"), ], #keep_connection = True ) for i, nodei in node_index.items() ] routes_sampling = Scheduler( *routes_sampling_job, scheduler=scheduler, verbose=False, forever=True, #critical = True, label="Route Sampling services launch", required=green_light_experiment) ########## # create all the ping jobs, i.e. max*(max-1)/2 # this again is a python list comprehension # see the 2 for instructions at the bottom # # notice that these SshJob instances are not yet added # to the scheduler, we will add them later on # depending on the sequential/parallel strategy pings_job = [ SshJob( node=nodei, #required=green_light_experiment, label="ping {} -> {}".format(i, j), verbose=verbose_jobs, commands=[ Run("echo {} '->' {}".format(i, j), label="ping {}'->' {}".format(i, j)), RunScript("node-utilities.sh", "my-ping", "10.0.0.{}".format(j), ping_timeout, ping_interval, ping_size, ping_number, ">", "PING-{:02d}-{:02d}".format(i, j), label=""), Pull(remotepaths="PING-{:02d}-{:02d}".format(i, j), localpath=str(run_root), label=""), ], #keep_connection = True ) #for each selected experiment nodes for e in exp_ids # looping on the source (to get the correct sshnodes) for i, nodei in node_index.items() # and on the destination for j in dest_ids # and keep only sources that are in the selected experiment nodes and remove destination that are themselves # and remove the couples that have already be done if (i == e) and e != j and not (j in exp_ids and j < e) ] pings = Scheduler( scheduler=scheduler, label="PINGS", #critical = True, verbose=verbose_jobs, required=green_light_experiment) # retrieve all pcap files from fit nodes stop_protocol_job = [ SshJob( #scheduler=scheduler, node=nodei, #required=pings, label="kill routing protocol on fit{:02d}".format(i), verbose=verbose_jobs, #critical = True, commands=[ RunScript("node-utilities.sh", "kill-{}".format(protocol), label="kill-{}".format(protocol)), ], #keep_connection = False ) for i, nodei in node_index.items() ] stop_protocol = Scheduler( *stop_protocol_job, scheduler=scheduler, required=pings, #critical = True, label="Stop routing protocols", ) if tshark: retrieve_tcpdump_job = [ SshJob( #scheduler=scheduler, node=nodei, #required=pings, label="retrieve pcap trace from fit{:02d}".format(i), verbose=verbose_jobs, #critical = True, commands=[ # RunScript("node-utilities.sh", "kill-{}".format(protocol), label = "kill-{}".format(protocol)), RunScript("node-utilities.sh", "kill-tcpdump", label="kill-tcpdump"), #Run("sleep 1"), Run("echo retrieving pcap trace and result-{i}.txt from fit{i:02d}" .format(i=i), label=""), Pull(remotepaths=["/tmp/fit{}.pcap".format(i)], localpath=str(run_root), label=""), ], #keep_connection = True ) for i, nodei in node_index.items() ] retrieve_tcpdump = Scheduler( *retrieve_tcpdump_job, scheduler=scheduler, required=pings, #critical = True, label="Retrieve tcpdump", ) if route_sampling: retrieve_sampling_job = [ SshJob( #scheduler=scheduler, node=nodei, #required=pings, label="retrieve sampling trace from fit{:02d}".format(i), verbose=verbose_jobs, #critical = True, commands=[ #RunScript("node-utilities.sh", "kill-route-sample", protocol, # label = "kill route sample"), RunScript("route_sample_service.sh", "kill-route-sample", label="kill route sample"), Run("echo retrieving sampling trace from fit{i:02d}". 
format(i=i), label=""), Pull(remotepaths=["ROUTE-TABLE-{:02d}-SAMPLED".format(i)], localpath=str(run_root), label=""), ], #keep_connection = True ) for i, nodei in node_index.items() ] retrieve_sampling = Scheduler( *retrieve_sampling_job, scheduler=scheduler, required=pings, #critical=True, verbose=verbose_jobs, label="Retrieve & stopping route sampling", ) if tshark: parse_pcaps_job = [ SshJob( #scheduler=scheduler, node=LocalNode(), #required=retrieve_tcpdump, label="parse pcap trace {path}/fit{node}.pcap".format( path=run_root, node=i), verbose=verbose_jobs, #commands = [RunScript("parsepcap.sh", run_root, i)] commands=[ Run("tshark", "-2", "-r", "{path}/fit{node}.pcap".format(path=run_root, node=i), "-R", "'(ip.dst==10.0.0.{node} && icmp) && radiotap.dbm_antsignal'" .format(node=i), "-Tfields", "-e", "'ip.src'", "-e" "'ip.dst'", "-e", "'radiotap.dbm_antsignal'", ">", "{path}/result-{node}.txt".format(path=run_root, node=i), label="parse pcap locally") ], #keep_connection = True ) for i in node_ids ] parse_pcaps = Scheduler( *parse_pcaps_job, scheduler=scheduler, required=retrieve_tcpdump, #critical=True, label="Parse pcap", ) #TODO: TURN OFF USRP if interference != "None": kill_uhd_siggen = SshJob( scheduler=scheduler, node=node_scrambler, required=pings, label="killing uhd_siggen on the scrambler node {}".format( scrambler_id), verbose=verbose_jobs, #critical = True, commands=[Run("pkill", "uhd_siggen")], #keep_connection = True ) kill_2_uhd_siggen = SshJob( scheduler=scheduler, node=faraday, required=kill_uhd_siggen, label="turning off usrp on the scrambler node {}".format( scrambler_id), verbose=verbose_jobs, commands=[ Run("rhubarbe", "usrpoff", "fit{}".format(scrambler_id)) ], #keep_connection = True ) #if map: #scheduler.add(Sequence(*tracepaths, scheduler=scheduler)) #if warmup: # scheduler.add(Sequence(*warmup_pings_job, scheduler=scheduler)) pings.add(Sequence(*pings_job)) # for running sequentially we impose no limit on the scheduler # that will be limitied anyways by the very structure # of the required graph #jobs_window = None if dry_run: scheduler.export_as_pngfile(run_root / "experiment_graph") return True # if not in dry-run mode, let's proceed to the actual experiment ok = scheduler.orchestrate() #jobs_window=jobs_window) scheduler.shutdown() dot_file = run_root / "experiment_graph" if not dot_file.is_file(): scheduler.export_as_dotfile(dot_file) #TODO : Is it necessary? if the user want to see it he can just do it? #call(["dot", "-Tpng", dot_file, "-o", run_root / "experitment_graph.png"]) #ok=True #ok = False # give details if it failed if not ok: scheduler.debrief() scheduler.export_as_dotfile("debug") if ok and map: print("Creation of ROUTES files") post_processor = ProcessRoutes(run_root, exp_ids, node_ids) post_processor.run() if ok and route_sampling: post_processor = ProcessRoutes(run_root, exp_ids, node_ids) post_processor.run_sampled() print("END of creation for ROUTES FILES") # data acquisition is done, let's aggregate results # i.e. compute averages if ok and tshark: post_processor = Aggregator(run_root, node_ids, antenna_mask) post_processor.run() return ok
def wait(*argv):                                 # pylint: disable=r0914
    usage = """
    Wait for selected nodes to be reachable by ssh

    Returns 0 if all nodes indeed are reachable
    """
    # suppress info log messages from asyncssh
    asyncssh_set_log_level(logging.WARNING)

    config = Config()
    parser = ArgumentParser(usage=usage,
                            formatter_class=ArgumentDefaultsHelpFormatter)
    parser.add_argument("-c", "--curses", action='store_true', default=False,
                        help="Use curses to provide term-based animation")
    parser.add_argument("-t", "--timeout", action='store',
                        default=config.value('nodes', 'wait_default_timeout'),
                        type=float,
                        help="Specify global timeout for the whole process")
    parser.add_argument("-b", "--backoff", action='store',
                        default=config.value('networking', 'ssh_backoff'),
                        type=float,
                        help="Specify backoff average between "
                             "attempts to ssh connect")
    parser.add_argument("-u", "--user", default="root",
                        help="select other username")
    # really don't write anything
    parser.add_argument("-s", "--silent", action='store_true', default=False)
    parser.add_argument("-v", "--verbose", action='store_true', default=False)

    add_selector_arguments(parser)
    args = parser.parse_args(argv)

    # --curses implies --verbose otherwise nothing shows up
    if args.curses:
        args.verbose = True

    selector = selected_selector(args)
    message_bus = asyncio.Queue()

    if args.verbose:
        message_bus.put_nowait({'selected_nodes': selector})
    from rhubarbe.logger import logger
    logger.info(f"wait: backoff is {args.backoff} "
                f"and global timeout is {args.timeout}")

    nodes = [
        Node(cmc_name, message_bus)              # pylint: disable=w0621
        for cmc_name in selector.cmc_names()
    ]
    sshs = [
        SshProxy(node, username=args.user, verbose=args.verbose)
        for node in nodes
    ]
    jobs = [Job(ssh.wait_for(args.backoff), critical=True) for ssh in sshs]

    display_class = Display if not args.curses else DisplayCurses
    display = display_class(nodes, message_bus)

    # have the display class run forever until the other ones are done
    scheduler = Scheduler(Job(display.run(), forever=True, critical=True),
                          *jobs,
                          timeout=args.timeout, critical=False)
    try:
        orchestration = scheduler.run()
        if orchestration:
            return 0
        else:
            if args.verbose:
                scheduler.debrief()
            return 1
    except KeyboardInterrupt:
        print("rhubarbe-wait : keyboard interrupt - exiting")
        # xxx
        return 1
    finally:
        display.epilogue()
        if not args.silent:
            for ssh in sshs:
                print(f"{ssh.node}:ssh {'OK' if ssh.status else 'KO'}")
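
# A self-contained sketch of the scheduler shape used by wait() above: one
# forever job (the display) plus one regular job per node, all bounded by a
# global timeout. The forever job is terminated once the regular jobs are
# done; with critical=False on the scheduler, a timeout or a failed job makes
# run() return False rather than raise. The coroutines below are stubs.

import asyncio
from asynciojobs import Scheduler, Job

async def display_stub():
    while True:
        print("display: still waiting...")
        await asyncio.sleep(0.2)

async def node_stub(i):
    await asyncio.sleep(0.1 * i)
    print(f"node {i} reachable")

jobs = [Job(node_stub(i), critical=True) for i in (1, 2, 3)]
ok = Scheduler(Job(display_stub(), forever=True, critical=True),
               *jobs, timeout=5, critical=False).run()
print("exit status", 0 if ok else 1)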
def one_run(tx_power, phy_rate, antenna_mask, channel, *,
            run_name=default_run_name, slicename=default_slicename,
            load_images=False, node_ids=None,
            parallel=None,
            verbose_ssh=False, verbose_jobs=False, dry_run=False):
    """
    Performs data acquisition on all nodes with the following settings

    Arguments:
        tx_power: in dBm, a string like 5, 10 or 14
        phy_rate: a string among 1, 54
        antenna_mask: a string among 1, 3, 7
        channel: a string like e.g. 1 or 40
        run_name: the name for a subdirectory where all data will be kept;
          successive runs should use the same name for further visualization
        slicename: the Unix login name (slice name) to enter the gateway
        load_images: a boolean specifying whether nodes should be re-imaged first
        node_ids: a list of node ids to run the scenario on; strings or
          ints are OK; defaults to all 37 nodes i.e. the whole testbed
        parallel: a number of simultaneous jobs to run
          1 means all data acquisition is sequential (default)
          0 means maximum parallel
    """

    #
    # dry-run mode
    # just display a one-liner with parameters
    #
    if dry_run:
        load_msg = "" if not load_images else " LOAD"
        nodes = " ".join(str(n) for n in node_ids)
        print("dry-run: {run_name}{load_msg} -"
              " t{tx_power} r{phy_rate} a{antenna_mask} ch{channel} -"
              " nodes {nodes}"
              .format(**locals()))
        # in dry-run mode we are done
        return True

    # set default for the nodes parameter
    node_ids = ([int(id) for id in node_ids]
                if node_ids is not None else default_node_ids)

    ###
    # create the logs directory based on input parameters
    run_root = naming_scheme(run_name, tx_power, phy_rate,
                             antenna_mask, channel, autocreate=True)

    # the nodes involved
    faraday = SshNode(hostname=default_gateway, username=slicename,
                      formatter=TimeColonFormatter(), verbose=verbose_ssh)
    # this is a python dictionary that allows to retrieve a node object
    # from an id
    node_index = {
        id: SshNode(gateway=faraday, hostname=fitname(id),
                    username="******",
                    formatter=TimeColonFormatter(), verbose=verbose_ssh)
        for id in node_ids
    }

    # the global scheduler
    scheduler = Scheduler(verbose=verbose_jobs)

    ##########
    check_lease = SshJob(
        scheduler=scheduler,
        node=faraday,
        verbose=verbose_jobs,
        critical=True,
        command=Run("rhubarbe leases --check"),
    )

    # load images if requested
    green_light = check_lease
    if load_images:
        # the nodes that we **do not** use should be turned off
        # so if we have selected e.g. nodes 10 12 and 15, we will do
        # rhubarbe off -a ~10 ~12 ~15, meaning all nodes except 10, 12 and 15
        negated_node_ids = ["~{}".format(id) for id in node_ids]
        # replace green_light in this case
        green_light = SshJob(
            node=faraday,
            required=check_lease,
            critical=True,
            scheduler=scheduler,
            verbose=verbose_jobs,
            commands=[
                Run("rhubarbe", "off", "-a", *negated_node_ids),
                Run("rhubarbe", "load", "-i", "u16-ath-noreg", *node_ids),
                Run("rhubarbe", "wait", *node_ids)
            ]
        )

    ##########
    # setting up the wireless interface on all nodes
    #
    # this is a python feature known as a list comprehension
    # we just create as many SshJob instances as we have
    # (id, SshNode) couples in node_index
    # and gather them all in init_wireless_jobs
    # they all depend on green_light
    #
    # provide node-utilities with the ranges/units it expects
    frequency = channel_frequency[int(channel)]
    # the driver expects tx_power in mBm, not dBm
    # (int() guards against tx_power being passed as a string)
    tx_power_driver = int(tx_power) * 100

    if load_images:
        # The first init_wireless_jobs always has troubles...
        # Do it twice the first time (nasty hack)
        init_wireless_jobs = [
            SshJob(
                scheduler=scheduler,
                required=green_light,
                node=node,
                verbose=verbose_jobs,
                label="init {}".format(id),
                commands=[
                    RunScript("node-utilities.sh", "init-ad-hoc-network",
                              wireless_driver, "foobar", frequency,
                              phy_rate, antenna_mask, tx_power_driver),
                    RunScript("node-utilities.sh", "init-ad-hoc-network",
                              wireless_driver, "foobar", frequency,
                              phy_rate, antenna_mask, tx_power_driver)
                ]
            )
            for id, node in node_index.items()]
    else:
        init_wireless_jobs = [
            SshJob(
                scheduler=scheduler,
                required=green_light,
                node=node,
                verbose=verbose_jobs,
                label="init {}".format(id),
                command=RunScript("node-utilities.sh", "init-ad-hoc-network",
                                  wireless_driver, "foobar", frequency,
                                  phy_rate, antenna_mask, tx_power_driver)
            )
            for id, node in node_index.items()]

    # then install and run olsr on fit nodes
    run_olsr = [
        SshJob(
            scheduler=scheduler,
            node=node,
            required=init_wireless_jobs,
            label="init and run olsr on fit nodes",
            verbose=verbose_jobs,
            command=RunScript("node-utilities.sh", "run-olsr")
        )
        for i, node in node_index.items()]

    # after that, run tcpdump on fit nodes, this job never ends...
    run_tcpdump = [
        SshJob(
            scheduler=scheduler,
            node=node,
            required=run_olsr,
            label="run tcpdump on fit nodes",
            verbose=verbose_jobs,
            commands=[
                Run("echo run tcpdump on fit{:02d}".format(i)),
                Run("tcpdump -U -i moni-{} -y ieee802_11_radio"
                    " -w /tmp/fit{}.pcap".format(wireless_driver, i))
            ]
        )
        for i, node in node_index.items()]

    # let the wireless network settle
    settle_wireless_job = PrintJob(
        "Let the wireless network settle",
        sleep=settle_delay,
        scheduler=scheduler,
        required=run_olsr,
        label="settling")

    ##########
    # create all the ping jobs, i.e. max*(max-1)/2
    # this again is a python list comprehension
    # see the 2 for instructions at the bottom
    #
    # notice that these SshJob instances are not yet added
    # to the scheduler, we will add them later on
    # depending on the sequential/parallel strategy

    pings = [
        SshJob(
            node=nodei,
            required=settle_wireless_job,
            label="ping {} -> {}".format(i, j),
            verbose=verbose_jobs,
            commands=[
                Run("echo {} '->' {}".format(i, j)),
                RunScript("node-utilities.sh", "my-ping",
                          "10.0.0.{}".format(j),
                          ping_timeout, ping_interval,
                          ping_size, ping_number,
                          ">", "PING-{:02d}-{:02d}".format(i, j)),
                Pull(remotepaths="PING-{:02d}-{:02d}".format(i, j),
                     localpath=str(run_root)),
            ]
        )
        # looping on the source, now only fit01 is source
        for i, nodei in node_index.items()
        # and on the destination
        for j, nodej in node_index.items()
        # and keep only half of the couples
        if (j > i) and (i == 1)
    ]

    # retrieve all pcap files from fit nodes
    retrieve_tcpdump = [
        SshJob(
            scheduler=scheduler,
            node=nodei,
            required=pings,
            label="retrieve pcap trace from fit{:02d}".format(i),
            verbose=verbose_jobs,
            commands=[
                RunScript("node-utilities.sh", "kill-olsr"),
                Run("sleep 1;pkill tcpdump; sleep 1"),
                RunScript("node-utilities.sh", "process-pcap", i),
                Run("echo retrieving pcap trace and result-{i}.txt"
                    " from fit{i:02d}".format(i=i)),
                Pull(remotepaths=["/tmp/fit{}.pcap".format(i),
                                  "/tmp/result-{}.txt".format(i)],
                     localpath=str(run_root)),
            ]
        )
        for i, nodei in node_index.items()
    ]

    # xxx this is a little fishy
    # should we not just consider that the default is parallel=1 ?
    if parallel is None:
        # with the sequential strategy, we just need to
        # create a Sequence out of the list of pings
        # Sequence will add the required relationships
        scheduler.add(Sequence(*pings, scheduler=scheduler))
        # for running sequentially we impose no limit on the scheduler
        # that will be limited anyway by the very structure
        # of the requirements graph
        jobs_window = None
    else:
        # with the parallel strategy
        # we just need to insert all the ping jobs
        # as each already has its required OK
        scheduler.update(pings)
        # this time the value in parallel is the one
        # to use as the jobs_limit; if 0 then inch'allah
        jobs_window = parallel

    # if not in dry-run mode, let's proceed to the actual experiment
    ok = scheduler.orchestrate(jobs_window=jobs_window)
    # give details if it failed
    if not ok:
        scheduler.debrief()

    # data acquisition is done, let's aggregate results
    # i.e. compute averages
    if ok:
        post_processor = Aggregator(run_root, node_ids, antenna_mask)
        post_processor.run()

    return ok
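
# A short sketch of the jobs_window knob used just above: with a window of 2,
# at most two jobs run at any given time, so four 0.1s jobs complete in about
# 0.2s instead of 0.1s with unlimited parallelism.

from asynciojobs import Scheduler, PrintJob

s = Scheduler()
for i in range(4):
    PrintJob(f"ping job {i}", sleep=0.1, scheduler=s)
ok = s.orchestrate(jobs_window=2)
ok or s.debrief()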
def run(slice, hss, epc, enb, extras,
        load_nodes, image_gw, image_enb, image_extra,
        reset_nodes, reset_usrp, spawn_xterms, verbose):
    """
    ##########
    # 3 methods to get nodes ready
    # (*) load images
    # (*) reset nodes that are known to have the right image
    # (*) do nothing, proceed to experiment

    expects e.g.
    * slice : something like [email protected]
    * hss : 04
    * epc : 03
    * enb : 23
    * extras : a list of ids that will be loaded with the gnuradio image

    Plus
    * load_nodes: whether to load images or not - in which case
      image_gw, image_enb and image_extra are used to tell the image names
    * reset_nodes: if load_nodes is false and reset_nodes is true, the nodes
      are reset - i.e. rebooted
    * otherwise (both False): do nothing
    * reset_usrp : if not False, the USRP board is reset as well
    * spawn_xterms : if set, starts an xterm on all extra nodes
    * image_* : the names of the images to load on the various nodes
    """

    # what argparse knows as a slice actually is a gateway (user + host)
    gwuser, gwhost = parse_slice(slice)
    gwnode = SshNode(hostname=gwhost, username=gwuser,
                     formatter=ColonFormatter(verbose=verbose),
                     debug=verbose)

    hostnames = hssname, epcname, enbname = [
        r2lab_hostname(x) for x in (hss, epc, enb)
    ]
    extra_hostnames = [r2lab_hostname(x) for x in extras]

    hssnode, epcnode, enbnode = [
        SshNode(gateway=gwnode, hostname=hostname, username='******',
                formatter=ColonFormatter(verbose=verbose),
                debug=verbose)
        for hostname in hostnames
    ]
    extra_nodes = [
        SshNode(gateway=gwnode, hostname=hostname, username='******',
                formatter=ColonFormatter(verbose=verbose),
                debug=verbose)
        for hostname in extra_hostnames
    ]

    ########## preparation
    job_check_for_lease = SshJob(
        node=gwnode,
        command=["rhubarbe", "leases", "--check"],
        label="check we have a current lease",
    )

    # turn off all nodes
    turn_off_command = ["rhubarbe", "off", "-a"]
    # except our 3 nodes, the optional extras - and node 20
    turn_off_command += ["~{}".format(x)
                         for x in [hss, epc, enb] + extras + [20]]

    job_off_nodes = SshJob(
        node=gwnode,
        # switch off all nodes but the ones we use
        command=turn_off_command,
        label="turn off unused nodes",
        required=job_check_for_lease,
    )

    # actually run this in the gateway, not on the mac
    # the ssh keys are stored in the gateway and we do not yet have
    # the tools to leverage such remote keys
    job_stop_phone = SshJob(
        node=gwnode,
        command=RunScript(locate_local_script("faraday.sh"),
                          "macphone", "r2lab/infra/user-env/macphone.sh",
                          "phone-off",
                          includes=includes),
        label="stop phone",
        required=job_check_for_lease,
    )

    jobs_prepare = [job_check_for_lease, job_stop_phone]
    # turn off nodes only when --load or --reset is set
    if load_nodes or reset_nodes:
        jobs_prepare.append(job_off_nodes)

    ########## infra nodes hss + epc
    # prepare nodes
    commands = []
    if load_nodes:
        commands.append(Run("rhubarbe", "load", "-i", image_gw,
                            hssname, epcname))
    elif reset_nodes:
        commands.append(Run("rhubarbe", "reset", hssname, epcname))
    # always do this
    commands.append(Run("rhubarbe", "wait", "-t", 120, hssname, epcname))
    job_load_infra = SshJob(
        node=gwnode,
        commands=commands,
        label="load and wait HSS and EPC nodes",
        required=jobs_prepare,
    )

    # start services
    job_service_hss = SshJob(
        node=hssnode,
        command=RunScript(locate_local_script("oai-hss.sh"),
                          "run-hss", epc,
                          includes=includes),
        label="start HSS service",
        required=job_load_infra,
    )

    msg = "wait for HSS to warm up"
    job_service_epc = Sequence(
        # give HSS 15 seconds to warm up
        Job(
            verbose_delay(15, msg),
            label=msg,
        ),
        SshJob(
            node=epcnode,
            command=RunScript(locate_local_script("oai-epc.sh"),
                              "run-epc", hss,
                              includes=includes),
label="start EPC services", ), required=job_load_infra, ) jobs_infra = job_load_infra, job_service_hss, job_service_epc ########## enodeb # prepare node commands = [] if load_nodes: commands.append(Run("rhubarbe", "usrpoff", enb)) commands.append(Run("rhubarbe", "load", "-i", image_enb, enb)) elif reset_nodes: commands.append(Run("rhubarbe", "reset", enb)) commands.append(Run("rhubarbe", "wait", "-t", "120", enb)) job_load_enb = SshJob( node=gwnode, commands=commands, label="load and wait ENB", required=jobs_prepare, ) # start service msg = "wait for EPC to warm up" job_service_enb = Sequence( Job(verbose_delay(15, msg), label=msg), SshJob( node=enbnode, # run-enb expects the id of the epc as a parameter command=RunScript(locate_local_script("oai-enb.sh"), "run-enb", epc, reset_usrp, includes=includes), label="start softmodem on ENB", ), required=(job_load_enb, job_service_hss, job_service_epc), ) jobs_enb = job_load_enb, job_service_enb ########## run experiment per se # the phone # we need to wait for the USB firmware to be loaded duration = 30 if reset_usrp is not False else 8 msg = "wait for enodeb firmware to load on USRP".format(duration) job_wait_enb = Job(verbose_delay(duration, msg), label=msg, required=job_service_enb) job_start_phone = SshJob( node=gwnode, commands=[ RunScript(locate_local_script("faraday.sh"), "macphone", "r2lab/infra/user-env/macphone.sh", "phone-on", includes=includes), RunScript(locate_local_script("faraday.sh"), "macphone", "r2lab/infra/user-env/macphone.sh", "phone-start-app", includes=includes), ], label="start phone 4g and speedtest app", required=job_wait_enb, ) job_ping_phone_from_epc = SshJob( node=epcnode, commands=[ Run("sleep 10"), Run("ping -c 100 -s 100 -i .05 172.16.0.2 &> /root/ping-phone"), ], label="ping phone from EPC", critical=False, required=job_wait_enb, ) jobs_exp = job_wait_enb, job_start_phone, job_ping_phone_from_epc ########## extra nodes # ssh -X not yet supported in apssh, so one option is to start them using # a local process # xxx to update: The following code kind of works, but it needs to be # turned off, because the process in question would be killed # at the end of the Scheduler orchestration (at the end of the run function) # which is the exact time where it would be useful :) # however the code for LocalJob appears to work fine, it would be nice to # move it around - maybe in apssh ? 
    commands = []
    if not extras:
        commands.append(Run("echo no extra nodes specified - ignored"))
    else:
        if load_nodes:
            commands.append(Run("rhubarbe", "usrpoff", *extra_hostnames))
            commands.append(Run("rhubarbe", "load", "-i", image_extra,
                                *extra_hostnames))
            commands.append(Run("rhubarbe", "wait", "-t", 120,
                                *extra_hostnames))
            commands.append(Run("rhubarbe", "usrpon", *extra_hostnames))
        elif reset_nodes:
            commands.append(Run("rhubarbe", "reset", *extra_hostnames))
            commands.append(Run("rhubarbe", "wait", "-t", "120",
                                *extra_hostnames))
    job_load_extras = SshJob(
        node=gwnode,
        commands=commands,
        label="load and wait extra nodes",
        required=job_check_for_lease,
    )
    jobs_extras = [job_load_extras]

    colors = ["wheat", "gray", "white"]
    if spawn_xterms:
        jobs_xterms_extras = [
            SshJob(
                node=extra_node,
                command=Run("xterm -fn -*-fixed-medium-*-*-*-20-*-*-*-*-*-*-*"
                            " -bg {} -geometry 90x10".format(color),
                            x11=True),
                label="xterm on node {}".format(extra_node.hostname),
                required=job_load_extras,
                # don't set forever; if we do, then these xterms get killed
                # when all other tasks have completed
                # forever = True,
            )
            for extra_node, color in zip(extra_nodes, itertools.cycle(colors))
        ]
        jobs_extras += jobs_xterms_extras

    # schedule the load phases only if required
    sched = Scheduler(verbose=verbose)
    # this is just a way to add a collection of jobs to the scheduler
    sched.update(jobs_prepare)
    sched.update(jobs_infra)
    sched.update(jobs_enb)
    sched.update(jobs_exp)
    sched.update(jobs_extras)
    # remove dangling requirements - if any
    # should not be needed but won't hurt either
    sched.sanitize()

    print(40 * "*")
    if load_nodes:
        print("LOADING IMAGES: (gw->{}, enb->{}, extras->{})"
              .format(image_gw, image_enb, image_extra))
    elif reset_nodes:
        print("RESETTING NODES")
    else:
        print("NODES ARE USED AS IS (no image loaded, no reset)")

    sched.rain_check()
    # update the .dot and .png files for illustration purposes
    if verbose:
        sched.list()
    name = "scenario-load" if load_nodes else \
           "scenario-reset" if reset_nodes else \
           "scenario"
    sched.export_as_dotfile("{}.dot".format(name))
    os.system("dot -Tpng {}.dot -o {}.png".format(name, name))

    sched.list()

    if not sched.orchestrate():
        print("RUN KO : {}".format(sched.why()))
        sched.debrief()
        return False
    else:
        print("RUN OK")
        return True
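
# A sketch of the update() + sanitize() combination used above: update()
# inserts a whole collection of jobs at once, and sanitize() drops any
# requirement that points to a job not present in the scheduler - such
# dangling requirements could otherwise stall the orchestration. The jobs
# below are stubs; note that python may warn that the coroutine of the
# deliberately-excluded job is never awaited.

import asyncio
from asynciojobs import Scheduler, Job

async def step(name):
    print(name)
    await asyncio.sleep(0.1)

outsider = Job(step("never added"))               # left out on purpose
first = Job(step("first"))
second = Job(step("second"), required=(first, outsider))

sched = Scheduler()
sched.update([first, second])
sched.sanitize()      # removes the dangling requirement on `outsider`
assert sched.orchestrate()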