def test_sequence3(self):
    """Check requirements when a job depends on a whole sequence."""
    # two jobs chained in a sequence, plus a third job that requires
    # the sequence object itself
    job1 = J(sl(0.1), label=1)
    job2 = J(sl(0.1), label=2)
    sequence = Seq(job1, job2)
    job3 = J(sl(0.1), label=3, required=sequence)
    sched = Scheduler()
    sched.update((sequence, job3))
    list_sep(sched, sep + "sequence3")
    # the sequence wires job2 after job1; job3 hangs off the sequence tail
    for job, expected in ((job1, 0), (job2, 1), (job3, 1)):
        self.assertEqual(len(job.required), expected)
    self.assertTrue(check_required_types(sched, "test_sequence3"))
    self.assertTrue(sched.orchestrate())
def one_run(tx_power, phy_rate, antenna_mask, channel, *,
            run_name=default_run_name, slicename=default_slicename,
            load_images=False, node_ids=None, parallel=None,
            verbose_ssh=False, verbose_jobs=False, dry_run=False):
    """
    Performs data acquisition on all nodes with the following settings

    Arguments:
        tx_power: in dBm, a string like 5, 10 or 14
        phy_rate: a string among 1, 54
        antenna_mask: a string among 1, 3, 7
        channel: a string like e.g. 1 or 40
        run_name: the name for a subdirectory where all data will be kept
                  successive runs should use the same name for
                  further visualization
        slicename: the Unix login name (slice name) to enter the gateway
        load_images: a boolean specifying whether nodes should be
                     re-imaged first
        node_ids: a list of node ids to run the scenario on; strings or
                  ints are OK; defaults to the all 37 nodes i.e. the
                  whole testbed
        parallel: a number of simultaneous jobs to run
                  1 means all data acquisition is sequential (default)
                  0 means maximum parallel

    Returns:
        True if the whole experiment (and post-processing) went fine.
    """

    # normalize node_ids *before* the dry-run branch, so that a dry-run
    # with the default node list no longer crashes on None
    node_ids = ([int(id) for id in node_ids]
                if node_ids is not None else default_node_ids)

    #
    # dry-run mode: just display a one-liner with parameters
    #
    if dry_run:
        load_msg = "" if not load_images else " LOAD"
        nodes = " ".join(str(n) for n in node_ids)
        print("dry-run: {run_name}{load_msg} -"
              " t{tx_power} r{phy_rate} a{antenna_mask} ch{channel} -"
              " nodes {nodes}"
              .format(**locals()))
        # in dry-run mode we are done
        return True

    ###
    # create the logs directory based on input parameters
    run_root = naming_scheme(run_name, tx_power, phy_rate,
                             antenna_mask, channel, autocreate=True)

    # the gateway node
    faraday = SshNode(hostname=default_gateway, username=slicename,
                      formatter=TimeColonFormatter(), verbose=verbose_ssh)

    # a dictionary that allows to retrieve a node object from an id
    node_index = {
        id: SshNode(gateway=faraday, hostname=fitname(id),
                    username="******",
                    formatter=TimeColonFormatter(), verbose=verbose_ssh)
        for id in node_ids
    }

    # the global scheduler
    scheduler = Scheduler(verbose=verbose_jobs)

    ##########
    check_lease = SshJob(
        scheduler=scheduler,
        node=faraday,
        verbose=verbose_jobs,
        critical=True,
        command=Run("rhubarbe leases --check"),
    )

    # load images if requested
    green_light = check_lease
    if load_images:
        # the nodes that we **do not** use should be turned off
        # so if we have selected e.g. nodes 10 12 and 15, we will do
        # rhubarbe off -a ~10 ~12 ~15, meaning all nodes except 10, 12 and 15
        negated_node_ids = ["~{}".format(id) for id in node_ids]
        # replace green_light in this case
        green_light = SshJob(
            node=faraday,
            required=check_lease,
            critical=True,
            scheduler=scheduler,
            verbose=verbose_jobs,
            commands=[
                Run("rhubarbe", "off", "-a", *negated_node_ids),
                Run("rhubarbe", "load", "-i", "u16-ath-noreg", *node_ids),
                Run("rhubarbe", "wait", *node_ids)
            ]
        )

    ##########
    # setting up the wireless interface on all nodes
    #
    # provide node-utilities with the ranges/units it expects
    frequency = channel_frequency[int(channel)]
    # the driver expects mBm, not dBm; tx_power may come in as a string
    # (e.g. "5"), so convert before scaling - "5" * 100 would otherwise
    # produce a 500-character string instead of the number 500
    tx_power_driver = int(tx_power) * 100

    # one init job per node, all depending on green_light;
    # the first init always has troubles right after a fresh load,
    # so in that case run it twice (nasty hack)
    init_runs = 2 if load_images else 1
    init_wireless_jobs = [
        SshJob(
            scheduler=scheduler,
            required=green_light,
            node=node,
            verbose=verbose_jobs,
            label="init {}".format(id),
            commands=[
                RunScript("node-utilities.sh", "init-ad-hoc-network",
                          wireless_driver, "foobar", frequency,
                          phy_rate, antenna_mask, tx_power_driver)
                for _ in range(init_runs)
            ]
        )
        for id, node in node_index.items()]

    # then install and run olsr on fit nodes
    run_olsr = [
        SshJob(
            scheduler=scheduler,
            node=node,
            required=init_wireless_jobs,
            label="init and run olsr on fit nodes",
            verbose=verbose_jobs,
            command=RunScript("node-utilities.sh", "run-olsr")
        )
        for node in node_index.values()]

    # after that, run tcpdump on fit nodes, this job never ends...
    run_tcpdump = [
        SshJob(
            scheduler=scheduler,
            node=node,
            required=run_olsr,
            label="run tcpdump on fit nodes",
            verbose=verbose_jobs,
            commands=[
                Run("echo run tcpdump on fit{:02d}".format(i)),
                Run("tcpdump -U -i moni-{} -y ieee802_11_radio"
                    " -w /tmp/fit{}.pcap".format(wireless_driver, i))
            ]
        )
        for i, node in node_index.items()]

    # let the wireless network settle
    settle_wireless_job = PrintJob(
        "Let the wireless network settle",
        sleep=settle_delay,
        scheduler=scheduler,
        required=run_olsr,
        label="settling")

    ##########
    # create all the ping jobs, i.e. max*(max-1)/2
    #
    # notice that these SshJob instances are not yet added to the
    # scheduler, we will add them later on depending on the
    # sequential/parallel strategy
    pings = [
        SshJob(
            node=nodei,
            required=settle_wireless_job,
            label="ping {} -> {}".format(i, j),
            verbose=verbose_jobs,
            commands=[
                Run("echo {} '->' {}".format(i, j)),
                RunScript("node-utilities.sh", "my-ping",
                          "10.0.0.{}".format(j),
                          ping_timeout, ping_interval,
                          ping_size, ping_number,
                          ">", "PING-{:02d}-{:02d}".format(i, j)),
                Pull(remotepaths="PING-{:02d}-{:02d}".format(i, j),
                     localpath=str(run_root)),
            ]
        )
        # looping on the source, now only fit01 is source
        for i, nodei in node_index.items()
        # and on the destination
        for j, nodej in node_index.items()
        # and keep only half of the couples
        if (j > i) and (i == 1)
    ]

    # retrieve all pcap files from fit nodes
    retrieve_tcpdump = [
        SshJob(
            scheduler=scheduler,
            node=nodei,
            required=pings,
            label="retrieve pcap trace from fit{:02d}".format(i),
            verbose=verbose_jobs,
            commands=[
                RunScript("node-utilities.sh", "kill-olsr"),
                Run("sleep 1;pkill tcpdump; sleep 1"),
                RunScript("node-utilities.sh", "process-pcap", i),
                Run("echo retrieving pcap trace and result-{i}.txt"
                    " from fit{i:02d}".format(i=i)),
                Pull(remotepaths=["/tmp/fit{}.pcap".format(i),
                                  "/tmp/result-{}.txt".format(i)],
                     localpath=str(run_root)),
            ]
        )
        for i, nodei in node_index.items()
    ]

    if parallel is None:
        # sequential strategy: create a Sequence out of the list of
        # pings; Sequence adds the required relationships
        scheduler.add(Sequence(*pings, scheduler=scheduler))
        # no limit on the scheduler - the structure of the required
        # graph serializes everything anyway
        jobs_window = None
    else:
        # parallel strategy: just insert all the ping jobs,
        # each already has its required set
        scheduler.update(pings)
        # the value in parallel is the jobs_limit; if 0 then inch'allah
        jobs_window = parallel

    # let's proceed to the actual experiment
    ok = scheduler.orchestrate(jobs_window=jobs_window)
    # give details if it failed
    if not ok:
        scheduler.debrief()

    # data acquisition is done, let's aggregate results
    # i.e. compute averages
    if ok:
        post_processor = Aggregator(run_root, node_ids, antenna_mask)
        post_processor.run()

    return ok
] if args.parallel is None: # with the sequential strategy, we just need to # create a Sequence out of the list of pings # Sequence will add the required relationships scheduler.add(Sequence(*pings, scheduler=scheduler)) # for running sequentially we impose no limit on the scheduler # that will be limitied anyways by the very structure # of the required graph jobs_window = None else: # with the parallel strategy # we just need to insert all the ping jobs # as each already has its required OK scheduler.update(pings) # this time the value in args.parallel is the one # to use as the jobs_limit; if 0 then inch'allah jobs_window = args.parallel # finally - i.e. when all pings are done # we can list the current contents of our local directory SshJob( node = LocalNode(), scheduler = scheduler, required = pings, verbose = verbose_jobs, commands = [ Run("ls", "-l", "PING*") ] )
def collect(run_name, slice, hss, epc, enb, verbose):
    """
    retrieves all relevant logs under a common name
    otherwise, same signature as run() for convenience

    retrieved stuff will be 3 compressed tars named
    <run_name>-(hss|epc|enb).tgz

    xxx - todo - it would make sense to also unwrap them all
    in a single place locally, like what "logs.sh unwrap" does
    """
    # note: 'slice' shadows the builtin, but renaming the parameter
    # would break keyword callers, so it is kept as-is
    gwuser, gwhost = r2lab_parse_slice(slice)
    gwnode = SshNode(hostname=gwhost, username=gwuser,
                     formatter=TimeColonFormatter(verbose=verbose),
                     debug=verbose)

    functions = "hss", "epc", "enb"

    hostnames = hssname, epcname, enbname = [
        r2lab_hostname(x) for x in (hss, epc, enb)
    ]

    nodes = hssnode, epcnode, enbnode = [
        SshNode(gateway=gwnode, hostname=hostname, username='******',
                formatter=TimeColonFormatter(verbose=verbose),
                debug=verbose)
        for hostname in hostnames
    ]

    # first run a 'capture' function remotely to gather all the relevant
    # info into a single tar named <run_name>-<function>.tgz
    capturers = [
        SshJob(
            node=node,
            command=RunScript(find_local_embedded_script("oai-common.sh"),
                              "capture-{}".format(function), run_name,
                              # capture-enb will run oai-as-enb and thus
                              # requires oai-enb.sh
                              includes=[
                                  find_local_embedded_script(
                                      "oai-{}.sh".format(function))
                              ]),
            label="capturer on {}".format(function),
        )
        for (node, function) in zip(nodes, functions)
    ]

    # each collector only needs *its own* capturer to be done, not all
    # three of them; the zip already pairs them up
    # (this used to say required=capturers, which made every collector
    # wait for all the capturers)
    collectors = [
        SshJob(
            node=node,
            command=Pull(remotepaths=["{}-{}.tgz".format(run_name, function)],
                         localpath="."),
            label="collector on {}".format(function),
            required=capturer,
        )
        for (node, function, capturer) in zip(nodes, functions, capturers)
    ]

    sched = Scheduler(verbose=verbose)
    sched.update(capturers)
    sched.update(collectors)

    if verbose:
        sched.list()

    if not sched.orchestrate():
        print("KO")
        sched.debrief()
        return
    print("OK")

    # unwrap the three tars locally, unless the target dir already exists
    if os.path.exists(run_name):
        print("local directory {} already exists = NOT UNWRAPPED !".format(
            run_name))
        return
    os.mkdir(run_name)
    local_tars = [
        "{run_name}-{ext}.tgz".format(run_name=run_name, ext=ext)
        for ext in ['hss', 'epc', 'enb']
    ]
    for tar in local_tars:
        print("Untaring {} in {}".format(tar, run_name))
        os.system("tar -C {} -xzf {}".format(run_name, tar))
def run(slice, hss, epc, enb, extras, load_nodes, image_gw, image_enb,
        image_extra, reset_nodes, reset_usrp, spawn_xterms, verbose):
    """
    ##########
    # 3 methods to get nodes ready
    # (*) load images
    # (*) reset nodes that are known to have the right image
    # (*) do nothing, proceed to experiment

    expects e.g.
    * slice : s.t like [email protected]
    * hss : 04
    * epc : 03
    * enb : 23
    * extras : a list of ids that will be loaded with the gnuradio image

    Plus
    * load_nodes: whether to load images or not - in which case
        image_gw, image_enb and image_extra are used to tell the image names
    * reset_nodes: if load_nodes is false and reset_nodes is true, the nodes
        are reset - i.e. rebooted
    * otherwise (both False): do nothing
    * reset_usrp : passed down to run-enb; when not False a longer delay is
        used for the USRP firmware to load
        NOTE(review): the original doc said "if not False, the USRP board
        won't be reset", which looks inverted - verify against oai-enb.sh
    * spawn_xterms : if set, starts xterm on all extra nodes
    * image_* : the name of the images to load on the various nodes

    Returns:
        True if the orchestration went fine, False otherwise.
    """
    # what argparse knows as a slice actually is a gateway (user + host)
    gwuser, gwhost = parse_slice(slice)
    gwnode = SshNode(hostname=gwhost, username=gwuser,
                     formatter=ColonFormatter(verbose=verbose),
                     debug=verbose)

    hostnames = hssname, epcname, enbname = [
        r2lab_hostname(x) for x in (hss, epc, enb)
    ]
    extra_hostnames = [r2lab_hostname(x) for x in extras]

    hssnode, epcnode, enbnode = [
        SshNode(gateway=gwnode, hostname=hostname, username='******',
                formatter=ColonFormatter(verbose=verbose), debug=verbose)
        for hostname in hostnames
    ]

    extra_nodes = [
        SshNode(gateway=gwnode, hostname=hostname, username='******',
                formatter=ColonFormatter(verbose=verbose), debug=verbose)
        for hostname in extra_hostnames
    ]

    ########## preparation
    job_check_for_lease = SshJob(
        node=gwnode,
        command=["rhubarbe", "leases", "--check"],
        label="check we have a current lease",
    )

    # turn off all nodes ...
    turn_off_command = ["rhubarbe", "off", "-a"]
    # ... except our 3 nodes and the optional extras
    # node 20 is also kept on - presumably reserved; TODO confirm
    turn_off_command += ["~{}".format(x)
                         for x in [hss, epc, enb] + extras + [20]]

    job_off_nodes = SshJob(
        node=gwnode,
        # switch off all nodes but the ones we use
        command=turn_off_command,
        label="turn off unused nodes",
        required=job_check_for_lease,
    )

    # actually run this in the gateway, not on the mac
    # the ssh keys are stored in the gateway and we do not yet have
    # the tools to leverage such remote keys
    job_stop_phone = SshJob(
        node=gwnode,
        command=RunScript(locate_local_script("faraday.sh"),
                          "macphone", "r2lab/infra/user-env/macphone.sh",
                          "phone-off", includes=includes),
        label="stop phone",
        required=job_check_for_lease,
    )

    jobs_prepare = [job_check_for_lease, job_stop_phone]
    # turn off nodes only when --load or --reset is set
    if load_nodes or reset_nodes:
        jobs_prepare.append(job_off_nodes)

    ########## infra nodes hss + epc
    # prepare nodes
    commands = []
    if load_nodes:
        commands.append(Run("rhubarbe", "load", "-i", image_gw,
                            hssname, epcname))
    elif reset_nodes:
        commands.append(Run("rhubarbe", "reset", hssname, epcname))
    # always do this
    commands.append(Run("rhubarbe", "wait", "-t", 120, hssname, epcname))

    job_load_infra = SshJob(
        node=gwnode,
        commands=commands,
        label="load and wait HSS and EPC nodes",
        required=jobs_prepare,
    )

    # start services
    job_service_hss = SshJob(
        node=hssnode,
        command=RunScript(locate_local_script("oai-hss.sh"),
                          "run-hss", epc, includes=includes),
        label="start HSS service",
        required=job_load_infra,
    )

    msg = "wait for HSS to warm up"
    job_service_epc = Sequence(
        # let 15 seconds to HSS
        Job(
            verbose_delay(15, msg),
            label=msg,
        ),
        SshJob(
            node=epcnode,
            command=RunScript(locate_local_script("oai-epc.sh"),
                              "run-epc", hss, includes=includes),
            label="start EPC services",
        ),
        required=job_load_infra,
    )

    jobs_infra = job_load_infra, job_service_hss, job_service_epc

    ########## enodeb
    # prepare node
    commands = []
    if load_nodes:
        commands.append(Run("rhubarbe", "usrpoff", enb))
        commands.append(Run("rhubarbe", "load", "-i", image_enb, enb))
    elif reset_nodes:
        commands.append(Run("rhubarbe", "reset", enb))
    commands.append(Run("rhubarbe", "wait", "-t", "120", enb))

    job_load_enb = SshJob(
        node=gwnode,
        commands=commands,
        label="load and wait ENB",
        required=jobs_prepare,
    )

    # start service
    msg = "wait for EPC to warm up"
    job_service_enb = Sequence(
        Job(verbose_delay(15, msg), label=msg),
        SshJob(
            node=enbnode,
            # run-enb expects the id of the epc as a parameter
            command=RunScript(locate_local_script("oai-enb.sh"),
                              "run-enb", epc, reset_usrp,
                              includes=includes),
            label="start softmodem on ENB",
        ),
        required=(job_load_enb, job_service_hss, job_service_epc),
    )

    jobs_enb = job_load_enb, job_service_enb

    ########## run experiment per se
    # the phone - we need to wait for the USB firmware to be loaded
    duration = 30 if reset_usrp is not False else 8
    # the original message had a .format(duration) with no placeholder,
    # so the duration never showed up - fixed
    msg = "wait {}s for enodeb firmware to load on USRP".format(duration)
    job_wait_enb = Job(verbose_delay(duration, msg), label=msg,
                       required=job_service_enb)

    job_start_phone = SshJob(
        node=gwnode,
        commands=[
            RunScript(locate_local_script("faraday.sh"),
                      "macphone", "r2lab/infra/user-env/macphone.sh",
                      "phone-on", includes=includes),
            RunScript(locate_local_script("faraday.sh"),
                      "macphone", "r2lab/infra/user-env/macphone.sh",
                      "phone-start-app", includes=includes),
        ],
        label="start phone 4g and speedtest app",
        required=job_wait_enb,
    )

    job_ping_phone_from_epc = SshJob(
        node=epcnode,
        commands=[
            Run("sleep 10"),
            Run("ping -c 100 -s 100 -i .05 172.16.0.2 &> /root/ping-phone"),
        ],
        label="ping phone from EPC",
        critical=False,
        required=job_wait_enb,
    )

    jobs_exp = job_wait_enb, job_start_phone, job_ping_phone_from_epc

    ########## extra nodes
    # ssh -X not yet supported in apssh, so one option is to start them
    # using a local process
    commands = []
    if not extras:
        commands.append(Run("echo no extra nodes specified - ignored"))
    else:
        if load_nodes:
            commands.append(Run("rhubarbe", "usrpoff", *extra_hostnames))
            commands.append(Run("rhubarbe", "load", "-i", image_extra,
                                *extra_hostnames))
            commands.append(Run("rhubarbe", "wait", "-t", 120,
                                *extra_hostnames))
            commands.append(Run("rhubarbe", "usrpon", *extra_hostnames))
        elif reset_nodes:
            # this used to pass the list itself as a single bogus
            # argument (missing the * unpack) - fixed
            commands.append(Run("rhubarbe", "reset", *extra_hostnames))
        commands.append(Run("rhubarbe", "wait", "-t", "120",
                            *extra_hostnames))

    job_load_extras = SshJob(
        node=gwnode,
        commands=commands,
        label="load and wait extra nodes",
        required=job_check_for_lease,
    )

    jobs_extras = [job_load_extras]

    colors = ["wheat", "gray", "white"]

    if spawn_xterms:
        jobs_xterms_extras = [
            SshJob(
                node=extra_node,
                command=Run("xterm -fn -*-fixed-medium-*-*-*-20-*-*-*-*-*-*-*"
                            " -bg {} -geometry 90x10".format(color),
                            x11=True),
                label="xterm on node {}".format(extra_node.hostname),
                required=job_load_extras,
                # don't set forever; if we do, then these xterms get killed
                # when all other tasks have completed
                # forever = True,
            )
            for extra_node, color in zip(extra_nodes, itertools.cycle(colors))
        ]
        jobs_extras += jobs_xterms_extras

    # schedule the load phases only if required
    sched = Scheduler(verbose=verbose)
    # this is just a way to add a collection of jobs to the scheduler
    sched.update(jobs_prepare)
    sched.update(jobs_infra)
    sched.update(jobs_enb)
    sched.update(jobs_exp)
    sched.update(jobs_extras)
    # remove dangling requirements - if any
    # should not be needed but won't hurt either
    sched.sanitize()

    print(40 * "*")
    if load_nodes:
        # this used to format load_nodes (a boolean) into the gw slot
        # (4 args for 3 placeholders) - fixed
        print("LOADING IMAGES: (gw->{}, enb->{}, extras->{})".format(
            image_gw, image_enb, image_extra))
    elif reset_nodes:
        print("RESETTING NODES")
    else:
        print("NODES ARE USED AS IS (no image loaded, no reset)")

    sched.rain_check()
    # Update the .dot and .png file for illustration purposes
    if verbose:
        sched.list()
    name = "scenario-load" if load_nodes else \
        "scenario-reset" if reset_nodes else \
        "scenario"
    sched.export_as_dotfile("{}.dot".format(name))
    os.system("dot -Tpng {}.dot -o {}.png".format(name, name))

    sched.list()

    if not sched.orchestrate():
        print("RUN KO : {}".format(sched.why()))
        sched.debrief()
        return False
    else:
        print("RUN OK")
        return True