def test_deferred_chain(self):
    """
    Feed the captured output of a first command into a second command.

    This is analogous to
        run1=$(ssh localhost echo from-first-run)
        final=$(ssh localhost echo ${run1})
    The 'final' variable is only captured so the outcome can be checked.
    """
    variables = Variables()
    sched = Scheduler()
    node = SshNode(localhostname(), username=localuser())

    # first job stores its stdout under 'run1' ...
    producer = SshJob(
        node,
        commands=Run("echo from-first-run",
                     capture=Capture('run1', variables)))
    # ... and the second one only renders its command once 'run1' is known
    consumer = SshJob(
        node,
        commands=Run(Deferred("echo {{run1}}", variables),
                     capture=Capture('final', variables)))
    Sequence(producer, consumer, scheduler=sched)
    sched.run()

    self.assertEqual(variables.final, "from-first-run")
def test_deferred_service(self):
    """
    Define a service from a Deferred instance rather than a plain string,
    start it, and look for the expected text in its journal.
    """
    sched = Scheduler()
    variables = Variables()
    service = Service(Deferred("echo {{run1}}", variables),
                      service_id='echo',
                      verbose=True)
    node = SshNode(localhostname(), username=localuser())

    fill_variable = SshJob(
        node,
        commands=Run("echo from-first-run",
                     capture=Capture('run1', variables)))
    start_service = SshJob(
        node,
        commands=Run(service.start_command()))
    read_journal = SshJob(
        node,
        commands=Run(service.journal_command(since="10 second ago"),
                     capture=Capture('journal', variables)))
    Sequence(fill_variable, start_service, read_journal, scheduler=sched)

    print('STARTING', 20 * '-', service.start_command())
    sched.run()
    print('DONE', 20 * '-', service.start_command())

    # the journal is expected to contain what the first job echoed
    self.assertTrue("from-first-run" in variables.journal)
def _allowed_signal(self, allowed_exits, host="localhost", username=None):
    """
    Run a `sleep` job that gets killed with TERM by a second job, and check
    that the scheduler outcome matches whether TERM is in `allowed_exits`.

    Args:
        allowed_exits: forwarded to the `Run` command of the sleeping job.
        host: hostname of the node both jobs run on.
        username: login on that node; defaults to `util.localuser()`.
    """
    print(f"Testing allowed signal allowed_exits={allowed_exits}")

    # scheduler-wide timeout
    overall_timeout = 4
    # how long the victim job sleeps
    long = 2
    # delay before the signal is sent
    short = 1
    # we always kill with TERM
    signal = "TERM"

    login = util.localuser() if username is None else username
    node = SshNode(host, username=login)
    scheduler = Scheduler(timeout=overall_timeout, critical=False)
    # the victim
    SshJob(node=node, scheduler=scheduler,
           command=Run(f"sleep {long}", allowed_exits=allowed_exits))
    # the killer
    SshJob(node=node, scheduler=scheduler,
           command=f"sleep {short}; pkill -{signal} sleep")

    expected = signal in allowed_exits
    run = scheduler.run()
    scheduler.list()
    self.assertEqual(run, expected)
def test_topology(self):
    """
    Export as a png file the topology of a scheduler whose two jobs run on
    two nodes reached through the same gateway.
    """
    gateway = SshNode("faraday", username="******")
    first = SshNode(gateway=gateway, hostname="fit01", username="******")
    second = SshNode(gateway=gateway, hostname="fit02", username="******")

    scheduler = Scheduler()
    for node in (first, second):
        SshJob(node, command='hostname', scheduler=scheduler)

    topology_as_pngfile(scheduler, "topology")
def test_environment(self):
    """
    Start a service that runs `env` with two extra environment variables,
    and check that both show up in the service journal.
    """
    needle_foo = 'xxx-foo-xxx'
    needle_bar = 'xxx-bar-xxx'

    env = Variables()
    node = SshNode("localhost")
    scheduler = Scheduler()
    environ = {
        'FOO': needle_foo,
        'BAR': needle_bar,
    }
    service = Service("env", service_id='echo-environ', environ=environ)

    start = Run(service.start_command())
    journal = Run(service.journal_command(since='5s ago'),
                  capture=Capture('journal', env))
    SshJob(scheduler=scheduler, node=node, commands=[start, journal])

    self.assertEqual(scheduler.run(), True)
    self.assertTrue(f"FOO={needle_foo}" in env.journal)
    self.assertTrue(f"BAR={needle_bar}" in env.journal)
def all_off(slice, verbose, debug):
    """
    Expects a slice name, and turns off faraday completely.

    Args:
        slice: either ``user@gateway`` or a bare user name; in the latter
            case the gateway defaults to ``faraday.inria.fr``.
        verbose: passed to the ``ColonFormatter`` attached to the gateway.
        debug: passed to the gateway ``SshNode``.

    Returns:
        0 if the scheduler ran successfully, 1 otherwise.
    """
    # what argparse knows as a slice actually is a gateway (user + host)
    try:
        gwuser, gwhost = slice.split('@')
    except ValueError:
        # no '@' at all (or several): use the whole string as the user name
        gwuser, gwhost = slice, "faraday.inria.fr"

    gwnode = SshNode(hostname=gwhost, username=gwuser,
                     formatter=ColonFormatter(verbose=verbose),
                     debug=debug)

    scheduler = Scheduler(
        SshJob(
            node=gwnode,
            command=Run("rhubarbe", "bye"),
            label="turn off",
        ))

    result = scheduler.orchestrate()
    if not result:
        print("RUN KO : {}".format(scheduler.why()))
        # was `sched.debrief()`: `sched` is not defined in this function
        scheduler.debrief()
    else:
        print("faraday turned off OK")

    return 0 if result else 1
def test_local_string(self):
    """
    Read a local script into a string and run it on the gateway through
    RunString, passing it three arguments.
    """
    with open("tests/script-with-args.sh") as reader:
        script_text = reader.read()

    command = RunString(script_text, "foo", "bar", "tutu")
    job = SshJob(node=self.gateway(),
                 command=command,
                 label="test_local_string")
    self.run_one_job(job)
def test_logic1(self):
    """
    A non-critical job that runs `false` then `true` is expected to fail.
    """
    failing_job = SshJob(
        node=self.gateway(),
        critical=False,
        commands=[
            Run("false"),
            Run("true"),
        ],
        label="should fail")
    self.run_one_job(failing_job, expected=False)
def _allowed_retcod(self, allowed_exits, host="localhost", username=None):
    """
    Run a job that sleeps and then exits with a fixed code, and check that
    the scheduler outcome matches whether that code is in `allowed_exits`.

    Args:
        allowed_exits: forwarded to the `Run` command of the job.
        host: hostname of the node the job runs on.
        username: login on that node; defaults to `util.localuser()`.
    """
    print(f"Testing allowed retcod allowed_exits={allowed_exits}")

    # scheduler-wide timeout
    overall_timeout = 4
    # how long the job sleeps before exiting
    long = 1
    # the exit code always requested from the remote shell
    # NOTE(review): POSIX shells presumably truncate exit statuses to 8 bits,
    # so `exit 1000` may be reported as 232 - confirm against the callers
    retcod = 1000

    login = util.localuser() if username is None else username
    node = SshNode(host, username=login)
    scheduler = Scheduler(timeout=overall_timeout, critical=False)
    SshJob(node=node, scheduler=scheduler,
           command=Run(f"sleep {long}; exit {retcod}",
                       allowed_exits=allowed_exits))

    expected = retcod in allowed_exits
    run = scheduler.run()
    scheduler.list()
    self.assertEqual(run, expected)
def test_s1(self):
    """
    SshJob built with `command=` (singular) given a plain list of strings.
    """
    words = [
        "echo",
        "SshJob with s1 command singular",
        "$(hostname)",
    ]
    job = SshJob(node=self.gateway(), command=words, label='s1')
    self.run_one_job(job)
def check_lease(experiment_scheduler, sshnode):
    """
    Re-usable function that acts a bit like a python decorator
    on schedulers.

    Given an experiment described as a scheduler, this function
    returns a higher-level scheduler that first checks for the
    lease, and then proceeds with the experiment.

    Args:
        experiment_scheduler: the scheduler that describes the experiment;
            it is inserted as a regular job in the returned scheduler.
        sshnode: the node on which the lease gets checked
            (the original comments say this is done on the gateway).

    Returns:
        a new Scheduler that runs the lease check, then the experiment.
    """
    check_lease_job = SshJob(
        # checking the lease is done on the gateway; use the node handed
        # over by the caller rather than relying on a global variable
        node=sshnode,
        # this means that a failure in any of the commands
        # will cause the scheduler to bail out immediately
        critical=True,
        command=Run("rhubarbe leases --check"),
    )
    return Scheduler(
        Sequence(
            check_lease_job,
            # here we create a nested scheduler
            # by inserting the experiment_scheduler
            # as a regular job in the main scheduler
            experiment_scheduler,
        ))
def test_p3(self):
    """
    SshJob built with `commands=` (plural) given a list of one Run object.
    """
    echo = Run("echo", "SshJob p3 commands plural", "$(hostname)")
    job = SshJob(node=self.gateway(), commands=[echo], label='p3')
    self.run_one_job(job)
def test_s3(self):
    """
    SshJob built with `command=` (singular) given a list of one Run object.
    """
    echo = Run("echo", "SshJob with s3 command singular", "$(hostname)")
    job = SshJob(node=self.gateway(), command=[echo], label='s3')
    self.run_one_job(job)
def test_local_command2(self):
    """
    Run a small shell loop on a LocalNode; the loop prints three lines,
    one per second.
    """
    loop = Run("for i in $(seq 3); do echo line $i; sleep 1; done")
    local_job = SshJob(node=LocalNode(), commands=[loop])
    self.run_one_job(job=local_job)
def test_x11_shell(self):
    """
    Run two X11-enabled commands on the gateway: a plain command that
    displays $DISPLAY, and an inline bash script that lists a few fonts.
    """
    # the script needs real line breaks: the shebang must sit alone on the
    # first line, otherwise the command ends up as an argument to bash
    fonts_script = (
        "#!/bin/bash\n"
        "xlsfonts | head -5\n"
    )
    self.run_one_job(
        job=SshJob(
            node=self.gateway(),
            command=[
                Run("echo DISPLAY=$DISPLAY", x11=True),
                RunString(fonts_script, x11=True),
            ]))
def test_variables(self):
    """
    Check how variables are rendered: the same template is used with an
    empty Variables object and with one where `defined` is set, and the
    resulting scheduler is drawn as a png.
    """
    template = "defined: {{defined}} undefined: {{undefined}}"

    empty_vars = Variables()
    filled_vars = Variables()
    filled_vars.defined = 'OK-defined'

    scheduler = Scheduler(critical=False)
    gateway = SshNode(hostname=localhostname(), username=localuser())
    jobs = [
        SshJob(gateway, command=Deferred(template, variables))
        for variables in (empty_vars, filled_vars)
    ]
    Sequence(*jobs, scheduler=scheduler)

    produce_png(scheduler, "test_graphics_variables")
def test_local_string_includes(self):
    """
    Read a local script into a string and run it on the gateway through
    RunString, with an explicit remote name and one included file.
    """
    with open("tests/needsinclude.sh") as reader:
        script_text = reader.read()

    command = RunString(script_text, "some", "'more text'",
                        remote_name="run-script-sample.sh",
                        includes=["tests/inclusion.sh"])
    job = SshJob(node=self.gateway(),
                 command=command,
                 label="test_local_string")
    self.run_one_job(job)
def all_off(slice, verbose, debug):
    """
    Expects a slice name, and turns off faraday completely,
    after checking that the slice holds a current lease.

    Args:
        slice: either ``user@gateway`` or a bare user name; in the latter
            case the gateway defaults to ``faraday.inria.fr``.
        verbose: passed to the ``ColonFormatter`` attached to the gateway.
        debug: passed to the gateway ``SshNode``.

    Returns:
        0 if the scheduler ran successfully, 1 otherwise.
    """
    # what argparse knows as a slice actually is a gateway (user + host)
    try:
        gwuser, gwhost = slice.split('@')
    except ValueError:
        # no '@' at all (or several): use the whole string as the user name
        gwuser, gwhost = slice, "faraday.inria.fr"

    gwnode = SshNode(hostname=gwhost, username=gwuser,
                     formatter=ColonFormatter(verbose=verbose),
                     debug=debug)

    # keep a handle on the lease job: the failure report below needs it
    check_for_lease = SshJob(
        node=gwnode,
        command=Run("rhubarbe", "leases", "--check"),
        label="check we have a current lease",
    )
    turn_off = SshJob(
        node=gwnode,
        command=Run("rhubarbe", "bye"),
        label="turn off",
    )
    scheduler = Scheduler(Sequence(check_for_lease, turn_off))

    result = scheduler.orchestrate()
    if not result:
        if check_for_lease.raised_exception():
            print(
                "slice {} does not appear to hold a valid lease".format(slice))
        else:
            print("RUN KO : {}".format(scheduler.why()))
            # was `sched.debrief()`: `sched` is not defined in this function
            scheduler.debrief()
    else:
        print("faraday turned off OK")

    return 0 if result else 1
def check_expansion(self, *deferred_expected_s):
    """
    Run each deferred command in its own job, capture its output through a
    dedicated CaptureFormatter, and compare it with the expected text.

    Args:
        deferred_expected_s: any number of (deferred, expected) pairs.
    """
    scheduler = Scheduler()
    capture_by_deferred = {}

    for deferred, _ in deferred_expected_s:
        formatter = CaptureFormatter()
        capture_by_deferred[deferred] = formatter
        formatter.start_capture()
        node = SshNode(localhostname(), username=localuser(),
                       formatter=formatter)
        scheduler.add(SshJob(node=node, commands=Run(deferred)))

    scheduler.run()

    for deferred, expected in deferred_expected_s:
        self.assertEqual(capture_by_deferred[deferred].get_capture(),
                         expected)
def test_commands_verbose(self):
    """
    Run a verbose job mixing all command types on the gateway (including
    a Push / Pull round trip of a file), then diff locally the pulled
    copy against the file that was pushed.
    """
    dummy_path = "tests/dummy-10"
    dummy_file = Path(dummy_path).name

    scheduler = Scheduler()
    remote_job = SshJob(
        node=self.gateway(),
        verbose=True,
        commands=[
            Run("hostname"),
            RunScript("tests/script-with-args.sh", "arg1", "arg2"),
            RunString("for i in $(seq 3); do echo verbose$i; done"),
            Push(localpaths=dummy_path, remotepath="."),
            Pull(remotepaths=dummy_file, localpath=dummy_path + ".loop"),
        ])
    compare_job = SshJob(
        node=LocalNode(),
        critical=True,
        commands=Run("diff {x} {x}.loop".format(x=dummy_path),
                     verbose=True))
    Sequence(remote_job, compare_job, scheduler=scheduler)

    ok = scheduler.run()
    if not ok:
        scheduler.debrief()
    self.assertTrue(ok)
def hop1(self, hostname='localhost', username=None, *, c1, commands,
         s_command='echo hop1-{}-{}', nested_sched=(0, 1)):
    """
    Create
      * <c1> connections to one node 1 hop away
      * and on each <commands> commands
    then check the current number of connections, after the run and again
    after cleanup.

    Args:
        hostname: the node to connect to.
        username: login on that node; defaults to `localuser()`.
        c1: number of distinct SshNode objects (connections) to create.
        commands: number of jobs created per connection.
        s_command: command template, formatted with the connection index
            and the command index.
        nested_sched: its items 0 and 1 are passed to `populate_sched`
            as `nested` and `pack_job` respectively.
    """
    if username is None:
        username = localuser()

    verbose(f"creating {c1} hop1-connections - "
            f"{commands} commands per conn - "
            f" to {username}@{hostname}")

    nodes = []
    jobs = []
    for n in range(c1):
        node1 = SshNode(hostname, username=username,
                        formatter=ColonFormatter(verbose=False))
        nodes.append(node1)
        jobs.extend(
            SshJob(node=node1, command=s_command.format(n, c))
            for c in range(commands))

    scheduler = self.populate_sched(Scheduler(), jobs,
                                    nested=nested_sched[0],
                                    pack_job=nested_sched[1])
    expected = c1

    # record base status
    in0, out0 = in_out_connections()
    verbose(f"INITIAL count in={in0} out={out0}")

    scheduler.export_as_pngfile("debug")
    topology_as_pngfile(scheduler, "topology")
    scheduler.run()

    in1, out1 = in_out_connections()
    verbose(f"AFTER RUN in={in1} out={out1}")
    self.assertEqual(in1-in0, expected)
    self.assertEqual(out1-out0, expected)

    # cleanup
    close_ssh_in_scheduler(scheduler)

    in1, out1 = in_out_connections()
    verbose(f"AFTER CLEANUP in={in1} out={out1}")
    self.assertEqual(in1-in0, 0)
    self.assertEqual(out1-out0, 0)
def _run_xterm_node_shell(self, node, shell):
    """
    Run an xterm on `node` with X11 forwarding, preceded by two commands
    that display $DISPLAY without and with X11 forwarding.

    Args:
        node: the node to run the job on.
        shell: if true, xterm is started from an inline bash script
            (RunString); otherwise from a plain Run command.
    """
    if shell:
        # the script needs real line breaks: the shebang must sit alone on
        # the first line, otherwise `xterm` becomes an argument to bash
        xterm_command = RunString(
            "#!/bin/bash\n"
            "xterm\n",
            x11=True)
    else:
        xterm_command = Run("xterm", x11=True)

    self.run_one_job(
        job=SshJob(
            node=node,
            command=[
                Run("echo without x11, DISPLAY=$DISPLAY"),
                Run("echo with x11, DISPLAY=$DISPLAY", x11=True),
                xterm_command,
            ]))
def test_hop_depth(self, hostname='localhost', username=None,
                   depth=4, commands=1):
    """
    Chain <depth> nodes, each one using the previous one as its gateway,
    run <commands> jobs on each, and check the number of connections
    after the run and again after cleanup.
    """
    # Do not use the close_nodes manually on this test, it does keep the
    # order of the declared nodes.
    if username is None:
        username = localuser()

    verbose(f"creating hop{depth}-connections - "
            f"{commands} commands per conn "
            f" to {username}@{hostname}")

    scheduler = Scheduler(timeout=7)
    nodes = []
    jobs = []
    previous = None
    for n in range(depth):
        node = SshNode(hostname, gateway=previous, username=username,
                       formatter=ColonFormatter(verbose=False))
        nodes.append(node)
        # the node just created is the gateway for the next one
        previous = node
        jobs.extend(
            SshJob(node=node,
                   command=f"echo hop{n}-{c}",
                   scheduler=scheduler)
            for c in range(commands))

    expected = depth

    # record base status
    in0, out0 = in_out_connections()
    verbose(f"INITIAL count in={in0} out={out0}")

    scheduler.run()

    in1, out1 = in_out_connections()
    verbose(f"AFTER RUN in={in1} out={out1}")
    self.assertEqual(in1-in0, expected)
    self.assertEqual(out1-out0, expected)

    # cleanup
    close_ssh_in_scheduler(scheduler)
    # let's wait a little bit before counting
    time.sleep(1)

    in1, out1 = in_out_connections()
    verbose(f"AFTER CLEANUP in={in1} out={out1}")
    self.assertEqual(in1-in0, 0)
    self.assertEqual(out1-out0, 0)
def test_file_loopback(self, size=20):
    """
    Push a random file to the gateway, pull it back twice (once within the
    same job, once in a separate job) and check that both copies have the
    same contents as the original.
    """
    # randomly create a 2**size chars file
    b1 = "random-{}".format(size)
    b2 = "loopback-{}".format(size)
    b3 = "again-{}".format(size)
    p1, p2, p3 = ("tests/" + base for base in (b1, b2, b3))
    self.random_file(p1, size)

    # push then pull within one single job
    round_trip = SshJob(
        node=self.gateway(),
        commands=[
            Run("mkdir -p apssh-tests"),
            Push(localpaths=p1, remotepath="apssh-tests"),
            Pull(remotepaths="apssh-tests/" + b1,
                 localpath="tests/" + b2),
        ])
    self.run_one_job(round_trip)

    with open(p1) as f1:
        s1 = f1.read()
    with open(p2) as f2:
        s2 = f2.read()
    self.assertEqual(s1, s2)

    # pull it again in another ssh connection
    pull_again = SshJob(
        node=self.gateway(),
        commands=[
            Run("mkdir -p apssh-tests"),
            Pull(remotepaths="apssh-tests/" + b1,
                 localpath="tests/" + b3),
        ])
    self.run_one_job(pull_again)

    with open(p3) as f3:
        s3 = f3.read()
    self.assertEqual(s1, s3)
def test_mixed_commands(self):
    """
    Mix RunScript commands (that need an included file) with a plain Run
    command inside one single job on the gateway.
    """
    includes = ["tests/inclusion.sh"]
    first_script = RunScript("tests/needsinclude.sh", "run1",
                             includes=includes)
    show_release = Run("echo +++++; cat /etc/lsb-release; echo +++++")
    second_script = RunScript("tests/needsinclude.sh", "another", "run",
                              includes=includes)

    job = SshJob(node=self.gateway(),
                 commands=[first_script, show_release, second_script],
                 label='script_commands')
    self.run_one_job(job)
def hop1(self, hostname='localhost', username=None, *, c1, commands):
    """
    Create
      * <c1> connections to one node 1 hop away
      * and on each <commands> commands
    then check the current number of connections, after the run and again
    after all nodes have been closed.

    Args:
        hostname: the node to connect to.
        username: login on that node; defaults to `localuser()`.
        c1: number of distinct SshNode objects (connections) to create.
        commands: number of jobs created per connection.
    """
    if username is None:
        username = localuser()

    print(f"creating {c1} hop1-connections - "
          f"{commands} commands per conn - "
          f" to {username}@{hostname}")

    scheduler = Scheduler()
    nodes = []
    jobs = []
    for n in range(c1):
        node1 = SshNode(hostname, username=username,
                        formatter=ColonFormatter(verbose=False))
        nodes.append(node1)
        jobs.extend(
            SshJob(node=node1,
                   command=f'echo hop1-{n}-{c}',
                   scheduler=scheduler)
            for c in range(commands))

    expected = c1

    # record base status
    in0, out0 = in_out_connections()
    print(f"INITIAL count in={in0} out={out0}")

    scheduler.run()

    in1, out1 = in_out_connections()
    print(f"AFTER RUN in={in1} out={out1}")
    self.assertEqual(in1 - in0, expected)
    self.assertEqual(out1 - out0, expected)

    # cleanup: close all the nodes in one go
    loop = asyncio.get_event_loop()
    closings = [node.close() for node in nodes]
    loop.run_until_complete(asyncio.gather(*closings))

    in1, out1 = in_out_connections()
    print(f"AFTER CLEANUP in={in1} out={out1}")
    self.assertEqual(in1 - in0, 0)
    self.assertEqual(out1 - out0, 0)
def test_capture(self):
    """
    Two echo commands run through a CaptureFormatter must be captured
    as two lines, in order.
    """
    formatter = CaptureFormatter()
    node = SshNode(localhostname(), username=localuser(),
                   formatter=formatter)
    scheduler = Scheduler()
    echoes = [
        Run("echo LINE1"),
        Run("echo LINE2"),
    ]
    scheduler.add(SshJob(node=node, commands=echoes))

    formatter.start_capture()
    scheduler.run()

    self.assertEqual(formatter.get_capture(), "LINE1\nLINE2\n")
def test_local_command(self):
    """
    On a LocalNode: extract the first 2**18 bytes of a random file,
    then list and checksum the extract.
    """
    # create random file in python rather than with /dev/random
    # that is not working in virtualbox
    random_full = "RANDOM-full"
    random_head = "RANDOM"
    self.random_file(random_full, size=19)
    print("DONE")

    steps = [
        Run(f"head -c {2**18} < {random_full} > {random_head}"),
        Run(f"ls -l {random_head}"),
        Run(f"shasum {random_head}"),
    ]
    local_job = SshJob(node=LocalNode(), commands=steps)
    self.run_one_job(job=local_job)
def global_check_image(self, _image, check_strings):
    """
    On the remaining nodes, check the image marker: the last line of
    /etc/rhubarbe-image must match one of `check_strings`.

    Nodes whose check job did not complete or raised an exception, and
    nodes where no string matches, are reported through
    `mark_and_exclude`.

    Args:
        _image: not used in this method.
        check_strings: the strings to look for; they are joined with '|'
            into one egrep pattern.
    """
    self.print(f"checking {len(self.nodes)} nodes"
               f" against {check_strings} in /etc/rhubarbe-image")

    grep_pattern = "|".join(check_strings)
    check_command = (
        f"tail -1 /etc/rhubarbe-image | egrep -q '{grep_pattern}'")

    # one non-critical job per node
    jobs = []
    for node in self.nodes:
        jobs.append(
            SshJob(node=silent_sshnode(node, verbose=self.verbose),
                   command=check_command,
                   critical=False))

    scheduler = Scheduler(Job(self.display.run(), forever=True),
                          *jobs,
                          critical=False,
                          timeout=self.wait_timeout)
    succeeded = scheduler.run()
    if not succeeded and self.verbose:
        scheduler.debrief()

    # exclude nodes that have not behaved
    for node, job in zip(self.nodes, jobs):
        if not job.is_done() or job.raised_exception():
            self.verbose_msg(
                f"checking {grep_pattern}: something went badly wrong with {node}"
            )
            exc = job.raised_exception()
            message = f"OOPS {type(exc)} {exc}" if exc else None
            self.mark_and_exclude(node, Reason.CANT_CHECK_IMAGE, message)
        elif not (job.result() == 0):
            explanation = f"wrong image found on {node} - looking for {grep_pattern}"
            self.verbose_msg(explanation)
            self.mark_and_exclude(node, Reason.DID_NOT_LOAD, explanation)
        else:
            self.print(f"node {node} checked out OK")
def one_run(tx_power, phy_rate, antenna_mask, channel, *,
            run_name=default_run_name, slicename=default_slicename,
            load_images=False, node_ids=None,
            parallel=None,
            verbose_ssh=False, verbose_jobs=False, dry_run=False):
    """
    Performs data acquisition on all nodes with the following settings

    Arguments:
        tx_power: in dBm, a string like 5, 10 or 14
        phy_rate: a string among 1, 54
        antenna_mask: a string among 1, 3, 7
        channel: a string like e.g. 1 or 40
        run_name: the name for a subdirectory where all data will be kept
                  successive runs should use the same name for further
                  visualization
        slicename: the Unix login name (slice name) to enter the gateway
        load_images: a boolean specifying whether nodes should be
                     re-imaged first
        node_ids: a list of node ids to run the scenario on;
                  strings or ints are OK;
                  defaults to `default_node_ids`
        parallel: a number of simultaneous jobs to run
                  None means all pings are chained sequentially (default)
                  otherwise the value is used as the scheduler's
                  jobs window
        verbose_ssh: passed as `verbose` to all the SshNode objects
        verbose_jobs: passed as `verbose` to the scheduler and its jobs
        dry_run: if true, only print a one-line summary of the parameters

    Returns:
        True in dry-run mode; otherwise the result of
        `scheduler.orchestrate()`
    """

    #
    # dry-run mode
    # just display a one-liner with parameters
    #
    if dry_run:
        load_msg = "" if not load_images else " LOAD"
        # node_ids may still be None at this point
        shown_ids = default_node_ids if node_ids is None else node_ids
        nodes = " ".join(str(n) for n in shown_ids)
        print("dry-run: {run_name}{load_msg} -"
              " t{tx_power} r{phy_rate} a{antenna_mask} ch{channel} -"
              "nodes {nodes}"
              .format(run_name=run_name, load_msg=load_msg,
                      tx_power=tx_power, phy_rate=phy_rate,
                      antenna_mask=antenna_mask, channel=channel,
                      nodes=nodes))
        # in dry-run mode we are done
        return True

    # set default for the nodes parameter
    node_ids = ([int(node_id) for node_id in node_ids]
                if node_ids is not None else default_node_ids)

    ###
    # create the logs directory based on input parameters
    run_root = naming_scheme(run_name, tx_power, phy_rate,
                             antenna_mask, channel, autocreate=True)

    # the nodes involved
    faraday = SshNode(hostname=default_gateway, username=slicename,
                      formatter=TimeColonFormatter(), verbose=verbose_ssh)

    # this is a python dictionary that allows to retrieve a node object
    # from an id
    node_index = {
        node_id: SshNode(gateway=faraday, hostname=fitname(node_id),
                         username="******",
                         formatter=TimeColonFormatter(),
                         verbose=verbose_ssh)
        for node_id in node_ids
    }

    # the global scheduler
    scheduler = Scheduler(verbose=verbose_jobs)

    ##########
    check_lease = SshJob(
        scheduler=scheduler,
        node=faraday,
        verbose=verbose_jobs,
        critical=True,
        command=Run("rhubarbe leases --check"),
    )

    # load images if requested

    green_light = check_lease

    if load_images:
        # the nodes that we **do not** use should be turned off
        # so if we have selected e.g. nodes 10 12 and 15, we will do
        # rhubarbe off -a ~10 ~12 ~15, meaning all nodes except 10, 12 and 15
        negated_node_ids = ["~{}".format(node_id) for node_id in node_ids]
        # replace green_light in this case
        green_light = SshJob(
            node=faraday,
            required=check_lease,
            critical=True,
            scheduler=scheduler,
            verbose=verbose_jobs,
            commands=[
                Run("rhubarbe", "off", "-a", *negated_node_ids),
                Run("rhubarbe", "load", "-i", "u16-ath-noreg", *node_ids),
                Run("rhubarbe", "wait", *node_ids)
            ]
        )

    ##########
    # setting up the wireless interface on all nodes
    #
    # this is a python feature known as a list comprehension
    # we just create as many SshJob instances as we have
    # (id, SshNode) couples in node_index
    # and gather them all in init_wireless_jobs
    # they all depend on green_light
    #
    # provide node-utilities with the ranges/units it expects
    frequency = channel_frequency[int(channel)]
    # tx_power_in_mBm not in dBm
    # tx_power is documented as a string: convert it before multiplying,
    # otherwise `*` would repeat the string instead of scaling the value
    tx_power_driver = int(tx_power) * 100

    if load_images:
        # The first init_wireless_jobs always has troubles...
        # Do it twice the first time (nasty hack)
        init_wireless_jobs = [
            SshJob(
                scheduler=scheduler,
                required=green_light,
                node=node,
                verbose=verbose_jobs,
                label="init {}".format(node_id),
                commands=[
                    RunScript("node-utilities.sh", "init-ad-hoc-network",
                              wireless_driver, "foobar", frequency,
                              phy_rate, antenna_mask, tx_power_driver),
                    RunScript("node-utilities.sh", "init-ad-hoc-network",
                              wireless_driver, "foobar", frequency,
                              phy_rate, antenna_mask, tx_power_driver)
                ]
            )
            for node_id, node in node_index.items()]
    else:
        init_wireless_jobs = [
            SshJob(
                scheduler=scheduler,
                required=green_light,
                node=node,
                verbose=verbose_jobs,
                label="init {}".format(node_id),
                command=RunScript("node-utilities.sh", "init-ad-hoc-network",
                                  wireless_driver, "foobar", frequency,
                                  phy_rate, antenna_mask, tx_power_driver)
            )
            for node_id, node in node_index.items()]

    # then install and run olsr on fit nodes
    run_olsr = [
        SshJob(
            scheduler=scheduler,
            node=node,
            required=init_wireless_jobs,
            label="init and run olsr on fit nodes",
            verbose=verbose_jobs,
            command=RunScript("node-utilities.sh", "run-olsr")
        )
        for i, node in node_index.items()]

    # after that, run tcpdump on fit nodes, this job never ends...
    run_tcpdump = [
        SshJob(
            scheduler=scheduler,
            node=node,
            required=run_olsr,
            label="run tcpdump on fit nodes",
            verbose=verbose_jobs,
            commands=[
                Run("echo run tcpdump on fit{:02d}".format(i)),
                Run("tcpdump -U -i moni-{} -y ieee802_11_radio -w /tmp/fit{}.pcap".format(wireless_driver, i))
            ]
        )
        for i, node in node_index.items()]

    # let the wireless network settle
    settle_wireless_job = PrintJob(
        "Let the wireless network settle",
        sleep=settle_delay,
        scheduler=scheduler,
        required=run_olsr,
        label="settling")

    ##########
    # create all the ping jobs, i.e. max*(max-1)/2
    # this again is a python list comprehension
    # see the 2 for instructions at the bottom
    #
    # notice that these SshJob instances are not yet added
    # to the scheduler, we will add them later on
    # depending on the sequential/parallel strategy

    pings = [
        SshJob(
            node=nodei,
            required=settle_wireless_job,
            label="ping {} -> {}".format(i, j),
            verbose=verbose_jobs,
            commands=[
                Run("echo {} '->' {}".format(i, j)),
                RunScript("node-utilities.sh", "my-ping",
                          "10.0.0.{}".format(j),
                          ping_timeout, ping_interval,
                          ping_size, ping_number,
                          ">", "PING-{:02d}-{:02d}".format(i, j)),
                Pull(remotepaths="PING-{:02d}-{:02d}".format(i, j),
                     localpath=str(run_root)),
            ]
        )
        # looping on the source, now only fit01 is source
        for i, nodei in node_index.items()
        # and on the destination
        for j, nodej in node_index.items()
        # and keep only half of the couples
        if (j > i) and (i == 1)
    ]

    # retrieve all pcap files from fit nodes
    retrieve_tcpdump = [
        SshJob(
            scheduler=scheduler,
            node=nodei,
            required=pings,
            label="retrieve pcap trace from fit{:02d}".format(i),
            verbose=verbose_jobs,
            commands=[
                RunScript("node-utilities.sh", "kill-olsr"),
                Run("sleep 1;pkill tcpdump; sleep 1"),
                RunScript("node-utilities.sh", "process-pcap", i),
                Run(
                    "echo retrieving pcap trace and result-{i}.txt from fit{i:02d}".format(i=i)),
                Pull(remotepaths=["/tmp/fit{}.pcap".format(i),
                                  "/tmp/result-{}.txt".format(i)],
                     localpath=str(run_root)),
            ]
        )
        for i, nodei in node_index.items()
    ]

    # xxx this is a little fishy
    # should we not just consider that the default is parallel=1 ?
    if parallel is None:
        # with the sequential strategy, we just need to
        # create a Sequence out of the list of pings
        # Sequence will add the required relationships
        scheduler.add(Sequence(*pings, scheduler=scheduler))
        # for running sequentially we impose no limit on the scheduler
        # that will be limited anyway by the very structure
        # of the required graph
        jobs_window = None
    else:
        # with the parallel strategy
        # we just need to insert all the ping jobs
        # as each already has its required OK
        scheduler.update(pings)
        # this time the value in parallel is the one
        # to use as the jobs window; 0 imposes no explicit value
        jobs_window = parallel

    # if not in dry-run mode, let's proceed to the actual experiment
    ok = scheduler.orchestrate(jobs_window=jobs_window)
    # give details if it failed
    if not ok:
        scheduler.debrief()

    # data acquisition is done, let's aggregate results
    # i.e. compute averages
    if ok:
        post_processor = Aggregator(run_root, node_ids, antenna_mask)
        post_processor.run()

    return ok