def test_nesting_sequence(self):
    """
    A scheduler nested as a job inside another scheduler.

    Expected wall-clock: BEGIN (.2s) + nested sequence (3 x .2s) + END (.2s) = 1s.
    """
    expected_duration = 1.
    watch = Watch('test_nesting_sequence')
    # the inner scheduler runs a 3-step sequence
    # NOTE(review): forever=True on a nested scheduler — presumably it still
    # terminates once its jobs are done; confirm against Scheduler semantics
    subjob = Scheduler(
        Sequence(
            Job(co_print_sleep(watch, .2, "one")),
            Job(co_print_sleep(watch, .2, "two")),
            Job(co_print_sleep(watch, .2, "three")),
        ),
        watch=watch,
        label="sub-scheduler\non several lines",
        critical=True,
        forever=True,
    )
    # the outer scheduler runs BEGIN, then the nested scheduler, then END
    main = Scheduler(Sequence(
        Job(co_print_sleep(watch, .2, "BEGIN"), label="job-label"),
        subjob,
        Job(co_print_sleep(watch, .2, "END")),
    ), watch=watch)
    print("===== test_nesting_sequence", "LIST with details")
    main.list(details=True)
    self.assertTrue(main.run())
    # overall duration must land within 50ms of the expected 1s
    self.assertAlmostEqual(watch.seconds(), expected_duration, delta=.05)
    produce_png(main, "test_nesting_sequence")
def test_order1(self):
    """Thirteen numbered jobs spread over 4 nested schedulers, then rendered."""
    async def aprint(x):
        print(x)

    def job(n):
        # wrap the printing coroutine in a Job labelled after its argument
        return Job(aprint(n), label=n)

    subs = [Scheduler() for _ in range(4)]
    sub1, sub2, sub3, sub4 = subs
    sched = Scheduler(
        Sequence(job('top'), sub1, job('middle'), sub2, sub3, sub4))
    # fill each sub-scheduler with 3 consecutively numbered jobs:
    # sub1 gets 1-3, sub2 gets 4-6, sub3 gets 7-9, sub4 gets 10-12
    for rank, sub in enumerate(subs):
        base = 3 * rank
        for offset in range(1, 4):
            sub.add(job(base + offset))
    # plus one extra job in the last sub-scheduler
    sub4.add(job(13))
    produce_png(sched, "test_png_order1")
def nested():
    # Sanitize on nested schedulers: requirements that point to jobs
    # outside the (sub)scheduler must be pruned by sanitize().
    # jobs 11..15; only 11,12,13 belong to the nested scheduler s2
    j11, j12, j13, j14, j15 = [job(i) for i in range(11, 16)]
    s2 = Scheduler(Sequence(j11, j12, j13), label="nested internal")
    # dangling requirements towards jobs NOT in s2
    j12.requires(j14)
    j13.requires(j15)
    # jobs 1..5; only j1, s2, j3 belong to the top scheduler s1
    j1, j2, j3, j4, j5 = [job(i) for i in range(1, 6)]
    s1 = Scheduler(Sequence(j1, s2, j3), label="nested top")
    # dangling requirements at the top level too
    j1.requires(j4)
    j1.requires(j11)
    s2.requires(j13)
    # j2 not included in sched, untouched
    j2.requires(j1)
    # before sanitize: each sequence-requirement plus the dangling ones
    self.assertEqual(len(j12.required), 2)
    self.assertEqual(len(j13.required), 2)
    self.assertEqual(len(j1.required), 2)
    self.assertEqual(len(s2.required), 2)
    self.assertEqual(len(j3.required), 1)
    s1.sanitize()
    # after sanitize: only requirements internal to each scheduler remain
    self.assertEqual(len(j12.required), 1)
    self.assertEqual(len(j13.required), 1)
    self.assertEqual(len(j1.required), 0)
    self.assertEqual(len(s2.required), 1)
    self.assertEqual(len(j3.required), 1)
def sched_sched_boom(s1_crit, s2_crit, j_crit):
    """
    Build a two-level nesting: external(internal(boom job)), where each
    level's criticalness is driven by the corresponding flag.
    """
    boom_job = Job(boom("ok"), critical=j_crit, label=f"boom {j_crit}")
    internal = Scheduler(boom_job,
                         critical=s2_crit, label=f"internal {s2_crit}")
    return Scheduler(internal,
                     critical=s1_crit, label=f"external {s1_crit}")
def test_deferred_service(self):
    """
    a service can be defined from a deferred instance
    rather than a plain string
    """
    s = Scheduler()
    env = Variables()
    # the service command is resolved lazily: {{run1}} gets expanded
    # from env at the time the service actually starts
    echo_service = Service(Deferred("echo {{run1}}", env),
                           service_id='echo',
                           verbose=True)
    n = SshNode(localhostname(), username=localuser())
    # 1. capture a string into env.run1
    # 2. start the service, which echoes that (deferred) string
    # 3. read the service journal back into env.journal
    Sequence(
        SshJob(n,
               commands=Run("echo from-first-run",
                            capture=Capture('run1', env))),
        SshJob(n,
               commands=Run(echo_service.start_command())),
        SshJob(n,
               commands=Run(echo_service.journal_command(
                   since="10 second ago"),
                   capture=Capture('journal', env))),
        scheduler=s)
    print('STARTING', 20 * '-', echo_service.start_command())
    s.run()
    print('DONE', 20 * '-', echo_service.start_command())
    #print(f"env={env}")
    # the journal must contain the string captured in step 1
    obtained = env.journal
    expected = "from-first-run"
    found = expected in obtained
    self.assertTrue(found)
def test_deferred_chain(self):
    """
    one command computes a string that gets passed to another one

    this is analogous to
    run1=$(ssh localhost echo from-first-run)
    final=$(ssh localhost echo ${run1})

    the 'final' variable is only needed for checking everything went well
    """
    scheduler = Scheduler()
    env = Variables()
    node = SshNode(localhostname(), username=localuser())
    # first job stores its output into env.run1
    producer = SshJob(node,
                      commands=Run("echo from-first-run",
                                   capture=Capture('run1', env)))
    # second job expands {{run1}} at run time and stores into env.final
    consumer = SshJob(node,
                      commands=Run(Deferred("echo {{run1}}", env),
                                   capture=Capture('final', env)))
    Sequence(producer, consumer, scheduler=scheduler)
    scheduler.run()
    self.assertEqual(env.final, "from-first-run")
def run(self, message_bus, timeout):
    """
    send verb to all nodes, waits for max timeout
    returns True if all nodes behaved as expected
    and False otherwise - including in case of KeyboardInterrupt
    """
    cmc_nodes = [Node(cmc_name, message_bus)
                 for cmc_name in self.selector.cmc_names()]
    verb_jobs = [Job(self.get_and_show_verb(node, self.verb), critical=True)
                 for node in cmc_nodes]
    live_display = Display(cmc_nodes, message_bus)
    # the display job runs forever alongside the verb jobs
    scheduler = Scheduler(
        Job(live_display.run(), forever=True, critical=True),
        *verb_jobs,
        timeout=timeout,
        critical=False)
    try:
        if scheduler.run():
            return True
        # at least one node misbehaved: explain and fail
        scheduler.debrief()
        print(f"rhubarbe-{self.verb} failed: {scheduler.why()}")
        return False
    except KeyboardInterrupt:
        print(f"rhubarbe-{self.verb} : keyboard interrupt - exiting")
        return False
def test_environment(self):
    """the environ= mapping given to Service must reach the service's env"""
    needle_foo = 'xxx-foo-xxx'
    needle_bar = 'xxx-bar-xxx'
    env = Variables()
    # a service that just dumps its environment
    service = Service("env", service_id='echo-environ',
                      environ={
                          'FOO': needle_foo,
                          'BAR': needle_bar,
                      })
    # start the service, then read its journal back into env.journal
    commands = [
        Run(service.start_command()),
        Run(service.journal_command(since='5s ago'),
            capture=Capture('journal', env)),
    ]
    scheduler = Scheduler()
    SshJob(scheduler=scheduler, node=SshNode("localhost"),
           commands=commands)
    self.assertEqual(scheduler.run(), True)
    # both variables must show up in the captured journal
    for needle in (f"FOO={needle_foo}", f"BAR={needle_bar}"):
        self.assertTrue(needle in env.journal)
def test_shutdown_nested_timeout(self):
    # so here we create 16 jobs for which the shutdown
    # durations will be
    # 0.0 0.1 0.2 0.3 - 1.0 1.1 1.2 1.3
    # 2.0 2.1 2.2 2.3 - 3.0 3.1 3.2 3.3
    # so if we set shutdown_timeout = 0.9s, we should
    # still find counter == 12
    cardinal = 4  # same to the square
    top = CounterScheduler(label="TOP", shutdown_timeout=0.9)
    subs = []
    # one sub-scheduler per row; each holds a sequence of 4 CounterJobs
    # whose id 10*i+j presumably drives the shutdown duration above
    # — confirm against the CounterJob helper
    for i in range(cardinal):
        sub = Scheduler(label=f"SUB {i}")
        subs.append(sub)
        sub.add(
            Sequence(*[
                CounterJob(top, 10 * i + j, aprint('ok'),
                           label=10 * i + j)
                for j in range(cardinal)
            ]))
    top.add(Sequence(*subs))
    self.assertEqual(top.counter, 0)
    self.assertTrue(top.run())
    # all 16 jobs have run
    self.assertEqual(top.counter, cardinal * cardinal)
    # shutdown_timeout expires before every shutdown completes
    self.assertFalse(top.shutdown())
    # only 4 shutdowns finished in time, leaving counter at 12
    self.assertEqual(top.counter, cardinal * (cardinal - 1))
def all_off(slice, verbose, debug):
    """
    Expects a slice name, and turns off faraday completely.

    Parameters
    ----------
    slice : str
        either ``user@host`` or just a user name, in which case the
        gateway host defaults to faraday.inria.fr
    verbose, debug : bool
        passed down to the ssh formatter / node

    Returns 0 on success and 1 otherwise.
    """
    # what argparse knows as a slice actually is a gateway (user + host)
    try:
        gwuser, gwhost = slice.split('@')
    except ValueError:
        # no (or more than one) '@': treat the whole string as the user
        gwuser, gwhost = slice, "faraday.inria.fr"
    gwnode = SshNode(hostname=gwhost, username=gwuser,
                     formatter=ColonFormatter(verbose=verbose),
                     debug=debug)
    scheduler = Scheduler(
        SshJob(
            node=gwnode,
            command=Run("rhubarbe", "bye"),
            label="turn off",
        ))
    result = scheduler.orchestrate()
    if not result:
        print("RUN KO : {}".format(scheduler.why()))
        # fix: this used to read sched.debrief() — 'sched' was never
        # defined, so the failure path raised NameError
        scheduler.debrief()
    else:
        print("faraday turned off OK")
    return 0 if result else 1
def check_lease(experiment_scheduler, sshnode):
    """
    Re-usable function that acts a bit like a python decorator
    on schedulers.

    Given an experiment described as a scheduler, return a higher-level
    scheduler that first checks for the lease, and only then proceeds
    with the experiment.
    """
    # the lease is checked on the gateway; critical=True means any
    # failure here makes the whole scheduler bail out immediately
    check_lease_job = SshJob(
        node=faraday,
        critical=True,
        command=Run("rhubarbe leases --check"),
    )
    # the experiment scheduler is nested as a regular job,
    # sequenced right after the lease check
    return Scheduler(Sequence(check_lease_job, experiment_scheduler))
def _allowed_signal(self, allowed_exits, host="localhost", username=None): print(f"Testing allowed signal allowed_exits={allowed_exits}") # global timeout total = 4 # scheduled duration long = 2 # send signal after that amount short = 1 # we always kill with TERM signal = "TERM" if username is None: username = util.localuser() node = SshNode(host, username=username) scheduler = Scheduler(timeout = total, critical=False) SshJob(node=node, scheduler=scheduler, command=Run(f"sleep {long}", allowed_exits=allowed_exits)) SshJob(node=node, scheduler=scheduler, command=f"sleep {short}; pkill -{signal} sleep") expected = signal in allowed_exits run = scheduler.run() scheduler.list() self.assertEqual(run, expected)
def test_forever(self):
    """A forever (tick) job running alongside two chained finite jobs."""
    first = SLJ(1)
    second = SLJ(1.5)
    ticker = TJ(.6)
    second.requires(first)
    sched = Scheduler(first, second, ticker)
    sched.list()
    # the forever job must not prevent orchestration from finishing
    self.assertTrue(sched.orchestrate())
    sched.list()
def _allowed_retcod(self, allowed_exits, host="localhost", username=None): print(f"Testing allowed retcod allowed_exits={allowed_exits}") # global timeout total = 4 # scheduled duration long = 1 # we always exit code 100 retcod = 1000 if username is None: username = util.localuser() node = SshNode(host, username=username) scheduler = Scheduler(timeout = total, critical=False) SshJob(node=node, scheduler=scheduler, command=Run(f"sleep {long}; exit {retcod}", allowed_exits=allowed_exits)) expected = retcod in allowed_exits run = scheduler.run() scheduler.list() self.assertEqual(run, expected)
def _test_window(self, total, window):
    """
    Run `total` atomic jobs under a jobs_window constraint and check the
    overall duration matches the theoretical (total/window) * atom.
    """
    atom = .1
    tolerance = 8  # more or less % in terms of overall time
    scheduler = Scheduler()
    jobs = [PrintJob("{}-th {}s job".format(i, atom),
                     sleep=atom, scheduler=scheduler)
            for i in range(1, total + 1)]
    import time
    t_start = time.time()
    ok = scheduler.orchestrate(jobs_window=window)
    if not ok:
        scheduler.debrief(details=True)
    duration = time.time() - t_start
    # estimate global time
    # unwindowed: overall duration is atom
    # otherwise a multiple of it (assuming total = k*window)
    expected = atom if not window else (total / window) * atom
    print('overall expected {} - measured {}'
          .format(expected, duration))
    distortion = duration / expected
    low, high = 1 - tolerance / 100, 1 + tolerance / 100
    time_ok = low <= distortion <= high
    if not time_ok:
        print("_test_window - window = {} :"
              "wrong execution time {} - not within {}% of {}"
              .format(window, duration, tolerance, expected))
    self.assertTrue(time_ok)
    self.assertTrue(ok)
def test_png_styles2(self):
    """ trying the rendering of critical and forever jobs """
    watch = Watch()
    j1 = pipes(watch, .5, "none", nb_pipes=6)
    j2 = diamond_scheduler(watch, .5, "critical")
    j3 = diamond_scheduler(watch, .5, "forever")
    j4 = diamond_scheduler(watch, .5, "both")
    # one entry per (critical, forever) combination
    flag_table = (
        (j1, False, False, "label-none"),
        (j2, True, False, "label-critical"),
        (j3, False, True, "label-forever"),
        (j4, True, True, "label-both"),
    )
    for job, crit, forever, label in flag_table:
        job.critical = crit
        job.forever = forever
        job.label = label
    sched = Scheduler(
        Sequence(j1, j2, j3, j4),
        watch=watch,
    )
    produce_png(sched, "test_png_styles2")
def test_timeout(self):
    """Three chained jobs; the global timeout expires during stage 2."""
    stage1, stage2, stage3 = [SLJ(duration)
                              for duration in (0.5, 0.6, 0.7)]
    stage2.requires(stage1)
    stage3.requires(stage2)
    sched = Scheduler(stage1, stage2, stage3)
    # should timeout in the middle of stage 2
    self.assertFalse(sched.orchestrate(timeout=1))
    sched.list()
def test_sequence6(self):
    "adding a sequence"
    sched = Scheduler()
    # three short jobs labelled 1, 2, 3 chained in a sequence
    jobs = [J(sl(0.1), label=rank) for rank in (1, 2, 3)]
    sched.add(Seq(*jobs))
    self.assertTrue(sched.orchestrate())
def test_nested_cycles(self):
    # check_cycles() must detect cycles at any nesting level,
    # and recover once the offending requirement is removed
    watch = Watch()
    def job(i):
        return Job(co_print_sleep(watch, .2, f"job {i}"),
                   label=f"job{i}")
    # a nested scheduler s2 inside the top-level s1
    js1, js2, js3 = [job(i) for i in range(11, 14)]
    s2 = Scheduler(Sequence(js1, js2, js3))
    j1, j3 = job(1), job(3)
    s1 = Scheduler(Sequence(j1, s2, j3))
    self.assertTrue(s1.check_cycles())
    # create cycle in subgraph
    js1.requires(js3)
    self.assertFalse(s1.check_cycles())
    # restore in OK state
    js1.requires(js3, remove=True)
    self.assertTrue(s1.check_cycles())
    # add cycle in toplevel
    j1.requires(j3)
    self.assertFalse(s1.check_cycles())
    # restore in OK state
    j1.requires(j3, remove=True)
    self.assertTrue(s1.check_cycles())
    # add one level down: s3 nested inside s2 (2 levels below s1)
    s3 = Scheduler()
    jss1, jss2, jss3 = [job(i) for i in range(111, 114)]
    Sequence(jss1, jss2, jss3, scheduler=s3)
    # surgery in s2; no cycles
    s2.remove(js2)
    s2.sanitize()
    s2.add(s3)
    s3.requires(js1)
    js3.requires(s3)
    self.assertTrue(s1.check_cycles())
    # add cycle in s3
    js1.requires(js3)
    self.assertFalse(s1.check_cycles())
def _test_exc_non_critical(self, verbose):
    """A non-critical job raising an exception must not fail the run."""
    print("verbose = {}".format(verbose))
    sleeper = SLJ(1)
    exploder = J(co_exception(0.5), label='non critical boom')
    sched = Scheduler(sleeper, exploder, verbose=verbose)
    # orchestration succeeds despite the exception
    self.assertTrue(sched.orchestrate())
    print(sep + 'debrief()')
    sched.debrief()
def simple():
    # five jobs, only the first three attached to the scheduler
    j1, j2, j3, j4, j5 = [job(i) for i in range(1, 6)]
    s1 = Scheduler(j1, j2, j3, label='top simple')
    # requirements pointing to jobs NOT in the scheduler
    j2.requires(j4)
    j3.requires(j5)
    self.assertEqual(len(j2.required), 1)
    self.assertEqual(len(j3.required), 1)
    # sanitize prunes requirements that point outside the scheduler
    s1.sanitize()
    self.assertEqual(len(j2.required), 0)
    self.assertEqual(len(j3.required), 0)
def test_topology(self):
    """Two nodes behind one gateway, rendered to a topology png."""
    gateway = SshNode("faraday", username="******")
    node1 = SshNode(gateway=gateway, hostname="fit01", username="******")
    node2 = SshNode(gateway=gateway, hostname="fit02", username="******")
    scheduler = Scheduler()
    for node in (node1, node2):
        SshJob(node, command='hostname', scheduler=scheduler)
    topology_as_pngfile(scheduler, "topology")
def test_creation_scheduler(self):
    """Jobs appended to a sequence after creation still join the scheduler."""
    sched = Scheduler()
    seq = Seq(J(sl(1)), J(sl(2)), scheduler=sched)
    trailer = J(sl(3), required=seq, scheduler=sched)
    # make sure that jobs appended in the sequence
    # even later on are also added to the scheduler
    seq.append(J(sl(.5)))
    self.assertEqual(len(sched.jobs), 4)
    self.assertTrue(sched.rain_check())
    self.assertTrue(sched.orchestrate())
def test_display(self):
    # Exercise Scheduler.list() rendering over every combination of
    # job state x boom x critical x forever, using fake asyncio tasks.

    class FakeTask:
        # minimal stand-in for an asyncio Task as inspected by list()
        def __init__(self):
            self._result = 0
            self._exception = None

    def annotate_job_with_fake_task(job, state, boom):
        # attach a FakeTask to `job` so that it appears in state `state`;
        # returns the job, or None for impossible combinations
        task = FakeTask()
        if state == "done":
            task._state = asyncio.futures._FINISHED
            job._task = task
            job._running = True
        elif state == "running":
            task._state = "NONE"
            job._task = task
            job._running = True
        elif state == "scheduled":
            task._state = "NONE"
            job._task = task
            job._running = False
        else:
            # "idle": no task attached at all
            pass
        # here we assume that a job that has raised an exception is
        # necessarily done
        if boom:
            if state in ("idle", "scheduled", "running"):
                print("incompatible combination boom x idle - ignored")
                return
            else:
                job._task._exception = True
        return job

    class J(AbstractJob):
        pass

    sched = Scheduler()
    previous = None
    # chain every generated job onto the previous one
    for state in "idle", "scheduled", "running", "done":
        for boom in True, False:
            for critical in True, False:
                for forever in True, False:
                    j = J(critical=critical, forever=forever,
                          label="forever={} crit.={} status={} boom={}"
                          .format(forever, critical, state, boom),
                          required=previous
                          )
                    # only add jobs whose combination makes sense
                    if annotate_job_with_fake_task(j, state, boom):
                        sched.add(j)
                    previous = j
    sched.list()
def test_timeout(self):
    """After a 3s timeout, the short jobs are done and the 10s one is not."""
    quick = J(sl(1), label="a1")
    medium = J(sl(2), label="a2")
    slow = J(sl(10), label="a3")
    result = Scheduler(quick, medium, slow).orchestrate(timeout=3)
    # the timeout makes orchestration fail overall
    self.assertEqual(result, False)
    # the two short jobs completed and returned their durations
    for job, duration in ((quick, 1), (medium, 2)):
        self.assertEqual(job.is_done(), True)
        self.assertEqual(job.result(), duration)
    # the long one was cut short
    self.assertEqual(slow.is_done(), False)
def check_expansion(self, *deferred_expected_s):
    """
    Run each deferred command over ssh with its own capturing formatter,
    then compare each captured output with the expected string.
    """
    scheduler = Scheduler()
    formatters = {}
    for deferred, _ in deferred_expected_s:
        capturer = CaptureFormatter()
        capturer.start_capture()
        formatters[deferred] = capturer
        node = SshNode(localhostname(), username=localuser(),
                       formatter=capturer)
        scheduler.add(SshJob(node=node, commands=Run(deferred)))
    scheduler.run()
    for deferred, expected in deferred_expected_s:
        self.assertEqual(formatters[deferred].get_capture(), expected)
def run_one_job(self, job, *, details=False, expected=True):
    """
    Run a single job in its own verbose scheduler; assert orchestration
    succeeds, and that the job's retcod is zero iff `expected`.
    """
    print(job)
    scheduler = Scheduler(job, verbose=True)
    ok = scheduler.run()
    scheduler.list(details=details)
    if not ok:
        scheduler.debrief()
    self.assertTrue(ok)
    # pick the retcod assertion depending on the expected outcome
    checker = self.assertEqual if expected else self.assertNotEqual
    checker(job.result(), 0)
def test_cycle(self):
    """a simple loop with 3 jobs - cannot handle that"""
    jobs = [J(sl(duration)) for duration in (1.1, 1.2, 1.3)]
    # close the loop: each job requires the next one, last requires first
    for one, next_one in zip(jobs, jobs[1:] + jobs[:1]):
        one.requires(next_one)
    sched = Scheduler(*jobs)
    # these lines seem to trigger a nasty message about a coro not being
    # waited
    self.assertFalse(sched.rain_check())
def test_forever(self):
    """A forever job is shut down when all finite jobs complete."""
    async def tick(n):
        # runs until cancelled
        while True:
            print('tick {}'.format(n))
            await asyncio.sleep(n)

    finite = J(sl(0.5), label="finite")
    endless = J(tick(0.1), forever=True, label="forever")
    sched = Scheduler(finite, endless)
    self.assertEqual(sched.orchestrate(), True)
    self.assertEqual(finite.is_done(), True)
    # a forever job never reaches the 'done' state
    self.assertEqual(endless.is_done(), False)
def test_sequence2(self):
    "a job and a sequence"
    leader = J(sl(0.1), label=1)
    member1 = J(sl(0.1), label=2)
    member2 = J(sl(0.1), label=3)
    seq = Seq(member1, member2, required=leader)
    sched = Scheduler(leader, seq)
    list_sep(sched, sep + "sequence2")
    # the sequence chains one requirement onto each of its members,
    # while the leader keeps none
    for job, nb_required in ((leader, 0), (member1, 1), (member2, 1)):
        self.assertEqual(len(job.required), nb_required)
    self.assertTrue(check_required_types(sched, "test_sequence2"))
    self.assertTrue(sched.orchestrate())