def test_task_logging_capture(engine):
    """Compare the scrubbed logs of 'capture_logs' tasks with a reference.

    Two tasks are scheduled inside a ``workers(engine, 2)`` context and
    must each yield the same three scrubbed lines. A third task is then
    used to exercise the ``fromid`` argument of ``logs()``.
    """
    reference = [
        u'my_app_logger:ERROR: <X>-<X>-<X> <X>:<X>:<X>: will be captured <X>',
        u'stdout:INFO: <X>-<X>-<X> <X>:<X>:<X>: I want to be captured',
        u'my_app_logger:DEBUG: <X>-<X>-<X> <X>:<X>:<X>: will be captured <X> also',
    ]

    # start from an empty task table
    with engine.begin() as cn:
        cn.execute('delete from rework.task')

    with workers(engine, 2):
        first = api.schedule(engine, 'capture_logs')
        second = api.schedule(engine, 'capture_logs')
        first.join()
        second.join()

        for task in (first, second):
            scrubbed = [scrub(text) for _lid, text in task.logs()]
            assert reference == scrubbed

        third = api.schedule(engine, 'capture_logs')
        third.join()

        # asking for the logs starting from the first id yields two entries
        logids = [lid for lid, _text in third.logs()]
        assert 2 == len(third.logs(fromid=logids[0]))
def test_worker_max_runs(engine):
    """Check what happens to a worker started with a ``maxruns`` limit.

    With ``maxruns=2`` no shutdown is asked after the first task, and the
    worker row stops being flagged as running after the second one.
    With ``maxruns=1`` a second scheduled task stays queued.
    """
    running_query = 'select running from rework.worker where id = {}'

    def stopped(res):
        # explicit comparison kept on purpose: a None value must not match
        return res.scalar() == False

    with workers(engine, maxruns=2) as mon:
        wid = mon.wids[0]

        task = api.schedule(engine, 'print_sleep_and_go_away', 'a')
        task.join()
        assert task.output == 'aa'

        # one run done: the worker has not been asked to shut down
        assert not Worker(engine.url, wid).shutdown_asked()

        task = api.schedule(engine, 'print_sleep_and_go_away', 'a')
        task.join()

        guard(engine, running_query.format(wid), stopped)

    with workers(engine, maxruns=1) as mon:
        wid = mon.wids[0]

        first = api.schedule(engine, 'print_sleep_and_go_away', 'a')
        second = api.schedule(engine, 'print_sleep_and_go_away', 'a')
        first.join()

        guard(engine, running_query.format(wid), stopped)

        # the second task was never picked up
        assert second.state == 'queued'
        assert second.worker is None
        assert second._propvalue('finished') is None
def test_more_unstarted(engine):
    """Check that ``cleanup_unstarted`` removes a manually inserted worker row.

    The task count is verified to be unaffected by the cleanup.
    """
    count_workers = 'select count(*) from rework.worker'
    count_tasks = 'select count(*) from rework.task'

    def scalar(query):
        return engine.execute(query).scalar()

    with engine.begin() as cn:
        cn.execute('delete from rework.task')
        cn.execute('delete from rework.worker')

    with workers(engine) as mon:
        assert scalar(count_workers) == 1

        with engine.begin() as cn:
            # unborn worker
            insert('rework.worker').values(
                host='127.0.0.1',
                domain='default'
            ).do(cn)

        assert scalar(count_workers) == 2

        first = api.schedule(engine, 'raw_input', b'foo')
        second = api.schedule(engine, 'raw_input', b'foo')
        first.join()
        second.join()

        assert scalar(count_tasks) == 2
        assert scalar(count_workers) == 2

        mon.cleanup_unstarted()

        assert scalar(count_workers) == 1
        assert scalar(count_tasks) == 2
def test_timeout(engine):
    """Check the timedelta helpers, then the monitor's timeout tracking.

    Two 'infinite_loop_timeout' tasks must go through 'aborting' and
    'aborted' while the 'infinite_loop_long_timeout' task keeps running.
    """
    # first, a small unittest on utility functions
    delta = datetime(2018, 3, 3, 12, 45, 30) - datetime(2018, 1, 1)
    iso = delta_isoformat(delta)
    assert iso == 'P61DT0H0M45930S'
    assert delta == parse_delta(iso)

    with workers(engine, numworkers=3) as mon:
        short_lived = [
            api.schedule(engine, 'infinite_loop_timeout'),
            api.schedule(engine, 'infinite_loop_timeout'),
        ]
        long_lived = api.schedule(engine, 'infinite_loop_long_timeout')

        for task in short_lived + [long_lived]:
            task.join('running')

        time.sleep(1)  # make sure we're going to time out
        mon.track_timeouts()
        for task in short_lived:
            assert task.state == 'aborting'
        assert long_lived.state == 'running'

        mon.preemptive_kill()
        for task in short_lived:
            assert task.state == 'aborted'
        assert long_lived.state == 'running'
def test_monitor_step(engine):
    """Drive a ``Monitor`` through two ``step()`` calls around a simulated crash.

    The first step must report two new integer worker ids. Every worker
    process still alive is then killed, ``reap_dead_workers`` must report
    exactly those ids, and a second step must report two different ones.

    The monitor's workers are killed in a ``finally`` clause so that a
    failing assertion does not leave the spawned processes behind.
    """
    mon = Monitor(engine)
    try:
        api.schedule(engine, 'print_sleep_and_go_away', 21,
                     metadata={'user': '******'})

        stats = mon.step()
        assert len(stats.new) == 2
        assert all(isinstance(w, int) for w in stats.new)

        # simulate a hard crash
        for proc in mon.workers.values():
            if proc.poll() is None:
                kill_process_tree(proc.pid)

        dead = mon.reap_dead_workers()
        assert dead == stats.new

        stats2 = mon.step()
        assert stats2.new != stats.new
        assert len(stats2.new) == 2
        assert all(isinstance(w, int) for w in stats2.new)
    finally:
        mon.killall()
def run_stuff():
    """Run three tasks with two workers and check the CLI listings.

    ``engine`` and ``cli`` are free names resolved outside this function.
    """
    with workers(engine, numworkers=2):
        r = cli('list-workers', engine.url)
        assert r.output.count('running') == 2

        tasks = [
            api.schedule(engine, 'print_sleep_and_go_away', 1),
            api.schedule(engine, 'print_sleep_and_go_away', 2),
            api.schedule(engine, 'print_sleep_and_go_away', 3),
        ]
        # Join every task unconditionally: chaining the calls with `or`
        # would skip the remaining joins as soon as one returned a truthy
        # value.
        for task in tasks:
            task.join()

        r = cli('list-tasks', engine.url)
        assert r.output.count('done') == 3
def test_worker_two_runs_nondfefault_domain(engine):
    """Schedule three tasks for workers limited to two runs.

    The workers are started with ``maxruns=2`` in the 'nondefault' domain;
    the first two tasks must end up 'done' and the third stay 'queued'.
    """
    with engine.begin() as cn:
        cn.execute('delete from rework.worker')

    with workers(engine, maxruns=2, domain='nondefault'):
        first, second, third = [
            api.schedule(engine, 'run_in_non_default_domain')
            for _ in range(3)
        ]

        first.join()
        second.join()

        assert first.status == 'done'
        assert second.status == 'done'
        assert third.status == 'queued'
def test_cleanup_unstarted(engine):
    """Check the return value and effect of ``Monitor.cleanup_unstarted``.

    A worker row is inserted by hand next to the one created by the
    monitor. The first cleanup must report one deletion and leave the
    task table alone; a second cleanup must report nothing to delete.
    """
    def count(query):
        return engine.execute(query).scalar()

    with engine.begin() as cn:
        cn.execute('delete from rework.task')
        cn.execute('delete from rework.worker')

    mon = Monitor(engine, 'default', None, 1, 1, 0, False)
    mon.register()
    mon.ensure_workers()

    with engine.begin() as cn:
        # unborn worker
        insert('rework.worker').values(
            host='127.0.0.1',
            domain='default'
        ).do(cn)

    assert count('select count(*) from rework.worker') == 2

    task = api.schedule(engine, 'raw_input', b'foo')
    task.join()

    mon.killall(msg=None)
    assert mon.cleanup_unstarted() == 1

    assert count('select count(*) from rework.worker') == 1
    assert count('select count(*) from rework.task') == 1

    assert mon.cleanup_unstarted() == 0
def test_basic_task_operations(engine):
    """Exercise the registered operations list and the basic ``Task`` API.

    Covers: the content of ``rework.operation``, running a task taken
    from the queue in-process, ``Task.byid`` for existing and missing
    ids, and the error raised when scheduling an unknown operation.
    """
    api.schedule(engine, 'print_sleep_and_go_away', 21,
                 metadata={'user': '******'})

    rows = engine.execute(
        'select name, path from rework.operation order by name'
    ).fetchall()
    known = [(name, Path(path).name) for name, path in rows]

    opnames = (
        'allocate_and_leak_mbytes',
        'capture_logs',
        'flush_captured_stdout',
        'infinite_loop',
        'infinite_loop_long_timeout',
        'infinite_loop_timeout',
        'log_swarm',
        'normal_exception',
        'print_sleep_and_go_away',
        'raw_input',
        'run_in_non_default_domain',
        'stderr_swarm',
        'unstopable_death',
    )
    # every operation is expected to come from a file named tasks.py
    assert [(name, 'tasks.py') for name in opnames] == known

    mon = Monitor(engine)
    wid = mon.new_worker()
    task = Task.fromqueue(engine, wid)
    task.run()
    assert task.output == 42
    assert task.metadata == {'user': '******'}

    created = task._propvalue('queued')
    now = datetime.now()
    assert now.year == created.year
    assert now.month == created.month

    same = Task.byid(engine, task.tid)
    assert (same.tid, same.operation) == (task.tid, task.operation)

    assert Task.byid(engine, 42000) is None

    with pytest.raises(Exception) as err:
        api.schedule(engine, 'no_such_task')
    assert err.value.args[0] == 'No operation was found for these parameters'
def test_run_worker(engine):
    """Run a ``Worker`` in-process (``maxruns=1``) and expect the task done."""
    monitor = Monitor(engine, maxruns=1)
    wid = monitor.new_worker()

    task = api.schedule(engine, 'print_sleep_and_go_away', 0)

    # the worker is given the parent pid of the current process
    Worker(engine.url, wid, os.getppid(), maxruns=1).run()

    assert task.state == 'done'
def test_killed_task(engine):
    """Check the final state of a running task once its ``workers`` context exits.

    On a normal exit the task must be 'aborted' with no traceback. When
    the context is left through an exception, the task must be 'aborted'
    and its traceback must mention the exception message.
    """
    with workers(engine):
        t = api.schedule(engine, 'infinite_loop')
        t.join('running')

    assert t.state == 'aborted'
    assert t.traceback is None

    try:
        with workers(engine):
            t = api.schedule(engine, 'infinite_loop')
            t.join('running')
            raise Exception('kill the monitor')
    except Exception:
        # The failure above is deliberate. Catch `Exception` rather than
        # using a bare `except` so that KeyboardInterrupt and SystemExit
        # still propagate.
        pass

    assert t.state == 'aborted'
    assert 'kill the monitor' in t.traceback
def test_task_error(engine):
    """Check what is recorded for a task running 'normal_exception'.

    The traceback must end with 'oops', the state must be 'failed' and
    the 'finished' property must be greater than the 'queued' one.
    """
    with workers(engine):
        task = api.schedule(engine, 'normal_exception')
        task.join()

        traceback_text = task.traceback.strip()
        assert traceback_text.endswith('oops')
        assert task.state == 'failed'

        queued_at = task._propvalue('queued')
        finished_at = task._propvalue('finished')
        assert finished_at > queued_at
def test_worker_max_mem(engine):
    """Check memory tracking and shutdown for workers started with ``maxmem=100``.

    The ``mem`` column is 0 until ``track_resources`` is called, after
    which it must exceed 50. After a second allocating task, the
    ``shutdown`` column must become true.
    """
    with workers(engine, maxmem=100) as mon:
        wid = mon.wids[0]

        def recorded_mem():
            query = 'select mem from rework.worker where id = {}'.format(wid)
            return engine.execute(query).scalar()

        first = api.schedule(engine, 'allocate_and_leak_mbytes', 50)
        first.join()

        assert recorded_mem() == 0
        mon.track_resources()
        assert recorded_mem() > 50

        second = api.schedule(engine, 'allocate_and_leak_mbytes', 100)
        second.join()

        guard(
            engine,
            'select shutdown from rework.worker where id = {}'.format(wid),
            lambda r: r.scalar() == True
        )

        assert Worker(engine.url, wid).shutdown_asked()
def test_task_logs(engine, cli):
    """Compare the scrubbed output of the 'log-task' command with a reference."""
    expected = (
        '\x1b[<X>mmy_app_logger:ERROR: <X>-<X>-<X> <X>:<X>:<X>: will be captured <X>\n'
        '\x1b[<X>mstdout:INFO: <X>-<X>-<X> <X>:<X>:<X>: I want to be captured\n'
        '\x1b[<X>mmy_app_logger:DEBUG: <X>-<X>-<X> <X>:<X>:<X>: will be captured <X> also'
    )

    with workers(engine):
        task = api.schedule(engine, 'capture_logs')
        task.join()

        result = cli('log-task', engine.url, task.tid)
        assert expected == scrub(result.output)
def test_process_lock(engine):
    """Join a 'stderr_swarm' task from a secondary thread, then check it is done.

    The thread joins a fresh ``Task`` object looked up by id, with a
    timeout of 2.
    """
    with workers(engine):
        task = api.schedule(engine, 'stderr_swarm')

        waiter = threading.Thread(
            target=lambda: Task.byid(task.engine, task.tid).join(timeout=2)
        )
        waiter.start()
        waiter.join()

        assert task.state == 'done'
def test_domain_map(engine, cleanup):
    """Freeze 'nondefault' operations under a domain map and run one.

    The operations are frozen with 'nondefault' mapped to 'fancy'; a task
    scheduled while workers run in the 'fancy' domain must end up 'done'.
    """
    with engine.begin() as cn:
        cn.execute('delete from rework.operation')

    remap = {'nondefault': 'fancy'}
    api.freeze_operations(engine, domain='nondefault', domain_map=remap)

    with workers(engine, maxruns=1, domain='fancy'):
        task = api.schedule(engine, 'run_in_non_default_domain')
        task.join()
        assert task.status == 'done'
def test_task_input_output(engine):
    """Check ``input``/``output`` against their raw counterparts.

    For a 'raw_input' task, ``input`` and ``output`` must raise
    ``TypeError`` while the raw accessors return bytes. For an
    'unstopable_death' task, all four accessors must be ``None``.
    """
    with workers(engine) as mon:
        task = api.schedule(engine, 'raw_input', rawinputdata=b'Hello Babar')
        task.join()

        with pytest.raises(TypeError):
            task.input
        assert task.raw_input == b'Hello Babar'

        with pytest.raises(TypeError):
            task.output
        assert task.raw_output == b'Hello Babar and Celeste'

        task = api.schedule(engine, 'unstopable_death')
        wait_true(mon.reap_dead_workers)

        for accessor in ('input', 'raw_input', 'output', 'raw_output'):
            assert getattr(task, accessor) is None
def test_task_abortion(engine):
    """Follow an 'infinite_loop' task through abort and preemptive kill.

    Steps, in order: wait for the task to be attached to the worker,
    check that a short ``join`` times out, check cpu accounting, request
    the abort, let the monitor kill the worker, then verify the final
    task state, the death diagnostics and the ordering of the
    'queued' / 'started' / 'finished' properties.
    """
    with workers(engine) as mon:
        wid = mon.wids[0]

        t = api.schedule(engine, 'infinite_loop', True)
        # wait until exactly one task row references the worker
        guard(
            engine,
            'select count(id) from rework.task where worker = {}'.format(wid),
            lambda res: res.scalar() == 1)

        assert t.state == 'running'

        # joining with a short timeout must raise, carrying the task
        # itself as first argument
        with pytest.raises(TimeOut) as err:
            t.join(timeout=.1)
        assert err.value.args[0] == t

        # check cpu usage
        mon.track_resources()
        cpu = engine.execute(
            'select cpu from rework.worker where id = {}'.format(
                wid)).scalar()
        assert cpu > 0

        t.abort()
        assert t.aborted
        # this is potentially racy but might work most of the time
        assert t.state == 'aborting'

        mon.preemptive_kill()
        t.join()
        assert t.state == 'aborted'
        assert t.deathinfo.startswith('preemptive kill')

        # one dead worker
        guard(engine,
              'select running from rework.worker where id = {}'.format(wid),
              lambda res: not res.scalar())

        diagnostic = engine.execute(
            'select deathinfo from rework.worker where id = {}'.format(
                wid)).scalar()

        # the worker-side diagnostic is compared after scrubbing
        assert 'preemptive kill at <X>-<X>-<X> <X>:<X>:<X>.<X>+<X>:<X>' == scrub(
            diagnostic)

        queued = t._propvalue('queued')
        started = t._propvalue('started')
        finished = t._propvalue('finished')
        assert finished > started > queued
def test_captured_stdout(engine):
    """Check the seven log entries produced by 'flush_captured_stdout'.

    Only the text after the last ':' of each log line is compared.
    """
    expected_tails = [
        ' Hello World',
        ' This is an unfinished statement which could go on for a long time, but I have had enough',
        ' A truly multiline',
        ' statement.',
        ' ',
        ' Honor the space.',
        ' (hi) '
    ]

    with workers(engine):
        task = api.schedule(engine, 'flush_captured_stdout')
        task.join()
        assert task.state == 'done'

        entries = task.logs()
        assert len(entries) == 7

        tails = []
        for _lid, line in entries:
            tails.append(line.split(':')[-1])
        assert tails == expected_tails
def test_domain(engine):
    """Check which of two tasks runs, depending on the workers' domain.

    Workers started without a ``domain`` argument must run
    'print_sleep_and_go_away' and leave 'run_in_non_default_domain'
    queued; workers started with ``domain='nondefault'`` must do the
    opposite.
    """
    scenarios = (
        # (extra workers() arguments, non-default op status, other op status)
        ({}, 'queued', 'done'),
        ({'domain': 'nondefault'}, 'done', 'queued'),
    )

    for extra, nondefault_status, other_status in scenarios:
        with workers(engine, maxruns=1, **extra) as mon:
            wid = mon.wids[0]
            nondefault = api.schedule(engine, 'run_in_non_default_domain')
            other = api.schedule(engine, 'print_sleep_and_go_away', 1)

            # wait for the worker row to stop being flagged as running
            guard(
                engine,
                'select running from rework.worker where id = {}'.format(wid),
                lambda r: r.scalar() == False
            )

            assert nondefault.status == nondefault_status
            assert other.status == other_status
def test_worker_unplanned_death(engine):
    """Check the bookkeeping after a worker dies running 'unstopable_death'.

    The worker must be reported by ``reap_dead_workers``, and both the
    worker row and the task must carry the same ``deathinfo`` text.
    """
    cause = 'Unaccounted death (hard crash)'

    with workers(engine) as mon:
        wid = mon.wids[0]

        task = api.schedule(engine, 'unstopable_death')

        reaped = wait_true(mon.reap_dead_workers)
        assert wid in reaped

        guard(
            engine,
            'select deathinfo from rework.worker where id = {}'.format(wid),
            lambda r: r.scalar() == cause
        )

        assert task.state == 'aborted'

        queued_at = task._propvalue('queued')
        finished_at = task._propvalue('finished')
        assert finished_at > queued_at

        assert task.deathinfo == cause
def relaunch_task(tid):
    """Schedule a new task copying the operation, input and metadata of ``tid``.

    Returns the JSON-encoded id of the new task. Returns JSON ``0`` when
    the 'relaunch' permission is missing or when no task has this id.
    ``engine`` and ``has_permission`` are resolved outside this function.
    """
    # the task is only looked up when the caller has the permission
    task = Task.byid(engine, tid) if has_permission('relaunch') else None
    if task is None:
        return json.dumps(0)

    # fetch the operation (name, host, domain) the task was created from
    query = select('name', 'host', 'domain').table('rework.operation')
    query = query.join('rework.task as task on (task.operation = operation.id)')
    query = query.where('task.id = %(tid)s', tid=task.tid)
    op = query.do(engine).fetchone()

    relaunched = api.schedule(
        engine,
        op.name,
        rawinputdata=task.raw_input,
        domain=op.domain,
        hostid=op.host,
        metadata=task.metadata
    )
    return json.dumps(relaunched.tid)
def test_monitors_table(engine, client, refresh):
    """Compare the '/workers-table' page with stored reference files.

    The page is checked once with idle workers and once with an
    'abortme' task running. When ``refresh`` is true, the reference
    files are rewritten from the current output before the comparison.
    """
    def check_page(refname):
        res = client.get('/workers-table')
        html = normalize(scrub(res.text))
        refpath = DATADIR / refname
        if refresh:
            refpath.write_bytes(html)
        assert html == refpath.read_bytes()

    with engine.begin() as cn:
        cn.execute('delete from rework.monitor')
        cn.execute('delete from rework.worker')

    with workers(engine):
        check_page('monitors-table.html')

        task = api.schedule(engine, 'abortme')
        task.join('running')

        check_page('monitors-table-1-task.html')
def _schedule_job(engine, service, args, inputfile):
    """Schedule ``service`` from request arguments; return the task id as JSON.

    ``args`` must provide ``user``, ``hostid``, ``domain`` and
    ``options``. ``abort(400, ...)`` is called when ``user`` is None or
    when scheduling raises (with the exception text as message).
    NOTE(review): the final return relies on ``abort`` raising - confirm.
    """
    user = args.user
    if user is None:
        abort(400, 'user parameter is mandatory')

    metadata = {'user': user}
    if args.options:
        metadata.update(options=args.options)

    schedule_kw = {
        'rawinputdata': inputfile,
        # fall back to the current host when none is given
        'hostid': args.hostid or api.host(),
        'domain': args.domain,
        'metadata': metadata,
    }

    try:
        task = api.schedule(engine, service, **schedule_kw)
    except Exception as err:
        abort(400, str(err))

    return json.dumps(task.tid)
def test_logging_stress_test(engine):
    """Check that the log lines of a 'log_swarm' task are stored in order."""
    with engine.begin() as cn:
        cn.execute('delete from rework.log')

    with workers(engine):
        task = api.schedule(engine, 'log_swarm')
        task.join()

        query = 'select id, line from rework.log where task = {}'.format(
            task.tid
        )
        records = engine.execute(query).fetchall()

        # we check that there is a constant offset between the
        # log id and the idx that is emitted by the task code
        # => ordering has been preserved
        offsets = []
        for lid, line in records:
            emitted_idx = int(line.rsplit(',')[-1].strip())
            offsets.append(lid - emitted_idx)
        assert all(offsets[0] == offset for offset in offsets)

        assert len(list(task.logs())) == 249
        assert len(list(task.logs(fromid=245))) == 4 + offsets[0]
def test_abort_task(engine, cli):
    """Follow the CLI listings while a task is started, then aborted.

    The scrubbed output of 'list-workers' and 'list-tasks' is compared
    with reference strings at three points: worker idle, task running,
    and after 'abort-task' followed by the monitor's preemptive kill.
    """
    url = engine.url

    with workers(engine) as mon:
        # idle worker
        r = cli('list-workers', url)
        assert ('<X> <X>@<X>.<X>.<X>.<X> <X> Mb [running (idle)] '
                '[<X>-<X>-<X> <X>:<X>:<X>.<X>+<X>] '
                '→ [<X>-<X>-<X> <X>:<X>:<X>.<X>+<X>]') == scrub(r.output)

        t = api.schedule(engine, 'infinite_loop')
        t.join('running')  # let the worker pick up the task

        # the worker line now refers to a task
        r = cli('list-workers', url)
        assert ('<X> <X>@<X>.<X>.<X>.<X> <X> Mb [running #<X>]'
                ' [<X>-<X>-<X> <X>:<X>:<X>.<X>+<X>] '
                '→ [<X>-<X>-<X> <X>:<X>:<X>.<X>+<X>]') == scrub(r.output)

        r = cli('list-tasks', url)
        assert ('<X> infinite_loop running [<X>-<X>-<X> <X>:<X>:<X>.<X>+<X>]'
                ' → [<X>-<X>-<X> <X>:<X>:<X>.<X>+<X>]') == scrub(r.output)

        # request the abort through the cli, then let the monitor act on it
        r = cli('abort-task', url, t.tid)
        mon.preemptive_kill()
        t.join()

        r = cli('list-workers', url)
        assert ('<X> <X>@<X>.<X>.<X>.<X> <X> Mb [dead] '
                '[<X>-<X>-<X> <X>:<X>:<X>.<X>+<X>] '
                '→ [<X>-<X>-<X> <X>:<X>:<X>.<X>+<X>] '
                '→ [<X>-<X>-<X> <X>:<X>:<X>.<X>+<X>] '
                'preemptive kill at '
                '<X>-<X>-<X> <X>:<X>:<X>.<X>+<X>:<X>') == scrub(r.output)

        r = cli('list-tasks', url)
        assert ('<X> infinite_loop aborted [<X>-<X>-<X> <X>:<X>:<X>.<X>+<X>] '
                '→ [<X>-<X>-<X> <X>:<X>:<X>.<X>+<X>] '
                '→ [<X>-<X>-<X> <X>:<X>:<X>.<X>+<X>]') == scrub(r.output)
def test_kill_worker(engine, cli):
    """Kill a busy worker through the CLI and check the resulting listings.

    After 'kill-worker' and the monitor's preemptive kill, the scrubbed
    'list-workers' output must show a dead worker and 'list-tasks' an
    aborted 'infinite_loop' task.
    """
    url = engine.url

    # start from an empty worker table
    with engine.begin() as cn:
        cn.execute('delete from rework.worker')

    with workers(engine) as mon:
        t = api.schedule(engine, 'infinite_loop')
        t.join('running')  # let the worker pick up the task

        # request the kill through the cli, then let the monitor act on it
        r = cli('kill-worker', url, mon.wids[0])
        mon.preemptive_kill()

        r = cli('list-workers', url)
        assert ('<X> <X>@<X>.<X>.<X>.<X> <X> Mb [dead] '
                '[<X>-<X>-<X> <X>:<X>:<X>.<X>+<X>] '
                '→ [<X>-<X>-<X> <X>:<X>:<X>.<X>+<X>] '
                '→ [<X>-<X>-<X> <X>:<X>:<X>.<X>+<X>] '
                'preemptive kill at '
                '<X>-<X>-<X> <X>:<X>:<X>.<X>+<X>:<X>') == scrub(r.output)

        r = cli('list-tasks', url)
        assert ('<X> infinite_loop aborted [<X>-<X>-<X> <X>:<X>:<X>.<X>+<X>] '
                '→ [<X>-<X>-<X> <X>:<X>:<X>.<X>+<X>] '
                '→ [<X>-<X>-<X> <X>:<X>:<X>.<X>+<X>]') == scrub(r.output)
def test_basic_worker_task_execution(engine):
    """Follow a task from 'queued' to 'done' once a monitor provides a worker.

    The task and worker tables are polled with ``guard`` before and
    after ``Monitor.ensure_workers`` and after the task has been joined.
    """
    queued_tasks = "select count(id) from rework.task where status = 'queued'"
    running_tasks = "select count(id) from rework.task where status = 'running'"
    running_workers = 'select count(id) from rework.worker where running = true'

    def expect_count(query, expected):
        guard(engine, query, lambda res: res.scalar() == expected)

    task = api.schedule(engine, 'print_sleep_and_go_away', 21)
    assert task.state == 'queued'

    # nothing can run yet: one queued task, no worker
    expect_count(queued_tasks, 1)
    expect_count(running_workers, 0)

    mon = Monitor(engine, 'default', maxworkers=1)
    mon.ensure_workers()

    expect_count(running_workers, 1)
    expect_count(running_tasks, 1)

    task.join()
    assert task.output == 42
    assert task.state == 'done'

    expect_count(running_tasks, 0)
def test_schedule_domain(engine, cleanup):
    """Check operation selection by domain and host in ``api.schedule``.

    Operations named 'foo' are frozen for the 'test' domain and twice
    for 'production' (current host and '192.168.122.42'). Scheduling
    without a domain must be reported as ambiguous, and scheduling for
    an unknown host or domain must raise.
    """
    reset_ops(engine)

    # NOTE(review): these modules look imported for their side effects
    # only (presumably declaring the operations) - confirm
    from . import task_testenv
    from . import task_prodenv

    frozen = (
        {'domain': 'test'},
        {'domain': 'production'},
        {'domain': 'production', 'hostid': '192.168.122.42'},
    )
    for selector in frozen:
        api.freeze_operations(engine, **selector)

    with pytest.raises(ValueError) as err:
        api.schedule(engine, 'foo')
    assert err.value.args[0] == 'Ambiguous operation selection'

    api.schedule(engine, 'foo', domain='test')
    # there two of them but .schedule will by default pick the one
    # matching the *current* host
    api.schedule(engine, 'foo', domain='production')
    api.schedule(engine, 'foo', domain='production', hostid='192.168.122.42')
    api.schedule(engine, 'foo', domain='production', hostid=host())

    rows = engine.execute(
        'select host from rework.task as t, rework.operation as op '
        'where t.operation = op.id'
    ).fetchall()
    # one-column rows; the loop name deliberately avoids shadowing host()
    hosts = [task_host for task_host, in rows]

    assert hosts.count(host()) == 3
    assert hosts.count('192.168.122.42') == 1

    with pytest.raises(Exception):
        api.schedule(engine, 'foo', domain='production', hostid='172.16.0.1')

    with pytest.raises(Exception):
        api.schedule(engine, 'foo', domain='bogusdomain')
def test_task_rawinput(engine):
    """Run 'raw_input' on raw bytes and check the raw bytes it returns."""
    payload = b'Babar'

    with workers(engine):
        task = api.schedule(engine, 'raw_input', rawinputdata=payload)
        task.join()

        result = task.raw_output
        assert result == b'Babar and Celeste'