def test_worker_max_runs(engine):
    with workers(engine, maxruns=2) as mon:
        wid = mon.wids[0]

        t = api.schedule(engine, 'print_sleep_and_go_away', 'a')
        t.join()
        assert t.output == 'aa'

        worker = Worker(engine.url, wid)
        assert not worker.shutdown_asked()

        t = api.schedule(engine, 'print_sleep_and_go_away', 'a')
        t.join()

        guard(engine,
              'select running from rework.worker where id = {}'.format(wid),
              lambda r: r.scalar() == False)

    with workers(engine, maxruns=1) as mon:
        wid = mon.wids[0]

        t1 = api.schedule(engine, 'print_sleep_and_go_away', 'a')
        t2 = api.schedule(engine, 'print_sleep_and_go_away', 'a')
        t1.join()

        guard(engine,
              'select running from rework.worker where id = {}'.format(wid),
              lambda r: r.scalar() == False)

        assert t2.state == 'queued'
        assert t2.worker is None
        end = t2._propvalue('finished')
        assert end is None
def test_shutdown_worker(engine, cli):
    url = engine.url
    with workers(engine) as mon:
        cli('shutdown-worker', url, mon.wids[0])

        guard(
            engine,
            'select running from rework.worker where id = {}'.format(mon.wids[0]),
            lambda res: res.scalar() == 0
        )

        r = cli('list-workers', url)
        assert 'explicit shutdown' in scrub(r.output)
def test_task_abortion(engine):
    with workers(engine) as mon:
        wid = mon.wids[0]

        t = api.schedule(engine, 'infinite_loop', True)
        guard(
            engine,
            'select count(id) from rework.task where worker = {}'.format(wid),
            lambda res: res.scalar() == 1
        )
        assert t.state == 'running'

        with pytest.raises(TimeOut) as err:
            t.join(timeout=.1)
        assert err.value.args[0] == t

        # check cpu usage
        mon.track_resources()
        cpu = engine.execute(
            'select cpu from rework.worker where id = {}'.format(wid)
        ).scalar()
        assert cpu > 0

        t.abort()
        assert t.aborted
        # this is potentially racy but might work most of the time
        assert t.state == 'aborting'

        mon.preemptive_kill()
        t.join()
        assert t.state == 'aborted'
        assert t.deathinfo.startswith('preemptive kill')

        # one dead worker
        guard(engine,
              'select running from rework.worker where id = {}'.format(wid),
              lambda res: not res.scalar())

        diagnostic = engine.execute(
            'select deathinfo from rework.worker where id = {}'.format(wid)
        ).scalar()
        assert 'preemptive kill at <X>-<X>-<X> <X>:<X>:<X>.<X>+<X>:<X>' == scrub(diagnostic)

        queued = t._propvalue('queued')
        started = t._propvalue('started')
        finished = t._propvalue('finished')
        assert finished > started > queued
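# A minimal sketch, assuming rework's `api.task` declaration style, of a task
# like 'infinite_loop' that test_task_abortion relies on: it never returns on
# its own, so it can only end through abortion or a preemptive kill. The body
# below is an illustrative assumption, not the project's actual definition.
#
# import time
# from rework import api
#
# @api.task
# def infinite_loop(task):
#     while True:
#         time.sleep(1)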
def ensure_workers(self):
    # rid self.workers of dead things
    stats = self._cleanup_workers()

    # update mem/cpu stats
    self.track_resources()

    # reduce by one the worker pool if possible
    shuttingdown = self.shrink_workers()
    if shuttingdown is not None:
        stats.shrink.append(shuttingdown)

    # compute the needed workers
    with self.engine.begin() as cn:
        numworkers = self.num_workers
        busycount = len(self.busy_workers(cn))
        waiting = self.queued_tasks(cn)
        idle = numworkers - busycount
        assert idle >= 0
        needed_workers = clip(waiting - idle,
                              self.minworkers - numworkers,
                              self.maxworkers - numworkers)

    # bail out if there's nothing to do
    if not needed_workers:
        return stats

    procs = []
    debug_ports = []
    for offset in range(needed_workers):
        debug_ports.append(self.grab_debug_port(offset))

    for debug_port in debug_ports:
        procs.append(self.spawn_worker(debug_port=debug_port))

    # wait til they are up and running
    if procs:
        guard(self.engine,
              "select count(id) from rework.worker where running = true "
              "and id in ({})".format(','.join(repr(wid) for wid in procs)),
              lambda c: c.scalar() == len(procs),
              timeout=20 + self.maxworkers * 2)

    stats.new.extend(procs)
    return stats
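# `clip` above bounds the number of workers to spawn (or retire) between the
# minworkers/maxworkers deltas. A minimal reimplementation sketch of what such
# a helper presumably does (the real helper may differ):
#
# def clip(val, low, high):
#     # constrain val to the [low, high] interval
#     if val < low:
#         return low
#     if val > high:
#         return high
#     return val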
def test_debug_port(engine, cli):
    with workers(engine, numworkers=3, debug=True) as mon:
        r = cli('list-workers', engine.url)
        assert '6666' in r.output
        assert '6667' in r.output
        assert '6668' in r.output

        with pytest.raises(AssertionError):
            mon.grab_debug_port(0)

        killtarget = mon.wids[0]
        cli('kill-worker', engine.url, killtarget)
        killed = mon.preemptive_kill()
        assert len(killed) == 1
        assert killed[0] == killtarget
        assert len(mon.wids) == 2

        guard(
            engine,
            'select running from rework.worker where id = {}'.format(killtarget),
            lambda r: not r.scalar()
        )

        r = cli('list-workers', engine.url)
        assert '[dead] debugport = 6666' in r.output

        port = mon.grab_debug_port(0)
        assert port == 6666  # recycled

        stats = mon.ensure_workers()
        assert stats.new

        guard(
            engine,
            'select running from rework.worker where id = {}'.format(stats.new[0]),
            lambda r: r.scalar()
        )

        r = cli('list-workers', engine.url)
        assert '[dead] debugport = 6666' in r.output
        # proper recycling did happen
        assert '(idle)] debugport = 6666' in r.output

    with workers(engine, numworkers=3, debug=True):
        r = cli('list-workers', engine.url)
        assert '6666' in r.output
        assert '6667' in r.output
        assert '6668' in r.output
def test_worker_unplanned_death(engine):
    with workers(engine) as mon:
        wid = mon.wids[0]

        t = api.schedule(engine, 'unstopable_death')

        deadlist = wait_true(mon.reap_dead_workers)
        assert wid in deadlist

        guard(engine,
              'select deathinfo from rework.worker where id = {}'.format(wid),
              lambda r: r.scalar() == 'Unaccounted death (hard crash)')

        assert t.state == 'aborted'
        start = t._propvalue('queued')
        end = t._propvalue('finished')
        assert end > start
        assert t.deathinfo == 'Unaccounted death (hard crash)'
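# A minimal sketch, assuming rework's `api.task` declaration style, of a task
# like 'unstopable_death': it terminates the worker process abruptly, so the
# monitor only discovers the death after the fact (hence the "Unaccounted
# death (hard crash)" diagnostic). Using os._exit here is an illustrative
# assumption, not necessarily the project's actual definition.
#
# import os
# from rework import api
#
# @api.task
# def unstopable_death(task):
#     # kill the hosting worker process without any cleanup
#     os._exit(0)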
def test_worker_kill(engine):
    with workers(engine) as mon:
        wid = mon.wids[0]

        with engine.begin() as cn:
            update('rework.worker').where(id=wid).values(kill=True).do(cn)

        guard(engine,
              'select kill from rework.worker where id = {}'.format(wid),
              lambda r: r.scalar() == True)

        mon.preemptive_kill()

        guard(engine,
              'select count(id) from rework.worker where running = true',
              lambda r: r.scalar() == 0)

        assert engine.execute(
            'select deathinfo from rework.worker where id = %(wid)s',
            wid=wid
        ).scalar().startswith('preemptive kill')
def test_worker_max_mem(engine):
    with workers(engine, maxmem=100) as mon:
        wid = mon.wids[0]

        t1 = api.schedule(engine, 'allocate_and_leak_mbytes', 50)
        t1.join()
        assert engine.execute(
            'select mem from rework.worker where id = {}'.format(wid)
        ).scalar() == 0

        mon.track_resources()
        assert engine.execute(
            'select mem from rework.worker where id = {}'.format(wid)
        ).scalar() > 50

        t2 = api.schedule(engine, 'allocate_and_leak_mbytes', 100)
        t2.join()
        guard(engine,
              'select shutdown from rework.worker where id = {}'.format(wid),
              lambda r: r.scalar() == True)

        worker = Worker(engine.url, wid)
        assert worker.shutdown_asked()
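# A minimal sketch, assuming rework's `api.task` declaration style, of a task
# like 'allocate_and_leak_mbytes': it allocates roughly task.input megabytes
# and keeps a reference alive, so the worker's memory footprint grows and the
# monitor can observe it via track_resources. The module-level _leak list and
# the body are illustrative assumptions, not the project's actual definition.
#
# from rework import api
#
# _leak = []
#
# @api.task
# def allocate_and_leak_mbytes(task):
#     # hold on to ~task.input megabytes so they are not reclaimed
#     _leak.append('x' * (task.input * 2**20))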
def test_worker_shutdown(engine):
    with workers(engine) as mon:
        wid = mon.wids[0]

        worker = Worker(engine.url, wid)
        assert not worker.shutdown_asked()

        with engine.begin() as cn:
            update('rework.worker').where(id=wid).values(shutdown=True).do(cn)
        assert worker.shutdown_asked()

        guard(engine,
              'select shutdown from rework.worker where id = {}'.format(wid),
              lambda r: r.scalar() == True)
        guard(engine,
              'select count(id) from rework.worker where running = true',
              lambda r: r.scalar() == 0)

        assert u'explicit shutdown' == engine.execute(
            'select deathinfo from rework.worker where id = %(wid)s',
            wid=wid
        ).scalar()
def test_monitor_base(engine):
    with workers(engine) as mon:
        assert engine.execute(
            'select count(id) from rework.monitor where id = {}'.format(mon.monid)
        ).scalar() == 1

        res = engine.execute(
            'select options from rework.monitor where id = {}'.format(mon.monid)
        ).scalar()
        assert {
            'maxmem': 0,
            'maxruns': 0,
            'debugport': 0,
            'maxworkers': 1,
            'minworkers': 1
        } == res

    # generic monitor assertions: the monitor row goes away on exit
    guard(
        engine,
        'select count(id) from rework.monitor where id = {}'.format(mon.monid),
        lambda r: r.scalar() == 0
    )
def test_domain(engine):
    with workers(engine, maxruns=1) as mon:
        wid = mon.wids[0]

        t1 = api.schedule(engine, 'run_in_non_default_domain')
        t2 = api.schedule(engine, 'print_sleep_and_go_away', 1)

        guard(engine,
              'select running from rework.worker where id = {}'.format(wid),
              lambda r: r.scalar() == False)

        assert t1.status == 'queued'
        assert t2.status == 'done'

    with workers(engine, maxruns=1, domain='nondefault') as mon:
        wid = mon.wids[0]

        t1 = api.schedule(engine, 'run_in_non_default_domain')
        t2 = api.schedule(engine, 'print_sleep_and_go_away', 1)

        guard(engine,
              'select running from rework.worker where id = {}'.format(wid),
              lambda r: r.scalar() == False)

        assert t1.status == 'done'
        assert t2.status == 'queued'
def test_basic_worker_task_execution(engine):
    t = api.schedule(engine, 'print_sleep_and_go_away', 21)
    assert t.state == 'queued'

    guard(engine,
          "select count(id) from rework.task where status = 'queued'",
          lambda res: res.scalar() == 1)
    guard(engine,
          'select count(id) from rework.worker where running = true',
          lambda res: res.scalar() == 0)

    mon = Monitor(engine, 'default', maxworkers=1)
    mon.ensure_workers()

    guard(engine,
          'select count(id) from rework.worker where running = true',
          lambda res: res.scalar() == 1)
    guard(engine,
          "select count(id) from rework.task where status = 'running'",
          lambda res: res.scalar() == 1)

    t.join()
    assert t.output == 42
    assert t.state == 'done'

    guard(engine,
          "select count(id) from rework.task where status = 'running'",
          lambda res: res.scalar() == 0)
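# A minimal sketch, assuming the standard rework task declaration style, of
# the kind of task these tests schedule. The decorator and the task.input /
# task.save_output helpers come from the rework api; the body is illustrative
# (it doubles its input, consistent with the 21 -> 42 and 'a' -> 'aa'
# assertions above) and is not necessarily the project's actual definition.
#
# import time
# from rework import api
#
# @api.task
# def print_sleep_and_go_away(task):
#     print('sleeping a bit')
#     time.sleep(.1)
#     task.save_output(task.input * 2)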
def test_shrink_minworkers(engine, cli):
    with engine.begin() as cn:
        cn.execute('delete from rework.worker')

    with workers(engine, minworkers=0, numworkers=4) as mon:
        r = cli('list-workers', engine.url)
        assert r.output.count('running (idle)') == 0

        # ramp up, more tasks than maxworkers
        # we want to saturate the monitor
        tasks = {}
        for idx in range(6):
            tasks[idx] = api.schedule(engine, 'print_sleep_and_go_away', 1)

        assert [task.state for task in tasks.values()] == [
            'queued', 'queued', 'queued', 'queued', 'queued', 'queued'
        ]

        stat1 = mon.ensure_workers()
        assert len(stat1.new) == 4

        # occupy a worker
        looping = api.schedule(engine, 'infinite_loop')

        for t in tasks.values():
            t.join()
        assert [task.state for task in tasks.values()] == [
            'done', 'done', 'done', 'done', 'done', 'done'
        ]

        stat2 = mon.ensure_workers()
        assert len(stat2.new) == 0
        assert len(stat2.shrink) == 1
        assert len(mon.wids) == 4

        # wait for the first shutdown to happen
        guard(
            engine,
            'select running from rework.worker '
            'where id = {}'.format(stat2.shrink[0]),
            lambda r: not r.scalar()
        )

        # give 2 times a chance to shutdown a spare worker
        r = cli('list-workers', engine.url)
        assert r.output.count('running') == 3
        assert r.output.count('idle') == 2

        for _ in range(1, 3):
            stat = mon.ensure_workers()
            shuttingdown = stat.shrink[0]
            assert shuttingdown in mon.wids
            guard(
                engine,
                'select running from rework.worker '
                'where id = {}'.format(shuttingdown),
                lambda r: not r.scalar()
            )

        guard(
            engine,
            'select count(*) from rework.worker '
            'where shutdown = true and running = false',
            lambda r: r.scalar() == 3
        )

        def shrinking():
            mon.ensure_workers()
            return len(mon.wids) == 1

        wait_true(shrinking)

        # finish the show
        looping.abort()
        mon.preemptive_kill()
        looping.join()

        guard(engine,
              'select count(*) from rework.worker '
              'where running = true',
              lambda r: r.scalar() == 0)