Example #1
0
def test_worker_max_runs(engine):
    """A worker retires itself once it has executed `maxruns` tasks."""
    # maxruns=2: the worker survives the first run, dies after the second
    with workers(engine, maxruns=2) as mon:
        wid = mon.wids[0]

        task = api.schedule(engine, 'print_sleep_and_go_away', 'a')
        task.join()
        assert task.output == 'aa'
        # one run down, the worker is still not asked to leave
        assert not Worker(engine.url, wid).shutdown_asked()

        task = api.schedule(engine, 'print_sleep_and_go_away', 'a')
        task.join()

        # second run done: the worker goes away
        guard(engine,
              'select running from rework.worker where id = {}'.format(wid),
              lambda r: r.scalar() == False)

    # maxruns=1: the worker dies after a single run
    with workers(engine, maxruns=1) as mon:
        wid = mon.wids[0]

        first = api.schedule(engine, 'print_sleep_and_go_away', 'a')
        second = api.schedule(engine, 'print_sleep_and_go_away', 'a')

        first.join()

        guard(engine,
              'select running from rework.worker where id = {}'.format(wid),
              lambda r: r.scalar() == False)

        # the second task never got picked up
        assert second.state == 'queued'
        assert second.worker is None
        assert second._propvalue('finished') is None
Example #2
0
def test_shutdown_worker(engine, cli):
    """The cli `shutdown-worker` command stops a worker cleanly."""
    with workers(engine) as mon:
        wid = mon.wids[0]
        cli('shutdown-worker', engine.url, wid)

        # wait for the worker to actually stop
        guard(engine,
              'select running from rework.worker where id = {}'.format(wid),
              lambda res: res.scalar() == 0)

        # the death reason is recorded and shown by list-workers
        listing = cli('list-workers', engine.url)
        assert 'explicit shutdown' in scrub(listing.output)
Example #3
0
def test_task_abortion(engine):
    """Abort a running task and check the whole kill/cleanup lifecycle."""
    with workers(engine) as mon:
        wid = mon.wids[0]

        # park the worker on a never-ending task
        t = api.schedule(engine, 'infinite_loop', True)
        guard(
            engine,
            'select count(id) from rework.task where worker = {}'.format(wid),
            lambda res: res.scalar() == 1)

        assert t.state == 'running'

        # joining a task that never finishes must time out
        with pytest.raises(TimeOut) as err:
            t.join(timeout=.1)
        assert err.value.args[0] == t

        # check cpu usage
        mon.track_resources()
        cpu = engine.execute(
            'select cpu from rework.worker where id = {}'.format(
                wid)).scalar()
        assert cpu > 0

        # request abortion; the state flips to 'aborting' until the
        # monitor enforces it
        t.abort()
        assert t.aborted
        # this is potentially racy but might work most of the time
        assert t.state == 'aborting'

        # the monitor enforces abortion by killing the worker process
        mon.preemptive_kill()
        t.join()
        assert t.state == 'aborted'
        assert t.deathinfo.startswith('preemptive kill')

        # one dead worker
        guard(engine,
              'select running from rework.worker where id = {}'.format(wid),
              lambda res: not res.scalar())

        diagnostic = engine.execute(
            'select deathinfo from rework.worker where id = {}'.format(
                wid)).scalar()

        # scrub() masks the timestamp digits as <X> placeholders
        assert 'preemptive kill at <X>-<X>-<X> <X>:<X>:<X>.<X>+<X>:<X>' == scrub(
            diagnostic)

        # lifecycle timestamps must be consistently ordered
        queued = t._propvalue('queued')
        started = t._propvalue('started')
        finished = t._propvalue('finished')
        assert finished > started > queued
Example #4
0
    def ensure_workers(self):
        """Reconcile the live worker pool with the configured bounds.

        Reaps dead workers, refreshes mem/cpu stats, retires at most one
        spare worker, then spawns as many new workers as the queued tasks
        require, clipped to the [minworkers, maxworkers] range.

        Returns the stats object (with `.new` and `.shrink` lists of
        worker ids) describing what changed during this pass.
        """
        # rid self.workers of dead things
        stats = self._cleanup_workers()

        # update mem/cpu stats
        self.track_resources()

        # reduce by one the worker pool if possible
        shuttingdown = self.shrink_workers()
        if shuttingdown is not None:
            stats.shrink.append(shuttingdown)

        # compute the needed workers
        with self.engine.begin() as cn:
            numworkers = self.num_workers
            busycount = len(self.busy_workers(cn))
            waiting = self.queued_tasks(cn)

        idle = numworkers - busycount
        assert idle >= 0
        # clip the delta so the pool size never leaves
        # the [minworkers, maxworkers] window
        needed_workers = clip(waiting - idle, self.minworkers - numworkers,
                              self.maxworkers - numworkers)

        # bail out if there's nothing to do
        if not needed_workers:
            return stats

        # reserve all debug ports first, then spawn one worker per port
        procs = []
        debug_ports = []
        for offset in range(needed_workers):
            debug_ports.append(self.grab_debug_port(offset))

        for debug_port in debug_ports:
            procs.append(self.spawn_worker(debug_port=debug_port))

        # wait til they are up and running
        # (spawn_worker returns the worker ids used in the `in` clause)
        if procs:
            guard(self.engine,
                  "select count(id) from rework.worker where running = true "
                  "and id in ({})".format(','.join(repr(wid)
                                                   for wid in procs)),
                  lambda c: c.scalar() == len(procs),
                  timeout=20 + self.maxworkers * 2)

        stats.new.extend(procs)
        return stats
Example #5
0
def test_debug_port(engine, cli):
    """Debug ports are allotted per worker and recycled after death."""
    with workers(engine, numworkers=3, debug=True) as mon:
        # three workers -> three consecutive ports starting at 6666
        r = cli('list-workers', engine.url)
        assert '6666' in r.output
        assert '6667' in r.output
        assert '6668' in r.output

        # all ports taken: grabbing one must fail
        with pytest.raises(AssertionError):
            mon.grab_debug_port(0)

        # kill one worker, freeing its port
        killtarget = mon.wids[0]
        cli('kill-worker', engine.url, killtarget)
        killed = mon.preemptive_kill()

        assert len(killed) == 1
        assert killed[0] == killtarget
        assert len(mon.wids) == 2

        guard(
            engine, 'select running from rework.worker where id = {}'.format(
                killtarget), lambda r: not r.scalar())

        r = cli('list-workers', engine.url)
        assert '[dead] debugport = 6666' in r.output

        # the dead worker's port is available again
        port = mon.grab_debug_port(0)
        assert port == 6666  # recycled

        # a replacement worker picks up the recycled port
        stats = mon.ensure_workers()
        assert stats.new
        guard(
            engine, 'select running from rework.worker where id = {}'.format(
                stats.new[0]), lambda r: r.scalar())

        # both the dead and the live worker show port 6666
        r = cli('list-workers', engine.url)
        assert '[dead] debugport = 6666' in r.output
        assert '(idle)] debugport = 6666' in r.output
        # proper recycling did happen

    # a fresh monitor starts over from the base port
    with workers(engine, numworkers=3, debug=True):
        r = cli('list-workers', engine.url)
        assert '6666' in r.output
        assert '6667' in r.output
        assert '6668' in r.output
Example #6
0
def test_worker_unplanned_death(engine):
    """A hard-crashing task kills the worker; the monitor must notice
    and record the death as unaccounted."""
    with workers(engine) as mon:
        wid = mon.wids[0]
        task = api.schedule(engine, 'unstopable_death')

        # the reaper eventually spots the dead worker
        assert wid in wait_true(mon.reap_dead_workers)

        guard(engine,
              'select deathinfo from rework.worker where id = {}'.format(wid),
              lambda r: r.scalar() == 'Unaccounted death (hard crash)')

        # the in-flight task was flagged aborted with matching diagnostics
        assert task.state == 'aborted'
        assert task.deathinfo == 'Unaccounted death (hard crash)'
        assert task._propvalue('finished') > task._propvalue('queued')
Example #7
0
def test_worker_kill(engine):
    """Setting the kill flag gets the worker preemptively killed."""
    with workers(engine) as mon:
        wid = mon.wids[0]

        # flag the worker for a kill, then wait for the flag to land
        with engine.begin() as cn:
            update('rework.worker').where(id=wid).values(kill=True).do(cn)
        guard(engine,
              'select kill from rework.worker where id = {}'.format(wid),
              lambda r: r.scalar() == True)

        mon.preemptive_kill()

        # no running worker remains
        guard(engine,
              'select count(id) from rework.worker where running = true',
              lambda r: r.scalar() == 0)

        # the death reason was recorded
        deathinfo = engine.execute(
            'select deathinfo from rework.worker where id = %(wid)s',
            wid=wid).scalar()
        assert deathinfo.startswith('preemptive kill')
Example #8
0
def test_worker_max_mem(engine):
    """A worker exceeding `maxmem` is asked to shut itself down."""
    with workers(engine, maxmem=100) as mon:
        wid = mon.wids[0]

        api.schedule(engine, 'allocate_and_leak_mbytes', 50).join()

        # mem stats only move when the monitor samples them
        memquery = 'select mem from rework.worker where id = {}'.format(wid)
        assert engine.execute(memquery).scalar() == 0
        mon.track_resources()
        assert engine.execute(memquery).scalar() > 50

        # leak past the limit: the shutdown flag goes up
        api.schedule(engine, 'allocate_and_leak_mbytes', 100).join()
        guard(engine,
              'select shutdown from rework.worker where id = {}'.format(wid),
              lambda r: r.scalar() == True)
        assert Worker(engine.url, wid).shutdown_asked()
Example #9
0
def test_worker_shutdown(engine):
    """Raising the shutdown flag makes the worker exit cleanly."""
    with workers(engine) as mon:
        wid = mon.wids[0]
        worker = Worker(engine.url, wid)
        assert not worker.shutdown_asked()

        # flip the shutdown flag directly in the database
        with engine.begin() as cn:
            update('rework.worker').where(id=wid).values(shutdown=True).do(cn)
        assert worker.shutdown_asked()
        guard(engine,
              'select shutdown from rework.worker where id = {}'.format(wid),
              lambda r: r.scalar() == True)

        # the worker obeys and goes away
        guard(engine,
              'select count(id) from rework.worker where running = true',
              lambda r: r.scalar() == 0)

        # with the proper diagnostic
        deathinfo = engine.execute(
            'select deathinfo from rework.worker where id = %(wid)s',
            wid=wid).scalar()
        assert deathinfo == 'explicit shutdown'
Example #10
0
def test_monitor_base(engine):
    """The monitor registers itself with its options, then unregisters."""
    with workers(engine) as mon:
        # exactly one registration row for this monitor
        registered = engine.execute(
            'select count(id) from rework.monitor where id = {}'.format(
                mon.monid)).scalar()
        assert registered == 1

        # options were persisted with their default values
        options = engine.execute(
            'select options from rework.monitor where id = {}'.format(
                mon.monid)).scalar()
        assert options == {
            'maxmem': 0,
            'maxruns': 0,
            'debugport': 0,
            'maxworkers': 1,
            'minworkers': 1
        }

    # generic monitor assertions
    guard(
        engine,
        'select count(id) from rework.monitor where id = {}'.format(mon.monid),
        lambda r: r.scalar() == 0)
Example #11
0
def test_domain(engine):
    """Workers only pick up tasks belonging to their own domain."""
    # default-domain worker: the non-default task stays queued
    with workers(engine, maxruns=1) as mon:
        wid = mon.wids[0]
        nondefault = api.schedule(engine, 'run_in_non_default_domain')
        default = api.schedule(engine, 'print_sleep_and_go_away', 1)

        guard(engine,
              'select running from rework.worker where id = {}'.format(wid),
              lambda r: r.scalar() == False)

        assert nondefault.status == 'queued'
        assert default.status == 'done'

    # non-default-domain worker: the situation is reversed
    with workers(engine, maxruns=1, domain='nondefault') as mon:
        wid = mon.wids[0]
        nondefault = api.schedule(engine, 'run_in_non_default_domain')
        default = api.schedule(engine, 'print_sleep_and_go_away', 1)

        guard(engine,
              'select running from rework.worker where id = {}'.format(wid),
              lambda r: r.scalar() == False)

        assert nondefault.status == 'done'
        assert default.status == 'queued'
Example #12
0
def test_basic_worker_task_execution(engine):
    """End-to-end: a queued task runs to completion once a worker exists."""
    task = api.schedule(engine, 'print_sleep_and_go_away', 21)
    assert task.state == 'queued'

    # no worker yet: the task sits in the queue
    guard(engine, "select count(id) from rework.task where status = 'queued'",
          lambda res: res.scalar() == 1)
    guard(engine, 'select count(id) from rework.worker where running = true',
          lambda res: res.scalar() == 0)

    # bring up a single worker
    Monitor(engine, 'default', maxworkers=1).ensure_workers()

    guard(engine, 'select count(id) from rework.worker where running = true',
          lambda res: res.scalar() == 1)
    guard(engine, "select count(id) from rework.task where status = 'running'",
          lambda res: res.scalar() == 1)

    # the task doubles its input and finishes
    task.join()
    assert task.output == 42
    assert task.state == 'done'

    guard(engine, "select count(id) from rework.task where status = 'running'",
          lambda res: res.scalar() == 0)
Example #13
0
def test_shrink_minworkers(engine, cli):
    """With minworkers=0 the monitor progressively retires idle workers,
    one per ensure_workers pass, down to the busy ones only."""
    # start from a clean worker table
    with engine.begin() as cn:
        cn.execute('delete from rework.worker')
    with workers(engine, minworkers=0, numworkers=4) as mon:
        r = cli('list-workers', engine.url)
        assert r.output.count('running (idle)') == 0

        # ramp up, more tasks than maxworkers
        # we want to saturate the monitor
        tasks = {}
        for idx in range(6):
            tasks[idx] = api.schedule(engine, 'print_sleep_and_go_away', 1)

        assert [task.state for task in tasks.values()] == [
            'queued', 'queued', 'queued', 'queued', 'queued', 'queued'
        ]

        # the monitor spawns up to its cap of 4 workers
        stat1 = mon.ensure_workers()
        assert len(stat1.new) == 4

        # occupy a worker
        looping = api.schedule(engine, 'infinite_loop')

        for t in tasks.values():
            t.join()

        assert [task.state for task in tasks.values()
                ] == ['done', 'done', 'done', 'done', 'done', 'done']

        # queue drained: the monitor sheds exactly one idle worker
        stat2 = mon.ensure_workers()
        assert len(stat2.new) == 0
        assert len(stat2.shrink) == 1
        assert len(mon.wids) == 4

        # wait for the first shutdown to happen
        guard(
            engine, 'select running from rework.worker '
            'where id = {}'.format(stat2.shrink[0]), lambda r: not r.scalar())

        # give 2 times a chance to shutdown a spare worker
        r = cli('list-workers', engine.url)
        assert r.output.count('running') == 3
        assert r.output.count('idle') == 2

        for _ in range(1, 3):
            stat = mon.ensure_workers()
            shuttingdown = stat.shrink[0]
            assert shuttingdown in mon.wids
            guard(
                engine, 'select running from rework.worker '
                'where id = {}'.format(shuttingdown), lambda r: not r.scalar())

        # three workers have been shut down so far
        guard(
            engine, 'select count(*) from rework.worker '
            'where shutdown = true and running = false',
            lambda r: r.scalar() == 3)

        # only the busy (infinite_loop) worker should remain
        def shrinking():
            mon.ensure_workers()
            return len(mon.wids) == 1

        wait_true(shrinking)

        # finish the show
        looping.abort()
        mon.preemptive_kill()
        looping.join()
        guard(engine, 'select count(*) from rework.worker '
              'where running = true', lambda r: r.scalar() == 0)