Ejemplo n.º 1
0
def test_task_logging_capture(engine):
    """Check the log lines recorded for `capture_logs` tasks.

    Two tasks are run inside a `workers(engine, 2)` context and must each
    expose the same three scrubbed log lines.  A third task is then used
    to exercise `logs(fromid=...)`.
    """
    expected = [
        u'my_app_logger:ERROR: <X>-<X>-<X> <X>:<X>:<X>: will be captured <X>',
        u'stdout:INFO: <X>-<X>-<X> <X>:<X>:<X>: I want to be captured',
        u'my_app_logger:DEBUG: <X>-<X>-<X> <X>:<X>:<X>: will be captured <X> also'
    ]

    # start from an empty task table
    with engine.begin() as cn:
        cn.execute('delete from rework.task')

    with workers(engine, 2):
        first = api.schedule(engine, 'capture_logs')
        second = api.schedule(engine, 'capture_logs')

        first.join()
        second.join()

        # both tasks must have produced exactly the same (scrubbed) lines
        for task in (first, second):
            captured = [scrub(line) for _, line in task.logs()]
            assert expected == captured

        third = api.schedule(engine, 'capture_logs')
        third.join()

        ids = [logid for logid, _ in third.logs()]
        # with `fromid` set to the first log id, two entries are expected
        assert len(third.logs(fromid=ids[0])) == 2
Ejemplo n.º 2
0
def test_worker_max_runs(engine):
    """Check worker state against the `maxruns` setting.

    With `maxruns=2`, no shutdown is asked after the first task and the
    worker row reports `running` as false after the second one.  With
    `maxruns=1`, the second scheduled task is expected to stay queued,
    without a worker and without a `finished` value.
    """
    running_query = 'select running from rework.worker where id = {}'

    with workers(engine, maxruns=2) as mon:
        wid = mon.wids[0]

        task = api.schedule(engine, 'print_sleep_and_go_away', 'a')
        task.join()

        assert task.output == 'aa'
        assert not Worker(engine.url, wid).shutdown_asked()

        task = api.schedule(engine, 'print_sleep_and_go_away', 'a')
        task.join()

        # after the second run the worker row must no longer be running
        guard(engine,
              running_query.format(wid),
              lambda r: r.scalar() == False)

    with workers(engine, maxruns=1) as mon:
        wid = mon.wids[0]

        first = api.schedule(engine, 'print_sleep_and_go_away', 'a')
        second = api.schedule(engine, 'print_sleep_and_go_away', 'a')

        first.join()

        guard(engine,
              running_query.format(wid),
              lambda r: r.scalar() == False)

        # the second task was never picked up
        assert second.state == 'queued'
        assert second.worker is None
        assert second._propvalue('finished') is None
Ejemplo n.º 3
0
def test_more_unstarted(engine):
    """Check that `cleanup_unstarted` drops a manually inserted worker row.

    The worker count goes from 2 back to 1 after the cleanup, while the
    two tasks stay in the task table.
    """
    count_workers = 'select count(*) from rework.worker'
    count_tasks = 'select count(*) from rework.task'

    # wipe previous tasks and workers
    with engine.begin() as cn:
        cn.execute('delete from rework.task')
        cn.execute('delete from rework.worker')

    with workers(engine) as mon:
        assert engine.execute(count_workers).scalar() == 1

        with engine.begin() as cn:
            # unborn worker
            insert('rework.worker').values(
                host='127.0.0.1', domain='default'
            ).do(cn)

        assert engine.execute(count_workers).scalar() == 2

        first = api.schedule(engine, 'raw_input', b'foo')
        second = api.schedule(engine, 'raw_input', b'foo')
        first.join()
        second.join()
        assert engine.execute(count_tasks).scalar() == 2

        # running the tasks did not alter the number of worker rows
        assert engine.execute(count_workers).scalar() == 2
        mon.cleanup_unstarted()
        assert engine.execute(count_workers).scalar() == 1

        # the cleanup left the tasks alone
        assert engine.execute(count_tasks).scalar() == 2
Ejemplo n.º 4
0
def test_timeout(engine):
    """Check the timedelta helpers and the timeout handling of the monitor.

    After `track_timeouts` the two `infinite_loop_timeout` tasks must be
    `aborting` and, after `preemptive_kill`, `aborted`; the
    `infinite_loop_long_timeout` task must remain `running` throughout.
    """
    # first, a small unittest on utility functions
    span = datetime(2018, 3, 3, 12, 45, 30) - datetime(2018, 1, 1)
    encoded = delta_isoformat(span)
    assert encoded == 'P61DT0H0M45930S'
    assert span == parse_delta(encoded)

    with workers(engine, numworkers=3) as mon:
        short_a = api.schedule(engine, 'infinite_loop_timeout')
        short_b = api.schedule(engine, 'infinite_loop_timeout')
        long_one = api.schedule(engine, 'infinite_loop_long_timeout')
        for task in (short_a, short_b, long_one):
            task.join('running')

        time.sleep(1)  # make sure we're going to time out
        mon.track_timeouts()
        assert short_a.state == 'aborting'
        assert short_b.state == 'aborting'
        assert long_one.state == 'running'

        mon.preemptive_kill()
        assert short_a.state == 'aborted'
        assert short_b.state == 'aborted'
        assert long_one.state == 'running'
Ejemplo n.º 5
0
def test_monitor_step(engine):
    """Check that `Monitor.step` spawns workers and replaces dead ones.

    A first `step` must report two new integer worker ids.  The worker
    processes are then killed, `reap_dead_workers` must report exactly
    those ids, and a second `step` must report two different new ids.
    """
    mon = Monitor(engine)
    api.schedule(engine,
                 'print_sleep_and_go_away',
                 21,
                 metadata={'user': '******'})

    try:
        stats = mon.step()
        assert len(stats.new) == 2
        assert all(isinstance(w, int) for w in stats.new)

        # simulate a hard crash
        for proc in mon.workers.values():
            if proc.poll() is None:
                kill_process_tree(proc.pid)

        dead = mon.reap_dead_workers()
        assert dead == stats.new

        stats2 = mon.step()
        assert stats2.new != stats.new
        assert len(stats2.new) == 2
        assert all(isinstance(w, int) for w in stats2.new)
    finally:
        # always tear the workers down, even when an assertion above
        # fails, so that no worker process outlives the test
        mon.killall()
Ejemplo n.º 6
0
    def run_stuff():
        """Run three tasks in a two-worker context and check the cli listings.

        `list-workers` must mention `running` twice, and `list-tasks` must
        mention `done` three times once all tasks have been joined.
        """
        with workers(engine, numworkers=2):
            r = cli('list-workers', engine.url)
            assert r.output.count('running') == 2

            tasks = [
                api.schedule(engine, 'print_sleep_and_go_away', arg)
                for arg in (1, 2, 3)
            ]

            # join every task explicitly: chaining the calls with `or`
            # would skip the remaining joins as soon as one of them
            # returned a truthy value
            for task in tasks:
                task.join()

            r = cli('list-tasks', engine.url)
            assert r.output.count('done') == 3
Ejemplo n.º 7
0
def test_worker_two_runs_nondfefault_domain(engine):
    """Check `maxruns=2` in the `nondefault` domain.

    Of three scheduled tasks, the first two must end up `done` and the
    third one must still be `queued`.
    """
    # start without any registered worker
    with engine.begin() as cn:
        cn.execute('delete from rework.worker')

    with workers(engine, maxruns=2, domain='nondefault'):
        first, second, third = (
            api.schedule(engine, 'run_in_non_default_domain')
            for _ in range(3)
        )

        first.join()
        second.join()
        assert first.status == 'done'
        assert second.status == 'done'
        assert third.status == 'queued'
Ejemplo n.º 8
0
def test_cleanup_unstarted(engine):
    """Check the return value and effect of `Monitor.cleanup_unstarted`.

    The first call must report one deleted worker (the manually inserted
    row) and leave one worker and one task in the tables; a second call
    must report nothing to delete.
    """
    count_workers = 'select count(*) from rework.worker'
    count_tasks = 'select count(*) from rework.task'

    # start from empty task and worker tables
    with engine.begin() as cn:
        cn.execute('delete from rework.task')
        cn.execute('delete from rework.worker')

    mon = Monitor(engine, 'default', None, 1, 1, 0, False)
    mon.register()
    mon.ensure_workers()

    with engine.begin() as cn:
        # unborn worker
        insert('rework.worker').values(
            host='127.0.0.1', domain='default'
        ).do(cn)

    assert engine.execute(count_workers).scalar() == 2

    task = api.schedule(engine, 'raw_input', b'foo')
    task.join()

    mon.killall(msg=None)
    assert mon.cleanup_unstarted() == 1

    assert engine.execute(count_workers).scalar() == 1
    assert engine.execute(count_tasks).scalar() == 1

    # nothing is left to clean up the second time
    assert mon.cleanup_unstarted() == 0
Ejemplo n.º 9
0
def test_basic_task_operations(engine):
    """Exercise the basic `Task` api without a worker context.

    Checks the registered operations, runs a task taken from the queue
    in-process, looks tasks up by id and verifies the error raised when
    scheduling an unknown operation.
    """
    api.schedule(engine,
                 'print_sleep_and_go_away',
                 21,
                 metadata={'user': '******'})

    # every known operation is expected to come from a `tasks.py` file
    opnames = (
        'allocate_and_leak_mbytes',
        'capture_logs',
        'flush_captured_stdout',
        'infinite_loop',
        'infinite_loop_long_timeout',
        'infinite_loop_timeout',
        'log_swarm',
        'normal_exception',
        'print_sleep_and_go_away',
        'raw_input',
        'run_in_non_default_domain',
        'stderr_swarm',
        'unstopable_death',
    )
    expected = [(opname, 'tasks.py') for opname in opnames]

    rows = engine.execute(
        'select name, path from rework.operation order by name').fetchall()
    known = [(opname, Path(oppath).name) for opname, oppath in rows]
    assert expected == known

    mon = Monitor(engine)
    wid = mon.new_worker()
    task = Task.fromqueue(engine, wid)
    task.run()
    assert task.output == 42
    assert task.metadata == {'user': '******'}

    # the queueing date must fall in the current year and month
    queued_at = task._propvalue('queued')
    now = datetime.now()
    assert now.year == queued_at.year
    assert now.month == queued_at.month

    same = Task.byid(engine, task.tid)
    assert (same.tid, same.operation) == (task.tid, task.operation)

    # an unknown task id yields None
    assert Task.byid(engine, 42000) is None

    with pytest.raises(Exception) as err:
        api.schedule(engine, 'no_such_task')
    assert err.value.args[0] == 'No operation was found for these parameters'
Ejemplo n.º 10
0
def test_run_worker(engine):
    """Run a `Worker` in-process and check that the scheduled task is done."""
    monitor = Monitor(engine, maxruns=1)
    wid = monitor.new_worker()

    task = api.schedule(engine, 'print_sleep_and_go_away', 0)

    # the worker is built and driven directly rather than through a monitor
    Worker(engine.url, wid, os.getppid(), maxruns=1).run()

    assert task.state == 'done'
Ejemplo n.º 11
0
def test_killed_task(engine):
    """Check the state of a task whose `workers` context goes away.

    When the context exits normally while the task is running, the task
    must be `aborted` without a traceback.  When the context is left
    through an exception, the task must be `aborted` and its traceback
    must contain the exception message.
    """
    with workers(engine):
        t = api.schedule(engine, 'infinite_loop')
        t.join('running')

    assert t.state == 'aborted'
    assert t.traceback is None

    try:
        with workers(engine):
            t = api.schedule(engine, 'infinite_loop')
            t.join('running')
            raise Exception('kill the monitor')
    except Exception:
        # the failure is provoked on purpose; only ordinary exceptions
        # are swallowed so that KeyboardInterrupt/SystemExit still
        # interrupt the test run
        pass

    assert t.state == 'aborted'
    assert 'kill the monitor' in t.traceback
Ejemplo n.º 12
0
def test_task_error(engine):
    """Check the traceback, state and timestamps of a failing task."""
    with workers(engine):
        task = api.schedule(engine, 'normal_exception')
        task.join()

        # the stripped traceback must end with the error text
        tb = task.traceback.strip()
        assert tb.endswith('oops')
        assert task.state == 'failed'

        queued_at = task._propvalue('queued')
        finished_at = task._propvalue('finished')
        assert finished_at > queued_at
Ejemplo n.º 13
0
def test_worker_max_mem(engine):
    """Check memory tracking and the shutdown request with `maxmem=100`.

    The recorded `mem` of the worker is 0 until `track_resources` runs,
    then above 50 after the first task.  After the second task the
    worker row must have `shutdown` set and `shutdown_asked()` must be
    true.
    """
    with workers(engine, maxmem=100) as mon:
        wid = mon.wids[0]
        mem_query = 'select mem from rework.worker where id = {}'.format(wid)
        shutdown_query = (
            'select shutdown from rework.worker where id = {}'.format(wid)
        )

        first = api.schedule(engine, 'allocate_and_leak_mbytes', 50)
        first.join()

        # nothing is recorded before the monitor tracks resources
        assert engine.execute(mem_query).scalar() == 0
        mon.track_resources()
        assert engine.execute(mem_query).scalar() > 50

        second = api.schedule(engine, 'allocate_and_leak_mbytes', 100)
        second.join()
        guard(engine,
              shutdown_query,
              lambda r: r.scalar() == True)

        assert Worker(engine.url, wid).shutdown_asked()
Ejemplo n.º 14
0
def test_task_logs(engine, cli):
    """Check the scrubbed output of the `log-task` cli command."""
    expected = (
        '\x1b[<X>mmy_app_logger:ERROR: <X>-<X>-<X> <X>:<X>:<X>: will be captured <X>\n'
        '\x1b[<X>mstdout:INFO: <X>-<X>-<X> <X>:<X>:<X>: I want to be captured\n'
        '\x1b[<X>mmy_app_logger:DEBUG: <X>-<X>-<X> <X>:<X>:<X>: will be captured <X> also'
    )

    with workers(engine):
        task = api.schedule(engine, 'capture_logs')
        task.join()

        result = cli('log-task', engine.url, task.tid)
        assert expected == scrub(result.output)
Ejemplo n.º 15
0
def test_process_lock(engine):
    """Join a `stderr_swarm` task from a secondary thread and check it is done."""
    with workers(engine):
        task = api.schedule(engine, 'stderr_swarm')

        def wait_for_task():
            # the task is looked up again by id rather than reusing `task`
            Task.byid(task.engine, task.tid).join(timeout=2)

        waiter = threading.Thread(target=wait_for_task)
        waiter.start()
        waiter.join()
        assert task.state == 'done'
Ejemplo n.º 16
0
def test_domain_map(engine, cleanup):
    """Check that operations frozen with a `domain_map` run in the mapped domain.

    Operations are frozen for `nondefault` with a map to `fancy`, and a
    task must then complete in a `workers` context for the `fancy` domain.
    """
    # forget the previously registered operations
    with engine.begin() as cn:
        cn.execute('delete from rework.operation')

    mapping = {'nondefault': 'fancy'}
    api.freeze_operations(engine, domain='nondefault', domain_map=mapping)

    with workers(engine, maxruns=1, domain='fancy'):
        task = api.schedule(engine, 'run_in_non_default_domain')
        task.join()
        assert task.status == 'done'
Ejemplo n.º 17
0
def test_task_input_output(engine):
    """Check raw and decoded input/output accessors of a task.

    For a task fed with raw bytes, `input` and `output` must raise
    `TypeError` while `raw_input` and `raw_output` return bytes.  For
    the `unstopable_death` task all four accessors must be None.
    """
    with workers(engine) as mon:
        task = api.schedule(engine, 'raw_input', rawinputdata=b'Hello Babar')
        task.join()

        with pytest.raises(TypeError):
            task.input
        raw_in = task.raw_input
        with pytest.raises(TypeError):
            task.output
        raw_out = task.raw_output
        assert raw_in == b'Hello Babar'
        assert raw_out == b'Hello Babar and Celeste'

        task = api.schedule(engine, 'unstopable_death')
        # wait until the monitor reports dead workers
        wait_true(mon.reap_dead_workers)

        for attr in ('input', 'raw_input', 'output', 'raw_output'):
            assert getattr(task, attr) is None
Ejemplo n.º 18
0
def test_task_abortion(engine):
    """Follow a task through abortion and preemptive kill.

    Covers the join timeout, cpu tracking, the `aborting` and `aborted`
    states, the death information on both task and worker, and the
    ordering of the task timestamps.
    """
    with workers(engine) as mon:
        wid = mon.wids[0]
        worker_column = 'select {} from rework.worker where id = {}'

        task = api.schedule(engine, 'infinite_loop', True)
        # wait until one task is attached to the worker
        guard(
            engine,
            'select count(id) from rework.task where worker = {}'.format(wid),
            lambda res: res.scalar() == 1)

        assert task.state == 'running'

        # joining with a short timeout must raise, carrying the task
        with pytest.raises(TimeOut) as err:
            task.join(timeout=.1)
        assert err.value.args[0] == task

        # check cpu usage
        mon.track_resources()
        cpu = engine.execute(worker_column.format('cpu', wid)).scalar()
        assert cpu > 0

        task.abort()
        assert task.aborted
        # this is potentially racy but might work most of the time
        assert task.state == 'aborting'

        mon.preemptive_kill()
        task.join()
        assert task.state == 'aborted'
        assert task.deathinfo.startswith('preemptive kill')

        # one dead worker
        guard(engine,
              worker_column.format('running', wid),
              lambda res: not res.scalar())

        diagnostic = engine.execute(
            worker_column.format('deathinfo', wid)).scalar()

        expected = 'preemptive kill at <X>-<X>-<X> <X>:<X>:<X>.<X>+<X>:<X>'
        assert expected == scrub(diagnostic)

        queued, started, finished = (
            task._propvalue(name)
            for name in ('queued', 'started', 'finished')
        )
        assert finished > started > queued
Ejemplo n.º 19
0
def test_captured_stdout(engine):
    """Check the seven log entries recorded for `flush_captured_stdout`."""
    expected = [
        ' Hello World',
        ' This is an unfinished statement which could go on for a long time, but I have had enough',
        ' A truly multiline',
        ' statement.',
        ' ',
        ' Honor the space.',
        '  (hi) ',
    ]

    with workers(engine):
        task = api.schedule(engine, 'flush_captured_stdout')
        task.join()
        assert task.state == 'done'

        entries = task.logs()
        assert len(entries) == 7

        # keep only what follows the last colon of each line
        messages = [line.split(':')[-1] for _, line in entries]
        assert messages == expected
Ejemplo n.º 20
0
def test_domain(engine):
    """Check which task gets processed depending on the `workers` domain.

    Without a `domain` argument, `print_sleep_and_go_away` is done and
    `run_in_non_default_domain` stays queued; with `domain='nondefault'`
    the outcome is reversed.
    """
    running_query = 'select running from rework.worker where id = {}'

    with workers(engine, maxruns=1) as mon:
        wid = mon.wids[0]
        nondefault_task = api.schedule(engine, 'run_in_non_default_domain')
        default_task = api.schedule(engine, 'print_sleep_and_go_away', 1)

        # wait for the worker row to report it is no longer running
        guard(engine,
              running_query.format(wid),
              lambda r: r.scalar() == False)

        assert nondefault_task.status == 'queued'
        assert default_task.status == 'done'

    with workers(engine, maxruns=1, domain='nondefault') as mon:
        wid = mon.wids[0]
        nondefault_task = api.schedule(engine, 'run_in_non_default_domain')
        default_task = api.schedule(engine, 'print_sleep_and_go_away', 1)

        guard(engine,
              running_query.format(wid),
              lambda r: r.scalar() == False)

        assert nondefault_task.status == 'done'
        assert default_task.status == 'queued'
Ejemplo n.º 21
0
def test_worker_unplanned_death(engine):
    """Check the bookkeeping after the `unstopable_death` task is scheduled.

    The worker must be reported by `reap_dead_workers`, and both the
    worker row and the task must carry the hard-crash death information.
    """
    crash_info = 'Unaccounted death (hard crash)'

    with workers(engine) as mon:
        wid = mon.wids[0]

        task = api.schedule(engine, 'unstopable_death')

        # wait until the monitor reports dead workers
        reaped = wait_true(mon.reap_dead_workers)
        assert wid in reaped

        guard(engine,
              'select deathinfo from rework.worker where id = {}'.format(wid),
              lambda r: r.scalar() == crash_info)

        assert task.state == 'aborted'

        queued_at = task._propvalue('queued')
        finished_at = task._propvalue('finished')
        assert finished_at > queued_at
        assert task.deathinfo == crash_info
Ejemplo n.º 22
0
    def relaunch_task(tid):
        """Schedule a copy of task `tid` and return the new task id as json.

        Returns the json encoding of 0 when the `relaunch` permission is
        missing or when no task matches `tid`.
        """
        if not has_permission('relaunch'):
            return json.dumps(0)

        original = Task.byid(engine, tid)
        if original is None:
            return json.dumps(0)

        # fetch name, host and domain of the operation behind the task
        query = select('name', 'host', 'domain').table('rework.operation')
        query = query.join(
            'rework.task as task on (task.operation = operation.id)')
        query = query.where('task.id = %(tid)s', tid=original.tid)
        op = query.do(engine).fetchone()

        # reschedule with the same raw input and metadata
        relaunched = api.schedule(
            engine,
            op.name,
            rawinputdata=original.raw_input,
            domain=op.domain,
            hostid=op.host,
            metadata=original.metadata,
        )
        return json.dumps(relaunched.tid)
Ejemplo n.º 23
0
def test_monitors_table(engine, client, refresh):
    """Compare the `/workers-table` page with reference files.

    The page is checked once with no task, and once while an `abortme`
    task is running.  When `refresh` is true the reference files are
    rewritten before the comparison.
    """
    def check_against(filename):
        # fetch the page, normalize it and compare with the stored reference
        res = client.get('/workers-table')
        html = normalize(scrub(res.text))
        refpath = DATADIR / filename
        if refresh:
            refpath.write_bytes(html)
        assert html == refpath.read_bytes()

    # start without any registered monitor or worker
    with engine.begin() as cn:
        cn.execute('delete from rework.monitor')
        cn.execute('delete from rework.worker')

    with workers(engine):
        check_against('monitors-table.html')

        task = api.schedule(engine, 'abortme')
        task.join('running')
        check_against('monitors-table-1-task.html')
Ejemplo n.º 24
0
def _schedule_job(engine, service, args, inputfile):
    """Schedule `service` from request arguments and return the task id as json.

    `args` must provide `user`, `hostid`, `domain` and `options`.  The
    request is aborted with a 400 status when `user` is None or when
    `api.schedule` raises.
    """
    requester = args.user
    if requester is None:
        abort(400, 'user parameter is mandatory')

    # fall back to the host reported by the api when none is given
    target_host = args.hostid or api.host()
    target_domain = args.domain

    meta = {'user': requester}
    options = args.options
    if options:
        meta['options'] = options

    try:
        task = api.schedule(
            engine,
            service,
            rawinputdata=inputfile,
            hostid=target_host,
            domain=target_domain,
            metadata=meta,
        )
    except Exception as err:
        # any scheduling failure is reported with its message
        abort(400, str(err))
    return json.dumps(task.tid)
Ejemplo n.º 25
0
def test_logging_stress_test(engine):
    """Check ordering and count of the log entries of a `log_swarm` task."""
    # start from an empty log table
    with engine.begin() as cn:
        cn.execute('delete from rework.log')

    with workers(engine):
        task = api.schedule(engine, 'log_swarm')
        task.join()

        query = 'select id, line from rework.log where task = {}'.format(
            task.tid)
        records = engine.execute(query).fetchall()

        # we check that there is a constant offset between the
        # log id and the idx that is emitted by the task code
        # => ordering has been preserved
        offsets = [
            logid - int(line.rsplit(',')[-1].strip())
            for logid, line in records
        ]
        assert all(offsets[0] == off for off in offsets)

        assert len(list(task.logs())) == 249
        assert len(list(task.logs(fromid=245))) == 4 + offsets[0]
Ejemplo n.º 26
0
def test_abort_task(engine, cli):
    """Check the cli listings before, during and after a task abortion."""
    url = engine.url

    def shown(command):
        # scrubbed output of a listing command
        return scrub(cli(command, url).output)

    with workers(engine) as mon:
        idle_worker = ('<X> <X>@<X>.<X>.<X>.<X> <X> Mb [running (idle)] '
                       '[<X>-<X>-<X> <X>:<X>:<X>.<X>+<X>] '
                       '→ [<X>-<X>-<X> <X>:<X>:<X>.<X>+<X>]')
        assert idle_worker == shown('list-workers')

        task = api.schedule(engine, 'infinite_loop')
        task.join('running')  # let the worker pick up the task

        busy_worker = ('<X> <X>@<X>.<X>.<X>.<X> <X> Mb [running #<X>]'
                       ' [<X>-<X>-<X> <X>:<X>:<X>.<X>+<X>] '
                       '→ [<X>-<X>-<X> <X>:<X>:<X>.<X>+<X>]')
        assert busy_worker == shown('list-workers')

        running_task = ('<X> infinite_loop running [<X>-<X>-<X> <X>:<X>:<X>.<X>+<X>]'
                        ' → [<X>-<X>-<X> <X>:<X>:<X>.<X>+<X>]')
        assert running_task == shown('list-tasks')

        # abort through the cli, then let the monitor kill the worker
        cli('abort-task', url, task.tid)
        mon.preemptive_kill()
        task.join()

        dead_worker = ('<X> <X>@<X>.<X>.<X>.<X> <X> Mb [dead] '
                       '[<X>-<X>-<X> <X>:<X>:<X>.<X>+<X>] '
                       '→ [<X>-<X>-<X> <X>:<X>:<X>.<X>+<X>] '
                       '→ [<X>-<X>-<X> <X>:<X>:<X>.<X>+<X>] '
                       'preemptive kill at '
                       '<X>-<X>-<X> <X>:<X>:<X>.<X>+<X>:<X>')
        assert dead_worker == shown('list-workers')

        aborted_task = ('<X> infinite_loop aborted [<X>-<X>-<X> <X>:<X>:<X>.<X>+<X>] '
                        '→ [<X>-<X>-<X> <X>:<X>:<X>.<X>+<X>] '
                        '→ [<X>-<X>-<X> <X>:<X>:<X>.<X>+<X>]')
        assert aborted_task == shown('list-tasks')
Ejemplo n.º 27
0
def test_kill_worker(engine, cli):
    """Check the cli listings after a worker is killed through the cli."""
    url = engine.url

    def shown(command):
        # scrubbed output of a listing command
        return scrub(cli(command, url).output)

    # start without any registered worker
    with engine.begin() as cn:
        cn.execute('delete from rework.worker')

    with workers(engine) as mon:
        task = api.schedule(engine, 'infinite_loop')
        task.join('running')  # let the worker pick up the task

        # request the kill through the cli, then let the monitor act on it
        cli('kill-worker', url, mon.wids[0])
        mon.preemptive_kill()

        dead_worker = ('<X> <X>@<X>.<X>.<X>.<X> <X> Mb [dead] '
                       '[<X>-<X>-<X> <X>:<X>:<X>.<X>+<X>] '
                       '→ [<X>-<X>-<X> <X>:<X>:<X>.<X>+<X>] '
                       '→ [<X>-<X>-<X> <X>:<X>:<X>.<X>+<X>] '
                       'preemptive kill at '
                       '<X>-<X>-<X> <X>:<X>:<X>.<X>+<X>:<X>')
        assert dead_worker == shown('list-workers')

        aborted_task = ('<X> infinite_loop aborted [<X>-<X>-<X> <X>:<X>:<X>.<X>+<X>] '
                        '→ [<X>-<X>-<X> <X>:<X>:<X>.<X>+<X>] '
                        '→ [<X>-<X>-<X> <X>:<X>:<X>.<X>+<X>]')
        assert aborted_task == shown('list-tasks')
Ejemplo n.º 28
0
def test_basic_worker_task_execution(engine):
    """Follow a task from `queued` to `done` once a worker is started."""
    queued_tasks = "select count(id) from rework.task where status = 'queued'"
    running_tasks = "select count(id) from rework.task where status = 'running'"
    running_workers = 'select count(id) from rework.worker where running = true'

    def expect_count(query, wanted):
        # hand the query to `guard` with a check on its scalar result
        guard(engine, query, lambda res: res.scalar() == wanted)

    task = api.schedule(engine, 'print_sleep_and_go_away', 21)
    assert task.state == 'queued'

    # one queued task, no running worker yet
    expect_count(queued_tasks, 1)
    expect_count(running_workers, 0)

    monitor = Monitor(engine, 'default', maxworkers=1)
    monitor.ensure_workers()

    expect_count(running_workers, 1)
    expect_count(running_tasks, 1)

    task.join()
    assert task.output == 42
    assert task.state == 'done'

    expect_count(running_tasks, 0)
Ejemplo n.º 29
0
def test_schedule_domain(engine, cleanup):
    """Check operation selection by domain and host when scheduling.

    The `foo` operation is frozen for the `test` domain and for the
    `production` domain (for the current host and for 192.168.122.42).
    Scheduling without a domain must be reported as ambiguous, and
    unknown hosts or domains must raise.
    """
    reset_ops(engine)
    # imported for their side effects only; presumably these modules
    # declare the `foo` operation for each domain -- TODO confirm
    from . import task_testenv
    from . import task_prodenv

    api.freeze_operations(engine, domain='test')
    api.freeze_operations(engine, domain='production')
    api.freeze_operations(engine, domain='production', hostid='192.168.122.42')

    with pytest.raises(ValueError) as err:
        api.schedule(engine, 'foo')
    assert err.value.args[0] == 'Ambiguous operation selection'

    api.schedule(engine, 'foo', domain='test')
    # there are two of them but .schedule will by default pick the one
    # matching the *current* host
    api.schedule(engine, 'foo', domain='production')
    api.schedule(engine, 'foo', domain='production', hostid='192.168.122.42')
    api.schedule(engine, 'foo', domain='production', hostid=host())

    # the loop variable must not be called `host`: it would shadow the
    # `host()` function used in this test (and rebind it on Python 2)
    hosts = [
        hostname for hostname, in engine.execute(
            'select host from rework.task as t, rework.operation as op '
            'where t.operation = op.id').fetchall()
    ]
    assert hosts.count(host()) == 3
    assert hosts.count('192.168.122.42') == 1

    with pytest.raises(Exception):
        api.schedule(engine, 'foo', domain='production', hostid='172.16.0.1')

    with pytest.raises(Exception):
        api.schedule(engine, 'foo', domain='bogusdomain')
Ejemplo n.º 30
0
def test_task_rawinput(engine):
    """Check the raw output of the `raw_input` task for a bytes input."""
    with workers(engine):
        task = api.schedule(engine, 'raw_input', rawinputdata=b'Babar')
        task.join()

        result = task.raw_output
        assert result == b'Babar and Celeste'