Example #1
def test_invalid_queued_job_id(app4, depends_on_job_id1,
                               log, tasks_json_tmpfile):
    job_id = depends_on_job_id1  # this job_id does not match the app
    # manually bypass the decorator that validates job_id
    qb._set_state_unsafe(app4, job_id, pending=True)
    q = qb.get_qbclient().LockingQueue(app4)
    q.put(job_id)
    validate_one_queued_task(app4, job_id)

    run_code(log, tasks_json_tmpfile, app4, '--bash_cmd echo 123')
    validate_one_failed_task(app4, job_id)
    validate_zero_queued_task(app4)
Example #2
def test_should_not_add_queue_while_consuming_queue(app1, job_id1):
    # This test guards against doubly queuing jobs.
    # It protects against simultaneous operations on root and leaf nodes,
    # i.e. (parent and child), for the following operations:
    # adding, re-adding, or a mix of both
    enqueue(app1, job_id1)

    q = qb.get_qbclient().LockingQueue(app1)
    q.get()
    validate_one_queued_task(app1, job_id1)

    enqueue(app1, job_id1)
    with nose.tools.assert_raises(exceptions.JobAlreadyQueued):
        qb.readd_subtask(app1, job_id1)
    validate_one_queued_task(app1, job_id1)

    # cleanup
    q.consume()
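The two tests above exercise the basic LockingQueue lifecycle. Below is a minimal sketch of that lifecycle using only the calls that appear in these examples; app_name and job_id are placeholder values, and enqueue plus the validate_* functions are helpers from the Stolos test suite rather than part of the queue API.

q = qb.get_qbclient().LockingQueue(app_name)
q.put(job_id)                # add the job to this app's queue
fetched = q.get(timeout=10)  # take and lock the next item, returning its job_id (as in Example #5)
q.consume()                  # mark the locked item as done and remove it from the queue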
Example #3
def _handle_manually_given_job_id(ns):
    """This process was given a specific --job_id arg.
    Decide whether it's okay to execute this job_id,
    and if it's okay to go forward, set the job_id state appropriately
    """
    log.warn(
        ('using specific job_id and'
         ' blindly assuming this job is not already queued.'),
        extra=dict(app_name=ns.app_name, job_id=ns.job_id))
    if qb.get_qbclient().exists(qb.get_job_path(ns.app_name, ns.job_id)):
        msg = ('Will not execute this task because it might be already'
               ' queued or completed!')
        log.critical(
            msg, extra=dict(app_name=ns.app_name, job_id=ns.job_id))
        raise UserWarning(msg)
    lock = qb.obtain_execute_lock(
        ns.app_name, ns.job_id, safe=False, raise_on_error=True,
        blocking=False)
    qb.set_state(ns.app_name, ns.job_id, pending=True)
    return lock
Example #4
def _handle_manually_given_job_id(ns):
    """This process was given a specific --job_id arg.
    Decide whether it's okay to execute this job_id,
    and if it's okay to go forward, set the job_id state appropriately
    """
    log.warn(('using specific job_id and'
              ' blindly assuming this job is not already queued.'),
             extra=dict(app_name=ns.app_name, job_id=ns.job_id))
    if qb.get_qbclient().exists(qb.get_job_path(ns.app_name, ns.job_id)):
        msg = ('Will not execute this task because it might be already'
               ' queued or completed!')
        log.critical(msg, extra=dict(app_name=ns.app_name, job_id=ns.job_id))
        raise UserWarning(msg)
    lock = qb.obtain_execute_lock(ns.app_name,
                                  ns.job_id,
                                  safe=False,
                                  raise_on_error=True,
                                  blocking=False)
    qb.set_state(ns.app_name, ns.job_id, pending=True)
    return lock
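Examples #3 and #4 are the same helper shown with different line wrapping. It expects an argparse-style namespace carrying at least app_name and job_id; the call below is a hypothetical sketch with placeholder values, not taken from the Stolos sources.

from argparse import Namespace

ns = Namespace(app_name='my_app', job_id='my_job_id')  # placeholder values
lock = _handle_manually_given_job_id(ns)  # raises UserWarning if the job node already exists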
Example #5
def main(ns):
    """
    Fetch a job_id from the `app_name` queue and figure out what to do with it.

    If the job is runnable, execute it and then queue its children into their
    respective queues.  If it's not runnable, queue its parents into respective
    parent queues and remove the job from its own queue.
    If the job fails, either requeue it or mark it as permanently failed
    """
    assert ns.app_name in dt.get_task_names()
    if ns.bypass_scheduler:
        log.info(
            "Running a task without scheduling anything"
            " or fetching from a queue", extra=dict(
                app_name=ns.app_name, job_id=ns.job_id))
        assert ns.job_id
        ns.job_type_func(ns=ns)
        return

    log.info("Beginning Stolos", extra=dict(**ns.__dict__))
    q = qb.get_qbclient().LockingQueue(ns.app_name)
    if ns.job_id:
        lock = _handle_manually_given_job_id(ns)
        q.consume = object  # do nothing
    else:
        ns.job_id = q.get(timeout=ns.timeout)
        if not validate_job_id(app_name=ns.app_name, job_id=ns.job_id,
                               q=q, timeout=ns.timeout):
            return
        try:
            lock = get_lock_if_job_is_runnable(
                app_name=ns.app_name, job_id=ns.job_id)
        except exceptions.NoNodeError:
            q.consume()
            log.exception(
                "Job failed. The job is queued, so why does its state not"
                " exist?  The Queue backend may be in an inconsistent state."
                " Consuming this job",
                extra=dict(app_name=ns.app_name, job_id=ns.job_id))
            return

    log.debug(
        "Stolos got a job_id.", extra=dict(
            app_name=ns.app_name, job_id=ns.job_id, acquired_lock=bool(lock)))
    if lock is False:
        # infinite loop: some jobs will always requeue if lock is unobtainable
        log.info("Could not obtain a lock.  Will requeue and try again later",
                 extra=dict(app_name=ns.app_name, job_id=ns.job_id))
        _send_to_back_of_queue(
            q=q, app_name=ns.app_name, job_id=ns.job_id)
        return

    if not parents_completed(ns.app_name, ns.job_id, q=q, lock=lock):
        return

    log.info(
        "Job starting!", extra=dict(app_name=ns.app_name, job_id=ns.job_id))
    try:
        ns.job_type_func(ns=ns)
    except exceptions.CodeError:  # assume error is previously logged
        _handle_failure(ns, q, lock)
        return
    except Exception as err:
        log.exception(
            ("Job failed!  Unhandled exception in an application!"
             " Fix ASAP because"
             " it is unclear how to handle this failure.  %s: %s")
            % (err.__class__.__name__, err), extra=dict(
                app_name=ns.app_name, job_id=ns.job_id, failed=True))
        return
    _handle_success(ns, q, lock)
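One detail in the manual --job_id branch above is easy to miss: q.consume = object makes every later q.consume() call construct a throwaway object() instead of consuming a queue item, which is the intended behavior because this job was never taken off the queue. An equivalent, more explicit no-op is sketched below purely for illustration; it is not what the source uses.

q.consume = lambda *args, **kwargs: None  # explicit no-op, same effect as q.consume = object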
Example #6
def get_qsize(app_name, queued=True, taken=True):
    """Get the number of objects in the queue"""
    return get_qbclient().LockingQueue(app_name).size(queued=queued,
                                                      taken=taken)
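A hedged usage sketch for the helper above; 'my_app' is a placeholder, and the meaning of the queued/taken flags (items still waiting vs. items currently locked by a worker) is an assumption inferred from these examples, not confirmed by the backend's documentation.

n_waiting = get_qsize('my_app', queued=True, taken=False)  # assumed: items waiting, not yet locked
n_total = get_qsize('my_app')                              # both flags default to True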
Example #7
def main(ns):
    """
    Fetch a job_id from the `app_name` queue and figure out what to do with it.

    If the job is runnable, execute it and then queue its children into their
    respective queues.  If it's not runnable, queue its parents into respective
    parent queues and remove the job from its own queue.
    If the job fails, either requeue it or mark it as permanently failed
    """
    assert ns.app_name in dt.get_task_names()
    if ns.bypass_scheduler:
        log.info(
            "Running a task without scheduling anything"
            " or fetching from a queue",
            extra=dict(app_name=ns.app_name, job_id=ns.job_id))
        assert ns.job_id
        ns.job_type_func(ns=ns)
        return

    log.info("Beginning Stolos", extra=dict(**ns.__dict__))
    q = qb.get_qbclient().LockingQueue(ns.app_name)
    if ns.job_id:
        lock = _handle_manually_given_job_id(ns)
        q.consume = object  # do nothing
    else:
        ns.job_id = q.get(timeout=ns.timeout)
        if not validate_job_id(
                app_name=ns.app_name, job_id=ns.job_id, q=q,
                timeout=ns.timeout):
            return
        try:
            lock = get_lock_if_job_is_runnable(app_name=ns.app_name,
                                               job_id=ns.job_id)
        except exceptions.NoNodeError:
            q.consume()
            log.exception(
                "Job failed. The job is queued, so why does its state not"
                " exist?  The Queue backend may be in an inconsistent state."
                " Consuming this job",
                extra=dict(app_name=ns.app_name, job_id=ns.job_id))
            return

    log.debug("Stolos got a job_id.",
              extra=dict(app_name=ns.app_name,
                         job_id=ns.job_id,
                         acquired_lock=bool(lock)))
    if lock is False:
        # infinite loop: some jobs will always requeue if lock is unobtainable
        log.info("Could not obtain a lock.  Will requeue and try again later",
                 extra=dict(app_name=ns.app_name, job_id=ns.job_id))
        _send_to_back_of_queue(q=q, app_name=ns.app_name, job_id=ns.job_id)
        return

    if not parents_completed(ns.app_name, ns.job_id, q=q, lock=lock):
        return

    log.info("Job starting!",
             extra=dict(app_name=ns.app_name, job_id=ns.job_id))
    try:
        ns.job_type_func(ns=ns)
    except exceptions.CodeError:  # assume error is previously logged
        _handle_failure(ns, q, lock)
        return
    except Exception as err:
        log.exception(("Job failed!  Unhandled exception in an application!"
                       " Fix ASAP because"
                       " it is unclear how to handle this failure.  %s: %s") %
                      (err.__class__.__name__, err),
                      extra=dict(app_name=ns.app_name,
                                 job_id=ns.job_id,
                                 failed=True))
        return
    _handle_success(ns, q, lock)
Example #8
def teardown_queue_backend(func_name):
    qb.get_qbclient().delete(func_name, _recursive=True)
    qb.get_qbclient().delete(makepath(func_name), _recursive=True)
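A short usage note for the teardown helper above: makepath is defined elsewhere in the Stolos test suite and is not shown here, so the sketch below only illustrates the call shape with a placeholder name.

teardown_queue_backend('test_some_function')  # recursively deletes the raw node and its makepath(...) counterpart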
Example #9
def get_qsize(app_name, queued=True, taken=True):
    """Get the number of objects in the queue"""
    return get_qbclient().LockingQueue(app_name).size(
        queued=queued, taken=taken)