Example #1
def _run_worker(
    db_url: str,
    tick_seconds: float = 1,
    worker_tag: str = "",
    cleanup_timeout: float = 300,
) -> None:
    """ Infinite loop that continuously monitors the DB for tasks,
    claims tasks, executes their code, and saves results.

    It also periodically performs cleanup (resetting orphaned tasks and
    deleting old results).

    :param db_url: full database url string, including credentials, e.g.
        postgres://postgres:test@localhost:5000/qless
    :param tick_seconds: worker will sleep for this long between polls to the DB for
        tasks and cleanup attempts.
    :param worker_tag: enables this worker to execute tasks with this tag
    :param cleanup_timeout: when performing cleanup, any worker which has not reported
        a heartbeat in `cleanup_timeout` seconds will be considered dead, and its
        tasks reset to PENDING
    """
    sql.startup(db_url)
    me = _register_worker(worker_tag)
    log(f"Worker started. Tag: {worker_tag}. Id = {me}")
    while _heartbeat(me):
        _cleanup(cleanup_timeout)
        sleep(tick_seconds)
        task = _claim_task(me, worker_tag)
        if task is not None:
            _run_task(task, me)
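# Note: Example #4 starts workers via `worker.start_local_workers`, which is not
# shown in these snippets. A minimal sketch of how it might wrap `_run_worker`,
# assuming one daemon process per worker (the signature is inferred from the
# call sites in Example #4):
from multiprocessing import Process


def start_local_workers(
    n_workers: int, db_url: str, worker_tag: str = "", cleanup_timeout: float = 300
) -> None:
    for _ in range(n_workers):
        Process(
            target=_run_worker,
            args=(db_url, 1.0, worker_tag, cleanup_timeout),
            daemon=True,
        ).start()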
Example #2
def start_local_postgres_docker_db() -> str:
    db_url = "postgres://*****:*****@localhost:5000/qless"
    os.system("docker kill pg-test")
    assert not os.system(
        "docker run --rm --name pg-test -e POSTGRES_PASSWORD=test -d -p 5000:5432 postgres:11"
    )
    while not "database system is ready" in os.popen(
            "docker logs pg-test").read():
        log.log("Waiting for DB to be ready...")
        sleep(0.2)
    sleep(0.5)
    return db_url
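# A quick smoke test for the container started above, assuming SQLAlchemy 1.x
# (matching the `engine.execute` style used in Example #3):
from sqlalchemy import create_engine

engine = create_engine(start_local_postgres_docker_db())
assert engine.execute("SELECT 1").scalar() == 1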
Example #3
def _make_qless_db_if_not_present(db: str) -> None:
    # We need an engine without the `qless` db name
    # NB: Autocommit is required to create databases
    engine = create_engine(db.replace("qless", ""),
                           isolation_level="AUTOCOMMIT")

    databases = engine.execute("SELECT datname FROM pg_database;").fetchall()
    databases = [d[0] for d in databases]
    if "qless" not in databases:
        conn = engine.connect()
        conn.execute("CREATE DATABASE qless")
        conn.close()
        log("Created database '/qless'")
Example #4
def run_test_e2e(db_url: str) -> None:
    client.startup(db_url)
    worker.start_local_workers(n_workers=1, db_url=db_url, worker_tag="tag A")
    worker.start_local_workers(n_workers=4,
                               db_url=db_url,
                               worker_tag="tag B",
                               cleanup_timeout=1)
    func = _make_test_function()

    # run a simple task
    task_id = client.submit(func, {"param": "abc"}, 123, requires_tag="tag B")
    _wait_for_true(lambda: client.get_task_result(task_id) is not None)
    result = client.get_task_result(task_id)
    assert result == len("abc") + 42
    log.log("[OK] Tasks run")

    # Tasks are rescheduled if their worker is dead:
    # start a task which takes longer than the expected heartbeat, so that the
    # worker is considered 'dead' and its task is rescheduled; this should
    # happen `n_retries` times before the task status becomes TIMEOUT
    task_id = client.submit(_sleep, {"seconds": 5},
                            123,
                            n_retries_if_worker_hangs=2)
    _wait_for_true(
        lambda: client.get_task_status(task_id) == TaskStatus.TIMEOUT)
    log.log("[OK] Orphaned tasks rescheduled")

    log.log("[OK] All OK! :)")
Example #5
def _run_task(task: Task, worker_id: int) -> None:
    """ Execute the task function and save its result if it completes or the exception
    if it errors

    :param task: the task to execute
    :param worker_id: the id of the worker running the task. This is compared
        against the owner recorded in the DB to check that this worker still owns
        the task, preventing multiple workers from working on the same task
    """
    func = _deserialise(task.func)
    params = _deserialise(task.kwargs)

    args = str(params)[:20]  # truncated, for logging only
    log(
        f"Starting task {task.id_}. Function: {func.__name__}. Args: {args}. "
        f"Worker: {worker_id}"
    )

    try:
        _save_results(task.id_, func(**params), worker_id, TaskStatus.DONE)
        log(f"Task {task.id_} completed successfully")
    except Exception as err:
        _save_results(task.id_, err, worker_id, TaskStatus.ERROR)
        log(f"Error while running task {task.id_}: {err}")
    finally:
        _set_worker_task_to_none(worker_id)
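# `_serialise` and `_deserialise` are not shown in these snippets. The
# `results_dill` column in Example #7 suggests `dill` is the serialiser, so a
# plausible sketch (storing raw bytes is an assumption):
from typing import Any

import dill


def _serialise(obj: Any) -> bytes:
    return dill.dumps(obj)


def _deserialise(blob: bytes) -> Any:
    return dill.loads(blob)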
Example #6
def _search_for_dead_workers_and_disown_their_tasks(cleanup_timeout: float) -> None:
    """ If a worker is dead, the task it was working on should be reset, so another
    worker can pick it up.

    :param cleanup_timeout: a worker will be considered dead if it hasn't updated its
        heartbeat on the DB in the last `cleanup_timeout` seconds
    """
    too_long_ago = datetime.now() - timedelta(seconds=cleanup_timeout)
    with sql.session_scope() as session:
        # Find any workers whose last heartbeat was more than `cleanup_timeout` ago
        dead_workers = (
            session.query(WorkerRecord)
            .with_for_update()
            .filter(WorkerRecord.last_heartbeat < too_long_ago)
            .all()
        )
        for dead_worker in dead_workers:
            task_id = dead_worker.working_on_task_id
            if task_id is not None:
                # Reset the task (if any) that the worker was working on
                orphan_task = session.query(TaskRecord).with_for_update().get(task_id)
                log(
                    f"Worker {dead_worker.id_} has not responded in {cleanup_timeout} "
                    f"seconds. Its task {task_id} will be disowned, and..."
                )
                orphan_task.owner = NO_OWNER

                retries = orphan_task.retries
                if retries == 0:
                    log(f"...Task Status set to TIMEOUT. No more retries left")
                    orphan_task.status = TaskStatus.TIMEOUT.value
                else:
                    log(f"...Task status set to PENDING. {retries} retries left")
                    orphan_task.status = TaskStatus.PENDING.value
                    orphan_task.retries = retries - 1
                session.merge(orphan_task)

                dead_worker.working_on_task_id = None
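# `sql.session_scope` is not shown in these snippets; it is presumably the
# standard SQLAlchemy commit-or-rollback context manager, sketched here
# (`Session` is assumed to be a module-level `sessionmaker`):
from contextlib import contextmanager


@contextmanager
def session_scope():
    session = Session()
    try:
        yield session
        session.commit()  # row locks taken with `with_for_update` are released here
    except Exception:
        session.rollback()
        raise
    finally:
        session.close()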
Example #7
def _save_results(
    task_id: int, results: Any, worker_id: int, status: Optional[TaskStatus] = None
) -> None:
    """ Saves the result for a given task (either the return value of the function
    executed or the exception raised).

    :param task_id: unique identifier for the task. This is created when the task is
        submitted.
    :param results: any object that can be serialised
    :param worker_id: the identifier for the worker attempting to save the results. This
        is needed because only workers that legitimately own a task are allowed to save
        results for it. This is to prevent multiple workers working on the same task
    :param status: if set, the task status will also be updated to this
    """
    serialised_results = _serialise(results)
    with sql.session_scope() as session:
        task = session.query(TaskRecord).get(task_id)

        # Only save results if the task is RUNNING and this worker still owns it
        if task.status == TaskStatus.RUNNING.value and task.owner == worker_id:
            log(
                f"Saving result for task {task_id}, with size = {len(serialised_results)}"
            )
            task.results_dill = serialised_results
            if status is not None:
                task.status = status.value
            session.merge(task)
        elif task.status != TaskStatus.RUNNING.value:
            log(f"Task {task_id} not RUNNING. Status={task.status}. Results discarded.")
        elif task.owner != worker_id:
            log(
                f"Worker {worker_id} running task {task_id}, but task owner is: "
                f"{task.owner}. Results discarded."
            )

        worker = session.query(WorkerRecord).get(worker_id)
        worker.working_on_task_id = None
        session.merge(worker)
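# A sketch of the `TaskStatus` enum used throughout these examples. The code
# above only relies on the member names, so the values below are assumptions:
from enum import Enum


class TaskStatus(Enum):
    PENDING = "pending"
    RUNNING = "running"
    DONE = "done"
    ERROR = "error"
    TIMEOUT = "timeout"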
Example #8
def _create_all_tables() -> None:
    # `create_all` skips tables that already exist (checkfirst=True by default),
    # so it is safe to call unconditionally
    BASE.metadata.create_all(_engine)
    log("Created all tables (if not already present)")
Example #9
def reset() -> None:
    """ Destroy all task data. Drops all tables and recreates them """
    BASE.metadata.drop_all(_engine)
    log("Dropped all tables")
    _create_all_tables()
Example #10
        creator=creator,
        status=TaskStatus.RUNNING.value,
        func=func,
        kwargs=kwargs,
        results="",
    )


def _help() -> str:
    return """
Usage:
    $ python -m queueless.worker POSTGRES_DB_URL [TAG] [TICK_SECONDS]

    POSTGRES_DB_URL: full postgres connection string
    TAG: arbitrary string which enables this worker to execute tasks with the same tag
    TICK_SECONDS: seconds to sleep between polls of the DB (default: 1.0)

Example:
    $ python -m queueless.worker postgres://postgres:test@localhost:5000/qless my_tag_1
"""


if __name__ == "__main__":
    args = sys.argv
    if len(args) < 2:
        log(_help())
    else:
        db_connection_string = args[1]
        tag = args[2] if len(args) > 2 else ""
        tick = float(args[3]) if len(args) > 3 else 1.0
        _run_worker(db_connection_string, tick, tag)