from datetime import datetime

from rq import Worker
from rq.job import Job

# server, job_repository, log, post_batch_job, poll_batch_job and the batch-job
# helper functions used below come from the surrounding application and are not
# part of this excerpt.


def kill_all_zombie_workers():
    workers = Worker.all(connection=server.get_redis().get_redis_conn())
    for worker in workers:
        # a worker whose state cannot be read back from Redis is treated as a zombie
        if worker.state == "?":
            log.info(f"kill_all_zombie_workers : {worker.key} is found to be zombie")
            job = worker.get_current_job()
            if job is not None:
                # close out the in-flight job and park it on the failed queue so it is not lost
                job.ended_at = datetime.utcnow()
                worker.failed_queue.quarantine(
                    job, exc_info=("Dead worker", "Moving job to failed queue")
                )
            log.info(f"kill_all_zombie_workers : {worker.key} registering death")
            worker.register_death()
    return {"success": True}
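

# Usage sketch (not part of the original example): the zombie sweep above can be
# kicked off as a low-priority background job through the same queue wrapper the
# other handlers use. enqueue_job and its "priority" kwarg are assumed from this
# project's code, not from the core RQ API.
def schedule_zombie_sweep():
    return server.get_job_queue().enqueue_job(kill_all_zombie_workers, priority="low")
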
def view_or_update_batch_job(batch_job_id):
    batch_job = job_repository.find_one(batch_job_id)
    if batch_job is None:
        return {
            "success": False,
            "statusCode": 400,
            "error": f"Batch job not found with id {batch_job_id}",
        }
    if batch_job["finished"]:
        return {"success": True, "body": batch_job}
    job_id_array = [job["_id"] for job in batch_job["jobs"]]
    job_array = Job.fetch_many(
        job_id_array, connection=server.get_redis().get_redis_conn()
    )

    job_array_with_meta = update_job_array_with_meta(job_array)
    job_finished = is_batch_job_finished(job_array_with_meta)
    if job_finished:
        # every underlying job is done: enqueue the post-processing step for this batch
        server.get_job_queue().enqueue_job(
            post_batch_job,
            priority="low",
            args=(batch_job_id,),
            kwargs={"job_id": batch_job_id},
        )

    update_resp = job_repository.update_one(
        batch_job_id,
        {
            "finished": job_finished,
            "stats": batch_job_stats(job_array_with_meta),
            "jobs": job_array_with_meta,
        },
    )

    if update_resp.acknowledged:
        # schedule another poll so the stored batch keeps refreshing until it is finished
        server.get_job_queue().enqueue_job(
            poll_batch_job, priority="low", args=(batch_job_id,)
        )
        updated_batch_job = job_repository.find_one(batch_job_id)
        return {"success": True, "body": updated_batch_job}
    else:
        log.error(
            f"view_or_update_batch_job : failed to update batch job with id {batch_job_id}"
        )
        return {
            "success": False,
            "error": f"Batch job update failed with id {batch_job_id}",
        }
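

# poll_batch_job is enqueued above but not shown in this example. A minimal sketch
# of what it might look like, assuming it simply re-runs the handler so the stored
# batch document keeps refreshing until every job has finished (an assumption, not
# the project's actual implementation):
def poll_batch_job_sketch(batch_job_id):
    return view_or_update_batch_job(batch_job_id)
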
def get_all_workers():
    worker_stats = {}
    workers = Worker.all(connection=server.get_redis().get_redis_conn())
    for worker in workers:
        # stringify every field so the payload is JSON-serialisable as-is
        worker_stats[worker.name] = {
            "key": str(worker.key),
            "name": str(worker.name),
            "hostname": str(worker.hostname),
            "pid": str(worker.pid),
            "state": str(worker.state),
            "birthDate": str(worker.birth_date),
            "lastHeartbeat": str(worker.last_heartbeat),
        }
    return {"success": True, "body": worker_stats}


def restart_batch_job(batch_job_id):
    batch_job = job_repository.find_one(batch_job_id)
    redis_conn = server.get_redis().get_redis_conn()
    if batch_job is None:
        return {
            "success": False,
            "statusCode": 400,
            "error": f"Batch job not found with id {batch_job_id}",
        }
    all_jobs = batch_job["jobs"]
    for job_meta in all_jobs:
        if job_meta["status"] != "finished":
            log.info(f"For batch job {batch_job_id}, requeue job {job_meta['_id']}")
            job = Job.fetch(job_meta["_id"], redis_conn)
            job.requeue()
    # mark the batch as unfinished again and restart the status-polling loop
    job_repository.update_one(batch_job_id, {"finished": False})
    server.get_job_queue().enqueue_job(
        poll_batch_job, priority="low", args=(batch_job_id,)
    )
    return {"success": True, "batch_job_id": batch_job_id}
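

# The handlers above return plain dicts with "success"/"statusCode" keys, which
# suggests they sit behind an HTTP API. A minimal wiring sketch, assuming Flask
# (the framework, route paths and jsonify usage are illustrative, not taken from
# the original example):
from flask import Flask, jsonify

api = Flask(__name__)


@api.route("/batch-jobs/<batch_job_id>", methods=["GET", "PUT"])
def batch_job_route(batch_job_id):
    resp = view_or_update_batch_job(batch_job_id)
    return jsonify(resp), resp.get("statusCode", 200 if resp["success"] else 500)


@api.route("/workers", methods=["GET"])
def workers_route():
    return jsonify(get_all_workers())
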
Example #5
import multiprocessing as mpu
import sys
import uuid
import socket

from rq import Worker, Connection

from src.common.logger import Logger
from src.app import server

log = Logger()
redis_conn = server.get_redis().get_redis_conn()


def get_ip_and_host():
    ip = "0.0.0.0"
    host = ""
    try:
        host = socket.gethostname()
        ip = socket.gethostbyname(socket.gethostname())
    except Exception as e:
        log.info(f"get_ip_and_host : Exception occurred {e}")
        ip = socket.gethostbyname("")
    return ip + "_" + host


def start_worker(process_count):
    log.info(f"starting worker {process_count}")
    with Connection(redis_conn):
        # the original snippet is truncated here; the rest of this body is a plausible completion only (queue names and worker naming are assumptions)
        worker_name = f"{get_ip_and_host()}_{uuid.uuid4()}"
        if process_count % 4 == 0:
            w = Worker(["low"], name=worker_name)
        else:
            w = Worker(["high", "default", "low"], name=worker_name)
        w.work()
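

# The example imports multiprocessing as "mpu" and parameterizes start_worker by a
# process index, which suggests several RQ worker processes are launched side by
# side. A minimal spawning sketch (the process count and join behaviour are
# assumptions; they are not shown in the truncated original):
def start_all_workers(num_processes=4):
    procs = []
    for i in range(num_processes):
        p = mpu.Process(target=start_worker, args=(i,))
        p.start()
        procs.append(p)
    for p in procs:
        p.join()
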
Example #6
def ping_redis():
    return server.get_redis().health()