Example #1
def delete_workers(worker_ids, signal_to_pass=signal.SIGINT):
    """
    Expects worker IDs without the RQ Redis worker namespace prefix (rq:worker:).
    By default performs a warm shutdown (SIGINT).

    :param worker_ids: list of worker IDs to delete
    :param signal_to_pass: signal to send to each matching worker on this host
    :return: True on success, False if looking up a worker failed
    """

    # find worker instance by key, refreshes worker implicitly
    def attach_rq_worker_prefix(worker_id):
        return Worker.redis_worker_namespace_prefix + worker_id

    try:
        for worker_instance in [
                Worker.find_by_key(attach_rq_worker_prefix(worker_id))
                for worker_id in worker_ids
        ]:
            # kill if on same instance
            if socket.gethostname() == worker_instance.hostname.decode(
                    'utf-8'):
                os.kill(worker_instance.pid, signal_to_pass)
    except ValueError:
        logger.warning('Problem in deleting workers {0}'.format(worker_ids))
        return False

    return True
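For context, a minimal sketch of how delete_workers might be invoked; the imports, the Redis connection setup, and the use of rq's Connection context manager (which provides the default connection that Worker.find_by_key falls back to here) are assumptions for illustration, not part of the snippet above.

# Minimal usage sketch (imports and connection setup are assumptions)
import signal

from redis import Redis
from rq import Connection, Worker

redis_conn = Redis()

with Connection(redis_conn):
    # worker names are the part after the "rq:worker:" prefix, which is
    # what delete_workers expects
    worker_ids = [w.name for w in Worker.all()]

    # warm shutdown by default; pass e.g. signal.SIGTERM for a different signal
    delete_workers(worker_ids, signal_to_pass=signal.SIGINT)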
Example #2
def worker_details(request, queue_index, key):
    queue_index = int(queue_index)
    queue = get_queue_by_index(queue_index)
    worker = Worker.find_by_key(key, connection=queue.connection)

    try:
        # Convert microseconds to milliseconds
        worker.total_working_time = worker.total_working_time / 1000
    except AttributeError:
        # older versions of rq do not have `total_working_time`
        worker.total_working_time = "-"

    queue_names = ', '.join(worker.queue_names())

    def get_job_graceful(worker):
        if not worker:
            return None
        try:
            return worker.get_current_job()
        except NoSuchJobError:
            return None

    context_data = {
        'queue': queue,
        'queue_index': queue_index,
        'worker': worker,
        'queue_names': queue_names,
        'job': get_job_graceful(worker)
    }
    return render(request, 'django_rq/worker_details.html', context_data)
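The view receives queue_index and key as strings captured from the URL; a hypothetical urls.py entry routing to it might look as follows (the pattern and the route name are assumptions, not django_rq's actual URL configuration).

# Hypothetical URL wiring (pattern and route name are assumptions)
from django.urls import re_path

urlpatterns = [
    re_path(r'^queues/(?P<queue_index>\d+)/workers/(?P<key>[^/]+)/$',
            worker_details,
            name='worker_details'),
]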
Example #3
def delete_workers(worker_ids, signal_to_pass=signal.SIGINT):
    """
    Expects worker IDs without the RQ Redis worker namespace prefix (rq:worker:).
    By default performs a warm shutdown; workers on remote hosts are reached over
    SSH and currently always receive SIGINT.

    :param worker_ids: list of worker IDs to delete
    :param signal_to_pass: signal to send to workers running on this host
    :return: None
    """
    # find worker instance by key, refreshes worker implicitly
    def attach_rq_worker_prefix(worker_id):
        return Worker.redis_worker_namespace_prefix + worker_id

    for worker_instance in [Worker.find_by_key(attach_rq_worker_prefix(worker_id))
                            for worker_id in worker_ids]:
        requested_hostname = worker_instance.hostname
        requested_hostname = requested_hostname.decode('utf-8')
        # kill if on same instance
        if socket.gethostname() == requested_hostname:
            os.kill(worker_instance.pid, signal_to_pass)
        else:
            required_host_ip = socket.gethostbyname(requested_hostname)
            fabric_config_wrapper = Config()
            # loads from user level ssh config (~/.ssh/config) and system level
            # config /etc/ssh/ssh_config
            fabric_config_wrapper.load_ssh_config()
            # to use its ssh_config parser abilities
            paramiko_ssh_config = fabric_config_wrapper.base_ssh_config
            for hostname in paramiko_ssh_config.get_hostnames():
                ssh_info = paramiko_ssh_config.lookup(hostname)
                available_host_ip = ssh_info.get('hostname')
                if available_host_ip == required_host_ip:
                    process_owner = None
                    # make connection via fabric and send SIGINT for now
                    ssh_connection = Connection(hostname)
                    try:
                        # find owner of process: https://unix.stackexchange.com/questions/284934/return-owner-of-process-given-pid
                        process_owner = ssh_connection.run('ps -o user= -p {0}'.format(worker_instance.pid))
                        # assumes the logged-in user has permission to kill the process,
                        # so this works without sudo; other cases still need handling
                        process_owner = process_owner.stdout.strip(' \n\t')
                        result_kill = ssh_connection.run('kill -{0} {1}'.format(2, worker_instance.pid), hide=True)
                        if result_kill.failed:
                            raise RQMonitorException("Some issue occured on running command {0.command!r} "
                                                     "on {0.connection.host}, we got stdout:\n{0.stdout}"
                                                     "and stderr:\n{0.stderr}".format(result_kill))
                    except UnexpectedExit as e:
                        stdout, stderr = e.streams_for_display()
                        # plan to accept a password from the user and proceed with sudo in the future
                        if "Operation not permitted" in stderr.strip(' \n\t'):
                            raise RQMonitorException('Logged in user {0} does not have permission to kill worker'
                                                     ' process with pid {1} on {2} because it is owned'
                                                     ' by user {3}'.format(ssh_info.get('user'), worker_instance.pid,
                                                                           required_host_ip, process_owner))
                        raise RQMonitorException('Invoke\'s UnexpectedExit occurred with '
                                                 'stdout: {0}\nstderr: {1}\nresult: {2}\nreason: {3}'.format(stdout.strip(' \n\t'),
                                                                                                              stderr.strip(' \n\t'),
                                                                                                              e.result, e.reason))
                    return
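For reference, the imports this variant relies on would look roughly like the following; the exact module paths are assumptions, and RQMonitorException is the surrounding project's own exception class rather than a third-party API.

# Imports assumed by the snippet above (module paths are a sketch)
import os
import signal
import socket

from fabric import Config, Connection          # Fabric 2.x API
from invoke.exceptions import UnexpectedExit   # raised by Connection.run on a non-zero exit
from rq.worker import Worker
# RQMonitorException is defined by the host project itself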
Example #4
def acquire_task(db: Redis[bytes], op_hash: hash_t,
                 worker_name: Optional[str]) -> bool:
    """Check if someone else is currently executing this job."""
    if worker_name is None:
        # running in non-distributed mode
        return True

    owner_key = join(OPERATIONS, op_hash, "owner")
    response = db.setnx(owner_key, worker_name)
    if response:
        return True
    else:
        key = db.get(owner_key)
        if key is None:
            # the other worker just dropped off; to avoid a race
            # condition, wait and try again later
            logger.info("issue acquiring lock, will try again later")
            return False

        holder = key.decode()
        logger.info(f"job currently held by {holder}")
        if holder == worker_name:
            logger.error("other worker is myself! HOW!?")
            return True

        # grab other holder
        worker = Worker.find_by_key(Worker.redis_worker_namespace_prefix +
                                    holder,
                                    connection=db)
        if worker is None:
            # holder is gooooonnnneee. let's take over.
            db.set(owner_key, worker_name)
            logger.warning("other worker is gone, I'm taking over")
            return True
        else:
            if worker.state == "busy":
                # get other worker's job
                ojob = worker.get_current_job()
                if op_hash in ojob.description:
                    logger.info("will try again later")
                    return False
                else:
                    db.set(owner_key, worker_name)
                    logger.error("other worker has moved on, I'm taking over")
                    return True
            else:
                db.set(owner_key, worker_name)
                logger.error("other worker is not working, I'm taking over")
                return True
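The setnx-based claim above is only half of the locking pattern; a sketch of a matching release step is shown below. release_task is hypothetical, and OPERATIONS, join and hash_t are assumed to be the same helpers and type alias used by acquire_task.

# Hypothetical counterpart to acquire_task: give up the claim once the job is done.
# OPERATIONS, join and hash_t are assumed to be the helpers/type alias used above.
from typing import Optional

from redis import Redis


def release_task(db: "Redis[bytes]", op_hash: "hash_t",
                 worker_name: Optional[str]) -> None:
    if worker_name is None:
        # non-distributed mode: nothing was claimed
        return

    owner_key = join(OPERATIONS, op_hash, "owner")
    current = db.get(owner_key)
    # only drop the key if this worker is still the registered owner
    if current is not None and current.decode() == worker_name:
        db.delete(owner_key)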
Example #5
def worker_details(request, queue_index, key):
    queue_index = int(queue_index)
    queue = get_queue_by_index(queue_index)
    worker = Worker.find_by_key(key, connection=queue.connection)
    # Convert microseconds to milliseconds
    worker.total_working_time = worker.total_working_time / 1000

    queue_names = ', '.join(worker.queue_names())

    context_data = {
        'queue': queue,
        'queue_index': queue_index,
        'worker': worker,
        'queue_names': queue_names,
        'job': worker.get_current_job(),
        'total_working_time': worker.total_working_time * 1000
    }
    return render(request, 'django_rq/worker_details.html', context_data)
Example #6
def worker_info_api():
    if request.method == 'GET':
        worker_id = request.args.get('worker_id', None)

        if worker_id is None:
            raise RQMonitorException('Worker ID not received!',
                                     status_code=400)

        worker_instance = Worker.find_by_key(
            Worker.redis_worker_namespace_prefix + worker_id)
        return {
            'worker_host_name': worker_instance.hostname.decode('utf-8'),
            'worker_ttl': worker_instance.default_worker_ttl,
            'worker_result_ttl': worker_instance.default_result_ttl,
            'worker_name': worker_instance.name,
            'worker_birth_date': worker_instance.birth_date.strftime('%d-%m-%Y %H:%M:%S')
                                 if worker_instance.birth_date is not None else "Not Available",
            'worker_death_date': worker_instance.death_date.strftime('%d-%m-%Y %H:%M:%S')
                                 if worker_instance.death_date is not None else "Is Alive",
            'worker_last_cleaned_at': worker_instance.last_cleaned_at.strftime('%d-%m-%Y %H:%M:%S')
                                      if worker_instance.last_cleaned_at is not None else "Not Yet Cleaned",
            'worker_failed_job_count': worker_instance.failed_job_count,
            'worker_successful_job_count': worker_instance.successful_job_count,
            'worker_job_monitoring_interval': worker_instance.job_monitoring_interval,
            'worker_last_heartbeat': worker_instance.last_heartbeat.strftime('%d-%m-%Y %H:%M:%S')
                                     if worker_instance.last_heartbeat is not None else "Not Available",
            'worker_current_job_id': worker_instance.get_current_job_id(),
        }
    raise RQMonitorException('Invalid HTTP Request type', status_code=400)
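The handler above reads from Flask's request object but omits its route registration; one hypothetical way to wire it up is sketched below (the URL path and the app object are assumptions).

# Hypothetical route registration (URL path and app object are assumptions)
from flask import Flask

app = Flask(__name__)
app.add_url_rule('/api/workers/info', view_func=worker_info_api, methods=['GET'])

# Example request:
#   GET /api/workers/info?worker_id=<worker name without the rq:worker: prefix>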
Example #7
def worker_details(request, queue_index, key):
    queue_index = int(queue_index)
    queue = get_queue_by_index(queue_index)
    worker = Worker.find_by_key(key, connection=queue.connection)

    try:
        # Convert microseconds to milliseconds
        worker.total_working_time = worker.total_working_time / 1000
    except AttributeError:
        # older versions of rq do not have `total_working_time`
        worker.total_working_time = "-"

    queue_names = ', '.join(worker.queue_names())

    context_data = {
        'queue': queue,
        'queue_index': queue_index,
        'worker': worker,
        'queue_names': queue_names,
        'job': worker.get_current_job()
    }
    return render(request, 'django_rq/worker_details.html', context_data)
Example #8
def send_signal_worker(worker_id):
    worker_instance = Worker.find_by_key(Worker.redis_worker_namespace_prefix +
                                         worker_id)
    # signum=2 requests a warm shutdown (SIGINT); frame is just a placeholder value here
    worker_instance.request_stop(signum=2, frame=5)
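For comparison, the same lookup done directly with an explicit Redis connection; the connection setup and the worker name are assumptions.

# Minimal sketch of a direct lookup (connection and worker name are assumptions)
from redis import Redis
from rq.worker import Worker

redis_conn = Redis()
worker_key = Worker.redis_worker_namespace_prefix + 'example-worker'  # hypothetical worker name
worker = Worker.find_by_key(worker_key, connection=redis_conn)

if worker is not None:
    print(worker.name, worker.get_state())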