def delete_workers(worker_ids, signal_to_pass=signal.SIGINT):
    """
    Send a signal to every listed RQ worker that runs on this host.

    Expect worker ID without RQ REDIS WORKER NAMESPACE PREFIX of rq:worker:
    By default performs warm shutdown (SIGINT).

    Workers whose recorded hostname differs from this machine's hostname are
    left untouched. A worker id that no longer resolves to a worker is logged
    and skipped instead of aborting the whole batch.

    :param worker_ids: list of worker id's to delete
    :param signal_to_pass: signal delivered to the worker process via os.kill
    :return: False when a ValueError was raised while processing the workers,
        True otherwise
    """
    # Loop invariant: this machine's hostname does not change between workers.
    local_hostname = socket.gethostname()

    try:
        # Resolve every worker before sending any signal, so a ValueError
        # raised by a lookup aborts the batch before any process is touched.
        workers = [
            (
                worker_id,
                Worker.find_by_key(
                    Worker.redis_worker_namespace_prefix + worker_id),
            )
            for worker_id in worker_ids
        ]

        for worker_id, worker_instance in workers:
            if worker_instance is None:
                # The lookup found nothing for this id; there is no process
                # to signal.
                logger.warning(
                    'Worker {0} not found, skipping'.format(worker_id))
                continue

            worker_hostname = worker_instance.hostname
            # The hostname may be stored as bytes or already as text.
            if isinstance(worker_hostname, bytes):
                worker_hostname = worker_hostname.decode('utf-8')

            # kill if on same instance
            if local_hostname == worker_hostname:
                os.kill(worker_instance.pid, signal_to_pass)
    except ValueError:
        logger.warning('Problem in deleting workers {0}'.format(worker_ids))
        return False
    return True
def worker_details(request, queue_index, key):
    """Render the detail page for a single RQ worker.

    :param request: the incoming HTTP request
    :param queue_index: index of the queue (converted with ``int``) whose
        connection is used to look the worker up
    :param key: Redis key of the worker
    :return: the rendered ``django_rq/worker_details.html`` response
    :raises Http404: when no worker is found for ``key``
    """
    # Function-scope import keeps this block self-contained.
    from django.http import Http404

    queue_index = int(queue_index)
    queue = get_queue_by_index(queue_index)
    worker = Worker.find_by_key(key, connection=queue.connection)

    # Guard before the first dereference; checking only when fetching the
    # job (as before) was too late because the worker is used right below.
    if worker is None:
        raise Http404("Couldn't find worker with this key: %s" % key)

    try:
        # Convert microseconds to milliseconds
        worker.total_working_time = worker.total_working_time / 1000
    except AttributeError:
        # older version of rq do not have `total_working_time`
        worker.total_working_time = "-"

    queue_names = ', '.join(worker.queue_names())

    try:
        job = worker.get_current_job()
    except NoSuchJobError:
        # The worker references a job that can no longer be fetched.
        job = None

    context_data = {
        'queue': queue,
        'queue_index': queue_index,
        'worker': worker,
        'queue_names': queue_names,
        'job': job,
    }
    return render(request, 'django_rq/worker_details.html', context_data)
def delete_workers(worker_ids, signal_to_pass=signal.SIGINT):
    """
    Send a signal to RQ workers, locally or over SSH.

    Expect worker ID without RQ REDIS WORKER NAMESPACE PREFIX of rq:worker:
    By default performs warm shutdown (SIGINT).

    A worker whose hostname equals this machine's hostname is signalled with
    os.kill. For any other worker the SSH configuration is searched for an
    entry whose ``hostname`` equals the resolved IP of the worker's host, and
    ``kill`` is run there through fabric. If no SSH entry matches, nothing is
    sent for that worker.

    :param worker_ids: list of worker id's to delete
    :param signal_to_pass: signal to deliver (used for both the local and the
        remote kill)
    :return: None
    :raises RQMonitorException: when a worker id does not resolve to a worker
        or when the remote kill fails
    """
    # Loop invariant: this machine's hostname does not change between workers.
    local_hostname = socket.gethostname()

    # find worker instance by key, refreshes worker implicitly.
    # All lookups happen before the first signal is sent.
    workers = [
        (
            worker_id,
            Worker.find_by_key(
                Worker.redis_worker_namespace_prefix + worker_id),
        )
        for worker_id in worker_ids
    ]

    for worker_id, worker_instance in workers:
        if worker_instance is None:
            raise RQMonitorException(
                'Worker {0} not found'.format(worker_id))

        requested_hostname = worker_instance.hostname
        # The hostname may be stored as bytes or already as text.
        if isinstance(requested_hostname, bytes):
            requested_hostname = requested_hostname.decode('utf-8')

        # kill if on same instance
        if local_hostname == requested_hostname:
            os.kill(worker_instance.pid, signal_to_pass)
        else:
            _kill_remote_worker(
                worker_instance.pid, requested_hostname, signal_to_pass)
    return


def _kill_remote_worker(pid, worker_hostname, signal_to_pass):
    """Run ``kill`` for ``pid`` on the SSH-configured host matching ``worker_hostname``."""
    required_host_ip = socket.gethostbyname(worker_hostname)
    fabric_config_wrapper = Config()
    # loads from user level ssh config (~/.ssh/config) and system level
    # config /etc/ssh/ssh_config
    fabric_config_wrapper.load_ssh_config()
    # to use its ssh_config parser abilities
    paramiko_ssh_config = fabric_config_wrapper.base_ssh_config

    for hostname in paramiko_ssh_config.get_hostnames():
        ssh_info = paramiko_ssh_config.lookup(hostname)
        if ssh_info.get('hostname') != required_host_ip:
            continue

        process_owner = None
        # The context manager closes the SSH connection on every exit path.
        with Connection(hostname) as ssh_connection:
            try:
                # find owner of process
                # https://unix.stackexchange.com/questions/284934/return-owner-of-process-given-pid
                owner_result = ssh_connection.run(
                    'ps -o user= -p {0}'.format(pid))
                # have permission to kill so this works without sudo
                # need to plan for other cases
                process_owner = owner_result.stdout.strip(' \n\t')
                # int() turns a signal enum member into its plain number.
                result_kill = ssh_connection.run(
                    'kill -{0} {1}'.format(int(signal_to_pass), pid),
                    hide=True)
                if result_kill.failed:
                    raise RQMonitorException(
                        "Some issue occured on running command {0.command!r} "
                        "on {0.connection.host}, we got stdout:\n{0.stdout}"
                        "and stderr:\n{0.stderr}".format(result_kill))
            except UnexpectedExit as e:
                stdout, stderr = e.streams_for_display()
                # plan to accept password from user and proceed with sudo
                # in future
                if "Operation not permitted" in stderr.strip(' \n\t'):
                    raise RQMonitorException(
                        'Logged in user {0} does not have permission to kill worker'
                        ' process with pid {1} on {2} because it is owned '
                        ' by user {3}'.format(ssh_info.get('user'), pid,
                                              required_host_ip, process_owner))
                raise RQMonitorException(
                    'Invoke\'s UnexpectedExit occurred with'
                    'stdout: {0}\nstderr: {1}\nresult: {2}\nreason {3}'.format(
                        stdout.strip(' \n\t'), stderr.strip(' \n\t'),
                        e.result, e.reason))

        # The signal was delivered; further SSH aliases that map to the same
        # address must not signal the same process again.
        return
def acquire_task(db: Redis[bytes], op_hash: hash_t, worker_name: Optional[str]) -> bool:
    """Check if someone else is currently executing this job.

    Tries to record ``worker_name`` under the operation's "owner" key with
    ``setnx``. If another owner is already recorded, that worker is looked up
    and ownership is taken over when the worker is gone, is not busy, or is
    busy with a job whose description does not contain ``op_hash``.

    :param db: Redis connection holding the owner key and the worker records
    :param op_hash: hash identifying the operation
    :param worker_name: name of the calling worker, or None when running in
        non-distributed mode
    :return: True if the caller may execute the job, False if it should try
        again later
    """
    if worker_name is None:
        # running in non-distributed mode
        return True

    owner_key = join(OPERATIONS, op_hash, "owner")
    if db.setnx(owner_key, worker_name):
        return True

    key = db.get(owner_key)
    if key is None:
        # the other worker just dropped off right now;
        # to avoid a race condition, will wait till later.
        logger.info("issue acquiring lock, will try again later")
        return False

    holder = key.decode()
    logger.info(f"job currently held by {holder}")
    if holder == worker_name:
        logger.error("other worker is myself! HOW!?")
        return True

    # grab other holder
    worker = Worker.find_by_key(Worker.redis_worker_namespace_prefix + holder, connection=db)
    if worker is None:
        # holder is gone. let's take over.
        db.set(owner_key, worker_name)
        logger.warning("other worker is gone, I'm taking over")
        return True

    if worker.state != "busy":
        db.set(owner_key, worker_name)
        logger.error("other worker is not working, I'm taking over")
        return True

    # get other worker's job
    ojob = worker.get_current_job()
    # A busy worker may still report no current job (or a job without a
    # description); that cannot be this operation, so it counts as moved on.
    description = ojob.description if ojob is not None else None
    if description is not None and op_hash in description:
        logger.info("will try again later")
        return False

    db.set(owner_key, worker_name)
    logger.error("other worker has moved on, I'm taking over")
    return True
def worker_details(request, queue_index, key):
    """Render the detail page for a single RQ worker.

    :param request: the incoming HTTP request
    :param queue_index: index of the queue (converted with ``int``) whose
        connection is used to look the worker up
    :param key: Redis key of the worker
    :return: the rendered ``django_rq/worker_details.html`` response
    :raises Http404: when no worker is found for ``key``
    """
    # Function-scope import keeps this block self-contained.
    from django.http import Http404

    queue_index = int(queue_index)
    queue = get_queue_by_index(queue_index)
    worker = Worker.find_by_key(key, connection=queue.connection)

    # A missing worker would otherwise fail on the first attribute access.
    if worker is None:
        raise Http404("Couldn't find worker with this key: %s" % key)

    # Convert microseconds to milliseconds
    worker.total_working_time = worker.total_working_time / 1000

    queue_names = ', '.join(worker.queue_names())

    context_data = {
        'queue': queue,
        'queue_index': queue_index,
        'worker': worker,
        'queue_names': queue_names,
        'job': worker.get_current_job(),
        # The millisecond value stored on the worker, scaled by 1000 again.
        'total_working_time': worker.total_working_time * 1000,
    }
    return render(request, 'django_rq/worker_details.html', context_data)
def worker_info_api():
    """Return details of one RQ worker as a dictionary.

    Reads ``worker_id`` (without the RQ worker namespace prefix) from the
    query string of a GET request.

    :return: dict of worker attributes; dates are formatted as
        ``%d-%m-%Y %H:%M:%S`` or replaced by a placeholder text when unset
    :raises RQMonitorException: with status 400 for a non-GET request or a
        missing ``worker_id``, and with status 404 when no worker is found
    """
    if request.method != 'GET':
        raise RQMonitorException('Invalid HTTP Request type', status_code=400)

    worker_id = request.args.get('worker_id', None)
    if worker_id is None:
        raise RQMonitorException('Worker ID not received !', status_code=400)

    worker_instance = Worker.find_by_key(
        Worker.redis_worker_namespace_prefix + worker_id)
    if worker_instance is None:
        # Report an unknown worker as a client error rather than failing on
        # the first attribute access below.
        raise RQMonitorException(
            'Worker {0} not found'.format(worker_id), status_code=404)

    def format_date(value, placeholder):
        # Shared formatting for all date fields of the response.
        if value is None:
            return placeholder
        return value.strftime('%d-%m-%Y %H:%M:%S')

    host_name = worker_instance.hostname
    # The hostname may be stored as bytes or already as text.
    if isinstance(host_name, bytes):
        host_name = host_name.decode('utf-8')

    return {
        'worker_host_name': host_name,
        'worker_ttl': worker_instance.default_worker_ttl,
        'worker_result_ttl': worker_instance.default_result_ttl,
        'worker_name': worker_instance.name,
        'worker_birth_date': format_date(
            worker_instance.birth_date, "Not Available"),
        'worker_death_date': format_date(
            worker_instance.death_date, "Is Alive"),
        'worker_last_cleaned_at': format_date(
            worker_instance.last_cleaned_at, "Not Yet Cleaned"),
        'worker_failed_job_count': worker_instance.failed_job_count,
        'worker_successful_job_count': worker_instance.successful_job_count,
        'worker_job_monitoring_interval': worker_instance.job_monitoring_interval,
        'worker_last_heartbeat': format_date(
            worker_instance.last_heartbeat, "Not Available"),
        'worker_current_job_id': worker_instance.get_current_job_id(),
    }
def worker_details(request, queue_index, key):
    """Render the detail page for a single RQ worker.

    :param request: the incoming HTTP request
    :param queue_index: index of the queue (converted with ``int``) whose
        connection is used to look the worker up
    :param key: Redis key of the worker
    :return: the rendered ``django_rq/worker_details.html`` response
    :raises Http404: when no worker is found for ``key``
    """
    # Function-scope imports keep this block self-contained.
    from django.http import Http404
    from rq.exceptions import NoSuchJobError

    queue_index = int(queue_index)
    queue = get_queue_by_index(queue_index)
    worker = Worker.find_by_key(key, connection=queue.connection)

    # A missing worker would otherwise fail on the first attribute access.
    if worker is None:
        raise Http404("Couldn't find worker with this key: %s" % key)

    try:
        # Convert microseconds to milliseconds
        worker.total_working_time = worker.total_working_time / 1000
    except AttributeError:
        # older version of rq do not have `total_working_time`
        worker.total_working_time = "-"

    queue_names = ', '.join(worker.queue_names())

    try:
        job = worker.get_current_job()
    except NoSuchJobError:
        # The worker references a job that can no longer be fetched; show
        # the page without a current job instead of failing.
        job = None

    context_data = {
        'queue': queue,
        'queue_index': queue_index,
        'worker': worker,
        'queue_names': queue_names,
        'job': job,
    }
    return render(request, 'django_rq/worker_details.html', context_data)
def send_signal_worker(worker_id):
    """Look up a worker by id and call its ``request_stop``.

    ``worker_id`` is expected without the RQ worker namespace prefix; the
    prefix is added here before the lookup. ``request_stop`` is called with
    ``signum=2`` and ``frame=5``.

    NOTE(review): ``request_stop`` runs on the object returned by the lookup,
    inside the calling process - confirm this actually reaches the running
    worker. The lookup result is not checked before use.
    """
    worker_key = Worker.redis_worker_namespace_prefix + worker_id
    target_worker = Worker.find_by_key(worker_key)
    target_worker.request_stop(signum=2, frame=5)