def process_worker_ch(tenant, worker_ch, actor_id, worker_id, actor_ch, ag_client):
    """
    Target for a thread to listen on the worker channel for a message to stop processing.

    Polls ``worker_ch`` with a 2 second timeout. A dict message whose ``value`` is
    ``'status'`` is answered with ``'ok'`` on the channel found under ``reply_to``.
    The string ``'stop'`` triggers shutdown: the worker's client (if any) is deleted
    through a ``ClientsChannel`` request, the worker record is deleted, the module
    level ``keep_running`` flag is cleared, ``actor_ch`` is closed and ``sys.exit()``
    is called.

    :param tenant: tenant forwarded to the client delete request.
    :param worker_ch: channel this function listens on.
    :param actor_id: id of the actor this worker belongs to.
    :param worker_id: id of this worker.
    :param actor_ch: actor channel; closed when a stop message is handled.
    :param ag_client: optional client object; when truthy its ``api_key`` is deleted.
    :return: never returns normally; ``sys.exit()`` raises ``SystemExit`` on stop.
    """
    global keep_running
    logger.info("Worker subscribing to worker channel...")
    while True:
        try:
            msg = worker_ch.get(timeout=2)
        except channelpy.ChannelTimeoutException:
            # nothing arrived within the timeout; keep polling.
            continue
        logger.debug("Received message in worker channel: {}".format(msg))
        logger.debug("Type(msg)={}".format(type(msg)))
        if isinstance(msg, dict):
            value = msg.get('value', '')
            if value == 'status':
                # this is a health check, return 'ok' to the reply_to channel.
                logger.debug("received health check. returning 'ok'.")
                ch = msg['reply_to']
                ch.put('ok')
        elif msg == 'stop':
            logger.info("Received stop message, stopping worker...")
            # first, delete an associated client
            # its possible this worker was not passed a client,
            # but if so, we need to delete it before shutting down.
            if ag_client:
                logger.info("Requesting client {} be deleted.".format(
                    ag_client.api_key))
                secret = os.environ.get('_abaco_secret')
                clients_ch = ClientsChannel()
                try:
                    response = clients_ch.request_delete_client(
                        tenant=tenant,
                        actor_id=actor_id,
                        worker_id=worker_id,
                        client_id=ag_client.api_key,
                        secret=secret)
                finally:
                    # always release the request channel, even if the request raised.
                    clients_ch.close()
                if response['status'] == 'ok':
                    logger.info("Delete request completed successfully.")
                else:
                    logger.error("Error deleting client. Message: {}".format(
                        response['message']))
            else:
                logger.info(
                    "Did not receive client. Not issuing delete. Exiting.")
            try:
                Worker.delete_worker(actor_id, worker_id)
            except WorkerException as e:
                # best effort: a failed worker delete must not block shutdown.
                logger.info(
                    "Got WorkerException from delete_worker(). Exception: {}".
                    format(e))
            keep_running = False
            actor_ch.close()
            logger.info("Closing actor channel for actor: {}".format(actor_id))
            logger.info("Worker is now exiting.")
            sys.exit()
def clean_up_clients_store():
    """
    Request deletion of every client in ``clients_store`` whose worker no longer exists.

    Reads the shared secret from the ``_abaco_secret`` environment variable and
    returns immediately (after logging an error) when it is not set. Each client
    record must carry ``worker_id``, ``tenant``, ``actor_id`` and ``client_key``;
    records missing any of them are logged and skipped. For the remaining records,
    ``get_worker(worker_id)`` is consulted and, when it returns a falsy value, a
    delete request is issued through a ``ClientsChannel``.

    :return: None
    """
    logger.debug("top of clean_up_clients_store")
    secret = os.environ.get('_abaco_secret')
    if not secret:
        logger.error(
            "health.py not configured with _abaco_secret. exiting clean_up_clients_store."
        )
        return None
    # fields are checked in this order; the first missing one is reported.
    required_fields = ('worker_id', 'tenant', 'actor_id', 'client_key')
    for _key, client in clients_store.items():
        missing = next(
            (field for field in required_fields if not client.get(field)), None)
        if missing:
            logger.error(
                "client object in clients_store without {}. client: {}".format(
                    missing, client))
            continue
        wid = client.get('worker_id')
        tenant = client.get('tenant')
        actor_id = client.get('actor_id')
        client_key = client.get('client_key')
        # check to see if the wid is the id of an actual worker:
        worker = get_worker(wid)
        if worker:
            logger.info("worker {} still here. ignoring client {}.".format(
                wid, client))
            continue
        logger.info("worker {} is gone. deleting client {}.".format(
            wid, client))
        clients_ch = ClientsChannel()
        try:
            msg = clients_ch.request_delete_client(tenant=tenant,
                                                   actor_id=actor_id,
                                                   worker_id=wid,
                                                   client_id=client_key,
                                                   secret=secret)
        finally:
            # a channel is opened per deletion; release it so they do not pile up.
            clients_ch.close()
        if msg['status'] == 'ok':
            logger.info("Client delete request completed successfully for "
                        "worker_id: {}, client_id: {}.".format(
                            wid, client_key))
        else:
            # arguments follow placeholder order: worker id, client id, message.
            logger.error(
                "Error deleting client for "
                "worker_id: {}, client_id: {}. Message: {}".format(
                    wid, client_key, msg['message']))
def delete_client(self, tenant, actor_id, worker_id, client_id, secret):
    """
    Ask the clients service to delete a client and log the outcome.

    Opens a ``ClientsChannel``, sends a delete request and logs success or the
    returned error message. The channel is closed whether or not the request
    succeeds.

    :param tenant: tenant forwarded to the delete request.
    :param actor_id: id of the actor the client was issued for.
    :param worker_id: id of the worker the client was issued for.
    :param client_id: id (key) of the client to delete.
    :param secret: shared secret forwarded to the delete request.
    :return: None
    """
    clients_ch = ClientsChannel()
    try:
        response = clients_ch.request_delete_client(tenant=tenant,
                                                    actor_id=actor_id,
                                                    worker_id=worker_id,
                                                    client_id=client_id,
                                                    secret=secret)
        if response['status'] == 'ok':
            logger.info("Client delete request completed successfully for "
                        "worker_id: {}, client_id: {}.".format(worker_id, client_id))
        else:
            # arguments follow placeholder order: worker id, client id, message.
            logger.error("Error deleting client for "
                         "worker_id: {}, client_id: {}. Message: {}".format(
                             worker_id, client_id, response['message']))
    finally:
        clients_ch.close()
def process_worker_ch(tenant, worker_ch, actor_id, actor_ch, ag_client):
    """
    Target for a thread to listen on the worker channel for a message to stop processing.

    Polls ``worker_ch`` with a 2 second timeout. A dict message whose ``value`` is
    ``'status'`` is answered with ``'ok'`` on the channel found under ``reply_to``.
    The string ``'stop'`` triggers shutdown: the worker's client (if any) is deleted
    through a ``ClientsChannel`` request, the worker record (keyed by
    ``worker_ch.name``) is deleted, the module level ``keep_running`` flag is
    cleared, ``actor_ch`` is closed and ``sys.exit()`` is called.

    :param tenant: tenant forwarded to the client delete request.
    :param worker_ch: channel this function listens on; its ``name`` is used as the worker id.
    :param actor_id: id of the actor this worker belongs to.
    :param actor_ch: actor channel; closed when a stop message is handled.
    :param ag_client: optional client object; when truthy its ``api_key`` is deleted.
    :return: never returns normally; ``sys.exit()`` raises ``SystemExit`` on stop.
    """
    global keep_running
    print("Worker subscribing to worker channel...")
    while True:
        try:
            msg = worker_ch.get(timeout=2)
        except channelpy.ChannelTimeoutException:
            # nothing arrived within the timeout; keep polling.
            continue
        print("Received message in worker channel: {}".format(msg))
        print("Type(msg)={}".format(type(msg)))
        if isinstance(msg, dict):
            value = msg.get('value', '')
            if value == 'status':
                # this is a health check, return 'ok' to the reply_to channel.
                ch = msg['reply_to']
                ch.put('ok')
        elif msg == 'stop':
            print("Received stop message, stopping worker...")
            # first, delete an associated client
            # its possible this worker was not passed a client,
            # but if so, we need to delete it before shutting down.
            if ag_client:
                print("Requesting client {} be deleted.".format(ag_client.api_key))
                secret = os.environ.get('_abaco_secret')
                clients_ch = ClientsChannel()
                try:
                    response = clients_ch.request_delete_client(
                        tenant=tenant,
                        actor_id=actor_id,
                        worker_id=worker_ch.name,
                        client_id=ag_client.api_key,
                        secret=secret)
                finally:
                    # always release the request channel, even if the request raised.
                    clients_ch.close()
                if response['status'] == 'ok':
                    print("Delete request completed successfully.")
                else:
                    print("Error deleting client. Message: {}".format(response['message']))
            else:
                print("Did not receive client. Not issuing delete. Exiting.")
            try:
                Worker.delete_worker(actor_id, worker_ch.name)
            except WorkerException as e:
                # best effort: report the failure instead of hiding it, then keep shutting down.
                print("Got WorkerException from delete_worker(). Exception: {}".format(e))
            keep_running = False
            actor_ch.close()
            sys.exit()
def process_worker_ch(tenant, worker_ch, actor_id, worker_id, actor_ch, ag_client):
    """
    Target for a thread to listen on the worker channel for a message to stop processing.

    Reads messages with ``worker_ch.get_one()``. A dict message whose ``value`` is
    ``'status'`` is answered with ``'ok'`` on the channel found under ``reply_to``,
    which is then deleted after a short sleep. The strings ``'stop'`` and
    ``'stop-no-delete'`` trigger shutdown: the worker's client (if any) is deleted
    through a ``ClientsChannel`` request, the worker record is deleted, the module
    level ``keep_running`` flag is cleared, the channels are deleted (``actor_ch``
    only for ``'stop'``), the main thread is interrupted and the process exits.

    :param tenant: tenant forwarded to the client delete request.
    :param worker_ch: channel this function listens on; deleted on shutdown.
    :param actor_id: id of the actor this worker belongs to.
    :param worker_id: id of this worker.
    :param actor_ch: actor message channel; deleted on ``'stop'`` but not on ``'stop-no-delete'``.
    :param ag_client: optional client object; when truthy its ``api_key`` is deleted.
    :return: never returns normally; the process is terminated with ``os._exit(0)`` on stop.
    """
    global keep_running
    logger.info("Worker subscribing to worker channel...")
    while True:
        msg = worker_ch.get_one()
        logger.debug("Received message in worker channel: {}".format(msg))
        logger.debug("Type(msg)={}".format(type(msg)))
        if isinstance(msg, dict):
            value = msg.get('value', '')
            if value == 'status':
                # this is a health check, return 'ok' to the reply_to channel.
                logger.debug("received health check. returning 'ok'.")
                ch = msg['reply_to']
                ch.put('ok')
                # @TODO -
                # delete the anonymous channel from this thread but sleep first to avoid the race condition.
                time.sleep(1.5)
                ch.delete()
                # NOT doing this for now -- deleting entire anon channel instead (see above)
                # clean up the event queue on this anonymous channel. this should be fixed in channelpy.
                # ch._queue._event_queue
        elif msg == 'stop' or msg == 'stop-no-delete':
            logger.info("Worker with worker_id: {} (actor_id: {}) received stop message, "
                        "stopping worker...".format(worker_id, actor_id))
            # when an actor's image is updated, old workers are deleted while new workers are
            # created. Deleting the actor msg channel in this case leads to race conditions
            delete_actor_ch = True
            if msg == 'stop-no-delete':
                logger.info("Got stop-no-delete; will not delete actor_ch.")
                delete_actor_ch = False
            # first, delete an associated client
            # its possible this worker was not passed a client,
            # but if so, we need to delete it before shutting down.
            if ag_client:
                logger.info("Requesting client {} be deleted.".format(ag_client.api_key))
                secret = os.environ.get('_abaco_secret')
                clients_ch = ClientsChannel()
                try:
                    response = clients_ch.request_delete_client(tenant=tenant,
                                                                actor_id=actor_id,
                                                                worker_id=worker_id,
                                                                client_id=ag_client.api_key,
                                                                secret=secret)
                    if response['status'] == 'ok':
                        logger.info("Client delete request completed successfully for "
                                    "worker_id: {}, client_id: {}.".format(worker_id, ag_client.api_key))
                    else:
                        # arguments follow placeholder order: worker id, client id, message.
                        logger.error("Error deleting client for "
                                     "worker_id: {}, client_id: {}. Message: {}".format(
                                         worker_id, ag_client.api_key, response['message']))
                finally:
                    # always release the request channel, even if the request raised.
                    clients_ch.close()
            else:
                logger.info("Did not receive client. Not issuing delete. Exiting.")
            try:
                Worker.delete_worker(actor_id, worker_id)
            except WorkerException as e:
                # best effort: a failed worker delete must not block shutdown.
                logger.info("Got WorkerException from delete_worker(). "
                            "worker_id: {}"
                            "Exception: {}".format(worker_id, e))
            keep_running = False
            # delete associated channels:
            if delete_actor_ch:
                actor_ch.delete()
            worker_ch.delete()
            logger.info("WorkerChannel deleted and ActorMsgChannel closed for actor: {} worker_id: {}".format(actor_id, worker_id))
            logger.info("Worker with worker_id: {} is now exiting.".format(worker_id))
            _thread.interrupt_main()
            logger.info("main thread interruptted.")
            # os._exit() requires an exit status; calling it without one raises TypeError
            # instead of terminating the process.
            os._exit(0)
def process_worker_ch(tenant, worker_ch, actor_id, worker_id, actor_ch, ag_client):
    """
    Target for a thread to listen on the worker channel for a message to stop processing.

    Reads ``(msg, msg_obj)`` pairs with ``worker_ch.get_one()`` and acks each one
    immediately. Handled messages:

    * dict with ``value == 'status'``: health check; ``'ok'`` is put on the channel
      found under ``reply_to``, which is then deleted after a short sleep.
    * ``'force_quit'``: sets ``globals.force_quit``.
    * ``'stop'`` / ``'stop-no-delete'``: marks the worker SHUTTING_DOWN, clears
      ``globals.keep_running``, deletes the worker's client (if any) and the worker
      record, deletes the channels (``actor_ch`` only for ``'stop'``), interrupts the
      main thread and terminates the process with ``os._exit(0)``.

    NOTE(review): the loop condition reads this module's ``keep_running`` while the
    stop path sets ``globals.keep_running`` -- confirm these are meant to be the same flag.

    :param tenant: tenant forwarded to the client delete request.
    :param worker_ch: channel this function listens on; deleted on shutdown.
    :param actor_id: id of the actor this worker belongs to.
    :param worker_id: id of this worker.
    :param actor_ch: actor channel; deleted on ``'stop'`` but not on ``'stop-no-delete'``.
    :param ag_client: optional client object; when truthy its ``api_key`` is deleted.
    :return: None if the loop condition becomes false; otherwise the process exits on stop.
    """
    global keep_running
    logger.info("Worker subscribing to worker channel...")
    while keep_running:
        msg, msg_obj = worker_ch.get_one()
        # receiving the message is enough to ack it - resiliency is currently handled in the calling code.
        msg_obj.ack()
        logger.debug("Received message in worker channel: {}".format(msg))
        logger.debug("Type(msg)={}".format(type(msg)))
        if isinstance(msg, dict):
            value = msg.get('value', '')
            if value == 'status':
                # this is a health check, return 'ok' to the reply_to channel.
                logger.debug("received health check. returning 'ok'.")
                ch = msg['reply_to']
                ch.put('ok')
                # @TODO -
                # delete the anonymous channel from this thread but sleep first to avoid the race condition.
                time.sleep(1.5)
                ch.delete()
                # NOT doing this for now -- deleting entire anon channel instead (see above)
                # clean up the event queue on this anonymous channel. this should be fixed in channelpy.
                # ch._queue._event_queue
        elif msg == 'force_quit':
            logger.info(
                "Worker with worker_id: {} (actor_id: {}) received a force_quit message, "
                "forcing the execution to halt...".format(worker_id, actor_id))
            globals.force_quit = True
        elif msg == 'stop' or msg == 'stop-no-delete':
            logger.info(
                "Worker with worker_id: {} (actor_id: {}) received stop message, "
                "stopping worker...".format(worker_id, actor_id))
            # set the worker status to SHUTTING_DOWN:
            try:
                Worker.update_worker_status(actor_id, worker_id, SHUTTING_DOWN)
            except Exception as e:
                # best effort: a failed status update must not block shutdown.
                logger.error(
                    f"worker got exception trying to update status to SHUTTING_DOWN. actor_id: {actor_id};"
                    f"worker_id: {worker_id}; exception: {e}")
            globals.keep_running = False
            # when an actor's image is updated, old workers are deleted while new workers are
            # created. Deleting the actor msg channel in this case leads to race conditions
            delete_actor_ch = True
            if msg == 'stop-no-delete':
                logger.info("Got stop-no-delete; will not delete actor_ch.")
                delete_actor_ch = False
            # if a `stop` was sent, the actor is being deleted, and so we want to immediately shutdown processing.
            else:
                globals.force_quit = True
            # first, delete an associated client
            # its possible this worker was not passed a client,
            # but if so, we need to delete it before shutting down.
            if ag_client:
                logger.info("Requesting client {} be deleted.".format(
                    ag_client.api_key))
                secret = os.environ.get('_abaco_secret')
                clients_ch = ClientsChannel()
                try:
                    response = clients_ch.request_delete_client(
                        tenant=tenant,
                        actor_id=actor_id,
                        worker_id=worker_id,
                        client_id=ag_client.api_key,
                        secret=secret)
                    if response['status'] == 'ok':
                        logger.info(
                            "Client delete request completed successfully for "
                            "worker_id: {}, client_id: {}.".format(
                                worker_id, ag_client.api_key))
                    else:
                        # arguments follow placeholder order: worker id, client id, message.
                        logger.error(
                            "Error deleting client for "
                            "worker_id: {}, client_id: {}. Message: {}".format(
                                worker_id, ag_client.api_key, response['message']))
                finally:
                    # always release the request channel, even if the request raised.
                    clients_ch.close()
            else:
                logger.info(
                    "Did not receive client. Not issuing delete. Exiting.")
            try:
                Worker.delete_worker(actor_id, worker_id)
            except WorkerException as e:
                logger.info("Got WorkerException from delete_worker(). "
                            "worker_id: {}"
                            "Exception: {}".format(worker_id, e))
            # delete associated channels:
            # it is possible the actor channel was already deleted, in which case we just keep processing
            if delete_actor_ch:
                try:
                    actor_ch.delete()
                    logger.info(
                        "ActorChannel deleted for actor: {} worker_id: {}".
                        format(actor_id, worker_id))
                except Exception as e:
                    logger.info(
                        "Got exception deleting ActorChannel for actor: {} "
                        "worker_id: {}; exception: {}".format(
                            actor_id, worker_id, e))
            try:
                worker_ch.delete()
                logger.info(
                    "WorkerChannel deleted for actor: {} worker_id: {}".format(
                        actor_id, worker_id))
            except Exception as e:
                logger.info(
                    "Got exception deleting WorkerChannel for actor: {} "
                    "worker_id: {}; exception: {}".format(
                        actor_id, worker_id, e))
            logger.info(
                "Worker with worker_id: {} is now exiting.".format(worker_id))
            _thread.interrupt_main()
            logger.info("main thread interrupted, issuing os._exit()...")
            os._exit(0)