Example #1
0
def process_worker_ch(tenant, worker_ch, actor_id, worker_id, actor_ch,
                      ag_client):
    """Listen on the worker channel for health checks and the stop message.

    Meant to run as the target of a thread. Polls ``worker_ch`` forever with
    a two second timeout and reacts to two kinds of message:

    * a dict whose ``'value'`` is ``'status'`` is a health check; ``'ok'`` is
      put on the channel stored under the message's ``'reply_to'`` key.
    * the string ``'stop'`` shuts the worker down: the client is deleted
      (when one was supplied), the worker record is deleted, the module-level
      ``keep_running`` flag is set to False, ``actor_ch`` is closed and
      ``sys.exit()`` is called.

    Any other message is logged at debug level and ignored.

    :param tenant: tenant passed through to the client delete request.
    :param worker_ch: channel to poll; must support ``get(timeout=...)``.
    :param actor_id: id of the actor this worker belongs to.
    :param worker_id: id of this worker.
    :param actor_ch: actor channel; closed when the stop message arrives.
    :param ag_client: object exposing ``api_key``, or a falsy value when the
        worker was not given a client.
    :return: does not return normally; leaves via ``sys.exit()`` on stop.
    """
    global keep_running
    logger.info("Worker subscribing to worker channel...")
    while True:
        try:
            msg = worker_ch.get(timeout=2)
        except channelpy.ChannelTimeoutException:
            # nothing arrived within the timeout; poll again.
            continue
        logger.debug("Received message in worker channel: {}".format(msg))
        logger.debug("Type(msg)={}".format(type(msg)))
        if isinstance(msg, dict):
            if msg.get('value', '') == 'status':
                # this is a health check, return 'ok' to the reply_to channel.
                logger.debug("received health check. returning 'ok'.")
                msg['reply_to'].put('ok')
            continue
        if msg != 'stop':
            continue

        logger.info("Received stop message, stopping worker...")
        # first, delete an associated client
        # its possible this worker was not passed a client,
        # but if so, we need to delete it before shutting down.
        if ag_client:
            logger.info("Requesting client {} be deleted.".format(
                ag_client.api_key))
            secret = os.environ.get('_abaco_secret')
            clients_ch = ClientsChannel()
            try:
                reply = clients_ch.request_delete_client(
                    tenant=tenant,
                    actor_id=actor_id,
                    worker_id=worker_id,
                    client_id=ag_client.api_key,
                    secret=secret)
                if reply['status'] == 'ok':
                    logger.info("Delete request completed successfully.")
                else:
                    logger.error("Error deleting client. Message: {}".format(
                        reply['message']))
            finally:
                # always release the clients channel, even if the request
                # raised, so the connection is not leaked.
                clients_ch.close()
        else:
            logger.info(
                "Did not receive client. Not issuing delete. Exiting.")
        try:
            Worker.delete_worker(actor_id, worker_id)
        except WorkerException as e:
            # best effort: the worker record may already be gone.
            logger.info(
                "Got WorkerException from delete_worker(). Exception: {}".
                format(e))
        keep_running = False
        actor_ch.close()
        logger.info("Closing actor channel for actor: {}".format(actor_id))
        logger.info("Worker is now exiting.")
        sys.exit()
Example #2
0
def clean_up_clients_store():
    """Request deletion of every stored client whose worker no longer exists.

    Reads the ``_abaco_secret`` environment variable and returns ``None``
    immediately (after logging an error) when it is unset or empty. Otherwise
    iterates ``clients_store.items()``; each client must carry ``worker_id``,
    ``tenant``, ``actor_id`` and ``client_key`` or it is logged and skipped.
    For a client whose ``worker_id`` is not found by ``get_worker``, a delete
    request is sent over a ``ClientsChannel`` and the outcome is logged.

    :return: None
    """
    logger.debug("top of clean_up_clients_store")
    secret = os.environ.get('_abaco_secret')
    if not secret:
        logger.error(
            "health.py not configured with _abaco_secret. exiting clean_up_clients_store."
        )
        return None

    # checked in this order; the first missing field is the one reported.
    required_fields = ('worker_id', 'tenant', 'actor_id', 'client_key')
    for _, client in clients_store.items():
        missing = next(
            (field for field in required_fields if not client.get(field)),
            None)
        if missing:
            logger.error(
                "client object in clients_store without {}. client: {}".
                format(missing, client))
            continue
        wid = client.get('worker_id')
        tenant = client.get('tenant')
        actor_id = client.get('actor_id')
        client_key = client.get('client_key')

        # check to see if the wid is the id of an actual worker:
        if get_worker(wid):
            logger.info("worker {} still here. ignoring client {}.".format(
                wid, client))
            continue

        logger.info("worker {} is gone. deleting client {}.".format(
            wid, client))
        clients_ch = ClientsChannel()
        try:
            msg = clients_ch.request_delete_client(tenant=tenant,
                                                   actor_id=actor_id,
                                                   worker_id=wid,
                                                   client_id=client_key,
                                                   secret=secret)
            if msg['status'] == 'ok':
                logger.info("Client delete request completed successfully for "
                            "worker_id: {}, client_id: {}.".format(
                                wid, client_key))
            else:
                # arguments follow the placeholder order:
                # worker_id, client_id, Message.
                logger.error(
                    "Error deleting client for "
                    "worker_id: {}, client_id: {}. Message: {}".format(
                        wid, client_key, msg['message']))
        finally:
            # release the channel opened for this request.
            clients_ch.close()
Example #3
0
 def delete_client(self, tenant, actor_id, worker_id, client_id, secret):
     """Send a delete request for one client and log the outcome.

     Opens a ``ClientsChannel``, calls ``request_delete_client`` with the
     given arguments, logs success or the returned error message, and closes
     the channel.

     :param tenant: tenant forwarded to the delete request.
     :param actor_id: actor id forwarded to the delete request.
     :param worker_id: worker id forwarded to the delete request.
     :param client_id: id of the client to delete.
     :param secret: secret forwarded to the delete request.
     :return: None
     """
     clients_ch = ClientsChannel()
     try:
         msg = clients_ch.request_delete_client(tenant=tenant,
                                                actor_id=actor_id,
                                                worker_id=worker_id,
                                                client_id=client_id,
                                                secret=secret)
         if msg['status'] == 'ok':
             logger.info("Client delete request completed successfully for "
                         "worker_id: {}, client_id: {}.".format(worker_id, client_id))
         else:
             # arguments follow the placeholder order:
             # worker_id, client_id, Message.
             logger.error("Error deleting client for "
                          "worker_id: {}, client_id: {}. Message: {}".format(
                              worker_id, client_id, msg['message']))
     finally:
         # close the channel even when the request or the logging raised.
         clients_ch.close()
Example #4
0
File: worker.py Project: TACC/abaco
def process_worker_ch(tenant, worker_ch, actor_id, actor_ch, ag_client):
    """Listen on the worker channel for health checks and the stop message.

    Meant to run as the target of a thread. Polls ``worker_ch`` forever with
    a two second timeout and reacts to two kinds of message:

    * a dict whose ``'value'`` is ``'status'`` is a health check; ``'ok'`` is
      put on the channel stored under the message's ``'reply_to'`` key.
    * the string ``'stop'`` shuts the worker down: the client is deleted
      (when one was supplied), the worker record is deleted, the module-level
      ``keep_running`` flag is set to False, ``actor_ch`` is closed and
      ``sys.exit()`` is called.

    ``worker_ch.name`` is used as the worker id throughout.

    :param tenant: tenant passed through to the client delete request.
    :param worker_ch: channel to poll; must support ``get(timeout=...)`` and
        expose ``name``.
    :param actor_id: id of the actor this worker belongs to.
    :param actor_ch: actor channel; closed when the stop message arrives.
    :param ag_client: object exposing ``api_key``, or a falsy value when the
        worker was not given a client.
    :return: does not return normally; leaves via ``sys.exit()`` on stop.
    """
    global keep_running
    print("Worker subscribing to worker channel...")
    while True:
        try:
            msg = worker_ch.get(timeout=2)
        except channelpy.ChannelTimeoutException:
            # nothing arrived within the timeout; poll again.
            continue
        print("Received message in worker channel: {}".format(msg))
        print("Type(msg)={}".format(type(msg)))
        if isinstance(msg, dict):
            if msg.get('value', '') == 'status':
                # this is a health check, return 'ok' to the reply_to channel.
                msg['reply_to'].put('ok')
            continue
        if msg != 'stop':
            continue

        print("Received stop message, stopping worker...")
        # first, delete an associated client
        # its possible this worker was not passed a client,
        # but if so, we need to delete it before shutting down.
        if ag_client:
            print("Requesting client {} be deleted.".format(ag_client.api_key))
            secret = os.environ.get('_abaco_secret')
            clients_ch = ClientsChannel()
            try:
                reply = clients_ch.request_delete_client(tenant=tenant,
                                                         actor_id=actor_id,
                                                         worker_id=worker_ch.name,
                                                         client_id=ag_client.api_key,
                                                         secret=secret)
                if reply['status'] == 'ok':
                    print("Delete request completed successfully.")
                else:
                    print("Error deleting client. Message: {}".format(reply['message']))
            finally:
                # always release the clients channel so it is not leaked.
                clients_ch.close()
        else:
            print("Did not receive client. Not issuing delete. Exiting.")
        try:
            Worker.delete_worker(actor_id, worker_ch.name)
        except WorkerException as e:
            # still best effort, but report the failure instead of hiding it.
            print("Got WorkerException from delete_worker(). Exception: {}".format(e))
        keep_running = False
        actor_ch.close()
        sys.exit()
Example #5
0
def process_worker_ch(tenant, worker_ch, actor_id, worker_id, actor_ch, ag_client):
    """Listen on the worker channel for health checks and stop messages.

    Meant to run as the target of a thread. Repeatedly calls
    ``worker_ch.get_one()`` and reacts to these messages:

    * a dict whose ``'value'`` is ``'status'`` is a health check; ``'ok'`` is
      put on the message's ``'reply_to'`` channel, which is then deleted
      after a 1.5 second sleep.
    * ``'stop'`` or ``'stop-no-delete'`` shuts the worker down: the client is
      deleted (when one was supplied), the worker record is deleted, the
      module-level ``keep_running`` flag is set to False, the channels are
      deleted, the main thread is interrupted and the process exits with
      status 0. With ``'stop-no-delete'`` the actor channel is left in place.

    :param tenant: tenant passed through to the client delete request.
    :param worker_ch: channel to read from; deleted on stop.
    :param actor_id: id of the actor this worker belongs to.
    :param worker_id: id of this worker.
    :param actor_ch: actor channel; deleted on ``'stop'`` only.
    :param ag_client: object exposing ``api_key``, or a falsy value when the
        worker was not given a client.
    :return: does not return; the process is terminated on stop.
    """
    global keep_running
    logger.info("Worker subscribing to worker channel...")
    while True:
        msg = worker_ch.get_one()
        logger.debug("Received message in worker channel: {}".format(msg))
        logger.debug("Type(msg)={}".format(type(msg)))
        if isinstance(msg, dict):
            if msg.get('value', '') == 'status':
                # this is a health check, return 'ok' to the reply_to channel.
                logger.debug("received health check. returning 'ok'.")
                ch = msg['reply_to']
                ch.put('ok')
                # @TODO -
                # delete the anonymous channel from this thread but sleep first to avoid the race condition.
                time.sleep(1.5)
                ch.delete()
                # NOT doing this for now -- deleting entire anon channel instead (see above)
                # clean up the event queue on this anonymous channel. this should be fixed in channelpy.
                # ch._queue._event_queue
            continue
        if msg not in ('stop', 'stop-no-delete'):
            continue

        logger.info("Worker with worker_id: {} (actor_id: {}) received stop message, "
                    "stopping worker...".format(worker_id, actor_id))

        # when an actor's image is updated, old workers are deleted while new workers are
        # created. Deleting the actor msg channel in this case leads to race conditions
        delete_actor_ch = msg != 'stop-no-delete'
        if not delete_actor_ch:
            logger.info("Got stop-no-delete; will not delete actor_ch.")
        # first, delete an associated client
        # its possible this worker was not passed a client,
        # but if so, we need to delete it before shutting down.
        if ag_client:
            logger.info("Requesting client {} be deleted.".format(ag_client.api_key))
            secret = os.environ.get('_abaco_secret')
            clients_ch = ClientsChannel()
            try:
                reply = clients_ch.request_delete_client(tenant=tenant,
                                                         actor_id=actor_id,
                                                         worker_id=worker_id,
                                                         client_id=ag_client.api_key,
                                                         secret=secret)
                if reply['status'] == 'ok':
                    logger.info("Client delete request completed successfully for "
                                "worker_id: {}, client_id: {}.".format(worker_id, ag_client.api_key))
                else:
                    # arguments follow the placeholder order:
                    # worker_id, client_id, Message.
                    logger.error("Error deleting client for "
                                 "worker_id: {}, client_id: {}. Message: {}".format(
                                     worker_id, ag_client.api_key, reply['message']))
            finally:
                # close the clients channel even if the request raised.
                clients_ch.close()
        else:
            logger.info("Did not receive client. Not issuing delete. Exiting.")
        try:
            Worker.delete_worker(actor_id, worker_id)
        except WorkerException as e:
            # best effort: log and carry on with the shutdown.
            logger.info("Got WorkerException from delete_worker(). "
                        "worker_id: {}; "
                        "Exception: {}".format(worker_id, e))
        keep_running = False
        # delete associated channels:
        if delete_actor_ch:
            actor_ch.delete()
        worker_ch.delete()
        logger.info("WorkerChannel deleted and ActorMsgChannel closed for actor: {} worker_id: {}".format(actor_id, worker_id))
        logger.info("Worker with worker_id: {} is now exiting.".format(worker_id))
        _thread.interrupt_main()
        logger.info("main thread interruptted.")
        # os._exit() requires an explicit status; calling it bare raises
        # TypeError and the process would never be terminated from here.
        os._exit(0)
Example #6
0
def process_worker_ch(tenant, worker_ch, actor_id, worker_id, actor_ch,
                      ag_client):
    """Listen on the worker channel for health checks and control messages.

    Meant to run as the target of a thread. While ``keep_running`` is truthy,
    reads ``(msg, msg_obj)`` from ``worker_ch.get_one()``, acks the message
    immediately, and reacts to:

    * a dict whose ``'value'`` is ``'status'``: health check; ``'ok'`` is put
      on the message's ``'reply_to'`` channel, which is deleted after a
      1.5 second sleep.
    * ``'force_quit'``: sets ``globals.force_quit`` to True.
    * ``'stop'`` or ``'stop-no-delete'``: marks the worker SHUTTING_DOWN,
      sets ``globals.keep_running`` to False, deletes the client (when one
      was supplied), the worker record and the channels, interrupts the main
      thread and exits the process with status 0. ``'stop'`` additionally
      sets ``globals.force_quit``; ``'stop-no-delete'`` leaves the actor
      channel in place.

    :param tenant: tenant passed through to the client delete request.
    :param worker_ch: channel to read from; deleted on stop.
    :param actor_id: id of the actor this worker belongs to.
    :param worker_id: id of this worker.
    :param actor_ch: actor channel; deleted on ``'stop'`` only.
    :param ag_client: object exposing ``api_key``, or a falsy value when the
        worker was not given a client.
    :return: None if the loop condition becomes false; otherwise the process
        is terminated on stop.
    """
    # NOTE(review): the loop reads the module-level ``keep_running`` while the
    # stop branch writes ``globals.keep_running``; confirm these are meant to
    # be the same flag. ``globals`` here is an object with attributes (it
    # shadows the builtin) -- presumably a project module; verify.
    global keep_running
    logger.info("Worker subscribing to worker channel...")
    while keep_running:
        msg, msg_obj = worker_ch.get_one()
        # receiving the message is enough to ack it - resiliency is currently handled in the calling code.
        msg_obj.ack()
        logger.debug("Received message in worker channel: {}".format(msg))
        logger.debug("Type(msg)={}".format(type(msg)))
        if isinstance(msg, dict):
            if msg.get('value', '') == 'status':
                # this is a health check, return 'ok' to the reply_to channel.
                logger.debug("received health check. returning 'ok'.")
                ch = msg['reply_to']
                ch.put('ok')
                # @TODO -
                # delete the anonymous channel from this thread but sleep first to avoid the race condition.
                time.sleep(1.5)
                ch.delete()
                # NOT doing this for now -- deleting entire anon channel instead (see above)
                # clean up the event queue on this anonymous channel. this should be fixed in channelpy.
                # ch._queue._event_queue
            continue

        if msg == 'force_quit':
            logger.info(
                "Worker with worker_id: {} (actor_id: {}) received a force_quit message, "
                "forcing the execution to halt...".format(worker_id, actor_id))
            globals.force_quit = True
            continue

        if msg not in ('stop', 'stop-no-delete'):
            continue

        logger.info(
            "Worker with worker_id: {} (actor_id: {}) received stop message, "
            "stopping worker...".format(worker_id, actor_id))
        # set the worker status to SHUTTING_DOWN:
        try:
            Worker.update_worker_status(actor_id, worker_id, SHUTTING_DOWN)
        except Exception as e:
            logger.error(
                f"worker got exception trying to update status to SHUTTING_DOWN. actor_id: {actor_id};"
                f"worker_id: {worker_id}; exception: {e}")

        globals.keep_running = False

        # when an actor's image is updated, old workers are deleted while new workers are
        # created. Deleting the actor msg channel in this case leads to race conditions
        delete_actor_ch = msg != 'stop-no-delete'
        if delete_actor_ch:
            # if a `stop` was sent, the actor is being deleted, and so we want to immediately shutdown processing.
            globals.force_quit = True
        else:
            logger.info("Got stop-no-delete; will not delete actor_ch.")
        # first, delete an associated client
        # its possible this worker was not passed a client,
        # but if so, we need to delete it before shutting down.
        if ag_client:
            logger.info("Requesting client {} be deleted.".format(
                ag_client.api_key))
            secret = os.environ.get('_abaco_secret')
            clients_ch = ClientsChannel()
            try:
                reply = clients_ch.request_delete_client(
                    tenant=tenant,
                    actor_id=actor_id,
                    worker_id=worker_id,
                    client_id=ag_client.api_key,
                    secret=secret)
                if reply['status'] == 'ok':
                    logger.info(
                        "Client delete request completed successfully for "
                        "worker_id: {}, client_id: {}.".format(
                            worker_id, ag_client.api_key))
                else:
                    # arguments follow the placeholder order:
                    # worker_id, client_id, Message.
                    logger.error(
                        "Error deleting client for "
                        "worker_id: {}, client_id: {}. Message: {}".format(
                            worker_id, ag_client.api_key, reply['message']))
            finally:
                # close the clients channel even if the request raised.
                clients_ch.close()
        else:
            logger.info(
                "Did not receive client. Not issuing delete. Exiting.")
        try:
            Worker.delete_worker(actor_id, worker_id)
        except WorkerException as e:
            # best effort: log and carry on with the shutdown.
            logger.info("Got WorkerException from delete_worker(). "
                        "worker_id: {}; "
                        "Exception: {}".format(worker_id, e))
        # delete associated channels:
        # it is possible the actor channel was already deleted, in which case we just keep processing
        if delete_actor_ch:
            try:
                actor_ch.delete()
                logger.info(
                    "ActorChannel deleted for actor: {} worker_id: {}".
                    format(actor_id, worker_id))
            except Exception as e:
                logger.info(
                    "Got exception deleting ActorChannel for actor: {} "
                    "worker_id: {}; exception: {}".format(
                        actor_id, worker_id, e))
        try:
            worker_ch.delete()
            logger.info(
                "WorkerChannel deleted for actor: {} worker_id: {}".format(
                    actor_id, worker_id))
        except Exception as e:
            logger.info(
                "Got exception deleting WorkerChannel for actor: {} "
                "worker_id: {}; exception: {}".format(
                    actor_id, worker_id, e))

        logger.info(
            "Worker with worker_id: {} is now exiting.".format(worker_id))
        _thread.interrupt_main()
        logger.info("main thread interrupted, issuing os._exit()...")
        os._exit(0)