Example #1
0
 def delete(self, actor_id, ch_name):
     """Ask the worker identified by ``actor_id`` and ``ch_name`` to stop.

     If looking up the worker raises a WorkerException, it is re-raised as
     ``APIException(message, 404)``. Otherwise "stop" is put on the worker's
     channel and the ``ok`` response wrapping the worker is returned.
     """
     try:
         found = get_worker(actor_id, ch_name)
     except WorkerException as err:
         raise APIException(err.message, 404)
     # The stop request is delivered over the worker's own channel.
     WorkerChannel(name=ch_name).put("stop")
     return ok(result=found, msg="Worker scheduled to be stopped.")
Example #2
0
File: worker.py Project: TACC/abaco
def main(worker_ch_name, image):
    """Pull ``image``, handshake with the spawner, then subscribe to the actor.

    A DockerError while pulling is reported on the worker channel and
    re-raised. A spawner reply with status 'error' raises WorkerException.
    Client credentials are forwarded to ``subscribe`` only when the reply
    carries ``client == 'yes'``; otherwise they are all None.
    """
    channel = WorkerChannel(name=worker_ch_name)

    # Step 1: pull the image; on failure tell the spawner and abort.
    try:
        print("Worker pulling image {}...".format(image))
        pull_image(image)
    except DockerError as err:
        channel.put({'status': 'error', 'msg': str(err)})
        raise err
    print("Image pulled successfully")

    # Step 2: report success and block until the spawner replies.
    print("Worker waiting on message from spawner...")
    reply = channel.put_sync({'status': 'ok'})
    if reply['status'] == 'error':
        print("Worker received error message from spawner: {}. Quiting...".format(str(reply)))
        raise WorkerException(str(reply))

    actor_id = reply.get('actor_id')
    tenant = reply.get('tenant')
    print("Worker received ok from spawner. Message: {}, actor_id:{}".format(reply, actor_id))

    # Step 3: read credentials from the reply only when a client was issued;
    # an empty mapping makes every lookup below yield None.
    has_client = reply.get('client') == 'yes'
    if not has_client:
        print("Did not get client:yes, got client:{}".format(reply.get('client')))
    creds = reply if has_client else {}
    api_server = creds.get('api_server')
    client_id = creds.get('client_id')
    client_secret = creds.get('client_secret')
    access_token = creds.get('access_token')
    refresh_token = creds.get('refresh_token')

    # Step 4: mark the actor ready and start consuming its messages.
    Actor.set_status(actor_id, READY)
    subscribe(
        tenant,
        actor_id,
        api_server,
        client_id,
        client_secret,
        access_token,
        refresh_token,
        channel,
    )
Example #3
0
    def stop_workers(self, actor_id):
        """Stop existing workers; used when updating an actor's image.

        Looks up the workers stored for ``actor_id``. When there are none
        (including when the store has no entry for the actor) nothing is done.
        Otherwise the actor message channel is closed and a 'stop' message is
        put on every worker's channel.
        """
        try:
            workers_dict = workers_store[actor_id]
        except KeyError:
            workers_dict = {}

        # nothing to shut down
        if not workers_dict:
            return

        # first, close the actor msg channel to prevent any new messages from being pulled
        # by the old workers.
        actor_ch = ActorMsgChannel(actor_id)
        actor_ch.close()

        # now, send messages to workers for a graceful shutdown; only the worker
        # records are needed, not their keys.
        for worker in workers_dict.values():
            ch = WorkerChannel(name=worker['ch_name'])
            ch.put('stop')
Example #4
0
    def stop_workers(self, actor_id):
        """Shut down any workers recorded for ``actor_id``.

        Used when updating an actor's image. The stored value is JSON-decoded;
        a missing store entry is treated as "no workers". When workers exist,
        the actor message channel is closed first and then each worker is sent
        a 'stop' message on its own channel.
        """
        try:
            workers = json.loads(workers_store[actor_id])
            print("Found existing workers: {}".format(str(workers)))
        except KeyError:
            print("No existing workers.")
            workers = {}

        if len(workers) == 0:
            return

        # Close the actor msg channel before stopping anything so the old
        # workers cannot pull new messages.
        ActorMsgChannel(actor_id).close()

        # Ask each worker to shut down gracefully.
        for entry in workers:
            WorkerChannel(name=entry['ch_name']).put('stop')
Example #5
0
def main(worker_ch_name, image):
    """Pull ``image``, handshake with the spawner, then subscribe to the actor.

    A DockerError while pulling is reported on the worker channel and
    re-raised. A spawner reply with status "error" raises WorkerException.
    """
    channel = WorkerChannel(name=worker_ch_name)

    # Step 1: pull the image; on failure tell the spawner and abort.
    try:
        print("Worker pulling image {}...".format(image))
        pull_image(image)
    except DockerError as err:
        channel.put({"status": "error", "msg": str(err)})
        raise err
    print("Image pulled successfully")

    # Step 2: report success and block until the spawner replies.
    print("Worker waiting on message from spawner...")
    reply = channel.put_sync({"status": "ok"})
    if reply["status"] == "error":
        print("Worker received error message from spawner: {}. Quiting...".format(str(reply)))
        raise WorkerException(str(reply))

    # Step 3: mark the actor ready and start consuming its messages.
    actor_id = reply.get("actor_id")
    print("Worker received ok from spawner. Message: {}, actor_id:{}".format(reply, actor_id))
    Actor.set_status(actor_id, READY)
    subscribe(actor_id, channel)
Example #6
0
File: worker.py Project: TACC/abaco
def shutdown_worker(ch_name):
    """Gracefully shut down one worker by putting "stop" on its channel."""
    WorkerChannel(name=ch_name).put("stop")
Example #7
0
def shutdown_workers(actor_id):
    """Gracefully shut down every worker returned by ``get_workers(actor_id)``."""
    for entry in get_workers(actor_id):
        # each worker is addressed through the channel named in its record
        WorkerChannel(name=entry["ch_name"]).put("stop")
Example #8
0
    # Run the worker main loop. On any exception, try to send this worker a
    # 'stop-no-delete' message on its own channel before exiting.
    try:
        main()
    except Exception as e:
        # os.environ.get() returns None for an unset variable rather than raising,
        # so a missing worker_id has to be detected by value, not by exception.
        worker_id = os.environ.get('worker_id')
        if not worker_id:
            logger.error(
                f"worker main thread got exception but could not get worker id from environment; "
                f"not able to send stop-no-delete message to itself. Exception: {e}")
        else:
            try:
                ch = WorkerChannel(worker_id=worker_id)
                # since this is an exception, we don't know that the actor has been deleted
                # don't delete the actor msg channel:
                ch.put('stop-no-delete')
                logger.info(
                    f"Worker main loop sent 'stop-no-delete' message to itself; worker_id: {worker_id}."
                )
                ch.close()
                msg = "worker caught exception from main loop. worker exiting. " \
                      "Exception: {} worker_id: {}".format(e, worker_id)
                logger.info(msg)
            except Exception as send_err:
                # bound to its own name so the main-loop exception `e` is not
                # shadowed, and reported so the send failure is diagnosable.
                logger.error(
                    f"worker main thread got exception trying to send stop-no-delete message to itself; "
                    f"worker_id: {worker_id}. Exception: {send_err}")
    # NOTE(review): presumably a flag read elsewhere in the module -- confirm.
    keep_running = False
    sys.exit()
Example #9
0
def shutdown_worker(ch_name):
    """Gracefully shut down one worker by putting "stop" on its channel."""
    WorkerChannel(name=ch_name).put("stop")