Esempio n. 1
0
 def check_new_params(self, cmd):
     """Run the extra validation needed for a new client request.

     After the common checks, confirms that the actor named in the command
     is present in the actors store and that the named worker can be looked
     up, then builds the owner string, prepending the tenant's userstore
     prefix when one is returned.

     :param cmd: the command; read with ``cmd.get`` for the keys
         'actor_id' and 'worker_id'.
     :return: tuple ``(valid, msg, owner)`` where ``valid`` and ``msg`` come
         from ``check_common``; ``(False, <error message>, None)`` when the
         actor or the worker lookup fails.
     """
     valid, msg = self.check_common(cmd)
     actor_id = cmd.get('actor_id')
     # the actor has to exist in the store
     try:
         actor = Actor.from_db(actors_store[actor_id])
     except KeyError:
         error = "Unable to look up actor with id: {}".format(actor_id)
         logger.error(error)
         return False, error, None
     # the worker has to be known for that actor
     try:
         Worker.get_worker(actor_id=actor_id, worker_id=cmd.get('worker_id'))
     except WorkerException as exc:
         error = "Unable to look up worker: {}".format(exc.msg)
         logger.error(error)
         return False, error, None
     logger.debug("new params were valid.")
     tenant = actor.tenant
     prefix = get_tenant_userstore_prefix(tenant)
     logger.debug(f"using owner prefix: {prefix} for tenant: {tenant}")
     # only qualify the owner when a prefix is configured
     owner = f"{prefix}/{actor.owner}" if prefix else actor.owner
     logger.debug(f"using owner: {owner}")
     return valid, msg, owner
Esempio n. 2
0
 def check_new_params(self, cmd):
     """Validate a new client request beyond the common checks.

     :param cmd: the command; 'actor_id' and 'worker_id' are read with
         ``cmd.get``. The worker id is passed to the worker lookup as
         ``ch_name``.
     :return: ``(valid, msg, owner)`` with ``valid``/``msg`` taken from
         ``check_common`` and ``owner`` from the actor, or
         ``(False, <error message>, None)`` when the actor or worker
         cannot be found.
     """
     common_ok, common_msg = self.check_common(cmd)
     actor_id = cmd.get('actor_id')
     # step 1: the actor must be in the store
     try:
         record = actors_store[actor_id]
         actor = Actor.from_db(record)
     except KeyError:
         failure = "Unable to look up actor with id: {}".format(actor_id)
         return False, failure, None
     # step 2: the worker must exist for this actor
     try:
         Worker.get_worker(actor_id=actor_id, ch_name=cmd.get('worker_id'))
     except WorkerException as exc:
         failure = "Unable to look up worker: {}".format(exc.msg)
         return False, failure, None
     return common_ok, common_msg, actor.owner
Esempio n. 3
0
 def check_new_params(self, cmd):
     """Perform the additional checks for a new client request.

     :param cmd: the command; 'actor_id' and 'worker_id' are read with
         ``cmd.get``.
     :return: ``(valid, msg, actor.owner)`` using the result of
         ``check_common`` when both lookups succeed, otherwise
         ``(False, <error message>, None)``.
     """
     result = self.check_common(cmd)
     valid, msg = result
     wanted_actor = cmd.get('actor_id')

     # actor lookup: a KeyError from the store means the id is unknown
     try:
         actor = Actor.from_db(actors_store[wanted_actor])
     except KeyError:
         template = "Unable to look up actor with id: {}"
         return False, template.format(wanted_actor), None

     # worker lookup: get_worker signals a missing worker via WorkerException
     try:
         Worker.get_worker(actor_id=wanted_actor,
                           worker_id=cmd.get('worker_id'))
     except WorkerException as err:
         template = "Unable to look up worker: {}"
         return False, template.format(err.msg), None

     return valid, msg, actor.owner
Esempio n. 4
0
    def error_out_actor(self, actor_id, worker_id, message):
        """Put the actor into ERROR status and clean up after a failure.

        The actor's status is set to ERROR (with ``message``) if it is still
        in the actors store. The worker's recorded status then decides the
        clean-up: for UPDATING_STORE, READY or BUSY, ``stop_workers`` is
        called with an empty ``worker_ids`` list, and ``kill_worker`` is used
        only if that raises; for REQUESTED, SPAWNER_SETUP, PULLING_IMAGE or
        ERROR, ``kill_worker`` is called directly.

        :param actor_id: id of the actor to flag as errored.
        :param worker_id: id of the worker that triggered the error handling.
        :param message: status message stored with the ERROR status.
        :return: None.
        """
        logger.debug("top of error_out_actor for worker: {}_{}".format(actor_id, worker_id))
        # The actor may have been deleted in the meantime -- only flag it as ERROR
        # when it is still present in the store.
        if actors_store.get(actor_id):
            Actor.set_status(actor_id, ERROR, status_message=message)

        # Work out how far the spawner got while setting up the worker.
        try:
            worker = Worker.get_worker(actor_id, worker_id)
            worker_status = worker.get('status')
            logger.debug(f"got worker status for {actor_id}_{worker_id}; status: {worker_status}")
        except Exception as e:
            logger.debug(
                f"got exception in error_out_actor trying to determine worker status for {actor_id}_{worker_id}; "
                f"e:{e};")
            # the worker status is unknown, so all further worker processing is skipped.
            return

        if worker_status in (UPDATING_STORE, READY, BUSY):
            logger.debug(f"worker status was: {worker_status}; trying to stop_worker")
            # the worker container is running: attempt the "graceful" stop first.
            try:
                self.stop_workers(actor_id, worker_ids=[])
                logger.info("Spawner just stopped worker {}_{} in error_out_actor".format(actor_id, worker_id))
                return
            except Exception as e:
                logger.error("spawner got exception trying to run stop_workers. Exception: {}".format(e))
                logger.info("setting worker_status to ERROR so that kill_worker will run.")
                # fall through to the kill branch below
                worker_status = ERROR

        # If the spawner never managed to start the worker container (or the graceful
        # stop above failed), kill the worker directly.
        if worker_status in (REQUESTED, SPAWNER_SETUP, PULLING_IMAGE, ERROR):
            logger.debug(f"worker status was: {worker_status}; trying to kill_worker")
            try:
                self.kill_worker(actor_id, worker_id)
                logger.info("Spawner just killed worker {}_{} in error_out_actor".format(actor_id, worker_id))
            except DockerError as e:
                # best effort: this is already exception processing, so only log.
                logger.info("Received DockerError trying to kill worker: {}. Exception: {}".format(worker_id, e))
                logger.info("Spawner will continue on since this is exception processing.")
Esempio n. 5
0
 def delete(self, actor_id, ch_name):
     """Schedule the worker identified by ``ch_name`` to be stopped.

     :param actor_id: id of the actor, resolved to a database id together
         with ``g.tenant``.
     :param ch_name: name used both to look up and to shut down the worker.
     :raises APIException: with status 404 when the worker lookup fails.
     :return: the ``ok`` response confirming the worker is scheduled to stop.
     """
     db_id = Actor.get_dbid(g.tenant, actor_id)
     # the lookup is only an existence check; its result is not needed
     try:
         Worker.get_worker(db_id, ch_name)
     except WorkerException as exc:
         raise APIException(exc.msg, 404)
     shutdown_worker(ch_name)
     return ok(result=None, msg="Worker scheduled to be stopped.")
Esempio n. 6
0
 def delete(self, actor_id, worker_id):
     """Schedule the given worker of an actor to be stopped.

     :param actor_id: id of the actor, resolved to a database id together
         with ``g.tenant``.
     :param worker_id: id of the worker to stop.
     :raises ResourceError: with status 404 when the worker lookup fails.
     :return: the ``ok`` response confirming the worker is scheduled to stop.
     """
     db_id = Actor.get_dbid(g.tenant, actor_id)
     try:
         found = Worker.get_worker(db_id, worker_id)
     except WorkerException as exc:
         raise ResourceError(exc.msg, 404)
     # shutdown is addressed by the worker's channel name
     channel_name = found['ch_name']
     shutdown_worker(channel_name)
     return ok(result=None, msg="Worker scheduled to be stopped.")
Esempio n. 7
0
 def get(self, actor_id, ch_name):
     """Return the worker identified by ``ch_name`` for the given actor.

     :param actor_id: id of the actor, resolved to a database id together
         with ``g.tenant``.
     :param ch_name: name used to look up the worker.
     :raises APIException: with status 404 when the actor is not in the
         actors store or the worker lookup fails.
     :return: the ``ok`` response carrying the worker as its result.
     """
     db_id = Actor.get_dbid(g.tenant, actor_id)
     try:
         Actor.from_db(actors_store[db_id])
     except KeyError:
         # A missing actor is reported as a 404 API error, the same way the
         # missing-worker case below is, instead of raising WorkerException.
         raise APIException("actor not found: {}".format(actor_id), 404)
     try:
         worker = Worker.get_worker(db_id, ch_name)
     except WorkerException as e:
         raise APIException(e.msg, 404)
     return ok(result=worker, msg="Worker retrieved successfully.")
Esempio n. 8
0
 def get(self, actor_id, worker_id):
     """Return the given worker of an actor.

     :param actor_id: id of the actor, resolved to a database id together
         with ``g.tenant``.
     :param worker_id: id of the worker to retrieve.
     :raises ResourceError: with status 404 when the actor is not in the
         actors store or the worker lookup fails.
     :return: the ``ok`` response carrying the worker as its result.
     """
     db_id = Actor.get_dbid(g.tenant, actor_id)
     try:
         Actor.from_db(actors_store[db_id])
     except KeyError:
         # A missing actor is reported as a 404 resource error, the same way
         # the missing-worker case below is, instead of raising
         # WorkerException.
         raise ResourceError("actor not found: {}".format(actor_id), 404)
     try:
         worker = Worker.get_worker(db_id, worker_id)
     except WorkerException as e:
         raise ResourceError(e.msg, 404)
     return ok(result=worker, msg="Worker retrieved successfully.")
Esempio n. 9
0
 def delete(self, actor_id, worker_id):
     """Schedule the given worker of an actor to be stopped.

     :param actor_id: id of the actor, resolved to a database id together
         with ``g.tenant``.
     :param worker_id: id of the worker to stop.
     :raises ResourceError: with status 404 when the worker lookup fails.
     :return: the ``ok`` response confirming the worker is scheduled to stop.
     """
     logger.debug("top of DELETE /actors/{}/workers/{}.".format(actor_id, worker_id))
     db_id = Actor.get_dbid(g.tenant, actor_id)
     try:
         target = Worker.get_worker(db_id, worker_id)
     except WorkerException as exc:
         logger.debug("Did not find worker: {}. actor: {}.".format(worker_id, actor_id))
         raise ResourceError(exc.msg, 404)
     logger.info("calling shutdown_worker(). worker: {}. actor: {}.".format(worker_id, actor_id))
     # shutdown is addressed by the id stored on the worker record
     stored_id = target['id']
     shutdown_worker(stored_id)
     logger.info("shutdown_worker() called for worker: {}. actor: {}.".format(worker_id, actor_id))
     return ok(result=None, msg="Worker scheduled to be stopped.")
Esempio n. 10
0
 def check_new_params(self, cmd):
     """Apply the additional checks required for new client requests.

     :param cmd: the command; 'actor_id' and 'worker_id' are read with
         ``cmd.get``.
     :return: ``(valid, msg, actor.owner)`` using the result of
         ``check_common`` when both lookups succeed; otherwise
         ``(False, <error message>, None)``, with the message also logged
         at error level.
     """
     valid, msg = self.check_common(cmd)
     actor_id = cmd.get('actor_id')
     # confirm the actor exists
     try:
         actor = Actor.from_db(actors_store[actor_id])
     except KeyError:
         problem = "Unable to look up actor with id: {}".format(actor_id)
         logger.error(problem)
         return False, problem, None
     # confirm the worker exists for that actor
     try:
         Worker.get_worker(actor_id=actor_id, worker_id=cmd.get('worker_id'))
     except WorkerException as exc:
         problem = "Unable to look up worker: {}".format(exc.msg)
         logger.error(problem)
         return False, problem, None
     logger.debug("new params were valid.")
     return valid, msg, actor.owner
Esempio n. 11
0
 def get(self, actor_id, worker_id):
     """Return the given worker of an actor.

     :param actor_id: id of the actor, resolved to a database id together
         with ``g.tenant``.
     :param worker_id: id of the worker to retrieve.
     :raises ResourceError: with status 404 when the actor is not in the
         actors store or the worker lookup fails.
     :return: the ``ok`` response carrying the worker as its result.
     """
     logger.debug("top of GET /actors/{}/workers/{}.".format(actor_id, worker_id))
     db_id = Actor.get_dbid(g.tenant, actor_id)

     # the actor itself must exist before its workers are consulted
     try:
         Actor.from_db(actors_store[db_id])
     except KeyError:
         logger.debug("Did not find actor: {}.".format(actor_id))
         raise ResourceError("No actor found with id: {}.".format(actor_id), 404)

     try:
         found = Worker.get_worker(db_id, worker_id)
     except WorkerException as exc:
         logger.debug("Did not find worker: {}. actor: {}.".format(worker_id, actor_id))
         raise ResourceError(exc.msg, 404)

     return ok(result=found, msg="Worker retrieved successfully.")
Esempio n. 12
0
 def get(self, actor_id, worker_id):
     """Return the display form of the given worker of an actor.

     :param actor_id: id of the actor, resolved to a database id together
         with ``g.tenant``.
     :param worker_id: id of the worker to retrieve.
     :raises ResourceError: with status 404 when the actor is not in the
         actors store or the worker lookup fails.
     :return: the ``ok`` response whose result is ``Worker.display()`` of
         the retrieved worker.
     """
     logger.debug("top of GET /actors/{}/workers/{}.".format(actor_id, worker_id))
     db_id = Actor.get_dbid(g.tenant, actor_id)
     try:
         Actor.from_db(actors_store[db_id])
     except KeyError:
         logger.debug("Did not find actor: {}.".format(actor_id))
         raise ResourceError("No actor found with id: {}.".format(actor_id), 404)
     try:
         worker_data = Worker.get_worker(db_id, worker_id)
     except WorkerException as exc:
         logger.debug("Did not find worker: {}. actor: {}.".format(worker_id, actor_id))
         raise ResourceError(exc.msg, 404)
     # the lookup yields a plain dictionary (per the original author's note);
     # record the worker's id in it and wrap it in a Worker object so the
     # display form can be produced.
     worker_data.update({'id': worker_id})
     displayed = Worker(**worker_data).display()
     return ok(result=displayed, msg="Worker retrieved successfully.")
Esempio n. 13
0
    def process(self, cmd):
        """Main spawner method for processing a command from the CommandChannel.

        Retrieves the actor and the worker named in the command, checks that the
        worker is still in REQUESTED status, moves it to SPAWNER_SETUP, optionally
        generates an OAuth client for it, and starts the worker. When
        ``stop_existing`` is set, ``stop_workers`` is then called with the new
        worker's id.

        :param cmd: command dict. 'actor_id', 'worker_id', 'image' and 'tenant' are
            read with item access; 'stop_existing' is optional and defaults to True.
        :return: None. Lookup failures, unexpected worker statuses and start_worker
            failures are logged and end processing early.
        """
        logger.info("top of process; cmd: {}".format(cmd))
        actor_id = cmd['actor_id']
        try:
            actor = Actor.from_db(actors_store[actor_id])
        except Exception as e:
            msg = f"Exception in spawner trying to retrieve actor object from store. Aborting. Exception: {e}"
            logger.error(msg)
            return
        worker_id = cmd['worker_id']
        image = cmd['image']
        tenant = cmd['tenant']
        stop_existing = cmd.get('stop_existing', True)
        num_workers = 1
        logger.debug("spawner command params: actor_id: {} worker_id: {} image: {} tenant: {} "
                     "stop_existing: {} num_workers: {}".format(actor_id, worker_id,
                                                                image, tenant, stop_existing, num_workers))
        # if the worker was sent a delete request before spawner received this message to create the worker,
        # the status will be SHUTDOWN_REQUESTED, not REQUESTED. in that case, we simply abort and remove the
        # worker from the collection.
        try:
            logger.debug("spawner checking worker's status for SHUTDOWN_REQUESTED")
            worker = Worker.get_worker(actor_id, worker_id)
            logger.debug(f"spawner got worker; worker: {worker}")
        except Exception as e:
            logger.error(f"spawner got exception trying to retrieve worker. "
                         f"actor_id: {actor_id}; worker_id: {worker_id}; e: {e}")
            return

        status = worker.get('status')
        if not status == REQUESTED:
            logger.debug(f"worker was NOT in REQUESTED status. status: {status}")
            if status == SHUTDOWN_REQUESTED or status == SHUTTING_DOWN or status == ERROR:
                logger.debug(f"worker status was {status}; spawner deleting worker and returning..")
                try:
                    Worker.delete_worker(actor_id, worker_id)
                    logger.debug("spawner deleted worker because it was SHUTDOWN_REQUESTED.")
                    return
                except Exception as e:
                    logger.error(f"spawner got exception trying to delete a worker in SHUTDOWN_REQUESTED status. "
                                 f"actor_id: {actor_id}; worker_id: {worker_id}; e: {e}")
                    return
            else:
                logger.error(f"spawner found worker in unexpected status: {status}. Not processing command and returning.")
                return

        # worker status was REQUESTED; moving on to SPAWNER_SETUP ----
        Worker.update_worker_status(actor_id, worker_id, SPAWNER_SETUP)
        logger.debug("spawner has updated worker status to SPAWNER_SETUP; worker_id: {}".format(worker_id))
        # client details stay None unless a client is generated below.
        client_id = None
        client_secret = None
        client_access_token = None
        client_refresh_token = None
        api_server = None

        # ---- Oauth client generation for the worker -------
        # check if tenant and instance configured for client generation -
        try:
            generate_clients = Config.get('workers', f'{tenant}_generate_clients').lower()
        except Exception:
            # no usable tenant-specific setting; fall back to the global one. Only
            # Exception is caught so KeyboardInterrupt/SystemExit still propagate.
            logger.debug(f"Did not find a {tenant}_generate_clients config. Looking for a global config.")
            generate_clients = Config.get('workers', 'generate_clients').lower()
        logger.debug(f"final generate_clients: {generate_clients}")
        if generate_clients == "true":
            logger.debug("client generation was configured to be available; now checking the actor's token attr.")
            # updated 1.3.0-- check whether the actor requires a token:
            if actor.token:
                logger.debug("spawner starting client generation")
                (client_id,
                 client_access_token,
                 client_refresh_token,
                 api_server,
                 client_secret) = self.client_generation(actor_id, worker_id, tenant)
            else:
                logger.debug("actor's token attribute was False. Not generating client.")
        ch = SpawnerWorkerChannel(worker_id=worker_id)

        logger.debug("spawner attempting to start worker; worker_id: {}".format(worker_id))
        try:
            worker = self.start_worker(
                image,
                tenant,
                actor_id,
                worker_id,
                client_id,
                client_access_token,
                client_refresh_token,
                ch,
                api_server,
                client_secret
            )
        except Exception as e:
            # NOTE(review): `ch` is not closed on this path -- confirm whether
            # start_worker/error_out_actor release it or whether it leaks here.
            msg = "Spawner got an exception from call to start_worker. Exception:{}".format(e)
            logger.error(msg)
            self.error_out_actor(actor_id, worker_id, msg)
            # remove the client generated above, since the worker never started.
            if client_id:
                self.delete_client(tenant, actor_id, worker_id, client_id, client_secret)
            return

        logger.debug("Returned from start_worker; Created new worker: {}".format(worker))
        ch.close()
        logger.debug("Client channel closed")

        if stop_existing:
            logger.info("Stopping existing workers: {}".format(worker_id))
            # TODO - update status to stop_requested
            self.stop_workers(actor_id, [worker_id])