Ejemplo n.º 1
0
    def stop_workers(self, actor_id, worker_ids):
        """Stop an actor's existing workers; used when updating an actor's image.

        The actor's workers are looked up in ``workers_store``. When at least one
        exists, the actor message channel is closed and a ``'stop-no-delete'``
        message is sent to every worker whose id is NOT in ``worker_ids``.

        :param actor_id: key used to look up workers in ``workers_store`` and to
            open the actor message channel.
        :param worker_ids: ids of the workers that must be left running (the
            newly started ones).
        :return: None
        """
        logger.debug("Top of stop_workers() for actor: {}.".format(actor_id))
        try:
            workers_dict = workers_store[actor_id]
        except KeyError:
            logger.debug("workers_store had no workers for actor: {}".format(actor_id))
            workers_dict = {}

        num_workers = len(workers_dict.items())
        if num_workers == 0:
            logger.info("No workers to stop.")
            return

        # There are existing workers: close the actor message channel and then
        # gracefully shut down the old worker processes.
        logger.info("Found {} workers to stop.".format(num_workers))
        # first, close the actor msg channel to prevent any new messages from being
        # pulled by the old workers.
        actor_ch = ActorMsgChannel(actor_id)
        actor_ch.close()
        logger.info("Actor channel closed for actor: {}".format(actor_id))

        # now, send messages to workers for a graceful shutdown:
        for _, worker in workers_dict.items():
            worker_id = worker['id']
            # don't stop the new workers:
            if worker_id in worker_ids:
                logger.debug("skipping worker {} as it is in worker_ids.".format(worker_id))
                continue
            ch = WorkerChannel(worker_id=worker_id)
            try:
                # since this is an update, there are new workers being started, so
                # don't delete the actor msg channel:
                ch.put('stop-no-delete')
                logger.info("Sent 'stop-no-delete' message to worker_id: {}".format(worker_id))
            finally:
                # always release the worker channel, even if the put failed.
                ch.close()
Ejemplo n.º 2
0
    def get(self, actor_id):
        """Handle GET /actors/{actor_id}/messages.

        Resolves the actor's database id for the current tenant, verifies the
        actor exists, and reports the length of the actor message channel's
        underlying queue together with hypermedia links.

        :param actor_id: the actor id as given in the request path.
        :return: the result of ``ok(...)`` wrapping a dict with the keys
            ``'messages'`` and ``'_links'``.
        :raises ResourceError: with code 404 when the actor lookup raises ``KeyError``.
        """
        def get_hypermedia(actor):
            """Build the ``_links`` section for the response."""
            return {
                '_links': {
                    'self':
                    '{}/actors/v2/{}/messages'.format(actor.api_server,
                                                      actor.id),
                    'owner':
                    '{}/profiles/v2/{}'.format(actor.api_server, actor.owner),
                },
            }

        logger.debug("top of GET /actors/{}/messages".format(actor_id))
        # check that actor exists
        dbid = Actor.get_dbid(g.tenant, actor_id)
        try:
            actor = Actor.from_db(actors_store[dbid])
        except KeyError:
            logger.debug("did not find actor: {}.".format(actor_id))
            raise ResourceError("No actor found with id: {}.".format(actor_id),
                                404)
        ch = ActorMsgChannel(actor_id=dbid)
        try:
            result = {'messages': len(ch._queue._queue)}
        finally:
            # release the channel even if reading the queue length fails.
            ch.close()
        logger.debug("messages found for actor: {}.".format(actor_id))
        result.update(get_hypermedia(actor))
        return ok(result)
Ejemplo n.º 3
0
def create_gauges(actor_ids):
    """Create (if needed) and refresh the message and worker gauges per actor.

    For every actor id, a message-count gauge and a worker-count gauge are
    looked up in ``message_gauges`` / ``worker_gaueges`` or created and
    registered there. The message gauge is set to the length of the actor
    message channel's underlying queue; the worker gauge is set to the number
    of workers returned by ``Worker.get_workers``.

    If a gauge cannot be created, the failure is logged and that gauge is
    simply not updated for this actor (previously a stale or unbound gauge
    variable was used in that case).

    :param actor_ids: iterable of actor ids; each id must support
        ``.decode("utf-8")``.
    :return: ``actor_ids``, unchanged.
    :raises Exception: whatever ``ActorMsgChannel`` raises while connecting
        (logged, then re-raised).
    """
    logger.debug("METRICS: Made it to create_gauges")
    for actor_id in actor_ids:
        # --- message gauge -------------------------------------------------
        if actor_id in message_gauges:
            message_gauge = message_gauges[actor_id]
        else:
            message_gauge = None
            try:
                metric_suffix = actor_id.decode("utf-8").replace('-', '_')
                message_gauge = Gauge(
                    'message_count_for_actor_{}'.format(metric_suffix),
                    'Number of messages for actor {}'.format(metric_suffix))
                message_gauges.update({actor_id: message_gauge})
                logger.debug('Created gauge {}'.format(message_gauge))
            except Exception as e:
                logger.info(
                    "got exception trying to instantiate the Gauge: {}".format(
                        e))

        try:
            ch = ActorMsgChannel(actor_id=actor_id.decode("utf-8"))
        except Exception as e:
            logger.error(
                "Exception connecting to ActorMsgChannel: {}".format(e))
            raise
        try:
            message_count = len(ch._queue._queue)
        finally:
            # release the channel even if reading the queue length fails.
            ch.close()
        if message_gauge is not None:
            message_gauge.set(message_count)
        logger.debug("METRICS: {} messages found for actor: {}.".format(
            message_count, actor_id))

        # --- worker gauge --------------------------------------------------
        if actor_id in worker_gaueges:
            worker_gauge = worker_gaueges[actor_id]
        else:
            worker_gauge = None
            try:
                metric_suffix = actor_id.decode("utf-8").replace('-', '_')
                worker_gauge = Gauge(
                    'worker_count_for_actor_{}'.format(metric_suffix),
                    'Number of workers for actor {}'.format(metric_suffix))
                worker_gaueges.update({actor_id: worker_gauge})
                logger.debug('Created worker gauge {}'.format(worker_gauge))
            except Exception as e:
                logger.info(
                    "got exception trying to instantiate the Worker Gauge: {}".
                    format(e))

        workers = Worker.get_workers(actor_id)
        if worker_gauge is not None:
            worker_gauge.set(len(workers))

    return actor_ids
Ejemplo n.º 4
0
 def get(self):
     """Handle GET /admin/actors.

     Builds one actor object per entry in ``actors_store`` and decorates it
     with its workers, the first worker (when any exist), the length of its
     message channel queue, and its execution totals.

     :return: the result of ``ok(...)`` carrying the list of actors.
     """
     logger.debug("top of GET /admin/actors")
     actors = []
     for _, record in actors_store.items():
         actor = Actor.from_db(record)
         actor.workers = Worker.get_workers(actor.db_id)

         # expose the first worker, if there is one, as ``actor.worker``
         first_entry = next(iter(actor.workers.items()), None)
         if first_entry is not None:
             _, first_worker = first_entry
             actor.worker = first_worker

         # pending message count comes from the channel's underlying queue
         msg_channel = ActorMsgChannel(actor_id=actor.db_id)
         actor.messages = len(msg_channel._queue._queue)
         msg_channel.close()

         exec_summary = ExecutionsSummary(db_id=actor.db_id)
         actor.executions = exec_summary.total_executions
         actor.runtime = exec_summary.total_runtime

         actors.append(actor)
     logger.info("actors retrieved.")
     return ok(result=actors, msg="Actors retrieved successfully.")
Ejemplo n.º 5
0
    def stop_workers(self, actor_id):
        """Stop an actor's existing workers; used when updating an actor's image.

        :param actor_id: key used to look up workers in ``workers_store`` and to
            open the actor message channel.
        :return: None
        """
        try:
            existing_workers = workers_store[actor_id]
        except KeyError:
            existing_workers = {}

        # nothing to shut down when the actor has no workers
        if len(existing_workers.items()) == 0:
            return

        # Close the actor msg channel first so the old workers cannot pull any
        # new messages.
        ActorMsgChannel(actor_id).close()

        # Then ask every worker to shut down gracefully.
        for _key, worker_info in existing_workers.items():
            worker_channel = WorkerChannel(name=worker_info['ch_name'])
            worker_channel.put('stop')
Ejemplo n.º 6
0
    def stop_workers(self, actor_id, worker_ids):
        """Stop an actor's existing workers; used when updating an actor's image.

        :param actor_id: key used to look up workers in ``workers_store`` and to
            open the actor message channel.
        :param worker_ids: ids of workers that must be left running.
        :return: None
        """
        try:
            existing_workers = workers_store[actor_id]
        except KeyError:
            existing_workers = {}

        # nothing to shut down when the actor has no workers
        if len(existing_workers.items()) == 0:
            return

        # Close the actor msg channel first so the old workers cannot pull any
        # new messages.
        ActorMsgChannel(actor_id).close()

        # Then ask each old worker to shut down gracefully.
        for _key, worker_info in existing_workers.items():
            if worker_info['id'] in worker_ids:
                # one of the new workers: leave it alone
                continue
            worker_channel = WorkerChannel(name=worker_info['ch_name'])
            worker_channel.put('stop')
Ejemplo n.º 7
0
    def stop_workers(self, actor_id):
        """Stop an actor's existing workers; used when updating an actor's image.

        The workers are read as a JSON document from ``workers_store``.

        :param actor_id: key used to look up workers in ``workers_store`` and to
            open the actor message channel.
        :return: None
        """
        try:
            serialized_workers = workers_store[actor_id]
            workers = json.loads(serialized_workers)
        except KeyError:
            print("No existing workers.")
            workers = {}
        else:
            print("Found existing workers: {}".format(str(workers)))

        # nothing to shut down when there are no workers
        if len(workers) == 0:
            return

        # Close the actor msg channel first so the old workers cannot pull any
        # new messages.
        ActorMsgChannel(actor_id).close()

        # Then ask every worker to shut down gracefully.
        # NOTE(review): iteration assumes each element is a mapping with a
        # 'ch_name' key - confirm the stored JSON shape.
        for worker_info in workers:
            worker_channel = WorkerChannel(name=worker_info['ch_name'])
            worker_channel.put('stop')
Ejemplo n.º 8
0
    def get_metrics(self):
        """Refresh the message-count gauge of every actor in ``actors_store``.

        For each actor id, the gauge is looked up in ``message_gauges`` or
        created and registered there, then set to the length of the actor
        message channel's underlying queue. If a gauge cannot be created, the
        failure is logged and that actor's gauge is left untouched.

        :return: the list of actor ids that were processed (an empty list when
            the store holds no actors), or ``[]`` if any other exception
            occurred while processing.
        """
        logger.debug("top of get in MetricResource")

        actor_ids = [db_id for db_id, _ in actors_store.items()]
        logger.debug("ACTOR IDS: {}".format(actor_ids))
        try:
            for actor_id in actor_ids:
                if actor_id in message_gauges:
                    message_gauge = message_gauges[actor_id]
                else:
                    message_gauge = None
                    try:
                        metric_suffix = actor_id.decode("utf-8").replace('-', '_')
                        message_gauge = Gauge(
                            'message_count_for_actor_{}'.format(metric_suffix),
                            'Number of messages for actor {}'.format(metric_suffix)
                        )
                        message_gauges.update({actor_id: message_gauge})
                    except Exception as e:
                        logger.info("got exception trying to instantiate the Gauge: {}".format(e))

                try:
                    ch = ActorMsgChannel(actor_id=actor_id.decode("utf-8"))
                except Exception as e:
                    logger.error("Exception connecting to ActorMsgChannel: {}".format(e))
                    # re-raised into the outer handler below, which returns [].
                    raise
                try:
                    message_count = len(ch._queue._queue)
                finally:
                    # release the channel even if reading the queue length fails.
                    ch.close()
                if message_gauge is not None:
                    message_gauge.set(message_count)
                logger.debug("METRICS: {} messages found for actor: {}.".format(message_count, actor_id))
            return actor_ids
        except Exception as e:
            logger.info("Got exception in get_metrics: {}".format(e))
            return []
Ejemplo n.º 9
0
def process_link(link, msg, d):
    """
    Process an event that carries a link to another actor.

    Verifies the linked actor exists in ``actors_store``, records a new
    execution for it, stores the execution id in ``d`` under
    ``'_abaco_execution_id'`` and puts the message on the linked actor's
    message channel.

    :param link: key of the linked actor in ``actors_store`` (per the original
        comment, always the dbid of the linked actor).
    :param msg: the message to deliver to the linked actor.
    :param d: message dictionary; mutated in place with the execution id.
    :return: None
    :raises KeyError: if ``link`` is not present in ``actors_store``.
    """
    logger.debug("top of process_link")

    # ensure that the linked actor still exists
    try:
        actors_store[link]
    except KeyError as err:
        logger.error(
            "Processing event message for actor {} that does not exist. Quiting"
            .format(link))
        raise err

    # create an execution for the linked actor with message
    execution_info = {
        'cpu': 0,
        'io': 0,
        'runtime': 0,
        'status': SUBMITTED,
        'executor': 'Abaco Event',
    }
    execution_id = Execution.add_execution(link, execution_info)
    logger.info(
        "Events processor agent added execution {} for actor {}".format(
            execution_id, link))

    d['_abaco_execution_id'] = execution_id
    logger.debug(
        "sending message to actor. Final message {} and message dictionary: {}"
        .format(msg, d))

    link_channel = ActorMsgChannel(actor_id=link)
    link_channel.put_msg(message=msg, d=d)
    link_channel.close()
    logger.info("link processed.")
Ejemplo n.º 10
0
    def post(self, actor_id):
        """Handle POST /actors/{actor_id}/messages.

        Validates the request, builds the message dictionary from the query
        parameters and request context, records a new execution, puts the
        message on the actor's message channel and makes sure the actor has at
        least one worker.

        :param actor_id: the actor id as given in the request path.
        :return: the result of ``ok(...)`` wrapping the execution id, the echoed
            message (or a placeholder for binary payloads) and hypermedia
            links; keys are converted with ``dict_to_camel`` when the 'web'
            'case' config value is 'camel'.
        :raises ResourceError: with code 404 when the actor lookup raises ``KeyError``.
        """
        def get_hypermedia(actor, exc):
            """Build the ``_links`` section for the response."""
            links = {
                'self': '{}/actors/v2/{}/executions/{}'.format(actor.api_server, actor.id, exc),
                'owner': '{}/profiles/v2/{}'.format(actor.api_server, actor.owner),
                'messages': '{}/actors/v2/{}/messages'.format(actor.api_server, actor.id),
            }
            return {'_links': links}

        logger.debug("top of POST /actors/{}/messages.".format(actor_id))
        dbid = Actor.get_dbid(g.tenant, actor_id)
        try:
            Actor.from_db(actors_store[dbid])
        except KeyError:
            logger.debug("did not find actor: {}.".format(actor_id))
            raise ResourceError("No actor found with id: {}.".format(actor_id), 404)
        args = self.validate_post()
        logger.debug("POST body validated. actor: {}.".format(actor_id))

        # Every query parameter except 'message' becomes a k:v pair of the message
        # dictionary; the 'message' object itself comes from the post payload and
        # need not be JSON data.
        d = {k: v for k, v in request.args.items() if k != 'message'}
        logger.debug("extra fields added to message from query parameters: {}.".format(d))

        # Copy selected request-context attributes into the message when present.
        if hasattr(g, 'user'):
            username = g.user
            d['_abaco_username'] = username
            logger.debug("_abaco_username: {} added to message.".format(username))
        if hasattr(g, 'api_server'):
            api_server = g.api_server
            d['_abaco_api_server'] = api_server
            logger.debug("_abaco_api_server: {} added to message.".format(api_server))
        # if hasattr(g, 'jwt'):
        #     d['_abaco_jwt'] = g.jwt
        # if hasattr(g, 'jwt_server'):
        #     d['_abaco_jwt_server'] = g.jwt_server
        if hasattr(g, 'jwt_header_name'):
            jwt_header_name = g.jwt_header_name
            d['_abaco_jwt_header_name'] = jwt_header_name
            logger.debug("abaco_jwt_header_name: {} added to message.".format(jwt_header_name))

        # create an execution
        execution_info = {
            'cpu': 0,
            'io': 0,
            'runtime': 0,
            'status': SUBMITTED,
            'executor': g.user,
        }
        exc = Execution.add_execution(dbid, execution_info)
        logger.info("Execution {} added for actor {}".format(exc, actor_id))
        d['_abaco_execution_id'] = exc
        d['_abaco_Content_Type'] = args.get('_abaco_Content_Type', '')
        logger.debug("Final message dictionary: {}".format(d))

        inbox = ActorMsgChannel(actor_id=dbid)
        inbox.put_msg(message=args['message'], d=d)
        inbox.close()
        logger.debug("Message added to actor inbox. id: {}.".format(actor_id))

        # make sure at least one worker is available
        actor = Actor.from_db(actors_store[dbid])
        actor.ensure_one_worker()
        logger.debug("ensure_one_worker() called. id: {}.".format(actor_id))

        # binary payloads are not echoed back in the response
        if args.get('_abaco_Content_Type') == 'application/octet-stream':
            echoed_msg = 'binary - omitted'
        else:
            echoed_msg = args['message']
        result = {'execution_id': exc, 'msg': echoed_msg}
        result.update(get_hypermedia(actor, exc))

        case = Config.get('web', 'case')
        if case == 'camel':
            return ok(dict_to_camel(result))
        return ok(result)
Ejemplo n.º 11
0
def create_gauges(actor_ids):
    """Refresh per-actor message/worker gauges and the command channel gauge.

    For every actor id still present in ``actors_store``:

    * the message gauge is looked up in ``message_gauges`` (or created and
      registered) and set to the length of the actor message channel's queue;
    * the worker gauge is looked up in ``worker_gaueges`` (or created and
      registered) and set to the number of workers from ``Worker.get_workers``;
    * the actor's command channel name is resolved from its ``"queue"`` field,
      falling back to ``'default'`` when it is missing or not listed in the
      'spawner' 'host_queues' config value.

    After the loop, the command gauge is set for the channel of the last actor
    processed (``'default'`` when no actor was processed). Previously the
    channel name was only assigned for actors that already had a gauge, so the
    function failed with ``NameError`` when every gauge was new or
    ``actor_ids`` was empty.

    If a gauge cannot be created, the failure is logged and that gauge is not
    updated for the actor.

    :param actor_ids: iterable of actor ids; each id must support
        ``.decode("utf-8")``.
    :return: tuple ``(actor_ids, inbox_lengths, cmd_length)`` where
        ``inbox_lengths`` maps decoded actor ids to their message queue length
        and ``cmd_length`` is the length of the command channel queue.
    :raises Exception: whatever ``ActorMsgChannel`` raises while connecting
        (logged, then re-raised).
    """
    logger.debug(
        "METRICS: Made it to create_gauges; actor_ids: {}".format(actor_ids))
    inbox_lengths = {}
    # Fallback used when no actor is processed in the loop below.
    channel_name = 'default'
    for actor_id in actor_ids:
        logger.debug("top of for loop for actor_id: {}".format(actor_id))

        try:
            actor = actors_store[actor_id]
        except KeyError:
            logger.error("actor {} does not exist.".format(actor_id))
            continue

        # If the actor doesn't have a message gauge, add one; otherwise reuse it.
        if actor_id in message_gauges:
            message_gauge = message_gauges[actor_id]
        else:
            message_gauge = None
            try:
                metric_suffix = actor_id.decode("utf-8").replace('-', '_')
                message_gauge = Gauge(
                    'message_count_for_actor_{}'.format(metric_suffix),
                    'Number of messages for actor {}'.format(metric_suffix))
                message_gauges.update({actor_id: message_gauge})
                logger.debug('Created gauge {}'.format(message_gauge))
            except Exception as e:
                logger.error(
                    "got exception trying to create/instantiate the gauge; "
                    "actor {}; exception: {}".format(actor_id, e))

        # Resolve this actor's command channel; the last actor processed wins.
        channel_name = actor.get("queue")
        queues_list = Config.get('spawner', 'host_queues').replace(' ', '')
        valid_queues = queues_list.split(',')
        if not channel_name or channel_name not in valid_queues:
            channel_name = 'default'

        # Update this actor's gauge to its current # of messages
        try:
            ch = ActorMsgChannel(actor_id=actor_id.decode("utf-8"))
        except Exception as e:
            logger.error(
                "Exception connecting to ActorMsgChannel: {}".format(e))
            raise
        try:
            message_count = len(ch._queue._queue)
        finally:
            # release the channel even if reading the queue length fails.
            ch.close()
        inbox_lengths[actor_id.decode("utf-8")] = message_count
        if message_gauge is not None:
            message_gauge.set(message_count)
        logger.debug("METRICS: {} messages found for actor: {}.".format(
            message_count, actor_id))

        # add a worker gauge for this actor if one does not exist
        if actor_id in worker_gaueges:
            worker_gauge = worker_gaueges[actor_id]
        else:
            worker_gauge = None
            try:
                metric_suffix = actor_id.decode("utf-8").replace('-', '_')
                worker_gauge = Gauge(
                    'worker_count_for_actor_{}'.format(metric_suffix),
                    'Number of workers for actor {}'.format(metric_suffix))
                worker_gaueges.update({actor_id: worker_gauge})
                logger.debug('Created worker gauge {}'.format(worker_gauge))
            except Exception as e:
                logger.info(
                    "got exception trying to instantiate the Worker Gauge: {}".
                    format(e))

        # Update this actor's worker count
        workers = Worker.get_workers(actor_id)
        if worker_gauge is not None:
            worker_gauge.set(len(workers))

    ch = CommandChannel(name=channel_name)
    try:
        cmd_length = len(ch._queue._queue)
        command_gauge.labels(channel_name).set(cmd_length)
        logger.debug("METRICS COMMAND CHANNEL {} size: {}".format(
            channel_name, cmd_length))
    finally:
        ch.close()

    # Return actor_ids so we don't have to query for them again later
    return actor_ids, inbox_lengths, cmd_length
Ejemplo n.º 12
0
def create_gauges(actor_ids):
    """
    Creates (if needed) and refreshes the Prometheus gauges for each actor id.

    For every actor still present in ``actors_store``, a message gauge tracks
    the number of pending messages in the actor's queue and a worker gauge
    tracks the number of workers returned by ``Worker.get_workers``. The two
    gauges are held in separate variables so that a failure to create the
    worker gauge can never cause the worker count to be written into the
    message gauge. Finally, the command gauge is set to the length of the
    'default' command channel.

    :param actor_ids: list of actors that should be processed. Does not include stateful actors or
    actors in a shutting down state.
    :return: tuple ``(actor_ids, inbox_lengths, cmd_length)`` where ``inbox_lengths`` maps each
    processed actor id to its message queue length and ``cmd_length`` is the length of the
    'default' command channel queue.
    :raises Exception: whatever is raised while connecting to, or reading from, the actor
    message channel (logged, then re-raised).
    """
    logger.debug("top of create_gauges; actor_ids: {}".format(actor_ids))
    # dictionary mapping actor_ids to their message queue lengths
    inbox_lengths = {}
    for actor_id in actor_ids:
        logger.debug("top of for loop for actor_id: {}".format(actor_id))
        # first, make sure the actor still exists in the actor store
        try:
            actors_store[actor_id]
        except KeyError:
            logger.error(
                f"actor {actor_id} does not exist in store; continuing to next actor."
            )
            continue

        # If the actor doesn't have a message gauge, add one; otherwise reuse it.
        if actor_id in message_gauges:
            message_gauge = message_gauges[actor_id]
        else:
            message_gauge = None
            try:
                metric_suffix = actor_id.replace('-', '_')
                message_gauge = Gauge(
                    'message_count_for_actor_{}'.format(metric_suffix),
                    'Number of messages for actor {}'.format(metric_suffix))
                message_gauges.update({actor_id: message_gauge})
                logger.debug('Created gauge {}'.format(message_gauge))
            except Exception as e:
                logger.error(
                    "got exception trying to create/instantiate the gauge; "
                    "actor {}; exception: {}".format(actor_id, e))

        # Read the actor's current # of messages
        ch = None
        try:
            ch = ActorMsgChannel(actor_id=actor_id)
            msg_length = len(ch._queue._queue)
        except Exception as e:
            logger.error(
                "Exception connecting to ActorMsgChannel: {}".format(e))
            raise
        finally:
            # release the channel whenever it was successfully opened.
            if ch is not None:
                ch.close()
        # add the actor's current message queue length to the inbox_lengths in-memory variable
        inbox_lengths[actor_id] = msg_length
        # if we were able to get or create the gauge, set it to the current message count:
        if message_gauge is not None:
            try:
                message_gauge.set(msg_length)
            except Exception as e:
                logger.error(
                    f"Got exception trying to set the messages on the gauge for actor: {actor_id}; "
                    f"exception: {e}")
        logger.debug("METRICS: {} messages found for actor: {}.".format(
            msg_length, actor_id))

        # add a worker gauge for this actor if one does not exist
        if actor_id in worker_gaueges:
            worker_gauge = worker_gaueges[actor_id]
        else:
            worker_gauge = None
            try:
                metric_suffix = actor_id.replace('-', '_')
                worker_gauge = Gauge(
                    'worker_count_for_actor_{}'.format(metric_suffix),
                    'Number of workers for actor {}'.format(metric_suffix))
                worker_gaueges.update({actor_id: worker_gauge})
                logger.debug('Created worker gauge {}'.format(worker_gauge))
            except Exception as e:
                logger.info(
                    "got exception trying to instantiate the Worker Gauge: {}".
                    format(e))

        # Update this actor's worker count
        workers = Worker.get_workers(actor_id)
        worker_count = len(workers)
        if worker_gauge is not None:
            try:
                worker_gauge.set(worker_count)
            except Exception as e:
                logger.error(
                    f"got exception trying to set the worker gauge for actor {actor_id}; exception: {e}"
                )
        logger.debug(
            f"METRICS: {worker_count} workers found for actor: {actor_id}."
        )

        # Update this actor's command channel metric
        # channel_name = actor.get("queue")
        #
        # queues_list = Config.get('spawner', 'host_queues').replace(' ', '')
        # valid_queues = queues_list.split(',')
        #
        # if not channel_name or channel_name not in valid_queues:
        #     channel_name = 'default'
        #
        # if not channel_name:
        #     # TODO -- this must be changed. there is no way returning no arguments will result in
        #     # anythng but an exception. The calling function is expecting 3 arguments...
        #     # if we really want to blow up right here we should just raise an appropriate exception.
        #     return

    # TODO -- this code needs to be fixed. What follows is only a partial fix; what I think we want to do
    # is set the length of all of the different command channels once at the end of this loop. What was
    # happening instead was that it was only setting one of the command channel's lengths -- whatever command
    # channel happened to belong to the last actor in the loop.
    channel_name = 'default'
    ch = CommandChannel(name=channel_name)
    try:
        cmd_length = len(ch._queue._queue)
        command_gauge.labels(channel_name).set(cmd_length)
        logger.debug(
            f"METRICS COMMAND CHANNEL {channel_name} size: {cmd_length}")
    finally:
        ch.close()

    # Return actor_ids so we don't have to query for them again later
    return actor_ids, inbox_lengths, cmd_length