async def _public_brain_handler(self, recv):
    """Asynchronous handler for the public brain channel.

    Workers that are still initializing use the public channel to register
    themselves to the brain. Only the 'hshake-1' verb is acted upon here;
    messages carrying any other verb are ignored.

    Args:
        recv: the received message object; its `subject` (source channel
            name), `reply` (reply channel name) and `data` (binary request
            message) attributes are read.
    """
    subject = recv.subject
    reply_ch = recv.reply
    request = jsonify(recv.data)

    # Both top-level fields are mandatory; log and give up when one is
    # missing.
    try:
        verb = request['verb']
        context = request['context']
    except Exception as e:
        self._logger.error('Exception in {}, type: {} error: {}'.format(
            get_func_name(), type(e), e))
        return

    if verb != 'hshake-1':
        return

    self._logger.info(
        'Received "hshake-1" msg in {func}: "{subject}": {data}'.format(
            func=get_func_name(), subject=subject, reply=reply_ch,
            data=request))

    try:
        worker_id = context['workerID']
        # TODO(Ray): the channel name would be convention or just recv from worker?
        to_brain_ch = context['ch_to_brain']
        to_worker_ch = context['ch_to_worker']
    except Exception as e:
        self._logger.error('Exception in {}, type: {} error: {}'.format(
            get_func_name(), type(e), e))
        return

    # The first handshake step is only accepted from a worker that is still
    # in its INITIAL state.
    current_status = await self._worker_agent.get_status(worker_id=worker_id)
    if current_status != WorkerStatus.INITIAL.name:
        self._logger.error(
            'Receive "hshake1" in {}: with unexpected worker status "{}"'.format(
                get_func_name(), current_status))
        return

    # update worker status
    await self._worker_agent.update_status(
        WorkerStatus.HSHAKE_1.name, worker_id=worker_id)

    # Listen on the worker's private channel before answering, then send
    # the second handshake step back to the worker.
    await self._nats_cli.subscribe(
        to_brain_ch, cb=self._private_worker_handler)
    hshake_reply = {'verb': 'hshake-2', 'context': context}
    await self._nats_cli.publish(to_worker_ch, str(hshake_reply).encode())
async def get(self, id):
    """Fetch a ticket by its ID.

    Args:
        id: ticket ID

    Returns:
        dict: The decoded ticket content when a value is stored for the
            ticket ID, None otherwise.
    """
    raw_ticket = await self._mem_db.get(gen_key(id))
    return jsonify(raw_ticket) if raw_ticket else None
async def _res_handler(self, recv):
    """Asynchronous handler for responses from the resource manager.

    Handles the responses to the CREATE_WORKER and REMOVE_WORKER commands;
    responses carrying any other command are ignored.

    Args:
        recv: the received message object; only its `data` attribute
            (binary message) is read here.
    """
    response = jsonify(recv.data)

    # Check has error or not.
    if 'error' in response:
        # TODO(JiaKuan Su): Error handling.
        self._logger.error('Error code: "{}" from resource manager'.format(
            response['error']['code']))
        return

    command = response['command']
    res_commands = MESSAGES['ch_brain_res']

    if command == res_commands['CREATE_WORKER']:
        # The resource manager created a worker for the brain: record the
        # analyzer/worker pair.
        worker_id = response['response']['workerId']
        analyzer_id = response['analyzerId']
        self._logger.info(
            'Receive launch ok in {} for worker "{}":analyzer "{}"'.format(
                get_func_name(), worker_id, analyzer_id))
        await self._worker_agent.create_analyzer(analyzer_id, worker_id)
        return

    if command != res_commands['REMOVE_WORKER']:
        return

    # TODO: handle error by checking response message if it failed
    worker_id = response['params']['workerId']
    analyzer_id = response['analyzerId']

    # The ticket holds the channel on which the API request is answered;
    # without a ticket nothing is done.
    ticket = await self._ticket_agent.get(analyzer_id)
    if not ticket:
        return
    api_reply_ch = ticket['reply']

    await self._worker_agent.del_anal_and_worker(analyzer_id, worker_id)
    await self._ticket_agent.delete(analyzer_id)
    removed_reply = self._API.reply_anal_removed(analyzer_id)
    await self._nats_cli.publish(
        api_reply_ch, jsondumps(removed_reply).encode())
async def get_info(self, anal_id=None, worker_id=None):
    """Get the status and pipelines stored for a worker.

    The worker is identified either directly by `worker_id` or through
    `anal_id`; when both are given, `worker_id` takes precedence.

    Args:
        anal_id: analyzer ID used to look up the worker ID (optional).
        worker_id: worker ID (optional).

    Returns:
        tuple: `(status, pipelines)`, where `status` is the decoded status
            string and `pipelines` is the decoded pipelines value. Either
            element is None when no value is stored for it.
        None: when neither ID is given, or when no worker ID is stored for
            `anal_id`.
    """
    # TODO(Ray): should prohibit call get_status with both anal_id and worker_id?
    if not worker_id:
        if not anal_id:
            return None
        # get worker_id by anal_id
        raw_worker_id = await self._mem_db.get(self._get_anal_key(anal_id))
        # The lookup yields None for an unknown analyzer, so it must be
        # checked before decoding.
        if not raw_worker_id:
            return None
        worker_id = raw_worker_id.decode()

    # mget 'status', 'pipelines'
    result = await self._mem_db.mget(
        self._get_worker_key(worker_id, 'status'),
        self._get_worker_key(worker_id, 'pipelines'))
    raw_status, raw_pipelines = result[0], result[1]

    # A field that has not been stored comes back as None; report it as
    # None instead of failing on `.decode()`.
    status = raw_status.decode() if raw_status is not None else None
    if raw_pipelines is not None:
        pipelines = jsonify(raw_pipelines.decode())
    else:
        pipelines = None
    return status, pipelines
async def consume_from_worker(self, worker_id):
    """Read and remove the queued events of a worker.

    Args:
        worker_id (string): worker ID

    Returns:
        list of dict: the events read from the worker's event queue, each
            with its 'timestamp' value converted to float.
    """
    queue_key = 'event:brain:{}'.format(worker_id)

    # TODO(Ray): I think if it need a redis lock for these 2 redis operation
    raw_events = await self._mem_db.lrange(queue_key, 0, -1)
    # Keep only what lies beyond the entries that were just read.
    await self._mem_db.ltrim(queue_key, len(raw_events), -1)

    def _decode(raw_event):
        # Convert one binary event to a dict with a float timestamp.
        event = jsonify(raw_event)
        event['timestamp'] = float(event['timestamp'])
        return event

    return [_decode(raw_event) for raw_event in raw_events]
async def _api_handler(self, recv):
    """Asynchronous handler for commands coming from the API server.

    Supported commands:

    * REQ_ANALYZER_STATUS: reply with the status and pipelines of the
      analyzer's worker, or with "not found".
    * START_ANALYZER: register a ticket for the analyzer and, when the
      analyzer has no worker yet, ask the resource manager to create one.
    * STOP_ANALYZER: register a ticket for the analyzer and ask the
      resource manager to remove its worker.

    Requests that fail validation are dropped without a reply.

    Args:
        recv: the received message object; its `subject` (source channel
            name), `reply` (reply channel name) and `data` (binary request
            message) attributes are read.
    """
    ch = recv.subject
    reply = recv.reply
    msg = jsonify(recv.data)
    timestamp = round(time.time())
    self._logger.info(
        'Received in api_handler() "{subject} {reply}": {data}'.format(
            subject=ch, reply=reply, data=msg))

    try:
        self._API.validate(msg)
    except InvalidRequestFormat:
        self._logger.error(
            'Exception in {}: invalid request format from api'.format(
                get_func_name()))
        return
    except InvalidRequestType:
        # ignore the exception since the request is not for us
        return

    analyzer_id = msg['params']['id']
    command = msg['command']
    api_commands = MESSAGES['ch_api_brain']

    if command == api_commands['REQ_ANALYZER_STATUS']:
        # get_info() returns None rather than a tuple when it cannot
        # resolve the analyzer, so do not unpack its result blindly.
        info = await self._worker_agent.get_info(anal_id=analyzer_id)
        status, pipelines = info if info is not None else (None, None)
        # TODO(Ray): check if in WorkerStatus
        if status:
            return await self._nats_cli.publish(
                reply,
                jsondumps(self._API.reply_status(status, pipelines)).encode())
        return await self._nats_cli.publish(
            reply, jsondumps(self._API.reply_not_found()).encode())
    elif command == api_commands['START_ANALYZER']:
        context = {'msg': msg, 'reply': reply, 'timestamp': timestamp}
        if (await self._ticket_agent.set(analyzer_id, context)) == 0:
            # ticket already exists for analyzer #id, reject the request
            return await self._nats_cli.publish(
                reply, jsondumps(self._API.reply_not_aval()).encode())

        # check if worker for the analyzer #id exists?
        worker_id = await self._worker_agent.get_worker_id(analyzer_id)
        if worker_id:
            # TODO(Ray): if yes, just re-config worker
            self._logger.info(
                'worker {} exists, re-configure it'.format(worker_id))
            # XXX: We haven't implemented worker reconfiguring yet, so we
            #      need to delete the ticket here, otherwise it will block
            #      future operations on analyzer 'analyzer_id'.
            await self._ticket_agent.delete(analyzer_id)
        else:
            self._logger.info(
                'Create a worker for analyzer "{}"'.format(analyzer_id))
            # reply back to api_server
            await self._nats_cli.publish(
                reply,
                jsondumps(self._API.reply_status(
                    WorkerStatus.CREATE.name)).encode())
            ticket_id = analyzer_id
            # request resource manager to launch a worker
            req = {
                'command': MESSAGES['ch_brain_res']['CREATE_WORKER'],
                'ticketId': ticket_id,
                'analyzerId': analyzer_id,
                'params': {
                    # TODO: For running multiple brain instances, the id
                    #       should combine with a brain id to create a
                    #       unique id across brains
                    'workerName': 'jagereye/worker_{}'.format(self._typename)
                }
            }
            # TODO(Ray) need to abstract
            await self._nats_cli.publish(CH_BRAIN_TO_RES, str(req).encode())
    elif command == api_commands['STOP_ANALYZER']:
        # TODO: Stop application
        context = {'msg': msg, 'reply': reply, 'timestamp': timestamp}
        if await self._ticket_agent.set(analyzer_id, context) == 0:
            # ticket already exists for analyzer #id, reject the request
            return await self._nats_cli.publish(
                reply, jsondumps(self._API.reply_not_aval()).encode())

        worker_id = await self._worker_agent.get_worker_id(analyzer_id)
        if not worker_id:
            # Nothing will be sent to the resource manager, so no response
            # will ever release the ticket registered above. Delete it
            # here, otherwise it will block future operations on this
            # analyzer.
            await self._ticket_agent.delete(analyzer_id)
            return await self._nats_cli.publish(
                reply, jsondumps(self._API.reply_not_found()).encode())

        # request resource manager to remove a worker
        req = {
            'command': MESSAGES['ch_brain_res']['REMOVE_WORKER'],
            'analyzerId': analyzer_id,
            'params': {
                'workerId': worker_id
            }
        }
        await self._nats_cli.publish(CH_BRAIN_TO_RES, str(req).encode())
    else:
        self._logger.error('Undefined command: {}'.format(command))
async def _private_worker_handler(self, recv):
    """Asynchronous handler for the private channel of each worker.

    The brain listens on a private channel per worker and reacts according
    to the verb of the received message:

    * 'hshake-3': finish the handshake, mark the worker READY, start
      listening to its heartbeat and, if a ticket exists for its analyzer,
      send the worker a 'config' request.
    * 'config_ok': mark the worker RUNNING, store its pipelines and delete
      the ticket.
    * 'event': consume the worker's queued events, save them and publish
      them on the notification channel.
    * 'hbeat': update the worker's heartbeat.

    Messages carrying any other verb are ignored.

    Args:
        recv: the received message object; its `subject` (source channel
            name), `reply` (reply channel name) and `data` (binary request
            message) attributes are read.
    """
    ch = recv.subject
    reply = recv.reply
    msg = jsonify(recv.data)

    try:
        verb = msg['verb']
        context = msg['context']
    except Exception as e:
        self._logger.error('Exception in {}, type: {} error: {}'.format(
            get_func_name(), type(e), e))
        return

    if verb == 'hshake-3':
        self._logger.info(
            'Received "hshake-3" in {func}: "{subject} {reply}": {data}'.format(
                func=get_func_name(), subject=ch, reply=reply, data=msg))
        self._logger.info('finish handshake')
        # change worker status
        worker_id = context['workerID']
        # check worker status is HSHAKE-1
        worker_status = await self._worker_agent.get_status(
            worker_id=worker_id)
        if worker_status != WorkerStatus.HSHAKE_1.name:
            self._logger.error(
                'Receive "hshake3" in {}: with unexpected worker status "{}"'.format(
                    get_func_name(), worker_status))
            return
        # update worker status to READY
        await self._worker_agent.update_status(
            WorkerStatus.READY.name, worker_id=worker_id)
        # make listen the worker's heartbeat
        await self._worker_agent.start_listen_hbeat(worker_id)

        # check if there is a ticket for the worker
        analyzer_id = await self._worker_agent.get_anal_id(worker_id)
        result = await self._ticket_agent.get(analyzer_id)
        if result:
            # assign job to worker
            context['ticket'] = result
            context['ticket']['ticket_id'] = analyzer_id
            config_req = {'verb': 'config', 'context': context}
            # update worker status to CONFIG
            await self._worker_agent.update_status(
                WorkerStatus.CONFIG.name, worker_id=worker_id)
            await self._nats_cli.publish(
                context['ch_to_worker'], str(config_req).encode())
        else:
            # no ticket for the analyzer
            self._logger.debug(
                'Receive "hshake3" in {}: no ticket for analyzer {}'.format(
                    get_func_name(), analyzer_id))
    elif verb == 'config_ok':
        self._logger.info(
            'Received "config_ok" in {func}: "{subject} {reply}": {data}'.format(
                func=get_func_name(), subject=ch, reply=reply, data=msg))
        ticket_id = context['ticket']['ticket_id']
        worker_id = context['workerID']
        pipelines = context['ticket']['msg']['params']['pipelines']
        worker_status = await self._worker_agent.get_status(
            worker_id=worker_id)
        if worker_status != WorkerStatus.CONFIG.name:
            # TODO(Ray): when status not correct, what to do?
            # get_func_name must be called here; passing the function
            # itself would log its repr instead of the handler name.
            self._logger.error(
                'Receive "config_ok" in {}, but the worker status is {}'.format(
                    get_func_name(), worker_status))
            return
        # update the status to RUNNING
        await self._worker_agent.update_status(
            WorkerStatus.RUNNING.name, worker_id=worker_id)
        # update pipelines
        await self._worker_agent.update_pipelines(
            pipelines, worker_id=worker_id)
        # delete ticket
        await self._ticket_agent.delete(ticket_id)
    elif verb == 'event':
        worker_id = context['workerID']
        # retrieve analyzer_id
        analyzer_id = await self._worker_agent.get_anal_id(worker_id)
        # consume events from the worker
        events = await self._event_agent.consume_from_worker(worker_id)
        if not events:
            return
        self._logger.info('Received events: {}'.format(events))
        events_for_notify = self._event_agent.save_in_db(
            events, analyzer_id)
        self._logger.debug('Push events to notification: "{}"'.format(
            events_for_notify))
        await self._nats_cli.publish(
            CH_NOTIFICATION, str(events_for_notify).encode())
        # TODO: Send events back to notification service.
    elif verb == 'hbeat':
        worker_id = context['workerID']
        self._logger.debug('receive hbeat: {}'.format(str(msg)))
        # TODO: error handler
        if not (await self._worker_agent.update_hbeat(worker_id)):
            self._logger.debug(
                'failed update hbeat for worker {}'.format(worker_id))
from jagereye.brain.utils import jsonify, jsondumps from jagereye.brain import ticket from jagereye.brain.event_agent import EventAgent from jagereye.brain.worker_agent import WorkerAgent from jagereye.brain.status_enum import WorkerStatus from jagereye.brain.contract import API, InvalidRequestType, InvalidRequestFormat from jagereye.util import timer from jagereye.util import logging from jagereye.util import static_util from jagereye.util.generic import get_func_name # TODO(Ray): must merge to the STATUS enum in jagereye/worker/worker.py # Loading messaging with open(static_util.get_path('messaging.json'), 'r') as f: MESSAGES = jsonify(f.read()) # NATS channels CH_API_TO_BRAIN = "ch_api_brain" CH_PUBLIC_BRAIN = "ch_brain" CH_BRAIN_TO_RES = "ch_brain_res" CH_RES_TO_BRAIN = "ch_res_brain" CH_NOTIFICATION = "ch_notification" EXAMINE_INTERVAL = 6 EXAMINE_THREASHOLD = 10 class Brain(object): """Brain the base class for brain service.