class EPUAgentCoreTests(unittest.TestCase):
    """Tests for the state reports produced by EPUAgentCore.get_state().

    Each test drives the core through a FakeSupervisor whose ``error`` and
    ``processes`` attributes are set directly by the test.
    """

    def setUp(self):
        """Build a fresh fake supervisor and a core wired to it."""
        supervisor = FakeSupervisor()
        self.sup = supervisor
        self.core = EPUAgentCore(NODE_ID, supervisor=supervisor)

    def assertBasics(self, state, expected="OK"):
        """Assert the fields common to every report.

        ``state`` must carry our node id, a truthy timestamp and the
        ``expected`` overall state string.
        """
        self.assertEqual(NODE_ID, state['node_id'])
        self.assertTrue(state['timestamp'])
        self.assertEqual(expected, state['state'])

    def _single_failed_process(self, report):
        """Assert a PROCESS_ERROR report with one failure; return that entry."""
        self.assertBasics(report, "PROCESS_ERROR")
        failures = report['failed_processes']
        self.assertEqual(1, len(failures))
        return failures[0]

    def test_supervisor_error(self):
        """A supervisor error is reported as MONITOR_ERROR with its message."""
        self.sup.error = SupervisorError('faaaaaaaail')

        report = self.core.get_state()

        self.assertBasics(report, "MONITOR_ERROR")
        self.assertTrue('faaaaaaaail' in report['error'])

    def test_series(self):
        """Walk one process through running -> failed -> running."""
        self.sup.processes = [_one_process(ProcessStates.RUNNING)
                              for _ in range(2)]
        self.assertBasics(self.core.get_state())

        # Flip the second process to a fatal state and point it at a
        # temporary log file whose contents the core should send back.
        broken = self.sup.processes[1]
        broken['state'] = ProcessStates.FATAL
        broken['exitstatus'] = -1
        stderr_text = "this is the errros!"
        log_path = _write_tempfile(stderr_text)
        broken['stderr_logfile'] = log_path
        try:
            report = self.core.get_state()
        finally:
            # The temp file is removed whether or not get_state() raised.
            os.unlink(log_path)

        entry = self._single_failed_process(report)
        self.assertEqual(stderr_text, entry['stderr'])

        # A second poll must still report the failure, but without stderr.
        entry = self._single_failed_process(self.core.get_state())
        self.assertFalse(entry.get('stderr'))

        # Once the process is running again the report goes back to OK.
        broken['state'] = ProcessStates.RUNNING
        self.assertBasics(self.core.get_state())
class EPUAgentCoreTests(unittest.TestCase):
    """Checks EPUAgentCore.get_state() using a FakeSupervisor as its source."""

    def setUp(self):
        """Create the fake supervisor first, then the core that reads it."""
        self.sup = fake = FakeSupervisor()
        self.core = EPUAgentCore(NODE_ID, supervisor=fake)

    def assertBasics(self, state, expected="OK"):
        """Verify node id, a non-empty timestamp and the overall state."""
        self.assertEqual(NODE_ID, state['node_id'])
        self.assertTrue(state['timestamp'])
        self.assertEqual(expected, state['state'])

    def test_supervisor_error(self):
        """An error set on the supervisor yields a MONITOR_ERROR report."""
        message = 'faaaaaaaail'
        self.sup.error = SupervisorError(message)
        snapshot = self.core.get_state()
        self.assertBasics(snapshot, "MONITOR_ERROR")
        self.assertTrue(message in snapshot['error'])

    def test_series(self):
        """Poll repeatedly while one process fails and then recovers."""
        first = _one_process(ProcessStates.RUNNING)
        second = _one_process(ProcessStates.RUNNING)
        self.sup.processes = [first, second]
        snapshot = self.core.get_state()
        self.assertBasics(snapshot)

        # Mark the second process as failed and attach a fake stderr log
        # for the core to read and include in its report.
        victim = self.sup.processes[1]
        victim['state'] = ProcessStates.FATAL
        victim['exitstatus'] = -1
        expected_stderr = "this is the errros!"
        tmp_log = _write_tempfile(expected_stderr)
        victim['stderr_logfile'] = tmp_log
        try:
            snapshot = self.core.get_state()
        finally:
            os.unlink(tmp_log)

        self.assertBasics(snapshot, "PROCESS_ERROR")
        reported = snapshot['failed_processes']
        self.assertEqual(1, len(reported))
        self.assertEqual(expected_stderr, reported[0]['stderr'])

        # On the following poll the process is still failed, but the
        # stderr text is expected to be absent or empty.
        snapshot = self.core.get_state()
        self.assertBasics(snapshot, "PROCESS_ERROR")
        reported = snapshot['failed_processes']
        self.assertEqual(1, len(reported))
        self.assertFalse(reported[0].get('stderr'))

        # Restore the process and confirm the report is healthy again.
        victim['state'] = ProcessStates.RUNNING
        snapshot = self.core.get_state()
        self.assertBasics(snapshot)
def __init__(self, *args, **kwargs):
    """Configure the agent from the "epuagent" config plus keyword overrides.

    Recognised keyword arguments (a falsy value falls back to the
    ``epuagent`` config section unless noted otherwise):

    heartbeat_dest, heartbeat_op -- destination and operation passed to
        ``dashi.fire`` when heartbeating.
    node_id -- identifier handed to EPUAgentCore.
    period_seconds -- heartbeat interval, converted to float.
    start_heartbeat -- defaults to True; lets tests skip the automatic
        heartbeat loop. No config fallback.
    amqp_uri -- forwarded to ``bootstrap.dashi_connect``. No config fallback.
    supervisor_socket -- when set (here or in config) a Supervisor is
        created on it; otherwise no supervisor is monitored.

    Positional arguments are accepted but not used.
    """
    self.CFG = bootstrap.configure(get_config_paths(["epuagent"]))
    agent_cfg = self.CFG.epuagent

    # The topic comes from config only; a random one is generated if unset.
    self.topic = agent_cfg.get('service_name')
    if not self.topic:
        self.topic = "epu_agent_%s" % uuid.uuid4()

    # Config attributes are read only when the keyword override is falsy.
    self.heartbeat_dest = kwargs.get('heartbeat_dest')
    if not self.heartbeat_dest:
        self.heartbeat_dest = agent_cfg.heartbeat_dest

    self.node_id = kwargs.get('node_id')
    if not self.node_id:
        self.node_id = agent_cfg.node_id

    self.heartbeat_op = kwargs.get('heartbeat_op')
    if not self.heartbeat_op:
        self.heartbeat_op = agent_cfg.heartbeat_op

    interval = kwargs.get('period_seconds')
    if not interval:
        interval = agent_cfg.period_seconds
    self.period = float(interval)

    # for testing, allow for not starting heartbeat automatically
    self.start_beat = kwargs.get('start_heartbeat', True)

    amqp_uri = kwargs.get('amqp_uri')

    socket_path = kwargs.get('supervisor_socket')
    if not socket_path:
        socket_path = agent_cfg.get('supervisor_socket')

    if not socket_path:
        log.debug("not monitoring process supervisor")
        self.supervisor = None
    else:
        log.debug("monitoring a process supervisor at: %s", socket_path)
        self.supervisor = Supervisor(socket_path)

    self.core = EPUAgentCore(self.node_id, supervisor=self.supervisor)
    self.dashi = bootstrap.dashi_connect(self.topic, self.CFG, amqp_uri)
class EPUAgent(object):
    """Elastic Process Unit (EPU) Agent. Monitors vitals in running VMs.

    The agent periodically asks its EPUAgentCore for a state report and
    fires it to ``heartbeat_dest`` / ``heartbeat_op`` over dashi.
    """

    def __init__(self, *args, **kwargs):
        """Load the "epuagent" config and apply keyword overrides.

        Recognised keyword arguments (a falsy value falls back to the
        ``epuagent`` config section unless noted otherwise):

        heartbeat_dest, heartbeat_op -- destination and operation used
            when firing heartbeats.
        node_id -- identifier handed to EPUAgentCore.
        period_seconds -- heartbeat interval, converted to float.
        start_heartbeat -- defaults to True; no config fallback.
        amqp_uri -- forwarded to ``bootstrap.dashi_connect``; no config
            fallback.
        supervisor_socket -- when set (here or in config) a Supervisor is
            created on it; otherwise no supervisor is monitored.

        Positional arguments are accepted but not used.
        """
        configs = ["epuagent"]
        config_files = get_config_paths(configs)
        self.CFG = bootstrap.configure(config_files)

        # The topic is config-only; a random one is generated when unset.
        topic = self.CFG.epuagent.get('service_name')
        self.topic = topic or "epu_agent_%s" % uuid.uuid4()

        heartbeat_dest = kwargs.get('heartbeat_dest')
        self.heartbeat_dest = heartbeat_dest or self.CFG.epuagent.heartbeat_dest

        node_id = kwargs.get('node_id')
        self.node_id = node_id or self.CFG.epuagent.node_id

        heartbeat_op = kwargs.get('heartbeat_op')
        self.heartbeat_op = heartbeat_op or self.CFG.epuagent.heartbeat_op

        period = kwargs.get('period_seconds')
        self.period = float(period or self.CFG.epuagent.period_seconds)

        # for testing, allow for not starting heartbeat automatically
        self.start_beat = kwargs.get('start_heartbeat', True)

        amqp_uri = kwargs.get('amqp_uri')

        sock = kwargs.get('supervisor_socket')
        sock = sock or self.CFG.epuagent.get('supervisor_socket')
        if sock:
            log.debug("monitoring a process supervisor at: %s", sock)
            self.supervisor = Supervisor(sock)
        else:
            log.debug("not monitoring process supervisor")
            self.supervisor = None

        self.core = EPUAgentCore(self.node_id, supervisor=self.supervisor)

        self.dashi = bootstrap.dashi_connect(self.topic, self.CFG, amqp_uri)

    def start(self):
        """Register the heartbeat handler, start the loop and consume.

        The heartbeat loop is only started when ``start_heartbeat`` was
        left enabled. A KeyboardInterrupt raised from ``dashi.consume()``
        is logged as a terminate signal rather than propagated.
        """
        log.info('EPUAgent starting')

        self.dashi.handle(self.heartbeat)

        self.loop = LoopingCall(self._loop)
        if self.start_beat:
            log.debug('Starting heartbeat loop - %s second interval', self.period)
            self.loop.start(self.period)

        try:
            self.dashi.consume()
        except KeyboardInterrupt:
            log.warning("Caught terminate signal. Exiting")
        else:
            log.info("Exiting normally.")

    def _loop(self):
        """Callback given to LoopingCall; delegates to heartbeat()."""
        return self.heartbeat()

    def heartbeat(self):
        """Collect the current state and fire it as a heartbeat.

        Any exception raised while collecting or sending is logged with
        its traceback and swallowed; the method always returns None.
        """
        try:
            state = self.core.get_state()
            self.dashi.fire(self.heartbeat_dest, self.heartbeat_op, heartbeat=state)
        except Exception as e:
            # unhandled exceptions will terminate the LoopingCall
            log.error('Error heartbeating: %s', e, exc_info=True)
def setUp(self):
    """Create a FakeSupervisor and an EPUAgentCore that reads from it."""
    supervisor = FakeSupervisor()
    self.sup = supervisor
    self.core = EPUAgentCore(NODE_ID, supervisor=supervisor)