Example #1
0
class EPUAgentCoreTests(unittest.TestCase):
    """Checks the state reports EPUAgentCore builds from a fake supervisor."""

    def setUp(self):
        """Give every test its own fake supervisor and a core watching it."""
        fake_supervisor = FakeSupervisor()
        self.sup = fake_supervisor
        self.core = EPUAgentCore(NODE_ID, supervisor=fake_supervisor)

    def assertBasics(self, state, expected="OK"):
        """Verify the node id, timestamp and state name of a report.

        state -- report returned by EPUAgentCore.get_state()
        expected -- state name the report must carry
        """
        self.assertEqual(NODE_ID, state['node_id'])
        self.assertTrue(state['timestamp'])
        self.assertEqual(expected, state['state'])

    def _only_failed_process(self, report):
        """Assert the report lists exactly one failed process; return it."""
        failures = report['failed_processes']
        self.assertEqual(1, len(failures))
        return failures[0]

    def test_supervisor_error(self):
        """A supervisor error must surface as MONITOR_ERROR with its text."""
        self.sup.error = SupervisorError('faaaaaaaail')
        report = self.core.get_state()
        self.assertBasics(report, "MONITOR_ERROR")
        self.assertTrue('faaaaaaaail' in report['error'])

    def test_series(self):
        """Walk one process through running, failed and running again."""
        self.sup.processes = [
            _one_process(ProcessStates.RUNNING),
            _one_process(ProcessStates.RUNNING),
        ]
        self.assertBasics(self.core.get_state())

        # Flip the second process to FATAL and point it at a stderr log
        # file whose contents should come back in the report.
        broken = self.sup.processes[1]
        broken['state'] = ProcessStates.FATAL
        broken['exitstatus'] = -1

        stderr_text = "this is the errros!"
        log_path = _write_tempfile(stderr_text)
        broken['stderr_logfile'] = log_path
        try:
            report = self.core.get_state()
        finally:
            # the temp file is only needed for the single get_state() call
            os.unlink(log_path)

        self.assertBasics(report, "PROCESS_ERROR")
        first_failure = self._only_failed_process(report)
        self.assertEqual(stderr_text, first_failure['stderr'])

        # A second poll is expected to still report the failure, but
        # without the stderr contents this time.
        report = self.core.get_state()
        self.assertBasics(report, "PROCESS_ERROR")
        repeat_failure = self._only_failed_process(report)
        self.assertFalse(repeat_failure.get('stderr'))

        # Put the process back to RUNNING; the report returns to OK.
        broken['state'] = ProcessStates.RUNNING
        self.assertBasics(self.core.get_state())
Example #2
0
class EPUAgentCoreTests(unittest.TestCase):
    """Unit tests for EPUAgentCore.get_state() driven by a FakeSupervisor."""

    def setUp(self):
        """Create the fake supervisor and the core under test."""
        supervisor = FakeSupervisor()
        self.sup = supervisor
        self.core = EPUAgentCore(NODE_ID, supervisor=supervisor)

    def assertBasics(self, state, expected="OK"):
        """Check the fields common to every state report.

        state -- report returned by EPUAgentCore.get_state()
        expected -- value required in the report's 'state' entry
        """
        self.assertEqual(NODE_ID, state['node_id'])
        self.assertTrue(state['timestamp'])
        self.assertEqual(expected, state['state'])

    def _single_failure(self, state):
        """Require exactly one entry in 'failed_processes' and return it."""
        failed_list = state['failed_processes']
        self.assertEqual(1, len(failed_list))
        return failed_list[0]

    def test_supervisor_error(self):
        """The supervisor's error text must appear in a MONITOR_ERROR report."""
        self.sup.error = SupervisorError('faaaaaaaail')
        current = self.core.get_state()
        self.assertBasics(current, "MONITOR_ERROR")
        self.assertTrue('faaaaaaaail' in current['error'])

    def test_series(self):
        """Follow a process from healthy to failed and back to healthy."""
        self.sup.processes = [_one_process(ProcessStates.RUNNING),
                              _one_process(ProcessStates.RUNNING)]
        current = self.core.get_state()
        self.assertBasics(current)

        # Fail the second process and hand it a fake stderr logfile for
        # the core to read and send back.
        victim = self.sup.processes[1]
        victim['state'] = ProcessStates.FATAL
        victim['exitstatus'] = -1

        expected_stderr = "this is the errros!"
        stderr_file = _write_tempfile(expected_stderr)
        victim['stderr_logfile'] = stderr_file
        try:
            current = self.core.get_state()
        finally:
            # always remove the temp file, even if get_state() raised
            os.unlink(stderr_file)

        self.assertBasics(current, "PROCESS_ERROR")
        reported = self._single_failure(current)
        self.assertEqual(expected_stderr, reported['stderr'])

        # On the following poll the process should still be listed as
        # failed, but with no stderr attached.
        current = self.core.get_state()
        self.assertBasics(current, "PROCESS_ERROR")
        reported = self._single_failure(current)
        self.assertFalse(reported.get('stderr'))

        # Everything healthy again.
        victim['state'] = ProcessStates.RUNNING
        current = self.core.get_state()
        self.assertBasics(current)
Example #3
0
    def __init__(self, *args, **kwargs):
        """Set up configuration, supervisor monitoring and messaging.

        heartbeat_dest, node_id, heartbeat_op, period_seconds and
        supervisor_socket are read from the keyword arguments first and
        fall back to the "epuagent" configuration section when the keyword
        is missing or falsy.  start_heartbeat (default True) and amqp_uri
        come from the keyword arguments only.  Positional arguments are
        not used.
        """
        self.CFG = bootstrap.configure(get_config_paths(["epuagent"]))

        # The topic is config-only; a random one is generated otherwise.
        service_name = self.CFG.epuagent.get('service_name')
        if service_name:
            self.topic = service_name
        else:
            self.topic = "epu_agent_%s" % uuid.uuid4()

        dest_override = kwargs.get('heartbeat_dest')
        self.heartbeat_dest = (dest_override if dest_override
                               else self.CFG.epuagent.heartbeat_dest)

        node_override = kwargs.get('node_id')
        self.node_id = (node_override if node_override
                        else self.CFG.epuagent.node_id)

        op_override = kwargs.get('heartbeat_op')
        self.heartbeat_op = (op_override if op_override
                             else self.CFG.epuagent.heartbeat_op)

        interval = kwargs.get('period_seconds')
        if not interval:
            interval = self.CFG.epuagent.period_seconds
        self.period = float(interval)

        # Tests may pass start_heartbeat=False to keep the loop from
        # starting automatically.
        self.start_beat = kwargs.get('start_heartbeat', True)

        socket_path = kwargs.get('supervisor_socket')
        if not socket_path:
            socket_path = self.CFG.epuagent.get('supervisor_socket')

        if not socket_path:
            log.debug("not monitoring process supervisor")
            self.supervisor = None
        else:
            log.debug("monitoring a process supervisor at: %s", socket_path)
            self.supervisor = Supervisor(socket_path)

        self.core = EPUAgentCore(self.node_id, supervisor=self.supervisor)

        self.dashi = bootstrap.dashi_connect(self.topic, self.CFG,
                                             kwargs.get('amqp_uri'))
Example #4
0
    def __init__(self, *args, **kwargs):
        """Build the agent from keyword overrides and the "epuagent" config.

        A truthy keyword argument wins over the configuration for
        heartbeat_dest, node_id, heartbeat_op, period_seconds and
        supervisor_socket.  start_heartbeat (default True) and amqp_uri
        are taken from the keyword arguments only.  Positional arguments
        are ignored.
        """
        self.CFG = bootstrap.configure(get_config_paths(["epuagent"]))
        section = self.CFG.epuagent

        def setting(name):
            # keyword argument when truthy, otherwise the config attribute
            return kwargs.get(name) or getattr(section, name)

        # No configured service name means a randomly generated topic.
        self.topic = section.get('service_name') or (
            "epu_agent_%s" % uuid.uuid4())

        self.heartbeat_dest = setting('heartbeat_dest')
        self.node_id = setting('node_id')
        self.heartbeat_op = setting('heartbeat_op')
        self.period = float(setting('period_seconds'))

        # for testing, the heartbeat loop can be left unstarted
        self.start_beat = kwargs.get('start_heartbeat', True)

        broker_uri = kwargs.get('amqp_uri')

        supervisor_addr = (kwargs.get('supervisor_socket')
                           or section.get('supervisor_socket'))
        if supervisor_addr:
            log.debug("monitoring a process supervisor at: %s",
                      supervisor_addr)
            self.supervisor = Supervisor(supervisor_addr)
        else:
            log.debug("not monitoring process supervisor")
            self.supervisor = None

        self.core = EPUAgentCore(self.node_id, supervisor=self.supervisor)

        self.dashi = bootstrap.dashi_connect(self.topic, self.CFG, broker_uri)
Example #5
0
class EPUAgent(object):
    """Elastic Process Unit (EPU) Agent. Monitors vitals in running VMs.

    Collects a state report from an EPUAgentCore and fires it as a
    heartbeat over a dashi connection, both on a periodic loop and through
    the ``heartbeat`` method registered with ``dashi.handle``.
    """

    def __init__(self, *args, **kwargs):
        """Configure the agent from keyword arguments and the epuagent config.

        For the keywords below, a truthy keyword argument takes precedence
        and the "epuagent" configuration section is the fallback:

        heartbeat_dest -- destination heartbeats are fired at
        node_id -- identifier passed to EPUAgentCore
        heartbeat_op -- operation name used when firing a heartbeat
        period_seconds -- heartbeat interval; stored as a float
        supervisor_socket -- supervisor address; when neither source
            provides one, no supervisor is monitored

        Keyword-only settings (no config fallback):

        start_heartbeat -- whether start() starts the loop (default True)
        amqp_uri -- passed to bootstrap.dashi_connect (None when omitted)

        Positional arguments are accepted but not used.
        """

        configs = ["epuagent"]
        config_files = get_config_paths(configs)
        self.CFG = bootstrap.configure(config_files)

        # the topic is config-only; fall back to a randomly generated name
        topic = self.CFG.epuagent.get('service_name')
        self.topic = topic or "epu_agent_%s" % uuid.uuid4()

        heartbeat_dest = kwargs.get('heartbeat_dest')
        self.heartbeat_dest = heartbeat_dest or self.CFG.epuagent.heartbeat_dest

        node_id = kwargs.get('node_id')
        self.node_id = node_id or self.CFG.epuagent.node_id

        heartbeat_op = kwargs.get('heartbeat_op')
        self.heartbeat_op = heartbeat_op or self.CFG.epuagent.heartbeat_op

        period = kwargs.get('period_seconds')
        self.period = float(period or self.CFG.epuagent.period_seconds)

        # for testing, allow for not starting heartbeat automatically
        self.start_beat = kwargs.get('start_heartbeat', True)

        amqp_uri = kwargs.get('amqp_uri')

        sock = kwargs.get('supervisor_socket')
        sock = sock or self.CFG.epuagent.get('supervisor_socket')
        if sock:
            log.debug("monitoring a process supervisor at: %s", sock)
            self.supervisor = Supervisor(sock)
        else:
            log.debug("not monitoring process supervisor")
            self.supervisor = None

        self.core = EPUAgentCore(self.node_id, supervisor=self.supervisor)

        self.dashi = bootstrap.dashi_connect(self.topic, self.CFG, amqp_uri)

    def start(self):
        """Register the heartbeat handler, start the loop and consume.

        Blocks inside ``dashi.consume()`` until it returns or a
        KeyboardInterrupt is raised; the interrupt is logged, not re-raised.
        """
        log.info('EPUAgent starting')

        self.dashi.handle(self.heartbeat)

        # the loop object is always created, but only started on request
        self.loop = LoopingCall(self._loop)
        if self.start_beat:
            log.debug('Starting heartbeat loop - %s second interval',
                      self.period)
            self.loop.start(self.period)

        try:
            self.dashi.consume()
        except KeyboardInterrupt:
            log.warning("Caught terminate signal. Exiting")
        else:
            log.info("Exiting normally.")

    def _loop(self):
        """Body of the periodic loop: send one heartbeat."""
        return self.heartbeat()

    def heartbeat(self):
        """Collect the current state and fire it as a heartbeat message.

        Any Exception raised while collecting or sending is logged with its
        traceback and swallowed. Returns None.
        """
        try:
            state = self.core.get_state()
            self.dashi.fire(self.heartbeat_dest,
                            self.heartbeat_op,
                            heartbeat=state)
        except Exception as e:
            # unhandled exceptions will terminate the LoopingCall
            log.error('Error heartbeating: %s', e, exc_info=True)
Example #6
0
 def setUp(self):
     """Create a fresh fake supervisor and the core that monitors it."""
     fake_supervisor = FakeSupervisor()
     self.sup = fake_supervisor
     self.core = EPUAgentCore(NODE_ID, supervisor=fake_supervisor)
Example #7
0
class EPUAgent(object):
    """Elastic Process Unit (EPU) Agent. Monitors vitals in running VMs.

    Obtains a state report from an EPUAgentCore and fires it as a heartbeat
    through a dashi connection, on a periodic loop and via the
    ``heartbeat`` method that start() registers with ``dashi.handle``.
    """

    def __init__(self, *args, **kwargs):
        """Configure the agent from keyword arguments and the epuagent config.

        Settings where a truthy keyword argument overrides the "epuagent"
        configuration section:

        heartbeat_dest -- destination heartbeats are fired at
        node_id -- identifier passed to EPUAgentCore
        heartbeat_op -- operation name used when firing a heartbeat
        period_seconds -- heartbeat interval; stored as a float
        supervisor_socket -- supervisor address; with no value from either
            source the agent does not monitor a supervisor

        Settings read from keyword arguments only:

        start_heartbeat -- whether start() starts the loop (default True)
        amqp_uri -- passed to bootstrap.dashi_connect (None when omitted)

        Positional arguments are accepted but not used.
        """

        configs = ["epuagent"]
        config_files = get_config_paths(configs)
        self.CFG = bootstrap.configure(config_files)

        # no configured service name -> use a randomly generated topic
        topic = self.CFG.epuagent.get('service_name')
        self.topic = topic or "epu_agent_%s" % uuid.uuid4()

        heartbeat_dest = kwargs.get('heartbeat_dest')
        self.heartbeat_dest = heartbeat_dest or self.CFG.epuagent.heartbeat_dest

        node_id = kwargs.get('node_id')
        self.node_id = node_id or self.CFG.epuagent.node_id

        heartbeat_op = kwargs.get('heartbeat_op')
        self.heartbeat_op = heartbeat_op or self.CFG.epuagent.heartbeat_op

        period = kwargs.get('period_seconds')
        self.period = float(period or self.CFG.epuagent.period_seconds)

        # for testing, allow for not starting heartbeat automatically
        self.start_beat = kwargs.get('start_heartbeat', True)

        amqp_uri = kwargs.get('amqp_uri')

        sock = kwargs.get('supervisor_socket')
        sock = sock or self.CFG.epuagent.get('supervisor_socket')
        if sock:
            log.debug("monitoring a process supervisor at: %s", sock)
            self.supervisor = Supervisor(sock)
        else:
            log.debug("not monitoring process supervisor")
            self.supervisor = None

        self.core = EPUAgentCore(self.node_id, supervisor=self.supervisor)

        self.dashi = bootstrap.dashi_connect(self.topic, self.CFG, amqp_uri)

    def start(self):
        """Register the heartbeat handler, start the loop and consume.

        Blocks inside ``dashi.consume()`` until it returns or a
        KeyboardInterrupt is raised; the interrupt is logged, not re-raised.
        """
        log.info('EPUAgent starting')

        self.dashi.handle(self.heartbeat)

        # create the loop unconditionally; start it only when requested
        self.loop = LoopingCall(self._loop)
        if self.start_beat:
            log.debug('Starting heartbeat loop - %s second interval',
                      self.period)
            self.loop.start(self.period)

        try:
            self.dashi.consume()
        except KeyboardInterrupt:
            log.warning("Caught terminate signal. Exiting")
        else:
            log.info("Exiting normally.")

    def _loop(self):
        """Body of the periodic loop: send one heartbeat."""
        return self.heartbeat()

    def heartbeat(self):
        """Collect the current state and fire it as a heartbeat message.

        Any Exception raised while collecting or sending is logged with its
        traceback and swallowed. Returns None.
        """
        try:
            state = self.core.get_state()
            self.dashi.fire(self.heartbeat_dest, self.heartbeat_op,
                            heartbeat=state)
        except Exception as e:
            # unhandled exceptions will terminate the LoopingCall
            log.error('Error heartbeating: %s', e, exc_info=True)
Example #8
0
 def setUp(self):
     """Prepare the fake supervisor and the EPUAgentCore under test."""
     supervisor = FakeSupervisor()
     self.sup = supervisor
     self.core = EPUAgentCore(NODE_ID, supervisor=supervisor)