# Example no. 1
# 0
class EPUManagementService(object):
    """EPU management service interface

    Builds an EPUManagement instance from the service configuration and
    exposes its operations as dashi message handlers.

    See: https://confluence.oceanobservatories.org/display/syseng/CIAD+CEI+OV+Elastic+Computing
    """

    def __init__(self):
        """Read the configuration, connect to dashi and assemble EPUManagement.
        """
        self.CFG = bootstrap.configure(get_config_paths(["service", "epumanagement"]))
        epum_cfg = self.CFG.epumanagement

        self.dashi = bootstrap.dashi_connect(epum_cfg.service_name, self.CFG)

        # Goes through the property setter below; reading it back raises
        # if no default user was configured.
        self.default_user = epum_cfg.get('default_user')

        # TODO: create ION class here or depend on epuagent repo as a dep
        ou_client = MockOUAgentClient()

        statsd_cfg = self.CFG.get('statsd')

        if epum_cfg.get('mock_provisioner'):
            prov_client = MockProvisionerClient()
        else:
            prov_client = ProvisionerClient(
                self.dashi, topic=epum_cfg.provisioner_service_name,
                statsd_cfg=statsd_cfg, client_name="epumanagement")

        self.service_name = epum_cfg.get(EPUM_INITIALCONF_SERVICE_NAME, EPUM_DEFAULT_SERVICE_NAME)
        self.proc_name = epum_cfg.get(EPUM_INITIALCONF_PROC_NAME, None)

        self.store = get_epum_store(
            self.CFG, service_name=self.service_name, proc_name=self.proc_name)
        self.store.initialize()

        dtrs_client = DTRSClient(
            self.dashi, statsd_cfg=statsd_cfg, client_name=epum_cfg.service_name)

        self.epumanagement = EPUManagement(
            epum_cfg, SubscriberNotifier(self.dashi), prov_client, ou_client,
            dtrs_client, store=self.store, statsd_cfg=statsd_cfg)

        # hack to inject epum reference for mock prov client
        if isinstance(prov_client, MockProvisionerClient):
            prov_client._set_epum(self.epumanagement)

    def start(self):
        """Register the dashi handlers, load the boot-time configuration and
        consume messages.

        Blocks until dashi.cancel() is called.
        """
        epu.dashiproc.link_dashi_exceptions(self.dashi)
        self._register_handlers()

        # this may spawn some background threads
        self.epumanagement.initialize()

        self._load_initial_definitions()
        self._load_initial_domains()

        # blocks til dashi.cancel() is called
        self.dashi.consume()

    def _register_handlers(self):
        """Expose every public operation of this service through dashi.
        """
        operations = (
            self.subscribe_domain,
            self.unsubscribe_domain,
            self.add_domain,
            self.remove_domain,
            self.list_domains,
            self.describe_domain,
            self.reconfigure_domain,
            self.add_domain_definition,
            self.remove_domain_definition,
            self.list_domain_definitions,
            self.describe_domain_definition,
            self.update_domain_definition,
            self.ou_heartbeat,
            self.instance_info,
        )
        for operation in operations:
            self.dashi.handle(operation)

    def _load_initial_definitions(self):
        """Add the domain definitions listed in the configuration.

        hack to load some domain definitions at boot. later this should be
        client driven. Failures are logged and do not stop the service.
        """
        initial_definitions = self.CFG.epumanagement.initial_definitions
        # items() rather than iteritems() so this runs on Python 2 and 3
        for definition_id, definition in initial_definitions.items():
            log.info("Loading Domain Definition %s", definition_id)
            try:
                self.epumanagement.msg_add_domain_definition(definition_id, definition)
            except WriteConflictError:
                log.warning("Conflict while loading domain definition. It probably exists.", exc_info=True)
            except Exception:
                log.exception("Failed to load Domain Definition %s", definition_id)

    def _load_initial_domains(self):
        """Add the domains listed in the configuration, owned by the default user.

        hack to load some domains at boot. later this should be client
        driven. Failures are logged and do not stop the service.
        """
        initial_domains = self.CFG.epumanagement.initial_domains
        for domain_id, params in initial_domains.items():
            log.info("Loading Domain %s", domain_id)
            definition_id = params['definition']
            config = params['config']
            try:
                self.epumanagement.msg_add_domain(self.default_user, domain_id, definition_id, config)
            except WriteConflictError:
                log.warning("Conflict while loading domain %s. It probably exists.", domain_id, exc_info=True)
            except Exception:
                log.exception("Failed to load Domain %s", domain_id)

    @property
    def default_user(self):
        """User that operations fall back to when no caller is supplied.

        Raises UserNotPermittedError when no (truthy) default user is set.
        """
        if not self._default_user:
            msg = "Operation called for the default user, but none is defined."
            raise UserNotPermittedError(msg)
        return self._default_user

    @default_user.setter  # noqa
    def default_user(self, default_user):
        self._default_user = default_user

    def subscribe_domain(self, domain_id, subscriber_name, subscriber_op, caller=None):
        """Forward a domain subscription for caller (or the default user).
        """
        caller = caller or self.default_user
        self.epumanagement.msg_subscribe_domain(
            caller, domain_id, subscriber_name, subscriber_op)

    def unsubscribe_domain(self, domain_id, subscriber_name, caller=None):
        """Forward a domain unsubscription for caller (or the default user).
        """
        caller = caller or self.default_user
        self.epumanagement.msg_unsubscribe_domain(caller, domain_id, subscriber_name)

    def list_domains(self, caller=None):
        """Return a list of domains in the system
        """
        caller = caller or self.default_user
        return self.epumanagement.msg_list_domains(caller=caller)

    def describe_domain(self, domain_id, caller=None):
        """Return a state structure for a domain, or None
        """
        caller = caller or self.default_user
        return self.epumanagement.msg_describe_domain(caller, domain_id)

    def add_domain(self, domain_id, definition_id, config, subscriber_name=None,
                   subscriber_op=None, caller=None):
        """Forward a domain creation for caller (or the default user),
        passing the optional subscriber through.
        """
        caller = caller or self.default_user
        self.epumanagement.msg_add_domain(
            caller, domain_id, definition_id, config,
            subscriber_name=subscriber_name, subscriber_op=subscriber_op)

    def remove_domain(self, domain_id, caller=None):
        """Forward a domain removal for caller (or the default user).
        """
        caller = caller or self.default_user
        self.epumanagement.msg_remove_domain(caller, domain_id)

    def reconfigure_domain(self, domain_id, config, caller=None):
        """Forward a domain reconfiguration for caller (or the default user).
        """
        caller = caller or self.default_user
        self.epumanagement.msg_reconfigure_domain(caller, domain_id, config)

    # Domain definition operations take no caller; they are forwarded as-is.

    def list_domain_definitions(self):
        """Return whatever EPUManagement reports for its domain definitions.
        """
        return self.epumanagement.msg_list_domain_definitions()

    def describe_domain_definition(self, definition_id):
        """Return EPUManagement's description of one domain definition.
        """
        return self.epumanagement.msg_describe_domain_definition(definition_id)

    def add_domain_definition(self, definition_id, definition):
        """Forward the creation of a domain definition.
        """
        self.epumanagement.msg_add_domain_definition(definition_id, definition)

    def remove_domain_definition(self, definition_id):
        """Forward the removal of a domain definition.
        """
        self.epumanagement.msg_remove_domain_definition(definition_id)

    def update_domain_definition(self, definition_id, definition):
        """Forward the update of a domain definition.
        """
        self.epumanagement.msg_update_domain_definition(definition_id, definition)

    def ou_heartbeat(self, heartbeat):
        """Pass a raw heartbeat message on with no caller.
        """
        self.epumanagement.msg_heartbeat(None, heartbeat)  # epum parses

    def instance_info(self, record):
        """Pass a raw instance state record on with no caller.
        """
        self.epumanagement.msg_instance_info(None, record)  # epum parses
# Example no. 2
# 0
class SubscriberTests(unittest.TestCase):
    """Tests of the subscriber notifications EPUManagement sends as instance
    states are reported, run against mock clients and a local store.
    """

    # (receiver name, operation) pairs used as subscribers throughout
    SUBSCRIBER_1 = ("subscriber01_name", "subscriber01_op")
    SUBSCRIBER_2 = ("subscriber02_name", "subscriber02_op")
    SUBSCRIBER_3 = ("subscriber03_name", "subscriber03_op")

    def setUp(self):
        # Mock mode:
        initial_conf = {EPUM_INITIALCONF_EXTERNAL_DECIDE: True}
        self.notifier = MockSubscriberNotifier()
        self.provisioner_client = MockProvisionerClient()
        self.dtrs_client = MockDTRSClient()
        self.ou_client = MockOUAgentClient()
        self.epum_store = LocalEPUMStore(EPUM_DEFAULT_SERVICE_NAME)
        self.epum_store.initialize()
        self.epum = EPUManagement(
            initial_conf, self.notifier, self.provisioner_client, self.ou_client,
            self.dtrs_client, store=self.epum_store)

        # For instance-state changes "from the provisioner"
        self.provisioner_client._set_epum(self.epum)

        # For heartbeats "from the OU instance"
        self.ou_client._set_epum(self.epum)

    def _get_simplest_domain_definition(self):
        """Get a domain definition using the 'simplest' engine, health monitor off
        """
        engine_class = "epu.decisionengine.impls.simplest.SimplestEngine"
        general = {EPUM_CONF_ENGINE_CLASS: engine_class}
        health = {EPUM_CONF_HEALTH_MONITOR: False}
        return {EPUM_CONF_GENERAL: general, EPUM_CONF_HEALTH: health}

    def _config_simplest_domainconf(self, n_preserving, dt="00_dt_id"):
        """Get 'simplest' domain conf with specified NPreserving policy
        """
        engine = {CONF_PRESERVE_N: n_preserving, "epuworker_type": dt}
        return {EPUM_CONF_ENGINE: engine}

    def _reset(self):
        """Clear everything the mock notifier has recorded
        """
        self.notifier.notify_by_name_called = 0
        self.notifier.receiver_names = []
        self.notifier.operations = []
        self.notifier.messages = []

    def _mock_checks(self, num_called, idx_check, subscriber_name, subscriber_op, expected_state, expected_domain):
        """Check the notifier saw num_called notifications in total and that
        the one at idx_check has the given receiver, operation, state and domain
        """
        self.assertEqual(self.notifier.notify_by_name_called, num_called)
        self.assertEqual(len(self.notifier.receiver_names), num_called)
        self.assertEqual(len(self.notifier.operations), num_called)
        self.assertEqual(len(self.notifier.messages), num_called)
        self.assertEqual(self.notifier.receiver_names[idx_check], subscriber_name)
        self.assertEqual(self.notifier.operations[idx_check], subscriber_op)
        message = self.notifier.messages[idx_check]
        self.assertIn("state", message)
        self.assertEqual(message["state"], expected_state)
        self.assertEqual(message["domain_id"], expected_domain)

    # Shared building blocks for the scenarios below

    def _start_epum(self):
        """Reset the notifier, initialize EPUM and check that a decision pass
        with no domains launches nothing
        """
        self._reset()
        self.epum.initialize()
        self.epum._run_decisions()
        self.assertEqual(self.provisioner_client.provision_count, 0)

    def _add_definition(self, definition_id="definition1"):
        """Register the 'simplest' domain definition and return its id
        """
        self.epum.msg_add_domain_definition(definition_id, self._get_simplest_domain_definition())
        return definition_id

    def _add_domain(self, domain_id, definition_id, subscribers=(), dt="00_dt_id"):
        """Add a preserve-1 domain owned by "owner" and subscribe each
        (name, op) pair to it, in order
        """
        self.epum.msg_add_domain("owner", domain_id, definition_id,
                                 self._config_simplest_domainconf(1, dt=dt))
        for name, op in subscribers:
            self.epum.msg_subscribe_domain("owner", domain_id, name, op)

    def _decide_and_check_launches(self, expected_count):
        """Run a decision pass and check how many launches the mock provisioner recorded
        """
        self.epum._run_decisions()
        self.assertEqual(self.provisioner_client.provision_count, expected_count)
        self.assertEqual(len(self.provisioner_client.launched_instance_ids), expected_count)
        self.assertEqual(len(self.provisioner_client.deployable_types_launched), expected_count)

    def _send_state(self, launch_index, state, **extra):
        """Simulate a provisioner update for the launch_index-th launched instance
        """
        content = {"node_id": self.provisioner_client.launched_instance_ids[launch_index],
                   "state": state}
        content.update(extra)
        self.epum.msg_instance_info(None, content)

    def _launch_domain1(self, subscribers=()):
        """Create "domain1" with the given subscribers and check that exactly
        one "00_dt_id" instance was launched without any notification
        """
        self._start_epum()
        definition_id = self._add_definition()
        self._add_domain("domain1", definition_id, subscribers)
        self._decide_and_check_launches(1)
        self.assertEqual(self.provisioner_client.deployable_types_launched[0], "00_dt_id")
        self.assertEqual(self.notifier.notify_by_name_called, 0)

    def _bring_domain1_to_running(self, subscribers):
        """Launch "domain1" and report its instance as STARTED, then RUNNING
        """
        self._launch_domain1(subscribers)

        # Simulate provisioner
        self._send_state(0, InstanceState.STARTED)
        self.assertEqual(self.notifier.notify_by_name_called, 0)

        # Running signal should be first notification
        self._send_state(0, InstanceState.RUNNING)

    def test_ignore_subscriber(self):
        # No subscribers on the domain
        self._launch_domain1()

        # Simulate provisioner
        self._send_state(0, InstanceState.RUNNING)

        self.assertEqual(self.notifier.notify_by_name_called, 0)

    def test_one_subscriber(self):
        subscriber_name, subscriber_op = self.SUBSCRIBER_1

        self._bring_domain1_to_running([self.SUBSCRIBER_1])

        self._mock_checks(1, 0, subscriber_name, subscriber_op, InstanceState.RUNNING, "domain1")

    def test_multiple_subscribers(self):
        subscribers = [self.SUBSCRIBER_1, self.SUBSCRIBER_2, self.SUBSCRIBER_3]

        self._bring_domain1_to_running(subscribers)

        for idx, (name, op) in enumerate(subscribers):
            self._mock_checks(3, idx, name, op, InstanceState.RUNNING, "domain1")

    def test_multiple_subscribers_multiple_domains(self):
        """Three subscribers, two for one domain, one for another.  One VM for each domain.
        """
        subscriber_name, subscriber_op = self.SUBSCRIBER_1
        subscriber2_name, subscriber2_op = self.SUBSCRIBER_2
        subscriber3_name, subscriber3_op = self.SUBSCRIBER_3

        self._start_epum()
        definition_id = self._add_definition()
        self._add_domain("domain1", definition_id, [self.SUBSCRIBER_1, self.SUBSCRIBER_2])

        # Subscriber 3 is for a different domain
        self._add_domain("domain2", definition_id, [self.SUBSCRIBER_3], dt="01_dt_id")

        self._decide_and_check_launches(2)
        launched_types = self.provisioner_client.deployable_types_launched

        # Find out which order these were launched ...
        matches = [i for i, dt_id in enumerate(launched_types) if dt_id == "01_dt_id"]
        self.assertTrue(matches, "no 01_dt_id instance was launched")
        subscriber3_index = matches[-1]

        # Now we know which was provisioned first... give opposite index to other one
        subscriber1and2_index = 0 if subscriber3_index else 1

        self.assertEqual(launched_types[subscriber1and2_index], "00_dt_id")
        self.assertEqual(launched_types[subscriber3_index], "01_dt_id")

        # No notifications until RUNNING
        self.assertEqual(self.notifier.notify_by_name_called, 0)

        # Simulate provisioner update for BOTH VMs launched
        self._send_state(subscriber1and2_index, InstanceState.STARTED)
        self._send_state(subscriber3_index, InstanceState.STARTED)
        self.assertEqual(self.notifier.notify_by_name_called, 0)

        # Running signal should be first notification, send RUNNING just for 01_dt_id instance (subscriber 3)
        self._send_state(subscriber3_index, InstanceState.RUNNING)
        self._mock_checks(1, 0, subscriber3_name, subscriber3_op, InstanceState.RUNNING, "domain2")

        # Now for 00_dt_id instance (subscribers 1 and 2)
        self._send_state(subscriber1and2_index, InstanceState.RUNNING)
        self._mock_checks(3, 1, subscriber_name, subscriber_op, InstanceState.RUNNING, "domain1")
        self._mock_checks(3, 2, subscriber2_name, subscriber2_op, InstanceState.RUNNING, "domain1")

    def _fail_setup(self):
        """Bring a single-subscriber "domain1" instance to RUNNING
        """
        self._bring_domain1_to_running([self.SUBSCRIBER_1])

    def _check_failed_notification_after(self, state):
        """Report `state` for a RUNNING instance and check that the subscriber
        gets a second notification carrying FAILED
        """
        subscriber_name, subscriber_op = self.SUBSCRIBER_1
        self._fail_setup()
        self._mock_checks(1, 0, subscriber_name, subscriber_op, InstanceState.RUNNING, "domain1")

        # Failing
        self._send_state(0, state)

        # All non-RUNNING notifications should be FAILED
        self._mock_checks(2, 1, subscriber_name, subscriber_op, InstanceState.FAILED, "domain1")

    # The "test_fail*" methods are for checking on notifications after RUNNING.  If the provisioner
    # doesn't 'increase' states, EPUM throws them out, no need to test that scenario.

    def test_fail_650(self):
        self._check_failed_notification_after(InstanceState.RUNNING_FAILED)

    def test_fail_700(self):
        self._check_failed_notification_after(InstanceState.TERMINATING)

    def test_fail_800(self):
        self._check_failed_notification_after(InstanceState.TERMINATED)

    def test_fail_900(self):
        self._check_failed_notification_after(InstanceState.FAILED)

    def test_updated_node_ip(self):
        subscriber_name, subscriber_op = self.SUBSCRIBER_1

        self._launch_domain1([self.SUBSCRIBER_1])

        domain = self.epum_store.get_domain("owner", "domain1")
        instance_id = self.provisioner_client.launched_instance_ids[0]

        def check(num_called, expected_ip):
            # The notification at index 0 must stay the RUNNING one throughout
            self._mock_checks(num_called, 0, subscriber_name, subscriber_op,
                              InstanceState.RUNNING, "domain1")
            self.assertEqual(domain.get_instance(instance_id).public_ip, expected_ip)

        self._send_state(0, InstanceState.STARTED, update_counter=1)

        self._send_state(0, InstanceState.RUNNING, public_ip="vm-1234", update_counter=2)
        check(1, "vm-1234")

        self._send_state(0, InstanceState.RUNNING, public_ip="1.2.3.4", update_counter=3)
        check(2, "1.2.3.4")

        # Check that sequential update_counter is respected
        self._send_state(0, InstanceState.RUNNING, public_ip="localhost", update_counter=2)
        check(2, "1.2.3.4")

        # A state going backwards should not happen, but double-check
        self._send_state(0, InstanceState.STARTED, public_ip="localhost", update_counter=4)
        check(2, "1.2.3.4")