class HighAvailabilityAgent(ResourceAgent):
    """Agent to manage high availability processes

    """

    def __init__(self):
        """Log that the agent is being constructed, then run the
        ResourceAgent initializer.
        """
        log.debug("HighAvailabilityAgent init")
        ResourceAgent.__init__(self)

    def on_init(self):
        """Read the HA configuration, build the core and start the policy loop.

        All settings come from self.CFG under "highavailability". When
        HighAvailabilityCore is falsy an error is logged and nothing else is
        set up (neither self.core nor self.policy_thread is created).

        NOTE(review): this class body defines on_init again further down; the
        last definition replaces this one when the class is created -- confirm
        which version is intended.

        @raise Exception: if no policy name is configured, or the configured
            name is not a key of policy.policy_map
        """
        if not HighAvailabilityCore:
            msg = "HighAvailabilityCore isn't available. Use production.cfg buildout"
            log.error(msg)
            return
        log.debug("HighAvailabilityCore Pyon on_init")

        policy_name = self.CFG.get_safe("highavailability.policy.name")
        if policy_name is None:
            msg = "HA service requires a policy name at CFG.highavailability.policy.name"
            raise Exception(msg)
        try:
            # policy names are matched case-insensitively
            self.policy = policy.policy_map[policy_name.lower()]
        except KeyError:
            raise Exception("HA Service doesn't support '%s' policy" % policy_name)

        self.policy_interval = self.CFG.get_safe("highavailability.policy.interval",
                DEFAULT_INTERVAL)

        cfg = self.CFG.get_safe("highavailability")
        pds = self.CFG.get_safe("highavailability.process_dispatchers", [])
        process_spec = self.CFG.get_safe("highavailability.process_spec")
        # TODO: Allow other core class?
        self.core = HighAvailabilityCore(cfg, ProcessDispatcherSimpleAPIClient,
                pds, process_spec, self.policy)

        # re-apply the policy every policy_interval
        self.policy_thread = looping_call(self.policy_interval, self.core.apply_policy)

    def on_quit(self):
        """Stop the policy loop, if one was started.

        on_init returns early without creating self.policy_thread when
        HighAvailabilityCore is unavailable, so a missing (or None) thread is
        tolerated instead of raising AttributeError during shutdown.
        """
        policy_thread = getattr(self, "policy_thread", None)
        if policy_thread is not None:
            policy_thread.kill(block=True, timeout=3)

    def rcmd_reconfigure_policy(self, new_policy):
        """Service operation: hand new policy parameters to the HA core

        @param new_policy: parameters of policy, passed unchanged to
            self.core.reconfigure_policy
        @return: None
        """
        core = self.core
        core.reconfigure_policy(new_policy)

    def rcmd_status(self):
        """Service operation: report the status of the HA Service

        @return: whatever self.core.status() returns; one of
            {PENDING, READY, STEADY, BROKEN}
        """
        current_status = self.core.status()
        return current_status

    def rcmd_dump(self):
        """Service operation: return the HA core's dump unchanged"""
        snapshot = self.core.dump()
        return snapshot
    def on_init(self):
        """Read the HA configuration, build the core and start the policy loop.

        All settings come from self.CFG under "highavailability". When
        HighAvailabilityCore is falsy an error is logged and nothing else is
        set up (neither self.core nor self.policy_thread is created).

        NOTE(review): on_init is defined more than once in this class body;
        only the last definition survives class creation -- confirm which
        version is intended.

        @raise Exception: if no policy name is configured, or the configured
            name is not a key of policy.policy_map
        """
        if not HighAvailabilityCore:
            msg = "HighAvailabilityCore isn't available. Use production.cfg buildout"
            log.error(msg)
            return
        log.debug("HighAvailabilityCore Pyon on_init")

        policy_name = self.CFG.get_safe("highavailability.policy.name")
        if policy_name is None:
            msg = "HA service requires a policy name at CFG.highavailability.policy.name"
            raise Exception(msg)
        try:
            # policy names are matched case-insensitively
            self.policy = policy.policy_map[policy_name.lower()]
        except KeyError:
            raise Exception("HA Service doesn't support '%s' policy" % policy_name)

        self.policy_interval = self.CFG.get_safe("highavailability.policy.interval",
                DEFAULT_INTERVAL)

        cfg = self.CFG.get_safe("highavailability")
        pds = self.CFG.get_safe("highavailability.process_dispatchers", [])
        process_spec = self.CFG.get_safe("highavailability.process_spec")
        # TODO: Allow other core class?
        self.core = HighAvailabilityCore(cfg, ProcessDispatcherSimpleAPIClient,
                pds, process_spec, self.policy)

        # re-apply the policy every policy_interval
        self.policy_thread = looping_call(self.policy_interval, self.core.apply_policy)
    def on_init(self):
        """Configure the agent from self.CFG and build its collaborators.

        Resolves the process definition, registers the Service resource,
        selects the policy (a policy stored on the Service resource takes
        precedence over CFG.highavailability.policy) and creates the
        HAProcessControl and HighAvailabilityCore. When
        highavailability.dashi_messaging is set, an HADashiHandler is created
        as well; otherwise self.dashi_handler is None. When
        HighAvailabilityCore is falsy an error is logged and nothing is set up.

        @raise Exception: if the number of configured process dispatchers is
            not exactly one, or dashi messaging is enabled but the dashi name
            or the pieces of the dashi URI are missing. Process definition
            lookup and policy name validation may raise as well.
        """
        if not HighAvailabilityCore:
            msg = "HighAvailabilityCore isn't available. Use autolaunch.cfg buildout"
            log.error(msg)
            return

        cfg = self.CFG.get_safe("highavailability")

        # use default PD name as the sole PD if none are provided in config
        self.pds = self.CFG.get_safe("highavailability.process_dispatchers",
            [ProcessDispatcherService.name])
        if len(self.pds) != 1:
            raise Exception("HA Service doesn't support multiple Process Dispatchers")

        self.process_definition_id, self.process_definition = self._get_process_definition()

        self.process_configuration = self.CFG.get_safe("highavailability.process_configuration")
        aggregator_config = _get_aggregator_config(self.CFG)

        # the service must be registered before the stored policy can be read
        self.service_id, self.service_name = self._register_service()
        self.policy_event = Event()

        stored_policy = self._stored_policy
        # Truthiness (rather than "!= {}") also treats a None stored policy as
        # "nothing stored" and falls back to the configured policy.
        if stored_policy:
            policy_name = stored_policy.get('name')
            policy_parameters = stored_policy.get('parameters')
        else:
            policy_name = self.CFG.get_safe("highavailability.policy.name")
            policy_parameters = self.CFG.get_safe("highavailability.policy.parameters")
        self._validate_policy_name(policy_name)
        self.policy_name = policy_name.lower()
        self.policy_parameters = policy_parameters

        self.policy_interval = self.CFG.get_safe("highavailability.policy.interval",
                DEFAULT_INTERVAL)

        self.logprefix = "HA Agent (%s): " % self.service_name

        # process control wakes the policy loop through policy_event.set
        self.control = HAProcessControl(self.pds[0],
            self.container.resource_registry, self.service_id,
            self.policy_event.set, logprefix=self.logprefix)

        self.core = HighAvailabilityCore(cfg, self.control,
                self.pds, self.policy_name, process_definition_id=self.process_definition_id,
                parameters=self.policy_parameters,
                process_configuration=self.process_configuration,
                aggregator_config=aggregator_config, name=self.service_name)

        if not self.CFG.get_safe("highavailability.dashi_messaging", False):
            self.dashi_handler = None
            return

        dashi_name = self.CFG.get_safe("highavailability.dashi_name")
        if not dashi_name:
            raise Exception("dashi_name unknown")

        dashi_uri = self.CFG.get_safe("highavailability.dashi_uri")
        if not dashi_uri:
            # no explicit URI: build one from the container's AMQP settings
            rabbit_host = self.CFG.get_safe("server.amqp.host")
            rabbit_user = self.CFG.get_safe("server.amqp.username")
            rabbit_pass = self.CFG.get_safe("server.amqp.password")

            if not (rabbit_host and rabbit_user and rabbit_pass):
                raise Exception("cannot form dashi URI")

            dashi_uri = "amqp://%s:%s@%s/" % (rabbit_user, rabbit_pass,
                                              rabbit_host)

        dashi_exchange = self.CFG.get_safe("highavailability.dashi_exchange")
        if not dashi_exchange:
            dashi_exchange = get_sys_name()

        self.dashi_handler = HADashiHandler(self, dashi_name, dashi_uri, dashi_exchange)
class HighAvailabilityAgent(SimpleResourceAgent):
    """Agent to manage high availability processes

    """

    def __init__(self):
        """Run the SimpleResourceAgent initializer and reset the attributes
        that on_init/on_start fill in later.
        """
        SimpleResourceAgent.__init__(self)
        # placeholders until on_init / on_start create the real objects
        self.policy_event = None
        self.policy_thread = None
        self.service_id = None
        self.dashi_handler = None

    def on_init(self):
        """Configure the agent from self.CFG and build its collaborators.

        Resolves the process definition, registers the Service resource,
        selects the policy (a policy stored on the Service resource takes
        precedence over CFG.highavailability.policy) and creates the
        HAProcessControl and HighAvailabilityCore. When
        highavailability.dashi_messaging is set, an HADashiHandler is created
        as well; otherwise self.dashi_handler is None. When
        HighAvailabilityCore is falsy an error is logged and nothing is set up.

        NOTE(review): this class body defines on_init again further down; the
        last definition replaces this one when the class is created -- confirm
        which version is intended.

        @raise Exception: if the number of configured process dispatchers is
            not exactly one, or dashi messaging is enabled but the dashi name
            or the pieces of the dashi URI are missing. Process definition
            lookup and policy name validation may raise as well.
        """
        if not HighAvailabilityCore:
            msg = "HighAvailabilityCore isn't available. Use autolaunch.cfg buildout"
            log.error(msg)
            return

        cfg = self.CFG.get_safe("highavailability")

        # use default PD name as the sole PD if none are provided in config
        self.pds = self.CFG.get_safe("highavailability.process_dispatchers",
            [ProcessDispatcherService.name])
        if len(self.pds) != 1:
            raise Exception("HA Service doesn't support multiple Process Dispatchers")

        self.process_definition_id, self.process_definition = self._get_process_definition()

        self.process_configuration = self.CFG.get_safe("highavailability.process_configuration")
        aggregator_config = _get_aggregator_config(self.CFG)

        # the service must be registered before the stored policy can be read
        self.service_id, self.service_name = self._register_service()
        self.policy_event = Event()

        stored_policy = self._stored_policy
        # Truthiness (rather than "!= {}") also treats a None stored policy as
        # "nothing stored" and falls back to the configured policy.
        if stored_policy:
            policy_name = stored_policy.get('name')
            policy_parameters = stored_policy.get('parameters')
        else:
            policy_name = self.CFG.get_safe("highavailability.policy.name")
            policy_parameters = self.CFG.get_safe("highavailability.policy.parameters")
        self._validate_policy_name(policy_name)
        self.policy_name = policy_name.lower()
        self.policy_parameters = policy_parameters

        self.policy_interval = self.CFG.get_safe("highavailability.policy.interval",
                DEFAULT_INTERVAL)

        self.logprefix = "HA Agent (%s): " % self.service_name

        # process control wakes the policy loop through policy_event.set
        self.control = HAProcessControl(self.pds[0],
            self.container.resource_registry, self.service_id,
            self.policy_event.set, logprefix=self.logprefix)

        self.core = HighAvailabilityCore(cfg, self.control,
                self.pds, self.policy_name, process_definition_id=self.process_definition_id,
                parameters=self.policy_parameters,
                process_configuration=self.process_configuration,
                aggregator_config=aggregator_config, name=self.service_name)

        if not self.CFG.get_safe("highavailability.dashi_messaging", False):
            self.dashi_handler = None
            return

        dashi_name = self.CFG.get_safe("highavailability.dashi_name")
        if not dashi_name:
            raise Exception("dashi_name unknown")

        dashi_uri = self.CFG.get_safe("highavailability.dashi_uri")
        if not dashi_uri:
            # no explicit URI: build one from the container's AMQP settings
            rabbit_host = self.CFG.get_safe("server.amqp.host")
            rabbit_user = self.CFG.get_safe("server.amqp.username")
            rabbit_pass = self.CFG.get_safe("server.amqp.password")

            if not (rabbit_host and rabbit_user and rabbit_pass):
                raise Exception("cannot form dashi URI")

            dashi_uri = "amqp://%s:%s@%s/" % (rabbit_user, rabbit_pass,
                                              rabbit_host)

        dashi_exchange = self.CFG.get_safe("highavailability.dashi_exchange")
        if not dashi_exchange:
            dashi_exchange = get_sys_name()

        self.dashi_handler = HADashiHandler(self, dashi_name, dashi_uri, dashi_exchange)

    def _get_process_definition(self):
        process_definition_id = self.CFG.get_safe("highavailability.process_definition_id")
        process_definition_name = self.CFG.get_safe("highavailability.process_definition_name")

        if process_definition_id:
            pd_name = self.pds[0]
            pd = ProcessDispatcherServiceClient(to_name=pd_name)
            definition = pd.read_process_definition(process_definition_id)

        elif process_definition_name:
            definitions, _ = self.container.resource_registry.find_resources(
                restype="ProcessDefinition", name=process_definition_name)
            if len(definitions) == 0:
                raise Exception("Process definition with name '%s' not found" %
                    process_definition_name)
            elif len(definitions) > 1:
                raise Exception("multiple process definitions found with name '%s'" %
                    process_definition_name)
            definition = definitions[0]
            process_definition_id = definition._id

        else:
            raise Exception("HA Agent requires either process definition ID or name")

        return process_definition_id, definition

    def on_start(self):
        """Start the dashi handler (if any) and process control, seed the
        core with the already-managed processes and launch the policy loop.
        """
        handler = self.dashi_handler
        if handler:
            handler.start()

        control = self.control
        control.start()

        # override the core's list of currently managed processes. This is to support
        # restart of an HAAgent.
        managed_upids = control.get_managed_upids()
        self.core.set_managed_upids(managed_upids)

        self.policy_thread = gevent.spawn(self._policy_thread_loop)

        # kickstart the policy once. future invocations will happen via event callbacks.
        self.policy_event.set()

    def on_quit(self):
        """Stop process control, the policy greenlet and the dashi handler.

        Each collaborator is skipped when it was never created: on_init can
        return early before building self.control, and self.policy_thread
        stays None until on_start has run.
        """
        control = getattr(self, "control", None)
        if control is not None:
            control.stop()
        if self.policy_thread is not None:
            self.policy_thread.kill(block=True, timeout=3)
        if self.dashi_handler:
            self.dashi_handler.stop()

        # DL: do we ever want to remove this object?
        #self._unregister_service()

    def _register_service(self):

        definition = self.process_definition
        existing_services, _ = self.container.resource_registry.find_resources(
            restype="Service", name=definition.name)

        if len(existing_services) > 0:
            if len(existing_services) > 1:
                log.warning("There is more than one service object for %s. Using the first one" % definition.name)
            service_id = existing_services[0]._id
        else:
            svc_obj = Service(name=definition.name, exchange_name=definition.name)
            service_id, _ = self.container.resource_registry.create(svc_obj)

        svcdefs, _ = self.container.resource_registry.find_resources(
            restype="ServiceDefinition", name=definition.name)

        if svcdefs:
            try:
                self.container.resource_registry.create_association(
                    service_id, "hasServiceDefinition", svcdefs[0]._id)
            except BadRequest:
                log.warn("Failed to associate %s Service and ServiceDefinition. It probably exists.",
                    definition.name)
        else:
            log.error("Cannot find ServiceDefinition resource for %s",
                    definition.name)

        return service_id, definition.name

    def _unregister_service(self):
        if not self.service_id:
            log.error("No service id. Cannot unregister service")
            return

        self.container.resource_registry.delete(self.service_id, del_associations=True)

    def _policy_thread_loop(self):
        """Single thread runs policy loops, to prevent races

        Loops forever: each pass waits on self.policy_event for at most
        self.policy_interval seconds, then applies the policy. When the wait
        ended without the event being set, the process cache is reloaded
        first. Failures of either step are logged and the loop continues.
        """
        while True:
            # wait until our event is set, up to policy_interval seconds
            self.policy_event.wait(self.policy_interval)
            if self.policy_event.is_set():
                # clear before applying so an event set during the policy run
                # triggers another pass
                self.policy_event.clear()
                log.debug("%sapplying policy due to event", self.logprefix)
            else:

                # on a regular basis, we check for the current state of each process.
                # this is essentially a hedge against bugs in the HAAgent, or in the
                # ION events system that could prevent us from seeing state changes
                # of processes.
                log.debug("%sapplying policy due to timer. Reloading process cache first.",
                    self.logprefix)
                try:
                    self.control.reload_processes()
                except (Exception, gevent.Timeout):
                    # gevent.Timeout is listed separately from Exception
                    log.warn("%sFailed to reload processes from PD. Will retry later.",
                        self.logprefix, exc_info=True)

            try:
                self._apply_policy()
            except (Exception, gevent.Timeout):
                log.warn("%sFailed to apply policy. Will retry later.",
                    self.logprefix, exc_info=True)

    def _validate_policy_name(self, policy_name):
        if policy_name is None:
            msg = "HA service requires a policy name at CFG.highavailability.policy.name"
            raise Exception(msg)
        try:
            policy.policy_map[policy_name.lower()]
        except KeyError:
            raise Exception("HA Service doesn't support '%s' policy" % policy_name)

    @property
    def _policy_dict(self):
        policy_dict = {
            'name': self.core.policy_type,
            'parameters': self.core.policy.parameters
        }
        return policy_dict

    @property
    def _stored_policy(self):
        service = self.container.resource_registry.read(self.service_id)
        return service.policy

    def _apply_policy(self):
        """Apply the core policy, then mirror the resulting state and policy
        onto the Service resource.

        The registry is only written when state or policy differ from what
        is stored. Problems while updating the Service are logged, not raised;
        errors from core.apply_policy() propagate to the caller.
        """
        self.core.apply_policy()

        try:
            current_state = _core_hastate_to_service_state(self.core.status())
            current_policy = self._policy_dict
            registry = self.container.resource_registry
            service = registry.read(self.service_id)

            dirty = False
            if service.state != current_state:
                service.state = current_state
                dirty = True
            if service.policy != current_policy:
                service.policy = current_policy
                dirty = True

            # skip the registry write when nothing changed
            if dirty:
                registry.update(service)
        except Exception:
            log.warn("%sProblem when updating Service state", self.logprefix, exc_info=True)

    def rcmd_reconfigure_policy(self, new_policy_params, new_policy_name=None):
        """Service operation: reconfigure the policy used for the service and
        wake the policy loop

        @param new_policy_params: parameters of policy
        @param new_policy_name: name of policy
        @return: None
        """
        core = self.core
        core.reconfigure_policy(new_policy_params, new_policy_name)
        # trigger policy thread to wake up
        wake_policy_loop = self.policy_event.set
        wake_policy_loop()

    def rcmd_status(self):
        """Service operation: report the status of the HA Service

        @return: whatever self.core.status() returns; one of
            {PENDING, READY, STEADY, BROKEN}
        """
        current_status = self.core.status()
        return current_status

    def rcmd_dump(self):
        """Service operation: return the core's dump with this agent's
        service_id added under the 'service_id' key
        """
        snapshot = self.core.dump()
        snapshot['service_id'] = self.service_id
        return snapshot
    def on_init(self):
        """Configure the agent from self.CFG and build its collaborators.

        Resolves the process definition, registers the Service resource,
        selects the policy (a policy stored on the Service resource takes
        precedence over CFG.highavailability.policy) and creates the
        HAProcessControl and HighAvailabilityCore. When
        highavailability.dashi_messaging is set, an HADashiHandler is created
        as well; otherwise self.dashi_handler is None. When
        HighAvailabilityCore is falsy an error is logged and nothing is set up.

        @raise Exception: if the number of configured process dispatchers is
            not exactly one, or dashi messaging is enabled but the dashi name
            or the pieces of the dashi URI are missing. Process definition
            lookup and policy name validation may raise as well.
        """
        if not HighAvailabilityCore:
            msg = "HighAvailabilityCore isn't available. Use autolaunch.cfg buildout"
            log.error(msg)
            return

        cfg = self.CFG.get_safe("highavailability")

        # use default PD name as the sole PD if none are provided in config
        self.pds = self.CFG.get_safe("highavailability.process_dispatchers",
                                     [ProcessDispatcherService.name])
        if len(self.pds) != 1:
            raise Exception(
                "HA Service doesn't support multiple Process Dispatchers")

        (self.process_definition_id,
         self.process_definition) = self._get_process_definition()

        self.process_configuration = self.CFG.get_safe(
            "highavailability.process_configuration")
        aggregator_config = _get_aggregator_config(self.CFG)

        # the service must be registered before the stored policy can be read
        self.service_id, self.service_name = self._register_service()
        self.policy_event = Event()

        stored_policy = self._stored_policy
        # Truthiness (rather than "!= {}") also treats a None stored policy as
        # "nothing stored" and falls back to the configured policy.
        if stored_policy:
            policy_name = stored_policy.get('name')
            policy_parameters = stored_policy.get('parameters')
        else:
            policy_name = self.CFG.get_safe("highavailability.policy.name")
            policy_parameters = self.CFG.get_safe(
                "highavailability.policy.parameters")
        self._validate_policy_name(policy_name)
        self.policy_name = policy_name.lower()
        self.policy_parameters = policy_parameters

        self.policy_interval = self.CFG.get_safe(
            "highavailability.policy.interval", DEFAULT_INTERVAL)

        self.logprefix = "HA Agent (%s): " % self.service_name

        # process control wakes the policy loop through policy_event.set
        self.control = HAProcessControl(self.pds[0],
                                        self.container.resource_registry,
                                        self.service_id,
                                        self.policy_event.set,
                                        logprefix=self.logprefix)

        self.core = HighAvailabilityCore(
            cfg,
            self.control,
            self.pds,
            self.policy_name,
            process_definition_id=self.process_definition_id,
            parameters=self.policy_parameters,
            process_configuration=self.process_configuration,
            aggregator_config=aggregator_config,
            name=self.service_name)

        if not self.CFG.get_safe("highavailability.dashi_messaging", False):
            self.dashi_handler = None
            return

        dashi_name = self.CFG.get_safe("highavailability.dashi_name")
        if not dashi_name:
            raise Exception("dashi_name unknown")

        dashi_uri = self.CFG.get_safe("highavailability.dashi_uri")
        if not dashi_uri:
            # no explicit URI: build one from the container's AMQP settings
            rabbit_host = self.CFG.get_safe("server.amqp.host")
            rabbit_user = self.CFG.get_safe("server.amqp.username")
            rabbit_pass = self.CFG.get_safe("server.amqp.password")

            if not (rabbit_host and rabbit_user and rabbit_pass):
                raise Exception("cannot form dashi URI")

            dashi_uri = "amqp://%s:%s@%s/" % (rabbit_user, rabbit_pass,
                                              rabbit_host)

        dashi_exchange = self.CFG.get_safe("highavailability.dashi_exchange")
        if not dashi_exchange:
            dashi_exchange = get_sys_name()

        self.dashi_handler = HADashiHandler(self, dashi_name, dashi_uri,
                                            dashi_exchange)
class HighAvailabilityAgent(SimpleResourceAgent):
    """Agent to manage high availability processes

    """
    def __init__(self):
        """Run the SimpleResourceAgent initializer, reset the attributes that
        on_init/on_start fill in later and create the loop-stop event.
        """
        SimpleResourceAgent.__init__(self)
        # set by on_quit to ask the policy loop to exit
        self._policy_loop_event = Event()
        # placeholders until on_init / on_start create the real objects
        self.policy_event = None
        self.policy_thread = None
        self.service_id = None
        self.dashi_handler = None

    def on_init(self):
        """Configure the agent from self.CFG and build its collaborators.

        Resolves the process definition, registers the Service resource,
        selects the policy (a policy stored on the Service resource takes
        precedence over CFG.highavailability.policy) and creates the
        HAProcessControl and HighAvailabilityCore. When
        highavailability.dashi_messaging is set, an HADashiHandler is created
        as well; otherwise self.dashi_handler is None. When
        HighAvailabilityCore is falsy an error is logged and nothing is set up.

        NOTE(review): a further on_init definition appears later in this
        class body; if so, that one replaces this one -- confirm which
        version is intended.

        @raise Exception: if the number of configured process dispatchers is
            not exactly one, or dashi messaging is enabled but the dashi name
            or the pieces of the dashi URI are missing. Process definition
            lookup and policy name validation may raise as well.
        """
        if not HighAvailabilityCore:
            msg = "HighAvailabilityCore isn't available. Use autolaunch.cfg buildout"
            log.error(msg)
            return

        cfg = self.CFG.get_safe("highavailability")

        # use default PD name as the sole PD if none are provided in config
        self.pds = self.CFG.get_safe("highavailability.process_dispatchers",
                                     [ProcessDispatcherService.name])
        if len(self.pds) != 1:
            raise Exception(
                "HA Service doesn't support multiple Process Dispatchers")

        (self.process_definition_id,
         self.process_definition) = self._get_process_definition()

        self.process_configuration = self.CFG.get_safe(
            "highavailability.process_configuration")
        aggregator_config = _get_aggregator_config(self.CFG)

        # the service must be registered before the stored policy can be read
        self.service_id, self.service_name = self._register_service()
        self.policy_event = Event()

        stored_policy = self._stored_policy
        # Truthiness (rather than "!= {}") also treats a None stored policy as
        # "nothing stored" and falls back to the configured policy.
        if stored_policy:
            policy_name = stored_policy.get('name')
            policy_parameters = stored_policy.get('parameters')
        else:
            policy_name = self.CFG.get_safe("highavailability.policy.name")
            policy_parameters = self.CFG.get_safe(
                "highavailability.policy.parameters")
        self._validate_policy_name(policy_name)
        self.policy_name = policy_name.lower()
        self.policy_parameters = policy_parameters

        self.policy_interval = self.CFG.get_safe(
            "highavailability.policy.interval", DEFAULT_INTERVAL)

        self.logprefix = "HA Agent (%s): " % self.service_name

        # process control wakes the policy loop through policy_event.set
        self.control = HAProcessControl(self.pds[0],
                                        self.container.resource_registry,
                                        self.service_id,
                                        self.policy_event.set,
                                        logprefix=self.logprefix)

        self.core = HighAvailabilityCore(
            cfg,
            self.control,
            self.pds,
            self.policy_name,
            process_definition_id=self.process_definition_id,
            parameters=self.policy_parameters,
            process_configuration=self.process_configuration,
            aggregator_config=aggregator_config,
            name=self.service_name)

        if not self.CFG.get_safe("highavailability.dashi_messaging", False):
            self.dashi_handler = None
            return

        dashi_name = self.CFG.get_safe("highavailability.dashi_name")
        if not dashi_name:
            raise Exception("dashi_name unknown")

        dashi_uri = self.CFG.get_safe("highavailability.dashi_uri")
        if not dashi_uri:
            # no explicit URI: build one from the container's AMQP settings
            rabbit_host = self.CFG.get_safe("server.amqp.host")
            rabbit_user = self.CFG.get_safe("server.amqp.username")
            rabbit_pass = self.CFG.get_safe("server.amqp.password")

            if not (rabbit_host and rabbit_user and rabbit_pass):
                raise Exception("cannot form dashi URI")

            dashi_uri = "amqp://%s:%s@%s/" % (rabbit_user, rabbit_pass,
                                              rabbit_host)

        dashi_exchange = self.CFG.get_safe("highavailability.dashi_exchange")
        if not dashi_exchange:
            dashi_exchange = get_sys_name()

        self.dashi_handler = HADashiHandler(self, dashi_name, dashi_uri,
                                            dashi_exchange)

    def _get_process_definition(self):
        process_definition_id = self.CFG.get_safe(
            "highavailability.process_definition_id")
        process_definition_name = self.CFG.get_safe(
            "highavailability.process_definition_name")

        if process_definition_id:
            pd_name = self.pds[0]
            pd = ProcessDispatcherServiceClient(to_name=pd_name)
            definition = pd.read_process_definition(process_definition_id)

        elif process_definition_name:
            definitions, _ = self.container.resource_registry.find_resources(
                restype="ProcessDefinition", name=process_definition_name)
            if len(definitions) == 0:
                raise Exception("Process definition with name '%s' not found" %
                                process_definition_name)
            elif len(definitions) > 1:
                raise Exception(
                    "multiple process definitions found with name '%s'" %
                    process_definition_name)
            definition = definitions[0]
            process_definition_id = definition._id

        else:
            raise Exception(
                "HA Agent requires either process definition ID or name")

        return process_definition_id, definition

    def on_start(self):
        """Start the dashi handler (if any) and process control, seed the
        core with the already-managed processes and launch the policy loop.
        """
        handler = self.dashi_handler
        if handler:
            handler.start()

        control = self.control
        control.start()

        # override the core's list of currently managed processes. This is to support
        # restart of an HAAgent.
        managed_upids = control.get_managed_upids()
        self.core.set_managed_upids(managed_upids)

        self.policy_thread = gevent.spawn(self._policy_thread_loop)

        # kickstart the policy once. future invocations will happen via event callbacks.
        self.policy_event.set()

    def on_quit(self):
        """Stop process control, the policy greenlet and the dashi handler.

        The policy loop is first asked to exit through _policy_loop_event and
        given up to 3 seconds to finish on its own, then killed. Collaborators
        that were never created are skipped: on_init can return early before
        building self.control, and self.policy_thread stays None until
        on_start has run.
        """
        control = getattr(self, "control", None)
        if control is not None:
            control.stop()

        self._policy_loop_event.set()
        if self.policy_thread is not None:
            # Bounded graceful wait: the loop only looks at the stop event
            # between passes, so an unbounded join() could block for a whole
            # policy interval and would make the kill timeout meaningless.
            self.policy_thread.join(timeout=3)
            self.policy_thread.kill(block=True, timeout=3)

        if self.dashi_handler:
            self.dashi_handler.stop()

        # DL: do we ever want to remove this object?
        #self._unregister_service()

    def _register_service(self):

        definition = self.process_definition
        existing_services, _ = self.container.resource_registry.find_resources(
            restype="Service", name=definition.name)

        if len(existing_services) > 0:
            if len(existing_services) > 1:
                log.warning(
                    "There is more than one service object for %s. Using the first one"
                    % definition.name)
            service_id = existing_services[0]._id
        else:
            svc_obj = Service(name=definition.name,
                              exchange_name=definition.name)
            service_id, _ = self.container.resource_registry.create(svc_obj)

        svcdefs, _ = self.container.resource_registry.find_resources(
            restype="ServiceDefinition", name=definition.name)

        if svcdefs:
            try:
                self.container.resource_registry.create_association(
                    service_id, "hasServiceDefinition", svcdefs[0]._id)
            except BadRequest:
                log.warn(
                    "Failed to associate %s Service and ServiceDefinition. It probably exists.",
                    definition.name)
        else:
            log.error("Cannot find ServiceDefinition resource for %s",
                      definition.name)

        return service_id, definition.name

    def _unregister_service(self):
        if not self.service_id:
            log.error("No service id. Cannot unregister service")
            return

        self.container.resource_registry.delete(self.service_id,
                                                del_associations=True)

    def _policy_thread_loop(self):
        """Single thread runs policy loops, to prevent races

        Runs until self._policy_loop_event is set. Each pass waits on
        self.policy_event for at most self.policy_interval seconds, then
        applies the policy. When the wait ended without the event being set,
        the process cache is reloaded first. Failures of either step are
        logged and the loop continues.

        NOTE(review): the stop event is only checked at the top of each pass,
        so a stop request made during the policy_event wait is noticed only
        after that wait and one more policy application -- confirm this is
        acceptable for shutdown.
        """
        while not self._policy_loop_event.wait(timeout=0.1):
            # wait until our event is set, up to policy_interval seconds
            self.policy_event.wait(self.policy_interval)
            if self.policy_event.is_set():
                # clear before applying so an event set during the policy run
                # triggers another pass
                self.policy_event.clear()
                log.debug("%sapplying policy due to event", self.logprefix)
            else:

                # on a regular basis, we check for the current state of each process.
                # this is essentially a hedge against bugs in the HAAgent, or in the
                # ION events system that could prevent us from seeing state changes
                # of processes.
                log.debug(
                    "%sapplying policy due to timer. Reloading process cache first.",
                    self.logprefix)
                try:
                    self.control.reload_processes()
                except (Exception, gevent.Timeout):
                    # gevent.Timeout is listed separately from Exception
                    log.warn(
                        "%sFailed to reload processes from PD. Will retry later.",
                        self.logprefix,
                        exc_info=True)

            try:
                self._apply_policy()
            except (Exception, gevent.Timeout):
                log.warn("%sFailed to apply policy. Will retry later.",
                         self.logprefix,
                         exc_info=True)

    def _validate_policy_name(self, policy_name):
        if policy_name is None:
            msg = "HA service requires a policy name at CFG.highavailability.policy.name"
            raise Exception(msg)
        try:
            policy.policy_map[policy_name.lower()]
        except KeyError:
            raise Exception("HA Service doesn't support '%s' policy" %
                            policy_name)

    @property
    def _policy_dict(self):
        policy_dict = {
            'name': self.core.policy_type,
            'parameters': self.core.policy.parameters
        }
        return policy_dict

    @property
    def _stored_policy(self):
        service = self.container.resource_registry.read(self.service_id)
        return service.policy

    def _apply_policy(self):
        """Apply the core policy, then mirror the resulting state and policy
        onto the Service resource.

        The registry is only written when state or policy differ from what
        is stored. Problems while updating the Service are logged, not raised;
        errors from core.apply_policy() propagate to the caller.
        """
        self.core.apply_policy()

        try:
            current_state = _core_hastate_to_service_state(
                self.core.status())
            current_policy = self._policy_dict
            registry = self.container.resource_registry
            service = registry.read(self.service_id)

            dirty = False
            if service.state != current_state:
                service.state = current_state
                dirty = True
            if service.policy != current_policy:
                service.policy = current_policy
                dirty = True

            # skip the registry write when nothing changed
            if dirty:
                registry.update(service)
        except Exception:
            log.warn("%sProblem when updating Service state",
                     self.logprefix,
                     exc_info=True)

    def rcmd_reconfigure_policy(self, new_policy_params, new_policy_name=None):
        """Service operation: Change the policy used for the service

        The new parameters (and optional policy name) are handed to the core,
        then policy_event is set to wake up the policy thread.

        @param new_policy_params: parameters of policy
        @param new_policy_name: name of policy; None when not supplied
        @return:
        """
        core = self.core
        core.reconfigure_policy(new_policy_params, new_policy_name)

        # wake the policy thread
        wake_up = self.policy_event
        wake_up.set()

    def rcmd_status(self):
        """Service operation: Report the status of the HA Service

        The value is whatever core.status() returns.

        @return: {PENDING, READY, STEADY, BROKEN}
        """
        current_status = self.core.status()
        return current_status

    def rcmd_dump(self):
        """Service operation: Dump the core's state plus this agent's service id

        @return: the object returned by core.dump(), with its 'service_id'
                 entry set to self.service_id
        """
        snapshot = self.core.dump()
        snapshot['service_id'] = self.service_id
        return snapshot
    def on_init(self):
        """Configure the agent from CFG.highavailability and start the policy loop

        Steps, in order: resolve the policy class from
        CFG.highavailability.policy.name, read the remaining settings,
        register the service, build the HighAvailabilityCore, schedule
        core.apply_policy with looping_call, and create a HADashiHandler when
        CFG.highavailability.dashi_messaging is truthy.

        If HighAvailabilityCore is not available an error is logged and the
        method returns without configuring anything.

        @raise Exception: if the policy name is missing or not in
                          policy.policy_map; if dashi messaging is enabled but
                          dashi_name is unset; or if no dashi_uri is configured
                          and the server.amqp host/username/password settings
                          are not all set
        """
        if not HighAvailabilityCore:
            log.error("HighAvailabilityCore isn't available. Use autolaunch.cfg buildout")
            return
        log.debug("HighAvailabilityCore Pyon on_init")

        get_cfg = self.CFG.get_safe

        policy_name = get_cfg("highavailability.policy.name")
        if policy_name is None:
            raise Exception(
                "HA service requires a policy name at CFG.highavailability.policy.name")
        try:
            self.policy = policy.policy_map[policy_name.lower()]
        except KeyError:
            raise Exception("HA Service doesn't support '%s' policy" % policy_name)

        policy_parameters = get_cfg("highavailability.policy.parameters")
        self.policy_interval = get_cfg("highavailability.policy.interval",
                                       DEFAULT_INTERVAL)
        ha_cfg = get_cfg("highavailability")

        # use default PD name as the sole PD if none are provided in config
        default_pds = [ProcessDispatcherService.name]
        self.pds = get_cfg("highavailability.process_dispatchers", default_pds)

        self.process_definition_id = get_cfg("highavailability.process_definition_id")
        self.process_configuration = get_cfg("highavailability.process_configuration")
        aggregator_config = get_cfg("highavailability.aggregator")

        # The service id is needed by the core's PD client, so register first.
        self.service_id = self._register_service()

        # TODO: Allow other core class?
        core_kwargs = {
            'process_definition_id': self.process_definition_id,
            'parameters': policy_parameters,
            'process_configuration': self.process_configuration,
            'aggregator_config': aggregator_config,
            'pd_client_kwargs': {
                'container': self.container,
                'service_id': self.service_id,
            },
        }
        self.core = HighAvailabilityCore(ha_cfg, ProcessDispatcherSimpleAPIClient,
                                         self.pds, self.policy, **core_kwargs)

        self.policy_thread = looping_call(self.policy_interval, self.core.apply_policy)

        if not get_cfg("highavailability.dashi_messaging", False):
            self.dashi_handler = None
            return

        dashi_name = get_cfg("highavailability.dashi_name")
        if not dashi_name:
            raise Exception("dashi_name unknown")

        dashi_uri = get_cfg("highavailability.dashi_uri")
        if not dashi_uri:
            # No explicit URI: build one from the container's AMQP settings.
            amqp_host = get_cfg("server.amqp.host")
            amqp_user = get_cfg("server.amqp.username")
            amqp_pass = get_cfg("server.amqp.password")

            if not all((amqp_host, amqp_user, amqp_pass)):
                raise Exception("cannot form dashi URI")

            dashi_uri = "amqp://%s:%s@%s/" % (amqp_user, amqp_pass, amqp_host)

        dashi_exchange = get_cfg("highavailability.dashi_exchange") or get_sys_name()

        self.dashi_handler = HADashiHandler(self, dashi_name, dashi_uri, dashi_exchange)
class HighAvailabilityAgent(SimpleResourceAgent):
    """Agent to manage high availability processes

    Configured from CFG.highavailability. on_init registers a Service
    resource, builds a HighAvailabilityCore and schedules its apply_policy
    with looping_call; on_quit stops the loop and the optional dashi handler
    and deletes the Service resource again.
    """

    def __init__(self):
        log.debug("HighAvailabilityAgent init")
        SimpleResourceAgent.__init__(self)
        self.dashi_handler = None
        self.service_id = None
        # Assigned for real by on_init(). The None placeholder lets on_quit()
        # run cleanly when on_init() returned early or raised before the
        # policy loop was started.
        self.policy_thread = None

    def on_init(self):
        """Configure the agent from CFG.highavailability and start the policy loop

        If HighAvailabilityCore is not available an error is logged and the
        method returns without configuring anything.

        @raise Exception: if the policy name is missing or not in
                          policy.policy_map; if dashi messaging is enabled but
                          dashi_name is unset; or if no dashi_uri is configured
                          and the server.amqp host/username/password settings
                          are not all set
        """
        if not HighAvailabilityCore:
            msg = "HighAvailabilityCore isn't available. Use autolaunch.cfg buildout"
            log.error(msg)
            return
        log.debug("HighAvailabilityCore Pyon on_init")

        policy_name = self.CFG.get_safe("highavailability.policy.name")
        if policy_name is None:
            msg = "HA service requires a policy name at CFG.highavailability.policy.name"
            raise Exception(msg)
        try:
            self.policy = policy.policy_map[policy_name.lower()]
        except KeyError:
            raise Exception("HA Service doesn't support '%s' policy" % policy_name)

        policy_parameters = self.CFG.get_safe("highavailability.policy.parameters")

        self.policy_interval = self.CFG.get_safe("highavailability.policy.interval",
                DEFAULT_INTERVAL)

        cfg = self.CFG.get_safe("highavailability")

        # use default PD name as the sole PD if none are provided in config
        self.pds = self.CFG.get_safe("highavailability.process_dispatchers",
            [ProcessDispatcherService.name])

        self.process_definition_id = self.CFG.get_safe("highavailability.process_definition_id")
        self.process_configuration = self.CFG.get_safe("highavailability.process_configuration")
        aggregator_config = self.CFG.get_safe("highavailability.aggregator")

        # The service id is passed to the core's PD client below, so the
        # service has to be registered before the core is built.
        self.service_id = self._register_service()

        # TODO: Allow other core class?
        self.core = HighAvailabilityCore(cfg, ProcessDispatcherSimpleAPIClient,
                self.pds, self.policy, process_definition_id=self.process_definition_id,
                parameters=policy_parameters,
                process_configuration=self.process_configuration,
                aggregator_config=aggregator_config,
                pd_client_kwargs={'container': self.container,
                    'service_id': self.service_id})

        self.policy_thread = looping_call(self.policy_interval, self.core.apply_policy)

        dashi_messaging = self.CFG.get_safe("highavailability.dashi_messaging", False)
        if dashi_messaging:

            dashi_name = self.CFG.get_safe("highavailability.dashi_name")
            if not dashi_name:
                raise Exception("dashi_name unknown")
            dashi_uri = self.CFG.get_safe("highavailability.dashi_uri")
            if not dashi_uri:
                # No explicit URI: build one from the container's AMQP settings.
                rabbit_host = self.CFG.get_safe("server.amqp.host")
                rabbit_user = self.CFG.get_safe("server.amqp.username")
                rabbit_pass = self.CFG.get_safe("server.amqp.password")

                if not (rabbit_host and rabbit_user and rabbit_pass):
                    raise Exception("cannot form dashi URI")

                dashi_uri = "amqp://%s:%s@%s/" % (rabbit_user, rabbit_pass,
                                                  rabbit_host)
            dashi_exchange = self.CFG.get_safe("highavailability.dashi_exchange")
            if not dashi_exchange:
                dashi_exchange = get_sys_name()

            self.dashi_handler = HADashiHandler(self, dashi_name, dashi_uri, dashi_exchange)

        else:
            self.dashi_handler = None

    def on_start(self):
        """Start the dashi handler if on_init created one"""
        if self.dashi_handler:
            self.dashi_handler.start()

    def on_quit(self):
        """Stop the policy loop and dashi handler, then unregister the service

        Safe to call when on_init() did not complete: the policy thread is
        only killed if one was actually started, so the remaining cleanup
        still runs.
        """
        if self.policy_thread is not None:
            self.policy_thread.kill(block=True, timeout=3)
        if self.dashi_handler:
            self.dashi_handler.stop()

        self._unregister_service()

    def _register_service(self):
        """Find or create the Service resource for the managed process definition

        The process definition is read from the first configured process
        dispatcher. A Service resource with the definition's name is reused
        if one exists, otherwise it is created. The service is then
        associated with the ServiceDefinition of the same name, if any.

        @return: id of the Service resource, or None when there is no process
                 definition id or no process dispatcher to ask
        """
        if not self.process_definition_id:
            log.error("No process definition id. Not registering service")
            return

        # Truthiness also covers a process_dispatchers value of None.
        if not self.pds:
            log.error("Must have at least one PD available to register a service")
            return

        pd_name = self.pds[0]
        pd = ProcessDispatcherServiceClient(to_name=pd_name)
        definition = pd.read_process_definition(self.process_definition_id)

        existing_services, _ = self.container.resource_registry.find_resources(
                restype="Service", name=definition.name)

        if existing_services:
            if len(existing_services) > 1:
                log.warning("There is more than one service object for %s. Using the first one",
                        definition.name)
            service_id = existing_services[0]._id
        else:
            svc_obj = Service(name=definition.name, exchange_name=definition.name)
            service_id, _ = self.container.resource_registry.create(svc_obj)

        svcdefs, _ = self.container.resource_registry.find_resources(
                restype="ServiceDefinition", name=definition.name)

        if svcdefs:
            self.container.resource_registry.create_association(
                    service_id, "hasServiceDefinition", svcdefs[0]._id)
        else:
            log.error("Cannot find ServiceDefinition resource for %s",
                    definition.name)

        return service_id

    def _unregister_service(self):
        """Delete this agent's Service resource together with its associations

        Logs an error and does nothing when no service id was recorded.
        """
        if not self.service_id:
            log.error("No service id. Cannot unregister service")
            return

        self.container.resource_registry.delete(self.service_id, del_associations=True)

    def rcmd_reconfigure_policy(self, new_policy):
        """Service operation: Change the parameters of the policy used for service

        @param new_policy: parameters of policy
        @return:
        """
        self.core.reconfigure_policy(new_policy)

    def rcmd_status(self):
        """Service operation: Get the status of the HA Service

        @return: {PENDING, READY, STEADY, BROKEN}
        """
        return self.core.status()

    def rcmd_dump(self):
        """Service operation: Dump the core's state plus this agent's service id

        @return: the object returned by core.dump(), with its 'service_id'
                 entry set to self.service_id
        """
        dump = self.core.dump()
        dump['service_id'] = self.service_id
        return dump