Ejemplo n.º 1
0
 def manual_update(self, path, key, value):
     """
     Apply a client-requested override of one value of a cached state.

     Nothing happens when ``path`` is not present in the cache. Otherwise
     the override is persisted through ``_update_override_info``, the cached
     state is changed in place and the changed state is queued on the
     message throttle.

     :type path: str
     :type key: str
     :type value: str
     """
     cached_state = self._cache.application_states.get(path, None)
     if cached_state is None:
         # unknown path: nothing to override
         return

     # persist the override first, then mirror it in the cached state
     self._update_override_info(path, key, value)
     outgoing = ApplicationStatesMessage()
     cached_state[key] = value
     outgoing.update({path: cached_state})
     self._message_throttle.add_message(outgoing)
 def manual_update(self, path, key, value):
     """
     Apply a client-requested override of one value of a cached state.

     Nothing happens when ``path`` is not present in the cache. Otherwise
     the override is persisted through ``_update_override_info``, the cached
     state is changed in place and the changed state is queued on the
     message throttle.

     :type path: str
     :type key: str
     :type value: str
     """
     cached_state = self._cache.application_states.get(path, None)
     if cached_state is None:
         # unknown path: nothing to override
         return

     # persist the override first, then mirror it in the cached state
     self._update_override_info(path, key, value)
     outgoing = ApplicationStatesMessage()
     cached_state[key] = value
     outgoing.update({path: cached_state})
     self._message_throttle.add_message(outgoing)
    def test_walk_no_children(self):
        """
        _walk on a path whose get_children call yields nothing must store
        the state returned by _get_application_state in the result, keyed by
        that state's configuration path.
        """
        cache = self._create_app_state_cache()

        self.zoo_keeper.connected = True
        # record: the walked path reports no children
        self.zoo_keeper.get_children('path1',
                                     watch=mox.IgnoreArg()).AndReturn(None)

        app_state = ApplicationStateMock()
        app_state.mock_dict = {'key': 'value'}
        result = ApplicationStatesMessage()
        # expected content: one entry under the mock's default path
        compare = {
            ApplicationStateMock().configuration_path:
            app_state.to_dictionary(),
        }

        self.mox.StubOutWithMock(cache, "_get_application_state")
        cache._get_application_state('path1').AndReturn(app_state)

        self.mox.ReplayAll()

        cache._walk('path1', result)
        # assertEqual: the assertEquals alias is deprecated and no longer
        # exists in Python 3.12+
        self.assertEqual(result, compare)

        self.mox.VerifyAll()
    def test_walk_children(self):
        """
        _walk on a path with two children must descend into each child and
        collect one entry per child in the result.
        """
        cache = ApplicationStateCache(self.configuration, self.zoo_keeper,
                                      self.web_socket_clients,
                                      self.time_estimate_cache)
        app_state1 = ApplicationStateMock()
        app_state1.mock_dict = {'key': 'value'}
        app_state1.configuration_path = "path1/foo"

        app_state2 = ApplicationStateMock()
        app_state2.mock_dict = {'key': 'value'}
        app_state2.configuration_path = "path1/bar"

        result = ApplicationStatesMessage()
        compare = {
            'path1/bar': app_state2.to_dictionary(),
            'path1/foo': app_state1.to_dictionary(),
        }

        self.zoo_keeper.connected = True
        # record: the root has two children, neither of which has children
        self.zoo_keeper.get_children('path1', watch=mox.IgnoreArg()).AndReturn(
            ['foo', 'bar'])
        self.zoo_keeper.get_children('path1/foo',
                                     watch=mox.IgnoreArg()).AndReturn([])
        self.zoo_keeper.get_children('path1/bar',
                                     watch=mox.IgnoreArg()).AndReturn([])

        self.mox.StubOutWithMock(cache, "_get_application_state")
        # each path returns the state configured for that same path (the
        # returns used to be crossed, which only went unnoticed because both
        # mocks carry identical data)
        cache._get_application_state('path1/foo').AndReturn(app_state1)
        cache._get_application_state('path1/bar').AndReturn(app_state2)

        self.mox.ReplayAll()

        cache._walk('path1', result)
        # assertEqual: the assertEquals alias is deprecated and no longer
        # exists in Python 3.12+
        self.assertEqual(result, compare)

        self.mox.VerifyAll()
Ejemplo n.º 5
0
    def __init__(self, configuration, zoo_keeper, web_socket_clients,
                 time_estimate_cache):
        """
        Store the collaborators and create an empty state cache plus the
        message throttle used for outgoing messages.

        :type configuration: zoom.config.configuration.Configuration
        :type zoo_keeper: zoom.zoo_keeper.ZooKeeper
        :type web_socket_clients: list
        :type time_estimate_cache: zoom.www.cache.time_estimate_cache.TimeEstimateCache
        """
        # collaborators handed in by the caller
        self._configuration = configuration
        self._zoo_keeper = zoo_keeper
        self._web_socket_clients = web_socket_clients
        self._time_estimate_cache = time_estimate_cache

        # starts empty
        self._path_to_host_mapping = {}

        # the cache is itself a states message tagged with the environment
        states = ApplicationStatesMessage()
        self._cache = states
        states.set_environment(configuration.environment)

        self._message_throttle = MessageThrottle(configuration,
                                                 web_socket_clients)
Ejemplo n.º 6
0
    def _on_update_path(self, path):
        """
        Walk ``path`` again and propagate what was found.

        The walk result is merged into the cache, entries flagged as deleted
        are removed from the cache, the walk result is queued on the message
        throttle and the time estimate cache is handed the cached states.
        Any exception is logged and not re-raised.

        :type path: str
        """
        try:
            changes = ApplicationStatesMessage()
            self._walk(path, changes)

            cache = self._cache
            cache.update(changes.application_states)
            cache.remove_deletes()

            self._message_throttle.add_message(changes)
            self._time_estimate_cache.update_states(cache.application_states)

        except Exception:
            logging.exception('An unhandled Exception has occurred')
    def __init__(self, configuration, zoo_keeper, web_socket_clients,
                 time_estimate_cache):
        """
        Store the collaborators and create an empty state cache plus the
        message throttle used for outgoing messages.

        :type configuration: zoom.config.configuration.Configuration
        :type zoo_keeper: zoom.zoo_keeper.ZooKeeper
        :type web_socket_clients: list
        :type time_estimate_cache: zoom.www.cache.time_estimate_cache.TimeEstimateCache
        """
        # collaborators handed in by the caller
        self._configuration = configuration
        self._zoo_keeper = zoo_keeper
        self._web_socket_clients = web_socket_clients
        self._time_estimate_cache = time_estimate_cache

        # starts empty
        self._path_to_host_mapping = {}

        # the cache is itself a states message tagged with the environment
        states = ApplicationStatesMessage()
        self._cache = states
        states.set_environment(configuration.environment)

        self._message_throttle = MessageThrottle(configuration,
                                                 web_socket_clients)
Ejemplo n.º 8
0
class ApplicationStateCache(object):
    """
    Cache of application states read from ZooKeeper.

    The cache walks the application state tree, builds one ApplicationState
    per leaf node and queues changes on a MessageThrottle. ZooKeeper watches
    registered during the walk re-run it for the path that changed.
    """

    def __init__(self, configuration, zoo_keeper, web_socket_clients,
                 time_estimate_cache):
        """
        :type configuration: zoom.config.configuration.Configuration
        :type zoo_keeper: zoom.zoo_keeper.ZooKeeper
        :type web_socket_clients: list
        :type time_estimate_cache: zoom.www.cache.time_estimate_cache.TimeEstimateCache
        """
        # host -> {path: valid} (see _update_mapping)
        self._path_to_host_mapping = dict()
        self._configuration = configuration
        self._cache = ApplicationStatesMessage()
        self._cache.set_environment(self._configuration.environment)
        self._zoo_keeper = zoo_keeper
        self._web_socket_clients = web_socket_clients

        self._time_estimate_cache = time_estimate_cache
        self._message_throttle = MessageThrottle(configuration,
                                                 web_socket_clients)

    @property
    def host_mapping(self):
        """
        The host -> {path: valid} mapping maintained by _update_mapping.
        :rtype: dict
        """
        return self._path_to_host_mapping

    def start(self):
        """Start the message throttle."""
        self._message_throttle.start()

    def stop(self):
        """Stop the message throttle."""
        self._message_throttle.stop()

    def load(self):
        """
        Return the cache, filling it from ZooKeeper first when it is empty.
        :rtype: zoom.messages.application_states.ApplicationStatesMessage
        """
        if not len(self._cache):
            self._load()

        return self._cache

    def reload(self):
        """Empty the cache and rebuild it from the application state root."""
        self._cache.clear()
        self._on_update_path(self._configuration.application_state_path)

    @TimeThis(__file__)
    def _load(self):
        """
        Rebuild the cache from scratch by walking the application state
        root, then hand the states to the time estimate cache and drop
        entries flagged as deleted.
        """
        self._cache.clear()

        self._walk(self._configuration.application_state_path, self._cache)
        logging.info(
            "Application state cache loaded from ZooKeeper {0}".format(
                self._configuration.application_state_path))

        self._time_estimate_cache.update_states(self._cache.application_states)
        self._cache.remove_deletes()

    def _build_default_override_store(self):
        """Create the override node (and its parents) holding an empty dict."""
        logging.debug('Override storage node not found, creating')
        _template = {}
        self._zoo_keeper.create(self._configuration.override_node,
                                json.dumps(_template),
                                makepath=True)

    def _update_override_info(self, path, override_key, override_value):
        """
        Update zookeeper with override values passed in.

        The override node holds a JSON dict of path -> {key: value}. When
        the node does not exist yet it is created and the update is retried.

        :type path: str
        :type override_key: str
        :type override_value: str
        """
        logging.debug(
            "_update_override_info: path: {0}, override_key: {1} override_value: {2}"
            .format(path, override_key, override_value))
        try:
            override_str, stat = self._zoo_keeper.get(
                self._configuration.override_node)
            override_dict = json.loads(override_str)

            update = {override_key: override_value}
            state = override_dict.get(path, {})
            state.update(update)
            override_dict.update({path: state})

            self._zoo_keeper.set(self._configuration.override_node,
                                 json.dumps(override_dict))
        except NoNodeError as err:
            logging.debug('Unable to find {0}, {1}'.format(
                self._configuration.override_node, err))
            self._build_default_override_store()
            # retry now that the node exists
            self._update_override_info(path, override_key, override_value)

    def manual_update(self, path, key, value):
        """
        Manual override from client of specific value.

        Does nothing when ``path`` is not in the cache. Otherwise the
        override is persisted, the cached state is changed in place and the
        changed state is queued on the message throttle.

        :type path: str
        :type key: str
        :type value: str
        """
        state = self._cache.application_states.get(path, None)
        if state is not None:
            self._update_override_info(path, key, value)
            message = ApplicationStatesMessage()
            state[key] = value
            message.update({path: state})
            self._message_throttle.add_message(message)

    @connected_with_return(None)
    def _walk(self, path, result):
        """
        Recursively collect the application states below ``path`` into
        ``result``.

        Nodes with children are descended into; a node without children
        contributes its own state. A node that no longer exists is recorded
        as a state flagged ``delete=True``. Any other exception is logged
        and swallowed.

        :type path: str
        :type result: zoom.www.messages.application_states.ApplicationStatesMessage
        """
        try:
            children = self._zoo_keeper.get_children(path,
                                                     watch=self._on_update)

            if children:
                for child in children:
                    self._walk(zk_path_join(path, child), result)
            else:
                app_state = self._get_application_state(path)
                result.update(
                    {app_state.configuration_path: app_state.to_dictionary()})

        except NoNodeError:
            result.update({
                path:
                ApplicationState(configuration_path=path,
                                 delete=True).to_dictionary(),
            })

        except Exception:
            logging.exception('An unhandled Exception has occurred while '
                              'running ApplicationStateCache.walk.')

    def _get_app_details(self, path):
        """
        Read a node and decode its JSON payload, leaving a watch on it.

        An empty or non-JSON payload yields an empty dict.

        :type path: str
        :rtype: dict, kazoo.protocol.states.ZnodeStat
        """
        raw_data, stat = self._zoo_keeper.get(path, watch=self._on_update)

        data = {}

        if raw_data:
            try:
                data = json.loads(raw_data)
            except ValueError:
                # best effort: fall back to an empty dict, but leave a trace
                logging.debug(
                    'Data at {0} is not valid JSON, ignoring it'.format(path))

        return data, stat

    def _get_application_state(self, path):
        """
        Build the ApplicationState for a leaf node.

        A persistent node (``ephemeralOwner == 0``) is reported as STOPPED
        using its own data. Any other node is reported as RUNNING: its name
        is taken as the host and the details come from its parent node.

        :type path: str
        :rtype: zoom.entities.application_state.ApplicationState
        """
        data, stat = self._get_app_details(path)

        # persistent node
        if stat.ephemeralOwner == 0:
            # watch node to see if children are created
            self._zoo_keeper.get_children(path, watch=self._on_update)
            host = data.get('host', 'Unknown')
            name = data.get('name', os.path.basename(path))
            agent_path = zk_path_join(self._configuration.agent_state_path,
                                      host)

            # if the agent is down, update state and mode with unknown
            agent_up = bool(
                self._zoo_keeper.exists(agent_path,
                                        watch=self._on_agent_state_update))

            valid = True
            if host in (None, 'Unknown'):
                data['state'] = 'invalid'
                data['mode'] = 'unknown'
                valid = False
            elif not agent_up:
                data['state'] = 'unknown'
                data['mode'] = 'unknown'
                valid = False
            else:
                # agent is up: the app must be among its registered components
                agent_data, _ = self._zoo_keeper.get(agent_path)
                registered_comps = json.loads(agent_data).get('components', [])
                if name not in registered_comps:
                    data['state'] = 'invalid'
                    data['mode'] = 'unknown'
                    valid = False

            self._update_mapping(host, {path: valid})

            application_state = ApplicationState(
                application_name=name,
                configuration_path=path,
                application_status=ApplicationStatus.STOPPED,
                application_host=host,
                last_update=stat.last_modified,
                start_stop_time=data.get('start_stop_time', ''),
                error_state=data.get('state', 'unknown'),
                local_mode=data.get('mode', 'unknown'),
                login_user=data.get('login_user', 'Zoom'),
                read_only=data.get('read_only', False),
                last_command=self._get_last_command(data),
                pd_disabled=self._get_existing_attribute(path, 'pd_disabled'),
                grayed=self._get_existing_attribute(path, 'grayed'),
                platform=data.get('platform', 'unknown'),
                restart_count=data.get('restart_count', 0),
                load_times=self._time_estimate_cache.get_graphite_data(path))

        # ephemeral node
        else:
            # watch node to see if it goes away
            self._zoo_keeper.get_children(os.path.dirname(path),
                                          watch=self._on_update)

            host = os.path.basename(path)
            # if it is running, path = /app/path/HOSTNAME
            # need to convert to /app/path to get the app_details
            config_path = os.path.dirname(path)
            parent_data, _ = self._get_app_details(config_path)

            self._update_mapping(host, {config_path: True})

            application_state = ApplicationState(
                application_name=parent_data.get(
                    'name', os.path.basename(config_path)),
                configuration_path=config_path,
                application_status=ApplicationStatus.RUNNING,
                application_host=host,
                last_update=stat.last_modified,
                start_stop_time=parent_data.get('start_stop_time', ''),
                error_state=parent_data.get('state', 'unknown'),
                local_mode=parent_data.get('mode', 'unknown'),
                login_user=parent_data.get('login_user', 'Zoom'),
                read_only=parent_data.get('read_only', False),
                last_command=self._get_last_command(parent_data),
                pd_disabled=self._get_existing_attribute(
                    config_path, 'pd_disabled'),
                grayed=self._get_existing_attribute(config_path, 'grayed'),
                platform=parent_data.get('platform', 'unknown'),
                restart_count=parent_data.get('restart_count', 0),
                load_times=self._time_estimate_cache.get_graphite_data(
                    config_path))

        return application_state

    def _update_mapping(self, host, data):
        """
        Merge ``data`` into the path mapping kept for ``host``.

        :type host: str
        :type data: dict
            {'/some/path', bool}
        """
        d = self._path_to_host_mapping.get(host, {})
        d.update(data)
        self._path_to_host_mapping[host] = d

    def _on_update(self, event):
        """
        Callback to send updates via websocket on application state changes.
        :type event: kazoo.protocol.states.WatchedEvent
        """
        self._on_update_path(event.path)

    def _on_update_path(self, path):
        """
        Walk ``path`` again, merge the result into the cache, drop entries
        flagged as deleted, queue the result on the message throttle and
        hand the cached states to the time estimate cache. Exceptions are
        logged and not re-raised.

        :type path: str
        """
        try:
            message = ApplicationStatesMessage()

            self._walk(path, message)

            self._cache.update(message.application_states)
            self._cache.remove_deletes()

            self._message_throttle.add_message(message)

            self._time_estimate_cache.update_states(
                self._cache.application_states)

        except Exception:
            logging.exception('An unhandled Exception has occurred')

    def _on_agent_state_update(self, event):
        """
        This is to capture when an agent goes down.
        :type event: kazoo.protocol.states.WatchedEvent
        """
        host = os.path.basename(event.path)
        logging.info(
            'Agent on host {0} has changed up/down state.'.format(host))
        paths = self._path_to_host_mapping.get(host, {})
        # Iterate over a snapshot: refreshing a path ends up in
        # _update_mapping, which may add keys to this very dict, and a dict
        # must not change size while it is being iterated.
        for p in list(paths):
            self._on_update_path(p)

    def _get_last_command(self, data):
        """
        Derive the last command from the 'state' entry of ``data``:
        "Start" for starting/started, "Stop" for stopping/stopped, ''
        otherwise.

        :type data: dict
        :rtype: str
        """
        state = data.get('state', 'Unknown')
        if state in ['starting', 'started']:
            return "Start"
        elif state in ['stopping', 'stopped']:
            return "Stop"
        else:
            return ''

    def _get_existing_attribute(self, path, attr):
        """
        Look for existing value for some value in the app state cache.
        If there is an override value, use that. Else use the existing state.
        Returns None when neither an override nor a cached state exists.

        :type path: str
        :type attr: str
        """
        state = self._cache.application_states.get(path, None)
        override = {}

        try:
            data, stat = self._zoo_keeper.get(
                self._configuration.override_node)
            override = json.loads(data)
        except NoNodeError:
            # No override store means no overrides. This must not escape:
            # the caller chain ends in _walk, whose NoNodeError handler
            # would flag the application path itself as deleted.
            logging.debug('Override node {0} does not exist, no overrides '
                          'applied'.format(self._configuration.override_node))
        except (TypeError, ValueError) as err:
            logging.critical('There was a problem returning values from the '
                             'override cache: {0}'.format(err))

        setting = override.get(path, {}).get(attr, None)

        if setting is not None:
            return setting
        elif state is None:
            return None
        else:
            return state.get(attr)
class ApplicationStateCache(object):
    """
    Cache of application states read from ZooKeeper.

    The cache walks the application state tree, builds one ApplicationState
    per leaf node and queues changes on a MessageThrottle. ZooKeeper watches
    registered during the walk re-run it for the path that changed.
    """

    def __init__(self, configuration, zoo_keeper, web_socket_clients,
                 time_estimate_cache):
        """
        :type configuration: zoom.config.configuration.Configuration
        :type zoo_keeper: zoom.zoo_keeper.ZooKeeper
        :type web_socket_clients: list
        :type time_estimate_cache: zoom.www.cache.time_estimate_cache.TimeEstimateCache
        """
        # host -> {path: valid} (see _update_mapping)
        self._path_to_host_mapping = dict()
        self._configuration = configuration
        self._cache = ApplicationStatesMessage()
        self._cache.set_environment(self._configuration.environment)
        self._zoo_keeper = zoo_keeper
        self._web_socket_clients = web_socket_clients

        self._time_estimate_cache = time_estimate_cache
        self._message_throttle = MessageThrottle(configuration,
                                                 web_socket_clients)

    @property
    def host_mapping(self):
        """
        The host -> {path: valid} mapping maintained by _update_mapping.
        :rtype: dict
        """
        return self._path_to_host_mapping

    def start(self):
        """Start the message throttle."""
        self._message_throttle.start()

    def stop(self):
        """Stop the message throttle."""
        self._message_throttle.stop()

    def load(self):
        """
        Return the cache, filling it from ZooKeeper first when it is empty.
        :rtype: zoom.messages.application_states.ApplicationStatesMessage
        """
        if not len(self._cache):
            self._load()

        return self._cache

    def reload(self):
        """Empty the cache and rebuild it from the application state root."""
        self._cache.clear()
        self._on_update_path(self._configuration.application_state_path)

    @TimeThis(__file__)
    def _load(self):
        """
        Rebuild the cache from scratch by walking the application state
        root, then hand the states to the time estimate cache and drop
        entries flagged as deleted.
        """
        self._cache.clear()

        self._walk(self._configuration.application_state_path, self._cache)
        logging.info("Application state cache loaded from ZooKeeper {0}"
                     .format(self._configuration.application_state_path))

        self._time_estimate_cache.update_states(
            self._cache.application_states)
        self._cache.remove_deletes()

    def _build_default_override_store(self):
        """Create the override node (and its parents) holding an empty dict."""
        logging.debug('Override storage node not found, creating')
        _template = {}
        self._zoo_keeper.create(self._configuration.override_node,
                                json.dumps(_template), makepath=True)

    def _update_override_info(self, path, override_key, override_value):
        """
        Update zookeeper with override values passed in.

        The override node holds a JSON dict of path -> {key: value}. When
        the node does not exist yet it is created and the update is retried.

        :type path: str
        :type override_key: str
        :type override_value: str
        """
        logging.debug("_update_override_info: path: {0}, override_key: {1} override_value: {2}"
                      .format(path, override_key, override_value))
        try:
            override_str, stat = self._zoo_keeper.get(self._configuration.override_node)
            override_dict = json.loads(override_str)

            update = {override_key: override_value}
            state = override_dict.get(path, {})
            state.update(update)
            override_dict.update({path: state})

            self._zoo_keeper.set(self._configuration.override_node,
                                 json.dumps(override_dict))
        except NoNodeError as err:
            logging.debug('Unable to find {0}, {1}'
                          .format(self._configuration.override_node, err))
            self._build_default_override_store()
            # retry now that the node exists
            self._update_override_info(path, override_key, override_value)

    def manual_update(self, path, key, value):
        """
        Manual override from client of specific value.

        Does nothing when ``path`` is not in the cache. Otherwise the
        override is persisted, the cached state is changed in place and the
        changed state is queued on the message throttle.

        :type path: str
        :type key: str
        :type value: str
        """
        state = self._cache.application_states.get(path, None)
        if state is not None:
            self._update_override_info(path, key, value)
            message = ApplicationStatesMessage()
            state[key] = value
            message.update({path: state})
            self._message_throttle.add_message(message)

    @connected_with_return(None)
    def _walk(self, path, result):
        """
        Recursively collect the application states below ``path`` into
        ``result``.

        Nodes with children are descended into; a node without children
        contributes its own state. A node that no longer exists is recorded
        as a state flagged ``delete=True``. Any other exception is logged
        and swallowed.

        :type path: str
        :type result: zoom.www.messages.application_states.ApplicationStatesMessage
        """
        try:
            children = self._zoo_keeper.get_children(path, watch=self._on_update)

            if children:
                for child in children:
                    self._walk(zk_path_join(path, child), result)
            else:
                app_state = self._get_application_state(path)
                result.update(
                    {app_state.configuration_path: app_state.to_dictionary()}
                )

        except NoNodeError:
            result.update({
                path: ApplicationState(configuration_path=path,
                                       delete=True).to_dictionary(),
            })

        except Exception:
            logging.exception('An unhandled Exception has occurred while '
                              'running ApplicationStateCache.walk.')

    def _get_app_details(self, path):
        """
        Read a node and decode its JSON payload, leaving a watch on it.

        An empty or non-JSON payload yields an empty dict.

        :type path: str
        :rtype: dict, kazoo.protocol.states.ZnodeStat
        """
        raw_data, stat = self._zoo_keeper.get(path, watch=self._on_update)

        data = {}

        if raw_data:
            try:
                data = json.loads(raw_data)
            except ValueError:
                # best effort: fall back to an empty dict, but leave a trace
                logging.debug(
                    'Data at {0} is not valid JSON, ignoring it'.format(path))

        return data, stat

    def _get_application_state(self, path):
        """
        Build the ApplicationState for a leaf node.

        A persistent node (``ephemeralOwner == 0``) is reported as STOPPED
        using its own data. Any other node is reported as RUNNING: its name
        is taken as the host and the details come from its parent node.

        :type path: str
        :rtype: zoom.entities.application_state.ApplicationState
        """
        data, stat = self._get_app_details(path)

        # persistent node
        if stat.ephemeralOwner == 0:
            # watch node to see if children are created
            self._zoo_keeper.get_children(path, watch=self._on_update)
            host = data.get('host', 'Unknown')
            name = data.get('name', os.path.basename(path))
            agent_path = zk_path_join(self._configuration.agent_state_path,
                                      host)

            # if the agent is down, update state and mode with unknown
            agent_up = bool(self._zoo_keeper.exists(
                agent_path,
                watch=self._on_agent_state_update))

            valid = True
            if host in (None, 'Unknown'):
                data['state'] = 'invalid'
                data['mode'] = 'unknown'
                valid = False
            elif not agent_up:
                data['state'] = 'unknown'
                data['mode'] = 'unknown'
                valid = False
            else:
                # agent is up: the app must be among its registered components
                agent_data, _ = self._zoo_keeper.get(agent_path)
                registered_comps = json.loads(agent_data).get('components', [])
                if name not in registered_comps:
                    data['state'] = 'invalid'
                    data['mode'] = 'unknown'
                    valid = False

            self._update_mapping(host, {path: valid})

            application_state = ApplicationState(
                application_name=name,
                configuration_path=path,
                application_status=ApplicationStatus.STOPPED,
                application_host=host,
                last_update=stat.last_modified,
                start_stop_time=data.get('start_stop_time', ''),
                error_state=data.get('state', 'unknown'),
                local_mode=data.get('mode', 'unknown'),
                login_user=data.get('login_user', 'Zoom'),
                read_only=data.get('read_only', False),
                last_command=self._get_last_command(data),
                pd_disabled=self._get_existing_attribute(path, 'pd_disabled'),
                grayed=self._get_existing_attribute(path, 'grayed'),
                platform=data.get('platform', 'unknown'),
                restart_count=data.get('restart_count', 0),
                load_times=self._time_estimate_cache.get_graphite_data(path)
            )

        # ephemeral node
        else:
            # watch node to see if it goes away
            self._zoo_keeper.get_children(os.path.dirname(path),
                                          watch=self._on_update)

            host = os.path.basename(path)
            # if it is running, path = /app/path/HOSTNAME
            # need to convert to /app/path to get the app_details
            config_path = os.path.dirname(path)
            parent_data, _ = self._get_app_details(config_path)

            self._update_mapping(host, {config_path: True})

            application_state = ApplicationState(
                application_name=parent_data.get('name',
                                                 os.path.basename(config_path)),
                configuration_path=config_path,
                application_status=ApplicationStatus.RUNNING,
                application_host=host,
                last_update=stat.last_modified,
                start_stop_time=parent_data.get('start_stop_time', ''),
                error_state=parent_data.get('state', 'unknown'),
                local_mode=parent_data.get('mode', 'unknown'),
                login_user=parent_data.get('login_user', 'Zoom'),
                read_only=parent_data.get('read_only', False),
                last_command=self._get_last_command(parent_data),
                pd_disabled=self._get_existing_attribute(config_path, 'pd_disabled'),
                grayed=self._get_existing_attribute(config_path, 'grayed'),
                platform=parent_data.get('platform', 'unknown'),
                restart_count=parent_data.get('restart_count', 0),
                load_times=self._time_estimate_cache.get_graphite_data(config_path)
            )

        return application_state

    def _update_mapping(self, host, data):
        """
        Merge ``data`` into the path mapping kept for ``host``.

        :type host: str
        :type data: dict
            {'/some/path', bool}
        """
        d = self._path_to_host_mapping.get(host, {})
        d.update(data)
        self._path_to_host_mapping[host] = d

    def _on_update(self, event):
        """
        Callback to send updates via websocket on application state changes.
        :type event: kazoo.protocol.states.WatchedEvent
        """
        self._on_update_path(event.path)

    def _on_update_path(self, path):
        """
        Walk ``path`` again, merge the result into the cache, drop entries
        flagged as deleted, queue the result on the message throttle and
        hand the cached states to the time estimate cache. Exceptions are
        logged and not re-raised.

        :type path: str
        """
        try:
            message = ApplicationStatesMessage()

            self._walk(path, message)

            self._cache.update(message.application_states)
            self._cache.remove_deletes()

            self._message_throttle.add_message(message)

            self._time_estimate_cache.update_states(
                self._cache.application_states)

        except Exception:
            logging.exception('An unhandled Exception has occurred')

    def _on_agent_state_update(self, event):
        """
        This is to capture when an agent goes down.
        :type event: kazoo.protocol.states.WatchedEvent
        """
        host = os.path.basename(event.path)
        logging.info('Agent on host {0} has changed up/down state.'.format(host))
        paths = self._path_to_host_mapping.get(host, {})
        # Iterate over a snapshot: refreshing a path ends up in
        # _update_mapping, which may add keys to this very dict, and a dict
        # must not change size while it is being iterated.
        for p in list(paths):
            self._on_update_path(p)

    def _get_last_command(self, data):
        """
        Derive the last command from the 'state' entry of ``data``:
        "Start" for starting/started, "Stop" for stopping/stopped, ''
        otherwise.

        :type data: dict
        :rtype: str
        """
        state = data.get('state', 'Unknown')
        if state in ['starting', 'started']:
            return "Start"
        elif state in ['stopping', 'stopped']:
            return "Stop"
        else:
            return ''

    def _get_existing_attribute(self, path, attr):
        """
        Look for existing value for some value in the app state cache.
        If there is an override value, use that. Else use the existing state.
        Returns None when neither an override nor a cached state exists.

        :type path: str
        :type attr: str
        """
        state = self._cache.application_states.get(path, None)
        override = {}

        try:
            data, stat = self._zoo_keeper.get(self._configuration.override_node)
            override = json.loads(data)
        except NoNodeError:
            # No override store means no overrides. This must not escape:
            # the caller chain ends in _walk, whose NoNodeError handler
            # would flag the application path itself as deleted.
            logging.debug('Override node {0} does not exist, no overrides '
                          'applied'.format(self._configuration.override_node))
        except (TypeError, ValueError) as err:
            logging.critical('There was a problem returning values from the '
                             'override cache: {0}'.format(err))

        setting = override.get(path, {}).get(attr, None)

        if setting is not None:
            return setting
        elif state is None:
            return None
        else:
            return state.get(attr)
Ejemplo n.º 10
0
class ApplicationStateCache(object):
    """
    Cache of application states built from the ZooKeeper tree.

    The tree under the configured application state path is walked, the
    result is kept in an ApplicationStatesMessage, and the affected path is
    re-walked whenever one of the registered ZooKeeper watches fires. Each
    refresh is handed to a MessageThrottle and to the time estimate cache.
    """

    def __init__(self, configuration, zoo_keeper, web_socket_clients,
                 time_estimate_cache):
        """
        :type configuration: zoom.config.configuration.Configuration
        :type zoo_keeper: zoom.zoo_keeper.ZooKeeper
        :type web_socket_clients: list
        :type time_estimate_cache: zoom.www.cache.time_estimate_cache.TimeEstimateCache
        """
        # Despite the name, this maps host -> application configuration path.
        # NOTE(review): only one path is kept per host, so a host running
        # several applications keeps just the most recently seen one --
        # confirm whether that is intended.
        self._path_to_host_mapping = dict()
        self._configuration = configuration
        self._cache = ApplicationStatesMessage()
        self._cache.set_environment(self._configuration.environment)
        self._zoo_keeper = zoo_keeper
        self._web_socket_clients = web_socket_clients

        self._time_estimate_cache = time_estimate_cache
        self._message_throttle = MessageThrottle(configuration,
                                                 web_socket_clients)
        # Most recent command derived by _get_last_command.
        self._last_command = None

    def start(self):
        """Start the message throttle."""
        self._message_throttle.start()

    def stop(self):
        """Stop the message throttle."""
        self._message_throttle.stop()

    def load(self):
        """
        Return the cached states, walking ZooKeeper first if the cache is
        empty.

        :rtype: zoom.messages.application_states.ApplicationStatesMessage
        """
        if not len(self._cache):
            self._load()

        return self._cache

    def reload(self):
        """Empty the cache and refresh it from the application state root."""
        self._cache.clear()
        self._on_update_path(self._configuration.application_state_path)

    def _load(self):
        """Rebuild the cache from scratch by walking the whole state tree."""
        self._cache.clear()

        self._walk(self._configuration.application_state_path, self._cache)
        logging.info("Application state cache loaded from ZooKeeper {0}"
                     .format(self._configuration.application_state_path))

        self._time_estimate_cache.update_states(
            self._cache.application_states)
        self._cache.remove_deletes()

    @connected_with_return(None)
    def _walk(self, path, result):
        """
        Recursively collect the state of every leaf node under `path` into
        `result`, registering a children watch on each visited node.

        A node that no longer exists is recorded as a delete entry. Any other
        exception is logged and swallowed so that one bad node does not abort
        the caller.

        :type path: str
        :type result: zoom.www.messages.application_states.ApplicationStatesMessage
        """
        try:
            children = self._zoo_keeper.get_children(path, watch=self._on_update)

            if children:
                for child in children:
                    self._walk(os.path.join(path, child), result)
            else:
                app_state = self._get_application_state(path)
                result.update(
                    {app_state.configuration_path: app_state.to_dictionary()}
                )

        except NoNodeError:
            logging.debug('Node at {0} no longer exists.'.format(path))
            result.update({path: ApplicationState(configuration_path=path,
                                                  delete=True).to_dictionary(),
            })

        except Exception:
            logging.exception('An unhandled Exception has occurred while '
                              'running ApplicationStateCache.walk.')

    def _get_app_details(self, path):
        """
        Read the node at `path` (registering a data watch) and decode its
        content as JSON.

        Empty, missing or undecodable content yields an empty dict.

        :type path: str
        :rtype: dict, kazoo.protocol.states.ZnodeStat
        """
        raw_data, stat = self._zoo_keeper.get(path, watch=self._on_update)

        data = {}

        if raw_data != '':
            try:
                data = json.loads(raw_data)
            except (TypeError, ValueError):
                # TypeError covers a None payload, ValueError covers invalid
                # JSON. Both are treated as "no details" rather than failing
                # the whole walk for this node.
                logging.debug('Data at {0} could not be decoded as JSON; '
                              'treating it as empty.'.format(path))

        return data, stat

    def _get_application_state(self, path):
        """
        Build the ApplicationState for the leaf node at `path`.

        A persistent node is reported as STOPPED with details read from the
        node itself. An ephemeral node is reported as RUNNING, with the host
        taken from the node name and details read from its parent.

        :type path: str
        :rtype: zoom.entities.application_state.ApplicationState
        """
        data, stat = self._get_app_details(path)

        # persistent node
        if stat.ephemeralOwner == 0:
            # watch node to see if children are created
            self._zoo_keeper.get_children(path, watch=self._on_update)
            host = data.get('host', 'Unknown')

            # Only build and probe the agent path when there is a host:
            # joining a None host would raise before the check could run.
            agent_is_up = False
            if host is not None:
                agent_path = os.path.join(
                    self._configuration.agent_state_path, host)
                agent_is_up = self._zoo_keeper.exists(
                    agent_path, watch=self._on_agent_state_update)

            if agent_is_up:
                self._path_to_host_mapping[host] = path
            else:
                # if the agent is down, update state and mode with unknown
                data['state'] = 'unknown'
                data['mode'] = 'unknown'

            application_state = ApplicationState(
                application_name=data.get('name', os.path.basename(path)),
                configuration_path=path,
                application_status=ApplicationStatus.STOPPED,
                application_host=host,
                completion_time=stat.last_modified,
                trigger_time=data.get('trigger_time', ''),
                error_state=data.get('state', 'unknown'),
                local_mode=data.get('mode', 'unknown'),
                login_user=data.get('login_user', 'Zoom'),
                fqdn=data.get('fqdn', host),
                last_command=self._get_last_command(data)
            )

        # ephemeral node
        else:
            # watch node to see if it goes away
            self._zoo_keeper.get_children(os.path.dirname(path),
                                          watch=self._on_update)

            host = os.path.basename(path)
            # if it is running, path = /app/path/HOSTNAME
            # need to convert to /app/path to get the app_details
            config_path = os.path.dirname(path)
            parent_data, parent_stat = self._get_app_details(config_path)

            self._path_to_host_mapping[host] = config_path

            application_state = ApplicationState(
                application_name=parent_data.get('name',
                                                 os.path.basename(config_path)),
                configuration_path=config_path,
                application_status=ApplicationStatus.RUNNING,
                application_host=host,
                completion_time=stat.last_modified,
                trigger_time=parent_data.get('trigger_time', ''),
                error_state=parent_data.get('state', 'unknown'),
                local_mode=parent_data.get('mode', 'unknown'),
                login_user=parent_data.get('login_user', 'Zoom'),
                fqdn=parent_data.get('fqdn', host),
                last_command=self._get_last_command(parent_data)
            )
        return application_state

    def _on_update(self, event):
        """
        Callback to send updates via websocket on application state changes.
        :type event: kazoo.protocol.states.WatchedEvent
        """
        self._on_update_path(event.path)

    def _on_update_path(self, path):
        """
        Re-walk `path`, merge the result into the cache, and pass it on to the
        message throttle and the time estimate cache.

        Exceptions are logged and swallowed.

        :type path: str
        """
        try:
            message = ApplicationStatesMessage()

            self._walk(path, message)

            self._cache.update(message.application_states)
            self._cache.remove_deletes()

            self._message_throttle.add_message(message)

            self._time_estimate_cache.update_states(
                self._cache.application_states)

        except Exception:
            logging.exception('An unhandled Exception has occurred')

    def _on_agent_state_update(self, event):
        """
        This is to capture when an agent goes down.
        :type event: kazoo.protocol.states.WatchedEvent
        """
        host = os.path.basename(event.path)
        path = self._path_to_host_mapping.get(host, None)
        if path is not None:  # if data is in the cache
            self._on_update_path(path)

    def _get_last_command(self, data):
        """
        Record and return the last command implied by data['state'].

        'starting' records "Start" and 'stopping' records "Stop"; any other
        state leaves the previously recorded value untouched.

        NOTE(review): the recorded value lives on the cache instance, not per
        application, so it carries over between different applications --
        confirm whether that is intended.

        :type data: dict
        :rtype: str or None
        """
        if data.get('state', 'Unknown') == 'starting':
            self._last_command = "Start"
        elif data.get('state', 'Unknown') == 'stopping':
            self._last_command = "Stop"

        return self._last_command
Ejemplo n.º 11
0
class DataStore(object):
    """
    Builds and coordinates the caches, the alert manager and the PagerDuty
    client, and exposes them through a small set of load/reload helpers and
    read-only properties.
    """

    def __init__(self, configuration, zoo_keeper, task_server):
        """
        Wire up every component; nothing is started here.

        :type configuration: zoom.config.configuration.Configuration
        :type zoo_keeper: kazoo.client.KazooClient
        :type task_server: zoom.www.entities.task_server.TaskServer
        """
        self._configuration = configuration
        self._zoo_keeper = zoo_keeper
        self._task_server = task_server
        self._alert_exceptions = []

        self._pd = PagerDuty(
            configuration.pagerduty_subdomain,
            configuration.pagerduty_api_token,
            configuration.pagerduty_default_svc_key,
            alert_footer=configuration.pagerduty_alert_footer)

        self._alert_manager = AlertManager(
            configuration.alert_path,
            configuration.override_node,
            configuration.application_state_path,
            zoo_keeper,
            self._pd,
            self._alert_exceptions)

        # The same list object is handed to every cache below and is also
        # what the web_socket_clients property returns.
        self._web_socket_clients = []

        self._time_estimate_cache = TimeEstimateCache(
            configuration, self._web_socket_clients)

        self._application_dependency_cache = ApplicationDependencyCache(
            configuration,
            zoo_keeper,
            self._web_socket_clients,
            self._time_estimate_cache)

        self._application_state_cache = ApplicationStateCache(
            configuration,
            zoo_keeper,
            self._web_socket_clients,
            self._time_estimate_cache)

        self._global_cache = GlobalCache(
            configuration, zoo_keeper, self._web_socket_clients)
        self._pd_svc_list_cache = {}

    def start(self):
        """Start each cache and then the alert manager, in a fixed order."""
        logging.info('Starting data store.')
        components = (
            self._global_cache,
            self._application_state_cache,
            self._application_dependency_cache,
            self._time_estimate_cache,
            self._alert_manager,
        )
        for component in components:
            component.start()

    def stop(self):
        """Stop each cache and then the alert manager, in a fixed order."""
        logging.info('Stopping data store.')
        components = (
            self._global_cache,
            self._application_state_cache,
            self._application_dependency_cache,
            self._time_estimate_cache,
            self._alert_manager,
        )
        for component in components:
            component.stop()

    @connected_with_return(ApplicationStatesMessage())
    def load_application_state_cache(self):
        """
        Return the application state cache contents.

        :rtype: zoom.messages.application_states.ApplicationStatesMessage
        """
        logging.info('Loading application states.')
        states = self._application_state_cache.load()
        return states

    @connected_with_return(ApplicationDependenciesMessage())
    def load_application_dependency_cache(self):
        """
        Return the application dependency cache contents.

        :rtype: zoom.messages.application_dependencies.ApplicationDependenciesMessage
        """
        logging.info('Loading application dependencies.')
        dependencies = self._application_dependency_cache.load()
        return dependencies

    @connected_with_return(TimeEstimateMessage())
    def load_time_estimate_cache(self):
        """
        Return the time estimate cache contents.

        :rtype: zoom.messages.timing_estimate.TimeEstimateMessage
        """
        estimates = self._time_estimate_cache.load()
        return estimates

    def get_start_time(self, path):
        """
        Return the graphite data the time estimate cache holds for `path`.

        :rtype: dict
        """
        graphite_data = self._time_estimate_cache.get_graphite_data(path)
        return graphite_data

    @connected_with_return(GlobalModeMessage('{"mode":"Unknown"}'))
    def get_global_mode(self):
        """
        Return the global mode held by the global cache.

        :rtype: zoom.messages.global_mode_message.GlobalModeMessage
        """
        logging.info('Loading global mode.')
        mode = self._global_cache.get_mode()
        return mode

    def reload(self):
        """
        Clear all cache objects and send reloaded data as updates.

        Also clears all tasks on the task server, starts the alert manager
        and refreshes the PagerDuty service dictionary.

        :rtype: dict
        """
        # restart client to destroy any existing watches
        # self._zoo_keeper.restart()
        logging.info('Reloading all cache types.')
        self._task_server.clear_all_tasks()
        self._global_cache.on_update()
        for cache in (self._application_state_cache,
                      self._application_dependency_cache,
                      self._time_estimate_cache):
            cache.reload()
        self._alert_manager.start()
        self._pd_svc_list_cache = self._pd.get_service_dict()
        return {'cache_clear': 'okay'}

    def load(self):
        """
        Load every cache type and refresh the PagerDuty service dictionary.

        :rtype: dict
        """
        logging.info('Loading all cache types.')
        self._global_cache.on_update()
        for cache in (self._application_state_cache,
                      self._application_dependency_cache,
                      self._time_estimate_cache):
            cache.load()
        self._pd_svc_list_cache = self._pd.get_service_dict()
        return {'cache_load': 'okay'}

    @property
    def pd_client(self):
        """
        The PagerDuty client created by this data store.

        :rtype: zoom.common.pagerduty.PagerDuty
        """
        return self._pd

    @property
    def web_socket_clients(self):
        """
        The client list shared with the caches.

        :rtype: list
        """
        return self._web_socket_clients

    @property
    def application_state_cache(self):
        """
        The application state cache instance.

        :rtype: zoom.www.cache.application_state_cache.ApplicationStateCache
        """
        return self._application_state_cache

    @property
    def alert_exceptions(self):
        """
        The alert exception list shared with the alert manager.

        :rtype: list
        """
        return self._alert_exceptions

    @property
    def pagerduty_services(self):
        """
        The PagerDuty service dictionary fetched by the last load or reload
        (empty until then).

        :rtype: dict
        """
        return self._pd_svc_list_cache