Example #1
0
    def start(self):
        """
        Start KazooClient and add connection listener.

        Any exception raised while resolving the connection string, building
        the client or starting it is logged together with its traceback and
        then suppressed; nothing is propagated to the caller.
        """
        try:
            # Resolve the connection string once: it is needed both to build
            # the client and for the log message below.
            conn_string = get_zk_conn_string(env=self._env)
            self.kazoo = KazooClient(hosts=conn_string, timeout=60.0)
            self.kazoo.add_listener(self._zk_listener)
            self.kazoo.start()
            logging.info("ZooKeeper client started against cluster <{0}>"
                         .format(conn_string))

        except Exception as e:
            # Same message as before, but logging.exception also records the
            # traceback so the failing call can be identified.
            logging.exception(e)
Example #2
0
    def __init__(self, *args, **kwargs):
        """
        Install signal handlers, connect to ZooKeeper and build the servers.

        ``kwargs`` is forwarded unchanged to Configuration; its 'environment'
        entry (None when absent) is passed to get_zk_conn_string.
        """
        # SIGINT and SIGTERM share the same handler
        for signum in (signal.SIGINT, signal.SIGTERM):
            signal.signal(signum, self._handle_sig_event)

        self._prev_connection_state = None

        # connect before reading the configuration, which needs the client
        zk_hosts = get_zk_conn_string(kwargs.get('environment'))
        self._zoo_keeper = KazooClient(hosts=zk_hosts, timeout=60.0)
        self._zoo_keeper.start()
        self._configuration = Configuration(self._zoo_keeper, **kwargs)

        self._bootstrap_zookeeper_paths()

        # each server is handed the shared configuration and client
        conf = self._configuration
        zk = self._zoo_keeper
        self._task_server = TaskServer(conf, zk)
        self._data_store = DataStore(conf, zk, self._task_server)
        self._web_server = WebServer(conf, self._data_store,
                                     self._task_server, zk)
Example #3
0
    def __init__(self, port):
        """
        Read config and spawn child processes.

        Starts the ZooKeeper client and then blocks, polling once a second,
        until ``self._settings`` has been populated. Afterwards the temp
        directory, the task client and the REST server are created and the
        SIGINT/SIGTERM handlers are installed.

        :type port: int
        """
        self._log = logging.getLogger('sent.daemon')
        self._log.info('Creating Sentinel')

        self._port = port
        self.children = dict()
        self._settings = None
        self._system = get_system()
        self._hostname = socket.getfqdn()
        self._prev_state = None
        self.listener_lock = Lock()
        self.version = get_version()
        self.task_client = None

        self.zkclient = KazooClient(hosts=get_zk_conn_string(),
                                    timeout=60.0,
                                    handler=SequentialThreadingHandler(),
                                    logger=logging.getLogger('kazoo.daemon'))

        self.zkclient.add_listener(self._zk_listener)
        # this will run self._reset_after_connection_loss
        self.zkclient.start()
        # NOTE(review): _settings is never assigned in this method; presumably
        # the connection handling triggered above fills it in -- confirm.
        while not self._settings:
            self._log.info('Waiting for settings.')
            time.sleep(1)

        # Fetch the 'zookeeper' section once and tolerate its absence, so the
        # temp directory and the task client treat a missing section the same
        # way (previously only the task lookup supplied a default).
        zk_settings = self._settings.get('zookeeper', {})

        self._tmp_dir = os.path.join(zk_settings.get('temp_directory', '/'),
                                     'ruok')

        self.task_client = ZKTaskClient(self.children,
                                        self.zkclient,
                                        zk_settings.get('task'))

        self._rest_server = tornado.httpserver.HTTPServer(
            RestServer(self.children,
                       self.version,
                       self._tmp_dir,
                       self._hostname,
                       self.zkclient))

        signal.signal(signal.SIGINT, self._handle_sigint)
        signal.signal(signal.SIGTERM, self._handle_sigint)
        self._log.info('Created Sentinel')
Example #4
0
    def __init__(self, *args, **kwargs):
        """
        Register signal handlers, start the ZooKeeper client and create the
        configuration, task server, data store and web server.

        The 'environment' keyword (None when not given) is handed to
        get_zk_conn_string; all keywords are forwarded to Configuration.
        """
        handler = self._handle_sig_event
        signal.signal(signal.SIGINT, handler)
        signal.signal(signal.SIGTERM, handler)

        self._prev_connection_state = None

        environment = kwargs.get('environment')
        self._zoo_keeper = KazooClient(
            hosts=get_zk_conn_string(environment),
            timeout=60.0)
        self._zoo_keeper.start()

        # the configuration is read through the started client
        self._configuration = Configuration(self._zoo_keeper, **kwargs)

        self._bootstrap_zookeeper_paths()

        self._task_server = TaskServer(self._configuration,
                                       self._zoo_keeper)
        self._data_store = DataStore(self._configuration,
                                     self._zoo_keeper,
                                     self._task_server)
        self._web_server = WebServer(self._configuration,
                                     self._data_store,
                                     self._task_server,
                                     self._zoo_keeper)
Example #5
0
    def __init__(self, port):
        """
        Read config and spawn child processes.

        The ZooKeeper client is started first; the constructor then waits,
        sleeping one second between checks, until ``self._settings`` is
        truthy. Only then are the temp directory, the task client, the REST
        server and the SIGINT/SIGTERM handlers set up.

        :type port: int
        """
        self._log = logging.getLogger('sent.daemon')
        self._log.info('Creating Sentinel')

        self._port = port
        self.children = dict()
        self._settings = None
        self._system = get_system()
        self._hostname = socket.getfqdn()
        self._prev_state = None
        self.listener_lock = Lock()
        self.version = get_version()
        self.task_client = None

        self.zkclient = KazooClient(hosts=get_zk_conn_string(),
                                    timeout=60.0,
                                    handler=SequentialThreadingHandler(),
                                    logger=logging.getLogger('kazoo.daemon'))

        self.zkclient.add_listener(self._zk_listener)
        # this will run self._reset_after_connection_loss
        self.zkclient.start()
        # NOTE(review): nothing in this method assigns _settings; presumably
        # it is set as a consequence of the client starting -- confirm.
        while not self._settings:
            self._log.info('Waiting for settings.')
            time.sleep(1)

        # A missing 'zookeeper' section used to raise AttributeError on the
        # temp_directory lookup while the task lookup tolerated it; use one
        # defaulted lookup for both.
        zk_settings = self._settings.get('zookeeper', {})

        self._tmp_dir = os.path.join(
            zk_settings.get('temp_directory', '/'), 'ruok')

        self.task_client = ZKTaskClient(
            self.children, self.zkclient, zk_settings.get('task'))

        self._rest_server = tornado.httpserver.HTTPServer(
            RestServer(self.children, self.version, self._tmp_dir,
                       self._hostname, self.zkclient))

        signal.signal(signal.SIGINT, self._handle_sigint)
        signal.signal(signal.SIGTERM, self._handle_sigint)
        self._log.info('Created Sentinel')
Example #6
0
 def __init__(self, temp_path, timeout,
              parent='None', acquire_lock=None, app_state=None):
     """
     Record the arguments and prepare a ZooKeeper client.

     The client is created and given a connection listener here; it is
     not started by this method.

     :type temp_path: str
     :type timeout: int
     :type parent: str
     :type acquire_lock: zoom.agent.entities.thread_safe_object.ThreadSafeObject or None
     :type app_state: zoom.agent.entities.thread_safe_object.ThreadSafeObject or None
     """
     # values taken straight from the arguments
     self._path, self._timeout, self._parent = temp_path, timeout, parent
     self._acquire_lock = acquire_lock
     self._app_state = app_state

     # bookkeeping that starts out empty
     self._thread = self._prev_state = None
     self._counter = 0

     # ZooKeeper client with its state listener attached
     self._zk = KazooClient(hosts=get_zk_conn_string(), timeout=60.0)
     self._zk.add_listener(self._zk_listener)

     # logger name embeds the parent identifier
     logger_name = 'sent.{0}.sl'.format(parent)
     self._log = logging.getLogger(logger_name)
Example #7
0
    def __init__(self, zookeeper, **kwargs):
        """
        Load the configuration stored at ZOOM_CONFIG and cache it on the
        instance.

        The node's data is parsed as JSON and its sections (logging,
        web_server, zookeeper, pagerduty, database, active_directory, the
        section named after the lower-cased environment, and
        message_throttle) are copied onto private attributes. As side
        effects a 'logs' directory is created in the working directory when
        missing and logging is configured from the 'logging' section.

        :type zookeeper: kazoo.client.KazooClient
        :param kwargs: kept as ``self._settings``
        :raises ValueError: logged as invalid JSON at ZOOM_CONFIG, re-raised
        :raises NoNodeError: logged as a missing config node, re-raised
        :raises Exception: anything else is logged with traceback, re-raised
        """
        self._zookeeper = zookeeper
        self._settings = kwargs
        try:
            # only the node data is needed, not its stat
            data, _ = self._zookeeper.get(ZOOM_CONFIG)
            config = json.loads(data)

            # create 'logs' directory if it does not exist
            if not os.path.exists("logs"):
                os.makedirs("logs")

            # initialize logging
            logging_config = config.get('logging')
            logging.config.dictConfig(logging_config)

            # get system type
            running_os = self._get_system()

            self._host = socket.gethostname()
            # web_server
            web_server_settings = config.get('web_server')
            self._port = self._get_setting('port',
                                           web_server_settings.get('port'))
            self._is_debug = web_server_settings.get('debug')

            self._application_path = os.getcwd()
            # client and doc paths are both built from the parent of the
            # working directory, so compute that once
            parent_dir = os.path.normpath(os.getcwd() + os.sep + os.pardir)
            self._client_path = zk_path_join(parent_dir, 'client')
            self._doc_path = zk_path_join(parent_dir, "doc")
            self._html_path = zk_path_join(self._client_path, "views")
            self._images_path = zk_path_join(self._client_path, "images")
            self._pid = os.getpid()
            self._environment = self._get_setting(
                'environment', os.environ.get('EnvironmentToUse', 'Staging'))

            # zookeeper
            zookeeper_settings = config.get('zookeeper')
            self._zookeeper_paths = zookeeper_settings
            self._agent_configuration_path = zookeeper_settings.get(
                'agent_configuration_path')
            self._agent_state_path = zookeeper_settings.get('agent_state_path')
            self._task_path = zookeeper_settings.get('task_path')
            self._application_state_path = zookeeper_settings.get(
                'application_state_path')
            self._global_mode_path = zookeeper_settings.get('global_mode_path')
            self._pillar_path = zookeeper_settings.get('pillar_path')
            self._alert_path = zookeeper_settings.get('alert_path')
            self._override_node = zookeeper_settings.get(
                'override_node', '/spot/software/config/override')
            self._temp_directory = zookeeper_settings.get('temp_directory')
            self._zookeeper_host = get_zk_conn_string(self._environment)

            # pagerduty
            pagerduty_settings = config.get('pagerduty')
            self._pagerduty_default_svc_key = pagerduty_settings.get(
                'pagerduty_default_svc_key')
            self._pagerduty_api_token = pagerduty_settings.get(
                'pagerduty_api_token')
            self._pagerduty_subdomain = pagerduty_settings.get(
                'pagerduty_subdomain')
            self._pagerduty_enabled_environments = pagerduty_settings.get(
                'pagerduty_enabled_environments')
            self._pagerduty_alert_footer = pagerduty_settings.get(
                'pagerduty_footer', '')

            # database
            db_settings = config.get('database')
            self._db_type = db_settings.get('db_type')
            if running_os == PlatformType.WINDOWS:
                self._sql_connection = db_settings.get(
                    'sql_connection_windows')
            else:
                self._sql_connection = db_settings.get('sql_connection')

            # authentication
            ad_settings = config.get('active_directory')
            self._ldap_server = ad_settings.get('host')
            self._ldap_port = ad_settings.get('port')

            # environment specific
            env_settings = config.get(self._environment.lower())
            self._read_write_groups = env_settings.get('read_write_groups')
            self._graphite_host = env_settings.get('graphite_host')
            self._graphite_recheck = env_settings.get('graphite_recheck', '5m')

            # chatops
            chatops_settings = env_settings.get('chatops', {})
            self._chatops_url = chatops_settings.get('url')
            self._chatops_group = chatops_settings.get('group')
            self._chatops_commands_to_chat = chatops_settings.get(
                'commands_to_chat')

            # message throttling
            throttle_settings = config.get('message_throttle')
            self._throttle_interval = throttle_settings.get('interval')

            # salt
            self._salt_settings = env_settings.get('saltREST')

        # Each handler logs and re-raises. A bare ``raise`` is used so the
        # traceback still points at the statement that actually failed.
        except ValueError:
            logging.error('Data at {0} is not valid JSON.'.format(ZOOM_CONFIG))
            raise
        except NoNodeError:
            logging.error('Config node missing: {}'.format(ZOOM_CONFIG))
            raise
        except Exception:
            logging.exception('An unhandled exception occurred.')
            raise
Example #8
0
    def __init__(self, zookeeper, **kwargs):
        """
        Load the configuration stored at ZOOM_CONFIG and cache it on the
        instance.

        The node's data is parsed as JSON and its sections (logging,
        web_server, zookeeper, pagerduty, database, active_directory, the
        section named after the lower-cased environment, and
        message_throttle) are copied onto private attributes. As side
        effects a 'logs' directory is created in the working directory when
        missing and logging is configured from the 'logging' section.

        :type zookeeper: zoom.www.entities.zoo_keeper.ZooKeeper
        :param kwargs: kept as ``self._settings``
        :raises ValueError: logged as invalid JSON at ZOOM_CONFIG, re-raised
        :raises Exception: anything else is logged with traceback, re-raised
        """
        self._zookeeper = zookeeper
        self._settings = kwargs
        try:
            # only the node data is needed, not its stat
            data, _ = self._zookeeper.get(ZOOM_CONFIG)
            config = json.loads(data)

            # create 'logs' directory if it does not exist
            if not os.path.exists("logs"):
                os.makedirs("logs")

            # initialize logging
            logging_config = config.get('logging')
            logging.config.dictConfig(logging_config)

            # get system type
            running_os = self._get_system()

            self._host = socket.gethostname()
            # web_server
            web_server_settings = config.get('web_server')
            self._port = self._get_setting('port',
                                           web_server_settings.get('port'))
            self._is_debug = web_server_settings.get('debug')

            self._application_path = os.getcwd()
            # client and doc paths are both built from the parent of the
            # working directory, so compute that once
            parent_dir = os.path.normpath(os.getcwd() + os.sep + os.pardir)
            self._client_path = zk_path_join(parent_dir, 'client')
            self._doc_path = zk_path_join(parent_dir, "doc")
            self._html_path = zk_path_join(self._client_path, "views")
            self._images_path = zk_path_join(self._client_path, "images")
            self._pid = os.getpid()
            self._environment = self._get_setting(
                'environment', os.environ.get('EnvironmentToUse', 'Staging'))

            # zookeeper
            zookeeper_settings = config.get('zookeeper')
            self._agent_configuration_path = zookeeper_settings.get(
                'agent_configuration_path')
            self._agent_state_path = zookeeper_settings.get('agent_state_path')
            self._task_path = zookeeper_settings.get('task_path')
            self._application_state_path = zookeeper_settings.get(
                'application_state_path')
            self._global_mode_path = zookeeper_settings.get('global_mode_path')
            self._pillar_path = zookeeper_settings.get('pillar_path')
            self._alert_path = zookeeper_settings.get('alert_path')
            self._override_node = zookeeper_settings.get(
                'override_node', '/spot/software/config/override')
            self._zookeeper_host = get_zk_conn_string(self._environment)

            # pagerduty
            pagerduty_settings = config.get('pagerduty')
            self._pagerduty_default_svc_key = pagerduty_settings.get(
                'pagerduty_default_svc_key')
            self._pagerduty_api_token = pagerduty_settings.get(
                'pagerduty_api_token')
            self._pagerduty_subdomain = pagerduty_settings.get(
                'pagerduty_subdomain')
            self._pagerduty_enabled_environments = pagerduty_settings.get(
                'pagerduty_enabled_environments')
            self._pagerduty_alert_footer = pagerduty_settings.get(
                'pagerduty_footer', '')

            # database
            db_settings = config.get('database')
            self._db_type = db_settings.get('db_type')
            if running_os == PlatformType.WINDOWS:
                self._sql_connection = db_settings.get(
                    'sql_connection_windows')
            else:
                # Every non-Windows platform uses the generic connection
                # string. With the former ``elif ... LINUX`` test the
                # attribute was never set on any other platform.
                self._sql_connection = db_settings.get('sql_connection')

            # authentication
            ad_settings = config.get('active_directory')
            self._ldap_server = ad_settings.get('host')
            self._ldap_port = ad_settings.get('port')

            # environment specific
            env_settings = config.get(self._environment.lower())
            self._read_write_groups = env_settings.get('read_write_groups')
            self._graphite_host = env_settings.get('graphite_host')
            self._graphite_recheck = env_settings.get('graphite_recheck', '5m')

            # chatops
            chatops_settings = env_settings.get('chatops', {})
            self._chatops_url = chatops_settings.get('url')
            self._chatops_group = chatops_settings.get('group')
            self._chatops_commands_to_chat = chatops_settings.get(
                'commands_to_chat')

            # message throttling
            throttle_settings = config.get('message_throttle')
            self._throttle_interval = throttle_settings.get('interval')

            # salt
            self._salt_settings = env_settings.get('saltREST')

        # Both handlers log and re-raise. A bare ``raise`` is used so the
        # traceback still points at the statement that actually failed.
        except ValueError:
            logging.error('Data at {0} is not valid JSON.'.format(ZOOM_CONFIG))
            raise
        except Exception:
            logging.exception('An unhandled exception occurred.')
            raise
Example #9
0
    def __init__(self, config, settings, queue, system, application_type,
                 cancel_flag):
        """
        Set up an application from its config element and the agent settings.

        A ZooKeeper client is created and given a state listener, but it is
        not started in this method.

        :type config: dict (xml)
        :type settings: dict
        :type queue: zoom.agent.entities.unique_queue.UniqueQueue
        :type system: zoom.common.types.PlatformType
        :type application_type: zoom.common.types.ApplicationType
        :type cancel_flag: zoom.agent.entities.thread_safe_object.ThreadSafeObject
        """
        self.config = config
        self._settings = settings
        self.name = verify_attribute(config, 'id', none_allowed=False)
        self._log = logging.getLogger('sent.{0}.app'.format(self.name))

        # informational attributes
        self._host = socket.getfqdn()
        self._system = system
        self._predicates = list()
        self._running = True  # used to manually stop the run loop
        self._prev_state = None
        self._actions = dict()  # created in _reset_watches on zk connect
        self._env = os.environ.get('EnvironmentToUse', 'Staging')
        self._apptype = application_type
        self._restart_on_crash = verify_attribute(
            config, 'restart_on_crash', none_allowed=True)
        self._post_stop_sleep = verify_attribute(
            config, 'post_stop_sleep', none_allowed=True, cast=int, default=5)

        # tool-like attributes
        # mode, state and restart counter all report through this callback
        on_change = self._update_agent_node_with_app_details
        self.listener_lock = Lock()
        self._action_queue = queue
        self._mode = ApplicationMode(ApplicationMode.MANUAL,
                                     callback=on_change)
        self._state = ThreadSafeObject(ApplicationState.OK,
                                       callback=on_change)
        self._start_stop_time = ''  # Default to empty string for comparison
        self._login_user = '******'  # Default to Zoom
        self._user_set_in_react = False
        self._run_check_mode = False
        self._pd_svc_key = verify_attribute(
            config, 'pagerduty_service', none_allowed=True)

        max_restarts = verify_attribute(
            config, 'restartmax', none_allowed=True, cast=int, default=3)
        self._rl = RestartLogic(self.name, max_restarts,
                                count_callback=on_change)

        self._read_only = False

        self._paths = self._init_paths(config, settings, application_type)

        # clients
        kazoo_logger_name = 'kazoo.app.{0}'.format(self.name)
        self.zkclient = KazooClient(hosts=get_zk_conn_string(),
                                    timeout=60.0,
                                    handler=SequentialThreadingHandler(),
                                    logger=logging.getLogger(kazoo_logger_name))
        self.zkclient.add_listener(self._zk_listener)

        self._proc_client = self._init_proc_client(
            config, application_type, cancel_flag)

        self._actions = self._init_actions(settings)
        self._work_manager = self._init_work_manager(queue)
Example #10
0
    def __init__(self, config, settings, queue, system, application_type,
                 cancel_flag):
        """
        Initialise the application's attributes, helpers and clients.

        The ZooKeeper client built here gets a state listener attached;
        this method does not start it.

        :type config: dict (xml)
        :type settings: dict
        :type queue: zoom.agent.entities.unique_queue.UniqueQueue
        :type system: zoom.common.types.PlatformType
        :type application_type: zoom.common.types.ApplicationType
        :type cancel_flag: zoom.agent.entities.thread_safe_object.ThreadSafeObject
        """
        self.config = config
        self._settings = settings
        self.name = verify_attribute(config, 'id', none_allowed=False)
        app_logger_name = 'sent.{0}.app'.format(self.name)
        self._log = logging.getLogger(app_logger_name)

        # informational attributes
        self._host = socket.getfqdn()
        self._system = system
        self._predicates = list()
        self._running = True  # used to manually stop the run loop
        self._prev_state = None
        self._actions = dict()  # created in _reset_watches on zk connect
        self._env = os.environ.get('EnvironmentToUse', 'Staging')
        self._apptype = application_type
        self._restart_on_crash = verify_attribute(config, 'restart_on_crash',
                                                  none_allowed=True)
        self._post_stop_sleep = verify_attribute(config, 'post_stop_sleep',
                                                 none_allowed=True, cast=int,
                                                 default=5)

        # tool-like attributes
        self.listener_lock = Lock()
        self._action_queue = queue
        # a single callback is shared by mode, state and restart logic
        notify = self._update_agent_node_with_app_details
        self._mode = ApplicationMode(ApplicationMode.MANUAL, callback=notify)
        self._state = ThreadSafeObject(ApplicationState.OK, callback=notify)
        self._start_stop_time = ''  # Default to empty string for comparison
        self._login_user = '******'  # Default to Zoom
        self._user_set_in_react = False
        self._run_check_mode = False
        self._pd_svc_key = verify_attribute(config, 'pagerduty_service',
                                            none_allowed=True)

        restart_limit = verify_attribute(config, 'restartmax',
                                         none_allowed=True, cast=int,
                                         default=3)
        self._rl = RestartLogic(self.name, restart_limit,
                                count_callback=notify)

        self._read_only = False

        self._paths = self._init_paths(config, settings, application_type)

        # clients
        self.zkclient = KazooClient(
            hosts=get_zk_conn_string(),
            timeout=60.0,
            handler=SequentialThreadingHandler(),
            logger=logging.getLogger('kazoo.app.{0}'.format(self.name)))
        self.zkclient.add_listener(self._zk_listener)

        self._proc_client = self._init_proc_client(config, application_type,
                                                   cancel_flag)

        self._actions = self._init_actions(settings)
        self._work_manager = self._init_work_manager(queue)