def restartAgent(stop_event, graceful_stop_timeout=30):
  """
  Initiate an agent self-restart.

  Records the auto-restart exit code on the process exit helper, signals all
  agent threads to stop, and arms a watchdog timer that force-exits with the
  same code if the graceful shutdown does not finish in time.

  :param stop_event: threading event used to signal agent threads to stop
  :param graceful_stop_timeout: seconds to wait before force-exiting
  """
  # make sure a clean shutdown still reports the auto-restart code
  ExitHelper().exitcode = AGENT_AUTO_RESTART_EXIT_CODE

  # ask all agent threads to wind down gracefully
  stop_event.set()

  # safety net: hard-exit if the graceful stop stalls
  force_exit_timer = threading.Timer(graceful_stop_timeout, ExitHelper().exit,
                                     [AGENT_AUTO_RESTART_EXIT_CODE])
  force_exit_timer.start()
def __init__(self, cachedir, stacks_dir, common_services_dir, host_scripts_dir,
             alert_grace_period, cluster_configuration, config, recovery_manager,
             in_minutes=True):
  """
  Initialize the alert scheduler handler.

  :param cachedir: directory used to cache alert definitions; created if missing
  :param stacks_dir: directory containing stack definitions
  :param common_services_dir: directory containing common service definitions
  :param host_scripts_dir: directory containing host-level alert scripts
  :param alert_grace_period: seconds an alert may still run after its scheduled time
  :param cluster_configuration: cluster configuration cache used by alert jobs
  :param config: agent configuration
  :param recovery_manager: recovery manager consulted by scheduled alert jobs
  :param in_minutes: True if alert intervals are expressed in minutes
  """
  self.cachedir = cachedir
  self.stacks_dir = stacks_dir
  self.common_services_dir = common_services_dir
  self.host_scripts_dir = host_scripts_dir
  self._cluster_configuration = cluster_configuration

  if not os.path.exists(cachedir):
    try:
      os.makedirs(cachedir)
    except OSError:
      # narrowed from a bare except: makedirs failures surface as OSError;
      # scheduling continues best-effort even if the cache cannot be created
      logger.critical("[AlertScheduler] Could not create the cache directory {0}".format(cachedir))

  self.APS_CONFIG = {
    'apscheduler.threadpool.core_threads': 3,
    'apscheduler.coalesce': True,
    'apscheduler.standalone': False,
    'apscheduler.misfire_grace_time': alert_grace_period
  }

  self._collector = AlertCollector()
  self.__scheduler = Scheduler(self.APS_CONFIG)
  self.__in_minutes = in_minutes
  self.config = config
  # NOTE: the historical misspelling "recovery_manger" is kept because other
  # code reads the attribute under this name
  self.recovery_manger = recovery_manager

  # register python exit handler
  ExitHelper().register(self.exit_handler)
def __init__(self, cachedir, stacks_dir, common_services_dir, host_scripts_dir,
             cluster_configuration, config, in_minutes=True):
  """
  Initialize the alert scheduler handler.

  :param cachedir: directory used to cache alert definitions; created if missing
  :param stacks_dir: directory containing stack definitions
  :param common_services_dir: directory containing common service definitions
  :param host_scripts_dir: directory containing host-level alert scripts
  :param cluster_configuration: cluster configuration cache used by alert jobs
  :param config: agent configuration
  :param in_minutes: True if alert intervals are expressed in minutes
  """
  self.cachedir = cachedir
  self.stacks_dir = stacks_dir
  self.common_services_dir = common_services_dir
  self.host_scripts_dir = host_scripts_dir
  self._cluster_configuration = cluster_configuration

  if not os.path.exists(cachedir):
    try:
      os.makedirs(cachedir)
    except OSError:
      # narrowed from a bare except: makedirs failures surface as OSError;
      # scheduling continues best-effort even if the cache cannot be created
      logger.critical(
        "[AlertScheduler] Could not create the cache directory {0}"
        .format(cachedir))

  self._collector = AlertCollector()
  self.__scheduler = Scheduler(AlertSchedulerHandler.APS_CONFIG)
  self.__in_minutes = in_minutes
  self.config = config

  # register python exit handler
  ExitHelper().register(self.exit_handler)
def __init__(self, initializer_module, in_minutes=True):
  """
  Wire the alert scheduler to the agent-wide state held by the
  initializer module and build the APScheduler configuration.

  :param initializer_module: holder of shared agent caches and configuration
  :param in_minutes: True if alert intervals are expressed in minutes
  """
  agent_config = initializer_module.config

  self.cachedir = agent_config.alerts_cachedir
  self.stacks_dir = agent_config.stacks_dir
  self.common_services_dir = agent_config.common_services_dir
  self.extensions_dir = agent_config.extensions_dir
  self.host_scripts_dir = agent_config.host_scripts_dir
  self.configuration_builder = initializer_module.configuration_builder
  self._cluster_configuration = initializer_module.configurations_cache
  self.alert_definitions_cache = initializer_module.alert_definitions_cache
  self.config = agent_config

  # the amount of time, in seconds, that an alert can run after its scheduled time
  alert_grace_period = int(self.config.get('agent', 'alert_grace_period', 5))

  apscheduler_standalone = False

  self.APS_CONFIG = {
    'apscheduler.threadpool.core_threads': 3,
    'apscheduler.coalesce': True,
    'apscheduler.standalone': apscheduler_standalone,
    'apscheduler.misfire_grace_time': alert_grace_period,
    'apscheduler.threadpool.context_injector': self._job_context_injector if not apscheduler_standalone else None,
    'apscheduler.threadpool.agent_config': self.config
  }

  self._collector = AlertCollector()
  self.__scheduler = Scheduler(self.APS_CONFIG)
  self.__in_minutes = in_minutes
  self.recovery_manger = initializer_module.recovery_manager

  # register python exit handler
  ExitHelper().register(self.exit_handler)
def restartAgent(stop_event, graceful_stop_timeout=30):
  """
  Initiate an agent self-restart.

  Marks the stop exit code on the main module, signals all agent threads to
  stop, and arms a watchdog timer that force-exits with the auto-restart
  code if the graceful shutdown does not complete in time.

  :param stop_event: threading event used to signal agent threads to stop
  :param graceful_stop_timeout: seconds to wait before force-exiting
  """
  # make a clean shutdown report the auto-restart code
  from ambari_agent import main
  main.EXIT_CODE_ON_STOP = AGENT_AUTO_RESTART_EXIT_CODE

  # ask all agent threads to wind down gracefully
  stop_event.set()

  # safety net: hard-exit if the graceful stop stalls
  force_exit_timer = threading.Timer(graceful_stop_timeout, ExitHelper().exit,
                                     [AGENT_AUTO_RESTART_EXIT_CODE])
  force_exit_timer.start()
def __init__(self, cachedir, stacks_dir, common_services_dir, extensions_dir,
             host_scripts_dir, cluster_configuration, config, recovery_manager,
             in_minutes=True):
  """
  Initialize the alert scheduler handler.

  :param cachedir: directory used to cache alert definitions; created if missing
  :param stacks_dir: directory containing stack definitions
  :param common_services_dir: directory containing common service definitions
  :param extensions_dir: directory containing extension definitions
  :param host_scripts_dir: directory containing host-level alert scripts
  :param cluster_configuration: cluster configuration cache used by alert jobs
  :param config: agent configuration
  :param recovery_manager: recovery manager consulted by scheduled alert jobs
  :param in_minutes: True if alert intervals are expressed in minutes
  """
  self.cachedir = cachedir
  self.stacks_dir = stacks_dir
  self.common_services_dir = common_services_dir
  self.extensions_dir = extensions_dir
  self.host_scripts_dir = host_scripts_dir
  self._cluster_configuration = cluster_configuration

  # a mapping between a cluster name and a unique hash for all definitions
  self._cluster_hashes = {}

  # the amount of time, in seconds, that an alert can run after its scheduled time
  alert_grace_period = int(config.get('agent', 'alert_grace_period', 5))

  if not os.path.exists(cachedir):
    try:
      os.makedirs(cachedir)
    except OSError:
      # narrowed from a bare except: makedirs failures surface as OSError;
      # scheduling continues best-effort even if the cache cannot be created
      logger.critical(
        "[AlertScheduler] Could not create the cache directory {0}"
        .format(cachedir))

  apscheduler_standalone = False

  self.APS_CONFIG = {
    'apscheduler.threadpool.core_threads': 3,
    'apscheduler.coalesce': True,
    'apscheduler.standalone': apscheduler_standalone,
    'apscheduler.misfire_grace_time': alert_grace_period,
    'apscheduler.threadpool.context_injector': self._job_context_injector if not apscheduler_standalone else None,
    'apscheduler.threadpool.agent_config': config
  }

  self._collector = AlertCollector()
  self.__scheduler = Scheduler(self.APS_CONFIG)
  self.__in_minutes = in_minutes
  self.config = config
  # NOTE: the historical misspelling "recovery_manger" is kept because other
  # code reads the attribute under this name
  self.recovery_manger = recovery_manager

  # register python exit handler
  ExitHelper().register(self.exit_handler)
def __init__(self, config, actionQueue):
  """
  Set up the status-command executor child process.

  :param config: agent configuration (supplies the status command timeout)
  :param actionQueue: queue from which status commands are taken
  """
  multiprocessing.Process.__init__(self)

  self.config = config
  self.actionQueue = actionQueue

  # seconds a status command may run before it is considered timed out
  timeout_seconds = int(self.config.get('agent', 'status_command_timeout', 5))
  self.status_command_timeout = timeout_seconds

  # set when a status command exceeds its timeout
  self.hasTimeoutedEvent = multiprocessing.Event()

  ExitHelper().register(self.kill)
def main(heartbeat_stop_callback=None):
  """
  Agent entry point: parses command-line options, configures logging,
  performs pre-start checks and daemonization, then loops trying to connect
  to one of the configured Ambari servers and launches the agent threads
  against the first reachable one.

  :param heartbeat_stop_callback: callback passed to NetUtil so the connect
    loop can be interrupted when the agent is asked to stop
  :return: hostname of the server the agent connected to, or None
  """
  global config
  global home_dir

  # --- command-line options ---
  parser = OptionParser()
  parser.add_option("-v", "--verbose", dest="verbose", action="store_true", help="verbose log output", default=False)
  parser.add_option(
    "-e", "--expected-hostname", dest="expected_hostname", action="store",
    help=
    "expected hostname of current host. If hostname differs, agent will fail",
    default=None)
  parser.add_option("--home", dest="home_dir", action="store", help="Home directory", default="")
  (options, args) = parser.parse_args()

  expected_hostname = options.expected_hostname
  home_dir = options.home_dir

  logging_level = logging.DEBUG if options.verbose else logging.INFO

  # --- logging setup (agent log, alerts log, resource_management logger) ---
  setup_logging(logger, AmbariConfig.AmbariConfig.getLogFile(), logging_level)
  global is_logger_setup
  is_logger_setup = True
  setup_logging(alerts_logger, AmbariConfig.AmbariConfig.getAlertsLogFile(), logging_level)
  Logger.initialize_logger('resource_management', logging_level=logging_level)

  if home_dir != "":
    # When running multiple Ambari Agents on this host for simulation,
    # each one will use a unique home directory.
    Logger.info("Agent is using Home Dir: %s" % str(home_dir))

  # use the host's locale for numeric formatting
  try:
    locale.setlocale(locale.LC_ALL, '')
  except locale.Error as ex:
    logger.warning(
      "Cannot set locale for ambari-agent. Please check your systemwide locale settings. Failed due to: {0}."
      .format(str(ex)))

  # defaults applied before the real configuration file is resolved
  default_cfg = {'agent': {'prefix': '/home/ambari'}}
  config.load(default_cfg)

  # 'stop' needs no extra args; 'reset' expects the server hostname as well
  if (len(sys.argv) > 1) and sys.argv[1] == 'stop':
    stop_agent()

  if (len(sys.argv) > 2) and sys.argv[1] == 'reset':
    reset_agent(sys.argv)

  # Check for ambari configuration file.
  resolve_ambari_config()

  # Add syslog handler based on ambari config file
  add_syslog_handler(logger)

  # Starting data cleanup daemon (only if a positive interval is configured)
  data_cleaner = None
  if config.has_option('agent', 'data_cleanup_interval') and int(
      config.get('agent', 'data_cleanup_interval')) > 0:
    data_cleaner = DataCleaner(config)
    data_cleaner.start()

  perform_prestart_checks(expected_hostname)

  # Starting ping port listener
  try:
    #This acts as a single process machine-wide lock (albeit incomplete, since
    # we still need an extra file to track the Agent PID)
    ping_port_listener = PingPortListener(config)
  except Exception as ex:
    err_message = "Failed to start ping port listener of: " + str(ex)
    logger.error(err_message)
    sys.stderr.write(err_message)
    sys.exit(1)
  ping_port_listener.start()

  update_log_level(config)
  update_open_files_ulimit(config)

  if not config.use_system_proxy_setting():
    logger.info('Agent is configured to ignore system proxy settings')
    reconfigure_urllib2_opener(ignore_system_proxy=True)

  # daemonize everywhere except Windows server families
  if not OSCheck.get_os_family() == OSConst.WINSRV_FAMILY:
    daemonize()

  #
  # Iterate through the list of server hostnames and connect to the first active server
  #

  active_server = None
  server_hostnames = hostname.server_hostnames(config)

  connected = False
  stopped = False

  # Keep trying to connect to a server or bail out if ambari-agent was stopped
  while not connected and not stopped:
    for server_hostname in server_hostnames:
      server_url = config.get_api_url(server_hostname)
      try:
        server_ip = socket.gethostbyname(server_hostname)
        logger.info('Connecting to Ambari server at %s (%s)', server_url, server_ip)
      except socket.error:
        # DNS failure is non-fatal here; try_to_connect below still probes the URL
        logger.warn(
          "Unable to determine the IP address of the Ambari server '%s'",
          server_hostname)

      # Wait until MAX_RETRIES to see if server is reachable
      netutil = NetUtil(config, heartbeat_stop_callback)
      (retries, connected, stopped) = netutil.try_to_connect(server_url, MAX_RETRIES, logger)

      # if connected, launch controller
      if connected:
        logger.info('Connected to Ambari server %s', server_hostname)
        # Set the active server
        active_server = server_hostname
        # Launch Controller communication (blocks until the agent stops)
        run_threads(server_hostname, heartbeat_stop_callback)

      #
      # If Ambari Agent connected to the server or
      # Ambari Agent was stopped using stop event
      # Clean up if not Windows OS
      #
      if connected or stopped:
        ExitHelper().exit(0)
        logger.info("finished")
        break
    pass # for server_hostname in server_hostnames
  pass # while not (connected or stopped)

  return active_server
from Queue import Queue, Empty logger = logging.getLogger(__name__) _threadpools = set() # Worker threads are daemonic in order to let the interpreter exit without # an explicit shutdown of the thread pool. The following trick is necessary # to allow worker threads to finish cleanly. def _shutdown_all(): for pool_ref in tuple(_threadpools): pool = pool_ref() if pool: pool.shutdown() ExitHelper().register(_shutdown_all) class ThreadPool(object): def __init__(self, core_threads=0, max_threads=20, keepalive=1, context_injector=None, agent_config=None): """ :param core_threads: maximum number of persistent threads in the pool :param max_threads: maximum number of total threads in the pool :param thread_class: callable that creates a Thread object :param keepalive: seconds to keep non-core worker threads waiting for new tasks :type context_injector func :type agent_config AmbariConfig.AmbariConfig """ self.core_threads = core_threads