Example #1
    def restartAgent(stop_event, graceful_stop_timeout=30):
        ExitHelper().exitcode = AGENT_AUTO_RESTART_EXIT_CODE
        stop_event.set()

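        # if the agent does not stop gracefully within graceful_stop_timeout
        # seconds, force the process to exit with the auto-restart exit code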
        t = threading.Timer(graceful_stop_timeout,
                            ExitHelper().exit, [AGENT_AUTO_RESTART_EXIT_CODE])
        t.start()
Example #2
  def __init__(self, cachedir, stacks_dir, common_services_dir, host_scripts_dir,
      alert_grace_period, cluster_configuration, config, recovery_manager,
      in_minutes=True):

    self.cachedir = cachedir
    self.stacks_dir = stacks_dir
    self.common_services_dir = common_services_dir
    self.host_scripts_dir = host_scripts_dir

    self._cluster_configuration = cluster_configuration
    
    if not os.path.exists(cachedir):
      try:
        os.makedirs(cachedir)
      except Exception:
        logger.critical("[AlertScheduler] Could not create the cache directory {0}".format(cachedir))

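    # settings handed to the APScheduler instance below; misfire_grace_time is
    # how late, in seconds, an alert job may still be allowed to run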
    self.APS_CONFIG = {
      'apscheduler.threadpool.core_threads': 3,
      'apscheduler.coalesce': True,
      'apscheduler.standalone': False,
      'apscheduler.misfire_grace_time': alert_grace_period
    }

    self._collector = AlertCollector()
    self.__scheduler = Scheduler(self.APS_CONFIG)
    self.__in_minutes = in_minutes
    self.config = config
    self.recovery_manger = recovery_manager

    # register python exit handler
    ExitHelper().register(self.exit_handler)
Example #3
    def __init__(self,
                 cachedir,
                 stacks_dir,
                 common_services_dir,
                 host_scripts_dir,
                 cluster_configuration,
                 config,
                 in_minutes=True):

        self.cachedir = cachedir
        self.stacks_dir = stacks_dir
        self.common_services_dir = common_services_dir
        self.host_scripts_dir = host_scripts_dir

        self._cluster_configuration = cluster_configuration

        if not os.path.exists(cachedir):
            try:
                os.makedirs(cachedir)
            except Exception:
                logger.critical(
                    "[AlertScheduler] Could not create the cache directory {0}"
                    .format(cachedir))

        self._collector = AlertCollector()
        self.__scheduler = Scheduler(AlertSchedulerHandler.APS_CONFIG)
        self.__in_minutes = in_minutes
        self.config = config

        # register python exit handler
        ExitHelper().register(self.exit_handler)
Example #4
  def __init__(self, initializer_module, in_minutes=True):

    self.cachedir = initializer_module.config.alerts_cachedir
    self.stacks_dir = initializer_module.config.stacks_dir
    self.common_services_dir = initializer_module.config.common_services_dir
    self.extensions_dir = initializer_module.config.extensions_dir
    self.host_scripts_dir = initializer_module.config.host_scripts_dir
    self.configuration_builder = initializer_module.configuration_builder

    self._cluster_configuration = initializer_module.configurations_cache
    self.alert_definitions_cache = initializer_module.alert_definitions_cache

    self.config = initializer_module.config

    # the amount of time, in seconds, that an alert can run after its scheduled time
    alert_grace_period = int(self.config.get('agent', 'alert_grace_period', 5))

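    # run the scheduler in a background thread; in APScheduler, standalone mode
    # would run the main loop in (and block) the calling thread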
    apscheduler_standalone = False

    self.APS_CONFIG = {
      'apscheduler.threadpool.core_threads': 3,
      'apscheduler.coalesce': True,
      'apscheduler.standalone': apscheduler_standalone,
      'apscheduler.misfire_grace_time': alert_grace_period,
      'apscheduler.threadpool.context_injector': self._job_context_injector if not apscheduler_standalone else None,
      'apscheduler.threadpool.agent_config': self.config
    }

    self._collector = AlertCollector()
    self.__scheduler = Scheduler(self.APS_CONFIG)
    self.__in_minutes = in_minutes
    self.recovery_manger = initializer_module.recovery_manager

    # register python exit handler
    ExitHelper().register(self.exit_handler)
Example #5
  def restartAgent(stop_event, graceful_stop_timeout=30):
    from ambari_agent import main
    main.EXIT_CODE_ON_STOP = AGENT_AUTO_RESTART_EXIT_CODE
    stop_event.set()

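    # force the process to exit with the auto-restart code if it has not
    # stopped gracefully within graceful_stop_timeout seconds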
    t = threading.Timer(graceful_stop_timeout, ExitHelper().exit, [AGENT_AUTO_RESTART_EXIT_CODE])
    t.start()
Example #6
    def __init__(self,
                 cachedir,
                 stacks_dir,
                 common_services_dir,
                 extensions_dir,
                 host_scripts_dir,
                 cluster_configuration,
                 config,
                 recovery_manager,
                 in_minutes=True):

        self.cachedir = cachedir
        self.stacks_dir = stacks_dir
        self.common_services_dir = common_services_dir
        self.extensions_dir = extensions_dir
        self.host_scripts_dir = host_scripts_dir

        self._cluster_configuration = cluster_configuration

        # a mapping between a cluster name and a unique hash for all definitions
        self._cluster_hashes = {}

        # the amount of time, in seconds, that an alert can run after its scheduled time
        alert_grace_period = int(config.get('agent', 'alert_grace_period', 5))

        if not os.path.exists(cachedir):
            try:
                os.makedirs(cachedir)
            except Exception:
                logger.critical(
                    "[AlertScheduler] Could not create the cache directory {0}"
                    .format(cachedir))

        apscheduler_standalone = False

        self.APS_CONFIG = {
            'apscheduler.threadpool.core_threads': 3,
            'apscheduler.coalesce': True,
            'apscheduler.standalone': apscheduler_standalone,
            'apscheduler.misfire_grace_time': alert_grace_period,
            'apscheduler.threadpool.context_injector': self._job_context_injector if not apscheduler_standalone else None,
            'apscheduler.threadpool.agent_config': config
        }

        self._collector = AlertCollector()
        self.__scheduler = Scheduler(self.APS_CONFIG)
        self.__in_minutes = in_minutes
        self.config = config
        self.recovery_manger = recovery_manager

        # register python exit handler
        ExitHelper().register(self.exit_handler)
Example #7
    def __init__(self, config, actionQueue):
        multiprocessing.Process.__init__(self)

        self.config = config
        self.actionQueue = actionQueue

        self.status_command_timeout = int(
            self.config.get('agent', 'status_command_timeout', 5))  # in seconds
        self.hasTimeoutedEvent = multiprocessing.Event()
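        # register self.kill so this process is cleaned up when the agent exits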
        ExitHelper().register(self.kill)
Example #8
def main(heartbeat_stop_callback=None):
    global config
    global home_dir

    parser = OptionParser()
    parser.add_option("-v",
                      "--verbose",
                      dest="verbose",
                      action="store_true",
                      help="verbose log output",
                      default=False)
    parser.add_option("-e",
                      "--expected-hostname",
                      dest="expected_hostname",
                      action="store",
                      help="expected hostname of current host. If hostname differs, agent will fail",
                      default=None)
    parser.add_option("--home",
                      dest="home_dir",
                      action="store",
                      help="Home directory",
                      default="")
    (options, args) = parser.parse_args()

    expected_hostname = options.expected_hostname
    home_dir = options.home_dir

    logging_level = logging.DEBUG if options.verbose else logging.INFO

    setup_logging(logger, AmbariConfig.AmbariConfig.getLogFile(),
                  logging_level)
    global is_logger_setup
    is_logger_setup = True
    setup_logging(alerts_logger, AmbariConfig.AmbariConfig.getAlertsLogFile(),
                  logging_level)
    Logger.initialize_logger('resource_management',
                             logging_level=logging_level)

    if home_dir != "":
        # When running multiple Ambari Agents on this host for simulation, each one will use a unique home directory.
        Logger.info("Agent is using Home Dir: %s" % str(home_dir))

    # use the host's locale for numeric formatting
    try:
        locale.setlocale(locale.LC_ALL, '')
    except locale.Error as ex:
        logger.warning(
            "Cannot set locale for ambari-agent. Please check your systemwide locale settings. Failed due to: {0}."
            .format(str(ex)))

    default_cfg = {'agent': {'prefix': '/home/ambari'}}
    config.load(default_cfg)

    if (len(sys.argv) > 1) and sys.argv[1] == 'stop':
        stop_agent()

    if (len(sys.argv) > 2) and sys.argv[1] == 'reset':
        reset_agent(sys.argv)

    # Check for ambari configuration file.
    resolve_ambari_config()

    # Add syslog handler based on the ambari config file
    add_syslog_handler(logger)

    # Starting data cleanup daemon
    data_cleaner = None
    if config.has_option('agent', 'data_cleanup_interval') and int(
            config.get('agent', 'data_cleanup_interval')) > 0:
        data_cleaner = DataCleaner(config)
        data_cleaner.start()

    perform_prestart_checks(expected_hostname)

    # Starting ping port listener
    try:
        # This acts as a single process machine-wide lock (albeit incomplete, since
        # we still need an extra file to track the Agent PID)
        ping_port_listener = PingPortListener(config)
    except Exception as ex:
        err_message = "Failed to start ping port listener: " + str(ex)
        logger.error(err_message)
        sys.stderr.write(err_message)
        sys.exit(1)
    ping_port_listener.start()

    update_log_level(config)

    update_open_files_ulimit(config)

    if not config.use_system_proxy_setting():
        logger.info('Agent is configured to ignore system proxy settings')
        reconfigure_urllib2_opener(ignore_system_proxy=True)

    if OSCheck.get_os_family() != OSConst.WINSRV_FAMILY:
        daemonize()

    #
    # Iterate through the list of server hostnames and connect to the first active server
    #

    active_server = None
    server_hostnames = hostname.server_hostnames(config)

    connected = False
    stopped = False

    # Keep trying to connect to a server or bail out if ambari-agent was stopped
    while not connected and not stopped:
        for server_hostname in server_hostnames:
            server_url = config.get_api_url(server_hostname)
            try:
                server_ip = socket.gethostbyname(server_hostname)
                logger.info('Connecting to Ambari server at %s (%s)',
                            server_url, server_ip)
            except socket.error:
                logger.warning(
                    "Unable to determine the IP address of the Ambari server '%s'",
                    server_hostname)

            # Wait until MAX_RETRIES to see if server is reachable
            netutil = NetUtil(config, heartbeat_stop_callback)
            (retries, connected,
             stopped) = netutil.try_to_connect(server_url, MAX_RETRIES, logger)

            # if connected, launch controller
            if connected:
                logger.info('Connected to Ambari server %s', server_hostname)
                # Set the active server
                active_server = server_hostname
                # Launch Controller communication
                run_threads(server_hostname, heartbeat_stop_callback)

            #
            # If Ambari Agent connected to the server or
            # Ambari Agent was stopped using stop event
            # Clean up if not Windows OS
            #
            if connected or stopped:
                ExitHelper().exit(0)
                logger.info("finished")
                break
        pass  # for server_hostname in server_hostnames
    pass  # while not (connected or stopped)

    return active_server
Example #9
try:
    from queue import Queue, Empty  # Python 3
except ImportError:
    from Queue import Queue, Empty  # Python 2

logger = logging.getLogger(__name__)
_threadpools = set()


# Worker threads are daemonic in order to let the interpreter exit without
# an explicit shutdown of the thread pool. The following trick is necessary
# to allow worker threads to finish cleanly.
def _shutdown_all():
    for pool_ref in tuple(_threadpools):
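        # entries are weak references; dereference and skip pools that have
        # already been garbage collected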
        pool = pool_ref()
        if pool:
            pool.shutdown()

ExitHelper().register(_shutdown_all)


class ThreadPool(object):
    def __init__(self, core_threads=0, max_threads=20, keepalive=1, context_injector=None, agent_config=None):
        """
        :param core_threads: maximum number of persistent threads in the pool
        :param max_threads: maximum number of total threads in the pool
        :param thread_class: callable that creates a Thread object
        :param keepalive: seconds to keep non-core worker threads waiting
            for new tasks

        :type context_injector func
        :type agent_config AmbariConfig.AmbariConfig
        """
        self.core_threads = core_threads