def handle_ext_handlers(self, etag=None):
    """Process all extension handlers from the current goal state.

    Skips processing when extensions are disabled in configuration, when
    the goal state carries no handler config, or when the artifacts
    profile puts extension handling on hold (overprovisioning).
    """
    if not conf.get_extensions_enabled():
        logger.verbose("Extension handling is disabled")
        return

    handlers = self.ext_handlers.extHandlers
    if handlers is None or len(handlers) == 0:
        logger.verbose("No extension handler config found")
        return

    if conf.get_enable_overprovisioning():
        profile = self.protocol.get_artifacts_profile()
        if profile and profile.is_on_hold():
            logger.info("Extension handling is on hold")
            return

    # In-place sort so handlers are processed in their declared order.
    handlers.sort(key=operator.methodcaller('sort_key'))
    # TODO: handle install in sequence, enable in parallel
    for handler in handlers:
        self.handle_ext_handler(handler, etag)
def _emit_changes_in_default_configuration():
    """Log (and emit telemetry for) configuration values that deviate
    from their defaults; never raises — failures are logged as warnings.
    """
    try:
        def report(msg):
            # Every change is both logged locally and sent as telemetry.
            logger.info(msg)
            add_event(AGENT_NAME, op=WALAEventOperation.ConfigurationChange, message=msg)

        def report_int_change(name, current):
            default = conf.get_int_default_value(name)
            if default != current:
                report("{0} changed from its default: {1}. New value: {2}".format(name, default, current))

        def report_disabled(name, value):
            if not value:
                report("{0} is set to False, not processing the operation".format(name))

        report_int_change("Extensions.GoalStatePeriod", conf.get_goal_state_period())

        report_disabled("OS.EnableFirewall", conf.enable_firewall())
        report_disabled("Extensions.Enabled", conf.get_extensions_enabled())

        # The firewall period only matters when the firewall is enabled.
        if conf.enable_firewall():
            report_int_change("OS.EnableFirewallPeriod", conf.get_enable_firewall_period())

        if conf.get_lib_dir() != "/var/lib/waagent":
            report("lib dir is in an unexpected location: {0}".format(conf.get_lib_dir()))

    except Exception as e:
        logger.warn("Failed to log changes in configuration: {0}", ustr(e))
def test_get_extensions_enabled(self):
    """Extensions.Enabled should read as True from the test configuration."""
    enabled = conf.get_extensions_enabled(self.conf)
    self.assertTrue(enabled)
def run(self, debug=False):
    """
    This is the main loop which watches for agent and extension updates.

    Starts the monitoring and environment threads, processes the goal
    state on each iteration, and exits (via sys.exit) when orphaned,
    when an agent update is available, or on an unhandled exception.
    """

    try:
        # NOTE: Do not add any telemetry events until after the monitoring handler has been started with the
        # call to 'monitor_thread.run()'. That method call initializes the protocol, which is needed in order to
        # load the goal state and update the container id in memory. Any telemetry events sent before this happens
        # will result in an uninitialized container id value.

        logger.info(u"Agent {0} is running as the goal state agent", CURRENT_AGENT)

        # Log OS-specific info locally.
        os_info_msg = u"Distro info: {0} {1}, osutil class being used: {2}, " \
                      u"agent service name: {3}".format(DISTRO_NAME, DISTRO_VERSION,
                                                        type(self.osutil).__name__, self.osutil.service_name)
        logger.info(os_info_msg)

        # Launch monitoring threads
        from azurelinuxagent.ga.monitor import get_monitor_handler
        monitor_thread = get_monitor_handler()
        monitor_thread.run()

        # NOTE: Any telemetry events added from this point on will be properly populated with the container id.

        from azurelinuxagent.ga.env import get_env_handler
        env_thread = get_env_handler()
        env_thread.run()

        from azurelinuxagent.ga.exthandlers import get_exthandlers_handler, migrate_handler_state
        exthandlers_handler = get_exthandlers_handler()
        migrate_handler_state()

        from azurelinuxagent.ga.remoteaccess import get_remote_access_handler
        remote_access_handler = get_remote_access_handler()

        # One-time startup housekeeping before entering the main loop.
        self._ensure_no_orphans()
        self._emit_restart_event()
        self._ensure_partition_assigned()
        self._ensure_readonly_files()
        self._ensure_cgroups_initialized()

        # Send OS-specific info as a telemetry event after the monitoring thread has been initialized, and with
        # it the container id too.
        add_event(AGENT_NAME, op=WALAEventOperation.OSInfo, message=os_info_msg)

        # Poll less frequently when extension processing is disabled.
        goal_state_interval = GOAL_STATE_INTERVAL \
            if conf.get_extensions_enabled() \
            else GOAL_STATE_INTERVAL_DISABLED

        while self.running:
            # Exit if the daemon parent is gone (skipped in debug mode, e.g. unit tests).
            if not debug and self._is_orphaned:
                logger.info("Agent {0} is an orphan -- exiting", CURRENT_AGENT)
                break

            # NOTE(review): restarting via .start() after .is_alive() is False assumes
            # the handler object supports being restarted — confirm against the handler impl.
            if not monitor_thread.is_alive():
                logger.warn(u"Monitor thread died, restarting")
                monitor_thread.start()

            if not env_thread.is_alive():
                logger.warn(u"Environment thread died, restarting")
                env_thread.start()

            # Exit the loop so the daemon can launch the updated (or reverted) agent.
            if self._upgrade_available():
                available_agent = self.get_latest_agent()
                if available_agent is None:
                    logger.info(
                        "Agent {0} is reverting to the installed agent -- exiting",
                        CURRENT_AGENT)
                else:
                    logger.info(
                        u"Agent {0} discovered update {1} -- exiting",
                        CURRENT_AGENT,
                        available_agent.name)
                break

            utc_start = datetime.utcnow()

            last_etag = exthandlers_handler.last_etag
            exthandlers_handler.run()

            remote_access_handler.run()

            # An etag change means a new goal state was processed; re-apply file protections.
            if last_etag != exthandlers_handler.last_etag:
                self._ensure_readonly_files()

            duration = elapsed_milliseconds(utc_start)
            logger.info(
                'ProcessGoalState completed [incarnation {0}; {1} ms]',
                exthandlers_handler.last_etag, duration)
            add_event(AGENT_NAME,
                      op=WALAEventOperation.ProcessGoalState,
                      duration=duration,
                      message="Incarnation {0}".format(
                          exthandlers_handler.last_etag))

            time.sleep(goal_state_interval)

    except Exception as e:
        msg = u"Agent {0} failed with exception: {1}".format(
            CURRENT_AGENT, ustr(e))
        self._set_sentinel(msg=msg)
        logger.warn(msg)
        logger.warn(traceback.format_exc())
        sys.exit(1)
        # additional return here because sys.exit is mocked in unit tests
        return

    self._shutdown()
    sys.exit(0)
def run(self, debug=False):
    """
    This is the main loop which watches for agent and extension updates.

    Starts the monitoring and environment threads, processes the goal
    state each iteration, and exits (via sys.exit) when orphaned, when
    an agent update is available, or on an unhandled exception.
    """

    try:
        logger.info(u"Agent {0} is running as the goal state agent", CURRENT_AGENT)

        # Launch monitoring threads
        from azurelinuxagent.ga.monitor import get_monitor_handler
        monitor_thread = get_monitor_handler()
        monitor_thread.run()

        from azurelinuxagent.ga.env import get_env_handler
        env_thread = get_env_handler()
        env_thread.run()

        from azurelinuxagent.ga.exthandlers import get_exthandlers_handler, migrate_handler_state
        exthandlers_handler = get_exthandlers_handler()
        migrate_handler_state()

        from azurelinuxagent.ga.remoteaccess import get_remote_access_handler
        remote_access_handler = get_remote_access_handler()

        # One-time startup housekeeping before entering the main loop.
        self._ensure_no_orphans()
        self._emit_restart_event()
        self._ensure_partition_assigned()
        self._ensure_readonly_files()
        self._ensure_cgroups_initialized()

        # Poll less frequently when extension processing is disabled.
        goal_state_interval = GOAL_STATE_INTERVAL \
            if conf.get_extensions_enabled() \
            else GOAL_STATE_INTERVAL_DISABLED

        while self.running:
            # Exit if the daemon parent is gone (skipped in debug mode, e.g. unit tests).
            if not debug and self._is_orphaned:
                logger.info("Agent {0} is an orphan -- exiting", CURRENT_AGENT)
                break

            # NOTE(review): restarting via .start() after .is_alive() is False assumes
            # the handler object supports being restarted — confirm against the handler impl.
            if not monitor_thread.is_alive():
                logger.warn(u"Monitor thread died, restarting")
                monitor_thread.start()

            if not env_thread.is_alive():
                logger.warn(u"Environment thread died, restarting")
                env_thread.start()

            # Exit the loop so the daemon can launch the updated (or reverted) agent.
            if self._upgrade_available():
                available_agent = self.get_latest_agent()
                if available_agent is None:
                    logger.info(
                        "Agent {0} is reverting to the installed agent -- exiting",
                        CURRENT_AGENT)
                else:
                    logger.info(
                        u"Agent {0} discovered update {1} -- exiting",
                        CURRENT_AGENT,
                        available_agent.name)
                break

            utc_start = datetime.utcnow()

            last_etag = exthandlers_handler.last_etag
            exthandlers_handler.run()

            remote_access_handler.run()

            # An etag change means a new goal state was processed; re-apply file protections.
            if last_etag != exthandlers_handler.last_etag:
                self._ensure_readonly_files()

            duration = elapsed_milliseconds(utc_start)
            logger.info('ProcessGoalState completed [incarnation {0}; {1} ms]',
                        exthandlers_handler.last_etag, duration)
            add_event(
                AGENT_NAME,
                op=WALAEventOperation.ProcessGoalState,
                duration=duration,
                message="Incarnation {0}".format(exthandlers_handler.last_etag))

            time.sleep(goal_state_interval)

    except Exception as e:
        msg = u"Agent {0} failed with exception: {1}".format(CURRENT_AGENT, ustr(e))
        self._set_sentinel(msg=msg)
        logger.warn(msg)
        logger.warn(traceback.format_exc())
        sys.exit(1)
        # additional return here because sys.exit is mocked in unit tests
        return

    self._shutdown()
    sys.exit(0)
def run(self, debug=False):
    """
    This is the main loop which watches for agent and extension updates.

    Fetches the goal state once up front (telemetry initialization
    depends on it), starts the monitoring/environment threads, then
    loops: refresh the goal state, process extensions and remote access,
    and send heartbeat telemetry. Exits via sys.exit when orphaned, when
    an agent update is available, or on an unhandled exception.
    """

    try:
        logger.info(u"Agent {0} is running as the goal state agent", CURRENT_AGENT)

        #
        # Fetch the goal state one time; some components depend on information provided by the goal state and this
        # call ensures the required info is initialized (e.g telemetry depends on the container ID.)
        #
        protocol = self.protocol_util.get_protocol()
        protocol.update_goal_state()

        initialize_event_logger_vminfo_common_parameters(protocol)

        # Log OS-specific info.
        os_info_msg = u"Distro: {0}-{1}; OSUtil: {2}; AgentService: {3}; Python: {4}.{5}.{6}".format(
            DISTRO_NAME, DISTRO_VERSION, type(self.osutil).__name__,
            self.osutil.service_name, PY_VERSION_MAJOR, PY_VERSION_MINOR,
            PY_VERSION_MICRO)
        logger.info(os_info_msg)
        add_event(AGENT_NAME, op=WALAEventOperation.OSInfo, message=os_info_msg)

        # Launch monitoring threads
        from azurelinuxagent.ga.monitor import get_monitor_handler
        monitor_thread = get_monitor_handler()
        monitor_thread.run()

        from azurelinuxagent.ga.env import get_env_handler
        env_thread = get_env_handler()
        env_thread.run()

        from azurelinuxagent.ga.exthandlers import get_exthandlers_handler, migrate_handler_state
        exthandlers_handler = get_exthandlers_handler(protocol)
        migrate_handler_state()

        from azurelinuxagent.ga.remoteaccess import get_remote_access_handler
        remote_access_handler = get_remote_access_handler(protocol)

        # One-time startup housekeeping before entering the main loop.
        self._ensure_no_orphans()
        self._emit_restart_event()
        self._emit_changes_in_default_configuration()
        self._ensure_partition_assigned()
        self._ensure_readonly_files()
        self._ensure_cgroups_initialized()

        # Poll less frequently when extension processing is disabled.
        goal_state_interval = conf.get_goal_state_period(
        ) if conf.get_extensions_enabled(
        ) else GOAL_STATE_INTERVAL_DISABLED

        while self.running:
            #
            # Check that the parent process (the agent's daemon) is still running
            #
            if not debug and self._is_orphaned:
                logger.info("Agent {0} is an orphan -- exiting", CURRENT_AGENT)
                break

            #
            # Check that all the threads are still running
            #
            # NOTE(review): restarting via .start() after .is_alive() is False assumes
            # the handler object supports being restarted — confirm against the handler impl.
            if not monitor_thread.is_alive():
                logger.warn(u"Monitor thread died, restarting")
                monitor_thread.start()

            if not env_thread.is_alive():
                logger.warn(u"Environment thread died, restarting")
                env_thread.start()

            #
            # Process the goal state
            #
            # A failed refresh is counted (reported via heartbeat) and the iteration
            # skips goal-state processing rather than aborting the agent.
            if not protocol.try_update_goal_state():
                self._heartbeat_update_goal_state_error_count += 1
            else:
                # Exit the loop so the daemon can launch the updated (or reverted) agent.
                if self._upgrade_available(protocol):
                    available_agent = self.get_latest_agent()
                    if available_agent is None:
                        logger.info(
                            "Agent {0} is reverting to the installed agent -- exiting",
                            CURRENT_AGENT)
                    else:
                        logger.info(
                            u"Agent {0} discovered update {1} -- exiting",
                            CURRENT_AGENT,
                            available_agent.name)
                    break

                utc_start = datetime.utcnow()

                last_etag = exthandlers_handler.last_etag
                exthandlers_handler.run()

                remote_access_handler.run()

                # An etag change means a new goal state was processed; re-apply file protections.
                if last_etag != exthandlers_handler.last_etag:
                    self._ensure_readonly_files()

                duration = elapsed_milliseconds(utc_start)
                logger.info(
                    'ProcessGoalState completed [incarnation {0}; {1} ms]',
                    exthandlers_handler.last_etag, duration)
                add_event(AGENT_NAME,
                          op=WALAEventOperation.ProcessGoalState,
                          duration=duration,
                          message="Incarnation {0}".format(
                              exthandlers_handler.last_etag))

            # Heartbeat goes out every iteration, even when the goal-state refresh failed.
            self._send_heartbeat_telemetry(protocol)
            time.sleep(goal_state_interval)

    except Exception as e:
        msg = u"Agent {0} failed with exception: {1}".format(
            CURRENT_AGENT, ustr(e))
        self._set_sentinel(msg=msg)
        logger.warn(msg)
        logger.warn(traceback.format_exc())
        sys.exit(1)
        # additional return here because sys.exit is mocked in unit tests
        return

    self._shutdown()
    sys.exit(0)
def run(self):
    """
    This is the main loop which watches for agent and extension updates.

    Starts the monitoring and environment threads, processes the goal
    state each iteration, and exits (via sys.exit) when orphaned, when
    an agent update is available, or on an unhandled exception.
    """

    try:
        logger.info(u"Agent {0} is running as the goal state agent", CURRENT_AGENT)

        # Launch monitoring threads
        from azurelinuxagent.ga.monitor import get_monitor_handler
        monitor_thread = get_monitor_handler()
        monitor_thread.run()

        from azurelinuxagent.ga.env import get_env_handler
        env_thread = get_env_handler()
        env_thread.run()

        from azurelinuxagent.ga.exthandlers import get_exthandlers_handler, migrate_handler_state
        exthandlers_handler = get_exthandlers_handler()
        migrate_handler_state()

        from azurelinuxagent.ga.remoteaccess import get_remote_access_handler
        remote_access_handler = get_remote_access_handler()

        # One-time startup housekeeping before entering the main loop.
        self._ensure_no_orphans()
        self._emit_restart_event()
        self._ensure_partition_assigned()
        self._ensure_readonly_files()

        # Poll less frequently when extension processing is disabled.
        goal_state_interval = GOAL_STATE_INTERVAL \
            if conf.get_extensions_enabled() \
            else GOAL_STATE_INTERVAL_DISABLED

        while self.running:
            # Exit if the daemon parent process is gone.
            if self._is_orphaned:
                logger.info("Agent {0} is an orphan -- exiting", CURRENT_AGENT)
                break

            # NOTE(review): restarting via .start() after .is_alive() is False assumes
            # the handler object supports being restarted — confirm against the handler impl.
            if not monitor_thread.is_alive():
                logger.warn(u"Monitor thread died, restarting")
                monitor_thread.start()

            if not env_thread.is_alive():
                logger.warn(u"Environment thread died, restarting")
                env_thread.start()

            # Exit the loop so the daemon can launch the updated (or reverted) agent.
            if self._upgrade_available():
                available_agent = self.get_latest_agent()
                if available_agent is None:
                    logger.info(
                        "Agent {0} is reverting to the installed agent -- exiting",
                        CURRENT_AGENT)
                else:
                    logger.info(
                        u"Agent {0} discovered update {1} -- exiting",
                        CURRENT_AGENT,
                        available_agent.name)
                break

            utc_start = datetime.utcnow()

            last_etag = exthandlers_handler.last_etag
            exthandlers_handler.run()

            remote_access_handler.run()

            # An etag change means a new goal state was processed; re-apply file protections.
            if last_etag != exthandlers_handler.last_etag:
                self._ensure_readonly_files()

            duration = elapsed_milliseconds(utc_start)
            logger.info('ProcessGoalState completed [incarnation {0}; {1} ms]',
                        exthandlers_handler.last_etag, duration)
            add_event(
                AGENT_NAME,
                op=WALAEventOperation.ProcessGoalState,
                duration=duration,
                message="Incarnation {0}".format(exthandlers_handler.last_etag))

            time.sleep(goal_state_interval)

    except Exception as e:
        msg = u"Agent {0} failed with exception: {1}".format(CURRENT_AGENT, ustr(e))
        self._set_sentinel(msg=msg)
        logger.warn(msg)
        logger.warn(traceback.format_exc())
        sys.exit(1)
        # additional return here because sys.exit is mocked in unit tests
        return

    self._shutdown()
    sys.exit(0)
def run(self, debug=False):
    """
    This is the main loop which watches for agent and extension updates.

    Fetches the goal state once up front (telemetry initialization
    depends on it), performs one-time setup, launches all monitoring
    thread handlers, then loops: refresh the goal state, process
    extensions and remote access, and send heartbeat telemetry. Exits
    via sys.exit when orphaned, when an agent update is available, or on
    an unhandled exception.
    """

    try:
        logger.info(u"Agent {0} is running as the goal state agent", CURRENT_AGENT)

        #
        # Fetch the goal state one time; some components depend on information provided by the goal state and this
        # call ensures the required info is initialized (e.g telemetry depends on the container ID.)
        #
        protocol = self.protocol_util.get_protocol()
        protocol.update_goal_state()

        # Initialize the common parameters for telemetry events
        initialize_event_logger_vminfo_common_parameters(protocol)

        # Log OS-specific info.
        os_info_msg = u"Distro: {dist_name}-{dist_ver}; "\
            u"OSUtil: {util_name}; AgentService: {service_name}; "\
            u"Python: {py_major}.{py_minor}.{py_micro}; "\
            u"systemd: {systemd}; "\
            u"LISDrivers: {lis_ver}; "\
            u"logrotate: {has_logrotate};".format(
                dist_name=DISTRO_NAME, dist_ver=DISTRO_VERSION,
                util_name=type(self.osutil).__name__,
                service_name=self.osutil.service_name,
                py_major=PY_VERSION_MAJOR, py_minor=PY_VERSION_MINOR,
                py_micro=PY_VERSION_MICRO, systemd=systemd.is_systemd(),
                lis_ver=get_lis_version(), has_logrotate=has_logrotate()
            )

        logger.info(os_info_msg)
        add_event(AGENT_NAME, op=WALAEventOperation.OSInfo, message=os_info_msg)

        #
        # Perform initialization tasks
        #
        from azurelinuxagent.ga.exthandlers import get_exthandlers_handler, migrate_handler_state
        exthandlers_handler = get_exthandlers_handler(protocol)
        migrate_handler_state()

        from azurelinuxagent.ga.remoteaccess import get_remote_access_handler
        remote_access_handler = get_remote_access_handler(protocol)

        self._ensure_no_orphans()
        self._emit_restart_event()
        self._emit_changes_in_default_configuration()
        self._ensure_partition_assigned()
        self._ensure_readonly_files()
        self._ensure_cgroups_initialized()
        self._ensure_extension_telemetry_state_configured_properly(
            protocol)
        self._ensure_firewall_rules_persisted(
            dst_ip=protocol.get_endpoint())

        # Get all thread handlers
        # The collect handler consumes events produced by the send handler, so they share it.
        telemetry_handler = get_send_telemetry_events_handler(
            self.protocol_util)
        all_thread_handlers = [
            get_monitor_handler(),
            get_env_handler(),
            telemetry_handler,
            get_collect_telemetry_events_handler(telemetry_handler)
        ]

        if is_log_collection_allowed():
            all_thread_handlers.append(get_collect_logs_handler())

        # Launch all monitoring threads
        for thread_handler in all_thread_handlers:
            thread_handler.run()

        # Poll less frequently when extension processing is disabled.
        goal_state_interval = conf.get_goal_state_period(
        ) if conf.get_extensions_enabled(
        ) else GOAL_STATE_INTERVAL_DISABLED

        while self.running:
            #
            # Check that the parent process (the agent's daemon) is still running
            #
            if not debug and self._is_orphaned:
                logger.info("Agent {0} is an orphan -- exiting", CURRENT_AGENT)
                break

            #
            # Check that all the threads are still running
            #
            # NOTE(review): restarting via .start() after .is_alive() is False assumes
            # the handler object supports being restarted — confirm against the handler impl.
            for thread_handler in all_thread_handlers:
                if not thread_handler.is_alive():
                    logger.warn("{0} thread died, restarting".format(
                        thread_handler.get_thread_name()))
                    thread_handler.start()

            #
            # Process the goal state
            #
            # A failed refresh is counted (reported via heartbeat) and the iteration
            # skips goal-state processing rather than aborting the agent.
            if not protocol.try_update_goal_state():
                self._heartbeat_update_goal_state_error_count += 1
            else:
                # Exit the loop so the daemon can launch the updated (or reverted) agent.
                if self._upgrade_available(protocol):
                    available_agent = self.get_latest_agent()
                    if available_agent is None:
                        logger.info(
                            "Agent {0} is reverting to the installed agent -- exiting",
                            CURRENT_AGENT)
                    else:
                        logger.info(
                            u"Agent {0} discovered update {1} -- exiting",
                            CURRENT_AGENT,
                            available_agent.name)
                    break

                utc_start = datetime.utcnow()

                last_etag = exthandlers_handler.last_etag
                exthandlers_handler.run()

                remote_access_handler.run()

                # An etag change means a new goal state was processed; re-apply file protections.
                if last_etag != exthandlers_handler.last_etag:
                    self._ensure_readonly_files()

                duration = elapsed_milliseconds(utc_start)

                activity_id, correlation_id, gs_creation_time = exthandlers_handler.get_goal_state_debug_metadata(
                )
                msg = 'ProcessGoalState completed [Incarnation: {0}; {1} ms; Activity Id: {2}; Correlation Id: {3}; GS Creation Time: {4}]'.format(
                    exthandlers_handler.last_etag, duration, activity_id,
                    correlation_id, gs_creation_time)
                logger.info(msg)
                add_event(AGENT_NAME,
                          op=WALAEventOperation.ProcessGoalState,
                          duration=duration,
                          message=msg)

            # Heartbeat goes out every iteration, even when the goal-state refresh failed.
            self._send_heartbeat_telemetry(protocol)
            time.sleep(goal_state_interval)

    except Exception as error:
        msg = u"Agent {0} failed with exception: {1}".format(
            CURRENT_AGENT, ustr(error))
        self._set_sentinel(msg=msg)
        logger.warn(msg)
        logger.warn(traceback.format_exc())
        sys.exit(1)
        # additional return here because sys.exit is mocked in unit tests
        return

    self._shutdown()
    sys.exit(0)