def _send_heartbeat_telemetry(self, protocol): if self._last_telemetry_heartbeat is None: self._last_telemetry_heartbeat = datetime.utcnow( ) - UpdateHandler.TELEMETRY_HEARTBEAT_PERIOD if datetime.utcnow() >= (self._last_telemetry_heartbeat + UpdateHandler.TELEMETRY_HEARTBEAT_PERIOD): dropped_packets = self.osutil.get_firewall_dropped_packets( protocol.get_endpoint()) auto_update_enabled = 1 if conf.get_autoupdate_enabled() else 0 telemetry_msg = "{0};{1};{2};{3};{4}".format( self._heartbeat_counter, self._heartbeat_id, dropped_packets, self._heartbeat_update_goal_state_error_count, auto_update_enabled) add_event(name=AGENT_NAME, version=CURRENT_VERSION, op=WALAEventOperation.HeartBeat, is_success=True, message=telemetry_msg, log_event=False) self._heartbeat_counter += 1 self._heartbeat_update_goal_state_error_count = 0 debug_log_msg = "[DEBUG HeartbeatCounter: {0};HeartbeatId: {1};DroppedPackets: {2};" \ "UpdateGSErrors: {3};AutoUpdate: {4}]".format(self._heartbeat_counter, self._heartbeat_id, dropped_packets, self._heartbeat_update_goal_state_error_count, auto_update_enabled) logger.info( u"[HEARTBEAT] Agent {0} is running as the goal state agent {1}", CURRENT_AGENT, debug_log_msg) self._last_telemetry_heartbeat = datetime.utcnow()
def _upgrade_available(self, protocol, base_version=CURRENT_VERSION): # Ignore new agents if updating is disabled if not conf.get_autoupdate_enabled(): return False now = time.time() if self.last_attempt_time is not None: next_attempt_time = self.last_attempt_time + \ conf.get_autoupdate_frequency() else: next_attempt_time = now if next_attempt_time > now: return False family = conf.get_autoupdate_gafamily() logger.info("Checking for agent updates (family: {0})", family) self.last_attempt_time = now try: manifest_list, etag = protocol.get_vmagent_manifests() manifests = [m for m in manifest_list.vmAgentManifests \ if m.family == family and len(m.versionsManifestUris) > 0] if len(manifests) == 0: logger.verbose(u"Incarnation {0} has no {1} agent updates", etag, family) return False pkg_list = protocol.get_vmagent_pkgs(manifests[0]) # Set the agents to those available for download at least as # current as the existing agent and remove from disk any agent # no longer reported to the VM. # Note: # The code leaves on disk available, but blacklisted, agents # so as to preserve the state. Otherwise, those agents could be # again downloaded and inappropriately retried. host = self._get_host_plugin(protocol=protocol) self._set_agents( [GuestAgent(pkg=pkg, host=host) for pkg in pkg_list.versions]) self._purge_agents() self._filter_blacklisted_agents() # Return True if current agent is no longer available or an # agent with a higher version number is available return not self._is_version_eligible(base_version) \ or (len(self.agents) > 0 and self.agents[0].version > base_version) except Exception as e: msg = u"Exception retrieving agent manifests: {0}".format( ustr(traceback.format_exc())) add_event(AGENT_NAME, op=WALAEventOperation.Download, version=CURRENT_VERSION, is_success=False, message=msg) return False
def get_latest_agent(self): """ If autoupdate is enabled, return the most current, downloaded, non-blacklisted agent (if any). Otherwise, return None (implying to use the installed agent). """ if not conf.get_autoupdate_enabled(): return None self._load_agents() available_agents = [agent for agent in self.agents if agent.is_available] return available_agents[0] if len(available_agents) >= 1 else None
def get_latest_agent(self): """ If autoupdate is enabled, return the most current, downloaded, non-blacklisted agent (if any). Otherwise, return None (implying to use the installed agent). """ if not conf.get_autoupdate_enabled(): return None self._load_agents() available_agents = [ agent for agent in self.agents if agent.is_available ] return available_agents[0] if len(available_agents) >= 1 else None
def get_latest_agent(self): """ If autoupdate is enabled, return the most current, downloaded, non-blacklisted agent which is not the current version (if any). Otherwise, return None (implying to use the installed agent). """ if not conf.get_autoupdate_enabled(): return None self._load_agents() available_agents = [agent for agent in self.agents if agent.is_available and agent.version > FlexibleVersion(AGENT_VERSION)] return available_agents[0] if len(available_agents) >= 1 else None
def get_latest_agent(self): """ If autoupdate is enabled, return the most current, downloaded, non-blacklisted agent which is not the current version (if any). Otherwise, return None (implying to use the installed agent). """ if not conf.get_autoupdate_enabled(): return None self._load_agents() available_agents = [agent for agent in self.agents if agent.is_available and agent.version > FlexibleVersion(AGENT_VERSION)] return available_agents[0] if len(available_agents) >= 1 else None
def _upgrade_available(self, base_version=CURRENT_VERSION): # Ignore new agents if updating is disabled if not conf.get_autoupdate_enabled(): return False now = time.time() if self.last_attempt_time is not None: next_attempt_time = self.last_attempt_time + conf.get_autoupdate_frequency() else: next_attempt_time = now if next_attempt_time > now: return False family = conf.get_autoupdate_gafamily() logger.verbose("Checking for agent family {0} updates", family) self.last_attempt_time = now try: protocol = self.protocol_util.get_protocol() manifest_list, etag = protocol.get_vmagent_manifests() except Exception as e: msg = u"Exception retrieving agent manifests: {0}".format(ustr(e)) logger.warn(msg) add_event( AGENT_NAME, op=WALAEventOperation.Download, version=CURRENT_VERSION, is_success=False, message=msg) return False manifests = [m for m in manifest_list.vmAgentManifests \ if m.family == family and len(m.versionsManifestUris) > 0] if len(manifests) == 0: logger.verbose(u"Incarnation {0} has no agent family {1} updates", etag, family) return False try: pkg_list = protocol.get_vmagent_pkgs(manifests[0]) except ProtocolError as e: msg = u"Incarnation {0} failed to get {1} package list: " \ u"{2}".format( etag, family, ustr(e)) logger.warn(msg) add_event( AGENT_NAME, op=WALAEventOperation.Download, version=CURRENT_VERSION, is_success=False, message=msg) return False # Set the agents to those available for download at least as current # as the existing agent and remove from disk any agent no longer # reported to the VM. # Note: # The code leaves on disk available, but blacklisted, agents so as to # preserve the state. Otherwise, those agents could be again # downloaded and inappropriately retried. host = None if protocol and protocol.client: host = protocol.client.get_host_plugin() self._set_agents([GuestAgent(pkg=pkg, host=host) for pkg in pkg_list.versions]) self._purge_agents() self._filter_blacklisted_agents() # Return True if agents more recent than the current are available return len(self.agents) > 0 and self.agents[0].version > base_version
def run_latest(self): """ This method is called from the daemon to find and launch the most current, downloaded agent. Note: - Most events should be tagged to the launched agent (agent_version) """ if self.child_process is not None: raise Exception("Illegal attempt to launch multiple goal state Agent processes") if self.signal_handler is None: self.signal_handler = signal.signal(signal.SIGTERM, self.forward_signal) latest_agent = self.get_latest_agent() if latest_agent is None: logger.info(u"Installed Agent {0} is the most current agent", CURRENT_AGENT) agent_cmd = "python -u {0} -run-exthandlers".format(sys.argv[0]) agent_dir = os.getcwd() agent_name = CURRENT_AGENT agent_version = CURRENT_VERSION else: logger.info(u"Determined Agent {0} to be the latest agent", latest_agent.name) agent_cmd = latest_agent.get_agent_cmd() agent_dir = latest_agent.get_agent_dir() agent_name = latest_agent.name agent_version = latest_agent.version try: # Launch the correct Python version for python-based agents cmds = textutil.safe_shlex_split(agent_cmd) if cmds[0].lower() == "python": cmds[0] = get_python_cmd() agent_cmd = " ".join(cmds) self._evaluate_agent_health(latest_agent) self.child_process = subprocess.Popen( cmds, cwd=agent_dir, stdout=sys.stdout, stderr=sys.stderr, env=os.environ) logger.verbose(u"Agent {0} launched with command '{1}'", agent_name, agent_cmd) # If the most current agent is the installed agent and update is enabled, # assume updates are likely available and poll every second. # This reduces the start-up impact of finding / launching agent updates on # fresh VMs. if latest_agent is None and conf.get_autoupdate_enabled(): poll_interval = 1 else: poll_interval = CHILD_POLL_INTERVAL ret = None start_time = time.time() while (time.time() - start_time) < CHILD_HEALTH_INTERVAL: time.sleep(poll_interval) ret = self.child_process.poll() if ret is not None: break if ret is None or ret <= 0: msg = u"Agent {0} launched with command '{1}' is successfully running".format( agent_name, agent_cmd) logger.info(msg) add_event( AGENT_NAME, version=agent_version, op=WALAEventOperation.Enable, is_success=True, message=msg) if ret is None: ret = self.child_process.wait() else: msg = u"Agent {0} launched with command '{1}' failed with return code: {2}".format( agent_name, agent_cmd, ret) logger.warn(msg) add_event( AGENT_NAME, version=agent_version, op=WALAEventOperation.Enable, is_success=False, message=msg) if ret is not None and ret > 0: msg = u"Agent {0} launched with command '{1}' returned code: {2}".format( agent_name, agent_cmd, ret) logger.warn(msg) if latest_agent is not None: latest_agent.mark_failure() except Exception as e: msg = u"Agent {0} launched with command '{1}' failed with exception: {2}".format( agent_name, agent_cmd, ustr(e)) logger.warn(msg) add_event( AGENT_NAME, version=agent_version, op=WALAEventOperation.Enable, is_success=False, message=msg) if latest_agent is not None: latest_agent.mark_failure(is_fatal=True) self.child_process = None return
def _upgrade_available(self, base_version=CURRENT_VERSION): # Ignore new agents if updating is disabled if not conf.get_autoupdate_enabled(): return False now = time.time() if self.last_attempt_time is not None: next_attempt_time = self.last_attempt_time + conf.get_autoupdate_frequency( ) else: next_attempt_time = now if next_attempt_time > now: return False family = conf.get_autoupdate_gafamily() logger.info("Checking for agent family {0} updates", family) self.last_attempt_time = now try: protocol = self.protocol_util.get_protocol() manifest_list, etag = protocol.get_vmagent_manifests() except Exception as e: msg = u"Exception retrieving agent manifests: {0}".format(ustr(e)) logger.warn(msg) add_event(AGENT_NAME, op=WALAEventOperation.Download, version=CURRENT_VERSION, is_success=False, message=msg) return False manifests = [m for m in manifest_list.vmAgentManifests \ if m.family == family and len(m.versionsManifestUris) > 0] if len(manifests) == 0: logger.info(u"Incarnation {0} has no agent family {1} updates", etag, family) return False try: pkg_list = protocol.get_vmagent_pkgs(manifests[0]) except ProtocolError as e: msg = u"Incarnation {0} failed to get {1} package list: " \ u"{2}".format( etag, family, ustr(e)) logger.warn(msg) add_event(AGENT_NAME, op=WALAEventOperation.Download, version=CURRENT_VERSION, is_success=False, message=msg) return False # Set the agents to those available for download at least as current # as the existing agent and remove from disk any agent no longer # reported to the VM. # Note: # The code leaves on disk available, but blacklisted, agents so as to # preserve the state. Otherwise, those agents could be again # downloaded and inappropriately retried. self._set_agents([GuestAgent(pkg=pkg) for pkg in pkg_list.versions]) self._purge_agents() self._filter_blacklisted_agents() # Return True if agents more recent than the current are available return len(self.agents) > 0 and self.agents[0].version > base_version
def _upgrade_available(self, base_version=CURRENT_VERSION): # Emit an event expressing the state of AutoUpdate # Note: # - Duplicate events get suppressed; state transitions always emit add_event(AGENT_NAME, version=CURRENT_VERSION, op=WALAEventOperation.AutoUpdate, is_success=conf.get_autoupdate_enabled()) # Ignore new agents if updating is disabled if not conf.get_autoupdate_enabled(): return False now = time.time() if self.last_attempt_time is not None: next_attempt_time = self.last_attempt_time + \ conf.get_autoupdate_frequency() else: next_attempt_time = now if next_attempt_time > now: return False family = conf.get_autoupdate_gafamily() logger.verbose("Checking for agent family {0} updates", family) self.last_attempt_time = now protocol = self.protocol_util.get_protocol() for update_goal_state in [False, True]: try: if update_goal_state: protocol.update_goal_state(forced=True) manifest_list, etag = protocol.get_vmagent_manifests() manifests = [m for m in manifest_list.vmAgentManifests \ if m.family == family and \ len(m.versionsManifestUris) > 0] if len(manifests) == 0: logger.verbose(u"Incarnation {0} has no {1} agent updates", etag, family) return False pkg_list = protocol.get_vmagent_pkgs(manifests[0]) # Set the agents to those available for download at least as # current as the existing agent and remove from disk any agent # no longer reported to the VM. # Note: # The code leaves on disk available, but blacklisted, agents # so as to preserve the state. Otherwise, those agents could be # again downloaded and inappropriately retried. host = self._get_host_plugin(protocol=protocol) self._set_agents([GuestAgent(pkg=pkg, host=host) \ for pkg in pkg_list.versions]) self._purge_agents() self._evaluate_deployments() self._filter_blacklisted_agents() # Return True if current agent is no longer available or an # agent with a higher version number is available return not self._is_version_eligible(base_version) \ or (len(self.agents) > 0 \ and self.agents[0].version > base_version) except Exception as e: if isinstance(e, ResourceGoneError): continue msg = u"Exception retrieving agent manifests: {0}".format( ustr(e)) logger.warn(msg) add_event(AGENT_NAME, op=WALAEventOperation.Download, version=CURRENT_VERSION, is_success=False, message=msg) return False
def run_latest(self, child_args=None): """ This method is called from the daemon to find and launch the most current, downloaded agent. Note: - Most events should be tagged to the launched agent (agent_version) """ if self.child_process is not None: raise Exception( "Illegal attempt to launch multiple goal state Agent processes" ) if self.signal_handler is None: self.signal_handler = signal.signal(signal.SIGTERM, self.forward_signal) latest_agent = self.get_latest_agent() if latest_agent is None: logger.info(u"Installed Agent {0} is the most current agent", CURRENT_AGENT) agent_cmd = "python -u {0} -run-exthandlers".format(sys.argv[0]) agent_dir = os.getcwd() agent_name = CURRENT_AGENT agent_version = CURRENT_VERSION else: logger.info(u"Determined Agent {0} to be the latest agent", latest_agent.name) agent_cmd = latest_agent.get_agent_cmd() agent_dir = latest_agent.get_agent_dir() agent_name = latest_agent.name agent_version = latest_agent.version if child_args is not None: agent_cmd = "{0} {1}".format(agent_cmd, child_args) try: # Launch the correct Python version for python-based agents cmds = textutil.safe_shlex_split(agent_cmd) if cmds[0].lower() == "python": cmds[0] = get_python_cmd() agent_cmd = " ".join(cmds) self._evaluate_agent_health(latest_agent) self.child_process = subprocess.Popen(cmds, cwd=agent_dir, stdout=sys.stdout, stderr=sys.stderr, env=os.environ) logger.verbose(u"Agent {0} launched with command '{1}'", agent_name, agent_cmd) # If the most current agent is the installed agent and update is enabled, # assume updates are likely available and poll every second. # This reduces the start-up impact of finding / launching agent updates on # fresh VMs. if latest_agent is None and conf.get_autoupdate_enabled(): poll_interval = 1 else: poll_interval = CHILD_POLL_INTERVAL ret = None start_time = time.time() while (time.time() - start_time) < CHILD_HEALTH_INTERVAL: time.sleep(poll_interval) ret = self.child_process.poll() if ret is not None: break if ret is None or ret <= 0: msg = u"Agent {0} launched with command '{1}' is successfully running".format( agent_name, agent_cmd) logger.info(msg) add_event(AGENT_NAME, version=agent_version, op=WALAEventOperation.Enable, is_success=True, message=msg) if ret is None: ret = self.child_process.wait() else: msg = u"Agent {0} launched with command '{1}' failed with return code: {2}".format( agent_name, agent_cmd, ret) logger.warn(msg) add_event(AGENT_NAME, version=agent_version, op=WALAEventOperation.Enable, is_success=False, message=msg) if ret is not None and ret > 0: msg = u"Agent {0} launched with command '{1}' returned code: {2}".format( agent_name, agent_cmd, ret) logger.warn(msg) if latest_agent is not None: latest_agent.mark_failure(is_fatal=True) except Exception as e: # Ignore child errors during termination if self.running: msg = u"Agent {0} launched with command '{1}' failed with exception: {2}".format( agent_name, agent_cmd, ustr(e)) logger.warn(msg) add_event(AGENT_NAME, version=agent_version, op=WALAEventOperation.Enable, is_success=False, message=msg) if latest_agent is not None: latest_agent.mark_failure(is_fatal=True) self.child_process = None return
def _upgrade_available(self, base_version=CURRENT_VERSION): # Emit an event expressing the state of AutoUpdate # Note: # - Duplicate events get suppressed; state transitions always emit add_event( AGENT_NAME, version=CURRENT_VERSION, op=WALAEventOperation.AutoUpdate, is_success=conf.get_autoupdate_enabled()) # Ignore new agents if updating is disabled if not conf.get_autoupdate_enabled(): return False now = time.time() if self.last_attempt_time is not None: next_attempt_time = self.last_attempt_time + \ conf.get_autoupdate_frequency() else: next_attempt_time = now if next_attempt_time > now: return False family = conf.get_autoupdate_gafamily() logger.verbose("Checking for agent family {0} updates", family) self.last_attempt_time = now protocol = self.protocol_util.get_protocol() for update_goal_state in [False, True]: try: if update_goal_state: protocol.update_goal_state(forced=True) manifest_list, etag = protocol.get_vmagent_manifests() manifests = [m for m in manifest_list.vmAgentManifests \ if m.family == family and \ len(m.versionsManifestUris) > 0] if len(manifests) == 0: logger.verbose(u"Incarnation {0} has no {1} agent updates", etag, family) return False pkg_list = protocol.get_vmagent_pkgs(manifests[0]) # Set the agents to those available for download at least as # current as the existing agent and remove from disk any agent # no longer reported to the VM. # Note: # The code leaves on disk available, but blacklisted, agents # so as to preserve the state. Otherwise, those agents could be # again downloaded and inappropriately retried. host = self._get_host_plugin(protocol=protocol) self._set_agents([GuestAgent(pkg=pkg, host=host) \ for pkg in pkg_list.versions]) self._purge_agents() self._filter_blacklisted_agents() # Return True if current agent is no longer available or an # agent with a higher version number is available return not self._is_version_eligible(base_version) \ or (len(self.agents) > 0 \ and self.agents[0].version > base_version) except Exception as e: if isinstance(e, ResourceGoneError): continue msg = u"Exception retrieving agent manifests: {0}".format( ustr(traceback.format_exc())) logger.warn(msg) add_event( AGENT_NAME, op=WALAEventOperation.Download, version=CURRENT_VERSION, is_success=False, message=msg) return False