def _detect_protocol(self, protocols):
    """
    Probe the given protocols in order and return the first one detected.

    Any previously stored protocol state is cleared first. The full list is
    then tried up to MAX_RETRY times, sleeping PROBE_INTERVAL between passes.

    :param protocols: iterable of protocol names; "WireProtocol" selects the
        wire-protocol probe, every other name selects the metadata probe
    :returns: the detected protocol object
    :raises ProtocolNotFoundError: when no probe succeeds in any pass
    """
    self.clear_protocol()

    for attempt in range(MAX_RETRY):
        for name in protocols:
            try:
                # Choose the probe matching the requested protocol name.
                if name == "WireProtocol":
                    probe = self._detect_wire_protocol
                else:
                    probe = self._detect_metadata_protocol
                detected = probe()

                # Record the endpoint before handing the protocol back.
                IOErrorCounter.set_protocol_endpoint(
                    endpoint=detected.endpoint)
                return detected
            except ProtocolError as err:
                # A failed probe is not fatal; move on to the next name.
                logger.info("Protocol endpoint not found: {0}, {1}",
                            name,
                            err)

        # Wait before the next pass, but not after the final one.
        if attempt + 1 < MAX_RETRY:
            logger.info("Retry detect protocols: retry={0}", attempt)
            time.sleep(PROBE_INTERVAL)

    raise ProtocolNotFoundError("No protocol found.")
def _detect_protocol(self, protocols):
    """
    Walk through the candidate protocol names until one can be detected.

    Clears the current protocol, then makes up to MAX_RETRY passes over
    ``protocols``. A ProtocolError from a probe is logged and the next
    candidate is tried; between passes the method sleeps PROBE_INTERVAL.

    :param protocols: iterable of protocol names to probe, in priority order
    :returns: the first protocol object whose probe succeeds
    :raises ProtocolNotFoundError: if all passes complete without success
    """
    self.clear_protocol()

    for retry in range(0, MAX_RETRY):
        for candidate in protocols:
            try:
                use_wire = candidate == "WireProtocol"
                found = self._detect_wire_protocol() if use_wire \
                    else self._detect_metadata_protocol()
                endpoint = found.endpoint
                IOErrorCounter.set_protocol_endpoint(endpoint=endpoint)
            except ProtocolError as error:
                logger.info("Protocol endpoint not found: {0}, {1}",
                            candidate,
                            error)
            else:
                return found

        if retry >= MAX_RETRY - 1:
            # Last pass: leave the loop without logging or sleeping.
            continue

        logger.info("Retry detect protocols: retry={0}", retry)
        time.sleep(PROBE_INTERVAL)

    raise ProtocolNotFoundError("No protocol found.")
def get_protocol(self):
    """
    Return the protocol instance, creating and caching it on first use.

    The whole lookup runs while holding ``self._lock``. A cached protocol is
    returned unchanged. Otherwise, if the saved protocol file exists and
    contains WIRE_PROTOCOL_NAME, a WireProtocol is built for the WireServer
    endpoint; in every other case the protocol is detected, its endpoint is
    registered with IOErrorCounter and WIRE_PROTOCOL_NAME is saved.

    :returns: protocol instance
    """
    self._lock.acquire()
    try:
        if self._protocol is not None:
            return self._protocol

        # A protocol file that names MetadataProtocol must not short-circuit
        # here: detection has to run so that the WireServer transport
        # certificates get generated.
        saved_path = self._get_protocol_file_path()
        wire_protocol_saved = (
            os.path.isfile(saved_path)
            and fileutil.read_file(saved_path) == WIRE_PROTOCOL_NAME
        )

        if wire_protocol_saved:
            self._protocol = WireProtocol(self.get_wireserver_endpoint())
        else:
            logger.info("Detect protocol endpoint")
            detected = self._detect_protocol()
            IOErrorCounter.set_protocol_endpoint(
                endpoint=detected.get_endpoint())
            self._save_protocol(WIRE_PROTOCOL_NAME)
            self._protocol = detected

        # Metadata-server (MDS) artifacts are removed only once the protocol
        # is in place. On the saved-file path they may be left over from an
        # intermediate upgrade before 2.2.48 that moved from Metadata to Wire
        # protocol without any cleanup logic. On the detection path, waiting
        # until after _detect_protocol means the MDS certificates are not
        # deleted if WireServer is unreachable and the update is rolled back.
        if is_metadata_server_artifact_present():
            cleanup_metadata_server_artifacts(self.osutil)

        return self._protocol
    finally:
        self._lock.release()
def _operation(self):
    """
    Report the accumulated I/O error counts as an HttpErrors event.

    The counters are fetched through IOErrorCounter.get_and_reset(). An
    event is added only when at least one of the "hostplugin", "protocol"
    or "other" counts is greater than zero.
    """
    counts = IOErrorCounter.get_and_reset()
    hostplugin_count = counts.get("hostplugin")
    protocol_count = counts.get("protocol")
    other_count = counts.get("other")

    any_errors = hostplugin_count > 0 or protocol_count > 0 or other_count > 0
    if not any_errors:
        # Nothing to report.
        return

    summary = "hostplugin:{0};protocol:{1};other:{2}".format(
        hostplugin_count,
        protocol_count,
        other_count)
    add_event(op=WALAEventOperation.HttpErrors, message=summary)
def daemon(self):
    """
    Run the monitoring loop; this method never returns normally.

    Every pass calls collect_and_send_events() (failures are logged as
    warnings) and then sleeps 60 seconds. In addition, once every 30 minutes
    a HeartBeat event is added whose message is
    "<incarnation>;<counter>;<heartbeat id>;<dropped packets>", followed by
    an HttpErrors event when any of the I/O error counts is above zero.
    """
    heartbeat_interval = datetime.timedelta(minutes=30)
    protocol = self.protocol_util.get_protocol()
    # Back-date the last heartbeat so that the first pass is already due.
    last_sent = datetime.datetime.utcnow() - heartbeat_interval

    # Create a new identifier on each restart and reset the counter
    session_id = str(uuid.uuid4()).upper()
    sequence = 0

    while True:
        heartbeat_due = \
            datetime.datetime.utcnow() >= (last_sent + heartbeat_interval)

        if heartbeat_due:
            last_sent = datetime.datetime.utcnow()
            incarnation = protocol.get_incarnation()
            dropped = self.osutil.get_firewall_dropped_packets(
                protocol.endpoint)

            heartbeat_msg = "{0};{1};{2};{3}".format(
                incarnation, sequence, session_id, dropped)
            add_event(
                name=AGENT_NAME,
                version=CURRENT_VERSION,
                op=WALAEventOperation.HeartBeat,
                is_success=True,
                message=heartbeat_msg,
                log_event=False)

            sequence = sequence + 1

            # Report the I/O errors gathered since the previous heartbeat.
            counts = IOErrorCounter.get_and_reset()
            hostplugin_count = counts.get("hostplugin")
            protocol_count = counts.get("protocol")
            other_count = counts.get("other")

            if hostplugin_count > 0 or protocol_count > 0 or other_count > 0:
                errors_msg = "hostplugin:{0};protocol:{1};other:{2}".format(
                    hostplugin_count, protocol_count, other_count)
                add_event(
                    name=AGENT_NAME,
                    version=CURRENT_VERSION,
                    op=WALAEventOperation.HttpErrors,
                    is_success=True,
                    message=errors_msg,
                    log_event=False)

        # Event collection is best-effort; a failure must not stop the loop.
        try:
            self.collect_and_send_events()
        except Exception as err:
            logger.warn("Failed to send events: {0}", err)

        time.sleep(60)
def send_telemetry_heartbeat(self):
    """
    Add an HttpErrors event summarising the current I/O error counts.

    Counts are obtained from IOErrorCounter.get_and_reset(). The event is
    added only if the "hostplugin", "protocol" or "other" count is positive;
    otherwise the method does nothing further.
    """
    error_counts = IOErrorCounter.get_and_reset()
    from_hostplugin = error_counts.get("hostplugin")
    from_protocol = error_counts.get("protocol")
    from_other = error_counts.get("other")

    # Checked in the same order as they are reported.
    ordered_counts = (from_hostplugin, from_protocol, from_other)
    if any(count > 0 for count in ordered_counts):
        report = "hostplugin:{0};protocol:{1};other:{2}".format(
            from_hostplugin,
            from_protocol,
            from_other)
        add_event(
            name=AGENT_NAME,
            version=CURRENT_VERSION,
            op=WALAEventOperation.HttpErrors,
            is_success=True,
            message=report,
            log_event=False)
def get_protocol(self, by_file=False):
    """
    Return the protocol instance, detecting and caching it when needed.

    Runs while holding ``self.lock``. A cached protocol is returned as-is;
    next, ``_get_protocol()`` is tried, and only if it raises
    ProtocolNotFoundError are the endpoints probed.

    Probe order: WireProtocol alone when ``by_file`` is False. When
    ``by_file`` is True, MetadataProtocol is probed first if the tag file
    exists, and after WireProtocol if it does not.

    :param by_file: consult the tag file to decide where MetadataProtocol
        goes in the probe order
    :returns: protocol instance
    """
    self.lock.acquire()

    try:
        if self.protocol is not None:
            return self.protocol

        try:
            self.protocol = self._get_protocol()
            return self.protocol
        except ProtocolNotFoundError:
            # No stored protocol available; fall through to detection.
            pass

        logger.info("Detect protocol endpoints")
        candidates = [prots.WireProtocol]

        if by_file:
            tag_present = os.path.isfile(self._get_tag_file_path())
            # Index 0 puts metadata ahead of wire; len() puts it behind.
            position = 0 if tag_present else len(candidates)
            candidates.insert(position, prots.MetadataProtocol)

        detected_name, detected = self._detect_protocol(candidates)

        IOErrorCounter.set_protocol_endpoint(endpoint=detected.endpoint)
        self._save_protocol(detected_name)

        self.protocol = detected
        return self.protocol

    finally:
        self.lock.release()
def get_protocol(self, by_file=False):
    """
    Look up the protocol to use, detecting it by endpoint if necessary.

    While holding ``self.lock``: return the cached protocol if there is one,
    otherwise take the result of ``_get_protocol()``, and if that raises
    ProtocolNotFoundError probe the endpoints, register the endpoint with
    IOErrorCounter, save the detected protocol name and cache the protocol.

    :param by_file: when True, MetadataProtocol is added to the probe list:
        ahead of WireProtocol if the tag file exists, behind it otherwise.
        When False only WireProtocol is probed.
    :returns: protocol instance
    """
    self.lock.acquire()

    try:
        if self.protocol is not None:
            return self.protocol

        try:
            self.protocol = self._get_protocol()
        except ProtocolNotFoundError:
            # Nothing stored; detection below takes over.
            pass
        else:
            return self.protocol

        logger.info("Detect protocol endpoints")

        probe_order = [prots.WireProtocol]
        if by_file:
            if os.path.isfile(self._get_tag_file_path()):
                probe_order = [prots.MetadataProtocol] + probe_order
            else:
                probe_order = probe_order + [prots.MetadataProtocol]

        name, found = self._detect_protocol(probe_order)

        IOErrorCounter.set_protocol_endpoint(endpoint=found.endpoint)
        self._save_protocol(name)
        self.protocol = found

        return self.protocol

    finally:
        self.lock.release()
def send_telemetry_heartbeat(self):
    """
    Add the periodic HeartBeat event and, if needed, an HttpErrors event.

    Nothing is sent until MonitorHandler.TELEMETRY_HEARTBEAT_PERIOD has
    passed since ``self.last_telemetry_heartbeat``. When a heartbeat is due,
    the HeartBeat message is
    "<incarnation>;<counter>;<heartbeat id>;<dropped packets>" and
    ``self.counter`` is incremented; an HttpErrors event follows when any I/O
    error count is above zero. Any exception raised while doing so is logged
    as a warning, and the timestamp is refreshed either way.
    """
    period = MonitorHandler.TELEMETRY_HEARTBEAT_PERIOD

    if self.last_telemetry_heartbeat is None:
        # First call: back-date the timestamp so a heartbeat is due now.
        self.last_telemetry_heartbeat = datetime.datetime.utcnow() - period

    heartbeat_due = \
        datetime.datetime.utcnow() >= (self.last_telemetry_heartbeat + period)
    if not heartbeat_due:
        return

    try:
        incarnation = self.protocol.get_incarnation()
        dropped = self.osutil.get_firewall_dropped_packets(
            self.protocol.endpoint)

        heartbeat_msg = "{0};{1};{2};{3}".format(
            incarnation, self.counter, self.heartbeat_id, dropped)
        add_event(
            name=AGENT_NAME,
            version=CURRENT_VERSION,
            op=WALAEventOperation.HeartBeat,
            is_success=True,
            message=heartbeat_msg,
            log_event=False)

        self.counter += 1

        counts = IOErrorCounter.get_and_reset()
        hostplugin_count = counts.get("hostplugin")
        protocol_count = counts.get("protocol")
        other_count = counts.get("other")

        if hostplugin_count > 0 or protocol_count > 0 or other_count > 0:
            errors_msg = "hostplugin:{0};protocol:{1};other:{2}".format(
                hostplugin_count, protocol_count, other_count)
            add_event(
                name=AGENT_NAME,
                version=CURRENT_VERSION,
                op=WALAEventOperation.HttpErrors,
                is_success=True,
                message=errors_msg,
                log_event=False)
    except Exception as err:
        logger.warn("Failed to send heartbeat: {0}", err)

    # Refreshed even after a failure, so the next try waits a full period.
    self.last_telemetry_heartbeat = datetime.datetime.utcnow()
def send_telemetry_heartbeat(self):
    """
    Send the telemetry heartbeat once per heartbeat period.

    ``self.last_telemetry_heartbeat`` is initialised on first use so that the
    heartbeat is due immediately. When
    MonitorHandler.TELEMETRY_HEARTBEAT_PERIOD has elapsed, a HeartBeat event
    carrying the incarnation, ``self.counter``, ``self.heartbeat_id`` and the
    firewall dropped-packet value is added and the counter is incremented.
    The I/O error counts are then fetched and added as an HttpErrors event if
    any is positive. Exceptions are logged as warnings rather than raised,
    and the timestamp is updated after every attempt.
    """
    if self.last_telemetry_heartbeat is None:
        just_now = datetime.datetime.utcnow()
        self.last_telemetry_heartbeat = \
            just_now - MonitorHandler.TELEMETRY_HEARTBEAT_PERIOD

    now = datetime.datetime.utcnow()
    next_due = (self.last_telemetry_heartbeat
                + MonitorHandler.TELEMETRY_HEARTBEAT_PERIOD)

    if now >= next_due:
        try:
            current_incarnation = self.protocol.get_incarnation()
            packets_dropped = self.osutil.get_firewall_dropped_packets(
                self.protocol.endpoint)

            add_event(
                name=AGENT_NAME,
                version=CURRENT_VERSION,
                op=WALAEventOperation.HeartBeat,
                is_success=True,
                message="{0};{1};{2};{3}".format(current_incarnation,
                                                 self.counter,
                                                 self.heartbeat_id,
                                                 packets_dropped),
                log_event=False)
            self.counter += 1

            io_errors = IOErrorCounter.get_and_reset()
            hostplugin = io_errors.get("hostplugin")
            protocol = io_errors.get("protocol")
            other = io_errors.get("other")

            # Evaluated in reporting order, stopping at the first positive.
            if any(count > 0 for count in (hostplugin, protocol, other)):
                add_event(
                    name=AGENT_NAME,
                    version=CURRENT_VERSION,
                    op=WALAEventOperation.HttpErrors,
                    is_success=True,
                    message="hostplugin:{0};protocol:{1};other:{2}".format(
                        hostplugin, protocol, other),
                    log_event=False)
        except Exception as failure:
            logger.warn("Failed to send heartbeat: {0}", failure)

        # Start a new period whether or not the heartbeat went out.
        self.last_telemetry_heartbeat = datetime.datetime.utcnow()