def _generate_cpu_data(self):
    """Create & transmit a cpu_data message as defined
        by the sensor response json schema

    Flow, as implemented below:
      1. Ask the node sensor to refresh its "cpu_data" readings.
      2. Normalise ``self._cpu_usage_threshold`` to an int or float,
         falling back to ``DEFAULT_CPU_USAGE_THRESHOLD`` when the
         configured value cannot be parsed.
      3. Load the persisted 'CPU_USAGE_DATA' record to recover the time
         high usage was first observed and the number of consecutive
         below-threshold iterations counted so far.
      4. Send a FAULT message once usage has been at/above the threshold
         for at least ``_high_cpu_usage_wait_threshold`` (the alert text
         reports this value in seconds).
      5. While a fault is active, send FAULT_RESOLVED once usage has been
         below the threshold for ``iteration_limit`` consecutive calls.

    State changes are written through ``persist_state_data``.
    """
    current_time = Utility.get_current_time()

    # Notify the node sensor to update its data required for the cpu_data message
    successful = self._node_sensor.read_data("cpu_data", self._get_debug())
    if not successful:
        # NOTE(review): processing continues with whatever values the
        # sensor currently holds; confirm this best-effort behaviour is
        # intended before turning it into an early return.
        logger.error(
            "NodeDataMsgHandler, _generate_cpu_data was NOT successful.")

    # The threshold may arrive as str/int/float; round-trip through str so
    # that whole numbers become int and everything else is parsed as float.
    self._cpu_usage_threshold = str(self._cpu_usage_threshold)
    try:
        if self._cpu_usage_threshold.isdigit():
            self._cpu_usage_threshold = int(self._cpu_usage_threshold)
        else:
            self._cpu_usage_threshold = float(self._cpu_usage_threshold)
    except ValueError:
        # NOTE(review): logger.warn is a deprecated alias on stdlib
        # loggers; confirm the logger type before switching to warning().
        logger.warn(
            "CPU Usage Alert, Invalid cpu_usage_threshold value are entered in config."
        )
        # Assigning default value to _cpu_usage_threshold
        self._cpu_usage_threshold = self.DEFAULT_CPU_USAGE_THRESHOLD

    cpu_persistent_data = self.read_persistent_data('CPU_USAGE_DATA')

    # -1 marks "no high-usage start time recorded yet".
    stored_check_time = cpu_persistent_data.get('cpu_usage_time_map')
    if stored_check_time is not None:
        previous_check_time = int(stored_check_time)
    else:
        previous_check_time = -1

    stored_resolved_iters = cpu_persistent_data.get(
        'cpu_fault_resolved_iterations')
    if stored_resolved_iters is not None:
        fault_resolved_iters = int(stored_resolved_iters)
    else:
        fault_resolved_iters = 0

    # Number of consecutive below-threshold calls required before an
    # active fault is reported as resolved.
    try:
        iteration_limit = int(self._high_cpu_usage_wait_threshold /
                              self._transmit_interval)
    except ZeroDivisionError:
        iteration_limit = 0

    self.usage_time_map['cpu'] = current_time

    def _send_cpu_alert(alert_type, event):
        """Build a CPUdataMsg for *event* and queue it for the egress processor."""
        cpuDataMsg = CPUdataMsg(
            self._node_sensor.host_id,
            self._epoch_time,
            self._node_sensor.csps,
            self._node_sensor.idle_time,
            self._node_sensor.interrupt_time,
            self._node_sensor.iowait_time,
            self._node_sensor.nice_time,
            self._node_sensor.softirq_time,
            self._node_sensor.steal_time,
            self._node_sensor.system_time,
            self._node_sensor.user_time,
            self._node_sensor.cpu_core_data,
            self._node_sensor.cpu_usage,
            alert_type,
            event)

        # Add in uuid if it was present in the json request
        if self._uuid is not None:
            cpuDataMsg.set_uuid(self._uuid)
        jsonMsg = cpuDataMsg.getJson()
        self.cpu_sensor_data = jsonMsg
        self.os_sensor_type["cpu_usage"] = self.cpu_sensor_data

        # Transmit it to message processor
        self._write_internal_msgQ(EgressProcessor.name(), jsonMsg)

    if self._node_sensor.cpu_usage >= self._cpu_usage_threshold \
            and not self.high_usage['cpu']:
        if previous_check_time == -1:
            # First time high usage is seen: start the wait window now.
            previous_check_time = current_time
            self.persist_state_data('cpu', 'CPU_USAGE_DATA')

        if self.usage_time_map['cpu'] - previous_check_time >= \
                self._high_cpu_usage_wait_threshold:
            self.high_usage['cpu'] = True
            self.fault_resolved_iterations['cpu'] = 0

            fault_event = "CPU usage has increased to {}%, "\
                "beyond the configured threshold of {}% "\
                "for more than {} seconds.".format(
                    self._node_sensor.cpu_usage,
                    self._cpu_usage_threshold,
                    self._high_cpu_usage_wait_threshold
                )
            logger.warn(fault_event)

            _send_cpu_alert(self.FAULT, fault_event)

            # Store the state to Persistent Cache.
            self.persist_state_data('cpu', 'CPU_USAGE_DATA')

    if self._node_sensor.cpu_usage < self._cpu_usage_threshold:
        if not self.high_usage['cpu']:
            self.persist_state_data('cpu', 'CPU_USAGE_DATA')
        elif fault_resolved_iters < iteration_limit:
            # Fault is active but usage has not stayed low long enough yet.
            fault_resolved_iters += 1
            self.fault_resolved_iterations['cpu'] = fault_resolved_iters
            self.persist_state_data('cpu', 'CPU_USAGE_DATA')
        else:
            fault_resolved_event = "CPU usage has decreased to {}%, "\
                "lower than the configured threshold of {}%.".format(
                    self._node_sensor.cpu_usage,
                    self._cpu_usage_threshold
                )
            logger.info(fault_resolved_event)

            _send_cpu_alert(self.FAULT_RESOLVED, fault_resolved_event)

            # Reset the tracking state now that the fault is cleared.
            self.high_usage['cpu'] = False
            self.usage_time_map['cpu'] = -1
            self.fault_resolved_iterations['cpu'] = 0

            # Store the state to Persistent Cache.
            self.persist_state_data('cpu', 'CPU_USAGE_DATA')
def _generate_host_update(self):
    """Create & transmit a host update message as defined
        by the sensor response json schema

    Flow, as implemented below:
      1. Ask the node sensor to refresh its "host_update" readings.
      2. Normalise ``self._host_memory_usage_threshold`` to an int or
         float, falling back to ``DEFAULT_HOST_MEMORY_USAGE_THRESHOLD``
         when the configured value cannot be parsed.
      3. Load the persisted 'MEMORY_USAGE_DATA' record to recover the time
         high usage was first observed and the number of consecutive
         below-threshold iterations counted so far.
      4. Send a FAULT message once ``total_memory["percent"]`` has been
         at/above the threshold for at least
         ``_high_memory_usage_wait_threshold`` (the alert text reports
         this value in seconds).
      5. While a fault is active, send FAULT_RESOLVED once usage has been
         below the threshold for ``iteration_limit`` consecutive calls.

    State changes are written through ``persist_state_data``.
    """
    current_time = Utility.get_current_time()

    # Notify the node sensor to update its data required for the host_update message
    successful = self._node_sensor.read_data("host_update",
                                             self._get_debug(),
                                             self._units)
    if not successful:
        # NOTE(review): processing continues with whatever values the
        # sensor currently holds; confirm this best-effort behaviour is
        # intended before turning it into an early return.
        logger.error(
            "NodeDataMsgHandler, _generate_host_update was NOT successful."
        )

    # The threshold may arrive as str/int/float; round-trip through str so
    # that whole numbers become int and everything else is parsed as float.
    self._host_memory_usage_threshold = str(
        self._host_memory_usage_threshold)
    try:
        if self._host_memory_usage_threshold.isdigit():
            self._host_memory_usage_threshold = int(
                self._host_memory_usage_threshold)
        else:
            self._host_memory_usage_threshold = float(
                self._host_memory_usage_threshold)
    except ValueError:
        # NOTE(review): logger.warn is a deprecated alias on stdlib
        # loggers; confirm the logger type before switching to warning().
        logger.warn(
            "Host Memory Alert, Invalid host_memory_usage_threshold value are entered in config."
        )
        # Assigning default value to _host_memory_usage_threshold
        self._host_memory_usage_threshold = self.DEFAULT_HOST_MEMORY_USAGE_THRESHOLD

    memory_persistent_data = self.read_persistent_data('MEMORY_USAGE_DATA')

    # -1 marks "no high-usage start time recorded yet".
    stored_check_time = memory_persistent_data.get('memory_usage_time_map')
    if stored_check_time is not None:
        previous_check_time = int(stored_check_time)
    else:
        previous_check_time = -1

    stored_resolved_iters = memory_persistent_data.get(
        'memory_fault_resolved_iterations')
    if stored_resolved_iters is not None:
        fault_resolved_iters = int(stored_resolved_iters)
    else:
        fault_resolved_iters = 0

    # Number of consecutive below-threshold calls required before an
    # active fault is reported as resolved.
    try:
        iteration_limit = int(self._high_memory_usage_wait_threshold /
                              self._transmit_interval)
    except ZeroDivisionError:
        iteration_limit = 0

    self.usage_time_map['memory'] = current_time

    def _send_host_update(alert_type, event):
        """Build a HostUpdateMsg for *event* and queue it for the egress processor."""
        hostUpdateMsg = HostUpdateMsg(
            self._node_sensor.host_id,
            self._epoch_time,
            self._node_sensor.boot_time,
            self._node_sensor.up_time,
            self._node_sensor.uname,
            self._units,
            self._node_sensor.total_memory,
            self._node_sensor.logged_in_users,
            self._node_sensor.process_count,
            self._node_sensor.running_process_count,
            alert_type,
            event)

        # Add in uuid if it was present in the json request
        if self._uuid is not None:
            hostUpdateMsg.set_uuid(self._uuid)
        jsonMsg = hostUpdateMsg.getJson()

        # Transmit it to message processor
        self.host_sensor_data = jsonMsg
        self.os_sensor_type["memory_usage"] = self.host_sensor_data
        self._write_internal_msgQ(EgressProcessor.name(), jsonMsg)

    if self._node_sensor.total_memory["percent"] >= \
            self._host_memory_usage_threshold \
            and not self.high_usage['memory']:
        if previous_check_time == -1:
            # First time high usage is seen: start the wait window now.
            previous_check_time = current_time
            self.persist_state_data('memory', 'MEMORY_USAGE_DATA')

        if self.usage_time_map['memory'] - previous_check_time >= \
                self._high_memory_usage_wait_threshold:
            self.high_usage['memory'] = True
            self.fault_resolved_iterations['memory'] = 0

            # A space follows the comma so the alert text does not render
            # as "...%,beyond".
            fault_event = "Host memory usage has increased to {}%, "\
                "beyond the configured threshold of {}% "\
                "for more than {} seconds.".format(
                    self._node_sensor.total_memory["percent"],
                    self._host_memory_usage_threshold,
                    self._high_memory_usage_wait_threshold
                )
            logger.warn(fault_event)

            _send_host_update(self.FAULT, fault_event)

            # Store the state to Persistent Cache.
            self.persist_state_data('memory', 'MEMORY_USAGE_DATA')

    if self._node_sensor.total_memory["percent"] < \
            self._host_memory_usage_threshold:
        if not self.high_usage['memory']:
            self.persist_state_data('memory', 'MEMORY_USAGE_DATA')
        elif fault_resolved_iters < iteration_limit:
            # Fault is active but usage has not stayed low long enough yet.
            fault_resolved_iters += 1
            self.fault_resolved_iterations['memory'] = fault_resolved_iters
            self.persist_state_data('memory', 'MEMORY_USAGE_DATA')
        else:
            fault_resolved_event = "Host memory usage has decreased to {}%, "\
                "lower than the configured threshold of {}%.".format(
                    self._node_sensor.total_memory["percent"],
                    self._host_memory_usage_threshold
                )
            logger.info(fault_resolved_event)

            _send_host_update(self.FAULT_RESOLVED, fault_resolved_event)

            # Reset the tracking state now that the fault is cleared.
            self.high_usage['memory'] = False
            self.usage_time_map['memory'] = -1
            self.fault_resolved_iterations['memory'] = 0

            # Store the state to Persistent Cache.
            self.persist_state_data('memory', 'MEMORY_USAGE_DATA')