Exemple #1
0
    def _generate_host_update(self):
        """Create & transmit a host update message as defined
            by the sensor response json schema"""

        # Notify the node sensor to update its data required for the host_update message
        successful = self._node_sensor.read_data("host_update",
                                                 self._get_debug(),
                                                 self._units)
        if not successful:
            logger.error(
                "NodeDataMsgHandler, _generate_host_update was NOT successful."
            )

        self._host_memory_usage_threshold = str(
            self._host_memory_usage_threshold)
        try:
            if self._host_memory_usage_threshold.isdigit():
                self._host_memory_usage_threshold = int(
                    self._host_memory_usage_threshold)
            else:
                self._host_memory_usage_threshold = float(
                    self._host_memory_usage_threshold)
        except ValueError:
            logger.warning(
                "Host Memory Alert, Invalid host_memory_usage_threshold value are entered in config."
            )
            # Assigning default value to _disk_usage_threshold
            self._host_memory_usage_threshold = self.DEFAULT_HOST_MEMORY_USAGE_THRESHOLD
        if self._node_sensor.total_memory[
                "percent"] >= self._host_memory_usage_threshold:
            # Create the disk space data message and hand it over to the egress processor to transmit
            if not self.host_fault:
                self.host_fault = True
                # Create the disk space data message and hand it over to the egress processor to transmit
                fault_event = "Host memory usage increased to %s, beyond configured threshold of %s" \
                                %(self._node_sensor.total_memory["percent"], self._host_memory_usage_threshold)

                logger.warning(fault_event)

                logged_in_users = []
                # Create the host update message and hand it over to the egress processor to transmit
                hostUpdateMsg = HostUpdateMsg(
                    self._node_sensor.host_id, self._epoch_time,
                    self._node_sensor.boot_time, self._node_sensor.up_time,
                    self._node_sensor.uname, self._units, self.site_id,
                    self.rack_id, self.node_id, self.cluster_id,
                    self._node_sensor.total_memory,
                    self._node_sensor.logged_in_users,
                    self._node_sensor.process_count,
                    self._node_sensor.running_process_count, self.FAULT,
                    fault_event)
                # Add in uuid if it was present in the json request
                if self._uuid is not None:
                    hostUpdateMsg.set_uuid(self._uuid)
                jsonMsg = hostUpdateMsg.getJson()
                # Transmit it out over rabbitMQ channel
                self.host_sensor_data = jsonMsg
                self.os_sensor_type["memory_usage"] = self.host_sensor_data
                self._write_internal_msgQ(RabbitMQegressProcessor.name(),
                                          jsonMsg)

        if (self._node_sensor.total_memory["percent"] <
                self._host_memory_usage_threshold) and (self.host_fault
                                                        == True):
            fault_resolved_event = "Host memory usage decreased to %s, lesser than configured threshold of %s" \
                                    %(self._node_sensor.total_memory["percent"], self._host_memory_usage_threshold)
            logger.warning(fault_resolved_event)
            logged_in_users = []
            # Create the host update message and hand it over to the egress processor to transmit
            hostUpdateMsg = HostUpdateMsg(
                self._node_sensor.host_id, self._epoch_time,
                self._node_sensor.boot_time, self._node_sensor.up_time,
                self._node_sensor.uname, self._units, self.site_id,
                self.rack_id, self.node_id, self.cluster_id,
                self._node_sensor.total_memory,
                self._node_sensor.logged_in_users,
                self._node_sensor.process_count,
                self._node_sensor.running_process_count, self.FAULT_RESOLVED,
                fault_resolved_event)

            # Add in uuid if it was present in the json request
            if self._uuid is not None:
                hostUpdateMsg.set_uuid(self._uuid)
            jsonMsg = hostUpdateMsg.getJson()
            # Transmit it out over rabbitMQ channel
            self.host_sensor_data = jsonMsg
            self.os_sensor_type["memory_usage"] = self.host_sensor_data

            self._write_internal_msgQ(RabbitMQegressProcessor.name(), jsonMsg)
            self.host_fault = False
Exemple #2
0
    def _generate_host_update(self):
        """Create & transmit a host update message as defined
            by the sensor response json schema"""

        current_time = Utility.get_current_time()

        # Notify the node sensor to update its data required for the host_update message
        successful = self._node_sensor.read_data("host_update",
                                                 self._get_debug(),
                                                 self._units)
        if not successful:
            logger.error(
                "NodeDataMsgHandler, _generate_host_update was NOT successful."
            )

        self._host_memory_usage_threshold = str(
            self._host_memory_usage_threshold)
        try:
            if self._host_memory_usage_threshold.isdigit():
                self._host_memory_usage_threshold = int(
                    self._host_memory_usage_threshold)
            else:
                self._host_memory_usage_threshold = float(
                    self._host_memory_usage_threshold)
        except ValueError:
            logger.warn(
                "Host Memory Alert, Invalid host_memory_usage_threshold value are entered in config."
            )
            # Assigning default value to _memory_usage_threshold
            self._host_memory_usage_threshold = self.DEFAULT_HOST_MEMORY_USAGE_THRESHOLD

        memory_persistent_data = self.read_persistent_data('MEMORY_USAGE_DATA')
        if memory_persistent_data.get('memory_usage_time_map') is not None:
            previous_check_time = int(
                memory_persistent_data['memory_usage_time_map'])
        else:
            previous_check_time = int(-1)
        if memory_persistent_data\
                .get('memory_fault_resolved_iterations') is not None:
            fault_resolved_iters = int(
                memory_persistent_data['memory_fault_resolved_iterations'])
        else:
            fault_resolved_iters = 0
        try:
            iteration_limit = int(self._high_memory_usage_wait_threshold /
                                  self._transmit_interval)
        except ZeroDivisionError:
            iteration_limit = 0
        self.usage_time_map['memory'] = current_time

        if self._node_sensor.total_memory["percent"] >= self._host_memory_usage_threshold \
           and not self.high_usage['memory']:
            if previous_check_time == -1:
                previous_check_time = current_time
                self.persist_state_data('memory', 'MEMORY_USAGE_DATA')

            if self.usage_time_map[
                    'memory'] - previous_check_time >= self._high_memory_usage_wait_threshold:
                self.high_usage['memory'] = True
                self.fault_resolved_iterations['memory'] = 0
                # Create the memory data message and hand it over
                # to the egress processor to transmit
                fault_event = "Host memory usage has increased to {}%,"\
                    "beyond the configured threshold of {}% "\
                    "for more than {} seconds.".format(
                        self._node_sensor.total_memory["percent"],
                        self._host_memory_usage_threshold,
                        self._high_memory_usage_wait_threshold
                    )

                logger.warn(fault_event)

                logged_in_users = []
                # Create the host update message and hand it over to the egress processor to transmit
                hostUpdateMsg = HostUpdateMsg(
                    self._node_sensor.host_id, self._epoch_time,
                    self._node_sensor.boot_time, self._node_sensor.up_time,
                    self._node_sensor.uname, self._units,
                    self._node_sensor.total_memory,
                    self._node_sensor.logged_in_users,
                    self._node_sensor.process_count,
                    self._node_sensor.running_process_count, self.FAULT,
                    fault_event)
                # Add in uuid if it was present in the json request
                if self._uuid is not None:
                    hostUpdateMsg.set_uuid(self._uuid)
                jsonMsg = hostUpdateMsg.getJson()
                # Transmit it to message processor
                self.host_sensor_data = jsonMsg
                self.os_sensor_type["memory_usage"] = self.host_sensor_data
                self._write_internal_msgQ(EgressProcessor.name(), jsonMsg)
                self.persist_state_data('memory', 'MEMORY_USAGE_DATA')

        if self._node_sensor.total_memory[
                "percent"] < self._host_memory_usage_threshold:
            if not self.high_usage['memory']:
                self.persist_state_data('memory', 'MEMORY_USAGE_DATA')
            else:
                if fault_resolved_iters < iteration_limit:
                    fault_resolved_iters += 1
                    self.fault_resolved_iterations[
                        'memory'] = fault_resolved_iters
                    self.persist_state_data('memory', 'MEMORY_USAGE_DATA')
                elif fault_resolved_iters >= iteration_limit:
                    # Create the memory data message and hand it over
                    # to the egress processor to transmit
                    fault_resolved_event = "Host memory usage has decreased to {}%, "\
                        "lower than the configured threshold of {}%.".format(
                            self._node_sensor.total_memory["percent"],
                            self._host_memory_usage_threshold
                        )
                    logger.info(fault_resolved_event)
                    logged_in_users = []

                    # Create the host update message and hand it over to the egress processor to transmit
                    hostUpdateMsg = HostUpdateMsg(
                        self._node_sensor.host_id, self._epoch_time,
                        self._node_sensor.boot_time, self._node_sensor.up_time,
                        self._node_sensor.uname, self._units,
                        self._node_sensor.total_memory,
                        self._node_sensor.logged_in_users,
                        self._node_sensor.process_count,
                        self._node_sensor.running_process_count,
                        self.FAULT_RESOLVED, fault_resolved_event)

                    # Add in uuid if it was present in the json request
                    if self._uuid is not None:
                        hostUpdateMsg.set_uuid(self._uuid)
                    jsonMsg = hostUpdateMsg.getJson()
                    # Transmit it to message processor
                    self.host_sensor_data = jsonMsg
                    self.os_sensor_type["memory_usage"] = self.host_sensor_data
                    self._write_internal_msgQ(EgressProcessor.name(), jsonMsg)
                    self.high_usage['memory'] = False
                    self.usage_time_map['memory'] = int(-1)
                    self.fault_resolved_iterations['memory'] = 0
                    self.persist_state_data('memory', 'MEMORY_USAGE_DATA')