Exemple #1
0
    def collectAgentInfo(self):
        """
        Collect a health summary of the agent.

        Starts from the component status stored in the agent database and
        enriches it with:
          1. the couchdb replication status (checked first)
          2. the drain mode flag
          3. the disks whose usage reached the configured threshold
          4. the number of couch processes, when it reached the threshold
          5. the time and error message of the last data upload

        :return: the dict returned by getComponentStatus, updated in place
                 with the information above
        """
        # TODO: agent info (need to include job Slots for the sites)
        # always checks couch first
        source = self.config.JobStateMachine.jobSummaryDBName
        target = self.config.AnalyticsDataCollector.centralWMStatsURL
        couchInfo = self.localCouchServer.recoverReplicationErrors(source, target)
        logging.info("getting couchdb replication status: %s", couchInfo)

        agentInfo = self.wmagentDB.getComponentStatus(self.config)
        agentInfo.update(self.agentInfo)

        if isDrainMode(self.config):
            logging.info("Agent is in DrainMode")
            agentInfo['drain_mode'] = True
            # Drain mode is only worth a warning: it must not hide a worse
            # status already reported for the agent components.
            if agentInfo['status'] == 'ok':
                agentInfo['status'] = "warning"
        else:
            agentInfo['drain_mode'] = False

        if couchInfo['status'] != 'ok':
            agentInfo['down_components'].append("CouchServer")
            agentInfo['status'] = couchInfo['status']
            couchInfo['name'] = "CouchServer"
            agentInfo['down_component_detail'].append(couchInfo)

        # Disk space warning
        diskUseList = diskUse()
        diskUseThreshold = float(self.config.AnalyticsDataCollector.diskUseThreshold)
        agentInfo['disk_warning'] = []
        for disk in diskUseList:
            # 'percent' carries a trailing '%' sign that has to be removed
            if float(disk['percent'].strip('%')) >= diskUseThreshold:
                agentInfo['disk_warning'].append(disk)

        # Couch process warning: 0 means that the threshold was not reached
        couchProc = numberCouchProcess()
        couchProcessThreshold = float(self.config.AnalyticsDataCollector.couchProcessThreshold)
        if couchProc >= couchProcessThreshold:
            agentInfo['couch_process_warning'] = couchProc
        else:
            agentInfo['couch_process_warning'] = 0

        # This adds the last time and message when data was updated to agentInfo
        lastDataUpload = DataUploadTime.getInfo(self)
        if lastDataUpload['data_last_update'] != 0:
            agentInfo['data_last_update'] = lastDataUpload['data_last_update']
        if lastDataUpload['data_error'] != "":
            agentInfo['data_error'] = lastDataUpload['data_error']

        # Change status if there is data_error, couch process maxed out or disk full problems.
        if agentInfo['status'] == 'ok' and agentInfo['disk_warning']:
            agentInfo['status'] = "warning"

        if agentInfo['status'] in ('ok', 'warning'):
            # dict.get() replaces has_key(), which does not exist in python3
            if agentInfo.get('data_error', 'ok') != 'ok' or \
               agentInfo.get('couch_process_warning', 0) != 0:
                agentInfo['status'] = "error"

        return agentInfo
    def collectAgentInfo(self):
        """
        Collect a health summary of the agent.

        Starts from the component status stored in the agent database and
        enriches it with:
          1. the drain mode flag
          2. the couchdb status returned by collectCouchDBInfo
          3. the disks whose usage reached the configured threshold
             (mount points listed in ignoreDisk are skipped)
          4. the number of couch processes, when it reached the threshold
          5. the time and error message of the last data upload

        :return: the dict returned by getComponentStatus, updated in place
                 with the information above
        """
        agentInfo = self.wmagentDB.getComponentStatus(self.config)
        agentInfo.update(self.agentInfo)

        if isDrainMode(self.config):
            logging.info("Agent is in DrainMode")
            agentInfo['drain_mode'] = True
            # Drain mode is only worth a warning: it must not hide a worse
            # status already reported for the agent components.
            if agentInfo['status'] == 'ok':
                agentInfo['status'] = "warning"
        else:
            agentInfo['drain_mode'] = False

        couchInfo = self.collectCouchDBInfo()

        if couchInfo['status'] != 'ok':
            agentInfo['down_components'].append("CouchServer")
            agentInfo['status'] = couchInfo['status']
            couchInfo['name'] = "CouchServer"
            agentInfo['down_component_detail'].append(couchInfo)

        # Disk space warning
        diskUseList = diskUse()
        diskUseThreshold = float(self.config.AnalyticsDataCollector.diskUseThreshold)
        agentInfo['disk_warning'] = []
        for disk in diskUseList:
            # 'percent' carries a trailing '%' sign that has to be removed;
            # ignoreDisk is only read for disks above the threshold
            if float(disk['percent'].strip('%')) >= diskUseThreshold and \
               disk['mounted'] not in self.config.AnalyticsDataCollector.ignoreDisk:
                agentInfo['disk_warning'].append(disk)

        # Couch process warning: 0 means that the threshold was not reached
        couchProc = numberCouchProcess()
        couchProcessThreshold = float(self.config.AnalyticsDataCollector.couchProcessThreshold)
        if couchProc >= couchProcessThreshold:
            agentInfo['couch_process_warning'] = couchProc
        else:
            agentInfo['couch_process_warning'] = 0

        # This adds the last time and message when data was updated to agentInfo
        lastDataUpload = DataUploadTime.getInfo(self)
        if lastDataUpload['data_last_update'] != 0:
            agentInfo['data_last_update'] = lastDataUpload['data_last_update']
        if lastDataUpload['data_error'] != "":
            agentInfo['data_error'] = lastDataUpload['data_error']

        # Change status if there is data_error, couch process maxed out or disk full problems.
        if agentInfo['status'] == 'ok' and agentInfo['disk_warning']:
            agentInfo['status'] = "warning"

        if agentInfo['status'] in ('ok', 'warning'):
            if agentInfo.get('data_error', 'ok') != 'ok' or \
               agentInfo.get('couch_process_warning', 0) != 0:
                agentInfo['status'] = "error"

        return agentInfo
Exemple #3
0
    def collectAgentInfo(self):
        """
        Monitors the general health of the agent, as:
          1. status of the agent processes
          2. status of the agent threads based on the database info
          3. couchdb active tasks and its replications
          4. check the disk usage
          5. check the number of couch processes

        The overall 'status' is escalated in this order: a couchdb status
        other than 'ok' replaces it; an 'ok' status becomes 'warning' when
        the agent is draining or a disk is above the threshold; an 'ok' or
        'warning' status becomes 'error' when there is a data upload error
        or the couch process threshold was reached.

        :return: a dict with all the info collected
        """
        logging.info("Getting agent info ...")
        agentInfo = self.wmagentDB.getComponentStatus(self.config)
        agentInfo.update(self.agentInfo)

        if isDrainMode(self.config):
            logging.info("Agent is in DrainMode")
            agentInfo['drain_mode'] = True
        else:
            agentInfo['drain_mode'] = False

        couchInfo = self.collectCouchDBInfo()
        if couchInfo['status'] != 'ok':
            agentInfo['down_components'].append(couchInfo['name'])
            agentInfo['status'] = couchInfo['status']
            agentInfo['down_component_detail'].append(couchInfo)

        # Disk space warning
        diskUseList = diskUse()
        diskUseThreshold = float(
            self.config.AnalyticsDataCollector.diskUseThreshold)
        agentInfo['disk_warning'] = []
        for disk in diskUseList:
            # 'percent' carries a trailing '%' sign that has to be removed;
            # ignoreDisk is only read for disks above the threshold
            if float(disk['percent'].strip('%')) >= diskUseThreshold and \
                            disk['mounted'] not in self.config.AnalyticsDataCollector.ignoreDisk:
                agentInfo['disk_warning'].append(disk)

        # Couch process warning: 0 means that the threshold was not reached
        couchProc = numberCouchProcess()
        logging.info("CouchDB is running with %d processes", couchProc)
        couchProcessThreshold = self.config.AnalyticsDataCollector.couchProcessThreshold
        if couchProc >= couchProcessThreshold:
            agentInfo['couch_process_warning'] = couchProc
        else:
            agentInfo['couch_process_warning'] = 0

        # This adds the last time and message when data was updated to agentInfo
        lastDataUpload = DataUploadTime.getInfo()
        if lastDataUpload['data_last_update']:
            agentInfo['data_last_update'] = lastDataUpload['data_last_update']
        if lastDataUpload['data_error']:
            agentInfo['data_error'] = lastDataUpload['data_error']

        # Change status if there is data_error, couch process maxed out or disk full problems.
        if agentInfo['status'] == 'ok' and (agentInfo['drain_mode']
                                            or agentInfo['disk_warning']):
            agentInfo['status'] = "warning"

        if agentInfo['status'] == 'ok' or agentInfo['status'] == 'warning':
            if agentInfo.get('data_error', 'ok') != 'ok' or agentInfo.get(
                    'couch_process_warning', 0):
                agentInfo['status'] = "error"

        if agentInfo['down_components']:
            # let logging do the formatting, as for the other messages here
            logging.info("List of agent components down: %s",
                         agentInfo['down_components'])

        return agentInfo
    def collectAgentInfo(self):
        """
        Collect a health summary of the agent.

        Starts from the component status stored in the agent database and
        enriches it with:
          1. the couchdb replication status (checked first, using the
             "WMStats/repfilter" replication filter)
          2. the drain mode flag
          3. the disks whose usage reached the configured threshold
             (mount points listed in ignoreDisk are skipped)
          4. the number of couch processes, when it reached the threshold
          5. the time and error message of the last data upload

        :return: the dict returned by getComponentStatus, updated in place
                 with the information above
        """
        # TODO: agent info (need to include job Slots for the sites)
        # always checks couch first
        source = self.config.JobStateMachine.jobSummaryDBName
        target = self.config.AnalyticsDataCollector.centralWMStatsURL
        couchInfo = self.localCouchServer.recoverReplicationErrors(
            source, target, filter="WMStats/repfilter")
        logging.info("getting couchdb replication status: %s", couchInfo)

        agentInfo = self.wmagentDB.getComponentStatus(self.config)
        agentInfo.update(self.agentInfo)

        if isDrainMode(self.config):
            logging.info("Agent is in DrainMode")
            agentInfo['drain_mode'] = True
            # Drain mode is only worth a warning: it must not hide a worse
            # status already reported for the agent components.
            if agentInfo['status'] == 'ok':
                agentInfo['status'] = "warning"
        else:
            agentInfo['drain_mode'] = False

        if couchInfo['status'] != 'ok':
            agentInfo['down_components'].append("CouchServer")
            agentInfo['status'] = couchInfo['status']
            couchInfo['name'] = "CouchServer"
            agentInfo['down_component_detail'].append(couchInfo)

        # Disk space warning
        diskUseList = diskUse()
        diskUseThreshold = float(
            self.config.AnalyticsDataCollector.diskUseThreshold)
        agentInfo['disk_warning'] = []
        for disk in diskUseList:
            # 'percent' carries a trailing '%' sign that has to be removed;
            # ignoreDisk is only read for disks above the threshold
            if float(disk['percent'].strip('%')) >= diskUseThreshold and \
               disk['mounted'] not in self.config.AnalyticsDataCollector.ignoreDisk:
                agentInfo['disk_warning'].append(disk)

        # Couch process warning: 0 means that the threshold was not reached
        couchProc = numberCouchProcess()
        couchProcessThreshold = float(
            self.config.AnalyticsDataCollector.couchProcessThreshold)
        if couchProc >= couchProcessThreshold:
            agentInfo['couch_process_warning'] = couchProc
        else:
            agentInfo['couch_process_warning'] = 0

        # This adds the last time and message when data was updated to agentInfo
        lastDataUpload = DataUploadTime.getInfo(self)
        if lastDataUpload['data_last_update'] != 0:
            agentInfo['data_last_update'] = lastDataUpload['data_last_update']
        if lastDataUpload['data_error'] != "":
            agentInfo['data_error'] = lastDataUpload['data_error']

        # Change status if there is data_error, couch process maxed out or disk full problems.
        if agentInfo['status'] == 'ok' and agentInfo['disk_warning']:
            agentInfo['status'] = "warning"

        if agentInfo['status'] in ('ok', 'warning'):
            # dict.get() replaces has_key(), which does not exist in python3
            if agentInfo.get('data_error', 'ok') != 'ok' or \
               agentInfo.get('couch_process_warning', 0) != 0:
                agentInfo['status'] = "error"

        return agentInfo
    def collectAgentInfo(self):
        """
        Build the health report of the agent from:
          1. the component status kept in the agent database
          2. the drain mode flag
          3. the couchdb status given by collectCouchDBInfo
          4. the disk usage compared to the configured threshold
          5. the number of couch processes compared to its threshold
          6. the last data upload time and error message

        :return: the component status dict, completed with the data above
        """
        logging.info("Getting agent info ...")
        info = self.wmagentDB.getComponentStatus(self.config)
        info.update(self.agentInfo)

        draining = bool(isDrainMode(self.config))
        if draining:
            logging.info("Agent is in DrainMode")
        info['drain_mode'] = draining

        couchStatus = self.collectCouchDBInfo()
        if couchStatus['status'] != 'ok':
            # couchdb is reported as one more component that is down
            info['down_components'].append(couchStatus['name'])
            info['status'] = couchStatus['status']
            info['down_component_detail'].append(couchStatus)

        # Disks at or above the usage threshold, unless explicitly ignored.
        # ignoreDisk is only looked up for the disks above the threshold.
        mountedDisks = diskUse()
        collectorCfg = self.config.AnalyticsDataCollector
        usageLimit = float(collectorCfg.diskUseThreshold)
        info['disk_warning'] = [
            entry for entry in mountedDisks
            if float(entry['percent'].strip('%')) >= usageLimit
            and entry['mounted'] not in collectorCfg.ignoreDisk
        ]

        # Number of couch processes, or 0 while it is below the threshold
        procCount = numberCouchProcess()
        logging.info("CouchDB is running with %d processes", procCount)
        procLimit = collectorCfg.couchProcessThreshold
        info['couch_process_warning'] = procCount if procCount >= procLimit else 0

        # Copy the last upload time and error message, when they are set
        uploadInfo = DataUploadTime.getInfo()
        for field in ('data_last_update', 'data_error'):
            if uploadInfo[field]:
                info[field] = uploadInfo[field]

        # Draining or a full disk turns an 'ok' status into a warning
        if info['status'] == 'ok':
            if info['drain_mode'] or info['disk_warning']:
                info['status'] = "warning"

        # A data upload error or too many couch processes turns it into an error
        uploadFailed = info.get('data_error', 'ok') != 'ok'
        if info['status'] in ('ok', 'warning') and \
                (uploadFailed or info.get('couch_process_warning', 0)):
            info['status'] = "error"

        downList = info['down_components']
        if downList:
            logging.info("List of agent components down: %s", downList)

        return info