def collectAgentInfo(self):
    """
    Gather the health summary of the agent.

    The summary starts from the component status returned by
    ``self.wmagentDB.getComponentStatus`` merged with ``self.agentInfo``
    and is then extended with:

    * ``drain_mode``: True when ``isDrainMode`` reports a draining agent
    * a "CouchServer" entry in ``down_components`` and
      ``down_component_detail`` when the couch replication status is not 'ok'
    * ``disk_warning``: the disks whose usage is at or above the configured
      ``diskUseThreshold``
    * ``couch_process_warning``: the number of couch processes when it is at
      or above ``couchProcessThreshold``, otherwise 0
    * ``data_last_update`` / ``data_error``: copied from ``DataUploadTime``
      when they are set

    :return: the dict with all the info collected
    """
    # TODO: agent info (need to include job Slots for the sites)
    # always checks couch first
    source = self.config.JobStateMachine.jobSummaryDBName
    target = self.config.AnalyticsDataCollector.centralWMStatsURL
    couchInfo = self.localCouchServer.recoverReplicationErrors(source, target)
    logging.info("getting couchdb replication status: %s", couchInfo)

    agentInfo = self.wmagentDB.getComponentStatus(self.config)
    agentInfo.update(self.agentInfo)

    # The drain flag is only recorded here; its effect on the status is
    # decided at the end so it cannot hide a non-ok component status.
    if isDrainMode(self.config):
        logging.info("Agent is in DrainMode")
        agentInfo['drain_mode'] = True
    else:
        agentInfo['drain_mode'] = False

    if couchInfo['status'] != 'ok':
        agentInfo['down_components'].append("CouchServer")
        agentInfo['status'] = couchInfo['status']
        couchInfo['name'] = "CouchServer"
        agentInfo['down_component_detail'].append(couchInfo)

    # Disk space warning: 'percent' is a string carrying a '%' character
    diskUseList = diskUse()
    diskUseThreshold = float(self.config.AnalyticsDataCollector.diskUseThreshold)
    agentInfo['disk_warning'] = [disk for disk in diskUseList
                                 if float(disk['percent'].strip('%')) >= diskUseThreshold]

    # Couch process warning
    couchProc = numberCouchProcess()
    couchProcessThreshold = float(self.config.AnalyticsDataCollector.couchProcessThreshold)
    if couchProc >= couchProcessThreshold:
        agentInfo['couch_process_warning'] = couchProc
    else:
        agentInfo['couch_process_warning'] = 0

    # This adds the last time and message when data was updated to agentInfo
    lastDataUpload = DataUploadTime.getInfo(self)
    if lastDataUpload['data_last_update'] != 0:
        agentInfo['data_last_update'] = lastDataUpload['data_last_update']
    if lastDataUpload['data_error'] != "":
        agentInfo['data_error'] = lastDataUpload['data_error']

    # Change status if there is data_error, couch process maxed out or disk
    # full problems. Only an otherwise healthy agent is moved to "warning".
    if agentInfo['status'] == 'ok' and (agentInfo['drain_mode'] or agentInfo['disk_warning']):
        agentInfo['status'] = "warning"

    if agentInfo['status'] in ('ok', 'warning'):
        # dict.get() replaces dict.has_key(), which does not exist on Python 3
        if agentInfo.get('data_error', 'ok') != 'ok' or \
                agentInfo.get('couch_process_warning', 0) != 0:
            agentInfo['status'] = "error"

    return agentInfo
def collectAgentInfo(self):
    """
    Gather the health summary of the agent.

    The component status returned by ``self.wmagentDB.getComponentStatus``
    is merged with ``self.agentInfo`` and then extended with the drain
    mode flag, the couch status from ``self.collectCouchDBInfo``, the disks
    over the usage threshold, the couch process count warning and the last
    data upload information.

    :return: the dict with all the info collected; its ``status`` is moved
        to "warning" for a draining agent or a disk over threshold (only
        when it was 'ok'), and to "error" when there is a data error or a
        couch process warning (only when it was 'ok' or "warning")
    """
    agentInfo = self.wmagentDB.getComponentStatus(self.config)
    agentInfo.update(self.agentInfo)

    # Only record the flag here. Forcing the status to "warning" at this
    # point would overwrite a non-ok status reported by the components.
    if isDrainMode(self.config):
        logging.info("Agent is in DrainMode")
        agentInfo['drain_mode'] = True
    else:
        agentInfo['drain_mode'] = False

    couchInfo = self.collectCouchDBInfo()
    if couchInfo['status'] != 'ok':
        agentInfo['down_components'].append("CouchServer")
        agentInfo['status'] = couchInfo['status']
        couchInfo['name'] = "CouchServer"
        agentInfo['down_component_detail'].append(couchInfo)

    # Disk space warning: disks listed in ignoreDisk are never reported
    diskUseList = diskUse()
    diskUseThreshold = float(self.config.AnalyticsDataCollector.diskUseThreshold)
    agentInfo['disk_warning'] = [
        disk for disk in diskUseList
        if float(disk['percent'].strip('%')) >= diskUseThreshold and
        disk['mounted'] not in self.config.AnalyticsDataCollector.ignoreDisk
    ]

    # Couch process warning
    couchProc = numberCouchProcess()
    couchProcessThreshold = float(self.config.AnalyticsDataCollector.couchProcessThreshold)
    if couchProc >= couchProcessThreshold:
        agentInfo['couch_process_warning'] = couchProc
    else:
        agentInfo['couch_process_warning'] = 0

    # This adds the last time and message when data was updated to agentInfo
    lastDataUpload = DataUploadTime.getInfo(self)
    if lastDataUpload['data_last_update'] != 0:
        agentInfo['data_last_update'] = lastDataUpload['data_last_update']
    if lastDataUpload['data_error'] != "":
        agentInfo['data_error'] = lastDataUpload['data_error']

    # Change status if there is data_error, couch process maxed out or disk
    # full problems.
    if agentInfo['status'] == 'ok' and (agentInfo['drain_mode'] or agentInfo['disk_warning']):
        agentInfo['status'] = "warning"

    if agentInfo['status'] in ('ok', 'warning'):
        if agentInfo.get('data_error', 'ok') != 'ok' or \
                agentInfo.get('couch_process_warning', 0) != 0:
            agentInfo['status'] = "error"

    return agentInfo
def collectAgentInfo(self):
    """
    Build the dictionary describing the general health of the agent.

    The following pieces of information are put together:
      1. status of the agent processes
      2. status of the agent threads based on the database info
      3. couchdb active tasks and its replications
      4. check the disk usage
      5. check the number of couch processes

    :return: a dict with all the info collected
    """
    logging.info("Getting agent info ...")
    summary = self.wmagentDB.getComponentStatus(self.config)
    summary.update(self.agentInfo)
    summary['disk_warning'] = listDiskUsageOverThreshold(self.config, updateDB=True)

    # Drain information is only fetched for an agent that is draining
    if not isDrainMode(self.config):
        summary['drain_mode'] = False
    else:
        logging.info("Agent is in DrainMode")
        summary['drain_mode'] = True
        summary['drain_stats'] = DrainStatusPoller.getDrainInfo()

    # A couch status other than 'ok' replaces the agent status
    couchStatus = self.collectCouchDBInfo()
    if couchStatus['status'] != 'ok':
        summary['down_components'].append(couchStatus['name'])
        summary['status'] = couchStatus['status']
        summary['down_component_detail'].append(couchStatus)

    # Report the number of couch processes once it reaches the threshold
    numProcesses = numberCouchProcess()
    logging.info("CouchDB is running with %d processes", numProcesses)
    maxProcesses = self.config.AnalyticsDataCollector.couchProcessThreshold
    summary['couch_process_warning'] = numProcesses if numProcesses >= maxProcesses else 0

    # Last time and message of the data upload, copied only when set
    uploadInfo = DataUploadTime.getInfo()
    for key in ('data_last_update', 'data_error'):
        value = uploadInfo[key]
        if value:
            summary[key] = value

    # Move the status to warning (drain or disk problems) and then to error
    # (data upload error or too many couch processes)
    if summary['status'] == 'ok':
        if summary['drain_mode'] or summary['disk_warning']:
            summary['status'] = "warning"

    hasDataError = summary.get('data_error', 'ok') != 'ok'
    if summary['status'] in ('ok', 'warning') and \
            (hasDataError or summary.get('couch_process_warning', 0)):
        summary['status'] = "error"

    logging.info("List of agent components down: %s", summary['down_components'])
    return summary
def collectAgentInfo(self):
    """
    Collect the information describing the health of the agent.

    Starting from ``self.wmagentDB.getComponentStatus`` merged with
    ``self.agentInfo``, this adds:

    * ``drain_mode``: True/False according to ``isDrainMode``
    * "CouchServer" in ``down_components`` and the couch info in
      ``down_component_detail`` when ``self.collectCouchDBInfo`` reports a
      status other than 'ok' (that status also replaces the agent status)
    * ``disk_warning``: disks at or above ``diskUseThreshold`` whose mount
      point is not in ``ignoreDisk``
    * ``couch_process_warning``: the couch process count when it is at or
      above ``couchProcessThreshold``, otherwise 0
    * ``data_last_update`` / ``data_error`` from ``DataUploadTime`` when set

    :return: the dict with all the info collected
    """
    agentInfo = self.wmagentDB.getComponentStatus(self.config)
    agentInfo.update(self.agentInfo)

    inDrain = bool(isDrainMode(self.config))
    if inDrain:
        logging.info("Agent is in DrainMode")
    # The status itself is adjusted at the end: setting "warning" here would
    # hide a non-ok status coming from the component check.
    agentInfo['drain_mode'] = inDrain

    couchInfo = self.collectCouchDBInfo()
    if couchInfo['status'] != 'ok':
        agentInfo['down_components'].append("CouchServer")
        agentInfo['status'] = couchInfo['status']
        couchInfo['name'] = "CouchServer"
        agentInfo['down_component_detail'].append(couchInfo)

    # Disk space warning
    diskUseList = diskUse()
    diskUseThreshold = float(self.config.AnalyticsDataCollector.diskUseThreshold)
    agentInfo['disk_warning'] = []
    for disk in diskUseList:
        # 'percent' is a string carrying a '%' character
        usedPercent = float(disk['percent'].strip('%'))
        if usedPercent >= diskUseThreshold and \
                disk['mounted'] not in self.config.AnalyticsDataCollector.ignoreDisk:
            agentInfo['disk_warning'].append(disk)

    # Couch process warning
    couchProc = numberCouchProcess()
    couchProcessThreshold = float(self.config.AnalyticsDataCollector.couchProcessThreshold)
    agentInfo['couch_process_warning'] = couchProc if couchProc >= couchProcessThreshold else 0

    # This adds the last time and message when data was updated to agentInfo
    lastDataUpload = DataUploadTime.getInfo(self)
    if lastDataUpload['data_last_update'] != 0:
        agentInfo['data_last_update'] = lastDataUpload['data_last_update']
    if lastDataUpload['data_error'] != "":
        agentInfo['data_error'] = lastDataUpload['data_error']

    # Change status if there is data_error, couch process maxed out or disk
    # full problems. Draining and disk warnings only affect a healthy agent.
    if agentInfo['status'] == 'ok' and (inDrain or agentInfo['disk_warning']):
        agentInfo['status'] = "warning"

    if agentInfo['status'] in ('ok', 'warning'):
        dataError = agentInfo.get('data_error', 'ok') != 'ok'
        couchWarning = agentInfo.get('couch_process_warning', 0) != 0
        if dataError or couchWarning:
            agentInfo['status'] = "error"

    return agentInfo
def collectAgentInfo(self):
    """
    Build the dictionary describing the general health of the agent.

    The following pieces of information are put together:
      1. status of the agent processes
      2. status of the agent threads based on the database info
      3. couchdb active tasks and its replications
      4. check the disk usage
      5. check the number of couch processes
      6. check proxy and certificate validity

    :return: a dict with all the info collected
    """
    logging.info("Getting agent info ...")
    summary = self.wmagentDB.getComponentStatus(self.config)
    summary.update(self.agentInfo)

    if not isDrainMode(self.config):
        summary['drain_mode'] = False
    else:
        logging.info("Agent is in DrainMode")
        summary['drain_mode'] = True

    # A couch status other than 'ok' replaces the agent status
    couchStatus = self.collectCouchDBInfo()
    if couchStatus['status'] != 'ok':
        summary['down_components'].append(couchStatus['name'])
        summary['status'] = couchStatus['status']
        summary['down_component_detail'].append(couchStatus)

    # Disks at or above the usage threshold, unless explicitly ignored
    allDisks = diskUse()
    usageLimit = float(self.config.AnalyticsDataCollector.diskUseThreshold)
    summary['disk_warning'] = [
        entry for entry in allDisks
        if float(entry['percent'].strip('%')) >= usageLimit and
        entry['mounted'] not in self.config.AnalyticsDataCollector.ignoreDisk
    ]

    # Report the number of couch processes once it reaches the threshold
    numProcesses = numberCouchProcess()
    logging.info("CouchDB is running with %d processes", numProcesses)
    maxProcesses = self.config.AnalyticsDataCollector.couchProcessThreshold
    summary['couch_process_warning'] = numProcesses if numProcesses >= maxProcesses else 0

    # Last time and message of the data upload, copied only when set
    uploadInfo = DataUploadTime.getInfo()
    for key in ('data_last_update', 'data_error'):
        value = uploadInfo[key]
        if value:
            summary[key] = value

    # Move the status to warning (drain or disk problems) and then to error
    # (data upload error or too many couch processes)
    if summary['status'] == 'ok':
        if summary['drain_mode'] or summary['disk_warning']:
            summary['status'] = "warning"

    hasDataError = summary.get('data_error', 'ok') != 'ok'
    if summary['status'] in ('ok', 'warning') and \
            (hasDataError or summary.get('couch_process_warning', 0)):
        summary['status'] = "error"

    downComponents = summary['down_components']
    if downComponents:
        logging.info("List of agent components down: %s" % downComponents)

    # Check agent proxy and certificate validity
    collectProxyInfo(summary)

    return summary
def collectAgentInfo(self):
    """
    Gather the health summary of the agent.

    The couch replication between the job summary database and the central
    WMStats URL is checked first, through
    ``self.localCouchServer.recoverReplicationErrors`` with the
    "WMStats/repfilter" filter. The component status returned by
    ``self.wmagentDB.getComponentStatus`` is then merged with
    ``self.agentInfo`` and extended with:

    * ``drain_mode``: True when ``isDrainMode`` reports a draining agent
    * a "CouchServer" entry in ``down_components`` and
      ``down_component_detail`` when the couch status is not 'ok'
    * ``disk_warning``: disks at or above ``diskUseThreshold`` whose mount
      point is not in ``ignoreDisk``
    * ``couch_process_warning``: the number of couch processes when it is at
      or above ``couchProcessThreshold``, otherwise 0
    * ``data_last_update`` / ``data_error``: copied from ``DataUploadTime``
      when they are set

    :return: the dict with all the info collected
    """
    # TODO: agent info (need to include job Slots for the sites)
    # always checks couch first
    source = self.config.JobStateMachine.jobSummaryDBName
    target = self.config.AnalyticsDataCollector.centralWMStatsURL
    couchInfo = self.localCouchServer.recoverReplicationErrors(
        source, target, filter="WMStats/repfilter")
    logging.info("getting couchdb replication status: %s", couchInfo)

    agentInfo = self.wmagentDB.getComponentStatus(self.config)
    agentInfo.update(self.agentInfo)

    # The drain flag is only recorded here; its effect on the status is
    # decided at the end so it cannot hide a non-ok component status.
    if isDrainMode(self.config):
        logging.info("Agent is in DrainMode")
        agentInfo['drain_mode'] = True
    else:
        agentInfo['drain_mode'] = False

    if couchInfo['status'] != 'ok':
        agentInfo['down_components'].append("CouchServer")
        agentInfo['status'] = couchInfo['status']
        couchInfo['name'] = "CouchServer"
        agentInfo['down_component_detail'].append(couchInfo)

    # Disk space warning: 'percent' is a string carrying a '%' character
    diskUseList = diskUse()
    diskUseThreshold = float(self.config.AnalyticsDataCollector.diskUseThreshold)
    agentInfo['disk_warning'] = [
        disk for disk in diskUseList
        if float(disk['percent'].strip('%')) >= diskUseThreshold and
        disk['mounted'] not in self.config.AnalyticsDataCollector.ignoreDisk
    ]

    # Couch process warning
    couchProc = numberCouchProcess()
    couchProcessThreshold = float(self.config.AnalyticsDataCollector.couchProcessThreshold)
    if couchProc >= couchProcessThreshold:
        agentInfo['couch_process_warning'] = couchProc
    else:
        agentInfo['couch_process_warning'] = 0

    # This adds the last time and message when data was updated to agentInfo
    lastDataUpload = DataUploadTime.getInfo(self)
    if lastDataUpload['data_last_update'] != 0:
        agentInfo['data_last_update'] = lastDataUpload['data_last_update']
    if lastDataUpload['data_error'] != "":
        agentInfo['data_error'] = lastDataUpload['data_error']

    # Change status if there is data_error, couch process maxed out or disk
    # full problems. Only an otherwise healthy agent is moved to "warning".
    if agentInfo['status'] == 'ok' and (agentInfo['drain_mode'] or agentInfo['disk_warning']):
        agentInfo['status'] = "warning"

    if agentInfo['status'] in ('ok', 'warning'):
        # dict.get() replaces dict.has_key(), which does not exist on Python 3
        if agentInfo.get('data_error', 'ok') != 'ok' or \
                agentInfo.get('couch_process_warning', 0) != 0:
            agentInfo['status'] = "error"

    return agentInfo