def setglobalstatus(self):
    """Recompute the aggregate system status from this object and its workers.

    The aggregate starts from this object's own ``status`` and
    ``statusMessage``.  Every worker is then folded in: the highest status
    value seen becomes ``self.systemStatus`` and each non-empty worker
    message is appended to ``self.systemStatusMessage`` on its own line,
    prefixed with the worker's ``friendlyName``.

    Side effects performed here:
      * this object's own status is written with ``db.logstatus`` when the
        status or message differs from the last logged values;
      * a worker whose ``statusTime`` is older than
        ``self.lastStatusRequestTime`` is marked ``Statuses.CRITICAL`` and
        logged (once, on the transition);
      * ``self.reboot`` is called when such a worker's ``statusTime`` is more
        than 300 seconds older than the last status request;
      * ``self.loginformation`` / ``self.indicatestatus`` are called when the
        aggregate status differs from ``self.indicatedStatus``.
    """
    self.systemStatus = self.status
    self.systemStatusMessage = self.statusMessage + "\r\n"
    self.systemStatusTime = datetime.now()

    # Only write our own status to the database when something changed.
    if self.status != self.lastLoggedStatus or self.statusMessage != self.lastLoggedStatusMessage:
        db.logstatus(self.name(), self.status, self.statusMessage)
        self.lastLoggedStatus = self.status
        self.lastLoggedStatusMessage = self.statusMessage

    # Only the worker objects are needed; .values() works on Python 2 and 3,
    # unlike the Python-2-only .iteritems().
    for worker in self.workers.values():
        # We've got to go back two time periods, otherwise only the first responder will be OK.
        if worker.statusTime < self.lastStatusRequestTime:
            if worker.status != Statuses.CRITICAL:
                # Log the missed response once, on the transition to CRITICAL.
                worker.status = Statuses.CRITICAL
                worker.statusMessage = "Status response was not received. Requested at {0}. Last response: {1}".format(self.lastStatusRequestTime, worker.statusTime)
                db.logstatus(worker.name, Statuses.CRITICAL, worker.statusMessage)
            if worker.statusTime < self.lastStatusRequestTime - timedelta(seconds=300):
                # The worker has been unresponsive for over 5 minutes. Issue a command to reboot.
                self.reboot("Worker {0} has been unresponsive for over five minutes. Initiating Emergency reboot.".format(worker.name))

        # The aggregate status is the highest status value of any participant.
        if worker.status > self.systemStatus:
            self.systemStatus = worker.status
        if worker.statusMessage != '':
            self.systemStatusMessage += "{0}: {1}\r\n".format(worker.friendlyName, worker.statusMessage)

    # Nothing to report anywhere: show a plain 'OK' instead of a blank message.
    if self.systemStatus == Statuses.OK and self.systemStatusMessage.strip() == '':
        self.systemStatusMessage = 'OK'

    if self.indicatedStatus != self.systemStatus:
        self.loginformation("System status", "{0} - {1}".format(Statuses.codes[self.systemStatus], self.systemStatusMessage.strip()))
        # NOTE(review): assumed to belong to this change-only branch; the
        # original indentation of this call could not be confirmed.
        self.indicatestatus(self.systemStatus)
def processrequest(self, request):
    """Handle one incoming message, dispatching on ``request[MessageCodes.CODE]``.

    Message types handled in the visible part of this method:
      * ``STATUS_RESPONSE``   - store the worker's reported status, refresh the
        global status, queue/send status e-mails and log status changes.
      * ``PROGRAM_REQUEST``   - apply the program locally when addressed to this
        object, otherwise relay the request to the named worker.
      * ``BROADCAST_REQUEST`` / ``SAFE_MODE`` - relay the request to every worker.
      * ``WEB_STATUS_REQUEST`` - delegate to ``self.processwebstatusrequest``.
      * ``LIST_WORKERS``      - put a status summary of this object and each
        worker class on the request's response queue.
      * ``REBOOT_REQUEST``    - call ``self.reboot`` with a descriptive message.
      * ``EXCEPTION``         - re-raise the exception carried in the request.

    Returns ``True`` for each handled type above, except ``WEB_STATUS_REQUEST``
    (returns whatever ``processwebstatusrequest`` returns) and ``EXCEPTION``
    (raises).

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source; the spots where more than one nesting was plausible are flagged
    inline and should be confirmed against the original file.
    """
    code = request[MessageCodes.CODE]
    self.debug("Received: {0} request {1}".format(code, request))

    if code == MessageTypes.STATUS_RESPONSE:
        self.debug("Received Status Response: {0} from {1}".format(request[MessageCodes.STATUS], request[MessageCodes.WORKER]))
        worker = self.workers[request[MessageCodes.WORKER]]
        worker.status = request[MessageCodes.STATUS]
        worker.statusMessage = request[MessageCodes.MESSAGE]
        worker.statusTime = request[MessageCodes.TIME]
        worker.program = request[MessageCodes.PROGRAM]
        self.setglobalstatus()

        sendEmail = False
        if (worker.status > Statuses.OK and worker.status != Statuses.UNDEFINED) or (worker.lastLoggedStatus is not None and worker.lastLoggedStatus != Statuses.UNDEFINED and worker.status != worker.lastLoggedStatus):
            # Don't add the initial 'OK' to the email.
            if worker.status != worker.lastLoggedStatus or (worker.status > Statuses.OK and worker.statusMessage != worker.lastLoggedStatusMessage):
                self.debug("********************Adding to email cache: {0}: {1}, {2}, {3}, {4}".format(worker.name, worker.status, worker.lastLoggedStatus, worker.statusMessage, worker.lastLoggedStatusMessage), DebugLevels.SCREEN)
                cached = self.emailCache.setdefault(worker.name, [])
                cached.append(request)
                # First cached item starts the one-hour flush timer.
                if self.emailCacheFlushTime is None:
                    self.emailCacheFlushTime = datetime.now() + timedelta(seconds=3600)
                self.debug("emailcache contains {0} items".format(len(cached)))
                # NOTE(review): assumed to sit inside the caching branch, where
                # the cache entry is guaranteed to exist.
                # Send immediately on a full cache or an escalation above WARNING.
                if len(cached) >= 20 or (worker.status > Statuses.WARNING and worker.status > worker.lastLoggedStatus):
                    self.debug("sending email condition A for worker {0}".format(worker.name))
                    sendEmail = True

        if not sendEmail and self.emailCacheFlushTime is not None and self.emailCacheFlushTime < datetime.now():
            # Do we have something to send
            self.debug("TODO: Waited over an hour since last message. Checking email cache. emailCacheFlushTime {0}".format(self.emailCacheFlushTime), DebugLevels.ALL)
            for workerName in self.workers:
                pending = self.emailCache.get(workerName)
                if pending:
                    sendEmail = True
                    self.debug("TODO: We have {0} items to send for {1}".format(len(pending), workerName), DebugLevels.ALL)
                    break

        if sendEmail:
            self.sendstatusemail()

        if worker.status != worker.lastLoggedStatus or worker.statusMessage != worker.lastLoggedStatusMessage:
            # Do log the initial 'OK' to the database
            db.logstatus(worker.name, worker.status, worker.statusMessage)
            worker.lastLoggedStatus = worker.status
            worker.lastLoggedStatusMessage = worker.statusMessage
        return True
    elif code == MessageTypes.PROGRAM_REQUEST:
        if request[MessageCodes.WORKER] == self.name():
            message = "Program {0} requested by {1} ({2} / {3})".format(request[MessageCodes.VALUE], request[MessageCodes.CALLER], request[MessageCodes.IP_ADDRESS], request[MessageCodes.USERNAME])
            self.loginformation("Program request", message)
            # Remember the program that was running before the switch.
            savedProgram = self.program[ProgramCodes.CODE]
            if self.program[ProgramCodes.REPEAT_PROGRAM] == 0:
                self.programStack.append(self.program)
            self.setprogram(request[MessageCodes.VALUE], message)
            if self.program[ProgramCodes.DEFAULT_PROGRAM] == 1:
                self.sendresumedefaultprogramrequest(savedProgram)
            else:
                # Broadcast request to all workers
                for workerName in self.workers:
                    self.debug("Relaying program request to worker {0}: {1}".format(workerName, request))
                    self.workers[workerName].queue.put({
                        MessageCodes.CODE: MessageTypes.BROADCAST_REQUEST,
                        MessageCodes.WORKER: self.name(),
                        MessageCodes.VALUE: request[MessageCodes.VALUE],
                        MessageCodes.MESSAGE: "Program {0} requested by {1}".format(request[MessageCodes.VALUE], self.name())})
            # NOTE(review): assumed the caller is always answered once the
            # program has been applied locally - confirm nesting.
            responseQueue = request[MessageCodes.RESPONSE_QUEUE]
            responseQueue.put({MessageCodes.CODE: MessageTypes.PROGRAM_RESPONSE, MessageCodes.VALUE: True})
        else:
            # Not addressed to us: hand the request to the named worker.
            worker = self.workers[request[MessageCodes.WORKER]]
            worker.relay(request)
        return True
    elif code == MessageTypes.BROADCAST_REQUEST:
        for workerName in self.workers:
            self.debug("Relaying broadcast request to worker {0}: {1}".format(workerName, request))
            self.workers[workerName].relay(request)
        return True
    elif code == MessageTypes.WEB_STATUS_REQUEST:
        return self.processwebstatusrequest(request)
    elif code == MessageTypes.LIST_WORKERS:
        responseQueue = request[MessageCodes.RESPONSE_QUEUE]
        # First entry describes this object itself, followed by one per worker class.
        entries = [{
            MessageCodes.NAME: self.name(),
            MessageCodes.FRIENDLY_NAME: self.FRIENDLY_NAME,
            MessageCodes.STATUS: self.systemStatus,
            MessageCodes.STATUS_CODE: Statuses.codes[self.systemStatus],
            MessageCodes.MESSAGE: self.systemStatusMessage,
        }]
        for workerClass in self.WORKER_CLASSES:
            key = workerClass.__name__
            worker = self.workers[key]
            statusCode = Statuses.codes[worker.status]
            # Fall back to a generic message when the worker supplied none.
            if worker.statusMessage != '':
                statusMessage = worker.statusMessage
            elif worker.status == Statuses.OK:
                statusMessage = "OK"
            else:
                statusMessage = "Unknown error condition"
            entries.append({
                MessageCodes.NAME: key,
                MessageCodes.FRIENDLY_NAME: worker.friendlyName,
                MessageCodes.STATUS: worker.status,
                MessageCodes.STATUS_CODE: statusCode,
                MessageCodes.MESSAGE: statusMessage,
            })
        responseQueue.put({MessageCodes.WORKER: entries})
        return True
    elif code == MessageTypes.SAFE_MODE:
        self.debug("Received a Safe Mode message")
        for workerName in self.workers:
            self.debug("Relaying safe mode request to worker {0}: {1}".format(workerName, request))
            self.workers[workerName].relay(request)
        return True
    elif code == MessageTypes.REBOOT_REQUEST:
        self.reboot("Emergency reboot requested by {0} with message {1}".format(request[MessageCodes.WORKER], request[MessageCodes.VALUE]))
        return True
    elif code == MessageTypes.EXCEPTION:
        # Used by threads that don't handle their own exceptions, eg: SocketListener
        raise request[MessageCodes.VALUE]