def clearOldUploadedEvents(self):
    """Delete old events that have already been uploaded to all servers.

    Beware: if the number of servers is NOT set correctly, you might
    accidentally delete events which have only been uploaded to the
    central datastore.  This function was created to delete old events
    after the local URL has been dropped from the configuration; once
    that URL is dropped, it will also delete events which have not yet
    been uploaded to that URL.

    """
    self.lock.acquire()
    self.openConnection()
    c = self.db.cursor()
    log("StorageManager: Deleting old events which have already been "
        "uploaded to all currently specified servers.")
    sql = "SELECT COUNT(*) FROM Event WHERE UploadedTo & ? = ?"
    args = (self.allUploadedMask, self.allUploadedMask)
    c.execute(sql, args)
    log("StorageManager: Deleting %d events." % c.fetchone()[0])
    sql = "DELETE FROM Event WHERE UploadedTo & ? = ?"
    args = (self.allUploadedMask, self.allUploadedMask)
    c.execute(sql, args)
    self.db.commit()
    c.close()
    self.lock.release()
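# A minimal sketch of the UploadedTo bookkeeping used above, assuming
# allUploadedMask is the bitwise OR of one bit per configured server
# (e.g. 0b11 when numServers == 2).  An event has been uploaded
# everywhere when all mask bits are set, which is exactly the SQL
# predicate UploadedTo & mask = mask.  The helper names are illustrative.

def _all_uploaded_mask(num_servers):
    # One bit per server: server 0 -> 0b01, server 1 -> 0b10, ...
    return (1 << num_servers) - 1

def _is_uploaded_everywhere(uploaded_to, num_servers):
    mask = _all_uploaded_mask(num_servers)
    return uploaded_to & mask == mask

# With two servers, an event uploaded only to the datastore
# (UploadedTo == 0b01) is kept; 0b11 would be deleted.
assert _all_uploaded_mask(2) == 0b11
assert not _is_uploaded_everywhere(0b01, 2)
assert _is_uploaded_everywhere(0b11, 2)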
def run(self):
    log("CheckScheduler: Thread started!", severity=2)
    self.storageManager.openConnection()

    ### Trigger rate:
    triggerRate = TriggerRate(self.interpreter)
    TR_interval = int(self.dicConfig["triggerrate_interval"])
    self.sched.addJob(triggerRate.check, interval=TR_interval,
                      args=self.dicConfig)
    ### Storage size:
    storageSize = StorageSize(self.storageManager)
    SS_interval = int(self.dicConfig["storagesize_interval"])
    self.sched.addJob(storageSize.check, interval=SS_interval,
                      args=self.dicConfig)
    ### Event rate:
    ER_interval = int(self.dicConfig["eventrate_interval"])
    self.sched.addJob(self.eventRate.check, interval=ER_interval,
                      args=self.dicConfig)
    ### Storage growth:
    storageGrowth = StorageGrowth(self.storageManager)
    SG_interval = int(self.dicConfig["storagegrowth_interval"])
    self.sched.addJob(storageGrowth.check, interval=SG_interval,
                      args=self.dicConfig)

    while not self.stop_event.isSet():
        # run all checks
        self.sched.schedule(self.nagiosPush, self.dicConfig)
        try:
            time.sleep(1)
        except KeyboardInterrupt:
            break
        except:
            pass
    log("CheckScheduler: Thread stopped!", severity=2)
def schedule(self, nagiosPush, config):
    toremove = []
    tostart = []
    timeCurrent = time.time()
    # Go through all tasks
    for tid, schedjob in self.jobs.iteritems():
        self.current_tid = tid
        timeDifference = timeCurrent - schedjob.last_run
        if timeDifference >= schedjob.interval:
            schedjob.last_run = timeCurrent
            try:
                returnValues = schedjob.job.next()
                nagiosPush.sendToNagios(returnValues)
            except StopIteration:
                log("CheckScheduler: JOB STOPPED!", severity=2)
                toremove.append(tid)
            except Exception, msg:
                log("CheckScheduler: Uncaught exception in job: %s. "
                    "Restarting..." % msg, severity=2)
                toremove.append(tid)
                tostart.append(tid)
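# A minimal, self-contained sketch of what schedule() expects from a job:
# addJob() stores a generator, schedule() advances it with .next() each
# time its interval expires and forwards the yielded value to Nagios,
# and a job that raises StopIteration is removed from the job table.
# Everything below (the check body and the (status, message) tuple that
# stands in for a NagiosResult) is illustrative, not the real check code.

import random

def example_check(config):
    # One measurement per .next() call, yielded as (status, message).
    while True:
        value = random.random()              # stand-in measurement
        status = 0 if value < 0.9 else 1     # 0 = OK, 1 = warning
        yield (status, "example value %.2f" % value)

# Advancing the generator by hand, as schedule() would:
job = example_check({})
print(next(job))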
def setUploaded(self, serverID, eventIDs):
    """Set the UploadedTo field for the server the events were uploaded to.

    This sets the bit for serverID in the UploadedTo field of each event.
    Events that have been uploaded to all servers are removed.

    """
    serverbit = 1 << serverID
    self.lock.acquire()
    c = self.db.cursor()
    # First get the UploadedTo status from the db
    query = ("SELECT EventID, UploadedTo FROM Event WHERE EventID IN %s;" %
             self.__IDList2String(eventIDs))
    c.execute(query)
    # Split the result into events whose status needs to be updated and
    # events that have to be removed.
    need_update = []
    need_remove = []
    for row in c:
        (eid, e_upto) = row
        # Set serverbit to 1
        e_upto |= serverbit
        if e_upto & self.allUploadedMask == self.allUploadedMask:
            need_remove.append(eid)
        else:
            need_update.append(eid)
    # Remove events that have been uploaded to all servers
    n_remove = len(need_remove)
    if n_remove > 0:
        query = ("DELETE FROM Event WHERE EventID IN %s;" %
                 self.__IDList2String(need_remove))
        log("StorageManager: %d events removed from Storage" % n_remove)
        c.execute(query)
        if StorageManager.storagesize is not None:
            StorageManager.storagesize -= n_remove
    # Update status of events that haven't yet been uploaded to all servers
    n_need_update = len(need_update)
    if n_need_update > 0:
        query = ("UPDATE Event SET UploadedTo = UploadedTo | ? WHERE "
                 "EventID IN %s;" % self.__IDList2String(need_update))
        log("StorageManager: %d events updated in Storage" % n_need_update)
        c.execute(query, (serverbit,))
    self.db.commit()
    c.close()
    self.lock.release()
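# A small sketch of the update-or-remove decision above, assuming two
# configured servers so that allUploadedMask == 0b11 (the helper name
# below is illustrative).

def classify(uploaded_to, server_id, all_uploaded_mask=0b11):
    # Set the bit for the server that just received the event; remove
    # the event once every server's bit is set, otherwise update it.
    uploaded_to |= 1 << server_id
    if uploaded_to & all_uploaded_mask == all_uploaded_mask:
        return 'remove'
    return 'update'

assert classify(0b00, 0) == 'update'   # only the datastore has it now
assert classify(0b01, 1) == 'remove'   # both servers have it now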
def checkForUpdates(self):
    try:
        updateInfo = self.requestCheckFromServer()
    except:
        log('Could not reach the server to check for updates: %s' %
            str(sys.exc_info()[1]), severity=SEVERITY_CRITICAL)
        return
    try:
        updates = self.parseAnswerServer(updateInfo)
        return updates
    except:
        log('Could not parse the answer of the server correctly: %s' %
            str(sys.exc_info()[1]), severity=SEVERITY_CRITICAL)
        return
def openConnection(self):
    """Open a connection to the SQL storage.

    This function must be called before the other functions can be used.
    It must be executed on the same thread that executes the other
    functions, i.e. in the run() method of a thread.

    """
    try:
        self.db = sqlite3.connect(self.db_name)
    except Exception, msg:
        log("StorageManager: Error opening connection: %s." % str(msg),
            severity=2)
        raise Exception("Could not connect to sqlite3 database.")
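# Why openConnection() must run on the consuming thread: by default a
# sqlite3 connection may only be used from the thread that created it,
# so every thread opens its own connection inside its run() method.  A
# minimal sketch (the ":memory:" database and Worker class are only for
# illustration):

import sqlite3
import threading

class Worker(threading.Thread):
    def run(self):
        # Create the connection *inside* run(), on this thread.
        db = sqlite3.connect(":memory:")
        db.execute("CREATE TABLE Event (EventID INTEGER PRIMARY KEY)")
        db.close()

w = Worker()
w.start()
w.join()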
def startAll(self):
    """Set up and start all threads."""
    try:
        # Create StorageManager and Interpreter for BufferListener
        storMan = StorageManager()
        interpr = Interpreter(storMan)

        # Create BufferListener
        buffLis = self.createBufferListener(interpr)
        if buffLis.conn:
            self.hsThreads.append(buffLis)

        # Check scheduler
        # Get the nagios configuration section from the config file
        nagiosConf = self.cfg.itemsdict('NagiosPush')
        machine = re.search('([a-z0-9]+).zip',
                            self.cfg.get('Station', 'Certificate'))
        nagiosConf['machine_name'] = machine.group(1)
        checkSched = self.createCheckScheduler(interpr, nagiosConf)
        eventRate = checkSched.getEventRate()
        storMan.addObserver(eventRate)
        self.hsThreads.append(checkSched)

        # Uploader to the central datastore
        up = self.createUploader(0, "Upload-datastore", nagiosConf)
        self.hsThreads.append(up)
        storMan.addObserver(up)
        up.setNumServer(self.numServers)

        # Try the local server
        try:
            up2 = self.createUploader(1, "Upload-local", nagiosConf)
            self.hsThreads.append(up2)
            storMan.addObserver(up2)
            self.numServers += 1
            up.setNumServer(self.numServers)
            up2.setNumServer(self.numServers)
        except Exception, msg:
            log("HsMonitor: Error while parsing local server: %s." % msg)
            log("HsMonitor: Will not upload to local server!")

        # Set the number of servers for our own StorageManager
        storMan.setNumServer(self.numServers)
        storMan.clearOldUploadedEvents()

        # Start all threads, running their run() function.
        for thread in self.hsThreads:
            thread.start()
def createUploader(self, serverID, section_name, nagiosConf):
    stationID = self.cfg.get("Station", "Nummer")
    url = self.cfg.get(section_name, "URL")
    passw = self.cfg.get("Station", "Password")
    minbs = self.cfg.ifgetint(section_name, "MinBatchSize", 50)
    maxbs = self.cfg.ifgetint(section_name, "MaxBatchSize", 50)
    if minbs > maxbs:
        log("Warning HsMonitor: Maximum batch size must not be smaller "
            "than minimum batch size. Setting maximum=minimum.",
            severity=2)
        maxbs = minbs
    minwait = self.cfg.ifgetfloat(section_name, "MinWait", 1.0)
    maxwait = self.cfg.ifgetfloat(section_name, "MaxWait", 60.0)
    up = Uploader(serverID, stationID, passw, url, nagiosConf,
                  minwait, maxwait, minbs, maxbs)
    return up
def __init__(self):
    # Set up the log mode
    setLogMode(MODE_BOTH)
    # Read the configuration file
    try:
        self.cfg = EConfigParser()
        self.cfg.read([CONFIG_INI_PATH1, CONFIG_INI_PATH2,
                       CONFIG_INI_PATH3])
    except:
        log("HsMonitor: Cannot open the config file!", severity=2)
        return
    else:
        log("HsMonitor: Initialize variables.")

    # List of all the threads
    self.hsThreads = []

    # Assume one server (the datastore); if a local server is also
    # specified it will be added later.
    self.numServers = 1
def notify(self, count=1):
    """Notify the uploader that count events were received."""
    if self.isRunning:
        shouldRelease = 0
        self.numEventsLock.acquire()
        oldNumEvents = self.numEvents
        self.numEvents += count
        log("Uploader %i: %i events pending." %
            (self.serverID, self.numEvents))
        # calculate if the uploader thread should be unblocked
        if (self.numEvents >= self.minBatchSize and
                oldNumEvents < self.minBatchSize):
            shouldRelease = 1
        self.numEventsLock.release()
        if shouldRelease:
            self.noEventsSem.release()
def stopProcess(self):
    print "finding window '%s'.." % self.title
    w = win32gui.FindWindow(None, self.title)
    if w != 0:
        result = RUNNING
        win32gui.SetWindowText(w, self.title + ' (shutdown in progress...)')
        dword = c_ulong()
        tid = windll.user32.GetWindowThreadProcessId(w, byref(dword))
        pid = dword.value
        # 2035711 == 0x1F0FFF == PROCESS_ALL_ACCESS, which includes the
        # PROCESS_TERMINATE right needed for TerminateProcess below.
        phandle = windll.kernel32.OpenProcess(2035711, 0, pid)
        log("stopping process with pid %d and handle %d..." %
            (pid, phandle))
        r = windll.kernel32.TerminateProcess(phandle, 0)
        if r:
            log("success")
            result = STOPPED
        else:
            log("major fail: r = %d" % r)
            result = EXCEPTION
        #for process in self.wmiObj.Win32_Process(name = self.exeName):
        #    print 'processId: %d' % process.ProcessId
        #    if process.Terminate() == 0:
        #        result = STOPPED
    else:
        result = STOPPED
    return result
def __getNumEventsToUpload(self):
    """Give the number of events that the Uploader can upload now.

    The result will be between the minimum and maximum batch size.  If
    insufficient events are available, this function blocks on
    noEventsSem until notify() releases it.

    """
    shouldBlock = False
    self.numEventsLock.acquire()
    res = min(self.numEvents, self.maxBatchSize)
    if res < self.minBatchSize:
        shouldBlock = True
    self.numEventsLock.release()
    if shouldBlock:
        log("Uploader %i: Blocked: Too few events" % self.serverID)
        self.noEventsSem.acquire()
        log("Uploader %i: Unblocked" % self.serverID)
        return self.minBatchSize
    else:
        return res
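# A minimal, self-contained sketch of the handshake between notify()
# (the producer side) and __getNumEventsToUpload() (the consumer side),
# with the same threshold semantics: the semaphore is released only when
# the pending count crosses the minimum batch size, so the consumer
# sleeps instead of polling.  Class and variable names are illustrative.

import threading

class BatchGate(object):
    def __init__(self, min_batch):
        self.min_batch = min_batch
        self.pending = 0
        self.lock = threading.Lock()
        self.sem = threading.Semaphore(0)

    def notify(self, count=1):
        # Producer: count new events; wake the consumer once the
        # pending count first reaches the minimum batch size.
        with self.lock:
            old = self.pending
            self.pending += count
            crossed = old < self.min_batch <= self.pending
        if crossed:
            self.sem.release()

    def wait_for_batch(self):
        # Consumer: block until at least min_batch events are pending.
        with self.lock:
            enough = self.pending >= self.min_batch
        if not enough:
            self.sem.acquire()
        return self.min_batch

gate = BatchGate(min_batch=2)
gate.notify()                   # 1 pending -> no release yet
gate.notify()                   # 2 pending -> consumer would be woken
print(gate.wait_for_batch())    # prints 2 without blocking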
def getEventsRawSQL(self, serverID, numEvents):
    """Return numEvents events not yet uploaded to serverID.

    Returns the raw SQL output, i.e. full rows including the EventID
    and UploadedTo columns.

    """
    serverbit = 1 << serverID
    self.lock.acquire()
    c = self.db.cursor()
    ssize = StorageManager.storagesize
    if (ssize is not None and ssize < VACUUMTHRESHOLD and
            time() - StorageManager.lastvacuum > 100000):
        log("StorageManager: Starting VACUUM operation...")
        c.execute("VACUUM")
        StorageManager.lastvacuum = time()
        log("StorageManager: VACUUM finished.")
    c.execute("SELECT * FROM Event WHERE (UploadedTo & ?) == 0 LIMIT ?;",
              (serverbit, numEvents))
    res = c.fetchall()
    c.close()
    self.lock.release()
    return res
def parseAnswerServer(self, updateInfo):
    # updateDict contains: mustUpdate, urlUser, newVersionUser,
    # urlAdmin, newVersionAdmin
    updateDict = parse_qs(updateInfo, strict_parsing=True)
    downloader = Downloader()
    # updates contains: mustUpdate, userFile, adminFile
    updates = dict()
    mustUpdate = int(updateDict['mustUpdate'][0])
    updates['mustUpdate'] = mustUpdate
    location = "../../persistent/downloads"
    if mustUpdate & UPDATE_ADMIN_MODE:
        adminURL = updateDict['urlAdmin'][0]
        print adminURL
        adminFile = downloader.downloadUpdate(location, adminURL)
        updates['adminFile'] = adminFile
        log('Administrator update available: %s' % adminFile)
        if DISPLAY_GUI_MESSAGES and not checkFiles.checkIfAdmin():
            root = Tk()
            root.title('HiSPARC')
            Message(root, anchor='s',
                    text="Update is available requiring administrator "
                         "rights!\nPlease ask your administrator to "
                         "reboot and install it!").pack(padx=150, pady=100)
            root.mainloop()
    elif mustUpdate & UPDATE_USER_MODE:
        userURL = updateDict['urlUser'][0]
        userFile = downloader.downloadUpdate(location, userURL)
        updates['userFile'] = userFile
        log('User update available: %s' % userFile)
        # Run the update to install it.
        # First call a batch file so that Python can be closed.
        os.system(".\\runUserUpdate.bat %s" % userFile)
    return updates
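# A minimal sketch of how parse_qs handles the server's answer, assuming
# the server replies with a URL-encoded query string carrying the fields
# read above.  The concrete values and the import location are
# illustrative; the real module may import parse_qs differently.

from urlparse import parse_qs   # Python 2; urllib.parse in Python 3

answer = ("mustUpdate=1&urlUser=http://example.org/user_update.exe"
          "&newVersionUser=12")
fields = parse_qs(answer, strict_parsing=True)
# parse_qs maps every key to a *list* of values:
assert fields['mustUpdate'] == ['1']
assert fields['urlUser'] == ['http://example.org/user_update.exe']
must_update = int(fields['mustUpdate'][0])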
def addEvents(self, events):
    """Insert events into the storage and notify all observers.

    The parameter events is a list of events.  Each event is assumed to
    have a datetime attribute and a data attribute.  The data attribute
    will be pickled and stored; the StorageManager is responsible for
    serializing the events.

    """
    res = True
    n_events = len(events)
    if n_events:
        log("StorageManager: Adding %d parsed events into Storage." %
            n_events)
        self.lock.acquire()
        log("StorageManager: Acquired lock.")
        t0 = time()
        c = self.db.cursor()
        query = ("INSERT INTO Event (EventData, UploadedTo, DateTime) "
                 "VALUES (?,0,?)")
        try:
            c.executemany(query,
                          ((dumps(event), event['header']['datetime'])
                           for event in events))
            self.db.commit()
            c.close()
        except sqlite3.OperationalError, msg:
            # Prevent events from being removed from the buffer
            res = False
            log("StorageManager: Error AddEvents: %s" % str(msg),
                severity=2)
        if StorageManager.storagesize is not None:
            StorageManager.storagesize += n_events
        self.lock.release()
        log("StorageManager: Events added in %d seconds." % (time() - t0))
        # Notify the observers
        self.update(n_events)
    return res
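# A minimal sketch of the serialization used above: each event is
# pickled with dumps() before it is written to the EventData column and
# unpickled again when it is read back for upload.  The event layout
# below (a dict whose 'header' contains 'datetime') follows the access
# pattern event['header']['datetime'] in addEvents(); the other fields
# and the pickle module choice are assumptions for illustration.

from cPickle import dumps, loads   # Python 2; use pickle in Python 3
from datetime import datetime

event = {'header': {'datetime': datetime(2010, 1, 1, 12, 0, 0)},
         'trace': [1, 2, 3]}       # hypothetical payload

blob = dumps(event)                # what would be stored in EventData
restored = loads(blob)             # what an uploader would work with
assert restored['header']['datetime'] == event['header']['datetime']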
def main():
    # Create a HiSPARC monitor object
    hsMonitor = HsMonitor()

    # Start all threads
    hsMonitor.startAll()

    # Periodically check for crashed threads, and restart them if necessary
    try:
        while True:
            sleep(10)
            for thread in hsMonitor.hsThreads:
                if not thread.is_alive():
                    log("HsMonitor: Thread %s died, restarting." %
                        thread.name, severity=2)
                    thread.init_restart()
                    thread.start()
                    log("HsMonitor: Thread %s restarted." % thread.name,
                        severity=2)
    except ThreadCrashError, exc:
        log(exc)
        log("HsMonitor: Thread %s keeps crashing, shutting down." %
            thread.name, severity=2)
    except KeyboardInterrupt:
        log("HsMonitor: Interrupted by keyboard, closing down.",
            severity=2)

    # Close down everything
    hsMonitor.stopAll()

    # Wait for all threads to finish
    for thread in hsMonitor.hsThreads:
        thread.join()


if __name__ == '__main__':
    main()
def run(self):
    log("Uploader %i: Thread started for %s." % (self.serverID, self.URL),
        severity=2)
    # Initialize storage manager
    self.storageManager.openConnection()
    # Number of events that have been received
    log("Uploader %i: Getting number of events to upload." % self.serverID)
    self.numEvents = self.storageManager.getNumEventsServer(self.serverID)
    log("Uploader %i: %i events in storage." %
        (self.serverID, self.numEvents))
    self.isRunning = True
    numFailedAttempts = 0
    while not self.stop_event.isSet():
        bsize = self.__getNumEventsToUpload()
        (elist, eidlist) = self.storageManager.getEvents(self.serverID,
                                                         bsize)
        returncode = self.__upload(elist)
        if returncode == '100':
            log("Uploader %i: %d events uploaded to %s." %
                (self.serverID, bsize, self.URL))
            numFailedAttempts = 0
            # Record successful upload in storagemanager
            self.storageManager.setUploaded(self.serverID, eidlist)
            # Reduce counter
            self.numEventsLock.acquire()
            self.numEvents -= bsize
            self.numEventsLock.release()
        else:
            numFailedAttempts += 1
            msg1 = ("Error Uploader %i: %s: Return code: %s." %
                    (self.serverID, self.URL, returncode))
            log(msg1, severity=2)
            msg2 = ("Error Uploader %i: %d events attempted to upload, "
                    "number of failed attempts: %i." %
                    (self.serverID, bsize, numFailedAttempts))
            log(msg2, severity=2)
            msg3 = msg1 + "\n" + msg2
            nr = NagiosResult(2, msg3, "ServerCheck")
            self.nagiosPush.sendToNagios(nr)
        sleeptime = min(2 ** numFailedAttempts * self.retryAfter,
                        self.maxWait)
        log("Uploader %i: Sleeping for %f seconds." %
            (self.serverID, sleeptime))
        sleep(sleeptime)
    log("Uploader %i: Thread stopped!" % self.serverID, severity=2)
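# A minimal sketch of the retry back-off used above: after each failed
# upload the sleep time doubles, capped at maxWait; a successful upload
# resets the attempt counter so the loop falls back to the base delay.
# The retry_after and max_wait values below are made-up examples.

def backoff(failed_attempts, retry_after=1.0, max_wait=60.0):
    # Same formula as in run(): min(2 ** n * retryAfter, maxWait).
    return min(2 ** failed_attempts * retry_after, max_wait)

# 0 failures -> 1 s, then 2 s, 4 s, 8 s, ... capped at 60 s:
print([backoff(n) for n in range(8)])
# [1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 60.0, 60.0]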
def start():
    setLogMode(MODE_BOTH)
    log("\nStarting User-Mode applications...")

    # os.getenv returns None when the variable is unset, so fall back to
    # an empty string to make the check below reliable.
    HS_ROOT = os.getenv("HISPARC_ROOT", "")
    if HS_ROOT == "":
        log("FATAL: environment variable HISPARC_ROOT not set!")
        return

    configFile = "%s/persistent/configuration/config.ini" % HS_ROOT
    config = ConfigParser.ConfigParser()
    config.read(configFile)

    try:
        # start MySQL
        log("Starting MySQL...")
        datapath = "%s/persistent/data/mysql" % HS_ROOT
        binlogs = glob.glob(os.path.join(datapath, "mysql-bin.*"))
        if binlogs:
            log("Removing stale MySQL binary logs...")
            for f in binlogs:
                os.remove(f)
        binary = "mysqld.exe"
        exeBase = "%s/user/mysql/bin" % HS_ROOT
        program = '"%(exec)s/%(binary)s"' % {"exec": exeBase,
                                             "binary": binary}
        handler = StartStop()
        handler.exeName = binary
        handler.ShowWindow = win32con.SW_HIDE
        handler.command = program
        handler.currentDirectory = HS_ROOT
        handler.title = "MySQL server"
        res = handler.startProcess()
        if res == RUNNING:
            time.sleep(5)
            # check run-status again
            res = handler.probeProcess()
        log("Status: " + status(res))
    except:
        log("An exception was generated while starting MySQL: " +
            str(sys.exc_info()[1]))

    try:
        # start LabVIEW detector
        log("Starting LabVIEW detector...")
        if config.getboolean("Detector", "Enabled"):
            handler = StartStop()
            handler.exeName = "hisparcdaq.exe"
            handler.currentDirectory = "%s/user/hisparcdaq" % HS_ROOT
            handler.command = "%s/user/hisparcdaq/hisparcdaq.exe" % HS_ROOT
            res = handler.startProcess()
        else:
            res = DISABLED
        log("Status: " + status(res))
    except:
        log("An exception was generated while starting LabVIEW detector: " +
            str(sys.exc_info()[1]))

    try:
        # start LabVIEW weather
        log("Starting LabVIEW weather...")
        if config.getboolean("Weather", "Enabled"):
            handler = StartStop()
            handler.exeName = "HiSPARC Weather Station.exe"
            handler.currentDirectory = "%s/user/hisparcweather" % HS_ROOT
            handler.command = ("%s/user/hisparcweather/"
                               "HiSPARC Weather Station.exe" % HS_ROOT)
            res = handler.startProcess()
        else:
            res = DISABLED
        log("Status: " + status(res))
    except:
        log("An exception was generated while starting LabVIEW weather: " +
            str(sys.exc_info()[1]))

    # Introduce a 20-second pause to let MySQL start completely
    time.sleep(20)

    try:
        # start HSMonitor
        log("Starting HSMonitor...")
        handler = CMDStartStop()
        handler.exeName = "python.exe"
        handler.title = "HISPARC MONITOR: hsmonitor"
        handler.currentDirectory = "%s/user/hsmonitor" % HS_ROOT
        handler.command = "%s/user/python/python.exe HsMonitor.py" % HS_ROOT
        res = handler.startProcess()
        log("Status: " + status(res))
    except:
        log("An exception was generated while starting HSMonitor: " +
            str(sys.exc_info()[1]))

    try:
        # start updater
        log("Starting Updater...")
        handler = CMDStartStop()
        handler.exeName = "python.exe"
        handler.title = "HISPARC Updater: updater"
        handler.currentDirectory = "%s/user/updater" % HS_ROOT
        handler.command = "%s/user/python/python.exe Update.py" % HS_ROOT
        res = handler.startProcess()
        log("Status: " + status(res))
    except:
        log("An exception was generated while starting the Updater: " +
            str(sys.exc_info()[1]))