def get_disks_to_check(srvObj):
    """
    Collect the mounted disks of this host that are due for a data check.

    srvObj:   Reference to server object; its getDb(), getHostId() and
              getCfg() methods are used.

    Returns:  Dictionary mapping each disk ID to the disk info object built
              from the corresponding DB row, restricted to disks whose last
              check plus the configured minimum cycle lies in the past.
    """
    # Ask the DB which slots hold mounted disks, then fetch their rows
    mounted_slots = srvObj.getDb().getSlotIdsMountedDisks(srvObj.getHostId())
    sql_rows = srvObj.getDb().getDiskInfoForSlotsAndHost(srvObj.getHostId(),
                                                         mounted_slots)

    # Convert raw SQL rows into disk info objects keyed by their disk ID
    disk_infos = [ngamsDiskInfo.ngamsDiskInfo().unpackSqlResult(row)
                  for row in sql_rows]
    by_disk_id = {}
    for disk_info in disk_infos:
        by_disk_id[disk_info.getDiskId()] = disk_info

    # Keep only the disks whose check period has already elapsed; a disk
    # without a recorded last check counts as checked at time 0
    now = time.time()
    check_period = isoTime2Secs(srvObj.getCfg().getDataCheckMinCycle())
    due_disks = {}
    for disk_id, disk_info in by_disk_id.items():
        last_check = disk_info.getLastCheck() or 0
        if check_period + last_check < now:
            due_disks[disk_id] = disk_info

    logger.info("Will check %d disks that are mounted in this system",
                len(due_disks))
    return due_disks
def dataCheckThread(srvObj, stopEvt, checksum_allow_evt, checksum_stop_evt):
    """
    Main loop of the Data Check Thread: repeatedly runs a data check cycle
    and then suspends itself until the configured minimum cycle time has
    elapsed.

    srvObj:              Reference to server object (ngamsServer).

    stopEvt:             Event handed to suspend() and data_check_cycle().

    checksum_allow_evt:  Passed through unchanged to data_check_cycle().

    checksum_stop_evt:   Passed through unchanged to data_check_cycle().

    Returns:             Void. The function returns once a
                         StopDataCheckThreadException is caught.
    """
    cycle_period = isoTime2Secs(srvObj.getCfg().getDataCheckMinCycle())
    logger.info("Data checker thread period is %f", cycle_period)

    while True:
        # The whole iteration is guarded so that an unexpected failure
        # (e.g. a problem with the DB connection) does not kill the thread.
        try:
            # Hold back until the Janitor Thread reports at least one run,
            # so that the check is carried out in a clean environment.
            while not srvObj.getJanitorThreadRunCount():
                suspend(stopEvt, 0.5)

            logger.info("Data Check Thread starting iteration ...")

            # The actual checking work
            stats = data_check_cycle(srvObj, stopEvt, checksum_allow_evt,
                                     checksum_stop_evt)

            # Measure the elapsed time from the oldest disk check known to
            # the DB, or from the start of this cycle if there is none
            oldest_check = srvObj.getDb().getMinLastDiskCheck(
                srvObj.getHostId())
            reference_time = oldest_check if oldest_check else stats.time_start
            elapsed = time.time() - reference_time
            if not (elapsed < cycle_period):
                # The minimum cycle already elapsed: go again right away
                continue

            remaining = cycle_period - elapsed
            next_check_time = int(time.time() + remaining)
            srvObj.setNextDataCheckTime(next_check_time)
            logger.info(
                "Suspending Data Checking Thread for %.3f [s]. "
                "Next run scheduled for %s", remaining,
                toiso8601(next_check_time))
            suspend(stopEvt, remaining)
        except StopDataCheckThreadException:
            return
        except Exception:
            logger.exception(
                "Error occurred during execution of the Data Check Thread")
            # Short pause before retrying after a failure
            try:
                suspend(stopEvt, 1)
            except StopDataCheckThreadException:
                return


# EOF
def run(srvObj, stopEvt):
    """
    Check/clean up the processing directory, the subscription back-log
    buffer and the NGAS temporary directory.

    srvObj:   Reference to server object; only its configuration
              (getCfg()) is used.

    stopEvt:  Not used by this function.

    Returns:  Void.
    """
    cfg = srvObj.getCfg()

    # Resolve every location and time limit up front, before any cleaning
    processing_dir = cfg.getProcessingDirectory()
    backlog_dir = os.path.join(cfg.getBackLogBufferDirectory(),
                               NGAMS_SUBSCR_BACK_LOG_DIR)
    backlog_exp_time = isoTime2Secs(cfg.getBackLogExpTime())
    tmp_dir = ngamsHighLevelLib.getTmpDir(cfg)

    # (description, directory, time limit, use-last-access flag).
    # NOTE(review): the time limits look like ages in seconds (1800 s,
    # 12 h) -- confirm against checkCleanDirs().
    targets = [
        ("processing directory", processing_dir, 1800, 0),
        ("subscription backlog buffer", backlog_dir, backlog_exp_time, 0),
        ("NGAS tmp directory", tmp_dir, 12 * 3600, 1),
    ]

    for description, directory, time_limit, use_last_access in targets:
        logger.info("Checking/cleaning up %s: %s", description, directory)
        # The same limit is passed for both time arguments of checkCleanDirs
        checkCleanDirs(directory, time_limit, time_limit, use_last_access)
def userServiceThread(srvObj, stopEvt, userServicePlugin):
    """
    The User Service Thread runs periodically a user provided plug-in
    (User Service Plug-In) until the stop event is signaled.

    srvObj:             Reference to server object (ngamsServer).

    stopEvt:            Event used to request the end of this thread. Its
                        wait(timeout) method is used both for sleeping
                        between runs and for detecting the stop request.

    userServicePlugin:  Callable executed on each iteration as
                        userServicePlugin(srvObj, plugin_pars).

    Returns:            Void.
    """
    plugin_name = userServicePlugin.__name__
    prefix = "NgamsCfg.SystemPlugIns[1]"
    cfg = srvObj.getCfg()
    plugin_pars = cfg.getVal(prefix + ".UserServicePlugInPars")
    period = cfg.getVal(prefix + ".UserServicePlugInPeriod")
    # Default to 300 seconds when no period is configured
    period = 300 if not period else isoTime2Secs(period)

    # Main loop.
    while True:
        try:
            startTime = time.time()
            logger.debug("Executing User Service Plug-In")
            userServicePlugin(srvObj, plugin_pars)
            stopTime = time.time()
            sleepTime = period - (stopTime - startTime)
            if sleepTime > 0:
                msg = "Executed User Service Plug-In: %s. Sleeping: %.3fs"
                logger.debug(msg, plugin_name, sleepTime)

            # Always consult the stop event, even when the plug-in took
            # longer than the period (timeout clamped to 0 in that case),
            # so that the thread can be stopped. If signaled, return.
            if stopEvt.wait(max(sleepTime, 0)):
                return
        except Exception:
            errMsg = "Error occurred during execution of the User " +\
                     "Service Thread"
            logger.exception(errMsg)
            # We make a small wait here to avoid that the process tries
            # too often to carry out the tasks that failed. Waiting on the
            # stop event (instead of a plain sleep) keeps the thread
            # stoppable while the plug-in keeps failing.
            if stopEvt.wait(2.0):
                return
def janitorThread(srvObj, stopEvt, srv_to_jan_queue, jan_to_srv_queue):
    """
    Entry point for the janitor process. It obtains the janitor plug-ins
    via get_plugins() and runs them through JanitorCycle() in an infinite
    loop, suspending between cycles for the configured janitor suspension
    time while servicing DB Snapshot update requests.

    srvObj:            Reference to server object (ngamsServer).

    stopEvt:           Event handed to suspend() and to the snapshot and
                       plug-in routines.

    srv_to_jan_queue:  Queue stored on srvObj as _serv_to_jan_queue.

    jan_to_srv_queue:  Queue stored on srvObj as _jan_to_serv_queue.

    Returns:           Void. The function returns once a
                       StopJanitorThreadException is caught.
    """

    # No need to shut down anything on this process
    def noop(*args):
        pass
    signal.signal(signal.SIGTERM, noop)
    signal.signal(signal.SIGINT, noop)

    # Reset the internal multiprocess queues so the janitor_communicate()
    # method of the server object is usable from within this process
    srvObj._serv_to_jan_queue = srv_to_jan_queue
    srvObj._jan_to_serv_queue = jan_to_srv_queue

    # Set up the logging so it outputs the records into the jan->srv queue
    for h in list(logging.root.handlers):
        logging.root.removeHandler(h)
    logging.root.addHandler(ForwarderHandler(srvObj))

    # Reset the db pointer in our server object to get fresh connections
    srvObj.reconnect_to_db()

    # => Update NGAS DB + DB Snapshot Document for the DB connected.
    try:
        checkUpdateDbSnapShots(srvObj, stopEvt)
    except StopJanitorThreadException:
        return
    except Exception:
        logger.exception("Problem updating DB Snapshot files")

    # Main loop
    suspendTime = isoTime2Secs(srvObj.getCfg().getJanitorSuspensionTime())
    plugins = get_plugins(srvObj)
    run_count = 0
    try:
        while True:
            JanitorCycle(plugins, srvObj, stopEvt)

            # Update the Janitor Thread run count.
            run_count += 1
            srvObj.janitor_send('janitor-run-count', run_count)

            # Suspend the thread for the time indicated.
            logger.info("Janitor Thread executed - suspending for %d [s] ...",
                        suspendTime)
            startTime = time.time()
            while (time.time() - startTime) < suspendTime:
                # Check if we should update the DB Snapshot.
                try:
                    event_info_list = srvObj.janitor_communicate(
                        'event-info-list', timeout=0.5)
                except Queue.Empty:
                    event_info_list = None

                if event_info_list is not None:
                    try:
                        diskInfo = None
                        for diskInfo in event_info_list:
                            updateDbSnapShots(srvObj, stopEvt, diskInfo)
                    except StopJanitorThreadException:
                        # A stop request is not a snapshot error: let the
                        # outer handler end the thread without logging it
                        # as a failure.
                        raise
                    except Exception:
                        if diskInfo:
                            msg = "Error encountered handling DB Snapshot " +\
                                  "for disk: %s/%s"
                            args = (diskInfo[0], diskInfo[1])
                        else:
                            msg, args = \
                                "Error encountered handling DB Snapshot", ()
                        logger.exception(msg, *args)
                        # Back off a little before handling further events
                        suspend(stopEvt, 5)

                suspend(stopEvt, 1.0)
    except StopJanitorThreadException:
        return