Example #1
0
def get_disks_to_check(srvObj):
    """
    Collect the disks mounted on this host that are due for a data check.

    The slot IDs of the mounted disks are looked up for the host ID of the
    server, the disk information of those slots is fetched from the DB and
    each SQL row is unpacked into an ngamsDiskInfo object. A disk is kept
    only if its last check time plus the configured minimum data check cycle
    lies strictly before the current time; a disk without a last check time
    is treated as if it had been checked at time 0.

    srvObj:       Reference to server object (ngamsServer).

    Returns:      Dictionary mapping disk ID to the ngamsDiskInfo object of
                  every disk that needs to be checked.
    """
    # Ask the DB which slots hold mounted disks, then for their disk info
    mounted_slots = srvObj.getDb().getSlotIdsMountedDisks(srvObj.getHostId())
    sql_rows = srvObj.getDb().getDiskInfoForSlotsAndHost(
        srvObj.getHostId(), mounted_slots)

    # Wrap each SQL row in a disk info object and index them by disk ID
    mounted_disks = {}
    for row in sql_rows:
        disk = ngamsDiskInfo.ngamsDiskInfo().unpackSqlResult(row)
        mounted_disks[disk.getDiskId()] = disk

    # Keep only the disks whose last check is older than the minimum cycle
    now = time.time()
    check_period = isoTime2Secs(srvObj.getCfg().getDataCheckMinCycle())

    due_disks = {}
    for disk_id, disk in mounted_disks.items():
        checked_at = disk.getLastCheck() or 0
        if check_period + checked_at < now:
            due_disks[disk_id] = disk

    logger.info("Will check %d disks that are mounted in this system",
                len(due_disks))
    return due_disks
Example #2
0
def dataCheckThread(srvObj, stopEvt, checksum_allow_evt, checksum_stop_evt):
    """
    Body of the Data Check Thread: repeatedly runs a data check cycle and
    then sleeps until the configured minimum cycle time has elapsed.

    Each iteration first waits until the Janitor Thread run count reported
    by the server is non-zero, then calls data_check_cycle(). Afterwards the
    elapsed time is measured against the oldest last-disk-check time stored
    in the DB for this host or, if there is none, against the start time of
    the cycle that just ran.

    srvObj:               Reference to server object (ngamsServer).

    stopEvt:              Event handed to suspend() and data_check_cycle().

    checksum_allow_evt:   Handed unchanged to data_check_cycle().

    checksum_stop_evt:    Handed unchanged to data_check_cycle().

    Returns:              Void. The function returns only when a
                          StopDataCheckThreadException is caught.
    """
    cycle_period = isoTime2Secs(srvObj.getCfg().getDataCheckMinCycle())
    logger.info("Data checker thread period is %f", cycle_period)

    while True:

        # The whole iteration is guarded so that an unexpected problem
        # (e.g. with the DB connection) does not kill the thread.
        try:

            # Hold off until the Janitor Thread has completed at least one
            # run, so the check starts from a clean environment.
            while True:
                if srvObj.getJanitorThreadRunCount():
                    break
                suspend(stopEvt, 0.5)

            logger.info("Data Check Thread starting iteration ...")

            # The actual checking work
            stats = data_check_cycle(srvObj, stopEvt, checksum_allow_evt,
                                     checksum_stop_evt)

            # Work out how long ago the reference point was
            oldest_check = srvObj.getDb().getMinLastDiskCheck(
                srvObj.getHostId())
            reference_time = oldest_check if oldest_check else stats.time_start
            elapsed = time.time() - reference_time

            # Minimum cycle time already used up: start over immediately
            if not elapsed < cycle_period:
                continue

            remaining = cycle_period - elapsed
            wake_up_at = int(time.time() + remaining)
            srvObj.setNextDataCheckTime(wake_up_at)
            logger.info(
                "Suspending Data Checking Thread for %.3f [s]. "
                "Next run scheduled for %s", remaining,
                toiso8601(wake_up_at))
            suspend(stopEvt, remaining)

        except StopDataCheckThreadException:
            return
        except Exception:
            logger.exception(
                "Error occurred during execution of the Data Check Thread")

            # Brief pause before retrying; a stop request during the pause
            # still terminates the thread.
            try:
                suspend(stopEvt, 1)
            except StopDataCheckThreadException:
                return


# EOF
Example #3
0
def run(srvObj, stopEvt):
    """
    Check/clean up three directories: the processing directory, the
    subscription back-log buffer and the NGAS temporary directory.

    For each directory checkCleanDirs() is called with the directory, the
    same time limit passed twice, and a flag named use_last_access.

    srvObj:       Reference to server object (ngamsServer).

    stopEvt:      Not used by this function.

    Returns:      Void.
    """
    cfg = srvObj.getCfg()

    # Resolve every directory and time limit up front, before any cleaning
    processing_dir = cfg.getProcessingDirectory()
    backlog_dir = os.path.join(cfg.getBackLogBufferDirectory(),
                               NGAMS_SUBSCR_BACK_LOG_DIR)
    backlog_limit = isoTime2Secs(cfg.getBackLogExpTime())
    tmp_dir = ngamsHighLevelLib.getTmpDir(cfg)

    # (description, directory, time limit [s], use_last_access flag)
    targets = [
        ("processing directory", processing_dir, 1800, 0),
        ("subscription backlog buffer", backlog_dir, backlog_limit, 0),
        ("NGAS tmp directory", tmp_dir, 12 * 3600, 1),
    ]

    for label, directory, limit, use_last_access in targets:
        logger.info("Checking/cleaning up %s: %s", label, directory)
        checkCleanDirs(directory, limit, limit, use_last_access)
Example #4
0
def userServiceThread(srvObj, stopEvt, userServicePlugin):
    """
    The User Service Thread runs periodically a user provided plug-in
    (User Service Plug-In) which carries out tasks needed in a specific
    context.

    The plug-in parameters and the period are read from the configuration
    under "NgamsCfg.SystemPlugIns[1]". If no period is configured, 300
    seconds are used. After each run the thread waits on stopEvt for
    whatever is left of the period; if the plug-in raises, the error is
    logged and the thread waits 2 seconds on stopEvt before retrying.

    srvObj:              Reference to server object (ngamsServer).

    stopEvt:             Event object; once it is set the thread returns
                         at the next wait.

    userServicePlugin:   Callable invoked as
                         userServicePlugin(srvObj, plugin_pars).

    Returns:             Void.
    """

    plugin_name = userServicePlugin.__name__
    prefix = "NgamsCfg.SystemPlugIns[1]"

    plugin_pars = srvObj.getCfg().getVal(prefix + ".UserServicePlugInPars")
    period = srvObj.getCfg().getVal(prefix + ".UserServicePlugInPeriod")
    period = isoTime2Secs(period) if period else 300

    # Main loop.
    while True:
        try:
            startTime = time.time()

            logger.debug("Executing User Service Plug-In")
            userServicePlugin(srvObj, plugin_pars)
            stopTime = time.time()
            sleepTime = period - (stopTime - startTime)

            if sleepTime > 0:
                msg = "Executed User Service Plug-In: %s. Sleeping: %.3fs"
                logger.debug(msg, plugin_name, sleepTime)

            # If signaled, return
            if stopEvt.wait(sleepTime):
                return

        except Exception:
            errMsg = "Error occurred during execution of the User " +\
                     "Service Thread"
            logger.exception(errMsg)
            # We make a small wait here to avoid that the process tries
            # too often to carry out the tasks that failed. The wait is
            # done on the stop event (rather than a plain sleep) so that a
            # plug-in which fails on every run cannot prevent the thread
            # from being stopped.
            if stopEvt.wait(2.0):
                return
Example #5
0
def janitorThread(srvObj, stopEvt, srv_to_jan_queue, jan_to_srv_queue):
    """
    Entry point for the janitor process. It obtains the plug-ins to run via
    get_plugins() and executes them through JanitorCycle() in an infinite
    loop, suspending between cycles for the configured janitor suspension
    time. While suspended it polls the server for 'event-info-list'
    messages and updates the DB Snapshots for the disks reported.

    srvObj:             Reference to server object (ngamsServer).

    stopEvt:            Event handed to suspend(), JanitorCycle() and the
                        DB Snapshot helpers.

    srv_to_jan_queue:   Queue stored on the server object as
                        _serv_to_jan_queue.

    jan_to_srv_queue:   Queue stored on the server object as
                        _jan_to_serv_queue.

    Returns:            Void. The function returns only when a
                        StopJanitorThreadException is caught.
    """

    # No need to shut down anything on this process
    def noop(*args):
        pass

    signal.signal(signal.SIGTERM, noop)
    signal.signal(signal.SIGINT, noop)

    # Reset the internal multiprocess queues so the janitor_communicate() method
    # of the server object is usable from within this process
    srvObj._serv_to_jan_queue = srv_to_jan_queue
    srvObj._jan_to_serv_queue = jan_to_srv_queue

    # Set up the logging so it outputs the records into the jan->srv queue
    for h in list(logging.root.handlers):
        logging.root.removeHandler(h)
    logging.root.addHandler(ForwarderHandler(srvObj))

    # Reset the db pointer in our server object to get fresh connections
    srvObj.reconnect_to_db()

    # => Update NGAS DB + DB Snapshot Document for the DB connected.
    try:
        checkUpdateDbSnapShots(srvObj, stopEvt)
    except StopJanitorThreadException:
        return
    except Exception:
        logger.exception("Problem updating DB Snapshot files")

    # Main loop
    suspendTime = isoTime2Secs(srvObj.getCfg().getJanitorSuspensionTime())
    plugins = get_plugins(srvObj)
    run_count = 0
    try:
        while True:

            JanitorCycle(plugins, srvObj, stopEvt)

            # Update the Janitor Thread run count.
            run_count += 1
            srvObj.janitor_send('janitor-run-count', run_count)

            # Suspend the thread for the time indicated.
            logger.info("Janitor Thread executed - suspending for %d [s] ...",
                        suspendTime)
            startTime = time.time()
            while (time.time() - startTime) < suspendTime:

                # Check if we should update the DB Snapshot.
                try:
                    event_info_list = srvObj.janitor_communicate(
                        'event-info-list', timeout=0.5)
                except Queue.Empty:
                    event_info_list = None

                if event_info_list is not None:
                    # Tracks the entry being processed so a failure can be
                    # reported against the right disk.
                    diskInfo = None
                    try:
                        for diskInfo in event_info_list:
                            updateDbSnapShots(srvObj, stopEvt, diskInfo)
                    except StopJanitorThreadException:
                        # A stop request is not a snapshot error: let the
                        # outer handler terminate the thread.
                        # NOTE(review): assumes updateDbSnapShots() can raise
                        # this like checkUpdateDbSnapShots() above - confirm.
                        raise
                    except Exception:
                        if diskInfo:
                            msg = "Error encountered handling DB Snapshot " +\
                                  "for disk: %s/%s"
                            args = (diskInfo[0], diskInfo[1])
                        else:
                            msg, args = "Error encountered handling DB Snapshot", ()
                        logger.exception(msg, *args)
                        suspend(stopEvt, 5)

                suspend(stopEvt, 1.0)

    except StopJanitorThreadException:
        return