def register_gratia(self, name):
        Gratia.RegisterReporter(name)

        try:
            slurm_version = self.get_slurm_version()
        except Exception as e:
            DebugPrint(0, "Unable to get SLURM version: %s" % str(e))
            raise

        Gratia.RegisterService("SLURM", slurm_version)
        Gratia.setProbeBatchManager("slurm")
Exemple #2
0
    def register_gratia(self):
        """Register in Gratia the Reporter (gratia probe), ReporterLibrary (Gratia library version)
        and the Service (input)

        :return:
        """
        Gratia.RegisterReporter(self.probe_name)

        try:
            input_version = self.get_version()
        except SystemExit:
            raise
        except KeyboardInterrupt:
            raise
        except Exception as e:
            DebugPrint(0, "Unable to get input version: %s" % str(e))
            raise

        # TODO: check the meaning of RegisterReporter vs RegisterService
        Gratia.RegisterService(self._probeinput.get_name(), input_version)
Exemple #3
0
        if p.returncode != 0:
            raise Exception("Unable to invoke %s" % cmd)

        name, version = output.split()
        return version

    def register_gratia(self, name):
        Gratia.RegisterReporter(name)

        try:
            slurm_version = self.get_slurm_version()
        except Exception, e:
            DebugPrint(0, "Unable to get SLURM version: %s" % str(e))
            raise

        Gratia.RegisterService("SLURM", slurm_version)
        Gratia.setProbeBatchManager("slurm")


class SlurmCheckpoint(object):
    """Read and write a checkpoint file
    If class is instantiated without a filename, class works as expected but
    data is not stored to disk
    """

    _val = None
    _fp = None

    def __init__(self, target=None):
        """
        Create a checkpoint file
Exemple #4
0
def main():
    # We need the logger variable in the exception handler.
    # So we create it here.
    logger = logging.getLogger('DCacheAggregator')

    # Ignore hangup signals. We shouldn't die just because our parent
    # shell logs out.
    signal.signal(signal.SIGHUP, signal.SIG_IGN)
    # Try to catch common signals and send email before we die
    signal.signal(signal.SIGINT, warn_of_signal)
    signal.signal(signal.SIGQUIT, warn_of_signal)
    signal.signal(signal.SIGTERM, warn_of_signal)

    try:
        # Tell Gratia what versions we are using.
        # CHRIS: is there a way to automate the version extraction
        #        using the pkg_resource package?
        Gratia.RegisterReporterLibrary("psycopg2", "2.0.6")
        #Gratia.RegisterReporterLibrary( "SQLAlchemy", "0.4.1" )
        rev = Gratia.ExtractCvsRevision("$Revision: 1.13 $")
        tag = Gratia.ExtractCvsRevision("$Name:  $")
        Gratia.RegisterReporter("dCacheBillingAggregator.py",
                                str(rev) + " (tag " + str(tag) + ")")

        # BRIAN: attempt to pull the dCache version from RPM.
        version = "UNKNOWN"
        try:
            version = os.popen("rpm -q --qf '%{VERSION}-%{RELEASE}' " \
                               "dcache-server").read()
        except:
            pass
        Gratia.RegisterService("dCache", version)

        # Initialize gratia before attempting to read its config file.
        Gratia.Initialize()
        # Extract the configuration information into local variables.
        myconf = dCacheProbeConfig()

        # Get the name of the directory where we are to store the log files.
        logDir = myconf.get_LogFolder()

        # Make sure that the logging directory is present
        if not os.path.isdir(logDir):
            os.mkdir(logDir, 0755)

        logFileName = os.path.join(logDir, "dcacheTransfer.log")

        # Set up an alarm to send an email if the program terminates.
        termSubject = "dCache-transfer probe is going down"
        termMessage = "The dCache transfer probe for Gratia has " + \
                      "terminated.\nPlease check the logfile\n\n   " + \
                      logFileName + \
                      "\n\nfor the cause.\n"

        terminationAlarm = Alarm(myconf.get_EmailServerHost(),
                                 myconf.get_EmailFromAddress(),
                                 myconf.get_EmailToList(), termSubject,
                                 termMessage, 0, 0, False)

        # Set up the logger with a suitable format
        hdlr = RotatingFileHandler(logFileName, 'a', 512000, 10)
        formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
        hdlr.setFormatter(formatter)
        logger.addHandler(hdlr)
        logger.setLevel(myconf.get_AggrLogLevel())
        logger.info("starting " + ProgramName)

        stopFileName = myconf.get_StopFileName()
        updateFreq = float(myconf.get_UpdateFrequency())
        logger.warn("update freq = %.2f" % updateFreq)

        # Create the aggregator instance that we will use.
        dataDir = myconf.get_DataFolder()
        aggregator = DCacheAggregator(myconf, dataDir)

        # If profiling was requested, turn it on.
        profiling = sys.argv.count('-profile') > 0
        if profiling:
            profiler = hotshot.Profile("profile.dat")
            logger.info("Enabling Profiling")

        # Now aggregate new records, then sleep, until somebody creates
        # the stop file...
        while 1:
            # Make sure we (still) have a connection to Gratia.
            if (not TestContainer.isTest()
                ):  # no need in that during self test
                Gratia.Maintenance()

            if profiling:
                profiler.run("aggregator.sendBillingInfoRecordsToGratia()")
            else:
                try:
                    aggregator.sendBillingInfoRecordsToGratia()
                except TestContainer.SimInterrupt:
                    logger.info("BillingRecSimulator.SimInterrupt caught, " \
                        "restarting")
                    aggregator = DCacheAggregator(myconf, dataDir)
                    continue
            # Are we are shutting down?
            if os.path.exists(stopFileName):
                break

            if TestContainer.isTest():
                break

            logger.warn("sleeping for = %.2f seconds" % updateFreq)
            sleep_check(updateFreq, stopFileName)

        # If we are profiling, print the results...
        if profiling:
            profiler.close()
            stats = hotshot.stats.load("profile.dat")
            stats.sort_stats('time', 'calls')
            stats.print_stats()

        logger.warn(ProgramName + " stop file detected.")
    except (KeyboardInterrupt, SystemExit):
        raise
    except:
        # format the traceback into a string
        tblist = traceback.format_exception(sys.exc_type, sys.exc_value,
                                            sys.exc_traceback)
        msg = ProgramName + " caught an exception:\n" + "".join(tblist)
        print msg
        logger.error(msg)

    TestContainer.dumpStatistics(logger)

    # shut down the logger to make sure nothing is lost.
    logger.critical(ProgramName + " shutting down.")
    logging.shutdown()
    # try to send an email warning of the shutdown.
    if terminationAlarm != None:
        terminationAlarm.event()

    sys.exit(1)