def __init__(self, cp): global has_gratia global Gratia global StorageElement global StorageElementRecord if not has_gratia: try: Gratia = __import__("Gratia") StorageElement = __import__("StorageElement") StorageElementRecord = __import__("StorageElementRecord") has_gratia = True except: raise if not has_gratia: print "Unable to import Gratia and Storage modules!" sys.exit(1) Gratia.Initialize() try: if Gratia.Config.get_SiteName().lower().find('generic') >= 0: Gratia.Config.setSiteName(socket.getfqdn()) except: pass try: if Gratia.Config.get_ProbeName().lower().find('generic') >= 0: Gratia.Config.setProbeName('dCache-storage:%s' % socket.getfqdn()) except: pass
def __init__(self):
    """Set up the SLURM probe: parse options, initialize Gratia, take the
    probe lock, open the checkpoint, and connect to the accounting database,
    selecting the schema handler that matches the SLURM version."""
    try:
        self.opts, self.args = self.parse_opts()
    except Exception as err:
        print(err, file=sys.stderr)
        sys.exit(1)

    # A readable Gratia configuration file is mandatory.
    cfg = self.opts.gratia_config
    if not (cfg and os.path.exists(cfg)):
        raise Exception("Gratia config, %s, does not exist." % cfg)
    Gratia.Initialize(cfg)

    if self.opts.verbose:
        Gratia.Config.set_DebugLevel(5)

    # Verify the probe's runtime environment before doing any work.
    GratiaWrapper.CheckPreconditions()

    # Optional randomized startup delay (spreads load across many probes).
    if self.opts.sleep:
        delay = random.randint(1, int(self.opts.sleep))
        DebugPrint(2, "Sleeping for %d seconds before proceeding." % delay)
        time.sleep(delay)

    # Only one instance of this probe may run at a time.
    GratiaWrapper.ExclusiveLock()

    self.register_gratia("slurm_meter")

    # Checkpointing is optional; pass None through when it is disabled.
    ckpt_path = (os.path.join(Gratia.Config.get_WorkingFolder(), "checkpoint")
                 if self.opts.checkpoint else None)
    self.checkpoint = SlurmCheckpoint(ckpt_path)

    # With no saved checkpoint, only look back DataFileExpiration days.
    if self.checkpoint.val is None:
        lookback_secs = Gratia.Config.get_DataFileExpiration() * 86400
        self.checkpoint.val = int(time.time() - lookback_secs)

    # Database connection and cluster selection.
    self.conn = self.get_db_conn()
    self.cluster = Gratia.Config.getConfigAttribute('SlurmCluster')

    # SLURM changed the accounting schema in 15.08.0 (added TRES,
    # Trackable Resources, in 15.08.0pre5) — pick the matching accessor.
    version = self.get_slurm_version()
    if LooseVersion(version) < LooseVersion("15.08.0"):
        self.sacct = SlurmAcct_v1(self.conn, self.cluster, version)
    else:
        self.sacct = SlurmAcct_v2(self.conn, self.cluster, version)
# NOTE(review): fragment of a larger per-file loop — `var` (a Condor history
# filename) and `lines` (the classad lines read from it) come from the
# enclosing scope, which is not visible here.
boincjob = False
# Scan the classad lines for the timestamps/owner we care about.
for var2 in lines:
    if var2.count('QDate') > 0:
        starttime = var2.split()[2]
    elif var2.count('RemoteWallClockTime') > 0:
        walltime = var2.split()[2]
    elif var2.count('CompletionDate') > 0:
        endtime = var2.split()[2]
    elif var2.count('Owner') > 0:
        # Owner value includes the quotes in the classad, hence '"boinc"'.
        if var2.split()[2] == '"boinc"':
            boincjob = True
# Only report jobs owned by the "boinc" user as backfill usage records.
if boincjob == True:
    Gratia.setProbeBatchManager('Condor')
    Gratia.Initialize()
    r = Gratia.UsageRecord('Condor')
    r.ResourceType('Backfill')
    # parsing the filenames for the hostname/localjobid.
    # the files are in the format: history.<hostname>#<localjobid>#1#<localjobid>
    host = var.partition('.')[2].partition('#')[0]
    localjobid = var.partition('.')[2].partition('#')[2].partition(
        '#')[0]
    # print 'endtime: ' + endtime
    # print 'starttime: ' + starttime
    # print 'walltime: ' + walltime
    # Gratia likes ints, not strings, for times.
def start(self): """Initializes Gratia (to read the option file), does random sleep (if any), acquires the lock, initializes the input and registers Gratia. Must be invoked after options and parameters are parsed (option file name is needed) """ ### Initialize Gratia if not self._opts or not self._opts.gratia_config or not os.path.exists( self._opts.gratia_config): # TODO: print a message instead of an exception? raise Exception("Gratia config file (%s) does not exist." % self._opts.gratia_config) # Print options and initial conditions DebugPrint(5, "Initial options: %s" % self._opts) # Initialization parses the config file. No debug print will work before this Gratia.Initialize(self._opts.gratia_config) # Set to verbose in case the config changed it self.set_verbose() # Sanity checks for the probe's runtime environment. if self._opts.enable: GratiaWrapper.CheckPreconditions(check_enabled=False) else: GratiaWrapper.CheckPreconditions() if self._opts.sleep: rnd = random.randint(1, int(self._opts.sleep)) DebugPrint(2, "Sleeping for %d seconds before proceeding." % rnd) time.sleep(rnd) # Make sure we have an exclusive lock for this probe. GratiaWrapper.ExclusiveLock() ### Initialize input (config file must be available) # Input must specify which parameters it requires form the config file # The probe provides static information form the config file if not self._probeinput: self._probeinput = ProbeInput() input_parameters = self._probeinput.get_init_params() input_ini = self.get_config_att_list(input_parameters) # Check for test mode: start and other methods may change if 'input' in self._opts.test: DebugPrint(3, "Running input in test mode") self._probeinput.do_test() # Finish input initialization, including DB connection (if used) self._probeinput.start(input_ini) # get_DataFileExpiration() returns the value in the config file or 31 # TODO: Do we want to always not consider values older than 31 days or only when checkpointing is # enabled? 
# data_expiration = Gratia.Config.get_DataFileExpiration() # Find the checkpoint filename (if enabled) - after initializing the input! if self._opts.checkpoint: checkpoint_file = self.get_config_attribute('CheckpointFile') full_checkpoint_name = True if not checkpoint_file: full_checkpoint_name = False checkpoint_file = os.path.join( Gratia.Config.get_WorkingFolder(), "checkpoint") data_expiration = Gratia.Config.get_DataFileExpiration() # Only process DataFileExpiration days of history # (unless we're resuming from a checkpoint file) # TODO: is datafileexpiration a maximum value or a default (if no checkpoint is specified)? # Do we want both? # Open the checkpoint file self._probeinput.add_checkpoint(checkpoint_file, default_val=data_expiration, fullname=full_checkpoint_name) ### Complete Gratia initialization # This uses the input version (after Input initialization) self.register_gratia()
def main():
    """Entry point for the dCache billing aggregator probe.

    Sets up signal handling, logging, email alarms and the Gratia connection,
    then loops sending billing records to Gratia until a stop file appears.
    On any unexpected exception it logs the traceback, fires the termination
    alarm (if configured) and exits with status 1.
    """
    # We need the logger variable in the exception handler.
    # So we create it here.
    logger = logging.getLogger('DCacheAggregator')

    # Ignore hangup signals. We shouldn't die just because our parent
    # shell logs out.
    signal.signal(signal.SIGHUP, signal.SIG_IGN)
    # Try to catch common signals and send email before we die
    signal.signal(signal.SIGINT, warn_of_signal)
    signal.signal(signal.SIGQUIT, warn_of_signal)
    signal.signal(signal.SIGTERM, warn_of_signal)

    # FIX: defined before the try block so the exception handler can always
    # test it; previously a failure raised before the Alarm was constructed
    # caused a NameError inside the handler itself.
    terminationAlarm = None

    try:
        # Tell Gratia what versions we are using.
        # CHRIS: is there a way to automate the version extraction
        # using the pkg_resource package?
        Gratia.RegisterReporterLibrary("psycopg2", "2.0.6")
        #Gratia.RegisterReporterLibrary( "SQLAlchemy", "0.4.1" )
        rev = Gratia.ExtractCvsRevision("$Revision: 1.13 $")
        tag = Gratia.ExtractCvsRevision("$Name: $")
        Gratia.RegisterReporter("dCacheBillingAggregator.py",
                                str(rev) + " (tag " + str(tag) + ")")

        # BRIAN: attempt to pull the dCache version from RPM.
        version = "UNKNOWN"
        try:
            version = os.popen("rpm -q --qf '%{VERSION}-%{RELEASE}' "
                               "dcache-server").read()
        except Exception:
            # Best effort only; keep the "UNKNOWN" fallback.
            pass
        Gratia.RegisterService("dCache", version)

        # Initialize gratia before attempting to read its config file.
        Gratia.Initialize()
        # Extract the configuration information into local variables.
        myconf = dCacheProbeConfig()

        # Get the name of the directory where we are to store the log files,
        # and make sure that the logging directory is present.
        logDir = myconf.get_LogFolder()
        if not os.path.isdir(logDir):
            os.mkdir(logDir, 0o755)
        logFileName = os.path.join(logDir, "dcacheTransfer.log")

        # Set up an alarm to send an email if the program terminates.
        termSubject = "dCache-transfer probe is going down"
        termMessage = "The dCache transfer probe for Gratia has " + \
                      "terminated.\nPlease check the logfile\n\n " + \
                      logFileName + \
                      "\n\nfor the cause.\n"
        terminationAlarm = Alarm(myconf.get_EmailServerHost(),
                                 myconf.get_EmailFromAddress(),
                                 myconf.get_EmailToList(),
                                 termSubject, termMessage, 0, 0, False)

        # Set up the logger with a suitable format
        hdlr = RotatingFileHandler(logFileName, 'a', 512000, 10)
        formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
        hdlr.setFormatter(formatter)
        logger.addHandler(hdlr)
        logger.setLevel(myconf.get_AggrLogLevel())
        logger.info("starting " + ProgramName)

        stopFileName = myconf.get_StopFileName()
        updateFreq = float(myconf.get_UpdateFrequency())
        logger.warn("update freq = %.2f" % updateFreq)

        # Create the aggregator instance that we will use.
        dataDir = myconf.get_DataFolder()
        aggregator = DCacheAggregator(myconf, dataDir)

        # If profiling was requested, turn it on.
        profiling = sys.argv.count('-profile') > 0
        if profiling:
            profiler = hotshot.Profile("profile.dat")
            logger.info("Enabling Profiling")

        # Now aggregate new records, then sleep, until somebody creates
        # the stop file...
        while 1:
            # Make sure we (still) have a connection to Gratia.
            if not TestContainer.isTest():  # no need in that during self test
                Gratia.Maintenance()

            if profiling:
                profiler.run("aggregator.sendBillingInfoRecordsToGratia()")
            else:
                try:
                    aggregator.sendBillingInfoRecordsToGratia()
                except TestContainer.SimInterrupt:
                    # Simulated interrupt (self-test): rebuild and go on.
                    logger.info("BillingRecSimulator.SimInterrupt caught, "
                                "restarting")
                    aggregator = DCacheAggregator(myconf, dataDir)
                    continue
            # Are we are shutting down?
            if os.path.exists(stopFileName):
                break
            if TestContainer.isTest():
                break
            logger.warn("sleeping for = %.2f seconds" % updateFreq)
            sleep_check(updateFreq, stopFileName)

        # If we are profiling, print the results...
        if profiling:
            profiler.close()
            stats = hotshot.stats.load("profile.dat")
            stats.sort_stats('time', 'calls')
            stats.print_stats()

        logger.warn(ProgramName + " stop file detected.")
    except (KeyboardInterrupt, SystemExit):
        raise
    except:
        # Format the traceback into a string. sys.exc_info() replaces the
        # long-deprecated sys.exc_type/exc_value/exc_traceback globals.
        tblist = traceback.format_exception(*sys.exc_info())
        msg = ProgramName + " caught an exception:\n" + "".join(tblist)
        print(msg)
        logger.error(msg)

        TestContainer.dumpStatistics(logger)

        # shut down the logger to make sure nothing is lost.
        logger.critical(ProgramName + " shutting down.")
        logging.shutdown()

        # try to send an email warning of the shutdown.
        if terminationAlarm != None:
            terminationAlarm.event()

        sys.exit(1)
class SlurmProbe:
    """Gratia probe that reports job accounting data from a SLURM database.

    Construction performs the full probe startup: option parsing, Gratia
    initialization, environment checks, exclusive locking, checkpointing and
    the database connection. Exits the process if option parsing fails.
    """

    # Parsed command-line options and positional arguments (set in __init__).
    opts = None
    args = None
    # Checkpoint tracking the timestamp of the last processed record.
    checkpoint = None
    # Open connection to the SLURM accounting database.
    conn = None
    # SLURM cluster name taken from the Gratia config ('SlurmCluster').
    cluster = None
    # Accessor object for the SLURM accounting tables.
    sacct = None

    def __init__(self):
        """Parse options, initialize Gratia, lock the probe and connect to
        the SLURM accounting database.

        Raises:
            Exception: if the configured Gratia config file does not exist.
        """
        try:
            self.opts, self.args = self.parse_opts()
        except Exception, e:
            print >> sys.stderr, str(e)
            sys.exit(1)

        # Initialize Gratia
        if not self.opts.gratia_config or not os.path.exists(
                self.opts.gratia_config):
            raise Exception("Gratia config, %s, does not exist." %
                            self.opts.gratia_config)
        Gratia.Initialize(self.opts.gratia_config)

        if self.opts.verbose:
            Gratia.Config.set_DebugLevel(5)

        # Sanity checks for the probe's runtime environment.
        GratiaWrapper.CheckPreconditions()

        # Optional randomized startup delay (spreads load across probes).
        if self.opts.sleep:
            rnd = random.randint(1, int(self.opts.sleep))
            DebugPrint(2, "Sleeping for %d seconds before proceeding." % rnd)
            time.sleep(rnd)

        # Make sure we have an exclusive lock for this probe.
        GratiaWrapper.ExclusiveLock()

        self.register_gratia("slurm_meter")

        # Find the checkpoint filename (if enabled)
        if self.opts.checkpoint:
            checkpoint_file = os.path.join(Gratia.Config.get_WorkingFolder(),
                                           "checkpoint")
        else:
            checkpoint_file = None

        # Open the checkpoint file
        self.checkpoint = SlurmCheckpoint(checkpoint_file)

        # Only process DataFileExpiration days of history
        # (unless we're resuming from a checkpoint file)
        if self.checkpoint.val is None:
            self.checkpoint.val = int(time.time() -
                                      (Gratia.Config.get_DataFileExpiration() *
                                       86400))

        # Connect to database
        self.conn = self.get_db_conn()
        self.cluster = Gratia.Config.getConfigAttribute('SlurmCluster')
        self.sacct = SlurmAcct(self.conn, self.cluster)