def _performWallClockChecks(self):
    """Watchdog performs the wall clock checks based on MJF.

    If the current time is later than the job start (the 'jobstart_secs'
    job feature) plus ``self.stopSigStartSeconds`` AND the wall clock
    seconds left are below ``self.stopSigFinishSeconds +
    self.wallClockCheckSeconds``, signal ``self.stopSigNumber`` is sent to
    every child of ``self.wrapperPID`` whose command line matches
    ``self.stopSigRegex``, and ``self.stopSigSent`` is set to True.

    Signals are sent to processes if we need to stop, but the function
    always returns S_OK(): every failure is either ignored or only logged.

    :return: S_OK()
    """
    mjf = MJF.MJF()

    try:
        wallClockSecondsLeft = mjf.getWallClockSecondsLeft()
    except Exception:
        # Just stop if we can't get the wall clock seconds left
        return S_OK()

    jobstartSeconds = mjf.getIntJobFeature('jobstart_secs')
    if jobstartSeconds is None:
        # Just stop if we don't know when the job started
        return S_OK()

    if (int(time.time()) > jobstartSeconds + self.stopSigStartSeconds) and (
            wallClockSecondsLeft < self.stopSigFinishSeconds + self.wallClockCheckSeconds):
        # Need to send the signal! Assume it works to avoid sending the signal more than once
        self.log.info('Sending signal %d to JobWrapper children' % self.stopSigNumber)
        self.stopSigSent = True

        try:
            for childPid in getChildrenPIDs(self.wrapperPID):
                try:
                    # Context manager guarantees the /proc file handle is closed
                    with open('/proc/%d/cmdline' % childPid, 'r') as cmdlineFile:
                        cmdline = cmdlineFile.read().replace('\0', ' ').strip()
                except IOError:
                    # Process gone away? Not running on Linux? Skip anyway
                    continue

                if re.search(self.stopSigRegex, cmdline) is None:
                    continue

                self.log.info(
                    'Sending signal %d to process ID %d, cmdline = "%s"' %
                    (self.stopSigNumber, childPid, cmdline))
                try:
                    os.kill(childPid, self.stopSigNumber)
                except OSError as e:
                    # The child may have exited since its cmdline was read, or we
                    # may not be allowed to signal it. Keep going so that the
                    # remaining children still receive the signal.
                    self.log.error(
                        'Failed to send signal %d to process ID %d! (%s)' %
                        (self.stopSigNumber, childPid, str(e)))
        except Exception as e:
            self.log.error(
                'Failed to send signals to JobWrapper children! (%s)' % str(e))

    return S_OK()
# No configuration file selected unless the "R"/"Reconfig" switch supplies one.
configFile = None

# Inspect the switches returned by Script.getUnprocessedSwitches(); each entry
# is indexed as [0] = switch name and [1] = switch value.
# NOTE(review): `update` is only ever set to True here; its default is
# presumably assigned before this point in the file -- confirm.
for unprocSw in Script.getUnprocessedSwitches():
    if unprocSw[0] in ("U", "Update"):
        update = True
    elif unprocSw[0] in ("R", "Reconfig"):
        # The value given with the switch is kept as the configuration file
        configFile = unprocSw[1]

if __name__ == "__main__":

    # Imports are done here, inside the script entry point.
    from DIRAC import gLogger, gConfig
    from DIRAC.WorkloadManagementSystem.Client.DIRACbenchmark import singleDiracBenchmark
    from DIRAC.ConfigurationSystem.Client.Helpers.Operations import Operations
    from DIRAC.Core.Utilities import MJF

    mjf = MJF.MJF()
    # updateConfig() is called before the job features are read below.
    # NOTE(review): what it updates is not visible from here -- see MJF.
    mjf.updateConfig()

    # Read the 'db12' and 'hs06' job features
    db12JobFeature = mjf.getJobFeature('db12')
    hs06JobFeature = mjf.getJobFeature('hs06')

    # Run the benchmark; a None result is treated as a failed measurement.
    # NOTE(review): the meaning of the argument 1 is defined by
    # singleDiracBenchmark -- confirm.
    result = singleDiracBenchmark(1)

    if result is None:
        gLogger.error('Cannot make benchmark measurements')
        # NOTE(review): `DIRAC` itself is not imported in this fragment;
        # presumably imported at the top of the file -- confirm.
        DIRAC.exit(1)

    # Measured value: result['NORM'] rounded to one decimal place
    db12Measured = round(result['NORM'], 1)

    # Correction factor read from the Operations configuration (default 1.);
    # the normalization is result['NORM'] divided by it, rounded to one decimal.
    corr = Operations().getValue('JobScheduling/CPUNormalizationCorrection', 1.)
    norm = round(result['NORM'] / corr, 1)