def _getCurrentConfig(self):
    """Collect the component configuration of the systems in the current setup.

    The configuration is refreshed, ``/DIRAC/Setup`` is checked against the
    sections found under ``/DIRAC/Setups``, and for every system listed in the
    options of that setup (limited to ``self.systems`` when it is non-empty)
    the remote ``/Systems/<system>/<instance>`` section is copied, keeping only
    its Agents, Services and Executors sub-sections.

    :returns: S_OK(CFG) holding one ``/<system>`` section per system, or
              S_ERROR when the setup information cannot be read or the setup
              is not among the known ones
    """
    from DIRAC.ConfigurationSystem.Client.ConfigurationData import gConfigurationData

    componentSections = ('Agents', 'Services', 'Executors')

    gConfig.forceRefresh()
    collected = CFG()
    currentSetup = gConfig.getValue('/DIRAC/Setup', '')

    sectionsRes = gConfig.getSections('/DIRAC/Setups', [])
    if not sectionsRes['OK']:
        return S_ERROR('Could not get /DIRAC/Setups sections')
    knownSetups = sectionsRes['Value']
    if currentSetup not in knownSetups:
        return S_ERROR('Setup %s is not in allowed list: %s' % (currentSetup, ', '.join(knownSetups)))

    optionsRes = gConfig.getOptionsDict('/DIRAC/Setups/%s' % currentSetup)
    if not optionsRes['OK']:
        return S_ERROR('Could not get /DIRAC/Setups/%s options' % currentSetup)

    # The options map each system name to the instance used in this setup
    for system, instance in optionsRes['Value'].items():
        if self.systems and system not in self.systems:
            continue
        systemCfg = gConfigurationData.remoteCFG.getAsCFG("/Systems/%s/%s" % (system, instance))
        # Strip everything that is not a component section
        for section in systemCfg.listSections():
            if section not in componentSections:
                systemCfg.deleteKey(section)
        collected.createNewSection("/%s" % system, contents=systemCfg)

    return S_OK(collected)
def _parseConfigTemplate(self, templatePath, cfg=None):
    """Load the ConfigTemplate.cfg of one system and merge it into ``cfg``.

    The system name is the last component of ``templatePath``; a trailing
    "System" (matched case-insensitively) is removed from it. The template
    content is placed under a ``/<system>`` section before merging.

    :param str templatePath: path to the folder containing a ConfigTemplate.cfg file
    :param CFG cfg: cfg to merge with the systems config, a new CFG is used if None
    :returns: S_OK(CFG) with the merged configuration (``cfg`` untouched when
              the system is not in a non-empty ``self.systems``), S_ERROR if
              the ConfigTemplate.cfg file does not exist
    """
    if cfg is None:
        cfg = CFG()

    _, systemName = os.path.split(templatePath.rstrip("/"))
    if systemName.lower().endswith('system'):
        systemName = systemName[:-len('System')]

    # Systems outside the requested selection are skipped without error
    if self.systems and systemName not in self.systems:
        return S_OK(cfg)

    templateFile = os.path.join(templatePath, 'ConfigTemplate.cfg')
    if not os.path.exists(templateFile):
        return S_ERROR("File not found: %s" % templateFile)

    templateCfg = CFG()
    templateCfg.loadFromFile(templateFile)

    wrappedCfg = CFG()
    wrappedCfg.createNewSection("/%s" % systemName, contents=templateCfg)

    return S_OK(cfg.mergeWith(wrappedCfg))
class ProcessList(object):
    """ The ProcessList uses internally the CFG utility to store the processes and their properties.

    ``goodProcessList`` is False when the file given at construction does not exist.
    """

    def __init__(self, location):
        """Load the process list stored at ``location``.

        :param str location: path of the process list file; if the file exists
            and has no 'Processes' key, an empty 'Processes' section is created
        """
        self.cfg = CFG()
        self.location = location
        self.goodProcessList = True
        if os.path.exists(self.location):
            self.cfg.loadFromFile(self.location)
            if not self.cfg.existsKey('Processes'):
                self.cfg.createNewSection('Processes')
        else:
            self.goodProcessList = False

    def _writeProcessList(self, path):
        """ Write to text

        The content is first written to a temporary file which is then moved to ``path``.

        :param str path: destination file
        :returns: True on success; the (false) result of the write if the
            temporary file could not be written; False if the move failed
        """
        handle, tmpName = tempfile.mkstemp()
        # Only the file name is handed to writeToFile, so release the descriptor
        # right away; it cannot leak if the write raises.
        os.close(handle)
        written = self.cfg.writeToFile(tmpName)
        if not written:
            if os.path.exists(tmpName):
                os.remove(tmpName)
            return written
        if os.path.exists(path):
            LOG.debug("Replacing %s" % path)
        try:
            shutil.move(tmpName, path)
            return True
        except OSError as err:  # "except X as e" is valid on Python 2.6+ and Python 3
            LOG.error("Failed to overwrite process list.", err)
            LOG.info("If your process list is corrupted a backup can be found %s" % tmpName)
            return False
class ProcessList(object):
    """ The ProcessList uses internally the CFG utility to store the processes and their properties.

    ``goodProcessList`` is False when the file given at construction does not exist.
    """

    def __init__(self, location):
        """Load the process list stored at ``location``.

        :param str location: path of the process list file; if the file exists
            and has no 'Processes' key, an empty 'Processes' section is created
        """
        self.cfg = CFG()
        self.location = location
        self.goodProcessList = True
        if os.path.exists(self.location):
            self.cfg.loadFromFile(self.location)
            if not self.cfg.existsKey('Processes'):
                self.cfg.createNewSection('Processes')
        else:
            self.goodProcessList = False

    def _writeProcessList(self, path):
        """ Write to text

        The content is first written to a temporary file which is then moved to ``path``.

        :param str path: destination file
        :returns: True on success; the (false) result of the write if the
            temporary file could not be written; False if the move failed
        """
        handle, tmpName = tempfile.mkstemp()
        # Only the file name is handed to writeToFile, so release the descriptor
        # right away; it cannot leak if the write raises.
        os.close(handle)
        written = self.cfg.writeToFile(tmpName)
        if not written:
            if os.path.exists(tmpName):
                os.remove(tmpName)
            return written
        if os.path.exists(path):
            gLogger.debug("Replacing %s" % path)
        try:
            shutil.move(tmpName, path)
            return True
        except OSError as err:  # "except X as e" is valid on Python 2.6+ and Python 3
            gLogger.error("Failed to overwrite process list.", err)
            gLogger.info("If your process list is corrupted a backup can be found %s" % tmpName)
            return False
def getSystemsCFG(self):
    """Build a single CFG from every ConfigTemplate that can be parsed.

    Starts from a CFG containing an empty ``/Systems`` section and feeds it
    through ``self.parseConfigTemplate`` for each location returned by
    ``self.findConfigTemplates``. Templates whose parsing fails are ignored.

    :returns: the accumulated CFG object (not wrapped in S_OK)
    """
    systemsCfg = CFG()
    systemsCfg.createNewSection('/Systems')

    for templatePath in self.findConfigTemplates():
        parsed = self.parseConfigTemplate(templatePath, systemsCfg)
        if not parsed['OK']:
            # best effort: keep what has been collected so far
            continue
        systemsCfg = parsed['Value']

    return systemsCfg
def toCFG(self):
    """ Get the full description of the file in CFG format

    The LFN, with every '/' replaced by '&&', names the section that holds
    the Status, Size, GUID and Checksum options. When the file has catalog
    replicas an empty CatalogReplicas sub-section is added.

    :returns: S_OK(str) with the string form of the CFG
    """
    fileCfg = CFG()
    lfnKey = self.lfn.replace('/', '&&')
    fileCfg.createNewSection(lfnKey)

    optionValues = (
        ('%s/Status', self.status),
        ('%s/Size', self.size),
        ('%s/GUID', self.guid),
        ('%s/Checksum', self.checksum),
    )
    for optionTemplate, optionValue in optionValues:
        fileCfg.setOption(optionTemplate % (lfnKey), optionValue)

    # TODO: still have to include the CFG from the replica objects
    if self.catalogReplicas:
        fileCfg.createNewSection('%s/CatalogReplicas' % lfnKey)
        for _replica in self.catalogReplicas:
            # placeholder: the replica description is not merged in yet
            pass

    return S_OK(str(fileCfg))
def toCFG(self):
    """ Get the full description of the file in CFG format

    A section named after the LFN (with '/' replaced by '&&') receives the
    Status, Size, GUID and Checksum options. If catalog replicas are present,
    an empty CatalogReplicas sub-section is created as well.

    :returns: S_OK(str) with the string form of the CFG
    """
    description = CFG()
    sectionName = self.lfn.replace('/', '&&')
    description.createNewSection(sectionName)

    statusPath = '%s/Status' % (sectionName)
    description.setOption(statusPath, self.status)
    sizePath = '%s/Size' % (sectionName)
    description.setOption(sizePath, self.size)
    guidPath = '%s/GUID' % (sectionName)
    description.setOption(guidPath, self.guid)
    checksumPath = '%s/Checksum' % (sectionName)
    description.setOption(checksumPath, self.checksum)

    # TODO: still have to include the CFG from the replica objects
    if not self.catalogReplicas:
        return S_OK(str(description))

    description.createNewSection('%s/CatalogReplicas' % sectionName)
    for _replica in self.catalogReplicas:
        # placeholder: the replica description is not merged in yet
        pass
    return S_OK(str(description))
def checkFunction():
    """Get the CPU normalisation from MJF or calculate it, and optionally store it.

    The estimated power from ``getCPUNormalization`` and, when available, the
    power reported by ``getPowerFromMJF`` are logged. Depending on the
    module-level ``update`` and ``configFile`` values the factors are written
    either to the local DIRAC configuration or to ``configFile``. The function
    always finishes by calling ``DIRAC.exit``.
    """
    from DIRAC.WorkloadManagementSystem.Client.CPUNormalization import getPowerFromMJF
    from ILCDIRAC.Core.Utilities.CPUNormalization import getCPUNormalization
    from DIRAC import gLogger, gConfig

    result = getCPUNormalization()
    if not result['OK']:
        gLogger.error(result['Message'])
        # A failed result carries no 'Value'; reading it below would raise a
        # KeyError, so stop here with a non-zero exit status instead.
        DIRAC.exit(1)
        return  # only reached if DIRAC.exit does not terminate the process

    norm = round(result['Value']['NORM'], 1)
    gLogger.notice('Estimated CPU power is %.1f %s' % (norm, result['Value']['UNIT']))

    mjfPower = getPowerFromMJF()
    if mjfPower:
        gLogger.notice('CPU power from MJF is %.1f HS06' % mjfPower)
    else:
        gLogger.notice('MJF not available on this node')

    # The MJF value takes precedence over the locally estimated one
    scalingFactor = mjfPower if mjfPower else norm

    if update and not configFile:
        gConfig.setOptionValue('/LocalSite/CPUScalingFactor', scalingFactor)
        gConfig.setOptionValue('/LocalSite/CPUNormalizationFactor', norm)
        gConfig.dumpLocalCFGToFile(gConfig.diracConfigFilePath)

    if configFile:
        from DIRAC.Core.Utilities.CFG import CFG
        cfg = CFG()
        try:
            # Attempt to open the given file
            cfg.loadFromFile(configFile)
        except Exception:  # pylint: disable=broad-except
            # Best effort: a missing or unreadable file means we start from an
            # empty configuration. KeyboardInterrupt/SystemExit still propagate.
            pass
        # Create the section if it does not exist
        if not cfg.existsKey('LocalSite'):
            cfg.createNewSection('LocalSite')
        cfg.setOption('/LocalSite/CPUScalingFactor', scalingFactor)
        cfg.setOption('/LocalSite/CPUNormalizationFactor', norm)
        cfg.writeToFile(configFile)

    DIRAC.exit()
def __gConfigDefaults(defaultPath):
    """ Build a cfg from a Default Section

    Every sub-section of ``defaultPath`` in gConfig becomes a section of the
    returned CFG, filled with the options found for it.

    :param str defaultPath: configuration path holding the default sections
    :returns: CFG object, empty when the sections cannot be listed
    """
    from DIRAC import gConfig

    defaults = CFG()
    sectionsRes = gConfig.getSections(defaultPath)
    if not sectionsRes['OK']:
        return defaults

    for sectionName in sectionsRes['Value']:
        sectionPath = cfgPath(defaultPath, sectionName)
        defaults.createNewSection(sectionName)
        optionsRes = gConfig.getOptionsDict(sectionPath)
        if not optionsRes['OK']:
            # leave the section empty when its options cannot be read
            continue
        for optionName, optionValue in optionsRes['Value'].items():
            defaults[sectionName].setOption(optionName, optionValue)

    return defaults
def __gConfigDefaults(defaultPath):
    """ Build a cfg from a Default Section

    Each section listed under ``defaultPath`` in gConfig is recreated in a
    new CFG together with its options.

    :param str defaultPath: configuration path holding the default sections
    :returns: CFG object, empty when the sections cannot be listed
    """
    from DIRAC import gConfig

    builtCfg = CFG()
    listing = gConfig.getSections(defaultPath)
    sectionNames = listing['Value'] if listing['OK'] else []

    for typeName in sectionNames:
        typePath = cfgPath(defaultPath, typeName)
        builtCfg.createNewSection(typeName)
        lookup = gConfig.getOptionsDict(typePath)
        if lookup['OK']:
            # copy every option of this type into its section
            for key, val in lookup['Value'].items():
                builtCfg[typeName].setOption(key, val)

    return builtCfg
def getComputingElementDefaults(ceName='', ceType='', cfg=None, currentSectionPath=''):
    """
    Return cfgDefaults with defaults for the given CEs defined either in arguments or in the provided cfg

    :param str ceName: name of a CE section to create/complete from the command line arguments
    :param str ceType: value for the 'CEType' option of ``ceName`` (only used when ``ceName`` is given)
    :param str cfg: path of a cfg file to load the CE definitions from
    :param str currentSectionPath: path passed to ``__getExtraOptions`` to get extra options for ``ceName``
    :returns: CFG with one section per CE; an empty CFG if loading ``cfg`` fails
    """
    cesCfg = CFG()
    if cfg:
        try:
            cesCfg.loadFromFile(cfg)
            cesPath = cfgInstallPath('ComputingElements')
            if cesCfg.isSection(cesPath):
                # Descend to the ComputingElements section
                for section in cfgPathToList(cesPath):
                    cesCfg = cesCfg[section]
        except Exception:  # pylint: disable=broad-except
            # Any load/parse problem yields an empty configuration, but
            # KeyboardInterrupt and SystemExit are no longer swallowed.
            return CFG()

    # Overwrite the cfg with Command line arguments
    if ceName:
        if not cesCfg.isSection(ceName):
            cesCfg.createNewSection(ceName)
        if currentSectionPath:
            # Add Options from Command Line
            optionsDict = __getExtraOptions(currentSectionPath)
            for name, value in optionsDict.items():
                cesCfg[ceName].setOption(name, value)  # pylint: disable=no-member
        if ceType:
            cesCfg[ceName].setOption('CEType', ceType)  # pylint: disable=no-member

    ceDefaultSection = cfgPath(defaultSection('ComputingElements'))
    # Load Default for the given type from Central configuration if defined
    ceDefaults = __gConfigDefaults(ceDefaultSection)

    # Local names are used here so that the function arguments are not overwritten
    for sectionName in cesCfg.listSections():
        ceSection = cesCfg[sectionName]
        if 'CEType' not in ceSection:
            continue
        sectionType = ceSection['CEType']
        if sectionType not in ceDefaults:
            continue
        typeDefaults = ceDefaults[sectionType]
        for option in typeDefaults.listOptions():
            # Explicitly configured options win over the defaults
            if option not in ceSection:
                ceSection.setOption(option, typeDefaults[option])

    return cesCfg
def getComputingElementDefaults(ceName="", ceType="", cfg=None, currentSectionPath=""):
    """
    Return cfgDefaults with defaults for the given CEs defined either in arguments or in the provided cfg

    :param str ceName: name of a CE section to create/complete from the command line arguments
    :param str ceType: value for the "CEType" option of ``ceName`` (only used when ``ceName`` is given)
    :param str cfg: path of a cfg file to load the CE definitions from
    :param str currentSectionPath: path passed to ``__getExtraOptions`` to get extra options for ``ceName``
    :returns: CFG with one section per CE; an empty CFG if loading ``cfg`` fails
    """
    cesCfg = CFG()
    if cfg:
        try:
            cesCfg.loadFromFile(cfg)
            cesPath = cfgInstallPath("ComputingElements")
            if cesCfg.isSection(cesPath):
                # Descend to the ComputingElements section
                for section in cfgPathToList(cesPath):
                    cesCfg = cesCfg[section]
        except Exception:  # pylint: disable=broad-except
            # Any load/parse problem yields an empty configuration, but
            # KeyboardInterrupt and SystemExit are no longer swallowed.
            return CFG()

    # Overwrite the cfg with Command line arguments
    if ceName:
        if not cesCfg.isSection(ceName):
            cesCfg.createNewSection(ceName)
        if currentSectionPath:
            # Add Options from Command Line
            optionsDict = __getExtraOptions(currentSectionPath)
            for name, value in optionsDict.items():
                cesCfg[ceName].setOption(name, value)
        if ceType:
            cesCfg[ceName].setOption("CEType", ceType)

    ceDefaultSection = cfgPath(defaultSection("ComputingElements"))
    # Load Default for the given type from Central configuration if defined
    ceDefaults = __gConfigDefaults(ceDefaultSection)

    # Local names are used here so that the function arguments are not overwritten
    for sectionName in cesCfg.listSections():
        ceSection = cesCfg[sectionName]
        if "CEType" not in ceSection:
            continue
        sectionType = ceSection["CEType"]
        if sectionType not in ceDefaults:
            continue
        typeDefaults = ceDefaults[sectionType]
        for option in typeDefaults.listOptions():
            # Explicitly configured options win over the defaults
            if option not in ceSection:
                ceSection.setOption(option, typeDefaults[option])

    return cesCfg
def getComputingElementDefaults(ceName='', ceType='', cfg=None, currentSectionPath=''):
    """
    Return cfgDefaults with defaults for the given CEs defined either in arguments or in the provided cfg

    :param str ceName: name of a CE section to create/complete from the command line arguments
    :param str ceType: value for the 'CEType' option of ``ceName`` (only used when ``ceName`` is given)
    :param str cfg: path of a cfg file to load the CE definitions from
    :param str currentSectionPath: path passed to ``__getExtraOptions`` to get extra options for ``ceName``
    :returns: CFG with one section per CE; an empty CFG if loading ``cfg`` fails
    """
    cesCfg = CFG()
    if cfg:
        try:
            cesCfg.loadFromFile(cfg)
            cesPath = cfgInstallPath('ComputingElements')
            if cesCfg.isSection(cesPath):
                # Descend to the ComputingElements section
                for section in cfgPathToList(cesPath):
                    cesCfg = cesCfg[section]
        except Exception:  # pylint: disable=broad-except
            # BaseException also caught KeyboardInterrupt and SystemExit;
            # only genuine load/parse errors should yield an empty configuration.
            return CFG()

    # Overwrite the cfg with Command line arguments
    if ceName:
        if not cesCfg.isSection(ceName):
            cesCfg.createNewSection(ceName)
        if currentSectionPath:
            # Add Options from Command Line
            optionsDict = __getExtraOptions(currentSectionPath)
            for name, value in optionsDict.items():
                cesCfg[ceName].setOption(name, value)  # pylint: disable=no-member
        if ceType:
            cesCfg[ceName].setOption('CEType', ceType)  # pylint: disable=no-member

    ceDefaultSection = cfgPath(defaultSection('ComputingElements'))
    # Load Default for the given type from Central configuration if defined
    ceDefaults = __gConfigDefaults(ceDefaultSection)

    # Local names are used here so that the function arguments are not overwritten
    for sectionName in cesCfg.listSections():
        ceSection = cesCfg[sectionName]
        if 'CEType' not in ceSection:
            continue
        sectionType = ceSection['CEType']
        if sectionType not in ceDefaults:
            continue
        typeDefaults = ceDefaults[sectionType]  # pylint: disable=unsubscriptable-object
        for option in typeDefaults.listOptions():  # pylint: disable=no-member
            # Explicitly configured options win over the defaults
            if option not in ceSection:
                ceSection.setOption(option, typeDefaults[option])

    return cesCfg
def execute(self):
    """The JobAgent execution method.

    One cycle: (after the first job) check the remaining CPU time when in
    filling mode, ask the computing element for free slots, request a job from
    the Matcher for each CE description until one matches, validate the
    matched job, set up its proxy, check the software and submit it, and
    finally update the time-left estimate.

    :returns: S_OK with a message when the cycle ended normally or no job was
              matched; S_ERROR on a pilot version mismatch; otherwise whatever
              ``self.__finish`` / ``self._rescheduleFailedJob`` return, or the
              failed result of ``getDescription``
    """
    if self.jobCount:
        # Temporary mechanism to pass a shutdown message to the agent
        if os.path.exists('/var/lib/dirac_drain'):
            return self.__finish('Node is being drained by an operator')
        # Only call timeLeft utility after a job has been picked up
        self.log.info('Attempting to check CPU time left for filling mode')
        if self.fillingMode:
            if self.timeLeftError:
                self.log.warn(
                    "Disabling filling mode as errors calculating time left", self.timeLeftError)
                return self.__finish(self.timeLeftError)
            self.log.info('normalized CPU units remaining in slot', self.timeLeft)
            if self.timeLeft <= self.minimumTimeLeft:
                return self.__finish('No more time left')
            # Need to update the Configuration so that the new value is published in the next matching request
            result = self.computingElement.setCPUTimeLeft(
                cpuTimeLeft=self.timeLeft)
            if not result['OK']:
                return self.__finish(result['Message'])

            # Update local configuration to be used by submitted job wrappers
            localCfg = CFG()
            if self.extraOptions:
                localConfigFile = os.path.join('.', self.extraOptions)
            else:
                localConfigFile = os.path.join(rootPath, "etc", "dirac.cfg")
            localCfg.loadFromFile(localConfigFile)
            if not localCfg.isSection('/LocalSite'):
                localCfg.createNewSection('/LocalSite')
            localCfg.setOption('/LocalSite/CPUTimeLeft', self.timeLeft)
            localCfg.writeToFile(localConfigFile)

        else:
            # A job has already been run and filling mode is off: stop here
            return self.__finish('Filling Mode is Disabled')

    self.log.verbose('Job Agent execution loop')
    result = self.computingElement.available()
    if not result['OK']:
        self.log.info('Resource is not available', result['Message'])
        return self.__finish('CE Not Available')

    ceInfoDict = result['CEInfoDict']
    runningJobs = ceInfoDict.get("RunningJobs")
    availableSlots = result['Value']

    if not availableSlots:
        if runningJobs:
            # Nothing free but jobs are still running: end the cycle without finishing the agent
            self.log.info('No available slots', '%d running jobs' % runningJobs)
            return S_OK('Job Agent cycle complete with %d running jobs' % runningJobs)
        else:
            self.log.info('CE is not available')
            return self.__finish('CE Not Available')

    result = self.computingElement.getDescription()
    if not result['OK']:
        return result

    # We can have several prioritized job retrieval strategies
    # NOTE(review): ceDictList stays undefined if the value is neither a dict nor a list
    if isinstance(result['Value'], dict):
        ceDictList = [result['Value']]
    elif isinstance(result['Value'], list):
        # This is the case for Pool ComputingElement, and parameter 'MultiProcessorStrategy'
        ceDictList = result['Value']

    # Try each CE description in turn and stop at the first successful match
    for ceDict in ceDictList:

        # Add pilot information
        gridCE = gConfig.getValue('LocalSite/GridCE', 'Unknown')
        if gridCE != 'Unknown':
            ceDict['GridCE'] = gridCE
        if 'PilotReference' not in ceDict:
            ceDict['PilotReference'] = str(self.pilotReference)
        ceDict['PilotBenchmark'] = self.cpuFactor
        ceDict['PilotInfoReportedFlag'] = self.pilotInfoReportedFlag

        # Add possible job requirements
        result = gConfig.getOptionsDict('/AgentJobRequirements')
        if result['OK']:
            requirementsDict = result['Value']
            ceDict.update(requirementsDict)
            self.log.info('Requirements:', requirementsDict)

        self.log.verbose('CE dict', ceDict)

        # here finally calling the matcher
        start = time.time()
        jobRequest = MatcherClient().requestJob(ceDict)
        matchTime = time.time() - start
        self.log.info('MatcherTime', '= %.2f (s)' % (matchTime))
        if jobRequest['OK']:
            break

    self.stopAfterFailedMatches = self.am_getOption(
        'StopAfterFailedMatches', self.stopAfterFailedMatches)

    if not jobRequest['OK']:
        # Classify the failure by its message; all but the version mismatch
        # count towards the failed-match limit
        if re.search('No match found', jobRequest['Message']):
            self.log.notice('Job request OK, but no match found',
                            ': %s' % (jobRequest['Message']))
            self.matchFailedCount += 1
            if self.matchFailedCount > self.stopAfterFailedMatches:
                return self.__finish(
                    'Nothing to do for more than %d cycles' %
                    self.stopAfterFailedMatches)
            return S_OK(jobRequest['Message'])
        elif jobRequest['Message'].find("seconds timeout") != -1:
            self.log.error('Timeout while requesting job', jobRequest['Message'])
            self.matchFailedCount += 1
            if self.matchFailedCount > self.stopAfterFailedMatches:
                return self.__finish(
                    'Nothing to do for more than %d cycles' %
                    self.stopAfterFailedMatches)
            return S_OK(jobRequest['Message'])
        elif jobRequest['Message'].find(
                "Pilot version does not match") != -1:
            errorMsg = 'Pilot version does not match the production version'
            self.log.error(errorMsg, jobRequest['Message'].replace(errorMsg, ''))
            return S_ERROR(jobRequest['Message'])
        else:
            self.log.notice('Failed to get jobs', ': %s' % (jobRequest['Message']))
            self.matchFailedCount += 1
            if self.matchFailedCount > self.stopAfterFailedMatches:
                return self.__finish(
                    'Nothing to do for more than %d cycles' %
                    self.stopAfterFailedMatches)
            return S_OK(jobRequest['Message'])

    # Reset the Counter
    self.matchFailedCount = 0

    matcherInfo = jobRequest['Value']
    if not self.pilotInfoReportedFlag:
        # Check the flag after the first access to the Matcher
        self.pilotInfoReportedFlag = matcherInfo.get(
            'PilotInfoReportedFlag', False)
    jobID = matcherInfo['JobID']
    # These keys must be present and non-empty in the Matcher reply
    matcherParams = ['JDL', 'DN', 'Group']
    for param in matcherParams:
        if param not in matcherInfo:
            self.__report(jobID, 'Failed', 'Matcher did not return %s' % (param))
            return self.__finish('Matcher Failed')
        elif not matcherInfo[param]:
            self.__report(jobID, 'Failed', 'Matcher returned null %s' % (param))
            return self.__finish('Matcher Failed')
        else:
            self.log.verbose('Matcher returned', '%s = %s ' % (param, matcherInfo[param]))

    jobJDL = matcherInfo['JDL']
    jobGroup = matcherInfo['Group']
    ownerDN = matcherInfo['DN']

    # Everything else returned by the Matcher is passed on to the submission
    optimizerParams = {}
    for key in matcherInfo:
        if key not in matcherParams:
            optimizerParams[key] = matcherInfo[key]

    parameters = self._getJDLParameters(jobJDL)
    if not parameters['OK']:
        self.__report(jobID, 'Failed', 'Could Not Extract JDL Parameters')
        self.log.warn('Could Not Extract JDL Parameters', parameters['Message'])
        return self.__finish('JDL Problem')

    params = parameters['Value']
    if 'JobID' not in params:
        msg = 'Job has not JobID defined in JDL parameters'
        self.__report(jobID, 'Failed', msg)
        self.log.warn(msg)
        return self.__finish('JDL Problem')
    else:
        # From here on the JobID found in the JDL is used
        jobID = params['JobID']

    if 'JobType' not in params:
        self.log.warn('Job has no JobType defined in JDL parameters')
        jobType = 'Unknown'
    else:
        jobType = params['JobType']

    if 'CPUTime' not in params:
        self.log.warn(
            'Job has no CPU requirement defined in JDL parameters')

    # Job requirements for determining the number of processors
    # the minimum number of processors requested
    processors = int(
        params.get('NumberOfProcessors',
                   int(params.get('MinNumberOfProcessors', 1))))
    # the maximum number of processors allowed to the payload
    maxNumberOfProcessors = int(params.get('MaxNumberOfProcessors', 0))
    # need or not the whole node for the job
    wholeNode = 'WholeNode' in params
    mpTag = 'MultiProcessor' in params.get('Tags', [])

    if self.extraOptions:
        # NOTE(review): assumes params always has an 'Arguments' key; confirm
        params['Arguments'] += ' ' + self.extraOptions
        params['ExtraOptions'] = self.extraOptions

    self.log.verbose('Job request successful: \n', jobRequest['Value'])
    self.log.info(
        'Received', 'JobID=%s, JobType=%s, OwnerDN=%s, JobGroup=%s' %
        (jobID, jobType, ownerDN, jobGroup))
    self.jobCount += 1
    try:
        jobReport = JobReport(jobID, 'JobAgent@%s' % self.siteName)
        jobReport.setJobParameter('MatcherServiceTime', str(matchTime), sendFlag=False)

        if 'BOINC_JOB_ID' in os.environ:
            # Report BOINC environment
            for thisp in ('BoincUserID', 'BoincHostID', 'BoincHostPlatform', 'BoincHostName'):
                jobReport.setJobParameter(thisp,
                                          gConfig.getValue(
                                              '/LocalSite/%s' % thisp, 'Unknown'),
                                          sendFlag=False)

        jobReport.setJobStatus('Matched', 'Job Received by Agent')
        result = self._setupProxy(ownerDN, jobGroup)
        if not result['OK']:
            return self._rescheduleFailedJob(jobID, result['Message'], self.stopOnApplicationFailure)
        proxyChain = result.get('Value')

        # Save the job jdl for external monitoring
        self.__saveJobJDLRequest(jobID, jobJDL)

        # ceDict is the CE description used in the last Matcher request of the loop above
        software = self._checkInstallSoftware(jobID, params, ceDict)
        if not software['OK']:
            self.log.error('Failed to install software for job', '%s' % (jobID))
            errorMsg = software['Message']
            if not errorMsg:
                errorMsg = 'Failed software installation'
            return self._rescheduleFailedJob(jobID, errorMsg, self.stopOnApplicationFailure)

        self.log.debug('Before self._submitJob() (%sCE)' % (self.ceName))
        result = self._submitJob(jobID, params, ceDict, optimizerParams, proxyChain,
                                 processors, wholeNode, maxNumberOfProcessors, mpTag)
        if not result['OK']:
            self.__report(jobID, 'Failed', result['Message'])
            return self.__finish(result['Message'])
        elif 'PayloadFailed' in result:
            # Do not keep running and do not overwrite the Payload error
            message = 'Payload execution failed with error code %s' % result[
                'PayloadFailed']
            if self.stopOnApplicationFailure:
                return self.__finish(message, self.stopOnApplicationFailure)
            else:
                self.log.info(message)

        self.log.debug('After %sCE submitJob()' % (self.ceName))
    except Exception as subExcept:  # pylint: disable=broad-except
        # Any unexpected error while handling the job leads to a reschedule
        self.log.exception("Exception in submission", "",
                           lException=subExcept, lExcInfo=True)
        return self._rescheduleFailedJob(
            jobID, 'Job processing failed with exception',
            self.stopOnApplicationFailure)

    # Sum all times but the last one (elapsed_time) and remove times at init (is this correct?)
    cpuTime = sum(os.times()[:-1]) - sum(self.initTimes[:-1])

    result = self.timeLeftUtil.getTimeLeft(cpuTime, processors)
    if result['OK']:
        self.timeLeft = result['Value']
    else:
        if result['Message'] != 'Current batch system is not supported':
            # Remembered here and acted upon at the start of the next cycle
            self.timeLeftError = result['Message']
        else:
            # if the batch system is not defined, use the process time and the CPU normalization defined locally
            self.timeLeft = self._getCPUTimeLeft()

    return S_OK('Job Agent cycle complete')
def execute(self):
    """The JobAgent execution method.

    One cycle: (after the first job) check the remaining CPU time when in
    filling mode, check that the computing element is available, request a job
    from the Matcher, validate the matched job, set up its proxy, check the
    software and submit it, and finally update the time-left estimate and the
    ScaledCPUTime job parameter.

    :returns: S_OK with a message when the cycle ended normally or no job was
              matched; S_ERROR on a pilot version mismatch; otherwise whatever
              ``self.__finish`` / ``self.__rescheduleFailedJob`` return, or the
              failed result of ``getDescription``
    """
    if self.jobCount:
        # Only call timeLeft utility after a job has been picked up
        self.log.info("Attempting to check CPU time left for filling mode")
        if self.fillingMode:
            if self.timeLeftError:
                self.log.warn(self.timeLeftError)
                return self.__finish(self.timeLeftError)
            self.log.info("%s normalized CPU units remaining in slot" % (self.timeLeft))
            if self.timeLeft <= self.minimumTimeLeft:
                return self.__finish("No more time left")
            # Need to update the Configuration so that the new value is published in the next matching request
            result = self.computingElement.setCPUTimeLeft(cpuTimeLeft=self.timeLeft)
            if not result["OK"]:
                return self.__finish(result["Message"])

            # Update local configuration to be used by submitted job wrappers
            localCfg = CFG()
            if self.extraOptions:
                localConfigFile = os.path.join(".", self.extraOptions)
            else:
                localConfigFile = os.path.join(rootPath, "etc", "dirac.cfg")
            localCfg.loadFromFile(localConfigFile)
            if not localCfg.isSection("/LocalSite"):
                localCfg.createNewSection("/LocalSite")
            localCfg.setOption("/LocalSite/CPUTimeLeft", self.timeLeft)
            localCfg.writeToFile(localConfigFile)

        else:
            # A job has already been run and filling mode is off: stop here
            return self.__finish("Filling Mode is Disabled")

    self.log.verbose("Job Agent execution loop")
    available = self.computingElement.available()
    if not available["OK"] or not available["Value"]:
        self.log.info("Resource is not available")
        # NOTE(review): assumes a 'Message' key is present even when OK is true; confirm
        self.log.info(available["Message"])
        return self.__finish("CE Not Available")

    self.log.info(available["Message"])

    result = self.computingElement.getDescription()
    if not result["OK"]:
        return result
    ceDict = result["Value"]

    # Add pilot information
    gridCE = gConfig.getValue("LocalSite/GridCE", "Unknown")
    if gridCE != "Unknown":
        ceDict["GridCE"] = gridCE
    if not "PilotReference" in ceDict:
        ceDict["PilotReference"] = str(self.pilotReference)
    ceDict["PilotBenchmark"] = self.cpuFactor
    ceDict["PilotInfoReportedFlag"] = self.pilotInfoReportedFlag

    # Add possible job requirements
    result = gConfig.getOptionsDict("/AgentJobRequirements")
    if result["OK"]:
        requirementsDict = result["Value"]
        ceDict.update(requirementsDict)

    self.log.verbose(ceDict)
    # Time the Matcher request; the duration is reported as a job parameter later
    start = time.time()
    jobRequest = self.__requestJob(ceDict)
    matchTime = time.time() - start
    self.log.info("MatcherTime = %.2f (s)" % (matchTime))

    self.stopAfterFailedMatches = self.am_getOption("StopAfterFailedMatches", self.stopAfterFailedMatches)

    if not jobRequest["OK"]:
        # Classify the failure by its message; all but the version mismatch
        # count towards the failed-match limit
        if re.search("No match found", jobRequest["Message"]):
            self.log.notice("Job request OK: %s" % (jobRequest["Message"]))
            self.matchFailedCount += 1
            if self.matchFailedCount > self.stopAfterFailedMatches:
                return self.__finish("Nothing to do for more than %d cycles" % self.stopAfterFailedMatches)
            return S_OK(jobRequest["Message"])
        elif jobRequest["Message"].find("seconds timeout") != -1:
            self.log.error("Timeout while requesting job", jobRequest["Message"])
            self.matchFailedCount += 1
            if self.matchFailedCount > self.stopAfterFailedMatches:
                return self.__finish("Nothing to do for more than %d cycles" % self.stopAfterFailedMatches)
            return S_OK(jobRequest["Message"])
        elif jobRequest["Message"].find("Pilot version does not match") != -1:
            errorMsg = "Pilot version does not match the production version"
            self.log.error(errorMsg, jobRequest["Message"].replace(errorMsg, ""))
            return S_ERROR(jobRequest["Message"])
        else:
            self.log.notice("Failed to get jobs: %s" % (jobRequest["Message"]))
            self.matchFailedCount += 1
            if self.matchFailedCount > self.stopAfterFailedMatches:
                return self.__finish("Nothing to do for more than %d cycles" % self.stopAfterFailedMatches)
            return S_OK(jobRequest["Message"])

    # Reset the Counter
    self.matchFailedCount = 0

    matcherInfo = jobRequest["Value"]
    if not self.pilotInfoReportedFlag:
        # Check the flag after the first access to the Matcher
        self.pilotInfoReportedFlag = matcherInfo.get("PilotInfoReportedFlag", False)
    jobID = matcherInfo["JobID"]
    # These keys must be present and non-empty in the Matcher reply
    matcherParams = ["JDL", "DN", "Group"]
    for param in matcherParams:
        if param not in matcherInfo:
            self.__report(jobID, "Failed", "Matcher did not return %s" % (param))
            return self.__finish("Matcher Failed")
        elif not matcherInfo[param]:
            self.__report(jobID, "Failed", "Matcher returned null %s" % (param))
            return self.__finish("Matcher Failed")
        else:
            self.log.verbose("Matcher returned %s = %s " % (param, matcherInfo[param]))

    jobJDL = matcherInfo["JDL"]
    jobGroup = matcherInfo["Group"]
    ownerDN = matcherInfo["DN"]

    # Everything else returned by the Matcher is passed on to the submission
    optimizerParams = {}
    for key in matcherInfo:
        if key not in matcherParams:
            optimizerParams[key] = matcherInfo[key]

    parameters = self.__getJDLParameters(jobJDL)
    if not parameters["OK"]:
        self.__report(jobID, "Failed", "Could Not Extract JDL Parameters")
        self.log.warn(parameters["Message"])
        return self.__finish("JDL Problem")

    params = parameters["Value"]
    if "JobID" not in params:
        msg = "Job has not JobID defined in JDL parameters"
        self.__report(jobID, "Failed", msg)
        self.log.warn(msg)
        return self.__finish("JDL Problem")
    else:
        # From here on the JobID found in the JDL is used
        jobID = params["JobID"]

    if "JobType" not in params:
        self.log.warn("Job has no JobType defined in JDL parameters")
        jobType = "Unknown"
    else:
        jobType = params["JobType"]

    if "CPUTime" not in params:
        self.log.warn("Job has no CPU requirement defined in JDL parameters")

    if self.extraOptions:
        # NOTE(review): assumes params always has an "Arguments" key; confirm
        params["Arguments"] += " " + self.extraOptions
        params["ExtraOptions"] = self.extraOptions

    self.log.verbose("Job request successful: \n", jobRequest["Value"])
    self.log.info("Received JobID=%s, JobType=%s" % (jobID, jobType))
    self.log.info("OwnerDN: %s JobGroup: %s" % (ownerDN, jobGroup))
    self.jobCount += 1
    try:
        jobReport = JobReport(jobID, "JobAgent@%s" % self.siteName)
        jobReport.setJobParameter("MatcherServiceTime", str(matchTime), sendFlag=False)

        if "BOINC_JOB_ID" in os.environ:
            # Report BOINC environment
            for p in ("BoincUserID", "BoincHostID", "BoincHostPlatform", "BoincHostName"):
                jobReport.setJobParameter(p, gConfig.getValue("/LocalSite/%s" % p, "Unknown"), sendFlag=False)

        jobReport.setJobStatus("Matched", "Job Received by Agent")
        result = self.__setupProxy(ownerDN, jobGroup)
        if not result["OK"]:
            return self.__rescheduleFailedJob(jobID, result["Message"], self.stopOnApplicationFailure)
        proxyChain = result.get("Value")

        # Save the job jdl for external monitoring
        self.__saveJobJDLRequest(jobID, jobJDL)

        software = self.__checkInstallSoftware(jobID, params, ceDict)
        if not software["OK"]:
            self.log.error("Failed to install software for job", "%s" % (jobID))
            errorMsg = software["Message"]
            if not errorMsg:
                errorMsg = "Failed software installation"
            return self.__rescheduleFailedJob(jobID, errorMsg, self.stopOnApplicationFailure)

        self.log.debug("Before %sCE submitJob()" % (self.ceName))
        submission = self.__submitJob(jobID, params, ceDict, optimizerParams, proxyChain)
        if not submission["OK"]:
            self.__report(jobID, "Failed", submission["Message"])
            return self.__finish(submission["Message"])
        elif "PayloadFailed" in submission:
            # Do not keep running and do not overwrite the Payload error
            message = "Payload execution failed with error code %s" % submission["PayloadFailed"]
            if self.stopOnApplicationFailure:
                return self.__finish(message, self.stopOnApplicationFailure)
            else:
                self.log.info(message)

        self.log.debug("After %sCE submitJob()" % (self.ceName))
    except Exception:
        # Any unexpected error while handling the job leads to a reschedule
        self.log.exception()
        return self.__rescheduleFailedJob(
            jobID, "Job processing failed with exception", self.stopOnApplicationFailure
        )

    # Sum all times but the last one (elapsed_time) and remove times at init (is this correct?)
    cpuTime = sum(os.times()[:-1]) - sum(self.initTimes[:-1])

    result = self.timeLeftUtil.getTimeLeft(cpuTime)
    if result["OK"]:
        self.timeLeft = result["Value"]
    else:
        if result["Message"] != "Current batch system is not supported":
            # Remembered here and acted upon at the start of the next cycle
            self.timeLeftError = result["Message"]
        else:
            # if the batch system is not defined, use the process time and the CPU normalization defined locally
            self.timeLeft = self.__getCPUTimeLeft()

    # Report the scaled CPU consumed since the previous cycle for this job
    scaledCPUTime = self.timeLeftUtil.getScaledCPU()
    self.__setJobParam(jobID, "ScaledCPUTime", str(scaledCPUTime - self.scaledCPUTime))
    self.scaledCPUTime = scaledCPUTime

    return S_OK("Job Agent cycle complete")
class JobRepository(object):
    """File-backed record of jobs, stored with the CFG utility under a 'Jobs' section.

    Every modifying call rewrites the repository file at ``self.location``.
    """

    def __init__(self, repository=None):
        """Load (or create) the repository file.

        :param str repository: path of the repository file; defaults to
            ``.dirac.repo.rep`` in $HOME, or in the current directory when
            HOME is not set
        """
        self.location = repository
        if not self.location:
            if "HOME" in os.environ:
                self.location = '%s/.dirac.repo.rep' % os.environ['HOME']
            else:
                self.location = '%s/.dirac.repo.rep' % os.getcwd()
        self.repo = CFG()
        if os.path.exists(self.location):
            self.repo.loadFromFile(self.location)
            if not self.repo.existsKey('Jobs'):
                self.repo.createNewSection('Jobs')
        else:
            self.repo.createNewSection('Jobs')
        # OK reflects whether the initial write of the repository succeeded
        self.OK = True
        written = self._writeRepository(self.location)
        if not written:
            self.OK = False

    def isOK(self):
        """Return True if the repository could be written at construction."""
        return self.OK

    def readRepository(self):
        """Return S_OK with the 'Jobs' section as a dictionary."""
        return S_OK(self.repo.getAsDict('Jobs'))

    def writeRepository(self, alternativePath=None):
        """Write the repository to ``alternativePath`` or, by default, to its own location.

        :returns: S_OK(destination path) or S_ERROR
        """
        destination = self.location
        if alternativePath:
            destination = alternativePath
        written = self._writeRepository(destination)
        if not written:
            return S_ERROR("Failed to write repository")
        return S_OK(destination)

    def resetRepository(self, jobIDs=None):
        """Set the given jobs (all jobs when none are given) back to 'Submitted'.

        :param list jobIDs: job IDs to reset; None or empty means every job in the repository
        :returns: S_OK()
        """
        # None replaces the former mutable default argument ([]); both mean "all jobs"
        if not jobIDs:
            jobs = self.readRepository()['Value']
            jobIDs = list(jobs)
        paramDict = {'State': 'Submitted',
                     'Retrieved': 0,
                     'OutputData': 0}
        for jobID in jobIDs:
            self._writeJob(jobID, paramDict, True)
        self._writeRepository(self.location)
        return S_OK()

    def _writeRepository(self, path):
        """Write the repository to a temporary file and move it to ``path``.

        :returns: True on success; the (false) result of the write if the
            temporary file could not be written; False if the move failed
        """
        handle, tmpName = tempfile.mkstemp()
        # Only the file name is handed to writeToFile, so release the descriptor
        # right away; it cannot leak if the write raises.
        os.close(handle)
        written = self.repo.writeToFile(tmpName)
        if not written:
            if os.path.exists(tmpName):
                os.remove(tmpName)
            return written
        if os.path.exists(path):
            gLogger.debug("Replacing %s" % path)
        try:
            shutil.move(tmpName, path)
            return True
        except Exception as x:  # pylint: disable=broad-except
            gLogger.error("Failed to overwrite repository.", x)
            gLogger.info("If your repository is corrupted a backup can be found %s" % tmpName)
            return False

    def appendToRepository(self, repoLocation):
        """Merge this repository into the one loaded from ``repoLocation`` and save the result.

        :returns: S_OK() or S_ERROR if ``repoLocation`` does not exist
        """
        if not os.path.exists(repoLocation):
            gLogger.error("Secondary repository does not exist", repoLocation)
            return S_ERROR("Secondary repository does not exist")
        self.repo = CFG().loadFromFile(repoLocation).mergeWith(self.repo)
        self._writeRepository(self.location)
        return S_OK()

    def addJob(self, jobID, state='Submitted', retrieved=0, outputData=0, update=False):
        """Record a job with a time stamp and save the repository.

        :returns: S_OK(jobID); the result of the underlying write is not checked
        """
        paramDict = {'State': state,
                     'Time': self._getTime(),
                     'Retrieved': int(retrieved),
                     'OutputData': outputData}
        self._writeJob(jobID, paramDict, update)
        self._writeRepository(self.location)
        return S_OK(jobID)

    def updateJob(self, jobID, paramDict):
        """Update an existing job with ``paramDict`` (a 'Time' entry is added to it).

        Unknown job IDs are silently ignored.
        """
        if self._existsJob(jobID):
            paramDict['Time'] = self._getTime()
            self._writeJob(jobID, paramDict, True)
            self._writeRepository(self.location)
        return S_OK()

    def updateJobs(self, jobDict):
        """Update several existing jobs from ``{jobID: paramDict}`` and save once at the end."""
        for jobID, paramDict in jobDict.items():
            if self._existsJob(jobID):
                paramDict['Time'] = self._getTime()
                self._writeJob(jobID, paramDict, True)
        self._writeRepository(self.location)
        return S_OK()

    def _getTime(self):
        """Return the current ``time.ctime()`` string with spaces replaced by underscores."""
        runtime = time.ctime()
        return runtime.replace(" ", "_")

    def _writeJob(self, jobID, paramDict, update):
        """Store ``paramDict`` as options of the job section (in memory only).

        :returns: S_OK(), or S_ERROR when the job exists and ``update`` is false
        """
        jobID = str(jobID)
        jobExists = self._existsJob(jobID)
        if jobExists and (not update):
            gLogger.warn("Job exists and not overwriting")
            return S_ERROR("Job exists and not overwriting")
        if not jobExists:
            self.repo.createNewSection('Jobs/%s' % jobID)
        for key, value in paramDict.items():
            self.repo.setOption('Jobs/%s/%s' % (jobID, key), value)
        return S_OK()

    def removeJob(self, jobID):
        """Delete a job; the repository is saved only if something was deleted."""
        res = self.repo['Jobs'].deleteKey(str(jobID))  # pylint: disable=no-member
        if res:
            self._writeRepository(self.location)
        return S_OK()

    def existsJob(self, jobID):
        """Return S_OK(bool) telling whether the job is in the repository."""
        return S_OK(self._existsJob(jobID))

    def _existsJob(self, jobID):
        """Return whether a section exists for ``jobID`` under 'Jobs'."""
        return self.repo.isSection('Jobs/%s' % jobID)

    def getLocation(self):
        """Return S_OK with the path of the repository file."""
        return S_OK(self.location)

    def getSize(self):
        """Return S_OK with the number of entries in the 'Jobs' section."""
        return S_OK(len(self.repo.getAsDict('Jobs')))
def execute(self):
  """The JobAgent execution method.

  One cycle performs, in order:

  1. If a job was already picked up (``self.jobCount``): stop when the drain
     file exists, when filling mode is off, when a time-left error was recorded
     or when ``self.timeLeft`` is not above ``self.minimumTimeLeft``; otherwise
     publish the CPU time left to the CE and to the local configuration file.
  2. Ask the computing element for available slots.
  3. Build the resource description and request a job from the Matcher.
  4. Validate the Matcher answer and the JDL parameters.
  5. Set up the proxy, check/install software and submit the job.
  6. Re-evaluate the CPU time left for the next cycle.

  :returns: S_OK / S_ERROR structure, or whatever ``self.__finish`` /
            ``self.__rescheduleFailedJob`` return on the stop paths
  """
  if self.jobCount:
    # Temporary mechanism to pass a shutdown message to the agent
    if os.path.exists('/var/lib/dirac_drain'):
      return self.__finish('Node is being drained by an operator')
    # Only call timeLeft utility after a job has been picked up
    self.log.info('Attempting to check CPU time left for filling mode')
    if not self.fillingMode:
      return self.__finish('Filling Mode is Disabled')
    if self.timeLeftError:
      self.log.warn(self.timeLeftError)
      return self.__finish(self.timeLeftError)
    self.log.info('%s normalized CPU units remaining in slot' % (self.timeLeft))
    if self.timeLeft <= self.minimumTimeLeft:
      return self.__finish('No more time left')
    # Need to update the Configuration so that the new value is published in the next matching request
    result = self.computingElement.setCPUTimeLeft(cpuTimeLeft=self.timeLeft)
    if not result['OK']:
      return self.__finish(result['Message'])

    # Update local configuration to be used by submitted job wrappers
    localCfg = CFG()
    if self.extraOptions:
      localConfigFile = os.path.join('.', self.extraOptions)
    else:
      localConfigFile = os.path.join(rootPath, "etc", "dirac.cfg")
    localCfg.loadFromFile(localConfigFile)
    if not localCfg.isSection('/LocalSite'):
      localCfg.createNewSection('/LocalSite')
    localCfg.setOption('/LocalSite/CPUTimeLeft', self.timeLeft)
    localCfg.writeToFile(localConfigFile)

  self.log.verbose('Job Agent execution loop')
  result = self.computingElement.available()
  if not result['OK']:
    self.log.info('Resource is not available')
    self.log.info(result['Message'])
    return self.__finish('CE Not Available')

  self.log.info(result['Message'])

  ceInfoDict = result['CEInfoDict']
  runningJobs = ceInfoDict.get("RunningJobs")
  availableSlots = result['Value']

  if not availableSlots:
    if runningJobs:
      # The CE is busy, not broken: finish this cycle without stopping the agent
      self.log.info('No available slots with %d running jobs' % runningJobs)
      return S_OK('Job Agent cycle complete with %d running jobs' % runningJobs)
    self.log.info('CE is not available')
    return self.__finish('CE Not Available')

  result = self.computingElement.getDescription()
  if not result['OK']:
    return result
  ceDict = result['Value']
  # Add pilot information
  gridCE = gConfig.getValue('LocalSite/GridCE', 'Unknown')
  if gridCE != 'Unknown':
    ceDict['GridCE'] = gridCE
  if 'PilotReference' not in ceDict:
    ceDict['PilotReference'] = str(self.pilotReference)
  ceDict['PilotBenchmark'] = self.cpuFactor
  ceDict['PilotInfoReportedFlag'] = self.pilotInfoReportedFlag

  # Add possible job requirements
  result = gConfig.getOptionsDict('/AgentJobRequirements')
  if result['OK']:
    requirementsDict = result['Value']
    ceDict.update(requirementsDict)
    self.log.info('Requirements:', requirementsDict)

  self.log.verbose(ceDict)
  start = time.time()
  jobRequest = MatcherClient().requestJob(ceDict)
  matchTime = time.time() - start
  self.log.info('MatcherTime = %.2f (s)' % (matchTime))

  self.stopAfterFailedMatches = self.am_getOption('StopAfterFailedMatches', self.stopAfterFailedMatches)

  if not jobRequest['OK']:
    if re.search('No match found', jobRequest['Message']):
      self.log.notice('Job request OK: %s' % (jobRequest['Message']))
      self.matchFailedCount += 1
      if self.matchFailedCount > self.stopAfterFailedMatches:
        return self.__finish('Nothing to do for more than %d cycles' % self.stopAfterFailedMatches)
      return S_OK(jobRequest['Message'])
    elif jobRequest['Message'].find("seconds timeout") != -1:
      self.log.error('Timeout while requesting job', jobRequest['Message'])
      self.matchFailedCount += 1
      if self.matchFailedCount > self.stopAfterFailedMatches:
        return self.__finish('Nothing to do for more than %d cycles' % self.stopAfterFailedMatches)
      return S_OK(jobRequest['Message'])
    elif jobRequest['Message'].find("Pilot version does not match") != -1:
      # A version mismatch is not counted as a failed match: report the error
      errorMsg = 'Pilot version does not match the production version'
      self.log.error(errorMsg, jobRequest['Message'].replace(errorMsg, ''))
      return S_ERROR(jobRequest['Message'])
    else:
      self.log.notice('Failed to get jobs: %s' % (jobRequest['Message']))
      self.matchFailedCount += 1
      if self.matchFailedCount > self.stopAfterFailedMatches:
        return self.__finish('Nothing to do for more than %d cycles' % self.stopAfterFailedMatches)
      return S_OK(jobRequest['Message'])

  # Reset the Counter
  self.matchFailedCount = 0

  matcherInfo = jobRequest['Value']
  if not self.pilotInfoReportedFlag:
    # Check the flag after the first access to the Matcher
    self.pilotInfoReportedFlag = matcherInfo.get('PilotInfoReportedFlag', False)
  jobID = matcherInfo['JobID']
  matcherParams = ['JDL', 'DN', 'Group']
  for param in matcherParams:
    if param not in matcherInfo:
      self.__report(jobID, 'Failed', 'Matcher did not return %s' % (param))
      return self.__finish('Matcher Failed')
    elif not matcherInfo[param]:
      self.__report(jobID, 'Failed', 'Matcher returned null %s' % (param))
      return self.__finish('Matcher Failed')
    else:
      self.log.verbose('Matcher returned %s = %s ' % (param, matcherInfo[param]))

  jobJDL = matcherInfo['JDL']
  jobGroup = matcherInfo['Group']
  ownerDN = matcherInfo['DN']

  # Everything the Matcher returned besides JDL/DN/Group is handed to the submission
  optimizerParams = {}
  for key in matcherInfo:
    if key not in matcherParams:
      optimizerParams[key] = matcherInfo[key]

  parameters = self.__getJDLParameters(jobJDL)
  if not parameters['OK']:
    self.__report(jobID, 'Failed', 'Could Not Extract JDL Parameters')
    self.log.warn(parameters['Message'])
    return self.__finish('JDL Problem')

  params = parameters['Value']
  if 'JobID' not in params:
    msg = 'Job has not JobID defined in JDL parameters'
    self.__report(jobID, 'Failed', msg)
    self.log.warn(msg)
    return self.__finish('JDL Problem')
  else:
    jobID = params['JobID']

  if 'JobType' not in params:
    self.log.warn('Job has no JobType defined in JDL parameters')
    jobType = 'Unknown'
  else:
    jobType = params['JobType']

  if 'CPUTime' not in params:
    self.log.warn('Job has no CPU requirement defined in JDL parameters')

  # Job requirement for a number of processors
  processors = int(params.get('NumberOfProcessors', 1))
  wholeNode = 'WholeNode' in params

  if self.extraOptions:
    # A JDL without Arguments must not raise KeyError here: this code runs
    # outside the try block below, so the matched job would never be reported.
    params['Arguments'] = params.get('Arguments', '') + ' ' + self.extraOptions
    params['ExtraOptions'] = self.extraOptions

  self.log.verbose('Job request successful: \n', jobRequest['Value'])
  self.log.info('Received JobID=%s, JobType=%s' % (jobID, jobType))
  self.log.info('OwnerDN: %s JobGroup: %s' % (ownerDN, jobGroup))
  self.jobCount += 1
  try:
    jobReport = JobReport(jobID, 'JobAgent@%s' % self.siteName)
    jobReport.setJobParameter('MatcherServiceTime', str(matchTime), sendFlag=False)

    if 'BOINC_JOB_ID' in os.environ:
      # Report BOINC environment
      for thisp in ('BoincUserID', 'BoincHostID', 'BoincHostPlatform', 'BoincHostName'):
        jobReport.setJobParameter(thisp, gConfig.getValue('/LocalSite/%s' % thisp, 'Unknown'), sendFlag=False)

    jobReport.setJobStatus('Matched', 'Job Received by Agent')
    result = self.__setupProxy(ownerDN, jobGroup)
    if not result['OK']:
      return self.__rescheduleFailedJob(jobID, result['Message'], self.stopOnApplicationFailure)
    proxyChain = result.get('Value')

    # Save the job jdl for external monitoring
    self.__saveJobJDLRequest(jobID, jobJDL)

    software = self.__checkInstallSoftware(jobID, params, ceDict)
    if not software['OK']:
      self.log.error('Failed to install software for job', '%s' % (jobID))
      errorMsg = software['Message']
      if not errorMsg:
        errorMsg = 'Failed software installation'
      return self.__rescheduleFailedJob(jobID, errorMsg, self.stopOnApplicationFailure)

    self.log.debug('Before %sCE submitJob()' % (self.ceName))
    result = self.__submitJob(jobID, params, ceDict, optimizerParams, proxyChain, processors, wholeNode)
    if not result['OK']:
      self.__report(jobID, 'Failed', result['Message'])
      return self.__finish(result['Message'])
    elif 'PayloadFailed' in result:
      # Do not keep running and do not overwrite the Payload error
      message = 'Payload execution failed with error code %s' % result['PayloadFailed']
      if self.stopOnApplicationFailure:
        return self.__finish(message, self.stopOnApplicationFailure)
      else:
        self.log.info(message)

    self.log.debug('After %sCE submitJob()' % (self.ceName))
  except Exception as subExcept:  # pylint: disable=broad-except
    self.log.exception("Exception in submission", "", lException=subExcept, lExcInfo=True)
    return self.__rescheduleFailedJob(jobID, 'Job processing failed with exception', self.stopOnApplicationFailure)

  # Sum all times but the last one (elapsed_time) and remove times at init (is this correct?)
  cpuTime = sum(os.times()[:-1]) - sum(self.initTimes[:-1])

  result = self.timeLeftUtil.getTimeLeft(cpuTime, processors)
  if result['OK']:
    self.timeLeft = result['Value']
  else:
    if result['Message'] != 'Current batch system is not supported':
      self.timeLeftError = result['Message']
    else:
      # if the batch system is not defined, use the process time and the CPU normalization defined locally
      self.timeLeft = self.__getCPUTimeLeft()

  return S_OK('Job Agent cycle complete')
def execute(self):
  """The JobAgent execution method.

  One cycle performs, in order:

  1. If a job was already picked up (``self.jobCount``): stop when filling mode
     is off or a time-left error was recorded; otherwise publish the CPU time
     left to the CE and to the local configuration file.
  2. Check that the computing element is available.
  3. Build the resource description and request a job from the Matcher.
  4. Validate the Matcher answer and the JDL parameters.
  5. Set up the proxy, check/install software and submit the job.
  6. Re-evaluate the CPU time left and report the scaled CPU time consumed.

  :returns: S_OK / S_ERROR structure, or whatever ``self.__finish`` /
            ``self.__rescheduleFailedJob`` return on the stop paths
  """
  if self.jobCount:
    # Only call timeLeft utility after a job has been picked up
    self.log.info('Attempting to check CPU time left for filling mode')
    if not self.fillingMode:
      return self.__finish('Filling Mode is Disabled')
    if self.timeLeftError:
      self.log.warn(self.timeLeftError)
      return self.__finish(self.timeLeftError)
    self.log.info('%s normalized CPU units remaining in slot' % (self.timeLeft))
    # Need to update the Configuration so that the new value is published in the next matching request
    result = self.computingElement.setCPUTimeLeft(cpuTimeLeft=self.timeLeft)
    if not result['OK']:
      return self.__finish(result['Message'])

    # Update local configuration to be used by submitted job wrappers
    localCfg = CFG()
    if self.extraOptions:
      localConfigFile = os.path.join('.', self.extraOptions)
    else:
      localConfigFile = os.path.join(rootPath, "etc", "dirac.cfg")
    localCfg.loadFromFile(localConfigFile)
    if not localCfg.isSection('/LocalSite'):
      localCfg.createNewSection('/LocalSite')
    localCfg.setOption('/LocalSite/CPUTimeLeft', self.timeLeft)
    localCfg.writeToFile(localConfigFile)

  self.log.verbose('Job Agent execution loop')
  available = self.computingElement.available()
  if not available['OK'] or not available['Value']:
    self.log.info('Resource is not available')
    self.log.info(available['Message'])
    return self.__finish('CE Not Available')

  self.log.info(available['Message'])

  result = self.computingElement.getDescription()
  if not result['OK']:
    return result
  ceDict = result['Value']

  # Add pilot information
  gridCE = gConfig.getValue('LocalSite/GridCE', 'Unknown')
  if gridCE != 'Unknown':
    ceDict['GridCE'] = gridCE
  if 'PilotReference' not in ceDict:
    ceDict['PilotReference'] = str(self.pilotReference)
  ceDict['PilotBenchmark'] = self.cpuFactor
  ceDict['PilotInfoReportedFlag'] = self.pilotInfoReportedFlag

  # Add possible job requirements
  result = gConfig.getOptionsDict('/AgentJobRequirements')
  if result['OK']:
    requirementsDict = result['Value']
    ceDict.update(requirementsDict)

  self.log.verbose(ceDict)
  start = time.time()
  jobRequest = self.__requestJob(ceDict)
  matchTime = time.time() - start
  self.log.info('MatcherTime = %.2f (s)' % (matchTime))

  self.stopAfterFailedMatches = self.am_getOption('StopAfterFailedMatches', self.stopAfterFailedMatches)

  if not jobRequest['OK']:
    if re.search('No match found', jobRequest['Message']):
      self.log.notice('Job request OK: %s' % (jobRequest['Message']))
      self.matchFailedCount += 1
      if self.matchFailedCount > self.stopAfterFailedMatches:
        return self.__finish('Nothing to do for more than %d cycles' % self.stopAfterFailedMatches)
      return S_OK(jobRequest['Message'])
    elif jobRequest['Message'].find("seconds timeout") != -1:
      self.log.error(jobRequest['Message'])
      self.matchFailedCount += 1
      if self.matchFailedCount > self.stopAfterFailedMatches:
        return self.__finish('Nothing to do for more than %d cycles' % self.stopAfterFailedMatches)
      return S_OK(jobRequest['Message'])
    elif jobRequest['Message'].find("Pilot version does not match") != -1:
      # A version mismatch is not counted as a failed match: report the error
      self.log.error(jobRequest['Message'])
      return S_ERROR(jobRequest['Message'])
    else:
      self.log.notice('Failed to get jobs: %s' % (jobRequest['Message']))
      self.matchFailedCount += 1
      if self.matchFailedCount > self.stopAfterFailedMatches:
        return self.__finish('Nothing to do for more than %d cycles' % self.stopAfterFailedMatches)
      return S_OK(jobRequest['Message'])

  # Reset the Counter
  self.matchFailedCount = 0

  matcherInfo = jobRequest['Value']
  jobID = matcherInfo['JobID']
  if not self.pilotInfoReportedFlag:
    # Check the flag after the first access to the Matcher
    self.pilotInfoReportedFlag = matcherInfo.get('PilotInfoReportedFlag', False)
  matcherParams = ['JDL', 'DN', 'Group']
  for param in matcherParams:
    # 'in' replaces dict.has_key(), which does not exist in Python 3
    if param not in matcherInfo:
      self.__report(jobID, 'Failed', 'Matcher did not return %s' % (param))
      return self.__finish('Matcher Failed')
    elif not matcherInfo[param]:
      self.__report(jobID, 'Failed', 'Matcher returned null %s' % (param))
      return self.__finish('Matcher Failed')
    else:
      self.log.verbose('Matcher returned %s = %s ' % (param, matcherInfo[param]))

  jobJDL = matcherInfo['JDL']
  jobGroup = matcherInfo['Group']
  ownerDN = matcherInfo['DN']

  # Everything the Matcher returned besides JDL/DN/Group is handed to the submission
  optimizerParams = {}
  for key in matcherInfo:
    if key not in matcherParams:
      optimizerParams[key] = matcherInfo[key]

  parameters = self.__getJDLParameters(jobJDL)
  if not parameters['OK']:
    self.__report(jobID, 'Failed', 'Could Not Extract JDL Parameters')
    self.log.warn(parameters['Message'])
    return self.__finish('JDL Problem')

  params = parameters['Value']
  if 'JobID' not in params:
    msg = 'Job has not JobID defined in JDL parameters'
    self.__report(jobID, 'Failed', msg)
    self.log.warn(msg)
    return self.__finish('JDL Problem')
  else:
    jobID = params['JobID']

  if 'JobType' not in params:
    self.log.warn('Job has no JobType defined in JDL parameters')
    jobType = 'Unknown'
  else:
    jobType = params['JobType']

  if 'CPUTime' not in params:
    self.log.warn('Job has no CPU requirement defined in JDL parameters')

  if self.extraOptions:
    # A JDL without Arguments must not raise KeyError here: this code runs
    # outside the try block below, so the matched job would never be reported.
    params['Arguments'] = params.get('Arguments', '') + ' ' + self.extraOptions
    params['ExtraOptions'] = self.extraOptions

  self.log.verbose('Job request successful: \n %s' % (jobRequest['Value']))
  self.log.info('Received JobID=%s, JobType=%s' % (jobID, jobType))
  self.log.info('OwnerDN: %s JobGroup: %s' % (ownerDN, jobGroup))
  self.jobCount += 1
  try:
    jobReport = JobReport(jobID, 'JobAgent@%s' % self.siteName)
    jobReport.setJobParameter('MatcherServiceTime', str(matchTime), sendFlag=False)

    if 'BOINC_JOB_ID' in os.environ:
      # Report BOINC environment
      for boincOption in ('BoincUserID', 'BoincHostID', 'BoincHostPlatform', 'BoincHostName'):
        jobReport.setJobParameter(boincOption,
                                  gConfig.getValue('/LocalSite/%s' % boincOption, 'Unknown'),
                                  sendFlag=False)

    jobReport.setJobStatus('Matched', 'Job Received by Agent')
    result = self.__setupProxy(ownerDN, jobGroup)
    if not result['OK']:
      return self.__rescheduleFailedJob(jobID, result['Message'], self.stopOnApplicationFailure)
    # Always bind proxyChain: it used to stay undefined when 'Value' was empty,
    # which made the submission below fail with a NameError.
    # NOTE(review): assumes __submitJob accepts an empty proxyChain - confirm.
    proxyChain = result.get('Value')

    # Save the job jdl for external monitoring
    self.__saveJobJDLRequest(jobID, jobJDL)

    software = self.__checkInstallSoftware(jobID, params, ceDict)
    if not software['OK']:
      self.log.error('Failed to install software for job %s' % (jobID))
      errorMsg = software['Message']
      if not errorMsg:
        errorMsg = 'Failed software installation'
      return self.__rescheduleFailedJob(jobID, errorMsg, self.stopOnApplicationFailure)

    self.log.debug('Before %sCE submitJob()' % (self.ceName))
    submission = self.__submitJob(jobID, params, ceDict, optimizerParams, proxyChain)
    if not submission['OK']:
      self.__report(jobID, 'Failed', submission['Message'])
      return self.__finish(submission['Message'])
    elif 'PayloadFailed' in submission:
      # Do not keep running and do not overwrite the Payload error
      return self.__finish('Payload execution failed with error code %s' % submission['PayloadFailed'],
                           self.stopOnApplicationFailure)

    self.log.debug('After %sCE submitJob()' % (self.ceName))
  except Exception:
    self.log.exception()
    return self.__rescheduleFailedJob(jobID, 'Job processing failed with exception', self.stopOnApplicationFailure)

  # CPU consumed since initialisation: the first four os.times() fields
  # (user, system, children user, children system); elapsed time is left out.
  cpuTime = sum(now - init for now, init in zip(os.times()[:4], self.initTimes[:4]))

  result = self.timeLeftUtil.getTimeLeft(cpuTime)
  if result['OK']:
    self.timeLeft = result['Value']
  else:
    if result['Message'] != 'Current batch system is not supported':
      self.timeLeftError = result['Message']
    else:
      if self.cpuFactor:
        # if the batch system is not defined used the CPUNormalizationFactor
        # defined locally
        self.timeLeft = self.__getCPUTimeLeft()
  scaledCPUTime = self.timeLeftUtil.getScaledCPU()['Value']

  # Report only what was consumed since the previous cycle
  self.__setJobParam(jobID, 'ScaledCPUTime', str(scaledCPUTime - self.scaledCPUTime))
  self.scaledCPUTime = scaledCPUTime

  return S_OK('Job Agent cycle complete')
def execute(self):
  """The JobAgent execution method.

  One cycle performs, in order:

  1. If a job was already picked up (``self.jobCount``): stop when filling mode
     is off or a time-left error was recorded; otherwise publish the CPU time
     left to the CE and to ``etc/dirac.cfg`` under ``rootPath``.
  2. Check that the computing element is available.
  3. Build the resource description and request a job from the Matcher.
  4. Validate the Matcher answer and the JDL parameters, resolving the
     ``SystemConfig`` parameter from ``/LocalSite/Architecture`` when it is
     missing or set to "any".
  5. Set up the proxy, check/install software and submit the job.
  6. Re-evaluate the CPU time left and report the scaled CPU time consumed.

  :returns: S_OK / S_ERROR structure, or whatever ``self.__finish`` /
            ``self.__rescheduleFailedJob`` return on the stop paths
  """
  if self.jobCount:
    # Only call timeLeft utility after a job has been picked up
    self.log.info('Attempting to check CPU time left for filling mode')
    if not self.fillingMode:
      return self.__finish('Filling Mode is Disabled')
    if self.timeLeftError:
      self.log.warn(self.timeLeftError)
      return self.__finish(self.timeLeftError)
    self.log.info('%s normalized CPU units remaining in slot' % (self.timeLeft))
    # Need to update the Configuration so that the new value is published in the next matching request
    result = self.computingElement.setCPUTimeLeft(cpuTimeLeft=self.timeLeft)
    if not result['OK']:
      return self.__finish(result['Message'])

    # Update local configuration to be used by submitted job wrappers
    localCfg = CFG()
    localConfigFile = os.path.join(rootPath, "etc", "dirac.cfg")
    localCfg.loadFromFile(localConfigFile)
    if not localCfg.isSection('/LocalSite'):
      localCfg.createNewSection('/LocalSite')
    localCfg.setOption('/LocalSite/CPUTimeLeft', self.timeLeft)
    localCfg.writeToFile(localConfigFile)

  self.log.verbose('Job Agent execution loop')
  available = self.computingElement.available()
  if not available['OK'] or not available['Value']:
    self.log.info('Resource is not available')
    self.log.info(available['Message'])
    return self.__finish('CE Not Available')

  self.log.info(available['Message'])

  result = self.computingElement.getDescription()
  if not result['OK']:
    return result
  ceDict = result['Value']

  # Add pilot information
  gridCE = gConfig.getValue('LocalSite/GridCE', 'Unknown')
  if gridCE != 'Unknown':
    ceDict['GridCE'] = gridCE
  if 'PilotReference' not in ceDict:
    ceDict['PilotReference'] = str(self.pilotReference)
  ceDict['PilotBenchmark'] = self.cpuFactor
  ceDict['PilotInfoReportedFlag'] = self.pilotInfoReportedFlag

  # Add possible job requirements
  result = gConfig.getOptionsDict('/AgentJobRequirements')
  if result['OK']:
    requirementsDict = result['Value']
    ceDict.update(requirementsDict)

  self.log.verbose(ceDict)
  start = time.time()
  jobRequest = self.__requestJob(ceDict)
  matchTime = time.time() - start
  self.log.info('MatcherTime = %.2f (s)' % (matchTime))

  self.stopAfterFailedMatches = self.am_getOption('StopAfterFailedMatches', self.stopAfterFailedMatches)

  if not jobRequest['OK']:
    if re.search('No match found', jobRequest['Message']):
      self.log.notice('Job request OK: %s' % (jobRequest['Message']))
      self.matchFailedCount += 1
      if self.matchFailedCount > self.stopAfterFailedMatches:
        return self.__finish('Nothing to do for more than %d cycles' % self.stopAfterFailedMatches)
      return S_OK(jobRequest['Message'])
    elif jobRequest['Message'].find("seconds timeout") != -1:
      self.log.error(jobRequest['Message'])
      self.matchFailedCount += 1
      if self.matchFailedCount > self.stopAfterFailedMatches:
        return self.__finish('Nothing to do for more than %d cycles' % self.stopAfterFailedMatches)
      return S_OK(jobRequest['Message'])
    elif jobRequest['Message'].find("Pilot version does not match") != -1:
      # A version mismatch is not counted as a failed match: report the error
      self.log.error(jobRequest['Message'])
      return S_ERROR(jobRequest['Message'])
    else:
      self.log.notice('Failed to get jobs: %s' % (jobRequest['Message']))
      self.matchFailedCount += 1
      if self.matchFailedCount > self.stopAfterFailedMatches:
        return self.__finish('Nothing to do for more than %d cycles' % self.stopAfterFailedMatches)
      return S_OK(jobRequest['Message'])

  # Reset the Counter
  self.matchFailedCount = 0

  matcherInfo = jobRequest['Value']
  jobID = matcherInfo['JobID']
  self.pilotInfoReportedFlag = matcherInfo.get('PilotInfoReportedFlag', False)
  matcherParams = ['JDL', 'DN', 'Group']
  for param in matcherParams:
    # 'in' replaces dict.has_key(), which does not exist in Python 3
    if param not in matcherInfo:
      self.__report(jobID, 'Failed', 'Matcher did not return %s' % (param))
      return self.__finish('Matcher Failed')
    elif not matcherInfo[param]:
      self.__report(jobID, 'Failed', 'Matcher returned null %s' % (param))
      return self.__finish('Matcher Failed')
    else:
      self.log.verbose('Matcher returned %s = %s ' % (param, matcherInfo[param]))

  jobJDL = matcherInfo['JDL']
  jobGroup = matcherInfo['Group']
  ownerDN = matcherInfo['DN']

  # Everything the Matcher returned besides JDL/DN/Group is handed to the submission
  optimizerParams = {}
  for key in matcherInfo:
    if key not in matcherParams:
      optimizerParams[key] = matcherInfo[key]

  parameters = self.__getJDLParameters(jobJDL)
  if not parameters['OK']:
    self.__report(jobID, 'Failed', 'Could Not Extract JDL Parameters')
    self.log.warn(parameters['Message'])
    return self.__finish('JDL Problem')

  params = parameters['Value']
  if 'JobID' not in params:
    msg = 'Job has not JobID defined in JDL parameters'
    self.__report(jobID, 'Failed', msg)
    self.log.warn(msg)
    return self.__finish('JDL Problem')
  else:
    jobID = params['JobID']

  if 'JobType' not in params:
    self.log.warn('Job has no JobType defined in JDL parameters')
    jobType = 'Unknown'
  else:
    jobType = params['JobType']

  if 'SystemConfig' not in params:
    self.log.warn('Job has no system configuration defined in JDL parameters')
    systemConfig = gConfig.getValue('/LocalSite/Architecture', '')
    self.log.info('Setting system config to /LocalSite/Architecture = %s since it was not specified' % systemConfig)
    if not systemConfig:
      self.log.warn('/LocalSite/Architecture is not defined')
    params['SystemConfig'] = systemConfig
  else:
    systemConfig = params['SystemConfig']
    if systemConfig.lower() == 'any':
      # "ANY" in the job description means: use the local architecture
      systemConfig = gConfig.getValue('/LocalSite/Architecture', '')
      self.log.info('Setting SystemConfig = /LocalSite/Architecture =',
                    '"%s" since it was set to "ANY" in the job description' % systemConfig)
      if not systemConfig:
        self.log.warn('/LocalSite/Architecture is not defined')
      params['SystemConfig'] = systemConfig

  if 'CPUTime' not in params:
    self.log.warn('Job has no CPU requirement defined in JDL parameters')

  self.log.verbose('Job request successful: \n %s' % (jobRequest['Value']))
  self.log.info('Received JobID=%s, JobType=%s, SystemConfig=%s' % (jobID, jobType, systemConfig))
  self.log.info('OwnerDN: %s JobGroup: %s' % (ownerDN, jobGroup))
  self.jobCount += 1
  try:
    jobReport = JobReport(jobID, 'JobAgent@%s' % self.siteName)
    jobReport.setJobParameter('MatcherServiceTime', str(matchTime), sendFlag=False)
    if self.gridCEQueue:
      jobReport.setJobParameter('GridCEQueue', self.gridCEQueue, sendFlag=False)

    if 'BOINC_JOB_ID' in os.environ:
      # Report BOINC environment
      for boincOption in ('BoincUserID', 'BoincHostID', 'BoincHostPlatform', 'BoincHostName'):
        jobReport.setJobParameter(boincOption,
                                  gConfig.getValue('/LocalSite/%s' % boincOption, 'Unknown'),
                                  sendFlag=False)

    jobReport.setJobStatus('Matched', 'Job Received by Agent')
    if not self.pilotInfoReportedFlag:
      self.__reportPilotInfo(jobID)
    result = self.__setupProxy(ownerDN, jobGroup)
    if not result['OK']:
      return self.__rescheduleFailedJob(jobID, result['Message'], self.stopOnApplicationFailure)
    # Always bind proxyChain: it used to stay undefined when 'Value' was empty,
    # which made the submission below fail with a NameError.
    # NOTE(review): assumes __submitJob accepts an empty proxyChain - confirm.
    proxyChain = result.get('Value')

    # Is this necessary at all?
    self.__saveJobJDLRequest(jobID, jobJDL)

    software = self.__checkInstallSoftware(jobID, params, ceDict)
    if not software['OK']:
      self.log.error('Failed to install software for job %s' % (jobID))
      errorMsg = software['Message']
      if not errorMsg:
        errorMsg = 'Failed software installation'
      return self.__rescheduleFailedJob(jobID, errorMsg, self.stopOnApplicationFailure)

    self.log.verbose('Before %sCE submitJob()' % (self.ceName))
    submission = self.__submitJob(jobID, params, ceDict, optimizerParams, jobJDL, proxyChain)
    if not submission['OK']:
      self.__report(jobID, 'Failed', submission['Message'])
      return self.__finish(submission['Message'])
    elif 'PayloadFailed' in submission:
      # Do not keep running and do not overwrite the Payload error
      return self.__finish('Payload execution failed with error code %s' % submission['PayloadFailed'],
                           self.stopOnApplicationFailure)

    self.log.verbose('After %sCE submitJob()' % (self.ceName))
  except Exception:
    self.log.exception()
    return self.__rescheduleFailedJob(jobID, 'Job processing failed with exception', self.stopOnApplicationFailure)

  # CPU consumed since initialisation: the first four os.times() fields
  # (user, system, children user, children system); elapsed time is left out.
  cpuTime = sum(now - init for now, init in zip(os.times()[:4], self.initTimes[:4]))

  result = self.timeLeftUtil.getTimeLeft(cpuTime)
  if result['OK']:
    self.timeLeft = result['Value']
  else:
    if result['Message'] != 'Current batch system is not supported':
      self.timeLeftError = result['Message']
    else:
      if self.cpuFactor:
        # if the batch system is not defined used the CPUNormalizationFactor
        # defined locally
        self.timeLeft = self.__getCPUTimeLeft()
  scaledCPUTime = self.timeLeftUtil.getScaledCPU()['Value']

  # Report only what was consumed since the previous cycle
  self.__setJobParam(jobID, 'ScaledCPUTime', str(scaledCPUTime - self.scaledCPUTime))
  self.scaledCPUTime = scaledCPUTime

  return S_OK('Job Agent cycle complete')
def execute(self):
  """The JobAgent execution method.

  One cycle performs, in order:

  1. If a job was already picked up (``self.jobCount``): stop when filling mode
     is off, when a time-left error was recorded or when ``self.timeLeft`` is
     not above ``self.minimumTimeLeft``; otherwise publish the CPU time left to
     the CE and to the local configuration file.
  2. Check that the computing element is available.
  3. Build the resource description and request a job from the Matcher.
  4. Validate the Matcher answer and the JDL parameters.
  5. Set up the proxy, check/install software and submit the job.
  6. Re-evaluate the CPU time left and report the scaled CPU time consumed.

  :returns: S_OK / S_ERROR structure, or whatever ``self.__finish`` /
            ``self.__rescheduleFailedJob`` return on the stop paths
  """
  if self.jobCount:
    # Only call timeLeft utility after a job has been picked up
    self.log.info('Attempting to check CPU time left for filling mode')
    if not self.fillingMode:
      return self.__finish('Filling Mode is Disabled')
    if self.timeLeftError:
      self.log.warn(self.timeLeftError)
      return self.__finish(self.timeLeftError)
    self.log.info('%s normalized CPU units remaining in slot' % (self.timeLeft))
    if self.timeLeft <= self.minimumTimeLeft:
      return self.__finish('No more time left')
    # Need to update the Configuration so that the new value is published in the next matching request
    result = self.computingElement.setCPUTimeLeft(cpuTimeLeft=self.timeLeft)
    if not result['OK']:
      return self.__finish(result['Message'])

    # Update local configuration to be used by submitted job wrappers
    localCfg = CFG()
    if self.extraOptions:
      localConfigFile = os.path.join('.', self.extraOptions)
    else:
      localConfigFile = os.path.join(rootPath, "etc", "dirac.cfg")
    localCfg.loadFromFile(localConfigFile)
    if not localCfg.isSection('/LocalSite'):
      localCfg.createNewSection('/LocalSite')
    localCfg.setOption('/LocalSite/CPUTimeLeft', self.timeLeft)
    localCfg.writeToFile(localConfigFile)

  self.log.verbose('Job Agent execution loop')
  available = self.computingElement.available()
  if not available['OK'] or not available['Value']:
    self.log.info('Resource is not available')
    self.log.info(available['Message'])
    return self.__finish('CE Not Available')

  self.log.info(available['Message'])

  result = self.computingElement.getDescription()
  if not result['OK']:
    return result
  ceDict = result['Value']

  # Add pilot information
  gridCE = gConfig.getValue('LocalSite/GridCE', 'Unknown')
  if gridCE != 'Unknown':
    ceDict['GridCE'] = gridCE
  if 'PilotReference' not in ceDict:
    ceDict['PilotReference'] = str(self.pilotReference)
  ceDict['PilotBenchmark'] = self.cpuFactor
  ceDict['PilotInfoReportedFlag'] = self.pilotInfoReportedFlag

  # Add possible job requirements
  result = gConfig.getOptionsDict('/AgentJobRequirements')
  if result['OK']:
    requirementsDict = result['Value']
    ceDict.update(requirementsDict)

  self.log.verbose(ceDict)
  start = time.time()
  jobRequest = self.__requestJob(ceDict)
  matchTime = time.time() - start
  self.log.info('MatcherTime = %.2f (s)' % (matchTime))

  self.stopAfterFailedMatches = self.am_getOption('StopAfterFailedMatches', self.stopAfterFailedMatches)

  if not jobRequest['OK']:
    if re.search('No match found', jobRequest['Message']):
      self.log.notice('Job request OK: %s' % (jobRequest['Message']))
      self.matchFailedCount += 1
      if self.matchFailedCount > self.stopAfterFailedMatches:
        return self.__finish('Nothing to do for more than %d cycles' % self.stopAfterFailedMatches)
      return S_OK(jobRequest['Message'])
    elif jobRequest['Message'].find("seconds timeout") != -1:
      self.log.error('Timeout while requesting job', jobRequest['Message'])
      self.matchFailedCount += 1
      if self.matchFailedCount > self.stopAfterFailedMatches:
        return self.__finish('Nothing to do for more than %d cycles' % self.stopAfterFailedMatches)
      return S_OK(jobRequest['Message'])
    elif jobRequest['Message'].find("Pilot version does not match") != -1:
      # A version mismatch is not counted as a failed match: report the error
      errorMsg = 'Pilot version does not match the production version'
      self.log.error(errorMsg, jobRequest['Message'].replace(errorMsg, ''))
      return S_ERROR(jobRequest['Message'])
    else:
      self.log.notice('Failed to get jobs: %s' % (jobRequest['Message']))
      self.matchFailedCount += 1
      if self.matchFailedCount > self.stopAfterFailedMatches:
        return self.__finish('Nothing to do for more than %d cycles' % self.stopAfterFailedMatches)
      return S_OK(jobRequest['Message'])

  # Reset the Counter
  self.matchFailedCount = 0

  matcherInfo = jobRequest['Value']
  if not self.pilotInfoReportedFlag:
    # Check the flag after the first access to the Matcher
    self.pilotInfoReportedFlag = matcherInfo.get('PilotInfoReportedFlag', False)
  jobID = matcherInfo['JobID']
  matcherParams = ['JDL', 'DN', 'Group']
  for param in matcherParams:
    if param not in matcherInfo:
      self.__report(jobID, 'Failed', 'Matcher did not return %s' % (param))
      return self.__finish('Matcher Failed')
    elif not matcherInfo[param]:
      self.__report(jobID, 'Failed', 'Matcher returned null %s' % (param))
      return self.__finish('Matcher Failed')
    else:
      self.log.verbose('Matcher returned %s = %s ' % (param, matcherInfo[param]))

  jobJDL = matcherInfo['JDL']
  jobGroup = matcherInfo['Group']
  ownerDN = matcherInfo['DN']

  # Everything the Matcher returned besides JDL/DN/Group is handed to the submission
  optimizerParams = {}
  for key in matcherInfo:
    if key not in matcherParams:
      optimizerParams[key] = matcherInfo[key]

  parameters = self.__getJDLParameters(jobJDL)
  if not parameters['OK']:
    self.__report(jobID, 'Failed', 'Could Not Extract JDL Parameters')
    self.log.warn(parameters['Message'])
    return self.__finish('JDL Problem')

  params = parameters['Value']
  if 'JobID' not in params:
    msg = 'Job has not JobID defined in JDL parameters'
    self.__report(jobID, 'Failed', msg)
    self.log.warn(msg)
    return self.__finish('JDL Problem')
  else:
    jobID = params['JobID']

  if 'JobType' not in params:
    self.log.warn('Job has no JobType defined in JDL parameters')
    jobType = 'Unknown'
  else:
    jobType = params['JobType']

  if 'CPUTime' not in params:
    self.log.warn('Job has no CPU requirement defined in JDL parameters')

  if self.extraOptions:
    # A JDL without Arguments must not raise KeyError here: this code runs
    # outside the try block below, so the matched job would never be reported.
    params['Arguments'] = params.get('Arguments', '') + ' ' + self.extraOptions
    params['ExtraOptions'] = self.extraOptions

  self.log.verbose('Job request successful: \n', jobRequest['Value'])
  self.log.info('Received JobID=%s, JobType=%s' % (jobID, jobType))
  self.log.info('OwnerDN: %s JobGroup: %s' % (ownerDN, jobGroup))
  self.jobCount += 1
  try:
    jobReport = JobReport(jobID, 'JobAgent@%s' % self.siteName)
    jobReport.setJobParameter('MatcherServiceTime', str(matchTime), sendFlag=False)

    if 'BOINC_JOB_ID' in os.environ:
      # Report BOINC environment
      for boincOption in ('BoincUserID', 'BoincHostID', 'BoincHostPlatform', 'BoincHostName'):
        jobReport.setJobParameter(boincOption,
                                  gConfig.getValue('/LocalSite/%s' % boincOption, 'Unknown'),
                                  sendFlag=False)

    jobReport.setJobStatus('Matched', 'Job Received by Agent')
    result = self.__setupProxy(ownerDN, jobGroup)
    if not result['OK']:
      return self.__rescheduleFailedJob(jobID, result['Message'], self.stopOnApplicationFailure)
    proxyChain = result.get('Value')

    # Save the job jdl for external monitoring
    self.__saveJobJDLRequest(jobID, jobJDL)

    software = self.__checkInstallSoftware(jobID, params, ceDict)
    if not software['OK']:
      self.log.error('Failed to install software for job', '%s' % (jobID))
      errorMsg = software['Message']
      if not errorMsg:
        errorMsg = 'Failed software installation'
      return self.__rescheduleFailedJob(jobID, errorMsg, self.stopOnApplicationFailure)

    self.log.debug('Before %sCE submitJob()' % (self.ceName))
    submission = self.__submitJob(jobID, params, ceDict, optimizerParams, proxyChain)
    if not submission['OK']:
      self.__report(jobID, 'Failed', submission['Message'])
      return self.__finish(submission['Message'])
    elif 'PayloadFailed' in submission:
      # Do not keep running and do not overwrite the Payload error
      message = 'Payload execution failed with error code %s' % submission['PayloadFailed']
      if self.stopOnApplicationFailure:
        return self.__finish(message, self.stopOnApplicationFailure)
      else:
        self.log.info(message)

    self.log.debug('After %sCE submitJob()' % (self.ceName))
  except Exception:
    self.log.exception()
    return self.__rescheduleFailedJob(jobID, 'Job processing failed with exception', self.stopOnApplicationFailure)

  # Sum all times but the last one (elapsed_time) and remove times at init (is this correct?)
  cpuTime = sum(os.times()[:-1]) - sum(self.initTimes[:-1])

  result = self.timeLeftUtil.getTimeLeft(cpuTime)
  if result['OK']:
    self.timeLeft = result['Value']
  else:
    if result['Message'] != 'Current batch system is not supported':
      self.timeLeftError = result['Message']
    else:
      # if the batch system is not defined, use the process time and the CPU normalization defined locally
      self.timeLeft = self.__getCPUTimeLeft()

  scaledCPUTime = self.timeLeftUtil.getScaledCPU()
  # Report only what was consumed since the previous cycle
  self.__setJobParam(jobID, 'ScaledCPUTime', str(scaledCPUTime - self.scaledCPUTime))
  self.scaledCPUTime = scaledCPUTime

  return S_OK('Job Agent cycle complete')
def toCFG(self):
    """Serialise this object's storage element, status and PFN as CFG text.

    :returns: S_OK(str) holding a CFG with one section named after ``self.se``
              that contains the ``Status`` and ``PFN`` options
    """
    section = self.se
    replicaCfg = CFG()
    replicaCfg.createNewSection(section)
    # Both options live directly under the storage element section
    for template, optionValue in (('%s/Status', self.status), ('%s/PFN', self.pfn)):
        replicaCfg.setOption(template % section, optionValue)
    return S_OK(str(replicaCfg))
def toCFG(self):
    """Return the storage element, status and PFN of this object as a CFG string.

    :returns: S_OK(str), the text form of a CFG whose only section is named
              after ``self.se`` and carries the ``Status`` and ``PFN`` options
    """
    seName = self.se
    cfg = CFG()
    cfg.createNewSection(seName)
    statusPath = '%s/Status' % seName
    cfg.setOption(statusPath, self.status)
    pfnPath = '%s/PFN' % seName
    cfg.setOption(pfnPath, self.pfn)
    serialised = str(cfg)
    return S_OK(serialised)
class JobRepository(object):
    """File-backed record of jobs and their state.

    The repository is kept in memory as a CFG object with one ``Jobs/<jobID>``
    section per job, and is rewritten to ``self.location`` after each change.
    """

    def __init__(self, repository=None):
        """Load the repository file (if any) and make sure a ``Jobs`` section exists.

        :param str repository: path of the repository file. When empty, the file
            ``.dirac.repo.rep`` in $HOME is used, or in the current working
            directory if HOME is not set.
        """
        self.location = repository
        if not self.location:
            if "HOME" in os.environ:
                self.location = '%s/.dirac.repo.rep' % os.environ['HOME']
            else:
                self.location = '%s/.dirac.repo.rep' % os.getcwd()
        self.repo = CFG()
        if os.path.exists(self.location):
            self.repo.loadFromFile(self.location)
            if not self.repo.existsKey('Jobs'):
                self.repo.createNewSection('Jobs')
        else:
            self.repo.createNewSection('Jobs')
        # The repository is usable only if it could be written back to disk
        self.OK = True
        written = self._writeRepository(self.location)
        if not written:
            self.OK = False

    def isOK(self):
        """Return True if the repository could be written when it was created."""
        return self.OK

    def readRepository(self):
        """Return S_OK(dict) with the content of the ``Jobs`` section."""
        return S_OK(self.repo.getAsDict('Jobs'))

    def writeRepository(self, alternativePath=None):
        """Write the repository to disk.

        :param str alternativePath: destination file; defaults to ``self.location``
        :returns: S_OK(destination path) or S_ERROR if the write failed
        """
        destination = self.location
        if alternativePath:
            destination = alternativePath
        written = self._writeRepository(destination)
        if not written:
            return S_ERROR("Failed to write repository")
        return S_OK(destination)

    def resetRepository(self, jobIDs=None):
        """Put jobs back to the 'Submitted' state with nothing retrieved.

        :param list jobIDs: jobs to reset; when empty or None every job in the
            repository is reset
        :returns: S_OK()
        """
        # None replaces the former mutable ``[]`` default; both mean "all jobs"
        if not jobIDs:
            jobs = self.readRepository()['Value']
            jobIDs = list(jobs)
        paramDict = {'State': 'Submitted',
                     'Retrieved': 0,
                     'OutputData': 0}
        for jobID in jobIDs:
            self._writeJob(jobID, paramDict, True)
        self._writeRepository(self.location)
        return S_OK()

    def _writeRepository(self, path):
        """Write the repository to a temporary file and move it onto ``path``.

        :param str path: final location of the repository file
        :returns: True on success; otherwise a false value (the result of
            ``writeToFile`` if that failed, False if the move failed)
        """
        handle, tmpName = tempfile.mkstemp()
        try:
            written = self.repo.writeToFile(tmpName)
        finally:
            # Always release the descriptor, even if writeToFile raises
            os.close(handle)
        if not written:
            if os.path.exists(tmpName):
                os.remove(tmpName)
            return written
        if os.path.exists(path):
            gLogger.debug("Replacing %s" % path)
        try:
            shutil.move(tmpName, path)
            return True
        except Exception as x:
            # The temporary file is left in place on purpose: it is the backup
            gLogger.error("Failed to overwrite repository.", x)
            gLogger.info("If your repository is corrupted a backup can be found %s" % tmpName)
            return False

    def appendToRepository(self, repoLocation):
        """Merge another repository file with this one and save the result.

        :param str repoLocation: path of the secondary repository file
        :returns: S_OK() or S_ERROR if the file does not exist
        """
        if not os.path.exists(repoLocation):
            gLogger.error("Secondary repository does not exist", repoLocation)
            return S_ERROR("Secondary repository does not exist")
        self.repo = CFG().loadFromFile(repoLocation).mergeWith(self.repo)
        self._writeRepository(self.location)
        return S_OK()

    def addJob(self, jobID, state='Submitted', retrieved=0, outputData=0, update=False):
        """Record a job and save the repository.

        :param jobID: job identifier
        :param str state: job state to store
        :param retrieved: stored as ``int(retrieved)`` under 'Retrieved'
        :param outputData: stored under 'OutputData'
        :param bool update: allow overwriting an already recorded job
        :returns: S_OK(jobID); the outcome of the internal write is not propagated
        """
        paramDict = {'State': state,
                     'Time': self._getTime(),
                     'Retrieved': int(retrieved),
                     'OutputData': outputData}
        self._writeJob(jobID, paramDict, update)
        self._writeRepository(self.location)
        return S_OK(jobID)

    def updateJob(self, jobID, paramDict):
        """Update the options of an already recorded job; unknown jobs are ignored.

        :param jobID: job identifier
        :param dict paramDict: options to set; a 'Time' entry is added to it in place
        :returns: S_OK()
        """
        if self._existsJob(jobID):
            paramDict['Time'] = self._getTime()
            self._writeJob(jobID, paramDict, True)
            self._writeRepository(self.location)
        return S_OK()

    def updateJobs(self, jobDict):
        """Update several recorded jobs, writing the repository once at the end.

        :param dict jobDict: maps jobID to a dict of options; a 'Time' entry is
            added in place to the dict of every job that exists
        :returns: S_OK()
        """
        for jobID, paramDict in jobDict.items():
            if self._existsJob(jobID):
                paramDict['Time'] = self._getTime()
                self._writeJob(jobID, paramDict, True)
        self._writeRepository(self.location)
        return S_OK()

    def _getTime(self):
        """Return ``time.ctime()`` with spaces replaced by underscores."""
        runtime = time.ctime()
        return runtime.replace(" ", "_")

    def _writeJob(self, jobID, paramDict, update):
        """Set the options of a job in the in-memory repository.

        :param jobID: job identifier (converted to str)
        :param dict paramDict: option name -> value
        :param bool update: when False, an existing job is left untouched
        :returns: S_OK(), or S_ERROR if the job exists and ``update`` is False
        """
        jobID = str(jobID)
        jobExists = self._existsJob(jobID)
        if jobExists and (not update):
            gLogger.warn("Job exists and not overwriting")
            return S_ERROR("Job exists and not overwriting")
        if not jobExists:
            self.repo.createNewSection('Jobs/%s' % jobID)
        for key, value in paramDict.items():
            self.repo.setOption('Jobs/%s/%s' % (jobID, key), value)
        return S_OK()

    def removeJob(self, jobID):
        """Delete a job; the repository is saved only if something was deleted.

        :returns: S_OK()
        """
        res = self.repo['Jobs'].deleteKey(str(jobID))  # pylint: disable=no-member
        if res:
            self._writeRepository(self.location)
        return S_OK()

    def existsJob(self, jobID):
        """Return S_OK(bool) telling whether the job is recorded."""
        return S_OK(self._existsJob(jobID))

    def _existsJob(self, jobID):
        """Return whether ``Jobs/<jobID>`` is a section of the repository."""
        return self.repo.isSection('Jobs/%s' % jobID)

    def getLocation(self):
        """Return S_OK(path of the repository file)."""
        return S_OK(self.location)

    def getSize(self):
        """Return S_OK(number of entries in the ``Jobs`` section)."""
        return S_OK(len(self.repo.getAsDict('Jobs')))
# Report the CPU power published by MJF, if any
mjfPower = getPowerFromMJF()
if mjfPower:
  gLogger.notice( 'CPU power from MJF is %.1f HS06' % mjfPower )
else:
  gLogger.notice( 'MJF not available on this node' )

# Without an explicit config file, store the factors in the local DIRAC configuration.
# The scaling factor is the MJF power when available, the computed norm otherwise.
if update and not configFile:
  gConfig.setOptionValue( '/LocalSite/CPUScalingFactor', mjfPower if mjfPower else norm )
  gConfig.setOptionValue( '/LocalSite/CPUNormalizationFactor', norm )
  gConfig.dumpLocalCFGToFile( gConfig.diracConfigFilePath )

# With an explicit config file, update (or create) that file instead
if configFile:
  from DIRAC.Core.Utilities.CFG import CFG
  cfg = CFG()
  try:
    # Attempt to open the given file
    cfg.loadFromFile( configFile )
  except Exception:
    # Best effort: an unreadable file simply means we start from an empty CFG.
    # Exception (not a bare except) so Ctrl-C and SystemExit are not swallowed.
    gLogger.debug( 'Could not load %s, starting from an empty configuration' % configFile )
  # Create the section if it does not exist
  if not cfg.existsKey( 'LocalSite' ):
    cfg.createNewSection( 'LocalSite' )
  cfg.setOption( '/LocalSite/CPUScalingFactor', mjfPower if mjfPower else norm )
  cfg.setOption( '/LocalSite/CPUNormalizationFactor', norm )
  cfg.writeToFile( configFile )

DIRAC.exit()
localConfigFile = cFile else: print "WORKSPACE: %s" % os.path.expandvars('$WORKSPACE') if os.path.isfile( os.path.expandvars('$WORKSPACE')+'/PilotInstallDIR/etc/dirac.cfg' ): localConfigFile = os.path.expandvars('$WORKSPACE')+'/PilotInstallDIR/etc/dirac.cfg' elif os.path.isfile( os.path.expandvars('$WORKSPACE')+'/ServerInstallDIR/etc/dirac.cfg' ): localConfigFile = os.path.expandvars('$WORKSPACE')+'/ServerInstallDIR/etc/dirac.cfg' elif os.path.isfile( './etc/dirac.cfg' ): localConfigFile = './etc/dirac.cfg' else: print "Local CFG file not found" exit( 2 ) localCfg.loadFromFile( localConfigFile ) if not localCfg.isSection( '/LocalSite' ): localCfg.createNewSection( '/LocalSite' ) localCfg.setOption( '/LocalSite/CPUTimeLeft', 5000 ) localCfg.setOption( '/DIRAC/Security/UseServerCertificate', False ) if not sMod: if not setup: setup = gConfig.getValue('/DIRAC/Setup') if not setup: setup = 'JenkinsSetup' if not vo: vo = gConfig.getValue('/DIRAC/VirtualOrganization') if not vo: vo = 'dirac' if not localCfg.isSection( '/DIRAC/VOPolicy' ): localCfg.createNewSection( '/DIRAC/VOPolicy' )
result = getCPUNormalization()

if not result["OK"]:
    DIRAC.gLogger.error(result["Message"])
    # A failed result carries no "Value": stop here instead of failing with a
    # KeyError on the next statement.
    DIRAC.exit(1)

# Round the normalization to one decimal place
norm = int((result["Value"]["NORM"] + 0.05) * 10) / 10.0

DIRAC.gLogger.notice("Normalization for current CPU is %.1f %s" % (norm, result["Value"]["UNIT"]))

# Store the factor in the local DIRAC configuration
if update:
    DIRAC.gConfig.setOptionValue("/LocalSite/CPUNormalizationFactor", norm)
    DIRAC.gConfig.dumpLocalCFGToFile(DIRAC.gConfig.diracConfigFilePath)

# Store the factor in the explicitly requested configuration file
if configFile:
    from DIRAC.Core.Utilities.CFG import CFG

    cfg = CFG()
    try:
        # Attempt to open the given file
        cfg.loadFromFile(configFile)
    except Exception:
        # Best effort: an unreadable file simply means we start from an empty CFG.
        # Exception (not a bare except) so Ctrl-C and SystemExit are not swallowed.
        DIRAC.gLogger.debug("Could not load %s, starting from an empty configuration" % configFile)
    # Create the section if it does not exist
    if not cfg.existsKey("LocalSite"):
        cfg.createNewSection("LocalSite")
    cfg.setOption("/LocalSite/CPUNormalizationFactor", norm)
    cfg.writeToFile(configFile)

DIRAC.exit()
def loadJDLAsCFG( jdl ):
  """
  Load a JDL as CFG

  ``key = value;`` pairs become options of the CFG and ``key = [ ... ]`` blocks
  become sub-sections, parsed by calling this function recursively.

  :param str jdl: JDL text, optionally starting with "["
  :returns: S_OK( ( cfg, position ) ) where position is the index in ``jdl`` at which
            parsing stopped (the closing "]" or the end of the string), or S_ERROR
            for malformed input
  """

  def cleanValue( value ):
    """
    Remove the quoting of a single JDL value.

    A value starting with a double quote is read as one or more quoted strings
    separated by commas and returned joined with ", ". Any other value is
    returned with its double quotes removed.
    """
    value = value.strip()
    # NOTE(review): assumes value is not empty. assignValue checks this for plain
    # values; list items presumably never come back empty from List.fromChar.
    if value[0] == '"':
      entries = []
      iPos = 1
      current = ""
      # "in": inside a quoted string, "out": between two quoted strings
      state = "in"
      while iPos < len( value ):
        if value[ iPos ] == '"':
          if state == "in":
            entries.append( current )
            current = ""
            state = "out"
          elif state == "out":
            # Only a comma may separate two quoted strings
            current = current.strip()
            if current not in ( ",", ):
              return S_ERROR( "value seems a list but is not separated in commas" )
            current = ""
            state = "in"
        else:
          current += value[ iPos ]
        iPos += 1
      if state == "in":
        return S_ERROR( 'value is opened with " but is not closed' )
      return S_OK( ", ".join ( entries ) )
    else:
      return S_OK( value.replace( '"', '' ) )

  def assignValue( key, value, cfg ):
    """
    Clean ``value`` and store it in ``cfg`` under ``key``.

    A value of the form ``{ a, b, ... }`` is treated as a list: each item is
    cleaned on its own and the items are stored joined with ", ".
    """
    key = key.strip()
    if len( key ) == 0:
      return S_ERROR( "Invalid key name" )
    value = value.strip()
    if not value:
      return S_ERROR( "No value for key %s" % key )
    if value[0] == "{":
      if value[-1 ] != "}":
        return S_ERROR( "Value '%s' seems a list but does not end in '}'" % ( value ) )
      valList = List.fromChar( value[1:-1] )
      for i, item in enumerate( valList ):
        result = cleanValue( item )
        if not result[ 'OK' ]:
          return S_ERROR( "Var %s : %s" % ( key, result[ 'Message' ] ) )
        valList[i] = result[ 'Value' ]
        if valList[ i ] is None:
          return S_ERROR( "List value '%s' seems invalid for item %s" % ( value, i ) )
      value = ", ".join( valList )
    else:
      result = cleanValue( value )
      if not result[ 'OK' ]:
        return S_ERROR( "Var %s : %s" % ( key, result[ 'Message' ] ) )
      nV = result[ 'Value' ]
      if nV is None:
        return S_ERROR( "Value '%s seems invalid" % ( value ) )
      value = nV
    cfg.setOption( key, value )
    return S_OK()

  # Skip the opening bracket, if any. startswith() also copes with an empty
  # string, for which an empty CFG is returned.
  if jdl.startswith( "[" ):
    iPos = 1
  else:
    iPos = 0
  key = ""
  value = ""
  # Tells whether the characters being read belong to the key or to the value
  action = "key"
  insideLiteral = False
  cfg = CFG()
  while iPos < len( jdl ):
    char = jdl[ iPos ]
    if char == ";" and not insideLiteral:
      # End of a "key = value" statement
      if key.strip():
        result = assignValue( key, value, cfg )
        if not result[ 'OK' ]:
          return result
      key = ""
      value = ""
      action = "key"
    elif char == "[" and not insideLiteral:
      # Start of a nested JDL: parse it recursively into a sub-section
      key = key.strip()
      if not key:
        return S_ERROR( "Invalid key in JDL" )
      if value.strip():
        return S_ERROR( "Key %s seems to have a value and open a sub JDL at the same time" % key )
      result = loadJDLAsCFG( jdl[ iPos: ] )
      if not result[ 'OK' ]:
        return result
      subCfg, subPos = result[ 'Value' ]
      cfg.createNewSection( key, contents = subCfg )
      key = ""
      value = ""
      action = "key"
      insideLiteral = False
      # Jump to where the nested parser stopped
      iPos += subPos
    elif char == "=" and not insideLiteral:
      if action == "key":
        action = "value"
        insideLiteral = False
      else:
        # A further "=" is part of the value itself
        value += char
    elif char == "]" and not insideLiteral:
      # End of this JDL block: store any pending assignment and stop
      key = key.strip()
      if len( key ) > 0:
        result = assignValue( key, value, cfg )
        if not result[ 'OK' ]:
          return result
      return S_OK( ( cfg, iPos ) )
    else:
      if action == "key":
        key += char
      else:
        value += char
        # Separators found inside a quoted literal must not be interpreted
        if char == '"':
          insideLiteral = not insideLiteral
    iPos += 1

  return S_OK( ( cfg, iPos ) )
def loadJDLAsCFG(jdl):
    """
    Load a JDL as CFG

    ``key = value;`` pairs become options of the CFG and ``key = [ ... ]`` blocks
    become sub-sections, parsed by calling this function recursively.

    :param str jdl: JDL text, optionally starting with "["
    :returns: S_OK((cfg, position)) where position is the index in ``jdl`` at which
              parsing stopped (the closing "]" or the end of the string), or S_ERROR
              for malformed input
    """

    def cleanValue(value):
        """
        Remove the quoting of a single JDL value.

        A value starting with a double quote is read as one or more quoted strings
        separated by commas and returned joined with ", ". Any other value is
        returned with its double quotes removed.
        """
        value = value.strip()
        # NOTE(review): assumes value is not empty. assignValue checks this for plain
        # values; list items presumably never come back empty from List.fromChar.
        if value[0] == '"':
            entries = []
            iPos = 1
            current = ""
            # "in": inside a quoted string, "out": between two quoted strings
            state = "in"
            while iPos < len(value):
                if value[iPos] == '"':
                    if state == "in":
                        entries.append(current)
                        current = ""
                        state = "out"
                    elif state == "out":
                        # Only a comma may separate two quoted strings
                        current = current.strip()
                        if current not in (",",):
                            return S_ERROR("value seems a list but is not separated in commas")
                        current = ""
                        state = "in"
                else:
                    current += value[iPos]
                iPos += 1
            if state == "in":
                return S_ERROR('value is opened with " but is not closed')
            return S_OK(", ".join(entries))
        else:
            return S_OK(value.replace('"', ""))

    def assignValue(key, value, cfg):
        """
        Clean ``value`` and store it in ``cfg`` under ``key``.

        A value of the form ``{ a, b, ... }`` is treated as a list: each item is
        cleaned on its own and the items are stored joined with ", ".
        """
        key = key.strip()
        if len(key) == 0:
            return S_ERROR("Invalid key name")
        value = value.strip()
        if not value:
            return S_ERROR("No value for key %s" % key)
        if value[0] == "{":
            if value[-1] != "}":
                return S_ERROR("Value '%s' seems a list but does not end in '}'" % (value))
            valList = List.fromChar(value[1:-1])
            for i, item in enumerate(valList):
                result = cleanValue(item)
                if not result["OK"]:
                    return S_ERROR("Var %s : %s" % (key, result["Message"]))
                valList[i] = result["Value"]
                if valList[i] is None:
                    return S_ERROR("List value '%s' seems invalid for item %s" % (value, i))
            value = ", ".join(valList)
        else:
            result = cleanValue(value)
            if not result["OK"]:
                return S_ERROR("Var %s : %s" % (key, result["Message"]))
            nV = result["Value"]
            if nV is None:
                return S_ERROR("Value '%s seems invalid" % (value))
            value = nV
        cfg.setOption(key, value)
        return S_OK()

    # Skip the opening bracket, if any. startswith() also copes with an empty
    # string, for which an empty CFG is returned.
    if jdl.startswith("["):
        iPos = 1
    else:
        iPos = 0
    key = ""
    value = ""
    # Tells whether the characters being read belong to the key or to the value
    action = "key"
    insideLiteral = False
    cfg = CFG()
    while iPos < len(jdl):
        char = jdl[iPos]
        if char == ";" and not insideLiteral:
            # End of a "key = value" statement
            if key.strip():
                result = assignValue(key, value, cfg)
                if not result["OK"]:
                    return result
            key = ""
            value = ""
            action = "key"
        elif char == "[" and not insideLiteral:
            # Start of a nested JDL: parse it recursively into a sub-section
            key = key.strip()
            if not key:
                return S_ERROR("Invalid key in JDL")
            if value.strip():
                return S_ERROR("Key %s seems to have a value and open a sub JDL at the same time" % key)
            result = loadJDLAsCFG(jdl[iPos:])
            if not result["OK"]:
                return result
            subCfg, subPos = result["Value"]
            cfg.createNewSection(key, contents=subCfg)
            key = ""
            value = ""
            action = "key"
            insideLiteral = False
            # Jump to where the nested parser stopped
            iPos += subPos
        elif char == "=" and not insideLiteral:
            if action == "key":
                action = "value"
                insideLiteral = False
            else:
                # A further "=" is part of the value itself
                value += char
        elif char == "]" and not insideLiteral:
            # End of this JDL block: store any pending assignment and stop
            key = key.strip()
            if len(key) > 0:
                result = assignValue(key, value, cfg)
                if not result["OK"]:
                    return result
            return S_OK((cfg, iPos))
        else:
            if action == "key":
                key += char
            else:
                value += char
                # Separators found inside a quoted literal must not be interpreted
                if char == '"':
                    insideLiteral = not insideLiteral
        iPos += 1

    return S_OK((cfg, iPos))
# ProductionSandboxSE # { # BackendType = DISET # AccessProtocol.1 # { # Host = localhost # Port = 9196 # ProtocolName = DIP # Protocol = dips # Path = /scratch/workspace/%s/sandboxes % setupName # Access = remote # SpaceToken = # WSUrl = # } # } localCfg.createNewSection( 'Resources/StorageElements/' ) localCfg.createNewSection( 'Resources/StorageElements/ProductionSandboxSE' ) localCfg.setOption( 'Resources/StorageElements/ProductionSandboxSE/BackendType', 'DISET' ) localCfg.createNewSection( 'Resources/StorageElements/ProductionSandboxSE/AccessProtocol.1' ) localCfg.setOption( 'Resources/StorageElements/ProductionSandboxSE/AccessProtocol.1/Host', 'localhost' ) localCfg.setOption( 'Resources/StorageElements/ProductionSandboxSE/AccessProtocol.1/Port', '9196' ) localCfg.setOption( 'Resources/StorageElements/ProductionSandboxSE/AccessProtocol.1/ProtocolName', 'DIP' ) localCfg.setOption( 'Resources/StorageElements/ProductionSandboxSE/AccessProtocol.1/Path', '%s/sandboxes' % setupName ) localCfg.setOption( 'Resources/StorageElements/ProductionSandboxSE/AccessProtocol.1/Access', 'remote' ) # Setup the DFC # # DataManagement # {
# Report the CPU power published by MJF, if any
mjfPower = getPowerFromMJF()
if mjfPower:
    gLogger.notice('CPU power from MJF is %.1f HS06' % mjfPower)
else:
    gLogger.notice('MJF not available on this node')

# Without an explicit config file, store the factors in the local DIRAC configuration.
# The scaling factor is the MJF power when available, the computed norm otherwise.
if update and not configFile:
    gConfig.setOptionValue('/LocalSite/CPUScalingFactor', mjfPower if mjfPower else norm)
    gConfig.setOptionValue('/LocalSite/CPUNormalizationFactor', norm)
    gConfig.dumpLocalCFGToFile(gConfig.diracConfigFilePath)

# With an explicit config file, update (or create) that file instead
if configFile:
    from DIRAC.Core.Utilities.CFG import CFG
    cfg = CFG()
    try:
        # Attempt to open the given file
        cfg.loadFromFile(configFile)
    except Exception:
        # Best effort: an unreadable file simply means we start from an empty CFG.
        # Exception (not a bare except) so Ctrl-C and SystemExit are not swallowed.
        gLogger.debug('Could not load %s, starting from an empty configuration' % configFile)
    # Create the section if it does not exist
    if not cfg.existsKey('LocalSite'):
        cfg.createNewSection('LocalSite')
    cfg.setOption('/LocalSite/CPUScalingFactor', mjfPower if mjfPower else norm)
    cfg.setOption('/LocalSite/CPUNormalizationFactor', norm)
    cfg.writeToFile(configFile)

DIRAC.exit()
localConfigFile = os.path.expandvars( '$WORKSPACE') + '/PilotInstallDIR/etc/dirac.cfg' elif os.path.isfile( os.path.expandvars('$WORKSPACE') + '/ServerInstallDIR/etc/dirac.cfg'): localConfigFile = os.path.expandvars( '$WORKSPACE') + '/ServerInstallDIR/etc/dirac.cfg' elif os.path.isfile('./etc/dirac.cfg'): localConfigFile = './etc/dirac.cfg' else: print "Local CFG file not found" exit(2) localCfg.loadFromFile(localConfigFile) if not localCfg.isSection('/LocalSite'): localCfg.createNewSection('/LocalSite') localCfg.setOption('/LocalSite/CPUTimeLeft', 5000) localCfg.setOption('/DIRAC/Security/UseServerCertificate', False) if not sMod: if not setup: setup = gConfig.getValue('/DIRAC/Setup') if not setup: setup = 'dirac-JenkinsSetup' if not vo: vo = gConfig.getValue('/DIRAC/VirtualOrganization') if not vo: vo = 'dirac' if not localCfg.isSection('/DIRAC/VOPolicy'): localCfg.createNewSection('/DIRAC/VOPolicy')
# ProductionSandboxSE # { # BackendType = DISET # AccessProtocol.1 # { # Host = localhost # Port = 9196 # ProtocolName = DIP # Protocol = dips # Path = /scratch/workspace/%s/sandboxes % setupName # Access = remote # SpaceToken = # WSUrl = # } # } localCfg.createNewSection('Resources/StorageElements/') localCfg.createNewSection('Resources/StorageElements/ProductionSandboxSE') localCfg.setOption('Resources/StorageElements/ProductionSandboxSE/BackendType', 'DISET') localCfg.createNewSection( 'Resources/StorageElements/ProductionSandboxSE/AccessProtocol.1') localCfg.setOption( 'Resources/StorageElements/ProductionSandboxSE/AccessProtocol.1/Host', 'localhost') localCfg.setOption( 'Resources/StorageElements/ProductionSandboxSE/AccessProtocol.1/Port', '9196') localCfg.setOption( 'Resources/StorageElements/ProductionSandboxSE/AccessProtocol.1/ProtocolName', 'DIP') localCfg.setOption(
class JobManifest(object):
    """Job description held as a CFG object, loadable from JDL or CFG text.

    A "dirty" flag records whether the manifest was modified through this class.
    """

    def __init__(self, manifest=""):
        """Create an empty manifest, optionally loading ``manifest`` into it.

        :param str manifest: JDL or CFG text
        :raises Exception: if the text cannot be loaded
        """
        self.__manifest = CFG()
        self.__dirty = False
        # Operations helper, created on first use by __getCSValue
        self.__ops = False
        if manifest:
            result = self.load(manifest)
            if not result["OK"]:
                raise Exception(result["Message"])

    def isDirty(self):
        """Return the dirty flag."""
        return self.__dirty

    def setDirty(self):
        """Set the dirty flag."""
        self.__dirty = True

    def clearDirty(self):
        """Clear the dirty flag."""
        self.__dirty = False

    def load(self, dataString):
        """
        Auto discover format type based on [ .. ] of JDL

        Text enclosed in square brackets is loaded as JDL, anything else
        (including an empty string) as CFG.
        """
        dataString = dataString.strip()
        # startswith/endswith do not fail on an empty string, unlike indexing
        if dataString.startswith("[") and dataString.endswith("]"):
            return self.loadJDL(dataString)
        else:
            return self.loadCFG(dataString)

    def loadJDL(self, jdlString):
        """
        Load job manifest from JDL format

        On failure the manifest is reset to an empty CFG and the error is returned.
        """
        result = loadJDLAsCFG(jdlString.strip())
        if not result["OK"]:
            self.__manifest = CFG()
            return result
        # The value is a (cfg, position) pair; only the cfg is needed
        self.__manifest = result["Value"][0]
        return S_OK()

    def loadCFG(self, cfgString):
        """
        Load job manifest from CFG format
        """
        try:
            self.__manifest.loadFromBuffer(cfgString)
        except Exception as e:
            return S_ERROR("Can't load manifest from cfg: %s" % str(e))
        return S_OK()

    def dumpAsCFG(self):
        """Return the manifest as CFG text."""
        return str(self.__manifest)

    def getAsCFG(self):
        """Return a clone of the manifest CFG object."""
        return self.__manifest.clone()

    def dumpAsJDL(self):
        """Return the manifest converted with dumpCFGAsJDL."""
        return dumpCFGAsJDL(self.__manifest)

    def __getCSValue(self, varName, defaultVal=None):
        """Read a value through Operations for the manifest's group and setup.

        Names not starting with "/" are looked up under ``JobDescription/``.
        """
        if not self.__ops:
            self.__ops = Operations(group=self.__manifest["OwnerGroup"], setup=self.__manifest["DIRACSetup"])
        if varName[0] != "/":
            varName = "JobDescription/%s" % varName
        return self.__ops.getValue(varName, defaultVal)

    def __checkNumericalVar(self, varName, defaultVal, minVal, maxVal):
        """
        Check a numerical var

        The value (from the manifest, else the ``Default<varName>`` setting) is
        converted to an integer, clamped between the ``Min<varName>`` and
        ``Max<varName>`` settings and written back if it differs from the
        manifest value.

        :returns: S_OK(value) or S_ERROR if the value is not a number
        """
        initialVal = False
        if varName not in self.__manifest:
            varValue = self.__getCSValue("Default%s" % varName, defaultVal)
        else:
            varValue = self.__manifest[varName]
            initialVal = varValue
        try:
            # int() promotes to long automatically on Python 2, so it accepts
            # the same inputs as long() while also existing on Python 3
            varValue = int(varValue)
        except (ValueError, TypeError):
            return S_ERROR("%s must be a number" % varName)
        minVal = self.__getCSValue("Min%s" % varName, minVal)
        maxVal = self.__getCSValue("Max%s" % varName, maxVal)
        varValue = max(minVal, min(varValue, maxVal))
        if initialVal != varValue:
            self.__manifest.setOption(varName, varValue)
        return S_OK(varValue)

    def __checkChoiceVar(self, varName, defaultVal, choices):
        """
        Check a choice var

        The value (from the manifest, else the ``Default<varName>`` setting) must
        be one of the ``Choices<varName>`` setting, which defaults to ``choices``.
        """
        initialVal = False
        if varName not in self.__manifest:
            varValue = self.__getCSValue("Default%s" % varName, defaultVal)
        else:
            varValue = self.__manifest[varName]
            initialVal = varValue
        if varValue not in self.__getCSValue("Choices%s" % varName, choices):
            return S_ERROR("%s is not a valid value for %s" % (varValue, varName))
        if initialVal != varValue:
            self.__manifest.setOption(varName, varValue)
        return S_OK(varValue)

    def __checkMultiChoice(self, varName, choices):
        """
        Check a multi choice var

        Every comma-separated item of the manifest value must be one of the
        ``Choices<varName>`` setting, which defaults to ``choices``. A variable
        missing from the manifest is accepted.
        """
        initialVal = False
        if varName not in self.__manifest:
            return S_OK()
        else:
            varValue = self.__manifest[varName]
            initialVal = varValue
        choices = self.__getCSValue("Choices%s" % varName, choices)
        for v in List.fromChar(varValue):
            if v not in choices:
                return S_ERROR("%s is not a valid value for %s" % (v, varName))
        if initialVal != varValue:
            self.__manifest.setOption(varName, varValue)
        return S_OK(varValue)

    def __checkMaxInputData(self, maxNumber):
        """
        Check Maximum Number of Input Data files allowed
        """
        varName = "InputData"
        if varName not in self.__manifest:
            return S_OK()
        varValue = self.__manifest[varName]
        if len(List.fromChar(varValue)) > maxNumber:
            return S_ERROR(
                "Number of Input Data Files (%s) greater than current limit: %s"
                % (len(List.fromChar(varValue)), maxNumber)
            )
        return S_OK()

    def __contains__(self, key):
        """Check if the manifest has the required key"""
        return key in self.__manifest

    def setOptionsFromDict(self, varDict):
        """Set every entry of ``varDict`` as an option, in sorted key order."""
        for k in sorted(varDict):
            self.setOption(k, varDict[k])

    def check(self):
        """
        Check that the manifest is OK

        :returns: S_OK(), or the S_ERROR of the first check that fails
        """
        for k in ["OwnerName", "OwnerDN", "OwnerGroup", "DIRACSetup"]:
            if k not in self.__manifest:
                return S_ERROR("Missing var %s in manifest" % k)

        # Check CPUTime
        result = self.__checkNumericalVar("CPUTime", 86400, 100, 500000)
        if not result["OK"]:
            return result

        result = self.__checkNumericalVar("Priority", 1, 0, 10)
        if not result["OK"]:
            return result

        allowedSubmitPools = getSubmitPools(self.__manifest["OwnerGroup"])
        result = self.__checkMultiChoice("SubmitPools", list(set(allowedSubmitPools)))
        if not result["OK"]:
            return result

        result = self.__checkMultiChoice("PilotTypes", ["private"])
        if not result["OK"]:
            return result

        maxInputData = Operations().getValue("JobDescription/MaxInputData", 500)
        result = self.__checkMaxInputData(maxInputData)
        if not result["OK"]:
            return result

        transformationTypes = Operations().getValue("Transformations/DataProcessing", [])
        result = self.__checkMultiChoice("JobType", ["User", "Test", "Hospital"] + transformationTypes)
        if not result["OK"]:
            return result
        return S_OK()

    def createSection(self, secName, contents=False):
        """Create a new section, optionally filled with a CFG object.

        :returns: S_OK(result of createNewSection), or S_ERROR if the section
                  exists or ``contents`` is not a CFG
        """
        if secName not in self.__manifest:
            if contents and not isinstance(contents, CFG):
                return S_ERROR("Contents for section %s is not a cfg object" % secName)
            self.__dirty = True
            return S_OK(self.__manifest.createNewSection(secName, contents=contents))
        return S_ERROR("Section %s already exists" % secName)

    def getSection(self, secName):
        """Return S_OK(section), or S_ERROR if it is missing or empty.

        The manifest is marked dirty in every case, since the flag is set
        before the lookup.
        """
        self.__dirty = True
        # Check existence first instead of indexing a key that may be absent
        if secName not in self.__manifest:
            return S_ERROR("%s does not exist" % secName)
        sec = self.__manifest[secName]
        if not sec:
            return S_ERROR("%s section empty" % secName)
        return S_OK(sec)

    def setSectionContents(self, secName, contents):
        """Replace the contents of a section, creating it if needed.

        :returns: S_ERROR if ``contents`` is not a CFG; None on success
        """
        if contents and not isinstance(contents, CFG):
            return S_ERROR("Contents for section %s is not a cfg object" % secName)
        self.__dirty = True
        if secName in self.__manifest:
            self.__manifest[secName].reset()
            self.__manifest[secName].mergeWith(contents)
        else:
            self.__manifest.createNewSection(secName, contents=contents)

    def setOption(self, varName, varValue):
        """
        Set a var in job manifest

        ``varName`` may be a "/"-separated path; missing intermediate sections
        are created.
        """
        self.__dirty = True
        levels = List.fromChar(varName, "/")
        cfg = self.__manifest
        for level in levels[:-1]:
            if level not in cfg:
                cfg.createNewSection(level)
            cfg = cfg[level]
        cfg.setOption(levels[-1], varValue)

    def remove(self, opName):
        """Delete the key at the "/"-separated path ``opName``.

        :returns: S_OK(), or S_ERROR if any part of the path does not exist
        """
        levels = List.fromChar(opName, "/")
        cfg = self.__manifest
        for level in levels[:-1]:
            if level not in cfg:
                return S_ERROR("%s does not exist" % opName)
            cfg = cfg[level]
        if cfg.deleteKey(levels[-1]):
            self.__dirty = True
            return S_OK()
        return S_ERROR("%s does not exist" % opName)

    def getOption(self, varName, defaultValue=None):
        """
        Get a variable from the job manifest
        """
        cfg = self.__manifest
        return cfg.getOption(varName, defaultValue)

    def getOptionList(self, section=""):
        """
        Get a list of variables in a section of the job manifest
        """
        cfg = self.__manifest.getRecursive(section)
        if not cfg or "value" not in cfg:
            return []
        cfg = cfg["value"]
        return cfg.listOptions()

    def isOption(self, opName):
        """
        Check if it is a valid option
        """
        return self.__manifest.isOption(opName)

    def getSectionList(self, section=""):
        """
        Get a list of sections in the job manifest
        """
        cfg = self.__manifest.getRecursive(section)
        if not cfg or "value" not in cfg:
            return []
        cfg = cfg["value"]
        return cfg.listSections()
class JobManifest(object):
    """Job description held as a CFG object, loadable from JDL or CFG text.

    A "dirty" flag records whether the manifest was modified through this class.
    """

    def __init__(self, manifest=""):
        """Create an empty manifest, optionally loading ``manifest`` into it.

        :param str manifest: JDL or CFG text
        :raises Exception: if the text cannot be loaded
        """
        self.__manifest = CFG()
        self.__dirty = False
        # Operations helper, created on first use by __getCSValue
        self.__ops = False
        if manifest:
            result = self.load(manifest)
            if not result['OK']:
                raise Exception(result['Message'])

    def isDirty(self):
        """Return the dirty flag."""
        return self.__dirty

    def setDirty(self):
        """Set the dirty flag."""
        self.__dirty = True

    def clearDirty(self):
        """Clear the dirty flag."""
        self.__dirty = False

    def load(self, dataString):
        """
        Auto discover format type based on [ .. ] of JDL

        Text enclosed in square brackets is loaded as JDL, anything else
        (including an empty string) as CFG.
        """
        dataString = dataString.strip()
        # startswith/endswith do not fail on an empty string, unlike indexing
        if dataString.startswith("[") and dataString.endswith("]"):
            return self.loadJDL(dataString)
        else:
            return self.loadCFG(dataString)

    def loadJDL(self, jdlString):
        """
        Load job manifest from JDL format

        On failure the manifest is reset to an empty CFG and the error is returned.
        """
        result = loadJDLAsCFG(jdlString.strip())
        if not result['OK']:
            self.__manifest = CFG()
            return result
        # The value is a (cfg, position) pair; only the cfg is needed
        self.__manifest = result['Value'][0]
        return S_OK()

    def loadCFG(self, cfgString):
        """
        Load job manifest from CFG format
        """
        try:
            self.__manifest.loadFromBuffer(cfgString)
        except Exception as e:
            return S_ERROR("Can't load manifest from cfg: %s" % str(e))
        return S_OK()

    def dumpAsCFG(self):
        """Return the manifest as CFG text."""
        return str(self.__manifest)

    def getAsCFG(self):
        """Return a clone of the manifest CFG object."""
        return self.__manifest.clone()

    def dumpAsJDL(self):
        """Return the manifest converted with dumpCFGAsJDL."""
        return dumpCFGAsJDL(self.__manifest)

    def __getCSValue(self, varName, defaultVal=None):
        """Read a value through Operations for the manifest's group and setup.

        Names not starting with "/" are looked up under ``JobDescription/``.
        """
        if not self.__ops:
            self.__ops = Operations(group=self.__manifest['OwnerGroup'],
                                    setup=self.__manifest['DIRACSetup'])
        if varName[0] != "/":
            varName = "JobDescription/%s" % varName
        return self.__ops.getValue(varName, defaultVal)

    def __checkNumericalVar(self, varName, defaultVal, minVal, maxVal):
        """
        Check a numerical var

        The value (from the manifest, else the ``Default<varName>`` setting) is
        converted to an integer, clamped between the ``Min<varName>`` and
        ``Max<varName>`` settings and written back if it differs from the
        manifest value.

        :returns: S_OK(value) or S_ERROR if the value is not a number
        """
        initialVal = False
        if varName not in self.__manifest:
            varValue = self.__getCSValue("Default%s" % varName, defaultVal)
        else:
            varValue = self.__manifest[varName]
            initialVal = varValue
        try:
            # int() promotes to long automatically on Python 2, so it accepts
            # the same inputs as long() while also existing on Python 3
            varValue = int(varValue)
        except (ValueError, TypeError):
            # Only conversion failures mean "not a number"; BaseException would
            # also have swallowed KeyboardInterrupt and SystemExit
            return S_ERROR("%s must be a number" % varName)
        minVal = self.__getCSValue("Min%s" % varName, minVal)
        maxVal = self.__getCSValue("Max%s" % varName, maxVal)
        varValue = max(minVal, min(varValue, maxVal))
        if initialVal != varValue:
            self.__manifest.setOption(varName, varValue)
        return S_OK(varValue)

    def __checkChoiceVar(self, varName, defaultVal, choices):
        """
        Check a choice var

        The value (from the manifest, else the ``Default<varName>`` setting) must
        be one of the ``Choices<varName>`` setting, which defaults to ``choices``.
        """
        initialVal = False
        if varName not in self.__manifest:
            varValue = self.__getCSValue("Default%s" % varName, defaultVal)
        else:
            varValue = self.__manifest[varName]
            initialVal = varValue
        if varValue not in self.__getCSValue("Choices%s" % varName, choices):
            return S_ERROR("%s is not a valid value for %s" % (varValue, varName))
        if initialVal != varValue:
            self.__manifest.setOption(varName, varValue)
        return S_OK(varValue)

    def __checkMultiChoice(self, varName, choices):
        """
        Check a multi choice var

        Every comma-separated item of the manifest value must be one of the
        ``Choices<varName>`` setting, which defaults to ``choices``. A variable
        missing from the manifest is accepted.
        """
        initialVal = False
        if varName not in self.__manifest:
            return S_OK()
        else:
            varValue = self.__manifest[varName]
            initialVal = varValue
        choices = self.__getCSValue("Choices%s" % varName, choices)
        for v in List.fromChar(varValue):
            if v not in choices:
                return S_ERROR("%s is not a valid value for %s" % (v, varName))
        if initialVal != varValue:
            self.__manifest.setOption(varName, varValue)
        return S_OK(varValue)

    def __checkMaxInputData(self, maxNumber):
        """
        Check Maximum Number of Input Data files allowed
        """
        varName = "InputData"
        if varName not in self.__manifest:
            return S_OK()
        varValue = self.__manifest[varName]
        if len(List.fromChar(varValue)) > maxNumber:
            return S_ERROR('Number of Input Data Files (%s) greater than current limit: %s' %
                           (len(List.fromChar(varValue)), maxNumber))
        return S_OK()

    def __contains__(self, key):
        """ Check if the manifest has the required key
        """
        return key in self.__manifest

    def setOptionsFromDict(self, varDict):
        """Set every entry of ``varDict`` as an option, in sorted key order."""
        for k in sorted(varDict):
            self.setOption(k, varDict[k])

    def check(self):
        """
        Check that the manifest is OK

        :returns: S_OK(), or the S_ERROR of the first check that fails
        """
        for k in ['OwnerName', 'OwnerDN', 'OwnerGroup', 'DIRACSetup']:
            if k not in self.__manifest:
                return S_ERROR("Missing var %s in manifest" % k)

        # Check CPUTime
        result = self.__checkNumericalVar("CPUTime", 86400, 100, 500000)
        if not result['OK']:
            return result

        result = self.__checkNumericalVar("Priority", 1, 0, 10)
        if not result['OK']:
            return result

        allowedSubmitPools = getSubmitPools(self.__manifest['OwnerGroup'])
        result = self.__checkMultiChoice("SubmitPools", list(set(allowedSubmitPools)))
        if not result['OK']:
            return result

        result = self.__checkMultiChoice("PilotTypes", ['private'])
        if not result['OK']:
            return result

        maxInputData = Operations().getValue("JobDescription/MaxInputData", 500)
        result = self.__checkMaxInputData(maxInputData)
        if not result['OK']:
            return result

        transformationTypes = Operations().getValue("Transformations/DataProcessing", [])
        result = self.__checkMultiChoice("JobType", ['User', 'Test', 'Hospital'] + transformationTypes)
        if not result['OK']:
            return result
        return S_OK()

    def createSection(self, secName, contents=False):
        """Create a new section, optionally filled with a CFG object.

        :returns: S_OK(result of createNewSection), or S_ERROR if the section
                  exists or ``contents`` is not a CFG
        """
        if secName not in self.__manifest:
            if contents and not isinstance(contents, CFG):
                return S_ERROR("Contents for section %s is not a cfg object" % secName)
            self.__dirty = True
            return S_OK(self.__manifest.createNewSection(secName, contents=contents))
        return S_ERROR("Section %s already exists" % secName)

    def getSection(self, secName):
        """Return S_OK(section), or S_ERROR if it is missing or empty.

        The manifest is marked dirty in every case, since the flag is set
        before the lookup.
        """
        self.__dirty = True
        if secName not in self.__manifest:
            return S_ERROR("%s does not exist" % secName)
        sec = self.__manifest[secName]
        if not sec:
            return S_ERROR("%s section empty" % secName)
        return S_OK(sec)

    def setSectionContents(self, secName, contents):
        """Replace the contents of a section, creating it if needed.

        :returns: S_ERROR if ``contents`` is not a CFG; None on success
        """
        if contents and not isinstance(contents, CFG):
            return S_ERROR("Contents for section %s is not a cfg object" % secName)
        self.__dirty = True
        if secName in self.__manifest:
            self.__manifest[secName].reset()
            self.__manifest[secName].mergeWith(contents)
        else:
            self.__manifest.createNewSection(secName, contents=contents)

    def setOption(self, varName, varValue):
        """
        Set a var in job manifest

        ``varName`` may be a "/"-separated path; missing intermediate sections
        are created.
        """
        self.__dirty = True
        levels = List.fromChar(varName, "/")
        cfg = self.__manifest
        for level in levels[:-1]:
            if level not in cfg:
                cfg.createNewSection(level)
            cfg = cfg[level]
        cfg.setOption(levels[-1], varValue)

    def remove(self, opName):
        """Delete the key at the "/"-separated path ``opName``.

        :returns: S_OK(), or S_ERROR if any part of the path does not exist
        """
        levels = List.fromChar(opName, "/")
        cfg = self.__manifest
        for level in levels[:-1]:
            if level not in cfg:
                return S_ERROR("%s does not exist" % opName)
            cfg = cfg[level]
        if cfg.deleteKey(levels[-1]):
            self.__dirty = True
            return S_OK()
        return S_ERROR("%s does not exist" % opName)

    def getOption(self, varName, defaultValue=None):
        """
        Get a variable from the job manifest
        """
        cfg = self.__manifest
        return cfg.getOption(varName, defaultValue)

    def getOptionList(self, section=""):
        """
        Get a list of variables in a section of the job manifest
        """
        cfg = self.__manifest.getRecursive(section)
        if not cfg or 'value' not in cfg:
            return []
        cfg = cfg['value']
        return cfg.listOptions()

    def isOption(self, opName):
        """
        Check if it is a valid option
        """
        return self.__manifest.isOption(opName)

    def getSectionList(self, section=""):
        """
        Get a list of sections in the job manifest
        """
        cfg = self.__manifest.getRecursive(section)
        if not cfg or 'value' not in cfg:
            return []
        cfg = cfg['value']
        return cfg.listSections()