def _parseConfigTemplate(self, templatePath, cfg=None):
    """Parse the ConfigTemplate.cfg files.

    :param str templatePath: path to the folder containing a ConfigTemplate.cfg file
    :param CFG cfg: cfg to merge with the systems config
    :returns: CFG object
    """
    if cfg is None:
        cfg = CFG()
    # the system name is the template's folder name, minus a trailing "System"
    systemName = os.path.split(templatePath.rstrip("/"))[1]
    if systemName.lower().endswith('system'):
        systemName = systemName[:-len('System')]
    # when a system selection is configured, skip systems that are not in it
    if self.systems and systemName not in self.systems:
        return S_OK(cfg)
    templatePath = os.path.join(templatePath, 'ConfigTemplate.cfg')
    if not os.path.exists(templatePath):
        return S_ERROR("File not found: %s" % templatePath)
    templateCfg = CFG()
    templateCfg.loadFromFile(templatePath)
    # wrap the template under a "/<System>" section before merging
    wrapperCfg = CFG()
    wrapperCfg.createNewSection("/%s" % systemName, contents=templateCfg)
    return S_OK(cfg.mergeWith(wrapperCfg))
def parseConfigTemplate(self, templatePath, cfg):
    """Parse the ConfigTemplate.cfg files.

    :param str templatePath: path to the folder containing a ConfigTemplate.cfg file
    :param CFG cfg: cfg to merge with the systems config
    :returns: CFG object
    """
    # derive the system name from the folder name, dropping a trailing "System"
    folderName = os.path.split(templatePath.rstrip("/"))[1]
    if folderName.lower().endswith("system"):
        folderName = folderName[: -len("System")]
    templatePath = os.path.join(templatePath, "ConfigTemplate.cfg")
    if not os.path.exists(templatePath):
        return S_ERROR("File not found: %s" % templatePath)
    templateCfg = CFG()
    try:
        templateCfg.loadFromFile(templatePath)
    except ValueError as err:
        # a malformed template is reported but does not abort the whole run
        LOG.error("Failed loading file %r: %r", templatePath, err)
        self.retVal = 1
        return S_ERROR()
    cfg.createNewSection("/Systems/%s" % folderName, contents=templateCfg)
    return S_OK(cfg)
def checkAgentOptions(getOptionMock, systemName, agentName, agentLocation, ignoreOptions=None):
    """Ensure that all the agent options are properly documented.

    :param getOptionMock: Mock object for agentmodule.get_amOption function
    :param str systemName: name of the **System**
    :param str agentName: name of the **Agent**
    :param str agentLocation: path to the folder containing the agent module
    :param list ignoreOptions: list of options to ignore
    """
    if ignoreOptions is None:
        ignoreOptions = []
    # Options the AgentModule base class always consumes.  Build a fresh list
    # instead of extending in place: the original extend() mutated the caller's
    # list, so the base-class defaults leaked back into it between invocations.
    ignoreOptions = list(
        set(ignoreOptions)
        | {"PollingTime", "Status", "Enabled", "MaxCycles", "LogOutputs", "ControlDirectory", "shifterProxy"}
    )
    config = CFG()
    LOG.info("Testing %s/%s, ignoring options %s", systemName, agentName, ignoreOptions)
    # expect the ConfigTemplate one level above the agent module
    configFilePath = os.path.join(agentLocation, "..", "ConfigTemplate.cfg")
    config.loadFromFile(configFilePath)
    optionsDict = config.getAsDict("Agents/%s" % agentName)
    # flatten nested sections into dotted option names
    outDict = {}
    _parseOption(outDict, optionsDict)
    optionsDict = outDict
    LOG.info("Calls: %s", pformat(getOptionMock.call_args_list))
    LOG.info("Options found in ConfigTemplate: %s ", list(optionsDict.keys()))
    # check that values in ConfigTemplate are used
    for option, value in optionsDict.items():
        if any(ignoreOp in option for ignoreOp in ignoreOptions):
            LOG.info("From Agent: ignoring option %r with value %r, (%s)", option, value, type(value))
            continue
        LOG.info("Looking for call to option %r with value %r, (%s)", option, value, type(value))
        if not isinstance(value, bool) and not value:  # empty string, list, dict ...
            # any "empty" default in the agent counts as a match for an empty template value
            assert any(call(option, null) in getOptionMock.call_args_list for null in ({}, set(), [], "", 0, None))
        else:
            # the agent may wrap a scalar template value into a one-element list
            assert (
                call(option, value) in getOptionMock.call_args_list
                or call(option, [value]) in getOptionMock.call_args_list
            )
    # check that options used in the agent are in the ConfigTemplates
    for opCall in getOptionMock.call_args_list:
        optionArguments = opCall[0]
        if len(optionArguments) != 2:
            continue
        optionName = optionArguments[0]
        optionValue = optionArguments[1]
        if optionName in ignoreOptions:
            LOG.info("From Template: ignoring option %r with %r", optionName, optionValue)
            continue
        LOG.info("Checking Template option %r with %r", optionName, optionValue)
        assert optionName in optionsDict
        if not optionsDict[optionName]:
            assert not optionValue
            continue
        assert optionsDict[optionName] == optionValue or [optionsDict[optionName]] == optionValue
def loadFile(self, fileName):
    """Load a cfg file from disk and merge it into the local configuration.

    :param str fileName: path to the cfg file to load
    :returns: result of mergeWithLocal, or S_ERROR if the file cannot be read
    """
    try:
        fileCFG = CFG()
        fileCFG.loadFromFile(fileName)
    except IOError:
        # NOTE(review): on IOError fileCFG is still the empty CFG created above
        # (loadFromFile failed), so this merge looks like a no-op — presumably
        # historical; confirm before removing.
        self.localCFG = self.localCFG.mergeWith(fileCFG)
        return S_ERROR("Can't load a cfg file '%s'" % fileName)
    return self.mergeWithLocal(fileCFG)
def updateCompleteDiracCFG(self):
    """Read the dirac.cfg and update the Systems sections from the ConfigTemplate.cfg files.

    Writes an RST page containing the merged, indented configuration to
    ``self.config.cfg_targetFile`` and returns ``self.retVal``, or 1 when the
    base dirac.cfg cannot be found.
    """
    compCfg = CFG()
    mainDiracCfgPath = self.config.cfg_baseFile
    if not os.path.exists(mainDiracCfgPath):
        LOG.error("Failed to find Main Dirac cfg at %r", mainDiracCfgPath)
        return 1
    self.prepareDiracCFG()
    LOG.info("Extracting default configuration from %r", mainDiracCfgPath)
    loadCFG = CFG()
    loadCFG.loadFromFile(mainDiracCfgPath)
    # base dirac.cfg first, then overlay the per-system templates
    compCfg = loadCFG.mergeWith(compCfg)
    cfg = self.getSystemsCFG()
    compCfg = compCfg.mergeWith(cfg)
    diracCfgOutput = self.config.cfg_targetFile
    LOG.info("Writing output to %r", diracCfgOutput)
    with open(diracCfgOutput, "w") as rst:
        rst.write(
            textwrap.dedent(
                """
                .. _full_configuration_example:

                ==========================
                Full Configuration Example
                ==========================

                .. This file is created by docs/Tools/UpdateDiracCFG.py

                Below is a complete example configuration with anotations for some sections::

                """
            )
        )
        # indent the cfg text (one leading space per line, so RST treats it as a literal block)
        cfgString = "".join(" " + line for line in str(compCfg).splitlines(True))
        # fix the links, add back the # for targets
        # match .html with following character using positive look ahead
        htmlMatch = re.compile(r"\.html(?=[a-zA-Z0-9])")
        cfgString = re.sub(htmlMatch, ".html#", cfgString)
        rst.write(cfgString)
    return self.retVal
def _updateConfiguration(self, key, value, path="/LocalSite"):
    """Update local configuration to be used by submitted job wrappers"""
    cfg = CFG()
    # extra options file (in the cwd) takes precedence over the default dirac.cfg
    cfgFile = os.path.join(".", self.extraOptions) if self.extraOptions else os.path.join(rootPath, "etc", "dirac.cfg")
    cfg.loadFromFile(cfgFile)
    # walk the path, creating any missing intermediate sections
    currentSection = "/"
    for part in path.split("/")[1:]:
        currentSection = os.path.join(currentSection, part)
        if not cfg.isSection(currentSection):
            cfg.createNewSection(currentSection)
    cfg.setOption("%s/%s" % (currentSection, key), value)
    cfg.writeToFile(cfgFile)
def getComputingElementDefaults(ceName="", ceType="", cfg=None, currentSectionPath=""):
    """
    Return cfgDefaults with defaults for the given CEs defined either in arguments or in the provided cfg

    :param str ceName: name of a CE to (create and) populate from the arguments
    :param str ceType: value set as the "CEType" option of ceName
    :param str cfg: path to a cfg file to load the ComputingElements section from
    :param str currentSectionPath: command-line section whose extra options are copied onto ceName
    :returns: CFG object with per-CE sections, defaults filled in from the central configuration
    """
    cesCfg = CFG()
    if cfg:
        try:
            cesCfg.loadFromFile(cfg)
            cesPath = cfgInstallPath("ComputingElements")
            # descend to the ComputingElements section so cesCfg holds only the CEs
            if cesCfg.isSection(cesPath):
                for section in cfgPathToList(cesPath):
                    cesCfg = cesCfg[section]
        except Exception:
            # NOTE(review): any failure while loading/descending silently yields an
            # empty CFG — deliberate best-effort behavior, kept as-is.
            return CFG()
    # Overwrite the cfg with Command line arguments
    if ceName:
        if not cesCfg.isSection(ceName):
            cesCfg.createNewSection(ceName)
        if currentSectionPath:
            # Add Options from Command Line
            optionsDict = __getExtraOptions(currentSectionPath)
            for name, value in optionsDict.items():
                cesCfg[ceName].setOption(name, value)  # pylint: disable=no-member
        if ceType:
            cesCfg[ceName].setOption("CEType", ceType)  # pylint: disable=no-member
    ceDefaultSection = cfgPath(defaultSection("ComputingElements"))
    # Load Default for the given type from Central configuration is defined
    ceDefaults = __gConfigDefaults(ceDefaultSection)
    # NOTE(review): the loop variable shadows the ceName parameter; safe here since
    # the parameter is not used below, but worth renaming eventually.
    for ceName in cesCfg.listSections():
        if "CEType" in cesCfg[ceName]:
            ceType = cesCfg[ceName]["CEType"]
            if ceType in ceDefaults:
                # only fill defaults for options not explicitly set on the CE
                for option in ceDefaults[ceType].listOptions():  # pylint: disable=no-member
                    if option not in cesCfg[ceName]:
                        cesCfg[ceName].setOption(
                            option, ceDefaults[ceType][option]  # pylint: disable=unsubscriptable-object
                        )
    return cesCfg
class JobRepository(object):
    """Persistent record of submitted jobs, stored as a CFG file on disk."""

    def __init__(self, repository=None):
        """Open (or create) the repository file and ensure a 'Jobs' section exists.

        :param str repository: path to the repository file;
            defaults to ``$HOME/.dirac.repo.rep`` (or the CWD when HOME is unset)
        """
        self.location = repository
        if not self.location:
            if "HOME" in os.environ:
                self.location = '%s/.dirac.repo.rep' % os.environ['HOME']
            else:
                self.location = '%s/.dirac.repo.rep' % os.getcwd()
        self.repo = CFG()
        if os.path.exists(self.location):
            self.repo.loadFromFile(self.location)
            if not self.repo.existsKey('Jobs'):
                self.repo.createNewSection('Jobs')
        else:
            self.repo.createNewSection('Jobs')
        self.OK = True
        # write back immediately so a fresh repository exists on disk
        written = self._writeRepository(self.location)
        if not written:
            self.OK = False

    def isOK(self):
        """Return True when the repository could be written at construction time."""
        return self.OK

    def readRepository(self):
        """Return S_OK with the 'Jobs' section as a plain dictionary."""
        return S_OK(self.repo.getAsDict('Jobs'))

    def writeRepository(self, alternativePath=None):
        """Write the repository to disk.

        :param str alternativePath: write here instead of the default location
        :returns: S_OK(destination) or S_ERROR on failure
        """
        destination = self.location
        if alternativePath:
            destination = alternativePath
        written = self._writeRepository(destination)
        if not written:
            return S_ERROR("Failed to write repository")
        return S_OK(destination)

    def resetRepository(self, jobIDs=None):
        """Reset the given jobs (all jobs when none given) to the 'Submitted' state.

        :param list jobIDs: job IDs to reset; None or an empty list means all jobs
        """
        # BUGFIX: the default used to be a mutable list ([]), which is shared
        # between calls in Python; None is the correct sentinel.  `not jobIDs`
        # covers both None and an explicitly passed empty list, as before.
        if not jobIDs:
            jobs = self.readRepository()['Value']
            jobIDs = list(jobs)
        paramDict = {'State': 'Submitted',
                     'Retrieved': 0,
                     'OutputData': 0}
        for jobID in jobIDs:
            self._writeJob(jobID, paramDict, True)
        self._writeRepository(self.location)
        return S_OK()

    def _writeRepository(self, path):
        """Atomically replace *path* with the current repository contents.

        Writes to a temp file first so a failed write cannot corrupt the
        existing repository; returns True on success, False otherwise.
        """
        handle, tmpName = tempfile.mkstemp()
        written = self.repo.writeToFile(tmpName)
        os.close(handle)
        if not written:
            if os.path.exists(tmpName):
                os.remove(tmpName)
            return written
        if os.path.exists(path):
            gLogger.debug("Replacing %s" % path)
        try:
            shutil.move(tmpName, path)
            return True
        except Exception as x:
            gLogger.error("Failed to overwrite repository.", x)
            # the temp file is deliberately left behind as a recovery copy
            gLogger.info("If your repository is corrupted a backup can be found %s" % tmpName)
            return False

    def appendToRepository(self, repoLocation):
        """Merge another repository file into this one (this one wins on conflicts)."""
        if not os.path.exists(repoLocation):
            gLogger.error("Secondary repository does not exist", repoLocation)
            return S_ERROR("Secondary repository does not exist")
        self.repo = CFG().loadFromFile(repoLocation).mergeWith(self.repo)
        self._writeRepository(self.location)
        return S_OK()

    def addJob(self, jobID, state='Submitted', retrieved=0, outputData=0, update=False):
        """Record a new job (or update an existing one when update=True)."""
        paramDict = {'State': state,
                     'Time': self._getTime(),
                     'Retrieved': int(retrieved),
                     'OutputData': outputData}
        self._writeJob(jobID, paramDict, update)
        self._writeRepository(self.location)
        return S_OK(jobID)

    def updateJob(self, jobID, paramDict):
        """Update the stored parameters of a single known job."""
        if self._existsJob(jobID):
            paramDict['Time'] = self._getTime()
            self._writeJob(jobID, paramDict, True)
            self._writeRepository(self.location)
        return S_OK()

    def updateJobs(self, jobDict):
        """Update several jobs at once; the repository is written once at the end."""
        for jobID, paramDict in jobDict.items():
            if self._existsJob(jobID):
                paramDict['Time'] = self._getTime()
                self._writeJob(jobID, paramDict, True)
        self._writeRepository(self.location)
        return S_OK()

    def _getTime(self):
        """Return the current local time as a ctime string with underscores for spaces."""
        runtime = time.ctime()
        return runtime.replace(" ", "_")

    def _writeJob(self, jobID, paramDict, update):
        """Write a job's parameters into the in-memory repo (no disk write)."""
        jobID = str(jobID)
        jobExists = self._existsJob(jobID)
        if jobExists and (not update):
            gLogger.warn("Job exists and not overwriting")
            return S_ERROR("Job exists and not overwriting")
        if not jobExists:
            self.repo.createNewSection('Jobs/%s' % jobID)
        for key, value in paramDict.items():
            self.repo.setOption('Jobs/%s/%s' % (jobID, key), value)
        return S_OK()

    def removeJob(self, jobID):
        """Delete a job entry and persist the repository if anything was removed."""
        res = self.repo['Jobs'].deleteKey(str(jobID))  # pylint: disable=no-member
        if res:
            self._writeRepository(self.location)
        return S_OK()

    def existsJob(self, jobID):
        """Return S_OK(bool) telling whether the job is recorded."""
        return S_OK(self._existsJob(jobID))

    def _existsJob(self, jobID):
        """Return True when a 'Jobs/<jobID>' section exists."""
        return self.repo.isSection('Jobs/%s' % jobID)

    def getLocation(self):
        """Return S_OK with the repository file path."""
        return S_OK(self.location)

    def getSize(self):
        """Return S_OK with the number of recorded jobs."""
        return S_OK(len(self.repo.getAsDict('Jobs')))
def mergeFromFile(self, filename):
    """Load a cfg file and merge its contents into the held configuration data."""
    loaded = CFG()
    loaded.loadFromFile(filename)
    self.cfgData = self.cfgData.mergeWith(loaded)
class ConfigurationData(object):
    """Holds the local, remote and merged CS configuration trees.

    Thread access is serialized by a hand-rolled Event/Lock/counter protocol
    (see lock/unlock/dangerZoneStart/dangerZoneEnd) which can be disabled via
    the DIRAC_FEWER_CFG_LOCKS environment variable.
    """

    def __init__(self, loadDefaultCFG=True):
        # locking is ON unless DIRAC_FEWER_CFG_LOCKS is set to a truthy value
        envVar = os.environ.get("DIRAC_FEWER_CFG_LOCKS", "no").lower()
        self.__locksEnabled = envVar not in ("y", "yes", "t", "true", "on", "1")
        if self.__locksEnabled:
            lr = LockRing()
            self.threadingEvent = lr.getEvent()
            self.threadingEvent.set()
            self.threadingLock = lr.getLock()
            self.runningThreadsNumber = 0
        self.__compressedConfigurationData = None
        self.configurationPath = "/DIRAC/Configuration"
        self.backupsDir = os.path.join(DIRAC.rootPath, "etc", "csbackup")
        self._isService = False
        self.localCFG = CFG()
        self.remoteCFG = CFG()
        self.mergedCFG = CFG()
        self.remoteServerList = []
        if loadDefaultCFG:
            defaultCFGFile = os.path.join(DIRAC.rootPath, "etc", "dirac.cfg")
            gLogger.debug("dirac.cfg should be at", "%s" % defaultCFGFile)
            retVal = self.loadFile(defaultCFGFile)
            if not retVal["OK"]:
                gLogger.warn("Can't load %s file" % defaultCFGFile)
        self.sync()

    def getBackupDir(self):
        """Return the directory where CS backups are stored."""
        return self.backupsDir

    def sync(self):
        """Rebuild mergedCFG and the server list after local/remote CFG changes."""
        gLogger.debug("Updating configuration internals")
        # local options take precedence over remote ones in the merge
        self.mergedCFG = self.remoteCFG.mergeWith(self.localCFG)
        self.remoteServerList = []
        localServers = self.extractOptionFromCFG("%s/Servers" % self.configurationPath,
                                                 self.localCFG,
                                                 disableDangerZones=True)
        if localServers:
            self.remoteServerList.extend(List.fromChar(localServers, ","))
        remoteServers = self.extractOptionFromCFG("%s/Servers" % self.configurationPath,
                                                  self.remoteCFG,
                                                  disableDangerZones=True)
        if remoteServers:
            self.remoteServerList.extend(List.fromChar(remoteServers, ","))
        self.remoteServerList = List.uniqueElements(self.remoteServerList)
        # invalidate the compressed-data cache; recomputed lazily by getCompressedData
        self.__compressedConfigurationData = None

    def loadFile(self, fileName):
        """Load a cfg file and merge it into the local configuration."""
        try:
            fileCFG = CFG()
            fileCFG.loadFromFile(fileName)
        except IOError:
            # NOTE(review): fileCFG is still empty here, so this merge is a no-op
            self.localCFG = self.localCFG.mergeWith(fileCFG)
            return S_ERROR("Can't load a cfg file '%s'" % fileName)
        return self.mergeWithLocal(fileCFG)

    def mergeWithLocal(self, extraCFG):
        """Merge extraCFG into localCFG under the write lock, then resync."""
        self.lock()
        try:
            self.localCFG = self.localCFG.mergeWith(extraCFG)
            self.unlock()
            gLogger.debug("CFG merged")
        except Exception as e:
            self.unlock()
            return S_ERROR("Cannot merge with new cfg: %s" % str(e))
        self.sync()
        return S_OK()

    def loadRemoteCFGFromCompressedMem(self, data):
        """Decompress a zlib blob and load it as the remote configuration."""
        if six.PY3 and isinstance(data, str):
            # data may arrive as a surrogate-escaped str on py3; get bytes back
            data = data.encode(errors="surrogateescape")
        sUncompressedData = zlib.decompress(data).decode()
        self.loadRemoteCFGFromMem(sUncompressedData)

    def loadRemoteCFGFromMem(self, data):
        """Load a configuration text buffer as the remote configuration."""
        self.lock()
        self.remoteCFG.loadFromBuffer(data)
        self.unlock()
        self.sync()

    def loadConfigurationData(self, fileName=False):
        """Load the remote configuration from a file (default: <Name>.cfg under etc/)."""
        name = self.getName()
        self.lock()
        try:
            if not fileName:
                fileName = "%s.cfg" % name
            if fileName[0] != "/":
                fileName = os.path.join(DIRAC.rootPath, "etc", fileName)
            self.remoteCFG.loadFromFile(fileName)
        except Exception as e:
            print(e)
        self.unlock()
        self.sync()

    def getCommentFromCFG(self, path, cfg=False):
        """Return the comment attached to the entry at *path*, or None."""
        if not cfg:
            cfg = self.mergedCFG
        self.dangerZoneStart()
        try:
            levelList = [level.strip() for level in path.split("/") if level.strip() != ""]
            for section in levelList[:-1]:
                cfg = cfg[section]
            return self.dangerZoneEnd(cfg.getComment(levelList[-1]))
        except Exception:
            pass
        return self.dangerZoneEnd(None)

    def getSectionsFromCFG(self, path, cfg=False, ordered=False):
        """Return the list of subsection names at *path*, or None."""
        if not cfg:
            cfg = self.mergedCFG
        self.dangerZoneStart()
        try:
            levelList = [level.strip() for level in path.split("/") if level.strip() != ""]
            for section in levelList:
                cfg = cfg[section]
            return self.dangerZoneEnd(cfg.listSections(ordered))
        except Exception:
            pass
        return self.dangerZoneEnd(None)

    def getOptionsFromCFG(self, path, cfg=False, ordered=False):
        """Return the list of option names at *path*, or None."""
        if not cfg:
            cfg = self.mergedCFG
        self.dangerZoneStart()
        try:
            levelList = [level.strip() for level in path.split("/") if level.strip() != ""]
            for section in levelList:
                cfg = cfg[section]
            return self.dangerZoneEnd(cfg.listOptions(ordered))
        except Exception:
            pass
        return self.dangerZoneEnd(None)

    def extractOptionFromCFG(self, path, cfg=False, disableDangerZones=False):
        """Return the value of the option at *path*, or None when absent."""
        if not cfg:
            cfg = self.mergedCFG
        if not disableDangerZones:
            self.dangerZoneStart()
        try:
            levelList = [level.strip() for level in path.split("/") if level.strip() != ""]
            for section in levelList[:-1]:
                cfg = cfg[section]
            if levelList[-1] in cfg.listOptions():
                return self.dangerZoneEnd(cfg[levelList[-1]])
        except Exception:
            pass
        if not disableDangerZones:
            self.dangerZoneEnd()

    def setOptionInCFG(self, path, value, cfg=False, disableDangerZones=False):
        """Set the option at *path*, creating intermediate sections as needed."""
        if not cfg:
            cfg = self.localCFG
        if not disableDangerZones:
            self.dangerZoneStart()
        try:
            levelList = [level.strip() for level in path.split("/") if level.strip() != ""]
            for section in levelList[:-1]:
                if section not in cfg.listSections():
                    cfg.createNewSection(section)
                cfg = cfg[section]
            cfg.setOption(levelList[-1], value)
        finally:
            if not disableDangerZones:
                self.dangerZoneEnd()
        self.sync()

    def deleteOptionInCFG(self, path, cfg=False):
        """Delete the entry at *path* (no-op when an intermediate section is missing)."""
        if not cfg:
            cfg = self.localCFG
        self.dangerZoneStart()
        try:
            levelList = [level.strip() for level in path.split("/") if level.strip() != ""]
            for section in levelList[:-1]:
                if section not in cfg.listSections():
                    return
                cfg = cfg[section]
            cfg.deleteKey(levelList[-1])
        finally:
            self.dangerZoneEnd()
        self.sync()

    def generateNewVersion(self):
        """Stamp the remote configuration with a fresh timestamp version."""
        self.setVersion(Time.toString())
        self.sync()
        gLogger.info("Generated new version %s" % self.getVersion())

    def setVersion(self, version, cfg=False):
        """Set the Version option (on remoteCFG by default)."""
        if not cfg:
            cfg = self.remoteCFG
        self.setOptionInCFG("%s/Version" % self.configurationPath, version, cfg)

    def getVersion(self, cfg=False):
        """Return the configuration Version, "0" when unset."""
        if not cfg:
            cfg = self.remoteCFG
        value = self.extractOptionFromCFG("%s/Version" % self.configurationPath, cfg)
        if value:
            return value
        return "0"

    def getName(self):
        """Return the configuration Name from the merged view."""
        return self.extractOptionFromCFG("%s/Name" % self.configurationPath, self.mergedCFG)

    def exportName(self):
        """Copy the configuration Name into the remote configuration."""
        return self.setOptionInCFG("%s/Name" % self.configurationPath, self.getName(), self.remoteCFG)

    def getRefreshTime(self):
        """Return RefreshTime in seconds (default 300)."""
        try:
            return int(self.extractOptionFromCFG("%s/RefreshTime" % self.configurationPath, self.mergedCFG))
        except Exception:
            return 300

    def getPropagationTime(self):
        """Return PropagationTime in seconds (default 300)."""
        try:
            return int(self.extractOptionFromCFG("%s/PropagationTime" % self.configurationPath, self.mergedCFG))
        except Exception:
            return 300

    def getSlavesGraceTime(self):
        """Return SlavesGraceTime in seconds (default 600)."""
        try:
            return int(self.extractOptionFromCFG("%s/SlavesGraceTime" % self.configurationPath, self.mergedCFG))
        except Exception:
            return 600

    def mergingEnabled(self):
        """Return True when EnableAutoMerge is set to a truthy value."""
        try:
            val = self.extractOptionFromCFG("%s/EnableAutoMerge" % self.configurationPath, self.mergedCFG)
            return val.lower() in ("yes", "true", "y")
        except Exception:
            return False

    def getAutoPublish(self):
        """Return False only when AutoPublish is explicitly disabled."""
        value = self.extractOptionFromCFG("%s/AutoPublish" % self.configurationPath, self.localCFG)
        if value and value.lower() in ("no", "false", "n"):
            return False
        else:
            return True

    def getAutoSlaveSync(self):
        """Return False only when AutoSlaveSync is explicitly disabled."""
        value = self.extractOptionFromCFG("%s/AutoSlaveSync" % self.configurationPath, self.localCFG)
        if value and value.lower() in ("no", "false", "n"):
            return False
        else:
            return True

    def getServers(self):
        """Return a copy of the known configuration server list."""
        return list(self.remoteServerList)

    def getConfigurationGateway(self):
        """Return the /DIRAC/Gateway option from the local configuration."""
        return self.extractOptionFromCFG("/DIRAC/Gateway", self.localCFG)

    def setServers(self, sServers):
        """Set the Servers option on the remote configuration and resync."""
        self.setOptionInCFG("%s/Servers" % self.configurationPath, sServers, self.remoteCFG)
        self.sync()

    def deleteLocalOption(self, optionPath):
        """Remove an option from the local configuration."""
        self.deleteOptionInCFG(optionPath, self.localCFG)

    def getMasterServer(self):
        """Return the MasterServer URL from the remote configuration."""
        return self.extractOptionFromCFG("%s/MasterServer" % self.configurationPath, self.remoteCFG)

    def setMasterServer(self, sURL):
        """Set the MasterServer URL on the remote configuration and resync."""
        self.setOptionInCFG("%s/MasterServer" % self.configurationPath, sURL, self.remoteCFG)
        self.sync()

    def getCompressedData(self):
        """Return the remote configuration as zlib-compressed bytes (cached)."""
        if self.__compressedConfigurationData is None:
            self.__compressedConfigurationData = zlib.compress(str(self.remoteCFG).encode(), 9)
        return self.__compressedConfigurationData

    def isMaster(self):
        """Return True when the local configuration flags this instance as master."""
        value = self.extractOptionFromCFG("%s/Master" % self.configurationPath, self.localCFG)
        if value and value.lower() in ("yes", "true", "y"):
            return True
        else:
            return False

    def getServicesPath(self):
        """Return the fixed services section path."""
        return "/Services"

    def setAsService(self):
        """Mark this instance as running inside a service."""
        self._isService = True

    def isService(self):
        """Return True when running inside a service."""
        return self._isService

    def useServerCertificate(self):
        """Return True when /DIRAC/Security/UseServerCertificate is truthy."""
        value = self.extractOptionFromCFG("/DIRAC/Security/UseServerCertificate")
        if value and value.lower() in ("y", "yes", "true"):
            return True
        return False

    def skipCACheck(self):
        """Return True when /DIRAC/Security/SkipCAChecks is truthy."""
        value = self.extractOptionFromCFG("/DIRAC/Security/SkipCAChecks")
        if value and value.lower() in ("y", "yes", "true"):
            return True
        return False

    def dumpLocalCFGToFile(self, fileName):
        """Write the local configuration to *fileName*."""
        try:
            with open(fileName, "w") as fd:
                fd.write(str(self.localCFG))
            gLogger.verbose("Configuration file dumped", "'%s'" % fileName)
        except IOError:
            gLogger.error("Can't dump cfg file", "'%s'" % fileName)
            return S_ERROR("Can't dump cfg file '%s'" % fileName)
        return S_OK()

    def getRemoteCFG(self):
        """Return the remote CFG object (not a copy)."""
        return self.remoteCFG

    def getMergedCFGAsString(self):
        """Return the merged configuration serialized to a string."""
        return str(self.mergedCFG)

    def dumpRemoteCFGToFile(self, fileName):
        """Write the remote configuration to *fileName* (exceptions propagate)."""
        with open(fileName, "w") as fd:
            fd.write(str(self.remoteCFG))

    def __backupCurrentConfiguration(self, backupName):
        """Zip the current CS data file into the dated backups directory."""
        configurationFilename = "%s.cfg" % self.getName()
        configurationFile = os.path.join(DIRAC.rootPath, "etc", configurationFilename)
        today = Time.date()
        # backups are laid out as <backupsDir>/<year>/<month>/
        backupPath = os.path.join(self.getBackupDir(), str(today.year), "%02d" % today.month)
        mkDir(backupPath)
        backupFile = os.path.join(backupPath, configurationFilename.replace(".cfg", ".%s.zip" % backupName))
        if os.path.isfile(configurationFile):
            gLogger.info("Making a backup of configuration in %s" % backupFile)
            try:
                with zipfile.ZipFile(backupFile, "w", zipfile.ZIP_DEFLATED) as zf:
                    zf.write(configurationFile,
                             "%s.backup.%s" % (os.path.split(configurationFile)[1], backupName))
            except Exception:
                gLogger.exception()
                gLogger.error("Cannot backup configuration data file", "file %s" % backupFile)
        else:
            gLogger.warn("CS data file does not exist", configurationFile)

    def writeRemoteConfigurationToDisk(self, backupName=False):
        """Persist the remote configuration to etc/<Name>.cfg, optionally backing up first."""
        configurationFile = os.path.join(DIRAC.rootPath, "etc", "%s.cfg" % self.getName())
        try:
            with open(configurationFile, "w") as fd:
                fd.write(str(self.remoteCFG))
        except Exception as e:
            gLogger.fatal("Cannot write new configuration to disk!",
                          "file %s exception %s" % (configurationFile, repr(e)))
            return S_ERROR("Can't write cs file %s!: %s" % (configurationFile, repr(e).replace(",)", ")")))
        if backupName:
            self.__backupCurrentConfiguration(backupName)
        return S_OK()

    def setRemoteCFG(self, cfg, disableSync=False):
        """Replace the remote configuration with a clone of *cfg*."""
        self.remoteCFG = cfg.clone()
        if not disableSync:
            self.sync()

    def lock(self):
        """
        Locks Event to prevent further threads from reading.
        Stops current thread until no other thread is accessing.
        PRIVATE USE
        """
        if not self.__locksEnabled:
            return
        # clear the event so new readers block in dangerZoneStart, then spin
        # until all in-flight readers have drained
        self.threadingEvent.clear()
        while self.runningThreadsNumber > 0:
            time.sleep(0.1)

    def unlock(self):
        """
        Unlocks Event.
        PRIVATE USE
        """
        if not self.__locksEnabled:
            return
        self.threadingEvent.set()

    def dangerZoneStart(self):
        """
        Start of danger zone. This danger zone may be or may not be a mutual exclusion zone.
        Counter is maintained to know how many threads are inside and be able to enable and disable mutual exclusion.
        PRIVATE USE
        """
        if not self.__locksEnabled:
            return
        # wait for any writer (lock()) to finish, then bump the reader counter
        self.threadingEvent.wait()
        self.threadingLock.acquire()
        self.runningThreadsNumber += 1
        try:
            self.threadingLock.release()
        except thread.error:
            pass

    def dangerZoneEnd(self, returnValue=None):
        """
        End of danger zone.
        PRIVATE USE
        """
        if not self.__locksEnabled:
            return returnValue
        self.threadingLock.acquire()
        self.runningThreadsNumber -= 1
        try:
            self.threadingLock.release()
        except thread.error:
            pass
        return returnValue
def checkAgentOptions(getOptionMock, systemName, agentName, ignoreOptions=None, extension='DIRAC'):
    """Ensure that all the agent options are properly documented.

    :param getOptionMock: Mock object for agentmodule.get_amOption function
    :param str systemName: name of the **System**
    :param str agentName: name of the **Agent**
    :param list ignoreOptions: list of options to ignore
    :param str extension: name of the DIRAC **Extension** where the Agent comes from
    """
    if ignoreOptions is None:
        ignoreOptions = []
    # Options the AgentModule base class always consumes.  Build a fresh list
    # instead of extending in place: the original extend() mutated the caller's
    # list, so the base-class defaults leaked back into it between invocations.
    ignoreOptions = list(
        set(ignoreOptions) | {
            'PollingTime', 'Status', 'Enabled', 'MaxCycles', 'LogOutputs',
            'ControlDirectory', 'shifterProxy'})
    config = CFG()
    LOG.info("Testing %s/%s, ignoring options %s", systemName, agentName, ignoreOptions)
    # get the location where DIRAC is in from basefolder/DIRAC/__ini__.py
    configFilePath = os.path.join(
        os.path.dirname(os.path.dirname(DIRAC.__file__)), extension, systemName,
        'ConfigTemplate.cfg')
    config.loadFromFile(configFilePath)
    optionsDict = config.getAsDict('Agents/%s' % agentName)
    # flatten nested sections into dotted option names
    outDict = {}
    _parseOption(outDict, optionsDict)
    optionsDict = outDict
    LOG.info("Calls: %s", pformat(getOptionMock.call_args_list))
    LOG.info("Options found in ConfigTemplate: %s ", list(optionsDict.keys()))
    # check that values in ConfigTemplate are used
    for option, value in optionsDict.items():
        if any(ignoreOp in option for ignoreOp in ignoreOptions):
            LOG.info("From Agent: ignoring option %r with value %r, (%s)", option, value, type(value))
            continue
        LOG.info("Looking for call to option %r with value %r, (%s)", option, value, type(value))
        if not isinstance(value, bool) and not value:  # empty string, list, dict ...
            # any "empty" default in the agent counts as a match for an empty template value
            assert any(
                call(option, null) in getOptionMock.call_args_list
                for null in ({}, set(), [], '', 0, None))
        else:
            # the agent may wrap a scalar template value into a one-element list
            assert call(option, value) in getOptionMock.call_args_list or \
                call(option, [value]) in getOptionMock.call_args_list
    # check that options used in the agent are in the ConfigTemplates
    for opCall in getOptionMock.call_args_list:
        optionArguments = opCall[0]
        if len(optionArguments) != 2:
            continue
        optionName = optionArguments[0]
        optionValue = optionArguments[1]
        if optionName in ignoreOptions:
            LOG.info("From Template: ignoring option %r with %r", optionName, optionValue)
            continue
        LOG.info("Checking Template option %r with %r", optionName, optionValue)
        assert optionName in optionsDict
        if not optionsDict[optionName]:
            assert not optionValue
            continue
        assert optionsDict[optionName] == optionValue or [optionsDict[optionName]] == optionValue
os.path.expandvars("$WORKSPACE") + "/PilotInstallDIR/etc/dirac.cfg"): localConfigFile = os.path.expandvars( "$WORKSPACE") + "/PilotInstallDIR/etc/dirac.cfg" elif os.path.isfile( os.path.expandvars("$WORKSPACE") + "/ServerInstallDIR/etc/dirac.cfg"): localConfigFile = os.path.expandvars( "$WORKSPACE") + "/ServerInstallDIR/etc/dirac.cfg" elif os.path.isfile("./etc/dirac.cfg"): localConfigFile = "./etc/dirac.cfg" else: print("Local CFG file not found") exit(2) localCfg.loadFromFile(localConfigFile) if not localCfg.isSection("/LocalSite"): localCfg.createNewSection("/LocalSite") localCfg.setOption("/LocalSite/CPUTimeLeft", 5000) localCfg.setOption("/DIRAC/Security/UseServerCertificate", False) if not sMod: if not setup: setup = gConfig.getValue("/DIRAC/Setup") if not setup: setup = "dirac-JenkinsSetup" if not localCfg.isSection("/Operations"): localCfg.createNewSection("/Operations") if not localCfg.isSection("/Operations/%s" % setup): localCfg.createNewSection("/Operations/%s" % setup)
def execute(self):
    """The JobAgent execution method.

    Runs one agent cycle: checks the ComputingElement for free slots and
    remaining CPU time, asks the Matcher for a job, validates the returned
    JDL, and hands the job over to ``self._submitJob``.

    :returns: S_OK / S_ERROR result dict; failures that should stop the
        agent go through ``self.__finish``.
    """
    # Temporary mechanism to pass a shutdown message to the agent
    if os.path.exists('/var/lib/dirac_drain'):
        return self.__finish('Node is being drained by an operator')

    # Check if we can match jobs at all
    self.log.verbose('Job Agent execution loop')

    result = self.computingElement.available()
    if not result['OK']:
        self.log.info('Resource is not available', result['Message'])
        return self.__finish('CE Not Available')

    ceInfoDict = result['CEInfoDict']
    runningJobs = ceInfoDict.get("RunningJobs")
    availableSlots = result['Value']

    if not availableSlots:
        if runningJobs:
            # Slots are full but jobs are still running: a normal, successful cycle
            self.log.info('No available slots', ': %d running jobs' % runningJobs)
            return S_OK('Job Agent cycle complete with %d running jobs' % runningJobs)
        self.log.info('CE is not available (and there are no running jobs)')
        return self.__finish('CE Not Available')

    if self.jobCount:
        # Only call timeLeft utility after a job has been picked up
        self.log.info('Attempting to check CPU time left for filling mode')
        if self.fillingMode:
            self.timeLeft = self.computeCPUWorkLeft()
            self.log.info('normalized CPU units remaining in slot', self.timeLeft)
            if self.timeLeft <= self.minimumTimeLeft:
                return self.__finish('No more time left')
            # Need to update the Configuration so that the new value is published in the next matching request
            result = self.computingElement.setCPUTimeLeft(cpuTimeLeft=self.timeLeft)
            if not result['OK']:
                return self.__finish(result['Message'])

            # Update local configuration to be used by submitted job wrappers
            localCfg = CFG()
            if self.extraOptions:
                localConfigFile = os.path.join('.', self.extraOptions)
            else:
                localConfigFile = os.path.join(rootPath, "etc", "dirac.cfg")
            localCfg.loadFromFile(localConfigFile)
            if not localCfg.isSection('/LocalSite'):
                localCfg.createNewSection('/LocalSite')
            localCfg.setOption('/LocalSite/CPUTimeLeft', self.timeLeft)
            # NOTE(review): rewrites the local dirac.cfg in place so job
            # wrappers pick up the updated CPUTimeLeft
            localCfg.writeToFile(localConfigFile)
        else:
            return self.__finish('Filling Mode is Disabled')

    # if we are here we assume that a job can be matched
    result = self.computingElement.getDescription()
    if not result['OK']:
        return result

    # We can have several prioritized job retrieval strategies
    if isinstance(result['Value'], dict):
        ceDictList = [result['Value']]
    elif isinstance(result['Value'], list):
        # This is the case for Pool ComputingElement, and parameter 'MultiProcessorStrategy'
        ceDictList = result['Value']

    # Try each CE description in turn until the Matcher returns a job
    for ceDict in ceDictList:

        # Add pilot information
        gridCE = gConfig.getValue('LocalSite/GridCE', 'Unknown')
        if gridCE != 'Unknown':
            ceDict['GridCE'] = gridCE
        if 'PilotReference' not in ceDict:
            ceDict['PilotReference'] = str(self.pilotReference)
        ceDict['PilotBenchmark'] = self.cpuFactor
        ceDict['PilotInfoReportedFlag'] = self.pilotInfoReportedFlag

        # Add possible job requirements
        result = gConfig.getOptionsDict('/AgentJobRequirements')
        if result['OK']:
            requirementsDict = result['Value']
            ceDict.update(requirementsDict)
            self.log.info('Requirements:', requirementsDict)

        self.log.verbose('CE dict', ceDict)

        # here finally calling the matcher
        start = time.time()
        jobRequest = MatcherClient().requestJob(ceDict)
        matchTime = time.time() - start
        self.log.info('MatcherTime', '= %.2f (s)' % (matchTime))
        if jobRequest['OK']:
            break

    self.stopAfterFailedMatches = self.am_getOption(
        'StopAfterFailedMatches', self.stopAfterFailedMatches)

    if not jobRequest['OK']:

        # if we don't match a job, independently from the reason,
        # we wait a bit longer before trying again
        self.am_setOption("PollingTime", int(self.am_getOption("PollingTime") * 1.5))

        # The failure reason is only available as message text, hence the
        # string matching below
        if re.search('No match found', jobRequest['Message']):
            self.log.notice('Job request OK, but no match found',
                            ': %s' % (jobRequest['Message']))
            self.matchFailedCount += 1
            if self.matchFailedCount > self.stopAfterFailedMatches:
                return self.__finish(
                    'Nothing to do for more than %d cycles' % self.stopAfterFailedMatches)
            return S_OK(jobRequest['Message'])
        elif jobRequest['Message'].find("seconds timeout") != -1:
            self.log.error('Timeout while requesting job', jobRequest['Message'])
            self.matchFailedCount += 1
            if self.matchFailedCount > self.stopAfterFailedMatches:
                return self.__finish(
                    'Nothing to do for more than %d cycles' % self.stopAfterFailedMatches)
            return S_OK(jobRequest['Message'])
        elif jobRequest['Message'].find("Pilot version does not match") != -1:
            # Version mismatch is fatal for this pilot: return S_ERROR, not S_OK
            errorMsg = 'Pilot version does not match the production version'
            self.log.error(errorMsg, jobRequest['Message'].replace(errorMsg, ''))
            return S_ERROR(jobRequest['Message'])
        else:
            self.log.notice('Failed to get jobs', ': %s' % (jobRequest['Message']))
            self.matchFailedCount += 1
            if self.matchFailedCount > self.stopAfterFailedMatches:
                return self.__finish(
                    'Nothing to do for more than %d cycles' % self.stopAfterFailedMatches)
            return S_OK(jobRequest['Message'])

    # Reset the Counter
    self.matchFailedCount = 0

    # If we are here it is because we matched a job
    matcherInfo = jobRequest['Value']
    if not self.pilotInfoReportedFlag:
        # Check the flag after the first access to the Matcher
        self.pilotInfoReportedFlag = matcherInfo.get('PilotInfoReportedFlag', False)

    jobID = matcherInfo['JobID']
    jobReport = JobReport(jobID, 'JobAgent@%s' % self.siteName)
    # These three keys must be present and non-empty in the Matcher answer
    matcherParams = ['JDL', 'DN', 'Group']
    for param in matcherParams:
        if param not in matcherInfo:
            jobReport.setJobStatus(status='Failed',
                                   minor='Matcher did not return %s' % (param))
            return self.__finish('Matcher Failed')
        elif not matcherInfo[param]:
            jobReport.setJobStatus(status='Failed',
                                   minor='Matcher returned null %s' % (param))
            return self.__finish('Matcher Failed')
        else:
            self.log.verbose('Matcher returned',
                             '%s = %s ' % (param, matcherInfo[param]))

    jobJDL = matcherInfo['JDL']
    jobGroup = matcherInfo['Group']
    ownerDN = matcherInfo['DN']

    # Everything else the Matcher returned is forwarded to the optimizers
    optimizerParams = {}
    for key in matcherInfo:
        if key not in matcherParams:
            optimizerParams[key] = matcherInfo[key]

    parameters = self._getJDLParameters(jobJDL)
    if not parameters['OK']:
        jobReport.setJobStatus(status='Failed',
                               minor='Could Not Extract JDL Parameters')
        self.log.warn('Could Not Extract JDL Parameters', parameters['Message'])
        return self.__finish('JDL Problem')

    params = parameters['Value']
    if 'JobID' not in params:
        msg = 'Job has not JobID defined in JDL parameters'
        jobReport.setJobStatus(status='Failed', minor=msg)
        self.log.warn(msg)
        return self.__finish('JDL Problem')
    else:
        # JDL JobID overrides the one received from the Matcher
        jobID = params['JobID']

    if 'JobType' not in params:
        self.log.warn('Job has no JobType defined in JDL parameters')
        jobType = 'Unknown'
    else:
        jobType = params['JobType']

    if 'CPUTime' not in params:
        self.log.warn('Job has no CPU requirement defined in JDL parameters')

    # Job requirements for determining the number of processors
    # the minimum number of processors requested
    processors = int(params.get('NumberOfProcessors',
                                int(params.get('MinNumberOfProcessors', 1))))
    # the maximum number of processors allowed to the payload
    maxNumberOfProcessors = int(params.get('MaxNumberOfProcessors', 0))
    # need or not the whole node for the job
    wholeNode = 'WholeNode' in params
    mpTag = 'MultiProcessor' in params.get('Tags', [])

    # Forward the agent's extra CS options to dirac-jobexec payloads
    if self.extraOptions and 'dirac-jobexec' in params.get('Executable', '').strip():
        params['Arguments'] = (params.get('Arguments', '') + ' ' +
                               self.extraOptions).strip()
        params['ExtraOptions'] = self.extraOptions

    self.log.verbose('Job request successful: \n', jobRequest['Value'])
    self.log.info('Received',
                  'JobID=%s, JobType=%s, OwnerDN=%s, JobGroup=%s' %
                  (jobID, jobType, ownerDN, jobGroup))
    self.jobCount += 1
    try:
        jobReport.setJobParameter(par_name='MatcherServiceTime',
                                  par_value=str(matchTime),
                                  sendFlag=False)

        if 'BOINC_JOB_ID' in os.environ:
            # Report BOINC environment
            for thisp in ('BoincUserID', 'BoincHostID', 'BoincHostPlatform', 'BoincHostName'):
                jobReport.setJobParameter(par_name=thisp,
                                          par_value=gConfig.getValue('/LocalSite/%s' % thisp,
                                                                     'Unknown'),
                                          sendFlag=False)

        jobReport.setJobStatus(status='Matched',
                               minor='Job Received by Agent',
                               sendFlag=False)
        result_setupProxy = self._setupProxy(ownerDN, jobGroup)
        if not result_setupProxy['OK']:
            # Proxy problems: the job goes back to the queue
            return self._rescheduleFailedJob(jobID,
                                             result_setupProxy['Message'],
                                             self.stopOnApplicationFailure)
        proxyChain = result_setupProxy.get('Value')

        # Save the job jdl for external monitoring
        self.__saveJobJDLRequest(jobID, jobJDL)

        # NOTE(review): ceDict here is the last entry tried in the matching
        # loop above (the one that produced the match, or the final one)
        software = self._checkInstallSoftware(jobID, params, ceDict, jobReport)
        if not software['OK']:
            self.log.error('Failed to install software for job', '%s' % (jobID))
            errorMsg = software['Message']
            if not errorMsg:
                errorMsg = 'Failed software installation'
            return self._rescheduleFailedJob(jobID, errorMsg,
                                             self.stopOnApplicationFailure)

        self.log.debug('Before self._submitJob() (%sCE)' % (self.ceName))
        result_submitJob = self._submitJob(
            jobID=jobID,
            jobParams=params,
            resourceParams=ceDict,
            optimizerParams=optimizerParams,
            proxyChain=proxyChain,
            jobReport=jobReport,
            processors=processors,
            wholeNode=wholeNode,
            maxNumberOfProcessors=maxNumberOfProcessors,
            mpTag=mpTag)

        # Committing the JobReport before evaluating the result of job submission
        res = jobReport.commit()
        if not res['OK']:
            # Commit failed: fall back to a failover ForwardDISET request
            resFD = jobReport.generateForwardDISET()
            if not resFD['OK']:
                self.log.error("Error generating ForwardDISET operation", resFD['Message'])
            else:
                # Here we create the Request.
                op = resFD['Value']
                request = Request()
                requestName = 'jobAgent_%s' % jobID
                request.RequestName = requestName.replace('"', '')
                request.JobID = jobID
                request.SourceComponent = "JobAgent_%s" % jobID
                request.addOperation(op)
                # This might fail, but only a message would be printed.
                self._sendFailoverRequest(request)

        if not result_submitJob['OK']:
            return self.__finish(result_submitJob['Message'])
        elif 'PayloadFailed' in result_submitJob:
            # Do not keep running and do not overwrite the Payload error
            message = 'Payload execution failed with error code %s' % result_submitJob['PayloadFailed']
            if self.stopOnApplicationFailure:
                return self.__finish(message, self.stopOnApplicationFailure)
            else:
                self.log.info(message)

        self.log.debug('After %sCE submitJob()' % (self.ceName))
    except Exception as subExcept:  # pylint: disable=broad-except
        # Any unexpected error during submission: log and reschedule the job
        self.log.exception("Exception in submission", "",
                           lException=subExcept, lExcInfo=True)
        return self._rescheduleFailedJob(jobID,
                                         'Job processing failed with exception',
                                         self.stopOnApplicationFailure)

    return S_OK('Job Agent cycle complete')