def createAlgoFromInfo(info):
    """
    _createAlgoFromInfo_

    Create an Algo dictionary from basic workflow information.  When the
    info carries a 'PSetContent' reference of the form
    "couchURL;;couchDBName;;configID", attempt to load the actual config
    text out of the ConfigCache; failures there are logged and ignored.

    Returns the algo dictionary (previously this fell off the end and
    returned None).
    """
    algo = {'ApplicationName': info.get('ApplicationName'),
            'ApplicationFamily': info.get('ApplicationFamily'),
            'ApplicationVersion': info.get('ApplicationVersion'),
            'PSetHash': info.get('PSetHash'),
            'PSetContent': None,
            'InDBS': info.get('AlgoInDBS', None)}

    configString = info.get('PSetContent')
    if configString:
        try:
            # Reference format: URL;;DBName;;documentID
            split = configString.split(';;')
            cacheURL = split[0]
            cacheDB = split[1]
            configID = split[2]
        except IndexError:
            # Malformed reference: keep the algo but skip the config.
            msg = "configCache not properly formatted\n"
            msg += "configString\n: %s" % configString
            logging.error(msg)
            return algo
        try:
            configCache = ConfigCache(cacheURL, cacheDB)
            configCache.loadByID(configID)
            algo['PSetContent'] = configCache.getConfig()
        except Exception as ex:
            # Best effort only: a missing config must not block the algo.
            msg = "Exception in getting configCache from DB\n"
            msg += "Ignoring this exception and continuing without config.\n"
            msg += str(ex)
            msg += str(traceback.format_exc())
            logging.error(msg)
            logging.debug("URL: %s, DB: %s, ID: %s" % (cacheURL, cacheDB, configID))

    return algo
def createConfig(self, bad = False):
    """
    _createConfig_

    Store a small PSet tweak document in the ConfigCache and hand back
    its couch document ID.  With bad=True the tweak carries two output
    modules instead of one.
    """
    goodTweak = {'process': {'outputModules_': ['ThisIsAName'],
                             'ThisIsAName': {'dataset': {'dataTier': 'RECO',
                                                         'filterName': 'Filter'}}}}
    badTweak = {'process': {'outputModules_': ['ThisIsAName1', 'ThisIsAName2'],
                            'ThisIsAName1': {'dataset': {'dataTier': 'RECO',
                                                         'filterName': 'Filter'}},
                            'ThisIsAName2': {'dataset': {'dataTier': 'RECO',
                                                         'filterName': 'Filter'}}}}

    configCache = ConfigCache(os.environ["COUCHURL"], couchDBName = self.couchDBName)
    configCache.createUserGroup(groupname = "testGroup", username = '******')
    configCache.setPSetTweaks(PSetTweak = badTweak if bad else goodTweak)
    configCache.save()
    return configCache.getCouchID()
def createConfig(self, bad=False):
    """
    _createConfig_

    Persist a minimal PSet tweak in the ConfigCache and return the couch
    document ID.  bad=True stores a variant with two output modules.
    """
    def _module(label):
        # One output-module stanza of the tweak.
        return {"dataset": {"dataTier": "RECO", "filterName": "Filter"}}

    if bad:
        labels = ["ThisIsAName1", "ThisIsAName2"]
    else:
        labels = ["ThisIsAName"]
    tweak = {"process": {"outputModules_": labels}}
    for label in labels:
        tweak["process"][label] = _module(label)

    configCache = ConfigCache(os.environ["COUCHURL"], couchDBName=self.couchDBName)
    configCache.createUserGroup(groupname="testGroup", username="******")
    configCache.setPSetTweaks(PSetTweak=tweak)
    configCache.save()
    return configCache.getCouchID()
def __call__(self, wmTask):
    """
    Trip through steps, find CMSSW steps, pull in
    config files, PSet Tweaks etc

    For each CMSSW-type step whose configuration exposes
    'retrieveConfigUrl', the main config file is downloaded into the
    step's working area and the PSet tweak is written next to it.
    """
    for t in wmTask.steps().nodeIterator():
        t = WMStep.WMStepHelper(t)
        stepPath = "%s/%s" % (self.workingDirectory(), t.name())

        # the CMSSW has a special case with its ConfigCache argument
        if not t.stepType() in ("CMSSW", "MulticoreCMSSW"):
            continue
        if (hasattr(t.data.application.configuration, 'retrieveConfigUrl')):
            # main config file
            fileTarget = "%s/%s" % (
                stepPath,
                t.data.application.command.configuration)
            # NOTE(review): urllib.urlretrieve is Python 2 only; Python 3
            # needs urllib.request.urlretrieve -- confirm target runtime.
            urllib.urlretrieve(
                t.data.application.configuration.retrieveConfigUrl,
                fileTarget)
            # PSet Tweak: location of the config document in couch.
            cacheUrl = t.data.application.configuration.configCacheUrl
            cacheDb = t.data.application.configuration.cacheName
            configId = t.data.application.configuration.configId
            tweakTarget = t.data.application.command.psetTweak

            configCache = ConfigCache(cacheUrl, cacheDb)
            configCache.loadByID(configId)
            # Persist the tweak as JSON only if the document has one.
            tweak = TweakAPI.makeTweakFromJSON(configCache.getPSetTweaks())
            if tweak:
                tweakFile = "%s/%s" % (stepPath, tweakTarget)
                tweak.persist(tweakFile, "json")
def determineOutputModules(self, scenarioFunc = None, scenarioArgs = None,
                           configDoc = None, couchURL = None,
                           couchDBName = None):
    """
    _determineOutputModules_

    Determine the output module names and associated metadata for the
    given config, either from a ConfigCache document (when configDoc is
    given) or from the scenario arguments.
    """
    outputModules = {}
    if configDoc not in (None, ""):
        configCache = ConfigCache(couchURL, couchDBName)
        configCache.loadByID(configDoc)
        outputModules = configCache.getOutputModuleInfo()
    elif scenarioFunc in ("promptReco", "expressProcessing"):
        for output in scenarioArgs.get('outputs', []):
            outputModules[output['moduleLabel']] = {
                'dataTier': output['dataTier'],
                'filterName': output.get('filterName', None)}
    elif scenarioFunc == "alcaSkim":
        # One ALCARECO stream per skim; the skim name doubles as filter.
        for alcaSkim in scenarioArgs.get('skims', []):
            outputModules["ALCARECOStream%s" % alcaSkim] = {
                'dataTier': "ALCARECO",
                'filterName': alcaSkim}
    return outputModules
def showOriginalConfig(self, docId):
    """
    Makes a link to the original text of the config.

    Returns the config text wrapped in a <pre> block, or an error
    message string when the document cannot be found in couch.
    """
    configCache = ConfigCache(self.couchUrl, self.configDBName)
    configCache.loadByID(docId)
    configString = configCache.getConfig()
    # Identity comparison for the None sentinel (PEP 8), not '== None'.
    if configString is None:
        return "Cannot find document " + str(docId) + " in Couch DB"
    return '<pre>' + configString + '</pre>'
def determineOutputModules(self, scenarioFunc=None, scenarioArgs=None,
                           configDoc=None, couchURL=None,
                           couchDBName=None, configCacheUrl=None):
    """
    _determineOutputModules_

    Determine the output module names and associated metadata for the
    given config: from a ConfigCache document when configDoc is given,
    otherwise derived from the scenario arguments.
    """
    # set default scenarioArgs to empty dictionary if it is None.
    scenarioArgs = scenarioArgs or {}

    outputModules = {}
    if configDoc != None and configDoc != "":
        # Prefer the dedicated config cache URL when provided.
        url = configCacheUrl or couchURL
        configCache = ConfigCache(url, couchDBName)
        configCache.loadByID(configDoc)
        outputModules = configCache.getOutputModuleInfo()
    else:
        if 'outputs' in scenarioArgs and scenarioFunc in ["promptReco", "expressProcessing", "repack"]:
            for output in scenarioArgs.get('outputs', []):
                moduleLabel = output['moduleLabel']
                outputModules[moduleLabel] = {'dataTier': output['dataTier']}
                # dict.has_key() was removed in Python 3; use 'in'.
                if 'primaryDataset' in output:
                    outputModules[moduleLabel]['primaryDataset'] = output['primaryDataset']
                if 'filterName' in output:
                    outputModules[moduleLabel]['filterName'] = output['filterName']
        elif 'writeTiers' in scenarioArgs and scenarioFunc == "promptReco":
            for dataTier in scenarioArgs.get('writeTiers'):
                moduleLabel = "%soutput" % dataTier
                outputModules[moduleLabel] = {'dataTier': dataTier}
        elif scenarioFunc == "alcaSkim":
            for alcaSkim in scenarioArgs.get('skims', []):
                moduleLabel = "ALCARECOStream%s" % alcaSkim
                # PromptCalibProd skims land in the ALCAPROMPT tier.
                if alcaSkim.startswith("PromptCalibProd"):
                    dataTier = "ALCAPROMPT"
                else:
                    dataTier = "ALCARECO"
                outputModules[moduleLabel] = {'dataTier': dataTier,
                                              'primaryDataset': scenarioArgs.get('primaryDataset'),
                                              'filterName': alcaSkim}
    return outputModules
def createConfig(couchDBName):
    """
    _createConfig_

    Save a two-output-module PSet tweak into the ConfigCache and return
    the resulting couch document ID.
    """
    def _outMod(tier, filt):
        # One output-module stanza of the tweak.
        return {'dataset': {'dataTier': tier, 'filterName': filt}}

    PSetTweak = {'process': {'outputModules_': ['RECOoutput', 'ALCARECOoutput'],
                             'RECOoutput': _outMod('RECO', 'Filter'),
                             'ALCARECOoutput': _outMod('ALCARECO', 'AlcaFilter')}}

    configCache = ConfigCache(os.environ["COUCHURL"], couchDBName=couchDBName)
    configCache.createUserGroup(groupname="testGroup", username='******')
    configCache.setPSetTweaks(PSetTweak=PSetTweak)
    configCache.save()
    return configCache.getCouchID()
def validateConfigCacheExists(self, configID, couchURL, couchDBName,
                              getOutputModules=False):
    """
    _validateConfigCacheExists_

    Check that the config document can be loaded from the ConfigCache
    and that its output modules carry no duplicate dataTier/filterName
    combination.  Optionally return the output module info.
    """
    if configID == '' or configID == ' ':
        self.raiseValidationException(
            msg="ConfigCacheID is invalid and cannot be loaded")

    configCache = ConfigCache(dbURL=couchURL, couchDBName=couchDBName,
                              id=configID)
    try:
        configCache.loadByID(configID=configID)
    except ConfigCacheException:
        self.raiseValidationException(
            msg="Failure to load ConfigCache while validating workload")

    duplicateCheck = {}
    try:
        outputModuleInfo = configCache.getOutputModuleInfo()
    except Exception:
        # Something's gone wrong with trying to open the configCache
        msg = "Error in getting output modules from ConfigCache during workload validation. Check ConfigCache formatting!"
        self.raiseValidationException(msg=msg)
    for outputModule in outputModuleInfo.values():
        dataTier = outputModule.get('dataTier', None)
        filterName = outputModule.get('filterName', None)
        if not dataTier:
            self.raiseValidationException(
                msg="No DataTier in output module.")

        # Track seen filterNames per dataTier; membership on the dict
        # itself (no .keys()) via setdefault is the idiomatic form.
        seenFilters = duplicateCheck.setdefault(dataTier, [])
        if filterName in seenFilters:
            # Then we've seen this combination before
            self.raiseValidationException(
                msg="Duplicate dataTier/filterName combination.")
        seenFilters.append(filterName)

    if getOutputModules:
        return outputModuleInfo

    return
def __init__(self, **options):
    """
    Read the ConfigCache document referenced by the CouchUrl /
    CouchDBName / ConfigCacheDoc options and build the table of
    tweakable random-seed parameter paths.
    """
    GeneratorInterface.__init__(self, **options)
    self.couchUrl = options.get("CouchUrl")
    self.couchDBName = options.get("CouchDBName")
    self.couchConfigDoc = options.get("ConfigCacheDoc")

    confCache = ConfigCache(dbURL = self.couchUrl,
                            couchDBName = self.couchDBName,
                            id = self.couchConfigDoc)
    confCache.load()
    seeds = confCache.document[u'pset_tweak_details'][u'process'][u'RandomNumberGeneratorService']
    # Every service attribute except the bookkeeping 'parameters_' entry
    # names a seed that can be tweaked.
    self.seedTable = [
        "process.RandomNumberGeneratorService.%s.initialSeed" % name
        for name in seeds.keys() if name != u"parameters_"]
def determineOutputModules(self, scenarioFunc = None, scenarioArgs = None,
                           configDoc = None, couchURL = None,
                           couchDBName = None, configCacheUrl = None):
    """
    _determineOutputModules_

    Determine the output module names and associated metadata for the
    given config.  ConfigCache handles are memoised per (url, dbname)
    pair in self.config_cache.
    """
    # set default scenarioArgs to empty dictionary if it is None.
    scenarioArgs = scenarioArgs or {}

    outputModules = {}
    if configDoc != None and configDoc != "":
        url = configCacheUrl or couchURL
        cacheKey = (url, couchDBName)
        configCache = self.config_cache.get(cacheKey)
        if configCache is None:
            configCache = ConfigCache(url, couchDBName, True)
            self.config_cache[cacheKey] = configCache
        #TODO: need to change to DataCache
        #configCache.loadDocument(configDoc)
        configCache.loadByID(configDoc)
        outputModules = configCache.getOutputModuleInfo()
    else:
        if 'outputs' in scenarioArgs and scenarioFunc in ("promptReco", "expressProcessing", "repack"):
            for output in scenarioArgs.get('outputs', []):
                modInfo = {'dataTier' : output['dataTier']}
                # primaryDataset / filterName are optional per output.
                for key in ('primaryDataset', 'filterName'):
                    if key in output:
                        modInfo[key] = output[key]
                outputModules[output['moduleLabel']] = modInfo
        elif 'writeTiers' in scenarioArgs and scenarioFunc == "promptReco":
            for dataTier in scenarioArgs.get('writeTiers'):
                outputModules["%soutput" % dataTier] = {'dataTier' : dataTier}
        elif scenarioFunc == "alcaSkim":
            for alcaSkim in scenarioArgs.get('skims', []):
                tier = "ALCAPROMPT" if alcaSkim.startswith("PromptCalibProd") else "ALCARECO"
                outputModules["ALCARECOStream%s" % alcaSkim] = {
                    'dataTier' : tier,
                    'primaryDataset' : scenarioArgs.get('primaryDataset'),
                    'filterName' : alcaSkim}

    return outputModules
def setUp(self): """ _setUp_ setUp function for unittest """ # Set constants self.couchDB = "config_test" self.configURL = "RANDOM;;URL;;NAME" self.configString = "This is a random string" self.testInit = TestInit(__file__) self.testInit.setLogging() self.testInit.setDatabaseConnection() self.testInit.setSchema( customModules=["WMComponent.DBS3Buffer", 'WMCore.Agent.Database'], useDefault=False) self.testInit.setupCouch(self.couchDB, "GroupUser", "ConfigCache") myThread = threading.currentThread() self.bufferFactory = DAOFactory( package="WMComponent.DBSBuffer.Database", logger=myThread.logger, dbinterface=myThread.dbi) self.buffer3Factory = DAOFactory(package="WMComponent.DBS3Buffer", logger=myThread.logger, dbinterface=myThread.dbi) locationAction = self.bufferFactory( classname="DBSBufferFiles.AddLocation") locationAction.execute(siteName="se1.cern.ch") locationAction.execute(siteName="se1.fnal.gov") locationAction.execute(siteName="malpaquet") # Set heartbeat self.componentName = 'JobSubmitter' self.heartbeatAPI = HeartbeatAPI(self.componentName) self.heartbeatAPI.registerComponent() # Set up a config cache configCache = ConfigCache(os.environ["COUCHURL"], couchDBName=self.couchDB) configCache.createUserGroup(groupname="testGroup", username='******') self.testDir = self.testInit.generateWorkDir() psetPath = os.path.join(self.testDir, "PSet.txt") f = open(psetPath, 'w') f.write(self.configString) f.close() configCache.addConfig(newConfig=psetPath, psetHash=None) configCache.save() self.configURL = "%s;;%s;;%s" % (os.environ["COUCHURL"], self.couchDB, configCache.getCouchID()) return
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a PromptSkimming workload with the given parameters: upload
    the skim config into the ConfigCache, resolve its document ID,
    normalise the processing version, then build the workload and set
    its site/block whitelists.
    """
    # Prefer the dedicated config cache URL; fall back to the main couch.
    configCouchUrl = arguments.get("ConfigCacheUrl",
                                   None) or arguments["CouchURL"]
    injectIntoConfigCache(arguments["CMSSWVersion"], arguments["ScramArch"],
                          arguments["InitCommand"], arguments["SkimConfig"],
                          workloadName, configCouchUrl,
                          arguments["CouchDBName"],
                          arguments.get("EnvPath", None),
                          arguments.get("BinPath", None))
    try:
        configCache = ConfigCache(configCouchUrl, arguments["CouchDBName"])
        arguments["ConfigCacheID"] = configCache.getIDFromLabel(
            workloadName)
        # An empty ID means the upload above silently failed.
        if not arguments["ConfigCacheID"]:
            logging.error("The configuration was not uploaded to couch")
            raise Exception
    except Exception:
        logging.error(
            "There was an exception loading the config out of the")
        logging.error(
            "ConfigCache. Check the scramOutput.log file in the")
        logging.error(
            "PromptSkimScheduler directory to find out what went")
        logging.error("wrong.")
        raise

    # Split e.g. "v1-Tier0" style versions into string + numeric parts.
    parsedProcVer = parseT0ProcVer(arguments["ProcessingVersion"],
                                   'PromptSkim')
    arguments["ProcessingString"] = parsedProcVer["ProcString"]
    arguments["ProcessingVersion"] = parsedProcVer["ProcVer"]

    workload = DataProcessingWorkloadFactory.__call__(
        self, workloadName, arguments)

    # We need to strip off "MSS" as that causes all sorts of problems.
    if arguments["CustodialSite"].find("MSS") != -1:
        site = arguments["CustodialSite"][:-4]
    else:
        site = arguments["CustodialSite"]

    workload.setSiteWhitelist(site)
    workload.setBlockWhitelist(arguments["BlockName"])
    return workload
def validateConfigCacheExists(self, configID, couchURL, couchDBName,
                              getOutputModules = False):
    """
    _validateConfigCacheExists_

    If we have a configCache, we should probably try and load it;
    raise a validation exception when it cannot be loaded.
    """
    from WMCore.Cache.WMConfigCache import ConfigCache
    configCache = ConfigCache(dbURL = couchURL, couchDBName = couchDBName,
                              id = configID)
    try:
        configCache.loadByID(configID = configID)
    except ConfigCacheException:
        # 'except E, ex' is Python 2 only syntax; the bound exception was
        # unused, so the binding is dropped.
        self.raiseValidationException(msg = "Failure to load ConfigCache while validating workload")
def createAlgoFromInfo(info):
    """
    _createAlgoFromInfo_

    Build an Algo dictionary out of basic workflow info; when a
    well-formed "URL;;DB;;ID" config reference is present, also attach
    the config text loaded from the ConfigCache.
    """
    algo = {'ApplicationName': info['ApplicationName'],
            'ApplicationFamily': info['ApplicationFamily'],
            'ApplicationVersion': info['ApplicationVersion'],
            'PSetHash': info['PSetHash'],
            'PSetContent': None,
            'InDBS': info['AlgoInDBS']}

    configString = info.get('PSetContent')
    if not configString:
        return algo

    try:
        pieces = configString.split(';;')
        cacheURL = pieces[0]
        cacheDB = pieces[1]
        configID = pieces[2]
    except IndexError:
        msg = "configCache not properly formatted\n"
        msg += "configString\n: %s" % configString
        msg += "Not attempting to put configCache content in DBS for this algo"
        msg += "AlgoInfo: %s" % algo
        logging.error(msg)
        return algo

    if cacheURL == "None" or cacheDB == "None" or configID == "None":
        # No Config for this DB
        logging.debug("No configCache for this algo")
        return algo

    try:
        configCache = ConfigCache(cacheURL, cacheDB)
        configCache.loadByID(configID)
        algo['PSetContent'] = configCache.getConfig()
    except Exception as ex:
        msg = "Exception in getting configCache from DB\n"
        msg += "Ignoring this exception and continuing without config.\n"
        msg += str(ex)
        msg += str(traceback.format_exc())
        logging.error(msg)
        logging.debug("URL: %s, DB: %s, ID: %s" % (cacheURL, cacheDB, configID))

    return algo
def createAlgoFromInfo(info):
    """
    _createAlgoFromInfo_

    Assemble an Algo dictionary from workflow info and optionally
    resolve the "URL;;DB;;ID" config reference into the actual config
    text via the ConfigCache.
    """
    algo = {"ApplicationName": info["ApplicationName"],
            "ApplicationFamily": info["ApplicationFamily"],
            "ApplicationVersion": info["ApplicationVersion"],
            "PSetHash": info["PSetHash"],
            "PSetContent": None,
            "InDBS": info["AlgoInDBS"]}

    configString = info.get("PSetContent")
    if configString:
        parts = configString.split(";;")
        try:
            cacheURL, cacheDB, configID = parts[0], parts[1], parts[2]
        except IndexError:
            msg = "configCache not properly formatted\n"
            msg += "configString\n: %s" % configString
            msg += "Not attempting to put configCache content in DBS for this algo"
            msg += "AlgoInfo: %s" % algo
            logging.error(msg)
            return algo
        if "None" in (cacheURL, cacheDB, configID):
            # No Config for this DB
            logging.debug("No configCache for this algo")
            return algo
        try:
            configCache = ConfigCache(cacheURL, cacheDB)
            configCache.loadByID(configID)
            algo["PSetContent"] = configCache.getConfig()
        except Exception as ex:
            msg = "Exception in getting configCache from DB\n"
            msg += "Ignoring this exception and continuing without config.\n"
            msg += str(ex)
            msg += str(traceback.format_exc())
            logging.error(msg)
            logging.debug("URL: %s, DB: %s, ID: %s" % (cacheURL, cacheDB, configID))

    return algo
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a PromptSkimming workload with the given parameters: the
    skim config is uploaded to the ConfigCache, its document ID looked
    up by label, and the resulting workload gets site and block
    whitelists applied.
    """
    # Upload the skim configuration into the ConfigCache first ...
    self.injectIntoConfigCache(arguments["CMSSWVersion"], arguments["ScramArch"],
                               arguments["InitCommand"], arguments["SkimConfig"],
                               workloadName, arguments["CouchURL"],
                               arguments["CouchDBName"])
    # ... then resolve the document ID back by its label.
    configCache = ConfigCache(arguments["CouchURL"], arguments["CouchDBName"])
    arguments["ProcConfigCacheID"] = configCache.getIDFromLabel(workloadName)

    workload = DataProcessingWorkloadFactory.__call__(self, workloadName, arguments)
    workload.setSiteWhitelist(arguments["CustodialSite"])
    workload.setBlockWhitelist(arguments["BlockName"])
    return workload
def __init__(self, **options):
    """
    Load the referenced ConfigCache document and collect the paths of
    all tweakable random seeds from its RandomNumberGeneratorService.
    """
    GeneratorInterface.__init__(self, **options)
    self.couchUrl = options.get("CouchUrl")
    self.couchDBName = options.get("CouchDBName")
    self.couchConfigDoc = options.get("ConfigCacheDoc")

    confCache = ConfigCache(dbURL=self.couchUrl,
                            couchDBName=self.couchDBName,
                            id=self.couchConfigDoc)
    confCache.load()
    tweaks = confCache.document[u'pset_tweak_details']
    seeds = tweaks[u'process'][u'RandomNumberGeneratorService']
    # 'parameters_' is bookkeeping, not a seed name -- skip it.
    self.seedTable = [
        "process.RandomNumberGeneratorService.%s.initialSeed" % seedName
        for seedName in seeds if seedName != u"parameters_"]
def _getConfigCache(self, requestName, processMethod): try: request = Utilities.requestDetails(requestName) except Exception as ex: msg = "Cannot find request %s, check logs." % requestName logging.error("%s, reason: %s" % (msg, ex)) return msg url = request.get("ConfigCacheUrl", None) or self.couchUrl try: configCache = ConfigCache(url, self.configDBName) configDocId = request["ConfigCacheID"] configCache.loadByID(configDocId) except Exception as ex: msg = "Cannot find ConfigCache document %s on %s." % (configDocId, url) logging.error("%s, reason: %s" % (msg, ex)) return msg return getattr(configCache, processMethod)()
def validateConfigCacheExists(self, configID, couchURL, couchDBName,
                              getOutputModules = False):
    """
    _validateConfigCacheExists_

    If we have a configCache, we should probably try and load it;
    raise a validation exception on an invalid ID or a load failure.
    """
    if configID == '' or configID == ' ':
        self.raiseValidationException(msg = "ConfigCacheID is invalid and cannot be loaded")

    configCache = ConfigCache(dbURL = couchURL, couchDBName = couchDBName,
                              id = configID)
    try:
        configCache.loadByID(configID = configID)
    except ConfigCacheException:
        # 'except E, ex' is Python 2 only syntax; the bound exception was
        # unused, so the binding is dropped.
        self.raiseValidationException(msg = "Failure to load ConfigCache while validating workload")
def __call__(self, wmTask):
    """
    Trip through steps, find CMSSW steps, pull in
    config files, PSet Tweaks etc

    For each CMSSW-type step whose configuration carries a non-None
    'configCacheUrl', the full config is written into the step's working
    area from the ConfigCache and the PSet tweak is persisted as JSON.
    """
    for t in wmTask.steps().nodeIterator():
        t = WMStep.WMStepHelper(t)
        stepPath = "%s/%s" % (self.workingDirectory(), t.name())

        # the CMSSW has a special case with its ConfigCache argument
        if not t.stepType() in ("CMSSW", "MulticoreCMSSW"):
            continue
        if getattr(t.data.application.configuration,
                   'configCacheUrl', None) != None:
            # main config file
            fileTarget = "%s/%s" % (
                stepPath,
                t.data.application.command.configuration)
            # Direct URL retrieval replaced by ConfigCache download below.
            #urllib.urlretrieve(
            #    t.data.application.configuration.retrieveConfigUrl,
            #    fileTarget)
            # PSet Tweak: location of the config document in couch.
            cacheUrl = t.data.application.configuration.configCacheUrl
            cacheDb = t.data.application.configuration.cacheName
            configId = t.data.application.configuration.configId
            tweakTarget = t.data.application.command.psetTweak

            configCache = ConfigCache(cacheUrl, cacheDb)
            configCache.loadByID(configId)
            # Write the full config into the step area ...
            configCache.saveConfigToDisk(targetFile = fileTarget)
            # ... and persist the PSet tweak alongside it as JSON.
            tweak = TweakAPI.makeTweakFromJSON(configCache.getPSetTweaks())
            if tweak:
                tweakFile = "%s/%s" % (stepPath, tweakTarget)
                tweak.persist(tweakFile, "json")
def getConfig(self):
    """
    _getConfig_

    Store a small test tweak plus a config-file attachment in the cache
    and return the ConfigCache handle.
    """
    tweak = {'someKey': "Hello, I am a PSetTweak. It's nice to meet you."}

    configCache = ConfigCache(os.environ["COUCHURL"], couchDBName = 'config_test')
    configCache.createUserGroup(groupname = "testGroup", username = '******')
    configCache.setPSetTweaks(PSetTweak = tweak)
    configCache.attachments['configFile'] = 'This Is A Test Config'
    configCache.save()
    return configCache
def setUp(self): """ _setUp_ setUp function for unittest """ # Set constants self.couchDB = "config_test" self.configURL = "RANDOM;;URL;;NAME" self.configString = "This is a random string" self.testInit = TestInit(__file__) self.testInit.setLogging() self.testInit.setDatabaseConnection() self.testInit.setSchema(customModules = ["WMComponent.DBS3Buffer", 'WMCore.Agent.Database'], useDefault = False) self.testInit.setupCouch(self.couchDB, "GroupUser", "ConfigCache") myThread = threading.currentThread() self.bufferFactory = DAOFactory(package = "WMComponent.DBSBuffer.Database", logger = myThread.logger, dbinterface = myThread.dbi) locationAction = self.bufferFactory(classname = "DBSBufferFiles.AddLocation") locationAction.execute(siteName = "se1.cern.ch") locationAction.execute(siteName = "se1.fnal.gov") locationAction.execute(siteName = "malpaquet") # Set heartbeat self.componentName = 'JobSubmitter' self.heartbeatAPI = HeartbeatAPI(self.componentName) self.heartbeatAPI.registerComponent() # Set up a config cache configCache = ConfigCache(os.environ["COUCHURL"], couchDBName = self.couchDB) configCache.createUserGroup(groupname = "testGroup", username = '******') self.testDir = self.testInit.generateWorkDir() psetPath = os.path.join(self.testDir, "PSet.txt") f = open(psetPath, 'w') f.write(self.configString) f.close() configCache.addConfig(newConfig = psetPath, psetHash = None) configCache.save() self.configURL = "%s;;%s;;%s" % (os.environ["COUCHURL"], self.couchDB, configCache.getCouchID()) return
def validateConfigCacheExists(self, configID, couchURL, couchDBName,
                              getOutputModules = False):
    """
    _validateConfigCacheExists_

    Check that the config document can be loaded from the ConfigCache
    and that its output modules have no duplicate dataTier/filterName
    combination.  Optionally return the output module info.
    """
    if configID == '' or configID == ' ':
        self.raiseValidationException(msg = "ConfigCacheID is invalid and cannot be loaded")

    configCache = ConfigCache(dbURL = couchURL, couchDBName = couchDBName,
                              id = configID)
    try:
        configCache.loadByID(configID = configID)
    except ConfigCacheException:
        self.raiseValidationException(msg = "Failure to load ConfigCache while validating workload")

    duplicateCheck = {}
    try:
        outputModuleInfo = configCache.getOutputModuleInfo()
    except Exception:
        # Something's gone wrong with trying to open the configCache
        msg = "Error in getting output modules from ConfigCache during workload validation.  Check ConfigCache formatting!"
        self.raiseValidationException(msg = msg)
    for outputModule in outputModuleInfo.values():
        dataTier = outputModule.get('dataTier', None)
        filterName = outputModule.get('filterName', None)
        if not dataTier:
            self.raiseValidationException(msg = "No DataTier in output module.")

        # Track seen filterNames per dataTier; membership on the dict
        # itself (no .keys()) via setdefault is the idiomatic form.
        seenFilters = duplicateCheck.setdefault(dataTier, [])
        if filterName in seenFilters:
            # Then we've seen this combination before
            self.raiseValidationException(msg = "Duplicate dataTier/filterName combination.")
        seenFilters.append(filterName)

    if getOutputModules:
        return outputModuleInfo

    return
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a PromptSkimming workload with the given parameters: upload
    the skim config to the ConfigCache, then resolve its document ID.
    Any failure is logged with debugging pointers and re-raised.
    """
    self.injectIntoConfigCache(arguments["CMSSWVersion"], arguments["ScramArch"],
                               arguments["InitCommand"], arguments["SkimConfig"],
                               workloadName, arguments["CouchURL"],
                               arguments["CouchDBName"])
    try:
        configCache = ConfigCache(arguments["CouchURL"], arguments["CouchDBName"])
        arguments["ProcConfigCacheID"] = configCache.getIDFromLabel(workloadName)
    except Exception:
        # Python 3 compatible syntax ('except E, ex' is Python 2 only);
        # the bound exception was unused, so the binding is dropped.
        logging.error("There was an exception loading the config out of the")
        logging.error("ConfigCache. Check the scramOutput.log file in the")
        logging.error("PromptSkimScheduler directory to find out what went")
        logging.error("wrong.")
        raise
def determineOutputModules(self, scenarioFunc = None, scenarioArgs = None,
                           configDoc = None, couchURL = None,
                           couchDBName = None, configCacheUrl = None):
    """
    _determineOutputModules_

    Determine the output module names and associated metadata for the
    given config: from a ConfigCache document when configDoc is given,
    otherwise derived from the scenario arguments.
    """
    # Guard against scenarioArgs being None (consistent with the other
    # determineOutputModules implementations).
    scenarioArgs = scenarioArgs or {}

    outputModules = {}
    if configDoc != None and configDoc != "":
        url = configCacheUrl or couchURL
        configCache = ConfigCache(url, couchDBName)
        configCache.loadByID(configDoc)
        outputModules = configCache.getOutputModuleInfo()
    else:
        if 'outputs' in scenarioArgs and scenarioFunc in [ "promptReco", "expressProcessing", "repack" ]:
            for output in scenarioArgs.get('outputs', []):
                moduleLabel = output['moduleLabel']
                outputModules[moduleLabel] = { 'dataTier' : output['dataTier'] }
                # dict.has_key() was removed in Python 3; use 'in'.
                if 'primaryDataset' in output:
                    outputModules[moduleLabel]['primaryDataset'] = output['primaryDataset']
                if 'filterName' in output:
                    outputModules[moduleLabel]['filterName'] = output['filterName']
        elif 'writeTiers' in scenarioArgs and scenarioFunc == "promptReco":
            for dataTier in scenarioArgs.get('writeTiers'):
                moduleLabel = "%soutput" % dataTier
                outputModules[moduleLabel] = { 'dataTier' : dataTier }
        elif scenarioFunc == "alcaSkim":
            for alcaSkim in scenarioArgs.get('skims',[]):
                moduleLabel = "ALCARECOStream%s" % alcaSkim
                # Exact-name match here (not startswith), as per this version.
                if alcaSkim == "PromptCalibProd":
                    dataTier = "ALCAPROMPT"
                else:
                    dataTier = "ALCARECO"
                outputModules[moduleLabel] = { 'dataTier' : dataTier,
                                               'primaryDataset' : scenarioArgs.get('primaryDataset'),
                                               'filterName' : alcaSkim }

    return outputModules
def __call__(self, workloadName, arguments):
    """
    _call_

    Create a PromptSkimming workload with the given parameters: the skim
    config is uploaded into the ConfigCache, its document ID resolved,
    the processing version parsed, and the workload built with site and
    block whitelists applied.
    """
    configCouchUrl = arguments.get("ConfigCacheUrl", None) or arguments["CouchURL"]
    injectIntoConfigCache(arguments["CMSSWVersion"], arguments["ScramArch"],
                          arguments["InitCommand"], arguments["SkimConfig"],
                          workloadName, configCouchUrl,
                          arguments["CouchDBName"],
                          arguments.get("EnvPath", None),
                          arguments.get("BinPath", None))

    try:
        configCache = ConfigCache(configCouchUrl, arguments["CouchDBName"])
        arguments["ConfigCacheID"] = configCache.getIDFromLabel(workloadName)
        if not arguments["ConfigCacheID"]:
            logging.error("The configuration was not uploaded to couch")
            raise Exception
    except Exception:
        logging.error("There was an exception loading the config out of the")
        logging.error("ConfigCache. Check the scramOutput.log file in the")
        logging.error("PromptSkimScheduler directory to find out what went")
        logging.error("wrong.")
        raise

    parsedProcVer = parseT0ProcVer(arguments["ProcessingVersion"], 'PromptSkim')
    arguments["ProcessingString"] = parsedProcVer["ProcString"]
    arguments["ProcessingVersion"] = parsedProcVer["ProcVer"]

    workload = DataProcessingWorkloadFactory.__call__(self, workloadName, arguments)

    # We need to strip off "MSS" as that causes all sorts of problems.
    custodialSite = arguments["CustodialSite"]
    site = custodialSite[:-4] if "MSS" in custodialSite else custodialSite
    workload.setSiteWhitelist(site)
    workload.setBlockWhitelist(arguments["BlockName"])
    return workload
def determineOutputModules(self, scenarioName = None, scenarioArgs = None,
                           configDoc = None, couchURL = None,
                           couchDBName = None):
    """
    _determineOutputModules_

    Determine the output module names and associated metadata for the
    given config, either from a ConfigCache document or from the
    scenario's writeTiers list.
    """
    # Guard against a None scenarioArgs (previously this crashed with
    # AttributeError on scenarioArgs.get; consistent with the other
    # determineOutputModules implementations).
    scenarioArgs = scenarioArgs or {}

    outputModules = {}
    if configDoc != None and configDoc != "":
        configCache = ConfigCache(couchURL, couchDBName)
        configCache.loadByID(configDoc)
        outputModules = configCache.getOutputModuleInfo()
    else:
        for dataTier in scenarioArgs.get("writeTiers", []):
            outputModuleName = "output%s%s" % (dataTier, dataTier)
            outputModules[outputModuleName] = {"dataTier": dataTier,
                                               "filterName": None}

    return outputModules
def validateConfigCacheExists(self, configID, couchURL, couchDBName,
                              getOutputModules=True):
    """
    _validateConfigCacheExists_

    Validate that the given config document exists in the ConfigCache.
    Handles are memoised per (couchURL, couchDBName) in
    self.config_cache; validation failures raise via
    raiseValidationException.
    """
    if configID in ("", " "):
        self.raiseValidationException(msg="ConfigCacheID is invalid and cannot be loaded")

    cacheKey = (couchURL, couchDBName)
    configCache = self.config_cache.get(cacheKey)
    if configCache is None:
        configCache = ConfigCache(dbURL=couchURL, couchDBName=couchDBName,
                                  detail=getOutputModules)
        self.config_cache[cacheKey] = configCache

    try:
        # When the detail option is set, validate() returns outputModules.
        return configCache.validate(configID)
    except ConfigCacheException as ex:
        self.raiseValidationException(ex.message())
def determineOutputModules(
    self, scenarioFunc=None, scenarioArgs=None, configDoc=None, couchURL=None, couchDBName=None
):
    """
    _determineOutputModules_

    Determine the output module names and associated metadata for the
    given config: from a ConfigCache document when configDoc is given,
    otherwise derived from the scenario arguments.
    """
    # Guard against scenarioArgs being None (consistent with the other
    # determineOutputModules implementations).
    scenarioArgs = scenarioArgs or {}

    outputModules = {}
    if configDoc != None and configDoc != "":
        configCache = ConfigCache(couchURL, couchDBName)
        configCache.loadByID(configDoc)
        outputModules = configCache.getOutputModuleInfo()
    else:
        if "outputs" in scenarioArgs and scenarioFunc in ["promptReco", "expressProcessing", "repack"]:
            for output in scenarioArgs.get("outputs", []):
                moduleLabel = output["moduleLabel"]
                outputModules[moduleLabel] = {"dataTier": output["dataTier"]}
                # dict.has_key() was removed in Python 3; use 'in'.
                if "primaryDataset" in output:
                    outputModules[moduleLabel]["primaryDataset"] = output["primaryDataset"]
                if "filterName" in output:
                    outputModules[moduleLabel]["filterName"] = output["filterName"]
        elif "writeTiers" in scenarioArgs and scenarioFunc == "promptReco":
            for dataTier in scenarioArgs.get("writeTiers"):
                moduleLabel = "%soutput" % dataTier
                outputModules[moduleLabel] = {"dataTier": dataTier}
        elif scenarioFunc == "alcaSkim":
            # This version always uses the ALCARECO tier for skims.
            for alcaSkim in scenarioArgs.get("skims", []):
                moduleLabel = "ALCARECOStream%s" % alcaSkim
                outputModules[moduleLabel] = {
                    "dataTier": "ALCARECO",
                    "primaryDataset": scenarioArgs.get("primaryDataset"),
                    "filterName": alcaSkim,
                }

    return outputModules
def validateConfigCacheExists(self, configID, couchURL, couchDBName,
                              getOutputModules=False):
    """
    _validateConfigCacheExists_

    If we have a configCache, we should probably try and load it;
    raise a validation exception on an invalid ID or a load failure.
    """
    if configID == '' or configID == ' ':
        self.raiseValidationException(
            msg="ConfigCacheID is invalid and cannot be loaded")

    configCache = ConfigCache(dbURL=couchURL, couchDBName=couchDBName,
                              id=configID)
    try:
        configCache.loadByID(configID=configID)
    except ConfigCacheException:
        # 'except E, ex' is Python 2 only syntax; the bound exception was
        # unused, so the binding is dropped.
        self.raiseValidationException(
            msg="Failure to load ConfigCache while validating workload")
def testE_SaveConfigFileToDisk(self):
    """
    _SaveConfigFileToDisk_

    Check and see if we can save the config file attachment to disk.
    """
    targetFile = os.path.join(self.testDir, 'configCache.test')

    configCache = ConfigCache(os.environ["COUCHURL"], couchDBName = 'config_test')
    configCache.createUserGroup(groupname = "testGroup", username = '******')
    configCache.attachments['configFile'] = 'ThisIsAConfigFile'
    configCache.saveConfigToDisk(targetFile = targetFile)

    # The file on disk must match what the cache reports as its config.
    with open(targetFile, 'r') as handle:
        content = handle.read()
    self.assertEqual(content, configCache.getConfig())
    return
def createConfig(couchDBName):
    """
    _createConfig_

    Save a PSet tweak with RECO and ALCARECO output modules into the
    ConfigCache and return the resulting couch document ID.
    """
    tweak = {
        'process': {
            'outputModules_': ['RECOoutput', 'ALCARECOoutput'],
            'RECOoutput': {
                'dataset': {'dataTier': 'RECO', 'filterName': 'Filter'}},
            'ALCARECOoutput': {
                'dataset': {'dataTier': 'ALCARECO', 'filterName': 'AlcaFilter'}},
        }
    }

    configCache = ConfigCache(os.environ["COUCHURL"], couchDBName = couchDBName)
    configCache.createUserGroup(groupname = "testGroup", username = '******')
    configCache.setPSetTweaks(PSetTweak = tweak)
    configCache.save()
    return configCache.getCouchID()
def buildWorkload(self):
    """
    _buildWorkload_

    Build the workload given all of the input parameters.  Not that there
    will be LogCollect tasks created for each processing task and Cleanup
    tasks created for each merge task.

    Returns the fully assembled Tier-0 PromptReco workload:
    Reco -> (merges | AlcaSkim) -> PromptSkims, plus optional LogCollect
    and Cleanup tasks.
    """
    # Input dataset path is "/primary/processed/tier".
    (self.inputPrimaryDataset, self.inputProcessedDataset,
     self.inputDataTier) = self.inputDataset[1:].split("/")

    workload = self.createWorkload()
    workload.setDashboardActivity("tier0")
    self.reportWorkflowToDashboard(workload.getDashboardActivity())
    workload.setWorkQueueSplitPolicy("Block", self.procJobSplitAlgo,
                                     self.procJobSplitArgs)

    cmsswStepType = "CMSSW"
    taskType = "Processing"
    if self.multicore:
        taskType = "MultiProcessing"

    # One output module descriptor per requested data tier.
    recoOutputs = []
    for dataTier in self.writeTiers:
        recoOutputs.append({'dataTier': dataTier,
                            'eventContent': dataTier,
                            'moduleLabel': "write_%s" % dataTier})

    recoTask = workload.newTask("Reco")
    recoOutMods = self.setupProcessingTask(recoTask, taskType, self.inputDataset,
                                           scenarioName=self.procScenario,
                                           scenarioFunc="promptReco",
                                           scenarioArgs={'globalTag': self.globalTag,
                                                         'skims': self.alcaSkims,
                                                         'dqmSeq': self.dqmSequences,
                                                         'outputs': recoOutputs},
                                           splitAlgo=self.procJobSplitAlgo,
                                           splitArgs=self.procJobSplitArgs,
                                           stepType=cmsswStepType,
                                           forceUnmerged=True)
    if self.doLogCollect:
        self.addLogCollectTask(recoTask)

    # Non-ALCARECO tiers get a merge task; ALCARECO feeds the AlcaSkim task.
    recoMergeTasks = {}
    for recoOutLabel, recoOutInfo in recoOutMods.items():
        if recoOutInfo['dataTier'] != "ALCARECO":
            mergeTask = self.addMergeTask(recoTask, self.procJobSplitAlgo,
                                          recoOutLabel,
                                          doLogCollect=self.doLogCollect)
            recoMergeTasks[recoOutInfo['dataTier']] = mergeTask
        else:
            alcaTask = recoTask.addTask("AlcaSkim")
            alcaOutMods = self.setupProcessingTask(alcaTask, taskType,
                                                   inputStep=recoTask.getStep("cmsRun1"),
                                                   inputModule=recoOutLabel,
                                                   scenarioName=self.procScenario,
                                                   scenarioFunc="alcaSkim",
                                                   scenarioArgs={'globalTag': self.globalTag,
                                                                 'skims': self.alcaSkims,
                                                                 'primaryDataset': self.inputPrimaryDataset},
                                                   splitAlgo="WMBSMergeBySize",
                                                   splitArgs={"max_merge_size": self.maxMergeSize,
                                                              "min_merge_size": self.minMergeSize,
                                                              "max_merge_events": self.maxMergeEvents},
                                                   stepType=cmsswStepType)
            if self.doLogCollect:
                self.addLogCollectTask(alcaTask, taskName="AlcaSkimLogCollect")
            self.addCleanupTask(recoTask, recoOutLabel)
            for alcaOutLabel, alcaOutInfo in alcaOutMods.items():
                self.addMergeTask(alcaTask, self.procJobSplitAlgo, alcaOutLabel,
                                  doLogCollect=self.doLogCollect)

    # Attach each configured PromptSkim to the merge task of its data tier.
    for promptSkim in self.promptSkims:
        if promptSkim.DataTier not in recoMergeTasks:
            error = 'PromptReco output does not have the following output data tier: %s.' % promptSkim.DataTier
            error += 'Please change the skim input to be one of the following: %s' % recoMergeTasks.keys()
            error += 'That should be in the relevant skimConfig in T0AST'
            logging.error(error)
            raise Exception

        mergeTask = recoMergeTasks[promptSkim.DataTier]
        skimTask = mergeTask.addTask(promptSkim.SkimName)
        parentCmsswStep = mergeTask.getStep('cmsRun1')

        parsedProcVer = parseT0ProcVer(promptSkim.ProcessingVersion,
                                       'PromptSkim')
        self.processingString = parsedProcVer["ProcString"]
        self.processingVersion = parsedProcVer["ProcVer"]

        if promptSkim.TwoFileRead:
            self.skimJobSplitArgs['include_parents'] = True
        else:
            self.skimJobSplitArgs['include_parents'] = False

        configLabel = '%s-%s' % (self.workloadName, promptSkim.SkimName)
        configCacheUrl = self.configCacheUrl or self.couchURL
        injectIntoConfigCache(self.frameworkVersion, self.scramArch,
                              self.initCommand, promptSkim.ConfigURL,
                              configLabel, configCacheUrl, self.couchDBName,
                              self.envPath, self.binPath)
        try:
            configCache = ConfigCache(configCacheUrl, self.couchDBName)
            configCacheID = configCache.getIDFromLabel(configLabel)
            # BUGFIX: the original raised when an ID *was* found, i.e. when
            # the upload succeeded.  A missing ID is the failure case.
            if not configCacheID:
                logging.error("The configuration was not uploaded to couch")
                raise Exception
        except Exception:
            logging.error("There was an exception loading the config out of the")
            logging.error("ConfigCache. Check the scramOutput.log file in the")
            logging.error("PromptSkimScheduler directory to find out what went")
            logging.error("wrong.")
            raise

        outputMods = self.setupProcessingTask(skimTask, "Skim",
                                              inputStep=parentCmsswStep,
                                              inputModule="Merged",
                                              couchURL=self.couchURL,
                                              couchDBName=self.couchDBName,
                                              configCacheUrl=self.configCacheUrl,
                                              configDoc=configCacheID,
                                              splitAlgo=self.skimJobSplitAlgo,
                                              splitArgs=self.skimJobSplitArgs)
        if self.doLogCollect:
            self.addLogCollectTask(skimTask,
                                   taskName="%sLogCollect" % promptSkim.SkimName)

        for outputModuleName in outputMods.keys():
            self.addMergeTask(skimTask, self.skimJobSplitAlgo,
                              outputModuleName,
                              doLogCollect=self.doLogCollect)

    return workload
def createConfig(self, bad=False):
    """
    _createConfig_

    Create a config of some sort that we can load out of ConfigCache
    """
    # "good" tweak declares a single output module; the "bad" one
    # declares two, which downstream validation is expected to reject.
    goodTweak = {'process': {'outputModules_': ['ThisIsAName'],
                             'ThisIsAName': {'dataset': {'dataTier': 'RECO',
                                                         'filterName': 'Filter'}}}}
    badTweak = {'process': {'outputModules_': ['ThisIsAName1', 'ThisIsAName2'],
                            'ThisIsAName1': {'dataset': {'dataTier': 'RECO',
                                                         'filterName': 'Filter'}},
                            'ThisIsAName2': {'dataset': {'dataTier': 'RECO',
                                                         'filterName': 'Filter'}}}}

    cache = ConfigCache(os.environ["COUCHURL"], couchDBName=self.couchDBName)
    cache.createUserGroup(groupname="testGroup", username='******')
    cache.setPSetTweaks(PSetTweak=badTweak if bad else goodTweak)
    cache.save()
    return cache.getCouchID()
def showTweakFile(self, docId):
    """
    Makes a link to the dump of the tweakfile
    """
    cache = ConfigCache(self.couchUrl, self.configDBName)
    cache.loadByID(docId)
    # Render the tweaks with HTML line breaks for display in the browser.
    tweakDump = str(cache.getPSetTweaks())
    return tweakDump.replace('\n', '<br>')
def determineOutputModules(self, scenarioFunc=None, scenarioArgs=None,
                           configDoc=None, couchURL=None,
                           couchDBName=None, configCacheUrl=None):
    """
    _determineOutputModules_

    Determine the output module names and associated metadata for the
    given config.

    If a config document id is supplied, the information is read (and
    memoized per (url, dbname) in self.config_cache) from couch;
    otherwise it is derived from the scenario function and arguments.

    Returns a dict mapping output module label -> metadata dict.
    """
    # set default scenarioArgs to empty dictionary if it is None.
    scenarioArgs = scenarioArgs or {}

    # Known physics-skim data tiers; any skim not listed falls back to
    # "USER".  Hoisted out of the loop below — it is a constant table
    # and was previously rebuilt on every iteration.
    skimToDataTier = {"LogError": "RAW-RECO",
                      "LogErrorMonitor": "USER",
                      "ZElectron": "RAW-RECO",
                      "ZMu": "RAW-RECO",
                      "MuTau": "RAW-RECO",
                      "TopMuEG": "RAW-RECO",
                      "EcalActivity": "RAW-RECO",
                      "CosmicSP": "RAW-RECO",
                      "CosmicTP": "RAW-RECO",
                      "ZMM": "RAW-RECO",
                      "Onia": "RECO",
                      "HighPtJet": "RAW-RECO",
                      "D0Meson": "RECO",
                      "Photon": "AOD",
                      "ZEE": "AOD",
                      "BJet": "AOD",
                      "OniaCentral": "RECO",
                      "OniaPeripheral": "RECO",
                      "SingleTrack": "AOD",
                      "MinBias": "AOD",
                      "OniaUPC": "RAW-RECO",
                      "HighMET": "RECO",
                      "BPHSkim": "USER"}

    outputModules = {}
    if configDoc != None and configDoc != "":
        url = configCacheUrl or couchURL
        if (url, couchDBName) in self.config_cache:
            configCache = self.config_cache[(url, couchDBName)]
        else:
            configCache = ConfigCache(url, couchDBName, True)
            self.config_cache[(url, couchDBName)] = configCache
        # TODO: need to change to DataCache
        # configCache.loadDocument(configDoc)
        configCache.loadByID(configDoc)
        outputModules = configCache.getOutputModuleInfo()
    else:
        if 'outputs' in scenarioArgs and scenarioFunc in ["promptReco", "expressProcessing", "repack"]:
            for output in scenarioArgs.get('outputs', []):
                moduleLabel = output['moduleLabel']
                outputModules[moduleLabel] = {'dataTier': output['dataTier']}
                if 'primaryDataset' in output:
                    outputModules[moduleLabel]['primaryDataset'] = output['primaryDataset']
                if 'filterName' in output:
                    outputModules[moduleLabel]['filterName'] = output['filterName']

            for physicsSkim in scenarioArgs.get('PhysicsSkims', []):
                dataTier = skimToDataTier.get(physicsSkim, 'USER')
                moduleLabel = "SKIMStream%s" % physicsSkim
                outputModules[moduleLabel] = {'dataTier': dataTier,
                                              'filterName': physicsSkim}

        elif scenarioFunc == "alcaSkim":
            for alcaSkim in scenarioArgs.get('skims', []):
                moduleLabel = "ALCARECOStream%s" % alcaSkim
                if alcaSkim.startswith("PromptCalibProd"):
                    dataTier = "ALCAPROMPT"
                else:
                    dataTier = "ALCARECO"
                outputModules[moduleLabel] = {'dataTier': dataTier,
                                              'primaryDataset': scenarioArgs.get('primaryDataset'),
                                              'filterName': alcaSkim}

    return outputModules
def testListAllConfigs(self):
    """
    _testListAllConfigs_

    Verify that the list all configs method works correctly.
    """
    first = ConfigCache(os.environ["COUCHURL"], couchDBName='config_test')
    first.createUserGroup(groupname="testGroup", username='******')
    first.setLabel("labelA")
    first.save()

    second = ConfigCache(os.environ["COUCHURL"], couchDBName='config_test')
    second.createUserGroup(groupname="testGroup", username='******')
    second.setLabel("labelB")
    second.save()

    # Both saved configs must come back, keyed by label.
    labelMap = first.listAllConfigsByLabel()
    self.assertEqual(len(labelMap.keys()), 2,
                     "Error: There should be two configs")
    self.assertEqual(labelMap["labelA"], first.getCouchID(),
                     "Error: Label A is wrong.")
    self.assertEqual(labelMap["labelB"], second.getCouchID(),
                     "Error: Label B is wrong.")
    return
def testD_LoadConfigCache(self):
    """
    _LoadConfigCache_

    Actually load the config cache using plain .load()
    Tests to make sure that if we pass in an id field it gets used to load configs
    """
    saved = ConfigCache(os.environ["COUCHURL"], couchDBName='config_test')
    saved.createUserGroup(groupname="testGroup", username='******')
    saved.setLabel("labelA")
    saved.save()

    # Construct a second cache pointing at the saved doc's id/rev and
    # load it with the generic .load() path.
    loaded = ConfigCache(os.environ["COUCHURL"], couchDBName='config_test',
                         id=saved.getCouchID(), rev=saved.getCouchRev())
    loaded.load()

    self.assertEqual(loaded.document['owner'],
                     {'group': 'testGroup', 'user': '******'})
    self.assertEqual(loaded.document['description'],
                     {'config_desc': None, 'config_label': 'labelA'})
    return
def _configCacheId(self, label):
    """Return config cache id for given config label

    Looks the label up in the reqmgr config cache first; if absent,
    uploads the matching config file from the test data directory and
    returns the new id.
    """
    key, cert = self.__class__.reqmgr['requests'].getKeyCert()
    configCache = ConfigCache(self.__class__.endpoint + '/couchdb',
                              'reqmgr_config_cache', ckey=key, cert=cert)
    try:
        configCacheId = configCache.getIDFromLabel(label)
    except Exception:
        # A bare "except:" here would also swallow SystemExit and
        # KeyboardInterrupt; a lookup failure just means "not cached yet".
        configCacheId = None
    if configCacheId:
        return configCacheId
    # The following will fail if FWCore.ParameterSet not in PYTHONPATH
    from PSetTweaks.WMTweak import makeTweak
    configCache.createUserGroup('test', 'test')
    configDir = os.path.join(getTestBase(), '..', '..', 'test', 'data',
                             'configs')
    configCache.addConfig(os.path.join(configDir, label + '.py'))
    configCache.setLabel(label)
    configCache.setDescription(label)
    modPath = imp.find_module(label, [configDir])
    loadedConfig = imp.load_module(label, modPath[0], modPath[1], modPath[2])
    configCache.setPSetTweaks(makeTweak(loadedConfig.process).jsondictionary())
    configCache.save()
    return configCache.getIDFromLabel(label)
docId=request.get('ConfigCacheID', None), assignments=request['Assignments'], adminHtml=adminHtml, messages=request['RequestMessages'], updateDictList=request['RequestUpdates']) def _getConfigCache(self, requestName, processMethod): try: request = Utilities.requestDetails(requestName) except Exception, ex: msg = "Cannot find request %s, check logs." % requestName logging.error("%s, reason: %s" % (msg, ex)) return msg url = request.get("ConfigCacheUrl", None) or self.couchUrl try: configCache = ConfigCache(url, self.configDBName) configDocId = request["ConfigCacheID"] configCache.loadByID(configDocId) except Exception, ex: msg = "Cannot find ConfigCache document %s on %s." % (configDocId, url) logging.error("%s, reason: %s" % (msg, ex)) return msg return getattr(configCache, processMethod)() @cherrypy.expose @cherrypy.tools.secmodv2() def showOriginalConfig(self, requestName): """ Makes a link to the original text of the config document.
def _configCacheId(self, label):
    """Return config cache id for given config label

    Looks the label up in the reqmgr config cache first; if absent,
    uploads the matching config file from the test data directory and
    returns the new id.
    """
    key, cert = self.__class__.reqmgr['requests'].getKeyCert()
    configCache = ConfigCache(self.__class__.endpoint + '/couchdb',
                              'reqmgr_config_cache', ckey=key, cert=cert)
    try:
        configCacheId = configCache.getIDFromLabel(label)
    except Exception:
        # A bare "except:" here would also swallow SystemExit and
        # KeyboardInterrupt; a lookup failure just means "not cached yet".
        configCacheId = None
    if configCacheId:
        return configCacheId
    # The following will fail if FWCore.ParameterSet not in PYTHONPATH
    from PSetTweaks.WMTweak import makeTweak
    configCache.createUserGroup('test', 'test')
    configDir = os.path.join(getTestBase(), '..', '..', 'test', 'data',
                             'configs')
    configCache.addConfig(os.path.join(configDir, label + '.py'))
    configCache.setLabel(label)
    configCache.setDescription(label)
    modPath = imp.find_module(label, [configDir])
    loadedConfig = imp.load_module(label, modPath[0], modPath[1], modPath[2])
    configCache.setPSetTweaks(makeTweak(loadedConfig.process).jsondictionary())
    configCache.save()
    return configCache.getIDFromLabel(label)
def testA_basicConfig(self):
    """
    _basicConfig_

    Basic configCache stuff.
    """
    tweak = "Hello, I am a PSetTweak. It's nice to meet you."

    cache = ConfigCache(os.environ["COUCHURL"], couchDBName='config_test')
    cache.createUserGroup(groupname="testGroup", username='******')
    cache.setPSetTweaks(PSetTweak=tweak)
    cache.save()

    # Round-trip: a fresh object built from the saved id/rev must load
    # back the identical tweak.
    reloaded = ConfigCache(os.environ["COUCHURL"], couchDBName='config_test',
                           id=cache.getCouchID(), rev=cache.getCouchRev())
    reloaded.loadByID(cache.getCouchID())
    self.assertEqual(reloaded.getPSetTweaks(), tweak)

    reloaded.delete()

    # Loading a deleted document has to raise ConfigCacheException.
    stale = ConfigCache(os.environ["COUCHURL"], couchDBName='config_test',
                        id=cache.getCouchID(), rev=cache.getCouchRev())
    sawException = False
    try:
        stale.loadByID(cache.getCouchID())
    except ConfigCacheException:
        sawException = True
    self.assertTrue(sawException)
    return
def testB_addingConfigsAndTweaks(self):
    """
    _addingConfigsAndTweaks_

    Test adding config files and tweak files
    """
    tweak = "Hello, I am a PSetTweak. It's nice to meet you."
    attachment = "Hello, I am an attachment"

    original = ConfigCache(os.environ["COUCHURL"], couchDBName='config_test')
    original.createUserGroup(groupname="testGroup", username='******')
    original.setPSetTweaks(PSetTweak=tweak)
    original.attachments['attach1'] = attachment
    psetPath = os.path.join(getTestBase(), "WMCore_t/Cache_t/PSet.txt")
    original.addConfig(newConfig=psetPath, psetHash=None)
    original.setLabel("sample-label")
    original.setDescription("describe this config here")
    original.save()
    savedConfig = original.getConfig()

    # Loading by id must bring back both the config text and attachment.
    reloaded = ConfigCache(os.environ["COUCHURL"], couchDBName='config_test',
                           id=original.getCouchID(),
                           rev=original.getCouchRev())
    reloaded.loadByID(original.getCouchID())

    self.assertEqual(savedConfig, reloaded.getConfig())
    self.assertEqual(reloaded.attachments.get('attach1', None), attachment)

    original.delete()
    return
def upload_to_couch(cfg_name, section_name, user_name, group_name, test_mode=False, url=None): if test_mode: return "00000000000000000" if not os.path.exists(cfg_name): raise RuntimeError("Error: Can't locate config file %s." % cfg_name) # create a file with the ID inside to avoid multiple injections oldID = cfg_name + '.couchID' if os.path.exists(oldID): f = open(oldID) the_id = f.readline().replace('\n', '') f.close() print cfg_name, 'already uploaded with ID', the_id, 'from', oldID return the_id try: loadedConfig = __loadConfig(cfg_name) except: #just try again !! time.sleep(2) loadedConfig = __loadConfig(cfg_name) where = COUCH_DB_ADDRESS if url: where = url configCache = ConfigCache(where, DATABASE_NAME) configCache.createUserGroup(group_name, user_name) configCache.addConfig(cfg_name) configCache.setPSetTweaks(makeTweak(loadedConfig.process).jsondictionary()) configCache.setLabel(section_name) configCache.setDescription(section_name) configCache.save() print "Added file to the config cache:" print " DocID: %s" % configCache.document["_id"] print " Revision: %s" % configCache.document["_rev"] f = open(oldID, "w") f.write(configCache.document["_id"]) f.close() return configCache.document["_id"]
assignments=request['Assignments'], adminHtml=adminHtml, messages=request['RequestMessages'], updateDictList=request['RequestUpdates']) def _getConfigCache(self, requestName, processMethod): try: request = Utilities.requestDetails(requestName) except Exception, ex: msg = "Cannot find request %s, check logs." % requestName logging.error("%s, reason: %s" % (msg, ex)) return msg url = request.get("ConfigCacheUrl", None) or self.couchUrl try: configCache = ConfigCache(url, self.configDBName) configDocId = request["ConfigCacheID"] configCache.loadByID(configDocId) except Exception, ex: msg = "Cannot find ConfigCache document %s on %s." % (configDocId, url) logging.error("%s, reason: %s" % (msg, ex)) return msg return getattr(configCache, processMethod)() @cherrypy.expose @cherrypy.tools.secmodv2() def showOriginalConfig(self, requestName): """ Makes a link to the original text of the config document.
def buildWorkload(self):
    """
    _buildWorkload_

    Build the workload given all of the input parameters.  Not that there
    will be LogCollect tasks created for each processing task and Cleanup
    tasks created for each merge task.

    Returns the fully assembled Tier-0 PromptReco workload:
    Reco -> (merges | AlcaSkim) -> PromptSkims, plus optional LogCollect
    and Cleanup tasks, with block-close settings, task properties and
    LFN bases applied at the end.
    """
    # Input dataset path is "/primary/processed/tier".
    (self.inputPrimaryDataset, self.inputProcessedDataset,
     self.inputDataTier) = self.inputDataset[1:].split("/")

    workload = self.createWorkload()
    workload.setDashboardActivity("tier0")
    self.reportWorkflowToDashboard(workload.getDashboardActivity())
    workload.setWorkQueueSplitPolicy("Block", self.procJobSplitAlgo,
                                     self.procJobSplitArgs)

    cmsswStepType = "CMSSW"
    taskType = "Processing"

    # One output module descriptor per requested data tier.
    recoOutputs = []
    for dataTier in self.writeTiers:
        recoOutputs.append({'dataTier': dataTier,
                            'eventContent': dataTier,
                            'moduleLabel': "write_%s" % dataTier})

    recoTask = workload.newTask("Reco")
    scenarioArgs = {'globalTag': self.globalTag,
                    'skims': self.alcaSkims,
                    'dqmSeq': self.dqmSequences,
                    'outputs': recoOutputs}
    if self.globalTagConnect:
        scenarioArgs['globalTagConnect'] = self.globalTagConnect

    recoOutMods = self.setupProcessingTask(recoTask, taskType, self.inputDataset,
                                           scenarioName=self.procScenario,
                                           scenarioFunc="promptReco",
                                           scenarioArgs=scenarioArgs,
                                           splitAlgo=self.procJobSplitAlgo,
                                           splitArgs=self.procJobSplitArgs,
                                           stepType=cmsswStepType,
                                           forceUnmerged=True)
    if self.doLogCollect:
        self.addLogCollectTask(recoTask)

    # Non-ALCARECO tiers get a merge task; ALCARECO feeds the AlcaSkim task.
    recoMergeTasks = {}
    for recoOutLabel, recoOutInfo in recoOutMods.items():
        if recoOutInfo['dataTier'] != "ALCARECO":
            mergeTask = self.addMergeTask(recoTask, self.procJobSplitAlgo,
                                          recoOutLabel,
                                          doLogCollect=self.doLogCollect)
            recoMergeTasks[recoOutInfo['dataTier']] = mergeTask
        else:
            alcaTask = recoTask.addTask("AlcaSkim")
            scenarioArgs = {'globalTag': self.globalTag,
                            'skims': self.alcaSkims,
                            'primaryDataset': self.inputPrimaryDataset}
            if self.globalTagConnect:
                scenarioArgs['globalTagConnect'] = self.globalTagConnect
            alcaOutMods = self.setupProcessingTask(alcaTask, taskType,
                                                   inputStep=recoTask.getStep("cmsRun1"),
                                                   inputModule=recoOutLabel,
                                                   scenarioName=self.procScenario,
                                                   scenarioFunc="alcaSkim",
                                                   scenarioArgs=scenarioArgs,
                                                   splitAlgo="WMBSMergeBySize",
                                                   splitArgs={"max_merge_size": self.maxMergeSize,
                                                              "min_merge_size": self.minMergeSize,
                                                              "max_merge_events": self.maxMergeEvents},
                                                   stepType=cmsswStepType,
                                                   useMulticore=False)
            if self.doLogCollect:
                self.addLogCollectTask(alcaTask, taskName="AlcaSkimLogCollect")
            self.addCleanupTask(recoTask, recoOutLabel)
            for alcaOutLabel, alcaOutInfo in alcaOutMods.items():
                self.addMergeTask(alcaTask, self.procJobSplitAlgo, alcaOutLabel,
                                  doLogCollect=self.doLogCollect)

    # Attach each configured PromptSkim to the merge task of its data tier.
    for promptSkim in self.promptSkims:
        if promptSkim.DataTier not in recoMergeTasks:
            error = 'PromptReco output does not have the following output data tier: %s.' % promptSkim.DataTier
            error += 'Please change the skim input to be one of the following: %s' % recoMergeTasks.keys()
            error += 'That should be in the relevant skimConfig in T0AST'
            logging.error(error)
            raise Exception

        mergeTask = recoMergeTasks[promptSkim.DataTier]
        skimTask = mergeTask.addTask(promptSkim.SkimName)
        parentCmsswStep = mergeTask.getStep('cmsRun1')

        parsedProcVer = parseT0ProcVer(promptSkim.ProcessingVersion,
                                       'PromptSkim')
        self.processingString = parsedProcVer["ProcString"]
        self.processingVersion = parsedProcVer["ProcVer"]

        if promptSkim.TwoFileRead:
            self.skimJobSplitArgs['include_parents'] = True
        else:
            self.skimJobSplitArgs['include_parents'] = False

        configLabel = '%s-%s' % (self.workloadName, promptSkim.SkimName)
        configCacheUrl = self.configCacheUrl or self.couchURL
        injectIntoConfigCache(self.frameworkVersion, self.scramArch,
                              self.initCommand, promptSkim.ConfigURL,
                              configLabel, configCacheUrl, self.couchDBName,
                              self.envPath, self.binPath)
        try:
            configCache = ConfigCache(configCacheUrl, self.couchDBName)
            configCacheID = configCache.getIDFromLabel(configLabel)
            # BUGFIX: the original raised when an ID *was* found, i.e. when
            # the upload succeeded.  A missing ID is the failure case.
            if not configCacheID:
                logging.error("The configuration was not uploaded to couch")
                raise Exception
        except Exception:
            logging.error("There was an exception loading the config out of the")
            logging.error("ConfigCache. Check the scramOutput.log file in the")
            logging.error("PromptSkimScheduler directory to find out what went")
            logging.error("wrong.")
            raise

        outputMods = self.setupProcessingTask(skimTask, "Skim",
                                              inputStep=parentCmsswStep,
                                              inputModule="Merged",
                                              couchURL=self.couchURL,
                                              couchDBName=self.couchDBName,
                                              configCacheUrl=self.configCacheUrl,
                                              configDoc=configCacheID,
                                              splitAlgo=self.skimJobSplitAlgo,
                                              splitArgs=self.skimJobSplitArgs,
                                              useMulticore=False)
        if self.doLogCollect:
            self.addLogCollectTask(skimTask,
                                   taskName="%sLogCollect" % promptSkim.SkimName)

        for outputModuleName in outputMods.keys():
            self.addMergeTask(skimTask, self.skimJobSplitAlgo,
                              outputModuleName,
                              doLogCollect=self.doLogCollect)

    workload.setBlockCloseSettings(self.blockCloseDelay,
                                   workload.getBlockCloseMaxFiles(),
                                   workload.getBlockCloseMaxEvents(),
                                   workload.getBlockCloseMaxSize())

    # setting the parameters which need to be set for all the tasks
    # sets acquisitionEra, processingVersion, processingString
    workload.setTaskPropertiesFromWorkload()

    # set the LFN bases (normally done by request manager)
    # also pass runNumber (workload evaluates it)
    workload.setLFNBase(self.mergedLFNBase, self.unmergedLFNBase,
                        runNumber=self.runNumber)

    return workload
def testC_testViews(self):
    """
    _testViews_

    Prototype test for what should be a lot of other tests.
    """
    tweak = "Hello, I am a PSetTweak. It's nice to meet you."
    attachment = "Hello, I am an attachment"

    cache = ConfigCache(os.environ["COUCHURL"], couchDBName='config_test')
    cache.createUserGroup(groupname="testGroup", username='******')
    cache.setPSetTweaks(PSetTweak=tweak)
    cache.attachments['attach1'] = attachment
    cache.document['md5_hash'] = "somemd5"
    psetPath = os.path.join(getTestBase(), "WMCore_t/Cache_t/PSet.txt")
    cache.addConfig(newConfig=psetPath, psetHash=None)
    cache.save()

    # Look the document up again by its md5 hash via the generic load().
    byHash = ConfigCache(os.environ["COUCHURL"], couchDBName='config_test')
    byHash.document['md5_hash'] = cache.document['md5_hash']
    byHash.load()

    self.assertEqual(byHash.attachments.get('attach1', None), attachment)
    byHash.delete()
    return
def testB_addingConfigsAndTweaks(self):
    """
    _addingConfigsAndTweaks_

    Test adding config files and tweak files
    """
    tweakText = "Hello, I am a PSetTweak. It's nice to meet you."
    attachText = "Hello, I am an attachment"
    psetFile = os.path.join(getTestBase(), "WMCore_t/Cache_t/PSet.txt")

    cacheOne = ConfigCache(os.environ["COUCHURL"], couchDBName='config_test')
    cacheOne.createUserGroup(groupname="testGroup", username='******')
    cacheOne.setPSetTweaks(PSetTweak=tweakText)
    cacheOne.attachments['attach1'] = attachText
    cacheOne.addConfig(newConfig=psetFile, psetHash=None)
    cacheOne.setLabel("sample-label")
    cacheOne.setDescription("describe this config here")
    cacheOne.save()

    # Reload by id and check config text and attachment survive the trip.
    cacheTwo = ConfigCache(os.environ["COUCHURL"], couchDBName='config_test',
                           id=cacheOne.getCouchID(),
                           rev=cacheOne.getCouchRev())
    cacheTwo.loadByID(cacheOne.getCouchID())

    self.assertEqual(cacheOne.getConfig(), cacheTwo.getConfig())
    self.assertEqual(cacheTwo.attachments.get('attach1', None), attachText)

    cacheOne.delete()
    return