def setupNextSteps(self, task, origArgs):
    """
    _setupNextSteps_

    Modify the step one task to include N more CMSSW steps and
    chain the output between all three steps.

    :param task: the WMTask object holding the cmsRun1 top step
    :param origArgs: dict of request arguments keyed by "StepN"
    :return: None; `task` is modified in place
    """
    configCacheUrl = self.configCacheUrl or self.couchURL

    # Map the user-visible StepName to its ("StepN", "cmsRunN") pair so
    # later steps can resolve the step they chain their input from.
    stepMapping = {}
    stepMapping.setdefault(origArgs['Step1']['StepName'], ('Step1', 'cmsRun1'))

    for i in range(2, self.stepChain + 1):
        currentStepNumber = "Step%d" % i
        currentCmsRun = "cmsRun%d" % i
        stepMapping.setdefault(origArgs[currentStepNumber]['StepName'],
                               (currentStepNumber, currentCmsRun))
        # Shallow copy so filling in defaults does not mutate the request args
        # (replaces the original manual iteritems() copy loop; also py3-safe)
        taskConf = dict(origArgs[currentStepNumber])

        # Direct indexing raises a clear KeyError for an unknown InputStep,
        # instead of the TypeError the old .get(...)[0] produced on None
        parentStepNumber, parentCmsRun = stepMapping[taskConf['InputStep']]
        parentCmsswStep = task.getStep(parentCmsRun)
        parentCmsswStepHelper = parentCmsswStep.getTypeHelper()

        # Set default values for the task parameters
        self.modifyTaskConfiguration(taskConf, False, 'InputDataset' not in taskConf)
        globalTag = taskConf.get("GlobalTag", self.globalTag)
        frameworkVersion = taskConf.get("CMSSWVersion", self.frameworkVersion)
        scramArch = taskConf.get("ScramArch", self.scramArch)

        # Create the new CMSSW step chained under the parent step
        childCmssw = parentCmsswStep.addTopStep(currentCmsRun)
        childCmssw.setStepType("CMSSW")
        template = StepFactory.getStepTemplate("CMSSW")
        template(childCmssw.data)

        childCmsswStepHelper = childCmssw.getTypeHelper()
        childCmsswStepHelper.setGlobalTag(globalTag)
        childCmsswStepHelper.setupChainedProcessing(parentCmsRun, taskConf['InputFromOutputModule'])
        childCmsswStepHelper.cmsswSetup(frameworkVersion, softwareEnvironment="", scramArch=scramArch)
        childCmsswStepHelper.setConfigCache(configCacheUrl, taskConf['ConfigCacheID'], self.couchDBName)

        # Pileup check
        taskConf["PileupConfig"] = parsePileupConfig(taskConf["MCPileup"], taskConf["DataPileup"])
        if taskConf["PileupConfig"]:
            self.setupPileup(task, taskConf['PileupConfig'])

        # Handling the output modules
        parentKeepOutput = strToBool(origArgs[parentStepNumber].get('KeepOutput', True))
        parentCmsswStepHelper.keepOutput(parentKeepOutput)
        childKeepOutput = strToBool(taskConf.get('KeepOutput', True))
        childCmsswStepHelper.keepOutput(childKeepOutput)
        self.setupOutputModules(task, taskConf["ConfigCacheID"], currentCmsRun, childKeepOutput,
                                taskConf['StepName'])

    # Closing out the task configuration. The last step output must be saved/merged.
    # NOTE(review): relies on childCmsswStepHelper from the final loop iteration,
    # i.e. assumes self.stepChain >= 2 — TODO confirm callers guarantee this
    childCmsswStepHelper.keepOutput(True)

    return
def validateSchema(self, schema):
    """
    _validateSchema_

    Go over each step and make sure it matches validation parameters.
    """
    seenOutputModules = []
    numSteps = schema['StepChain']
    couchUrl = schema.get("ConfigCacheUrl", None) or schema["CouchURL"]

    stepName = None
    for stepNum in range(1, numSteps + 1):
        stepName = "Step%s" % stepNum
        if stepName not in schema:
            msg = "No Step%s entry present in the request" % stepNum
            self.raiseValidationException(msg=msg)

        stepArgs = schema[stepName]
        # We can't handle non-dictionary steps
        if not isinstance(stepArgs, dict):
            msg = "Non-dictionary input for step in StepChain.\n"
            msg += "Could be an indicator of JSON error.\n"
            self.raiseValidationException(msg=msg)

        # Generic step parameter validation; Step1 may be a generator step
        firstStep = stepNum == 1
        self.validateTask(stepArgs,
                          self.getTaskArguments(firstStep, firstStep and 'InputDataset' not in stepArgs))

        # Validate the existence of the configCache
        if stepArgs["ConfigCacheID"]:
            self.validateConfigCacheExists(configID=stepArgs['ConfigCacheID'],
                                           couchURL=couchUrl,
                                           couchDBName=schema["CouchDBName"],
                                           getOutputModules=True)

        # keeping different outputs with the same output module is not allowed
        if strToBool(stepArgs.get("KeepOutput", True)):
            configOutput = self.determineOutputModules(configDoc=stepArgs["ConfigCacheID"],
                                                       couchURL=couchUrl,
                                                       couchDBName=schema["CouchDBName"])
            for modName in configOutput.keys():
                if modName in seenOutputModules:
                    msg = "StepChain does not support KeepOutput sharing the same output module."
                    msg += "\n%s re-using outputModule: %s" % (stepName, modName)
                    self.raiseValidationException(msg=msg)
                else:
                    seenOutputModules.append(modName)

    # stepName now refers to the final step: its output must always be kept
    if 'KeepOutput' in schema[stepName] and not strToBool(schema[stepName]['KeepOutput']):
        msg = "Dropping the output of the last step is prohibited.\n"
        msg += "Set the 'KeepOutput' value to True and try again."
        self.raiseValidationException(msg=msg)
def validateSchema(self, schema):
    """
    _validateSchema_

    Go over each step and make sure it matches validation parameters.
    """
    # output module names already claimed by an earlier step
    outputMods = []
    numSteps = schema['StepChain']
    # fall back to the generic couch URL when no ConfigCache-specific one is set
    couchUrl = schema.get("ConfigCacheUrl", None) or schema["CouchURL"]
    for i in range(1, numSteps + 1):
        stepName = "Step%s" % i
        if stepName not in schema:
            msg = "No Step%s entry present in the request" % i
            self.raiseValidationException(msg=msg)

        step = schema[stepName]
        # We can't handle non-dictionary steps
        if not isinstance(step, dict):
            msg = "Non-dictionary input for step in StepChain.\n"
            msg += "Could be an indicator of JSON error.\n"
            self.raiseValidationException(msg=msg)

        # Generic step parameter validation; Step1 is special, it may be a
        # generator step (firstTask=True and no InputDataset)
        self.validateTask(step, self.getTaskArguments(i == 1, i == 1 and 'InputDataset' not in step))

        # Validate the existence of the configCache
        if step["ConfigCacheID"]:
            self.validateConfigCacheExists(configID=step['ConfigCacheID'],
                                           couchURL=couchUrl,
                                           couchDBName=schema["CouchDBName"],
                                           getOutputModules=True)

        # keeping different outputs with the same output module is not allowed
        if strToBool(step.get("KeepOutput", True)):
            configOutput = self.determineOutputModules(configDoc=step["ConfigCacheID"],
                                                       couchURL=couchUrl,
                                                       couchDBName=schema["CouchDBName"])
            for outputModuleName in configOutput.keys():
                if outputModuleName in outputMods:
                    msg = "StepChain does not support KeepOutput sharing the same output module."
                    msg += "\n%s re-using outputModule: %s" % (stepName, outputModuleName)
                    self.raiseValidationException(msg=msg)
                else:
                    outputMods.append(outputModuleName)

    # stepName still points at the last step here: its output must be kept
    if 'KeepOutput' in schema[stepName] and not strToBool(schema[stepName]['KeepOutput']):
        msg = "Dropping the output of the last step is prohibited.\n"
        msg += "Set the 'KeepOutput' value to True and try again."
        self.raiseValidationException(msg=msg)
def setupGeneratorTask(self, task, taskConf):
    """
    _setupGeneratorTask_

    Set up an initial generator task.
    """
    # Production task: events are generated from scratch, so the processing
    # task is built without an input dataset.
    outMods = self.setupProcessingTask(task, "Production",
                                       couchURL=self.couchURL,
                                       couchDBName=self.couchDBName,
                                       configDoc=taskConf['ConfigCacheID'],
                                       splitAlgo=taskConf["SplittingAlgo"],
                                       configCacheUrl=self.configCacheUrl,
                                       splitArgs=taskConf["SplittingArguments"],
                                       seeding=taskConf['Seeding'],
                                       totalEvents=taskConf['RequestNumEvents'],
                                       timePerEvent=self.timePerEvent,
                                       memoryReq=taskConf.get('Memory', None),
                                       sizePerEvent=self.sizePerEvent,
                                       cmsswVersion=taskConf.get("CMSSWVersion", None),
                                       scramArch=taskConf.get("ScramArch", None),
                                       globalTag=taskConf.get("GlobalTag", None),
                                       taskConf=taskConf)

    # outputModules were added already, we just want to create merge tasks here
    if strToBool(taskConf.get('KeepOutput', True)):
        for modName in outMods.keys():
            self.addMergeTask(task, self.splittingAlgo, modName, "cmsRun1")

    return
def setupGeneratorTask(self, task, taskConf):
    """
    _setupGeneratorTask_

    Set up an initial generator task.
    """
    configCacheID = taskConf['ConfigCacheID']
    splitAlgorithm = taskConf["SplittingAlgo"]
    splitArguments = taskConf["SplittingArguments"]
    # "Production" task type: events are generated, no input dataset is used
    outMods = self.setupProcessingTask(task, "Production",
                                       couchURL=self.couchURL, couchDBName=self.couchDBName,
                                       configDoc=configCacheID, splitAlgo=splitAlgorithm,
                                       configCacheUrl=self.configCacheUrl,
                                       splitArgs=splitArguments, seeding=taskConf['Seeding'],
                                       totalEvents=taskConf['RequestNumEvents'],
                                       timePerEvent=self.timePerEvent,
                                       memoryReq=taskConf.get('Memory', None),
                                       sizePerEvent=self.sizePerEvent,
                                       cmsswVersion=taskConf.get("CMSSWVersion", None),
                                       scramArch=taskConf.get("ScramArch", None),
                                       globalTag=taskConf.get("GlobalTag", None),
                                       taskConf=taskConf)
    # outputModules were added already, we just want to create merge tasks here
    if strToBool(taskConf.get('KeepOutput', True)):
        for outputModuleName in outMods.keys():
            dummyTask = self.addMergeTask(task, self.splittingAlgo, outputModuleName, "cmsRun1")
    return
def setupTask(self, task, taskConf):
    """
    _setupTask_

    Build the task using the setupProcessingTask from StdBase and set the parents appropriately to
    handle a processing task
    """
    self.inputDataset = taskConf["InputDataset"]
    # Use PD from the inputDataset if not provided in the task itself
    if not self.inputPrimaryDataset:
        self.inputPrimaryDataset = self.inputDataset[1:].split("/")[0]

    outMods = self.setupProcessingTask(task, "Processing",
                                       inputDataset=self.inputDataset,
                                       couchURL=self.couchURL,
                                       couchDBName=self.couchDBName,
                                       configDoc=taskConf["ConfigCacheID"],
                                       splitAlgo=taskConf["SplittingAlgo"],
                                       configCacheUrl=self.configCacheUrl,
                                       splitArgs=taskConf["SplittingArguments"],
                                       timePerEvent=self.timePerEvent,
                                       memoryReq=taskConf.get('Memory', None),
                                       sizePerEvent=self.sizePerEvent,
                                       cmsswVersion=taskConf.get("CMSSWVersion", None),
                                       scramArch=taskConf.get("ScramArch", None),
                                       globalTag=taskConf.get("GlobalTag", None),
                                       taskConf=taskConf)

    # Apply a lumi mask if one was given for the task or the whole workload
    lumiMask = taskConf.get("LumiList", self.workload.lumiList)
    if lumiMask:
        task.setLumiMask(lumiMask)

    if taskConf["PileupConfig"]:
        self.setupPileup(task, taskConf['PileupConfig'])

    # outputModules were added already, we just want to create merge tasks here
    if strToBool(taskConf.get('KeepOutput', True)):
        for modName in outMods.keys():
            self.addMergeTask(task, self.splittingAlgo, modName, "cmsRun1")

    return
def setupTask(self, task, taskConf):
    """
    _setupTask_

    Build the task using the setupProcessingTask from StdBase and set the parents appropriately to
    handle a processing task
    """
    configCacheID = taskConf["ConfigCacheID"]
    splitAlgorithm = taskConf["SplittingAlgo"]
    splitArguments = taskConf["SplittingArguments"]
    self.inputDataset = taskConf["InputDataset"]
    # Use PD from the inputDataset if not provided in the task itself
    if not self.inputPrimaryDataset:
        self.inputPrimaryDataset = self.inputDataset[1:].split("/")[0]

    outMods = self.setupProcessingTask(task, "Processing",
                                       inputDataset=self.inputDataset,
                                       couchURL=self.couchURL, couchDBName=self.couchDBName,
                                       configDoc=configCacheID, splitAlgo=splitAlgorithm,
                                       configCacheUrl=self.configCacheUrl,
                                       splitArgs=splitArguments,
                                       timePerEvent=self.timePerEvent,
                                       memoryReq=taskConf.get('Memory', None),
                                       sizePerEvent=self.sizePerEvent,
                                       cmsswVersion=taskConf.get("CMSSWVersion", None),
                                       scramArch=taskConf.get("ScramArch", None),
                                       globalTag=taskConf.get("GlobalTag", None),
                                       taskConf=taskConf)

    # Apply a lumi mask if set on the task, else fall back to the workload's
    lumiMask = taskConf.get("LumiList", self.workload.lumiList)
    if lumiMask:
        task.setLumiMask(lumiMask)

    if taskConf["PileupConfig"]:
        self.setupPileup(task, taskConf['PileupConfig'])

    # outputModules were added already, we just want to create merge tasks here
    if strToBool(taskConf.get('KeepOutput', True)):
        for outputModuleName in outMods.keys():
            dummyTask = self.addMergeTask(task, self.splittingAlgo, outputModuleName, "cmsRun1")
    return
def assignWorkload(self, requestName, kwargs):
    """
    Make all the necessary changes in the Workload to reflect the new assignment.

    :param requestName: name of the request to assign
    :param kwargs: dict of assignment arguments supplied by the user
    :raises cherrypy.HTTPError: 400 on any invalid assignment argument
    """
    request = GetRequest.getRequestByName(requestName)
    helper = Utilities.loadWorkload(request)

    # Validate the different parts of the processed dataset; the current
    # workload values serve as defaults when the argument was not supplied
    processedDatasetParts = {"AcquisitionEra": helper.getAcquisitionEra(),
                             "ProcessingString": helper.getProcessingString(),
                             "ProcessingVersion": helper.getProcessingVersion(),
                            }
    for field, origValue in processedDatasetParts.iteritems():
        if field in kwargs and isinstance(kwargs[field], dict):
            # dict form: per-task values, validate each one individually
            for value in kwargs[field].values():
                self.validate(value, field)
        else:
            # BUGFIX: the field name was previously dropped here, so the
            # scalar branch validated without knowing which field it checked
            # (the dict branch and the newer implementation both pass it)
            self.validate(kwargs.get(field, origValue), field)

    # Set white list and black list; normalize scalars to one-element lists
    whiteList = kwargs.get("SiteWhitelist", [])
    blackList = kwargs.get("SiteBlacklist", [])
    if not isinstance(whiteList, list):
        whiteList = [whiteList]
    if not isinstance(blackList, list):
        blackList = [blackList]
    helper.setSiteWildcardsLists(siteWhitelist=whiteList, siteBlacklist=blackList,
                                 wildcardDict=self.wildcardSites)
    res = set(whiteList) & set(blackList)
    if len(res):
        raise cherrypy.HTTPError(400, "White and blacklist the same site is not allowed %s" % list(res))

    # Set AcquisitionEra, ProcessingString and ProcessingVersion
    # which could be json encoded dicts
    if "AcquisitionEra" in kwargs:
        helper.setAcquisitionEra(kwargs["AcquisitionEra"])
    if "ProcessingString" in kwargs:
        helper.setProcessingString(kwargs["ProcessingString"])
    if "ProcessingVersion" in kwargs:
        helper.setProcessingVersion(kwargs["ProcessingVersion"])

    # Now verify the output datasets (/primary/processed/TIER)
    datatier = []
    outputDatasets = helper.listOutputDatasets()
    for dataset in outputDatasets:
        tokens = dataset.split("/")
        procds = tokens[2]
        datatier.append(tokens[3])
        try:
            WMCore.Lexicon.procdataset(procds)
        except AssertionError as ex:
            raise cherrypy.HTTPError(400,
                                     "Bad output dataset name, check the processed dataset.\n %s" % str(ex))

    # Verify whether the output datatiers are available in DBS
    self.validateDatatier(datatier, dbsUrl=helper.getDbsUrl())

    # FIXME not validated
    helper.setLFNBase(kwargs["MergedLFNBase"], kwargs["UnmergedLFNBase"])
    helper.setMergeParameters(int(kwargs.get("MinMergeSize", 2147483648)),
                              int(kwargs.get("MaxMergeSize", 4294967296)),
                              int(kwargs.get("MaxMergeEvents", 50000)))
    helper.setupPerformanceMonitoring(kwargs.get("MaxRSS", None),
                                      kwargs.get("MaxVSize", None),
                                      kwargs.get("SoftTimeout", None),
                                      kwargs.get("GracePeriod", None))

    # Check whether we should check location for the data
    helper.setTrustLocationFlag(inputFlag=strToBool(kwargs.get("TrustSitelists", False)),
                                pileupFlag=strToBool(kwargs.get("TrustPUSitelists", False)))
    helper.setAllowOpportunistic(allowOpport=strToBool(kwargs.get("AllowOpportunistic", False)))

    # Set phedex subscription information
    custodialList = kwargs.get("CustodialSites", [])
    nonCustodialList = kwargs.get("NonCustodialSites", [])
    autoApproveList = kwargs.get("AutoApproveSubscriptionSites", [])
    for site in autoApproveList:
        if site.endswith("_MSS"):
            raise cherrypy.HTTPError(400, "Auto-approval to MSS endpoint not allowed %s" % autoApproveList)
    subscriptionPriority = kwargs.get("SubscriptionPriority", "Low")
    if subscriptionPriority not in ["Low", "Normal", "High"]:
        raise cherrypy.HTTPError(400, "Invalid subscription priority %s" % subscriptionPriority)
    custodialType = kwargs.get("CustodialSubType", "Replica")
    if custodialType not in ["Move", "Replica"]:
        raise cherrypy.HTTPError(400, "Invalid custodial subscription type %s" % custodialType)
    nonCustodialType = kwargs.get("NonCustodialSubType", "Replica")
    if nonCustodialType not in ["Move", "Replica"]:
        raise cherrypy.HTTPError(400, "Invalid noncustodial subscription type %s" % nonCustodialType)
    if "CustodialGroup" in kwargs and not isinstance(kwargs["CustodialGroup"], basestring):
        raise cherrypy.HTTPError(400, "Invalid CustodialGroup format %s" % kwargs["CustodialGroup"])
    if "NonCustodialGroup" in kwargs and not isinstance(kwargs["NonCustodialGroup"], basestring):
        raise cherrypy.HTTPError(400, "Invalid NonCustodialGroup format %s" % kwargs["NonCustodialGroup"])
    if "DeleteFromSource" in kwargs and not isinstance(kwargs["DeleteFromSource"], bool):
        raise cherrypy.HTTPError(400, "Invalid DeleteFromSource format %s" % kwargs["DeleteFromSource"])

    helper.setSubscriptionInformationWildCards(wildcardDict=self.wildcardSites,
                                               custodialSites=custodialList,
                                               nonCustodialSites=nonCustodialList,
                                               autoApproveSites=autoApproveList,
                                               custodialSubType=custodialType,
                                               nonCustodialSubType=nonCustodialType,
                                               custodialGroup=kwargs.get("CustodialGroup", "DataOps"),
                                               nonCustodialGroup=kwargs.get("NonCustodialGroup", "DataOps"),
                                               priority=subscriptionPriority,
                                               deleteFromSource=kwargs.get("DeleteFromSource", False))

    # Block closing information
    blockCloseMaxWaitTime = int(kwargs.get("BlockCloseMaxWaitTime", helper.getBlockCloseMaxWaitTime()))
    blockCloseMaxFiles = int(kwargs.get("BlockCloseMaxFiles", helper.getBlockCloseMaxFiles()))
    blockCloseMaxEvents = int(kwargs.get("BlockCloseMaxEvents", helper.getBlockCloseMaxEvents()))
    blockCloseMaxSize = int(kwargs.get("BlockCloseMaxSize", helper.getBlockCloseMaxSize()))

    helper.setBlockCloseSettings(blockCloseMaxWaitTime, blockCloseMaxFiles,
                                 blockCloseMaxEvents, blockCloseMaxSize)

    helper.setMemoryAndCores(kwargs.get("Memory"), kwargs.get("Multicore"))
    helper.setDashboardActivity(kwargs.get("Dashboard", ""))
    helper.setTaskProperties(kwargs)
    Utilities.saveWorkload(helper, request["RequestWorkflow"], self.wmstatWriteURL)

    # update AcquisitionEra in the Couch document (#4380)
    # request object returned above from Oracle doesn't have information Couch
    # database
    reqDetails = Utilities.requestDetails(request["RequestName"])
    couchDb = Database(reqDetails["CouchWorkloadDBName"], reqDetails["CouchURL"])
    couchDb.updateDocument(request["RequestName"], "ReqMgr", "updaterequest",
                           fields={"AcquisitionEra": reqDetails["AcquisitionEra"],
                                   "ProcessingVersion": reqDetails["ProcessingVersion"],
                                   "CustodialSites": custodialList,
                                   "NonCustodialSites": nonCustodialList,
                                   "AutoApproveSubscriptionSites": autoApproveList,
                                   "SubscriptionPriority": subscriptionPriority,
                                   "CustodialSubType": custodialType,
                                   "NonCustodialSubType": nonCustodialType,
                                   "CustodialGroup": kwargs.get("CustodialGroup", "DataOps"),
                                   "NonCustodialGroup": kwargs.get("NonCustodialGroup", "DataOps"),
                                   "DeleteFromSource": kwargs.get("DeleteFromSource", False),
                                   "Teams": kwargs["Teams"],
                                   "OutputDatasets": outputDatasets,
                                   "SiteWhitelist": whiteList,
                                   "SiteBlacklist": blackList,
                                   "MergedLFNBase": kwargs["MergedLFNBase"],
                                   "UnmergedLFNBase": kwargs["UnmergedLFNBase"],
                                   "Dashboard": kwargs.get("Dashboard", ""),
                                   "TrustSitelists": kwargs.get("TrustSitelists", False),
                                   "TrustPUSitelists": kwargs.get("TrustPUSitelists", False),
                                   "AllowOpportunistic": kwargs.get("AllowOpportunistic", False)},
                           useBody=True)
def assignWorkload(self, requestName, kwargs):
    """
    Make all the necessary changes in the Workload to reflect the new assignment
    """
    request = GetRequest.getRequestByName(requestName)
    helper = Utilities.loadWorkload(request)

    # Run the spec-level validation of the assignment arguments first
    try:
        helper.validateArgumentForAssignment(kwargs)
    except WMSpecFactoryException as ex:
        raise cherrypy.HTTPError(400, str(ex.message()))
    except Exception:
        msg = traceback.format_exc()
        raise cherrypy.HTTPError(400, "Unhandled error: %s" % msg)

    # Validate the different parts of the processed dataset
    processedDatasetParts = {"AcquisitionEra": kwargs.get("AcquisitionEra"),
                             "ProcessingString": kwargs.get("ProcessingString"),
                             "ProcessingVersion": kwargs.get("ProcessingVersion")
                            }
    for field, values in processedDatasetParts.iteritems():
        if field in kwargs and isinstance(kwargs[field], dict):
            # dict form: per-task values, validate each one individually
            for value in kwargs[field].values():
                self.validate(value, field)
        else:
            self.validate(kwargs.get(field, values), field)

    # Set white list and black list; scalars are normalized to lists
    whiteList = kwargs.get("SiteWhitelist", [])
    blackList = kwargs.get("SiteBlacklist", [])
    if not isinstance(whiteList, list):
        whiteList = [whiteList]
    if not isinstance(blackList, list):
        blackList = [blackList]
    helper.setSiteWildcardsLists(siteWhitelist=whiteList, siteBlacklist=blackList,
                                 wildcardDict=self.wildcardSites)
    res = set(whiteList) & set(blackList)
    if len(res):
        raise cherrypy.HTTPError(400, "White and blacklist the same site is not allowed %s" % list(res))

    helper.setAcquisitionEra(kwargs.get("AcquisitionEra", None))
    helper.setProcessingString(kwargs.get("ProcessingString", None))
    helper.setProcessingVersion(kwargs.get("ProcessingVersion", None))

    # Now verify the output datasets (/primary/processed/TIER)
    datatier = []
    outputDatasets = helper.listOutputDatasets()
    for dataset in outputDatasets:
        tokens = dataset.split("/")
        procds = tokens[2]
        datatier.append(tokens[3])
        try:
            WMCore.Lexicon.procdataset(procds)
        except AssertionError as ex:
            raise cherrypy.HTTPError(400,
                                     "Bad output dataset name, check the processed dataset.\n %s" % str(ex))

    # Verify whether the output datatiers are available in DBS
    self.validateDatatier(datatier, dbsUrl=helper.getDbsUrl())

    # FIXME not validated
    helper.setLFNBase(kwargs["MergedLFNBase"], kwargs["UnmergedLFNBase"])
    helper.setMergeParameters(int(kwargs.get("MinMergeSize", 2147483648)),
                              int(kwargs.get("MaxMergeSize", 4294967296)),
                              int(kwargs.get("MaxMergeEvents", 50000)))
    helper.setupPerformanceMonitoring(kwargs.get("MaxRSS", None),
                                      kwargs.get("MaxVSize", None),
                                      kwargs.get("SoftTimeout", None),
                                      kwargs.get("GracePeriod", None))

    # Check whether we should check location for the data
    helper.setTrustLocationFlag(inputFlag=strToBool(kwargs.get("TrustSitelists", False)),
                                pileupFlag=strToBool(kwargs.get("TrustPUSitelists", False)))
    helper.setAllowOpportunistic(allowOpport=strToBool(kwargs.get("AllowOpportunistic", False)))

    # Set phedex subscription information
    custodialList = kwargs.get("CustodialSites", [])
    nonCustodialList = kwargs.get("NonCustodialSites", [])
    autoApproveList = kwargs.get("AutoApproveSubscriptionSites", [])
    subscriptionPriority = kwargs.get("SubscriptionPriority", "Low")
    custodialType = kwargs.get("CustodialSubType", "Replica")
    nonCustodialType = kwargs.get("NonCustodialSubType", "Replica")

    helper.setSubscriptionInformationWildCards(wildcardDict=self.wildcardSites,
                                               custodialSites=custodialList,
                                               nonCustodialSites=nonCustodialList,
                                               autoApproveSites=autoApproveList,
                                               custodialSubType=custodialType,
                                               nonCustodialSubType=nonCustodialType,
                                               custodialGroup=kwargs.get("CustodialGroup", "DataOps"),
                                               nonCustodialGroup=kwargs.get("NonCustodialGroup", "DataOps"),
                                               priority=subscriptionPriority,
                                               deleteFromSource=kwargs.get("DeleteFromSource", False))

    # Block closing information
    blockCloseMaxWaitTime = int(kwargs.get("BlockCloseMaxWaitTime", helper.getBlockCloseMaxWaitTime()))
    blockCloseMaxFiles = int(kwargs.get("BlockCloseMaxFiles", helper.getBlockCloseMaxFiles()))
    blockCloseMaxEvents = int(kwargs.get("BlockCloseMaxEvents", helper.getBlockCloseMaxEvents()))
    blockCloseMaxSize = int(kwargs.get("BlockCloseMaxSize", helper.getBlockCloseMaxSize()))

    helper.setBlockCloseSettings(blockCloseMaxWaitTime, blockCloseMaxFiles,
                                 blockCloseMaxEvents, blockCloseMaxSize)

    helper.setMemory(kwargs.get("Memory"))
    helper.setCores(kwargs.get("Multicore"))
    helper.setDashboardActivity(kwargs.get("Dashboard", ""))
    helper.setTaskProperties(kwargs)
    Utilities.saveWorkload(helper, request['RequestWorkflow'], self.wmstatWriteURL)

    # update AcquisitionEra in the Couch document (#4380)
    # request object returned above from Oracle doesn't have information Couch
    # database
    reqDetails = Utilities.requestDetails(request["RequestName"])
    couchDb = Database(reqDetails["CouchWorkloadDBName"], reqDetails["CouchURL"])
    couchDb.updateDocument(request["RequestName"], "ReqMgr", "updaterequest",
                           fields={"AcquisitionEra": reqDetails["AcquisitionEra"],
                                   "ProcessingVersion": reqDetails["ProcessingVersion"],
                                   "CustodialSites": custodialList,
                                   "NonCustodialSites": nonCustodialList,
                                   "AutoApproveSubscriptionSites": autoApproveList,
                                   "SubscriptionPriority": subscriptionPriority,
                                   "CustodialSubType": custodialType,
                                   "NonCustodialSubType": nonCustodialType,
                                   "CustodialGroup": kwargs.get("CustodialGroup", "DataOps"),
                                   "NonCustodialGroup": kwargs.get("NonCustodialGroup", "DataOps"),
                                   "DeleteFromSource": kwargs.get("DeleteFromSource", False),
                                   "Teams": kwargs["Teams"],
                                   "OutputDatasets": outputDatasets,
                                   "SiteWhitelist": whiteList,
                                   "SiteBlacklist": blackList,
                                   "MergedLFNBase": kwargs["MergedLFNBase"],
                                   "UnmergedLFNBase": kwargs["UnmergedLFNBase"],
                                   "Dashboard": kwargs.get("Dashboard", ""),
                                   "TrustSitelists": kwargs.get("TrustSitelists", False),
                                   "TrustPUSitelists": kwargs.get("TrustPUSitelists", False),
                                   "AllowOpportunistic": kwargs.get("AllowOpportunistic", False)},
                           useBody=True)
def assignWorkload(self, requestName, kwargs):
    """
    Make all the necessary changes in the Workload to reflect the new assignment.

    :param requestName: name of the request to assign
    :param kwargs: dict of assignment arguments supplied by the user
    :raises cherrypy.HTTPError: 400 on any invalid assignment argument
    """
    request = GetRequest.getRequestByName(requestName)
    helper = Utilities.loadWorkload(request)

    # Validate the different parts of the processed dataset
    processedDatasetParts = ["AcquisitionEra", "ProcessingVersion"]
    if kwargs.get("ProcessingString", None):
        processedDatasetParts.append("ProcessingString")
    for field in processedDatasetParts:
        # isinstance instead of type() comparison (also accepts dict subclasses)
        if isinstance(kwargs[field], dict):
            # dict form: per-task values, validate each one individually
            for value in kwargs[field].values():
                self.validate(value, field)
        else:
            self.validate(kwargs[field], field)

    # Set white list and black list; scalars are normalized to lists
    whiteList = kwargs.get("SiteWhitelist", [])
    blackList = kwargs.get("SiteBlacklist", [])
    if not isinstance(whiteList, list):
        whiteList = [whiteList]
    if not isinstance(blackList, list):
        blackList = [blackList]
    helper.setSiteWildcardsLists(siteWhitelist=whiteList, siteBlacklist=blackList,
                                 wildcardDict=self.wildcardSites)
    res = set(whiteList) & set(blackList)
    if len(res):
        raise cherrypy.HTTPError(400, "White and blacklist the same site is not allowed %s" % list(res))

    # Set ProcessingVersion and AcquisitionEra, which could be json encoded dicts
    helper.setProcessingVersion(kwargs["ProcessingVersion"])
    helper.setAcquisitionEra(kwargs["AcquisitionEra"])
    helper.setProcessingString(kwargs.get("ProcessingString", None))

    # Now verify the output datasets (/primary/processed/TIER)
    datatier = []
    outputDatasets = helper.listOutputDatasets()
    for dataset in outputDatasets:
        tokens = dataset.split("/")
        procds = tokens[2]
        datatier.append(tokens[3])
        try:
            WMCore.Lexicon.procdataset(procds)
        except AssertionError as ex:
            raise cherrypy.HTTPError(400,
                                     "Bad output dataset name, check the processed dataset.\n %s" % str(ex))

    # Verify whether the output datatiers are available in DBS
    self.validateDatatier(datatier, dbsUrl=helper.getDbsUrl())

    # FIXME not validated
    helper.setLFNBase(kwargs["MergedLFNBase"], kwargs["UnmergedLFNBase"])
    helper.setMergeParameters(int(kwargs.get("MinMergeSize", 2147483648)),
                              int(kwargs.get("MaxMergeSize", 4294967296)),
                              int(kwargs.get("MaxMergeEvents", 50000)))
    helper.setupPerformanceMonitoring(kwargs.get("MaxRSS", None),
                                      kwargs.get("MaxVSize", None),
                                      kwargs.get("SoftTimeout", None),
                                      kwargs.get("GracePeriod", None))

    # Check whether we should check location for the data
    useAAA = strToBool(kwargs.get("useSiteListAsLocation", False))
    if useAAA:
        helper.setLocationDataSourceFlag(flag=useAAA)

    # Set phedex subscription information
    custodialList = kwargs.get("CustodialSites", [])
    nonCustodialList = kwargs.get("NonCustodialSites", [])
    autoApproveList = kwargs.get("AutoApproveSubscriptionSites", [])
    for site in autoApproveList:
        if site.endswith('_MSS'):
            raise cherrypy.HTTPError(400, "Auto-approval to MSS endpoint not allowed %s" % autoApproveList)
    subscriptionPriority = kwargs.get("SubscriptionPriority", "Low")
    if subscriptionPriority not in ["Low", "Normal", "High"]:
        raise cherrypy.HTTPError(400, "Invalid subscription priority %s" % subscriptionPriority)
    custodialType = kwargs.get("CustodialSubType", "Replica")
    if custodialType not in ["Move", "Replica"]:
        raise cherrypy.HTTPError(400, "Invalid custodial subscription type %s" % custodialType)
    nonCustodialType = kwargs.get("NonCustodialSubType", "Replica")
    if nonCustodialType not in ["Move", "Replica"]:
        raise cherrypy.HTTPError(400, "Invalid noncustodial subscription type %s" % nonCustodialType)

    helper.setSubscriptionInformationWildCards(wildcardDict=self.wildcardSites,
                                               custodialSites=custodialList,
                                               nonCustodialSites=nonCustodialList,
                                               autoApproveSites=autoApproveList,
                                               custodialSubType=custodialType,
                                               nonCustodialSubType=nonCustodialType,
                                               priority=subscriptionPriority)

    # Block closing information
    blockCloseMaxWaitTime = int(kwargs.get("BlockCloseMaxWaitTime", helper.getBlockCloseMaxWaitTime()))
    blockCloseMaxFiles = int(kwargs.get("BlockCloseMaxFiles", helper.getBlockCloseMaxFiles()))
    blockCloseMaxEvents = int(kwargs.get("BlockCloseMaxEvents", helper.getBlockCloseMaxEvents()))
    blockCloseMaxSize = int(kwargs.get("BlockCloseMaxSize", helper.getBlockCloseMaxSize()))

    helper.setBlockCloseSettings(blockCloseMaxWaitTime, blockCloseMaxFiles,
                                 blockCloseMaxEvents, blockCloseMaxSize)

    helper.setDashboardActivity(kwargs.get("Dashboard", ""))
    # set Task properties if they are exist
    # TODO: need to define the task format (maybe kwargs["tasks"]?)
    helper.setTaskProperties(kwargs)

    Utilities.saveWorkload(helper, request['RequestWorkflow'], self.wmstatWriteURL)

    # update AcquisitionEra in the Couch document (#4380)
    # request object returned above from Oracle doesn't have information Couch
    # database
    reqDetails = Utilities.requestDetails(request["RequestName"])
    couchDb = Database(reqDetails["CouchWorkloadDBName"], reqDetails["CouchURL"])
    couchDb.updateDocument(request["RequestName"], "ReqMgr", "updaterequest",
                           fields={"AcquisitionEra": reqDetails["AcquisitionEra"],
                                   "ProcessingVersion": reqDetails["ProcessingVersion"],
                                   "CustodialSites": custodialList,
                                   "NonCustodialSites": nonCustodialList,
                                   "AutoApproveSubscriptionSites": autoApproveList,
                                   "SubscriptionPriority": subscriptionPriority,
                                   "CustodialSubType": custodialType,
                                   "NonCustodialSubType": nonCustodialType,
                                   "Teams": kwargs["Teams"],
                                   "OutputDatasets": outputDatasets,
                                   "SiteWhitelist": whiteList,
                                   "SiteBlacklist": blackList},
                           useBody=True)
def setupNextSteps(self, task, origArgs):
    """
    _setupNextSteps_

    Modify the step one task to include N more CMSSW steps and
    chain the output between all three steps.
    """
    configCacheUrl = self.configCacheUrl or self.couchURL
    # Map the user-visible StepName to its ("StepN", "cmsRunN") pair so later
    # steps can resolve which step they chain their input from
    stepMapping = {}
    stepMapping.setdefault(origArgs['Step1']['StepName'], ('Step1', 'cmsRun1'))
    for i in range(2, self.stepChain + 1):
        currentStepNumber = "Step%d" % i
        currentCmsRun = "cmsRun%d" % i
        stepMapping.setdefault(origArgs[currentStepNumber]['StepName'],
                               (currentStepNumber, currentCmsRun))
        # Shallow copy of the step arguments so defaults can be filled in
        # without mutating the original request arguments
        taskConf = {}
        for k, v in origArgs[currentStepNumber].iteritems():
            taskConf[k] = v

        # Resolve the parent step this one takes its input from
        parentStepNumber = stepMapping.get(taskConf['InputStep'])[0]
        parentCmsRun = stepMapping.get(taskConf['InputStep'])[1]
        parentCmsswStep = task.getStep(parentCmsRun)
        parentCmsswStepHelper = parentCmsswStep.getTypeHelper()

        # Set default values for the task parameters
        self.modifyTaskConfiguration(taskConf, False, 'InputDataset' not in taskConf)
        globalTag = taskConf.get("GlobalTag", self.globalTag)
        frameworkVersion = taskConf.get("CMSSWVersion", self.frameworkVersion)
        scramArch = taskConf.get("ScramArch", self.scramArch)

        # Create the new CMSSW step chained under the parent step
        childCmssw = parentCmsswStep.addTopStep(currentCmsRun)
        childCmssw.setStepType("CMSSW")
        template = StepFactory.getStepTemplate("CMSSW")
        template(childCmssw.data)

        childCmsswStepHelper = childCmssw.getTypeHelper()
        childCmsswStepHelper.setGlobalTag(globalTag)
        childCmsswStepHelper.setupChainedProcessing(parentCmsRun, taskConf['InputFromOutputModule'])
        childCmsswStepHelper.cmsswSetup(frameworkVersion, softwareEnvironment="", scramArch=scramArch)
        childCmsswStepHelper.setConfigCache(configCacheUrl, taskConf['ConfigCacheID'], self.couchDBName)

        # Pileup check
        taskConf["PileupConfig"] = parsePileupConfig(taskConf["MCPileup"], taskConf["DataPileup"])
        if taskConf["PileupConfig"]:
            self.setupPileup(task, taskConf['PileupConfig'])

        # Handling the output modules
        parentKeepOutput = strToBool(origArgs[parentStepNumber].get('KeepOutput', True))
        parentCmsswStepHelper.keepOutput(parentKeepOutput)
        childKeepOutput = strToBool(taskConf.get('KeepOutput', True))
        childCmsswStepHelper.keepOutput(childKeepOutput)
        self.setupOutputModules(task, taskConf["ConfigCacheID"], currentCmsRun, childKeepOutput,
                                taskConf['StepName'])

    # Closing out the task configuration. The last step output must be saved/merged
    # NOTE(review): uses childCmsswStepHelper from the final loop iteration, so it
    # assumes self.stepChain >= 2 — TODO confirm callers guarantee this
    childCmsswStepHelper.keepOutput(True)

    return
def assignWorkload(self, requestName, kwargs):
    """
    Make all the necessary changes in the Workload to reflect the new assignment.

    :param requestName: name of the request to assign.
    :param kwargs: dict of assignment arguments (site lists, era/version,
        merge parameters, subscription settings, block-closing thresholds, ...).
    :raises cherrypy.HTTPError: 400 on invalid site lists, dataset names or
        subscription parameters.
    """
    request = GetRequest.getRequestByName(requestName)
    helper = Utilities.loadWorkload(request)

    # Validate the different parts of the processed dataset
    processedDatasetParts = ["AcquisitionEra", "ProcessingVersion"]
    if kwargs.get("ProcessingString", None):
        processedDatasetParts.append("ProcessingString")
    for field in processedDatasetParts:
        # These fields may be json-encoded dicts keyed by task; validate each value.
        if isinstance(kwargs[field], dict):
            for value in kwargs[field].values():
                self.validate(value, field)
        else:
            self.validate(kwargs[field], field)

    # Set white list and black list; a bare string is promoted to a one-element list.
    whiteList = kwargs.get("SiteWhitelist", [])
    blackList = kwargs.get("SiteBlacklist", [])
    if not isinstance(whiteList, list):
        whiteList = [whiteList]
    if not isinstance(blackList, list):
        blackList = [blackList]
    helper.setSiteWildcardsLists(siteWhitelist=whiteList, siteBlacklist=blackList,
                                 wildcardDict=self.wildcardSites)
    # A site may not appear in both lists.
    res = set(whiteList) & set(blackList)
    if res:
        raise cherrypy.HTTPError(400, "White and blacklist the same site is not allowed %s" % list(res))
    # Set ProcessingVersion and AcquisitionEra, which could be json encoded dicts
    helper.setProcessingVersion(kwargs["ProcessingVersion"])
    helper.setAcquisitionEra(kwargs["AcquisitionEra"])
    helper.setProcessingString(kwargs.get("ProcessingString", None))

    # Now verify the output datasets (/<primary>/<processed>/<tier>)
    outputDatasets = helper.listOutputDatasets()
    for dataset in outputDatasets:
        tokens = dataset.split("/")
        procds = tokens[2]
        try:
            WMCore.Lexicon.procdataset(procds)
        except AssertionError as ex:
            raise cherrypy.HTTPError(400,
                                     "Bad output dataset name, check the processed dataset.\n %s" % str(ex))

    # FIXME not validated
    helper.setLFNBase(kwargs["MergedLFNBase"], kwargs["UnmergedLFNBase"])
    helper.setMergeParameters(int(kwargs.get("MinMergeSize", 2147483648)),
                              int(kwargs.get("MaxMergeSize", 4294967296)),
                              int(kwargs.get("MaxMergeEvents", 50000)))
    helper.setupPerformanceMonitoring(kwargs.get("MaxRSS", None),
                                      kwargs.get("MaxVSize", None),
                                      kwargs.get("SoftTimeout", None),
                                      kwargs.get("GracePeriod", None))

    # Check whether we should check location for the data
    useAAA = strToBool(kwargs.get("useSiteListAsLocation", False))
    if useAAA:
        helper.setLocationDataSourceFlag(flag=useAAA)

    # Set phedex subscription information
    custodialList = kwargs.get("CustodialSites", [])
    nonCustodialList = kwargs.get("NonCustodialSites", [])
    autoApproveList = kwargs.get("AutoApproveSubscriptionSites", [])
    for site in autoApproveList:
        if site.endswith('_MSS'):
            raise cherrypy.HTTPError(400,
                                     "Auto-approval to MSS endpoint not allowed %s" % autoApproveList)
    subscriptionPriority = kwargs.get("SubscriptionPriority", "Low")
    if subscriptionPriority not in ("Low", "Normal", "High"):
        raise cherrypy.HTTPError(400, "Invalid subscription priority %s" % subscriptionPriority)
    custodialType = kwargs.get("CustodialSubType", "Replica")
    if custodialType not in ("Move", "Replica"):
        raise cherrypy.HTTPError(400, "Invalid custodial subscription type %s" % custodialType)
    nonCustodialType = kwargs.get("NonCustodialSubType", "Replica")
    if nonCustodialType not in ("Move", "Replica"):
        raise cherrypy.HTTPError(400, "Invalid noncustodial subscription type %s" % nonCustodialType)

    helper.setSubscriptionInformationWildCards(wildcardDict=self.wildcardSites,
                                               custodialSites=custodialList,
                                               nonCustodialSites=nonCustodialList,
                                               autoApproveSites=autoApproveList,
                                               custodialSubType=custodialType,
                                               nonCustodialSubType=nonCustodialType,
                                               priority=subscriptionPriority)

    # Block closing information; fall back to the workload's current settings.
    blockCloseMaxWaitTime = int(kwargs.get("BlockCloseMaxWaitTime",
                                           helper.getBlockCloseMaxWaitTime()))
    blockCloseMaxFiles = int(kwargs.get("BlockCloseMaxFiles",
                                        helper.getBlockCloseMaxFiles()))
    blockCloseMaxEvents = int(kwargs.get("BlockCloseMaxEvents",
                                         helper.getBlockCloseMaxEvents()))
    blockCloseMaxSize = int(kwargs.get("BlockCloseMaxSize",
                                       helper.getBlockCloseMaxSize()))

    helper.setBlockCloseSettings(blockCloseMaxWaitTime, blockCloseMaxFiles,
                                 blockCloseMaxEvents, blockCloseMaxSize)

    helper.setDashboardActivity(kwargs.get("Dashboard", ""))
    # set Task properties if they are exist
    # TODO: need to define the task format (maybe kwargs["tasks"]?)
    helper.setTaskProperties(kwargs)

    Utilities.saveWorkload(helper, request['RequestWorkflow'], self.wmstatWriteURL)

    # update AcquisitionEra in the Couch document (#4380)
    # request object returned above from Oracle doesn't have information Couch
    # database
    reqDetails = Utilities.requestDetails(request["RequestName"])
    couchDb = Database(reqDetails["CouchWorkloadDBName"], reqDetails["CouchURL"])
    couchDb.updateDocument(request["RequestName"], "ReqMgr", "updaterequest",
                           fields={"AcquisitionEra": reqDetails["AcquisitionEra"],
                                   "ProcessingVersion": reqDetails["ProcessingVersion"],
                                   "CustodialSites": custodialList,
                                   "NonCustodialSites": nonCustodialList,
                                   "AutoApproveSubscriptionSites": autoApproveList,
                                   "SubscriptionPriority": subscriptionPriority,
                                   "CustodialSubType": custodialType,
                                   "NonCustodialSubType": nonCustodialType,
                                   "Teams": kwargs["Teams"],
                                   "OutputDatasets": outputDatasets,
                                   "SiteWhitelist": whiteList,
                                   "SiteBlacklist": blackList},
                           useBody=True)