# // # // Pileup sample? #// if pileupDataset != None: maker.addPileupDataset(pileupDataset, pileupFilesPerJob) # // # // DataMix pileup sample? #// if dataMixDS: maker.addPileupDataset(dataMixDS, 1, 'DataMixingModule') # // # // Input Dataset #// maker.addInputDataset(dataset) maker.inputDataset['SplitType'] = splitType maker.inputDataset['SplitSize'] = splitSize if onlySites != None: maker.inputDataset['OnlySites'] = onlySites if onlyBlocks != None: maker.inputDataset['OnlyBlocks'] = onlyBlocks if onlyClosedBlocks: maker.inputDataset['OnlyClosedBlocks'] = True if dbsUrl != None: maker.workflow.parameters['DBSURL'] = dbsUrl
def createHarvestingWorkflow(dataset, site, cmsPath, scramArch,
                             cmsswVersion, globalTag, configFile = None,
                             DQMServer = None, proxyLocation = None,
                             DQMCopyToCERN = None, runNumber = None,
                             doStageOut = None):
    """
    _createHarvestingWorkflow_

    Create a Harvesting workflow to extract DQM information from a dataset.

    Enters an essentially empty process that will be updated at runtime to
    use the harvesting cfg from the release.

    Arguments:
        dataset       -- dataset path to harvest
        site          -- recorded as the spec's 'OnlySites' restriction
        cmsPath, scramArch, cmsswVersion -- release environment used to
                         build/load the configuration
        globalTag     -- conditions tag injected into the config wrapper
        configFile    -- optional cfg file; when None a config is generated
                         on the fly and a runtime setup script is attached
        DQMServer, proxyLocation, DQMCopyToCERN, doStageOut -- optional
                         parameters copied verbatim into the spec when given
        runNumber     -- when set, the workflow is labelled per-run

    Returns the populated workflow spec instance.
    """
    datasetPieces = DatasetConventions.parseDatasetPath(dataset)
    physicsGroup = "OfflineDQM"
    category = "DQM"

    # Per-run harvesting gets a Run<N> channel; dataset-level harvesting
    # encodes the dataset pieces into the label instead.
    if runNumber is None:
        requestId = "OfflineDQM"
        label = "%s-%s-%s" % (datasetPieces['Primary'],
                              datasetPieces['Processed'],
                              datasetPieces['DataTier'])
        channel = "DQMHarvest"
    else:
        requestId = "%s-%s" % (datasetPieces["Primary"],
                               datasetPieces["DataTier"])
        label = "DQMHarvesting"
        channel = "Run%s" % runNumber

    # Lazy %-style args: message is only formatted when DEBUG is enabled.
    logging.debug("path, arch, ver: %s, %s, %s",
                  cmsPath, scramArch, cmsswVersion)

    if configFile is not None:
        cfgWrapper = configFromFile(cmsPath, scramArch, cmsswVersion,
                                    configFile)
    else:
        cfgWrapper = configOnFly(cmsPath, scramArch, cmsswVersion)

    # Pass in the conditions global tag.
    cfgWrapper.conditionsTag = globalTag

    maker = WorkflowMaker(requestId, channel, label)
    maker.setCMSSWVersion(cmsswVersion)
    maker.setPhysicsGroup(physicsGroup)
    maker.setConfiguration(cfgWrapper, Type = "instance")
    maker.changeCategory(category)
    maker.setPSetHash("NO_HASH")
    maker.addInputDataset(dataset)
    maker.setActivity('harvesting')

    spec = maker.makeWorkflow()
    spec.parameters['WorkflowType'] = "Harvesting"
    spec.parameters['DBSURL'] = "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet"
    spec.parameters['OnlySites'] = site

    # Optional runtime parameters are only recorded when explicitly supplied.
    if DQMServer is not None:
        spec.parameters['DQMServer'] = DQMServer
    if proxyLocation is not None:
        spec.parameters['proxyLocation'] = proxyLocation
    if DQMCopyToCERN is not None:
        spec.parameters['DQMCopyToCERN'] = DQMCopyToCERN
    if doStageOut is not None:
        spec.parameters['DoStageOut'] = doStageOut

    spec.payload.scriptControls['PostTask'].append(
        "JobCreator.RuntimeTools.RuntimeOfflineDQM")

    # No user-supplied cfg: the harvesting config is assembled at runtime.
    if configFile is None:
        preExecScript = spec.payload.scriptControls["PreExe"]
        preExecScript.append("JobCreator.RuntimeTools.RuntimeOfflineDQMSetup")

    return spec
def makeWorkflow(self, testInstance):
    """
    _makeWorkflow_

    Process a test: create a WorkflowSpec for it, register/publish the
    workflow, and record the resulting spec details in the test instance.

    Error paths log a message and return None without creating a workflow.
    If a workflow already exists for this test name, it is reused.

    Fixes relative to the previous revision:
      * the primaryDatasetName None-check now runs BEFORE .value() is
        called (previously a missing parameter raised AttributeError
        instead of logging the intended error message)
      * loader.unload() is now called on every early error return, so the
        CMSSW environment is not left loaded on failure
      * the pickle file handle is closed explicitly
      * the spec file path is built with os.path.join instead of a
        "/%s/%s" format that produced a doubled leading slash
    """
    loader = CMSSWAPILoader(testInstance['CMSSWArchitecture'],
                            testInstance['CMSSWVersion'],
                            testInstance['CMSPath'])
    loader.load()

    cfgWrapper = CMSSWConfig()
    # Load the pickled cfg and close the handle deterministically.
    pickleHandle = open(testInstance['PickleFile'])
    try:
        process = pickle.load(pickleHandle)
    finally:
        pickleHandle.close()
    cfgInt = cfgWrapper.loadConfiguration(process)
    cfgInt.validateForProduction()
    cfgAsString = process.dumpPython()

    # //
    # // Get release validation PSet from process
    # //
    relValPSet = getattr(process, "ReleaseValidation", None)
    if relValPSet is None:
        msg = "Unable to extract ReleaseValidation PSet from pickled cfg for \n"
        msg += "%s\n" % testInstance['PickleFile']
        logging.error(msg)
        loader.unload()
        return

    testName = getattr(relValPSet, "primaryDatasetName", None)
    # Guard BEFORE dereferencing: .value() on None would raise.
    if testName is None:
        msg = "No primaryDatasetName parameter in ReleaseValidation PSet\n"
        msg += "%s\n" % testInstance['PickleFile']
        logging.error(msg)
        loader.unload()
        return
    testInstance['Name'] = testName.value()

    totalEvents = getattr(relValPSet, "totalNumberOfEvents", None)
    if totalEvents is None:
        msg = "No totalNumberOfEvents parameter in ReleaseValidation PSet\n"
        msg += "%s\n" % testInstance['PickleFile']
        logging.error(msg)
        loader.unload()
        return
    testInstance['TotalEvents'] = totalEvents.value()

    eventsPerJob = getattr(relValPSet, "eventsPerJob", None)
    speedCat = getattr(relValPSet, "speedCategory", None)
    if (eventsPerJob is None) and (speedCat is None):
        msg = "ReleaseValidation PSet must contain one of either eventsPerJob or speedCategory\n"
        msg += "%s\n" % testInstance['PickleFile']
        logging.error(msg)
        loader.unload()
        return

    if eventsPerJob is not None:
        testInstance['EventsPerJob'] = eventsPerJob.value()
    else:
        # Map the named speed category onto a per-job event count via
        # the component configuration.
        testInstance['SpeedCategory'] = speedCat.value()
        if not self.args.has_key(testInstance['SpeedCategory']):
            msg = "Unknown Speed Category: %s\n" % testInstance['SpeedCategory']
            msg += "In file: %s\n" % testInstance['PickleFile']
            logging.error(msg)
            loader.unload()
            return
        testInstance['EventsPerJob'] = self.args[testInstance['SpeedCategory']]

    inputDataset = getattr(relValPSet, "inputDatasetPath", None)
    pileupDataset = getattr(relValPSet, "pileupDatasetPath", None)
    if pileupDataset is not None:
        testInstance['PileupDataset'] = pileupDataset.value()
    if inputDataset is not None:
        testInstance['InputDataset'] = inputDataset.value()

    msg = "Processing : %s\n" % testInstance['Name']
    msg += "From Pickle: %s\n" % testInstance['PickleFile']
    msg += "TotalEvents: %s\n" % testInstance['TotalEvents']
    msg += "EventsPerJob: %s\n" % testInstance['EventsPerJob']
    msg += "SpeedCategory: %s\n" % testInstance['SpeedCategory']
    logging.info(msg)

    # A workflow already exists for this test name: reuse its spec/dirs.
    if self.workflows.has_key(testInstance['Name']):
        testInstance['WorkflowSpecId'] = self.workflows[testInstance['Name']]
        testInstance['WorkflowSpecFile'] = self.workflowFiles[testInstance['Name']]
        testInstance['WorkingDir'] = self.workingDirs[testInstance['Name']]
        loader.unload()
        return

    self.jobCounts[testInstance['Name']] = 1
    workingDir = os.path.join(self.args['ComponentDir'],
                              testInstance['CMSSWVersion'],
                              testInstance['Name'])
    if not os.path.exists(workingDir):
        os.makedirs(workingDir)

    # Environment no longer needed once the cfg has been extracted.
    loader.unload()

    maker = WorkflowMaker(str(self.timestamp), testInstance['Name'], 'RelVal')
    maker.setCMSSWVersion(testInstance['CMSSWVersion'])
    maker.setPhysicsGroup("RelVal")
    maker.setConfiguration(cfgWrapper, Type = "instance")
    maker.setOriginalCfg(cfgAsString)
    psetHash = "NO_PSET_HASH"
    if cfgWrapper.configMetadata.has_key('PSetHash'):
        psetHash = cfgWrapper.configMetadata['PSetHash']
    maker.setPSetHash(psetHash)
    maker.changeCategory("relval")

    if testInstance['SelectionEfficiency'] is not None:
        selEff = float(testInstance['SelectionEfficiency'])
        maker.addSelectionEfficiency(selEff)
    if testInstance['PileupDataset'] is not None:
        maker.addPileupDataset(testInstance['PileupDataset'], 100)
    if testInstance['InputDataset'] is not None:
        maker.addInputDataset(testInstance['InputDataset'])
        maker.inputDataset["SplitType"] = "events"
        maker.inputDataset["SplitSize"] = testInstance['EventsPerJob']

    spec = maker.makeWorkflow()
    spec.parameters['OnlySites'] = testInstance['Site']
    spec.parameters['DBSURL'] = self.dbsUrl

    # os.path.join avoids the doubled slash of the old "/%s/%s" format.
    specFile = os.path.join(workingDir, "%s-Workflow.xml" % maker.workflowName)
    spec.save(specFile)

    self.workflows[testInstance['Name']] = str(maker.workflowName)
    self.workflowFiles[testInstance['Name']] = specFile
    self.workingDirs[testInstance['Name']] = workingDir

    testInstance['WorkflowSpecId'] = str(maker.workflowName)
    testInstance['WorkflowSpecFile'] = specFile
    testInstance['WorkingDir'] = workingDir

    msg = "Workflow created for test: %s" % testInstance['Name']
    logging.info(msg)

    msg = "Registering Workflow Entity: %s" % maker.workflowName
    logging.debug(msg)
    WEWorkflow.register(
        maker.workflowName,
        {"owner" : "RelValInjector",
         "workflow_spec_file" : specFile,
         })

    msg = "Publishing NewWorkflow/NewDataset for \n"
    msg += " %s\n "% specFile
    logging.debug(msg)
    self.ms.publish("NewWorkflow", specFile)
    self.ms.publish("NewDataset", specFile)
    self.ms.commit()
    return
maker.addSelectionEfficiency(selectionEfficiency) # // # // Pileup?? #// if pileupDS != None: maker.addPileupDataset( pileupDS, pileupFilesPerJob) if dbsUrl != None: maker.workflow.parameters['DBSURL'] = dbsUrl # // # // Input Dataset #// if useInputDataset: maker.addInputDataset(inputDataset) maker.inputDataset['SplitType'] = splitType maker.inputDataset['SplitSize'] = splitSize spec = maker.makeWorkflow() workflowBase = "%s-Workflow.xml" % maker.workflowName workflow = os.path.join(os.getcwd(), workflowBase) spec.save("%s-Workflow.xml" % maker.workflowName) print "Created: %s-Workflow.xml" % maker.workflowName print "From Tag: %s Of %s " % (cvsTag, cfgFile ) if useInputDataset: print "Input Dataset: %s " % inputDataset print " ==> Will be split by %s in increments of %s" % (splitType, splitSize) print "Output Datasets:"