def createDataPileUpFile(jsonFile, pileUpReweightingType): jsonList = LumiList(jsonFile) lumiString = jsonList.getCMSSWString() thisHash = hashlib.md5(lumiString) #print 'Require pileup file with hash ', thisHash.hexdigest() dataPileUpFilename = '/data/DataPileUp_'+thisHash.hexdigest()+'_'+pileUpReweightingType+'.root' if not os.path.exists(baseDir+dataPileUpFilename): print 'Creating new "', pileUpReweightingType, '" data pileup file for json file "',jsonFile,'"' subprocess.call(['pileupCalc.py', '-i', jsonFile, '--inputLumiJSON', '/afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/Collisions11/7TeV/PileUp/pileup_JSON_2011_4_2_validation.txt', '--calcMode', pileUpReweightingType, '--maxPileupBin', '50', baseDir+dataPileUpFilename], shell=False) return dataPileUpFilename
# special paths always saved setattr(stepBTree.variables, "std_vector_trigger_special", cms.string("specialRateTrigger/8") ) # mc if dataset[0] == "MC": stepBTree.variables.baseW = "%.12f" % scalef # data else: from FWCore.PythonUtilities.LumiList import LumiList import os if json != None : lumis = LumiList(filename = os.getenv('CMSSW_BASE')+'/src/LatinoTrees/Misc/Jsons/%s.json'%json) process.source.lumisToProcess = cms.untracked.VLuminosityBlockRange() process.source.lumisToProcess = lumis.getCMSSWString().split(',') stepBTree.variables.baseW = "1" stepBTree.variables.trpu = cms.string("1") stepBTree.variables.itpu = cms.string("1") stepBTree.variables.ootpup1 = cms.string("1") stepBTree.variables.ootpum1 = cms.string("1") stepBTree.variables.puW = cms.string("1") stepBTree.variables.puAW = cms.string("1") stepBTree.variables.puBW = cms.string("1") #################### # run electron id ## # see twiki: # https://twiki.cern.ch/twiki/bin/view/CMS/EgammaIDRecipesRun2 #
def __lumiSelectionSnippet( self, jsonPath = None, firstRun = None, lastRun = None ):
    """Build the ``lumiSecs.extend([...])`` python snippet selecting the
    lumi sections of this dataset that pass the run-range and/or JSON cuts.

    Also records ``self.__firstusedrun``/``self.__lastusedrun`` as a side
    effect.  Returns an empty string when no selection is requested.
    """
    lumiSecExtend = ""
    if firstRun or lastRun or jsonPath:
        if not jsonPath:
            # No JSON: build "<run>:1-<run>:max" ranges for all selected runs.
            selectedRunList = self.__getRunList()
            if firstRun:
                selectedRunList = [ run for run in selectedRunList \
                                    if self.__findInJson(run, "run_number") >= firstRun ]
            if lastRun:
                selectedRunList = [ run for run in selectedRunList \
                                    if self.__findInJson(run, "run_number") <= lastRun ]
            lumiList = [ str( self.__findInJson(run, "run_number") ) + ":1-" \
                         + str( self.__findInJson(run, "run_number") ) + ":max" \
                         for run in selectedRunList ]
            # Chunked to at most 255 entries per extend() call.
            splitLumiList = list( self.__chunks( lumiList, 255 ) )
        else:
            theLumiList = None
            try:
                theLumiList = LumiList ( filename = jsonPath )
            except ValueError:
                # Not parseable as JSON; may still be a cff snippet (below).
                pass

            if theLumiList is not None:
                # Drop runs outside [firstRun, lastRun] before serialising.
                allRuns = theLumiList.getRuns()
                runsToRemove = []
                for run in allRuns:
                    if firstRun and int( run ) < firstRun:
                        runsToRemove.append( run )
                    if lastRun and int( run ) > lastRun:
                        runsToRemove.append( run )
                theLumiList.removeRuns( runsToRemove )
                splitLumiList = list( self.__chunks(
                    theLumiList.getCMSSWString().split(','), 255 ) )
                if not (splitLumiList and splitLumiList[0] and splitLumiList[0][0]):
                    splitLumiList = None
            else:
                # Fallback: the "JSON" may actually be a CMSSW lumi selection
                # cff snippet; use its contents directly.
                with open(jsonPath) as f:
                    jsoncontents = f.read()
                    if "process.source.lumisToProcess" in jsoncontents:
                        msg = "%s is not a json file, but it seems to be a CMSSW lumi selection cff snippet. Trying to use it" % jsonPath
                        if firstRun or lastRun:
                            msg += ("\n (after applying firstRun and/or lastRun)")
                        msg += ".\nPlease note that, depending on the format of this file, it may not work as expected."
                        msg += "\nCheck your config file to make sure that it worked properly."
                        print msg

                        runlist = self.__getRunList()
                        if firstRun or lastRun:
                            self.__firstusedrun = -1
                            self.__lastusedrun = -1
                            # Rewrite each run:lumi range in the snippet to force
                            # it inside [firstRun, lastRun]; then drop emptied
                            # ("''") entries.
                            jsoncontents = re.sub(r"\d+:(\d+|max)(-\d+:(\d+|max))?", self.getForceRunRangeFunction(firstRun, lastRun), jsoncontents)
                            jsoncontents = (jsoncontents.replace("'',\n","").replace("''\n","")
                                            .replace('"",\n','').replace('""\n',''))
                            self.__firstusedrun = max(self.__firstusedrun, int(self.__findInJson(runlist[0],"run_number")))
                            self.__lastusedrun = min(self.__lastusedrun, int(self.__findInJson(runlist[-1],"run_number")))
                            if self.__lastusedrun < self.__firstusedrun:
                                # Empty selection after forcing the run range.
                                jsoncontents = None
                        else:
                            self.__firstusedrun = int(self.__findInJson(runlist[0],"run_number"))
                            self.__lastusedrun = int(self.__findInJson(runlist[-1],"run_number"))
                        lumiSecExtend = jsoncontents
                        splitLumiList = None
                    else:
                        raise AllInOneError("%s is not a valid json file!" % jsonPath)

        if splitLumiList and splitLumiList[0] and splitLumiList[0][0]:
            # Serialise the chunked lumi ranges into extend() statements.
            lumiSecStr = [ "',\n'".join( lumis ) \
                           for lumis in splitLumiList ]
            lumiSecStr = [ "lumiSecs.extend( [\n'" + lumis + "'\n] )" \
                           for lumis in lumiSecStr ]
            lumiSecExtend = "\n".join( lumiSecStr )
            runlist = self.__getRunList()
            self.__firstusedrun = max(int(splitLumiList[0][0].split(":")[0]), int(self.__findInJson(runlist[0],"run_number")))
            self.__lastusedrun = min(int(splitLumiList[-1][-1].split(":")[0]), int(self.__findInJson(runlist[-1],"run_number")))
        elif lumiSecExtend:
            # Snippet already produced by the cff fallback above.
            pass
        else:
            msg = "You are trying to run a validation without any runs! Check that:"
            if firstRun or lastRun:
                msg += "\n - firstRun/begin and lastRun/end are correct for this dataset, and there are runs in between containing data"
            if jsonPath:
                msg += "\n - your JSON file is correct for this dataset, and the runs contain data"
            if (firstRun or lastRun) and jsonPath:
                msg += "\n - firstRun/begin and lastRun/end are consistent with your JSON file"
            raise AllInOneError(msg)

    else:
        if self.__inputMagneticField is not None:
            pass #never need self.__firstusedrun or self.__lastusedrun
        else:
            runlist = self.__getRunList()
            self.__firstusedrun = int(self.__findInJson(self.__getRunList()[0],"run_number"))
            self.__lastusedrun = int(self.__findInJson(self.__getRunList()[-1],"run_number"))

    return lumiSecExtend
def __createSnippet( self, jsonPath = None, begin = None, end = None,
                     firstRun = None, lastRun = None, repMap = None,
                     crab = False ):
    """Render the dataset source snippet (file list plus optional lumi
    selection) by filling the source template with ``repMap``.

    ``begin``/``end`` (timestamps) are mutually exclusive with
    ``firstRun``/``lastRun``; they are converted to run numbers first.
    Raises AllInOneError on ambiguous or inverted limits.
    """
    if firstRun:
        firstRun = int( firstRun )
    if lastRun:
        lastRun = int( lastRun )
    # Reject mixed time-based and run-based limits on the same end.
    if ( begin and firstRun ) or ( end and lastRun ):
        msg = ( "The Usage of "
                + "'begin' & 'firstRun' " * int( bool( begin and firstRun ) )
                + "and " * int( bool( ( begin and firstRun ) and
                                      ( end and lastRun ) ) )
                + "'end' & 'lastRun' " * int( bool( end and lastRun ) )
                + "is ambigous." )
        raise AllInOneError( msg )
    if begin or end:
        ( firstRun, lastRun ) = self.convertTimeToRun( begin = begin, end = end,
                                                       firstRun = firstRun,
                                                       lastRun = lastRun )
    if ( firstRun and lastRun ) and ( firstRun > lastRun ):
        msg = ( "The lower time/runrange limit ('begin'/'firstRun') "
                "chosen is greater than the upper time/runrange limit "
                "('end'/'lastRun').")
        raise AllInOneError( msg )
    goodLumiSecStr = ""
    lumiStr = ""
    lumiSecExtend = ""
    if firstRun or lastRun:
        goodLumiSecStr = ( "lumiSecs = cms.untracked."
                           "VLuminosityBlockRange()\n" )
        lumiStr = " lumisToProcess = lumiSecs,\n"
        if not jsonPath:
            # No JSON: full "<run>:1-<run>:max" range for every selected run.
            selectedRunList = self.__getRunList()
            if firstRun:
                selectedRunList = [ run for run in selectedRunList \
                                    if run["run_number"] >= firstRun ]
            if lastRun:
                selectedRunList = [ run for run in selectedRunList \
                                    if run["run_number"] <= lastRun ]
            lumiList = [ str( run["run_number"] ) + ":1-" \
                         + str( run["run_number"] ) + ":max" \
                         for run in selectedRunList ]
            splitLumiList = list( self.__chunks( lumiList, 255 ) )
        else:
            # JSON given: intersect its lumi list with the run range.
            theLumiList = LumiList ( filename = jsonPath )
            allRuns = theLumiList.getRuns()
            runsToRemove = []
            for run in allRuns:
                if firstRun and int( run ) < firstRun:
                    runsToRemove.append( run )
                if lastRun and int( run ) > lastRun:
                    runsToRemove.append( run )
            theLumiList.removeRuns( runsToRemove )
            splitLumiList = list( self.__chunks(
                theLumiList.getCMSSWString().split(','), 255 ) )
        # NOTE(review): splitLumiList[0][0] raises IndexError when the
        # selection is empty — later revisions guard this; confirm intended.
        if not len(splitLumiList[0][0]) == 0:
            lumiSecStr = [ "',\n'".join( lumis ) \
                           for lumis in splitLumiList ]
            lumiSecStr = [ "lumiSecs.extend( [\n'" + lumis + "'\n] )" \
                           for lumis in lumiSecStr ]
            lumiSecExtend = "\n".join( lumiSecStr )
    elif jsonPath:
        # JSON only, no run range: let the generated config load the JSON
        # itself at runtime.
        goodLumiSecStr = ( "goodLumiSecs = LumiList.LumiList(filename"
                           "= '%(json)s').getCMSSWString().split(',')\n"
                           "lumiSecs = cms.untracked"
                           ".VLuminosityBlockRange()\n" )
        lumiStr = " lumisToProcess = lumiSecs,\n"
        lumiSecExtend = "lumiSecs.extend(goodLumiSecs)\n"
    if crab:
        # CRAB supplies the input files itself.
        files = ""
    else:
        splitFileList = list( self.__chunks( self.fileList(), 255 ) )
        fileStr = [ "',\n'".join( files ) for files in splitFileList ]
        fileStr = [ "readFiles.extend( [\n'" + files + "'\n] )" \
                    for files in fileStr ]
        files = "\n".join( fileStr )
    theMap = repMap
    theMap["files"] = files
    theMap["json"] = jsonPath
    theMap["lumiStr"] = lumiStr
    theMap["goodLumiSecStr"] = goodLumiSecStr%( theMap )
    theMap["lumiSecExtend"] = lumiSecExtend
    if crab:
        dataset_snippet = self.__dummy_source_template%( theMap )
    else:
        dataset_snippet = self.__source_template%( theMap )
    return dataset_snippet
### in grid mode define input via crab.cfg. python will be send precompiled, for the c++ part baseDir needs to be the absolute path (starting from the from the CMSSW_BASE) sourceString = "" baseDir = 'src' process.source = cms.Source("PoolSource", fileNames = cms.untracked.vstring(sourceString) ) process.maxEvents = cms.untracked.PSet( input = cms.untracked.int32(numberOfEvents) ) ### select runs from JSON file if isData and not options.runFromCrab: print 'Discriminate lumisections from', jsonFile jsonList = LumiList(jsonFile) lumiString = jsonList.getCMSSWString()#convert into compact format needed by CMSSW process.source.lumisToProcess = cms.untracked.VLuminosityBlockRange(lumiString.split(',')) process.GlobalTag.globaltag = globaltag #print 'Using the GlobalTag ',process.GlobalTag.globaltag motherPdgID = 0 if isData: decayType = 'data' else: decayType, motherPdgID = Tools.parseDecayType(jobName) numberOfEventsStr = str(numberOfEvents) if numberOfEvents == -1: numberOfEventsStr = 'all' print '--> Analysing',numberOfEventsStr,'events in <'+inputPath+'>.'
def jobSplittingByLumi(self):
    """
    Split task into jobs by Lumi section paying attention to which
    lumis should be run (according to the analysis dataset).
    This uses WMBS job splitting which does not split files over jobs
    so the job will have AT LEAST as many lumis as requested, perhaps
    more

    Returns a dict with 'params', 'args', 'jobDestination' and 'njobs'
    describing the created jobs.
    """
    self.useParent = int(self.cfg_params.get('CMSSW.use_parent',0))
    common.logger.debug('Splitting by Lumi')
    self.checkLumiSettings()

    blockSites = self.args['blockSites']
    pubdata = self.args['pubdata']

    lumisPerFile = pubdata.getLumis()
    self.parentFiles=pubdata.getParent()
    # Make the list of WMBS files for job splitter
    fileList = pubdata.getListFiles()
    wmFileList = []
    for jobFile in fileList:
        block = jobFile['Block']['Name']
        # NOTE(review): bare except silently skips files whose block has no
        # known site list — confirm this best-effort behavior is intended.
        try:
            jobFile['Block']['StorageElementList'].extend(blockSites[block])
        except:
            continue
        wmbsFile = File(jobFile['LogicalFileName'])
        if not blockSites[block]:
            msg = 'WARNING: No sites are hosting any part of data for block: %s\n' %block
            msg += 'Related jobs will not be submitted and this block of data can not be analyzed'
            common.logger.debug(msg)
            # wmbsFile['locations'].add('Nowhere')
        [ wmbsFile['locations'].add(x) for x in blockSites[block] ]
        wmbsFile['block'] = block
        # Register each (run, lumi) pair of the file with WMBS.
        for lumi in lumisPerFile[jobFile['LogicalFileName']]:
            wmbsFile.addRun(Run(lumi[0], lumi[1]))
        wmFileList.append(wmbsFile)

    fileSet = set(wmFileList)
    thefiles = Fileset(name='FilesToSplit', files = fileSet)

    # Create the factory and workflow
    work = Workflow()
    subs = Subscription(fileset = thefiles, workflow = work,
                        split_algo = 'LumiBased', type = "Processing")
    splitter = SplitterFactory()
    jobFactory = splitter(subs)

    list_of_lists = []
    jobDestination = []
    jobCount = 0
    lumisCreated = 0
    list_of_blocks = []
    # Derive lumis-per-job from the requested job count when not set.
    if not self.limitJobLumis:
        if self.totalNLumis > 0:
            self.lumisPerJob = max(self.totalNLumis // self.theNumberOfJobs,1)
        else:
            self.lumisPerJob = pubdata.getMaxLumis() // self.theNumberOfJobs + 1
        common.logger.info('Each job will process about %s lumis.'
                           % self.lumisPerJob)

    for jobGroup in jobFactory(lumis_per_job = self.lumisPerJob):
        for job in jobGroup.jobs:
            # Stop early when the configured job/lumi budget is exhausted.
            if (self.limitNJobs and jobCount >= self.theNumberOfJobs):
                common.logger.info('Requested number of jobs reached.')
                break
            if (self.limitTotalLumis and lumisCreated >= self.totalNLumis):
                common.logger.info('Requested number of lumis reached.')
                break
            lumis = []
            lfns = []
            if self.useParent==1:
                parentlfns = []
                pString =""
            locations = []
            blocks = []
            firstFile = True
            # Collect information from all the files
            for jobFile in job.getFiles():
                doFile = False
                if firstFile:
                    # Get locations from first file in the job
                    for loc in jobFile['locations']:
                        locations.append(loc)
                    blocks.append(jobFile['block'])
                    firstFile = False
                # Accumulate Lumis from all files
                for lumiList in jobFile['runs']:
                    theRun = lumiList.run
                    for theLumi in list(lumiList):
                        if (not self.limitTotalLumis) or \
                           (lumisCreated < self.totalNLumis):
                            doFile = True
                            lumisCreated += 1
                            lumis.append( (theRun, theLumi) )
                if doFile:
                    lfns.append(jobFile['lfn'])
                    if self.useParent==1:
                        parent = self.parentFiles[jobFile['lfn']]
                        for p in parent :
                            pString += p + ','
            fileString = ','.join(lfns)
            lumiLister = LumiList(lumis = lumis)
            lumiString = lumiLister.getCMSSWString()
            blockString=','.join(blocks)
            if self.useParent==1:
                common.logger.debug("Files: "+fileString+" with the following parents: "+pString[:-1])
                pfileString = pString[:-1]
                list_of_lists.append([fileString, pfileString, str(-1), str(0), lumiString,blockString])
            else:
                list_of_lists.append([fileString, str(-1), str(0), lumiString, blockString])
            list_of_blocks.append(blocks)
            jobDestination.append(locations)
            jobCount += 1
            common.logger.debug('Job %s will run on %s files and %s lumis '
                                % (jobCount, len(lfns), len(lumis) ))

    common.logger.info('%s jobs created to run on %s lumis' %
                       (jobCount, lumisCreated))

    # Prepare dict output matching back to non-WMBS job creation
    if self.global_data_service and self.global_data_rewrite:
        # Rewrite input file names for the global data service, in place.
        for job in list_of_lists:
            GlobalDataService.modifyJobFilenames( job )
    dictOut = {}
    dictOut['params'] = ['InputFiles', 'MaxEvents', 'SkipEvents', 'Lumis','InputBlocks']
    if self.useParent==1:
        dictOut['params']= ['InputFiles','ParentFiles','MaxEvents','SkipEvents','Lumis','InputBlocks']
    dictOut['args'] = list_of_lists
    dictOut['jobDestination'] = jobDestination
    dictOut['njobs'] = jobCount

    self.cacheBlocks(list_of_blocks,jobDestination)

    return dictOut
def __createSnippet(self,
                    jsonPath=None,
                    begin=None,
                    end=None,
                    firstRun=None,
                    lastRun=None,
                    repMap=None,
                    crab=False,
                    parent=False):
    """Render the dataset source snippet (file list, optional parent
    files, and lumi selection) by filling the source template with
    ``repMap``.

    ``begin``/``end`` (timestamps) are mutually exclusive with
    ``firstRun``/``lastRun``; they are converted to run numbers first.
    Records ``self.__firstusedrun``/``self.__lastusedrun`` as a side
    effect.  Raises AllInOneError on invalid or ambiguous limits.
    """
    if firstRun:
        firstRun = int(firstRun)
    if lastRun:
        lastRun = int(lastRun)
    # Reject mixed time-based and run-based limits on the same end.
    if (begin and firstRun) or (end and lastRun):
        msg = (
            "The Usage of " +
            "'begin' & 'firstRun' " * int(bool(begin and firstRun)) +
            "and " * int(bool(
                (begin and firstRun) and (end and lastRun))) +
            "'end' & 'lastRun' " * int(bool(end and lastRun)) +
            "is ambigous.")
        raise AllInOneError(msg)
    if begin or end:
        (firstRun, lastRun) = self.convertTimeToRun(begin=begin,
                                                    end=end,
                                                    firstRun=firstRun,
                                                    lastRun=lastRun)
    if (firstRun and lastRun) and (firstRun > lastRun):
        msg = ("The lower time/runrange limit ('begin'/'firstRun') "
               "chosen is greater than the upper time/runrange limit "
               "('end'/'lastRun').")
        raise AllInOneError(msg)
    # Predefined _cff.py datasets cannot be further filtered.
    if self.predefined() and (jsonPath or begin or end or firstRun or lastRun):
        msg = ("The parameters 'JSON', 'begin', 'end', 'firstRun', and 'lastRun'"
               "only work for official datasets, not predefined _cff.py files")
        raise AllInOneError(msg)
    goodLumiSecStr = ""
    lumiStr = ""
    lumiSecExtend = ""
    if firstRun or lastRun or jsonPath:
        goodLumiSecStr = ("lumiSecs = cms.untracked."
                          "VLuminosityBlockRange()\n")
        lumiStr = " lumisToProcess = lumiSecs,\n"
        if not jsonPath:
            # No JSON: full "<run>:1-<run>:max" range for every selected run.
            selectedRunList = self.__getRunList()
            if firstRun:
                selectedRunList = [ run for run in selectedRunList \
                                    if self.__findInJson(run, "run_number") >= firstRun ]
            if lastRun:
                selectedRunList = [ run for run in selectedRunList \
                                    if self.__findInJson(run, "run_number") <= lastRun ]
            lumiList = [ str( self.__findInJson(run, "run_number") ) + ":1-" \
                         + str( self.__findInJson(run, "run_number") ) + ":max" \
                         for run in selectedRunList ]
            splitLumiList = list(self.__chunks(lumiList, 255))
        else:
            theLumiList = None
            try:
                theLumiList = LumiList(filename=jsonPath)
            except ValueError:
                # Not parseable as JSON; may still be a cff snippet (below).
                pass

            if theLumiList is not None:
                # Drop runs outside [firstRun, lastRun] before serialising.
                allRuns = theLumiList.getRuns()
                runsToRemove = []
                for run in allRuns:
                    if firstRun and int(run) < firstRun:
                        runsToRemove.append(run)
                    if lastRun and int(run) > lastRun:
                        runsToRemove.append(run)
                theLumiList.removeRuns(runsToRemove)
                splitLumiList = list(
                    self.__chunks(theLumiList.getCMSSWString().split(','),
                                  255))
                if not (splitLumiList and splitLumiList[0] and splitLumiList[0][0]):
                    splitLumiList = None
            else:
                # Fallback: the "JSON" may actually be a CMSSW lumi selection
                # cff snippet; use its contents directly.
                with open(jsonPath) as f:
                    jsoncontents = f.read()
                    if "process.source.lumisToProcess" in jsoncontents:
                        msg = "%s is not a json file, but it seems to be a CMSSW lumi selection cff snippet. Trying to use it" % jsonPath
                        if firstRun or lastRun:
                            msg += (
                                "\n (after applying firstRun and/or lastRun)"
                            )
                        msg += ".\nPlease note that, depending on the format of this file, it may not work as expected."
                        msg += "\nCheck your config file to make sure that it worked properly."
                        print msg

                        runlist = self.__getRunList()
                        if firstRun or lastRun:
                            self.__firstusedrun = -1
                            self.__lastusedrun = -1
                            # Rewrite each run:lumi range in the snippet to
                            # force it inside [firstRun, lastRun]; then drop
                            # emptied ("''") entries.
                            jsoncontents = re.sub(
                                r"\d+:(\d+|max)(-\d+:(\d+|max))?",
                                self.getForceRunRangeFunction(
                                    firstRun, lastRun), jsoncontents)
                            jsoncontents = (jsoncontents.replace(
                                "'',\n", "").replace("''\n", "").replace(
                                    '"",\n', '').replace('""\n', ''))
                            self.__firstusedrun = max(
                                self.__firstusedrun,
                                int(
                                    self.__findInJson(
                                        runlist[0], "run_number")))
                            self.__lastusedrun = min(
                                self.__lastusedrun,
                                int(
                                    self.__findInJson(
                                        runlist[-1], "run_number")))
                            if self.__lastusedrun < self.__firstusedrun:
                                # Empty selection after forcing the run range.
                                jsoncontents = None
                        else:
                            self.__firstusedrun = int(
                                self.__findInJson(runlist[0], "run_number"))
                            self.__lastusedrun = int(
                                self.__findInJson(runlist[-1], "run_number"))
                        lumiSecExtend = jsoncontents
                        splitLumiList = None
                    else:
                        raise AllInOneError(
                            "%s is not a valid json file!" % jsonPath)

        if splitLumiList and splitLumiList[0] and splitLumiList[0][0]:
            # Serialise the chunked lumi ranges into extend() statements.
            lumiSecStr = [ "',\n'".join( lumis ) \
                           for lumis in splitLumiList ]
            lumiSecStr = [ "lumiSecs.extend( [\n'" + lumis + "'\n] )" \
                           for lumis in lumiSecStr ]
            lumiSecExtend = "\n".join(lumiSecStr)
            runlist = self.__getRunList()
            self.__firstusedrun = max(
                int(splitLumiList[0][0].split(":")[0]),
                int(self.__findInJson(runlist[0], "run_number")))
            self.__lastusedrun = min(
                int(splitLumiList[-1][-1].split(":")[0]),
                int(self.__findInJson(runlist[-1], "run_number")))
        elif lumiSecExtend:
            # Snippet already produced by the cff fallback above.
            pass
        else:
            msg = "You are trying to run a validation without any runs! Check that:"
            if firstRun or lastRun:
                msg += "\n - firstRun and lastRun are correct for this dataset, and there are runs in between containing data"
            if jsonPath:
                msg += "\n - your JSON file is correct for this dataset, and the runs contain data"
            if (firstRun or lastRun) and jsonPath:
                msg += "\n - firstRun and lastRun are consistent with your JSON file"
            # Report limits under the names the caller actually used.
            if begin:
                msg = msg.replace("firstRun", "begin")
            if end:
                msg = msg.replace("lastRun", "end")
            raise AllInOneError(msg)

    else:
        runlist = self.__getRunList()
        self.__firstusedrun = int(
            self.__findInJson(self.__getRunList()[0], "run_number"))
        self.__lastusedrun = int(
            self.__findInJson(self.__getRunList()[-1], "run_number"))

    if crab:
        # CRAB supplies the input files itself.
        files = ""
    else:
        splitFileList = list(self.__chunks(self.fileList(), 255))
        fileStr = ["',\n'".join(files) for files in splitFileList]
        fileStr = [ "readFiles.extend( [\n'" + files + "'\n] )" \
                    for files in fileStr ]
        files = "\n".join(fileStr)

        if parent:
            # Also emit the parent (secondary) file list.
            splitParentFileList = list(
                self.__chunks(self.fileList(parent=True), 255))
            parentFileStr = [
                "',\n'".join(parentFiles)
                for parentFiles in splitParentFileList
            ]
            parentFileStr = [ "secFiles.extend( [\n'" + parentFiles + "'\n] )" \
                              for parentFiles in parentFileStr ]
            parentFiles = "\n".join(parentFileStr)
            files += "\n\n" + parentFiles

    theMap = repMap
    theMap["files"] = files
    theMap["json"] = jsonPath
    theMap["lumiStr"] = lumiStr
    theMap["goodLumiSecStr"] = goodLumiSecStr % (theMap)
    theMap["lumiSecExtend"] = lumiSecExtend
    if crab:
        dataset_snippet = self.__dummy_source_template % (theMap)
    else:
        dataset_snippet = self.__source_template % (theMap)
    return dataset_snippet
def __lumiSelectionSnippet(self, jsonPath=None, firstRun=None, lastRun=None):
    """Build the ``lumiSecs.extend([...])`` python snippet selecting the
    lumi sections of this dataset that pass the run-range and/or JSON cuts.

    Also records ``self.__firstusedrun``/``self.__lastusedrun`` as a side
    effect.  Returns an empty string when no selection is requested.
    """
    lumiSecExtend = ""
    if firstRun or lastRun or jsonPath:
        if not jsonPath:
            # No JSON: build "<run>:1-<run>:max" ranges for all selected runs.
            selectedRunList = self.__getRunList()
            if firstRun:
                selectedRunList = [ run for run in selectedRunList \
                                    if self.__findInJson(run, "run_number") >= firstRun ]
            if lastRun:
                selectedRunList = [ run for run in selectedRunList \
                                    if self.__findInJson(run, "run_number") <= lastRun ]
            lumiList = [ str( self.__findInJson(run, "run_number") ) + ":1-" \
                         + str( self.__findInJson(run, "run_number") ) + ":max" \
                         for run in selectedRunList ]
            # Chunked to at most 255 entries per extend() call.
            splitLumiList = list(self.__chunks(lumiList, 255))
        else:
            theLumiList = None
            try:
                theLumiList = LumiList(filename=jsonPath)
            except ValueError:
                # Not parseable as JSON; may still be a cff snippet (below).
                pass

            if theLumiList is not None:
                # Drop runs outside [firstRun, lastRun] before serialising.
                allRuns = theLumiList.getRuns()
                runsToRemove = []
                for run in allRuns:
                    if firstRun and int(run) < firstRun:
                        runsToRemove.append(run)
                    if lastRun and int(run) > lastRun:
                        runsToRemove.append(run)
                theLumiList.removeRuns(runsToRemove)
                splitLumiList = list(
                    self.__chunks(theLumiList.getCMSSWString().split(','),
                                  255))
                if not (splitLumiList and splitLumiList[0] and splitLumiList[0][0]):
                    splitLumiList = None
            else:
                # Fallback: the "JSON" may actually be a CMSSW lumi selection
                # cff snippet; use its contents directly.
                with open(jsonPath) as f:
                    jsoncontents = f.read()
                    if "process.source.lumisToProcess" in jsoncontents:
                        msg = "%s is not a json file, but it seems to be a CMSSW lumi selection cff snippet. Trying to use it" % jsonPath
                        if firstRun or lastRun:
                            msg += (
                                "\n (after applying firstRun and/or lastRun)"
                            )
                        msg += ".\nPlease note that, depending on the format of this file, it may not work as expected."
                        msg += "\nCheck your config file to make sure that it worked properly."
                        print msg

                        runlist = self.__getRunList()
                        if firstRun or lastRun:
                            self.__firstusedrun = -1
                            self.__lastusedrun = -1
                            # Rewrite each run:lumi range in the snippet to
                            # force it inside [firstRun, lastRun]; then drop
                            # emptied ("''") entries.
                            jsoncontents = re.sub(
                                r"\d+:(\d+|max)(-\d+:(\d+|max))?",
                                self.getForceRunRangeFunction(
                                    firstRun, lastRun), jsoncontents)
                            jsoncontents = (jsoncontents.replace(
                                "'',\n", "").replace("''\n", "").replace(
                                    '"",\n', '').replace('""\n', ''))
                            self.__firstusedrun = max(
                                self.__firstusedrun,
                                int(
                                    self.__findInJson(
                                        runlist[0], "run_number")))
                            self.__lastusedrun = min(
                                self.__lastusedrun,
                                int(
                                    self.__findInJson(
                                        runlist[-1], "run_number")))
                            if self.__lastusedrun < self.__firstusedrun:
                                # Empty selection after forcing the run range.
                                jsoncontents = None
                        else:
                            self.__firstusedrun = int(
                                self.__findInJson(runlist[0], "run_number"))
                            self.__lastusedrun = int(
                                self.__findInJson(runlist[-1], "run_number"))
                        lumiSecExtend = jsoncontents
                        splitLumiList = None
                    else:
                        raise AllInOneError(
                            "%s is not a valid json file!" % jsonPath)

        if splitLumiList and splitLumiList[0] and splitLumiList[0][0]:
            # Serialise the chunked lumi ranges into extend() statements.
            lumiSecStr = [ "',\n'".join( lumis ) \
                           for lumis in splitLumiList ]
            lumiSecStr = [ "lumiSecs.extend( [\n'" + lumis + "'\n] )" \
                           for lumis in lumiSecStr ]
            lumiSecExtend = "\n".join(lumiSecStr)
            runlist = self.__getRunList()
            self.__firstusedrun = max(
                int(splitLumiList[0][0].split(":")[0]),
                int(self.__findInJson(runlist[0], "run_number")))
            self.__lastusedrun = min(
                int(splitLumiList[-1][-1].split(":")[0]),
                int(self.__findInJson(runlist[-1], "run_number")))
        elif lumiSecExtend:
            # Snippet already produced by the cff fallback above.
            pass
        else:
            msg = "You are trying to run a validation without any runs! Check that:"
            if firstRun or lastRun:
                msg += "\n - firstRun/begin and lastRun/end are correct for this dataset, and there are runs in between containing data"
            if jsonPath:
                msg += "\n - your JSON file is correct for this dataset, and the runs contain data"
            if (firstRun or lastRun) and jsonPath:
                msg += "\n - firstRun/begin and lastRun/end are consistent with your JSON file"
            raise AllInOneError(msg)

    else:
        runlist = self.__getRunList()
        self.__firstusedrun = int(
            self.__findInJson(self.__getRunList()[0], "run_number"))
        self.__lastusedrun = int(
            self.__findInJson(self.__getRunList()[-1], "run_number"))

    return lumiSecExtend
def jobSplittingByLumi(self):
    """
    Split task into jobs by Lumi section paying attention to which
    lumis should be run (according to the analysis dataset).
    This uses WMBS job splitting which does not split files over jobs
    so the job will have AT LEAST as many lumis as requested, perhaps
    more

    Returns a dict with 'params', 'args', 'jobDestination' and 'njobs'
    describing the created jobs.
    """
    self.useParent = int(self.cfg_params.get('CMSSW.use_parent', 0))
    common.logger.debug('Splitting by Lumi')
    self.checkLumiSettings()

    blockSites = self.args['blockSites']
    pubdata = self.args['pubdata']

    lumisPerFile = pubdata.getLumis()
    self.parentFiles = pubdata.getParent()
    # Make the list of WMBS files for job splitter
    fileList = pubdata.getListFiles()
    wmFileList = []
    for jobFile in fileList:
        block = jobFile['Block']['Name']
        # NOTE(review): bare except silently skips files whose block has no
        # known site list — confirm this best-effort behavior is intended.
        try:
            jobFile['Block']['StorageElementList'].extend(
                blockSites[block])
        except:
            continue
        wmbsFile = File(jobFile['LogicalFileName'])
        if not blockSites[block]:
            msg = 'WARNING: No sites are hosting any part of data for block: %s\n' % block
            msg += 'Related jobs will not be submitted and this block of data can not be analyzed'
            common.logger.debug(msg)
            # wmbsFile['locations'].add('Nowhere')
        [wmbsFile['locations'].add(x) for x in blockSites[block]]
        wmbsFile['block'] = block
        # Register each (run, lumi) pair of the file with WMBS.
        for lumi in lumisPerFile[jobFile['LogicalFileName']]:
            wmbsFile.addRun(Run(lumi[0], lumi[1]))
        wmFileList.append(wmbsFile)

    fileSet = set(wmFileList)
    thefiles = Fileset(name='FilesToSplit', files=fileSet)

    # Create the factory and workflow
    work = Workflow()
    subs = Subscription(fileset=thefiles,
                        workflow=work,
                        split_algo='LumiBased',
                        type="Processing")
    splitter = SplitterFactory()
    jobFactory = splitter(subs)

    list_of_lists = []
    jobDestination = []
    jobCount = 0
    lumisCreated = 0
    list_of_blocks = []
    # Derive lumis-per-job from the requested job count when not set.
    if not self.limitJobLumis:
        if self.totalNLumis > 0:
            self.lumisPerJob = max(
                self.totalNLumis // self.theNumberOfJobs, 1)
        else:
            self.lumisPerJob = pubdata.getMaxLumis(
            ) // self.theNumberOfJobs + 1
        common.logger.info('Each job will process about %s lumis.'
                           % self.lumisPerJob)

    for jobGroup in jobFactory(lumis_per_job=self.lumisPerJob):
        for job in jobGroup.jobs:
            # Stop early when the configured job/lumi budget is exhausted.
            if (self.limitNJobs and jobCount >= self.theNumberOfJobs):
                common.logger.info('Requested number of jobs reached.')
                break
            if (self.limitTotalLumis and lumisCreated >= self.totalNLumis):
                common.logger.info('Requested number of lumis reached.')
                break
            lumis = []
            lfns = []
            if self.useParent == 1:
                parentlfns = []
                pString = ""
            locations = []
            blocks = []
            firstFile = True
            # Collect information from all the files
            for jobFile in job.getFiles():
                doFile = False
                if firstFile:
                    # Get locations from first file in the job
                    for loc in jobFile['locations']:
                        locations.append(loc)
                    blocks.append(jobFile['block'])
                    firstFile = False
                # Accumulate Lumis from all files
                for lumiList in jobFile['runs']:
                    theRun = lumiList.run
                    for theLumi in list(lumiList):
                        if (not self.limitTotalLumis) or \
                           (lumisCreated < self.totalNLumis):
                            doFile = True
                            lumisCreated += 1
                            lumis.append((theRun, theLumi))
                if doFile:
                    lfns.append(jobFile['lfn'])
                    if self.useParent == 1:
                        parent = self.parentFiles[jobFile['lfn']]
                        for p in parent:
                            pString += p + ','
            fileString = ','.join(lfns)
            lumiLister = LumiList(lumis=lumis)
            lumiString = lumiLister.getCMSSWString()
            blockString = ','.join(blocks)
            if self.useParent == 1:
                common.logger.debug("Files: " + fileString +
                                    " with the following parents: " +
                                    pString[:-1])
                pfileString = pString[:-1]
                list_of_lists.append([
                    fileString, pfileString,
                    str(-1),
                    str(0), lumiString, blockString
                ])
            else:
                list_of_lists.append(
                    [fileString, str(-1), str(0), lumiString, blockString])
            list_of_blocks.append(blocks)
            jobDestination.append(locations)
            jobCount += 1
            common.logger.debug(
                'Job %s will run on %s files and %s lumis ' %
                (jobCount, len(lfns), len(lumis)))

    common.logger.info('%s jobs created to run on %s lumis' %
                       (jobCount, lumisCreated))

    # Prepare dict output matching back to non-WMBS job creation
    dictOut = {}
    dictOut['params'] = [
        'InputFiles', 'MaxEvents', 'SkipEvents', 'Lumis', 'InputBlocks'
    ]
    if self.useParent == 1:
        dictOut['params'] = [
            'InputFiles', 'ParentFiles', 'MaxEvents', 'SkipEvents', 'Lumis',
            'InputBlocks'
        ]
    dictOut['args'] = list_of_lists
    dictOut['jobDestination'] = jobDestination
    dictOut['njobs'] = jobCount

    self.cacheBlocks(list_of_blocks, jobDestination)

    return dictOut
# NOTE(review): this fragment begins mid-if-chain — the matching MC branch
# precedes this chunk of the file.
else:
    # No four-fermion / line-shape reweighting for this MC sample.
    process.step3Tree.variables.fourW = "1"
    process.step3Tree.variables.fermiW = "1"
if mhiggs <= 0:
    process.step3Tree.variables.kfW = cms.string("1")
else:
    # Higgs-pT k-factor table selected by the (absolute) Higgs mass.
    process.higgsPt.inputFilename = "HiggsAnalysis/HiggsToWW2Leptons/data/kfactors_Std/kfactors_mh%(mass)d_ren%(mass)d_fac%(mass)d.dat" % { "mass": abs(mhiggs) }
else:
    # Data: restrict to the certified lumi sections of the requested JSON
    # and neutralise all generator/pileup weights.
    from FWCore.PythonUtilities.LumiList import LumiList
    import os
    lumis = LumiList(filename=os.getenv('CMSSW_BASE') + '/src/WWAnalysis/Misc/Jsons/%s.json' % json)
    process.source.lumisToProcess = cms.untracked.VLuminosityBlockRange()
    process.source.lumisToProcess = lumis.getCMSSWString().split(',')
    process.step3Tree.variables.baseW = "1"
    process.step3Tree.variables.fourW = "1"
    process.step3Tree.variables.fermiW = "1"
    process.step3Tree.variables.kfW = cms.string("1")
    process.step3Tree.variables.trpu = cms.string("1")
    process.step3Tree.variables.itpu = cms.string("1")
    process.step3Tree.variables.ootpup1 = cms.string("1")
    process.step3Tree.variables.ootpum1 = cms.string("1")
    process.step3Tree.variables.puW = cms.string("1")
    process.step3Tree.variables.puAW = cms.string("1")
    process.step3Tree.variables.puBW = cms.string("1")

# process.schedule = cms.Schedule()
process.load("WWAnalysis.AnalysisStep.hww_reboosting_cff")
# NOTE(review): isData is set to False here when doPDFvar is on — looks like
# a PDF-variation (MC-only) switch; confirm the flag name is not misleading.
if doPDFvar:
    process.slimPatJetsTriggerMatch.isData = cms.untracked.bool(False)
def __createSnippet( self, jsonPath = None, begin = None, end = None, firstRun = None, lastRun = None, repMap = None, crab = False, parent = False ): if firstRun: firstRun = int( firstRun ) if lastRun: lastRun = int( lastRun ) if ( begin and firstRun ) or ( end and lastRun ): msg = ( "The Usage of " + "'begin' & 'firstRun' " * int( bool( begin and firstRun ) ) + "and " * int( bool( ( begin and firstRun ) and ( end and lastRun ) ) ) + "'end' & 'lastRun' " * int( bool( end and lastRun ) ) + "is ambigous." ) raise AllInOneError( msg ) if begin or end: ( firstRun, lastRun ) = self.convertTimeToRun( begin = begin, end = end, firstRun = firstRun, lastRun = lastRun ) if ( firstRun and lastRun ) and ( firstRun > lastRun ): msg = ( "The lower time/runrange limit ('begin'/'firstRun') " "chosen is greater than the upper time/runrange limit " "('end'/'lastRun').") raise AllInOneError( msg ) if self.predefined() and (jsonPath or begin or end or firstRun or lastRun): msg = ( "The parameters 'JSON', 'begin', 'end', 'firstRun', and 'lastRun'" "only work for official datasets, not predefined _cff.py files" ) raise AllInOneError( msg ) goodLumiSecStr = "" lumiStr = "" lumiSecExtend = "" if firstRun or lastRun or jsonPath: goodLumiSecStr = ( "lumiSecs = cms.untracked." 
"VLuminosityBlockRange()\n" ) lumiStr = " lumisToProcess = lumiSecs,\n" if not jsonPath: selectedRunList = self.__getRunList() if firstRun: selectedRunList = [ run for run in selectedRunList \ if self.__findInJson(run, "run_number") >= firstRun ] if lastRun: selectedRunList = [ run for run in selectedRunList \ if self.__findInJson(run, "run_number") <= lastRun ] lumiList = [ str( self.__findInJson(run, "run_number") ) + ":1-" \ + str( self.__findInJson(run, "run_number") ) + ":max" \ for run in selectedRunList ] splitLumiList = list( self.__chunks( lumiList, 255 ) ) else: theLumiList = None try: theLumiList = LumiList ( filename = jsonPath ) except ValueError: pass if theLumiList is not None: allRuns = theLumiList.getRuns() runsToRemove = [] for run in allRuns: if firstRun and int( run ) < firstRun: runsToRemove.append( run ) if lastRun and int( run ) > lastRun: runsToRemove.append( run ) theLumiList.removeRuns( runsToRemove ) splitLumiList = list( self.__chunks( theLumiList.getCMSSWString().split(','), 255 ) ) else: with open(jsonPath) as f: jsoncontents = f.read() if "process.source.lumisToProcess" in jsoncontents: msg = "%s is not a json file, but it seems to be a CMSSW lumi selection cff snippet. Trying to use it" % jsonPath if firstRun or lastRun: msg += ("\n (after applying firstRun and/or lastRun)") msg += ".\nPlease note that, depending on the format of this file, it may not work as expected." msg += "\nCheck your config file to make sure that it worked properly." 
print msg runlist = self.__getRunList() if firstRun or lastRun: self.__firstusedrun = -1 self.__lastusedrun = -1 jsoncontents = re.sub("\d+:(\d+|max)-\d+:(\d+|max)", self.getForceRunRangeFunction(firstRun, lastRun), jsoncontents) self.__firstusedrun = max(self.__firstusedrun, int(self.__findInJson(runlist[0],"run_number"))) self.__lastusedrun = min(self.__lastusedrun, int(self.__findInJson(runlist[-1],"run_number"))) else: self.__firstusedrun = int(self.__findInJson(runlist[0],"run_number")) self.__lastusedrun = int(self.__findInJson(runlist[-1],"run_number")) lumiSecExtend = jsoncontents splitLumiList = [[""]] if not len(splitLumiList[0][0]) == 0: lumiSecStr = [ "',\n'".join( lumis ) \ for lumis in splitLumiList ] lumiSecStr = [ "lumiSecs.extend( [\n'" + lumis + "'\n] )" \ for lumis in lumiSecStr ] lumiSecExtend = "\n".join( lumiSecStr ) runlist = self.__getRunList() self.__firstusedrun = max(int(splitLumiList[0][0].split(":")[0]), int(self.__findInJson(runlist[0],"run_number"))) self.__lastusedrun = min(int(splitLumiList[-1][-1].split(":")[0]), int(self.__findInJson(runlist[-1],"run_number"))) else: runlist = self.__getRunList() self.__firstusedrun = int(self.__findInJson(self.__getRunList()[0],"run_number")) self.__lastusedrun = int(self.__findInJson(self.__getRunList()[-1],"run_number")) if crab: files = "" else: splitFileList = list( self.__chunks( self.fileList(), 255 ) ) fileStr = [ "',\n'".join( files ) for files in splitFileList ] fileStr = [ "readFiles.extend( [\n'" + files + "'\n] )" \ for files in fileStr ] files = "\n".join( fileStr ) if parent: splitParentFileList = list( self.__chunks( self.fileList(parent = True), 255 ) ) parentFileStr = [ "',\n'".join( parentFiles ) for parentFiles in splitParentFileList ] parentFileStr = [ "secFiles.extend( [\n'" + parentFiles + "'\n] )" \ for parentFiles in parentFileStr ] parentFiles = "\n".join( parentFileStr ) files += "\n\n" + parentFiles theMap = repMap theMap["files"] = files theMap["json"] = jsonPath 
theMap["lumiStr"] = lumiStr theMap["goodLumiSecStr"] = goodLumiSecStr%( theMap ) theMap["lumiSecExtend"] = lumiSecExtend if crab: dataset_snippet = self.__dummy_source_template%( theMap ) else: dataset_snippet = self.__source_template%( theMap ) return dataset_snippet
def __createSnippet(self, jsonPath=None, begin=None, end=None,
                    firstRun=None, lastRun=None, repMap=None, crab=False):
    """Build the dataset cff snippet (file list plus optional lumi selection).

    Parameters (all optional):
      jsonPath  -- path to a JSON lumi mask; restricts the lumis to process
      begin/end -- time limits, converted to a run range via convertTimeToRun()
      firstRun/lastRun -- explicit run-range limits; mixing these with
                   begin/end raises AllInOneError
      repMap    -- replacement map applied to the source template.
                   NOTE: the caller's dict is mutated in place (keys 'files',
                   'json', 'lumiStr', 'goodLumiSecStr', 'lumiSecExtend' are
                   added/overwritten).
      crab      -- if True, use the dummy source template and no file list
    Returns the formatted snippet string.
    Raises AllInOneError on ambiguous or inverted run/time limits.
    """
    # Normalize run limits to ints (they may arrive as strings).
    if firstRun:
        firstRun = int(firstRun)
    if lastRun:
        lastRun = int(lastRun)
    # Reject mixed time-based and run-based limits; the multiplications by
    # int(bool(...)) include each phrase only when that clash is present.
    if (begin and firstRun) or (end and lastRun):
        msg = ( "The Usage of "
                + "'begin' & 'firstRun' " * int(bool(begin and firstRun))
                + "and " * int(bool((begin and firstRun) and
                                    (end and lastRun)))
                + "'end' & 'lastRun' " * int(bool(end and lastRun))
                + "is ambigous.")
        raise AllInOneError(msg)
    # Time limits are translated into an equivalent run range.
    if begin or end:
        (firstRun, lastRun) = self.convertTimeToRun(begin=begin, end=end,
                                                    firstRun=firstRun,
                                                    lastRun=lastRun)
    if (firstRun and lastRun) and (firstRun > lastRun):
        msg = ("The lower time/runrange limit ('begin'/'firstRun') "
               "chosen is greater than the upper time/runrange limit "
               "('end'/'lastRun').")
        raise AllInOneError(msg)
    goodLumiSecStr = ""
    lumiStr = ""
    lumiSecExtend = ""
    if firstRun or lastRun:
        # Explicit run range: build lumiSecs in the snippet and fill it below.
        goodLumiSecStr = ("lumiSecs = cms.untracked."
                          "VLuminosityBlockRange()\n")
        lumiStr = " lumisToProcess = lumiSecs,\n"
        if not jsonPath:
            # No JSON mask: take whole runs ("run:1-run:max") from the
            # dataset's run list, filtered by the requested range.
            selectedRunList = self.__getRunList()
            if firstRun:
                selectedRunList = [ run for run in selectedRunList \
                                    if run["run_number"] >= firstRun ]
            if lastRun:
                selectedRunList = [ run for run in selectedRunList \
                                    if run["run_number"] <= lastRun ]
            lumiList = [ str( run["run_number"] ) + ":1-" \
                         + str( run["run_number"] ) + ":max" \
                         for run in selectedRunList ]
            # Chunked to 255 entries per extend() call in the emitted cff.
            splitLumiList = list(self.__chunks(lumiList, 255))
        else:
            # JSON mask given: drop runs outside [firstRun, lastRun] from the
            # mask, then chunk its CMSSW-format lumi ranges.
            theLumiList = LumiList(filename=jsonPath)
            allRuns = theLumiList.getRuns()
            runsToRemove = []
            for run in allRuns:
                if firstRun and int(run) < firstRun:
                    runsToRemove.append(run)
                if lastRun and int(run) > lastRun:
                    runsToRemove.append(run)
            theLumiList.removeRuns(runsToRemove)
            splitLumiList = list( self.__chunks(
                theLumiList.getCMSSWString().split(','), 255))
        # Emit one "lumiSecs.extend([...])" statement per chunk, unless the
        # selection came out empty.
        if not len(splitLumiList[0][0]) == 0:
            lumiSecStr = [ "',\n'".join( lumis ) \
                           for lumis in splitLumiList ]
            lumiSecStr = [ "lumiSecs.extend( [\n'" + lumis + "'\n] )" \
                           for lumis in lumiSecStr ]
            lumiSecExtend = "\n".join(lumiSecStr)
    elif jsonPath:
        # JSON mask without a run range: let the snippet itself load the JSON
        # at cmsRun time ('%(json)s' is substituted from theMap below).
        goodLumiSecStr = ("goodLumiSecs = LumiList.LumiList(filename"
                          "= '%(json)s').getCMSSWString().split(',')\n"
                          "lumiSecs = cms.untracked"
                          ".VLuminosityBlockRange()\n")
        lumiStr = " lumisToProcess = lumiSecs,\n"
        lumiSecExtend = "lumiSecs.extend(goodLumiSecs)\n"
    if crab:
        # CRAB supplies the input files itself.
        files = ""
    else:
        # Chunked "readFiles.extend([...])" statements, 255 files per call.
        splitFileList = list(self.__chunks(self.fileList(), 255))
        fileStr = ["',\n'".join(files) for files in splitFileList]
        fileStr = [ "readFiles.extend( [\n'" + files + "'\n] )" \
                    for files in fileStr ]
        files = "\n".join(fileStr)
    # theMap aliases repMap, so these assignments mutate the caller's dict.
    theMap = repMap
    theMap["files"] = files
    theMap["json"] = jsonPath
    theMap["lumiStr"] = lumiStr
    theMap["goodLumiSecStr"] = goodLumiSecStr % (theMap)
    theMap["lumiSecExtend"] = lumiSecExtend
    if crab:
        dataset_snippet = self.__dummy_source_template % (theMap)
    else:
        dataset_snippet = self.__source_template % (theMap)
    return dataset_snippet
process.eventHists = cms.EDAnalyzer("CreateEventHistsEDMWrapped", FWLiteParams.clone(), ) if isMC: # PU re-weighting: process.eventHists.puWeights = reWeightVector[:] process.eventHists.puLabel = cms.InputTag("addPileupInfo") else: lumis = LumiList(filename = 'RMMEJSON') # lumis = LumiList(filename = os.getenv('CMSSW_BASE')+'/src/WWAnalysis/Misc/Jsons/certifiedUCSD.json') process.eventHists.lumisToProcess = cms.untracked.VLuminosityBlockRange() process.eventHists.lumisToProcess = lumis.getCMSSWString().split(',') process.eventHists.sampleName = cms.string("RMMENUM_RMMENAME") # process.eventHists.sampleName = cms.string("101160_ggToH160toWWto2L2Nu") process.eventHists.doNMinus1 = False # process.eventHists.doByCuts = False # process.eventHists.printSummary = True # RMME # do this here or in the step 2 files? if isGluGlu: process.higgsPt = cms.EDProducer("HWWKFactorProducer", genParticlesTag = cms.InputTag("onlyHiggsGen"), inputFilename = cms.untracked.string("HiggsAnalysis/HiggsToWW2Leptons/data/kfactors_Std/kfactors_mhRMMEMASS_renRMMEMASS_facRMMEMASS.dat") # inputFilename = cms.untracked.string("HiggsAnalysis/HiggsToWW2Leptons/data/kfactors_Std/kfactors_mh160_ren160_fac160.dat")
if args.intersect: edmLumis = edmLumis & impLumis reclumiData = None dellumiData = None if lumiCalc is not None: print "Accessing LumiDB... can take a while..." dellumiData = lumiCalc.deliveredLumiForRange(edmLumis.getCompactList()) reclumiData = lumiCalc.recordedLumiForRange(edmLumis.getCompactList()) totalRec = 0.0 totalDel = 0.0 for dpr in dellumiData: if dpr[2] != 'N/A': totalDel += float(dpr[2]) for dpr in reclumiData: totalRec += lumiCalc.calculateTotalRecorded(dpr[2]) print "Delivered Luminosity: ", totalDel print "Recorded Luminosity: ", totalRec if args.outputJSON: edmLumis.writeJSON(outputJSON) if args.printJSON: print edmLumis.getCompactList() if args.printCMSSWString: print edmLumis.getCMSSWString() if lumiCalc is not None: del lumiCalc
if args.intersect: edmLumis = edmLumis & impLumis reclumiData=None dellumiData=None if lumiCalc is not None: print "Accessing LumiDB... can take a while..." dellumiData=lumiCalc.deliveredLumiForRange(edmLumis.getCompactList()) reclumiData=lumiCalc.recordedLumiForRange(edmLumis.getCompactList()) totalRec = 0.0 totalDel = 0.0 for dpr in dellumiData: if dpr[2] != 'N/A': totalDel += float(dpr[2]) for dpr in reclumiData: totalRec += lumiCalc.calculateTotalRecorded(dpr[2]) print "Delivered Luminosity: ",totalDel print "Recorded Luminosity: ",totalRec if args.outputJSON: edmLumis.writeJSON(outputJSON) if args.printJSON: print edmLumis.getCompactList() if args.printCMSSWString: print edmLumis.getCMSSWString() if lumiCalc is not None: del lumiCalc
from WWAnalysis.AnalysisStep.cutPSets_cfi import * swapOutPSetValues(process.yieldParams.selectionParams.wwelel,defaultWW) process.yieldParams.selectionParams.wwmumu = process.yieldParams.selectionParams.wwelel.clone() process.yieldParams.selectionParams.wwelmu = process.yieldParams.selectionParams.wwelel.clone() swapOutPSetValues(process.yieldParams.selectionParams.wwelmu,oppositeFlavor) from WWAnalysis.AnalysisStep.data.v03.wwSamples_cff import wwSamples isData = False looper = wwSamples if isData: lumis = LumiList(filename = '/nfs/bluearc/group/jsons/Skim_Mu_And_El_byGiovanni/Final.json') process.yieldParams.lumisToProcess = cms.untracked.VLuminosityBlockRange() process.yieldParams.lumisToProcess = lumis.getCMSSWString().split(',') #loop over the looper for x in looper: #make new pset setattr( process.yieldParams.inputParams, looper[x][0], cms.PSet( files = cms.vstring(), scale = cms.double(looper[x][1]), printEvents = cms.bool(isData), isMC = cms.bool( not isData ), isSignal = cms.bool( looper[x][5] )