def doit(*x):
    print x
    ll_fn, lumi_fn, check_intlumi_sum, goal, out_fn = x
    check_intlumi_sum *= 1e9  # csv in /ub
    random.seed(8675309)
    goal *= check_intlumi_sum
    in_ll = LumiList(ll_fn).getLumis()
    intlumis, intlumi_sum = intlumi_from_brilcalc_csv(lumi_fn, False)
    assert abs(intlumi_sum - check_intlumi_sum) < 1e6
    tot = 0.
    out_ll = []
    while tot < goal:
        i = random.randrange(len(in_ll))
        rl = in_ll.pop(i)
        #if not intlumis.has_key(rl):
        #    continue
        tot += intlumis[rl]
        out_ll.append(rl)
    print 'tot = %f, picked %i lumis' % (tot, len(out_ll))
    LumiList(lumis=out_ll).writeJSON(out_fn)
def getStrangeRuns(self):
    hltInfoByLs = self.analysisOutput()
    strangeRunsLumis = LumiList(lumis=[[int(hltInf[0]), int(hltInf[1])]
                                       for hltInf in hltInfoByLs
                                       if not int(hltInf[2][1]) >= 1])
    if self.jsonOutput:
        strangeRunsLumis.writeJSON(self.jsonOutput + "_strange")
    else:
        return strangeRunsLumis
def __init__(self, name, nanotrees, weight, triggers, jess, jers, mc, injfile, outjfile):
    self.jess = jess
    self.jers = jers
    self.mc = mc
    self.weight = weight
    self.__book__(name)
    # get json file
    myList = LumiList(filename=injfile)
    # initialize output lumilist
    myrunlumi = []
    # open root files
    files = open(nanotrees, "r")
    # for file in glob.glob(nanotrees + "*root"):
    for file in files.read().splitlines():
        self.Fill(file, triggers, myList, myrunlumi)
    # only do this for data
    if not (self.mc):
        outList = LumiList(lumis=myrunlumi)
        outList.writeJSON(outjfile)
    print "got to the end"
    files.close()
    self.O.cd()
    self.O.Write()
    self.O.Close()
def combine_grls(grl1, grl2):
    lumis1 = LumiList(compactList=grl1)
    lumis2 = LumiList(compactList=grl2)
    new_lumis = lumis1 & lumis2
    # print new_lumis.compactList
    return new_lumis.compactList
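# A minimal sketch of the LumiList set algebra used above, assuming a CMSSW
# environment where FWCore.PythonUtilities is importable; run number and
# ranges are made up for illustration.
from FWCore.PythonUtilities.LumiList import LumiList

grl_a = LumiList(compactList={'273158': [[1, 10]]})
grl_b = LumiList(compactList={'273158': [[5, 20]]})
print (grl_a & grl_b).getCompactList()  # {'273158': [[5, 10]]}
print (grl_a | grl_b).getCompactList()  # {'273158': [[1, 20]]}
print (grl_a - grl_b).getCompactList()  # {'273158': [[1, 4]]}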
def createDataDatasets(self):
    self.dataDatasets = {}
    for d in self.datasets:
        dsLumiList = None
        if not os.path.isfile(d['json']):
            oldSsArgv = sys.argv  # sys argv fix
            sys.argv = []
            dasC = dasTools.myDasClient()
            dasC.limit = 0
            dsLumiList = dasC.getJsonOfDataset(d["dataset"])
            dsLumiList.writeJSON(d['json'])
            sys.argv = oldSsArgv
        else:
            dsLumiList = LumiList(compactList=json.load(open(d['json'])))
        dsRuns = dsLumiList.getRuns()
        self.dataDatasets[d['label']] = ('{ \n '
            '\t"xSec":None\n'
            '\t,"localFile":None\n'
            '\t,"datasetName":"' + d["dataset"] + '"\n'
            '\t,"label":"Data_' + d['label'] + '"\n'
            '\t,"datasetJSON":"' + d['json'] + '"\n'
            '\t,"crabConfig":{\n'
            '\t\t"CMSSW":{"lumis_per_job":5\n'
            '\t\t\t,"lumi_mask": os.getenv("CMSSW_BASE") + ' + '"/' + d['goldenJson'].lstrip('/') + '"\n'
            '\t\t\t,"total_number_of_lumis" : -1}\n'
            '\t\t}\n'
            '\t,"color":0\n'
            '\t,"runRange":"' + str(dsRuns[0]) + "-" + str(dsRuns[-1]) + '"\n'
            '\t}\n')
def mk_secondary_lumimask(dset):
    dq = das_query("file dataset=%s instance=prod/phys03" % dset,
                   cmd='dasgoclient --dasmaps=./')
    assert 'data' in dq.keys()
    fs = [str(f['file'][0]['name']) for f in dq['data']]
    #fs = fs[:2]
    print('N files:', len(fs))
    lumis = []
    dqs = [das_query("lumi file=%s instance=prod/phys03" % f,
                     cmd='dasgoclient --dasmaps=./')
           for f in fs]
    for dq in dqs:
        for data in dq['data']:
            for lumi in data['lumi'][0]['lumi_section_num']:
                lumis.append([data['lumi'][0]['run_number'], lumi])
    jsonList = LumiList(lumis=lumis)
    #print(jsonList)
    output_file = dset.split('/')[2].split('-')[1].split('_')[0]
    #print(output_file)
    jsonList.writeJSON(output_dir + output_file + '_3photons_imgskim_lumi_list.json')
def getLumiList(lumi_mask_name, logger=None):
    """
    Takes a lumi-mask and returns a LumiList object.
    lumi-mask: either an http address or a json file on disk.
    """
    lumi_list = None
    parts = urlparse(lumi_mask_name)
    if parts[0] in ['http', 'https']:
        if logger:
            logger.debug('Downloading lumi-mask from %s' % lumi_mask_name)
        try:
            lumi_list = LumiList(url=lumi_mask_name)
        except Exception as err:
            raise ConfigurationException(
                "CMSSW failed to get lumimask from URL. Please try to download the lumimask yourself and point to it in crabConfig;\n%s" % str(err))
    else:
        if logger:
            logger.debug('Reading lumi-mask from %s' % lumi_mask_name)
        try:
            lumi_list = LumiList(filename=lumi_mask_name)
        except IOError as err:
            raise ConfigurationException("Problem loading lumi-mask file; %s" % str(err))
    return lumi_list
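# Hedged usage sketch for getLumiList above; the file name is hypothetical and
# ConfigurationException comes from the surrounding CRAB client code.
golden = getLumiList('Cert_294927-306462_13TeV_PromptReco_Collisions17_JSON.txt')
print '%i good lumi sections' % len(golden.getLumis())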
def getGoodRuns(self):
    hltInfoByLs = self.analysisOutput()
    goodRunsAndLumis = LumiList(lumis=[[int(hltInf[0]), int(hltInf[1])]
                                       for hltInf in hltInfoByLs
                                       if int(hltInf[2][1]) == 1])
    if self.jsonOutput:
        goodRunsAndLumis.writeJSON(self.jsonOutput + "_good")
        return self.jsonOutput + "_good"
    else:
        return goodRunsAndLumis
def getPrescaledRuns(self):
    hltInfoByLs = self.analysisOutput()
    prescaledRunsAndLumis = LumiList(lumis=[[int(hltInf[0]), int(hltInf[1])]
                                            for hltInf in hltInfoByLs
                                            if int(hltInf[2][1]) >= 1])
    if self.jsonOutput:
        prescaledRunsAndLumis.writeJSON(self.jsonOutput + "_prescaled")
        return self.jsonOutput + "_prescaled"
    else:
        return prescaledRunsAndLumis
def getDatasetLumiList(self, name, catalog):
    from FWCore.PythonUtilities.LumiList import LumiList
    dlist = LumiList()
    for fil in catalog[name]["files"]:
        flist = LumiList(runsAndLumis=fil.get("lumis", {}))
        dlist += flist
    return dlist
def files_for_json(json_fn, dataset, instance='global'):
    json = LumiList(json_fn)
    files = set()
    for file, run_lumis in file_details_run_lumis(dataset, instance).iteritems():
        ll = LumiList(runsAndLumis=run_lumis)
        if json & ll:
            files.add(file)
    return sorted(files)
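# files_for_json relies on LumiList truthiness: a LumiList is truthy when its
# compact list is non-empty, so `if json & ll` keeps files overlapping the
# mask. Minimal illustration (runs and ranges made up):
a = LumiList(compactList={'1': [[1, 5]]})
b = LumiList(compactList={'1': [[5, 9]]})
print bool(a & b)  # True: lumi section 5 is shared
print bool(a & LumiList(compactList={'2': [[1, 9]]}))  # False: disjoint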
def getRuns(name=None, bfield=None, bunchSpacing=None):
    ll = LumiList()
    for rp in runPeriods:
        if name is None or rp.name == name:
            if bfield is None or rp.bfield == bfield:
                if bunchSpacing is None or rp.bunchSpacing == bunchSpacing:
                    ll += LumiListForRunPeriod(rp)
    return ll.getRuns()
def createDataPileUpFile(jsonFile, pileUpReweightingType):
    jsonList = LumiList(jsonFile)
    lumiString = jsonList.getCMSSWString()
    thisHash = hashlib.md5(lumiString)
    #print 'Require pileup file with hash ', thisHash.hexdigest()
    dataPileUpFilename = '/data/DataPileUp_' + thisHash.hexdigest() + '_' + pileUpReweightingType + '.root'
    if not os.path.exists(baseDir + dataPileUpFilename):
        print 'Creating new "', pileUpReweightingType, '" data pileup file for json file "', jsonFile, '"'
        subprocess.call(['pileupCalc.py', '-i', jsonFile,
                         '--inputLumiJSON',
                         '/afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/Collisions11/7TeV/PileUp/pileup_JSON_2011_4_2_validation.txt',
                         '--calcMode', pileUpReweightingType,
                         '--maxPileupBin', '50',
                         baseDir + dataPileUpFilename], shell=False)
    return dataPileUpFilename
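# Sketch of the hash-based cache naming used above: identical JSON content
# always maps to the same pileup file name (string and type are made up).
import hashlib
lumi_string = '273158:1-273158:10'
tag = hashlib.md5(lumi_string).hexdigest()
print 'DataPileUp_%s_true.root' % tag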
def mergeLumis(inputdata, lumimask):
    """
    Computes the processed lumis, merges if needed and returns the compacted list.
    """
    mergedlumis = LumiList()
    doublelumis = LumiList()
    for report in inputdata:
        doublelumis = doublelumis | (mergedlumis & LumiList(runsAndLumis=report))
        mergedlumis = mergedlumis | LumiList(runsAndLumis=report)
    return (mergedlumis.getCompactList(),
            (LumiList(compactList=lumimask) - mergedlumis).getCompactList(),
            doublelumis.getCompactList())
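# Worked illustration of the merge/overlap bookkeeping above; reports use the
# runsAndLumis format (run -> individual lumi sections), the mask is compact.
reports = [{'1': [1, 2, 3, 4, 5]}, {'1': [4, 5, 6, 7, 8]}]
mask = {'1': [[1, 10]]}
processed, missing, doubles = mergeLumis(reports, mask)
print processed  # {'1': [[1, 8]]}
print missing    # {'1': [[9, 10]]}
print doubles    # {'1': [[4, 5]]}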
def __init__(self, cfg_ana, cfg_comp, looperName):
    super(JSONAnalyzer, self).__init__(cfg_ana, cfg_comp, looperName)
    if not cfg_comp.isMC:
        if self.cfg_comp.json is None:
            raise ValueError('component {cname} is not MC, and contains no JSON file. Either remove the JSONAnalyzer for your path or set the "json" attribute of this component'.format(cname=cfg_comp.name))
        self.lumiList = LumiList(os.path.expandvars(self.cfg_comp.json))
    else:
        self.lumiList = None
    self.rltInfo = RLTInfo()
def LumiListForRunPeriod(rp, MIN_LUMIS=0):
    ll = LumiList(filename=rp.json)
    runs = [run for run in map(int, ll.getRuns())
            if run >= rp.firstRun and run <= rp.lastRun]
    lumis = ll.getLumis()
    nlumis = defaultdict(int)
    for r, l in lumis:
        nlumis[r] += 1
    select_runs = [run for run in runs if nlumis[run] > MIN_LUMIS]
    ll.selectRuns(select_runs)
    return ll
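# Pure-Python illustration of the per-run lumi counting that drives the
# MIN_LUMIS cut above (numbers made up).
from collections import defaultdict
lumis = [(273158, 1), (273158, 2), (273302, 1)]
nlumis = defaultdict(int)
for r, l in lumis:
    nlumis[r] += 1
print dict(nlumis)  # {273158: 2, 273302: 1} -> run 273302 fails MIN_LUMIS=1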
def crab_fjr_json_to_ll(fn):
    print colors.yellow('this is not fully tested')
    j = crab_fjr_json(fn)
    ll = LumiList()
    for x in j['steps']['cmsRun']['input']['source']:
        x2 = defaultdict(list)
        for k, v in x['runs'].iteritems():
            for l in v.keys():
                x2[int(k)].append(int(l))
        ll += LumiList(runsAndLumis=x2)
    return ll
def load_golden_jsons(golden_json_path):
    if not os.path.isfile(golden_json_path):
        raise RuntimeError("No such file: %s" % golden_json_path)
    lumi_obj = LumiList(golden_json_path)
    lumis = lumi_obj.getLumis()
    runlumi_dict = {}
    for run, lumi in lumis:
        if run not in runlumi_dict:
            runlumi_dict[run] = []
        assert(lumi not in runlumi_dict[run])
        runlumi_dict[run].append(lumi)
    return runlumi_dict
def mergeLumis(inputdata, lumimask):
    """
    Computes the processed lumis, merges if needed and returns the compacted list.
    """
    mergedlumis = LumiList()
    doublelumis = LumiList()
    for report in inputdata:
        doublelumis = doublelumis | (mergedlumis & LumiList(runsAndLumis=report))
        mergedlumis = mergedlumis | LumiList(runsAndLumis=report)
    if doublelumis:
        self.logger.info("Warning: double run-lumis processed %s" % doublelumis)
    return mergedlumis.getCompactList(), (LumiList(compactList=lumimask) - mergedlumis).getCompactList()
def getInputRunLumi(self, file):
    import xml.dom.minidom
    dom = xml.dom.minidom.parse(file)
    ll = []
    for elem in dom.getElementsByTagName("Job"):
        nJob = int(elem.getAttribute("JobID"))
        lumis = elem.getAttribute('Lumis')
        #lumis = '193752:1'
        #lumis = '193752:1-193752:5,193774:1-193774:5,193775:1'
        if lumis:
            tmp = str.split(str(lumis), ",")
            #print "tmp = ", tmp
        else:
            msg = "The summary file inputLumiSummaryOfTask.json about input run and lumi isn't created"
            common.logger.info(msg)
            return
        #tmp = [193752:1-193752:5] [193774:1-193774:5]
        for entry in tmp:
            run_lumi = str.split(entry, "-")  # run_lumi = [193752:1] [193752:5]
            if len(run_lumi) == 0:
                pass
            if len(run_lumi) == 1:
                lumi = str.split(run_lumi[0], ":")[1]
                run = str.split(run_lumi[0], ":")[0]
                ll.append((run, int(lumi)))
            if len(run_lumi) == 2:
                lumi_max = str.split(run_lumi[1], ":")[1]
                lumi_min = str.split(run_lumi[0], ":")[1]
                run = str.split(run_lumi[1], ":")[0]
                for count in range(int(lumi_min), int(lumi_max) + 1):
                    ll.append((run, count))
    if len(ll):
        lumiList = LumiList(lumis=ll)
        compactList = lumiList.getCompactList()
        totalLumiFilename = self.fjrDirectory + 'inputLumiSummaryOfTask.json'
        totalLumiSummary = open(totalLumiFilename, 'w')
        json.dump(compactList, totalLumiSummary)
        totalLumiSummary.write('\n')
        totalLumiSummary.close()
        msg = "Summary file of input run and lumi to be analyzed with this task: %s\n" % totalLumiFilename
        common.logger.info(msg)
    else:
        msg = "The summary file inputLumiSummaryOfTask.json about input run and lumi isn't created"
        common.logger.info(msg)
        return
    return totalLumiFilename
def makeLumiBlocks(in_lumi_file, outdir, chunksize=5):
    ll = LumiList(filename=in_lumi_file)
    lumis = ll.getLumis()
    nblock = 0
    blocks = []
    for lumiblock in chunks(lumis, chunksize):
        nblock += 1
        ll2 = LumiList(lumis=lumiblock)
        fn = outdir + "/block_{0}.json".format(nblock)
        of = open(fn, "w")
        of.write(str(ll2))
        of.close()
        blocks += [fn]
    return blocks
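# The chunks() helper used above is not shown; a minimal generator sketch:
def chunks(seq, n):
    """Yield successive n-sized slices of seq."""
    for i in xrange(0, len(seq), n):
        yield seq[i:i + n]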
def main(argv=None):
    if argv is None:
        argv = sys.argv[1:]
    args = parse_command_line(argv)
    loglevel = getattr(logging, args.log)
    logging.basicConfig(format='%(asctime)s.%(msecs)03d %(levelname)s %(name)s: %(message)s',
                        level=loglevel, datefmt='%Y-%m-%d %H:%M:%S', stream=sys.stderr)
    allfiles = []
    for f in args.inputFiles:
        for fname in glob.glob(f):
            allfiles += [fname]
    logging.info('Adding {0} files to tchain'.format(len(allfiles)))
    tchain = ROOT.TChain(args.treeName)
    for fname in allfiles:
        tchain.Add(fname)
    nlumis = tchain.GetEntries()
    logging.info('Processing {0} lumis'.format(nlumis))
    allLumis = {}
    total = 0
    for row in tchain:
        total += 1
        if row.run not in allLumis:
            allLumis[row.run] = set()
        allLumis[row.run].add(row.lumi)
    lumiJson = LumiList(runsAndLumis=allLumis)
    #lumiJson.writeJSON(args.outputFile)
    print lumiJson
def mergeLumis(inputdata):
    """
    Computes the processed lumis, merges if needed and returns the compacted list.
    """
    mergedLumis = set()
    # merge the lumis from single files
    for reports in inputdata.values():
        for report in reports:
            for run, lumis in literal_eval(report['runlumi']).items():
                if isinstance(run, bytes):
                    run = run.decode(encoding='UTF-8')
                for lumi in lumis:
                    mergedLumis.add((run, int(lumi)))  # lumi is str, but need int
    mergedLumis = LumiList(lumis=mergedLumis)
    return mergedLumis.getCompactList()
class applyJSON(Module):

    def __init__(self, json_file):
        if json_file:
            self.lumiList = LumiList(os.path.expandvars(json_file))
        else:
            self.lumiList = None

    def beginJob(self):
        pass

    def endJob(self):
        pass

    def beginFile(self, inputFile, outputFile, inputTree, wrappedOutputTree):
        self.out = wrappedOutputTree
        self.out.branch("jsonPassed", "I")

    def endFile(self, inputFile, outputFile, inputTree, wrappedOutputTree):
        pass

    def analyze(self, event):
        """process event, return True (go to next module) or False (fail, go to next event)"""
        if self.lumiList:
            jsonPassed = self.lumiList.contains(event.run, event.luminosityBlock)
        else:
            jsonPassed = 1
        self.out.fillBranch("jsonPassed", jsonPassed)
        return True
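# Hedged usage sketch for the applyJSON module above, assuming the
# nanoAOD-tools framework it is written against; file names are hypothetical.
from PhysicsTools.NanoAODTools.postprocessing.framework.postprocessor import PostProcessor

p = PostProcessor(".", ["nano.root"], modules=[applyJSON("golden.json")])
p.run()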
def files_for_events(run_events, dataset, instance='global'):
    wanted_run_lumis = []
    # list of runs, or list of (run, event), or list of (run, lumi, event)
    for x in run_events:
        if type(x) == int:
            wanted_run_lumis.append((x, None))
        elif len(x) == 2:
            wanted_run_lumis.append((x[0], None))
        else:
            wanted_run_lumis.append(x[:2])
    files = set()
    for file, run_lumis in file_details_run_lumis(dataset, instance).iteritems():
        ll = LumiList(runsAndLumis=run_lumis)
        for x in wanted_run_lumis:
            if ll.contains(*x):
                files.add(file)
    return sorted(files)
def getLumisToSkip(self, dataset):
    catalog = self.readCatalog(True)
    if dataset not in catalog:
        return None
    from FWCore.PythonUtilities.LumiList import LumiList
    return LumiList(compactList=catalog[dataset].get('lumisToSkip', {}))
def __init__(self, cfg_ana, cfg_comp, looperName):
    super(JSONAnalyzer, self).__init__(cfg_ana, cfg_comp, looperName)
    if not cfg_comp.isMC:
        if self.cfg_comp.json is None:
            raise ValueError('component {cname} is not MC, and contains no JSON file. Either remove the JSONAnalyzer for your path or set the "json" attribute of this component'.format(cname=cfg_comp.name))
        self.lumiList = LumiList(os.path.expandvars(self.cfg_comp.json))
    else:
        self.lumiList = None
    if hasattr(self.cfg_comp, 'additionaljson'):
        self.additionalLumiList = LumiList(os.path.expandvars(self.cfg_comp.additionaljson))
        self.twojson = True
    else:
        self.twojson = False
    self.useLumiBlocks = self.cfg_ana.useLumiBlocks if hasattr(self.cfg_ana, 'useLumiBlocks') else False
    self.rltInfo = RLTInfo()
def __init__(self, cfg_ana, cfg_comp, looperName):
    super(JSONAnalyzer, self).__init__(cfg_ana, cfg_comp, looperName)
    if not cfg_comp.isMC:
        self.lumiList = LumiList(self.cfg_comp.json)
    else:
        self.lumiList = None
    self.rltInfo = RLTInfo()
def getDuplicateLumis(lumisDict):
    """
    Get the run-lumis appearing more than once in the input dictionary of
    runs and lumis, which is assumed to have the following format:
        { '1': [1,2,3,4,6,7,8,9,10],
          '2': [1,4,5,20] }
    """
    doubleLumis = set()
    for run, lumis in lumisDict.items():
        seen = set()
        doubleLumis.update(set((run, lumi) for lumi in lumis
                               if (run, lumi) in seen or seen.add((run, lumi))))
    doubleLumis = LumiList(lumis=doubleLumis)
    return doubleLumis.getCompactList()
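# Worked example of the seen-set trick above: set.add() returns None (falsy),
# so a (run, lumi) pair is kept only on its second and later appearances.
print getDuplicateLumis({'1': [1, 2, 2, 3], '2': [4, 4]})
# -> {'1': [[2, 2]], '2': [[4, 4]]}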
def __init__(self, cfg_ana, cfg_comp, looperName):
    super(JSONAnalyzer, self).__init__(cfg_ana, cfg_comp, looperName)
    if not cfg_comp.isMC:
        if self.cfg_comp.json is None and not hasattr(self.cfg_ana, "json"):
            raise ValueError('component {cname} is not MC, and contains no JSON file. Either remove the JSONAnalyzer for your path or set the "json" attribute of this component'.format(cname=cfg_comp.name))
        # use json from this analyzer if given, otherwise use the component json
        self.lumiList = LumiList(os.path.expandvars(getattr(self.cfg_ana, "json", self.cfg_comp.json)))
    else:
        self.lumiList = None
    self.passAll = getattr(self.cfg_ana, 'passAll', False)
    self.useLumiBlocks = self.cfg_ana.useLumiBlocks if hasattr(self.cfg_ana, 'useLumiBlocks') else False
    self.rltInfo = RLTInfo()
def customise(process):
    lumiFile = 'Cert_246908-260627_13TeV_PromptReco_Collisions15_25ns_JSON.txt'
    runOnMC = True
    for i in process.source.fileNames:
        if 'Run2015' in i:
            runOnMC = False
    isTTbar = False
    for i in process.source.fileNames:
        if '/TT' in i or '/tt' in i:
            isTTbar = True
    if not runOnMC:
        from FWCore.PythonUtilities.LumiList import LumiList
        lumiList = LumiList(os.environ["CMSSW_BASE"] + '/src/CATTools/CatProducer/prod/LumiMask/' + lumiFile)
        #lumiList = LumiList(os.environ["CMSSW_BASE"]+'/src/CATTools/CommonTools/test/ttbb/'+lumiFile)
        process.source.lumisToProcess = lumiList.getVLuminosityBlockRange()
def getLumiList(self, *args):
    catalog = self.readCatalog(True)
    datasets = []
    output = filter(lambda x: "output=" in x, args)
    args = filter(lambda x: not "output=" in x, args)
    for dataset in catalog.keys():
        for arg in args:
            if dataset == arg or fnmatch(dataset, arg):
                datasets.append(dataset)
                break
    if len(output) > 1:
        print "ERROR: you specified the output json more than once:\n"
        print "   %s" % " ".join(output)
        sys.exit(-1)
    if len(output) > 0:
        output = output[0].split("output=", 1)[1]
    else:
        output = None
    from FWCore.PythonUtilities.LumiList import LumiList
    fulist = LumiList()
    for dataset in datasets:
        dlist = LumiList()
        jsonout = dataset.lstrip("/").rstrip("/").replace("/", "_") + ".json"
        for fil in catalog[dataset]["files"]:
            flist = LumiList(runsAndLumis=fil.get("lumis", {}))
            ## print flist
            dlist += flist
        if not output:
            with open(jsonout, "w+") as fout:
                fout.write(json.dumps(dlist.compactList, sort_keys=True))
        else:
            fulist += dlist
    if output:
        with open(output, "w+") as fout:
            fout.write(json.dumps(fulist.compactList, sort_keys=True))
def savejsons(self, processed):
    jsondir = os.path.join(self.__plotdir, 'jsons')
    if not os.path.exists(jsondir):
        os.makedirs(jsondir)
    res = {}
    for label in processed:
        jsondir = os.path.join('jsons', label)
        if not os.path.exists(os.path.join(self.__plotdir, jsondir)):
            os.makedirs(os.path.join(self.__plotdir, jsondir))
        lumis = LumiList(lumis=processed[label])
        lumis.writeJSON(os.path.join(self.__plotdir, jsondir, 'processed.json'))
        res[label] = [(os.path.join(jsondir, 'processed.json'), 'processed')]
        published = os.path.join(self.__workdir, label, 'published.json')
        if os.path.isfile(published):
            shutil.copy(published, os.path.join(self.__plotdir, jsondir))
            res[label] += [(os.path.join(jsondir, 'published.json'), 'published')]
    return res
def mergeDataset(self, dst, merge):
    dst["vetted"] = False
    from FWCore.PythonUtilities.LumiList import LumiList
    dstLumisToSkip = LumiList(compactList=dst.get('lumisToSkip', {}))
    mergeLumisToSkip = LumiList(compactList=merge.get('lumisToSkip', {}))
    dstLumisToSkip += mergeLumisToSkip
    dstLumisToSkip = dstLumisToSkip.compactList
    if len(dstLumisToSkip) > 0:
        dst['lumisToSkip'] = dstLumisToSkip
        print "\nWARNING: Merged lumisToSkip list. It is recommended to run the 'overlap' command to re-generate the list from scratch."
    dstFiles = dst["files"]
    mergeFiles = merge["files"]
    for fil in mergeFiles:
        skip = False
        for dfil in dstFiles:
            if dfil["name"] == fil["name"]:
                skip = True
        if not skip:
            dstFiles.append(fil)
def getDatasetLumiList(self, name, catalog, check=False):
    from FWCore.PythonUtilities.LumiList import LumiList
    lumisToSkip = catalog[name].get("lumisToSkip", None)
    if lumisToSkip:
        print "Dataset %s has list of lumi sections to skip in catalog" % name
        lumisToSkip = LumiList(compactList=lumisToSkip)
    dlist = LumiList()
    for fil in catalog[name]["files"]:
        flist = LumiList(runsAndLumis=fil.get("lumis", {}))
        if lumisToSkip and not check:
            flist = flist - lumisToSkip
        if check:
            andlist = dlist & flist
            ## print andlist, fil.get("name")
            if len(andlist) != 0:
                print "Warning: duplicate lumi sections in dataset. %s" % fil.get("name")
                print andlist, flist
        dlist += flist
    return dlist
def writejson(l, out_fn):
    is_data = is_data_fn(out_fn)
    if not is_data:
        run = 1
    rll = defaultdict(list)
    for x in l:
        if is_data:
            run, lumi = x[:2]
        else:
            lumi = x[0]
        rll[run].append(lumi)
    LumiList(runsAndLumis=rll).writeJSON(out_fn)
def __init__(self, fn, mask_fn=None):
    self.mask = LumiList(mask_fn) if mask_fn else None
    self.lls = LumiLines.load(fn)
    self.by_run = defaultdict(list)
    self.by_run_ls = {}
    self.fills = defaultdict(lambda: 999999)
    for ll in self.lls:
        if not self.mask or (ll.run, ll.ls) in self.mask:
            self.by_run[ll.run].append(ll)
            self.by_run_ls[(ll.run, ll.ls)] = ll
            self.fills[ll.fill] = min(self.fills[ll.fill], ll.run)
    self.fill_boundaries = sorted(self.fills.values())
    self.by_run = dict(self.by_run)
def split_by_lumi(config, dataset_info, task_list):
    if config.has_key('lumi mask'):
        lumi_mask = LumiList(filename=config['lumi mask'])
        dataset_info.total_lumis = 0
        for file in dataset_info.files:
            dataset_info.lumis[file] = lumi_mask.filterLumis(dataset_info.lumis[file])
            dataset_info.total_lumis += len(dataset_info.lumis[file])
    lumis_per_task = config['lumis per task']
    lumis_processed = 0
    task_id = 0
    tasks = []
    files = iter(dataset_info.files)
    file = files.next()
    input_files_this_task = [file]
    task_lumis_remaining = dataset_info.lumis[file]
    while lumis_processed < dataset_info.total_lumis:
        for file in input_files_this_task:
            common_lumis = set(dataset_info.lumis[file]).intersection(set(task_lumis_remaining))
            if len(common_lumis) == 0 or len(dataset_info.lumis[file]) == 0:
                input_files_this_task.remove(file)
        while lumis_per_task <= len(task_lumis_remaining):
            task_lumis = LumiList(lumis=task_lumis_remaining[:lumis_per_task])
            task_lumis_remaining = task_lumis_remaining[lumis_per_task:]
            tasks.append((input_files_this_task, task_lumis.getVLuminosityBlockRange()))
            task_id += 1
            lumis_processed += lumis_per_task
        try:
            file = files.next()
            input_files_this_task.append(file)
            task_lumis_remaining.extend(dataset_info.lumis[file])
        except StopIteration:
            lumis_per_task = len(task_lumis_remaining)
    with open(task_list, 'w') as json_file:
        json.dump(tasks, json_file)
    return len(tasks)
def getLumiListInValidFiles(dataset, dbsurl='phys03'):
    """
    Get the runs/lumis in the valid files of a given dataset.

    dataset: the dataset name as published in DBS
    dbsurl: the DBS URL or DBS prod instance

    Returns a LumiList object.
    """
    from dbs.apis.dbsClient import DbsApi
    dbsurl = DBSURLS['reader'].get(dbsurl, dbsurl)
    dbs3api = DbsApi(url=dbsurl)
    try:
        files = dbs3api.listFileArray(dataset=dataset, validFileOnly=0, detail=True)
    except Exception as ex:
        msg = "Got DBS client error requesting details of dataset '%s' on DBS URL '%s': %s" % (dataset, dbsurl, ex)
        msg += "\n%s" % (traceback.format_exc())
        raise ClientException(msg)
    if not files:
        msg = "Dataset '%s' not found in DBS URL '%s'." % (dataset, dbsurl)
        raise ClientException(msg)
    validFiles = [f['logical_file_name'] for f in files if f['is_file_valid']]
    blocks = set([f['block_name'] for f in files])
    runLumiPairs = []
    for blockName in blocks:
        fileLumis = dbs3api.listFileLumis(block_name=blockName)
        for f in fileLumis:
            if f['logical_file_name'] in validFiles:
                run = f['run_num']
                lumis = f['lumi_section_num']
                for lumi in lumis:
                    runLumiPairs.append((run, lumi))
    lumiList = LumiList(lumis=runLumiPairs)
    return lumiList
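# Hedged usage sketch; the USER dataset name is hypothetical.
ll = getLumiListInValidFiles('/MyPrimary/myuser-mypublication-v1/USER', dbsurl='phys03')
ll.writeJSON('valid_files_lumis.json')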
def shortenJson(jsonFile, minRun=0, maxRun=-1, output=None, debug=False):
    from copy import deepcopy
    runList = jsonFile
    if isinstance(runList, LumiList):
        runList = deepcopy(jsonFile)
    else:
        runList = LumiList(filename=jsonFile)  # Read in first JSON file
    allRuns = runList.getRuns()
    runsToRemove = []
    for run in allRuns:
        if int(run) < minRun:
            runsToRemove.append(run)
        if maxRun > 0 and int(run) > maxRun:
            runsToRemove.append(run)
    if debug:
        print " runsToRemove ", runsToRemove
    runList.removeRuns(runsToRemove)
    if output:
        runList.writeJSON(output)
    else:
        return runList
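# Usage sketch for shortenJson: keep only runs in [273158, 284044] of a
# (hypothetical) golden JSON, either writing a file or returning a LumiList.
shortenJson('golden.json', minRun=273158, maxRun=284044, output='golden_2016.json')
trimmed = shortenJson('golden.json', minRun=273158, maxRun=284044)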
if options.intLumi:
    handle = Handle('LumiSummary')
    label = ('lumiProducer')
else:
    handle, label = None, None

runsLumisDict = {}
lumis = Lumis(args)
delivered = recorded = 0
for lum in lumis:
    runList = runsLumisDict.setdefault(lum.aux().run(), [])
    runList.append(lum.aux().id().luminosityBlock())
    # get the summary and keep track of the totals
    if options.intLumi:
        lum.getByLabel(label, handle)
        summary = handle.product()
        delivered += summary.avgInsDelLumi()
        recorded += summary.avgInsRecLumi()

# print out lumi sections in JSON format
jsonList = LumiList(runsAndLumis=runsLumisDict)
if options.output:
    jsonList.writeJSON(options.output)
else:
    print jsonList

# print out integrated luminosity numbers if requested
if options.intLumi:
    print "\nNote: These numbers should be considered approximate. For official numbers, please use lumiCalc.py"
    print "delivered %.1f mb, recorded %.1f mb" % (delivered, recorded)
parser.add_option('--output', dest='output', type='string',
                  help='Save output to file OUTPUT')
# required parameters
(options, args) = parser.parse_args()
if len(args) != 1:
    raise RuntimeError("Must provide exactly one input file")
if options.min and options.max and options.min > options.max:
    raise RuntimeError("Minimum value (%d) is greater than maximum value (%d)" % (options.min, options.max))

commaRE = re.compile(r',')
runsToRemove = []
for chunk in options.runs:
    runs = commaRE.split(chunk)
    runsToRemove.extend(runs)

alphaList = LumiList(filename=args[0])  # Read in first JSON file
allRuns = alphaList.getRuns()
for run in allRuns:
    if options.min and int(run) < options.min:
        runsToRemove.append(run)
    if options.max and int(run) > options.max:
        runsToRemove.append(run)

alphaList.removeRuns(runsToRemove)
if options.output:
    alphaList.writeJSON(options.output)
else:
    print alphaList
class JSONAnalyzer(Analyzer):
    '''Apply a json filter, and creates an RLTInfo TTree.
    See PhysicsTools.HeppyCore.utils.RLTInfo for more information.

    example:

    jsonFilter = cfg.Analyzer(
        "JSONAnalyzer",
        )

    The path of the json file to be used is set as a component attribute.

    The process function returns:
      - True if
         - the component is MC, or
         - the run/lumi pair is in the JSON file, or
         - the json file was not set for this component
      - False if the component is not MC (or is embedded data, for H->tau tau)
        and the run/lumi pair is not in the JSON file.
    '''

    def __init__(self, cfg_ana, cfg_comp, looperName):
        super(JSONAnalyzer, self).__init__(cfg_ana, cfg_comp, looperName)
        if not cfg_comp.isMC:
            if self.cfg_comp.json is None:
                raise ValueError('component {cname} is not MC, and contains no JSON file. Either remove the JSONAnalyzer for your path or set the "json" attribute of this component'.format(cname=cfg_comp.name))
            self.lumiList = LumiList(os.path.expandvars(self.cfg_comp.json))
        else:
            self.lumiList = None
        self.rltInfo = RLTInfo()

    def beginLoop(self, setup):
        super(JSONAnalyzer, self).beginLoop(setup)
        self.counters.addCounter('JSON')
        self.count = self.counters.counter('JSON')
        self.count.register('All Lumis')
        self.count.register('Passed Lumis')

    def process(self, event):
        self.readCollections(event.input)
        evid = event.input.eventAuxiliary().id()
        run = evid.run()
        lumi = evid.luminosityBlock()
        eventId = evid.event()
        event.run = run
        event.lumi = lumi
        event.eventId = eventId
        if self.cfg_comp.isMC:
            return True
        if self.lumiList is None:
            return True
        self.count.inc('All Lumis')
        if self.lumiList.contains(run, lumi):
            self.count.inc('Passed Lumis')
            self.rltInfo.add('dummy', run, lumi)
            return True
        else:
            return False

    def write(self, setup):
        super(JSONAnalyzer, self).write(setup)
        self.rltInfo.write(self.dirName)
def add_merged_sample(NAME, type, AnaUrl, FWUrl, samples, comment):
    # samples is a simple dict containing three keys: 'process', 'dataset_id', 'sample_id'
    dbstore = DbStore()
    sample = None
    # check that source dataset exist
    # Skip: should exist, the check has been done before calling this function
    # check that there is no existing entry
    update = False
    localpath = ''
    nevents = 0
    checkExisting = dbstore.find(Sample, Sample.name == unicode(NAME))
    if checkExisting.is_empty():
        sample = Sample(unicode(NAME), unicode(localpath), unicode(type), nevents)
    else:
        update = True
        sample = checkExisting.one()
        sample.removeFiles(dbstore)
    # collecting contents
    sample.nevents_processed = 0
    sample.nevents = 0
    sample.normalization = 1
    sample.event_weight_sum = 0
    extras_event_weight_sum = {}
    dataset_nevents = 0
    processed_lumi = LumiList()
    for i, s in enumerate(samples):
        if i == 0:
            sample.source_dataset_id = s['dataset_id']
            sample.source_sample_id = s['sample_id']
        results = dbstore.find(Sample, Sample.sample_id == s['sample_id'])
        # Should exist, the check has been done before calling this function
        sample.nevents_processed += results[0].nevents_processed
        sample.nevents += results[0].nevents
        sample.event_weight_sum += results[0].event_weight_sum
        extra_sumw = results[0].extras_event_weight_sum
        if extra_sumw is not None:
            extra_sumw = json.loads(extra_sumw)
            for key in extra_sumw:
                try:
                    extras_event_weight_sum[key] += extra_sumw[key]
                except KeyError:
                    extras_event_weight_sum[key] = extra_sumw[key]
        tmp_processed_lumi = results[0].processed_lumi
        if tmp_processed_lumi is not None:
            tmp_processed_lumi = json.loads(tmp_processed_lumi)
            processed_lumi = processed_lumi | LumiList(compactList=tmp_processed_lumi)
        # Get info from file table
        results = dbstore.find(File, File.sample_id == s['sample_id'])
        for lfn, pfn, event_weight_sum, file_extras_event_weight_sum, nevents in list(
                results.values(File.lfn, File.pfn, File.event_weight_sum,
                               File.extras_event_weight_sum, File.nevents)):
            f = File(lfn, pfn, event_weight_sum, file_extras_event_weight_sum, nevents)
            sample.files.add(f)
        # Get info from parent datasets
        results = dbstore.find(Dataset, Dataset.dataset_id == s['dataset_id'])
        dataset_nevents += results[0].nevents
    if len(extras_event_weight_sum) > 0:
        sample.extras_event_weight_sum = unicode(json.dumps(extras_event_weight_sum))
    if len(processed_lumi.getCompactList()) > 0:
        sample.processed_lumi = unicode(json.dumps(processed_lumi.getCompactList()))
    sample.code_version = unicode(AnaUrl + ' ' + FWUrl)  # NB: limited to 255 characters, but so far so good
    if sample.nevents_processed != dataset_nevents:
        sample.user_comment = unicode("Sample was not fully processed, only " +
                                      str(sample.nevents_processed) + "/" + str(dataset_nevents) +
                                      " events were processed. " + comment)
    else:
        sample.user_comment = unicode(comment)
    sample.author = unicode(getpwuid(os.stat(os.getcwd()).st_uid).pw_name)
    if not update:
        dbstore.add(sample)
        if sample.luminosity is None:
            sample.luminosity = sample.getLuminosity()
        print sample
        dbstore.commit()
        return
    else:
        sample.luminosity = sample.getLuminosity()
        print("Sample updated")
        print(sample)
        dbstore.commit()
        return
    # rollback
    dbstore.rollback()
sample = FWLiteSample.fromDAS(sample_name, args.sample, maxN=maxN, dbFile=dbFile)

output_directory = os.path.join(skim_ntuple_directory, args.targetDir, sample.name)

# Run only job number "args.job" from total of "args.nJobs"
if args.nJobs > 1:
    n_files_before = len(sample.files)
    sample = sample.split(args.nJobs)[args.job]
    n_files_after = len(sample.files)
    logger.info("Running job %i/%i over %i files from a total of %i.",
                args.job, args.nJobs, n_files_after, n_files_before)

output_filename = os.path.join(output_directory, sample.name + '.root')

if 'Run2018' in sample.name:
    from FWCore.PythonUtilities.LumiList import LumiList
    json = '$CMSSW_BASE/src/JetMET/diagnosis/python/pu2018/Cert_314472-325175_13TeV_PromptReco_Collisions18_JSON.txt'
    lumiList = LumiList(os.path.expandvars(json))
else:
    lumiList = None

if args.maxFiles > 0:
    sample.files = sample.files[:args.maxFiles]

if not os.path.exists(output_directory):
    os.makedirs(output_directory)
    logger.info("Created output directory %s", output_directory)

products = {
    'muon':     {'skip': False, 'type': 'vector<pat::Muon>',    'label': ("slimmedMuons")},
    'vertices': {'skip': False, 'type': 'vector<reco::Vertex>', 'label': ('offlineSlimmedPrimaryVertices')},
    'met':      {'skip': False, 'type': 'vector<pat::MET>',     'label': ("slimmedMETs")},
}
def fetchDBSInfo(self):
    """
    Contact DBS
    """
    # make assumption that same host won't be used for both
    # this check should catch most deployed servers
    (useDBS2, useDBS3, dbs2_url, dbs3_url) = verify_dbs_url(self)
    # DBS2 is gone
    dbs_url = dbs3_url
    useDBS2 = False
    useDBS3 = True
    verifyDBS23 = False
    common.logger.info("Accessing DBS at: %s" % dbs_url)

    ## check if runs are selected
    runselection = []
    if (self.cfg_params.has_key('CMSSW.runselection')):
        runselection = parseRange2(self.cfg_params['CMSSW.runselection'])
        if len(runselection) > 1000000:
            common.logger.info("ERROR: runselection range has more than 1M numbers")
            common.logger.info("ERROR: Too large. runselection is ignored")
            runselection = []

    ## check if various lumi parameters are set
    self.lumiMask = self.cfg_params.get('CMSSW.lumi_mask', None)
    self.lumiParams = self.cfg_params.get('CMSSW.total_number_of_lumis', None) or \
                      self.cfg_params.get('CMSSW.lumis_per_job', None)

    lumiList = None
    if self.lumiMask:
        lumiList = LumiList(filename=self.lumiMask)
    if runselection:
        runList = LumiList(runs=runselection)

    self.splitByRun = int(self.cfg_params.get('CMSSW.split_by_run', 0))
    self.splitDataByEvent = int(self.cfg_params.get('CMSSW.split_by_event', 0))
    common.logger.log(10-1, "runselection is: %s" % runselection)

    if not self.splitByRun:
        self.splitByLumi = self.lumiMask or self.lumiParams or self.ads

    if self.splitByRun and not runselection:
        msg = "Error: split_by_run must be combined with a runselection"
        raise CrabException(msg)

    ## service API
    if useDBS2 or verifyDBS23:
        args = {}
        args['url'] = dbs2_url
        args['level'] = 'CRITICAL'

    ## check if has been requested to use the parent info
    useparent = int(self.cfg_params.get('CMSSW.use_parent', 0))

    defaultName = common.work_space.shareDir() + 'AnalyzedBlocks.txt'
    ## check if has been asked for a non default file to store/read analyzed fileBlocks
    #SB no no, we do not want this, it is not even documented !
    #fileBlocks_FileName = os.path.abspath(self.cfg_params.get('CMSSW.fileblocks_file',defaultName))
    if self.cfg_params.get('CMSSW.fileblocks_file'):
        msg = "CMSSW.fileblocks_file option not supported"
        raise CrabException(msg)
    fileBlocks_FileName = os.path.abspath(defaultName)

    if useDBS2 or verifyDBS23:
        #common.logger.info("looking up DBS2 ...")
        import DBSAPI.dbsApi
        import DBSAPI.dbsApiException
        start_time = time.time()
        api2 = DBSAPI.dbsApi.DbsApi(args)
        files2 = self.queryDbs(api2, path=self.datasetPath, runselection=runselection, useParent=useparent)
        elapsed = time.time() - start_time
        common.logger.info("DBS2 lookup took %5.2f sec" % elapsed)
        if useDBS2:
            self.files = files2

    if useDBS3 or verifyDBS23:
        #common.logger.info("looking up DBS3 ...")
        from dbs.apis.dbsClient import DbsApi
        start_time = time.time()
        api3 = DbsApi(dbs3_url)
        files3 = self.queryDbs3(api3, path=self.datasetPath, runselection=runselection, useParent=useparent)
        elapsed = time.time() - start_time
        common.logger.info("DBS3 lookup took %5.2f sec" % elapsed)
        if useDBS3:
            self.files = files3

    # Check to see what the dataset is
    pdsName = self.datasetPath.split("/")[1]
    if useDBS2:
        primDSs = api2.listPrimaryDatasets(pdsName)
        dataType = primDSs[0]['Type']
    elif useDBS3:
        dataType = api3.listDataTypes(dataset=self.datasetPath)[0]['data_type']
    common.logger.info("Datatype is %s" % dataType)

    if dataType == 'data' and not \
            (self.splitByRun or self.splitByLumi or self.splitDataByEvent):
        msg = 'Data must be split by lumi or by run. ' \
              'Please see crab -help for the correct settings'
        raise CrabException(msg)

    anFileBlocks = []
    if self.skipBlocks:
        anFileBlocks = readTXTfile(self, fileBlocks_FileName)

    # parse files and fill arrays
    for file in self.files:
        parList = []
        fileLumis = []  # List of tuples
        # skip already analyzed blocks
        fileblock = file['Block']['Name']
        if fileblock not in anFileBlocks:
            filename = file['LogicalFileName']
            # asked retry the list of parent for the given child
            if useparent == 1:
                parList = [x['LogicalFileName'] for x in file['ParentList']]
            if self.splitByLumi:
                fileLumis = [(x['RunNumber'], x['LumiSectionNumber'])
                             for x in file['LumiList']]
            self.parent[filename] = parList
            # For LumiMask, intersection of two lists.
            if self.lumiMask and runselection:
                self.lumis[filename] = runList.filterLumis(lumiList.filterLumis(fileLumis))
            elif runselection:
                self.lumis[filename] = runList.filterLumis(fileLumis)
            elif self.lumiMask:
                self.lumis[filename] = lumiList.filterLumis(fileLumis)
            else:
                self.lumis[filename] = fileLumis
            if filename.find('.dat') < 0:
                events = file['NumberOfEvents']
                # Count number of events and lumis per block
                if fileblock in self.eventsPerBlock.keys():
                    self.eventsPerBlock[fileblock] += events
                else:
                    self.eventsPerBlock[fileblock] = events
                # Number of events per file
                self.eventsPerFile[filename] = events
                # List of files per block
                if fileblock in self.blocksinfo.keys():
                    self.blocksinfo[fileblock].append(filename)
                else:
                    self.blocksinfo[fileblock] = [filename]
                # total number of events
                self.maxEvents += events
                self.maxLumis += len(self.lumis[filename])

    if self.skipBlocks and len(self.eventsPerBlock.keys()) == 0:
        msg = "No new fileblocks available for dataset: " + str(self.datasetPath)
        raise CrabException(msg)

    if len(self.eventsPerBlock) <= 0:
        msg = "No data for %s in DBS\n Check datasetpath parameter in crab.cfg" % self.datasetPath
        raise CrabException(msg)
def SingleTopStep2(): options = VarParsing("analysis") options.register( "subChannel", "T_t", VarParsing.multiplicity.singleton, VarParsing.varType.string, "The sample that you are running on", ) options.register( "reverseIsoCut", False, VarParsing.multiplicity.singleton, VarParsing.varType.bool, "Consider anti-isolated region", ) options.register( "doDebug", False, VarParsing.multiplicity.singleton, VarParsing.varType.bool, "Turn on debugging messages" ) options.register("isMC", True, VarParsing.multiplicity.singleton, VarParsing.varType.bool, "Run on MC") options.register( "doGenParticlePath", True, VarParsing.multiplicity.singleton, VarParsing.varType.bool, "Run the gen particle paths (only works on specific MC)", ) options.register( "globalTag", Config.globalTagMC, VarParsing.multiplicity.singleton, VarParsing.varType.string, "Global tag" ) options.register( "srcPUDistribution", "S10", VarParsing.multiplicity.singleton, VarParsing.varType.string, "Source pile-up distribution", ) options.register( "destPUDistribution", "data", VarParsing.multiplicity.singleton, VarParsing.varType.string, "destination pile-up distribution", ) options.register( "isComphep", False, VarParsing.multiplicity.singleton, VarParsing.varType.bool, "Use CompHep-specific processing", ) options.register( "isAMCatNLO", False, VarParsing.multiplicity.singleton, VarParsing.varType.bool, "Use aMC@NLO-specific processing", ) options.register( "isSherpa", False, VarParsing.multiplicity.singleton, VarParsing.varType.bool, "Use sherpa-specific processing" ) options.register( "systematic", "", VarParsing.multiplicity.singleton, VarParsing.varType.string, "Apply Systematic variation" ) options.register( "dataRun", "RunABCD", VarParsing.multiplicity.singleton, VarParsing.varType.string, "A string Run{A,B,C,D} to specify the data period", ) options.register( "doSync", False, VarParsing.multiplicity.singleton, VarParsing.varType.bool, "Synchronization exercise" ) options.parseArguments() if options.isMC: Config.srcPUDistribution = pileUpDistributions.distributions[options.srcPUDistribution] Config.Leptons.reverseIsoCut = options.reverseIsoCut Config.subChannel = options.subChannel Config.doDebug = options.doDebug Config.isMC = options.isMC Config.doSkim = options.doSync or not sample_types.is_signal(Config.subChannel) Config.isCompHep = options.isComphep or "comphep" in Config.subChannel Config.isAMCatNLO = Config.isAMCatNLO or options.isAMCatNLO or "aMCatNLO" in Config.subChannel Config.isSherpa = options.isSherpa or "sherpa" in Config.subChannel Config.systematic = options.systematic Config.doSync = options.doSync print "Systematic: ", Config.systematic if Config.isMC and not Config.doSync: logging.info("Changing jet source from %s to smearedPatJetsWithOwnRef" % Config.Jets.source) Config.Jets.source = "smearedPatJetsWithOwnRef" if Config.systematic in ["ResUp", "ResDown"]: logging.info( "Changing jet source from %s to smearedPatJetsWithOwnRef%s" % (Config.Jets.source, Config.systematic) ) Config.Jets.source = "smearedPatJetsWithOwnRef" + Config.systematic logging.info( "Changing MET source from %s to patType1CorrectedPFMetJet%s" % (Config.metSource, Config.systematic) ) Config.metSource = "patType1CorrectedPFMetJet" + Config.systematic elif Config.systematic in ["EnUp", "EnDown"]: logging.info( "Changing jet source from %s to shiftedPatJetsWithOwnRef%sForCorrMEt" % (Config.Jets.source, Config.systematic) ) Config.Jets.source = "shiftedPatJetsWithOwnRef" + Config.systematic + "ForCorrMEt" logging.info( "Changing MET source 
from %s to patType1CorrectedPFMetJet%s" % (Config.metSource, Config.systematic) ) Config.metSource = "patType1CorrectedPFMetJet" + Config.systematic elif Config.systematic in ["UnclusteredEnUp", "UnclusteredEnDown"]: logging.info( "Changing MET source from %s to patType1CorrectedPFMet%s" % (Config.metSource, Config.systematic) ) Config.metSource = "patType1CorrectedPFMet" + Config.systematic print "Configuration" print Config._toStr() print Config.Jets._toStr() print Config.Muons._toStr() print Config.Electrons._toStr() print "" process = cms.Process("STPOLSEL2") eventCounting.countProcessed(process) process.load("Configuration.Geometry.GeometryIdeal_cff") process.load("Configuration.StandardSequences.FrontierConditions_GlobalTag_cff") from Configuration.AlCa.autoCond import autoCond process.GlobalTag.globaltag = cms.string(options.globalTag) process.load("Configuration.StandardSequences.MagneticField_cff") if Config.doDebug: process.load("FWCore.MessageLogger.MessageLogger_cfi") process.MessageLogger = cms.Service( "MessageLogger", destinations=cms.untracked.vstring("cout", "debug"), debugModules=cms.untracked.vstring("*"), cout=cms.untracked.PSet(threshold=cms.untracked.string("INFO")), debug=cms.untracked.PSet(threshold=cms.untracked.string("DEBUG")), ) logging.basicConfig(level=logging.DEBUG) else: process.load("FWCore.MessageLogger.MessageLogger_cfi") process.MessageLogger.cerr.FwkReport.reportEvery = 1000 process.MessageLogger.cerr.threshold = cms.untracked.string("INFO") logging.basicConfig(level=logging.DEBUG) process.maxEvents = cms.untracked.PSet(input=cms.untracked.int32(options.maxEvents)) process.options = cms.untracked.PSet(wantSummary=cms.untracked.bool(True)) import os from FWCore.PythonUtilities.LumiList import LumiList if not Config.isMC: ll1 = LumiList( os.environ["CMSSW_BASE"] + "/../crabs/lumis/Cert_190456-208686_8TeV_22Jan2013ReReco_Collisions12_JSON.txt" ) process.source = cms.Source( "PoolSource", fileNames=cms.untracked.vstring(options.inputFiles), cacheSize=cms.untracked.uint32(50 * 1024 * 1024), lumisToProcess=ll1.getVLuminosityBlockRange() if not Config.isMC else cms.untracked.VLuminosityBlockRange(), ) print options # ------------------------------------------------- # Jets # ------------------------------------------------- from SingleTopPolarization.Analysis.jets_step2_cfi import JetSetup JetSetup(process, Config) # ------------------------------------------------- # Leptons # ------------------------------------------------- from SingleTopPolarization.Analysis.muons_step2_cfi import MuonSetup MuonSetup(process, Config) from SingleTopPolarization.Analysis.electrons_step2_cfi import ElectronSetup ElectronSetup(process, Config) process.looseVetoMuCount = cms.EDProducer( "CollectionSizeProducer<reco::Candidate>", src=cms.InputTag("looseVetoMuons") ) process.looseVetoEleCount = cms.EDProducer( "CollectionSizeProducer<reco::Candidate>", src=cms.InputTag("looseVetoElectrons") ) process.decayTreeProducerMu = cms.EDProducer( "GenParticleDecayTreeProducer", src=cms.untracked.InputTag("singleIsoMu") ) process.decayTreeProducerEle = cms.EDProducer( "GenParticleDecayTreeProducer", src=cms.untracked.InputTag("singleIsoEle") ) # ----------------------------------------------- # Top reco and cosine calcs # ----------------------------------------------- from SingleTopPolarization.Analysis.top_step2_cfi import TopRecoSetup TopRecoSetup(process, Config) process.allEventObjects = cms.EDProducer( "CandRefCombiner", sources=cms.vstring(["goodJets", "goodSignalLeptons", 
Config.metSource]), maxOut=cms.uint32(9999), minOut=cms.uint32(0), logErrors=cms.bool(False), ) process.hadronicEventObjects = cms.EDProducer( "CandRefCombiner", sources=cms.vstring(["goodJets"]), maxOut=cms.uint32(9999), minOut=cms.uint32(0), logErrors=cms.bool(False), ) process.allEventObjectsWithNu = cms.EDProducer( "CandRefCombiner", sources=cms.vstring(["goodJets", "goodSignalLeptons", Config.metSource, "recoNuProducer"]), maxOut=cms.uint32(9999), minOut=cms.uint32(0), logErrors=cms.bool(False), ) process.eventShapeVars = cms.EDProducer("EventShapeVarsProducer", src=cms.InputTag("allEventObjects")) process.eventShapeVarsWithNu = cms.EDProducer("EventShapeVarsProducer", src=cms.InputTag("allEventObjectsWithNu")) # Vector sum of all reconstructed objects process.shat = cms.EDProducer("SimpleCompositeCandProducer", sources=cms.VInputTag(["allEventObjects"])) # Hadronic final state process.ht = cms.EDProducer("SimpleCompositeCandProducer", sources=cms.VInputTag(["hadronicEventObjects"])) process.shatNTupleProducer = cms.EDProducer( "CandViewNtpProducer2", src=cms.InputTag("shat"), lazyParser=cms.untracked.bool(True), prefix=cms.untracked.string(""), # eventInfo = cms.untracked.bool(True), variables=ntupleCollection([["Pt", "pt"], ["Eta", "eta"], ["Phi", "phi"], ["Mass", "mass"]]), ) process.htNTupleProducer = process.shatNTupleProducer.clone(src=cms.InputTag("ht")) process.eventShapeSequence = cms.Sequence( process.allEventObjects * process.hadronicEventObjects * process.eventShapeVars * process.allEventObjectsWithNu * process.eventShapeVarsWithNu * process.shat * process.ht * process.shatNTupleProducer * process.htNTupleProducer ) # ----------------------------------------------- # Treemaking # ----------------------------------------------- process.recoTopNTupleProducer = cms.EDProducer( "CandViewNtpProducer2", src=cms.InputTag("recoTop"), lazyParser=cms.untracked.bool(True), prefix=cms.untracked.string(""), # eventInfo = cms.untracked.bool(True), variables=ntupleCollection([["Pt", "pt"], ["Eta", "eta"], ["Phi", "phi"], ["Mass", "mass"]]), ) process.recoNuNTupleProducer = cms.EDProducer( "CandViewNtpProducer2", src=cms.InputTag("recoNu"), lazyParser=cms.untracked.bool(True), prefix=cms.untracked.string(""), # eventInfo = cms.untracked.bool(True), variables=ntupleCollection( [ ["Pt", "pt"], ["Eta", "eta"], ["Phi", "phi"], ["Px", "p4().Px()"], ["Py", "p4().Py()"], ["Pz", "p4().Pz()"], ] ), ) process.recoWNTupleProducer = cms.EDProducer( "CandViewNtpProducer2", src=cms.InputTag("recoW"), lazyParser=cms.untracked.bool(True), prefix=cms.untracked.string(""), variables=ntupleCollection([["Pt", "pt"], ["Eta", "eta"], ["Phi", "phi"], ["Mass", "mass"]]), ) process.trueNuNTupleProducer = process.recoNuNTupleProducer.clone( src=cms.InputTag("genParticleSelector", "trueNeutrino", "STPOLSEL2") ) process.trueWNTupleProducer = process.recoTopNTupleProducer.clone( src=cms.InputTag("genParticleSelector", "trueWboson", "STPOLSEL2") ) process.trueTopNTupleProducer = process.recoTopNTupleProducer.clone( src=cms.InputTag("genParticleSelector", "trueTop", "STPOLSEL2") ) process.patMETDeltaRProducer = cms.EDProducer( "DeltaRProducerMET", muonSrc=cms.InputTag("goodSignalMuons"), electronSrc=cms.InputTag("goodSignalElectrons"), metSrc=cms.InputTag(Config.metSource), ) process.patMETNTupleProducer = cms.EDProducer( "CandViewNtpProducer2", src=cms.InputTag(Config.metSource), lazyParser=cms.untracked.bool(True), prefix=cms.untracked.string(""), variables=ntupleCollection( [ ["Pt", "pt"], ["Eta", "eta"], ["Phi", 
"phi"], ["Px", "p4().Px()"], ["Py", "p4().Py()"], ["Pz", "p4().Pz()"], ] ), ) process.trueLeptonNTupleProducer = process.recoTopNTupleProducer.clone( src=cms.InputTag("genParticleSelector", "trueLepton", "STPOLSEL2") ) process.trueLightJetNTupleProducer = process.recoTopNTupleProducer.clone( src=cms.InputTag("genParticleSelector", "trueLightJet", "STPOLSEL2") ) def userfloat(key): return "? hasUserFloat('{0}') ? userFloat('{0}') : {1}".format(key, nanval) def userint(key): return "? hasUserInt('{0}') ? userInt('{0}') : {1}".format(key, nanval) process.goodSignalMuonsNTupleProducer = cms.EDProducer( "CandViewNtpProducer2", src=cms.InputTag("patMETDeltaRProducer", "muons"), lazyParser=cms.untracked.bool(True), prefix=cms.untracked.string(""), # eventInfo = cms.untracked.bool(True), variables=ntupleCollection( [ ["Pt", "pt"], ["Eta", "eta"], ["Phi", "phi"], ["relIso", userfloat(Config.Muons.relIsoType)], ["Charge", "charge"], ["genPdgId", "? genParticlesSize() > 0 ? genParticle(0).pdgId() : {0}".format(nanval)], ["motherGenPdgId", "? genParticlesSize() > 0 ? genParticle(0).mother(0).pdgId() : {0}".format(nanval)], ["normChi2", "? globalTrack().isNonnull() ? normChi2 : {0}".format(nanval)], [ "trackhitPatterntrackerLayersWithMeasurement", userfloat("track_hitPattern_trackerLayersWithMeasurement"), ], [ "globalTrackhitPatternnumberOfValidMuonHits", userfloat("globalTrack_hitPattern_numberOfValidMuonHits"), ], [ "innerTrackhitPatternnumberOfValidPixelHits", userfloat("innerTrack_hitPattern_numberOfValidPixelHits"), ], ["db", "dB"], ["dz", userfloat("dz")], ["numberOfMatchedStations", "numberOfMatchedStations"], [ "triggerMatch", "? triggerObjectMatchesByPath('{0}').size()==1 ? triggerObjectMatchByPath('{0}').hasPathLastFilterAccepted() : {1}".format( Config.Muons.triggerPath, nanval ), ], ["deltaRMET", userfloat("deltaRMET")], ["deltaPhiMET", userfloat("deltaPhiMET")], ] ), ) process.isoMuonsNTP = process.goodSignalMuonsNTupleProducer.clone(src=cms.InputTag("muonsWithIso")) process.allMuonsNTP = process.goodSignalMuonsNTupleProducer.clone(src=cms.InputTag("muonsWithIDAll")) process.goodSignalElectronsNTupleProducer = cms.EDProducer( "CandViewNtpProducer2", src=cms.InputTag("patMETDeltaRProducer", "electrons"), lazyParser=cms.untracked.bool(True), prefix=cms.untracked.string(""), # eventInfo = cms.untracked.bool(True), variables=ntupleCollection( [ ["Pt", "%s" % Config.Electrons.pt], ["Eta", "eta"], ["Phi", "phi"], ["relIso", userfloat(Config.Electrons.relIsoType)], ["mvaID", "electronID('mvaTrigV0')"], ["Charge", "charge"], ["superClustereta", "superCluster.eta"], ["passConversionVeto", "passConversionVeto()"], [ "gsfTracktrackerExpectedHitsInnernumberOfHits", userint("gsfTrack_trackerExpectedHitsInner_numberOfHits"), ], [ "triggerMatch", "? triggerObjectMatchesByPath('{0}').size()==1 ? triggerObjectMatchByPath('{0}').hasPathLastFilterAccepted() : {1}".format( Config.Electrons.triggerPath, nanval ), ], ["genPdgId", "? genParticlesSize() > 0 ? genParticle(0).pdgId() : {0}".format(nanval)], ["motherGenPdgId", "? genParticlesSize() > 0 ? 
genParticle(0).mother(0).pdgId() : {0}".format(nanval)], ["deltaRMET", userfloat("deltaRMET")], ["deltaPhiMET", userfloat("deltaPhiMET")], ] ), ) process.isoElectronsNTP = process.goodSignalElectronsNTupleProducer.clone(src=cms.InputTag("electronsWithIso")) process.allElectronsNTP = process.goodSignalElectronsNTupleProducer.clone(src=cms.InputTag("electronsWithIDAll")) process.goodJetsNTupleProducer = cms.EDProducer( "CandViewNtpProducer2", src=cms.InputTag("goodJets"), lazyParser=cms.untracked.bool(True), prefix=cms.untracked.string(""), eventInfo=cms.untracked.bool(False), variables=ntupleCollection( [ ["Pt", "pt"], ["Eta", "eta"], ["Phi", "phi"], ["Mass", "mass"], # ["bDiscriminator", "bDiscriminator('%s')" % Config.Jets.bTagDiscriminant], ["bDiscriminatorTCHP", "bDiscriminator('%s')" % Config.Jets.BTagDiscriminant.TCHP], ["bDiscriminatorCSV", "bDiscriminator('%s')" % Config.Jets.BTagDiscriminant.CSV], ["rms", userfloat("rms")], ["partonFlavour", "partonFlavour()"], ["area", "jetArea()"], # These require PFCandidates to be present (huge collection) # ["n90", "n90()"], # ["n60", "n60()"], # ["genJetFlavour", "? genJet()>0 ? (genJet()->pdgId()) : 0"], #FIXME ["deltaR", userfloat("deltaR")], ["deltaPhi", userfloat("deltaPhi")], ["numberOfDaughters", "numberOfDaughters"], ["neutralHadronEnergy", "neutralHadronEnergy"], ["HFHadronEnergy", "HFHadronEnergy"], ["chargedEmEnergyFraction", "chargedEmEnergyFraction"], ["neutralEmEnergyFraction", "neutralEmEnergyFraction"], ["chargedHadronEnergyFraction", "chargedHadronEnergyFraction"], ["chargedMultiplicity", "chargedMultiplicity"], ["nParticles", userfloat("nParticles")], ["puMva", userfloat("mva")], ["nCharged", userfloat("nCharged")], ["nNeutral", userfloat("nNeutral")], ["deltaRMET", userfloat("deltaRMET")], ["deltaPhiMET", userfloat("deltaPhiMET")], ] ), ) process.lowestBTagJetNTupleProducer = process.goodJetsNTupleProducer.clone(src=cms.InputTag("lowestBTagJet")) process.highestBTagJetNTupleProducer = process.goodJetsNTupleProducer.clone(src=cms.InputTag("highestBTagJet")) process.treeSequenceNew = cms.Sequence( process.patMETNTupleProducer * process.recoTopNTupleProducer * process.recoNuNTupleProducer * process.recoWNTupleProducer * process.trueTopNTupleProducer * process.trueNuNTupleProducer * process.trueWNTupleProducer * process.trueLeptonNTupleProducer * process.trueLightJetNTupleProducer * process.goodJetsNTupleProducer * process.lowestBTagJetNTupleProducer * process.highestBTagJetNTupleProducer * process.goodSignalMuonsNTupleProducer * process.goodSignalElectronsNTupleProducer * process.isoMuonsNTP * process.isoElectronsNTP ) # ----------------------------------------------- # Flavour analyzer # ----------------------------------------------- Config.doWJetsFlavour = Config.isMC and sample_types.is_wjets(Config.subChannel) and not Config.isSherpa if Config.doWJetsFlavour: process.flavourAnalyzer = cms.EDProducer( "FlavourAnalyzer", genParticles=cms.InputTag("genParticles"), generator=cms.InputTag("generator"), genJets=cms.InputTag("selectedPatJets", "genJets"), saveGenJets=cms.bool(False), savePDFInfo=cms.bool(True), ) # ----------------------------------------------- # Paths # ----------------------------------------------- from SingleTopPolarization.Analysis.hlt_step2_cfi import HLTSetup HLTSetup(process, Config) from SingleTopPolarization.Analysis.leptons_cfg import LeptonSetup LeptonSetup(process, Config) if Config.isMC: WeightSetup(process, Config) if Config.isMC and options.doGenParticlePath: if Config.isCompHep: from 
# -----------------------------------------------
# Paths
# -----------------------------------------------

from SingleTopPolarization.Analysis.hlt_step2_cfi import HLTSetup
HLTSetup(process, Config)

from SingleTopPolarization.Analysis.leptons_cfg import LeptonSetup
LeptonSetup(process, Config)

if Config.isMC:
    WeightSetup(process, Config)

if Config.isMC and options.doGenParticlePath:
    if Config.isCompHep:
        from SingleTopPolarization.Analysis.partonStudy_comphep_step2_cfi import PartonStudySetup
    elif Config.isAMCatNLO:
        from SingleTopPolarization.Analysis.partonStudy_aMCatNLO_step2_cfi import PartonStudySetup
    else:
        from SingleTopPolarization.Analysis.partonStudy_step2_cfi import PartonStudySetup
    PartonStudySetup(process)
    process.partonPath = cms.Path()

    # NOTE: this path will REJECT events not having a true t-channel lepton
    if sample_types.is_signal(Config.subChannel):
        logging.warning(
            "Using signal-only sequence 'process.partonStudyTrueSequence' on subChannel=%s" % Config.subChannel
        )
        process.partonPath += process.partonStudyTrueSequence

from SingleTopPolarization.Analysis.muons_step2_cfi import MuonPath
MuonPath(process, Config)

from SingleTopPolarization.Analysis.electrons_step2_cfi import ElectronPath
ElectronPath(process, Config)

if Config.isMC:
    process.muPath += process.weightSequence
    process.elePath += process.weightSequence

if Config.isMC and sample_types.is_signal(Config.subChannel):
    process.muPath += process.partonStudyCompareSequence
    process.elePath += process.partonStudyCompareSequence

process.treePath = cms.Path(process.treeSequenceNew)
process.eventVarsPath = cms.Path(process.eventShapeSequence)

# enable embedding the gen-level weight, which is relevant for the Sherpa sample
if Config.isMC:
    process.genWeightProducer = cms.EDProducer("GenWeightProducer")
    process.eventVarsPath += process.genWeightProducer

if Config.isAMCatNLO:
    process.lheWeightProducer = cms.EDProducer("LHEWeightProducer")
    process.eventVarsPath += process.lheWeightProducer

if Config.doWJetsFlavour:
    process.treePath += process.flavourAnalyzer

if Config.isMC:
    if not Config.isSherpa:
        process.meWeightProducer = cms.EDProducer("MEWeightProducer")
        process.eventVarsPath += process.meWeightProducer

    process.load("SimGeneral.HepPDTESSource.pythiapdt_cfi")
    process.prunedGenParticles = cms.EDProducer(
        "GenParticlePruner",
        src=cms.InputTag("genParticles"),
        select=cms.vstring(
            "drop *",
            "keep status = 3",  # keeps all particles from the hard matrix element
            "+keep abs(pdgId) = 15 & status = 1",  # keeps intermediate decaying tau
        ),
    )

    """process.pat2pxlio=cms.EDAnalyzer('EDM2PXLIO',
        SelectEventsFromProcess=cms.vstring("USER"),
        SelectEventsFromPath = cms.vstring("p0"),
        OutFileName=cms.untracked.string("wjets.pxlio"),
        process=cms.untracked.string("test"),
        genCollection = cms.PSet(
            type=cms.string("GenParticle2Pxlio"),
            srcs=cms.VInputTag(cms.InputTag("prunedGenParticles")),
            EventInfo=cms.InputTag('generator')
        ),
        genJets = cms.PSet(
            type=cms.string("GenJet2Pxlio"),
            srcs=cms.VInputTag("ak5GenJets","kt4GenJets","kt6GenJets"),
            names=cms.vstring("AK5GenJets","KT4GenJets","KT6GenJets")
        ),
        q2weights = cms.PSet(
            type=cms.string("ValueList2Pxlio"),
            srcs=cms.VInputTag(
                cms.InputTag("extraPartons","nExtraPartons"),
            ),
            names = cms.vstring("nExtraPartons")
        )
    )"""

    process.extraPartons = cms.EDProducer("ExtraPartonCounter", isTTJets=cms.bool("TTJets" in Config.subChannel))
    process.extraPartonSequence = cms.Sequence(process.prunedGenParticles * process.extraPartons)
    # process.pxlioOut=cms.EndPath(process.out*process.pat2pxlio)
    process.eventVarsPath += process.extraPartonSequence
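# cms.Path supports '+=' to append a module or sequence to the end of an
# existing path, so the statement ordering above fixes the execution order of
# the weight producers and the extra-parton counting within eventVarsPath.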
# -----------------------------------------------
# Outpath
# -----------------------------------------------

process.out = cms.OutputModule(
    "PoolOutputModule",
    dropMetaData=cms.untracked.string("DROPPED"),
    splitLevel=cms.untracked.int32(99),
    fileName=cms.untracked.string(options.outputFile),
    SelectEvents=cms.untracked.PSet(SelectEvents=cms.vstring(["*"])),
    outputCommands=cms.untracked.vstring(
        "drop *",
        #'keep *',
        "keep edmMergeableCounter_*__*",
        "keep *_generator__*",
        #'keep *_genParticles__*', #hack for powheg PDF sets
        "keep edmTriggerResults_TriggerResults__*",
        "keep *_flavourAnalyzer_*_STPOLSEL2",
        "keep floats_*_*_STPOLSEL2",
        "keep double_*__STPOLSEL2",
        "keep float_*__STPOLSEL2",
        "keep double_*_*_STPOLSEL2",
        "keep float_*_*_STPOLSEL2",
        "keep int_*__STPOLSEL2",
        "keep int_*_*_STPOLSEL2",
        "keep int_*_*_*",
        "keep String_*_*_*",  # the decay trees
        "keep *_pdfInfo1_*_STPOLSEL2",
        "keep *_pdfInfo2_*_STPOLSEL2",
        "keep *_pdfInfo3_*_STPOLSEL2",
        "keep *_pdfInfo4_*_STPOLSEL2",
        "keep *_pdfInfo5_*_STPOLSEL2",
        #'keep *',
        #'keep *_recoTop_*_*',
        #'keep *_goodSignalMuons_*_*',
        #'keep *_goodSignalElectrons_*_*',
        #'keep *_goodJets_*_*',
        #'keep *_bTaggedJets_*_*',
        #'keep *_untaggedJets_*_*',
    ),
)

if Config.doDebug:
    process.out.outputCommands.append("keep *")
    process.debugpath = cms.Path(
        process.muAnalyzer * process.eleAnalyzer * process.jetAnalyzer * process.metAnalyzer
    )

process.outpath = cms.EndPath(process.out)

if Config.doSkim:
    process.out.SelectEvents.SelectEvents = []
    process.out.SelectEvents.SelectEvents.append("elePath")
    process.out.SelectEvents.SelectEvents.append("muPath")

# -----------------------------------------------
# Final printout
# -----------------------------------------------

if hasattr(process, "out"):
    print "Output patTuples: %s" % process.out.fileName.value()

print 80 * "-"
print "Step2 configured"

return process
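# A minimal, hypothetical driver for a config-returning step2 function like the
# one ending above; the module and function names ("step2_cfg", "SingleTopStep2")
# are illustrative assumptions, not the actual layout of this package.
import FWCore.ParameterSet.Config as cms
from step2_cfg import SingleTopStep2  # hypothetical import

process = SingleTopStep2()
process.maxEvents = cms.untracked.PSet(input=cms.untracked.int32(100))  # cap events for a test run
print process.dumpPython()  # inspect the fully resolved configuration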
def makeJSON(optlist):
    outdir = optlist[0]
    basedir = optlist[1]
    lastUnblindRun = optlist[2]
    name = optlist[3]
    files = optlist[4:]
    s = sampleInfo(name, basedir, files)

    # lumi sets for this sample
    mergedLumisUnblind = set()
    mergedLumisBlinded = set()
    for f in s.fileList:
        file = TFile.Open(f)
        if file == None:
            continue

        # only keep necessary branches
        t = file.Get("TreeMaker2/PreSelection")
        if t == None:
            continue
        t.SetBranchStatus("*", 0)
        t.SetBranchStatus("RunNum", 1)
        t.SetBranchStatus("LumiBlockNum", 1)

        # get tree entries
        nentries = t.GetEntries()
        if nentries == 0:
            continue
        t.SetEstimate(nentries)
        t.Draw("RunNum:LumiBlockNum", "", "goff")
        v1 = t.GetV1()
        v1.SetSize(t.GetSelectedRows())
        a1 = array.array('d', v1)
        v1 = None
        v2 = t.GetV2()
        v2.SetSize(t.GetSelectedRows())
        a2 = array.array('d', v2)
        v2 = None

        # loop over tree entries
        for run, ls in izip(a1, a2):
            irun = int(run)
            ils = int(ls)
            if irun <= lastUnblindRun or lastUnblindRun == -1:
                if not (irun, ils) in mergedLumisUnblind:
                    mergedLumisUnblind.add((irun, ils))
            else:
                if not (irun, ils) in mergedLumisBlinded:
                    mergedLumisBlinded.add((irun, ils))
        file.Close()
    ### end loop over files in sample

    # convert the runlumis from list of pairs to dict:
    # [(123,3), (123,4), (123,5), (123,7), (234,6)] => {123: [3,4,5,7], 234: [6]}
    mLumisDictUnblind = {}
    mLumisDictBlinded = {}
    for k, v in mergedLumisUnblind:
        mLumisDictUnblind.setdefault(k, []).append(int(v))
    for k, v in mergedLumisBlinded:
        mLumisDictBlinded.setdefault(k, []).append(int(v))

    # make lumi list from dict
    mergedLumiListUnblind = LumiList(runsAndLumis=mLumisDictUnblind)
    mergedLumiListBlinded = LumiList(runsAndLumis=mLumisDictBlinded)
    if mergedLumiListUnblind:
        outfile = outdir + '/lumiSummary_unblind_' + s.outName + '.json'
        mergedLumiListUnblind.writeJSON(outfile)
        print "wrote " + outfile
    if mergedLumiListBlinded:
        outfile = outdir + '/lumiSummary_blinded_' + s.outName + '.json'
        mergedLumiListBlinded.writeJSON(outfile)
        print "wrote " + outfile
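# Example invocation of makeJSON (all paths and the sample name here are
# placeholders): optlist packs [outdir, basedir, lastUnblindRun, name, files...],
# and lastUnblindRun = -1 treats every run as unblinded.
makeJSON([
    "jsons",                          # output directory for the JSON files
    "/store/user/someone/ntuples/",   # placeholder base directory for sampleInfo
    -1,                               # -1: no blinding cut on run number
    "SingleMuon_2016",                # placeholder sample name
    "SingleMuon_2016_part0",          # one or more file-list entries
])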
if isMC:
    process.skimEventProducer.SelectedPaths = cms.vstring("")

# special paths always saved
setattr(stepBTree.variables, "std_vector_trigger_special", cms.string("specialRateTrigger/8"))

# mc
if dataset[0] == "MC":
    stepBTree.variables.baseW = "%.12f" % scalef
# data
else:
    from FWCore.PythonUtilities.LumiList import LumiList
    import os
    if json != None:
        lumis = LumiList(filename=os.getenv('CMSSW_BASE') + '/src/LatinoTrees/Misc/Jsons/%s.json' % json)
        process.source.lumisToProcess = cms.untracked.VLuminosityBlockRange()
        process.source.lumisToProcess = lumis.getCMSSWString().split(',')
    stepBTree.variables.baseW = "1"
    stepBTree.variables.trpu = cms.string("1")
    stepBTree.variables.itpu = cms.string("1")
    stepBTree.variables.ootpup1 = cms.string("1")
    stepBTree.variables.ootpum1 = cms.string("1")
    stepBTree.variables.puW = cms.string("1")
    stepBTree.variables.puAW = cms.string("1")
    stepBTree.variables.puBW = cms.string("1")

####################
# run electron id ##
# see twiki:
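# Quick standalone check of a lumi mask of the kind applied above, before wiring
# it into process.source; the JSON file name is a placeholder.
from FWCore.PythonUtilities.LumiList import LumiList
mask = LumiList(filename="Cert_GoldenJSON_placeholder.json")
print "runs in mask:", mask.getRuns()[:5]
print "first CMSSW ranges:", mask.getCMSSWString().split(',')[:3]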
ls = set(l for r, l in lumi_mask)
if ls == set([-1]):
    is_mc = True
elif -1 in ls:
    raise ValueError('batch for dataset %s has lumis -1 and others' % dataset)
else:
    is_mc = False

if not is_mc:
    job_control = '''
lumi_mask = pick_events.json
total_number_of_lumis = -1
lumis_per_job = 1'''
    ll = LumiList(lumis=lumi_mask)
    ll.writeJSON('pick_events.json')
else:
    job_control = '''
total_number_of_events = -1
events_per_job = 100000'''

scheduler = 'condor' if 'condor' in sys.argv else 'glite'
open('crab.cfg', 'wt').write(crab_cfg % locals())

pset = open('pick_events.py').read()
pset += '\nevents_to_process = '
pset += pformat(events_to_process)
pset += '\nset_events_to_process(process, events_to_process)\n'
open('pick_events_crab.py', 'wt').write(pset)
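# Illustration of the lumi_mask convention handled above: MC batches tag every
# (run, lumi) pair with lumi = -1, data batches carry real lumi numbers, and
# mixing the two raises. The pairs below are made-up examples.
lumi_mask_mc = [(1, -1), (1, -1)]                # pure {-1} lumi set -> is_mc = True
lumi_mask_data = [(273158, 101), (273158, 102)]  # real lumis -> written to pick_events.json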