def compare_output(args, verb=0): """Quick comparison between job output.""" print ">>> Compare job output..." nbins = 100000 fname1 = "/scratch/ineuteli/analysis/2016/DY/DYJetsToLL_M-2000to3000_tautau.root" # file-based split fname2 = "/scratch/ineuteli/analysis/2016/DY/DYJetsToLL_M-2000to3000_tautau_test.root" # event-based split if len(args.infiles) >= 2: fname1, fname2 = args.infiles[:2] print ">>> ", fname1 print ">>> ", fname2 file1 = ensureTFile(fname1) file2 = ensureTFile(fname2) tree1 = file1.Get('tree') tree2 = file2.Get('tree') hist1 = TH1F('h1', 'h1', nbins, 0, 1000000) hist2 = TH1F('h2', 'h2', nbins, 0, 1000000) tree1.Draw("evt >> h1", "", "gOff") tree2.Draw("evt >> h2", "", "gOff") print ">>> tree1: %9d, hist1: %9d" % (tree1.GetEntries(), hist1.GetEntries()) print ">>> tree2: %9d, hist2: %9d" % (tree2.GetEntries(), hist2.GetEntries()) hist1.Add(hist2, -1) nfound = 0 for i in range(0, nbins + 2): if nfound == 20: print ">>> BREAK! Already found 20 different bins" break if hist1.GetBinContent(i) != 0.0: print ">>> difference in bin %4d!" nfound += 1 file1.Close() file2.Close()
def __init__(self, year=2017, sigma='central', sample=None, buggy=False, flat=False):
    """Load data and MC pileup profiles.
    year:   data-taking year (2016, 2017 or 2018)
    sigma:  minimum-bias cross-section variation ('central', 'up' or 'down')
    sample: sample name, used to detect buggy pre-mixing or flat-PU samples
    buggy:  force use of the buggy ("old pmx") 2017 MC profile
    flat:   force use of the flat-PU MC profile
    """
    assert (year in [2016, 2017, 2018]), "You must choose a year from: 2016, 2017, or 2018."
    assert (sigma in ['central', 'up', 'down']), "You must choose a s.d. variation from: 'central', 'up', or 'down'."
    # minimum-bias cross-section tag used in the data profile file name
    minbias = '69p2'
    if sigma == 'down':
        minbias = '66p0168'  # -4.6%
    elif sigma == 'up':
        minbias = '72p3832'  # +4.6%
    if year == 2016:
        datafilename = os.path.join(datadir, "Data_PileUp_2016_%s.root" % (minbias))
        mcfilename = os.path.join(datadir, "MC_PileUp_2016_Moriond17.root")
    elif year == 2017:
        # some pre-UL 2017 samples had a buggy pileup pre-mixing ("old pmx")
        tag = ""
        if buggy or sample:
            buggy = buggy or hasBuggyPU(sample)
            if buggy:
                tag = "_old_pmx"
            else:
                tag = "_new_pmx"
        datafilename = os.path.join(datadir, "Data_PileUp_2017_%s.root" % (minbias))
        mcfilename = os.path.join(datadir, "MC_PileUp_2017_Winter17_V2%s.root" % (tag))
    else:
        datafilename = os.path.join(datadir, "Data_PileUp_2018_%s.root" % (minbias))
        mcfilename = os.path.join(datadir, "MC_PileUp_2018_Autumn18.root")
    if flat or (sample and hasFlatPU(sample)):
        mcfilename = os.path.join(datadir, "MC_PileUp_%d_FlatPU0to75.root" % year)
    print "Loading PileupWeightTool for '%s' and '%s'" % (datafilename, mcfilename)
    self.datafile = ensureTFile(datafilename, 'READ')
    self.mcfile = ensureTFile(mcfilename, 'READ')
    self.datahist = self.datafile.Get('pileup')
    self.mchist = self.mcfile.Get('pileup')
    self.datahist.SetDirectory(0)  # detach so the histograms survive Close() below
    self.mchist.SetDirectory(0)
    self.datahist.Scale(1. / self.datahist.Integral())  # normalize to unit area
    self.mchist.Scale(1. / self.mchist.Integral())
    self.datafile.Close()
    self.mcfile.Close()
def getnevents(self, das=True, refresh=False, treename='Events', verb=0):
    """Get number of nanoAOD events from DAS (default), or from
    files on the storage system (das=False).
    das:      query DAS instead of opening files on storage
    refresh:  recount even if a cached value is available
    treename: name of the events tree in the ROOT files
    Returns the (possibly cached) total number of events."""
    nevents = self.nevents
    if nevents <= 0 or refresh:
        if self.storage and not das:  # count events in files on the storage system
            files = self.getfiles(url=True, refresh=refresh, verb=verb)
            for fname in files:
                file = ensureTFile(fname)
                tree = file.Get(treename)
                if not tree:
                    # fixed: report the actual tree name instead of hard-coded 'Events'
                    LOG.warning("getnevents: No %r tree in events in %r!" % (treename, fname))
                    file.Close()  # fixed: close the file before skipping (leak)
                    continue
                nevts = tree.GetEntries()
                file.Close()
                nevents += nevts
                LOG.verb("getnevents: Found %d events in %r." % (nevts, fname), verb, 3)
        else:  # get number of events from DAS
            for daspath in self.paths:
                cmdout = dasgoclient("summary dataset=%s instance=%s" % (daspath, self.instance), verb=verb - 1)
                if "nevents" in cmdout:
                    ndasevts = int(cmdout.split('"nevents":')[1].split(',')[0])
                else:
                    ndasevts = 0
                    LOG.warning("Could not get number of events from DAS for %r." % (self.name))
                nevents += ndasevts
        self.nevents = nevents  # cache for later calls
    return nevents
def __init__(self, filename, histname, filename2, histname2, isPU=False): self.filename = filename self.isPU = isPU self.file = ensureTFile(filename) self.hist = self.file.Get(histname) print "Open hist %s in %s" % (self.filename, histname) self.hist.SetDirectory(0) self.file.Close() if isPU: self.hist.Scale(1. / self.hist.Integral()) self.filename2 = filename2 self.file2 = ensureTFile(filename2) self.hist2 = self.file2.Get(histname2) self.hist2.SetDirectory(0) self.hist2.Scale(1. / self.hist2.Integral()) self.file2.Close()
def __init__(self, era, filename=None, histname='zptmass_weight'): """Load Z pT weights.""" #assert year in [2016,2017,2018], "ZptCorrectionTool: You must choose a year from: 2016, 2017, or 2018." if not filename: if 'UL' in era: #if '2016' in era and 'preVFP' in era: # filename = zptpath+"Zpt_weights_UL2016_preVFP.root" #elif '2016' in era and 'postVFP' in era: # filename = zptpath+"Zpt_weights_UL2016_postVFP.root" if '2016' in era: filename = zptpath + "zptmass_weights_UL2016.root" elif '2017' in era: filename = zptpath + "zptmass_weights_UL2017.root" elif '2018' in era: filename = zptpath + "zptmass_weights_UL2018.root" else: if '2016' in era: filename = zptpath + "zptmass_weights_2016.root" elif '2017' in era: filename = zptpath + "zptmass_weights_2017.root" elif '2018' in era: filename = zptpath + "zptmass_weights_2018.root" assert filename, "ZptCorrectionTool.__init__: Did not find filename for %r" % ( era) print "Loading ZptCorrectionTool for %s:%r..." % (filename, histname) file = ensureTFile(filename, 'READ') hist = file.Get(histname) hist.SetDirectory(0) file.Close() self.hist = hist self.filename = filename
def getnevents(fname, treename='Events'):
    """Return the number of entries of the given tree in a ROOT file,
    or 0 (with a warning) if the tree does not exist."""
    file = ensureTFile(fname)
    tree = file.Get(treename)
    if not tree:
        LOG.warning("getnevents: No %r tree in events in %r!" % (treename, fname))
        file.Close()  # fixed: close the file before returning (leak)
        return 0
    nevts = tree.GetEntries()
    file.Close()
    return nevts
def gettree(fname, tree='tree'):
    """Get file and tree. If glob pattern, expand and use TChain.
    Returns (file, tree); file is None when a TChain is used."""
    if '*' not in fname:
        # single file: open it and fetch the tree by name
        tfile = ensureTFile(fname)
        return tfile, tfile.Get(tree)
    # glob pattern: chain all matching files together
    chain = TChain(tree)
    for matched in glob.glob(fname):
        chain.Add(matched)
    return None, chain
def chunkify_by_evts(fnames, nmax, evenly=True): """Test implementation of event-based splitting of large files to limit number of events to process during jobs with a given maximum. Small files are still grouped as long as their total events is less than the maximum. For full implementation, see TauFW.PicoProducer.batch.utils""" result = [] nlarge = {} nsmall = {} for fname in fnames: file = ensureTFile(fname, 'READ') nevts = file.Get('Events').GetEntries() file.Close() print "%10d %s" % (nevts, fname) if nevts < nmax: nsmall.setdefault(nevts, []).append(fname) else: nlarge.setdefault(nevts, []).append(fname) #nlarge = { # 1081403L: ['nano_1.root'], # 2235175L: ['nano_2.root'], # 144447L: ['nano_3.root'], # #1515407L: ['nano_4.root'], # 200000: ['nano_5.root'], # 150000: ['nano_6.root'], # 100000: ['nano_7.root'], #} #nsmall = { # 50000: ['nano_8.root', 'nano_9.root', 'nano_10.root'], # 20000: ['nano_11.root','nano_12.root','nano_13.root'], #} print 'nlarge =', nlarge print 'nsmall =', nsmall for nevts in nlarge: for fname in nlarge[nevts]: nmax_ = nmax if evenly: nchunks = ceil(float(nevts) / nmax) nmax_ = int(ceil(nevts / nchunks)) print nevts, nmax, nmax_, nchunks ifirst = 0 while ifirst < nevts: result.append(["%s:%d:%d" % (fname, ifirst, nmax_)]) ifirst += nmax_ mylist = [] for nevts in nsmall: mylist.extend([nevts] * len(nsmall[nevts])) for part in partition_by_max(mylist, nmax): result.append([]) for nevts in part: fname = nsmall[nevts][0] nsmall[nevts].remove(fname) result[-1].append(fname + ":%d" % nevts) return result
def __init__(self, filename, histname, name="<noname>", ptvseta=True, verb=0):
    """Load a scale-factor histogram from a ROOT file.
    ptvseta: True if the histogram is binned as pT vs. eta (else eta vs. pT)."""
    self.name = name
    self.ptvseta = ptvseta
    self.filename = filename
    LOG.verb("ScaleFactor(%s): Opening %s:%r..."%(self.name,filename,histname),verb,1)
    self.file = ensureTFile(filename)
    self.hist = self.file.Get(histname)
    LOG.insist(self.hist,"ScaleFactor(%s): histogram %r does not exist in %s"%(self.name,histname,filename))
    self.hist.SetDirectory(0)  # keep the histogram alive after Close()
    self.file.Close()
    # dispatch to the lookup matching the histogram's axis convention
    self.getSF = self.getSF_ptvseta if ptvseta else self.getSF_etavspt
def __init__(self, filename, graphname='ZMass', name="<noname>", verb=0):
    """Load per-eta-bin efficiency graphs for data and MC from a ROOT file.
    The eta binning is read from the 'etaBinsH' histogram's bin labels."""
    self.name = name
    self.filename = filename
    LOG.verb("ScaleFactor(%s): Opening %s:%r..."%(self.name,filename,graphname),verb,1)
    self.file = ensureTFile(filename)
    self.hist_eta = self.file.Get('etaBinsH')
    self.hist_eta.SetDirectory(0)  # keep alive after Close()
    self.effs_data = {}
    self.effs_mc = {}
    xaxis = self.hist_eta.GetXaxis()
    for ibin in range(1, xaxis.GetNbins() + 1):
        label = xaxis.GetBinLabel(ibin)
        self.effs_data[label] = self.file.Get("%s%s_Data" % (graphname, label))
        self.effs_mc[label] = self.file.Get("%s%s_MC" % (graphname, label))
    self.file.Close()
def __init__(self, filename, histname, name="<noname>", ptvseta=True): #print '>>> ScaleFactor.init("%s","%s",name="%s",ptvseta=%r)'%(filename,histname,name,ptvseta) self.name = name self.ptvseta = ptvseta self.filename = filename self.file = ensureTFile(filename) self.hist = self.file.Get(histname) if not self.hist: print '>>> ScaleFactor(%s).__init__: histogram "%s" does not exist in "%s"' % ( self.name, histname, filename) exit(1) self.hist.SetDirectory(0) self.file.Close() if ptvseta: self.getSF = self.getSF_ptvseta else: self.getSF = self.getSF_etavspt
def __init__(self, filename, graphname='ZMass', name="<noname>"):
    """Load per-eta-bin efficiency graphs for data and MC from a ROOT file.
    The eta binning is read from the 'etaBinsH' histogram's bin labels."""
    self.name = name
    self.filename = filename
    self.file = ensureTFile(filename)
    self.hist_eta = self.file.Get('etaBinsH')
    self.hist_eta.SetDirectory(0)  # keep alive after Close()
    self.effs_data = {}
    self.effs_mc = {}
    xaxis = self.hist_eta.GetXaxis()
    for ibin in range(1, xaxis.GetNbins() + 1):
        label = xaxis.GetBinLabel(ibin)
        self.effs_data[label] = self.file.Get("%s%s_Data" % (graphname, label))
        self.effs_mc[label] = self.file.Get("%s%s_MC" % (graphname, label))
    self.file.Close()
def __init__(self, year=2017):
    """Load Z pT weights for the given year (2016, 2017 or 2018)."""
    assert year in [2016,2017,2018], "ZptCorrectionTool: You must choose a year from: 2016, 2017, or 2018."
    # year is guaranteed by the assert to be one of the three known values
    filename = zptpath + "Zpt_weights_%d.root" % year
    zfile = ensureTFile(filename, 'READ')
    zhist = zfile.Get('zptmass_weights')
    zhist.SetDirectory(0)  # keep the histogram alive after Close()
    zfile.Close()
    self.hist = zhist
    self.filename = filename
def __init__(self, trigger, wp='Medium', id='DeepTau2017v2p1', year=2016):
    """Load tau trigger histograms from files.
    trigger: 'ditau', 'mutau' or 'etau' (aliases 'tautau'/'eletau' accepted)
    wp:      tau ID working point
    id:      tau ID algorithm
    year:    data-taking year (2016, 2017 or 2018)
    """
    print "Loading %s trigger SFs for %s WP of %s ID for year %d..." % (trigger, wp, id, year)
    # CHECKS: validate all arguments against the supported values
    dms = [0, 1, 10, 11]  # reconstructed tau decay modes
    triggers = ['ditau', 'mutau', 'etau']
    years = [2016, 2017, 2018]
    ids = ['DeepTau2017v2p1']
    wps = ['VVVLoose', 'VVLoose', 'VLoose', 'Loose', 'Medium', 'Tight', 'VTight', 'VVTight']
    # normalize alternative trigger spellings to the canonical names
    trigger = trigger.replace('tautau', 'ditau').replace('eletau', 'etau')
    assert trigger in triggers, "Did not recognize '%s' trigger! Choose from: '%s' triggers." % (trigger, "', '".join(triggers))
    assert wp in wps, "Did not recognize '%s' WP! Choose from: '%s'" % (wp, "', '".join(wps))
    assert id in ids, "Did not recognize '%s' ID! Choose from: '%s'." % (id, "', '".join(ids))
    assert year in years, "Did not recognize '%s' year! Choose from: %s." % (year, "', '".join(str(y) for y in years))
    # GET DATA: load the fitted data/MC efficiency and SF curves per decay mode
    file = ensureTFile('%s/%d_tauTriggerEff_%s.root' % (datadir, year, id), 'r')
    hists_data, hists_mc, hists_sf = {}, {}, {}
    for dm in dms:
        for histtype, histdict in [('data', hists_data), ('mc', hists_mc), ('sf', hists_sf)]:
            histname = "%s_%s_%s_dm%d_fitted" % (histtype, trigger, wp, dm)
            histdict[dm] = gethist(file, histname)
    file.Close()
    self.hists_data = hists_data
    self.hists_mc = hists_mc
    self.hists_sf = hists_sf
    self.trigger = trigger
    self.year = year
    self.wp = wp
    self.id = id
    self.dms = dms
def __init__(self, tagger, wp='medium', channel='mutau', year=2017, maxeta=2.4, loadsys=False, type_bc='comb'):
    """Load b tag weights from CSV file.
    tagger:  b tagging algorithm ('CSVv2' or 'DeepCSV')
    wp:      working point ('loose', 'medium' or 'tight')
    channel: analysis channel, used to locate the MC efficiency histograms
    year:    data-taking year (2016, 2017 or 2018)
    maxeta:  maximum jet |eta| considered for b tagging
    loadsys: also load the 'up'/'down' SF variations
    type_bc: SF measurement type for b/c jets ('mujets' for QCD; 'comb' for QCD+TT)
    """
    assert(year in [2016,2017,2018]), "You must choose a year from: 2016, 2017, or 2018."
    assert(tagger in ['CSVv2','DeepCSV']), "BTagWeightTool: You must choose a tagger from: CSVv2, DeepCSV!"
    assert(wp in ['loose','medium','tight']), "BTagWeightTool: You must choose a WP from: loose, medium, tight!"
    #assert(sigma in ['central','up','down']), "BTagWeightTool: You must choose a WP from: central, up, down!"
    #assert(channel in ['mutau','eletau','tautau','mumu']), "BTagWeightTool: You must choose a channel from: mutau, eletau, tautau, mumu!"
    # FILE: pick the per-year SF CSV file and MC efficiency histogram file
    if year==2016:
        if 'deep' in tagger.lower():
            csvname = os.path.join(datadir,'DeepCSV_Moriond17_B_H.csv')
            effname = os.path.join(datadir,'DeepCSV_2016_Moriond17_eff.root')
        else:
            csvname = os.path.join(datadir,'CSVv2_Moriond17_B_H.csv')
            effname = os.path.join(datadir,'CSVv2_2016_Moriond17_eff.root')
    elif year==2017:
        if 'deep' in tagger.lower():
            csvname = os.path.join(datadir,'DeepCSV_94XSF_V3_B_F.csv')
            effname = os.path.join(datadir,'DeepCSV_2017_12Apr2017_eff.root')
        else:
            csvname = os.path.join(datadir,'CSVv2_94XSF_V2_B_F.csv')
            effname = os.path.join(datadir,'CSVv2_2017_12Apr2017_eff.root')
    elif year==2018:
        if 'deep' in tagger.lower():
            csvname = os.path.join(datadir,'DeepCSV_94XSF_V3_B_F.csv')
            effname = os.path.join(datadir,'DeepCSV_2018_Autumn18_eff.root')
        else:
            csvname = os.path.join(datadir,'CSVv2_94XSF_V2_B_F.csv')
            effname = os.path.join(datadir,'CSVv2_2018_Autumn18_eff.root')
    # TAGGING WP: numerical discriminator cut for this tagger/WP
    self.wpname = wp
    self.wp = getattr(BTagWPs(tagger,year),wp)
    # per-jet predicate: is this jet b-tagged at the chosen WP?
    if 'deep' in tagger.lower():
        tagged = lambda j: j.btagDeepB>self.wp
    else:
        tagged = lambda j: j.btagCSVV2>self.wp
    # CSV READER: set up SF readers for the nominal (and optional up/down) variations
    print "Loading BTagWeightTool for %s (%s WP)..."%(tagger,wp) #,(", "+sigma) if sigma!='central' else ""
    readers = { }
    op = OP_LOOSE if wp=='loose' else OP_MEDIUM if wp=='medium' else OP_TIGHT if wp=='tight' else OP_RESHAPING
    type_udsg = 'incl'
    type_bc = type_bc # 'mujets' for QCD; 'comb' for QCD+TT
    calib = BTagCalibration(tagger, csvname)
    readers['Nom'] = BTagCalibrationReader(op,'central')
    if loadsys:
        readers['Up'] = BTagCalibrationReader(op,'up')
        readers['Down'] = BTagCalibrationReader(op,'down')
    for reader in readers.values():
        reader.load(calib,FLAV_B, type_bc)
        reader.load(calib,FLAV_C, type_bc)
        reader.load(calib,FLAV_UDSG,type_udsg)
    # EFFICIENCIES: load per-flavor MC b-tag efficiency maps (fall back to defaults)
    hists = { }   # histograms to compute the b tagging efficiencies in MC
    effmaps = { } # b tag efficiencies in MC to compute b tagging weight for an event
    efffile = ensureTFile(effname)
    default = False
    if not efffile:
        LOG.warning("File %s with efficiency histograms does not exist! Reverting to default efficiency histogram..."%(effname))
        default = True
    for flavor in [0,4,5]:
        flavor = flavorToString(flavor)
        histname = "%s_%s_%s"%(tagger,flavor,wp)
        # NOTE(review): 'effname' is reused here for the in-file histogram path,
        # shadowing the efficiency file name above — confirm this is intended
        effname = "%s/eff_%s_%s_%s"%(channel,tagger,flavor,wp)
        hists[flavor] = getEffMap(histname)               # numerator = b tagged jets
        hists[flavor+'_all'] = getEffMap(histname+'_all') # denominator = all jets
        if efffile:
            effmaps[flavor] = efffile.Get(effname)
            if not effmaps[flavor]:
                LOG.warning("Histogram '%s' does not exist in %s! Reverting to default efficiency histogram..."%(effname,efffile.GetName()))
                default = True
                effmaps[flavor] = getDefaultEffMap(effname,flavor,wp)
        else:
            effmaps[flavor] = getDefaultEffMap(effname,flavor,wp)
        effmaps[flavor].SetDirectory(0)  # keep map alive after Close()
    efffile.Close()
    if default:
        LOG.warning("Made use of default efficiency histograms! The b tag weights from this module should be regarded as placeholders only,\n"+\
                    "and should NOT be used for analyses. B (mis)tag efficiencies in MC are analysis dependent. Please create your own\n"+\
                    "efficiency histogram with data/btag/getBTagEfficiencies.py after running all MC samples with BTagWeightTool.")
    self.tagged = tagged
    self.calib = calib
    self.readers = readers
    self.loadsys = loadsys
    self.hists = hists
    self.effmaps = effmaps
    self.maxeta = maxeta
def chunkify_by_evts(fnames, maxevts, evenly=True, verb=0):
    """Split list of files into chunks with total events per chunks less than
    given maximum, and update input fnames to bookkeep first event and maximum
    events. E.g.
      ['nano_1.root','nano_2.root','nano_3.root','nano_4.root']
      -> [ ['nano_1.root:0:1000'],
           ['nano_1.root:1000:1000'], # 'fname:firstevt:maxevts'
           ['nano_2.root','nano_3.root','nano_4.root'] ]
    NOTE: fnames is modified in place (large files replaced by split names).
    """
    result = []  # list of chunks
    nlarge = {}  # nevts -> files with nevts >= maxevts (to be split below)
    nsmall = {}  # nevts -> files with nevts <  maxevts (to be grouped below)
    if verb >= 4:
        print ">>> chunkify_by_evts: events per file:"
    for fname in fnames[:]:
        if evtsplitexp.match(fname):  # already split; cannot be split again
            result.append([fname])
        else:  # get number of events
            file = ensureTFile(fname, 'READ')
            nevts = file.Get('Events').GetEntries()
            file.Close()
            if verb >= 4:
                print "%10d %s" % (nevts, fname)
            if nevts < maxevts:  # small file: group with others into one chunk below
                nsmall.setdefault(nevts, []).append(fname)
            else:  # large file: split into several chunks below
                nlarge.setdefault(nevts, []).append(fname)
                fnames.remove(fname)
    if verb >= 1:
        print ">>> chunkify_by_evts: %d small files (<%d events) and %d large files (>=%d events)" % (
            len(nsmall), maxevts, len(nlarge), maxevts)
    for nevts in nlarge:
        for fname in nlarge[nevts]:  # split large files into several chunks
            maxevts_ = maxevts
            if evenly:  # spread the file's events evenly over its chunks
                nchunks = ceil(float(nevts) / maxevts)
                maxevts_ = int(ceil(nevts / nchunks))
                if verb >= 3:
                    print ">>> nevts/maxevts = %d/%d = %.2f => make %d chunks with max. %d events" % (
                        nevts, maxevts, nevts / float(maxevts), nchunks, maxevts_)
            ifirst = 0  # first event to process in the first chunk
            while ifirst < nevts:
                infname = "%s:%d:%d" % (fname, ifirst, maxevts_)
                fnames.append(infname)  # update for book keeping
                result.append([infname])
                ifirst += maxevts_
    mylist = []
    for nevts in nsmall:
        mylist.extend([nevts] * len(nsmall[nevts]))
    for part in partition_by_max(mylist, maxevts):  # group small files into one chunk
        result.append([])
        for nevts in part:
            fname = nsmall[nevts][0]
            nsmall[nevts].remove(fname)
            result[-1].append(fname)  #+":%d"%nevts)
    if verb >= 4:
        print ">>> chunkify_by_evts: chunks = ["
        for chunk in result:
            print ">>>   %s" % (chunk)
        print ">>> ]"
    return result
def getGenProfile(outfname, era): """Create generator pileup profile.""" print ">>> getGenProfile(%s):" % (era) if era == '2016': bins = [ # https://github.com/cms-sw/cmssw/blob/CMSSW_9_4_X/SimGeneral/MixingModule/python/mix_2016_25ns_Moriond17MC_PoissonOOTPU_cfi.py 1.78653e-05, 2.56602e-05, 5.27857e-05, 8.88954e-05, 0.000109362, 0.000140973, 0.000240998, 0.00071209, 0.00130121, 0.00245255, 0.00502589, 0.00919534, 0.0146697, 0.0204126, 0.0267586, 0.0337697, 0.0401478, 0.0450159, 0.0490577, 0.0524855, 0.0548159, 0.0559937, 0.0554468, 0.0537687, 0.0512055, 0.0476713, 0.0435312, 0.0393107, 0.0349812, 0.0307413, 0.0272425, 0.0237115, 0.0208329, 0.0182459, 0.0160712, 0.0142498, 0.012804, 0.011571, 0.010547, 0.00959489, 0.00891718, 0.00829292, 0.0076195, 0.0069806, 0.0062025, 0.00546581, 0.00484127, 0.00407168, 0.00337681, 0.00269893, 0.00212473, 0.00160208, 0.00117884, 0.000859662, 0.000569085, 0.000365431, 0.000243565, 0.00015688, 9.88128e-05, 6.53783e-05, 3.73924e-05, 2.61382e-05, 2.0307e-05, 1.73032e-05, 1.435e-05, 1.36486e-05, 1.35555e-05, 1.37491e-05, 1.34255e-05, 1.33987e-05, 1.34061e-05, 1.34211e-05, 1.34177e-05, 1.32959e-05, 1.33287e-05, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ] elif era == '2017': # https://github.com/cms-sw/cmssw/blob/CMSSW_9_4_X/SimGeneral/MixingModule/python/mix_2017_25ns_WinterMC_PUScenarioV1_PoissonOOTPU_cfi.py bins = [ 3.39597497605e-05, 6.63688402133e-06, 1.39533611284e-05, 3.64963078209e-05, 6.00872171664e-05, 9.33932578027e-05, 0.000120591524486, 0.000128694546198, 0.000361697233219, 0.000361796847553, 0.000702474896113, 0.00133766053707, 0.00237817050805, 0.00389825605651, 0.00594546732588, 0.00856825906255, 0.0116627396044, 0.0148793350787, 0.0179897368379, 0.0208723871946, 0.0232564170641, 0.0249826433945, 0.0262245860346, 0.0272704617569, 0.0283301107549, 0.0294006137386, 0.0303026836965, 0.0309692426278, 0.0308818046328, 0.0310566806228, 0.0309692426278, 0.0310566806228, 
0.0310566806228, 0.0310566806228, 0.0307696426944, 0.0300103336052, 0.0288355370103, 0.0273233309106, 0.0264343533951, 0.0255453758796, 0.0235877272306, 0.0215627588047, 0.0195825559393, 0.0177296309658, 0.0160560731931, 0.0146022004183, 0.0134080690078, 0.0129586991411, 0.0125093292745, 0.0124360740539, 0.0123547104433, 0.0123953922486, 0.0124360740539, 0.0124360740539, 0.0123547104433, 0.0124360740539, 0.0123387597772, 0.0122414455005, 0.011705203844, 0.0108187105305, 0.00963985508986, 0.00827210065136, 0.00683770076341, 0.00545237697118, 0.00420456901556, 0.00367513566191, 0.00314570230825, 0.0022917978982, 0.00163221454973, 0.00114065309494, 0.000784838366118, 0.000533204105387, 0.000358474034915, 0.000238881117601, 0.0001984254989, 0.000157969880198, 0.00010375646169, 6.77366175538e-05, 4.39850477645e-05, 2.84298066026e-05, 1.83041729561e-05, 1.17473542058e-05, 7.51982735129e-06, 6.16160108867e-06, 4.80337482605e-06, 3.06235473369e-06, 1.94863396999e-06, 1.23726800704e-06, 7.83538083774e-07, 4.94602064224e-07, 3.10989480331e-07, 1.94628487765e-07, 1.57888581037e-07, 1.2114867431e-07, 7.49518929908e-08, 4.6060444984e-08, 2.81008884326e-08, 1.70121486128e-08, 1.02159894812e-08, 0.0, #0.0, ] elif era == '2018': # https://github.com/cms-sw/cmssw/blob/CMSSW_10_4_X/SimGeneral/MixingModule/python/mix_2018_25ns_JuneProjectionFull18_PoissonOOTPU_cfi.py bins = [ 4.695341e-10, 1.206213e-06, 1.162593e-06, 6.118058e-06, 1.626767e-05, 3.508135e-05, 7.12608e-05, 0.0001400641, 0.0002663403, 0.0004867473, 0.0008469, 0.001394142, 0.002169081, 0.003198514, 0.004491138, 0.006036423, 0.007806509, 0.00976048, 0.0118498, 0.01402411, 0.01623639, 0.01844593, 0.02061956, 0.02273221, 0.02476554, 0.02670494, 0.02853662, 0.03024538, 0.03181323, 0.03321895, 0.03443884, 0.035448, 0.03622242, 0.03674106, 0.0369877, 0.03695224, 0.03663157, 0.03602986, 0.03515857, 0.03403612, 0.0326868, 0.03113936, 0.02942582, 0.02757999, 0.02563551, 0.02362497, 0.02158003, 0.01953143, 0.01750863, 0.01553934, 
0.01364905, 0.01186035, 0.01019246, 0.008660705, 0.007275915, 0.006043917, 0.004965276, 0.004035611, 0.003246373, 0.002585932, 0.002040746, 0.001596402, 0.001238498, 0.0009533139, 0.0007282885, 0.000552306, 0.0004158005, 0.0003107302, 0.0002304612, 0.0001696012, 0.0001238161, 8.96531e-05, 6.438087e-05, 4.585302e-05, 3.23949e-05, 2.271048e-05, 1.580622e-05, 1.09286e-05, 7.512748e-06, 5.140304e-06, 3.505254e-06, 2.386437e-06, 1.625859e-06, 1.111865e-06, 7.663272e-07, 5.350694e-07, 3.808318e-07, 2.781785e-07, 2.098661e-07, 1.642811e-07, 1.312835e-07, 1.081326e-07, 9.141993e-08, 7.890983e-08, 6.91468e-08, 6.119019e-08, 5.443693e-08, 4.85036e-08, 4.31486e-08, 3.822112e-08 ] else: print ">>> Warning! No generator pileup profile for era %s" % (era) nbins = len(bins) hist = TH1F('pileup', 'pileup', nbins, 0, nbins) hist.Sumw2() for i, binc in enumerate(bins, 1): hist.SetBinContent(i, binc) file = ensureTFile(outfname, 'RECREATE') hist.Write('pileup') hist.SetDirectory(0) file.Close() return hist
def drawpostfit(fname, bin, procs, **kwargs):
    """Plot pre- and post-fit plots PostFitShapesFromWorkspace.
    fname: ROOT file with '<bin>_prefit' and '<bin>_postfit' directories
    bin:   category/bin name
    procs: list of process names to stack ('data_obs' treated as data)
    """
    print '>>>\n>>> drawpostfit("%s","%s")' % (fname, bin)
    outdir = kwargs.get('outdir', "")
    pname = kwargs.get('pname', "$FIT.png")  # replace $FIT = 'prefit', 'postfit'
    ratio = kwargs.get('ratio', True)
    tag = kwargs.get('tag', "")
    xtitle = kwargs.get('xtitle', None)
    title = kwargs.get('title', None)
    text = kwargs.get('text', "")
    tsize = kwargs.get('tsize', 0.050)
    xmin = kwargs.get('xmin', None)
    xmax = kwargs.get('xmax', None)
    ymargin = kwargs.get('ymargin', 1.22)
    groups = kwargs.get('group', [])
    position = kwargs.get('pos', None)  # legend position
    ncol = kwargs.get('ncol', None)  # legend columns
    square = kwargs.get('square', False)
    era = kwargs.get('era', "")
    exts = kwargs.get('exts', ['pdf', 'png'])  # figure extension
    ymax = None
    fits = ['prefit', 'postfit']
    file = ensureTFile(fname, 'READ')
    if outdir:
        ensuredir(outdir)
    if era:
        setera(era)
    # DRAW PRE-/POST-FIT: one plot per fit type
    for fit in fits:
        fitdirname = "%s_%s" % (bin, fit)
        dir = file.Get(fitdirname)
        if not dir:
            LOG.warning('drawpostfit: Did not find dir "%s"' % (fitdirname), pre=" ")
            return
        obshist = None
        exphists = []
        # GET HIST: collect data and expected (MC) templates from the directory
        for proc in procs:  #reversed(samples):
            hname = "%s/%s" % (fitdirname, proc)
            hist = file.Get(hname)
            if not hist:
                LOG.warning('drawpostfit: Could not find "%s" template in directory "%s_%s"' % (proc, bin, fit), pre=" ")
                continue
            if 'data_obs' in proc:
                obshist = hist
                hist.SetLineColor(1)
                ymax = hist.GetMaximum() * ymargin  # y range from data maximum
            else:
                exphists.append(hist)
                if proc in STYLE.sample_titles:
                    hist.SetTitle(STYLE.sample_titles[proc])
                if proc in STYLE.sample_colors:
                    hist.SetFillStyle(1001)
                    hist.SetFillColor(STYLE.sample_colors[proc])
        if len(exphists) == 0:
            LOG.warning('drawpostfit: Could not find any templates in directory "%s"' % (bin), pre=" ")
            continue
        if not obshist:
            LOG.warning('drawpostfit: Could not find a data template in directory "%s"' % (bin), pre=" ")
            continue
        for groupargs in groups:  # merge groups of processes into one histogram
            grouphists(exphists, *groupargs, replace=True)
        # PLOT: stack expected histograms with the data overlaid
        xtitle = (xtitle or exphists[0].GetXaxis().GetTitle())  #.replace('[GeV]','(GeV)')
        xmax = xmax or exphists[0].GetXaxis().GetXmax()
        xmin = xmin or exphists[0].GetXaxis().GetXmin()
        errtitle = "Pre-fit stat. + syst. unc." if fit == 'prefit' else "Post-fit unc."
        pname_ = repkey(pname, FIT=fit, ERA=era)
        rmin, rmax = (0.28, 1.52)  # ratio panel range
        plot = Stack(xtitle, obshist, exphists)
        plot.draw(xmin=xmin, xmax=xmax, ymax=ymax, square=square, ratio=ratio, rmin=rmin, rmax=rmax, staterror=True, errtitle=errtitle)
        plot.drawlegend(position, tsize=tsize, text=text, ncol=ncol)
        if title:
            plot.drawtext(title, bold=False)
        plot.saveas(pname_, outdir=outdir, ext=exts)
        plot.close()
    file.Close()
def plotinputs(fname, varprocs, observables, bins, **kwargs):
    """Plot histogram inputs from ROOT file for datacards, and write to ROOT file.
      fname:       filename pattern of ROOT file
      varprocs:    dictionary for systematic variation to list of processes,
                   e.g. { 'Nom':     ['ZTT','TTT','W','QCD','data_obs'],
                          'TESUp':   ['ZTT','TTT'],
                          'TESDown': ['ZTT','TTT'] }
      observables: list of Variables objects
      bins:        list of Selection objects
    """
    #LOG.header("plotinputs")
    tag = kwargs.get('tag', "")
    pname = kwargs.get('pname', "$OBS_$BIN$TAG.png")
    outdir = kwargs.get('outdir', 'plots')
    text = kwargs.get('text', "$BIN")
    groups = kwargs.get('group', [])  # add processes together into one histogram
    verbosity = kwargs.get('verb', 0)
    ensuredir(outdir)
    print ">>>\n>>> " + color(" plotting... ", 'magenta', bold=True, ul=True)
    for obs in observables:
        obsname = obs.filename
        ftag = tag + obs.tag
        fname_ = repkey(fname, OBS=obsname, TAG=ftag)
        file = ensureTFile(fname_, 'UPDATE')  # one file per observable
        for set, procs in varprocs.iteritems():  # loop over processes with variation
            if set == 'Nom':
                systag = ""  # no systematics tag for nominal
                procs_ = procs[:]
            else:
                systag = '_' + set  # systematics tag for variation, e.g. '_TESUp'
                # add tag to varied processes; unvaried ones keep their nominal name
                procs_ = [(p + systag if p in procs else p) for p in varprocs['Nom']]
            for selection in bins:
                if not obs.plotfor(selection):
                    continue
                obs.changecontext(selection)
                bin = selection.filename
                text_ = repkey(text, BIN=selection.title)  # extra text in plot corner
                tdir = ensureTDirectory(file, bin, cd=True)  # directory with histograms
                if set == 'Nom':
                    gStyle.Write('style', TH1.kOverwrite)  # write current TStyle object to reproduce plots
                # STACKS: plot and store the stacked nominal/varied templates
                pname_ = repkey(pname, OBS=obsname, BIN=bin, TAG=ftag + systag)  # image file name
                wname = "stack" + systag  # name in ROOT file
                stackinputs(tdir, obs, procs_, group=groups, save=pname_, write=wname, text=text_)
                # VARIATIONS: compare up/down variations once per systematic (on 'Down')
                if 'Down' in set:
                    systag_ = systag.replace('Down', '')  # e.g. '_TES' without 'Up' or 'Down' suffix
                    pname_ = repkey(pname, OBS=obsname, BIN=bin, TAG=ftag + "_$PROC" + systag)  # image file name
                    wname = "plot_$PROC" + systag  # name in ROOT file
                    comparevars(tdir, obs, procs, systag_, save=pname_, write=wname, text=text_)
        file.Close()
def __init__(self, era, sigma='central', sample=None, buggy=False, flat=False, minbias=None, verb=0): """Load data and MC pilup profiles.""" assert ( sigma in ['central', 'up', 'down'] ), "You must choose a s.d. variation from: 'central', 'up', or 'down'." if not minbias: minbias = '69p2' if sigma == 'down': minbias = '66p0168' # -4.6% elif sigma == 'up': minbias = '72p3832' # +4.6% datafilename, mcfilename = None, None if 'UL' in era: if '2016' in era and 'preVFP' in era: datafilename = os.path.join( datadir, "Data_PileUp_UL2016_preVFP_%s.root" % (minbias)) mcfilename = os.path.join( datadir, "MC_PileUp_UL2016_preVFP_Summer19.root") elif '2016' in era and 'postVFP' in era: datafilename = os.path.join( datadir, "Data_PileUp_UL2016_postVFP_%s.root" % (minbias)) mcfilename = os.path.join( datadir, "MC_PileUp_UL2016_postVFP_Summer19.root") elif '2016' in era: datafilename = os.path.join( datadir, "Data_PileUp_UL2016_%s.root" % (minbias)) mcfilename = os.path.join(datadir, "MC_PileUp_UL2016_Summer19.root") elif '2017' in era: datafilename = os.path.join( datadir, "Data_PileUp_UL2017_%s.root" % (minbias)) mcfilename = os.path.join(datadir, "MC_PileUp_UL2017_Summer19.root") elif '2018' in era: datafilename = os.path.join( datadir, "Data_PileUp_UL2018_%s.root" % (minbias)) mcfilename = os.path.join(datadir, "MC_PileUp_UL2018_Summer19.root") else: if '2016' in era: datafilename = os.path.join( datadir, "Data_PileUp_%s_%s.root" % (era, minbias)) mcfilename = os.path.join( datadir, "MC_PileUp_%s_Moriond17.root" % (era)) elif '2017' in era: tag = "" if buggy or sample: # pre-UL 2017 had buggy samples buggy = buggy or hasBuggyPU(sample) tag = "_old_pmx" if buggy else "_new_pmx" datafilename = os.path.join( datadir, "Data_PileUp_%s_%s.root" % (era, minbias)) mcfilename = os.path.join( datadir, "MC_PileUp_%s_Winter17_V2%s.root" % (era, tag)) elif '2018' in era: datafilename = os.path.join( datadir, "Data_PileUp_%s_%s.root" % (era, minbias)) mcfilename = os.path.join(datadir, 
"MC_PileUp_%s_Autumn18.root" % (era)) assert datafilename and mcfilename, "PileupWeightTool: Did not recognize era %r!" % ( era) if flat or (sample and hasFlatPU(sample)): mcfilename = os.path.join(datadir, "MC_PileUp_%d_FlatPU0to75.root" % year) print "Loading PileupWeightTool for %s and %s" % (datafilename, mcfilename) self.datafile = ensureTFile(datafilename, 'READ') self.mcfile = ensureTFile(mcfilename, 'READ') self.datahist = self.datafile.Get('pileup') self.mchist = self.mcfile.Get('pileup') self.datahist.SetDirectory(0) self.mchist.SetDirectory(0) self.datahist.Scale(1. / self.datahist.Integral()) self.mchist.Scale(1. / self.mchist.Integral()) self.datafile.Close() self.mcfile.Close()
def chunkify_by_evts(fnames, maxevts, evenly=True, evtdict=None, verb=0):
    """Split list of files into chunks with total events per chunks less than
    given maximum, and update input fnames to bookkeep first event and maximum
    events. E.g.
      ['nano_1.root','nano_2.root','nano_3.root','nano_4.root']
      -> [ ['nano_1.root:0:1000'],
           ['nano_1.root:1000:1000'], # 'fname:firstevt:maxevts'
           ['nano_2.root','nano_3.root','nano_4.root'] ]
    evtdict: optional {fname: nevts} cache to avoid opening files.
    Returns (ntot, result): total counted events and the list of chunks.
    NOTE: fnames is modified in place (large files replaced by split names).
    """
    result = [ ]  # list of chunks
    nlarge = { }  # nevts -> files with nevts >= maxevts (to be split below)
    nsmall = { }  # nevts -> files with nevts <  maxevts (to be grouped below)
    ntot = 0  # total number of events over all counted files
    if verb>=4:
        print ">>> chunkify_by_evts: events per file:"
    for fname in fnames[:]:
        if evtsplitexp.match(fname): # already split; cannot be split again
            # TODO: add maxevts to ntot ?
            result.append([fname]) # do not split again, keep in single chunk
            continue
        if evtdict and fname in evtdict: # get number of events from sample's dictionary to speed up
            nevts = evtdict[fname]
            if verb>=4:
                print ">>> %10d %s (dict)"%(nevts,fname)
        else: # get number of events from file
            file = ensureTFile(fname,'READ')
            nevts = file.Get('Events').GetEntries()
            file.Close()
            if isinstance(evtdict,dict):
                evtdict[fname] = nevts # store for possible later reuse (if same sample is submitted multiple times)
            if verb>=4:
                print ">>> %10d %s"%(nevts,fname)
        if nevts<maxevts: # small file: group with others into one chunk below
            nsmall.setdefault(nevts,[ ]).append(fname)
        else: # large file: split into several chunks below
            nlarge.setdefault(nevts,[ ]).append(fname)
            fnames.remove(fname)
        ntot += nevts
    if verb>=1:
        print ">>> chunkify_by_evts: %d small files (<%d events) and %d large files (>=%d events)"%(
          len(nsmall),maxevts,len(nlarge),maxevts)
    for nevts in nlarge:
        for fname in nlarge[nevts]: # split large files into several chunks
            maxevts_ = maxevts
            if evenly: # split events evenly over chunks
                nchunks = ceil(float(nevts)/maxevts)
                maxevts_ = int(ceil(nevts/nchunks)) # new maxevts per chunk
                if verb>=3:
                    print ">>> nevts/maxevts = %d/%d = %.2f => make %d chunks with max. %d events"%(
                      nevts,maxevts,nevts/float(maxevts),nchunks,maxevts_)
            ifirst = 0 # first event to process in first chunk
            while ifirst<nevts:
                #if ifirst+maxevts_+1>=nevts: # if nevts%maxevts_!=0; index starts counting from 0
                #  maxevts_ = nevts - (nchunks-1)*maxevts_ # maxevts for the last chunk; use correct maxevts for bookkeeping ntot
                infname = "%s:%d:%d"%(fname,ifirst,maxevts_)
                fnames.append(infname) # update for book keeping
                result.append([infname])
                ifirst += maxevts_
    mylist = [ ]
    for nevts in nsmall:
        mylist.extend([nevts]*len(nsmall[nevts]))
    for part in partition_by_max(mylist,maxevts): # group small files into one chunk
        result.append([ ])
        for nevts in part:
            fname = nsmall[nevts][0]
            nsmall[nevts].remove(fname)
            result[-1].append(fname) #+":%d"%nevts)
    if verb>=4:
        print ">>> chunkify_by_evts: chunks = ["
        for chunk in result:
            print ">>>   %s"%(chunk)
        print ">>> ]"
    return ntot, result
def __init__(self, workspace, function, arguments):
    """Open a ROOT file containing a RooWorkspace named 'w' and look up the
    given function together with the argument set named by 'arguments'."""
    wsfile = ensureTFile(workspace)
    ws = wsfile.Get("w")
    self.workspace_file = wsfile
    self.arguments = arguments
    self.workspace = ws
    self.function = ws.function(function)
    self.argset = ws.argSet(",".join(arguments))