def stackinputs(file, variable, processes, **kwargs):
  """Stack histograms from ROOT file.
       file:      TFile or TDirectory object
       variable:  Variable object
       processes: list of strings (names of processes)
     e.g. stackinputs(file,variable,['ZTT','TTT','W','QCD','data_obs'])
  """
  text     = kwargs.get('text',  None           )
  tag      = kwargs.get('tag',   ""             )
  groups   = kwargs.get('group', [ ]            ) # e.g. [(['^TT','ST'],'Top')]
  dname    = kwargs.get('dname', None           ) # directory ('bin') name
  pname    = kwargs.get('save',  "stack$TAG.png") # save as image file
  wname    = kwargs.get('write', "stack$TAG"    ) # write to file
  style    = kwargs.get('style', False          ) # write style to file
  exphists = [ ]
  datahist = None
  tdir     = ensureTDirectory(file,dname,cd=True) if dname else file
  if style:
    gStyle.Write('style',TH1.kOverwrite) # write current TStyle object to reproduce plots
  for process in processes:
    hist = gethist(tdir,process,fatal=False,warn=False)
    if not hist:
      LOG.warning("stackinputs: Could not find %r in %s. Skipping stacked plot..."%(process,tdir.GetPath()))
      return
    hist.SetDirectory(0)
    hist.SetLineColor(kBlack)
    hist.SetFillStyle(1001) # assume fill color is already correct
    if process=='data_obs':
      datahist = hist
    else:
      exphists.append(hist)
  for group in groups:
    grouphists(exphists,*group,replace=True,regex=True,verb=0)
  stack = Stack(variable,datahist,exphists)
  stack.draw()
  stack.drawlegend(ncols=2,twidth=0.9)
  if text:
    stack.drawtext(text)
  if pname:
    pname = repkey(pname,TAG=tag)
    stack.saveas(pname,ext=['png'])
  if wname:
    wname = repkey(wname,TAG=tag)
    stack.canvas.Write(wname,TH1.kOverwrite)
  stack.close()
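# A minimal usage sketch of stackinputs (the file name, Variable binning and
# directory name below are hypothetical, not taken from this code base):
# stack the nominal templates of one datacard bin and save a PNG.
from ROOT import TFile
file     = TFile.Open("ztt_tid_mvis_mutau-2018.inputs.root")
variable = Variable('m_vis',40,0,200)
stackinputs(file,variable,['ZTT','TTT','W','QCD','data_obs'],dname='Tight',tag="-2018")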
def writefiles(self,listname,**kwargs):
  """Write filenames to text file for fast look up in future. If there is more than
  one DAS dataset path, write the lists separately for each path."""
  kwargs    = kwargs.copy() # do not edit given dictionary
  writeevts = kwargs.pop('nevts',False) # also write nevents to file
  listname  = repkey(listname,ERA=self.era,GROUP=self.group,SAMPLE=self.name)
  ensuredir(os.path.dirname(listname))
  filenevts = self.getfilenevts(checkfiles=True,**kwargs) if writeevts else None
  treename  = kwargs.pop('tree','Events') # do not pass to Sample.getfiles
  kwargs.pop('ncores',None) # do not pass to Sample.getfiles
  kwargs['refresh'] = False # already got file list in Sample.filenevts
  files = self.getfiles(**kwargs) # get right URL
  if not files:
    LOG.warning("writefiles: Did not find any files!")
  def _writefile(ofile,fname,prefix=""):
    """Help function to write individual files."""
    if writeevts: # add nevents at the end of the infile string
      nevts = filenevts.get(fname,-1) # retrieve from cache
      if nevts<0:
        LOG.warning("Did not find nevents of %s. Trying again..."%(fname))
        nevts = getnevents(fname,treename) # get nevents from file
      fname = "%s:%d"%(fname,nevts) # write $FILENAME(:NEVTS)
    ofile.write(prefix+fname+'\n')
  paths = self.paths if '$PATH' in listname else [self.paths[0]]
  for path in paths:
    listname_ = repkey(listname,PATH=path.strip('/').replace('/','__'))
    with open(listname_,'w+') as lfile:
      if '$PATH' in listname: # write only the file list of this path to this text file
        print ">>> Write %s files to list %r..."%(len(self.pathfiles[path]),listname_)
        for infile in self.pathfiles[path]:
          _writefile(lfile,infile)
      elif len(self.paths)<=1: # write file list for the only path
        if self.nevents>0:
          print ">>> Write %s files (%d events) to list %r..."%(len(files),self.nevents,listname_)
        else:
          print ">>> Write %s files to list %r..."%(len(files),listname_)
        for infile in files:
          _writefile(lfile,infile)
      else: # divide up list per DAS dataset path
        if self.nevents>0:
          print ">>> Write %s files (%d events) to list %r..."%(len(files),self.nevents,listname_)
        else:
          print ">>> Write %s files to list %r..."%(len(files),listname_)
        for i, path in enumerate(self.paths):
          print ">>> %3s files for %s..."%(len(self.pathfiles[path]),path)
          lfile.write("DASPATH=%s\n"%(path)) # write special line to text file, which loadfiles() can parse
          for infile in self.pathfiles[path]: # loop over this list (general list is sorted)
            LOG.insist(infile in files,"Did not find file %s in general list! %s"%(infile,files))
            _writefile(lfile,infile,prefix="  ")
          if i+1<len(self.paths): # add extra empty line between blocks
            lfile.write("\n")
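# For reference, a sketch of the text-file layout _writefile produces when a
# sample has multiple DAS dataset paths (the dataset and file names below are
# hypothetical, and the "..." stands in for a real /store path): one
# "DASPATH=..." header per dataset, followed by indented "$FILENAME:NEVTS"
# lines, with an empty line between blocks, which loadfiles() parses back:
#
#   DASPATH=/DYJetsToLL_M-50/RunIIAutumn18NanoAODv6/NANOAODSIM
#     root://cms-xrd-global.cern.ch//store/.../file1.root:25000
#     root://cms-xrd-global.cern.ch//store/.../file2.root:24800
#
#   DASPATH=/DYJetsToLL_M-50_ext1/RunIIAutumn18NanoAODv6/NANOAODSIM
#     root://cms-xrd-global.cern.ch//store/.../file1.root:31000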
def writefiles(self, listname, **kwargs):
  """Write filenames to text file for fast look up in future."""
  writeevts = kwargs.pop('nevts',False) # also write nevents to file
  listname  = repkey(listname,ERA=self.era,GROUP=self.group,SAMPLE=self.name)
  print ">>> Write list to %r..."%(listname)
  ensuredir(os.path.dirname(listname))
  filenevts = self.getfilenevts(checkfiles=True,**kwargs) if writeevts else None
  treename  = kwargs.pop('tree','Events')
  files     = self.getfiles(**kwargs)
  with open(listname,'w+') as lfile:
    for infile in files:
      if writeevts:
        nevts = filenevts.get(infile,-1)
        if nevts<0:
          LOG.warning("Did not find nevents of %s. Trying again..."%(infile))
          nevts = getnevents(infile,treename)
        infile = "%s:%d"%(infile,nevts) # write $FILENAME(:NEVTS)
      lfile.write(infile+'\n')
def saveas(self, *fnames, **kwargs):
  """Save plot, close canvas and delete the histograms."""
  save   = kwargs.get('save',   True )
  close  = kwargs.get('close',  False)
  outdir = kwargs.get('outdir', ""   ) # output directory
  tag    = kwargs.get('tag',    ""   ) # extra tag for output file
  exts   = kwargs.get('ext',    [ ]  ) # e.g. [".png"]
  pdf    = kwargs.get('pdf',    False)
  exts   = ensurelist(exts)
  if pdf:
    exts.append(".pdf")
  if not fnames:
    fnames = [self.name+tag]
  if save:
    for fname in fnames:
      fname = os.path.join(outdir,repkey(fname,VAR=self.name,NAME=self.name,TAG=tag))
      if exts:
        for ext in ensurelist(exts):
          if not ext.startswith('.'):
            ext = '.'+ext
          # note: pass re.IGNORECASE via flags; as the fourth positional
          # argument it would be interpreted as the count
          fname = re.sub(r"\.?(png|pdf|jpg|gif|eps|tiff?|cc?|root)?$",ext,fname,flags=re.IGNORECASE)
          self.canvas.SaveAs(fname)
      elif not any(fname.lower().endswith('.'+e) for e in ['png','pdf','jpg','gif','eps','tif','tiff','c','root']):
        self.canvas.SaveAs(fname+".png")
      else:
        self.canvas.SaveAs(fname)
  if close:
    self.close()
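# A minimal usage sketch, assuming saveas is a method of the plot/stack class
# and "plot" is an already-drawn instance (the file and directory names are
# hypothetical): write plots/mvis_mutau.png and plots/mvis_mutau.pdf, then
# close the canvas and delete the histograms.
plot.saveas("mvis_mutau.png",outdir="plots",pdf=True,close=True)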
def getsamples(era,channel="",tag="",dtype=[],filter=[],veto=[],moddict={},verb=0):
  """Help function to get samples from a sample list and filter if needed."""
  import TauFW.PicoProducer.tools.config as GLOB
  CONFIG   = GLOB.getconfig(verb=verb)
  filters  = filter if not filter or isinstance(filter,list) else [filter]
  vetoes   = veto   if not veto   or isinstance(veto,list)   else [veto]
  dtypes   = dtype  if not dtype  or isinstance(dtype,list)  else [dtype]
  sampfile = ensurefile("samples",repkey(CONFIG.eras[era],ERA=era,CHANNEL=channel,TAG=tag))
  samppath = sampfile.replace('.py','').replace('/','.')
  if samppath not in moddict:
    moddict[samppath] = importlib.import_module(samppath) # save time by loading once
  if not hasattr(moddict[samppath],'samples'):
    LOG.throw(IOError,"Module '%s' must have a list of Sample objects called 'samples'!"%(samppath))
  samplelist = moddict[samppath].samples
  samples    = [ ]
  sampledict = { } # ensure unique sample names
  LOG.verb("getsamples: samplelist=%r"%(samplelist),verb,3)
  for sample in samplelist:
    if filters and not sample.match(filters,verb): continue
    if vetoes and sample.match(vetoes,verb): continue
    if dtypes and sample.dtype not in dtypes: continue
    if channel and sample.channels and not any(fnmatch(channel,c) for c in sample.channels): continue
    if sample.name in sampledict:
      LOG.throw(IOError,"Sample short names should be unique. Found two samples '%s'!\n\t%s\n\t%s"%(
        sample.name,','.join(sampledict[sample.name].paths),','.join(sample.paths)))
    if 'skim' in channel and sample.dosplit: # split samples with multiple DAS dataset paths, and submit as separate jobs
      for subsample in sample.split():
        samples.append(subsample) # keep one-to-one correspondence between subsample and DAS dataset path
    else:
      samples.append(sample)
    sampledict[sample.name] = sample
  return samples
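# A minimal usage sketch (the era, channel and filter pattern are
# hypothetical): load the sample list registered for this era in the
# PicoProducer CONFIG and keep only Drell-Yan MC samples.
samples = getsamples('2018',channel='mutau',dtype='mc',filter="DY*",verb=1)
for sample in samples:
  print ">>> %s: %s"%(sample.name,','.join(sample.paths))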
def comparevars(file, variable, processes, systag, **kwargs):
  """Compare up/down variations of input histograms from ROOT file.
       file:      TFile or TDirectory object
       variable:  Variable object
       processes: list of strings (names of processes)
       systag:    string of systematic (file must contain up/down variations)
     e.g. comparevars(file,variable,['ZTT','TTT'],'TES')
  """
  text      = kwargs.get('text',  None)
  tag       = kwargs.get('tag',   ""  )
  groups    = kwargs.get('group', [ ] ) # e.g. [(['^TT','ST'],'Top')]
  dname     = kwargs.get('dname', None) # directory ('bin') name
  pname     = kwargs.get('save',  "plot_$PROC$SYST$TAG.png") # save as image file
  wname     = kwargs.get('write', "plot_$PROC$SYST$TAG"    ) # write to file
  processes = ensurelist(processes)
  uptag     = systag+"Up"
  downtag   = systag+"Down"
  tdir      = ensureTDirectory(file,dname,cd=True) if dname else file
  for process in processes:
    hists = [ ]
    skip  = False
    for var in [uptag,"",downtag]:
      hname = process+var
      hist  = gethist(tdir,hname,fatal=False,warn=False)
      if not hist:
        skip = True
        break
      hists.append(hist)
    if skip:
      LOG.warning("comparevars: Could not find %r in %s. Skipping shape comparison..."%(hname,tdir.GetPath()))
      continue
    plot = Plot(variable,hists)
    plot.draw(ratio=2,lstyle=1)
    plot.drawlegend()
    if text:
      plot.drawtext(text)
    if pname:
      pname_ = repkey(pname,PROC=process,SYST=systag,TAG=tag) # also replace $SYST so it does not survive in the file name
      plot.saveas(pname_,ext=['png'])
    if wname:
      wname_ = repkey(wname,PROC=process,SYST=systag,TAG=tag)
      plot.canvas.Write(wname_,TH1.kOverwrite)
    plot.close()
def __init__(self, systag, procs, replaceweight=('','',''), **kwargs):
  regexp     = kwargs.pop('regex',False)
  self.procs = procs # list of processes
  self.tag   = repkey(systag,**kwargs)
  self.dn    = self.tag+'Down'
  self.up    = self.tag+'Up'
  #weightnom = replaceweight[0] if regexp else re.escape(replaceweight[0]) # escape non-regexp
  self.wgtup = (replaceweight[0],replaceweight[1],regexp) # (oldweight,newweightUp,regex)
  self.wgtdn = (replaceweight[0],replaceweight[2],regexp) # (oldweight,newweightDown,regex)
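# A minimal usage sketch of this container, assuming these __init__ methods
# belong to a systematic-variation class; the class name "Sys" and the weight
# strings are hypothetical. The replaceweight tuple holds the nominal weight
# and its up/down replacements, to be swapped into the event weight string.
tes = Sys('TES_$ERA',['ZTT','TTT'],ERA='2018') # tag 'TES_2018' -> 'TES_2018Up'/'TES_2018Down'
zpt = Sys('ZPT',['ZTT'],replaceweight=('zptweight','zptweight_up','zptweight_down'))
print zpt.up, zpt.wgtup # 'ZPTUp', ('zptweight','zptweight_up',False)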
def __init__(self, systag, procs, replaceweight=('','',''), **kwargs):
  self.procs = procs # list of processes
  self.tag   = repkey(systag,**kwargs)
  self.dn    = self.tag+'Down'
  self.up    = self.tag+'Up'
  self.wgtup = (replaceweight[0],replaceweight[1]) # (oldweight,newweightUp)
  self.wgtdn = (replaceweight[0],replaceweight[2]) # (oldweight,newweightDown)
def getfiles(self,das=False,refresh=False,url=True,limit=-1,verb=0):
  """Get list of files from storage system (default), or from DAS (if there is no storage system, or das=True)."""
  LOG.verb("getfiles: das=%r, refresh=%r, url=%r, limit=%r, filelist=%r, len(files)=%d, len(filenevts)=%d"%(
    das,refresh,url,limit,self.filelist,len(self.files),len(self.filenevts)),verb,1)
  if self.filelist and not self.files: # get file list from text file for the first time
    self.loadfiles(self.filelist)
  files = self.files # cache for efficiency
  url_  = self.dasurl if (das and self.storage) else self.url
  if self.refreshable and (not files or das or refresh): # (re)derive file list
    if not files or das:
      LOG.verb("getfiles: Retrieving files...",verb,2)
    else:
      LOG.verb("getfiles: Refreshing file list...",verb,2)
    files = [ ]
    for daspath in self.paths: # loop over DAS dataset paths
      self.pathfiles[daspath] = [ ]
      if (self.storage and not das) or (not self.instance): # get files from storage system
        postfix = self.postfix+'.root'
        sepath  = repkey(self.storepath,PATH=daspath,DAS=daspath).replace('//','/')
        outlist = self.storage.getfiles(sepath,url=url,verb=verb-1)
        if limit>0:
          outlist = outlist[:limit]
      else: # get files from DAS
        postfix = '.root'
        outlist = getdasfiles(daspath,instance=self.instance,limit=limit,verb=verb-1)
      for line in outlist: # filter ROOT files
        line = line.strip()
        if line.endswith(postfix) and not any(f.endswith(line) for f in self.blacklist):
          if url and url_ not in line and 'root://' not in line:
            line = url_+line
          files.append(line)
          self.pathfiles[daspath].append(line)
      self.pathfiles[daspath].sort()
      if not self.pathfiles[daspath]:
        LOG.warning("getfiles: Did not find any files for %s"%(daspath))
    files.sort() # for consistent list order
    if not das or not self.storage:
      self.files = files # store cache for efficiency
  elif url and any(url_ not in f for f in files): # add url if missing
    files = [(url_+f if url_ not in f else f) for f in files]
  elif not url and any(url_ in f for f in files): # remove url
    files = [f.replace(url_,"") for f in files]
  return files[:] # pass copy to protect private self.files
def getfiles(self, das=False, refresh=False, url=True, limit=-1, verb=0):
  """Get list of files from storage system (default), or from DAS (if there is no storage system, or das=True)."""
  if isinstance(self.files,str): # get file list from text file for the first time
    self.loadfiles(self.files)
  files = self.files
  url_  = self.dasurl if (das and self.storage) else self.url
  if self.refreshable and (not files or das or refresh):
    files = [ ]
    for daspath in self.paths:
      if (self.storage and not das) or (not self.instance): # get files from storage system
        postfix = self.postfix+'.root'
        sepath  = repkey(self.storepath,PATH=daspath,DAS=daspath).replace('//','/')
        outlist = self.storage.getfiles(sepath,url=url,verb=verb-1)
        if limit>0:
          outlist = outlist[:limit]
      else: # get files from DAS
        postfix = '.root'
        outlist = getdasfiles(daspath,instance=self.instance,limit=limit,verb=verb-1)
      for line in outlist: # filter ROOT files
        line = line.strip()
        if line.endswith(postfix) and not any(f.endswith(line) for f in self.blacklist):
          if url and url_ not in line and 'root://' not in line:
            line = url_+line
          files.append(line)
    files.sort() # for consistent list order
    if not das or not self.storage:
      self.files = files # save for efficiency
  elif url and any(url_ not in f for f in files): # add url if missing
    files = [(url_+f if url_ not in f else f) for f in files]
  elif not url and any(url_ in f for f in files): # remove url
    files = [f.replace(url_,"") for f in files]
  return files[:] # pass copy to protect private self.files
def loadfiles(self, listname, **kwargs):
  """Load filenames from text file for fast look up in future."""
  listname  = repkey(listname,ERA=self.era,GROUP=self.group,SAMPLE=self.name)
  filenevts = self.filenevts
  nevents   = 0
  if self.verbosity>=1:
    print ">>> Loading sample files from %r"%(listname)
  ensurefile(listname,fatal=True)
  filelist = [ ]
  with open(listname,'r') as file:
    for line in file:
      line = line.strip().split()
      if not line: continue
      line = line[0].strip() # remove spaces, one file per line
      if line[0]=='#': continue # skip commented-out lines
      match = fevtsexp.match(line) # match $FILENAME(:NEVTS)
      if not match: continue
      infile = match.group(1)
      if match.group(2): # found nevents in filename
        nevts = int(match.group(2))
        filenevts[infile] = nevts # store/cache in dictionary
        nevents += nevts
      filelist.append(infile)
  if self.nevents<=0:
    self.nevents = nevents
  elif self.nevents!=nevents:
    LOG.warning("loadfiles: stored nevents=%d does not match the sum total of file events, %d!"%(self.nevents,nevents))
    self.nevents = nevents # use sum of file events
  self.files = filelist
  self.files.sort()
  return self.files
def loadfiles(self,listname_,**kwargs):
  """Load filenames from text file for fast look up in future."""
  verbosity = LOG.getverbosity(self,kwargs)
  listname  = repkey(listname_,ERA=self.era,GROUP=self.group,SAMPLE=self.name)
  LOG.verb("loadfiles: listname=%r -> %r, len(files)=%d, len(filenevts)=%d"%(
    listname_,listname,len(self.files),len(self.filenevts)),verbosity,1)
  filenevts = self.filenevts
  nevents   = 0
  #listname = ensurefile(listname,fatal=False)
  filelist  = [ ]
  paths = self.paths if '$PATH' in listname else [self.paths[0]]
  for path in paths:
    listname_ = repkey(listname,PATH=path.strip('/').replace('/','__'))
    if self.verbosity>=1:
      print ">>> Loading sample files from %r..."%(listname_)
    self.pathfiles[path] = [ ]
    if os.path.isfile(listname_):
      skip     = False
      subpaths = [ ] # for sanity check
      with open(listname_,'r') as file:
        for line in file:
          line = line.strip().split() # split at spaces to allow comments at the end
          if not line: continue
          line = line[0].strip() # remove spaces, consider only the first part of the line
          if line[0]=='#': continue # skip commented-out lines
          if line.startswith("DASPATH="): # keep track of multiple DAS dataset paths
            path = line.split('=')[-1] # DAS dataset path
            LOG.insist(path.count('/')>=3 and path.startswith('/'),
              "DAS path %r in %s has wrong format. Need /SAMPLE/CAMPAIGN/FORMAT..."%(path,listname_))
            if path in self.paths: # store file list for this path
              self.pathfiles[path] = [ ]
              subpaths.append(path)
              skip = False
            else: # do not store file list for this path
              skip = True
          else:
            if skip: continue # only load files for this sample's DAS dataset paths
            match = fevtsexp.match(line) # match $FILENAME(:NEVTS)
            if not match: continue
            infile = match.group(1)
            if match.group(2): # found nevents in filename
              nevts = int(match.group(2))
              filenevts[infile] = nevts # store/cache in dictionary
              nevents += nevts
            filelist.append(infile)
            self.pathfiles[path].append(infile)
            if self.verbosity>=3:
              print ">>> %7d events for %s"%(nevts,infile)
      if not filelist:
        LOG.warning("loadfiles: Did not find any files in %s!"%(listname_))
        self.refreshable = True
      else: # sanity check for empty lists
        for subpath in subpaths:
          if not self.pathfiles[subpath]:
            LOG.warning("loadfiles: Did not find any files for path %s in %s!"%(subpath,listname_))
    else:
      LOG.warning("loadfiles: file list %s does not exist!"%(listname_))
      self.refreshable = True
  for path in self.paths:
    if path not in self.pathfiles: # nonexistent list
      LOG.warning("loadfiles: Did not find any files for path %s in %s!"%(path,listname))
  if self.nevents<=0:
    self.nevents = nevents
  elif self.nevents!=nevents:
    LOG.warning("loadfiles: stored nevents=%d does not match the sum total of file events, %d!"%(self.nevents,nevents))
    self.nevents = nevents # use sum of file events
  self.files = filelist
  self.files.sort()
  return self.files
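# Both loadfiles implementations assume a module-level regular expression
# fevtsexp that splits each "$FILENAME(:NEVTS)" line into the filename
# (group 1) and an optional event count (group 2). A minimal sketch of such a
# pattern (an assumption, not necessarily the definition used in this module):
import re
fevtsexp = re.compile(r"(.+\.root)(?::(\d+))?$") # e.g. "file.root:25000" -> ('file.root','25000')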
def getsampleset(datasample, expsamples, sigsamples=[ ], **kwargs):
  """Create sample set from a table of data and MC samples."""
  channel    = kwargs.get('channel',    "")
  era        = kwargs.get('era',        "")
  fpattern   = kwargs.get('file',       None) # file name pattern, e.g. $PICODIR/$SAMPLE_$CHANNEL$TAG.root
  weight     = kwargs.pop('weight',     "") # common weight for MC samples
  dataweight = kwargs.pop('dataweight', "") # weight for data samples
  url        = kwargs.pop('url',        "") # XRootD url
  tag        = kwargs.pop('tag',        "") # extra tag for file name
  if not fpattern:
    fpattern = "$PICODIR/$SAMPLE_$CHANNEL$TAG.root"
  if '$PICODIR' in fpattern:
    import TauFW.PicoProducer.tools.config as GLOB
    CONFIG   = GLOB.getconfig(verb=0)
    picodir  = CONFIG['picodir']
    fpattern = repkey(fpattern,PICODIR=picodir)
  if url:
    fpattern = "%s/%s"%(fpattern,url)
  LOG.verb("getsampleset: fpattern=%r"%(fpattern),level=1)

  # MC (EXPECTED)
  for i, info in enumerate(expsamples[:]):
    expkwargs = kwargs.copy()
    expkwargs['weight'] = weight
    if len(info)==4:
      group, name, title, xsec = info
    elif len(info)==5 and isinstance(info[4],dict):
      group, name, title, xsec, newkwargs = info
      expkwargs.update(newkwargs)
    else:
      LOG.throw(IOError,"Did not recognize MC row %s"%(info))
    fname = repkey(fpattern,ERA=era,GROUP=group,SAMPLE=name,CHANNEL=channel,TAG=tag)
    #print fname
    sample = MC(name,title,fname,xsec,**expkwargs)
    expsamples[i] = sample

  # DATA (OBSERVED)
  title = 'Observed'
  datakwargs = kwargs.copy()
  datakwargs['weight'] = dataweight
  if isinstance(datasample,dict) and channel:
    datasample = datasample[channel]
  if len(datasample)==2:
    group, name = datasample
  elif len(datasample)==3:
    group, name = datasample[:2]
    if isinstance(datasample[2],dict): # dictionary
      datakwargs.update(datasample[2])
    else: # string
      title = datasample[2]
  elif len(datasample)==4 and isinstance(datasample[3],dict):
    group, name, title, newkwargs = datasample
    datakwargs.update(newkwargs)
  else:
    LOG.throw(IOError,"Did not recognize data row %s"%(datasample))
  fpattern = repkey(fpattern,ERA=era,GROUP=group,SAMPLE=name,CHANNEL=channel,TAG=tag)
  fnames   = glob.glob(fpattern)
  #print fnames
  if len(fnames)==1:
    datasample = Data(name,title,fnames[0])
  elif len(fnames)>1: # multiple data files, e.g. per run era
    namerexp = re.compile(name.replace('?','.').replace('*','.*'))
    name     = name.replace('?','').replace('*','')
    datasample = MergedSample(name,'Observed',data=True)
    for fname in fnames:
      setname = namerexp.findall(fname)[0]
      #print setname
      datasample.add(Data(setname,'Observed',fname,**datakwargs))
  else:
    LOG.throw(IOError,"Did not find data file %r"%(fpattern))

  # SAMPLE SET
  sampleset = SampleSet(datasample,expsamples,sigsamples,**kwargs)
  return sampleset
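# A minimal sketch of the expected table layout (the sample names, cross
# sections and extra kwargs below are hypothetical, for illustration only):
expsamples = [ # (GROUP, NAME, TITLE, XSEC [pb], {extra kwargs})
  ( 'DY', "DYJetsToLL_M-50", "Drell-Yan 50",  5343.0, {'extraweight': 'zptweight'} ),
  ( 'WJ', "WJetsToLNu",      "W + jets",     52940.0 ),
]
datasample = ('Data',"SingleMuon_Run2018?") # (GROUP, NAME); '?' globs the run era
sampleset  = getsampleset(datasample,expsamples,channel='mutau',era='2018')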
def main(args):
  print ""
  analysis = 'ztt_tid'
  obsset   = args.observables
  channels = args.channels
  eras     = args.eras
  tag      = ""
  bin      = 'Tight'
  title    = "m_{T} < 60 GeV, D_{#zeta} > -25 GeV, |#Delta#eta| < 1.5" # "mutau, DeepTau2017v2p1VSjet"
  fname    = "$DIR/$ANALYSIS_$OBS_$CHANNEL-$BIN-$ERA$TAG.shapes.root"
  pname    = "$DIR/$OBS_$CHANNEL-$BIN-$ERA$TAG_$FIT.png"
  #outfile = "$TAG/$ANALYSIS_%s_$CHANNEL-$ERA%s.inputs.root"%(obs,tag+outtag)

  # PLOT SETTINGS
  ZTT = "Z -> #tau#tau" # "Z -> #tau_{#mu}#tau_{h}"
  #STYLE.sample_colors['ZTT'] = STYLE.kOrange-4
  STYLE.sample_titles.update({
    'ZTT':      ZTT,
    'ZTT_DM0':  ZTT+", h^{#pm}",
    'ZTT_DM1':  ZTT+", h^{#pm}#pi^{0}",
    'ZTT_DM10': ZTT+", h^{#pm}h^{#mp}h^{#pm}",
    'ZTT_DM11': ZTT+", h^{#pm}h^{#mp}h^{#pm}#pi^{0}",
    'ZJ':       "Z -> ll",
  })
  procs = [
    'ZTT', 'ZL', 'ZJ', 'TTT', 'TTJ', 'W', 'ST', 'VV', 'QCD', 'data_obs'
    #'STT', 'STJ'
  ]
  groups = [
    (['^TT*'],'Top','ttbar'), #,STYLE.sample_colors['TT']),
    #(['^TT*','ST*'],'Top','ttbar and single top'),
    (['W*','ZJ','VV','ST*','QCD'],'EWK','Electroweak'), #,STYLE.sample_colors['EWK']),
  ]
  title_dict = {
    'mvis': "m_{vis} (GeV)",
    'mtau': "m(#tau_{h}) (GeV)",
  }
  tsize  = 0.054
  PLOT._lsize = 0.040 # label size
  ratio  = False
  pos    = 'x=0.56,y=0.88'
  ncol   = 1
  square = not ratio and False
  exts   = ['png','pdf','root','C']
  if "mtau" in obsset:
    procs = ['ZTT_DM0','ZTT_DM1','ZTT_DM10','ZTT_DM11']+procs[1:]
    pos   = 'x=0.22,y=0.85'
    ncol  = 2

  # PLOT
  for era in eras:
    setera(era,extra="")
    for channel in channels:
      for obs in obsset:
        indir  = "output/%s"%era
        outdir = ensuredir("plots/%s"%era)
        xtitle = title_dict.get(obs)
        fname_ = repkey(fname,DIR=indir,ANALYSIS=analysis,OBS=obs,CHANNEL=channel,BIN=bin,ERA=era,TAG=tag)
        pname_ = repkey(pname,DIR=outdir,ANALYSIS=analysis,OBS=obs,CHANNEL=channel,BIN=bin,ERA=era,TAG=tag)
        drawpostfit(fname_,bin,procs,pname=pname_,tag=tag,group=groups,title=title,xtitle=xtitle,
                    tsize=tsize,pos=pos,ncol=ncol,ratio=ratio,square=square,exts=exts)
def main():
  eras      = args.eras
  periods   = cleanEras(args.periods)
  channel   = args.channel
  types     = args.types
  verbosity = args.verbosity
  minbiases = [69.2] if periods else [69.2, 80.0, 69.2*1.046, 69.2*0.954]
  for era in args.eras:
    year       = getyear(era)
    mcfilename = "MC_PileUp_%s.root"%(era)
    jsondir    = os.path.join(datadir,'json',str(year))
    pileup     = os.path.join(jsondir,"pileup_latest.txt")
    CMSStyle.setCMSEra(year)
    if era=='2016':
      # https://twiki.cern.ch/twiki/bin/viewauth/CMS/PdmV2017Analysis
      # /afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/Collisions16/13TeV/ReReco/Final/Cert_271036-284044_13TeV_23Sep2016ReReco_Collisions16_JSON.txt
      # /afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/Collisions16/13TeV/Final/Cert_271036-284044_13TeV_PromptReco_Collisions16_JSON.txt
      # /afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/Collisions16/13TeV/PileUp/pileup_latest.txt
      JSON = os.path.join(jsondir,"Cert_271036-284044_13TeV_ReReco_07Aug2017_Collisions16_JSON.txt")
      datasets = {
        'B': (272007,275376), 'C': (275657,276283), 'D': (276315,276811),
        'E': (276831,277420), 'F': (277772,278808), 'G': (278820,280385),
        'H': (280919,284044),
      }
      campaign = "Moriond17"
      samples  = [
        ( 'TT', "TT"                   ),
        ( 'DY', "DYJetsToLL_M-10to50"  ),
        ( 'DY', "DYJetsToLL_M-50"      ),
        ( 'DY', "DY1JetsToLL_M-50"     ),
        ( 'DY', "DY2JetsToLL_M-50"     ),
        ( 'DY', "DY3JetsToLL_M-50"     ),
        ( 'WJ', "WJetsToLNu"           ),
        ( 'WJ', "W1JetsToLNu"          ),
        ( 'WJ', "W2JetsToLNu"          ),
        ( 'WJ', "W3JetsToLNu"          ),
        ( 'WJ', "W4JetsToLNu"          ),
        ( 'ST', "ST_tW_top"            ),
        ( 'ST', "ST_tW_antitop"        ),
        ( 'ST', "ST_t-channel_top"     ),
        ( 'ST', "ST_t-channel_antitop" ),
        #( 'ST', "ST_s-channel"        ),
        ( 'VV', "WW" ),
        ( 'VV', "WZ" ),
        ( 'VV', "ZZ" ),
      ]
    elif '2017' in era:
      # https://twiki.cern.ch/twiki/bin/viewauth/CMS/PdmV2017Analysis
      # /afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/Collisions16/13TeV/Final/Cert_271036-284044_13TeV_PromptReco_Collisions16_JSON.txt
      # /afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/Collisions17/13TeV/PileUp/pileup_latest.txt
      JSON = os.path.join(jsondir,"Cert_294927-306462_13TeV_PromptReco_Collisions17_JSON.txt")
      datasets = {
        'B': (297020,299329), 'C': (299337,302029), 'D': (302030,303434),
        'E': (303435,304826), 'F': (304911,306462),
      }
      samples_bug = [ ]
      samples_fix = [ ]
      if 'UL' in era:
        campaign = "Summer19"
        samples_fix = [
          #( 'DY', "DYJetsToLL_M-10to50"  ),
          ( 'DY', "DYJetsToLL_M-50"      ),
          ( 'DY', "DY1JetsToLL_M-50"     ),
          ( 'DY', "DY2JetsToLL_M-50"     ),
          ( 'DY', "DY3JetsToLL_M-50"     ),
          ( 'DY', "DY4JetsToLL_M-50"     ),
          #( 'TT', "TTTo2L2Nu"           ),
          ( 'TT', "TTToHadronic"         ),
          #( 'TT', "TTToSemiLeptonic"    ),
          ( 'WJ', "WJetsToLNu"           ),
          ( 'WJ', "W1JetsToLNu"          ),
          ( 'WJ', "W2JetsToLNu"          ),
          ( 'WJ', "W3JetsToLNu"          ),
          ( 'WJ', "W4JetsToLNu"          ),
          ( 'ST', "ST_tW_top"            ),
          ( 'ST', "ST_tW_antitop"        ),
          ( 'ST', "ST_t-channel_top"     ),
          ( 'ST', "ST_t-channel_antitop" ),
          #( 'ST', "ST_s-channel"        ),
          #( 'VV', "WW" ),
          #( 'VV', "WZ" ),
          #( 'VV', "ZZ" ),
        ]
      else:
        campaign = "Winter17_V2"
        samples_bug = [
          ( 'DY', "DYJetsToLL_M-50"      ),
          ( 'WJ', "W3JetsToLNu"          ),
          ( 'VV', "WZ"                   ),
        ]
        samples_fix = [
          ( 'DY', "DYJetsToLL_M-10to50"  ),
          ( 'DY', "DY1JetsToLL_M-50"     ),
          ( 'DY', "DY2JetsToLL_M-50"     ),
          ( 'DY', "DY3JetsToLL_M-50"     ),
          ( 'DY', "DY4JetsToLL_M-50"     ),
          ( 'TT', "TTTo2L2Nu"            ),
          ( 'TT', "TTToHadronic"         ),
          ( 'TT', "TTToSemiLeptonic"     ),
          ( 'WJ', "WJetsToLNu"           ),
          ( 'WJ', "W1JetsToLNu"          ),
          ( 'WJ', "W2JetsToLNu"          ),
          ( 'WJ', "W4JetsToLNu"          ),
          ( 'ST', "ST_tW_top"            ),
          ( 'ST', "ST_tW_antitop"        ),
          ( 'ST', "ST_t-channel_top"     ),
          ( 'ST', "ST_t-channel_antitop" ),
          #( 'ST', "ST_s-channel"        ),
          ( 'VV', "WW" ),
          ( 'VV', "ZZ" ),
        ]
      samples = samples_bug + samples_fix
    else:
      # https://twiki.cern.ch/twiki/bin/viewauth/CMS/PdmV2018Analysis
      # /afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/Collisions18/13TeV/PromptReco
      # /afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/Collisions18/13TeV/PileUp/pileup_latest.txt
      JSON = os.path.join(jsondir,"Cert_314472-325175_13TeV_PromptReco_Collisions18_JSON.txt")
      datasets = {
        'A': (315252,316995), 'B': (317080,319310),
        'C': (319337,320065), 'D': (320673,325175),
      }
      campaign = "Autumn18"
      samples  = [
        ( 'TT', "TTTo2L2Nu"            ),
        ( 'TT', "TTToHadronic"         ),
        ( 'TT', "TTToSemiLeptonic"     ),
        ( 'DY', "DYJetsToLL_M-10to50"  ),
        ( 'DY', "DYJetsToLL_M-50"      ),
        ( 'DY', "DY1JetsToLL_M-50"     ),
        ( 'DY', "DY2JetsToLL_M-50"     ),
        ( 'DY', "DY3JetsToLL_M-50"     ),
        ( 'DY', "DY4JetsToLL_M-50"     ),
        #( 'WJ', "WJetsToLNu"          ),
        ( 'WJ', "W1JetsToLNu"          ),
        ( 'WJ', "W2JetsToLNu"          ),
        ( 'WJ', "W3JetsToLNu"          ),
        ( 'WJ', "W4JetsToLNu"          ),
        ( 'ST', "ST_tW_top"            ),
        ( 'ST', "ST_tW_antitop"        ),
        ( 'ST', "ST_t-channel_top"     ),
        ( 'ST', "ST_t-channel_antitop" ),
        #( 'ST', "ST_s-channel"        ),
        ( 'VV', "WW" ),
        ( 'VV', "WZ" ),
        ( 'VV', "ZZ" ),
      ]

    # SAMPLES FILENAMES
    fname = "$PICODIR/$SAMPLE_$CHANNEL.root"
    if '$PICODIR' in fname:
      import TauFW.PicoProducer.tools.config as GLOB
      CONFIG = GLOB.getconfig(verb=verbosity)
      fname  = repkey(fname,PICODIR=CONFIG['picodir'])
    for i, (group,sample) in enumerate(samples):
      fname_ = repkey(fname,ERA=era,GROUP=group,SAMPLE=sample,CHANNEL=channel) # do not overwrite the pattern itself
      samples[i] = (sample,fname_)
    if verbosity>=1:
      print ">>> samples = %r"%(samples)

    # JSON
    jsons = { }
    if periods:
      outdir = ensuredir("json")
      for period in periods:
        start, end = getPeriodRunNumbers(period,datasets)
        erarun  = "Run%s%s"%(era,period)
        jsonout = "json/"+re.sub(r"\d{6}-\d{6}",erarun,JSON.split('/')[-1])
        filterJSONByRunNumberRange(JSON,jsonout,start,end,verb=verbosity)
        jsons[erarun] = jsonout
    else:
      jsons[era] = JSON

    # DATA
    datahists = { period: [ ] for period in jsons }
    if 'data' in types:
      for period, json in jsons.iteritems():
        for minbias in minbiases:
          filename = "Data_PileUp_%s_%s.root"%(period,str(minbias).replace('.','p'))
          datahist = getDataProfile(filename,json,pileup,100,era,minbias)
          datahists[period].append((minbias,datahist))
    elif args.plot:
      for era in jsons:
        for minbias in minbiases:
          filename = "Data_PileUp_%s_%s.root"%(era,str(minbias).replace('.','p'))
          file, hist = gethist(filename,'pileup',retfile=True)
          if not file or not hist: continue
          hist.SetDirectory(0)
          file.Close()
          datahists[era].append((minbias,hist))

    # MC
    if 'mc' in types:
      mcfilename = "MC_PileUp_%s.root"%(era)
      #mcfilename = "MC_PileUp_%s_%s.root"%(era,campaign)
      getMCProfile(mcfilename,samples,channel,era)
      if args.plot:
        mchist = compareMCProfiles(samples,channel,era)
        for era in jsons:
          for minbias, datahist in datahists[era]:
            compareDataMCProfiles(datahist,mchist,era,minbias)
        deletehist(mchist) # clean memory
      if era=='2017': # also check new/old pmx separately
        mcfilename_bug = mcfilename.replace(".root","_old_pmx.root")
        mcfilename_fix = mcfilename.replace(".root","_new_pmx.root")
        getMCProfile(mcfilename_bug,samples_bug,channel,era)
        getMCProfile(mcfilename_fix,samples_fix,channel,era)
        if args.plot:
          mchist_bug = compareMCProfiles(samples_bug,channel,era,tag="old_pmx")
          mchist_fix = compareMCProfiles(samples_fix,channel,era,tag="new_pmx")
          for era in jsons:
            for minbias, datahist in datahists[era]:
              compareDataMCProfiles(datahist,mchist_bug,era,minbias,tag="old_pmx")
              compareDataMCProfiles(datahist,mchist_fix,era,minbias,tag="new_pmx")

    # FLAT
    if 'flat' in types:
      filename  = "MC_PileUp_%s_FlatPU0to75.root"%era
      hist_flat = getFlatProfile(filename,75)
      for era in jsons:
        for minbias, datahist in datahists[era]:
          compareDataMCProfiles(datahist,hist_flat,era,minbias,tag="FlatPU0to75",rmin=0.0,rmax=3.1)
def harvest(obs, channel, era, **kwargs):
  """Harvest cards."""
  tag      = kwargs.get('tag',      ""           ) # tag for input and output file names
  outtag   = kwargs.get('outtag',   ""           ) # extra tag for output file names
  analysis = kwargs.get('analysis', 'ztt_tid'    )
  indir    = kwargs.get('indir',    'input'      )
  outdir   = kwargs.get('outdir',   'output/$ERA')
  infile   = "$INDIR/$ANALYSIS_$OBS_$CHANNEL-$ERA$TAG.inputs.root"
  infile   = repkey(infile,INDIR=indir,ANALYSIS=analysis,OBS=obs,CHANNEL=channel,ERA=era,TAG=tag)
  outcard  = "$TAG/$ANALYSIS_%s_$CHANNEL-$BINID-$ERA%s.datacard.txt"%(obs,tag+outtag)
  outfile  = "$TAG/$ANALYSIS_%s_$CHANNEL-$ERA%s.inputs.root"%(obs,tag+outtag)
  indir    = repkey(indir,ERA=era,CHANNEL=channel)
  outdir   = repkey(outdir,ERA=era,CHANNEL=channel)

  # HARVESTER
  cats  = [ # categories ("bins")
    'Tight'
  ]
  procs = { # processes
    'sig':   ['ZTT'],
    'bkg':   ['ZL','ZJ','TTT','TTJ','W','QCD','ST','VV'],
    'noQCD': ['ZL','ZJ','TTT','TTJ','W','ST','VV'],
    'DY':    ['ZTT','ZL','ZJ'],
    'TT':    ['TTT','TTJ'],
    'ST':    ['ST'], #'STT','STJ'
    'tau':   ['ZTT','TTT'], #'STT'
  }
  procs['all'] = procs['sig']+procs['bkg']
  if "mtau" in obs:
    for key, plist in procs.iteritems():
      if 'ZTT' in plist:
        procs[key] = ['ZTT_DM0','ZTT_DM1','ZTT_DM10','ZTT_DM11']+plist[1:]
  cats = [c for c in enumerate(cats,1)] # automatically number; ($BINID,$BIN)
  harvester = CombineHarvester()
  harvester.AddObservations(['*'],[analysis],[era],[channel],cats)
  harvester.AddProcesses(['*'],[analysis],[era],[channel],procs['bkg'],cats,False)
  harvester.AddProcesses(['90'],[analysis],[era],[channel],procs['sig'],cats,True)
  #harvester.FilterAll(lambda obj: obj.process() in ['QCD','W','ZJ','STJ'])

  # NORM NUISANCE PARAMETERS
  LOG.color("Defining nuisance parameters ...")
  harvester.cp().process(procs['DY']+procs['TT']+procs['ST']+['VV']).AddSyst(
    harvester, 'lumi', 'lnN', SystMap()(1.025)) # luminosity
  harvester.cp().process(procs['DY']+procs['TT']+procs['ST']+['VV']).AddSyst(
    harvester, 'eff_trig', 'lnN', SystMap()(1.02)) # trigger efficiency
  harvester.cp().process(procs['DY']+procs['TT']+procs['ST']+['VV']).AddSyst(
    harvester, 'eff_m', 'lnN', SystMap()(1.02)) # muon efficiency
  if 'mtau' in obs:
    for dm in [0,1,10,11]:
      sf, err = tauidsfs[era][dm]
      harvester.cp().process(['ZTT_DM%d'%dm]).AddSyst(
        harvester, 'eff_t_dm%s'%dm, 'lnN', SystMap()(1.+err/sf)) # tau eff. SF (DM-dependent)
  #else:
  #  harvester.cp().process(procs['tau']).AddSyst(
  #    harvester, 'eff_t', 'lnN', SystMap()(1.20)) # tau efficiency
  ###harvester.cp().process(procs['DY']+procs['TT']+procs['ST']+['VV']).AddSyst(
  ###  harvester, 'eff_tracking', 'lnN', SystMap()(1.04))
  harvester.cp().process(['W']).AddSyst(
    harvester, 'norm_w', 'lnN', SystMap()(1.15)) # W+jets xsec
  harvester.cp().process(['QCD']).AddSyst(
    harvester, 'norm_qcd', 'lnN', SystMap()(1.20)) # QCD xsec
  harvester.cp().process(procs['DY']).AddSyst(
    harvester, 'xsec_dy', 'lnN', SystMap()(1.02)) # Drell-Yan xsec
  harvester.cp().process(procs['TT']).AddSyst(
    harvester, 'xsec_tt', 'lnN', SystMap()(1.06)) # ttbar xsec
  harvester.cp().process(procs['ST']).AddSyst(
    harvester, 'xsec_st', 'lnN', SystMap()(1.05)) # single top xsec
  harvester.cp().process(['VV']).AddSyst(
    harvester, 'xsec_vv', 'lnN', SystMap()(1.05)) # diboson xsec
  harvester.cp().process(['ZL','TTL','STL']).AddSyst(
    harvester, 'rate_ltf', 'lnN', SystMap()(1.25)) # l -> tau fake rate
  harvester.cp().process(['W','QCD','ZJ','TTJ','STJ']).AddSyst(
    harvester, 'rate_jtf', 'lnN', SystMap()(1.25)) # j -> tau fake rate

  # SHAPE NUISANCE PARAMETERS
  if doshapes:
    harvester.cp().process(['ZJ','W','QCD']).AddSyst( #'ZJ','TTJ','STJ'
      harvester, 'shape_jtf', 'shape', SystMap()(1.00)) # j -> tau fake energy scale
    if 'mtau' in obs:
      harvester.cp().process(['TTT','STT']).AddSyst(
        harvester, 'shape_tid', 'shape', SystMap()(1.00)) # tau eff. SF (pt-dependent)
    else:
      harvester.cp().process(procs['tau']).AddSyst(
        harvester, 'shape_tid', 'shape', SystMap()(1.00)) # tau eff. SF (pt-dependent)
    harvester.cp().process(['ZL']).AddSyst( #bin_id([1,2])
      harvester, 'shape_ltf', 'shape', SystMap()(1.00)) # l -> tau fake energy scale
    harvester.cp().process(procs['DY']).AddSyst(
      harvester, 'shape_dy', 'shape', SystMap()(1.00)) # Z pT reweighting
    #harvester.cp().process(['ZJ','TTT','TTJ','STT','STJ','W','QCD']).AddSyst(
    #  harvester, 'shape_jes', 'shape', SystMap()(1.00)) # jet energy scale
    #harvester.cp().process(['ZJ','TTT','TTJ','STT','STJ','W','QCD']).AddSyst(
    #  harvester, 'shape_jer', 'shape', SystMap()(1.00)) # jet energy resolution
    #harvester.cp().process(['ZJ','TTT','TTJ','STT','STJ','W','QCD']).AddSyst(
    #  harvester, 'shape_uncEn', 'shape', SystMap()(1.00)) # unclustered energy

  # EXTRACT SHAPES
  LOG.color("Extracting shapes...")
  print ">>> file %r"%(infile)
  harvester.cp().channel([channel]).ExtractShapes(infile,"$BIN/$PROCESS","$BIN/$PROCESS_$SYSTEMATIC")

  # RESCALE on the fly
  if 'mtau' in obs:
    for dm in [0,1,10,11]:
      sf, err = tauidsfs[era][dm]
      # bind sf as a default argument, otherwise all lambdas share the last loop value
      harvester.cp().process(['ZTT_DM%d'%dm]).ForEachProc(lambda p, sf=sf: scaleproc(p,sf))

  # AUTOREBIN
  #LOG.color("automatically rebin (30%)...")
  #rebin = AutoRebin().SetBinThreshold(0.).SetBinUncertFraction(0.30).SetRebinMode(1).SetPerformRebin(True).SetVerbosity(1)
  #rebin.Rebin(harvester,harvester)

  # BINS
  LOG.color("Generating unique bin names...")
  bins = harvester.bin_set()
  #SetStandardBinNames(harvester,"%s_$BINID_$ERA"%(obs)) # BIN NAMES

  # BIN-BY-BIN
  if dobbb:
    LOG.color("Generating bbb uncertainties...")
    bbb = BinByBinFactory()
    bbb.SetAddThreshold(0.0)
    bbb.SetFixNorm(False)
    bbb.SetPattern("$PROCESS_bin_$#_$CHANNEL_$BIN")
    bbb.AddBinByBin(harvester,harvester)
    ###bbb.MergeBinErrors(harvester.cp().process(procs['sig']+['W','QCD','ZJ','ZL']))
    ###bbb.SetMergeThreshold(0.0)

  # NUISANCE PARAMETER GROUPS
  LOG.color("Setting nuisance parameter groups...")
  harvester.SetGroup('all',  [".*"])
  harvester.SetGroup('bin',  [".*_bin_.*"])
  harvester.SetGroup('sys',  ["^((?!bin).)*$"]) # everything except bin-by-bin
  harvester.SetGroup('lumi', [".*lumi"])
  harvester.SetGroup('xsec', [".*Xsec.*"])
  harvester.SetGroup('eff',  [".*eff_.*"])
  harvester.SetGroup('norm', [".*(lumi|xsec|norm|eff).*"])
  harvester.SetGroup('jtf',  [".*jtf.*"])
  harvester.SetGroup('ltf',  [".*ltf.*"])
  harvester.SetGroup('es',   [".*shape_(tes|[eml]tf|jes)_.*"])
  harvester.SetGroup('zpt',  [".*shape_dy.*"])

  # PRINT
  if verbosity>=1:
    LOG.color("\n>>> print observation...\n")
    harvester.PrintObs()
    LOG.color("\n>>> print processes...\n")
    harvester.PrintProcs()
    LOG.color("\n>>> print systematics...\n")
    harvester.PrintSysts()
    LOG.color("\n>>> print parameters...\n")
    harvester.PrintParams()
    print "\n"

  # WRITE CARDS
  LOG.color("Writing datacards...")
  writer = CardWriter(outcard,outfile)
  writer.SetVerbosity(verbosity)
  writer.SetWildcardMasses([ ])
  writer.WriteCards(outdir,harvester)

  # REPLACE bin ID by bin name
  for bin, cat in cats:
    oldfilename = repkey(outcard,TAG=outdir,ANALYSIS=analysis,CHANNEL=channel,ERA=era,BINID=str(bin))
    newfilename = repkey(outcard,TAG=outdir,ANALYSIS=analysis,CHANNEL=channel,ERA=era,BINID=cat)
    if os.path.exists(oldfilename):
      os.rename(oldfilename,newfilename)
      print '>>> Renaming "%s" -> "%s"'%(oldfilename,newfilename)
    else:
      print '>>> Warning! "%s" does not exist!'%(oldfilename)
def drawpostfit(fname, bin, procs, **kwargs):
  """Plot pre- and post-fit distributions from PostFitShapesFromWorkspace."""
  print '>>>\n>>> drawpostfit("%s","%s")'%(fname,bin)
  outdir   = kwargs.get('outdir',  ""        )
  pname    = kwargs.get('pname',   "$FIT.png") # replace $FIT = 'prefit', 'postfit'
  ratio    = kwargs.get('ratio',   True      )
  tag      = kwargs.get('tag',     ""        )
  xtitle   = kwargs.get('xtitle',  None      )
  title    = kwargs.get('title',   None      )
  text     = kwargs.get('text',    ""        )
  tsize    = kwargs.get('tsize',   0.050     )
  xmin     = kwargs.get('xmin',    None      )
  xmax     = kwargs.get('xmax',    None      )
  ymargin  = kwargs.get('ymargin', 1.22      )
  groups   = kwargs.get('group',   [ ]       )
  position = kwargs.get('pos',     None      ) # legend position
  ncol     = kwargs.get('ncol',    None      ) # legend columns
  square   = kwargs.get('square',  False     )
  era      = kwargs.get('era',     ""        )
  exts     = kwargs.get('exts',    ['pdf','png']) # figure extensions
  ymax     = None
  fits     = ['prefit','postfit']
  file     = ensureTFile(fname,'READ')
  if outdir:
    ensuredir(outdir)
  if era:
    setera(era)

  # DRAW PRE-/POST-FIT
  for fit in fits:
    fitdirname = "%s_%s"%(bin,fit)
    dir = file.Get(fitdirname)
    if not dir:
      LOG.warning('drawpostfit: Did not find dir "%s"'%(fitdirname),pre="   ")
      return
    obshist  = None
    exphists = [ ]

    # GET HISTS
    for proc in procs: #reversed(samples):
      hname = "%s/%s"%(fitdirname,proc)
      hist  = file.Get(hname)
      if not hist:
        LOG.warning('drawpostfit: Could not find "%s" template in directory "%s_%s"'%(proc,bin,fit),pre="   ")
        continue
      if 'data_obs' in proc:
        obshist = hist
        hist.SetLineColor(1)
        ymax = hist.GetMaximum()*ymargin
      else:
        exphists.append(hist)
      if proc in STYLE.sample_titles:
        hist.SetTitle(STYLE.sample_titles[proc])
      if proc in STYLE.sample_colors:
        hist.SetFillStyle(1001)
        hist.SetFillColor(STYLE.sample_colors[proc])
    if len(exphists)==0:
      LOG.warning('drawpostfit: Could not find any templates in directory "%s"'%(bin),pre="   ")
      continue
    if not obshist:
      LOG.warning('drawpostfit: Could not find a data template in directory "%s"'%(bin),pre="   ")
      continue
    for groupargs in groups:
      grouphists(exphists,*groupargs,replace=True)

    # PLOT
    xtitle     = (xtitle or exphists[0].GetXaxis().GetTitle()) #.replace('[GeV]','(GeV)')
    xmax       = xmax or exphists[0].GetXaxis().GetXmax()
    xmin       = xmin or exphists[0].GetXaxis().GetXmin()
    errtitle   = "Pre-fit stat. + syst. unc." if fit=='prefit' else "Post-fit unc."
    pname_     = repkey(pname,FIT=fit,ERA=era)
    rmin, rmax = (0.28,1.52)
    plot = Stack(xtitle,obshist,exphists)
    plot.draw(xmin=xmin,xmax=xmax,ymax=ymax,square=square,ratio=ratio,rmin=rmin,rmax=rmax,
              staterror=True,errtitle=errtitle)
    plot.drawlegend(position,tsize=tsize,text=text,ncol=ncol)
    if title:
      plot.drawtext(title,bold=False)
    plot.saveas(pname_,outdir=outdir,ext=exts)
    plot.close()
  file.Close()
def __init__(self, group, name, *paths, **kwargs):
  """Container class for CMSSW samples, e.g.:
       - group: DY (used to group similar samples in final output)
       - name:  DYJetsToLL_M-50 (used as shorthand and jobname)
       - path:  /DYJetsToLL_M-50_TuneCP5_13TeV-madgraphMLM-pythia8/RunIIAutumn18NanoAODv6_Nano25Oct2019_102X_mcRun2/NANOAODSIM
       - dtype: 'mc', 'data', 'embed'
  """
  # PATH
  LOG.insist(len(paths)>=1,"Need at least one path to create a sample...")
  if len(paths)==1 and isinstance(paths[0],list):
    paths = paths[0]
  for path in paths:
    LOG.insist(path.count('/')>=3 and path.startswith('/'),
               "DAS path %r has wrong format. Need /SAMPLE/CAMPAIGN/FORMAT."%(path))
    #sample = '/'.join(line.split('/')[-3:])

  # DATA TYPE
  dtype  = kwargs.get('dtype', None)
  dtypes = ['mc','data','embed']
  if dtype==None: # automatic recognition
    path = paths[0]
    if 'Embed' in path:
      dtype = 'embed'
    elif path.endswith('SIM') or any(g in path for g in ['pythia','madgraph']):
      dtype = 'mc'
    elif re.search(r"/Run20\d\d",path):
      dtype = 'data'
  LOG.insist(dtype in dtypes,"Given data type '%s' is not recognized! Please choose from %s..."%(dtype,', '.join(dtypes)))

  # ATTRIBUTES
  self.group        = group
  self.name         = name
  self.paths        = paths # DAS dataset paths
  self.dtype        = dtype
  self.channels     = kwargs.get('channel',       None )
  self.channels     = kwargs.get('channels',      self.channels )
  self.storage      = None
  self.storepath    = kwargs.get('store',         None ) # if stored elsewhere than DAS
  self.url          = kwargs.get('url',           None ) # URL if stored elsewhere
  self.dasurl       = kwargs.get('dasurl',        None ) or "root://cms-xrd-global.cern.ch/" # URL for DAS
  self.blacklist    = kwargs.get('blacklist',     [ ]  ) # file black list
  self.instance     = kwargs.get('instance',      'prod/phys03' if path.endswith('USER') else 'prod/global') # if None, does not exist in DAS
  self.nfilesperjob = kwargs.get('nfilesperjob',  -1   ) # number of nanoAOD files per job
  self.maxevts      = kwargs.get('maxevtsperjob', -1   ) # maximum number of events processed per job
  self.maxevts      = kwargs.get('maxevts',       self.maxevts ) # maximum number of events processed per job
  self.extraopts    = kwargs.get('opts',          [ ]  ) # extra options for analysis module, e.g. ['doZpt=1','tes=1.1']
  self.subtry       = kwargs.get('subtry',        0    ) # to help keep track of resubmission
  self.jobcfg       = kwargs.get('jobcfg',        { }  ) # to help keep track of resubmission
  self.nevents      = kwargs.get('nevts',         0    ) # number of nanoAOD events that can be processed
  self.nevents      = kwargs.get('nevents',       self.nevents ) # cache of number of events
  self.files        = kwargs.get('files',         [ ]  ) # list of ROOT files, OR text file with list of files
  self.filenevts    = { } # cache of number of events for each file
  self.postfix      = kwargs.get('postfix',       None ) or "" # post-fix (before '.root') for stored ROOT files
  self.era          = kwargs.get('era',           ""   ) # for expansion of $ERA variable
  self.dosplit      = kwargs.get('split',         len(self.paths)>=2) # allow splitting (if multiple DAS dataset paths)
  self.verbosity    = kwargs.get('verbosity',     0    ) # verbosity level for debugging
  self.refreshable  = not self.files # allow refresh of file list in getfiles()

  # ENSURE LIST
  if self.channels!=None and not isinstance(self.channels,list):
    self.channels = [self.channels]
  if isinstance(self.extraopts,str):
    if ',' in self.extraopts:
      self.extraopts = self.extraopts.split(',')
    else:
      self.extraopts = [self.extraopts]

  # STORAGE & URL DEFAULTS
  if self.storepath:
    self.storepath = repkey(self.storepath,USER=_user,ERA=self.era,GROUP=self.group,SAMPLE=self.name)
    self.storage   = getstorage(repkey(self.storepath,PATH=self.paths[0],DAS=self.paths[0]),ensure=False)
  if not self.dasurl:
    self.dasurl = self.url if (self.url in dasurls) else dasurls[0]
  if not self.url:
    if self.storepath:
      if self.storage.__class__.__name__=='Local':
        self.url = "" #root://cms-xrd-global.cern.ch/
      else:
        self.url = self.storage.fileurl
    else:
      self.url = self.dasurl

  # GET FILE LIST FROM TEXT FILE
  if isinstance(self.files,str):
    self.loadfiles(self.files)
def plotinputs(fname, varprocs, observables, bins, **kwargs):
  """Plot histogram inputs from ROOT file for datacards, and write to ROOT file.
       fname:       filename pattern of ROOT file
       varprocs:    dictionary of systematic variations to list of processes, e.g.
                    { 'Nom':     ['ZTT','TTT','W','QCD','data_obs'],
                      'TESUp':   ['ZTT','TTT'],
                      'TESDown': ['ZTT','TTT'] }
       observables: list of Variable objects
       bins:        list of Selection objects
  """
  #LOG.header("plotinputs")
  tag       = kwargs.get('tag',    ""                 )
  pname     = kwargs.get('pname',  "$OBS_$BIN$TAG.png")
  outdir    = kwargs.get('outdir', 'plots'            )
  text      = kwargs.get('text',   "$BIN"             )
  groups    = kwargs.get('group',  [ ]                ) # add processes together into one histogram
  verbosity = kwargs.get('verb',   0                  )
  ensuredir(outdir)
  print ">>>\n>>> "+color(" plotting... ",'magenta',bold=True,ul=True)
  for obs in observables:
    obsname = obs.filename
    ftag    = tag+obs.tag
    fname_  = repkey(fname,OBS=obsname,TAG=ftag)
    file    = ensureTFile(fname_,'UPDATE')
    for set, procs in varprocs.iteritems(): # loop over processes with variation
      if set=='Nom':
        systag = "" # no systematics tag for nominal
        procs_ = procs[:]
      else:
        systag = '_'+set # systematics tag for variation, e.g. '_TESUp'
        procs_ = [(p+systag if p in procs else p) for p in varprocs['Nom']] # add tag to varied processes
      for selection in bins:
        if not obs.plotfor(selection): continue
        obs.changecontext(selection)
        bin   = selection.filename
        text_ = repkey(text,BIN=selection.title) # extra text in plot corner
        tdir  = ensureTDirectory(file,bin,cd=True) # directory with histograms
        if set=='Nom':
          gStyle.Write('style',TH1.kOverwrite) # write current TStyle object to reproduce plots

        # STACKS
        pname_ = repkey(pname,OBS=obsname,BIN=bin,TAG=ftag+systag) # image file name
        wname  = "stack"+systag # name in ROOT file
        stackinputs(tdir,obs,procs_,group=groups,save=pname_,write=wname,text=text_)

        # VARIATIONS
        if 'Down' in set:
          systag_ = systag.replace('Down','') # e.g. '_TES' without 'Up' or 'Down' suffix
          pname_  = repkey(pname,OBS=obsname,BIN=bin,TAG=ftag+"_$PROC"+systag) # image file name
          wname   = "plot_$PROC"+systag # name in ROOT file
          comparevars(tdir,obs,procs,systag_,save=pname_,write=wname,text=text_)
    file.Close()
def createinputs(fname, sampleset, observables, bins, **kwargs):
  """Create histogram inputs in ROOT file for datacards.
       fname:       filename pattern of ROOT file
       sampleset:   SampleSet object
       observables: list of Variable objects
       bins:        list of Selection objects
  """
  #LOG.header("createinputs")
  outdir        = kwargs.get('outdir',        ""    )
  tag           = kwargs.get('tag',           ""    ) # file tag
  htag          = kwargs.get('htag',          ""    ) # hist tag for systematic
  filters       = kwargs.get('filter',        None  ) # only create histograms for these processes
  vetoes        = kwargs.get('veto',          None  ) # veto these processes
  parallel      = kwargs.get('parallel',      True  ) # MultiDraw histograms in parallel
  recreate      = kwargs.get('recreate',      False ) # recreate ROOT file
  replaceweight = kwargs.get('replaceweight', None  ) # replace weight
  extraweight   = kwargs.get('weight',        ""    ) # extra weight
  shiftQCD      = kwargs.get('shiftQCD',      0     ) # e.g. 0.30 for 30%
  verbosity     = kwargs.get('verb',          0     )
  option        = 'RECREATE' if recreate else 'UPDATE'
  method        = 'QCD_OSSS' if filters==None or 'QCD' in filters else None
  method        = kwargs.get('method',        method)

  # FILE LOGISTICS: prepare file and directories
  files = { }
  ensuredir(outdir)
  fname = os.path.join(outdir,fname)
  for obs in observables:
    obsname = obs.filename
    ftag    = tag+obs.tag
    fname_  = repkey(fname,OBS=obsname,TAG=tag)
    file    = TFile.Open(fname_,option)
    if recreate:
      print ">>> created file %s"%(fname_)
    for selection in bins:
      if not obs.plotfor(selection): continue
      obs.changecontext(selection)
      ensureTDirectory(file,selection.filename,cd=True,verb=verbosity)
      if recreate:
        string = joincuts(selection.selection,obs.cut)
        TNamed("selection",string).Write() # write exact selection string to ROOT file for the record / debugging
        #TNamed("weight",sampleset.weight).Write()
        LOG.verb("%s selection %r: %r"%(obsname,selection.name,string),verbosity,1)
    files[obs] = file

  # GET HISTS
  for selection in bins:
    bin = selection.filename # bin name
    print ">>>\n>>> "+color(" %s "%(bin),'magenta',bold=True,ul=True)
    if htag:
      print ">>> systematic uncertainty: %s"%(color(htag.lstrip('_'),'grey'))
    if recreate or verbosity>=1:
      print ">>> %r"%(selection.selection)
    hists = sampleset.gethists(observables,selection,method=method,split=True,
                               parallel=parallel,filter=filters,veto=vetoes)

    # SAVE HIST
    ljust = 4+max(11,len(htag)) # extra space
    TAB   = LOG.table("%10.1f %10d  %-18s  %s")
    TAB.printheader('events','entries','variable','process'.ljust(ljust))
    for obs, hist in hists.iterhists():
      name = lreplace(hist.GetName(),obs.filename).strip('_') # histname = $VAR_$NAME (see Sample.gethist)
      if not name.endswith(htag):
        name += htag # HIST = $PROCESS_$SYSTEMATIC
      name = repkey(name,BIN=bin)
      drawopt = 'E1' if 'data' in name else 'EHIST'
      lcolor  = kBlack if any(s in name for s in ['data','ST','VV']) else hist.GetFillColor()
      hist.SetOption(drawopt)
      hist.SetLineColor(lcolor)
      hist.SetFillStyle(0) # no fill in ROOT file
      hist.SetName(name)
      hist.GetXaxis().SetTitle(obs.title)
      for i, yval in enumerate(hist):
        if yval<0: # replace negative bins by zero
          print ">>> replace bin %d (%.3f<0) of %r"%(i,yval,hist.GetName())
          hist.SetBinContent(i,0)
      files[obs].cd(bin) # $FILE:$BIN/$PROCESS_$SYSTEMATIC
      hist.Write(name,TH1.kOverwrite)
      TAB.printrow(hist.GetSumOfWeights(),hist.GetEntries(),obs.printbins(),name)
      deletehist(hist) # clean memory

  # CLOSE
  for obs, file in files.iteritems():
    file.Close()
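# A minimal usage sketch chaining createinputs and plotinputs (the observable,
# selection string and output pattern are hypothetical; Variable and Selection
# are the plotting-framework classes assumed throughout this module):
observables = [ Variable('m_vis',40,0,200) ]
bins        = [ Selection('baseline',"q_1*q_2<0 && iso_1<0.15") ]
createinputs("$OBS_mutau$TAG.root",sampleset,observables,bins,outdir='input',recreate=True)
plotinputs("input/$OBS_mutau$TAG.root",{'Nom': ['ZTT','TTT','W','QCD','data_obs']},
           observables,bins,outdir='plots')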
def main():
  eras      = args.eras
  periods   = cleanPeriods(args.periods)
  channel   = args.channel
  types     = args.types
  verbosity = args.verbosity
  minbiases = [69.2] if periods else [69.2, 69.2*1.046, 69.2*0.954, 80.0]
  fname_    = "$PICODIR/$SAMPLE_$CHANNEL.root" # sample file name
  if 'mc' in types and '$PICODIR' in fname_:
    import TauFW.PicoProducer.tools.config as GLOB
    CONFIG = GLOB.getconfig(verb=verbosity)
    fname_ = repkey(fname_,PICODIR=CONFIG['picodir'])
  for era in args.eras:
    year       = getyear(era)
    mcfilename = "MC_PileUp_%s.root"%(era)
    jsondir    = os.path.join(datadir,'json',str(year))
    pileup     = os.path.join(jsondir,"pileup_latest.txt")
    jname      = getJSON(era)
    CMSStyle.setCMSEra(era)
    samples_bug = [ ] # buggy samples in (pre-UL) 2017 with "old pmx" library
    samples_fix = [ ] # fixed samples in (pre-UL) 2017 with "new pmx" library
    samples = [ # default set of samples
      ( 'DY', "DYJetsToMuTauh_M-50"  ),
      ( 'DY', "DYJetsToLL_M-50"      ),
      ( 'DY', "DY4JetsToLL_M-50"     ),
      ( 'DY', "DY3JetsToLL_M-50"     ),
      ( 'DY', "DY2JetsToLL_M-50"     ),
      ( 'DY', "DY1JetsToLL_M-50"     ),
      ( 'WJ', "WJetsToLNu"           ),
      ( 'WJ', "W4JetsToLNu"          ),
      ( 'WJ', "W3JetsToLNu"          ),
      ( 'WJ', "W2JetsToLNu"          ),
      ( 'WJ', "W1JetsToLNu"          ),
      ( 'TT', "TTToHadronic"         ),
      ( 'TT', "TTTo2L2Nu"            ),
      ( 'TT', "TTToSemiLeptonic"     ),
      ( 'ST', "ST_tW_top"            ),
      ( 'ST', "ST_tW_antitop"        ),
      ( 'ST', "ST_t-channel_top"     ),
      ( 'ST', "ST_t-channel_antitop" ),
      ( 'VV', "WW" ),
      ( 'VV', "WZ" ),
      ( 'VV', "ZZ" ),
    ]
    if era=='2016':
      campaign = "Moriond17"
      if 'UL' in era and 'preVFP' in era:
        campaign = "Summer19"
      elif 'UL' in era:
        campaign = "Summer19"
      else:
        samples = [
          ( 'TT', "TT"                   ),
          ( 'DY', "DYJetsToLL_M-10to50"  ),
          ( 'DY', "DYJetsToLL_M-50"      ),
          ( 'DY', "DY1JetsToLL_M-50"     ),
          ( 'DY', "DY2JetsToLL_M-50"     ),
          ( 'DY', "DY3JetsToLL_M-50"     ),
          ( 'WJ', "WJetsToLNu"           ),
          ( 'WJ', "W1JetsToLNu"          ),
          ( 'WJ', "W2JetsToLNu"          ),
          ( 'WJ', "W3JetsToLNu"          ),
          ( 'WJ', "W4JetsToLNu"          ),
          ( 'ST', "ST_tW_top"            ),
          ( 'ST', "ST_tW_antitop"        ),
          ( 'ST', "ST_t-channel_top"     ),
          ( 'ST', "ST_t-channel_antitop" ),
          #( 'ST', "ST_s-channel"        ),
          ( 'VV', "WW" ),
          ( 'VV', "WZ" ),
          ( 'VV', "ZZ" ),
        ]
    elif '2017' in era:
      if 'UL' in era:
        campaign = "Summer19"
      else:
        campaign = "Winter17_V2"
        samples_bug = [ # buggy samples in (pre-UL) 2017
          ( 'DY', "DYJetsToLL_M-50" ),
          ( 'WJ', "W3JetsToLNu"     ),
          ( 'VV', "WZ"              ),
        ]
        samples_fix = [ # fixed samples in (pre-UL) 2017
          ( 'DY', "DYJetsToLL_M-10to50"  ),
          ( 'DY', "DY1JetsToLL_M-50"     ),
          ( 'DY', "DY2JetsToLL_M-50"     ),
          ( 'DY', "DY3JetsToLL_M-50"     ),
          ( 'DY', "DY4JetsToLL_M-50"     ),
          ( 'TT', "TTTo2L2Nu"            ),
          ( 'TT', "TTToHadronic"         ),
          ( 'TT', "TTToSemiLeptonic"     ),
          ( 'WJ', "WJetsToLNu"           ),
          ( 'WJ', "W1JetsToLNu"          ),
          ( 'WJ', "W2JetsToLNu"          ),
          ( 'WJ', "W4JetsToLNu"          ),
          ( 'ST', "ST_tW_top"            ),
          ( 'ST', "ST_tW_antitop"        ),
          ( 'ST', "ST_t-channel_top"     ),
          ( 'ST', "ST_t-channel_antitop" ),
          #( 'ST', "ST_s-channel"        ),
          ( 'VV', "WW" ),
          ( 'VV', "ZZ" ),
        ]
        samples = samples_bug + samples_fix
    else:
      if 'UL' in era:
        campaign = "Summer19"
      else:
        campaign = "Autumn18"
        samples = [
          ( 'TT', "TTTo2L2Nu"            ),
          ( 'TT', "TTToHadronic"         ),
          ( 'TT', "TTToSemiLeptonic"     ),
          ( 'DY', "DYJetsToLL_M-10to50"  ),
          ( 'DY', "DYJetsToLL_M-50"      ),
          ( 'DY', "DY1JetsToLL_M-50"     ),
          ( 'DY', "DY2JetsToLL_M-50"     ),
          ( 'DY', "DY3JetsToLL_M-50"     ),
          ( 'DY', "DY4JetsToLL_M-50"     ),
          #( 'WJ', "WJetsToLNu"          ),
          ( 'WJ', "W1JetsToLNu"          ),
          ( 'WJ', "W2JetsToLNu"          ),
          ( 'WJ', "W3JetsToLNu"          ),
          ( 'WJ', "W4JetsToLNu"          ),
          ( 'ST', "ST_tW_top"            ),
          ( 'ST', "ST_tW_antitop"        ),
          ( 'ST', "ST_t-channel_top"     ),
          ( 'ST', "ST_t-channel_antitop" ),
          #( 'ST', "ST_s-channel"        ),
          ( 'VV', "WW" ),
          ( 'VV', "WZ" ),
          ( 'VV', "ZZ" ),
        ]

    # SAMPLES FILENAMES
    samples_ = [ ]
    suberas  = [era+"_preVFP",era+"_postVFP"] if era=='UL2016' else [era]
    for subera in suberas:
      for i, (group,sample) in enumerate(samples):
        fname = repkey(fname_,ERA=subera,GROUP=group,SAMPLE=sample,CHANNEL=channel)
        samples_.append((sample,fname))
    samples = samples_ # replace sample list
    if verbosity>=1:
      print ">>> samples = %r"%(samples)

    # JSON
    jsons = { }
    if periods:
      for period in periods:
        erarun  = "Run%s%s"%(era,period) # key for data profiles per run period
        jsonout = filterJSONByRunNumberRange(jname,era,period=period,outdir='json',verb=verbosity)
        jsons[erarun] = jsonout
    else:
      jsons[era] = jname

    # DATA
    datahists = { period: [ ] for period in jsons }
    if 'data' in types:
      for period, json in jsons.iteritems():
        for minbias in minbiases:
          filename = "Data_PileUp_%s_%s.root"%(period,str(minbias).replace('.','p'))
          datahist = getDataProfile(filename,json,pileup,100,era,minbias)
          datahists[period].append((minbias,datahist))
    elif args.plot: # do not create new data profiles, but just load them
      for era in jsons:
        for minbias in minbiases:
          filename = "Data_PileUp_%s_%s.root"%(era,str(minbias).replace('.','p'))
          file, hist = gethist(filename,'pileup',retfile=True)
          if not file or not hist: continue
          hist.SetDirectory(0)
          file.Close()
          datahists[era].append((minbias,hist))

    # MC
    if 'mc' in types:
      assert samples, "compareMCProfiles: Did not find any samples for %r..."%(era)
      mcfilename = "MC_PileUp_%s.root"%(era)
      #mcfilename = "MC_PileUp_%s_%s.root"%(era,campaign)
      getMCProfile(mcfilename,samples,channel,era)
      if args.plot:
        mchist = compareMCProfiles(samples,channel,era)
        for era in jsons:
          for minbias, datahist in datahists[era]:
            compareDataMCProfiles(datahist,mchist,era,minbias)
          compareDataMCProfiles(datahists[era],mchist,era,rmin=0.4,rmax=1.5,delete=True)
        deletehist(mchist) # clean memory
      if era=='2017': #and 'UL' not in era # buggy (pre-UL) 2017: also check new/old pmx separately
        mcfilename_bug = mcfilename.replace(".root","_old_pmx.root")
        mcfilename_fix = mcfilename.replace(".root","_new_pmx.root")
        getMCProfile(mcfilename_bug,samples_bug,channel,era)
        getMCProfile(mcfilename_fix,samples_fix,channel,era)
        if args.plot:
          mchist_bug = compareMCProfiles(samples_bug,channel,era,tag="old_pmx")
          mchist_fix = compareMCProfiles(samples_fix,channel,era,tag="new_pmx")
          for era in jsons:
            for minbias, datahist in datahists[era]:
              compareDataMCProfiles(datahist,mchist_bug,era,minbias,tag="old_pmx")
              compareDataMCProfiles(datahist,mchist_fix,era,minbias,tag="new_pmx")

    # FLAT
    if 'flat' in types:
      filename  = "MC_PileUp_%s_FlatPU0to75.root"%era
      hist_flat = getFlatProfile(filename,75)
      for era in jsons:
        for minbias, datahist in datahists[era]:
          compareDataMCProfiles(datahist,hist_flat,era,minbias,tag="FlatPU0to75",rmin=0.0,rmax=3.1)