Example #1
0
def stackinputs(file, variable, processes, **kwargs):
    """Stack histograms from ROOT file.
       file:       TFile or TDirectory object
       variable:  Variables object
       processes: list of strings (name of processes)
     e.g.
       stackinputs(file,variable,['ZTT','TTT','W','QCD','data_obs'])
  """
    text = kwargs.get('text', None)
    tag = kwargs.get('tag', "")
    groups = kwargs.get('group', [])  # e.g. [(['^TT','ST'],'Top')]
    dname = kwargs.get('dname', None)  # directory ('bin') name
    pname = kwargs.get('save', "stack$TAG.png")  # save as image file
    wname = kwargs.get('write', "stack$TAG")  # write to file
    style = kwargs.get('style', False)  # write style to file

    exphists = []
    datahist = None
    tdir = ensureTDirectory(file, dname, cd=True) if dname else file
    if style:
        gStyle.Write(
            'style',
            TH1.kOverwrite)  # write current TStyle object to reproduce plots
    for process in processes:
        hname = process
        hist = gethist(tdir, process, fatal=False, warn=False)
        if not hist:
            LOG.warning(
                "stackinputs: Could not find %r in %s. Skipping stacked plot..."
                % (process, tdir.GetPath()))
            return
        hist.SetDirectory(0)
        hist.SetLineColor(kBlack)
        hist.SetFillStyle(1001)  # assume fill color is already correct
        if process == 'data_obs':
            datahist = hist
        else:
            exphists.append(hist)
    for group in groups:
        grouphists(exphists, *group, replace=True, regex=True, verb=0)
    stack = Stack(variable, datahist, exphists)
    stack.draw()
    stack.drawlegend(ncols=2, twidth=0.9)
    if text:
        stack.drawtext(text)
    if pname:
        pname = repkey(pname, TAG=tag)
        stack.saveas(pname, ext=['png'])
    if wname:
        wname = repkey(wname, TAG=tag)
        stack.canvas.Write(wname, TH1.kOverwrite)
    stack.close()
Example #2
0
 def writefiles(self,listname,**kwargs):
   """Write filenames to text file for fast look up in future.
   If there is more than one DAS dataset path, write lists separately for each path."""
   kwargs    = kwargs.copy() # do not edit given dictionary
   writeevts = kwargs.pop('nevts',False) # also write nevents to file
   listname  = repkey(listname,ERA=self.era,GROUP=self.group,SAMPLE=self.name)
   ensuredir(os.path.dirname(listname))
   filenevts = self.getfilenevts(checkfiles=True,**kwargs) if writeevts else None
   treename  = kwargs.pop('tree','Events') # do not pass to Sample.getfiles
   kwargs.pop('ncores') # do not pass to Sample.getfiles
   kwargs['refresh'] = False # already got file list in Sample.filenevts
   files     = self.getfiles(**kwargs) # get right URL
   if not files:
     LOG.warning("writefiles: Did not find any files!")
   def _writefile(ofile,fname,prefix=""):
     """Help function to write individual files."""
     if writeevts: # add nevents at end of infile string
       nevts = filenevts.get(fname,-1) # retrieve from cache
       if nevts<0:
         LOG.warning("Did not find nevents of %s. Trying again..."%(fname))
         nevts = getnevents(fname,treename) # get nevents from file
       fname = "%s:%d"%(fname,nevts) # write $FILENAM(:NEVTS)
     ofile.write(prefix+fname+'\n')
   paths = self.paths if '$PATH' in listname else [self.paths[0]]
   for path in paths:
     listname_ = repkey(listname,PATH=path.strip('/').replace('/','__'))
     with open(listname_,'w+') as lfile:
       if '$PATH' in listname: # write only the file list of this path to this text file
         print ">>> Write %s files to list %r..."%(len(self.pathfiles[path]),listname_)
         for infile in self.pathfiles[path]:
           _writefile(lfile,infile)
       elif len(self.paths)<=1: # write file list for the only path
         if self.nevents>0:
           print ">>> Write %s files to list %r..."%(len(files),listname_)
         else:
           print ">>> Write %s files (%d events) to list %r..."%(len(files),self.nevents,listname_)
         for infile in files:
           _writefile(lfile,infile)
       else: # divide up list per DAS dataset path
         if self.nevents>0:
           print ">>> Write %s files to list %r..."%(len(files),listname_)
         else:
           print ">>> Write %s files (%d events) to list %r..."%(len(files),self.nevents,listname_)
         for i, path in enumerate(self.paths):
           print ">>>   %3s files for %s..."%(len(self.pathfiles[path]),path)
           lfile.write("DASPATH=%s\n"%(path)) # write special line to text file, which loadfiles() can parse
           for infile in self.pathfiles[path]: # loop over this list (general list is sorted)
             LOG.insist(infile in files,"Did not find file %s in general list! %s"%(infile,files))
             _writefile(lfile,infile,prefix="  ")
           if i+1<len(self.paths): # add extra white line between blocks
             lfile.write("\n")
Example #3
0
 def writefiles(self, listname, **kwargs):
     """Write filenames to text file for fast look up in future."""
     writeevts = kwargs.pop('nevts', False)  # also write nevents to file
     listname = repkey(listname,
                       ERA=self.era,
                       GROUP=self.group,
                       SAMPLE=self.name)
     print ">>> Write list to %r..." % (listname)
     ensuredir(os.path.dirname(listname))
     filenevts = self.getfilenevts(checkfiles=True, **
                                   kwargs) if writeevts else None
     treename = kwargs.pop('tree', 'Events')
     files = self.getfiles(**kwargs)
     with open(listname, 'w+') as lfile:
         for infile in files:
             if writeevts:
                 nevts = filenevts.get(infile, -1)
                 if nevts < 0:
                     LOG.warning(
                         "Did not find nevents of %s. Trying again..." %
                         (infile))
                     nevts = getnevents(infile, treename)
                 infile = "%s:%d" % (infile, nevts
                                     )  # write $FILENAM(:NEVTS)
             lfile.write(infile + '\n')
Example #4
0
 def saveas(self, *fnames, **kwargs):
     """Save plot, close canvas and delete the histograms."""
     save = kwargs.get('save', True)
     close = kwargs.get('close', False)
     outdir = kwargs.get('outdir', "")  # output directory
     tag = kwargs.get('tag', "")  # extra tag for output file
     exts = kwargs.get('ext', [])  # [".png"]
     pdf = kwargs.get('pdf', False)
     exts = ensurelist(exts)
     if pdf:
         exts.append(".pdf")
     if not fnames:
         fnames = [self.name + tag]
     if save:
         for fname in fnames:
             fname = os.path.join(
                 outdir,
                 repkey(fname, VAR=self.name, NAME=self.name, TAG=tag))
             if exts:
                 for ext in ensurelist(exts):
                     if not ext.startswith('.'):
                         ext = '.' + ext
                     fname = re.sub(
                         r"\.?(png|pdf|jpg|gif|eps|tiff?|cc?|root)?$", ext,
                         fname, re.IGNORECASE)
                     self.canvas.SaveAs(fname)
             elif not any(fname.lower().endswith('.' + e) for e in [
                     'png', 'pdf', 'jpg', 'gif', 'eps', 'tif', 'tiff', 'c',
                     'root'
             ]):
                 self.canvas.SaveAs(fname + ".png")
             else:
                 self.canvas.SaveAs(fname)
     if close:
         self.close()
Example #5
0
def getsamples(era,channel="",tag="",dtype=[],filter=[],veto=[],moddict={},verb=0):
  """Help function to get samples from a sample list and filter if needed."""
  import TauFW.PicoProducer.tools.config as GLOB
  CONFIG   = GLOB.getconfig(verb=verb)
  filters  = filter if not filter or isinstance(filter,list) else [filter]
  vetoes   = veto   if not veto   or isinstance(veto,list)   else [veto]
  dtypes   = dtype  if not dtype  or isinstance(dtype,list)  else [dtype]
  sampfile = ensurefile("samples",repkey(CONFIG.eras[era],ERA=era,CHANNEL=channel,TAG=tag))
  samppath = sampfile.replace('.py','').replace('/','.')
  if samppath not in moddict:
    moddict[samppath] = importlib.import_module(samppath) # save time by loading once
  if not hasattr(moddict[samppath],'samples'):
    LOG.throw(IOError,"Module '%s' must have a list of Sample objects called 'samples'!"%(samppath))
  samplelist = moddict[samppath].samples
  samples    = [ ]
  sampledict = { } # ensure for unique names
  LOG.verb("getsamples: samplelist=%r"%(samplelist),verb,3)
  for sample in samplelist:
    if filters and not sample.match(filters,verb): continue
    if vetoes and sample.match(vetoes,verb): continue
    if dtypes and sample.dtype not in dtypes: continue
    if channel and sample.channels and not any(fnmatch(channel,c) for c in sample.channels): continue
    if sample.name in sampledict:
      LOG.throw(IOError,"Sample short names should be unique. Found two samples '%s'!\n\t%s\n\t%s"%(
                        sample.name,','.join(sampledict[sample.name].paths),','.join(sample.paths)))
    if 'skim' in channel and sample.dosplit: # split samples with multiple DAS dataset paths, and submit as separate jobs
      for subsample in sample.split():
        samples.append(subsample) # keep correspondence sample to one sample in DAS
    else:
      samples.append(sample)
    sampledict[sample.name] = sample
  return samples
Example #6
0
def comparevars(file, variable, processes, systag, **kwargs):
    """Compare up/down variations of input histograms from ROOT file.
       file:      TFile or TDirectory object
       variable:  Variables object
       processes: list of strings (name of processes)
       systag:    string of systematic (file must contain up/down variation)
     e.g.
       comparevars(file,variable,['ZTT','TTT'],'TES')
  """
    text = kwargs.get('text', None)
    pname = kwargs.get('pname', "stack.png")
    tag = kwargs.get('tag', "")
    groups = kwargs.get('group', [])  # e.g. [(['^TT','ST'],'Top')]
    dname = kwargs.get('dname', None)  # directory ('bin') name
    pname = kwargs.get('save', "plot_$PROC$SYST$TAG.png")  # save as image file
    wname = kwargs.get('write', "plot_$PROC$SYST$TAG")  # write to file
    processes = ensurelist(processes)
    uptag = systag + "Up"
    downtag = systag + "Down"
    tdir = ensureTDirectory(file, dname, cd=True) if dname else file
    for process in processes:
        hists = []
        skip = False
        for var in [uptag, "", downtag]:
            hname = process + var
            hist = gethist(tdir, hname, fatal=False, warn=False)
            if not hist:
                skip = True
                break
            hists.append(hist)
        if skip:
            LOG.warning(
                "comparevars: Could not find %r in %s. Skipping shape comparison..."
                % (hname, tdir.GetPath()))
            continue
        plot = Plot(variable, hists)
        plot.draw(ratio=2, lstyle=1)
        plot.drawlegend()
        if text:
            plot.drawtext(text)
        if pname:
            pname_ = repkey(pname, PROC=process, TAG=tag)
            plot.saveas(pname_, ext=['png'])
        if wname:
            wname_ = repkey(wname, PROC=process, TAG=tag)
            plot.canvas.Write(wname_, TH1.kOverwrite)
        plot.close()
Example #7
0
 def __init__(self, systag, procs, replaceweight=('','',''), **kwargs):
   regexp     = kwargs.pop('regex',False)
   self.procs = procs # list of processes
   self.tag   = repkey(systag,**kwargs)
   self.dn    = self.tag +'Down'
   self.up    = self.tag +'Up'
   #weightnom  = replaceweight[0] if regexp else re.escape(replaceweight[0]) # escape non regexp
   self.wgtup = (replaceweight[0],replaceweight[1],regexp) # (oldweight,newweightUp)
   self.wgtdn = (replaceweight[0],replaceweight[2],regexp) # (oldweight,newweightDown)
Example #8
0
 def __init__(self, systag, procs, replaceweight=('', '', ''), **kwargs):
     self.procs = procs  # list of processes
     self.tag = repkey(systag, **kwargs)
     self.dn = self.tag + 'Down'
     self.up = self.tag + 'Up'
     self.wgtup = (replaceweight[0], replaceweight[1]
                   )  # (oldweight,newweightUp)
     self.wgtdn = (replaceweight[0], replaceweight[2]
                   )  # (oldweight,newweightDown)
Example #9
0
 def getfiles(self,das=False,refresh=False,url=True,limit=-1,verb=0):
   """Get list of files from storage system (default), or DAS (if no storage system of das=True)."""
   LOG.verb("getfiles: das=%r, refresh=%r, url=%r, limit=%r, filelist=%r, len(files)=%d, len(filenevts)=%d"%(
     das,refresh,url,limit,self.filelist,len(self.files),len(self.filenevts)),verb,1)
   if self.filelist and not self.files: # get file list from text file for first time
     self.loadfiles(self.filelist)
   files = self.files # cache for efficiency
   url_  = self.dasurl if (das and self.storage) else self.url
   if self.refreshable and (not files or das or refresh): # (re)derive file list
     if not files or das:
       LOG.verb("getfiles: Retrieving files...",verb,2)
     else:
       LOG.verb("getfiles: Refreshing file list...",verb,2)
     files = [ ]
     for daspath in self.paths: # loop over DAS dataset paths
       self.pathfiles[daspath] = [ ]
       if (self.storage and not das) or (not self.instance): # get files from storage system
         postfix = self.postfix+'.root'
         sepath  = repkey(self.storepath,PATH=daspath,DAS=daspath).replace('//','/')
         outlist = self.storage.getfiles(sepath,url=url,verb=verb-1)
         if limit>0:
           outlist = outlist[:limit]
       else: # get files from DAS
         postfix = '.root'
         outlist = getdasfiles(daspath,instance=self.instance,limit=limit,verb=verb-1)
       for line in outlist: # filter root files
         line = line.strip()
         if line.endswith(postfix) and not any(f.endswith(line) for f in self.blacklist):
           if url and url_ not in line and 'root://' not in line:
             line = url_+line
           files.append(line)
           self.pathfiles[daspath].append(line)
       self.pathfiles[daspath].sort()
       if not self.pathfiles[daspath]:
         LOG.warning("getfiles: Did not find any files for %s"%(daspath))
     files.sort() # for consistent list order
     if not das or not self.storage:
       self.files = files # store cache for efficiency
   elif url and any(url_ not in f for f in files): # add url if missing
     files = [(url_+f if url_ not in f else f) for f in files]
   elif not url and any(url_ in f for f in files): # remove url
     files = [f.replace(url_,"") for f in files]
   return files[:] # pass copy to protect private self.files
Example #10
0
 def getfiles(self, das=False, refresh=False, url=True, limit=-1, verb=0):
     """Get list of files from storage system (default), or DAS (if no storage system of das=True)."""
     if isinstance(self.files,
                   str):  # get file list from text file for first time
         self.loadfiles(self.files)
     files = self.files
     url_ = self.dasurl if (das and self.storage) else self.url
     if self.refreshable and (not files or das or refresh):
         files = []
         for daspath in self.paths:
             if (self.storage and not das) or (
                     not self.instance):  # get files from storage system
                 postfix = self.postfix + '.root'
                 sepath = repkey(self.storepath, PATH=daspath,
                                 DAS=daspath).replace('//', '/')
                 outlist = self.storage.getfiles(sepath,
                                                 url=url,
                                                 verb=verb - 1)
                 if limit > 0:
                     outlist = outlist[:limit]
             else:  # get files from DAS
                 postfix = '.root'
                 outlist = getdasfiles(daspath,
                                       instance=self.instance,
                                       limit=limit,
                                       verb=verb - 1)
             for line in outlist:  # filter root files
                 line = line.strip()
                 if line.endswith(postfix) and not any(
                         f.endswith(line) for f in self.blacklist):
                     if url and url_ not in line and 'root://' not in line:
                         line = url_ + line
                     files.append(line)
         files.sort()  # for consistent list order
         if not das or not self.storage:
             self.files = files  # save for efficiency
     elif url and any(url_ not in f for f in files):  # add url if missing
         files = [(url_ + f if url_ not in f else f) for f in files]
     elif not url and any(url_ in f for f in files):  # remove url
         files = [f.replace(url_, "") for f in files]
     return files[:]  # pass copy to protect private self.files
Example #11
0
 def loadfiles(self, listname, **kwargs):
     """Load filenames from text file for fast look up in future."""
     listname = repkey(listname,
                       ERA=self.era,
                       GROUP=self.group,
                       SAMPLE=self.name)
     filenevts = self.filenevts
     nevents = 0
     if self.verbosity + 2 >= 1:
         print ">>> Loading sample files from '%r'" % (listname)
     ensurefile(listname, fatal=True)
     filelist = []
     with open(listname, 'r') as file:
         for line in file:
             line = line.strip().split()
             if not line: continue
             line = line[0].strip()  # remove spaces, one per line
             if line[0] == '#': continue  # do not consider out-commented
             #if v.endswith('.root'):
             match = fevtsexp.match(line)  # match $FILENAM(:NEVTS)
             if not match: continue
             infile = match.group(1)
             if match.group(2):  # found nevents in filename
                 nevts = int(match.group(2))
                 filenevts[infile] = nevts  # store/cache in dictionary
                 nevents += nevts
             filelist.append(infile)
     if self.nevents <= 0:
         self.nevents = nevents
     elif self.nevents != nevents:
         LOG.warning(
             "loadfiles: stored nevents=%d does not match the sum total of file events, %d!"
             % (self.nevents, nevents))
         self.nevents == nevents
     self.files = filelist
     self.files.sort()
     return self.files
Example #12
0
 def loadfiles(self,listname_,**kwargs):
   verbosity = LOG.getverbosity(self,kwargs)
   """Load filenames from text file for fast look up in future."""
   listname  = repkey(listname_,ERA=self.era,GROUP=self.group,SAMPLE=self.name)
   LOG.verb("loadfiles: listname=%r -> %r, len(files)=%d, len(filenevts)=%d"%(
     listname_,listname,len(self.files),len(self.filenevts)),verbosity,1)
   filenevts = self.filenevts
   nevents   = 0
   #listname = ensurefile(listname,fatal=False)
   filelist = [ ]
   paths = self.paths if '$PATH' in listname else [self.paths[0]]
   for path in paths:
     listname_ = repkey(listname,PATH=path.strip('/').replace('/','__'))
     if self.verbosity>=1:
       print ">>> Loading sample files from %r..."%(listname_)
     self.pathfiles[path] = [ ]
     if os.path.isfile(listname_):
       skip = False
       subpaths = [ ] # for sanity check
       with open(listname_,'r') as file:
         for line in file:
           line = line.strip().split() # split at space to allow comments at end
           if not line: continue
           line = line[0].strip() # remove spaces, consider only first part of the line
           if line[0]=='#': continue # do not consider comments
           #if line.endswith('.root'):
           if line.startswith("DASPATH="): # to keep track of multiple DAS data set paths
             path = line.split('=')[-1] # DAS data set path
             LOG.insist(path.count('/')>=3 and path.startswith('/'),
               "DAS path %r in %s has wrong format. Need /SAMPLE/CAMPAIGN/FORMAT..."%(path,listname_))
             if path in self.paths: # store file list for this path
               self.pathfiles[path] = [ ]
               subpaths.append(path)
               skip = False
             else: # do not store file list for this path
               skip = True
           else:
             if skip: continue # only load files for this sample's DAS dataset paths
             match = fevtsexp.match(line) # match $FILENAM(:NEVTS)
             if not match: continue
             infile = match.group(1)
             if match.group(2): # found nevents in filename
               nevts  = int(match.group(2))
               filenevts[infile] = nevts # store/cache in dictionary
               nevents += nevts
             filelist.append(infile)
             self.pathfiles[path].append(infile)
             if self.verbosity>=3:
               print ">>> %7d events for %s"%(nevts,infile)
       if not filelist:
         LOG.warning("loadfiles: Did not find any files in %s!"%(listname_))
         self.refreshable = True
       else: # sanity check for empty list
         for subpath in subpaths:
           if not self.pathfiles[subpath]:
             LOG.warning("loadfiles: Did not find any files for path %s in %s!"%(subpath,listname_))
     else:
       LOG.warning("loadfiles: file list %s does not exist!"%(listname_))
       self.refreshable = True
   for path in self.paths:
     if path not in self.pathfiles: # nonexistent list
       LOG.warning("loadfiles: Did not find any files for path %s in %s!"%(path,listname))
   if self.nevents<=0:
     self.nevents = nevents
   elif self.nevents!=nevents:
     LOG.warning("loadfiles: stored nevents=%d does not match the sum total of file events, %d!"%(self.nevents,nevents))
     self.nevents == nevents
   self.files = filelist
   self.files.sort()
   return self.files
Example #13
0
def getsampleset(datasample, expsamples, sigsamples=[], **kwargs):
    """Create sample set from a table of data and MC samples."""
    channel = kwargs.get('channel', "")
    era = kwargs.get('era', "")
    fpattern = kwargs.get(
        'file',
        None)  # file name pattern, e.g. $PICODIR/$SAMPLE_$CHANNEL$TAG.root
    weight = kwargs.pop('weight', "")  # common weight for MC samples
    dataweight = kwargs.pop('dataweight', "")  # weight for data samples
    url = kwargs.pop('url', "")  # XRootD url
    tag = kwargs.pop('tag', "")  # extra tag for file name

    if not fpattern:
        fpattern = "$PICODIR/$SAMPLE_$CHANNEL$TAG.root"
    if '$PICODIR' in fpattern:
        import TauFW.PicoProducer.tools.config as GLOB
        CONFIG = GLOB.getconfig(verb=0)
        picodir = CONFIG['picodir']
        fpattern = repkey(fpattern, PICODIR=picodir)
    if url:
        fpattern = "%s/%s" % (fpattern, url)
    LOG.verb("getsampleset: fpattern=%r" % (fpattern), level=1)

    # MC (EXPECTED)
    for i, info in enumerate(expsamples[:]):
        expkwargs = kwargs.copy()
        expkwargs['weight'] = weight
        if len(info) == 4:
            group, name, title, xsec = info
        elif len(info) == 5 and isinstance(info[4], dict):
            group, name, title, xsec, newkwargs = info
            expkwargs.update(newkwargs)
        else:
            LOG.throw(IOError, "Did not recognize mc row %s" % (info))
        fname = repkey(fpattern,
                       ERA=era,
                       GROUP=group,
                       SAMPLE=name,
                       CHANNEL=channel,
                       TAG=tag)
        #print fname
        sample = MC(name, title, fname, xsec, **expkwargs)
        expsamples[i] = sample

    # DATA (OBSERVED)
    title = 'Observed'
    datakwargs = kwargs.copy()
    datakwargs['weight'] = dataweight
    if isinstance(datasample, dict) and channel:
        datasample = datasample[channel]
    if len(datasample) == 2:
        group, name = datasample
    elif len(datasample) == 3:
        group, name = datasample[:2]
        if isinstance(datasample[2], dict):  # dictionary
            datakwargs.update(datasample[2])
        else:  # string
            title = datasample[2]
    elif len(datasample) == 4 and isinstance(datasample[3], dict):
        group, name, title, newkwargs = datasample
        datakwargs.update(newkwargs)
    else:
        LOG.throw(IOError, "Did not recognize data row %s" % (datasample))
    fpattern = repkey(fpattern,
                      ERA=era,
                      GROUP=group,
                      SAMPLE=name,
                      CHANNEL=channel,
                      TAG=tag)
    fnames = glob.glob(fpattern)
    #print fnames
    if len(fnames) == 1:
        datasample = Data(name, title, fnames[0])
    elif len(fnames) > 1:
        namerexp = re.compile(name.replace('?', '.').replace('*', '.*'))
        name = name.replace('?', '').replace('*', '')
        datasample = MergedSample(name, 'Observed', data=True)
        for fname in fnames:
            setname = namerexp.findall(fname)[0]
            #print setname
            datasample.add(Data(setname, 'Observed', fname, **datakwargs))
    else:
        LOG.throw(IOError, "Did not find data file %r" % (fpattern))

    # SAMPLE SET
    sampleset = SampleSet(datasample, expsamples, sigsamples, **kwargs)
    return sampleset
Example #14
0
def main(args):
    print ""
    analysis = 'ztt_tid'
    obsset = args.observables
    channels = args.channels
    eras = args.eras
    tag = ""
    bin = 'Tight'
    title = "m_{T} < 60 GeV, D_{#zeta} > -25 GeV, |#Delta#eta| < 1.5"  #"mutau, DeepTau2017v2p1VSjet"
    fname = "$DIR/$ANALYSIS_$OBS_$CHANNEL-$BIN-$ERA$TAG.shapes.root"
    pname = "$DIR/$OBS_$CHANNEL-$BIN-$ERA$TAG_$FIT.png"
    #outfile  = "$TAG/$ANALYSIS_%s_$CHANNEL-$ERA%s.inputs.root"%(obs,tag+outtag)

    # PLOT SETTINGS
    ZTT = "Z -> #tau#tau"  # "Z -> #tau_{#mu}#tau_{h}"
    #STYLE.sample_colors['ZTT'] = STYLE.kOrange-4
    STYLE.sample_titles.update({
        'ZTT': ZTT,
        'ZTT_DM0': ZTT + ", h^{#pm}",
        'ZTT_DM1': ZTT + ", h^{#pm}#pi^{0}",
        'ZTT_DM10': ZTT + ", h^{#pm}h^{#mp}h^{#pm}",
        'ZTT_DM11': ZTT + ", h^{#pm}h^{#mp}h^{#pm}#pi^{0}",
        'ZJ': "Z -> ll",
    })
    procs = [
        'ZTT',
        'ZL',
        'ZJ',
        'TTT',
        'TTJ',
        'W',
        'ST',
        'VV',
        'QCD',
        'data_obs'  #'STT', 'STJ'
    ]
    groups = [
        (['^TT*'], 'Top', 'ttbar'),  #,STYLE.sample_colors['TT']),
        #(['^TT*','ST*'],'Top','ttbar and single top'),
        (['W*', 'ZJ', 'VV', 'ST*',
          'QCD'], 'EWK', 'Electroweak'),  #,STYLE.sample_colors['EWK']),
    ]
    title_dict = {
        'mvis': "m_{vis} (GeV)",
        'mtau': "m(#tau_{h}) (GeV)",
    }
    tsize = 0.054
    PLOT._lsize = 0.040  # label size
    ratio = False
    pos = 'x=0.56,y=0.88'
    ncol = 1
    square = not ratio and False
    exts = ['png', 'pdf', 'root', 'C']
    if "mtau" in obsset:
        procs = ['ZTT_DM0', 'ZTT_DM1', 'ZTT_DM10', 'ZTT_DM11'] + procs[1:]
        pos = 'x=0.22,y=0.85'
        ncol = 2

    # PLOT
    for era in eras:
        setera(era, extra="")
        for channel in channels:
            for obs in obsset:
                indir = "output/%s" % era
                outdir = ensuredir("plots/%s" % era)
                xtitle = title_dict.get(obs)
                fname_ = repkey(fname,
                                DIR=indir,
                                ANALYSIS=analysis,
                                OBS=obs,
                                CHANNEL=channel,
                                BIN=bin,
                                ERA=era,
                                TAG=tag)
                pname_ = repkey(pname,
                                DIR=outdir,
                                ANALYSIS=analysis,
                                OBS=obs,
                                CHANNEL=channel,
                                BIN=bin,
                                ERA=era,
                                TAG=tag)
                drawpostfit(fname_,
                            bin,
                            procs,
                            pname=pname_,
                            tag=tag,
                            group=groups,
                            title=title,
                            xtitle=xtitle,
                            tsize=tsize,
                            pos=pos,
                            ncol=ncol,
                            ratio=ratio,
                            square=square,
                            exts=exts)
Example #15
0
def main():

    eras = args.eras
    periods = cleanEras(args.periods)
    channel = args.channel
    types = args.types
    verbosity = args.verbosity
    minbiases = [69.2] if periods else [69.2, 80.0, 69.2 * 1.046, 69.2 * 0.954]

    for era in args.eras:
        year = getyear(era)
        mcfilename = "MC_PileUp_%s.root" % (era)
        jsondir = os.path.join(datadir, 'json', str(year))
        pileup = os.path.join(jsondir, "pileup_latest.txt")
        CMSStyle.setCMSEra(year)
        if era == '2016':
            # https://twiki.cern.ch/twiki/bin/viewauth/CMS/PdmV2017Analysis
            # /afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/Collisions16/13TeV/ReReco/Final/Cert_271036-284044_13TeV_23Sep2016ReReco_Collisions16_JSON.txt"
            # /afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/Collisions16/13TeV/Final/Cert_271036-284044_13TeV_PromptReco_Collisions16_JSON.txt
            # /afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/Collisions16/13TeV/PileUp/pileup_latest.txt
            JSON = os.path.join(
                jsondir,
                "Cert_271036-284044_13TeV_ReReco_07Aug2017_Collisions16_JSON.txt"
            )
            datasets = {
                'B': (272007, 275376),
                'C': (275657, 276283),
                'D': (276315, 276811),
                'E': (276831, 277420),
                'F': (277772, 278808),
                'G': (278820, 280385),
                'H': (280919, 284044),
            }
            campaign = "Moriond17"
            samples = [
                (
                    'TT',
                    "TT",
                ),
                (
                    'DY',
                    "DYJetsToLL_M-10to50",
                ),
                (
                    'DY',
                    "DYJetsToLL_M-50",
                ),
                (
                    'DY',
                    "DY1JetsToLL_M-50",
                ),
                (
                    'DY',
                    "DY2JetsToLL_M-50",
                ),
                (
                    'DY',
                    "DY3JetsToLL_M-50",
                ),
                (
                    'WJ',
                    "WJetsToLNu",
                ),
                (
                    'WJ',
                    "W1JetsToLNu",
                ),
                (
                    'WJ',
                    "W2JetsToLNu",
                ),
                (
                    'WJ',
                    "W3JetsToLNu",
                ),
                (
                    'WJ',
                    "W4JetsToLNu",
                ),
                (
                    'ST',
                    "ST_tW_top",
                ),
                (
                    'ST',
                    "ST_tW_antitop",
                ),
                (
                    'ST',
                    "ST_t-channel_top",
                ),
                (
                    'ST',
                    "ST_t-channel_antitop",
                ),
                #( 'ST', "ST_s-channel",         ),
                (
                    'VV',
                    "WW",
                ),
                (
                    'VV',
                    "WZ",
                ),
                (
                    'VV',
                    "ZZ",
                ),
            ]
        elif '2017' in era:
            # https://twiki.cern.ch/twiki/bin/viewauth/CMS/PdmV2017Analysis
            # /afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/Collisions16/13TeV/Final/Cert_271036-284044_13TeV_PromptReco_Collisions16_JSON.txt
            # /afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/Collisions17/13TeV/PileUp/pileup_latest.txt
            JSON = os.path.join(
                jsondir,
                "Cert_294927-306462_13TeV_PromptReco_Collisions17_JSON.txt")
            datasets = {
                'B': (297020, 299329),
                'C': (299337, 302029),
                'D': (302030, 303434),
                'E': (303435, 304826),
                'F': (304911, 306462),
            }
            samples_bug = []
            samples_fix = []
            if 'UL' in era:
                campaign = "Summer19"
                samples_fix = [
                    #( 'DY', "DYJetsToLL_M-10to50",  ),
                    (
                        'DY',
                        "DYJetsToLL_M-50",
                    ),
                    (
                        'DY',
                        "DY1JetsToLL_M-50",
                    ),
                    (
                        'DY',
                        "DY2JetsToLL_M-50",
                    ),
                    (
                        'DY',
                        "DY3JetsToLL_M-50",
                    ),
                    (
                        'DY',
                        "DY4JetsToLL_M-50",
                    ),
                    #( 'TT', "TTTo2L2Nu",            ),
                    (
                        'TT',
                        "TTToHadronic",
                    ),
                    #( 'TT', "TTToSemiLeptonic",     ),
                    (
                        'WJ',
                        "WJetsToLNu",
                    ),
                    (
                        'WJ',
                        "W1JetsToLNu",
                    ),
                    (
                        'WJ',
                        "W2JetsToLNu",
                    ),
                    (
                        'WJ',
                        "W3JetsToLNu",
                    ),
                    (
                        'WJ',
                        "W4JetsToLNu",
                    ),
                    (
                        'ST',
                        "ST_tW_top",
                    ),
                    (
                        'ST',
                        "ST_tW_antitop",
                    ),
                    (
                        'ST',
                        "ST_t-channel_top",
                    ),
                    (
                        'ST',
                        "ST_t-channel_antitop",
                    ),
                    #( 'ST', "ST_s-channel",         ),
                    #( 'VV', "WW",                   ),
                    #( 'VV', "WZ",                   ),
                    #( 'VV', "ZZ",                   ),
                ]
            else:
                campaign = "Winter17_V2"
                samples_bug = [
                    (
                        'DY',
                        "DYJetsToLL_M-50",
                    ),
                    (
                        'WJ',
                        "W3JetsToLNu",
                    ),
                    (
                        'VV',
                        "WZ",
                    ),
                ]
                samples_fix = [
                    (
                        'DY',
                        "DYJetsToLL_M-10to50",
                    ),
                    (
                        'DY',
                        "DY1JetsToLL_M-50",
                    ),
                    (
                        'DY',
                        "DY2JetsToLL_M-50",
                    ),
                    (
                        'DY',
                        "DY3JetsToLL_M-50",
                    ),
                    (
                        'DY',
                        "DY4JetsToLL_M-50",
                    ),
                    (
                        'TT',
                        "TTTo2L2Nu",
                    ),
                    (
                        'TT',
                        "TTToHadronic",
                    ),
                    (
                        'TT',
                        "TTToSemiLeptonic",
                    ),
                    (
                        'WJ',
                        "WJetsToLNu",
                    ),
                    (
                        'WJ',
                        "W1JetsToLNu",
                    ),
                    (
                        'WJ',
                        "W2JetsToLNu",
                    ),
                    (
                        'WJ',
                        "W4JetsToLNu",
                    ),
                    (
                        'ST',
                        "ST_tW_top",
                    ),
                    (
                        'ST',
                        "ST_tW_antitop",
                    ),
                    (
                        'ST',
                        "ST_t-channel_top",
                    ),
                    (
                        'ST',
                        "ST_t-channel_antitop",
                    ),
                    #( 'ST', "ST_s-channel",         ),
                    (
                        'VV',
                        "WW",
                    ),
                    (
                        'VV',
                        "ZZ",
                    ),
                ]
            samples = samples_bug + samples_fix
        else:
            # https://twiki.cern.ch/twiki/bin/viewauth/CMS/PdmV2018Analysis
            # /afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/Collisions18/13TeV/PromptReco
            # /afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/Collisions18/13TeV/PileUp/pileup_latest.txt
            JSON = os.path.join(
                jsondir,
                "Cert_314472-325175_13TeV_PromptReco_Collisions18_JSON.txt")
            datasets = {
                'A': (315252, 316995),
                'B': (317080, 319310),
                'C': (319337, 320065),
                'D': (320673, 325175),
            }
            campaign = "Autumn18"
            samples = [
                (
                    'TT',
                    "TTTo2L2Nu",
                ),
                (
                    'TT',
                    "TTToHadronic",
                ),
                (
                    'TT',
                    "TTToSemiLeptonic",
                ),
                (
                    'DY',
                    "DYJetsToLL_M-10to50",
                ),
                (
                    'DY',
                    "DYJetsToLL_M-50",
                ),
                (
                    'DY',
                    "DY1JetsToLL_M-50",
                ),
                (
                    'DY',
                    "DY2JetsToLL_M-50",
                ),
                (
                    'DY',
                    "DY3JetsToLL_M-50",
                ),
                (
                    'DY',
                    "DY4JetsToLL_M-50",
                ),
                #( 'WJ', "WJetsToLNu",           ),
                (
                    'WJ',
                    "W1JetsToLNu",
                ),
                (
                    'WJ',
                    "W2JetsToLNu",
                ),
                (
                    'WJ',
                    "W3JetsToLNu",
                ),
                (
                    'WJ',
                    "W4JetsToLNu",
                ),
                (
                    'ST',
                    "ST_tW_top",
                ),
                (
                    'ST',
                    "ST_tW_antitop",
                ),
                (
                    'ST',
                    "ST_t-channel_top",
                ),
                (
                    'ST',
                    "ST_t-channel_antitop",
                ),
                #( 'ST', "ST_s-channel",         ),
                (
                    'VV',
                    "WW",
                ),
                (
                    'VV',
                    "WZ",
                ),
                (
                    'VV',
                    "ZZ",
                ),
            ]

        # SAMPLES FILENAMES
        fname = "$PICODIR/$SAMPLE_$CHANNEL.root"
        if '$PICODIR' in fname:
            import TauFW.PicoProducer.tools.config as GLOB
            CONFIG = GLOB.getconfig(verb=verbosity)
            fname = repkey(fname, PICODIR=CONFIG['picodir'])
        for i, (group, sample) in enumerate(samples):
            fname = repkey(fname,
                           ERA=era,
                           GROUP=group,
                           SAMPLE=sample,
                           CHANNEL=channel)
            samples[i] = (sample, fname)
        if verbosity >= 1:
            print ">>> samples = %r" % (samples)

        # JSON
        jsons = {}
        if periods:
            outdir = ensuredir("json")
            for period in periods:
                start, end = getPeriodRunNumbers(period, datasets)
                erarun = "Run%d%s" % (era, period)
                jsonout = "json/" + re.sub(r"\d{6}-\d{6}", erarun,
                                           JSON.split('/')[-1])
                filterJSONByRunNumberRange(JSON,
                                           jsonout,
                                           start,
                                           end,
                                           verb=verbosity)
                jsons[erarun] = jsonout
        else:
            jsons[era] = JSON

        # DATA
        datahists = {period: [] for period in jsons}
        if 'data' in types:
            for period, json in jsons.iteritems():
                for minbias in minbiases:
                    filename = "Data_PileUp_%s_%s.root" % (
                        period, str(minbias).replace('.', 'p'))
                    datahist = getDataProfile(filename, json, pileup, 100, era,
                                              minbias)
                    datahists[period].append((minbias, datahist))
        elif args.plot:
            for era in jsons:
                for minbias in minbiases:
                    filename = "Data_PileUp_%s_%s.root" % (
                        era, str(minbias).replace('.', 'p'))
                    file, hist = gethist(filename, 'pileup', retfile=True)
                    if not file or not hist: continue
                    hist.SetDirectory(0)
                    file.Close()
                    datahists[era].append((minbias, hist))

        # MC
        if 'mc' in types:
            mcfilename = "MC_PileUp_%s.root" % (era)
            #mcfilename = "MC_PileUp_%s_%s.root"%(era,campaign)
            getMCProfile(mcfilename, samples, channel, era)
            if args.plot:
                mchist = compareMCProfiles(samples, channel, era)
                for era in jsons:
                    for minbias, datahist in datahists[era]:
                        compareDataMCProfiles(datahist, mchist, era, minbias)
                deletehist(mchist)  # clean memory
            if era == '2017':  # also check new/old pmx separately
                mcfilename_bug = mcfilename.replace(".root", "_old_pmx.root")
                mcfilename_fix = mcfilename.replace(".root", "_new_pmx.root")
                getMCProfile(mcfilename_bug, samples_bug, channel, era)
                getMCProfile(mcfilename_fix, samples_fix, channel, era)
                if args.plot:
                    mchist_bug = compareMCProfiles(samples_bug,
                                                   channel,
                                                   era,
                                                   tag="old_pmx")
                    mchist_fix = compareMCProfiles(samples_fix,
                                                   channel,
                                                   era,
                                                   tag="new_pmx")
                    for era in jsons:
                        for minbias, datahist in datahists[era]:
                            compareDataMCProfiles(datahist,
                                                  mchist_bug,
                                                  era,
                                                  minbias,
                                                  tag="old_pmx")
                            compareDataMCProfiles(datahist,
                                                  mchist_fix,
                                                  era,
                                                  minbias,
                                                  tag="new_pmx")

        # FLAT
        if 'flat' in types:
            filename = "MC_PileUp_%d_FlatPU0to75.root" % era
            hist_flat = getFlatProfile(filename, 75)
            for era in jsons:
                for minbias, datahist in datahists[era]:
                    compareDataMCProfiles(datahist,
                                          hist_flat,
                                          era,
                                          minbias,
                                          tag="FlatPU0to75",
                                          rmin=0.0,
                                          rmax=3.1)
Example #16
0
def harvest(obs, channel, era, **kwargs):
    """Harvest cards."""

    tag = kwargs.get('tag', "")  # tag for input and output file names
    outtag = kwargs.get('outtag', "")  # extra tag for output file names
    analysis = kwargs.get('analysis', 'ztt_tid')
    indir = kwargs.get('indir', 'input')
    outdir = kwargs.get('outdir', 'output/$ERA')
    infile = "$INDIR/$ANALYSIS_$OBS_$CHANNEL-$ERA$TAG.inputs.root"
    infile = repkey(infile,
                    INDIR=indir,
                    ANALYSIS=analysis,
                    OBS=obs,
                    CHANNEL=channel,
                    ERA=era,
                    TAG=tag)
    outcard = "$TAG/$ANALYSIS_%s_$CHANNEL-$BINID-$ERA%s.datacard.txt" % (
        obs, tag + outtag)
    outfile = "$TAG/$ANALYSIS_%s_$CHANNEL-$ERA%s.inputs.root" % (obs,
                                                                 tag + outtag)
    indir = repkey(indir, ERA=era, CHANNEL=channel)
    outdir = repkey(outdir, ERA=era, CHANNEL=channel)

    # HARVESTER
    cats = [  # categories "bins"
        'Tight'
    ]
    procs = {  # processes
        'sig': ['ZTT'],
        'bkg': ['ZL', 'ZJ', 'TTT', 'TTJ', 'W', 'QCD', 'ST', 'VV'],
        'noQCD': ['ZL', 'ZJ', 'TTT', 'TTJ', 'W', 'ST', 'VV'],
        'DY': ['ZTT', 'ZL', 'ZJ'],
        'TT': ['TTT', 'TTJ'],
        'ST': ['ST'],  #'STT', 'STJ' ],
        'tau': ['ZTT', 'TTT'],  #'STT'
    }
    procs['all'] = procs['sig'] + procs['bkg']
    if "mtau" in obs:
        for key, plist in procs.iteritems():
            if 'ZTT' in plist:
                procs[key] = ['ZTT_DM0', 'ZTT_DM1', 'ZTT_DM10', 'ZTT_DM11'
                              ] + plist[1:]
    cats = [c
            for c in enumerate(cats, 1)]  # autmatically number; ($BINID,$BIN)
    harvester = CombineHarvester()
    harvester.AddObservations(['*'], [analysis], [era], [channel], cats)
    harvester.AddProcesses(['*'], [analysis], [era], [channel], procs['bkg'],
                           cats, False)
    harvester.AddProcesses(['90'], [analysis], [era], [channel], procs['sig'],
                           cats, True)
    #harvester.FilterAll(lambda obj: obj.process() in ['QCD','W','ZJ','STJ'] )

    # NORM NUISSANCE PARAMETERS
    LOG.color("Defining nuissance parameters ...")

    harvester.cp().process(procs['DY'] + procs['TT'] + procs['ST'] +
                           ['VV']).AddSyst(harvester, 'lumi', 'lnN',
                                           SystMap()(1.025))  # luminosity

    harvester.cp().process(procs['DY'] + procs['TT'] +
                           procs['ST'] + ['VV']).AddSyst(
                               harvester, 'eff_trig', 'lnN',
                               SystMap()(1.02))  # trigger efficiency

    harvester.cp().process(procs['DY'] + procs['TT'] + procs['ST'] +
                           ['VV']).AddSyst(harvester, 'eff_m', 'lnN',
                                           SystMap()(1.02))  # muon efficiency

    if 'mtau' in obs:
        for dm in [0, 1, 10, 11]:
            sf, err = tauidsfs[era][dm]
            harvester.cp().process(['ZTT_DM%d' % dm]).AddSyst(
                harvester, 'eff_t_dm%s' % dm, 'lnN',
                SystMap()(1. + err / sf))  # tau eff. SF (DM-dependent)
    #else:
    #  harvester.cp().process(procs['tau']).AddSyst(
    #    harvester, 'eff_t', 'lnN', SystMap()(1.20)) # tau efficiency

    ###harvester.cp().process(procs['DY']+procs['TT']+procs['ST']+['VV']).AddSyst(
    ###  harvester, 'eff_tracking', 'lnN', SystMap()(1.04))

    harvester.cp().process(['W']).AddSyst(harvester, 'norm_w', 'lnN',
                                          SystMap()(1.15))  # W+jets xsec

    harvester.cp().process(['QCD']).AddSyst(harvester, 'norm_qcd', 'lnN',
                                            SystMap()(1.20))  # QCD xsec

    harvester.cp().process(procs['DY']).AddSyst(
        harvester, 'xsec_dy', 'lnN',
        SystMap()(1.02))  # Drell-Yan xsec

    harvester.cp().process(procs['TT']).AddSyst(harvester, 'xsec_tt', 'lnN',
                                                SystMap()(1.06))  # ttbar xsec

    harvester.cp().process(procs['ST']).AddSyst(
        harvester, 'xsec_st', 'lnN',
        SystMap()(1.05))  # single top xsec

    harvester.cp().process(['VV']).AddSyst(harvester, 'xsec_vv', 'lnN',
                                           SystMap()(1.05))  # diboson xsec

    harvester.cp().process(['ZL', 'TTL', 'STL'
                            ]).AddSyst(harvester, 'rate_ltf', 'lnN',
                                       SystMap()(1.25))  # l -> tau fake rate

    # SHAPE NUISSANCE PARAMETERS
    harvester.cp().process(['W', 'QCD', 'ZJ', 'TTJ', 'STJ'
                            ]).AddSyst(harvester, 'rate_jtf', 'lnN',
                                       SystMap()(1.25))  # j -> tau fake rate

    if doshapes:
        harvester.cp().process(['ZJ', 'W', 'QCD']).AddSyst(  #'ZJ','TTJ','STJ'
            harvester, 'shape_jtf', 'shape',
            SystMap()(1.00))  # j -> tau fake energy scale

        if 'mtau' in obs:
            harvester.cp().process(['TTT', 'STT']).AddSyst(
                harvester, 'shape_tid', 'shape',
                SystMap()(1.00))  # tau eff. SF (pt-dependent)
        else:
            harvester.cp().process(procs['tau']).AddSyst(
                harvester, 'shape_tid', 'shape',
                SystMap()(1.00))  # tau eff. SF (pt-dependent)
            harvester.cp().process(['ZL']).AddSyst(  #bin_id([1,2])
                harvester, 'shape_ltf', 'shape',
                SystMap()(1.00))  # l -> tau fake energy scale

        harvester.cp().process(procs['DY']).AddSyst(
            harvester, 'shape_dy', 'shape',
            SystMap()(1.00))  # Z pT reweighting

        #harvester.cp().process(['ZJ','TTT','TTJ','STT','STJ','W','QCD']).AddSyst(
        #  harvester, 'shape_jes', 'shape', SystMap()(1.00)) # jet energy scale

        #harvester.cp().process(['ZJ','TTT','TTJ','STT','STJ','W','QCD']).AddSyst(
        #  harvester, 'shape_jer', 'shape', SystMap()(1.00)) # jet energy resolution

        #harvester.cp().process(['ZJ','TTT','TTJ','STT','STJ','W','QCD']).AddSyst(
        #  harvester, 'shape_uncEn', 'shape', SystMap()(1.0)) # unclustered energy

    # EXTRACT SHAPES
    LOG.color("Extracting shapes...")
    print ">>>   file %r" % (infile)
    harvester.cp().channel([channel
                            ]).ExtractShapes(infile, "$BIN/$PROCESS",
                                             "$BIN/$PROCESS_$SYSTEMATIC")

    # RESCALE on the fly
    if 'mtau' in obs:
        for dm in [0, 1, 10, 11]:
            sf, err = tauidsfs[era][dm]
            harvester.cp().process(['ZTT_DM%d' % dm
                                    ]).ForEachProc(lambda p: scaleproc(p, sf))

    # AUTOREBIN
    #LOG.color("automatically rebin (30%)...")
    #rebin = AutoRebin().SetBinThreshold(0.).SetBinUncertFraction(0.30).SetRebinMode(1).SetPerformRebin(True).SetVerbosity(1)
    #rebin.Rebin(harvester,harvester)

    # BINS
    LOG.color("Generating unique bin names...")
    bins = harvester.bin_set()
    #SetStandardBinNames(harvester,"%s_$BINID_$ERA"%(obs))

    # BIN NAMES
    if dobbb:
        LOG.color("Generating bbb uncertainties...")
        bbb = BinByBinFactory()
        bbb.SetAddThreshold(0.0)
        bbb.SetFixNorm(False)
        bbb.SetPattern("$PROCESS_bin_$#_$CHANNEL_$BIN")
        bbb.AddBinByBin(harvester, harvester)
        ###bbb.MergeBinErrors(harvester.cp().process(procs['sig'] + ['W', 'QCD', 'ZJ', 'ZL']))
        ###bbb.SetMergeThreshold(0.0)

    # NUISANCE PARAMETER GROUPS
    LOG.color("Setting nuisance parameter groups...")
    harvester.SetGroup('all', [".*"])
    harvester.SetGroup('bin', [".*_bin_.*"])
    harvester.SetGroup('sys',
                       ["^((?!bin).)*$"])  # everything except bin-by-bin
    harvester.SetGroup('lumi', [".*lumi"])
    harvester.SetGroup('xsec', [".*Xsec.*"])
    harvester.SetGroup('eff', [".*eff_.*"])
    harvester.SetGroup('norm', [".*(lumi|xsec|norm|eff).*"])
    harvester.SetGroup('jtf', [".*jtf.*"])
    harvester.SetGroup('ltf', [".*ltf.*"])
    harvester.SetGroup('es', [".*shape_(tes|[eml]tf|jes)_.*"])
    harvester.SetGroup('zpt', [".*shape_dy.*"])

    # PRINT
    if verbosity >= 1:
        LOG.color("\n>>> print observation...\n")
        harvester.PrintObs()
        LOG.color("\n>>> print processes...\n")
        harvester.PrintProcs()
        LOG.color("\n>>> print systematics...\n")
        harvester.PrintSysts()
        LOG.color("\n>>> print parameters...\n")
        harvester.PrintParams()
        print "\n"

    # WRITE CARDS
    LOG.color("Writing datacards...")
    writer = CardWriter(outcard, outfile)
    writer.SetVerbosity(verbosity)
    writer.SetWildcardMasses([])
    writer.WriteCards(outdir, harvester)

    # REPLACE bin ID by bin name
    for bin, cat in cats:
        oldfilename = repkey(outcard,
                             TAG=outdir,
                             ANALYSIS=analysis,
                             CHANNEL=channel,
                             ERA=era,
                             BINID=str(bin))
        newfilename = repkey(outcard,
                             TAG=outdir,
                             ANALYSIS=analysis,
                             CHANNEL=channel,
                             ERA=era,
                             BINID=cat)
        if os.path.exists(oldfilename):
            os.rename(oldfilename, newfilename)
            print '>>> Renaming "%s" -> "%s"' % (oldfilename, newfilename)
        else:
            print '>>> Warning! "%s" does not exist!' % (oldfilename)
Example #17
0
def drawpostfit(fname, bin, procs, **kwargs):
    """Plot pre- and post-fit plots PostFitShapesFromWorkspace."""
    print '>>>\n>>> drawpostfit("%s","%s")' % (fname, bin)
    outdir = kwargs.get('outdir', "")
    pname = kwargs.get('pname',
                       "$FIT.png")  # replace $FIT = 'prefit', 'postfit'
    ratio = kwargs.get('ratio', True)
    tag = kwargs.get('tag', "")
    xtitle = kwargs.get('xtitle', None)
    title = kwargs.get('title', None)
    text = kwargs.get('text', "")
    tsize = kwargs.get('tsize', 0.050)
    xmin = kwargs.get('xmin', None)
    xmax = kwargs.get('xmax', None)
    ymargin = kwargs.get('ymargin', 1.22)
    groups = kwargs.get('group', [])
    position = kwargs.get('pos', None)  # legend position
    ncol = kwargs.get('ncol', None)  # legend columns
    square = kwargs.get('square', False)
    era = kwargs.get('era', "")
    exts = kwargs.get('exts', ['pdf', 'png'])  # figure extension
    ymax = None
    fits = ['prefit', 'postfit']
    file = ensureTFile(fname, 'READ')
    if outdir:
        ensuredir(outdir)
    if era:
        setera(era)

    # DRAW PRE-/POST-FIT
    for fit in fits:
        fitdirname = "%s_%s" % (bin, fit)
        dir = file.Get(fitdirname)
        if not dir:
            LOG.warning('drawpostfit: Did not find dir "%s"' % (fitdirname),
                        pre="   ")
            return
        obshist = None
        exphists = []

        # GET HIST
        for proc in procs:  #reversed(samples):
            hname = "%s/%s" % (fitdirname, proc)
            hist = file.Get(hname)
            if not hist:
                LOG.warning(
                    'drawpostfit: Could not find "%s" template in directory "%s_%s"'
                    % (proc, bin, fit),
                    pre="   ")
                continue
            if 'data_obs' in proc:
                obshist = hist
                hist.SetLineColor(1)
                ymax = hist.GetMaximum() * ymargin
            else:
                exphists.append(hist)
            if proc in STYLE.sample_titles:
                hist.SetTitle(STYLE.sample_titles[proc])
            if proc in STYLE.sample_colors:
                hist.SetFillStyle(1001)
                hist.SetFillColor(STYLE.sample_colors[proc])
        if len(exphists) == 0:
            LOG.warning(
                'drawpostfit: Could not find any templates in directory "%s"' %
                (bin),
                pre="   ")
            continue
        if not obshist:
            LOG.warning(
                'drawpostfit: Could not find a data template in directory "%s"'
                % (bin),
                pre="   ")
            continue
        for groupargs in groups:
            grouphists(exphists, *groupargs, replace=True)

        # PLOT
        xtitle = (xtitle or exphists[0].GetXaxis().GetTitle()
                  )  #.replace('[GeV]','(GeV)')
        xmax = xmax or exphists[0].GetXaxis().GetXmax()
        xmin = xmin or exphists[0].GetXaxis().GetXmin()
        errtitle = "Pre-fit stat. + syst. unc." if fit == 'prefit' else "Post-fit unc."
        pname_ = repkey(pname, FIT=fit, ERA=era)
        rmin, rmax = (0.28, 1.52)
        plot = Stack(xtitle, obshist, exphists)
        plot.draw(xmin=xmin,
                  xmax=xmax,
                  ymax=ymax,
                  square=square,
                  ratio=ratio,
                  rmin=rmin,
                  rmax=rmax,
                  staterror=True,
                  errtitle=errtitle)
        plot.drawlegend(position, tsize=tsize, text=text, ncol=ncol)
        if title:
            plot.drawtext(title, bold=False)
        plot.saveas(pname_, outdir=outdir, ext=exts)
        plot.close()

    file.Close()
Example #18
0
    def __init__(self, group, name, *paths, **kwargs):
        """Container class for CMSSW samples, e.g.:
       - group: DY (used to group similar samples in final output)
       - name:  DYJetsToLL_M-50 (used as shorthand and jobname)
       - path:  /DYJetsToLL_M-50_TuneCP5_13TeV-madgraphMLM-pythia8/RunIIAutumn18NanoAODv6_Nano25Oct2019_102X_mcRun2/NANOAODSIM
       - dtype: 'mc', 'data', 'embed'
    """

        # PATH
        LOG.insist(
            len(paths) >= 1, "Need at least one path to create a sample...")
        if len(paths) == 1 and isinstance(paths[0], list):
            paths = paths[0]
        for path in paths:
            LOG.insist(
                path.count('/') >= 3 and path.startswith('/'),
                "DAS path %r has wrong format. Need /SAMPLE/CAMPAIGN/FORMAT." %
                (path))
            #sample = '/'.join(line.split('/')[-3:])

        # DATA TYPE
        dtype = kwargs.get('dtype', None)
        dtypes = ['mc', 'data', 'embed']
        if dtype == None:  # automatic recognition
            path = paths[0]
            if 'Embed' in path:
                dtype = 'embed'
            elif path.endswith('SIM') or any(g in path
                                             for g in ['pythia', 'madgraph']):
                dtype = 'mc'
            elif re.search(r"/Run20\d\d", path):
                dtype = 'data'
            dtype = 'mc'  # TODO: remove
        LOG.insist(
            dtype in dtypes,
            "Given data type '%s' is not recongized! Please choose from %s..."
            % (dtype, ', '.join(dtypes)))

        # ATTRIBUTES
        self.group = group
        self.name = name
        self.paths = paths  # DAS dataset path
        self.dtype = dtype
        self.channels = kwargs.get('channel', None)
        self.channels = kwargs.get('channels', self.channels)
        self.storage = None
        self.storepath = kwargs.get('store',
                                    None)  # if stored elsewhere than DAS
        self.url = kwargs.get('url', None)  # URL if stored elsewhere
        self.dasurl = kwargs.get(
            'dasurl', None) or "root://cms-xrd-global.cern.ch/"  # URL for DAS
        self.blacklist = kwargs.get('blacklist', [])  # black list file
        self.instance = kwargs.get(
            'instance', 'prod/phys03' if path.endswith('USER') else
            'prod/global')  # if None, does not exist in DAS
        self.nfilesperjob = kwargs.get('nfilesperjob',
                                       -1)  # number of nanoAOD files per job
        self.maxevts = kwargs.get(
            'maxevtsperjob', -1)  # maximum number of events processed per job
        self.maxevts = kwargs.get(
            'maxevts',
            self.maxevts)  # maximum number of events processed per job
        self.extraopts = kwargs.get(
            'opts', []
        )  # extra options for analysis module, e.g. ['doZpt=1','tes=1.1']
        self.subtry = kwargs.get('subtry',
                                 0)  # to help keep track of resubmission
        self.jobcfg = kwargs.get('jobcfg',
                                 {})  # to help keep track of resubmission
        self.nevents = kwargs.get(
            'nevts', 0)  # number of nanoAOD events that can be processed
        self.nevents = kwargs.get('nevents',
                                  self.nevents)  # cache of number of events
        self.files = kwargs.get(
            'files', [])  # list of ROOT files, OR text file with list of files
        self.filenevts = {}  # cache of number of events for each file
        self.postfix = kwargs.get(
            'postfix',
            None) or ""  # post-fix (before '.root') for stored ROOT files
        self.era = kwargs.get('era', "")  # for expansion of $ERA variable
        self.dosplit = kwargs.get(
            'split',
            len(self.paths) >= 2)  # allow splitting (if multiple DAS datasets)
        self.verbosity = kwargs.get('verbosity',
                                    0)  # verbosity level for debugging
        self.refreshable = not self.files  # allow refresh on file list in getfiles()

        # ENSURE LIST
        if self.channels != None and not isinstance(self.channels, list):
            self.channels = [self.channels]
        if isinstance(self.extraopts, str):
            if ',' in self.extraopts:
                self.extraopts = self.extraopts.split(',')
            self.extraopts = [self.extraopts]

        # STORAGE & URL DEFAULTS
        if self.storepath:
            self.storepath = repkey(self.storepath,
                                    USER=_user,
                                    ERA=self.era,
                                    GROUP=self.group,
                                    SAMPLE=self.name)
            self.storage = getstorage(repkey(self.storepath,
                                             PATH=self.paths[0],
                                             DAS=self.paths[0]),
                                      ensure=False)
        if not self.dasurl:
            self.dasurl = self.url if (self.url in dasurls) else dasurls[0]
        if not self.url:
            if self.storepath:
                if self.storage.__class__.__name__ == 'Local':
                    self.url = ""  #root://cms-xrd-global.cern.ch/
                else:
                    self.url = self.storage.fileurl
            else:
                self.url = self.dasurl

        # GET FILE LIST FROM TEXT FILE
        if isinstance(self.files, str):
            self.loadfiles(self.files)
Example #19
0
def plotinputs(fname, varprocs, observables, bins, **kwargs):
    """Plot histogram inputs from ROOT file for datacards, and write to ROOT file.
       fname:       filename pattern of ROOT file
       varprocs:    dictionary for systematic variation to list of processes,
                    e.g. { 'Nom':   ['ZTT','TTT','W','QCD','data_obs'],
                           'TESUp': ['ZTT','TTT'], 'TESDown': ['ZTT','TTT'] }
       observables: list of Variables objects
       bins:        list of Selection objects
  """
    #LOG.header("plotinputs")
    tag = kwargs.get('tag', "")
    pname = kwargs.get('pname', "$OBS_$BIN$TAG.png")
    outdir = kwargs.get('outdir', 'plots')
    text = kwargs.get('text', "$BIN")
    groups = kwargs.get('group',
                        [])  # add processes together into one histogram
    verbosity = kwargs.get('verb', 0)
    ensuredir(outdir)
    print ">>>\n>>> " + color(" plotting... ", 'magenta', bold=True, ul=True)
    for obs in observables:
        obsname = obs.filename
        ftag = tag + obs.tag
        fname_ = repkey(fname, OBS=obsname, TAG=ftag)
        file = ensureTFile(fname_, 'UPDATE')
        for set, procs in varprocs.iteritems(
        ):  # loop over processes with variation
            if set == 'Nom':
                systag = ""  # no systematics tag for nominal
                procs_ = procs[:]
            else:
                systag = '_' + set  # systematics tag for variation, e.g. '_TESUp'
                procs_ = [
                    (p + systag if p in procs else p) for p in varprocs['Nom']
                ]  # add tag to varied processes
            for selection in bins:
                if not obs.plotfor(selection): continue
                obs.changecontext(selection)
                bin = selection.filename
                text_ = repkey(
                    text, BIN=selection.title)  # extra text in plot corner
                tdir = ensureTDirectory(file, bin,
                                        cd=True)  # directory with histograms
                if set == 'Nom':
                    gStyle.Write(
                        'style', TH1.kOverwrite
                    )  # write current TStyle object to reproduce plots

                # STACKS
                pname_ = repkey(pname, OBS=obsname, BIN=bin,
                                TAG=ftag + systag)  # image file name
                wname = "stack" + systag  # name in ROOT file
                stackinputs(tdir,
                            obs,
                            procs_,
                            group=groups,
                            save=pname_,
                            write=wname,
                            text=text_)

                # VARIATIONS
                if 'Down' in set:
                    systag_ = systag.replace(
                        'Down', '')  # e.g.'_TES' without 'Up' or 'Down' suffix
                    pname_ = repkey(pname,
                                    OBS=obsname,
                                    BIN=bin,
                                    TAG=ftag + "_$PROC" +
                                    systag)  # image file name
                    wname = "plot_$PROC" + systag  # name in ROOT file
                    comparevars(tdir,
                                obs,
                                procs,
                                systag_,
                                save=pname_,
                                write=wname,
                                text=text_)

        file.Close()
Example #20
0
def createinputs(fname, sampleset, observables, bins, **kwargs):
    """Create histogram inputs in ROOT file for datacards.
       fname:       filename pattern of ROOT file
       sampleset:   SampleSet object
       observables: list of Variables objects
       bins:        list of Selection objects
  """
    #LOG.header("createinputs")
    outdir = kwargs.get('outdir', "")
    tag = kwargs.get('tag', "")  # file tag
    htag = kwargs.get('htag', "")  # hist tag for systematic
    filters = kwargs.get('filter',
                         None)  # only create histograms for these processes
    vetoes = kwargs.get('veto', None)  # veto these processes
    parallel = kwargs.get('parallel', True)  # MultiDraw histograms in parallel
    recreate = kwargs.get('recreate', False)  # recreate ROOT file
    replaceweight = kwargs.get('replaceweight', None)  # replace weight
    extraweight = kwargs.get('weight', "")  # extraweight
    shiftQCD = kwargs.get('shiftQCD', 0)  # e.g 0.30 for 30%
    verbosity = kwargs.get('verb', 0)
    option = 'RECREATE' if recreate else 'UPDATE'
    method = 'QCD_OSSS' if filters == None or 'QCD' in filters else None
    method = kwargs.get('method', method)

    # FILE LOGISTICS: prepare file and directories
    files = {}
    ensuredir(outdir)
    fname = os.path.join(outdir, fname)
    for obs in observables:
        obsname = obs.filename
        ftag = tag + obs.tag
        fname_ = repkey(fname, OBS=obsname, TAG=tag)
        file = TFile.Open(fname_, option)
        if recreate:
            print ">>> created file %s" % (fname_)
        for selection in bins:
            if not obs.plotfor(selection): continue
            obs.changecontext(selection)
            ensureTDirectory(file, selection.filename, cd=True, verb=verbosity)
            if recreate:
                string = joincuts(selection.selection, obs.cut)
                TNamed("selection", string).Write(
                )  # write exact selection string to ROOT file for the record / debugging
                #TNamed("weight",sampleset.weight).Write()
                LOG.verb(
                    "%s selection %r: %r" % (obsname, selection.name, string),
                    verbosity, 1)
        files[obs] = file

    # GET HISTS
    for selection in bins:
        bin = selection.filename  # bin name
        print ">>>\n>>> " + color(
            " %s " % (bin), 'magenta', bold=True, ul=True)
        if htag:
            print ">>> systematic uncertainty: %s" % (color(
                htag.lstrip('_'), 'grey'))
        if recreate or verbosity >= 1:
            print ">>> %r" % (selection.selection)
        hists = sampleset.gethists(observables,
                                   selection,
                                   method=method,
                                   split=True,
                                   parallel=parallel,
                                   filter=filters,
                                   veto=vetoes)

        # SAVE HIST
        ljust = 4 + max(11, len(htag))  # extra space
        TAB = LOG.table("%10.1f %10d  %-18s  %s")
        TAB.printheader('events', 'entries', 'variable',
                        'process'.ljust(ljust))
        for obs, hist in hists.iterhists():
            name = lreplace(hist.GetName(), obs.filename).strip(
                '_')  # histname = $VAR_$NAME (see Sample.gethist)
            if not name.endswith(htag):
                name += htag  # HIST = $PROCESS_$SYSTEMATIC
            name = repkey(name, BIN=bin)
            drawopt = 'E1' if 'data' in name else 'EHIST'
            lcolor = kBlack if any(
                s in name
                for s in ['data', 'ST', 'VV']) else hist.GetFillColor()
            hist.SetOption(drawopt)
            hist.SetLineColor(lcolor)
            hist.SetFillStyle(0)  # no fill in ROOT file
            hist.SetName(name)
            hist.GetXaxis().SetTitle(obs.title)
            for i, yval in enumerate(hist):
                if yval < 0:
                    print ">>> replace bin %d (%.3f<0) of %r" % (
                        i, yval, hist.GetName())
                    hist.SetBinContent(i, 0)
            files[obs].cd(bin)  # $FILE:$BIN/$PROCESS_$SYSTEMATC
            hist.Write(name, TH1.kOverwrite)
            TAB.printrow(hist.GetSumOfWeights(), hist.GetEntries(),
                         obs.printbins(), name)
            deletehist(hist)  # clean memory

    # CLOSE
    for obs, file in files.iteritems():
        file.Close()
Example #21
0
def main():
  
  eras      = args.eras
  periods   = cleanPeriods(args.periods) 
  channel   = args.channel
  types     = args.types
  verbosity = args.verbosity
  minbiases = [ 69.2 ] if periods else [ 69.2, 69.2*1.046, 69.2*0.954, 80.0 ]
  
  fname_ = "$PICODIR/$SAMPLE_$CHANNEL.root" # sample file name
  if 'mc' in types and '$PICODIR' in fname_:
    import TauFW.PicoProducer.tools.config as GLOB
    CONFIG = GLOB.getconfig(verb=verbosity)
    fname_ = repkey(fname_,PICODIR=CONFIG['picodir'])
  
  for era in args.eras:
    year       = getyear(era)
    mcfilename = "MC_PileUp_%s.root"%(era)
    jsondir    = os.path.join(datadir,'json',str(year))
    pileup     = os.path.join(jsondir,"pileup_latest.txt")
    jname      = getJSON(era)
    CMSStyle.setCMSEra(era)
    samples_bug = [ ] # buggy samples in (pre-UL) 2017 with "old pmx" library
    samples_fix = [ ] # fixed samples in (pre-UL) 2017 with "new pmx" library
    samples = [ # default set of samples
      ( 'DY', "DYJetsToMuTauh_M-50"   ),
      ( 'DY', "DYJetsToLL_M-50"       ),
      ( 'DY', "DY4JetsToLL_M-50"      ),
      ( 'DY', "DY3JetsToLL_M-50"      ),
      ( 'DY', "DY2JetsToLL_M-50"      ),
      ( 'DY', "DY1JetsToLL_M-50"      ),
      ( 'WJ', "WJetsToLNu"            ),
      ( 'WJ', "W4JetsToLNu"           ),
      ( 'WJ', "W3JetsToLNu"           ),
      ( 'WJ', "W2JetsToLNu"           ),
      ( 'WJ', "W1JetsToLNu"           ),
      ( 'TT', "TTToHadronic"          ),
      ( 'TT', "TTTo2L2Nu"             ),
      ( 'TT', "TTToSemiLeptonic"      ),
      ( 'ST', "ST_tW_top"             ),
      ( 'ST', "ST_tW_antitop"         ),
      ( 'ST', "ST_t-channel_top"      ),
      ( 'ST', "ST_t-channel_antitop"  ),
      ( 'VV', "WW"                    ),
      ( 'VV', "WZ"                    ),
      ( 'VV', "ZZ"                    ),
    ]
    if era=='2016':
      campaign = "Moriond17"
      if 'UL' in era and 'preVFP' in era:
        campaign = "Summer19"
      elif 'UL' in era:
        campaign = "Summer19"
      else:
        samples  = [
          ( 'TT', "TT",                   ),
          ( 'DY', "DYJetsToLL_M-10to50",  ),
          ( 'DY', "DYJetsToLL_M-50",      ),
          ( 'DY', "DY1JetsToLL_M-50",     ),
          ( 'DY', "DY2JetsToLL_M-50",     ),
          ( 'DY', "DY3JetsToLL_M-50",     ),
          ( 'WJ', "WJetsToLNu",           ),
          ( 'WJ', "W1JetsToLNu",          ),
          ( 'WJ', "W2JetsToLNu",          ),
          ( 'WJ', "W3JetsToLNu",          ),
          ( 'WJ', "W4JetsToLNu",          ),
          ( 'ST', "ST_tW_top",            ),
          ( 'ST', "ST_tW_antitop",        ),
          ( 'ST', "ST_t-channel_top",     ),
          ( 'ST', "ST_t-channel_antitop", ),
          #( 'ST', "ST_s-channel",         ),
          ( 'VV', "WW",                   ),
          ( 'VV', "WZ",                   ),
          ( 'VV', "ZZ",                   ),
        ]
    elif '2017' in era:
      if 'UL' in era:
        campaign = "Summer19"
      else:
        campaign = "Winter17_V2"
        samples_bug = [ # buggy samples in (pre-UL) 2017
          ( 'DY', "DYJetsToLL_M-50",      ),
          ( 'WJ', "W3JetsToLNu",          ),
          ( 'VV', "WZ",                   ),
        ]
        samples_fix = [ # fixed samples in (pre-UL) 2017
          ( 'DY', "DYJetsToLL_M-10to50",  ),
          ( 'DY', "DY1JetsToLL_M-50",     ),
          ( 'DY', "DY2JetsToLL_M-50",     ),
          ( 'DY', "DY3JetsToLL_M-50",     ),
          ( 'DY', "DY4JetsToLL_M-50",     ),
          ( 'TT', "TTTo2L2Nu",            ),
          ( 'TT', "TTToHadronic",         ),
          ( 'TT', "TTToSemiLeptonic",     ),
          ( 'WJ', "WJetsToLNu",           ),
          ( 'WJ', "W1JetsToLNu",          ),
          ( 'WJ', "W2JetsToLNu",          ),
          ( 'WJ', "W4JetsToLNu",          ),
          ( 'ST', "ST_tW_top",            ),
          ( 'ST', "ST_tW_antitop",        ),
          ( 'ST', "ST_t-channel_top",     ),
          ( 'ST', "ST_t-channel_antitop", ),
          #( 'ST', "ST_s-channel",         ),
          ( 'VV', "WW",                   ),
          ( 'VV', "ZZ",                   ),
        ]
        samples = samples_bug + samples_fix
    else:
      if 'UL' in era:
        campaign = "Summer19"
      else:
        campaign = "Autumn18"
        samples = [
          ( 'TT', "TTTo2L2Nu",            ),
          ( 'TT', "TTToHadronic",         ),
          ( 'TT', "TTToSemiLeptonic",     ),
          ( 'DY', "DYJetsToLL_M-10to50",  ),
          ( 'DY', "DYJetsToLL_M-50",      ),
          ( 'DY', "DY1JetsToLL_M-50",     ),
          ( 'DY', "DY2JetsToLL_M-50",     ),
          ( 'DY', "DY3JetsToLL_M-50",     ),
          ( 'DY', "DY4JetsToLL_M-50",     ),
          #( 'WJ', "WJetsToLNu",           ),
          ( 'WJ', "W1JetsToLNu",          ),
          ( 'WJ', "W2JetsToLNu",          ),
          ( 'WJ', "W3JetsToLNu",          ),
          ( 'WJ', "W4JetsToLNu",          ),
          ( 'ST', "ST_tW_top",            ),
          ( 'ST', "ST_tW_antitop",        ),
          ( 'ST', "ST_t-channel_top",     ),
          ( 'ST', "ST_t-channel_antitop", ),
          #( 'ST', "ST_s-channel",         ),
          ( 'VV', "WW",                   ),
          ( 'VV', "WZ",                   ),
          ( 'VV', "ZZ",                   ),
        ]
    
    # SAMPLES FILENAMES
    samples_ = [ ]
    suberas = [era+"_preVFP",era+"_postVFP"] if era=='UL2016' else [era]
    for subera in suberas:
      for i, (group,sample) in enumerate(samples):
          fname = repkey(fname_,ERA=subera,GROUP=group,SAMPLE=sample,CHANNEL=channel)
          samples_.append((sample,fname))
    samples = samples_ # replace sample list
    if verbosity>=1:
      print ">>> samples = %r"%(samples)
    
    # JSON
    jsons = { }
    if periods:
      for period in periods:
        jsonout = filterJSONByRunNumberRange(jname,era,period=period,outdir='json',verb=verbosity)
        jsons[erarun] = jsonout
    else:
      jsons[era] = jname
    
    # DATA
    datahists = { period: [ ] for period in jsons }
    if 'data' in types:
      for period, json in jsons.iteritems():
        for minbias in minbiases:
          filename = "Data_PileUp_%s_%s.root"%(period,str(minbias).replace('.','p'))
          datahist = getDataProfile(filename,json,pileup,100,era,minbias)
          datahists[period].append((minbias,datahist))
    elif args.plot: # do not create new data profiles, but just load them
      for era in jsons:
        for minbias in minbiases:
          filename = "Data_PileUp_%s_%s.root"%(era,str(minbias).replace('.','p'))
          file, hist = gethist(filename,'pileup',retfile=True)
          if not file or not hist: continue
          hist.SetDirectory(0)
          file.Close()
          datahists[era].append((minbias,hist))
    
    # MC
    if 'mc' in types:
      assert samples, "compareMCProfiles: Did not find any samples for %r..."%(era)
      mcfilename = "MC_PileUp_%s.root"%(era)
      #mcfilename = "MC_PileUp_%s_%s.root"%(era,campaign)
      getMCProfile(mcfilename,samples,channel,era)
      if args.plot:
        mchist = compareMCProfiles(samples,channel,era)
        for era in jsons:
          for minbias, datahist in datahists[era]:
            compareDataMCProfiles(datahist,mchist,era,minbias)
          compareDataMCProfiles(datahists[era],mchist,era,rmin=0.4,rmax=1.5,delete=True)
        deletehist(mchist) # clean memory
      if era=='2017': #and 'UL' not in era # buggy (pre-UL) 2017: also check new/old pmx separately
        mcfilename_bug = mcfilename.replace(".root","_old_pmx.root")
        mcfilename_fix = mcfilename.replace(".root","_new_pmx.root")
        getMCProfile(mcfilename_bug,samples_bug,channel,era)
        getMCProfile(mcfilename_fix,samples_fix,channel,era)
        if args.plot:
          mchist_bug = compareMCProfiles(samples_bug,channel,era,tag="old_pmx")
          mchist_fix = compareMCProfiles(samples_fix,channel,era,tag="new_pmx")
          for era in jsons:
            for minbias, datahist in datahists[era]:
              compareDataMCProfiles(datahist,mchist_bug,era,minbias,tag="old_pmx")
              compareDataMCProfiles(datahist,mchist_fix,era,minbias,tag="new_pmx")
    
    # FLAT
    if 'flat' in types:
      filename  = "MC_PileUp_%d_FlatPU0to75.root"%era
      hist_flat = getFlatProfile(filename,75)
      for era in jsons:
        for minbias, datahist in datahists[era]:
          compareDataMCProfiles(datahist,hist_flat,era,minbias,tag="FlatPU0to75",rmin=0.0,rmax=3.1)