def test_export1d():
    """Round-trip a 1D coffea histogram through a ROOT file.

    A histogram with 20 regular bins on [0, 200) is filled, converted with
    ``export1d``, written with uproot, read back, and compared bin-by-bin
    (edges and values) against the exported object.  The scratch file is
    removed even when one of the assertions fails.
    """
    import os

    import uproot
    from coffea.hist import export1d

    counts, test_eta, test_pt = dummy_jagged_eta_pt()

    h_regular_bins = hist.Hist("regular_joe", hist.Bin("x", "x", 20, 0, 200))
    h_regular_bins.fill(x=test_pt)

    hout = export1d(h_regular_bins)

    filename = 'test_export1d.root'
    hin = fin = None
    try:
        # The context manager closes the file on exit, so no explicit
        # close() call is needed inside the block.
        with uproot.create(filename) as fout:
            fout['regular_joe'] = hout

        with uproot.open(filename) as fin:
            hin = fin['regular_joe']

        assert np.all(hin.edges == hout.edges)
        assert np.all(hin.values == hout.values)
    finally:
        # Drop the references to the read-back objects before deleting the
        # file, mirroring the original `del hin; del fin`.
        hin = fin = None
        if os.path.exists(filename):
            os.remove(filename)
def postprocess(self, accumulator):
    """Export every histogram in ``accumulator`` to ``self.outfile``.

    Each ``(name, histogram)`` pair is converted with ``export1d`` and stored
    under ``name`` in a freshly recreated output file.  The file is closed
    even if one of the exports fails.

    Returns the ``accumulator`` unchanged.
    """
    f = recreate(self.outfile)
    try:
        # `histogram` rather than `hist`, so the loop variable cannot shadow
        # a module of that name.
        for name, histogram in accumulator.items():
            f[name] = export1d(histogram)
            print(f'wrote {name} to {self.outfile}')
    finally:
        f.close()
    return accumulator
def makeCardFromHist(out_cache,
                     hist_name,
                     nonprompt_scale=1,
                     signal_scale=1,
                     bkg_scale=1,
                     overflow='all',
                     ext='',
                     systematics=True):
    """Write a shape file and a single-bin data card for one histogram.

    Parameters
    ----------
    out_cache : mapping
        Maps histogram names to histograms; ``out_cache[hist_name]`` is
        copied, so the cached object is not modified.
    hist_name : str
        Key of the histogram to use; also part of the output file names.
    nonprompt_scale, signal_scale, bkg_scale : number
        Scale factors applied to the ``ttbar``, ``topW_v2`` and
        ``TTW``/``TTZ``/``TTH`` datasets respectively.
    overflow : str
        Forwarded to ``values``/``edges``/``export1d``.
    ext : str
        Suffix inserted into the output file names.
    systematics : bool
        When true, the normalisation and luminosity uncertainties are
        specified on the card.

    Returns
    -------
    Whatever ``card.writeToFile`` returns for the written data card.
    """
    print("Writing cards using histogram:", hist_name)

    card_dir = os.path.expandvars('$TWHOME/data/cards/')
    # exist_ok avoids the check-then-create race of isdir() + makedirs().
    os.makedirs(card_dir, exist_ok=True)

    data_card = card_dir + hist_name + ext + '_card.txt'
    shape_file = card_dir + hist_name + ext + '_shapes.root'

    histogram = out_cache[hist_name].copy()

    # scale some processes
    scales = {
        'ttbar': nonprompt_scale,
        'topW_v2': signal_scale,
        'TTW': bkg_scale,  # only scale the most important backgrounds
        'TTZ': bkg_scale,
        'TTH': bkg_scale,
    }
    histogram.scale(scales, axis='dataset')

    def integrated(selection):
        # Histogram of the selected datasets with the dataset axis summed out.
        return histogram[selection].integrate('dataset')

    def shape(selection):
        # 1D export of the selected datasets, written to the shape file.
        return hist.export1d(integrated(selection), overflow=overflow)

    def total(selection):
        # Total yield of the selected datasets.
        return integrated(selection).values(overflow=overflow)[()].sum()

    # Histogram for a pseudo observation, filled one entry at a time from the
    # rounded expectation.
    # NOTE(review): `data_hist` is not used further down (the observation is
    # taken from `histogram[notdata]`); kept so behaviour is unchanged.
    data_counts = np.asarray(
        np.round(integrated(notdata).values(overflow=overflow)[()], 0), int)
    data_hist = histogram['topW_v2']
    data_hist.clear()
    data_hist_bins = data_hist.axes()[1]
    for i, edge in enumerate(data_hist_bins.edges(overflow=overflow)):
        if i >= len(data_counts):
            break
        for _ in range(data_counts[i]):
            data_hist.fill(**{
                'dataset': 'data',
                data_hist_bins.name: edge + 0.0001
            })

    other_sel = re.compile('(TTTT|diboson|DY|rare)')

    observation = shape(notdata)
    tw = shape('topW_v2')
    ttw = shape('TTW')
    ttz = shape('TTZ')
    tth = shape('TTH')
    rare = shape(other_sel)
    nonprompt = shape('ttbar')

    fout = uproot3.recreate(shape_file)
    try:
        fout["signal"] = tw
        fout["nonprompt"] = nonprompt
        fout["ttw"] = ttw
        fout["ttz"] = ttz
        fout["tth"] = tth
        fout["rare"] = rare
        fout["data_obs"] = observation
    finally:
        # Close the shape file even if one of the writes fails.
        fout.close()

    # Get the total yields to write into a data card
    totals = {}
    totals['signal'] = total('topW_v2')
    totals['ttw'] = total('TTW')
    totals['ttz'] = total('TTZ')
    totals['tth'] = total('TTH')
    # NOTE(review): the "rare" shape above uses `other_sel`
    # (TTTT|diboson|DY|rare) while this yield only selects 'rare' -- confirm
    # which one is intended.
    totals['rare'] = total('rare')
    totals['nonprompt'] = total('ttbar')
    totals['observation'] = total(notdata)

    print("{:30}{:.2f}".format("Signal expectation:", totals['signal']))
    print("{:30}{:.2f}".format("Non-prompt background:", totals['nonprompt']))
    print("{:30}{:.2f}".format(
        "t(t)X(X)/rare background:",
        totals['ttw'] + totals['ttz'] + totals['tth'] + totals['rare']))
    print("{:30}{:.2f}".format("Observation:", totals['observation']))

    # set up the card
    card = dataCard()
    card.reset()
    card.setPrecision(3)

    # add the uncertainties (just flat ones for now)
    for nuisance in ('lumi', 'ttw_norm', 'ttz_norm', 'tth_norm', 'rare_norm',
                     'fake'):
        card.addUncertainty(nuisance, 'lnN')

    # add the single bin
    card.addBin('Bin0', ['ttw', 'ttz', 'tth', 'rare', 'nonprompt'], 'Bin0')
    for process in ('signal', 'ttw', 'ttz', 'tth', 'rare', 'nonprompt'):
        card.specifyExpectation('Bin0', process, totals[process])

    # set uncertainties
    if systematics:
        card.specifyUncertainty('ttw_norm', 'Bin0', 'ttw', 1.15)
        card.specifyUncertainty('ttz_norm', 'Bin0', 'ttz', 1.10)
        card.specifyUncertainty('tth_norm', 'Bin0', 'tth', 1.20)
        card.specifyUncertainty('rare_norm', 'Bin0', 'rare', 1.20)
        card.specifyUncertainty('fake', 'Bin0', 'nonprompt', 1.25)
        card.specifyFlatUncertainty('lumi', 1.03)

    ## observation
    card.specifyObservation('Bin0', totals['observation'])

    print("Done.\n")

    return card.writeToFile(data_card, shapeFile=shape_file)
.project('AK8Puppijet0_pt', ptbin) .project('AK8Puppijet0_deepdoubleb', slice(None,0.89), overflow='under') ) pass_template = (h.project('process', proc) .project('AK8Puppijet0_isHadronicV', *mproj) .project('systematic', systreal) .project('AK8Puppijet0_pt', ptbin) .project('AK8Puppijet0_deepdoubleb', slice(0.89,None)) ) content = fail_template.sum('AK8Puppijet0_msd').values() if content == {} or content[()] == 0.: print("Missing", proc, ptbin, syst) continue sname = "_%s" % syst if syst.name != '' else '' name = "%s_pass%s_bin%d" % (proc, sname, i) fout[name] = hist.export1d(pass_template) name = "%s_fail%s_bin%d" % (proc, sname, i) fout[name] = hist.export1d(fail_template) fout.close() if os.path.exists("hist_1DZbb_muonCR.root"): os.remove("hist_1DZbb_muonCR.root") fout = uproot.create("hist_1DZbb_muonCR.root") h = hists['templates_muoncontrol'] lumi = 41.1 h.scale({p: lumi for p in h[nodata].identifiers('process')}, axis="process") rename = { 'trigweight': 'trigger',
def get_bkg_templates(tmp_rname):
    """
    Function that writes linearized mtt vs costheta distributions to root file.

    Every ``.coffea`` file in ``inputdir`` matching the background template
    pattern is loaded.  Each template whose systematic is listed in
    ``sys_to_use`` is (optionally) smoothed, stored in the per-jet-multiplicity
    coffea dictionary and exported to the ROOT file ``tmp_rname``.  The
    dictionaries are saved to ``outdir`` afterwards.

    The ROOT file is closed even if processing fails part-way.
    """
    # define variables to get histogram for background
    bkg_fnmatch = "%s.coffea" % base_template_name.replace(
        "NJETS", njets_regex).replace("SIG", "bkg")
    bkg_fnames = fnmatch.filter(os.listdir(inputdir), bkg_fnmatch)

    if "3Jets" in njets_to_run:
        histo_dict_3j = processor.dict_accumulator({
            "Muon": {},
            "Electron": {}
        })
    if "4PJets" in njets_to_run:
        histo_dict_4pj = processor.dict_accumulator({
            "Muon": {},
            "Electron": {}
        })

    # need to save coffea hist objects to file so they can be opened by uproot3 in the proper format
    # recreate() overwrites an existing file and creates a missing one, so
    # the requested compression is applied in both cases (it used to be
    # dropped when the file did not exist yet).
    upfout = uproot3.recreate(tmp_rname, compression=uproot3.ZLIB(4))
    try:
        for bkg_file in bkg_fnames:
            hdict = load(os.path.join(inputdir, bkg_file))
            jmult = "3Jets" if "3Jets" in bkg_file else "4PJets"
            for lep in hdict.keys():
                lepdir = "mujets" if lep == "Muon" else "ejets"
                for tname in hdict[lep].keys():
                    template_histo = hdict[lep][tname]

                    # Template names look like "<proc>_<sys...>", except the
                    # data_obs template which has no systematic part.
                    if "data_obs" in tname:
                        proc, syst = "data_obs", "nosys"
                    else:
                        proc, *sys_parts = tname.split("_")
                        syst = "_".join(sys_parts)

                    if syst not in sys_to_use:
                        continue

                    sysname, _ = sys_to_use[syst]
                    name = proc + lepdir if proc == "QCD" else proc
                    print(lep, jmult, syst, name)

                    name_parts = [jmult, lepdir, name]
                    if syst != "nosys":
                        name_parts.append(sysname)
                    outhname = "_".join(name_parts)

                    if (syst != "nosys") and (args.smooth) and (
                            templates_to_smooth[proc]):
                        # NOTE(review): relies on the nominal template of
                        # `proc` having been stored before its variations --
                        # confirm the input ordering guarantees this.
                        template_histo = smoothing(
                            nominal=histo_dict_3j[lep][proc] if jmult
                            == "3Jets" else histo_dict_4pj[lep][proc],
                            template=template_histo,
                            nbinsx=len(linearize_binning[0]) - 1,
                            nbinsy=len(linearize_binning[1]) - 1)

                    ## save template histos to coffea dict
                    dict_key = proc if syst == "nosys" else "%s_%s" % (
                        proc, sysname)
                    if jmult == "3Jets":
                        histo_dict_3j[lep][dict_key] = template_histo
                    if jmult == "4PJets":
                        histo_dict_4pj[lep][dict_key] = template_histo

                    ## save template histo to root file
                    upfout[outhname] = hist.export1d(template_histo)

        if "3Jets" in njets_to_run:
            coffea_out_3j = os.path.join(
                outdir,
                f"templates_lj_3Jets_bkg_smoothed_{jobid}_{args.year}.coffea"
                if args.smooth else
                f"templates_lj_3Jets_bkg_{jobid}_{args.year}.coffea")
            save(histo_dict_3j, coffea_out_3j)
            print(f"{coffea_out_3j} written")
        if "4PJets" in njets_to_run:
            coffea_out_4pj = os.path.join(
                outdir,
                f"templates_lj_4PJets_bkg_smoothed_{jobid}_{args.year}.coffea"
                if args.smooth else
                f"templates_lj_4PJets_bkg_{jobid}_{args.year}.coffea")
            save(histo_dict_4pj, coffea_out_4pj)
            print(f"{coffea_out_4pj} written")
    finally:
        upfout.close()

    print(f"{tmp_rname} written")
try: content = fail_template.sum('msd').values() except: content = pqq_template.sum('msd').values() if content == {} or content[()] == 0.: if proc == "data_obs" and syst != "nominal": pass else: print("Missing", proc, ptbin, syst) continue sname = "_%s" % syst if syst.name != '' else '' if args.type == '3': name = "%s_pqq%s_bin%d" % (proc, sname, i) fout[name] = hist.export1d(pqq_template) name = "%s_pcc%s_bin%d" % (proc, sname, i) fout[name] = hist.export1d(pcc_template) name = "%s_pbb%s_bin%d" % (proc, sname, i) fout[name] = hist.export1d(pbb_template) else: name = "%s_pass%s_bin%d" % (proc, sname, i) fout[name] = hist.export1d(pass_template) name = "%s_fail%s_bin%d" % (proc, sname, i) fout[name] = hist.export1d(fail_template) fout.close() if not args.muon: sys.exit()
def get_bkg_templates(tmp_rname):
    '''
    Function that writes linearized mtt vs costheta distributions to root file.

    The 'mtt_vs_tlep_ctstar_abs' histogram is rebinned according to
    ``linearize_binning``, scaled by the lumi corrections, grouped by process
    and, per lepton flavour and jet multiplicity, passed through
    ``Plotter.QCD_Est``.  Each resulting template is stored in a coffea
    dictionary and exported to the ROOT file ``tmp_rname``.

    Raises ValueError if the histogram is missing from the input and
    TypeError if a rebinning is neither a numpy array nor a number.  The ROOT
    file is closed even if processing fails part-way.
    '''
    def rebin_spec(axis_name, rebinning):
        # New binning for `axis_name`: explicit edges or a scalar factor.
        if isinstance(rebinning, np.ndarray):
            return hist.Bin(axis_name, axis_name, rebinning)
        if isinstance(rebinning, (float, int)):
            return rebinning
        raise TypeError("Unsupported rebinning for axis %s: %r" %
                        (axis_name, type(rebinning)))

    def dataset_names(histogram):
        # Sorted, unique first entries of the histogram's sparse keys.
        return sorted(set(key[0] for key in histogram.values().keys()))

    def permutation_categories(datasets):
        # Datasets matching one of the ttJets permutation patterns.
        return [name for name in datasets
                if any(fnmatch.fnmatch(name, cat) for cat in ttJets_permcats)]

    def add_category_lumi(lumi_correction, categories):
        # Give each permutation category the correction of its parent sample.
        for tt_cat in categories:
            ttJets_lumi_topo = '_'.join(tt_cat.split('_')[:-1])  # gets ttJets[SL, Had, DiLep] or ttJets_PS
            lumi_correction.update({tt_cat: lumi_correction[ttJets_lumi_topo]})

    ## variables that only need to be defined/evaluated once
    hdict = plt_tools.add_coffea_files(bkg_fnames) if len(bkg_fnames) > 1 else load(bkg_fnames[0])

    ## get data lumi and scale MC by lumi
    # NOTE(review): the value is not used below; the read is kept so the
    # function still fails here when the file or the year entry is missing.
    with open('%s/inputs/lumis_data.json' % proj_dir) as lumi_file:
        data_lumi_year = prettyjson.loads(lumi_file.read())[args.year]

    # get correct hist and rebin
    hname_to_use = 'mtt_vs_tlep_ctstar_abs'
    if hname_to_use not in hdict.keys():
        raise ValueError("%s not found in file" % hname_to_use)
    xrebinning, yrebinning = linearize_binning
    histo = hdict[hname_to_use]  # process, sys, jmult, leptype, btag, lepcat

    xaxis_name = histo.dense_axes()[0].name
    yaxis_name = histo.dense_axes()[1].name
    ## rebin x axis
    new_xbins = rebin_spec(xaxis_name, xrebinning)
    histo = histo.rebin(xaxis_name, new_xbins)
    ## rebin y axis
    new_ybins = rebin_spec(yaxis_name, yrebinning)
    rebin_histo = histo.rebin(yaxis_name, new_ybins)

    ## scale ttJets events, split by reconstruction type, by normal ttJets lumi correction
    ttJets_permcats = ['*right', '*matchable', '*unmatchable', '*other']
    names = dataset_names(hdict[hname_to_use])  # get dataset names in hists
    ttJets_cats = permutation_categories(names)  # gets ttJets(_PS)_other, ...

    # use ttJets events that don't have PS weights for dedicated sys samples in 2016
    if bkg_ttJets_fname is not None:
        ttJets_hdict = load(bkg_ttJets_fname)
        ttJets_histo = ttJets_hdict[hname_to_use]  # process, sys, jmult, leptype, btag, lepcat
        ttJets_histo = ttJets_histo.rebin(xaxis_name, new_xbins)
        ttJets_histo = ttJets_histo.rebin(yaxis_name, new_ybins)

        only_ttJets_names = dataset_names(ttJets_hdict[hname_to_use])
        only_ttJets_cats = permutation_categories(only_ttJets_names)

    ## make groups based on process
    process = hist.Cat("process", "Process", sorting='placement')
    process_cat = "dataset"

    # need to save coffea hist objects to file so they can be opened by uproot in the proper format
    # recreate() overwrites an existing file and creates a missing one, so
    # the requested compression is applied in both cases.
    upfout = uproot.recreate(tmp_rname, compression=uproot.ZLIB(4))
    try:
        if '3Jets' in njets_to_run:
            histo_dict_3j = processor.dict_accumulator({'Muon': {}, 'Electron': {}})
        if '4PJets' in njets_to_run:
            histo_dict_4pj = processor.dict_accumulator({'Muon': {}, 'Electron': {}})

        for lep in ['Muon', 'Electron']:
            lepdir = 'mujets' if lep == 'Muon' else 'ejets'

            ## make groups based on process
            process_groups = plt_tools.make_dataset_groups(lep, args.year, samples=names, gdict='templates')

            lumi_correction = load('%s/Corrections/%s/MC_LumiWeights_IgnoreSigEvts.coffea' % (proj_dir, jobid))[args.year]['%ss' % lep]
            # scale ttJets events, split by reconstruction type, by normal ttJets lumi correction
            add_category_lumi(lumi_correction, ttJets_cats)

            histo = rebin_histo.copy()
            histo.scale(lumi_correction, axis='dataset')
            histo = histo.group(process_cat, process, process_groups)[:, :, :, lep, :, :].integrate('leptype')

            # use ttJets events that don't have PS weights for dedicated sys samples in 2016
            if bkg_ttJets_fname is not None:
                add_category_lumi(lumi_correction, only_ttJets_cats)

                tt_histo = ttJets_histo.copy()
                tt_histo.scale(lumi_correction, axis='dataset')
                tt_histo = tt_histo.group(
                    process_cat, process,
                    {'TT': ['ttJets_right', 'ttJets_matchable', 'ttJets_unmatchable', 'ttJets_other']}
                )[:, :, :, lep, :, :].integrate('leptype')

            for jmult in njets_to_run:
                iso_sb = Plotter.linearize_hist(histo[:, 'nosys', jmult, 'btagPass', 'Loose'].integrate('sys').integrate('jmult').integrate('lepcat').integrate('btag'))
                btag_sb = Plotter.linearize_hist(histo[:, 'nosys', jmult, 'btagFail', 'Tight'].integrate('sys').integrate('jmult').integrate('lepcat').integrate('btag'))
                double_sb = Plotter.linearize_hist(histo[:, 'nosys', jmult, 'btagFail', 'Loose'].integrate('sys').integrate('jmult').integrate('lepcat').integrate('btag'))
                sig_histo = Plotter.linearize_hist(histo[:, :, jmult, 'btagPass', 'Tight'].integrate('jmult').integrate('lepcat').integrate('btag'))

                for syst in sys_to_use.keys():
                    if syst not in histo.axis('sys')._sorted:
                        print('\n\n   Systematic %s not available, skipping\n\n' % syst)
                        continue

                    sysname, onlyTT = sys_to_use[syst]
                    if 'LEP' in sysname:
                        sysname = sysname.replace('LEP', lepdir[0])

                    qcd_est_histo = Plotter.QCD_Est(sig_reg=sig_histo, iso_sb=iso_sb, btag_sb=btag_sb, double_sb=double_sb, norm_type='Sideband', shape_region='BTAG', norm_region='BTAG', sys=syst)

                    ## write nominal and systematic variations for each topology to file
                    for proc in dataset_names(qcd_est_histo):
                        if (proc != 'TT') and onlyTT:
                            continue
                        if (proc == 'data_obs') and not (syst == 'nosys'):
                            continue

                        name = proc + lepdir if proc == 'QCD' else proc
                        print(lep, jmult, syst, name)
                        outhname = '_'.join([jmult, lepdir, name]) if syst == 'nosys' else '_'.join([jmult, lepdir, name, sysname])
                        template_histo = qcd_est_histo[proc].integrate('process')

                        if any(tag in syst for tag in ('ue', 'hdamp', 'mtop')) and (bkg_ttJets_fname is not None):
                            tt_lin_histo = Plotter.linearize_hist(tt_histo['TT', 'nosys', jmult, 'btagPass', 'Tight'].integrate('jmult').integrate('lepcat').integrate('btag'))
                            tt_lin_histo = tt_lin_histo['TT', 'nosys'].integrate('process').integrate('sys')
                            template_histo = substitute_ttJets(sys_histo=template_histo, ttJets_histo=tt_lin_histo, ttJets_PS_histo=sig_histo['TT', 'nosys'].integrate('process').integrate('sys'))

                        # NOTE(review): the two steps below read the nominal
                        # template of `proc` from the coffea dict, so 'nosys'
                        # must have been processed first -- confirm
                        # `sys_to_use` ordering guarantees this.
                        if syst in ('mtop1695', 'mtop1755') and (templates_to_smooth[proc]):
                            template_histo = scale_mtop3gev(nominal=histo_dict_3j[lep][proc] if jmult == '3Jets' else histo_dict_4pj[lep][proc], template=template_histo)

                        if (syst != 'nosys') and (args.smooth) and (templates_to_smooth[proc]):
                            template_histo = smoothing(nominal=histo_dict_3j[lep][proc] if jmult == '3Jets' else histo_dict_4pj[lep][proc], template=template_histo, nbinsx=len(xrebinning)-1, nbinsy=len(yrebinning)-1)

                        ## save template histos to coffea dict
                        dict_key = proc if syst == 'nosys' else '%s_%s' % (proc, syst)
                        if jmult == '3Jets':
                            histo_dict_3j[lep][dict_key] = template_histo
                        if jmult == '4PJets':
                            histo_dict_4pj[lep][dict_key] = template_histo

                        ## save template histo to root file
                        upfout[outhname] = hist.export1d(template_histo)

        if '3Jets' in njets_to_run:
            coffea_out_3j = '%s/templates_lj_3Jets_bkg_smoothed_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year) if args.smooth else '%s/templates_lj_3Jets_bkg_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year)
            save(histo_dict_3j, coffea_out_3j)
            print("%s written" % coffea_out_3j)
        if '4PJets' in njets_to_run:
            coffea_out_4pj = '%s/templates_lj_4PJets_bkg_smoothed_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year) if args.smooth else '%s/templates_lj_4PJets_bkg_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year)
            save(histo_dict_4pj, coffea_out_4pj)
            print("%s written" % coffea_out_4pj)
    finally:
        upfout.close()

    print('%s written' % tmp_rname)
def get_sig_templates(tmp_rname):
    """
    Function that writes linearized mtt vs costheta distributions to root file.

    The "mtt_vs_tlep_ctstar_abs" histogram is rebinned according to
    ``linearize_binning`` and restricted to the signal samples in the
    "btagPass" region.  For every lepton flavour, jet multiplicity, "TTJetsSL"
    signal and known systematic, the linearized template is stored in a coffea
    dictionary and exported to the ROOT file ``tmp_rname``.

    Raises ValueError if the histogram is missing from the input and
    TypeError if a rebinning is neither a numpy array nor a number.  The ROOT
    file is closed even if processing fails part-way.
    """
    def width_to_name(width):
        # "2.5" -> "2p5", so the width can be used inside a histogram name.
        return str(width).replace(".", "p")

    def rebin_spec(axis_name, rebinning):
        # New binning for `axis_name`: explicit edges or a scalar factor.
        if isinstance(rebinning, np.ndarray):
            return hist.Bin(axis_name, axis_name, rebinning)
        if isinstance(rebinning, (float, int)):
            return rebinning
        raise TypeError(
            f"Unsupported rebinning for axis {axis_name}: {type(rebinning)!r}")

    ## variables that only need to be defined/evaluated once
    hdict = plt_tools.add_coffea_files(
        sig_fnames) if len(sig_fnames) > 1 else load(sig_fnames[0])

    # get correct hist and rebin
    hname_to_use = "mtt_vs_tlep_ctstar_abs"
    if hname_to_use not in hdict.keys():
        raise ValueError(f"{hname_to_use} not found in file")
    xrebinning, yrebinning = linearize_binning
    histo = hdict[hname_to_use]  # process, sys, jmult, leptype, btag, lepcat

    xaxis_name = histo.dense_axes()[0].name
    yaxis_name = histo.dense_axes()[1].name
    ## rebin x axis
    histo = histo.rebin(xaxis_name, rebin_spec(xaxis_name, xrebinning))
    ## rebin y axis
    histo = histo.rebin(yaxis_name, rebin_spec(yaxis_name, yrebinning))
    rebin_histo = histo[Plotter.signal_samples, :, :, :,
                        "btagPass"].integrate("btag")

    sparse_keys = rebin_histo.values().keys()
    names = sorted(set(key[0] for key in sparse_keys))  # dataset names in hists
    signals = [sig for sig in names if "TTJetsSL" in sig]  # only use SL decays

    systs = sorted(set(key[1] for key in sparse_keys))
    systs.insert(0, systs.pop(systs.index("nosys")))  # move "nosys" to the front

    # need to save coffea hist objects to file so they can be opened by uproot in the proper format
    # recreate() overwrites an existing file and creates a missing one, so
    # the requested compression is applied in both cases.
    upfout = uproot3.recreate(tmp_rname, compression=uproot3.ZLIB(4))
    try:
        if "3Jets" in njets_to_run:
            histo_dict_3j = processor.dict_accumulator({
                "Muon": {},
                "Electron": {}
            })
        if "4PJets" in njets_to_run:
            histo_dict_4pj = processor.dict_accumulator({
                "Muon": {},
                "Electron": {}
            })

        # write signal dists to temp file
        for lep in ["Muon", "Electron"]:
            orig_lepdir = "muNJETS" if lep == "Muon" else "eNJETS"

            # scale by lumi
            lumi_correction = lumi_corr_dict[args.year]["%ss" % lep]
            histo = rebin_histo.copy()
            histo.scale(lumi_correction, axis="dataset")
            process_groups = plt_tools.make_dataset_groups(lep,
                                                           args.year,
                                                           samples=names,
                                                           gdict="templates")
            histo = histo.group(
                "dataset", hist.Cat("process", "Process",
                                    sorting="placement"), process_groups)

            for jmult in njets_to_run:
                lepdir = orig_lepdir.replace("NJETS", jmult.lower())

                lin_histo = Plotter.linearize_hist(
                    histo[:, :, jmult,
                          lep].integrate("jmult").integrate("leptype"))
                for signal in signals:
                    # Interference samples carry an extra trailing field.
                    if "Int" in signal:
                        boson, mass, width, pI, wt = tuple(signal.split("_"))
                    else:
                        boson, mass, width, pI = tuple(signal.split("_"))

                    name_parts = [
                        "%s%s" % (boson[0], mass[1:]),
                        "relw%s" % width_to_name(width).split("W")[-1],
                        pI.lower(),
                    ]
                    if pI == "Int":
                        name_parts.append(wt)
                    sub_name = "_".join(name_parts)

                    for syst in systs:
                        if syst not in systematics.template_sys_to_name[
                                args.year]:
                            continue
                        if not lin_histo[signal, syst].values().keys():
                            print(
                                f"Systematic {syst} for {lep} {jmult} {signal} not found, skipping"
                            )
                            continue

                        print(args.year, lep, jmult, sub_name, syst)
                        # filter(None, ...) drops empty name components.
                        outhname = "_".join(
                            filter(None, [
                                sub_name,
                                systematics.template_sys_to_name[args.year]
                                [syst][0], lepdir, (args.year)[-2:]
                            ]))
                        if "LEP" in outhname:
                            outhname = outhname.replace(
                                "LEP", "muon" if lep == "Muon" else "electron")

                        template_histo = lin_histo[signal, syst].integrate(
                            "process").integrate("sys")

                        ## save template histos to coffea dict
                        if jmult == "3Jets":
                            histo_dict_3j[lep][
                                f"{signal}_{syst}"] = template_histo.copy()
                        if jmult == "4PJets":
                            histo_dict_4pj[lep][
                                f"{signal}_{syst}"] = template_histo.copy()

                        ## save template histo to root file
                        upfout[outhname] = hist.export1d(template_histo)

        if "3Jets" in njets_to_run:
            coffea_out_3j = os.path.join(
                outdir,
                f"test_raw_templates_lj_3Jets_sig_{args.year}_{jobid}.coffea")
            save(histo_dict_3j, coffea_out_3j)
            print(f"{coffea_out_3j} written")
        if "4PJets" in njets_to_run:
            coffea_out_4pj = os.path.join(
                outdir,
                f"test_raw_templates_lj_4PJets_sig_{args.year}_{jobid}.coffea")
            save(histo_dict_4pj, coffea_out_4pj)
            print(f"{coffea_out_4pj} written")
    finally:
        upfout.close()

    print(f"{tmp_rname} written")
def get_bkg_templates(tmp_rname):
    """
    Function that writes linearized mtt vs costheta distributions to root file.

    The "mtt_vs_tlep_ctstar_abs" histogram of the non-signal samples is
    rebinned according to ``linearize_binning``, scaled by the lumi
    corrections and grouped by process.  For every lepton flavour, jet
    multiplicity and known systematic, the template is taken directly from
    the signal region (ttJets-only systematics) or from ``Plotter.BKG_Est``,
    stored in a coffea dictionary and exported to the ROOT file ``tmp_rname``.

    Raises ValueError if the histogram is missing from the input and
    TypeError if a rebinning is neither a numpy array nor a number.  The ROOT
    file is closed even if processing fails part-way.
    """
    def rebin_spec(axis_name, rebinning):
        # New binning for `axis_name`: explicit edges or a scalar factor.
        if isinstance(rebinning, np.ndarray):
            return hist.Bin(axis_name, axis_name, rebinning)
        if isinstance(rebinning, (float, int)):
            return rebinning
        raise TypeError("Unsupported rebinning for axis %s: %r" %
                        (axis_name, type(rebinning)))

    ## variables that only need to be defined/evaluated once
    hdict = plt_tools.add_coffea_files(
        bkg_fnames) if len(bkg_fnames) > 1 else load(bkg_fnames[0])

    # get correct hist and rebin
    hname_to_use = "mtt_vs_tlep_ctstar_abs"
    if hname_to_use not in hdict.keys():
        raise ValueError("%s not found in file" % hname_to_use)
    xrebinning, yrebinning = linearize_binning
    histo = hdict[hname_to_use][
        Plotter.nonsignal_samples]  # process, sys, jmult, leptype, btag, lepcat

    xaxis_name = histo.dense_axes()[0].name
    yaxis_name = histo.dense_axes()[1].name
    ## rebin x axis
    histo = histo.rebin(xaxis_name, rebin_spec(xaxis_name, xrebinning))
    ## rebin y axis
    rebin_histo = histo.rebin(yaxis_name, rebin_spec(yaxis_name, yrebinning))

    ## scale ttJets events, split by reconstruction type, by normal ttJets lumi correction
    ttJets_permcats = [
        "*right", "*matchable", "*unmatchable", "*sl_tau", "*other"
    ]
    names = sorted(set(key[0] for key in histo.values().keys()))  # dataset names in hists
    ttJets_cats = [
        name for name in names
        if any(fnmatch.fnmatch(name, cat) for cat in ttJets_permcats)
    ]  # gets ttJets(_PS)_other, ...

    ## make groups based on process
    process = hist.Cat("process", "Process", sorting="placement")
    process_cat = "dataset"

    # need to save coffea hist objects to file so they can be opened by uproot in the proper format
    # recreate() overwrites an existing file and creates a missing one, so
    # the requested compression is applied in both cases.
    upfout = uproot3.recreate(tmp_rname, compression=uproot3.ZLIB(4))
    try:
        if "3Jets" in njets_to_run:
            histo_dict_3j = processor.dict_accumulator({
                "Muon": {},
                "Electron": {}
            })
        if "4PJets" in njets_to_run:
            histo_dict_4pj = processor.dict_accumulator({
                "Muon": {},
                "Electron": {}
            })

        for lep in ["Muon", "Electron"]:
            orig_lepdir = "muNJETS" if lep == "Muon" else "eNJETS"

            ## make groups based on process
            process_groups = plt_tools.make_dataset_groups(lep,
                                                           args.year,
                                                           samples=names,
                                                           gdict="templates")

            # NOTE(review): this is the object stored in `lumi_corr_dict`;
            # the update() below therefore also changes that shared mapping.
            lumi_correction = lumi_corr_dict[args.year]["%ss" % lep]
            # scale ttJets events, split by reconstruction type, by normal ttJets lumi correction
            for tt_cat in ttJets_cats:
                # "sl_tau" has two trailing fields, every other category one.
                n_suffix_fields = 2 if "sl_tau" in tt_cat else 1
                ttJets_lumi_topo = "_".join(
                    tt_cat.split("_")[:-n_suffix_fields]
                )  # gets ttJets[SL, Had, DiLep] or ttJets_PS
                ttJets_eff_lumi = lumi_correction[ttJets_lumi_topo]
                lumi_correction.update({tt_cat: ttJets_eff_lumi})

            histo = rebin_histo.copy()
            histo.scale(lumi_correction, axis="dataset")
            histo = histo.group(process_cat, process,
                                process_groups)[:, :, :,
                                                lep, :, :].integrate("leptype")

            systs = sorted(set(key[1] for key in histo.values().keys()))
            systs.insert(0, systs.pop(
                systs.index("nosys")))  # move "nosys" to the front

            # loop over each jet multiplicity
            for jmult in njets_to_run:
                lepdir = orig_lepdir.replace("NJETS", jmult.lower())

                # get sideband and signal region hists
                cen_sb_histo = Plotter.linearize_hist(
                    histo[:, "nosys", jmult,
                          btag_reg_names_dict["Central"]["reg"]].integrate(
                              "jmult").integrate("btag").integrate("sys"))
                sig_histo = Plotter.linearize_hist(
                    histo[:, :, jmult,
                          btag_reg_names_dict["Signal"]["reg"]].integrate(
                              "jmult").integrate("btag"))

                # loop over each systematic
                for syst in systs:
                    if syst not in systematics.template_sys_to_name[
                            args.year]:
                        continue

                    # Evaluated once per systematic instead of once per process.
                    is_ttJets_sys = syst in systematics.ttJets_sys.values()

                    sys_histo = sig_histo[:, syst].integrate("sys")
                    if not is_ttJets_sys:
                        sys_histo = Plotter.BKG_Est(sig_reg=sys_histo,
                                                    sb_reg=cen_sb_histo,
                                                    norm_type="SigMC",
                                                    sys=syst,
                                                    ignore_uncs=True)

                    ## write nominal and systematic variations for each topology to file
                    for proc in sorted(
                            set(key[0] for key in sys_histo.values().keys())):
                        if ("tt" not in proc) and is_ttJets_sys:
                            continue
                        if (proc == "data_obs") and not (syst == "nosys"):
                            continue
                        if not sys_histo[proc].values().keys():
                            print(
                                f"Systematic {syst} for {lep} {jmult} {proc} not found, skipping"
                            )
                            continue

                        print(args.year, lep, jmult, syst, proc)
                        # filter(None, ...) drops empty name components.
                        outhname = "_".join(
                            filter(None, [
                                proc, systematics.template_sys_to_name[
                                    args.year][syst][0], lepdir,
                                (args.year)[-2:]
                            ]))
                        if "LEP" in outhname:
                            outhname = outhname.replace(
                                "LEP", "muon" if lep == "Muon" else "electron")

                        template_histo = sys_histo[proc].integrate("process")

                        ## save template histos to coffea dict
                        if jmult == "3Jets":
                            histo_dict_3j[lep][
                                f"{proc}_{syst}"] = template_histo.copy()
                        if jmult == "4PJets":
                            histo_dict_4pj[lep][
                                f"{proc}_{syst}"] = template_histo.copy()

                        ## save template histo to root file
                        upfout[outhname] = hist.export1d(template_histo)

        if "3Jets" in njets_to_run:
            coffea_out_3j = os.path.join(
                outdir,
                f"test_raw_templates_lj_3Jets_bkg_{args.year}_{jobid}.coffea")
            save(histo_dict_3j, coffea_out_3j)
            print(f"{coffea_out_3j} written")
        if "4PJets" in njets_to_run:
            coffea_out_4pj = os.path.join(
                outdir,
                f"test_raw_templates_lj_4PJets_bkg_{args.year}_{jobid}.coffea")
            save(histo_dict_4pj, coffea_out_4pj)
            print(f"{coffea_out_4pj} written")
    finally:
        upfout.close()

    print(f"{tmp_rname} written")
def main():
    """Write the 2-mjj-bin VBF signal-region templates for one year.

    Command line: ``<year> [raw]``.  When the optional second argument is a
    positive integer, the "<year>-raw" directory is used instead of "<year>".

    The histogram is read from "<dir>/templates.pkl", restricted to the
    'signal-vbf' region, and for every mjj bin, process and systematic the
    pass/fail (``ddb1`` above/below ``ddbthr``) templates are exported to
    "<dir>/2mjj-signalregion.root".  Wjets/Zjets are additionally split by
    ``genflavor`` into a light and a "bb" template.

    The pickle is checked and loaded before the previous output is removed,
    so a missing or unreadable pickle no longer destroys an existing output
    file, and the output file is always closed.
    """
    raw = False

    nargs = len(sys.argv)
    if nargs < 2:
        print("Enter year")
        return
    elif nargs == 3:
        if int(sys.argv[2]) > 0:
            raw = True
    elif nargs > 3:
        print("Incorrect number of arguments")
        return

    year = sys.argv[1]
    if raw:
        year = year + "-raw"

    def load_vbf(path):
        # NOTE(review): pickle.load can execute arbitrary code; only point
        # this at trusted, locally produced files.
        with open(path, 'rb') as fin:
            return pickle.load(fin).integrate('region', 'signal-vbf')

    def template(h, mjj_range, ddb_range, proc, flavor_range=None):
        # One pass/fail template of `proc` in the given mjj and ddb1 ranges.
        # Without `flavor_range` the genflavor axis is summed out entirely.
        if flavor_range is None:
            h = h.sum('pt1', 'genflavor')
        else:
            h = h.sum('pt1')
        h = h.integrate('mjj', int_range=mjj_range)
        if flavor_range is not None:
            h = h.integrate('genflavor', int_range=flavor_range)
        return h.integrate('ddb1', int_range=ddb_range).integrate('process', proc)

    samples = [
        'data', 'muondata', 'QCD', 'ttbar', 'singlet', 'VV', 'ggF', 'VBF',
        'WH', 'ZH', 'ttH'
    ]

    print("2 MJJ BINS SR")
    mjjbins = [1000, 2000, 13000]

    # Check if pickle exists
    picklename = year + '/templates.pkl'
    if not os.path.isfile(picklename):
        print("You need to create the pickle")
        return

    # Read the histogram from the pickle file
    vbf = load_vbf(picklename)
    vbf17 = None  # 2017 histogram, loaded at most once and only if needed

    pass_range = slice(ddbthr, 1)
    fail_range = slice(0, ddbthr)

    outname = year + '/2mjj-signalregion.root'
    if os.path.isfile(outname):
        os.remove(outname)
    fout = uproot3.create(outname)
    try:
        for ibin, (mjj_lo, mjj_hi) in enumerate(zip(mjjbins[:-1], mjjbins[1:]),
                                                start=1):
            mjj_range = slice(mjj_lo, mjj_hi)
            prefix_pass = "vbf_pass_mjj" + str(ibin) + "_"
            prefix_fail = "vbf_fail_mjj" + str(ibin) + "_"

            for p in samples:
                print(p)

                source = vbf
                if year == '2016' and p == 'ggF' and not raw:
                    print("Taking shape for 2016 ggF from 2017")
                    if vbf17 is None:
                        vbf17 = load_vbf('2017/templates.pkl')
                        vbf17.scale(lumis['2016'] / lumis['2017'])
                    source = vbf17

                hpass = template(source, mjj_range, pass_range, p)
                hfail = template(source, mjj_range, fail_range, p)

                for s in hfail.identifiers('systematic'):
                    fout[prefix_pass + p + "_" + str(s)] = hist.export1d(
                        hpass.integrate('systematic', s))
                    fout[prefix_fail + p + "_" + str(s)] = hist.export1d(
                        hfail.integrate('systematic', s))

            for p in ['Wjets', 'Zjets']:
                print(p)

                light = slice(1, 3)
                bb = slice(3, 4)
                hpass = template(vbf, mjj_range, pass_range, p, light)
                hfail = template(vbf, mjj_range, fail_range, p, light)
                hpass_bb = template(vbf, mjj_range, pass_range, p, bb)
                hfail_bb = template(vbf, mjj_range, fail_range, p, bb)

                for s in hfail.identifiers('systematic'):
                    fout[prefix_pass + p + "_" + str(s)] = hist.export1d(
                        hpass.integrate('systematic', s))
                    fout[prefix_fail + p + "_" + str(s)] = hist.export1d(
                        hfail.integrate('systematic', s))
                    fout[prefix_pass + p + "bb_" + str(s)] = hist.export1d(
                        hpass_bb.integrate('systematic', s))
                    fout[prefix_fail + p + "bb_" + str(s)] = hist.export1d(
                        hfail_bb.integrate('systematic', s))
    finally:
        fout.close()

    return
def drawStack(h, var_name, var_label, lumifb, sel, vars_cut, save=False, drawData=False):
    """Draw a stacked background plot of one variable with signal overlays.

    Args:
        h: histogram with a ``process`` axis, an axis named ``var_name`` and
            the axes named in ``vars_cut``; all other axes are summed over.
        var_name: name of the axis to plot.
        var_label: label assigned to that axis.
        lumifb: luminosity, printed on the plot and used in the file names.
        sel: selection descriptor; it is only tested with ``in`` against the
            trigger keywords to choose the legend title.
        vars_cut: mapping ``axis name -> (low, high)``.  Axes other than
            ``var_name`` are integrated over that range; ``var_name`` itself
            is sliced to it.
        save: if true, also write one ``<process>_pass`` template per process
            to ``templates_<var_name>.root`` (replacing an existing file).
        drawData: if true, overlay ``data_obs_jetht``.

    Side effects: prints the yields, saves ``stack_<var>_lumi<lumi>.pdf`` and
    the log-scale ``..._log.pdf``.  Relies on the module-level selections
    ``bkg``, ``privsig``, ``offsig`` and the plotting option dicts.
    """
    # make cuts: keep the process axis, the plotted axis and every cut axis
    kept_axes = ['process', var_name] + list(vars_cut)
    x = h.sum(*[ax for ax in h.axes() if ax.name not in kept_axes])
    for var, val in vars_cut.items():
        if var != var_name:
            x = x.integrate(var, slice(val[0], val[1]))
    if var_name in vars_cut:
        x = x[:, vars_cut[var_name][0]:vars_cut[var_name][1]]

    # numbers
    scalesig = 5000.
    scalehpt = 0.29
    hsig = x[privsig]
    hsig.scale(scalesig)
    hsig.scale(scalehpt)

    # The printed signal yield is taken from x[privsig], not from hsig.
    all_bkg = sum(val.sum() for val in x[bkg].values().values())
    all_sig = sum(val.sum() for val in x[privsig].values().values())
    if all_bkg > 0:
        # np.sqrt instead of np.math.sqrt: the np.math alias no longer exists
        # in NumPy 2.
        print('allbkg %.2f, allsig %.2f, s/sqrt(b) %.4f' %
              (all_bkg, all_sig, all_sig / np.sqrt(all_bkg)))

    # save
    if save:
        out = "templates_%s.root" % var_name
        if os.path.exists(out):
            os.remove(out)
        fout = uproot.create(out)
        try:
            for proc in x.identifiers('process'):
                pass_template = x.integrate('process', proc)
                name = "%s_pass" % (proc.name)
                if 'data_obs' in proc.name:
                    name = 'data_obs_pass'
                if 'hww_private' in proc.name:
                    print('scaling by hpt', proc)
                    pass_template.scale(scalehpt)
                fout[name] = hist.export1d(pass_template)
        finally:
            # Close the file even if one export fails.
            fout.close()

    # identifiers
    x.axis(var_name).label = var_label
    for ih, hkey in enumerate(x.identifiers('process')):
        name = process_latex[hkey.name]
        if 'hww_private' in hkey.name:
            name += ' x %i' % scalesig
        x.identifiers('process')[ih].label = name

    x.axis('process').sorting = 'integral'
    fig, ax = plt.subplots(1, 1, figsize=(8, 8))
    hist.plot1d(
        x[bkg],
        overlay='process',
        ax=ax,
        clear=False,
        stack=True,
        fill_opts=fill_opts,
        error_opts=err_opts,
    )
    if drawData:
        hist.plot1d(x['data_obs_jetht'],
                    overlay="process",
                    ax=ax,
                    clear=False,
                    error_opts=data_err_opts)

    hist.plot1d(x[offsig],
                ax=ax,
                overlay='process',
                clear=False,
                error_opts={
                    'color': 'aquamarine',
                    'linewidth': 2
                })
    hist.plot1d(hsig,
                ax=ax,
                overlay='process',
                clear=False,
                error_opts={
                    'color': 'greenyellow',
                    'linewidth': 2
                })

    ax.autoscale(axis='x', tight=True)
    maxval = 0
    for val in x[bkg].values().values():
        maxval = max(maxval, np.amax(val))

    ax.set_ylim(0, None)
    ax.ticklabel_format(axis='x', style='sci')
    old_handles, old_labels = ax.get_legend_handles_labels()

    # 'triggermuonall' must be tested before 'triggermuon': the shorter
    # keyword is a substring of the longer one, so for a string `sel` the
    # "All trigger (muon)" title could otherwise never be selected.
    legend_titles = (
        ('triggermuonall', 'All trigger (muon)'),
        ('triggermuon', 'Muon trigger'),
        ('triggerhad', 'Hadronic trigger'),
        ('triggerelectronall', 'All trigger (electron)'),
    )
    title = next((text for key, text in legend_titles if key in sel),
                 'All trigger')
    ax.legend(handles=old_handles, labels=old_labels, title=title)

    plt.text(1., 1., r"%i fb$^{-1}$ (13 TeV)" % lumifb,
             fontsize=16,
             horizontalalignment='right',
             verticalalignment='bottom',
             transform=ax.transAxes)
    fig.savefig("stack_%s_lumi%i.pdf" % (var_name, lumifb))

    # Second copy on a log scale, with headroom above the tallest background.
    ax.set_ylim(0.01, 100 * maxval)
    ax.set_yscale('log')
    fig.savefig("stack_%s_lumi%i_log.pdf" % (var_name, lumifb))
label='Data', yerr=True) hep.histplot( [temp_unmatched_fail.values()[()], temp_matched_fail.values()[()]], temp_matched_fail.axis('msd').edges(), stack=True, ax=ax2, label=['Unmatched', 'Matched'], histtype='fill', color=['green', 'red']) for ax in ax1, ax2: ax.legend() ax.set_xlabel('jet $m_{SD}$') ax1.set_title("pass") ax2.set_title("fail") fig.savefig(f'{store_dir}/templates.png') fout_pass = uproot3.create(f'{store_dir}/wtag_pass.root') fout_fail = uproot3.create(f'{store_dir}/wtag_fail.root') fout_pass['data_obs'] = hist.export1d(temp_data_pass) fout_fail['data_obs'] = hist.export1d(temp_data_fail) fout_pass['catp2'] = hist.export1d(temp_matched_pass) fout_fail['catp2'] = hist.export1d(temp_matched_fail) fout_pass['catp1'] = hist.export1d(temp_unmatched_pass) fout_fail['catp1'] = hist.export1d(temp_unmatched_fail) fout_pass.close() fout_fail.close()
parser.add_argument('fnames', type=str, nargs='+', help='Histogram files')
args = parser.parse_args()

# Convert each input file into a ROOT file that holds one 1D histogram per
# (dataset, channel, histogram key) combination.
for fname in args.fnames:
    outname = fname.replace('.coffea', '.root')
    if outname == fname:
        # The input has no '.coffea' in its name; without this guard the
        # input itself would be deleted and overwritten further down.
        outname = fname + '.root'

    # Skip inputs whose output is already newer than the input.
    try:
        if os.path.getmtime(fname) < os.path.getmtime(outname):
            continue
    except FileNotFoundError:
        pass

    print(f'Converting {fname}')
    hists = load(fname)
    if os.path.exists(outname):
        os.remove(outname)
    fout = uproot.create(outname)
    try:
        # Some inputs store a tuple whose first element is the histogram dict.
        if isinstance(hists, tuple):
            hists = hists[0]
        for key, h in hists.items():
            if not isinstance(h, hist.Hist):
                continue
            for dataset in h.identifiers('dataset'):
                for channel in h.identifiers('channel'):
                    newhist = h.integrate('dataset', dataset).integrate(
                        'channel', channel)
                    hname = '{}_{}_{}'.format(dataset, channel, key)
                    fout[hname] = hist.export1d(newhist)
    finally:
        # Close the output even if a histogram fails to export.
        fout.close()
def analyzeChannel(self, channel='2lss', cuts='2+bm', charges=['ch+', 'ch-'], systematics='nominal', variable='njets'):
    """Export the templates of one analysis category to a temporary ROOT file.

    Selects ``self.hists[variable]`` for the given channel, cut, charge(s)
    and systematic, then for every process in ``self.samples`` writes the SM
    template (``<process>_sm``), a ``data_obs`` template, and one template
    per entry of ``self.wcs`` whose summed yield exceeds ``self.tolerance``.
    Finally calls ``self.makeCardLevel`` with the same selection.

    Args:
        channel: key of ``self.channels``.
        cuts: cut identifier; the largest integer found in it becomes the
            ``<N>b`` part of the category name.
        charges: charge identifier(s) to integrate over.  The default list is
            never mutated here.
        systematics: systematic identifier to integrate over.
        variable: histogram key; ``njets`` and ``ht`` are rebinned.

    Raises:
        Exception: if ``channel`` (or any entry of a list ``channel``) is not
            a key of ``self.channels``.
        ValueError: if ``cuts`` contains no digits.
    """
    if isinstance(channel, str) and channel not in self.channels:
        raise Exception(f'{channel} not found in self.channels!')
    # Check the requested channels (the previous check iterated over
    # self.channels itself and therefore could never fail).
    if isinstance(channel, list) and not all(ch in self.channels for ch in channel):
        print(self.channels.keys())
        print([[ch, ch in self.channels.keys()] for ch in channel])
        raise Exception(
            f'At least one channel in {channel} is not found in self.channels!'
        )

    h = self.hists[variable].integrate(
        'channel', self.channels[channel]).integrate('cut', cuts).integrate(
            'sumcharge', charges).integrate('systematic', systematics)
    all_str = f'{channel} {cuts} {charges} {systematics} {variable}'
    print(f'Making relish from the pickle file for {all_str}')

    # Category name.  Only the single string 'ch+' maps to 'p'; every other
    # value, including a list of charges, maps to 'm'.
    # NOTE(review): a list of both charges is therefore labelled 'm'; kept
    # as-is because the output file name depends on it — confirm it matches
    # what makeCardLevel expects.
    charge = 'p' if isinstance(charges, str) and charges == 'ch+' else 'm'
    result = [e for e in re.split("[^0-9]", cuts) if e != '']
    maxb = str(max(map(int, result))) + 'b'
    cat_parts = [channel, charge, maxb]
    if variable != 'njets':
        cat_parts.append(variable)
    cat = '_'.join(cat_parts)

    # njets bin edges, chosen by the first tag contained in the channel name
    njets_edges = (
        ('2l', [4, 5, 6, 7]),
        ('3l', [2, 3, 4, 5]),
        ('4l', [2, 3, 4]),
    )

    fname = f'histos/tmp_ttx_multileptons-{cat}.root'
    fout = uproot3.recreate(fname)
    try:
        #Scale each plot to the SM
        for proc in self.samples:
            #Integrate out processes
            h_base = h.integrate('sample', proc)
            if h_base == {}:
                print(f'Issue with {proc}')
                continue
            nwc = self.hsow._nwc
            if nwc > 0:
                h_base.scale(self.lumi / self.smsow[proc])
            pname = (self.rename[proc] if proc in self.rename else proc) + '_'

            if variable == 'njets':
                edges = next(
                    (e for tag, e in njets_edges if tag in channel), None)
                if edges is not None:
                    h_base = h_base.rebin(
                        'njets',
                        hist.Bin("njets", "Jet multiplicity ", edges))
            elif variable == 'ht':
                h_base = h_base.rebin(
                    'ht', hist.Bin("ht", "H$_{T}$ (GeV)", 10, 0, 1000))

            # h_base is re-pointed in place (no copies are made), so every
            # template is exported right after the histogram has been set
            # to the corresponding point.
            #Save the SM plot
            h_base.set_sm()
            fout[pname + 'sm'] = hist.export1d(h_base)
            #Asimov data: data_obs = MC at SM (all WCs = 0)
            # NOTE(review): written once per process, so each write uses
            # only the current process's SM template — confirm whether the
            # sum over processes was intended.
            fout['data_obs'] = hist.export1d(h_base)

            # Linear, quadratic and mixed terms are handled identically:
            # move to the WC point and keep the template if it is non-empty.
            for name, wcpt in self.wcs:
                h_base.set_wilson_coeff_from_array(wcpt)
                if np.sum(h_base.values()[()]) > self.tolerance:
                    fout[pname + name] = hist.export1d(h_base)
    finally:
        # Close the file even if a template fails to export.
        fout.close()

    self.makeCardLevel(channel=channel,
                       cuts=cuts,
                       charges=charges,
                       nbjet=maxb,
                       systematics=systematics,
                       variable=variable)