def test_export1d():
    """Round-trip a regularly binned 1D histogram through a ROOT file.

    The histogram is converted with ``export1d``, written with ``uproot3``,
    read back, and its edges and values are compared with the exported object.
    The temporary ROOT file is removed even when an assertion fails, so a
    failed run cannot leave a stale file behind for the next run.
    """
    # NOTE(review): the original indentation was lost; this assumes only the
    # uproot3 import is expected to raise the FutureWarning -- confirm.
    with pytest.warns(FutureWarning):
        import uproot3
    import os
    from coffea.hist.export import export1d

    # Only the third returned array is used by this test.
    _, _, test_pt = dummy_jagged_eta_pt()

    h_regular_bins = hist.Hist("regular_joe", hist.Bin("x", "x", 20, 0, 200))
    h_regular_bins.fill(x=test_pt)

    hout = export1d(h_regular_bins)

    filename = 'test_export1d.root'

    try:
        # The context manager closes the file; no explicit close() is needed.
        with uproot3.create(filename) as fout:
            fout['regular_joe'] = hout

        with uproot3.open(filename) as fin:
            hin = fin['regular_joe']

        assert np.all(hin.edges == hout.edges)
        assert np.all(hin.values == hout.values)

        # Drop the references to the read-back objects before deleting the file.
        del hin
        del fin
    finally:
        if os.path.exists(filename):
            os.remove(filename)
import mplhep as hep

hep.set_style("CMS")

# One figure per systematic: the nominal shape overlaid with the up/down
# variations, saved under work_dir.
for png_path, shifted_up, shifted_down in (
        (f'{work_dir}/scale.png', scale_up, scale_down),
        (f'{work_dir}/smear.png', smear_up, smear_down),
):
    fig, ax = plt.subplots()
    hep.histplot(morph_base.get()[:2], c='black', ls=':', label='Nominal')
    hep.histplot(shifted_up[:2], c='blue', ls='--', label='Up')
    hep.histplot(shifted_down[:2], c='red', ls='--', label='Down')
    ax.set_xlabel('jet $m_{SD}$')
    ax.legend()
    fig.savefig(png_path)

# Start from a fresh output file: remove any previous one before creating it.
if os.path.exists(args.out_file):
    os.remove(args.out_file)
fout = uproot3.create(args.out_file)

# Histograms copied unchanged from the source file (output key, source key).
for out_key, src_key in (
        ('data_obs', 'data_obs'),
        ('catp1', 'catp1'),
        ('catp2', 'catp2'),
        ('catp2_central', 'catp2'),
):
    fout[out_key] = source_file[src_key]

# Shifted templates, exported with the requested histogram type.
for out_key, shifted in (
        ('catp2_smearDown', smear_down),
        ('catp2_smearUp', smear_up),
        ('catp2_scaleDown', scale_down),
        ('catp2_scaleUp', scale_up),
):
    fout[out_key] = export1d(shifted, histtype=args.hist_type)
# Bookkeeping: map the requested type onto the tag used in the file names;
# anything other than 'cc' or '3' falls back to "BB".
file_kind = {'cc': 'CC', '3': "3"}.get(args.type, "BB")

template_file = f"templates_{args.identifier}_{file_kind}.root"
template_mu_file = f"templatesmuCR_{args.identifier}_{file_kind}.root"

# Remove outputs left over from a previous run before creating new ones.
for stale_path in (template_file, template_mu_file):
    if os.path.exists(stale_path):
        os.remove(stale_path)

print(f'Will save templates to {template_file}')
fout = uproot3.create(template_file)

# Load info
print(f'Processing coffea output from hists_{args.identifier}.coffea')
output = load(f'hists_{args.identifier}.coffea')

# Cross sections are read from the manually maintained table
# (the alternative table is 'metadata/xSections.dat').
xsecs = xSecReader('metadata/xSections_manual.dat')

# Scale by xsecs
output = rescale(output, xsecs)

#########
# Load mergemap (only when one was given on the command line)
if args.mergemap is not None:
    with open(args.mergemap) as json_file:
        merge_map = json.load(json_file)
def get_bkg_templates(tmp_rname):
    """
    Function that writes linearized mtt vs costheta distributions to root file.

    Every file in ``inputdir`` whose name matches the background template
    pattern is loaded. Each template whose systematic is listed in
    ``sys_to_use`` is optionally smoothed, stored in the coffea accumulator of
    its jet multiplicity, and exported to the ROOT file.

    Parameters
    ----------
    tmp_rname : str
        Path of the ROOT file to write. An existing file is recreated,
        otherwise a new one is created.

    Notes
    -----
    The ROOT file is opened as a context manager so that it is closed even if
    loading, smoothing or exporting raises. One ``.coffea`` file per jet
    multiplicity listed in ``njets_to_run`` is also saved under ``outdir``.
    """
    # define variables to get histogram for background
    bkg_fnmatch = "%s.coffea" % base_template_name.replace(
        "NJETS", njets_regex).replace("SIG", "bkg")
    bkg_fnames = fnmatch.filter(os.listdir(inputdir), bkg_fnmatch)

    if "3Jets" in njets_to_run:
        histo_dict_3j = processor.dict_accumulator({"Muon": {}, "Electron": {}})
    if "4PJets" in njets_to_run:
        histo_dict_4pj = processor.dict_accumulator({"Muon": {}, "Electron": {}})

    # need to save coffea hist objects to file so they can be opened by uproot3 in the proper format
    # NOTE(review): compression is only set explicitly when an existing file
    # is recreated; a newly created file uses uproot3's default -- confirm
    # that this asymmetry is intended.
    rootfile = (uproot3.recreate(tmp_rname, compression=uproot3.ZLIB(4))
                if os.path.isfile(tmp_rname) else uproot3.create(tmp_rname))

    with rootfile as upfout:
        for bkg_file in bkg_fnames:
            hdict = load(os.path.join(inputdir, bkg_file))
            jmult = "3Jets" if "3Jets" in bkg_file else "4PJets"

            for lep in hdict.keys():
                lepdir = "mujets" if lep == "Muon" else "ejets"

                for tname, template_histo in hdict[lep].items():
                    # Template names are "<proc>_<systematic>", except data,
                    # which is always treated as the nominal "nosys" entry.
                    if "data_obs" in tname:
                        proc, syst = "data_obs", "nosys"
                    else:
                        proc, *sys_parts = tname.split("_")
                        syst = "_".join(sys_parts)

                    if syst not in sys_to_use:
                        continue

                    sysname, _only_tt = sys_to_use[syst]
                    is_nominal = syst == "nosys"

                    name = proc + lepdir if proc == "QCD" else proc
                    print(lep, jmult, syst, name)

                    name_parts = [jmult, lepdir, name]
                    if not is_nominal:
                        name_parts.append(sysname)
                    outhname = "_".join(name_parts)

                    jmult_templates = histo_dict_3j if jmult == "3Jets" else histo_dict_4pj

                    if not is_nominal and args.smooth and templates_to_smooth[proc]:
                        # The nominal template is read back from the
                        # accumulator, so the "nosys" entry of this process
                        # must already have been stored.
                        template_histo = smoothing(
                            nominal=jmult_templates[lep][proc],
                            template=template_histo,
                            nbinsx=len(linearize_binning[0]) - 1,
                            nbinsy=len(linearize_binning[1]) - 1)

                    ## save template histos to coffea dict
                    dict_key = proc if is_nominal else "%s_%s" % (proc, sysname)
                    jmult_templates[lep][dict_key] = template_histo

                    ## save template histo to root file
                    upfout[outhname] = hist.export1d(template_histo)

    if "3Jets" in njets_to_run:
        coffea_out_3j = os.path.join(
            outdir,
            f"templates_lj_3Jets_bkg_smoothed_{jobid}_{args.year}.coffea"
            if args.smooth else
            f"templates_lj_3Jets_bkg_{jobid}_{args.year}.coffea")
        save(histo_dict_3j, coffea_out_3j)
        print(f"{coffea_out_3j} written")

    if "4PJets" in njets_to_run:
        coffea_out_4pj = os.path.join(
            outdir,
            f"templates_lj_4PJets_bkg_smoothed_{jobid}_{args.year}.coffea"
            if args.smooth else
            f"templates_lj_4PJets_bkg_{jobid}_{args.year}.coffea")
        save(histo_dict_4pj, coffea_out_4pj)
        print(f"{coffea_out_4pj} written")

    print(f"{tmp_rname} written")
def get_bkg_templates(tmp_rname):
    """
    Function that writes linearized mtt vs costheta distributions to root file.

    The ``mtt_vs_tlep_ctstar_abs`` histogram is read from ``bkg_fnames``,
    restricted to ``Plotter.nonsignal_samples`` and rebinned on both dense
    axes. For every lepton flavour, jet multiplicity, systematic and process,
    the linearized template is then stored in a coffea accumulator and
    exported to the ROOT file.

    Parameters
    ----------
    tmp_rname : str
        Path of the ROOT file to write. An existing file is recreated,
        otherwise a new one is created.

    Raises
    ------
    ValueError
        If the histogram is missing from the input, or if "nosys" is not
        among the systematics found in the histogram.
    TypeError
        If a rebinning in ``linearize_binning`` is neither a numpy array nor
        a float/int.

    Notes
    -----
    The ROOT file is opened as a context manager so that it is closed even if
    an intermediate step raises. One ``.coffea`` file per jet multiplicity
    listed in ``njets_to_run`` is also saved under ``outdir``.
    """
    ## variables that only need to be defined/evaluated once
    hdict = plt_tools.add_coffea_files(bkg_fnames) if len(bkg_fnames) > 1 else load(bkg_fnames[0])

    # get correct hist and rebin
    hname_to_use = "mtt_vs_tlep_ctstar_abs"
    if hname_to_use not in hdict.keys():
        raise ValueError("%s not found in file" % hname_to_use)
    xrebinning, yrebinning = linearize_binning
    histo = hdict[hname_to_use][Plotter.nonsignal_samples]  # process, sys, jmult, leptype, btag, lepcat

    xaxis_name = histo.dense_axes()[0].name
    yaxis_name = histo.dense_axes()[1].name

    ## rebin x axis
    if isinstance(xrebinning, np.ndarray):
        new_xbins = hist.Bin(xaxis_name, xaxis_name, xrebinning)
    elif isinstance(xrebinning, (float, int)):
        new_xbins = xrebinning
    else:
        raise TypeError(
            "Unsupported x rebinning type: %s" % type(xrebinning).__name__)
    histo = histo.rebin(xaxis_name, new_xbins)

    ## rebin y axis
    if isinstance(yrebinning, np.ndarray):
        new_ybins = hist.Bin(yaxis_name, yaxis_name, yrebinning)
    elif isinstance(yrebinning, (float, int)):
        new_ybins = yrebinning
    else:
        raise TypeError(
            "Unsupported y rebinning type: %s" % type(yrebinning).__name__)
    rebin_histo = histo.rebin(yaxis_name, new_ybins)

    ## scale ttJets events, split by reconstruction type, by normal ttJets lumi correction
    ttJets_permcats = ["*right", "*matchable", "*unmatchable", "*sl_tau", "*other"]
    names = sorted({key[0] for key in histo.values().keys()})  # get dataset names in hists
    ttJets_cats = [
        name for name in names
        if any(fnmatch.fnmatch(name, cat) for cat in ttJets_permcats)
    ]  # gets ttJets(_PS)_other, ...

    ## make groups based on process
    process = hist.Cat("process", "Process", sorting="placement")
    process_cat = "dataset"

    if "3Jets" in njets_to_run:
        histo_dict_3j = processor.dict_accumulator({"Muon": {}, "Electron": {}})
    if "4PJets" in njets_to_run:
        histo_dict_4pj = processor.dict_accumulator({"Muon": {}, "Electron": {}})

    # need to save coffea hist objects to file so they can be opened by uproot in the proper format
    # NOTE(review): compression is only set explicitly when an existing file
    # is recreated; a newly created file uses uproot3's default -- confirm
    # that this asymmetry is intended.
    rootfile = (uproot3.recreate(tmp_rname, compression=uproot3.ZLIB(4))
                if os.path.isfile(tmp_rname) else uproot3.create(tmp_rname))

    with rootfile as upfout:
        for lep in ["Muon", "Electron"]:
            orig_lepdir = "muNJETS" if lep == "Muon" else "eNJETS"

            ## make groups based on process
            process_groups = plt_tools.make_dataset_groups(
                lep, args.year, samples=names, gdict="templates")

            lumi_correction = lumi_corr_dict[args.year]["%ss" % lep]
            # scale ttJets events, split by reconstruction type, by normal ttJets lumi correction
            # NOTE: the entries are added to the mapping held by
            # lumi_corr_dict itself, i.e. that shared mapping is modified.
            for tt_cat in ttJets_cats:
                # "sl_tau" itself contains an underscore, so two trailing
                # fields are dropped for it instead of one.
                n_suffix_fields = 2 if "sl_tau" in tt_cat else 1
                ttJets_lumi_topo = "_".join(
                    tt_cat.split("_")[:-n_suffix_fields])  # gets ttJets[SL, Had, DiLep] or ttJets_PS
                lumi_correction[tt_cat] = lumi_correction[ttJets_lumi_topo]

            histo = rebin_histo.copy()
            histo.scale(lumi_correction, axis="dataset")
            histo = histo.group(process_cat, process, process_groups)[
                :, :, :, lep, :, :].integrate("leptype")

            systs = sorted({key[1] for key in histo.values().keys()})
            systs.insert(0, systs.pop(systs.index("nosys")))  # move "nosys" to the front

            # loop over each jet multiplicity
            for jmult in njets_to_run:
                lepdir = orig_lepdir.replace("NJETS", jmult.lower())

                # get sideband and signal region hists
                cen_sb_histo = Plotter.linearize_hist(
                    histo[:, "nosys", jmult,
                          btag_reg_names_dict["Central"]["reg"]].integrate(
                              "jmult").integrate("btag").integrate("sys"))
                sig_histo = Plotter.linearize_hist(
                    histo[:, :, jmult,
                          btag_reg_names_dict["Signal"]["reg"]].integrate(
                              "jmult").integrate("btag"))

                # loop over each systematic
                for syst in systs:
                    if syst not in systematics.template_sys_to_name[args.year].keys():
                        continue

                    is_ttjets_sys = syst in systematics.ttJets_sys.values()
                    sig_sys_histo = sig_histo[:, syst].integrate("sys")
                    if is_ttjets_sys:
                        sys_histo = sig_sys_histo
                    else:
                        sys_histo = Plotter.BKG_Est(
                            sig_reg=sig_sys_histo,
                            sb_reg=cen_sb_histo,
                            norm_type="SigMC",
                            sys=syst,
                            ignore_uncs=True)

                    ## write nominal and systematic variations for each topology to file
                    for proc in sorted({key[0] for key in sys_histo.values().keys()}):
                        if ("tt" not in proc) and is_ttjets_sys:
                            continue
                        if (proc == "data_obs") and (syst != "nosys"):
                            continue
                        if not sys_histo[proc].values().keys():
                            print(
                                f"Systematic {syst} for {lep} {jmult} {proc} not found, skipping"
                            )
                            continue

                        print(args.year, lep, jmult, syst, proc)
                        # Empty name components are dropped before joining.
                        outhname = "_".join(
                            filter(None, [
                                proc,
                                systematics.template_sys_to_name[args.year][syst][0],
                                lepdir,
                                args.year[-2:],
                            ]))
                        if "LEP" in outhname:
                            outhname = outhname.replace(
                                "LEP", "muon" if lep == "Muon" else "electron")

                        template_histo = sys_histo[proc].integrate("process")

                        ## save template histos to coffea dict
                        if jmult == "3Jets":
                            histo_dict_3j[lep][f"{proc}_{syst}"] = template_histo.copy()
                        if jmult == "4PJets":
                            histo_dict_4pj[lep][f"{proc}_{syst}"] = template_histo.copy()

                        ## save template histo to root file
                        upfout[outhname] = hist.export1d(template_histo)

    if "3Jets" in njets_to_run:
        coffea_out_3j = os.path.join(
            outdir,
            f"test_raw_templates_lj_3Jets_bkg_{args.year}_{jobid}.coffea")
        save(histo_dict_3j, coffea_out_3j)
        print(f"{coffea_out_3j} written")

    if "4PJets" in njets_to_run:
        coffea_out_4pj = os.path.join(
            outdir,
            f"test_raw_templates_lj_4PJets_bkg_{args.year}_{jobid}.coffea")
        save(histo_dict_4pj, coffea_out_4pj)
        print(f"{coffea_out_4pj} written")

    print(f"{tmp_rname} written")
def get_sig_templates(tmp_rname):
    """
    Function that writes linearized mtt vs costheta distributions to root file.

    The ``mtt_vs_tlep_ctstar_abs`` histogram is read from ``sig_fnames``,
    rebinned on both dense axes and restricted to ``Plotter.signal_samples``
    in the "btagPass" category. For every lepton flavour, jet multiplicity,
    signal sample containing "TTJetsSL" and systematic, the linearized
    template is stored in a coffea accumulator and exported to the ROOT file.

    Parameters
    ----------
    tmp_rname : str
        Path of the ROOT file to write. An existing file is recreated,
        otherwise a new one is created.

    Raises
    ------
    ValueError
        If the histogram is missing from the input, or if "nosys" is not
        among the systematics found in the histogram.
    TypeError
        If a rebinning in ``linearize_binning`` is neither a numpy array nor
        a float/int.

    Notes
    -----
    The ROOT file is opened as a context manager so that it is closed even if
    an intermediate step raises. One ``.coffea`` file per jet multiplicity
    listed in ``njets_to_run`` is also saved under ``outdir``.
    """
    ## variables that only need to be defined/evaluated once
    hdict = plt_tools.add_coffea_files(sig_fnames) if len(sig_fnames) > 1 else load(sig_fnames[0])

    # get correct hist and rebin
    hname_to_use = "mtt_vs_tlep_ctstar_abs"
    if hname_to_use not in hdict.keys():
        raise ValueError(f"{hname_to_use} not found in file")
    xrebinning, yrebinning = linearize_binning
    histo = hdict[hname_to_use]  # process, sys, jmult, leptype, btag, lepcat

    xaxis_name = histo.dense_axes()[0].name
    yaxis_name = histo.dense_axes()[1].name

    ## rebin x axis
    if isinstance(xrebinning, np.ndarray):
        new_xbins = hist.Bin(xaxis_name, xaxis_name, xrebinning)
    elif isinstance(xrebinning, (float, int)):
        new_xbins = xrebinning
    else:
        raise TypeError(
            "Unsupported x rebinning type: %s" % type(xrebinning).__name__)
    histo = histo.rebin(xaxis_name, new_xbins)

    ## rebin y axis
    if isinstance(yrebinning, np.ndarray):
        new_ybins = hist.Bin(yaxis_name, yaxis_name, yrebinning)
    elif isinstance(yrebinning, (float, int)):
        new_ybins = yrebinning
    else:
        raise TypeError(
            "Unsupported y rebinning type: %s" % type(yrebinning).__name__)
    histo = histo.rebin(yaxis_name, new_ybins)

    rebin_histo = histo[Plotter.signal_samples, :, :, :, "btagPass"].integrate("btag")

    names = sorted({key[0] for key in rebin_histo.values().keys()})  # get dataset names in hists
    signals = [sig for sig in names if "TTJetsSL" in sig]  # only use SL decays

    systs = sorted({key[1] for key in rebin_histo.values().keys()})
    systs.insert(0, systs.pop(systs.index("nosys")))  # move "nosys" to the front

    if "3Jets" in njets_to_run:
        histo_dict_3j = processor.dict_accumulator({"Muon": {}, "Electron": {}})
    if "4PJets" in njets_to_run:
        histo_dict_4pj = processor.dict_accumulator({"Muon": {}, "Electron": {}})

    # need to save coffea hist objects to file so they can be opened by uproot in the proper format
    # NOTE(review): compression is only set explicitly when an existing file
    # is recreated; a newly created file uses uproot3's default -- confirm
    # that this asymmetry is intended.
    rootfile = (uproot3.recreate(tmp_rname, compression=uproot3.ZLIB(4))
                if os.path.isfile(tmp_rname) else uproot3.create(tmp_rname))

    # write signal dists to temp file
    with rootfile as upfout:
        for lep in ["Muon", "Electron"]:
            orig_lepdir = "muNJETS" if lep == "Muon" else "eNJETS"

            # scale by lumi
            lumi_correction = lumi_corr_dict[args.year]["%ss" % lep]
            histo = rebin_histo.copy()
            histo.scale(lumi_correction, axis="dataset")

            process_groups = plt_tools.make_dataset_groups(
                lep, args.year, samples=names, gdict="templates")
            histo = histo.group(
                "dataset",
                hist.Cat("process", "Process", sorting="placement"),
                process_groups)

            for jmult in njets_to_run:
                lepdir = orig_lepdir.replace("NJETS", jmult.lower())

                lin_histo = Plotter.linearize_hist(
                    histo[:, :, jmult, lep].integrate("jmult").integrate("leptype"))

                for signal in signals:
                    # Sample names have four "_"-separated fields, plus a
                    # fifth one when the name contains "Int".
                    fields = signal.split("_")
                    if "Int" in signal:
                        boson, mass, width, pI, wt = fields
                    else:
                        boson, mass, width, pI = fields

                    # "." is not used in template names, so it is written as
                    # "p"; only the part after the last "W" is kept.
                    relw = str(width).replace(".", "p").split("W")[-1]
                    sub_name_parts = [
                        "%s%s" % (boson[0], mass[1:]),
                        "relw%s" % relw,
                        pI.lower(),
                    ]
                    if pI == "Int":
                        sub_name_parts.append(wt)
                    sub_name = "_".join(sub_name_parts)

                    for syst in systs:
                        if syst not in systematics.template_sys_to_name[args.year].keys():
                            continue
                        if not lin_histo[signal, syst].values().keys():
                            print(
                                f"Systematic {syst} for {lep} {jmult} {signal} not found, skipping"
                            )
                            continue

                        print(args.year, lep, jmult, sub_name, syst)
                        # Empty name components are dropped before joining.
                        outhname = "_".join(
                            filter(None, [
                                sub_name,
                                systematics.template_sys_to_name[args.year][syst][0],
                                lepdir,
                                args.year[-2:],
                            ]))
                        if "LEP" in outhname:
                            outhname = outhname.replace(
                                "LEP", "muon" if lep == "Muon" else "electron")

                        template_histo = lin_histo[signal, syst].integrate(
                            "process").integrate("sys")

                        ## save template histos to coffea dict
                        if jmult == "3Jets":
                            histo_dict_3j[lep][f"{signal}_{syst}"] = template_histo.copy()
                        if jmult == "4PJets":
                            histo_dict_4pj[lep][f"{signal}_{syst}"] = template_histo.copy()

                        ## save template histo to root file
                        upfout[outhname] = hist.export1d(template_histo)

    if "3Jets" in njets_to_run:
        coffea_out_3j = os.path.join(
            outdir,
            f"test_raw_templates_lj_3Jets_sig_{args.year}_{jobid}.coffea")
        save(histo_dict_3j, coffea_out_3j)
        print(f"{coffea_out_3j} written")

    if "4PJets" in njets_to_run:
        coffea_out_4pj = os.path.join(
            outdir,
            f"test_raw_templates_lj_4PJets_sig_{args.year}_{jobid}.coffea")
        save(histo_dict_4pj, coffea_out_4pj)
        print(f"{coffea_out_4pj} written")

    print(f"{tmp_rname} written")
def _ddb_pass_fail(h, process):
    # Split `h` at the ddb1 threshold and select one process: (pass, fail).
    hpass = h.integrate('ddb1', int_range=slice(ddbthr, 1)).integrate(
        'process', process)
    hfail = h.integrate('ddb1', int_range=slice(0, ddbthr)).integrate(
        'process', process)
    return hpass, hfail


def main():
    """Write pass/fail templates of the 'signal-vbf' region to a ROOT file.

    Command line: ``<year> [raw]``. When the optional second argument is an
    integer greater than zero, "-raw" is appended to the year, which is used
    as the directory of both the input pickle and the output file.

    For every mjj bin and every process, the histogram is split at ``ddbthr``
    on the 'ddb1' axis into a pass and a fail template, and one 1D histogram
    per systematic is written to ``<year>/2mjj-signalregion.root``. For
    'Wjets' and 'Zjets' the templates are additionally split into two
    'genflavor' ranges; the second range is written with a "bb" suffix.

    The input pickle is checked and loaded before the previous output file is
    removed, so a missing or unreadable input no longer destroys an existing
    output. The output file and the pickle files are closed when done.
    """
    raw = False

    if len(sys.argv) < 2:
        print("Enter year")
        return
    elif len(sys.argv) == 3:
        if int(sys.argv[2]) > 0:
            raw = True
    elif len(sys.argv) > 3:
        print("Incorrect number of arguments")
        return

    year = sys.argv[1]
    if raw:
        year = year + "-raw"

    samples = [
        'data', 'muondata', 'QCD', 'ttbar', 'singlet', 'VV', 'ggF', 'VBF',
        'WH', 'ZH', 'ttH'
    ]

    print("2 MJJ BINS SR")
    mjjbins = [1000, 2000, 13000]

    # Check if pickle exists
    picklename = year + '/templates.pkl'
    if not os.path.isfile(picklename):
        print("You need to create the pickle")
        return

    # Read the histogram from the pickle file
    # NOTE: pickle.load executes arbitrary code from the file; only use it on
    # template files produced by trusted code.
    with open(picklename, 'rb') as pkl:
        vbf = pickle.load(pkl).integrate('region', 'signal-vbf')

    outname = year + '/2mjj-signalregion.root'
    if os.path.isfile(outname):
        os.remove(outname)

    vbf17 = None  # 2017 templates, loaded at most once and only if needed

    with uproot3.create(outname) as fout:
        for ibin, (mjj_lo, mjj_hi) in enumerate(zip(mjjbins, mjjbins[1:]),
                                                start=1):
            mjj_range = slice(mjj_lo, mjj_hi)

            # Summed over 'genflavor': used for all processes in `samples`.
            h_incl = vbf.sum('pt1', 'genflavor').integrate(
                'mjj', int_range=mjj_range)

            for p in samples:
                print(p)

                if year == '2016' and p == 'ggF' and not raw:
                    print("Taking shape for 2016 ggF from 2017")
                    if vbf17 is None:
                        with open('2017/templates.pkl', 'rb') as pkl17:
                            vbf17 = pickle.load(pkl17).integrate(
                                'region', 'signal-vbf')
                        # scale() modifies the histogram in place, so it must
                        # be applied exactly once per loaded histogram.
                        vbf17.scale(lumis['2016'] / lumis['2017'])
                    h_src = vbf17.sum('pt1', 'genflavor').integrate(
                        'mjj', int_range=mjj_range)
                else:
                    h_src = h_incl

                hpass, hfail = _ddb_pass_fail(h_src, p)

                for s in hfail.identifiers('systematic'):
                    fout[f"vbf_pass_mjj{ibin}_{p}_{s!s}"] = hist.export1d(
                        hpass.integrate('systematic', s))
                    fout[f"vbf_fail_mjj{ibin}_{p}_{s!s}"] = hist.export1d(
                        hfail.integrate('systematic', s))

            # Wjets/Zjets are split into two 'genflavor' ranges.
            h_flav = vbf.sum('pt1').integrate('mjj', int_range=mjj_range)
            h_flav_1to3 = h_flav.integrate('genflavor', int_range=slice(1, 3))
            h_flav_3to4 = h_flav.integrate('genflavor', int_range=slice(3, 4))

            for p in ['Wjets', 'Zjets']:
                print(p)

                hpass, hfail = _ddb_pass_fail(h_flav_1to3, p)
                hpass_bb, hfail_bb = _ddb_pass_fail(h_flav_3to4, p)

                for s in hfail.identifiers('systematic'):
                    fout[f"vbf_pass_mjj{ibin}_{p}_{s!s}"] = hist.export1d(
                        hpass.integrate('systematic', s))
                    fout[f"vbf_fail_mjj{ibin}_{p}_{s!s}"] = hist.export1d(
                        hfail.integrate('systematic', s))
                    fout[f"vbf_pass_mjj{ibin}_{p}bb_{s!s}"] = hist.export1d(
                        hpass_bb.integrate('systematic', s))
                    fout[f"vbf_fail_mjj{ibin}_{p}bb_{s!s}"] = hist.export1d(
                        hfail_bb.integrate('systematic', s))
label='Data', yerr=True) hep.histplot( [temp_unmatched_fail.values()[()], temp_matched_fail.values()[()]], temp_matched_fail.axis('msd').edges(), stack=True, ax=ax2, label=['Unmatched', 'Matched'], histtype='fill', color=['green', 'red']) for ax in ax1, ax2: ax.legend() ax.set_xlabel('jet $m_{SD}$') ax1.set_title("pass") ax2.set_title("fail") fig.savefig(f'{store_dir}/templates.png') fout_pass = uproot3.create(f'{store_dir}/wtag_pass.root') fout_fail = uproot3.create(f'{store_dir}/wtag_fail.root') fout_pass['data_obs'] = hist.export1d(temp_data_pass) fout_fail['data_obs'] = hist.export1d(temp_data_fail) fout_pass['catp2'] = hist.export1d(temp_matched_pass) fout_fail['catp2'] = hist.export1d(temp_matched_fail) fout_pass['catp1'] = hist.export1d(temp_unmatched_pass) fout_fail['catp1'] = hist.export1d(temp_unmatched_fail) fout_pass.close() fout_fail.close()
parser.add_argument("-s", '--source', type=str, required=True, help="Source file")
parser.add_argument("-t", '--target', type=str, required=True, help="Target file")
parser.add_argument("-n", '--new', type=str, default=None, help="Name for modified file")
parser.add_argument("--ys", '--yearsource', default=2017, type=int, help="Scale by appropriate lumi")
parser.add_argument("--yt", '--yeartarget', default=2018, type=int, help="Scale by appropriate lumi")
args = parser.parse_args()

if args.new is None:
    # Default output name: "<target without .root>_mod.root".
    # str.rstrip(".root") must not be used here: it strips any trailing run
    # of the characters '.', 'r', 'o', 't' (e.g. "boot.root" -> "b"), not
    # the ".root" suffix.
    root_suffix = ".root"
    if args.target.endswith(root_suffix):
        target_stem = args.target[:-len(root_suffix)]
    else:
        target_stem = args.target
    args.new = target_stem + "_mod.root"

print("Running with the following options:")
print(args)

fins = uproot3.open(args.source)
fint = uproot3.open(args.target)

# Start from a fresh output file: remove any previous one before creating it.
if os.path.exists(args.new):
    os.remove(args.new)
fout = uproot3.create(args.new)

# Copy every key of the target file; keys containing b'hcc' are instead taken
# from the source file and rescaled from the source-year to the target-year
# luminosity.
for key in fint.keys():
    if b'hcc' in key:
        src_hist = fins[key]
        for i in range(len(src_hist)):
            src_hist[i] /= lumi[args.ys]
            src_hist[i] *= lumi[args.yt]
        fout[key] = src_hist
    else:
        fout[key] = fint[key]