def get_bkg_templates(tmp_rname):
    """
    Function that writes linearized mtt vs costheta distributions to root file.
    """
    ## variables that only need to be defined/evaluated once
    hdict = plt_tools.add_coffea_files(bkg_fnames) if len(bkg_fnames) > 1 else load(bkg_fnames[0])

    # get correct hist and rebin
    hname_to_use = "mtt_vs_tlep_ctstar_abs"
    if hname_to_use not in hdict.keys():
        raise ValueError("%s not found in file" % hname_to_use)
    xrebinning, yrebinning = linearize_binning
    histo = hdict[hname_to_use][Plotter.nonsignal_samples]  # process, sys, jmult, leptype, btag, lepcat

    xaxis_name = histo.dense_axes()[0].name
    yaxis_name = histo.dense_axes()[1].name
    ## rebin x axis
    if isinstance(xrebinning, np.ndarray):
        new_xbins = hist.Bin(xaxis_name, xaxis_name, xrebinning)
    elif isinstance(xrebinning, (float, int)):
        new_xbins = xrebinning
    histo = histo.rebin(xaxis_name, new_xbins)
    ## rebin y axis
    if isinstance(yrebinning, np.ndarray):
        new_ybins = hist.Bin(yaxis_name, yaxis_name, yrebinning)
    elif isinstance(yrebinning, (float, int)):
        new_ybins = yrebinning
    rebin_histo = histo.rebin(yaxis_name, new_ybins)

    ## scale ttJets events, split by reconstruction type, by normal ttJets lumi correction
    ttJets_permcats = ["*right", "*matchable", "*unmatchable", "*sl_tau", "*other"]
    names = sorted(set(key[0] for key in histo.values().keys()))  # get dataset names in hists
    ttJets_cats = [name for name in names if any(fnmatch.fnmatch(name, cat) for cat in ttJets_permcats)]  # gets ttJets(_PS)_other, ...

    ## make groups based on process
    process = hist.Cat("process", "Process", sorting="placement")
    process_cat = "dataset"

    # need to save coffea hist objects to file so they can be opened by uproot in the proper format
    upfout = uproot3.recreate(tmp_rname, compression=uproot3.ZLIB(4)) if os.path.isfile(tmp_rname) else uproot3.create(tmp_rname)

    if "3Jets" in njets_to_run:
        histo_dict_3j = processor.dict_accumulator({"Muon": {}, "Electron": {}})
    if "4PJets" in njets_to_run:
        histo_dict_4pj = processor.dict_accumulator({"Muon": {}, "Electron": {}})

    for lep in ["Muon", "Electron"]:
        orig_lepdir = "muNJETS" if lep == "Muon" else "eNJETS"

        ## make groups based on process
        process_groups = plt_tools.make_dataset_groups(lep, args.year, samples=names, gdict="templates")

        lumi_correction = lumi_corr_dict[args.year]["%ss" % lep]
        # scale ttJets events, split by reconstruction type, by normal ttJets lumi correction
        if len(ttJets_cats) > 0:
            for tt_cat in ttJets_cats:
                # gets ttJets[SL, Had, DiLep] or ttJets_PS
                ttJets_lumi_topo = "_".join(tt_cat.split("_")[:-2]) if "sl_tau" in tt_cat else "_".join(tt_cat.split("_")[:-1])
                ttJets_eff_lumi = lumi_correction[ttJets_lumi_topo]
                lumi_correction.update({tt_cat: ttJets_eff_lumi})

        histo = rebin_histo.copy()
        histo.scale(lumi_correction, axis="dataset")
        histo = histo.group(process_cat, process, process_groups)[:, :, :, lep, :, :].integrate("leptype")

        systs = sorted(set(key[1] for key in histo.values().keys()))
        systs.insert(0, systs.pop(systs.index("nosys")))  # move "nosys" to the front

        # loop over each jet multiplicity
        for jmult in njets_to_run:
            lepdir = orig_lepdir.replace("NJETS", jmult.lower())

            # get sideband and signal region hists
            cen_sb_histo = Plotter.linearize_hist(histo[:, "nosys", jmult, btag_reg_names_dict["Central"]["reg"]].integrate("jmult").integrate("btag").integrate("sys"))
            #up_sb_histo = histo[:, "nosys", jmult, btag_reg_names_dict["Up"]["reg"]].integrate("jmult").integrate("btag")
            #dw_sb_histo = histo[:, "nosys", jmult, btag_reg_names_dict["Down"]["reg"]].integrate("jmult").integrate("btag")
            sig_histo = Plotter.linearize_hist(histo[:, :, jmult, btag_reg_names_dict["Signal"]["reg"]].integrate("jmult").integrate("btag"))

            # loop over each systematic
            for sys in systs:
                if sys not in systematics.template_sys_to_name[args.year].keys():
                    continue

                sys_histo = sig_histo[:, sys].integrate("sys") if sys in systematics.ttJets_sys.values() \
                    else Plotter.BKG_Est(sig_reg=sig_histo[:, sys].integrate("sys"), sb_reg=cen_sb_histo,
                                         norm_type="SigMC", sys=sys, ignore_uncs=True)

                ## write nominal and systematic variations for each topology to file
                for proc in sorted(set(key[0] for key in sys_histo.values().keys())):
                    if ("tt" not in proc) and (sys in systematics.ttJets_sys.values()):
                        continue
                    if (proc == "data_obs") and not (sys == "nosys"):
                        continue
                    if not sys_histo[proc].values().keys():
                        print(f"Systematic {sys} for {lep} {jmult} {proc} not found, skipping")
                        continue

                    print(args.year, lep, jmult, sys, proc)
                    outhname = "_".join(list(filter(None, [proc, systematics.template_sys_to_name[args.year][sys][0], lepdir, (args.year)[-2:]])))
                    if "LEP" in outhname:
                        outhname = outhname.replace("LEP", "muon") if lep == "Muon" else outhname.replace("LEP", "electron")

                    template_histo = sys_histo[proc].integrate("process")

                    ## save template histos to coffea dict
                    if jmult == "3Jets":
                        histo_dict_3j[lep][f"{proc}_{sys}"] = template_histo.copy()
                    if jmult == "4PJets":
                        histo_dict_4pj[lep][f"{proc}_{sys}"] = template_histo.copy()

                    ## save template histo to root file
                    upfout[outhname] = hist.export1d(template_histo)

    if "3Jets" in njets_to_run:
        coffea_out_3j = os.path.join(outdir, f"test_raw_templates_lj_3Jets_bkg_{args.year}_{jobid}.coffea")
        save(histo_dict_3j, coffea_out_3j)
        print(f"{coffea_out_3j} written")
    if "4PJets" in njets_to_run:
        coffea_out_4pj = os.path.join(outdir, f"test_raw_templates_lj_4PJets_bkg_{args.year}_{jobid}.coffea")
        save(histo_dict_4pj, coffea_out_4pj)
        print(f"{coffea_out_4pj} written")

    upfout.close()
    print(f"{tmp_rname} written")
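# For reference, a minimal sketch of the coffea-to-ROOT round trip the function
# above relies on: hist.export1d converts a coffea hist with one dense axis and
# no sparse axes into a TH1 that uproot3 can write. This assumes the pre-1.0
# coffea hist API and uproot3 used throughout this code; the histogram and file
# names here are illustrative only.
import numpy as np
import uproot3
from coffea import hist

h = hist.Hist("Events", hist.Bin("mtt", "m_{tt} [GeV]", 10, 250., 1250.))
h.fill(mtt=np.random.exponential(300., 1000) + 250.)

fout = uproot3.recreate("example_templates.root", compression=uproot3.ZLIB(4))
fout["TT_nosys_mu3jets_18"] = hist.export1d(h)  # stored as a TH1D
fout.close()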
def process(self, file, analysis_type):
    output = self.accumulator.identity()
    acc = load(file)

    Muon_lead_acc = acc['Muon_lead']
    Muon_trail_acc = acc['Muon_trail']
    Dimu_acc = acc['Dimu']
    D0_acc = acc['D0']
    D0_trk_acc = acc['D0_trk']
    Dstar_acc = acc['Dstar']
    Dstar_trk_acc = acc['Dstar_trk']
    DimuDstar_acc = acc['DimuDstar']
    Primary_vertex_acc = acc['Primary_vertex']
    DimuDstar_p4 = build_p4(DimuDstar_acc)

    if analysis_type == 'mc':
        Gen_Part_acc = acc['Gen_particles']
        Gen_Jpsi_acc = acc['Gen_Jpsi']

    ########## Filling histograms

    if analysis_type == 'mc':
        ## Gen Particles
        output['GenPart_pdgId'].fill(pdgId=Gen_Part_acc['pdgId'].value)
        output['GenJpsi_mass'].fill(mass=Gen_Jpsi_acc['mass'].value)
        output['GenJpsi_p'].fill(pt=Gen_Jpsi_acc['pt'].value,
                                 eta=Gen_Jpsi_acc['eta'].value,
                                 phi=Gen_Jpsi_acc['phi'].value)

    # Primary vertex
    output['Primary_vertex_npvs'].fill(npvs=Primary_vertex_acc['npvs'].value)

    # Muon
    output['Muon_lead_p'].fill(pt=Muon_lead_acc['pt'].value,
                               eta=Muon_lead_acc['eta'].value,
                               phi=Muon_lead_acc['phi'].value)
    output['Muon_trail_p'].fill(pt=Muon_trail_acc['pt'].value,
                                eta=Muon_trail_acc['eta'].value,
                                phi=Muon_trail_acc['phi'].value)

    # Dimuon resonances: the Upsilon, Jpsi and Psi histograms all follow the
    # same pattern and differ only in the boolean mask selecting the candidates.
    for name, flag in [('Upsilon', 'is_ups'), ('Jpsi', 'is_jpsi'), ('Psi', 'is_psi')]:
        mask = Dimu_acc[flag].value
        output[f'{name}_mass'].fill(mass=Dimu_acc['mass'].value[mask])
        output[f'{name}_p'].fill(pt=Dimu_acc['pt'].value[mask],
                                 eta=Dimu_acc['eta'].value[mask],
                                 phi=Dimu_acc['phi'].value[mask])
        output[f'{name}_rap'].fill(rap=Dimu_acc['rap'].value[mask])
        output[f'{name}_dl'].fill(dl=Dimu_acc['dl'].value[mask])
        output[f'{name}_dlSig'].fill(dlSig=Dimu_acc['dlSig'].value[mask])
        output[f'{name}_chi2'].fill(chi2=Dimu_acc['chi2'].value[mask])
        output[f'{name}_cosphi'].fill(cosphi=Dimu_acc['cosphi'].value[mask])

    # D0
    output['D0_mass12'].fill(mass=D0_acc['mass12'].value)
    output['D0_mass21'].fill(mass=D0_acc['mass21'].value)
    output['D0_p'].fill(pt=D0_acc['pt'].value, eta=D0_acc['eta'].value, phi=D0_acc['phi'].value)
    output['D0_rap'].fill(rap=D0_acc['rap'].value)
    output['D0_dl'].fill(dl=D0_acc['dl'].value)
    output['D0_dlSig'].fill(dlSig=D0_acc['dlSig'].value)
    output['D0_chi2'].fill(chi2=D0_acc['chi2'].value)
    output['D0_cosphi'].fill(cosphi=D0_acc['cosphi'].value)
    output['D0_eta_mass'].fill(eta=D0_acc['eta'].value, mass=D0_acc['mass'].value)

    # D0 trks: both tracks are filled into the same histograms
    for trk in ('t1', 't2'):
        output['D0_trk_p'].fill(pt=D0_trk_acc[f'{trk}_pt'].value,
                                eta=D0_trk_acc[f'{trk}_eta'].value,
                                phi=D0_trk_acc[f'{trk}_phi'].value)
        output['D0_trk_chindof'].fill(chindof=D0_trk_acc[f'{trk}_chindof'].value)
        output['D0_trk_nValid'].fill(nValid=D0_trk_acc[f'{trk}_nValid'].value)
        output['D0_trk_nPix'].fill(nPix=D0_trk_acc[f'{trk}_nPix'].value)
        output['D0_trk_dxy'].fill(dxy=D0_trk_acc[f'{trk}_dxy'].value)
        output['D0_trk_dz'].fill(dz=D0_trk_acc[f'{trk}_dz'].value)

    # Dstar: filled separately for right- and wrong-charge candidates
    wrg_chg = Dstar_acc['wrg_chg'].value
    for chg, mask in [('right charge', ~wrg_chg), ('wrong charge', wrg_chg)]:
        output['Dstar_p'].fill(chg=chg, pt=Dstar_acc['pt'].value[mask],
                               eta=Dstar_acc['eta'].value[mask],
                               phi=Dstar_acc['phi'].value[mask])
        output['Dstar_rap'].fill(chg=chg, rap=Dstar_acc['rap'].value[mask])
        output['Dstar_deltamr'].fill(chg=chg, deltamr=Dstar_acc['deltamr'].value[mask])
        output['Dstar_deltam'].fill(chg=chg, deltam=Dstar_acc['deltam'].value[mask])

    # Dstar trks (right-charge candidates only)
    rc = ~wrg_chg
    for trk in ('K', 'pi', 'pis'):
        output[f'Dstar_{trk}_p'].fill(pt=Dstar_trk_acc[f'{trk}_pt'].value[rc],
                                      eta=Dstar_trk_acc[f'{trk}_eta'].value[rc],
                                      phi=Dstar_trk_acc[f'{trk}_phi'].value[rc])
        output[f'Dstar_{trk}_chindof'].fill(chindof=Dstar_trk_acc[f'{trk}_chindof'].value[rc])
        output[f'Dstar_{trk}_nValid'].fill(nValid=Dstar_trk_acc[f'{trk}_nValid'].value[rc])
        output[f'Dstar_{trk}_nPix'].fill(nPix=Dstar_trk_acc[f'{trk}_nPix'].value[rc])
        output[f'Dstar_{trk}_dxy'].fill(dxy=Dstar_trk_acc[f'{trk}_dxy'].value[rc])
        output[f'Dstar_{trk}_dz'].fill(dz=Dstar_trk_acc[f'{trk}_dz'].value[rc])
    # 2D pt-eta histograms exist only for the K and pi tracks
    for trk in ('K', 'pi'):
        output[f'Dstar_{trk}_pt_eta'].fill(pt=Dstar_trk_acc[f'{trk}_pt'].value[rc],
                                           eta=Dstar_trk_acc[f'{trk}_eta'].value[rc])

    ############# DimuDstar
    is_ups = DimuDstar_acc['Dimu']['is_ups'].value
    is_jpsi = DimuDstar_acc['Dimu']['is_jpsi'].value
    is_psi = DimuDstar_acc['Dimu']['is_psi'].value
    wrg_chg = DimuDstar_acc['Dstar']['wrg_chg'].value

    # The three associated-production groups are identical up to the dimuon
    # resonance mask, so fill them in one loop.
    for group, dimu, is_res in [('UpsilonDstar', 'Upsilon', is_ups),
                                ('JpsiDstar', 'Jpsi', is_jpsi),
                                ('PsiDstar', 'Psi', is_psi)]:
        rc = is_res & ~wrg_chg
        wc = is_res & wrg_chg
        out = output[group]
        out[f'{dimu}_mass'].fill(mass=DimuDstar_acc['Dimu']['mass'].value[rc])
        out[f'{dimu}_p'].fill(pt=DimuDstar_acc['Dimu']['pt'].value[rc],
                              eta=DimuDstar_acc['Dimu']['eta'].value[rc],
                              phi=DimuDstar_acc['Dimu']['phi'].value[rc])
        out[f'{dimu}_rap'].fill(rap=DimuDstar_acc['Dimu']['rap'].value[rc])
        for chg, mask in [('right charge', rc), ('wrong charge', wc)]:
            out['Dstar_deltamr'].fill(chg=chg, deltamr=DimuDstar_acc['Dstar']['deltamr'].value[mask])
            out['Dstar_deltam'].fill(chg=chg, deltam=DimuDstar_acc['Dstar']['deltam'].value[mask])
            out['Dstar_p'].fill(chg=chg, pt=DimuDstar_acc['Dstar']['pt'].value[mask],
                                eta=DimuDstar_acc['Dstar']['eta'].value[mask],
                                phi=DimuDstar_acc['Dstar']['phi'].value[mask])
            out['Dstar_rap'].fill(chg=chg, rap=DimuDstar_acc['Dstar']['rap'].value[mask])
        out[f'{group}_deltarap'].fill(deltarap=DimuDstar_acc['deltarap'].value[rc])
        out[f'{group}_mass'].fill(mass=DimuDstar_p4.mass[rc])

    return output
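# The per-resonance blocks above all reduce to one pattern: build a boolean
# mask, index the flat column arrays with it, then fill. A self-contained
# sketch of that pattern with toy data (coffea 0.x hist API; names are
# illustrative, not from the accumulators above):
import numpy as np
from coffea import hist

mass = np.random.uniform(2.8, 11.0, 500)   # toy dimuon masses [GeV]
is_jpsi = (mass > 2.95) & (mass < 3.25)    # stand-in for Dimu_acc['is_jpsi']

h = hist.Hist("Events", hist.Bin("mass", r"$m_{\mu\mu}$ [GeV]", 50, 2.8, 3.4))
h.fill(mass=mass[is_jpsi])                 # only candidates in the J/psi window
print(h.values())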
def make_trees(args):
    filelists = files_by_dataset(args.files)
    # The output for each dataset will be written into a separate file
    for dataset, files in filelists.items():
        # Find region and branch names
        datatypes = {}
        tree_by_variable = {}
        variables = []
        regions = []

        # Scout out what branches there are
        for fname in files:
            acc = load(fname)
            treenames = [x for x in map(str, acc.keys()) if x.startswith("tree")]
            for tn in treenames:
                datatype = tn.split("_")[-1]
                for region in acc[tn].keys():
                    vars = acc[tn][region].keys()
                    regions.append(region)
                    variables.extend(vars)
                    for v in vars:
                        datatypes[v] = np.float64  # getattr(np, datatype)
                        tree_by_variable[v] = tn

        # Combine
        with uproot.recreate(pjoin(args.outdir, f"tree_{dataset}.root"), compression=uproot.ZLIB(4)) as f:
            for region, fname in tqdm(list(itertools.product(set(regions), files)), desc=dataset):
                acc = load(fname)
                d = {x: acc[tree_by_variable[x]][region][x].value for x in variables}

                # Remove empty entries
                to_remove = [k for k, v in d.items() if not len(v)]
                for k in to_remove:
                    d.pop(k)
                if not len(d):
                    continue

                if region not in [re.sub(";.*", "", x.decode("utf-8")) for x in f.keys()]:
                    f[region] = uproot.newtree({x: np.float64 for x in d.keys()})

                lengths = set(len(v) for v in d.values())
                assert len(lengths) == 1

                # write
                f[region].extend(d)
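# make_trees grows each region's TTree incrementally: declare branches once
# with newtree, then append equal-length flat arrays with extend. A minimal
# sketch of that pattern in isolation (uproot 3.x API, here imported from the
# uproot3 package; branch names and values are illustrative):
import numpy as np
import uproot3 as uproot  # the code above predates the uproot4 rename

with uproot.recreate("example_tree.root", compression=uproot.ZLIB(4)) as f:
    f["sr_vbf"] = uproot.newtree({"mjj": np.float64, "recoil": np.float64})
    # extend() appends one equal-length array per declared branch
    f["sr_vbf"].extend({"mjj": np.random.rand(100) * 3000.,
                        "recoil": np.random.rand(100) * 500.})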
## Json file reader
with open(metadata) as fin:
    datadict = json.load(fin)

filelist = glob.glob(datadict[data_sample])

if isFake:
    sample_name = "Fake_Photon"
else:
    sample_name = data_sample.split("_")[0]

corr_file = "../Corrections/corrections.coffea"
# corr_file = "corrections.coffea"  # Condor-batch
corrections = load(corr_file)

## Read PU weight file
if not isdata:
    pu_path_dict = {
        "DY": "mcPileupDist_DYToEE_M-50_NNPDF31_TuneCP5_13TeV-powheg-pythia8.npy",
        "TTWJets": "mcPileupDist_TTWJetsToLNu_TuneCP5_13TeV-amcatnloFXFX-madspin-pythia8.npy",
        "TTZtoLL": "mcPileupDist_TTZToLLNuNu_M-10_TuneCP5_13TeV-amcatnlo-pythia8.npy",
        "WW": "mcPileupDist_WW_TuneCP5_DoubleScattering_13TeV-pythia8.npy",
        "WZ": "mcPileupDist_WZ_TuneCP5_13TeV-pythia8.npy",
        "ZZ": "mcPileupDist_ZZ_TuneCP5_13TeV-pythia8.npy",
        "tZq": "mcPileupDist_tZq_ll_4f_ckm_NLO_TuneCP5_13TeV-amcatnlo-pythia8.npy",
        "WZG": "mcPileupDist_wza_UL18.npy",
        "ZGToLLG": "mcPileupDist_ZGToLLG_01J_5f_TuneCP5_13TeV-amcatnloFXFX-pythia8.npy",
        "TTGJets": "mcPileupDist_TTGJets_TuneCP5_13TeV-amcatnloFXFX-madspin-pythia8.npy",
triggers = ["HLT_Mu7_IP4", "HLT_Mu9_IP5", "HLT_Mu9_IP6", "HLT_Mu12_IP6"] btypes = ["Bu", "Bs", "Bd"] btype_longnames = { "Bu": "Bu2KJpsi2KMuMu", "Bs": "Bs2PhiJpsi2KKMuMu", "Bd": "Bd2KsJpsi2KPiMuMu" } btype_shortnames = {"Bu": "BuToKMuMu", "Bs": "BsToKKMuMu", "Bd": "BdToKPiMuMu"} vars = ["pt", "y"] # MC probefilter efficiency coffea_files = {} for btype in btypes: coffea_files[btype] = util.load( f"{btype_longnames[btype]}/MCEfficiencyHistograms.coffea") axes = {} axes["pt"] = { "probe": hist.Bin("pt", r"$p_{T}$ [GeV]", np.array([8., 13., 18., 23., 28., 33.])), "tag": hist.Bin( "pt", r"$p_{T}$ [GeV]", np.array([ 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 18.0, 20.0, 23.0, 26.0, 29.0, 34.0, 45.0 ])) } axes["y"] = { "probe": hist.Bin("y", r"$|y|$", np.array(np.arange(0., 2.25 + 0.25,
    }
    for year in years_to_run
}

if args.construct_btag:
    from copy import deepcopy
    btag_contructs_dict = deepcopy(flav_effs)

jet_mults = {
    '3Jets': '3 jets',
    '4PJets': '4+ jets',
}
flav_to_name = {'bjet': 'bottom', 'cjet': 'charm', 'ljet': 'light'}
hname = 'Jets_pt_eta'

lumi_correction = load(os.path.join(proj_dir, 'Corrections', jobid, 'MC_LumiWeights.coffea'))

#pt_binning = np.array([30.0, 35.0, 40.0, 45.0, 50.0, 55.0, 60.0, 65.0, 70.0, 75.0, 80.0, 85.0, 90.0, 95.0, 100.0, 105.0, 110.0, 125.0, 150.0, 170.0, 200.0, 250.0, 1000.0])
#eta_binning = np.array([-2.5, -1.5, -0.5, 0.0, 0.5, 1.5, 2.5])
pt_binning = np.array([30.0, 35.0, 40.0, 45.0, 50.0, 55.0, 60.0, 65.0, 70.0,
                       75.0, 80.0, 85.0, 90.0, 95.0, 100.0, 105.0, 110.0,
                       125.0, 150.0, 170.0, 200.0, 1000.0])
eta_binning = np.array([-2.5, -2., -1.5, -1.0, -0.5, 0.0, 0.5, 1.0, 1.5, 2.0, 2.5])
pt_bins = hist.Bin('pt', 'pt', pt_binning)
eta_bins = hist.Bin('eta', 'eta', eta_binning)
working_points = []
def test_hist_compat():
    from coffea.util import load

    test = load("tests/samples/old_hist_format.coffea")

    expected_bins = np.array([
        -np.inf, 0.0, 20.0, 40.0, 60.0, 80.0, 100.0, 120.0, 140.0, 160.0,
        180.0, 200.0, 220.0, 240.0, 260.0, 280.0, 300.0, 320.0, 340.0, 360.0,
        380.0, 400.0, 420.0, 440.0, 460.0, 480.0, 500.0, 520.0, 540.0, 560.0,
        580.0, 600.0, 620.0, 640.0, 660.0, 680.0, 700.0, 720.0, 740.0, 760.0,
        780.0, 800.0, 820.0, 840.0, 860.0, 880.0, 900.0, 920.0, 940.0, 960.0,
        980.0, 1000.0, 1020.0, 1040.0, 1060.0, 1080.0, 1100.0, 1120.0, 1140.0,
        1160.0, 1180.0, 1200.0, np.inf, np.nan,
    ])
    assert np.all(test._axes[2]._interval_bins[:-1] == expected_bins[:-1])
    assert np.isnan(test._axes[2]._interval_bins[-1])
parser.add_argument('lepton', choices=['Electron', 'Muon'], help='Choose which lepton to make plots for')
args = parser.parse_args()

proj_dir = os.environ['PROJECT_DIR']
jobid = os.environ['jobid']
base_jobid = os.environ['base_jobid']
analyzer = 'data_hem_comp'

input_dir = os.path.join(proj_dir, 'results', '%s_%s' % (args.year, jobid), analyzer)
f_ext = 'TOT.coffea'
outdir = os.path.join(proj_dir, 'plots', '%s_%s' % (args.year, jobid), analyzer)
if not os.path.isdir(outdir):
    os.makedirs(outdir)

fnames = sorted(['%s/%s' % (input_dir, fname) for fname in os.listdir(input_dir) if fname.endswith(f_ext)])
hdict = plt_tools.add_coffea_files(fnames) if len(fnames) > 1 else load(fnames[0])

jet_mults = {
    '3Jets': '3 jets',
    '4PJets': '4+ jets',
}

objtypes = {
    'Jets': 'jets',
    'Lep': {
        'Muon': '$\\mu$',
        'Electron': '$e$',
    },
}

btag_cats = {
def process(self, ds):
    output = self.accumulator.identity()
    acc = load(ds["file"])

    ############ Histogram definition
    # Muons
    hist_muon_lead = bh.Histogram(
        bh.axis.Regular(100, 0, 50, metadata=r"$p_{T,\mu}$ [GeV]"),
        bh.axis.Regular(60, -2.5, 2.5, metadata=r"$\eta_{\mu}$"),
        bh.axis.Regular(70, -3.5, 3.5, metadata=r"$\phi_{\mu}$"))
    hist_muon_trail = bh.Histogram(
        bh.axis.Regular(100, 0, 50, metadata=r"$p_{T,\mu}$ [GeV]"),
        bh.axis.Regular(60, -2.5, 2.5, metadata=r"$\eta_{\mu}$"),
        bh.axis.Regular(70, -3.5, 3.5, metadata=r"$\phi_{\mu}$"))

    # Dimu
    hist_dimu = bh.Histogram(
        bh.axis.Regular(100, 0, 50, metadata=r"$p_{T,\mu^+\mu^-}$ [GeV]"),
        bh.axis.Regular(80, -2.5, 2.5, metadata=r"$\eta_{\mu^+\mu^-}$"),
        bh.axis.Regular(70, -3.5, 3.5, metadata=r"$\phi_{\mu^+\mu^-}$"))
    hist_dimu_mass = bh.Histogram(
        bh.axis.Regular(100, 8.6, 11, metadata=r"$m_{\mu^+\mu^-}$ [GeV]"))

    # D0
    hist_D0 = bh.Histogram(
        bh.axis.Regular(100, 0, 50, metadata=r"$p_{T,D^0}$ [GeV]"),
        bh.axis.Regular(80, -2.5, 2.5, metadata=r"$\eta_{D^0}$"),
        bh.axis.Regular(70, -3.5, 3.5, metadata=r"$\phi_{D^0}$"))
    hist_D0_mass = bh.Histogram(
        bh.axis.Regular(100, 1.7, 2.0, metadata=r"$m_{D^0}$ [GeV]"))
    hist_D0_eta_mass = bh.Histogram(
        bh.axis.Regular(80, -2.5, 2.5, metadata=r"$\eta_{D^0}$"),
        bh.axis.Regular(100, 1.7, 2.0, metadata=r"$m_{D^0}$ [GeV]"))
    hist_D0_trk = bh.Histogram(
        bh.axis.Regular(100, 0, 50, metadata=r"$p_{T,D^0 trks}$ [GeV]"),
        bh.axis.Regular(80, -2.5, 2.5, metadata=r"$\eta_{D^0 trks}$"),
        bh.axis.Regular(70, -3.5, 3.5, metadata=r"$\phi_{D^0 trks}$"))

    # Dstar
    hist_Dstar = bh.Histogram(
        bh.axis.Regular(100, 0, 50, metadata=r"$p_{T,D*}$ [GeV]"),
        bh.axis.Regular(60, -2.5, 2.5, metadata=r"$\eta_{D*}$"),
        bh.axis.Regular(70, -3.5, 3.5, metadata=r"$\phi_{D*}$"))
    hist_Dstar_K = bh.Histogram(
        bh.axis.Regular(100, 0, 30, metadata=r"$p_{T,D* K}$ [GeV]"),
        bh.axis.Regular(60, -2.5, 2.5, metadata=r"$\eta_{D* K}$"),
        bh.axis.Regular(70, -3.5, 3.5, metadata=r"$\phi_{D* K}$"))
    hist_Dstar_pi = bh.Histogram(
        bh.axis.Regular(100, 0, 30, metadata=r"$p_{T,D* \pi}$ [GeV]"),
        bh.axis.Regular(60, -2.5, 2.5, metadata=r"$\eta_{D* \pi}$"),
        bh.axis.Regular(70, -3.5, 3.5, metadata=r"$\phi_{D* \pi}$"))
    hist_Dstar_pis = bh.Histogram(
        bh.axis.Regular(100, 0, 20, metadata=r"$p_{T,\pi_s}$ [GeV]"),
        bh.axis.Regular(60, -2.5, 2.5, metadata=r"$\eta_{\pi_s}$"),
        bh.axis.Regular(70, -3.5, 3.5, metadata=r"$\phi_{\pi_s}$"))
    hist_Dstar_mass = bh.Histogram(
        bh.axis.Regular(100, 1.8, 2.2, metadata=r"$m_{D*}$ [GeV]"))
    hist_Dstar_mass_refit = bh.Histogram(
        bh.axis.Regular(100, 1.8, 2.2, metadata=r"$m_{D* refit}$ [GeV]"))
    hist_Dstar_deltamr = bh.Histogram(
        bh.axis.Regular(50, 0.138, 0.162, metadata=r"$\Delta m_{refit}$ [GeV]"))
    hist_Dstar_deltam = bh.Histogram(
        bh.axis.Regular(50, 0.138, 0.162, metadata=r"$\Delta m$ [GeV]"))

    # Filling histograms
    hist_muon_lead.fill(acc["Muon_lead"]["__fast_pt"].value,
                        acc["Muon_lead"]["__fast_eta"].value,
                        acc["Muon_lead"]["__fast_phi"].value)
    hist_muon_trail.fill(acc["Muon_trail"]["__fast_pt"].value,
                         acc["Muon_trail"]["__fast_eta"].value,
                         acc["Muon_trail"]["__fast_phi"].value)
    hist_dimu.fill(acc["Dimu"]["__fast_pt"].value,
                   acc["Dimu"]["__fast_eta"].value,
                   acc["Dimu"]["__fast_phi"].value)
    hist_dimu_mass.fill(acc["Dimu"]["__fast_mass"].value)
    hist_D0.fill(acc["D0"]["__fast_pt"].value,
                 acc["D0"]["__fast_eta"].value,
                 acc["D0"]["__fast_phi"].value)
    hist_D0_mass.fill(acc["D0"]["__fast_mass"].value)
    hist_D0_eta_mass.fill(acc["D0"]["__fast_eta"].value, acc["D0"]["__fast_mass"].value)
    hist_D0_trk.fill(acc["D0_trk"]["t1_pt"].value,
                     acc["D0_trk"]["t1_eta"].value,
                     acc["D0_trk"]["t1_phi"].value)
    hist_D0_trk.fill(acc["D0_trk"]["t2_pt"].value,
                     acc["D0_trk"]["t2_eta"].value,
                     acc["D0_trk"]["t2_phi"].value)
    hist_Dstar.fill(acc["Dstar"]["__fast_pt"].value,
                    acc["Dstar"]["__fast_eta"].value,
                    acc["Dstar"]["__fast_phi"].value)
    hist_Dstar_mass.fill(acc["Dstar"]["__fast_mass"].value)
    # refit mass = refit delta m + D0 mass
    hist_Dstar_mass_refit.fill(acc["Dstar"]["deltamr"].value + acc["Dstar_D0"]["D0_mass"].value)
    hist_Dstar_K.fill(acc["Dstar_trk"]["K_pt"].value,
                      acc["Dstar_trk"]["K_eta"].value,
                      acc["Dstar_trk"]["K_phi"].value)
    hist_Dstar_pi.fill(acc["Dstar_trk"]["pi_pt"].value,
                       acc["Dstar_trk"]["pi_eta"].value,
                       acc["Dstar_trk"]["pi_phi"].value)
    hist_Dstar_pis.fill(acc["Dstar_trk"]["pis_pt"].value,
                        acc["Dstar_trk"]["pis_eta"].value,
                        acc["Dstar_trk"]["pis_phi"].value)
    hist_Dstar_deltamr.fill(acc["Dstar"]["deltamr"].value)
    hist_Dstar_deltam.fill(acc["Dstar"]["deltam"].value)

    # Saving histograms (same files as before, grouped into a loop)
    hist_dir = "output/" + ds['analyzer_name'] + "/hist/"
    for name, h in [("Muon_lead", hist_muon_lead), ("Muon_trail", hist_muon_trail),
                    ("Dimu", hist_dimu), ("Dimu_mass", hist_dimu_mass),
                    ("D0", hist_D0), ("D0_mass", hist_D0_mass),
                    ("Dstar", hist_Dstar), ("Dstar_mass", hist_Dstar_mass),
                    ("Dstar_mass_refit", hist_Dstar_mass_refit),
                    ("Dstar_deltamr", hist_Dstar_deltamr),
                    ("Dstar_deltam", hist_Dstar_deltam)]:
        save(h, hist_dir + "hist_" + name + ".hist")

    # Creating plots 1D: every (pt, eta, phi) histogram gets the same three
    # projections, with a log scale on pt
    plots_path = "plots/" + ds['analyzer_name'] + "/"
    for name, h in [("Muon_lead", hist_muon_lead), ("Muon_trail", hist_muon_trail),
                    ("Dimu", hist_dimu), ("D0", hist_D0), ("D0_trk", hist_D0_trk),
                    ("Dstar", hist_Dstar), ("Dstar_K", hist_Dstar_K),
                    ("Dstar_pi", hist_Dstar_pi), ("Dstar_pis", hist_Dstar_pis)]:
        create_plot1d(h[:, sum, sum], plots_path + name + "_pt.png", log=True)
        create_plot1d(h[sum, :, sum], plots_path + name + "_eta.png")
        create_plot1d(h[sum, sum, :], plots_path + name + "_phi.png")
    create_plot1d(hist_dimu_mass, plots_path + "Dimu_mass.png")
    create_plot1d(hist_D0_mass, plots_path + "D0_mass.png")
    create_plot1d(hist_Dstar_mass, plots_path + "Dstar_mass.png")
    create_plot1d(hist_Dstar_mass_refit, plots_path + "Dstar_mass_refit.png")
    create_plot1d(hist_Dstar_deltamr, plots_path + "Dstar_deltamr.png")
    create_plot1d(hist_Dstar_deltam, plots_path + "Dstar_deltam.png")

    # Creating plots 2D
    create_plot2d(hist_muon_lead[:, sum, :], plots_path + "Muon_lead_ptXphi")
    create_plot2d(hist_muon_trail[:, sum, :], plots_path + "Muon_trail_ptXphi")
    create_plot2d(hist_D0[:, :, sum], plots_path + "D0_ptXeta.png")
    create_plot2d(hist_D0[sum, :, :], plots_path + "D0_etaXphi.png")
    create_plot2d(hist_D0_eta_mass, plots_path + "D0_etaXmass.png")

    # return dummy accumulator
    return output
def __init__(self, mcEventYields=None, jetSyst='nominal'):
    ################################
    # INITIALIZE COFFEA PROCESSOR
    ################################
    self.mcEventYields = mcEventYields

    if jetSyst not in ['nominal', 'JERUp', 'JERDown', 'JESUp', 'JESDown']:
        raise Exception(f'{jetSyst} is not in acceptable jet systematic types [nominal, JERUp, JERDown, JESUp, JESDown]')
    self.jetSyst = jetSyst

    dataset_axis = hist.Cat("dataset", "Dataset")
    lep_axis = hist.Cat("lepFlavor", "Lepton Flavor")
    systematic_axis = hist.Cat("systematic", "Systematic Uncertainty")

    m3_axis = hist.Bin("M3", r"$M_3$ [GeV]", 200, 0., 1000)
    mass_axis = hist.Bin("mass", r"$m_{\ell\gamma}$ [GeV]", 400, 0., 400)
    pt_axis = hist.Bin("pt", r"$p_{T}$ [GeV]", 200, 0., 1000)
    eta_axis = hist.Bin("eta", r"$\eta_{\gamma}$", 300, -1.5, 1.5)
    chIso_axis = hist.Bin("chIso", r"Charged Hadron Isolation", np.arange(-0.1, 20.001, .05))

    ## Define axis to keep track of photon category
    phoCategory_axis = hist.Bin("category", r"Photon Category", [1, 2, 3, 4, 5])
    phoCategory_axis.identifiers()[0].label = "Genuine Photon"
    phoCategory_axis.identifiers()[1].label = "Misidentified Electron"
    phoCategory_axis.identifiers()[2].label = "Hadronic Photon"
    phoCategory_axis.identifiers()[3].label = "Hadronic Fake"

    ### Accumulator for holding histograms: photon pt, eta and charged-hadron
    ### isolation, the photon/lepton mass in a 3j0t region, and the M3 variable
    self._accumulator = processor.dict_accumulator({
        'photon_pt': hist.Hist("Counts", dataset_axis, pt_axis, phoCategory_axis, lep_axis, systematic_axis),
        'photon_eta': hist.Hist("Counts", dataset_axis, eta_axis, phoCategory_axis, lep_axis, systematic_axis),
        'photon_chIso': hist.Hist("Counts", dataset_axis, chIso_axis, phoCategory_axis, lep_axis, systematic_axis),
        'photon_lepton_mass_3j0t': hist.Hist("Counts", dataset_axis, mass_axis, phoCategory_axis, lep_axis, systematic_axis),
        'M3': hist.Hist("Counts", dataset_axis, m3_axis, phoCategory_axis, lep_axis, systematic_axis),
        'EventCount': processor.value_accumulator(int),
    })

    ext = extractor()
    ext.add_weight_sets([f"btag2016 * {cwd}/ScaleFactors/Btag/DeepCSV_2016LegacySF_V1.btag.csv"])
    ext.finalize()
    self.evaluator = ext.make_evaluator()

    # electron/muon scale factors and their uncertainties, one .coffea file each
    sf_dir = f'{cwd}/ScaleFactors/MuEGammaScaleFactors'
    for name in ('ele_id_sf', 'ele_id_err', 'ele_reco_sf', 'ele_reco_err',
                 'mu_id_sf', 'mu_id_err', 'mu_iso_sf', 'mu_iso_err',
                 'mu_trig_sf', 'mu_trig_err'):
        setattr(self, name, util.load(f'{sf_dir}/{name}.coffea'))
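# Why the histograms are booked inside a processor.dict_accumulator: coffea
# merges per-chunk outputs by creating identity() copies and adding them, so
# every value in the dict must itself be an accumulator. A hedged sketch of
# that mechanic in isolation (coffea 0.x API; names are illustrative):
import numpy as np
from coffea import processor, hist

acc = processor.dict_accumulator({
    'M3': hist.Hist("Counts", hist.Bin("M3", r"$M_3$ [GeV]", 20, 0., 1000.)),
    'EventCount': processor.value_accumulator(int),
})
chunk1, chunk2 = acc.identity(), acc.identity()
chunk1['M3'].fill(M3=np.array([400., 520.]))
chunk2['M3'].fill(M3=np.array([610.]))
chunk1['EventCount'] += 2
chunk2['EventCount'] += 1
total = chunk1 + chunk2  # what coffea does when it merges chunk outputs
print(total['EventCount'].value, total['M3'].values())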
            if ch in ['eeeSSonZ', 'eeeSSoffZ']:
                values = values.eta[:, 2]
                hout[var].fill(e2eta=values, sample=dataset, channel=ch, cut=lev,
                               weight=weights_flat, eft_coeff=eft_coeffs_cut,
                               eft_err_coeff=eft_w2_coeffs_cut)
        elif var == 'm1pt':
            if ch in ['eeSSonZ', 'eeSSoffZ', 'eeeSSoffZ', 'eeeSSonZ', 'eemSSonZ', 'eemSSoffZ', 'emSS']:
                continue
            values = values.pt[:, 1]
            hout[var].fill(m1pt=values, sample=dataset, channel=ch, cut=lev,
                           weight=weights_flat, eft_coeff=eft_coeffs_cut,
                           eft_err_coeff=eft_w2_coeffs_cut)
        elif var == 'm1eta':
            if ch in ['eeSSonZ', 'eeSSoffZ', 'eeeSSoffZ', 'eeeSSonZ', 'eemSSonZ', 'eemSSoffZ', 'emSS']:
                continue
            values = values.eta[:, 1]
            hout[var].fill(m1eta=values, sample=dataset, channel=ch, cut=lev,
                           weight=weights_flat, eft_coeff=eft_coeffs_cut,
                           eft_err_coeff=eft_w2_coeffs_cut)
        elif var == 'm2pt':
            if ch in ['mmmSSonZ', 'mmmSSoffZ']:
                values = values.pt[:, 2]
                hout[var].fill(m2pt=values, sample=dataset, channel=ch, cut=lev,
                               weight=weights_flat, eft_coeff=eft_coeffs_cut,
                               eft_err_coeff=eft_w2_coeffs_cut)
        elif var == 'm2eta':
            if ch in ['mmmSSonZ', 'mmmSSoffZ']:
                values = values.eta[:, 2]
                hout[var].fill(m2eta=values, sample=dataset, channel=ch, cut=lev,
                               weight=weights_flat, eft_coeff=eft_coeffs_cut,
                               eft_err_coeff=eft_w2_coeffs_cut)
        return hout

    def postprocess(self, accumulator):
        return accumulator


if __name__ == '__main__':
    # Load the .coffea files
    outpath = './coffeaFiles/'
    samples = load(outpath + 'samples.coffea')
    topprocessor = AnalysisProcessor(samples)
from .utils.crossSections import *
from .utils.efficiencies import getMuSF, getEleSF
from .utils.genParentage import maxHistoryPDGID
from .utils.updateJets import updateJetP4

import os.path
cwd = os.path.dirname(__file__)

# load lookup tool for btagging efficiencies
with open(f'{cwd}/utils/taggingEfficienciesDenseLookup.pkl', 'rb') as _file:
    taggingEffLookup = pickle.load(_file)

# load lookup tools for pileup scale factors
puLookup = util.load(f'{cwd}/ScaleFactors/puLookup.coffea')
puLookup_Down = util.load(f'{cwd}/ScaleFactors/puLookup_Down.coffea')
puLookup_Up = util.load(f'{cwd}/ScaleFactors/puLookup_Up.coffea')

# create and load jet extractor
Jetext = extractor()
Jetext.add_weight_sets([
    f"* * {cwd}/ScaleFactors/JEC/Summer16_07Aug2017_V11_MC_L1FastJet_AK4PFchs.jec.txt",
    f"* * {cwd}/ScaleFactors/JEC/Summer16_07Aug2017_V11_MC_L2Relative_AK4PFchs.jec.txt",
    f"* * {cwd}/ScaleFactors/JEC/Summer16_07Aug2017_V11_MC_Uncertainty_AK4PFchs.junc.txt",
    f"* * {cwd}/ScaleFactors/JEC/Summer16_25nsV1_MC_PtResolution_AK4PFchs.jr.txt",
    f"* * {cwd}/ScaleFactors/JEC/Summer16_25nsV1_MC_SF_AK4PFchs.jersf.txt",
])
Jetext.finalize()
Jetevaluator = Jetext.make_evaluator()
import numpy as np
import awkward as ak
from coffea.util import load

compiled = load(__file__.replace('.py', '.coffea'))


def _msoftdrop_weight(pt, eta):
    gpar = np.array([1.00626, -1.06161, 0.0799900, 1.20454])
    cpar = np.array([1.09302, -0.000150068, 3.44866e-07, -2.68100e-10, 8.67440e-14, -1.00114e-17])
    fpar = np.array([1.27212, -0.000571640, 8.37289e-07, -5.20433e-10, 1.45375e-13, -1.50389e-17])
    genw = gpar[0] + gpar[1] * np.power(pt * gpar[2], -gpar[3])
    ptpow = np.power.outer(pt, np.arange(cpar.size))
    cenweight = np.dot(ptpow, cpar)
    forweight = np.dot(ptpow, fpar)
    weight = np.where(np.abs(eta) < 1.3, cenweight, forweight)
    return genw * weight


def corrected_msoftdrop(fatjets):
    if not isinstance(fatjets, ak.JaggedArray):
        raise ValueError
    sf_flat = _msoftdrop_weight(fatjets.p4.pt.flatten(), fatjets.p4.eta.flatten())
    sf_flat = np.maximum(1e-5, sf_flat)
    return fatjets.msoftdrop * fatjets.copy(content=sf_flat)


def n2ddt_shift(fatjets, year='2017'):
    return compiled[f'{year}_n2ddt_rho_pt'](fatjets.rho, fatjets.p4.pt)
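# The coefficient contraction in _msoftdrop_weight builds a Vandermonde-style
# matrix with np.power.outer (columns pt**0 .. pt**5) and dots it with the
# coefficient vector; that is exactly np.polyval with the coefficients
# reversed, which is how the SoftDropWeight class further below writes the
# same polynomial. A quick numeric check with toy pt values:
import numpy as np

cpar = np.array([1.09302, -0.000150068, 3.44866e-07, -2.68100e-10, 8.67440e-14, -1.00114e-17])
pt = np.array([300., 500., 800.])
ptpow = np.power.outer(pt, np.arange(cpar.size))
assert np.allclose(np.dot(ptpow, cpar), np.polyval(cpar[::-1], pt))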
# ---- Reiterate categories ---- #
ttagcats = ["at"]  # , "0t", "1t", "It", "2t"]
btagcats = ["0b", "1b", "2b"]
ycats = ['cen', 'fwd']

list_of_cats = [t + b + y for t, b, y in itertools.product(ttagcats, btagcats, ycats)]

from Filesets import filesets

outputs_unweighted = {}
for name, files in filesets.items():
    outputs_unweighted[name] = util.load(
        'TTbarAllHadUproot/CoffeaOutputs/UnweightedOutputs/TTbarResCoffea_'
        + name + '_unweighted_output.coffea')
outputs_unweighted

""" ---------------- CREATE RAW MISTAG PLOTS ---------------- """
# ---- Only Use This When LookUp Tables Were Not In Use for Previous Uproot Job (i.e. UseLookUpTables = False) ---- #
# ---- This Creates Mistag plots for every dataset in every category for debugging if necessary or for curiosity ---- #
# ---- Look up tables are a bit more sophisticated and much more useful to the analysis ---- #

SaveDirectory = maindirectory + '/TTbarAllHadUproot/MistagPlots/'
DoesDirectoryExist(SaveDirectory)  # no need to create the directory several times


# Function sqrt(x)
def forward(x):
    return x**(1 / 2)
    2017: 41.53,
    2018: 59.74,
}

file_kind = "CC"
if args.output is None:
    template_file = f"templates_{args.identifier}_{file_kind}.root"
    template_mu_file = f"templatesmuCR_{args.identifier}_{file_kind}.root"
else:
    _base_name = args.output.split(".root")[0]
    template_file = f"{_base_name}.root"
    template_mu_file = f"{_base_name}_mu.root"

# Load info
print(f'Processing coffea output from: hists_{args.identifier}.coffea')
output = load(f'hists_{args.identifier}.coffea')
xsecs = xSecReader('metadata/xSections_manual.dat')
sumw = getSumW(output)

if args.mergemap is not None:
    print(f'Processing with mergemap from: {args.mergemap}')
    with open(args.mergemap) as json_file:
        merge_map = json.load(json_file)
else:
    merge_map = None

if args.cvl not in output[list(output.keys())[0]]['templates'].axes['ddc'].edges:
    raise ValueError(
        f"args.cvl = {args.cvl} not available. Axis edges are {output[list(output.keys())[0]]['templates'].axes['ddc'].edges}")
# cap the pileup weights at 5 for both years and both variations
for key in ('2017_pileupweight', '2017_pileupweight_puUp', '2017_pileupweight_puDown',
            '2018_pileupweight', '2018_pileupweight_puUp', '2018_pileupweight_puDown'):
    compiled[key]._values = np.minimum(5, compiled[key]._values)

with importlib.resources.path("boostedhiggs.data", 'powhegToMinloPtCC.coffea') as filename:
    compiled['powheg_to_nnlops'] = util.load(filename)


class SoftDropWeight(lookup_base):
    def _evaluate(self, pt, eta):
        gpar = np.array([1.00626, -1.06161, 0.0799900, 1.20454])
        cpar = np.array([1.09302, -0.000150068, 3.44866e-07, -2.68100e-10, 8.67440e-14, -1.00114e-17])
        fpar = np.array([1.27212, -0.000571640, 8.37289e-07, -5.20433e-10, 1.45375e-13, -1.50389e-17])
        genw = gpar[0] + gpar[1] * np.power(pt * gpar[2], -gpar[3])
        cenweight = np.polyval(cpar[::-1], pt)
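# Capping the stored pileup weights at 5 protects against huge event weights
# from sparsely populated tails of the data/MC pileup ratio; the clip is a
# plain elementwise minimum over the lookup's value array:
import numpy as np

weights = np.array([0.9, 1.1, 3.7, 42.0, 250.0])  # toy per-bin pileup weights
print(np.minimum(5, weights))  # -> [0.9 1.1 3.7 5.  5. ]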
import os
import time

import numpy as np
import matplotlib.pyplot as plt

import coffea.hist as hist
from coffea.util import load, save

#import mplhep
#plt.style.use(mplhep.style.CMS)

filename = "WZ_Run2018_40000.futures"
histo = load(filename)

h1_mass = histo['mass']
h1_Nele = histo['nElectrons']
nWZ = histo['sumw']['WZ']

# `raise "..."` is invalid in Python 3 (only exceptions can be raised),
# so raise a proper exception type instead
if not isinstance(h1_mass, hist.Hist):
    raise TypeError("h1_mass is not a coffea hist.Hist")
if not isinstance(h1_Nele, hist.Hist):
    raise TypeError("h1_Nele is not a coffea hist.Hist")

hist.plot1d(h1_mass, overlay='dataset')
plt.show()
plt.close()

hist.plot1d(h1_Nele, overlay='dataset')
plt.show()
def plot_bkg_templates(fnames_to_run):
    """
    Runs LOWESS smoothing algorithm ntoys times and finds 1 and 2 sigma bands for interpolation
    """
    for bkg_file in fnames_to_run:
        hdict = load(bkg_file)
        jmult = "3Jets" if "3Jets" in os.path.basename(bkg_file) else "4PJets"
        for tname, orig_template in hdict[args.lepton].items():
            proc = tname.split("_")[0] if "data_obs" not in tname else "data_obs"
            sys = sorted(filter(None, tname.split(f"{proc}_")))[0]
            if proc == "BKG":
                continue
            #if sys not in ["hdampUP", "hdampDOWN", "mtop1665", "mtop1695", "mtop1715", "mtop1735", "mtop1755", "mtop1785", "ueUP", "ueDOWN"]: continue
            if sys == "nosys":
                continue
            print(args.lepton, jmult, sys, proc)

            nosys_hist = hdict[args.lepton][f"{proc}_nosys"].copy()
            orig_smooth_hist = Plotter.smoothing_mttbins(nosys=nosys_hist, systematic=orig_template,
                                                         mtt_centers=mtt_centers, nbinsx=nbinsx, nbinsy=nbinsy)

            x_lims = (0, nosys_hist.dense_axes()[0].centers().size)

            # get vals and errors of systematic variation
            sys_histo_vals, sys_histo_sumw2 = orig_template.values(sumw2=True)[()]
            sys_histo_errs = np.sqrt(sys_histo_sumw2)

            # make toys based on Gaussian distribution of mu=bin_val, sigma=bin_error
            toy_arrays = np.zeros((nbins, ntoys))
            for idx in range(nbins):
                toy_arrays[idx] = np.random.normal(sys_histo_vals[idx], sys_histo_errs[idx], size=ntoys)

            # get smoothed relative deviation distributions from toys
            smoothed_rel_dev_arrays = np.zeros((ntoys, nbins))
            chi2_pvals = np.zeros((ntoys, 2))
            for idx in range(ntoys):
                smoothed_array = Plotter.smoothing_mttbins(nosys=nosys_hist, systematic=(toy_arrays.T)[idx],
                                                           mtt_centers=mtt_centers, nbinsx=nbinsx, nbinsy=nbinsy)
                # convert to expected yields so inputs are greater than 5
                chi2_pval = chisquare(f_obs=smoothed_array, f_exp=orig_smooth_hist.values()[()])
                chi2_pvals[idx] = np.array([chi2_pval.statistic, chi2_pval.pvalue])
                smoothed_rel_dev_arrays[idx] = (smoothed_array - nosys_hist.values()[()]) / nosys_hist.values()[()]

            ## find 68% and 95% intervals
            plus_one_sigma_smooth_vals, minus_one_sigma_smooth_vals = np.zeros(nbins), np.zeros(nbins)
            plus_two_sigma_smooth_vals, minus_two_sigma_smooth_vals = np.zeros(nbins), np.zeros(nbins)
            for bin in range(nbins):
                plus_one_sigma_smooth_vals[bin] = np.sort(smoothed_rel_dev_arrays[:, bin])[plus_one_sigma_ind]
                minus_one_sigma_smooth_vals[bin] = np.sort(smoothed_rel_dev_arrays[:, bin])[minus_one_sigma_ind]
                plus_two_sigma_smooth_vals[bin] = np.sort(smoothed_rel_dev_arrays[:, bin])[plus_two_sigma_ind]
                minus_two_sigma_smooth_vals[bin] = np.sort(smoothed_rel_dev_arrays[:, bin])[minus_two_sigma_ind]

            # plot relative deviation
            fig, ax = plt.subplots()
            fig.subplots_adjust(hspace=.07)

            # original relative deviations
            orig_masked_vals, orig_masked_bins = Plotter.get_ratio_arrays(
                num_vals=orig_template.values()[()] - nosys_hist.values()[()],
                denom_vals=nosys_hist.values()[()],
                input_bins=nosys_hist.dense_axes()[0].edges())
            ax.step(orig_masked_bins, orig_masked_vals, where="post",
                    color="k", linestyle="-", label="Original")
            # original smoothing relative deviations
            orig_smoothed_masked_vals, orig_smoothed_masked_bins = Plotter.get_ratio_arrays(
                num_vals=orig_smooth_hist.values()[()] - nosys_hist.values()[()],
                denom_vals=nosys_hist.values()[()],
                input_bins=nosys_hist.dense_axes()[0].edges())
            ax.step(orig_smoothed_masked_bins, orig_smoothed_masked_vals, where="post",
                    color="r", linestyle="-", label="Original Smoothing")

            # plot 68 and 95% intervals for yields
            ax.fill_between(nosys_hist.dense_axes()[0].edges(),
                            np.r_[minus_one_sigma_smooth_vals, minus_one_sigma_smooth_vals[-1]],
                            np.r_[plus_one_sigma_smooth_vals, plus_one_sigma_smooth_vals[-1]],
                            where=np.r_[plus_one_sigma_smooth_vals, plus_one_sigma_smooth_vals[-1]]
                                  > np.r_[minus_one_sigma_smooth_vals, minus_one_sigma_smooth_vals[-1]],
                            step="post", label="68%", facecolor="#00cc00", alpha=0.5)
            ax.fill_between(nosys_hist.dense_axes()[0].edges(),
                            np.r_[minus_two_sigma_smooth_vals, minus_two_sigma_smooth_vals[-1]],
                            np.r_[plus_two_sigma_smooth_vals, plus_two_sigma_smooth_vals[-1]],
                            where=np.r_[plus_two_sigma_smooth_vals, plus_two_sigma_smooth_vals[-1]]
                                  > np.r_[minus_two_sigma_smooth_vals, minus_two_sigma_smooth_vals[-1]],
                            step="post", label="95%", facecolor="#ffcc00", alpha=0.5)

            ax.legend(loc="upper right", title=f"{sys}, {proc}")
            ax.axhline(0, linestyle="--", color=(0, 0, 0, 0.5), linewidth=1)
            ax.autoscale()
            ax.set_ylim(ax.get_ylim()[0], ax.get_ylim()[1] * 1.15)
            ax.set_xlim(x_lims)
            ax.set_xlabel("$m_{t\\bar{t}}$ $\otimes$ |cos($\\theta^{*}_{t_{l}}$)|")
            ax.set_ylabel("Rel. Deviation from Nominal")

            # add lepton/jet multiplicity label
            ax.text(0.02, 0.94, f"{leptypes[args.lepton]}, {jet_mults[jmult]}",
                    fontsize=rcParams["font.size"] * 0.9, horizontalalignment="left",
                    verticalalignment="bottom", transform=ax.transAxes)

            ## draw vertical lines for distinguishing different ctstar bins
            vlines = [x_lims[1] * ybin / 5 for ybin in range(1, 5)]
            for vline in vlines:
                ax.axvline(vline, color="k", linestyle="--")
            hep.cms.label(ax=ax, data=False, paper=False, year=args.year,
                          lumi=round(data_lumi_year[f"{args.lepton}s"] / 1000., 1))

            pltdir = os.path.join(outdir, args.lepton, jmult, sys)
            if not os.path.isdir(pltdir):
                os.makedirs(pltdir)

            figname = os.path.join(pltdir, "_".join([jmult, args.lepton, sys, proc, "SmoothingConfidenceIntervals"]))
            fig.savefig(figname)
            print(f"{figname} written")
            plt.close()
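# A hedged aside on the interval extraction above: picking fixed indices
# (plus_one_sigma_ind etc., defined elsewhere) from each bin's sorted toy
# values is equivalent to taking percentiles along the toy axis, which
# np.percentile does in one vectorized call, assuming smoothed_rel_dev_arrays
# keeps its (ntoys, nbins) layout:
import numpy as np

toys = np.random.normal(0., 0.05, (1000, 40))  # stand-in for smoothed_rel_dev_arrays
minus_two, minus_one, plus_one, plus_two = np.percentile(
    toys, [2.5, 16., 84., 97.5], axis=0)
print(plus_one.shape)  # one band value per bin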
coffeapath = './coffeaFiles/'
outname = 'plotsTopEFT'
mocapath = 'moca'
mocaScripts = ['corrections', 'functions', 'objects', 'samples', 'selection']
#analysis =
#treeName
nworkers = 8

### (Re)produce inputs...

### Produce/load analysis object
#print("Executing python analysis/topEFT/topeft.py...")
#os.system('python analysis/topEFT/topeft.py')
processor_instance = load(coffeapath + nameProcessor + '.coffea')

### Load samples
samplesdict = load(coffeapath + nameSamples + '.coffea')
flist = {}
xsec = {}
sow = {}
isData = {}
for k in samplesdict.keys():
    flist[k] = samplesdict[k]['files']
    xsec[k] = samplesdict[k]['xsec']
    sow[k] = samplesdict[k]['nSumOfWeights']
    isData[k] = samplesdict[k]['isData']

# Run the processor and get the output
tstart = time.time()
def merge(folder, _dataset):
    filelist = {}
    pd = []
    for filename in os.listdir(folder):
        if '.pkl.gz' in filename:
            if filename.split("____")[0] not in pd:
                pd.append(filename.split("____")[0])

    for pdi in pd:
        files = []
        for filename in os.listdir(folder):
            if pdi not in filename:
                continue
            if '.pkl.gz' not in filename:
                continue
            files.append(filename)
        split_files = split(files, 100)
        for i in range(0, len(split_files)):
            filelist[pdi + '___' + str(i) + '_'] = split_files[i]

    coffealist = []
    for pdi in filelist.keys():
        # note: the original `_dataset not in 'None'` was a substring test
        # against the literal string 'None'; the intended check is equality
        if _dataset != 'None' and _dataset not in pdi:
            continue
        print(pdi)
        hists = {}
        for filename in filelist[pdi]:
            fin = gzip.open(folder + '/' + filename)
            print('Opening:', folder + '/' + filename)
            hin = cloudpickle.load(fin)
            for k in hin.keys():
                if k not in hists:
                    hists[k] = hin[k]
                else:
                    hists[k] += hin[k]
            fin.close()
            del hin

        dataset = hist.Cat("dataset", "dataset", sorting='placement')
        dataset_cats = ("dataset",)
        dataset_map = OrderedDict()
        dataset_map[pdi] = (pdi.split("___")[0] + "*",)
        for key in hists.keys():
            hists[key] = hists[key].group(dataset_cats, dataset, dataset_map)
        save(hists, folder + '/' + pdi + '.coffea')
        del hists

    for coffeafile in os.listdir(folder):
        if '.coffea' not in coffeafile:
            continue
        coffealist.append(folder + '/' + coffeafile)
    print('coffealist', coffealist)

    htot = {}
    for coffeafile in coffealist:
        print('Opening', coffeafile)
        hists = load(coffeafile)
        for k in hists:
            if k not in htot:
                htot[k] = hists[k]
            else:
                htot[k] += hists[k]
        del hists

    if _dataset == 'None':
        _dataset = ''
    save(htot, 'condor_hists_' + folder + '.coffea')
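# The merge relies on coffea hists being accumulators: `+=` adds bin contents
# (and sumw2) in place, so partial outputs can be folded together key by key.
# A toy sketch of that dictionary-merge pattern (coffea 0.x hist API):
import numpy as np
from coffea import hist

def merge_outputs(outputs):
    htot = {}
    for hists in outputs:
        for k, h in hists.items():
            if k not in htot:
                htot[k] = h
            else:
                htot[k] += h  # bin-by-bin addition
    return htot

h1 = hist.Hist("Events", hist.Bin("x", "x", 4, 0., 4.))
h1.fill(x=np.array([1., 2.]))
h2 = h1.copy()
h2.fill(x=np.array([3.]))
print(merge_outputs([{"x": h1}, {"x": h2}])["x"].values())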
def get_bkg_templates(fnames_to_run):
    """
    Function that writes linearized mtt vs costheta distributions to root file.
    """
    for bkg_file in fnames_to_run:
        hdict = load(bkg_file)
        jmult = "3Jets" if "3Jets" in os.path.basename(bkg_file) else "4PJets"
        for lep in hdict.keys():
            for tname, orig_template in hdict[lep].items():
                proc = tname.split("_")[0] if "data_obs" not in tname else "data_obs"
                sys = sorted(filter(None, tname.split(f"{proc}_")))[0]
                #if not ((sys == "ueDOWN") and (proc == "ttJets")): continue
                if sys == "nosys":
                    continue
                print(lep, jmult, sys, proc)

                nominal_hist = hdict[lep][f"{proc}_nosys"].copy()
                x_lims = (0, nominal_hist.dense_axes()[0].centers().size)

                # perform smoothing
                smoothed_histos_list = [
                    (Plotter.smoothing_mttbins(nosys=nominal_hist, systematic=orig_template,
                                               mtt_centers=mtt_centers,
                                               nbinsx=len(linearize_binning[0]) - 1,
                                               nbinsy=len(linearize_binning[1]) - 1,
                                               frac=frac_val / 10.), frac_val / 10.)
                    for frac_val in np.arange(2, 7, 2)
                ]
                #smoothed_histos_chi2 = {frac_val: find_chi2(h_fitted=smooth_histo, h_unc=orig_template) for smooth_histo, frac_val in smoothed_histos_list}

                # perform flattening
                flattened_histo = Plotter.flatten(nosys=nominal_hist, systematic=orig_template)
                #flat_chi2 = find_chi2(h_fitted=flattened_histo, h_unc=orig_template)

                # plot relative deviation
                fig, ax = plt.subplots()
                fig.subplots_adjust(hspace=.07)

                # plot original dist
                orig_masked_vals, orig_masked_bins = Plotter.get_ratio_arrays(
                    num_vals=orig_template.values()[()] - nominal_hist.values()[()],
                    denom_vals=nominal_hist.values()[()],
                    input_bins=nominal_hist.dense_axes()[0].edges())
                ax.fill_between(orig_masked_bins, orig_masked_vals, facecolor="k",
                                step="post", alpha=0.5, label="Unsmoothed")

                # plot smoothed versions
                for smooth_histo, frac_val in smoothed_histos_list:
                    smooth_masked_vals, smooth_masked_bins = Plotter.get_ratio_arrays(
                        num_vals=smooth_histo.values()[()] - nominal_hist.values()[()],
                        denom_vals=nominal_hist.values()[()],
                        input_bins=nominal_hist.dense_axes()[0].edges())
                    ax.step(smooth_masked_bins, smooth_masked_vals, where="post",
                            linestyle="-", label=f"Frac={frac_val}", linewidth=2)

                # plot flattened val
                flat_masked_vals, flat_masked_bins = Plotter.get_ratio_arrays(
                    num_vals=flattened_histo.values()[()] - nominal_hist.values()[()],
                    denom_vals=nominal_hist.values()[()],
                    input_bins=nominal_hist.dense_axes()[0].edges())
                ax.step(flat_masked_bins, flat_masked_vals, where="post",
                        linestyle="-", label="Flat", linewidth=2)

                ax.legend(loc="upper right", title=f"{sys}, {proc}")
                ax.axhline(0, linestyle="--", color=(0, 0, 0, 0.5), linewidth=1)
                ax.autoscale()
                ax.set_xlim(x_lims)
                ax.set_xlabel("$m_{t\\bar{t}}$ $\otimes$ |cos($\\theta^{*}_{t_{l}}$)|")
                ax.set_ylabel("Rel. Deviation from Nominal")

                # add lepton/jet multiplicity label
                ax.text(0.02, 0.94, f"{leptypes[lep]}, {jet_mults[jmult]}",
                        fontsize=rcParams["font.size"] * 0.9, horizontalalignment="left",
                        verticalalignment="bottom", transform=ax.transAxes)

                ## draw vertical lines for distinguishing different ctstar bins
                vlines = [x_lims[1] * ybin / 5 for ybin in range(1, 5)]
                for vline in vlines:
                    ax.axvline(vline, color="k", linestyle="--")
                hep.cms.label(ax=ax, data=False, paper=False, year=args.year,
                              lumi=round(data_lumi_year[f"{lep}s"] / 1000., 1))

                pltdir = os.path.join(outdir, lep, jmult, sys)
                if not os.path.isdir(pltdir):
                    os.makedirs(pltdir)

                #figname = os.path.join(pltdir, "_".join([jmult, lep, sys, proc, "BinWidths_Comp"]))
                #figname = os.path.join(pltdir, "_".join([jmult, lep, sys, proc, "SmoothValues_Comp"]))
                #figname = os.path.join(pltdir, "_".join([jmult, lep, sys, proc, "MttBinWidths_SmoothValues_Comp"]))
                figname = os.path.join(pltdir, "_".join([jmult, lep, sys, proc, "SmoothedFlatVals_Comp"]))
                fig.savefig(figname)
                print(f"{figname} written")
                plt.close()
def get_sig_templates(tmp_rname):
    ''' Function that writes linearized mtt vs costheta distributions to root file. '''
    from rootpy.plotting import Hist2D

    widthTOname = lambda width: str(width).replace('.', 'p')
    nameTOwidth = lambda width: str(width).replace('p', '.')

    ## variables that only need to be defined/evaluated once
    hdict = plt_tools.add_coffea_files(sig_fnames) if len(sig_fnames) > 1 else load(sig_fnames[0])

    ## get data lumi and scale MC by lumi
    data_lumi_year = prettyjson.loads(open('%s/inputs/lumis_data.json' % proj_dir).read())[args.year]

    # get correct hist and rebin
    hname_to_use = 'mtt_vs_tlep_ctstar_abs'
    if hname_to_use not in hdict.keys():
        raise ValueError("%s not found in file" % hname_to_use)
    xrebinning, yrebinning = mtt_ctstar_2d_binning
    #xrebinning, yrebinning = 2, 1
    histo = hdict[hname_to_use]  # process, sys, jmult, leptype, btag, lepcat

    #set_trace()
    xaxis_name = histo.dense_axes()[0].name
    yaxis_name = histo.dense_axes()[1].name
    ## rebin x axis
    if isinstance(xrebinning, np.ndarray):
        new_xbins = hist.Bin(xaxis_name, xaxis_name, xrebinning)
    elif isinstance(xrebinning, float) or isinstance(xrebinning, int):
        new_xbins = xrebinning
    histo = histo.rebin(xaxis_name, new_xbins)
    ## rebin y axis
    if isinstance(yrebinning, np.ndarray):
        new_ybins = hist.Bin(yaxis_name, yaxis_name, yrebinning)
    elif isinstance(yrebinning, float) or isinstance(yrebinning, int):
        new_ybins = yrebinning
    #set_trace()
    histo = histo.rebin(yaxis_name, new_ybins)
    rebin_histo = histo[:, :, :, :, 'btagPass', 'Tight'].integrate('lepcat').integrate('btag')

    signals = sorted(set([key[0] for key in rebin_histo.values().keys()]))

    # create 2D signal hists and write to temp file
    with root_open(tmp_rname, 'w') as out:
        #for lep in ['Muon']:
        for lep in ['Muon', 'Electron']:
            lepdir = 'mujets' if lep == 'Muon' else 'ejets'

            # scale by lumi
            lumi_correction = load('%s/Corrections/%s/MC_LumiWeights_IgnoreSigEvts.coffea' % (proj_dir, jobid))[args.year]['%ss' % lep]
            scaled_histo = rebin_histo.copy()
            scaled_histo.scale(lumi_correction, axis='dataset')

            for jmult in njets_to_run:
                histo = scaled_histo[:, :, jmult, lep].integrate('jmult').integrate('leptype')

                for signal in signals:
                    _, mass, width, pI, wt = tuple(signal.split('_'))
                    samtype = 'int' if pI == 'Int' else 'sgn'
                    bostype = 'ggA' if _ == 'AtoTT' else 'ggH'
                    sub_name = '%s_%s-%s-%s-%s' % (bostype, wt, samtype, widthTOname(width).split('W')[-1] + 'pc', mass) if pI == 'Int' \
                        else '%s_pos-%s-%s-%s' % (bostype, samtype, widthTOname(width).split('W')[-1] + 'pc', mass)

                    #set_trace()
                    for sys in sys_to_use.keys():
                        sysname, onlyTT = sys_to_use[sys]
                        if onlyTT: continue
                        if sys not in histo.axis('sys')._sorted:
                            print('\n\n   Systematic %s not available, skipping\n\n' % sys)
                            continue

                        #set_trace()
                        if 'LEP' in sysname: sysname = sysname.replace('LEP', lepdir[0])

                        template_histo = histo[signal, sys].integrate('dataset').integrate('sys')
                        if wt == 'neg':
                            template_histo.scale(-1.)
                        #if (pI == 'Int') and (wt == 'pos'): continue

                        print(lep, jmult, sub_name, sys)
                        sumw, sumw2 = template_histo.values(sumw2=True, overflow='all')[()]  # get vals and errors for all bins (including under/overflow)

                        #if args.smooth:
                        #    set_trace()

                        ## create rootpy hist and rename
                        rtpy_h2d = Hist2D(template_histo.dense_axes()[0].edges(), template_histo.dense_axes()[1].edges())
                        outhname = '_'.join([jmult, lepdir, sub_name]) if sys == 'nosys' else '_'.join([jmult, lepdir, sub_name, sysname])
                        rtpy_h2d.name = outhname

                        # set bin content for rootpy hist
                        for binx in range(0, rtpy_h2d.GetNbinsX() + 2):
                            for biny in range(0, rtpy_h2d.GetNbinsY() + 2):
                                rtpy_h2d[binx, biny] = sumw[binx, biny], sumw2[binx, biny]
                        #set_trace()

                        rtpy_h2d.Write()

    print('%s written' % tmp_rname)
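# Quick illustration (standalone copies, for clarity only) of the width-name
# helpers defined inside get_sig_templates: they convert between decimal
# widths and ROOT-safe name tokens.
widthTOname_demo = lambda width: str(width).replace('.', 'p')
nameTOwidth_demo = lambda width: str(width).replace('p', '.')
assert widthTOname_demo(2.5) == '2p5'
assert nameTOwidth_demo('2p5') == '2.5'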
input_dir = '/'.join([proj_dir, 'results', '%s_%s' % (args.year, jobid), analyzer])
f_ext = 'TOT.coffea'
outdir = '/'.join([proj_dir, 'plots', '%s_%s' % (args.year, jobid), analyzer])
if not os.path.isdir(outdir):
    os.makedirs(outdir)

fnames = sorted(['%s/%s' % (input_dir, fname) for fname in os.listdir(input_dir) if fname.endswith(f_ext)])
#set_trace()
hdict = plt_tools.add_coffea_files(fnames) if len(fnames) > 1 else load(fnames[0])

jet_mults = {'3Jets': '3 jets', '4PJets': '4+ jets'}
objtypes = {
    'Jets': 'jets',
    'Lep': {
        'Muon': '$\\mu$',
        'Electron': '$e$',
    }
}
btag_cats = {
    'btagFail': '0 btags',
    'btagPass': '******',
}
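# Illustrative use of the label dictionaries above when annotating plots
# (mirrors the f"{leptypes[lep]}, {jet_mults[jmult]}" pattern used earlier in
# this file; the variable name here is hypothetical):
example_label = f"{objtypes['Lep']['Muon']}, {jet_mults['3Jets']}"  # -> "$\mu$, 3 jets"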
def get_bkg_templates(tmp_rname):
    ''' Function that writes linearized mtt vs costheta distributions to root file. '''
    ## variables that only need to be defined/evaluated once
    hdict = plt_tools.add_coffea_files(bkg_fnames) if len(bkg_fnames) > 1 else load(bkg_fnames[0])

    ## get data lumi and scale MC by lumi
    data_lumi_year = prettyjson.loads(open('%s/inputs/lumis_data.json' % proj_dir).read())[args.year]

    # get correct hist and rebin
    hname_to_use = 'mtt_vs_tlep_ctstar_abs'
    if hname_to_use not in hdict.keys():
        raise ValueError("%s not found in file" % hname_to_use)
    xrebinning, yrebinning = linearize_binning
    histo = hdict[hname_to_use]  # process, sys, jmult, leptype, btag, lepcat

    xaxis_name = histo.dense_axes()[0].name
    yaxis_name = histo.dense_axes()[1].name
    ## rebin x axis
    if isinstance(xrebinning, np.ndarray):
        new_xbins = hist.Bin(xaxis_name, xaxis_name, xrebinning)
    elif isinstance(xrebinning, float) or isinstance(xrebinning, int):
        new_xbins = xrebinning
    histo = histo.rebin(xaxis_name, new_xbins)
    ## rebin y axis
    if isinstance(yrebinning, np.ndarray):
        new_ybins = hist.Bin(yaxis_name, yaxis_name, yrebinning)
    elif isinstance(yrebinning, float) or isinstance(yrebinning, int):
        new_ybins = yrebinning
    rebin_histo = histo.rebin(yaxis_name, new_ybins)

    nbins = (len(xrebinning) - 1) * (len(yrebinning) - 1)

    ## scale ttJets events, split by reconstruction type, by normal ttJets lumi correction
    ttJets_permcats = ['*right', '*matchable', '*unmatchable', '*other']
    names = [dataset for dataset in sorted(set([key[0] for key in hdict[hname_to_use].values().keys()]))]  # get dataset names in hists
    ttJets_cats = [name for name in names if any([fnmatch.fnmatch(name, cat) for cat in ttJets_permcats])]  # gets ttJets(_PS)_other, ...

    # use ttJets events that don't have PS weights for dedicated sys samples in 2016
    if bkg_ttJets_fname is not None:
        ttJets_hdict = load(bkg_ttJets_fname)
        ttJets_histo = ttJets_hdict[hname_to_use]  # process, sys, jmult, leptype, btag, lepcat
        ## rebin x axis
        ttJets_histo = ttJets_histo.rebin(xaxis_name, new_xbins)
        ## rebin y axis
        ttJets_histo = ttJets_histo.rebin(yaxis_name, new_ybins)

        only_ttJets_names = [dataset for dataset in sorted(set([key[0] for key in ttJets_hdict[hname_to_use].values().keys()]))]  # get dataset names in hists
        only_ttJets_cats = [name for name in only_ttJets_names if any([fnmatch.fnmatch(name, cat) for cat in ttJets_permcats])]  # gets ttJets(_PS)_other, ...
    ## make groups based on process
    process = hist.Cat("process", "Process", sorting='placement')
    process_cat = "dataset"

    # need to save coffea hist objects to file so they can be opened by uproot in the proper format
    upfout = uproot.recreate(tmp_rname, compression=uproot.ZLIB(4)) if os.path.isfile(tmp_rname) else uproot.create(tmp_rname)
    if '3Jets' in njets_to_run:
        histo_dict_3j = processor.dict_accumulator({'Muon': {}, 'Electron': {}})
    if '4PJets' in njets_to_run:
        histo_dict_4pj = processor.dict_accumulator({'Muon': {}, 'Electron': {}})

    for lep in ['Muon', 'Electron']:
        lepdir = 'mujets' if lep == 'Muon' else 'ejets'

        ## make groups based on process
        process_groups = plt_tools.make_dataset_groups(lep, args.year, samples=names, gdict='templates')

        lumi_correction = load('%s/Corrections/%s/MC_LumiWeights_IgnoreSigEvts.coffea' % (proj_dir, jobid))[args.year]['%ss' % lep]
        # scale ttJets events, split by reconstruction type, by normal ttJets lumi correction
        if len(ttJets_cats) > 0:
            for tt_cat in ttJets_cats:
                ttJets_lumi_topo = '_'.join(tt_cat.split('_')[:-1])  # gets ttJets[SL, Had, DiLep] or ttJets_PS
                ttJets_eff_lumi = lumi_correction[ttJets_lumi_topo]
                lumi_correction.update({tt_cat: ttJets_eff_lumi})

        histo = rebin_histo.copy()
        histo.scale(lumi_correction, axis='dataset')
        histo = histo.group(process_cat, process, process_groups)[:, :, :, lep, :, :].integrate('leptype')

        # use ttJets events that don't have PS weights for dedicated sys samples in 2016
        if bkg_ttJets_fname is not None:
            if len(only_ttJets_cats) > 0:
                for tt_cat in only_ttJets_cats:
                    ttJets_lumi_topo = '_'.join(tt_cat.split('_')[:-1])  # gets ttJets[SL, Had, DiLep] or ttJets_PS
                    ttJets_eff_lumi = lumi_correction[ttJets_lumi_topo]
                    lumi_correction.update({tt_cat: ttJets_eff_lumi})

            tt_histo = ttJets_histo.copy()
            tt_histo.scale(lumi_correction, axis='dataset')
            tt_histo = tt_histo.group(process_cat, process, {'TT': ['ttJets_right', 'ttJets_matchable', 'ttJets_unmatchable', 'ttJets_other']})[:, :, :, lep, :, :].integrate('leptype')

        for jmult in njets_to_run:
            iso_sb = Plotter.linearize_hist(histo[:, 'nosys', jmult, 'btagPass', 'Loose'].integrate('sys').integrate('jmult').integrate('lepcat').integrate('btag'))
            btag_sb = Plotter.linearize_hist(histo[:, 'nosys', jmult, 'btagFail', 'Tight'].integrate('sys').integrate('jmult').integrate('lepcat').integrate('btag'))
            double_sb = Plotter.linearize_hist(histo[:, 'nosys', jmult, 'btagFail', 'Loose'].integrate('sys').integrate('jmult').integrate('lepcat').integrate('btag'))
            sig_histo = Plotter.linearize_hist(histo[:, :, jmult, 'btagPass', 'Tight'].integrate('jmult').integrate('lepcat').integrate('btag'))

            for sys in sys_to_use.keys():
                if sys not in histo.axis('sys')._sorted:
                    print('\n\n   Systematic %s not available, skipping\n\n' % sys)
                    continue
                #set_trace()
                sysname, onlyTT = sys_to_use[sys]
                if 'LEP' in sysname: sysname = sysname.replace('LEP', lepdir[0])

                qcd_est_histo = Plotter.QCD_Est(sig_reg=sig_histo, iso_sb=iso_sb, btag_sb=btag_sb, double_sb=double_sb, norm_type='Sideband', shape_region='BTAG', norm_region='BTAG', sys=sys)

                ## write nominal and systematic variations for each topology to file
                for proc in sorted(set([key[0] for key in qcd_est_histo.values().keys()])):
                    if (proc != 'TT') and onlyTT: continue
                    if (proc == 'data_obs') and not (sys == 'nosys'): continue

                    name = proc + lepdir if proc == 'QCD' else proc
                    print(lep, jmult, sys, name)
                    outhname = '_'.join([jmult, lepdir, name]) if sys == 'nosys' else '_'.join([jmult, lepdir, name, sysname])

                    template_histo = qcd_est_histo[proc].integrate('process')
                    if (('ue' in sys) or ('hdamp' in sys) or ('mtop' in sys)) and (bkg_ttJets_fname is not None):
                        tt_lin_histo = Plotter.linearize_hist(tt_histo['TT', 'nosys', jmult, 'btagPass', 'Tight'].integrate('jmult').integrate('lepcat').integrate('btag'))
                        tt_lin_histo = tt_lin_histo['TT', 'nosys'].integrate('process').integrate('sys')
                        template_histo = substitute_ttJets(sys_histo=template_histo, ttJets_histo=tt_lin_histo, ttJets_PS_histo=sig_histo['TT', 'nosys'].integrate('process').integrate('sys'))

                    if ((sys == 'mtop1695') or (sys == 'mtop1755')) and (templates_to_smooth[proc]):
                        template_histo = scale_mtop3gev(nominal=histo_dict_3j[lep][proc] if jmult == '3Jets' else histo_dict_4pj[lep][proc], template=template_histo)
                        #set_trace()

                    if (sys != 'nosys') and (args.smooth) and (templates_to_smooth[proc]):
                        template_histo = smoothing(nominal=histo_dict_3j[lep][proc] if jmult == '3Jets' else histo_dict_4pj[lep][proc], template=template_histo, nbinsx=len(xrebinning) - 1, nbinsy=len(yrebinning) - 1)  #, debug=True if proc=='VV' else False)
                        #set_trace()

                    ## save template histos to coffea dict
                    if jmult == '3Jets':
                        histo_dict_3j[lep][proc if sys == 'nosys' else '%s_%s' % (proc, sys)] = template_histo
                    if jmult == '4PJets':
                        histo_dict_4pj[lep][proc if sys == 'nosys' else '%s_%s' % (proc, sys)] = template_histo

                    ## save template histo to root file
                    upfout[outhname] = hist.export1d(template_histo)

    if '3Jets' in njets_to_run:
        coffea_out_3j = '%s/templates_lj_3Jets_bkg_smoothed_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year) if args.smooth else '%s/templates_lj_3Jets_bkg_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year)
        save(histo_dict_3j, coffea_out_3j)
        print("%s written" % coffea_out_3j)
    if '4PJets' in njets_to_run:
        coffea_out_4pj = '%s/templates_lj_4PJets_bkg_smoothed_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year) if args.smooth else '%s/templates_lj_4PJets_bkg_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year)
        save(histo_dict_4pj, coffea_out_4pj)
        print("%s written" % coffea_out_4pj)

    upfout.close()
    print('%s written' % tmp_rname)
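# Plotter.QCD_Est above derives the data-driven QCD template from the three
# sidebands built just before it. Its implementation is not part of this
# excerpt; the function below is a minimal stand-in sketch (an assumption, not
# the actual code) of a standard ABCD-style estimate: take the shape from one
# sideband and fix the signal-region normalization with the transfer factor
# formed by the other two. The real function's shape_region/norm_region
# arguments select which region plays which role.
def qcd_abcd_sketch(iso_sb_vals, btag_sb_vals, double_sb_vals):
    """ABCD estimate: N_sig ~ N(iso_sb) * N(btag_sb) / N(double_sb), with the
    shape taken from the iso sideband (all inputs are numpy arrays of bin values)."""
    shape = iso_sb_vals / max(iso_sb_vals.sum(), 1e-8)               # unit-normalized shape
    transfer = btag_sb_vals.sum() / max(double_sb_vals.sum(), 1e-8)  # sideband transfer factor
    return shape * (iso_sb_vals.sum() * transfer)                    # shape x predicted SR yield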
        return accumulator  # tail of the processor's process() method; the class definition above is truncated in this excerpt

samples = {"default": args.files}
#with open('files_prev.json') as fin:
#    samples = json.load(fin)

output = processor.run_uproot_job(
    samples,
    treename='Events',
    processor_instance=GenVisualizer(),
    executor=processor.futures_executor,
    executor_args={'workers': 4},
    chunksize=500000,
)
save(output, 'genstuff.coffea')

output = load("genstuff.coffea")
#hmass = output["hmass"]
#bin_contents = hmass.values()[('ZPrimeToQQ_DMsimp_HT400_M50',)]
#edges = hmass.axis('mass').edges()
#edge_pairs = [(edges[i], edges[i+1]) for i in range(len(edges)-1)]
#histd = zip(edge_pairs, bin_contents)
#for thing in histd:
#    print(thing)

for hname, axisname in [("hmass", "mass"), ("hpt", "pt")]:
    print(hname)
    hists = {k[0]: v for k, v in output[hname].values(sumw2=True, overflow='over').items()}
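# GenVisualizer itself is not defined in this excerpt. A minimal coffea
# (0.6/0.7-era) processor skeleton compatible with the run_uproot_job call
# above might look like the following; the axis names and accumulator layout
# are assumptions based on how "hmass"/"hpt" are read back out.
from coffea import hist, processor

class GenVisualizer(processor.ProcessorABC):
    def __init__(self):
        self._accumulator = processor.dict_accumulator({
            "hmass": hist.Hist("Events", hist.Cat("dataset", "Dataset"),
                               hist.Bin("mass", "mass [GeV]", 100, 0., 500.)),
            "hpt": hist.Hist("Events", hist.Cat("dataset", "Dataset"),
                             hist.Bin("pt", "$p_{T}$ [GeV]", 100, 0., 1000.)),
        })

    @property
    def accumulator(self):
        return self._accumulator

    def process(self, df):
        accumulator = self.accumulator.identity()
        # ... fill accumulator["hmass"] / accumulator["hpt"] from the events here ...
        return accumulator

    def postprocess(self, accumulator):
        return accumulator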
def getRoc(args):
    # vars
    vars = args.vars.split(',')

    # open hists
    hists_unmapped = load('%s.coffea' % args.hists)
    print(hists_unmapped)

    # map to hists
    hists_mapped = {}
    for key, val in hists_unmapped.items():
        if isinstance(val, hist.Hist):
            hists_mapped[key] = processmap.apply(val)
    print('hists mapped ', hists_mapped)

    # build roc for all vars
    vals = {}
    rocs = {}
    labels = {}
    for lep in ['ele', 'mu']:
        for jet in ['jet0', 'jet1']:
            vals['%s_%s' % (lep, jet)] = {}
            rocs['%s_%s' % (lep, jet)] = {}
    for var in vars:
        for lep in ['ele', 'mu']:
            for jet in ['jet0', 'jet1']:
                print('getting roc for ', var, lep, jet)
                hist_name = 'roc_%ssel%s' % (lep, jet)
                if 'lsf' in var:
                    var_name = jet + '_' + var
                    var_cut_dir = -1
                else:
                    var_name = lep + '0_' + var
                    var_cut_dir = 1

                # get hist
                h = hists_mapped[hist_name]
                print(h)
                print([ax for ax in h.axes() if ax.name not in {'process', var_name}])
                x = h.sum(*[ax for ax in h.axes() if ax.name not in {'process', var_name}])
                bkg = 'qcd'
                sig = 'h125'
                #vals['%s_%s'%(lep,jet)][var], rocs['%s_%s'%(lep,jet)][var] = roc(x, bkg, sig, direction=var_cut_dir)
                labels[var] = var

                # plot variable
                fig, ax = plt.subplots(1, 1, figsize=(8, 8))
                print(x)
                hist.plot1d(x, ax=ax, overlay='process', clear=False, density=True)
                fig.savefig("plots/rocs/lsf_%s_%s_%s.png" % (var, lep, jet))

    return vals, rocs, labels
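# The commented-out roc(...) call above relies on a helper that is not part of
# this excerpt. A minimal sketch of what such a helper could do (assumed
# behavior, not the original implementation): scan thresholds along the
# variable axis of the coffea hist and return background/signal efficiency
# pairs, where direction=+1 cuts above the threshold and -1 cuts below.
import numpy as np

def roc(h, bkg, sig, direction=1):
    axis = h.dense_axes()[0]
    edges = axis.edges()
    sig_vals = h.integrate('process', sig).values()[()]
    bkg_vals = h.integrate('process', bkg).values()[()]
    if direction > 0:  # efficiency of passing var > threshold
        sig_pass = np.cumsum(sig_vals[::-1])[::-1]
        bkg_pass = np.cumsum(bkg_vals[::-1])[::-1]
    else:              # efficiency of passing var < threshold
        sig_pass = np.cumsum(sig_vals)
        bkg_pass = np.cumsum(bkg_vals)
    sig_eff = sig_pass / max(sig_vals.sum(), 1e-12)
    bkg_eff = bkg_pass / max(bkg_vals.sum(), 1e-12)
    return edges[:-1], list(zip(bkg_eff, sig_eff))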
import mplhep
plt.style.use(mplhep.style.ROOT)
plt.tight_layout()
from brazil.aguapreta import *

figure_directory = "/home/dryu/BFrag/data/figures/"

input_files = [
    "/home/dryu/BFrag/data/histograms/condor/job20200513_215445/DataHistograms_Run2018.coffea",
]
hists = {}
subjob_cutflows = {}
for i, input_file in enumerate(input_files):
    print(input_file)
    this_hists = util.load(input_file)

    # Integrate dataset to save space
    for key in this_hists.keys():
        obj = this_hists[key]
        if isinstance(obj, hist.hist_tools.Hist):
            if "dataset" in obj.axes():
                this_hists[key] = obj.integrate("dataset")
        del obj

    #if i == 0:
    #    pprint(this_hists.keys())
    for item_name, item in this_hists.items():
        if isinstance(item, hist.Hist):
            if item_name in hists:
                hists[item_name].add(item)
            else:
                hists[item_name] = item  # first file seen: take the hist as-is (the original snippet is truncated here; this line is the assumed completion)
# (tail of the analogous 2016 block: divide the normalized data pileup profile
# by the simulated one wherever the MC profile is non-zero)
mask = mc_pu > 0.
corr = data_pu.copy()
corr_puUp = data_pu_puUp.copy()
corr_puDown = data_pu_puDown.copy()
corr[mask] /= mc_pu[mask]
corr_puUp[mask] /= mc_pu[mask]
corr_puDown[mask] /= mc_pu[mask]
pileup_corr = lookup_tools.dense_lookup.dense_lookup(corr, fin_pileup["pileup"].edges)
pileup_corr_puUp = lookup_tools.dense_lookup.dense_lookup(corr_puUp, fin_pileup["pileup"].edges)
pileup_corr_puDown = lookup_tools.dense_lookup.dense_lookup(corr_puDown, fin_pileup["pileup"].edges)

corrections['2016_pileupweight'] = pileup_corr
corrections['2016_pileupweight_puUp'] = pileup_corr_puUp
corrections['2016_pileupweight_puDown'] = pileup_corr_puDown

pileup_corr = load('correction_files/pileup_mc.coffea')
with uproot.open("correction_files/pileup_Cert_294927-306462_13TeV_PromptReco_Collisions17_withVar.root") as fin_pileup:
    norm = lambda x: x / x.sum()
    data_pu = norm(fin_pileup["pileup"].values)
    data_pu_puUp = norm(fin_pileup["pileup_plus"].values)
    data_pu_puDown = norm(fin_pileup["pileup_minus"].values)

    pileup_corr_puUp = {}
    pileup_corr_puDown = {}
    for k in pileup_corr.keys():
        mc_pu = norm(pileup_corr[k].value)
        mask = mc_pu > 0.
        corr = data_pu.copy()
        corr_puUp = data_pu_puUp.copy()
        corr_puDown = data_pu_puDown.copy()
        corr[mask] /= mc_pu[mask]
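# Downstream, these dense_lookup objects are evaluated on the per-event number
# of true pileup interactions to obtain event weights. Illustrative usage only
# (the input array here is made up):
import numpy as np
example_weights = corrections['2016_pileupweight'](np.array([18.5, 22.0, 31.0]))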
def acc_from_dir(indir):
    """Load Coffea accumulator from directory with *.coffea files

    :param indir: Directory to search for coffea files
    :type indir: string
    :return: Sum of all found accumulators
    :rtype: dict
    """
    files = filter(lambda x: x.endswith(".coffea") and not ('cache' in x), os.listdir(indir))
    files = list(map(lambda x: os.path.abspath(pjoin(indir, x)), files))
    listhash = sha256sum(files)
    cache = pjoin(indir, f'merged_cache_{listhash}.coffea')

    if os.path.exists(cache):
        return load(cache)
    else:
        # Progress bar
        t = tqdm(total=len(files), desc='Merging input files')

        # Recursive merging
        to_merge = files

        # Use temporary files to store intermediate
        # merger results
        tmp_files = []

        def load_and_remove(path):
            data = load(path)
            os.remove(path)
            return data

        def next_item():  # renamed from `next` to avoid shadowing the builtin
            '''Get next item to merge'''
            x = to_merge.pop(0)
            if isinstance(x, str):
                if x in tmp_files:
                    tmp_files.remove(x)
                    x = load_and_remove(x)
                else:
                    x = load(x)
            return x

        while len(to_merge) > 1:
            # Remove first two items from list,
            # merge them and insert in the back
            t.update()
            x = next_item()
            y = next_item()

            tmp = "/tmp/tmp_bucoffea_merge_" + "".join(random.sample(string.ascii_uppercase + string.digits, 24))
            merged = x + y

            # clean up to save memory
            x = None
            y = None

            save(merged, tmp)
            merged = None

            to_merge.append(tmp)
            tmp_files.append(tmp)
        t.update()

        assert (len(to_merge) == 1)
        shutil.copy(to_merge[0], cache)
        return load(cache)
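# Example usage (the path is illustrative; sha256sum and pjoin are helpers
# assumed to exist elsewhere in this module, pjoin commonly being
# `from os.path import join as pjoin`):
#
#   acc = acc_from_dir("./submission/2017")
#   print(list(acc.keys()))
#
# Repeated calls with the same file list hit the merged_cache_*.coffea file
# and skip the pairwise merge entirely.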
def get_sig_templates(tmp_rname):
    """ Function that writes linearized mtt vs costheta distributions to root file. """
    widthTOname = lambda width: str(width).replace(".", "p")
    nameTOwidth = lambda width: str(width).replace("p", ".")

    ## variables that only need to be defined/evaluated once
    hdict = plt_tools.add_coffea_files(sig_fnames) if len(sig_fnames) > 1 else load(sig_fnames[0])

    # get correct hist and rebin
    hname_to_use = "mtt_vs_tlep_ctstar_abs"
    if hname_to_use not in hdict.keys():
        raise ValueError(f"{hname_to_use} not found in file")
    xrebinning, yrebinning = linearize_binning
    #xrebinning, yrebinning = mtt_ctstar_2d_binning
    histo = hdict[hname_to_use]  # process, sys, jmult, leptype, btag, lepcat

    #set_trace()
    xaxis_name = histo.dense_axes()[0].name
    yaxis_name = histo.dense_axes()[1].name
    ## rebin x axis
    if isinstance(xrebinning, np.ndarray):
        new_xbins = hist.Bin(xaxis_name, xaxis_name, xrebinning)
    elif isinstance(xrebinning, float) or isinstance(xrebinning, int):
        new_xbins = xrebinning
    histo = histo.rebin(xaxis_name, new_xbins)
    ## rebin y axis
    if isinstance(yrebinning, np.ndarray):
        new_ybins = hist.Bin(yaxis_name, yaxis_name, yrebinning)
    elif isinstance(yrebinning, float) or isinstance(yrebinning, int):
        new_ybins = yrebinning
    histo = histo.rebin(yaxis_name, new_ybins)
    rebin_histo = histo[Plotter.signal_samples, :, :, :, "btagPass"].integrate("btag")

    names = [dataset for dataset in sorted(set([key[0] for key in rebin_histo.values().keys()]))]  # get dataset names in hists

    signals = sorted(set([key[0] for key in rebin_histo.values().keys()]))
    signals = [sig for sig in signals if "TTJetsSL" in sig]  # only use SL decays

    systs = sorted(set([key[1] for key in rebin_histo.values().keys()]))
    systs.insert(0, systs.pop(systs.index("nosys")))  # move "nosys" to the front

    # need to save coffea hist objects to file so they can be opened by uproot in the proper format
    upfout = uproot3.recreate(tmp_rname, compression=uproot3.ZLIB(4)) if os.path.isfile(tmp_rname) else uproot3.create(tmp_rname)

    if "3Jets" in njets_to_run:
        histo_dict_3j = processor.dict_accumulator({"Muon": {}, "Electron": {}})
    if "4PJets" in njets_to_run:
        histo_dict_4pj = processor.dict_accumulator({"Muon": {}, "Electron": {}})

    # write signal dists to temp file
    for lep in ["Muon", "Electron"]:
        orig_lepdir = "muNJETS" if lep == "Muon" else "eNJETS"

        # scale by lumi
        lumi_correction = lumi_corr_dict[args.year]["%ss" % lep]
        histo = rebin_histo.copy()
        histo.scale(lumi_correction, axis="dataset")
        process_groups = plt_tools.make_dataset_groups(lep, args.year, samples=names, gdict="templates")
        histo = histo.group("dataset", hist.Cat("process", "Process", sorting="placement"), process_groups)

        for jmult in njets_to_run:
            lepdir = orig_lepdir.replace("NJETS", jmult.lower())

            #set_trace()
            lin_histo = Plotter.linearize_hist(histo[:, :, jmult, lep].integrate("jmult").integrate("leptype"))
            for signal in signals:
                if "Int" in signal:
                    boson, mass, width, pI, wt = tuple(signal.split("_"))
                else:
                    boson, mass, width, pI = tuple(signal.split("_"))
                sub_name = "_".join(["%s%s" % (boson[0], mass[1:]), "relw%s" % widthTOname(width).split("W")[-1], pI.lower(), wt]) if pI == "Int" \
                    else "_".join(["%s%s" % (boson[0], mass[1:]), "relw%s" % widthTOname(width).split("W")[-1], pI.lower()])

                #set_trace()
                for sys in systs:
                    if sys not in systematics.template_sys_to_name[args.year].keys(): continue
                    if not lin_histo[signal, sys].values().keys():
                        print(f"Systematic {sys} for {lep} {jmult} {signal} not found, skipping")
                        continue

                    print(args.year, lep, jmult, sub_name, sys)
                    outhname = "_".join(list(filter(None, [sub_name, systematics.template_sys_to_name[args.year][sys][0], lepdir, (args.year)[-2:]])))
                    if "LEP" in outhname:
                        outhname = outhname.replace("LEP", "muon") if lep == "Muon" else outhname.replace("LEP", "electron")

                    template_histo = lin_histo[signal, sys].integrate("process").integrate("sys")

                    ## save template histos to coffea dict
                    if jmult == "3Jets":
                        histo_dict_3j[lep][f"{signal}_{sys}"] = template_histo.copy()
                    if jmult == "4PJets":
                        histo_dict_4pj[lep][f"{signal}_{sys}"] = template_histo.copy()

                    ## save template histo to root file
                    upfout[outhname] = hist.export1d(template_histo)

    if "3Jets" in njets_to_run:
        coffea_out_3j = os.path.join(outdir, f"test_raw_templates_lj_3Jets_sig_{args.year}_{jobid}.coffea")
        save(histo_dict_3j, coffea_out_3j)
        print(f"{coffea_out_3j} written")
    if "4PJets" in njets_to_run:
        coffea_out_4pj = os.path.join(outdir, f"test_raw_templates_lj_4PJets_sig_{args.year}_{jobid}.coffea")
        save(histo_dict_4pj, coffea_out_4pj)
        print(f"{coffea_out_4pj} written")

    upfout.close()
    print(f"{tmp_rname} written")
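# Illustrative driver (assumed wiring; the real script derives the temporary
# ROOT file name and njets_to_run from its CLI arguments before calling the
# template writers above):
if __name__ == "__main__":
    tmp_sig_rname = os.path.join(outdir, f"tmp_sig_templates_{args.year}_{jobid}.root")  # hypothetical name
    get_sig_templates(tmp_sig_rname)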