double_sb = Plotter.linearize_hist(double_sb_histo) sig_histo = Plotter.linearize_hist(sig_group[:, :, jmult, 'btagPass', 'Tight'].integrate('jmult').integrate('lepcat').integrate('btag')) ## plot systematic variations after qcd estimation is made #set_trace() for sys in systs_to_run: if sys not in histo.axis('sys')._sorted: print('\n\n Systematic %s not available, skipping\n\n' % sys) continue print('QCD est:', jmult, sys, hname) #set_trace() shape_reg = 'BTAG' for norm in ['Sideband']: #for norm in ['ABCD', 'Sideband']: qcd_est_histo = Plotter.QCD_Est(sig_reg=sig_histo, iso_sb=iso_sb, btag_sb=btag_sb, double_sb=double_sb, norm_type=norm, shape_region=shape_reg, norm_region=shape_reg if norm=='Sideband' else None, sys=sys) if sys == 'nosys': qcd_name = '%s%s_Norm' % (shape_reg, norm) if norm == 'Sideband' else '%s_Norm' % norm qcd_dir = os.path.join(outdir, args.lepton, jmult, 'QCD_Est', sys_to_name[sys], qcd_name) if not os.path.isdir(qcd_dir): os.makedirs(qcd_dir) fig, (ax, rax) = plt.subplots(2, 1, gridspec_kw={"height_ratios": (3, 1)}, sharex=True) fig.subplots_adjust(hspace=.07) if hname == 'Jets_njets': print('QCD est:', jmult, sys) yields_txt, yields_json = Plotter.get_samples_yield_and_frac(qcd_est_histo, data_lumi_year['%ss' % args.lepton]/1000., sys=sys) frac_name = '%s_yields_and_fracs_QCD_Est_%s' % ('_'.join([sys, jmult, args.lepton]), qcd_name) plt_tools.print_table(yields_txt, filename='%s/%s.txt' % (qcd_dir, frac_name), print_output=True) print('%s/%s.txt written' % (qcd_dir, frac_name))
fig.subplots_adjust(hspace=.07) #set_trace() iso_sb = histo[:, jmult, 'btagPass', 'Loose'].integrate('jmult').integrate( 'lepcat').integrate('btag') btag_sb = histo[:, jmult, 'btagFail', 'Tight'].integrate('jmult').integrate( 'lepcat').integrate('btag') double_sb = histo[:, jmult, 'btagFail', 'Loose'].integrate( 'jmult').integrate( 'lepcat').integrate('btag') hslice = Plotter.QCD_Est(sig_reg=hslice, iso_sb=iso_sb, btag_sb=btag_sb, double_sb=double_sb, norm_type='ABCD', shape_region='BTAG') if withData: ax, rax = Plotter.plot_stack1d(ax, rax, hslice, xlabel=xtitle, xlimits=x_lims, **mc_opts) else: ax = Plotter.plot_mc1d(ax, hslice, xlabel=xtitle, xlimits=x_lims,
def _rebin_spec(axis_name, rebinning):
    '''
    Build the object handed to `Hist.rebin` for one dense axis.

    axis_name: name of the dense axis being rebinned.
    rebinning: either a numpy array (wrapped in a `hist.Bin` that reuses the
        axis name as its label) or a plain number (passed through unchanged).

    Raises TypeError for any other type, instead of letting the caller hit an
    unbound variable later on.
    '''
    if isinstance(rebinning, np.ndarray):
        return hist.Bin(axis_name, axis_name, rebinning)
    if isinstance(rebinning, (float, int)):
        return rebinning
    raise TypeError("Unsupported rebinning %r for axis '%s': expected a numpy array or a number" % (rebinning, axis_name))


def _dataset_names(histo):
    '''
    Return the sorted, de-duplicated first element of every key in histo.values().
    '''
    return sorted({key[0] for key in histo.values()})


def _select_permcats(names, patterns):
    '''
    Return the entries of `names` that match at least one fnmatch pattern in `patterns`.
    '''
    return [name for name in names if any(fnmatch.fnmatch(name, pattern) for pattern in patterns)]


def _propagate_ttJets_lumi(lumi_correction, tt_cats):
    '''
    Give every category in `tt_cats` the lumi weight of its parent sample.

    The parent name is the category name with its last '_'-separated token
    removed (e.g. ttJets_PS_right -> ttJets_PS). `lumi_correction` is updated in place.
    '''
    for tt_cat in tt_cats:
        ttJets_lumi_topo = '_'.join(tt_cat.split('_')[:-1]) # gets ttJets[SL, Had, DiLep] or ttJets_PS
        lumi_correction[tt_cat] = lumi_correction[ttJets_lumi_topo]


def get_bkg_templates(tmp_rname):
    '''
    Function that writes linearized mtt vs costheta distributions to root file.

    For each lepton flavour and each jet multiplicity in `njets_to_run`, the
    'mtt_vs_tlep_ctstar_abs' histogram is rebinned, scaled by the MC lumi
    weights, grouped by process and passed through `Plotter.QCD_Est`. Every
    resulting template is written to the ROOT file `tmp_rname`, and the 3Jets
    and 4PJets templates are also saved to coffea files in `outdir`.

    tmp_rname: path of the ROOT file to write (recreated if it already exists).

    Raises ValueError if the histogram is missing from the input file and
    TypeError if `linearize_binning` holds an unsupported binning type.

    Note: variations that are rescaled or smoothed look up the nominal template
    stored earlier in the same call, so 'nosys' has to come before them in `sys_to_use`.
    '''
        ## variables that only need to be defined/evaluated once
    hdict = plt_tools.add_coffea_files(bkg_fnames) if len(bkg_fnames) > 1 else load(bkg_fnames[0])

        ## get data lumi
        ## (not used further in this function; the lookup is kept so a missing file or year still fails early)
    with open('%s/inputs/lumis_data.json' % proj_dir) as lumi_file:
        data_lumi_year = prettyjson.loads(lumi_file.read())[args.year]

        # get correct hist and rebin
    hname_to_use = 'mtt_vs_tlep_ctstar_abs'
    if hname_to_use not in hdict:
        raise ValueError("%s not found in file" % hname_to_use)
    xrebinning, yrebinning = linearize_binning
    raw_histo = hdict[hname_to_use] # process, sys, jmult, leptype, btag, lepcat

    xaxis_name = raw_histo.dense_axes()[0].name
    yaxis_name = raw_histo.dense_axes()[1].name
    new_xbins = _rebin_spec(xaxis_name, xrebinning)
    new_ybins = _rebin_spec(yaxis_name, yrebinning)
    rebin_histo = raw_histo.rebin(xaxis_name, new_xbins).rebin(yaxis_name, new_ybins)

        ## ttJets events are split by reconstruction type and share the normal ttJets lumi correction
    ttJets_permcats = ['*right', '*matchable', '*unmatchable', '*other']
    names = _dataset_names(raw_histo) # get dataset names in hists
    ttJets_cats = _select_permcats(names, ttJets_permcats) # gets ttJets(_PS)_other, ...

        # use ttJets events that don't have PS weights for dedicated sys samples in 2016
    use_dedicated_ttJets = bkg_ttJets_fname is not None
    if use_dedicated_ttJets:
        ttJets_raw_histo = load(bkg_ttJets_fname)[hname_to_use] # process, sys, jmult, leptype, btag, lepcat
        ttJets_histo = ttJets_raw_histo.rebin(xaxis_name, new_xbins).rebin(yaxis_name, new_ybins)
        only_ttJets_cats = _select_permcats(_dataset_names(ttJets_raw_histo), ttJets_permcats) # gets ttJets(_PS)_other, ...

        ## make groups based on process
    process = hist.Cat("process", "Process", sorting='placement')
    process_cat = "dataset"

        # need to save coffea hist objects to file so they can be opened by uproot in the proper format
    upfout = uproot.recreate(tmp_rname, compression=uproot.ZLIB(4)) if os.path.isfile(tmp_rname) else uproot.create(tmp_rname)

    try:
            # one template container per supported jet multiplicity that was requested
        template_dicts = {
            njets: processor.dict_accumulator({'Muon' : {}, 'Electron' :{}})
            for njets in ('3Jets', '4PJets') if njets in njets_to_run
        }

            # the lumi weight file does not depend on the lepton, so it is read only once
        lumi_weights = load('%s/Corrections/%s/MC_LumiWeights_IgnoreSigEvts.coffea' % (proj_dir, jobid))[args.year]

        for lep in ['Muon', 'Electron']:
            lepdir = 'mujets' if lep == 'Muon' else 'ejets'

                ## make groups based on process
            process_groups = plt_tools.make_dataset_groups(lep, args.year, samples=names, gdict='templates')

            lumi_correction = lumi_weights['%ss' % lep]
            _propagate_ttJets_lumi(lumi_correction, ttJets_cats)

            histo = rebin_histo.copy()
            histo.scale(lumi_correction, axis='dataset')
            histo = histo.group(process_cat, process, process_groups)[:, :, :, lep, :, :].integrate('leptype')
            available_sys = histo.axis('sys')._sorted

            if use_dedicated_ttJets:
                _propagate_ttJets_lumi(lumi_correction, only_ttJets_cats)

                tt_histo = ttJets_histo.copy()
                tt_histo.scale(lumi_correction, axis='dataset')
                tt_histo = tt_histo.group(process_cat, process, {'TT' : ['ttJets_right', 'ttJets_matchable', 'ttJets_unmatchable', 'ttJets_other']})[:, :, :, lep, :, :].integrate('leptype')

            for jmult in njets_to_run:
                    # sideband regions are always taken from the nominal ('nosys') histograms
                iso_sb = Plotter.linearize_hist(histo[:, 'nosys', jmult, 'btagPass', 'Loose'].integrate('sys').integrate('jmult').integrate('lepcat').integrate('btag'))
                btag_sb = Plotter.linearize_hist(histo[:, 'nosys', jmult, 'btagFail', 'Tight'].integrate('sys').integrate('jmult').integrate('lepcat').integrate('btag'))
                double_sb = Plotter.linearize_hist(histo[:, 'nosys', jmult, 'btagFail', 'Loose'].integrate('sys').integrate('jmult').integrate('lepcat').integrate('btag'))
                sig_histo = Plotter.linearize_hist(histo[:, :, jmult, 'btagPass', 'Tight'].integrate('jmult').integrate('lepcat').integrate('btag'))

                    # nominal templates are read from the 3Jets container for '3Jets' and from the 4PJets one otherwise
                nominal_njets = '3Jets' if jmult == '3Jets' else '4PJets'

                for sys_key in sys_to_use:
                    if sys_key not in available_sys:
                        print('\n\n   Systematic %s not available, skipping\n\n' % sys_key)
                        continue

                    sysname, onlyTT = sys_to_use[sys_key]
                    sysname = sysname.replace('LEP', lepdir[0])
                    is_nominal = sys_key == 'nosys'
                    swap_ttJets = use_dedicated_ttJets and any(tag in sys_key for tag in ('ue', 'hdamp', 'mtop'))

                    qcd_est_histo = Plotter.QCD_Est(sig_reg=sig_histo, iso_sb=iso_sb, btag_sb=btag_sb, double_sb=double_sb, norm_type='Sideband', shape_region='BTAG', norm_region='BTAG', sys=sys_key)

                        ## write nominal and systematic variations for each topology to file
                    for proc in _dataset_names(qcd_est_histo):
                        if (proc != 'TT') and onlyTT:
                            continue
                        if (proc == 'data_obs') and not is_nominal:
                            continue

                        name = proc+lepdir if proc == 'QCD' else proc
                        print(lep, jmult, sys_key, name)
                        outhname = '_'.join([jmult, lepdir, name]) if is_nominal else '_'.join([jmult, lepdir, name, sysname])

                        template_histo = qcd_est_histo[proc].integrate('process')
                        if swap_ttJets:
                            tt_lin_histo = Plotter.linearize_hist(tt_histo['TT', 'nosys', jmult, 'btagPass', 'Tight'].integrate('jmult').integrate('lepcat').integrate('btag'))
                            tt_lin_histo = tt_lin_histo['TT', 'nosys'].integrate('process').integrate('sys')
                            template_histo = substitute_ttJets(sys_histo=template_histo, ttJets_histo=tt_lin_histo, ttJets_PS_histo=sig_histo['TT', 'nosys'].integrate('process').integrate('sys'))

                        if sys_key in ('mtop1695', 'mtop1755') and templates_to_smooth[proc]:
                            template_histo = scale_mtop3gev(nominal=template_dicts[nominal_njets][lep][proc], template=template_histo)

                        if (not is_nominal) and args.smooth and templates_to_smooth[proc]:
                            template_histo = smoothing(nominal=template_dicts[nominal_njets][lep][proc], template=template_histo, nbinsx=len(xrebinning)-1, nbinsy=len(yrebinning)-1)

                            ## save template histos to coffea dict
                        if jmult in template_dicts:
                            template_dicts[jmult][lep][proc if is_nominal else '%s_%s' % (proc, sys_key)] = template_histo

                            ## save template histo to root file
                        upfout[outhname] = hist.export1d(template_histo)

            ## write the coffea template files, 3Jets first and then 4PJets
        for njets in ('3Jets', '4PJets'):
            if njets not in template_dicts:
                continue
            coffea_out = '%s/templates_lj_%s_bkg_%s%s_QCD_Est_%s.coffea' % (outdir, njets, 'smoothed_' if args.smooth else '', jobid, args.year)
            save(template_dicts[njets], coffea_out)
            print("%s written" % coffea_out)
    finally:
            # always release the ROOT file handle, even when template production fails
        upfout.close()

    print('%s written' % tmp_rname)