def merge_files(jobdir, samples):
    #set_trace()
    for sample in samples:
        merged_fname = os.path.join(jobdir, sample, '%s_TOT.coffea' % sample)
        output_files = ['%s/%s/%s' % (jobdir, sample, fname) for fname in os.listdir('%s/%s' % (jobdir, sample)) if fname.endswith('.coffea')]

            # don't re-merge files if this has already been done
        if merged_fname in output_files:
            tot_files[sample] = merged_fname
            print('%s already exists' % merged_fname)
            continue
        else:
            if len(output_files) > 1:
                    ## merge files
                output_acc = plot_tools.add_coffea_files(output_files)
                plot_tools.save_accumulator(output_acc, merged_fname)
            else:
                    ## rename file
                os.system('mv %s %s' % (output_files[0], merged_fname))
            print('%s written' % merged_fname)
            tot_files[sample] = merged_fname
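## Hedged usage sketch for merge_files (the job directory and sample names below are
## hypothetical, not from this repo): the function expects one subdirectory per sample
## under jobdir holding the per-job .coffea outputs, and it records the path of each
## sample's merged "<sample>_TOT.coffea" file in the module-level tot_files dict.
#
#   tot_files = {}
#   merge_files("BATCH_htt_2017", ["ttJetsSL", "ttJetsDiLep"])
#   # -> BATCH_htt_2017/ttJetsSL/ttJetsSL_TOT.coffea,
#   #    BATCH_htt_2017/ttJetsDiLep/ttJetsDiLep_TOT.coffea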
analyzer = "genpartons" from argparse import ArgumentParser parser = ArgumentParser() parser.add_argument("year", choices=["2016", "2017", "2018"] if base_jobid == "NanoAODv6" else ["2016APV", "2016", "2017", "2018"], help="What year is the ntuple from.") args = parser.parse_args() input_dir = os.path.join(proj_dir, "results", "%s_%s" % (args.year, base_jobid), analyzer) f_ext = "TOT.coffea" outdir = os.path.join(proj_dir, "plots", "%s_%s" % (args.year, base_jobid), analyzer) if not os.path.isdir(outdir): os.makedirs(outdir) #set_trace() fnames = sorted(["%s/%s" % (input_dir, fname) for fname in os.listdir(input_dir) if fname.endswith(f_ext)]) hdict = plt_tools.add_coffea_files(fnames) if len(fnames) > 1 else load(fnames[0]) objects = { "SL" : { "TTbar" : ("t\\bar{t}", (200., 2000.)), "THad" : ("t_{h}", (150., 200.)), "TLep" : ("t_{l}", (150., 200.)), "BHad" : ("b_{h}", (0., 5.)), "BLep" : ("b_{l}", (0., 5.)), "WHad" : ("W_{h}", (50., 100.)), "WLep" : ("W_{l}", (50., 100.)), "Lepton" : ("l", (0., 5.)), }, "DL" : { "TTbar" : ("t\\bar{t}", (200., 2000.)), "Top" : ("t", (150., 200.)),
#!/usr/bin/env python

import Utilities.plot_tools as plt_tools
from pdb import set_trace
from argparse import ArgumentParser

parser = ArgumentParser()
parser.add_argument('output_fname', type=str, help='Name of output file with file extension.')
parser.add_argument('input_files', type=str, help="Input files separated by ':'")
args = parser.parse_args()

input_files = args.input_files.split(':')
output_acc = plt_tools.add_coffea_files(input_files)
plt_tools.save_accumulator(output_acc, args.output_fname)
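## Example invocation of the script above (the script and file names are illustrative,
## not taken from this repo): merges the ':'-separated inputs with
## plt_tools.add_coffea_files and writes the combined accumulator to the output file.
##
##   python merge_coffea_files.py merged_TOT.coffea job_out_0.coffea:job_out_1.coffea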
    # first statement reconstructed from the parallel 2016APV line below
    input_dir_2016 = os.path.join(proj_dir, 'results', '2016_%s' % jobid, analyzer)
    input_dir_2016APV = os.path.join(proj_dir, 'results', '2016APV_%s' % jobid, analyzer)
    outdir = os.path.join(proj_dir, 'plots', '2016Combined_%s' % jobid, analyzer)

    fnames_2016 = sorted([os.path.join(input_dir_2016, fname) for fname in os.listdir(input_dir_2016) if fname.endswith(f_ext)])
    fnames_2016APV = sorted([os.path.join(input_dir_2016APV, fname) for fname in os.listdir(input_dir_2016APV) if fname.endswith(f_ext)])
    hdict_2016 = plt_tools.add_coffea_files(fnames_2016) if len(fnames_2016) > 1 else load(fnames_2016[0])
    hdict_2016APV = plt_tools.add_coffea_files(fnames_2016APV) if len(fnames_2016APV) > 1 else load(fnames_2016APV[0])

    lumi_to_use_2016 = (data_lumi_dict['2016']['Muons'] + data_lumi_dict['2016']['Electrons']) / 2000.
    lumi_to_use_2016APV = (data_lumi_dict['2016APV']['Muons'] + data_lumi_dict['2016APV']['Electrons']) / 2000.
    lumi_to_use = lumi_to_use_2016 + lumi_to_use_2016APV

    computed_combined_2016 = True
    computed_combined_2016_year_key = year
else:
    input_dir = os.path.join(proj_dir, 'results', '%s_%s' % (year, jobid),
hem_odir = 'Scaled'
fname_str = 'SCALED'
if args.hem == 'removed':
    hem_odir = 'Removed'
    fname_str = 'REMOVE'

input_dir = os.path.join(proj_dir, 'results', '%s_%s' % (args.year, jobid), analyzer)
f_ext = 'TOT.coffea'
outdir = os.path.join(proj_dir, 'plots', '%s_%s' % (args.year, jobid), analyzer, hem_odir)
if not os.path.isdir(outdir):
    os.makedirs(outdir)

fnames = sorted(['%s/%s' % (input_dir, fname) for fname in os.listdir(input_dir) if fname.endswith(f_ext)])
fname_to_use = [fname for fname in fnames if fname_str in fname]
hdict = plt_tools.add_coffea_files(fname_to_use) if len(fname_to_use) > 1 else load(fname_to_use[0])

jet_mults = {
    '3Jets' : '3 jets',
    '4PJets' : '4+ jets'
}

objtypes = {
    'Jets' : 'jets',
    'Lep' : {
        'Muon' : '$\\mu$',
        'Electron' : '$e$',
    }
}

btag_cats = {
def get_bkg_templates(tmp_rname):
    '''
    Function that writes linearized mtt vs costheta distributions to root file.
    '''
        ## variables that only need to be defined/evaluated once
    hdict = plt_tools.add_coffea_files(bkg_fnames) if len(bkg_fnames) > 1 else load(bkg_fnames[0])

        ## get data lumi and scale MC by lumi
    data_lumi_year = prettyjson.loads(open('%s/inputs/lumis_data.json' % proj_dir).read())[args.year]

        # get correct hist and rebin
    hname_to_use = 'mtt_vs_tlep_ctstar_abs'
    if hname_to_use not in hdict.keys():
        raise ValueError("%s not found in file" % hname_to_use)
    xrebinning, yrebinning = linearize_binning
    histo = hdict[hname_to_use] # process, sys, jmult, leptype, btag, lepcat

    xaxis_name = histo.dense_axes()[0].name
    yaxis_name = histo.dense_axes()[1].name
        ## rebin x axis
    if isinstance(xrebinning, np.ndarray):
        new_xbins = hist.Bin(xaxis_name, xaxis_name, xrebinning)
    elif isinstance(xrebinning, float) or isinstance(xrebinning, int):
        new_xbins = xrebinning
    histo = histo.rebin(xaxis_name, new_xbins)
        ## rebin y axis
    if isinstance(yrebinning, np.ndarray):
        new_ybins = hist.Bin(yaxis_name, yaxis_name, yrebinning)
    elif isinstance(yrebinning, float) or isinstance(yrebinning, int):
        new_ybins = yrebinning
    rebin_histo = histo.rebin(yaxis_name, new_ybins)

    nbins = (len(xrebinning)-1)*(len(yrebinning)-1)

        ## scale ttJets events, split by reconstruction type, by normal ttJets lumi correction
    ttJets_permcats = ['*right', '*matchable', '*unmatchable', '*other']
    names = [dataset for dataset in sorted(set([key[0] for key in hdict[hname_to_use].values().keys()]))] # get dataset names in hists
    ttJets_cats = [name for name in names if any([fnmatch.fnmatch(name, cat) for cat in ttJets_permcats])] # gets ttJets(_PS)_other, ...

        # use ttJets events that don't have PS weights for dedicated sys samples in 2016
    if bkg_ttJets_fname is not None:
        ttJets_hdict = load(bkg_ttJets_fname)
        ttJets_histo = ttJets_hdict[hname_to_use] # process, sys, jmult, leptype, btag, lepcat
            ## rebin x axis
        ttJets_histo = ttJets_histo.rebin(xaxis_name, new_xbins)
            ## rebin y axis
        ttJets_histo = ttJets_histo.rebin(yaxis_name, new_ybins)

        only_ttJets_names = [dataset for dataset in sorted(set([key[0] for key in ttJets_hdict[hname_to_use].values().keys()]))] # get dataset names in hists
        only_ttJets_cats = [name for name in only_ttJets_names if any([fnmatch.fnmatch(name, cat) for cat in ttJets_permcats])] # gets ttJets(_PS)_other, ...
        ## make groups based on process
    process = hist.Cat("process", "Process", sorting='placement')
    process_cat = "dataset"

        # need to save coffea hist objects to file so they can be opened by uproot in the proper format
    upfout = uproot.recreate(tmp_rname, compression=uproot.ZLIB(4)) if os.path.isfile(tmp_rname) else uproot.create(tmp_rname)

    if '3Jets' in njets_to_run:
        histo_dict_3j = processor.dict_accumulator({'Muon' : {}, 'Electron' : {}})
    if '4PJets' in njets_to_run:
        histo_dict_4pj = processor.dict_accumulator({'Muon' : {}, 'Electron' : {}})

    for lep in ['Muon', 'Electron']:
        lepdir = 'mujets' if lep == 'Muon' else 'ejets'

            ## make groups based on process
        process_groups = plt_tools.make_dataset_groups(lep, args.year, samples=names, gdict='templates')

        lumi_correction = load('%s/Corrections/%s/MC_LumiWeights_IgnoreSigEvts.coffea' % (proj_dir, jobid))[args.year]['%ss' % lep]
            # scale ttJets events, split by reconstruction type, by normal ttJets lumi correction
        if len(ttJets_cats) > 0:
            for tt_cat in ttJets_cats:
                ttJets_lumi_topo = '_'.join(tt_cat.split('_')[:-1]) # gets ttJets[SL, Had, DiLep] or ttJets_PS
                ttJets_eff_lumi = lumi_correction[ttJets_lumi_topo]
                lumi_correction.update({tt_cat: ttJets_eff_lumi})

        histo = rebin_histo.copy()
        histo.scale(lumi_correction, axis='dataset')
        histo = histo.group(process_cat, process, process_groups)[:, :, :, lep, :, :].integrate('leptype')

            # use ttJets events that don't have PS weights for dedicated sys samples in 2016
        if bkg_ttJets_fname is not None:
            if len(only_ttJets_cats) > 0:
                for tt_cat in only_ttJets_cats:
                    ttJets_lumi_topo = '_'.join(tt_cat.split('_')[:-1]) # gets ttJets[SL, Had, DiLep] or ttJets_PS
                    ttJets_eff_lumi = lumi_correction[ttJets_lumi_topo]
                    lumi_correction.update({tt_cat: ttJets_eff_lumi})

            tt_histo = ttJets_histo.copy()
            tt_histo.scale(lumi_correction, axis='dataset')
            tt_histo = tt_histo.group(process_cat, process, {'TT' : ['ttJets_right', 'ttJets_matchable', 'ttJets_unmatchable', 'ttJets_other']})[:, :, :, lep, :, :].integrate('leptype')

        for jmult in njets_to_run:
            iso_sb = Plotter.linearize_hist(histo[:, 'nosys', jmult, 'btagPass', 'Loose'].integrate('sys').integrate('jmult').integrate('lepcat').integrate('btag'))
            btag_sb = Plotter.linearize_hist(histo[:, 'nosys', jmult, 'btagFail', 'Tight'].integrate('sys').integrate('jmult').integrate('lepcat').integrate('btag'))
            double_sb = Plotter.linearize_hist(histo[:, 'nosys', jmult, 'btagFail', 'Loose'].integrate('sys').integrate('jmult').integrate('lepcat').integrate('btag'))
            sig_histo = Plotter.linearize_hist(histo[:, :, jmult, 'btagPass', 'Tight'].integrate('jmult').integrate('lepcat').integrate('btag'))

            for sys in sys_to_use.keys():
                if sys not in histo.axis('sys')._sorted:
                    print('\n\n Systematic %s not available, skipping\n\n' % sys)
                    continue
                #set_trace()
                sysname, onlyTT = sys_to_use[sys]
                if 'LEP' in sysname:
                    sysname = sysname.replace('LEP', lepdir[0])

                qcd_est_histo = Plotter.QCD_Est(sig_reg=sig_histo, iso_sb=iso_sb, btag_sb=btag_sb, double_sb=double_sb, norm_type='Sideband', shape_region='BTAG', norm_region='BTAG', sys=sys)

                    ## write nominal and systematic variations for each topology to file
                for proc in sorted(set([key[0] for key in qcd_est_histo.values().keys()])):
                    if (proc != 'TT') and onlyTT: continue
                    if (proc == 'data_obs') and not (sys == 'nosys'): continue

                    name = proc+lepdir if proc == 'QCD' else proc
                    print(lep, jmult, sys, name)
                    outhname = '_'.join([jmult, lepdir, name]) if sys == 'nosys' else '_'.join([jmult, lepdir, name, sysname])

                    template_histo = qcd_est_histo[proc].integrate('process')

                    if (('ue' in sys) or ('hdamp' in sys) or ('mtop' in sys)) and (bkg_ttJets_fname is not None):
                        tt_lin_histo = Plotter.linearize_hist(tt_histo['TT', 'nosys', jmult, 'btagPass', 'Tight'].integrate('jmult').integrate('lepcat').integrate('btag'))
                        tt_lin_histo = tt_lin_histo['TT', 'nosys'].integrate('process').integrate('sys')
                        template_histo = substitute_ttJets(sys_histo=template_histo, ttJets_histo=tt_lin_histo, ttJets_PS_histo=sig_histo['TT', 'nosys'].integrate('process').integrate('sys'))

                    if ((sys == 'mtop1695') or (sys == 'mtop1755')) and (templates_to_smooth[proc]):
                        template_histo = scale_mtop3gev(nominal=histo_dict_3j[lep][proc] if jmult == '3Jets' else histo_dict_4pj[lep][proc], template=template_histo)
                        #set_trace()

                    if (sys != 'nosys') and (args.smooth) and (templates_to_smooth[proc]):
                        template_histo = smoothing(nominal=histo_dict_3j[lep][proc] if jmult == '3Jets' else histo_dict_4pj[lep][proc], template=template_histo, nbinsx=len(xrebinning)-1, nbinsy=len(yrebinning)-1)#, debug=True if proc=='VV' else False)
                        #set_trace()

                        ## save template histos to coffea dict
                    if jmult == '3Jets':
                        histo_dict_3j[lep][proc if sys == 'nosys' else '%s_%s' % (proc, sys)] = template_histo
                    if jmult == '4PJets':
                        histo_dict_4pj[lep][proc if sys == 'nosys' else '%s_%s' % (proc, sys)] = template_histo

                        ## save template histo to root file
                    upfout[outhname] = hist.export1d(template_histo)

    if '3Jets' in njets_to_run:
        coffea_out_3j = '%s/templates_lj_3Jets_bkg_smoothed_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year) if args.smooth else '%s/templates_lj_3Jets_bkg_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year)
        save(histo_dict_3j, coffea_out_3j)
        print("%s written" % coffea_out_3j)
    if '4PJets' in njets_to_run:
        coffea_out_4pj = '%s/templates_lj_4PJets_bkg_smoothed_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year) if args.smooth else '%s/templates_lj_4PJets_bkg_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year)
        save(histo_dict_4pj, coffea_out_4pj)
        print("%s written" % coffea_out_4pj)

    upfout.close()
    print('%s written' % tmp_rname)
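## Minimal numpy sketch of what "linearized" means above. This is an assumption about
## Plotter.linearize_hist (implemented elsewhere in this repo): the 2D (mtt, |cos theta*|)
## bin contents are flattened into a single 1D template with
## nbins = (len(xrebinning)-1) * (len(yrebinning)-1) bins, so fitting tools can treat
## the 2D distribution as one histogram.
import numpy as np

def linearize_2d(sumw_2d):
    # row-major flatten: 2D bin (ix, iy) -> 1D bin ix*ny + iy
    nx, ny = sumw_2d.shape
    return sumw_2d.reshape(nx * ny)

#   linearize_2d(np.arange(6).reshape(2, 3))  # -> array([0, 1, 2, 3, 4, 5])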
def get_sig_templates(tmp_rname):
    '''
    Function that writes linearized mtt vs costheta distributions to root file.
    '''
    from rootpy.plotting import Hist2D

    widthTOname = lambda width : str(width).replace('.', 'p')
    nameTOwidth = lambda width : str(width).replace('p', '.')

        ## variables that only need to be defined/evaluated once
    hdict = plt_tools.add_coffea_files(sig_fnames) if len(sig_fnames) > 1 else load(sig_fnames[0])

        ## get data lumi and scale MC by lumi
    data_lumi_year = prettyjson.loads(open('%s/inputs/lumis_data.json' % proj_dir).read())[args.year]

        # get correct hist and rebin
    hname_to_use = 'mtt_vs_tlep_ctstar_abs'
    if hname_to_use not in hdict.keys():
        raise ValueError("%s not found in file" % hname_to_use)
    xrebinning, yrebinning = mtt_ctstar_2d_binning
    #xrebinning, yrebinning = 2, 1
    histo = hdict[hname_to_use] # process, sys, jmult, leptype, btag, lepcat

    #set_trace()
    xaxis_name = histo.dense_axes()[0].name
    yaxis_name = histo.dense_axes()[1].name
        ## rebin x axis
    if isinstance(xrebinning, np.ndarray):
        new_xbins = hist.Bin(xaxis_name, xaxis_name, xrebinning)
    elif isinstance(xrebinning, float) or isinstance(xrebinning, int):
        new_xbins = xrebinning
    histo = histo.rebin(xaxis_name, new_xbins)
        ## rebin y axis
    if isinstance(yrebinning, np.ndarray):
        new_ybins = hist.Bin(yaxis_name, yaxis_name, yrebinning)
    elif isinstance(yrebinning, float) or isinstance(yrebinning, int):
        new_ybins = yrebinning
    #set_trace()
    histo = histo.rebin(yaxis_name, new_ybins)
    rebin_histo = histo[:, :, :, :, 'btagPass', 'Tight'].integrate('lepcat').integrate('btag')

    signals = sorted(set([key[0] for key in rebin_histo.values().keys()]))

        # create 2D signal hists and write to temp file
    with root_open(tmp_rname, 'w') as out:
        #for lep in ['Muon']:
        for lep in ['Muon', 'Electron']:
            lepdir = 'mujets' if lep == 'Muon' else 'ejets'

                # scale by lumi
            lumi_correction = load('%s/Corrections/%s/MC_LumiWeights_IgnoreSigEvts.coffea' % (proj_dir, jobid))[args.year]['%ss' % lep]
            scaled_histo = rebin_histo.copy()
            scaled_histo.scale(lumi_correction, axis='dataset')

            for jmult in njets_to_run:
                histo = scaled_histo[:, :, jmult, lep].integrate('jmult').integrate('leptype')

                for signal in signals:
                    _, mass, width, pI, wt = tuple(signal.split('_'))
                    samtype = 'int' if pI == 'Int' else 'sgn'
                    bostype = 'ggA' if _ == 'AtoTT' else 'ggH'
                    sub_name = '%s_%s-%s-%s-%s' % (bostype, wt, samtype, widthTOname(width).split('W')[-1]+'pc', mass) if pI == 'Int' else '%s_pos-%s-%s-%s' % (bostype, samtype, widthTOname(width).split('W')[-1]+'pc', mass)
                    #set_trace()

                    for sys in sys_to_use.keys():
                        sysname, onlyTT = sys_to_use[sys]
                        if onlyTT: continue
                        if sys not in histo.axis('sys')._sorted:
                            print('\n\n Systematic %s not available, skipping\n\n' % sys)
                            continue

                        #set_trace()
                        if 'LEP' in sysname:
                            sysname = sysname.replace('LEP', lepdir[0])

                        template_histo = histo[signal, sys].integrate('dataset').integrate('sys')
                        if wt == 'neg':
                            template_histo.scale(-1.)

                        #if (pI == 'Int') and (wt == 'pos'): continue
                        print(lep, jmult, sub_name, sys)
                        sumw, sumw2 = template_histo.values(sumw2=True, overflow='all')[()] # get vals and errors for all bins (including under/overflow)

                        #if args.smooth:
                        #    set_trace()

                            ## create rootpy hist and rename
                        rtpy_h2d = Hist2D(template_histo.dense_axes()[0].edges(), template_histo.dense_axes()[1].edges())
                        outhname = '_'.join([jmult, lepdir, sub_name]) if sys == 'nosys' else '_'.join([jmult, lepdir, sub_name, sysname])
                        rtpy_h2d.name = outhname

                            # set bin content for rootpy hist
                        for binx in range(0, rtpy_h2d.GetNbinsX()+2):
                            for biny in range(0, rtpy_h2d.GetNbinsY()+2):
                                rtpy_h2d[binx, biny] = sumw[binx, biny], sumw2[binx, biny]
                        #set_trace()

                        rtpy_h2d.Write()

    print('%s written' % tmp_rname)
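## Worked example of the signal-name parsing above. The dataset name is hypothetical,
## but follows the "<boson>_<mass>_<width>_<pI>_<wt>" pattern the code splits on:
#   _, mass, width, pI, wt = "AtoTT_M750_W10p0_Int_neg".split("_")
#   # bostype = "ggA", samtype = "int";
#   # widthTOname("W10p0").split("W")[-1] + "pc" -> "10p0pc"
#   # sub_name -> "ggA_neg-int-10p0pc-M750"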
def get_bkg_templates(tmp_rname):
    """
    Function that writes linearized mtt vs costheta distributions to root file.
    """
        ## variables that only need to be defined/evaluated once
    hdict = plt_tools.add_coffea_files(bkg_fnames) if len(bkg_fnames) > 1 else load(bkg_fnames[0])

        # get correct hist and rebin
    hname_to_use = "mtt_vs_tlep_ctstar_abs"
    if hname_to_use not in hdict.keys():
        raise ValueError("%s not found in file" % hname_to_use)
    xrebinning, yrebinning = linearize_binning
    histo = hdict[hname_to_use][Plotter.nonsignal_samples] # process, sys, jmult, leptype, btag, lepcat

    xaxis_name = histo.dense_axes()[0].name
    yaxis_name = histo.dense_axes()[1].name
        ## rebin x axis
    if isinstance(xrebinning, np.ndarray):
        new_xbins = hist.Bin(xaxis_name, xaxis_name, xrebinning)
    elif isinstance(xrebinning, float) or isinstance(xrebinning, int):
        new_xbins = xrebinning
    histo = histo.rebin(xaxis_name, new_xbins)
        ## rebin y axis
    if isinstance(yrebinning, np.ndarray):
        new_ybins = hist.Bin(yaxis_name, yaxis_name, yrebinning)
    elif isinstance(yrebinning, float) or isinstance(yrebinning, int):
        new_ybins = yrebinning
    rebin_histo = histo.rebin(yaxis_name, new_ybins)

        ## scale ttJets events, split by reconstruction type, by normal ttJets lumi correction
    ttJets_permcats = ["*right", "*matchable", "*unmatchable", "*sl_tau", "*other"]
    names = [dataset for dataset in sorted(set([key[0] for key in histo.values().keys()]))] # get dataset names in hists
    ttJets_cats = [name for name in names if any([fnmatch.fnmatch(name, cat) for cat in ttJets_permcats])] # gets ttJets(_PS)_other, ...

        ## make groups based on process
    process = hist.Cat("process", "Process", sorting="placement")
    process_cat = "dataset"

        # need to save coffea hist objects to file so they can be opened by uproot in the proper format
    upfout = uproot3.recreate(tmp_rname, compression=uproot3.ZLIB(4)) if os.path.isfile(tmp_rname) else uproot3.create(tmp_rname)

    if "3Jets" in njets_to_run:
        histo_dict_3j = processor.dict_accumulator({"Muon" : {}, "Electron" : {}})
    if "4PJets" in njets_to_run:
        histo_dict_4pj = processor.dict_accumulator({"Muon" : {}, "Electron" : {}})

    for lep in ["Muon", "Electron"]:
        orig_lepdir = "muNJETS" if lep == "Muon" else "eNJETS"
        #set_trace()

            ## make groups based on process
        process_groups = plt_tools.make_dataset_groups(lep, args.year, samples=names, gdict="templates")
        #process_groups = plt_tools.make_dataset_groups(lep, args.year, samples=names, gdict="dataset")

        lumi_correction = lumi_corr_dict[args.year]["%ss" % lep]
            # scale ttJets events, split by reconstruction type, by normal ttJets lumi correction
        if len(ttJets_cats) > 0:
            for tt_cat in ttJets_cats:
                ttJets_lumi_topo = "_".join(tt_cat.split("_")[:-2]) if "sl_tau" in tt_cat else "_".join(tt_cat.split("_")[:-1]) # gets ttJets[SL, Had, DiLep] or ttJets_PS
                ttJets_eff_lumi = lumi_correction[ttJets_lumi_topo]
                lumi_correction.update({tt_cat: ttJets_eff_lumi})

        histo = rebin_histo.copy()
        histo.scale(lumi_correction, axis="dataset")
        histo = histo.group(process_cat, process, process_groups)[:, :, :, lep, :, :].integrate("leptype")

        #set_trace()
        systs = sorted(set([key[1] for key in histo.values().keys()]))
        systs.insert(0, systs.pop(systs.index("nosys"))) # move "nosys" to the front

            # loop over each jet multiplicity
        for jmult in njets_to_run:
            lepdir = orig_lepdir.replace("NJETS", jmult.lower())

                # get sideband and signal region hists
            cen_sb_histo = Plotter.linearize_hist(histo[:, "nosys", jmult, btag_reg_names_dict["Central"]["reg"]].integrate("jmult").integrate("btag").integrate("sys"))
            #up_sb_histo = histo[:, "nosys", jmult, btag_reg_names_dict["Up"]["reg"]].integrate("jmult").integrate("btag")
            #dw_sb_histo = histo[:, "nosys", jmult, btag_reg_names_dict["Down"]["reg"]].integrate("jmult").integrate("btag")
            sig_histo = Plotter.linearize_hist(histo[:, :, jmult, btag_reg_names_dict["Signal"]["reg"]].integrate("jmult").integrate("btag"))

                # loop over each systematic
            for sys in systs:
                if sys not in systematics.template_sys_to_name[args.year].keys(): continue

                sys_histo = sig_histo[:, sys].integrate("sys") if sys in systematics.ttJets_sys.values() else Plotter.BKG_Est(sig_reg=sig_histo[:, sys].integrate("sys"), sb_reg=cen_sb_histo, norm_type="SigMC", sys=sys, ignore_uncs=True)

                    ## write nominal and systematic variations for each topology to file
                #for proc in sorted(set([key[0] for key in sig_histo.values().keys()])):
                for proc in sorted(set([key[0] for key in sys_histo.values().keys()])):
                    if ("tt" not in proc) and (sys in systematics.ttJets_sys.values()): continue
                    #if (proc != "tt") and (sys in systematics.ttJets_sys.values()): continue
                    if (proc == "data_obs") and not (sys == "nosys"): continue
                    if not sys_histo[proc].values().keys():
                    #if not sig_histo[proc, sys].values().keys():
                        print(f"Systematic {sys} for {lep} {jmult} {proc} not found, skipping")
                        continue

                    print(args.year, lep, jmult, sys, proc)
                    #set_trace()
                    outhname = "_".join(list(filter(None, [proc, systematics.template_sys_to_name[args.year][sys][0], lepdir, (args.year)[-2:]])))
                    if "LEP" in outhname:
                        outhname = outhname.replace("LEP", "muon") if lep == "Muon" else outhname.replace("LEP", "electron")

                    template_histo = sys_histo[proc].integrate("process")
                    #template_histo = sig_histo[proc, sys].integrate("process").integrate("sys")

                    #set_trace()
                        ## save template histos to coffea dict
                    if jmult == "3Jets":
                        histo_dict_3j[lep][f"{proc}_{sys}"] = template_histo.copy()
                    if jmult == "4PJets":
                        histo_dict_4pj[lep][f"{proc}_{sys}"] = template_histo.copy()

                        ## save template histo to root file
                    upfout[outhname] = hist.export1d(template_histo)

    if "3Jets" in njets_to_run:
        coffea_out_3j = os.path.join(outdir, f"test_raw_templates_lj_3Jets_bkg_{args.year}_{jobid}.coffea")
        save(histo_dict_3j, coffea_out_3j)
        print(f"{coffea_out_3j} written")
    if "4PJets" in njets_to_run:
        coffea_out_4pj = os.path.join(outdir, f"test_raw_templates_lj_4PJets_bkg_{args.year}_{jobid}.coffea")
        save(histo_dict_4pj, coffea_out_4pj)
        print(f"{coffea_out_4pj} written")

    upfout.close()
    print(f"{tmp_rname} written")
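## Rough sketch of the data-driven estimate delegated to Plotter.BKG_Est above.
## Assumption (not verified against the actual implementation): with norm_type="SigMC",
## the central-sideband shape is rescaled to the yield expected in the signal region,
## roughly:
import numpy as np

def sideband_estimate(sb_shape, sig_region_yield):
    # take the shape from the sideband and normalize it to the signal-region yield
    return sb_shape * (sig_region_yield / sb_shape.sum())

#   sideband_estimate(np.array([4., 6.]), 30.)  # -> array([12., 18.])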
def get_sig_templates(tmp_rname):
    """
    Function that writes linearized mtt vs costheta distributions to root file.
    """
    widthTOname = lambda width: str(width).replace(".", "p")
    nameTOwidth = lambda width: str(width).replace("p", ".")

        ## variables that only need to be defined/evaluated once
    hdict = plt_tools.add_coffea_files(sig_fnames) if len(sig_fnames) > 1 else load(sig_fnames[0])

        # get correct hist and rebin
    hname_to_use = "mtt_vs_tlep_ctstar_abs"
    if hname_to_use not in hdict.keys():
        raise ValueError(f"{hname_to_use} not found in file")
    xrebinning, yrebinning = linearize_binning
    #xrebinning, yrebinning = mtt_ctstar_2d_binning
    histo = hdict[hname_to_use] # process, sys, jmult, leptype, btag, lepcat

    #set_trace()
    xaxis_name = histo.dense_axes()[0].name
    yaxis_name = histo.dense_axes()[1].name
        ## rebin x axis
    if isinstance(xrebinning, np.ndarray):
        new_xbins = hist.Bin(xaxis_name, xaxis_name, xrebinning)
    elif isinstance(xrebinning, float) or isinstance(xrebinning, int):
        new_xbins = xrebinning
    histo = histo.rebin(xaxis_name, new_xbins)
        ## rebin y axis
    if isinstance(yrebinning, np.ndarray):
        new_ybins = hist.Bin(yaxis_name, yaxis_name, yrebinning)
    elif isinstance(yrebinning, float) or isinstance(yrebinning, int):
        new_ybins = yrebinning
    histo = histo.rebin(yaxis_name, new_ybins)
    rebin_histo = histo[Plotter.signal_samples, :, :, :, "btagPass"].integrate("btag")

    names = [dataset for dataset in sorted(set([key[0] for key in rebin_histo.values().keys()]))] # get dataset names in hists

    signals = sorted(set([key[0] for key in rebin_histo.values().keys()]))
    signals = [sig for sig in signals if "TTJetsSL" in sig] # only use SL decays

    systs = sorted(set([key[1] for key in rebin_histo.values().keys()]))
    systs.insert(0, systs.pop(systs.index("nosys"))) # move "nosys" to the front

        # need to save coffea hist objects to file so they can be opened by uproot in the proper format
    upfout = uproot3.recreate(tmp_rname, compression=uproot3.ZLIB(4)) if os.path.isfile(tmp_rname) else uproot3.create(tmp_rname)

    if "3Jets" in njets_to_run:
        histo_dict_3j = processor.dict_accumulator({"Muon" : {}, "Electron" : {}})
    if "4PJets" in njets_to_run:
        histo_dict_4pj = processor.dict_accumulator({"Muon" : {}, "Electron" : {}})

        # write signal dists to temp file
    for lep in ["Muon", "Electron"]:
        orig_lepdir = "muNJETS" if lep == "Muon" else "eNJETS"

            # scale by lumi
        lumi_correction = lumi_corr_dict[args.year]["%ss" % lep]
        histo = rebin_histo.copy()
        histo.scale(lumi_correction, axis="dataset")

        process_groups = plt_tools.make_dataset_groups(lep, args.year, samples=names, gdict="templates")
        histo = histo.group("dataset", hist.Cat("process", "Process", sorting="placement"), process_groups)

        for jmult in njets_to_run:
            lepdir = orig_lepdir.replace("NJETS", jmult.lower())

            #set_trace()
            lin_histo = Plotter.linearize_hist(histo[:, :, jmult, lep].integrate("jmult").integrate("leptype"))

            for signal in signals:
                if "Int" in signal:
                    boson, mass, width, pI, wt = tuple(signal.split("_"))
                else:
                    boson, mass, width, pI = tuple(signal.split("_"))

                sub_name = "_".join(["%s%s" % (boson[0], mass[1:]), "relw%s" % widthTOname(width).split("W")[-1], pI.lower(), wt]) if pI == "Int" else "_".join(["%s%s" % (boson[0], mass[1:]), "relw%s" % widthTOname(width).split("W")[-1], pI.lower()])

                #set_trace()
                for sys in systs:
                    if sys not in systematics.template_sys_to_name[args.year].keys(): continue
                    if not lin_histo[signal, sys].values().keys():
                        print(f"Systematic {sys} for {lep} {jmult} {signal} not found, skipping")
                        continue

                    print(args.year, lep, jmult, sub_name, sys)
                    outhname = "_".join(list(filter(None, [sub_name, systematics.template_sys_to_name[args.year][sys][0], lepdir, (args.year)[-2:]])))
                    if "LEP" in outhname:
                        outhname = outhname.replace("LEP", "muon") if lep == "Muon" else outhname.replace("LEP", "electron")

                    template_histo = lin_histo[signal, sys].integrate("process").integrate("sys")

                        ## save template histos to coffea dict
                    if jmult == "3Jets":
                        histo_dict_3j[lep][f"{signal}_{sys}"] = template_histo.copy()
                    if jmult == "4PJets":
                        histo_dict_4pj[lep][f"{signal}_{sys}"] = template_histo.copy()

                        ## save template histo to root file
                    upfout[outhname] = hist.export1d(template_histo)

    if "3Jets" in njets_to_run:
        coffea_out_3j = os.path.join(outdir, f"test_raw_templates_lj_3Jets_sig_{args.year}_{jobid}.coffea")
        save(histo_dict_3j, coffea_out_3j)
        print(f"{coffea_out_3j} written")
    if "4PJets" in njets_to_run:
        coffea_out_4pj = os.path.join(outdir, f"test_raw_templates_lj_4PJets_sig_{args.year}_{jobid}.coffea")
        save(histo_dict_4pj, coffea_out_4pj)
        print(f"{coffea_out_4pj} written")

    upfout.close()
    print(f"{tmp_rname} written")
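## Example of the ROOT template name assembled above (the systematic label is
## hypothetical; filter(None, ...) drops any empty name pieces before joining):
#   "_".join(filter(None, ["A750_relw10p0_int_neg", "CMS_scale_j_13TeVUp", "mu3jets", "17"]))
#   # -> "A750_relw10p0_int_neg_CMS_scale_j_13TeVUp_mu3jets_17"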
        failed_samples = []
        finished_samples = [] # reinitialize finished_samples to check if they're actually finished
        for sample in samples:
            isCorrect, nfails, alreadyComplete = check_correctness('%s/%s' % (jobdir, sample), dump_rescue=True)
            if isCorrect == False:
                failed_samples.append(sample)
                os.system('cd %s/%s && condor_submit condor.rescue.jdl && cd %s' % (jobdir, sample, orig_dir))
            else:
                finished_samples.append(sample)

        if finished_samples:
            merge_files(jobdir, finished_samples)

        if len(failed_samples) == 0:
            print('All jobs completed')
            escape = True
        else:
            time.sleep(30)
    else:
        print("%i jobs are still running, checking again in 30 seconds\n" % njobs)
        time.sleep(30)

    ## merge all TOT files from each sample into one
    # compare as sets: the order of merged samples doesn't matter
if set(tot_files.keys()) == set(samples):
    tot_acc = plot_tools.add_coffea_files(list(tot_files.values()))
    tot_outname = '%s_TOT.coffea' % args.jobdir.strip('/')
    plot_tools.save_accumulator(tot_acc, '%s/%s' % (jobdir, tot_outname))
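## Resulting layout after a fully successful run (the jobdir name is illustrative):
##   BATCH_htt_2017/
##     ttJetsSL/ttJetsSL_TOT.coffea          <- per-sample merge from merge_files()
##     ttJetsDiLep/ttJetsDiLep_TOT.coffea
##     BATCH_htt_2017_TOT.coffea             <- final merge of all per-sample TOT files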