def make_electron_ids(electrons, year):
    """Attach one boolean selection mask per configured electron category.

    The ``['Electrons'][year]`` section of
    ``$PROJECT_DIR/cfg_files/cfg_pars_$jobid.json`` is read.  For every
    category key in that section, ``electrons[<category>]`` is set to the
    logical AND of the configured ID decision, ``pt >= ptmin`` and
    ``|eta| <= etamax``.

    Args:
        electrons: object indexable by ``'pt'`` and ``'eta'`` that also accepts
            item assignment of the resulting masks.
        year: key selecting the per-year block of the ``Electrons`` section.

    Returns:
        The same ``electrons`` object, with the new mask entries added.

    Raises:
        IOError: if the ``id`` configured for ``VETOEL``, ``LOOSEEL`` or
            ``TIGHTEL`` is not one of the enabled ID names.
    """
    cfg_fname = os.path.join(os.environ['PROJECT_DIR'], 'cfg_files',
                             'cfg_pars_%s.json' % os.environ['jobid'])
    # Context manager so the config file handle is closed deterministically.
    with open(cfg_fname) as cfg_file:
        el_pars = prettyjson.loads(cfg_file.read())['Electrons'][year]

    # Only these ID functions are currently enabled; FAIL, LOOSE_15, MEDIUM_15,
    # TIGHT_15 and NOTVETO_15 are disabled.
    id_names = {
        'VETO_15': veto_15,
        'TIGHT_15_NoECAL_Gap': tight_15_NoECAL_Gap,
        'FAKES': fakes,
    }

    # Validate the three mandatory categories up front (same order and same
    # messages as before) so a bad config fails before any mask is written.
    for category, label in (('VETOEL', 'veto'), ('LOOSEEL', 'loose'), ('TIGHTEL', 'tight')):
        if el_pars[category]['id'] not in id_names:
            raise IOError("%s Electron ID name not valid" % label)

    for elID, pars in el_pars.items():
        pt_cut = (electrons['pt'] >= pars['ptmin'])
        # The cut is on 'eta'; an 'etaSC' / 'etascmax' based variant is disabled.
        eta_cut = (np.abs(electrons['eta']) <= pars['etamax'])
        pass_id = id_names[pars['id']](electrons)
        electrons[elID] = (pass_id) & (pt_cut) & (eta_cut)

    return electrons
def process_muons(muons, year):
    """Attach one boolean selection mask per configured muon category.

    The ``['Muons'][year]`` section of
    ``$PROJECT_DIR/cfg_files/cfg_pars_$jobid.json`` is read.  For every
    category key in that section, ``muons[<category>]`` is set to the logical
    AND of the configured ID decision, ``pt >= ptmin`` and ``|eta| <= etamax``.

    Args:
        muons: object indexable by ``'pt'`` and ``'eta'`` that also accepts
            item assignment of the resulting masks.
        year: key selecting the per-year block of the ``Muons`` section.

    Returns:
        The same ``muons`` object, with the new mask entries added.

    Raises:
        IOError: if the ``id`` configured for ``VETOMU``, ``LOOSEMU`` or
            ``TIGHTMU`` is not one of the known ID names.
    """
    cfg_fname = os.path.join(os.environ['PROJECT_DIR'], 'cfg_files',
                             'cfg_pars_%s.json' % os.environ['jobid'])
    # Context manager so the config file handle is closed deterministically.
    with open(cfg_fname) as cfg_file:
        mu_pars = prettyjson.loads(cfg_file.read())['Muons'][year]

    # Map from the ID name used in the config to the function implementing it.
    id_names = {
        'FAIL': fail,
        'LOOSE_12': loose_12,
        'TIGHT_12': tight_12,
        'LOOSE_12Db': loose_12Db,
        'TIGHT_12Db': tight_12Db,
        'LOOSE_15': loose_15,
        'TIGHT_15': tight_15,
        'LOOSE_15Db': loose_15Db,
        'TIGHT_15Db': tight_15Db,
        'TIGHT_NOISO': tight_noIso,
        'ANTILOOSE_15Db': antiloose_15Db,
    }

    # Validate the three mandatory categories up front (same order and same
    # messages as before) so a bad config fails before any mask is written.
    for category, label in (('VETOMU', 'veto'), ('LOOSEMU', 'loose'), ('TIGHTMU', 'tight')):
        if mu_pars[category]['id'] not in id_names:
            raise IOError("%s Muon ID name not valid" % label)

    for muID, pars in mu_pars.items():
        pt_cut = (muons['pt'] >= pars['ptmin'])
        eta_cut = (np.abs(muons['eta']) <= pars['etamax'])
        pass_id = id_names[pars['id']](muons)
        muons[muID] = (pass_id) & (pt_cut) & (eta_cut)

    return muons
def __init__(self, year):
    """Load the ``ttsolver`` configuration and probability distributions for ``year``.

    Reads the ``ttsolver`` section of
    ``$PROJECT_DIR/cfg_files/cfg_pars_$jobid.json``, then loads the file named
    by its ``filename`` entry from ``$PROJECT_DIR/Corrections/$base_jobid``
    and selects the ``year`` entry.

    For each distribution the ``_axes`` and ``_values`` members are copied to
    ``<name>_binning`` / ``<name>_values`` attributes.  The ``USEMASS``,
    ``USE3JMERGED``, ``USE3JLOST`` and ``USENS`` switches are copied from the
    configuration.

    Args:
        year: key selecting the per-year entry of the loaded distributions.
    """
    print("TTBarSolver:", year)
    proj_dir = os.environ["PROJECT_DIR"]
    jobid = os.environ["jobid"]
    base_jobid = os.environ["base_jobid"]

    cfg_fname = os.path.join(proj_dir, "cfg_files", "cfg_pars_%s.json" % jobid)
    # Context manager so the config file handle is closed deterministically.
    with open(cfg_fname) as cfg_file:
        cfg_pars = prettyjson.loads(cfg_file.read())["ttsolver"]

    probs = load(os.path.join(proj_dir, "Corrections", base_jobid, cfg_pars["filename"]))[year]

    ## create arrays for binning and values separately for each dist because njit can't handle constant dictionaries currently
    # 4+ jets: hadronic W mass vs hadronic top mass
    self.USEMASS = cfg_pars["USEMASS"]
    self.WTmass_right = probs["4PJets"]["mWHad_vs_mTHad"]
    self.WTmass_right_binning = self.WTmass_right._axes
    self.WTmass_right_values = self.WTmass_right._values

    # 3 jets, merged: only binning/values are kept, not the object itself
    self.USE3JMERGED = cfg_pars["USE3JMERGED"]
    Mass_3J_Merged_right = probs["3Jets"]["Merged_mTHadProxy_vs_maxmjet"]
    self.Mass_3J_Merged_right_binning = Mass_3J_Merged_right._axes
    self.Mass_3J_Merged_right_values = Mass_3J_Merged_right._values

    # 3 jets, lost
    self.USE3JLOST = cfg_pars["USE3JLOST"]
    self.Mass_3J_Lost_right = probs["3Jets"]["Lost_mTHadProxy"]
    self.Mass_3J_Lost_right_binning = self.Mass_3J_Lost_right._axes
    self.Mass_3J_Lost_right_values = self.Mass_3J_Lost_right._values

    # neutrino-solver distributions use the "*_dist" entries; the "*_chi2"
    # alternatives are disabled
    self.USENS = cfg_pars["USENS"]
    self.NS_4PJ_right = probs["4PJets"]["nusolver_dist"]
    self.NS_4PJ_right_binning = self.NS_4PJ_right._axes
    self.NS_4PJ_right_values = self.NS_4PJ_right._values

    # merged 3 jet vars
    NS_3J_Merged_right = probs["3Jets"]["Merged_nusolver_dist"]
    self.NS_3J_Merged_right_binning = NS_3J_Merged_right._axes
    self.NS_3J_Merged_right_values = NS_3J_Merged_right._values

    # lost 3 jet vars
    self.NS_3J_Lost_right = probs["3Jets"]["Lost_nusolver_dist"]
    self.NS_3J_Lost_right_binning = self.NS_3J_Lost_right._axes
    self.NS_3J_Lost_right_values = self.NS_3J_Lost_right._values
choices=["2016", "2017", "2018"] if base_jobid == "NanoAODv6" else ["2016APV", "2016", "2017", "2018"], help="Specify which year to run over") parser.add_argument( "outfname", type=str, help="Specify output filename, including directory and file extension") parser.add_argument( "opts", type=str, help="Fileset dictionary (in string form) to be used for the processor") args = parser.parse_args() # convert input string of fileset dictionary to actual dictionary fdict = (args.fset).replace("\'", "\"") fileset = prettyjson.loads(fdict) if len(fileset.keys()) > 1: raise ValueError( "Only one topology run at a time in order to determine which corrections and systematics to run" ) samplename = list(fileset.keys())[0] isTTbar_ = ["ttJets_PS", "ttJets"] if ( (args.year == "2016") and (base_jobid == "NanoAODv6")) else ["ttJetsSL", "ttJetsHad", "ttJetsDiLep"] isSignal_ = (samplename.startswith("AtoTT") or samplename.startswith("HtoTT")) isInt_ = isSignal_ and ("Int" in samplename) if (samplename not in isTTbar_) and (not isSignal_): raise ValueError("This should only be run on SM ttbar or signal events!")
# Per-lepton yields for each jet multiplicity, plus the lepton-summed yields
# stored directly under the jet-multiplicity keys.
yields_dict = {
    'Electron' : {
        '3Jets' : {},
        '4PJets' : {},
    },
    'Muon' : {
        '3Jets' : {},
        '4PJets' : {},
    },
    '3Jets' : {},
    '4PJets' : {},
}

for jmult in jmults:
    for lep in leptons:
        dtc = '/'.join([input_dir, lep, jmult]) # dir to check
        # sorted() makes the choice deterministic when several .json files exist
        json_fnames = sorted('%s/%s' % (dtc, fname) for fname in os.listdir(dtc) if fname.endswith('.json'))
        if not json_fnames:
            # previously an uninformative IndexError from indexing an empty list
            raise IOError("No .json file found in %s" % dtc)
        json_fname = json_fnames[0]
        if not os.path.isfile(json_fname):
            raise IOError("File %s does not exist" % json_fname)
        with open(json_fname) as json_file:
            yields_dict[lep][jmult] = prettyjson.loads(json_file.read())

for jmult in jmults:
    # NOTE(review): Counter addition keeps only entries whose summed count is
    # positive, so processes with a zero or negative total are dropped from the
    # combined yields -- confirm this is intended.
    sum_yields = Counter(yields_dict['Electron'][jmult])+Counter(yields_dict['Muon'][jmult])
    # recompute the ratio from the summed yields rather than summing the ratios
    sum_yields['data/SIM'] = round(sum_yields['data']/sum_yields['SIM'], 3)
    yields_dict[jmult] = sum_yields

with open('%s/yields_compilation.json' % input_dir, 'w') as out:
    out.write(prettyjson.dumps(yields_dict))
#'Reso_pt_thad' : ('$p_{T}$($t_{h}$) Resolution (Gen-Reco) [GeV]', 2, (0., 500.)), #'Reso_pt_tlep' : ('$p_{T}$($t_{l}$) Resolution (Gen-Reco) [GeV]', 2, (0., 500.)), #'Reso_pt_tt' : ('$p_{T}$($t\\bar{t}$) Resolution (Gen-Reco) [GeV]', 2, (0., 500.)), #'Reso_eta_thad' : ('$\\eta$($t_{h}$) Resolution (Gen-Reco)', 2, (-4., 4.)), #'Reso_eta_tlep' : ('$\\eta$($t_{l}$) Resolution (Gen-Reco)', 2, (-4., 4.)), #'Reso_eta_tt' : ('$\\eta$($t\\bar{t}$) Resolution (Gen-Reco)', 2, (-4., 4.)), 'Reso_tlep_ctstar': ('cos($\\theta^{*}_{t_{l}}$) Resolution (Gen-Reco)', 2, (-1., 1.)), 'Reso_tlep_ctstar_abs': ('|cos($\\theta^{*}_{t_{l}}$)| Resolution (Gen-Reco)', 1, (0., 1.)), ###'Reso_mtt_vs_tlep_ctstar_abs' : ('m($t\\bar{t}$)', '|cos($\\theta^{*}_{t_{l}}$)|', linearize_binning[0], linearize_binning[1], (linearize_binning[0][0], linearize_binning[0][-1]), (linearize_binning[1][0], linearize_binning[1][-1]), True), } ## get data lumi and scale MC by lumi data_lumi_year = prettyjson.loads( open(os.path.join(proj_dir, 'inputs', 'lumis_data.json')).read())[args.year] lumi_correction = load( os.path.join(proj_dir, 'Corrections', base_jobid, 'MC_LumiWeights_Test.coffea'))[args.year]['%ss' % args.lepton] # scale ttJets events, split by reconstruction type, by normal ttJets lumi correction ttJets_permcats = ['*right', '*matchable', '*unmatchable', '*sl_tau', '*other'] names_list = [[ dataset for dataset in sorted(set([key[0] for key in hdict['mtt'].values().keys()])) ] for hdict in hdicts.values() ] # 'mtt' hardcoded because it has all ttJets event cats names = sorted(set(sum(names_list, []))) # get dataset names in hists ttJets_cats = [ name for name in names
'rho_noweight': ('Unweighted $\\rho$', 1, (0., 100.), True, False), 'nvtx_puweight': ('Reweighted n vertices', 1, (0., 100.), True, False), 'nvtx_noweight': ('Unweighted n vertices', 1, (0., 100.), True, False), 'BTagSF': ('$SF_{btag}$', 1, (0.7, 1.5), False, True), 'LepSF': ('$SF_{lep}$', 1, (0.8, 1.1), False, False), 'PileupWeight': ('Pileup Weight', 1, (0., 2.), False, False), 'EvtWeight': ('Event Weight', 1, (0., 2.), False, True), } ## get plotting colors/settings hstyles = styles.styles stack_fill_opts = {'alpha': 0.8, 'edgecolor': (0, 0, 0, .5)} stack_error_opts = {'edgecolor': (0, 0, 0, .5)} ## get data lumi and scale MC by lumi data_lumi_year = prettyjson.loads( open('%s/inputs/lumis_data.json' % proj_dir).read())[args.year] lumi_correction = load( '%s/Corrections/%s/MC_LumiWeights_IgnoreSigEvts.coffea' % (proj_dir, jobid)) for hname in hdict.keys(): if hname == 'cutflow': continue hdict[hname].scale(lumi_correction[args.year]['%ss' % args.lepton], axis='dataset') ## make groups based on process process = hist.Cat("process", "Process", sorting='placement') process_cat = "dataset" process_groups = plt_tools.make_dataset_groups(args.lepton, args.year) #set_trace() for hname in hdict.keys(): if hname == 'cutflow': continue
from coffea.util import load, save from pdb import set_trace import os from fnmatch import fnmatch import Utilities.prettyjson as prettyjson proj_dir = os.environ["PROJECT_DIR"] jobid = os.environ["jobid"] base_jobid = os.environ["base_jobid"] outdir = os.path.join(proj_dir, "Corrections", base_jobid) if not os.path.isdir(outdir): os.makedirs(outdir) data_lumi = prettyjson.loads(open(os.path.join(proj_dir, "inputs", "%s_lumis_data.json" % base_jobid)).read()) # file with integrated luminosity for all three years signal_xsecs = prettyjson.loads(open(os.path.join(proj_dir, "inputs", "signal_xsecs.json")).read()) # file with signal cross sections years_to_run = ["2016", "2017", "2018"] if base_jobid == "NanoAODv6" else ["2016APV", "2016", "2017", "2018"] lumi_weights = {year:{"Electrons" : {}, "Muons" : {}} for year in years_to_run} # for each year, read sumGenWeights from all meta.json files for year in years_to_run: print(year) xsec_file = prettyjson.loads(open(os.path.join(proj_dir, "inputs", "samples_%s_%s.json" % (year, base_jobid))).read()) # file with cross sections datasets = list(filter(lambda x: fnmatch(x["name"], "*"), xsec_file)) for dataset in datasets: sample = dataset["name"] if sample.startswith("data_Single"): continue if dataset["DBSName"] == "NOT PRESENT": print(f"Dataset {sample} not present, will be skipped") continue
error_opts={"color": "k", "marker" : None}, ) # update max/min values logy_min, logy_max = np.min(nnlo_histo.values()[()]), np.max(nnlo_histo.values()[()]) normed_logy_min, normed_logy_max = np.min(nnlo_normed_histo.values()[()]), np.max(nnlo_normed_histo.values()[()]) years_to_run = ["2016", "2017", "2018"] if base_jobid == "NanoAODv6" else ["2016APV", "2016", "2017", "2018"] for year in years_to_run: input_dir = os.path.join(proj_dir, "results", "%s_%s" % (year, base_jobid), analyzer) fnames = sorted(["%s/%s" % (input_dir, fname) for fname in os.listdir(input_dir) if fname.endswith(f_ext)]) hdict = plt_tools.add_coffea_files(fnames) if len(fnames) > 1 else load(fnames[0]) ## get NLO values from hdict ttSL = "ttJets_PS" if ((year == "2016") and (base_jobid == "NanoAODv6")) else "ttJetsSL" xsec_file = prettyjson.loads(open(os.path.join(proj_dir, "inputs", "samples_%s_%s.json" % (year, base_jobid))).read()) tt_dataset = list(filter(lambda x: fnmatch(x["name"], ttSL), xsec_file))[0] xsec = tt_dataset["xsection"] meta_json = prettyjson.loads(open(os.path.join(proj_dir, "inputs", "%s_%s" % (year, base_jobid), "%s.meta.json" % ttSL)).read()) sumGenWeights = meta_json["sumGenWeights"] # orig tune_histo = hdict[tune_var].integrate("dataset") tune_histo.scale(xsec/sumGenWeights) if linearize: #set_trace() tune_histo = tune_histo.rebin(tune_histo.dense_axes()[0].name, hist.Bin(tune_histo.dense_axes()[0].name, tune_histo.dense_axes()[0].name, mtt_binning)) tune_histo = tune_histo.rebin(tune_histo.dense_axes()[1].name, hist.Bin(tune_histo.dense_axes()[1].name, tune_histo.dense_axes()[1].name, ctstar_binning)) # save integral to make normalized hist tune_integral = tune_histo.values(overflow="all")[()].sum()
parser.add_argument( 'outfname', type=str, help='Specify output filename, including directory and file extension') parser.add_argument( '--debug', action='store_true', help= 'Uses iterative_executor for debugging purposes, otherwise futures_excutor will be used (faster)' ) args = parser.parse_args() # convert input string of fileset dictionary to actual dictionary fdict = (args.fset).replace("\'", "\"") fileset = prettyjson.loads(fdict) init_btag = ~(np.array([key.startswith('data') for key in fileset.keys()]).all()) ## init tt probs for likelihoods ttpermutator.year_to_run(year=args.year) ## load lumimask for data and corrections for event weights pu_correction = load('%s/Corrections/%s/MC_PU_Weights.coffea' % (proj_dir, jobid)) lepSF_correction = load('%s/Corrections/leptonSFs.coffea' % proj_dir) jet_corrections = load('%s/Corrections/JetCorrections.coffea' % proj_dir)[args.year] corrections = { 'Pileup': pu_correction,
hep.cms.label(ax=ax, fontsize=rcParams['font.size'], data=False, paper=False, year=year, lumi=round(lumi_to_use, 1)) figname = os.path.join( plotdir, '%s_Efficiency' % '_'.join([btagger, wp, jmult, flav])) fig.savefig(figname) print('%s written' % figname) plt.close() data_lumi_dict = prettyjson.loads( open(os.path.join(proj_dir, 'inputs', '%s_lumis_data.json' % base_jobid)).read()) combine_2016 = ('2016' in years_to_run) and ('2016APV' in years_to_run) and ( base_jobid == 'ULnanoAOD') computed_combined_2016 = False # ZJets Summer20UL samples have too many negative contributions import re non_ZJets_samples = re.compile('(?!ZJets*)') for year in years_to_run: print(year) f_ext = 'TOT.coffea' if combine_2016 and ('2016' in year):
def get_bkg_templates(tmp_rname):
    '''
    Function that writes linearized mtt vs costheta distributions to root file.

    Besides ``tmp_rname`` this relies on module-level names defined elsewhere
    in the script: ``bkg_fnames``, ``bkg_ttJets_fname``, ``proj_dir``,
    ``jobid``, ``outdir``, ``args`` (``year``, ``smooth``),
    ``linearize_binning``, ``njets_to_run``, ``sys_to_use`` and
    ``templates_to_smooth``.

    For each lepton type, jet multiplicity and available systematic, the QCD
    estimate is computed from the signal region and the three sidebands, and
    every resulting process template is
      * stored in a per-jet-multiplicity coffea dict (saved under ``outdir``), and
      * exported as a 1D histogram into the ROOT file ``tmp_rname``.

    Parameters
    ----------
    tmp_rname : str
        Name of the ROOT file the templates are written to.

    Raises
    ------
    ValueError
        If the 'mtt_vs_tlep_ctstar_abs' histogram is not in the input file(s).
    '''
    ## variables that only need to be defined/evaluated once
    hdict = plt_tools.add_coffea_files(bkg_fnames) if len(bkg_fnames) > 1 else load(bkg_fnames[0])

    ## get data lumi
    # NOTE(review): the value is not used below; the read is kept so a missing
    # file or year still fails here. The handle is now closed explicitly.
    with open('%s/inputs/lumis_data.json' % proj_dir) as lumi_file:
        data_lumi_year = prettyjson.loads(lumi_file.read())[args.year]

    # get correct hist and rebin
    hname_to_use = 'mtt_vs_tlep_ctstar_abs'
    if hname_to_use not in hdict.keys():
        raise ValueError("%s not found in file" % hname_to_use)
    xrebinning, yrebinning = linearize_binning
    histo = hdict[hname_to_use] # process, sys, jmult, leptype, btag, lepcat

    xaxis_name = histo.dense_axes()[0].name
    yaxis_name = histo.dense_axes()[1].name
    ## rebin x axis (array -> explicit bin edges, number -> passed to rebin as is)
    if isinstance(xrebinning, np.ndarray):
        new_xbins = hist.Bin(xaxis_name, xaxis_name, xrebinning)
    elif isinstance(xrebinning, float) or isinstance(xrebinning, int):
        new_xbins = xrebinning
    histo = histo.rebin(xaxis_name, new_xbins)
    ## rebin y axis
    if isinstance(yrebinning, np.ndarray):
        new_ybins = hist.Bin(yaxis_name, yaxis_name, yrebinning)
    elif isinstance(yrebinning, float) or isinstance(yrebinning, int):
        new_ybins = yrebinning
    rebin_histo = histo.rebin(yaxis_name, new_ybins)
    # (the unused 'nbins' local was dropped: it called len() on the rebinning
    # specs and therefore raised for the scalar case accepted just above)

    ## scale ttJets events, split by reconstruction type, by normal ttJets lumi correction
    ttJets_permcats = ['*right', '*matchable', '*unmatchable', '*other']
    names = sorted(set(key[0] for key in hdict[hname_to_use].values().keys())) # get dataset names in hists
    ttJets_cats = [name for name in names if any(fnmatch.fnmatch(name, cat) for cat in ttJets_permcats)] # gets ttJets(_PS)_other, ...

    # use ttJets events that don't have PS weights for dedicated sys samples in 2016
    if bkg_ttJets_fname is not None:
        ttJets_hdict = load(bkg_ttJets_fname)
        ttJets_histo = ttJets_hdict[hname_to_use] # process, sys, jmult, leptype, btag, lepcat
        ## rebin x axis
        ttJets_histo = ttJets_histo.rebin(xaxis_name, new_xbins)
        ## rebin y axis
        ttJets_histo = ttJets_histo.rebin(yaxis_name, new_ybins)

        only_ttJets_names = sorted(set(key[0] for key in ttJets_hdict[hname_to_use].values().keys())) # get dataset names in hists
        only_ttJets_cats = [name for name in only_ttJets_names if any(fnmatch.fnmatch(name, cat) for cat in ttJets_permcats)] # gets ttJets(_PS)_other, ...

    ## make groups based on process
    process = hist.Cat("process", "Process", sorting='placement')
    process_cat = "dataset"

    # need to save coffea hist objects to file so they can be opened by uproot in the proper format
    upfout = uproot.recreate(tmp_rname, compression=uproot.ZLIB(4)) if os.path.isfile(tmp_rname) else uproot.create(tmp_rname)

    if '3Jets' in njets_to_run:
        histo_dict_3j = processor.dict_accumulator({'Muon' : {}, 'Electron' :{}})
    if '4PJets' in njets_to_run:
        histo_dict_4pj = processor.dict_accumulator({'Muon' : {}, 'Electron' :{}})

    for lep in ['Muon', 'Electron']:
        lepdir = 'mujets' if lep == 'Muon' else 'ejets'

        ## make groups based on process
        process_groups = plt_tools.make_dataset_groups(lep, args.year, samples=names, gdict='templates')

        # reloaded for every lepton: the per-lepton weights differ and the dict is extended below
        lumi_correction = load('%s/Corrections/%s/MC_LumiWeights_IgnoreSigEvts.coffea' % (proj_dir, jobid))[args.year]['%ss' % lep]
        # scale ttJets events, split by reconstruction type, by normal ttJets lumi correction
        for tt_cat in ttJets_cats:
            ttJets_lumi_topo = '_'.join(tt_cat.split('_')[:-1]) # gets ttJets[SL, Had, DiLep] or ttJets_PS
            ttJets_eff_lumi = lumi_correction[ttJets_lumi_topo]
            lumi_correction.update({tt_cat: ttJets_eff_lumi})

        histo = rebin_histo.copy()
        histo.scale(lumi_correction, axis='dataset')
        histo = histo.group(process_cat, process, process_groups)[:, :, :, lep, :, :].integrate('leptype')

        # use ttJets events that don't have PS weights for dedicated sys samples in 2016
        if bkg_ttJets_fname is not None:
            for tt_cat in only_ttJets_cats:
                ttJets_lumi_topo = '_'.join(tt_cat.split('_')[:-1]) # gets ttJets[SL, Had, DiLep] or ttJets_PS
                ttJets_eff_lumi = lumi_correction[ttJets_lumi_topo]
                lumi_correction.update({tt_cat: ttJets_eff_lumi})

            tt_histo = ttJets_histo.copy()
            tt_histo.scale(lumi_correction, axis='dataset')
            tt_histo = tt_histo.group(process_cat, process, {'TT' : ['ttJets_right', 'ttJets_matchable', 'ttJets_unmatchable', 'ttJets_other']})[:, :, :, lep, :, :].integrate('leptype')

        for jmult in njets_to_run:
            # sidebands are taken from the nominal ('nosys') selection only
            iso_sb = Plotter.linearize_hist(histo[:, 'nosys', jmult, 'btagPass', 'Loose'].integrate('sys').integrate('jmult').integrate('lepcat').integrate('btag'))
            btag_sb = Plotter.linearize_hist(histo[:, 'nosys', jmult, 'btagFail', 'Tight'].integrate('sys').integrate('jmult').integrate('lepcat').integrate('btag'))
            double_sb = Plotter.linearize_hist(histo[:, 'nosys', jmult, 'btagFail', 'Loose'].integrate('sys').integrate('jmult').integrate('lepcat').integrate('btag'))
            # signal region keeps the systematics axis
            sig_histo = Plotter.linearize_hist(histo[:, :, jmult, 'btagPass', 'Tight'].integrate('jmult').integrate('lepcat').integrate('btag'))

            for sys in sys_to_use.keys():
                if sys not in histo.axis('sys')._sorted:
                    print('\n\n Systematic %s not available, skipping\n\n' % sys)
                    continue

                sysname, onlyTT = sys_to_use[sys]
                if 'LEP' in sysname:
                    # first letter of the lepton directory ('m' or 'e') replaces the placeholder
                    sysname = sysname.replace('LEP', lepdir[0])

                qcd_est_histo = Plotter.QCD_Est(sig_reg=sig_histo, iso_sb=iso_sb, btag_sb=btag_sb, double_sb=double_sb, norm_type='Sideband', shape_region='BTAG', norm_region='BTAG', sys=sys)

                ## write nominal and systematic variations for each topology to file
                for proc in sorted(set(key[0] for key in qcd_est_histo.values().keys())):
                    if (proc != 'TT') and onlyTT:
                        continue
                    if (proc == 'data_obs') and not (sys == 'nosys'):
                        continue
                    name = proc+lepdir if proc == 'QCD' else proc
                    print(lep, jmult, sys, name)
                    outhname = '_'.join([jmult, lepdir, name]) if sys == 'nosys' else '_'.join([jmult, lepdir, name, sysname])
                    template_histo = qcd_est_histo[proc].integrate('process')

                    if (('ue' in sys) or ('hdamp' in sys) or ('mtop' in sys)) and (bkg_ttJets_fname is not None):
                        tt_lin_histo = Plotter.linearize_hist(tt_histo['TT', 'nosys', jmult, 'btagPass', 'Tight'].integrate('jmult').integrate('lepcat').integrate('btag'))
                        tt_lin_histo = tt_lin_histo['TT', 'nosys'].integrate('process').integrate('sys')
                        template_histo = substitute_ttJets(sys_histo=template_histo, ttJets_histo=tt_lin_histo, ttJets_PS_histo=sig_histo['TT', 'nosys'].integrate('process').integrate('sys'))

                    # the next two steps read the nominal template stored earlier, so
                    # 'nosys' must already have been processed for this (lep, proc)
                    if ((sys == 'mtop1695') or (sys == 'mtop1755')) and (templates_to_smooth[proc]):
                        template_histo = scale_mtop3gev(nominal=histo_dict_3j[lep][proc] if jmult == '3Jets' else histo_dict_4pj[lep][proc], template=template_histo)

                    if (sys != 'nosys') and (args.smooth) and (templates_to_smooth[proc]):
                        template_histo = smoothing(nominal=histo_dict_3j[lep][proc] if jmult == '3Jets' else histo_dict_4pj[lep][proc], template=template_histo, nbinsx=len(xrebinning)-1, nbinsy=len(yrebinning)-1)

                    ## save template histos to coffea dict
                    if jmult == '3Jets':
                        histo_dict_3j[lep][proc if sys == 'nosys' else '%s_%s' % (proc, sys)] = template_histo
                    if jmult == '4PJets':
                        histo_dict_4pj[lep][proc if sys == 'nosys' else '%s_%s' % (proc, sys)] = template_histo

                    ## save template histo to root file
                    upfout[outhname] = hist.export1d(template_histo)

    if '3Jets' in njets_to_run:
        coffea_out_3j = '%s/templates_lj_3Jets_bkg_smoothed_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year) if args.smooth else '%s/templates_lj_3Jets_bkg_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year)
        save(histo_dict_3j, coffea_out_3j)
        print("%s written" % coffea_out_3j)
    if '4PJets' in njets_to_run:
        coffea_out_4pj = '%s/templates_lj_4PJets_bkg_smoothed_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year) if args.smooth else '%s/templates_lj_4PJets_bkg_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year)
        save(histo_dict_4pj, coffea_out_4pj)
        print("%s written" % coffea_out_4pj)

    upfout.close()
    print('%s written' % tmp_rname)
def get_sig_templates(tmp_rname):
    '''
    Function that writes linearized mtt vs costheta distributions to root file.

    Besides ``tmp_rname`` this relies on module-level names defined elsewhere
    in the script: ``sig_fnames``, ``proj_dir``, ``jobid``, ``args`` (``year``),
    ``mtt_ctstar_2d_binning``, ``njets_to_run``, ``sys_to_use`` and ``root_open``.

    For each lepton type, jet multiplicity, signal dataset and available
    non-TT-only systematic, one 2D histogram is filled bin by bin (under- and
    overflow included) and written to ``tmp_rname``.

    Parameters
    ----------
    tmp_rname : str
        Name of the ROOT file the 2D signal histograms are written to.

    Raises
    ------
    ValueError
        If the 'mtt_vs_tlep_ctstar_abs' histogram is not in the input file(s).
    '''
    from rootpy.plotting import Hist2D

    ## variables that only need to be defined/evaluated once
    hdict = plt_tools.add_coffea_files(sig_fnames) if len(sig_fnames) > 1 else load(sig_fnames[0])

    ## get data lumi
    # NOTE(review): the value is not used below; the read is kept so a missing
    # file or year still fails here. The handle is now closed explicitly.
    with open('%s/inputs/lumis_data.json' % proj_dir) as lumi_file:
        data_lumi_year = prettyjson.loads(lumi_file.read())[args.year]

    # get correct hist and rebin
    hname_to_use = 'mtt_vs_tlep_ctstar_abs'
    if hname_to_use not in hdict.keys():
        raise ValueError("%s not found in file" % hname_to_use)
    xrebinning, yrebinning = mtt_ctstar_2d_binning
    histo = hdict[hname_to_use] # process, sys, jmult, leptype, btag, lepcat

    xaxis_name = histo.dense_axes()[0].name
    yaxis_name = histo.dense_axes()[1].name
    ## rebin x axis (array -> explicit bin edges, number -> passed to rebin as is)
    if isinstance(xrebinning, np.ndarray):
        new_xbins = hist.Bin(xaxis_name, xaxis_name, xrebinning)
    elif isinstance(xrebinning, float) or isinstance(xrebinning, int):
        new_xbins = xrebinning
    histo = histo.rebin(xaxis_name, new_xbins)
    ## rebin y axis
    if isinstance(yrebinning, np.ndarray):
        new_ybins = hist.Bin(yaxis_name, yaxis_name, yrebinning)
    elif isinstance(yrebinning, float) or isinstance(yrebinning, int):
        new_ybins = yrebinning
    histo = histo.rebin(yaxis_name, new_ybins)
    # keep only the btag-pass / tight-lepton selection
    rebin_histo = histo[:, :, :, :, 'btagPass', 'Tight'].integrate('lepcat').integrate('btag')

    signals = sorted(set(key[0] for key in rebin_histo.values().keys()))

    # create 2D signal hists and write to temp file
    with root_open(tmp_rname, 'w') as out:
        for lep in ['Muon', 'Electron']:
            lepdir = 'mujets' if lep == 'Muon' else 'ejets'

            # scale by lumi
            lumi_correction = load('%s/Corrections/%s/MC_LumiWeights_IgnoreSigEvts.coffea' % (proj_dir, jobid))[args.year]['%ss' % lep]
            scaled_histo = rebin_histo.copy()
            scaled_histo.scale(lumi_correction, axis='dataset')

            for jmult in njets_to_run:
                histo = scaled_histo[:, :, jmult, lep].integrate('jmult').integrate('leptype')

                for signal in signals:
                    # dataset names have exactly five '_'-separated fields
                    boson, mass, width, pI, wt = signal.split('_')
                    samtype = 'int' if pI == 'Int' else 'sgn'
                    bostype = 'ggA' if boson == 'AtoTT' else 'ggH'
                    # e.g. 'W2p5' -> '2p5pc'
                    width_tag = str(width).replace('.', 'p').split('W')[-1]+'pc'
                    if pI == 'Int':
                        sub_name = '%s_%s-%s-%s-%s' % (bostype, wt, samtype, width_tag, mass)
                    else:
                        sub_name = '%s_pos-%s-%s-%s' % (bostype, samtype, width_tag, mass)

                    for sys in sys_to_use.keys():
                        sysname, onlyTT = sys_to_use[sys]
                        if onlyTT:
                            continue
                        if sys not in histo.axis('sys')._sorted:
                            print('\n\n Systematic %s not available, skipping\n\n' % sys)
                            continue
                        if 'LEP' in sysname:
                            # first letter of the lepton directory ('m' or 'e') replaces the placeholder
                            sysname = sysname.replace('LEP', lepdir[0])

                        template_histo = histo[signal, sys].integrate('dataset').integrate('sys')
                        if wt == 'neg':
                            # negative-weight samples are stored with flipped sign
                            template_histo.scale(-1.)
                        print(lep, jmult, sub_name, sys)
                        sumw, sumw2 = template_histo.values(sumw2=True, overflow='all')[()] # get vals and errors for all bins (including under/overflow)

                        ## create rootpy hist and rename
                        rtpy_h2d = Hist2D(template_histo.dense_axes()[0].edges(), template_histo.dense_axes()[1].edges())
                        outhname = '_'.join([jmult, lepdir, sub_name]) if sys == 'nosys' else '_'.join([jmult, lepdir, sub_name, sysname])
                        rtpy_h2d.name = outhname
                        # set bin content for rootpy hist
                        # NOTE(review): sumw2 is passed as the second element of the pair;
                        # if rootpy treats it as the bin error, sqrt(sumw2) may be intended -- confirm.
                        for binx in range(0, rtpy_h2d.GetNbinsX()+2):
                            for biny in range(0, rtpy_h2d.GetNbinsY()+2):
                                rtpy_h2d[binx, biny] = sumw[binx, biny], sumw2[binx, biny]
                        rtpy_h2d.Write()

    print('%s written' % tmp_rname)
from argparse import ArgumentParser parser = ArgumentParser() parser.add_argument('--type', default='data', nargs='?', choices=['SM', 'signal', 'data', 'all'], help='specify which sample type to print') args = parser.parse_args() proj_dir = os.environ['PROJECT_DIR'] jobid = os.environ['jobid'] # only print signal MC info if (args.type == 'signal') or (args.type == 'all'): import itertools from string import Template name2val = lambda x: float(x.replace('pc','').replace('p', '.')) samples_list = prettyjson.loads(open('%s/inputs/samples_2016.json' % proj_dir).read()) samples_dict = {} for sample in samples_list: name = sample.pop('name') samples_dict[name] = sample ## make table for A A_output = "\multirow{2}{*}{Parity} & \multirow{2}{*}{$\mathsf{m_{A}}$ [GeV]} & \multirow{2}{*}{$\Gamma_{\mathsf{A}}$ [\% $\mathsf{m_{A}}$]} & \multicolumn{2}{c |}{LO $\sigma$ [pb]} & \multirow{2}{*}{$\mathsf{k_{R}}$} \\\ \n" A_output += " & & & Resonance & Interference & \\\ \n\hline \n" H_output = "\multirow{2}{*}{Parity} & \multirow{2}{*}{$\mathsf{m_{H}}$ [GeV]} & \multirow{2}{*}{$\Gamma_{\mathsf{H}}$ [\% $\mathsf{m_{H}}$]} & \multicolumn{2}{c |}{LO $\sigma$ [pb]} & \multirow{2}{*}{$\mathsf{k_{R}}$} \\\ \n" H_output += " & & & Resonance & Interference & \\\ \n\hline \n" for sig_point in itertools.product(['M400','M500', 'M600', 'M750'], ['W2p5', 'W5', 'W10', 'W25']): signal = '_'.join(sig_point) mass = sig_point[0][1:]
} btag_values["2018"] = { "btagDeepB": { "DeepCSVLoose": 0.1208, "DeepCSVMedium": 0.4168, "DeepCSVTight": 0.7665, }, "btagDeepFlavB": { "DeepJetLoose": 0.0490, "DeepJetMedium": 0.2783, "DeepJetTight": 0.7100, } } jet_pars = prettyjson.loads( open( os.path.join(os.environ["PROJECT_DIR"], "cfg_files", "cfg_pars_%s.json" % os.environ["jobid"])).read())["Jets"] valid_taggers = ["DeepCSV", "DeepJet"] valid_WPs = ["Loose", "Medium", "Tight"] if jet_pars["btagger"] not in valid_taggers: raise IOError("%s is not a supported b-tagger" % jet_pars["btagger"]) if jet_pars["permutations"]["tightb"] not in valid_WPs: raise IOError("%s is not a valid working point" % jet_pars["permutations"]["tightb"]) if jet_pars["permutations"]["looseb"] not in valid_WPs: raise IOError("%s is not a valid working point" % jet_pars["permutations"]["looseb"])
"Had": "$\\rightarrow$ jj", } isSignal = lambda x : (x.startswith("AtoTT") or x.startswith("HtoTT")) variables = { "pt" : ("$p_{T}$($obj$) [GeV]", 2, (0., 500.)), "eta": ("$\\eta$($obj$)", 2, (-2.6, 2.6)), "phi": ("$\\phi$($obj$)", 2, (-4, 4)), "mass": ("$m_{obj}$ [GeV]", 1, (0., 300.)), "energy": ("$E_{obj}$ [GeV]", 2, (0., 1000.)), } ## get data lumi and scale MC by lumi data_lumi_year = prettyjson.loads(open(os.path.join(proj_dir, "inputs", "%s_lumis_data.json" % base_jobid)).read())[args.year] lumi_to_use = (data_lumi_year["Muons"]+data_lumi_year["Electrons"])/2000. lumi_correction = load(os.path.join(proj_dir, "Corrections", base_jobid, "MC_LumiWeights.coffea"))[args.year] # scale events by lumi correction for hname in hdict.keys(): if hname == "cutflow": continue hdict[hname].scale(lumi_correction["Muons"], axis="dataset") #hdict[hname] = hdict[hname].integrate("dataset") #set_trace() ## make bp plots for hname in variables.keys(): if hname not in hdict.keys(): raise ValueError(f"{hname} not found in file")
default="") args = parser.parse_args() jobid = os.environ["jobid"] proj_dir = os.environ["PROJECT_DIR"] if not os.path.isfile(args.json): raise ValueError(f"file {args.json} does not exist") outdir = os.path.join(proj_dir, "inputs", "_".join( os.path.basename(args.json).split(".")[0].split("_") [1:])) # get name of json file except for "samples_" if not os.path.isdir(outdir): os.makedirs(outdir) all_samples = prettyjson.loads(open(args.json).read()) samples_to_run = list( filter(lambda x: fnmatch(x["name"], args.sample if args.sample else "*"), all_samples)) if not len(samples_to_run): raise RuntimeError("Could not find any sample matching the pattern") analyzer_inputs = [] for sample in samples_to_run: #set_trace() if "DBSName" in sample: if sample["DBSName"] == "NOT PRESENT": continue if "Ext" in sample["name"]: print("Must combine %s with non-extenstion dataset!" % sample["name"])
(0., 500.), True, True), }) if '4+' in njets_to_run: variables.update({ 'nusolver_chi2': ('$\\chi_{\\nu}^{2}$', 5, (0., 1000.), True, False), 'nusolver_dist': ('$D_{\\nu, min}$ [GeV]', 1, (0., 150.), True, False), 'mWHad_vs_mTHad': ('$m_{t_{h}}$ [GeV]', '$m_{W_{h}}$ [GeV]', 10, (0., 500.), 10, (0., 500.), True, False), }) ## get plotting colors/settings hstyles = styles.styles ## get data lumi and scale MC by lumi data_lumi_dict = prettyjson.loads( open(os.path.join(proj_dir, 'inputs', '%s_lumis_data.json' % base_jobid)).read()) lumi_correction = load( os.path.join(proj_dir, 'Corrections', jobid, 'MC_LumiWeights.coffea')) ## make groups based on perm category pcat = hist.Cat("permcat", "Perm Category", sorting='placement') pcat_cat = "permcat" computed_combined_2016 = False for year in years_to_run: f_ext = 'TOT.coffea' if combine_2016 and ('2016' in year): if computed_combined_2016: computed_combined_2016_year_to_copy = year continue