Example #1
def make_electron_ids(electrons, year):
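    """For each electron ID defined in the cfg file, add a boolean column marking electrons that pass the ID plus its pt and eta cuts."""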

    el_pars = prettyjson.loads(
        open(
            os.path.join(os.environ['PROJECT_DIR'], 'cfg_files',
                         'cfg_pars_%s.json' %
                         os.environ['jobid'])).read())['Electrons'][year]

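    # map cfg-file ID names to selector functions (defined elsewhere in this module); commented-out entries are disabled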
    id_names = {
        #'FAIL' : fail,
        'VETO_15': veto_15,
        #'LOOSE_15' : loose_15,
        #'MEDIUM_15' : medium_15,
        #'TIGHT_15' : tight_15,
        'TIGHT_15_NoECAL_Gap': tight_15_NoECAL_Gap,
        #'NOTVETO_15' : notveto_15,
        'FAKES': fakes
    }

    if el_pars['VETOEL']['id'] not in id_names.keys():
        raise IOError("veto Electron ID name not valid")
    if el_pars['LOOSEEL']['id'] not in id_names.keys():
        raise IOError("loose Electron ID name not valid")
    if el_pars['TIGHTEL']['id'] not in id_names.keys():
        raise IOError("tight Electron ID name not valid")

    for elID in el_pars.keys():
        pt_cut = (electrons['pt'] >= el_pars[elID]['ptmin'])
        #etaSC_cut = (np.abs(electrons['etaSC']) <= el_pars[elID]['etascmax'])
        eta_cut = (np.abs(electrons['eta']) <= el_pars[elID]['etamax'])
        pass_id = id_names[el_pars[elID]['id']](electrons)
        electrons[elID] = (pass_id) & (pt_cut) & (eta_cut)
        #electrons[elID] = (pass_id) & (pt_cut) & (etaSC_cut)

    return electrons
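
# a hypothetical usage sketch (event and column names assumed, not taken from this snippet):
#   electrons = make_electron_ids(events['Electron'], year='2016')
#   veto_electrons = electrons[electrons['VETOEL']]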
Example #2
def process_muons(muons, year):
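    """For each muon ID defined in the cfg file, add a boolean column marking muons that pass the ID plus its pt and eta cuts."""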
    mu_pars = prettyjson.loads(
        open(
            os.path.join(os.environ['PROJECT_DIR'], 'cfg_files',
                         'cfg_pars_%s.json' %
                         os.environ['jobid'])).read())['Muons'][year]

    id_names = {
        'FAIL': fail,
        'LOOSE_12': loose_12,
        'TIGHT_12': tight_12,
        'LOOSE_12Db': loose_12Db,
        'TIGHT_12Db': tight_12Db,
        'LOOSE_15': loose_15,
        'TIGHT_15': tight_15,
        'LOOSE_15Db': loose_15Db,
        'TIGHT_15Db': tight_15Db,
        'TIGHT_NOISO': tight_noIso,
        'ANTILOOSE_15Db': antiloose_15Db
    }

    if mu_pars['VETOMU']['id'] not in id_names.keys():
        raise IOError("veto Muon ID name not valid")
    if mu_pars['LOOSEMU']['id'] not in id_names.keys():
        raise IOError("loose Muon ID name not valid")
    if mu_pars['TIGHTMU']['id'] not in id_names.keys():
        raise IOError("tight Muon ID name not valid")

    for muID in mu_pars.keys():
        pt_cut = (muons['pt'] >= mu_pars[muID]['ptmin'])
        eta_cut = (np.abs(muons['eta']) <= mu_pars[muID]['etamax'])
        pass_id = id_names[mu_pars[muID]['id']](muons)
        muons[muID] = (pass_id) & (pt_cut) & (eta_cut)

    return muons
Example #3
    def __init__(self, year):
        print("TTBarSolver:", year)
        proj_dir = os.environ["PROJECT_DIR"]
        jobid = os.environ["jobid"]
        base_jobid = os.environ["base_jobid"]
        cfg_pars = prettyjson.loads(
            open(
                os.path.join(proj_dir, "cfg_files",
                             "cfg_pars_%s.json" % jobid)).read())["ttsolver"]

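        # load the year's pre-computed probability distributions for the solver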
        probs = load(
            os.path.join(proj_dir, "Corrections", base_jobid,
                         cfg_pars["filename"]))[year]

        ## create arrays for binning and values separately for each dist because njit can't currently handle constant dictionaries
        self.USEMASS = cfg_pars["USEMASS"]
        self.WTmass_right = WTmass_right = probs["4PJets"]["mWHad_vs_mTHad"]
        self.WTmass_right_binning = WTmass_right._axes
        self.WTmass_right_values = WTmass_right._values

        self.USE3JMERGED = cfg_pars["USE3JMERGED"]
        Mass_3J_Merged_right = probs["3Jets"]["Merged_mTHadProxy_vs_maxmjet"]
        self.Mass_3J_Merged_right_binning = Mass_3J_Merged_right._axes
        self.Mass_3J_Merged_right_values = Mass_3J_Merged_right._values

        self.USE3JLOST = cfg_pars["USE3JLOST"]
        self.Mass_3J_Lost_right = Mass_3J_Lost_right = probs["3Jets"]["Lost_mTHadProxy"]
        self.Mass_3J_Lost_right_binning = Mass_3J_Lost_right._axes
        self.Mass_3J_Lost_right_values = Mass_3J_Lost_right._values

        self.USENS = cfg_pars["USENS"]
        self.NS_4PJ_right = NS_4PJ_right = probs["4PJets"]["nusolver_dist"]
        #NS_4PJ_right = probs["4PJets"]["nusolver_chi2"]
        self.NS_4PJ_right_binning = NS_4PJ_right._axes
        self.NS_4PJ_right_values = NS_4PJ_right._values

        # merged 3 jet vars
        NS_3J_Merged_right = probs["3Jets"]["Merged_nusolver_dist"]
        #NS_3J_Merged_right = probs["3Jets"]["Merged_nusolver_chi2"]
        self.NS_3J_Merged_right_binning = NS_3J_Merged_right._axes
        self.NS_3J_Merged_right_values = NS_3J_Merged_right._values

        # lost 3 jet vars
        self.NS_3J_Lost_right = NS_3J_Lost_right = probs["3Jets"]["Lost_nusolver_dist"]
        #NS_3J_Lost_right = probs["3Jets"]["Lost_nusolver_chi2"]
        self.NS_3J_Lost_right_binning = NS_3J_Lost_right._axes
        self.NS_3J_Lost_right_values = NS_3J_Lost_right._values
Example #4
                    choices=["2016", "2017", "2018"] if base_jobid
                    == "NanoAODv6" else ["2016APV", "2016", "2017", "2018"],
                    help="Specify which year to run over")
parser.add_argument(
    "outfname",
    type=str,
    help="Specify output filename, including directory and file extension")
parser.add_argument(
    "opts",
    type=str,
    help="Fileset dictionary (in string form) to be used for the processor")
args = parser.parse_args()

# convert input string of fileset dictionary to actual dictionary
fdict = (args.fset).replace("\'", "\"")
fileset = prettyjson.loads(fdict)

if len(fileset.keys()) > 1:
    raise ValueError(
        "Only one topology run at a time in order to determine which corrections and systematics to run"
    )
samplename = list(fileset.keys())[0]

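# SM ttbar dataset names differ between the NanoAODv6 2016 campaign and the other campaigns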
isTTbar_ = ["ttJets_PS", "ttJets"] if (
    (args.year == "2016") and
    (base_jobid == "NanoAODv6")) else ["ttJetsSL", "ttJetsHad", "ttJetsDiLep"]
isSignal_ = (samplename.startswith("AtoTT") or samplename.startswith("HtoTT"))
isInt_ = isSignal_ and ("Int" in samplename)
if (samplename not in isTTbar_) and (not isSignal_):
    raise ValueError("This should only be run on SM ttbar or signal events!")
Example #5
yields_dict = {
    'Electron' : {
        '3Jets' : {},
        '4PJets' : {},
    },
    'Muon' : {
        '3Jets' : {},
        '4PJets' : {},
    },
    '3Jets' : {},
    '4PJets' : {},
}

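# read the per-lepton yield json from each lepton/jet-multiplicity subdirectory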
for jmult in jmults:
    for lep in leptons:
        dtc = '/'.join([input_dir, lep, jmult]) # dir to check
        json_fname = ['%s/%s' % (dtc, fname) for fname in os.listdir(dtc) if fname.endswith('.json')][0]
        if not os.path.isfile(json_fname):
            raise IOError("File %s does not exist" % json_fname)

        yields_dict[lep][jmult] = prettyjson.loads(open(json_fname).read())

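# combine electron and muon yields for each jet multiplicity and compute the data/SIM ratio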
for jmult in jmults:
    sum_yields = Counter(yields_dict['Electron'][jmult])+Counter(yields_dict['Muon'][jmult])
    sum_yields['data/SIM'] = round(sum_yields['data']/sum_yields['SIM'], 3)
    yields_dict[jmult] = sum_yields 

with open('%s/yields_compilation.json' % input_dir, 'w') as out:
    out.write(prettyjson.dumps(yields_dict))

Example #6
    #'Reso_pt_thad' : ('$p_{T}$($t_{h}$) Resolution (Gen-Reco) [GeV]', 2, (0., 500.)),
    #'Reso_pt_tlep' : ('$p_{T}$($t_{l}$) Resolution (Gen-Reco) [GeV]', 2, (0., 500.)),
    #'Reso_pt_tt' : ('$p_{T}$($t\\bar{t}$) Resolution (Gen-Reco) [GeV]', 2, (0., 500.)),
    #'Reso_eta_thad' : ('$\\eta$($t_{h}$) Resolution (Gen-Reco)', 2, (-4., 4.)),
    #'Reso_eta_tlep' : ('$\\eta$($t_{l}$) Resolution (Gen-Reco)', 2, (-4., 4.)),
    #'Reso_eta_tt' : ('$\\eta$($t\\bar{t}$) Resolution (Gen-Reco)', 2, (-4., 4.)),
    'Reso_tlep_ctstar':
    ('cos($\\theta^{*}_{t_{l}}$) Resolution (Gen-Reco)', 2, (-1., 1.)),
    'Reso_tlep_ctstar_abs':
    ('|cos($\\theta^{*}_{t_{l}}$)| Resolution (Gen-Reco)', 1, (0., 1.)),
    ###'Reso_mtt_vs_tlep_ctstar_abs' : ('m($t\\bar{t}$)', '|cos($\\theta^{*}_{t_{l}}$)|', linearize_binning[0], linearize_binning[1], (linearize_binning[0][0], linearize_binning[0][-1]), (linearize_binning[1][0], linearize_binning[1][-1]), True),
}

## get data lumi and scale MC by lumi
data_lumi_year = prettyjson.loads(
    open(os.path.join(proj_dir, 'inputs',
                      'lumis_data.json')).read())[args.year]
lumi_correction = load(
    os.path.join(proj_dir, 'Corrections', base_jobid,
                 'MC_LumiWeights_Test.coffea'))[args.year]['%ss' % args.lepton]
# scale ttJets events, split by reconstruction type, by normal ttJets lumi correction
ttJets_permcats = ['*right', '*matchable', '*unmatchable', '*sl_tau', '*other']
names_list = [
    sorted(set(key[0] for key in hdict['mtt'].values().keys()))
    for hdict in hdicts.values()
]  # 'mtt' hardcoded because it has all ttJets event cats
names = sorted(set(sum(names_list, [])))  # get dataset names in hists
ttJets_cats = [
    name for name in names
Example #7
    'rho_noweight': ('Unweighted $\\rho$', 1, (0., 100.), True, False),
    'nvtx_puweight': ('Reweighted n vertices', 1, (0., 100.), True, False),
    'nvtx_noweight': ('Unweighted n vertices', 1, (0., 100.), True, False),
    'BTagSF': ('$SF_{btag}$', 1, (0.7, 1.5), False, True),
    'LepSF': ('$SF_{lep}$', 1, (0.8, 1.1), False, False),
    'PileupWeight': ('Pileup Weight', 1, (0., 2.), False, False),
    'EvtWeight': ('Event Weight', 1, (0., 2.), False, True),
}

## get plotting colors/settings
hstyles = styles.styles
stack_fill_opts = {'alpha': 0.8, 'edgecolor': (0, 0, 0, .5)}
stack_error_opts = {'edgecolor': (0, 0, 0, .5)}

## get data lumi and scale MC by lumi
data_lumi_year = prettyjson.loads(
    open('%s/inputs/lumis_data.json' % proj_dir).read())[args.year]
lumi_correction = load(
    '%s/Corrections/%s/MC_LumiWeights_IgnoreSigEvts.coffea' %
    (proj_dir, jobid))
for hname in hdict.keys():
    if hname == 'cutflow': continue
    hdict[hname].scale(lumi_correction[args.year]['%ss' % args.lepton],
                       axis='dataset')

## make groups based on process
process = hist.Cat("process", "Process", sorting='placement')
process_cat = "dataset"
process_groups = plt_tools.make_dataset_groups(args.lepton, args.year)
#set_trace()
for hname in hdict.keys():
    if hname == 'cutflow': continue
Example #8
from coffea.util import load, save
from pdb import set_trace
import os
from fnmatch import fnmatch
import Utilities.prettyjson as prettyjson

proj_dir = os.environ["PROJECT_DIR"]
jobid = os.environ["jobid"]
base_jobid = os.environ["base_jobid"]

outdir = os.path.join(proj_dir, "Corrections", base_jobid)
if not os.path.isdir(outdir):
    os.makedirs(outdir)

data_lumi = prettyjson.loads(open(os.path.join(proj_dir, "inputs", "%s_lumis_data.json" % base_jobid)).read()) # file with integrated luminosity for all three years
signal_xsecs = prettyjson.loads(open(os.path.join(proj_dir, "inputs", "signal_xsecs.json")).read()) # file with signal cross sections

years_to_run = ["2016", "2017", "2018"] if base_jobid == "NanoAODv6" else ["2016APV", "2016", "2017", "2018"]
lumi_weights = {year:{"Electrons" : {}, "Muons" : {}} for year in years_to_run}

# for each year, read sumGenWeights from all meta.json files
for year in years_to_run:
    print(year)
    xsec_file = prettyjson.loads(open(os.path.join(proj_dir, "inputs", "samples_%s_%s.json" % (year, base_jobid))).read()) # file with cross sections
    datasets = list(filter(lambda x: fnmatch(x["name"], "*"), xsec_file))
    for dataset in datasets:
        sample = dataset["name"]
        if sample.startswith("data_Single"): continue
        if dataset["DBSName"] == "NOT PRESENT":
            print(f"Dataset {sample} not present, will be skipped")
            continue
Example #9
        error_opts={"color": "k", "marker" : None},
    )

    # update max/min values
    logy_min, logy_max = np.min(nnlo_histo.values()[()]), np.max(nnlo_histo.values()[()]) 
    normed_logy_min, normed_logy_max = np.min(nnlo_normed_histo.values()[()]), np.max(nnlo_normed_histo.values()[()]) 

    years_to_run = ["2016", "2017", "2018"] if base_jobid == "NanoAODv6" else ["2016APV", "2016", "2017", "2018"]    
    for year in years_to_run:
        input_dir = os.path.join(proj_dir, "results", "%s_%s" % (year, base_jobid), analyzer)
        fnames = sorted(["%s/%s" % (input_dir, fname) for fname in os.listdir(input_dir) if fname.endswith(f_ext)])
        hdict = plt_tools.add_coffea_files(fnames) if len(fnames) > 1 else load(fnames[0])
    
            ## get NLO values from hdict
        ttSL = "ttJets_PS" if ((year == "2016") and (base_jobid == "NanoAODv6")) else "ttJetsSL"
        xsec_file = prettyjson.loads(open(os.path.join(proj_dir, "inputs", "samples_%s_%s.json" % (year, base_jobid))).read())
        tt_dataset = list(filter(lambda x: fnmatch(x["name"], ttSL), xsec_file))[0]
        xsec = tt_dataset["xsection"]
        meta_json = prettyjson.loads(open(os.path.join(proj_dir, "inputs", "%s_%s" % (year, base_jobid), "%s.meta.json" % ttSL)).read())
        sumGenWeights = meta_json["sumGenWeights"]
        
            # orig
        tune_histo = hdict[tune_var].integrate("dataset")
        tune_histo.scale(xsec/sumGenWeights)
        if linearize:
            #set_trace()
            tune_histo = tune_histo.rebin(tune_histo.dense_axes()[0].name, hist.Bin(tune_histo.dense_axes()[0].name, tune_histo.dense_axes()[0].name, mtt_binning))
            tune_histo = tune_histo.rebin(tune_histo.dense_axes()[1].name, hist.Bin(tune_histo.dense_axes()[1].name, tune_histo.dense_axes()[1].name, ctstar_binning))

            # save integral to make normalized hist
        tune_integral = tune_histo.values(overflow="all")[()].sum()
Example #10
parser.add_argument(
    'outfname',
    type=str,
    help='Specify output filename, including directory and file extension')
parser.add_argument(
    '--debug',
    action='store_true',
    help=
    'Uses iterative_executor for debugging purposes; otherwise futures_executor will be used (faster)'
)

args = parser.parse_args()

# convert input string of fileset dictionary to actual dictionary
fdict = (args.fset).replace("\'", "\"")
fileset = prettyjson.loads(fdict)

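# init_btag is True when the fileset contains at least one non-data (MC) sample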
init_btag = ~(np.array([key.startswith('data')
                        for key in fileset.keys()]).all())

## init tt probs for likelihoods
ttpermutator.year_to_run(year=args.year)

## load lumimask for data and corrections for event weights
pu_correction = load('%s/Corrections/%s/MC_PU_Weights.coffea' %
                     (proj_dir, jobid))
lepSF_correction = load('%s/Corrections/leptonSFs.coffea' % proj_dir)
jet_corrections = load('%s/Corrections/JetCorrections.coffea' %
                       proj_dir)[args.year]
corrections = {
    'Pileup': pu_correction,
Example #11
    hep.cms.label(ax=ax,
                  fontsize=rcParams['font.size'],
                  data=False,
                  paper=False,
                  year=year,
                  lumi=round(lumi_to_use, 1))

    figname = os.path.join(
        plotdir, '%s_Efficiency' % '_'.join([btagger, wp, jmult, flav]))
    fig.savefig(figname)
    print('%s written' % figname)
    plt.close()


data_lumi_dict = prettyjson.loads(
    open(os.path.join(proj_dir, 'inputs',
                      '%s_lumis_data.json' % base_jobid)).read())
combine_2016 = ('2016' in years_to_run) and ('2016APV' in years_to_run) and (
    base_jobid == 'ULnanoAOD')
computed_combined_2016 = False

# ZJets Summer20UL samples have too many negative contributions
import re

non_ZJets_samples = re.compile('(?!ZJets*)')

for year in years_to_run:
    print(year)

    f_ext = 'TOT.coffea'
    if combine_2016 and ('2016' in year):
Example #12
def get_bkg_templates(tmp_rname):
    '''
    Function that writes linearized mtt vs costheta distributions to root file.
    '''
    ## variables that only need to be defined/evaluated once
    hdict = plt_tools.add_coffea_files(bkg_fnames) if len(bkg_fnames) > 1 else load(bkg_fnames[0])

        ## get data lumi and scale MC by lumi
    data_lumi_year = prettyjson.loads(open('%s/inputs/lumis_data.json' % proj_dir).read())[args.year]

        # get correct hist and rebin
    hname_to_use = 'mtt_vs_tlep_ctstar_abs'
    if hname_to_use not in hdict.keys():
        raise ValueError("%s not found in file" % hname_to_use)
    xrebinning, yrebinning = linearize_binning
    histo = hdict[hname_to_use] # process, sys, jmult, leptype, btag, lepcat
    
    xaxis_name = histo.dense_axes()[0].name
    yaxis_name = histo.dense_axes()[1].name
        ## rebin x axis
    if isinstance(xrebinning, np.ndarray):
        new_xbins = hist.Bin(xaxis_name, xaxis_name, xrebinning)
    elif isinstance(xrebinning, float) or isinstance(xrebinning, int):
        new_xbins = xrebinning
    histo = histo.rebin(xaxis_name, new_xbins)
        ## rebin y axis
    if isinstance(yrebinning, np.ndarray):
        new_ybins = hist.Bin(yaxis_name, yaxis_name, yrebinning)
    elif isinstance(yrebinning, float) or isinstance(yrebinning, int):
        new_ybins = yrebinning
    rebin_histo = histo.rebin(yaxis_name, new_ybins)
    
    nbins = (len(xrebinning)-1)*(len(yrebinning)-1)
    
        ## scale ttJets events, split by reconstruction type, by normal ttJets lumi correction
    ttJets_permcats = ['*right', '*matchable', '*unmatchable', '*other']
    names = [dataset for dataset in sorted(set([key[0] for key in hdict[hname_to_use].values().keys()]))] # get dataset names in hists
    ttJets_cats = [name for name in names if any([fnmatch.fnmatch(name, cat) for cat in ttJets_permcats])] # gets ttJets(_PS)_other, ...

        # use ttJets events that don't have PS weights for dedicated sys samples in 2016    
    if bkg_ttJets_fname is not None:
        ttJets_hdict = load(bkg_ttJets_fname)
        ttJets_histo = ttJets_hdict[hname_to_use] # process, sys, jmult, leptype, btag, lepcat
        
            ## rebin x axis
        ttJets_histo = ttJets_histo.rebin(xaxis_name, new_xbins)
            ## rebin y axis
        ttJets_histo = ttJets_histo.rebin(yaxis_name, new_ybins)
        
        only_ttJets_names = [dataset for dataset in sorted(set([key[0] for key in ttJets_hdict[hname_to_use].values().keys()]))] # get dataset names in hists
        only_ttJets_cats = [name for name in only_ttJets_names if any([fnmatch.fnmatch(name, cat) for cat in ttJets_permcats])] # gets ttJets(_PS)_other, ...


        ## make groups based on process
    process = hist.Cat("process", "Process", sorting='placement')
    process_cat = "dataset"

        # need to save coffea hist objects to file so they can be opened by uproot in the proper format
    upfout = uproot.recreate(tmp_rname, compression=uproot.ZLIB(4)) if os.path.isfile(tmp_rname) else uproot.create(tmp_rname)

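    # accumulators for the output templates, filled per lepton flavor and jet multiplicity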
    if '3Jets' in njets_to_run:
        histo_dict_3j = processor.dict_accumulator({'Muon' : {}, 'Electron' :{}})
    if '4PJets' in njets_to_run:
        histo_dict_4pj = processor.dict_accumulator({'Muon' : {}, 'Electron' :{}})

    for lep in ['Muon', 'Electron']:
        lepdir = 'mujets' if lep == 'Muon' else 'ejets'
    
        ## make groups based on process
        process_groups = plt_tools.make_dataset_groups(lep, args.year, samples=names, gdict='templates')
        
        lumi_correction = load('%s/Corrections/%s/MC_LumiWeights_IgnoreSigEvts.coffea' % (proj_dir, jobid))[args.year]['%ss' % lep]
                # scale ttJets events, split by reconstruction type, by normal ttJets lumi correction
        if len(ttJets_cats) > 0:
            for tt_cat in ttJets_cats:
                ttJets_lumi_topo = '_'.join(tt_cat.split('_')[:-1]) # gets ttJets[SL, Had, DiLep] or ttJets_PS
                ttJets_eff_lumi = lumi_correction[ttJets_lumi_topo]
                lumi_correction.update({tt_cat: ttJets_eff_lumi})
    
        histo = rebin_histo.copy()
        histo.scale(lumi_correction, axis='dataset')
        histo = histo.group(process_cat, process, process_groups)[:, :, :, lep, :, :].integrate('leptype')

            # use ttJets events that don't have PS weights for dedicated sys samples in 2016    
        if bkg_ttJets_fname is not None:
            if len(only_ttJets_cats) > 0:
                for tt_cat in only_ttJets_cats:
                    ttJets_lumi_topo = '_'.join(tt_cat.split('_')[:-1]) # gets ttJets[SL, Had, DiLep] or ttJets_PS
                    ttJets_eff_lumi = lumi_correction[ttJets_lumi_topo]
                    lumi_correction.update({tt_cat: ttJets_eff_lumi})

            tt_histo = ttJets_histo.copy()
            tt_histo.scale(lumi_correction, axis='dataset')
            tt_histo = tt_histo.group(process_cat, process, {'TT' : ['ttJets_right', 'ttJets_matchable', 'ttJets_unmatchable', 'ttJets_other']})[:, :, :, lep, :, :].integrate('leptype')


        for jmult in njets_to_run:
            iso_sb    = Plotter.linearize_hist(histo[:, 'nosys', jmult, 'btagPass', 'Loose'].integrate('sys').integrate('jmult').integrate('lepcat').integrate('btag'))
            btag_sb   = Plotter.linearize_hist(histo[:, 'nosys', jmult, 'btagFail', 'Tight'].integrate('sys').integrate('jmult').integrate('lepcat').integrate('btag'))
            double_sb = Plotter.linearize_hist(histo[:, 'nosys', jmult, 'btagFail', 'Loose'].integrate('sys').integrate('jmult').integrate('lepcat').integrate('btag'))
            sig_histo = Plotter.linearize_hist(histo[:, :, jmult, 'btagPass', 'Tight'].integrate('jmult').integrate('lepcat').integrate('btag'))
        
            for sys in sys_to_use.keys():
                if sys not in histo.axis('sys')._sorted:
                    print('\n\n   Systematic %s not available, skipping\n\n' % sys)
                    continue

                #set_trace()
                sysname, onlyTT = sys_to_use[sys]
                if 'LEP' in sysname: sysname = sysname.replace('LEP', lepdir[0])
        
                qcd_est_histo = Plotter.QCD_Est(sig_reg=sig_histo, iso_sb=iso_sb, btag_sb=btag_sb, double_sb=double_sb, norm_type='Sideband', shape_region='BTAG', norm_region='BTAG', sys=sys)

                    ## write nominal and systematic variations for each topology to file
                for proc in sorted(set([key[0] for key in qcd_est_histo.values().keys()])):
                    if (proc != 'TT') and onlyTT: continue
                    if (proc == 'data_obs') and not (sys == 'nosys'): continue
                    name = proc+lepdir if proc == 'QCD' else proc
                    print(lep, jmult, sys, name)
                    outhname = '_'.join([jmult, lepdir, name]) if sys == 'nosys' else '_'.join([jmult, lepdir, name, sysname])
                    template_histo = qcd_est_histo[proc].integrate('process')
                    if (('ue' in sys) or ('hdamp' in sys) or ('mtop' in sys)) and (bkg_ttJets_fname is not None):
                        tt_lin_histo = Plotter.linearize_hist(tt_histo['TT', 'nosys', jmult, 'btagPass', 'Tight'].integrate('jmult').integrate('lepcat').integrate('btag'))
                        tt_lin_histo = tt_lin_histo['TT', 'nosys'].integrate('process').integrate('sys')
                        template_histo = substitute_ttJets(sys_histo=template_histo, ttJets_histo=tt_lin_histo, ttJets_PS_histo=sig_histo['TT', 'nosys'].integrate('process').integrate('sys'))

                    if ((sys == 'mtop1695') or (sys == 'mtop1755')) and (templates_to_smooth[proc]):
                        template_histo = scale_mtop3gev(nominal=histo_dict_3j[lep][proc] if jmult == '3Jets' else histo_dict_4pj[lep][proc], template=template_histo)
                        #set_trace()

                    if (sys != 'nosys') and (args.smooth) and (templates_to_smooth[proc]):
                        template_histo = smoothing(nominal=histo_dict_3j[lep][proc] if jmult == '3Jets' else histo_dict_4pj[lep][proc], template=template_histo, nbinsx=len(xrebinning)-1, nbinsy=len(yrebinning)-1)#, debug=True if proc=='VV' else False)
                        #set_trace()

                        ## save template histos to coffea dict
                    if jmult == '3Jets':
                        histo_dict_3j[lep][proc if sys == 'nosys' else '%s_%s' % (proc, sys)] = template_histo
                    if jmult == '4PJets':
                        histo_dict_4pj[lep][proc if sys == 'nosys' else '%s_%s' % (proc, sys)] = template_histo

                        ## save template histo to root file
                    upfout[outhname] = hist.export1d(template_histo)

    if '3Jets' in njets_to_run:
        coffea_out_3j = '%s/templates_lj_3Jets_bkg_smoothed_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year) if args.smooth else '%s/templates_lj_3Jets_bkg_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year)
        save(histo_dict_3j, coffea_out_3j)
        print("%s written" % coffea_out_3j)
    if '4PJets' in njets_to_run:
        coffea_out_4pj = '%s/templates_lj_4PJets_bkg_smoothed_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year) if args.smooth else '%s/templates_lj_4PJets_bkg_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year)
        save(histo_dict_4pj, coffea_out_4pj)
        print("%s written" % coffea_out_4pj)

    
    upfout.close()
    print('%s written' % tmp_rname)
Example #13
def get_sig_templates(tmp_rname):
    '''
    Function that writes linearized mtt vs costheta distributions to root file.
    '''
    from rootpy.plotting import Hist2D

    widthTOname = lambda width : str(width).replace('.', 'p')
    nameTOwidth = lambda width : str(width).replace('p', '.')

    ## variables that only need to be defined/evaluated once
    hdict = plt_tools.add_coffea_files(sig_fnames) if len(sig_fnames) > 1 else load(sig_fnames[0])

        ## get data lumi and scale MC by lumi
    data_lumi_year = prettyjson.loads(open('%s/inputs/lumis_data.json' % proj_dir).read())[args.year]

        # get correct hist and rebin
    hname_to_use = 'mtt_vs_tlep_ctstar_abs'
    if hname_to_use not in hdict.keys():
        raise ValueError("%s not found in file" % hname_to_use)
    xrebinning, yrebinning = mtt_ctstar_2d_binning
    #xrebinning, yrebinning = 2, 1
    histo = hdict[hname_to_use] # process, sys, jmult, leptype, btag, lepcat

    #set_trace()    
    xaxis_name = histo.dense_axes()[0].name
    yaxis_name = histo.dense_axes()[1].name
        ## rebin x axis
    if isinstance(xrebinning, np.ndarray):
        new_xbins = hist.Bin(xaxis_name, xaxis_name, xrebinning)
    elif isinstance(xrebinning, float) or isinstance(xrebinning, int):
        new_xbins = xrebinning
    histo = histo.rebin(xaxis_name, new_xbins)

        ## rebin y axis
    if isinstance(yrebinning, np.ndarray):
        new_ybins = hist.Bin(yaxis_name, yaxis_name, yrebinning)
    elif isinstance(yrebinning, float) or isinstance(yrebinning, int):
        new_ybins = yrebinning
    #set_trace()
    histo = histo.rebin(yaxis_name, new_ybins)
    rebin_histo = histo[:, :, :, :, 'btagPass', 'Tight'].integrate('lepcat').integrate('btag')

    signals = sorted(set([key[0] for key in rebin_histo.values().keys()]))    

        # create 2D signal hists and write to temp file        
    with root_open(tmp_rname, 'w') as out:
        #for lep in ['Muon']:
        for lep in ['Muon', 'Electron']:
            lepdir = 'mujets' if lep == 'Muon' else 'ejets'

                # scale by lumi
            lumi_correction = load('%s/Corrections/%s/MC_LumiWeights_IgnoreSigEvts.coffea' % (proj_dir, jobid))[args.year]['%ss' % lep]
            scaled_histo = rebin_histo.copy()
            scaled_histo.scale(lumi_correction, axis='dataset')
    
            for jmult in njets_to_run:
                histo = scaled_histo[:, :, jmult, lep].integrate('jmult').integrate('leptype')
    
                for signal in signals:
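                    # signal names split into boson type, mass, width, interference flag, and weight sign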
                    _, mass, width, pI, wt = tuple(signal.split('_'))
                    samtype = 'int' if pI == 'Int' else 'sgn'
                    bostype = 'ggA' if _ == 'AtoTT' else 'ggH'
    
                    sub_name = '%s_%s-%s-%s-%s' % (bostype, wt, samtype, widthTOname(width).split('W')[-1]+'pc', mass) if pI == 'Int' else '%s_pos-%s-%s-%s' % (bostype, samtype, widthTOname(width).split('W')[-1]+'pc', mass)
    
                    #set_trace()
                    for sys in sys_to_use.keys():
                        sysname, onlyTT = sys_to_use[sys]
                        if onlyTT: continue
                        if sys not in histo.axis('sys')._sorted:
                            print('\n\n   Systematic %s not available, skipping\n\n' % sys)
                            continue
                        #set_trace()
                        if 'LEP' in sysname: sysname = sysname.replace('LEP', lepdir[0])
    
                        template_histo = histo[signal, sys].integrate('dataset').integrate('sys')
                        if wt == 'neg':
                            template_histo.scale(-1.)
                        #if (pI == 'Int') and (wt == 'pos'): continue
                        print(lep, jmult, sub_name, sys)
                        sumw, sumw2 = template_histo.values(sumw2=True, overflow='all')[()] # get vals and errors for all bins (including under/overflow)
                        #if args.smooth:
                        #    set_trace()

                            ## create rootpy hist and rename
                        rtpy_h2d = Hist2D(template_histo.dense_axes()[0].edges(), template_histo.dense_axes()[1].edges())
                        outhname = '_'.join([jmult, lepdir, sub_name]) if sys == 'nosys' else '_'.join([jmult, lepdir, sub_name, sysname])
                        rtpy_h2d.name = outhname
                            # set bin content for rootpy hist
                        for binx in range(0, rtpy_h2d.GetNbinsX()+2):
                            for biny in range(0, rtpy_h2d.GetNbinsY()+2):
                                rtpy_h2d[binx, biny] = sumw[binx, biny], sumw2[binx, biny]
                        #set_trace()
                        rtpy_h2d.Write()
        
    print('%s written' % tmp_rname)
Example #14
from argparse import ArgumentParser
parser = ArgumentParser()
parser.add_argument('--type', default='data', nargs='?', choices=['SM', 'signal', 'data', 'all'], help='specify which sample type to print')
args = parser.parse_args()

proj_dir = os.environ['PROJECT_DIR']
jobid = os.environ['jobid']

    # only print signal MC info
if (args.type == 'signal') or (args.type == 'all'):
    import itertools
    from string import Template
    name2val = lambda x: float(x.replace('pc','').replace('p', '.'))

    samples_list = prettyjson.loads(open('%s/inputs/samples_2016.json' % proj_dir).read())
    
    samples_dict = {}
    for sample in samples_list:
        name = sample.pop('name')
        samples_dict[name] = sample

        ## make table for A
    A_output = "\multirow{2}{*}{Parity} & \multirow{2}{*}{$\mathsf{m_{A}}$ [GeV]} & \multirow{2}{*}{$\Gamma_{\mathsf{A}}$ [\% $\mathsf{m_{A}}$]} & \multicolumn{2}{c |}{LO $\sigma$ [pb]} & \multirow{2}{*}{$\mathsf{k_{R}}$} \\\ \n"
    A_output += " & & & Resonance & Interference & \\\ \n\hline \n"
    H_output = "\multirow{2}{*}{Parity} & \multirow{2}{*}{$\mathsf{m_{H}}$ [GeV]} & \multirow{2}{*}{$\Gamma_{\mathsf{H}}$ [\% $\mathsf{m_{H}}$]} & \multicolumn{2}{c |}{LO $\sigma$ [pb]} & \multirow{2}{*}{$\mathsf{k_{R}}$} \\\ \n"
    H_output += " & & & Resonance & Interference & \\\ \n\hline \n"
    for sig_point in itertools.product(['M400','M500', 'M600', 'M750'], ['W2p5', 'W5', 'W10', 'W25']):
        signal = '_'.join(sig_point)

        mass = sig_point[0][1:]
Example #15
    }
    btag_values["2018"] = {
        "btagDeepB": {
            "DeepCSVLoose": 0.1208,
            "DeepCSVMedium": 0.4168,
            "DeepCSVTight": 0.7665,
        },
        "btagDeepFlavB": {
            "DeepJetLoose": 0.0490,
            "DeepJetMedium": 0.2783,
            "DeepJetTight": 0.7100,
        }
    }

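# load the jet configuration for this jobid and validate the b-tagger and working-point choices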
jet_pars = prettyjson.loads(
    open(
        os.path.join(os.environ["PROJECT_DIR"], "cfg_files",
                     "cfg_pars_%s.json" % os.environ["jobid"])).read())["Jets"]

valid_taggers = ["DeepCSV", "DeepJet"]
valid_WPs = ["Loose", "Medium", "Tight"]

if jet_pars["btagger"] not in valid_taggers:
    raise IOError("%s is not a supported b-tagger" % jet_pars["btagger"])
if jet_pars["permutations"]["tightb"] not in valid_WPs:
    raise IOError("%s is not a valid working point" %
                  jet_pars["permutations"]["tightb"])
if jet_pars["permutations"]["looseb"] not in valid_WPs:
    raise IOError("%s is not a valid working point" %
                  jet_pars["permutations"]["looseb"])

Example #16
    "Had": "$\\rightarrow$ jj",
}

isSignal = lambda x : (x.startswith("AtoTT") or x.startswith("HtoTT"))

variables = {
    "pt" : ("$p_{T}$($obj$) [GeV]", 2, (0., 500.)),
    "eta": ("$\\eta$($obj$)", 2, (-2.6, 2.6)),
    "phi": ("$\\phi$($obj$)", 2, (-4, 4)),
    "mass": ("$m_{obj}$ [GeV]", 1, (0., 300.)),
    "energy": ("$E_{obj}$ [GeV]", 2, (0., 1000.)),
}


    ## get data lumi and scale MC by lumi
data_lumi_year = prettyjson.loads(open(os.path.join(proj_dir, "inputs", "%s_lumis_data.json" % base_jobid)).read())[args.year]
lumi_to_use = (data_lumi_year["Muons"]+data_lumi_year["Electrons"])/2000.
lumi_correction = load(os.path.join(proj_dir, "Corrections", base_jobid, "MC_LumiWeights.coffea"))[args.year]

        # scale events by lumi correction
for hname in hdict.keys():
    if hname == "cutflow": continue
    hdict[hname].scale(lumi_correction["Muons"], axis="dataset")
    #hdict[hname] = hdict[hname].integrate("dataset")


#set_trace()
    ## make bp plots
for hname in variables.keys():
    if hname not in hdict.keys():
        raise ValueError(f"{hname} not found in file")
Example #17
                    default="")

args = parser.parse_args()

jobid = os.environ["jobid"]
proj_dir = os.environ["PROJECT_DIR"]

if not os.path.isfile(args.json):
    raise ValueError(f"file {args.json} does not exist")

outdir = os.path.join(proj_dir, "inputs", "_".join(
    os.path.basename(args.json).split(".")[0].split("_")[1:]
))  # name of the json file without the leading "samples_"
if not os.path.isdir(outdir): os.makedirs(outdir)

all_samples = prettyjson.loads(open(args.json).read())
samples_to_run = list(
    filter(lambda x: fnmatch(x["name"], args.sample
                             if args.sample else "*"), all_samples))
if not len(samples_to_run):
    raise RuntimeError("Could not find any sample matching the pattern")

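# build the analyzer inputs, skipping datasets not present in DBS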
analyzer_inputs = []
for sample in samples_to_run:
    #set_trace()

    if "DBSName" in sample:
        if sample["DBSName"] == "NOT PRESENT": continue
        if "Ext" in sample["name"]:
            print("Must combine %s with non-extenstion dataset!" %
                  sample["name"])
Example #18
         (0., 500.), True, True),
    })
if '4+' in njets_to_run:
    variables.update({
        'nusolver_chi2': ('$\\chi_{\\nu}^{2}$', 5, (0., 1000.), True, False),
        'nusolver_dist': ('$D_{\\nu, min}$ [GeV]', 1, (0., 150.), True, False),
        'mWHad_vs_mTHad': ('$m_{t_{h}}$ [GeV]', '$m_{W_{h}}$ [GeV]', 10,
                           (0., 500.), 10, (0., 500.), True, False),
    })

    ## get plotting colors/settings
hstyles = styles.styles

## get data lumi and scale MC by lumi
data_lumi_dict = prettyjson.loads(
    open(os.path.join(proj_dir, 'inputs',
                      '%s_lumis_data.json' % base_jobid)).read())
lumi_correction = load(
    os.path.join(proj_dir, 'Corrections', jobid, 'MC_LumiWeights.coffea'))

## make groups based on perm category
pcat = hist.Cat("permcat", "Perm Category", sorting='placement')
pcat_cat = "permcat"

computed_combined_2016 = False
for year in years_to_run:
    f_ext = 'TOT.coffea'
    if combine_2016 and ('2016' in year):
        if computed_combined_2016:
            computed_combined_2016_year_to_copy = year
            continue