Esempio n. 1
0
def merge_files(jobdir, samples):
    #set_trace()
    for sample in samples:
        merged_fname = os.path.join(jobdir, sample, '%s_TOT.coffea' % sample)
        output_files = [
            '%s/%s/%s' % (jobdir, sample, fname)
            for fname in os.listdir('%s/%s' % (jobdir, sample))
            if fname.endswith('.coffea')
        ]
        # don't re-merge files if this has already been done
        if merged_fname in output_files:
            tot_files[sample] = merged_fname
            print('%s already exists' % merged_fname)
            continue
        else:
            if len(output_files) > 1:
                ## merge files
                output_acc = plot_tools.add_coffea_files(output_files)
                plot_tools.save_accumulator(output_acc, merged_fname)
            else:
                ## rename file
                os.system('mv %s %s' % (output_files[0], merged_fname))
                print('%s written' % merged_fname)
            tot_files[sample] = merged_fname
Esempio n. 2
0
analyzer = "genpartons"

from argparse import ArgumentParser
parser = ArgumentParser()
parser.add_argument("year", choices=["2016", "2017", "2018"] if base_jobid == "NanoAODv6" else ["2016APV", "2016", "2017", "2018"], help="What year is the ntuple from.")
args = parser.parse_args()

input_dir = os.path.join(proj_dir, "results", "%s_%s" % (args.year, base_jobid), analyzer)
f_ext = "TOT.coffea"
outdir = os.path.join(proj_dir, "plots", "%s_%s" % (args.year, base_jobid), analyzer)
if not os.path.isdir(outdir):
    os.makedirs(outdir)

#set_trace()
fnames = sorted(["%s/%s" % (input_dir, fname) for fname in os.listdir(input_dir) if fname.endswith(f_ext)])
hdict = plt_tools.add_coffea_files(fnames) if len(fnames) > 1 else load(fnames[0])

objects = {
    "SL" : {
        "TTbar" : ("t\\bar{t}", (200., 2000.)),
        "THad" : ("t_{h}", (150., 200.)),
        "TLep" : ("t_{l}", (150., 200.)),
        "BHad" : ("b_{h}", (0., 5.)),
        "BLep" : ("b_{l}", (0., 5.)),
        "WHad" : ("W_{h}", (50., 100.)),
        "WLep" : ("W_{l}", (50., 100.)),
        "Lepton" : ("l", (0., 5.)),
    },
    "DL" : {
        "TTbar" : ("t\\bar{t}", (200., 2000.)),
        "Top" : ("t", (150., 200.)),
Esempio n. 3
0
#!/usr/bin/env python

import Utilities.plot_tools as plt_tools
from pdb import set_trace

from argparse import ArgumentParser
parser = ArgumentParser()
parser.add_argument('output_fname', type=str, help='Name of output file with file extension.')
parser.add_argument('input_files', type=str, help="Input files separated by ':'")

args = parser.parse_args()

input_files = args.input_files.split(':')

output_acc = plt_tools.add_coffea_files(input_files)
plt_tools.save_accumulator(output_acc, args.output_fname)

                                      analyzer)
        input_dir_2016APV = os.path.join(proj_dir, 'results',
                                         '2016APV_%s' % jobid, analyzer)
        outdir = os.path.join(proj_dir, 'plots', '2016Combined_%s' % jobid,
                              analyzer)

        fnames_2016 = sorted([
            os.path.join(input_dir_2016, fname)
            for fname in os.listdir(input_dir_2016) if fname.endswith(f_ext)
        ])
        fnames_2016APV = sorted([
            os.path.join(input_dir_2016APV, fname)
            for fname in os.listdir(input_dir_2016APV) if fname.endswith(f_ext)
        ])

        hdict_2016 = plt_tools.add_coffea_files(
            fnames_2016) if len(fnames_2016) > 1 else load(fnames_2016[0])
        hdict_2016APV = plt_tools.add_coffea_files(
            fnames_2016APV) if len(fnames_2016APV) > 1 else load(
                fnames_2016APV[0])

        lumi_to_use_2016 = (data_lumi_dict['2016']['Muons'] +
                            data_lumi_dict['2016']['Electrons']) / 2000.
        lumi_to_use_2016APV = (data_lumi_dict['2016APV']['Muons'] +
                               data_lumi_dict['2016APV']['Electrons']) / 2000.
        lumi_to_use = lumi_to_use_2016 + lumi_to_use_2016APV

        computed_combined_2016 = True
        computed_combined_2016_year_key = year

    else:
        input_dir = os.path.join(proj_dir, 'results', '%s_%s' % (year, jobid),
Esempio n. 5
0
    hem_odir = 'Scaled'
    fname_str = 'SCALED'
if args.hem == 'removed':
    hem_odir = 'Removed'
    fname_str = 'REMOVE'
    

input_dir = os.path.join(proj_dir, 'results', '%s_%s' % (args.year, jobid), analyzer)
f_ext = 'TOT.coffea'
outdir = os.path.join(proj_dir, 'plots', '%s_%s' % (args.year, jobid), analyzer, hem_odir)
if not os.path.isdir(outdir):
    os.makedirs(outdir)

fnames = sorted(['%s/%s' % (input_dir, fname) for fname in os.listdir(input_dir) if fname.endswith(f_ext)])
fname_to_use = [fname for fname in fnames if fname_str in fname]
hdict = plt_tools.add_coffea_files(fname_to_use) if len(fname_to_use) > 1 else load(fname_to_use[0])

jet_mults = {
    '3Jets' : '3 jets',
    '4PJets' : '4+ jets'
}

objtypes = {
    'Jets' : 'jets',
    'Lep' :  {
        'Muon' : '$\\mu$',
        'Electron' : '$e$',
    }
}

btag_cats = {
Esempio n. 6
0
def get_bkg_templates(tmp_rname):
    '''
    Function that writes linearized mtt vs costheta distributions to root file.
    '''
    ## variables that only need to be defined/evaluated once
    hdict = plt_tools.add_coffea_files(bkg_fnames) if len(bkg_fnames) > 1 else load(bkg_fnames[0])

        ## get data lumi and scale MC by lumi
    data_lumi_year = prettyjson.loads(open('%s/inputs/lumis_data.json' % proj_dir).read())[args.year]

        # get correct hist and rebin
    hname_to_use = 'mtt_vs_tlep_ctstar_abs'
    if hname_to_use not in hdict.keys():
        raise ValueError("%s not found in file" % hname_to_use)
    xrebinning, yrebinning = linearize_binning
    histo = hdict[hname_to_use] # process, sys, jmult, leptype, btag, lepcat
    
    xaxis_name = histo.dense_axes()[0].name
    yaxis_name = histo.dense_axes()[1].name
        ## rebin x axis
    if isinstance(xrebinning, np.ndarray):
        new_xbins = hist.Bin(xaxis_name, xaxis_name, xrebinning)
    elif isinstance(xrebinning, float) or isinstance(xrebinning, int):
        new_xbins = xrebinning
    histo = histo.rebin(xaxis_name, new_xbins)
        ## rebin y axis
    if isinstance(yrebinning, np.ndarray):
        new_ybins = hist.Bin(yaxis_name, yaxis_name, yrebinning)
    elif isinstance(yrebinning, float) or isinstance(yrebinning, int):
        new_ybins = yrebinning
    rebin_histo = histo.rebin(yaxis_name, new_ybins)
    
    nbins = (len(xrebinning)-1)*(len(yrebinning)-1)
    
        ## scale ttJets events, split by reconstruction type, by normal ttJets lumi correction
    ttJets_permcats = ['*right', '*matchable', '*unmatchable', '*other']
    names = [dataset for dataset in sorted(set([key[0] for key in hdict[hname_to_use].values().keys()]))] # get dataset names in hists
    ttJets_cats = [name for name in names if any([fnmatch.fnmatch(name, cat) for cat in ttJets_permcats])] # gets ttJets(_PS)_other, ...

        # use ttJets events that don't have PS weights for dedicated sys samples in 2016    
    if bkg_ttJets_fname is not None:
        ttJets_hdict = load(bkg_ttJets_fname)
        ttJets_histo = ttJets_hdict[hname_to_use] # process, sys, jmult, leptype, btag, lepcat
        
            ## rebin x axis
        ttJets_histo = ttJets_histo.rebin(xaxis_name, new_xbins)
            ## rebin y axis
        ttJets_histo = ttJets_histo.rebin(yaxis_name, new_ybins)
        
        only_ttJets_names = [dataset for dataset in sorted(set([key[0] for key in ttJets_hdict[hname_to_use].values().keys()]))] # get dataset names in hists
        only_ttJets_cats = [name for name in only_ttJets_names if any([fnmatch.fnmatch(name, cat) for cat in ttJets_permcats])] # gets ttJets(_PS)_other, ...


        ## make groups based on process
    process = hist.Cat("process", "Process", sorting='placement')
    process_cat = "dataset"

        # need to save coffea hist objects to file so they can be opened by uproot in the proper format
    upfout = uproot.recreate(tmp_rname, compression=uproot.ZLIB(4)) if os.path.isfile(tmp_rname) else uproot.create(tmp_rname)

    if '3Jets' in njets_to_run:
        histo_dict_3j = processor.dict_accumulator({'Muon' : {}, 'Electron' :{}})
    if '4PJets' in njets_to_run:
        histo_dict_4pj = processor.dict_accumulator({'Muon' : {}, 'Electron' :{}})

    for lep in ['Muon', 'Electron']:
        lepdir = 'mujets' if lep == 'Muon' else 'ejets'
    
        ## make groups based on process
        process_groups = plt_tools.make_dataset_groups(lep, args.year, samples=names, gdict='templates')
        
        lumi_correction = load('%s/Corrections/%s/MC_LumiWeights_IgnoreSigEvts.coffea' % (proj_dir, jobid))[args.year]['%ss' % lep]
                # scale ttJets events, split by reconstruction type, by normal ttJets lumi correction
        if len(ttJets_cats) > 0:
            for tt_cat in ttJets_cats:
                ttJets_lumi_topo = '_'.join(tt_cat.split('_')[:-1]) # gets ttJets[SL, Had, DiLep] or ttJets_PS
                ttJets_eff_lumi = lumi_correction[ttJets_lumi_topo]
                lumi_correction.update({tt_cat: ttJets_eff_lumi})
    
        histo = rebin_histo.copy()
        histo.scale(lumi_correction, axis='dataset')
        histo = histo.group(process_cat, process, process_groups)[:, :, :, lep, :, :].integrate('leptype')

            # use ttJets events that don't have PS weights for dedicated sys samples in 2016    
        if bkg_ttJets_fname is not None:
            if len(only_ttJets_cats) > 0:
                for tt_cat in only_ttJets_cats:
                    ttJets_lumi_topo = '_'.join(tt_cat.split('_')[:-1]) # gets ttJets[SL, Had, DiLep] or ttJets_PS
                    ttJets_eff_lumi = lumi_correction[ttJets_lumi_topo]
                    lumi_correction.update({tt_cat: ttJets_eff_lumi})

            tt_histo = ttJets_histo.copy()
            tt_histo.scale(lumi_correction, axis='dataset')
            tt_histo = tt_histo.group(process_cat, process, {'TT' : ['ttJets_right', 'ttJets_matchable', 'ttJets_unmatchable', 'ttJets_other']})[:, :, :, lep, :, :].integrate('leptype')


        for jmult in njets_to_run:
            iso_sb    = Plotter.linearize_hist(histo[:, 'nosys', jmult, 'btagPass', 'Loose'].integrate('sys').integrate('jmult').integrate('lepcat').integrate('btag'))
            btag_sb   = Plotter.linearize_hist(histo[:, 'nosys', jmult, 'btagFail', 'Tight'].integrate('sys').integrate('jmult').integrate('lepcat').integrate('btag'))
            double_sb = Plotter.linearize_hist(histo[:, 'nosys', jmult, 'btagFail', 'Loose'].integrate('sys').integrate('jmult').integrate('lepcat').integrate('btag'))
            sig_histo = Plotter.linearize_hist(histo[:, :, jmult, 'btagPass', 'Tight'].integrate('jmult').integrate('lepcat').integrate('btag'))
        
            for sys in sys_to_use.keys():
                if sys not in histo.axis('sys')._sorted:
                    print('\n\n   Systematic %s not available, skipping\n\n' % sys)
                    continue

                #set_trace()
                sysname, onlyTT = sys_to_use[sys]
                if 'LEP' in sysname: sysname = sysname.replace('LEP', lepdir[0])
        
                qcd_est_histo = Plotter.QCD_Est(sig_reg=sig_histo, iso_sb=iso_sb, btag_sb=btag_sb, double_sb=double_sb, norm_type='Sideband', shape_region='BTAG', norm_region='BTAG', sys=sys)

                    ## write nominal and systematic variations for each topology to file
                for proc in sorted(set([key[0] for key in qcd_est_histo.values().keys()])):
                    if (proc != 'TT') and onlyTT: continue
                    if (proc == 'data_obs') and not (sys == 'nosys'): continue
                    name = proc+lepdir if proc == 'QCD' else proc
                    print(lep, jmult, sys, name)
                    outhname = '_'.join([jmult, lepdir, name]) if sys == 'nosys' else '_'.join([jmult, lepdir, name, sysname])
                    template_histo = qcd_est_histo[proc].integrate('process')
                    if (('ue' in sys) or ('hdamp' in sys) or ('mtop' in sys)) and (bkg_ttJets_fname is not None):
                        tt_lin_histo = Plotter.linearize_hist(tt_histo['TT', 'nosys', jmult, 'btagPass', 'Tight'].integrate('jmult').integrate('lepcat').integrate('btag'))
                        tt_lin_histo = tt_lin_histo['TT', 'nosys'].integrate('process').integrate('sys')
                        template_histo = substitute_ttJets(sys_histo=template_histo, ttJets_histo=tt_lin_histo, ttJets_PS_histo=sig_histo['TT', 'nosys'].integrate('process').integrate('sys'))

                    if ((sys == 'mtop1695') or (sys == 'mtop1755')) and (templates_to_smooth[proc]):
                        template_histo = scale_mtop3gev(nominal=histo_dict_3j[lep][proc] if jmult == '3Jets' else histo_dict_4pj[lep][proc], template=template_histo)
                        #set_trace()

                    if (sys != 'nosys') and (args.smooth) and (templates_to_smooth[proc]):
                        template_histo = smoothing(nominal=histo_dict_3j[lep][proc] if jmult == '3Jets' else histo_dict_4pj[lep][proc], template=template_histo, nbinsx=len(xrebinning)-1, nbinsy=len(yrebinning)-1)#, debug=True if proc=='VV' else False)
                        #set_trace()

                        ## save template histos to coffea dict
                    if jmult == '3Jets':
                        histo_dict_3j[lep][proc if sys == 'nosys' else '%s_%s' % (proc, sys)] = template_histo
                    if jmult == '4PJets':
                        histo_dict_4pj[lep][proc if sys == 'nosys' else '%s_%s' % (proc, sys)] = template_histo

                        ## save template histo to root file
                    upfout[outhname] = hist.export1d(template_histo)

    if '3Jets' in njets_to_run:
        coffea_out_3j = '%s/templates_lj_3Jets_bkg_smoothed_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year) if args.smooth else '%s/templates_lj_3Jets_bkg_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year)
        save(histo_dict_3j, coffea_out_3j)
        print("%s written" % coffea_out_3j)
    if '4PJets' in njets_to_run:
        coffea_out_4pj = '%s/templates_lj_4PJets_bkg_smoothed_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year) if args.smooth else '%s/templates_lj_4PJets_bkg_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year)
        save(histo_dict_4pj, coffea_out_4pj)
        print("%s written" % coffea_out_4pj)

    
    upfout.close()
    print('%s written' % tmp_rname)
Esempio n. 7
0
def get_sig_templates(tmp_rname):
    '''
    Function that writes linearized mtt vs costheta distributions to root file.
    '''
    from rootpy.plotting import Hist2D

    widthTOname = lambda width : str(width).replace('.', 'p')
    nameTOwidth = lambda width : str(width).replace('p', '.')

    ## variables that only need to be defined/evaluated once
    hdict = plt_tools.add_coffea_files(sig_fnames) if len(sig_fnames) > 1 else load(sig_fnames[0])

        ## get data lumi and scale MC by lumi
    data_lumi_year = prettyjson.loads(open('%s/inputs/lumis_data.json' % proj_dir).read())[args.year]

        # get correct hist and rebin
    hname_to_use = 'mtt_vs_tlep_ctstar_abs'
    if hname_to_use not in hdict.keys():
        raise ValueError("%s not found in file" % hname_to_use)
    xrebinning, yrebinning = mtt_ctstar_2d_binning
    #xrebinning, yrebinning = 2, 1
    histo = hdict[hname_to_use] # process, sys, jmult, leptype, btag, lepcat

    #set_trace()    
    xaxis_name = histo.dense_axes()[0].name
    yaxis_name = histo.dense_axes()[1].name
        ## rebin x axis
    if isinstance(xrebinning, np.ndarray):
        new_xbins = hist.Bin(xaxis_name, xaxis_name, xrebinning)
    elif isinstance(xrebinning, float) or isinstance(xrebinning, int):
        new_xbins = xrebinning
    histo = histo.rebin(xaxis_name, new_xbins)

        ## rebin y axis
    if isinstance(yrebinning, np.ndarray):
        new_ybins = hist.Bin(yaxis_name, yaxis_name, yrebinning)
    elif isinstance(yrebinning, float) or isinstance(yrebinning, int):
        new_ybins = yrebinning
    #set_trace()
    histo = histo.rebin(yaxis_name, new_ybins)
    rebin_histo = histo[:, :, :, :, 'btagPass', 'Tight'].integrate('lepcat').integrate('btag')

    signals = sorted(set([key[0] for key in rebin_histo.values().keys()]))    

        # create 2D signal hists and write to temp file        
    with root_open(tmp_rname, 'w') as out:
        #for lep in ['Muon']:
        for lep in ['Muon', 'Electron']:
            lepdir = 'mujets' if lep == 'Muon' else 'ejets'

                # scale by lumi
            lumi_correction = load('%s/Corrections/%s/MC_LumiWeights_IgnoreSigEvts.coffea' % (proj_dir, jobid))[args.year]['%ss' % lep]
            scaled_histo = rebin_histo.copy()
            scaled_histo.scale(lumi_correction, axis='dataset')
    
            for jmult in njets_to_run:
                histo = scaled_histo[:, :, jmult, lep].integrate('jmult').integrate('leptype')
    
                for signal in signals:
                    _, mass, width, pI, wt = tuple(signal.split('_'))
                    samtype = 'int' if pI == 'Int' else 'sgn'
                    bostype = 'ggA' if _ == 'AtoTT' else 'ggH'
    
                    sub_name = '%s_%s-%s-%s-%s' % (bostype, wt, samtype, widthTOname(width).split('W')[-1]+'pc', mass) if pI == 'Int' else '%s_pos-%s-%s-%s' % (bostype, samtype, widthTOname(width).split('W')[-1]+'pc', mass)
    
                    #set_trace()
                    for sys in sys_to_use.keys():
                        sysname, onlyTT = sys_to_use[sys]
                        if onlyTT: continue
                        if sys not in histo.axis('sys')._sorted:
                            print('\n\n   Systematic %s not available, skipping\n\n' % sys)
                            continue
                        #set_trace()
                        if 'LEP' in sysname: sysname = sysname.replace('LEP', lepdir[0])
    
                        template_histo = histo[signal, sys].integrate('dataset').integrate('sys')
                        if wt == 'neg':
                            template_histo.scale(-1.)
                        #if (pI == 'Int') and (wt == 'pos'): continue
                        print(lep, jmult, sub_name, sys)
                        sumw, sumw2 = template_histo.values(sumw2=True, overflow='all')[()] # get vals and errors for all bins (including under/overflow)
                        #if args.smooth:
                        #    set_trace()

                            ## create rootpy hist and rename
                        rtpy_h2d = Hist2D(template_histo.dense_axes()[0].edges(), template_histo.dense_axes()[1].edges())
                        outhname = '_'.join([jmult, lepdir, sub_name]) if sys == 'nosys' else '_'.join([jmult, lepdir, sub_name, sysname])
                        rtpy_h2d.name = outhname
                            # set bin content for rootpy hist
                        for binx in range(0, rtpy_h2d.GetNbinsX()+2):
                            for biny in range(0, rtpy_h2d.GetNbinsY()+2):
                                rtpy_h2d[binx, biny] = sumw[binx, biny], sumw2[binx, biny]
                        #set_trace()
                        rtpy_h2d.Write()
        
    print('%s written' % tmp_rname)
def get_bkg_templates(tmp_rname):
    """
    Function that writes linearized mtt vs costheta distributions to root file.
    """
    ## variables that only need to be defined/evaluated once
    hdict = plt_tools.add_coffea_files(
        bkg_fnames) if len(bkg_fnames) > 1 else load(bkg_fnames[0])

    # get correct hist and rebin
    hname_to_use = "mtt_vs_tlep_ctstar_abs"
    if hname_to_use not in hdict.keys():
        raise ValueError("%s not found in file" % hname_to_use)
    xrebinning, yrebinning = linearize_binning
    histo = hdict[hname_to_use][
        Plotter.
        nonsignal_samples]  # process, sys, jmult, leptype, btag, lepcat

    xaxis_name = histo.dense_axes()[0].name
    yaxis_name = histo.dense_axes()[1].name
    ## rebin x axis
    if isinstance(xrebinning, np.ndarray):
        new_xbins = hist.Bin(xaxis_name, xaxis_name, xrebinning)
    elif isinstance(xrebinning, float) or isinstance(xrebinning, int):
        new_xbins = xrebinning
    histo = histo.rebin(xaxis_name, new_xbins)
    ## rebin y axis
    if isinstance(yrebinning, np.ndarray):
        new_ybins = hist.Bin(yaxis_name, yaxis_name, yrebinning)
    elif isinstance(yrebinning, float) or isinstance(yrebinning, int):
        new_ybins = yrebinning
    rebin_histo = histo.rebin(yaxis_name, new_ybins)

    ## scale ttJets events, split by reconstruction type, by normal ttJets lumi correction
    ttJets_permcats = [
        "*right", "*matchable", "*unmatchable", "*sl_tau", "*other"
    ]
    names = [
        dataset
        for dataset in sorted(set([key[0] for key in histo.values().keys()]))
    ]  # get dataset names in hists
    ttJets_cats = [
        name for name in names
        if any([fnmatch.fnmatch(name, cat) for cat in ttJets_permcats])
    ]  # gets ttJets(_PS)_other, ...

    ## make groups based on process
    process = hist.Cat("process", "Process", sorting="placement")
    process_cat = "dataset"

    # need to save coffea hist objects to file so they can be opened by uproot in the proper format
    upfout = uproot3.recreate(tmp_rname, compression=uproot3.ZLIB(
        4)) if os.path.isfile(tmp_rname) else uproot3.create(tmp_rname)

    if "3Jets" in njets_to_run:
        histo_dict_3j = processor.dict_accumulator({
            "Muon": {},
            "Electron": {}
        })
    if "4PJets" in njets_to_run:
        histo_dict_4pj = processor.dict_accumulator({
            "Muon": {},
            "Electron": {}
        })

    for lep in ["Muon", "Electron"]:
        orig_lepdir = "muNJETS" if lep == "Muon" else "eNJETS"

        #set_trace()
        ## make groups based on process
        process_groups = plt_tools.make_dataset_groups(lep,
                                                       args.year,
                                                       samples=names,
                                                       gdict="templates")
        #process_groups = plt_tools.make_dataset_groups(lep, args.year, samples=names, gdict="dataset")

        lumi_correction = lumi_corr_dict[args.year]["%ss" % lep]
        # scale ttJets events, split by reconstruction type, by normal ttJets lumi correction
        if len(ttJets_cats) > 0:
            for tt_cat in ttJets_cats:
                ttJets_lumi_topo = "_".join(tt_cat.split(
                    "_")[:-2]) if "sl_tau" in tt_cat else "_".join(
                        tt_cat.split("_")
                        [:-1])  # gets ttJets[SL, Had, DiLep] or ttJets_PS
                ttJets_eff_lumi = lumi_correction[ttJets_lumi_topo]
                lumi_correction.update({tt_cat: ttJets_eff_lumi})

        histo = rebin_histo.copy()
        histo.scale(lumi_correction, axis="dataset")
        histo = histo.group(process_cat, process,
                            process_groups)[:, :, :,
                                            lep, :, :].integrate("leptype")

        #set_trace()
        systs = sorted(set([key[1] for key in histo.values().keys()]))
        systs.insert(0, systs.pop(
            systs.index("nosys")))  # move "nosys" to the front

        # loop over each jet multiplicity
        for jmult in njets_to_run:
            lepdir = orig_lepdir.replace("NJETS", jmult.lower())

            # get sideband and signal region hists
            cen_sb_histo = Plotter.linearize_hist(
                histo[:, "nosys", jmult,
                      btag_reg_names_dict["Central"]["reg"]].integrate(
                          "jmult").integrate("btag").integrate("sys"))
            #up_sb_histo = histo[:, "nosys", jmult, btag_reg_names_dict["Up"]["reg"]].integrate("jmult").integrate("btag")
            #dw_sb_histo = histo[:, "nosys", jmult, btag_reg_names_dict["Down"]["reg"]].integrate("jmult").integrate("btag")
            sig_histo = Plotter.linearize_hist(
                histo[:, :, jmult,
                      btag_reg_names_dict["Signal"]["reg"]].integrate(
                          "jmult").integrate("btag"))

            # loop over each systematic
            for sys in systs:
                if sys not in systematics.template_sys_to_name[
                        args.year].keys():
                    continue

                sys_histo = sig_histo[:, sys].integrate(
                    "sys") if sys in systematics.ttJets_sys.values(
                    ) else Plotter.BKG_Est(
                        sig_reg=sig_histo[:, sys].integrate("sys"),
                        sb_reg=cen_sb_histo,
                        norm_type="SigMC",
                        sys=sys,
                        ignore_uncs=True)

                ## write nominal and systematic variations for each topology to file
                #for proc in sorted(set([key[0] for key in sig_histo.values().keys()])):
                for proc in sorted(
                        set([key[0] for key in sys_histo.values().keys()])):
                    if ("tt" not in proc) and (
                            sys in systematics.ttJets_sys.values()):
                        continue
                    #if (proc != "tt") and (sys in systematics.ttJets_sys.values()): continue
                    if (proc == "data_obs") and not (sys == "nosys"): continue
                    if not sys_histo[proc].values().keys():
                        #if not sig_histo[proc, sys].values().keys():
                        print(
                            f"Systematic {sys} for {lep} {jmult} {proc} not found, skipping"
                        )
                        continue

                    print(args.year, lep, jmult, sys, proc)
                    #set_trace()
                    outhname = "_".join(
                        list(
                            filter(None, [
                                proc, systematics.template_sys_to_name[
                                    args.year][sys][0], lepdir,
                                (args.year)[-2:]
                            ])))
                    if "LEP" in outhname:
                        outhname = outhname.replace(
                            "LEP",
                            "muon") if lep == "Muon" else outhname.replace(
                                "LEP", "electron")

                    template_histo = sys_histo[proc].integrate("process")
                    #template_histo = sig_histo[proc, sys].integrate("process").integrate("sys")

                    #set_trace()
                    ## save template histos to coffea dict
                    if jmult == "3Jets":
                        histo_dict_3j[lep][
                            f"{proc}_{sys}"] = template_histo.copy()
                    if jmult == "4PJets":
                        histo_dict_4pj[lep][
                            f"{proc}_{sys}"] = template_histo.copy()

                        ## save template histo to root file
                    upfout[outhname] = hist.export1d(template_histo)

    if "3Jets" in njets_to_run:
        coffea_out_3j = os.path.join(
            outdir,
            f"test_raw_templates_lj_3Jets_bkg_{args.year}_{jobid}.coffea")
        save(histo_dict_3j, coffea_out_3j)
        print(f"{coffea_out_3j} written")
    if "4PJets" in njets_to_run:
        coffea_out_4pj = os.path.join(
            outdir,
            f"test_raw_templates_lj_4PJets_bkg_{args.year}_{jobid}.coffea")
        save(histo_dict_4pj, coffea_out_4pj)
        print(f"{coffea_out_4pj} written")

    upfout.close()
    print(f"{tmp_rname} written")
def get_sig_templates(tmp_rname):
    """
    Function that writes linearized mtt vs costheta distributions to root file.
    """
    widthTOname = lambda width: str(width).replace(".", "p")
    nameTOwidth = lambda width: str(width).replace("p", ".")

    ## variables that only need to be defined/evaluated once
    hdict = plt_tools.add_coffea_files(
        sig_fnames) if len(sig_fnames) > 1 else load(sig_fnames[0])

    # get correct hist and rebin
    hname_to_use = "mtt_vs_tlep_ctstar_abs"
    if hname_to_use not in hdict.keys():
        raise ValueError(f"{hname_to_use} not found in file")
    xrebinning, yrebinning = linearize_binning
    #xrebinning, yrebinning = mtt_ctstar_2d_binning
    histo = hdict[hname_to_use]  # process, sys, jmult, leptype, btag, lepcat

    #set_trace()
    xaxis_name = histo.dense_axes()[0].name
    yaxis_name = histo.dense_axes()[1].name
    ## rebin x axis
    if isinstance(xrebinning, np.ndarray):
        new_xbins = hist.Bin(xaxis_name, xaxis_name, xrebinning)
    elif isinstance(xrebinning, float) or isinstance(xrebinning, int):
        new_xbins = xrebinning
    histo = histo.rebin(xaxis_name, new_xbins)

    ## rebin y axis
    if isinstance(yrebinning, np.ndarray):
        new_ybins = hist.Bin(yaxis_name, yaxis_name, yrebinning)
    elif isinstance(yrebinning, float) or isinstance(yrebinning, int):
        new_ybins = yrebinning
    histo = histo.rebin(yaxis_name, new_ybins)
    rebin_histo = histo[Plotter.signal_samples, :, :, :,
                        "btagPass"].integrate("btag")

    names = [
        dataset for dataset in sorted(
            set([key[0] for key in rebin_histo.values().keys()]))
    ]  # get dataset names in hists

    signals = sorted(set([key[0] for key in rebin_histo.values().keys()]))
    signals = [sig for sig in signals
               if "TTJetsSL" in sig]  # only use SL decays

    systs = sorted(set([key[1] for key in rebin_histo.values().keys()]))
    systs.insert(0,
                 systs.pop(systs.index("nosys")))  # move "nosys" to the front

    # need to save coffea hist objects to file so they can be opened by uproot in the proper format
    upfout = uproot3.recreate(tmp_rname, compression=uproot3.ZLIB(
        4)) if os.path.isfile(tmp_rname) else uproot3.create(tmp_rname)

    if "3Jets" in njets_to_run:
        histo_dict_3j = processor.dict_accumulator({
            "Muon": {},
            "Electron": {}
        })
    if "4PJets" in njets_to_run:
        histo_dict_4pj = processor.dict_accumulator({
            "Muon": {},
            "Electron": {}
        })

        # write signal dists to temp file
    for lep in ["Muon", "Electron"]:
        orig_lepdir = "muNJETS" if lep == "Muon" else "eNJETS"

        # scale by lumi
        lumi_correction = lumi_corr_dict[args.year]["%ss" % lep]
        histo = rebin_histo.copy()
        histo.scale(lumi_correction, axis="dataset")
        process_groups = plt_tools.make_dataset_groups(lep,
                                                       args.year,
                                                       samples=names,
                                                       gdict="templates")
        histo = histo.group(
            "dataset", hist.Cat("process", "Process", sorting="placement"),
            process_groups)

        for jmult in njets_to_run:
            lepdir = orig_lepdir.replace("NJETS", jmult.lower())

            #set_trace()
            lin_histo = Plotter.linearize_hist(
                histo[:, :, jmult,
                      lep].integrate("jmult").integrate("leptype"))
            for signal in signals:
                if "Int" in signal:
                    boson, mass, width, pI, wt = tuple(signal.split("_"))
                else:
                    boson, mass, width, pI = tuple(signal.split("_"))
                sub_name = "_".join([
                    "%s%s" % (boson[0], mass[1:]),
                    "relw%s" % widthTOname(width).split("W")[-1],
                    pI.lower(), wt
                ]) if pI == "Int" else "_".join([
                    "%s%s" % (boson[0], mass[1:]),
                    "relw%s" % widthTOname(width).split("W")[-1],
                    pI.lower()
                ])

                #set_trace()
                for sys in systs:
                    if sys not in systematics.template_sys_to_name[
                            args.year].keys():
                        continue
                    if not lin_histo[signal, sys].values().keys():
                        print(
                            f"Systematic {sys} for {lep} {jmult} {signal} not found, skipping"
                        )
                        continue

                    print(args.year, lep, jmult, sub_name, sys)
                    outhname = "_".join(
                        list(
                            filter(None, [
                                sub_name, systematics.template_sys_to_name[
                                    args.year][sys][0], lepdir,
                                (args.year)[-2:]
                            ])))
                    if "LEP" in outhname:
                        outhname = outhname.replace(
                            "LEP",
                            "muon") if lep == "Muon" else outhname.replace(
                                "LEP", "electron")

                    template_histo = lin_histo[signal, sys].integrate(
                        "process").integrate("sys")

                    ## save template histos to coffea dict
                    if jmult == "3Jets":
                        histo_dict_3j[lep][
                            f"{signal}_{sys}"] = template_histo.copy()
                    if jmult == "4PJets":
                        histo_dict_4pj[lep][
                            f"{signal}_{sys}"] = template_histo.copy()

                        ## save template histo to root file
                    upfout[outhname] = hist.export1d(template_histo)

    if "3Jets" in njets_to_run:
        coffea_out_3j = os.path.join(
            outdir,
            f"test_raw_templates_lj_3Jets_sig_{args.year}_{jobid}.coffea")
        save(histo_dict_3j, coffea_out_3j)
        print(f"{coffea_out_3j} written")
    if "4PJets" in njets_to_run:
        coffea_out_4pj = os.path.join(
            outdir,
            f"test_raw_templates_lj_4PJets_sig_{args.year}_{jobid}.coffea")
        save(histo_dict_4pj, coffea_out_4pj)
        print(f"{coffea_out_4pj} written")

    upfout.close()
    print(f"{tmp_rname} written")
Esempio n. 10
0
        failed_samples = []
        finished_samples = [
        ]  # reinitialize finished_samples to check if they're actually finished
        for sample in samples:
            isCorrect, nfails, alreadyComplete = check_correctness(
                '%s/%s' % (jobdir, sample), dump_rescue=True)
            if isCorrect == False:
                failed_samples.append(sample)
                os.system(
                    'cd %s/%s && condor_submit condor.rescue.jdl && cd %s' %
                    (jobdir, sample, orig_dir))
            else:
                finished_samples.append(sample)
        if finished_samples:
            merge_files(jobdir, finished_samples)
        if len(failed_samples) == 0:
            print('All jobs completed')
            escape = True
        else:
            time.sleep(30)
    else:
        print("%i jobs are still running, checking again in 30 seconds\n" %
              njobs)
        time.sleep(30)

## merge all TOT files from each sample into one
if list(set(tot_files.keys())) == list(set(samples)):
    tot_acc = plot_tools.add_coffea_files(list(tot_files.values()))
    tot_outname = '%s_TOT.coffea' % args.jobdir.strip('/')
    plot_tools.save_accumulator(tot_acc, '%s/%s' % (jobdir, tot_outname))