def get_bkg_templates(tmp_rname):
    """
    Function that writes linearized mtt vs costheta distributions to root file.
    """
    ## variables that only need to be defined/evaluated once
    hdict = plt_tools.add_coffea_files(bkg_fnames) if len(bkg_fnames) > 1 else load(bkg_fnames[0])

    # get correct hist and rebin
    hname_to_use = "mtt_vs_tlep_ctstar_abs"
    if hname_to_use not in hdict:
        raise ValueError(f"{hname_to_use} not found in file")
    xrebinning, yrebinning = linearize_binning
    histo = hdict[hname_to_use][Plotter.nonsignal_samples]  # process, sys, jmult, leptype, btag, lepcat

    xaxis_name = histo.dense_axes()[0].name
    yaxis_name = histo.dense_axes()[1].name
    ## rebin x axis
    if isinstance(xrebinning, np.ndarray):
        new_xbins = hist.Bin(xaxis_name, xaxis_name, xrebinning)
    elif isinstance(xrebinning, (float, int)):
        new_xbins = xrebinning
    else:
        raise ValueError("xrebinning must be an array of bin edges or a rebin factor")
    histo = histo.rebin(xaxis_name, new_xbins)
    ## rebin y axis
    if isinstance(yrebinning, np.ndarray):
        new_ybins = hist.Bin(yaxis_name, yaxis_name, yrebinning)
    elif isinstance(yrebinning, (float, int)):
        new_ybins = yrebinning
    else:
        raise ValueError("yrebinning must be an array of bin edges or a rebin factor")
    rebin_histo = histo.rebin(yaxis_name, new_ybins)

    ## scale ttJets events, split by reconstruction type, by normal ttJets lumi correction
    ttJets_permcats = [
        "*right", "*matchable", "*unmatchable", "*sl_tau", "*other"
    ]
    names = sorted(set(key[0] for key in histo.values().keys()))  # get dataset names in hists
    ttJets_cats = [
        name for name in names
        if any(fnmatch.fnmatch(name, cat) for cat in ttJets_permcats)
    ]  # gets ttJets(_PS)_other, ...
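    # e.g. a dataset named "ttJets_other" matches "*other" and lands in ttJets_cats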

    ## make groups based on process
    process = hist.Cat("process", "Process", sorting="placement")
    process_cat = "dataset"

    # need to save coffea hist objects to file so they can be opened by uproot in the proper format
    upfout = uproot3.recreate(tmp_rname, compression=uproot3.ZLIB(4)) if os.path.isfile(tmp_rname) else uproot3.create(tmp_rname)
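    # recreate() truncates any existing file; create() follows ROOT's CREATE mode and fails if the path exists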

    if "3Jets" in njets_to_run:
        histo_dict_3j = processor.dict_accumulator({
            "Muon": {},
            "Electron": {}
        })
    if "4PJets" in njets_to_run:
        histo_dict_4pj = processor.dict_accumulator({
            "Muon": {},
            "Electron": {}
        })

    for lep in ["Muon", "Electron"]:
        orig_lepdir = "muNJETS" if lep == "Muon" else "eNJETS"

        #set_trace()
        ## make groups based on process
        process_groups = plt_tools.make_dataset_groups(lep,
                                                       args.year,
                                                       samples=names,
                                                       gdict="templates")
        #process_groups = plt_tools.make_dataset_groups(lep, args.year, samples=names, gdict="dataset")

        lumi_correction = lumi_corr_dict[args.year]["%ss" % lep]
        # scale ttJets events, split by reconstruction type, by normal ttJets lumi correction
        if len(ttJets_cats) > 0:
            for tt_cat in ttJets_cats:
                # strip the reconstruction-category suffix to recover the parent dataset,
                # i.e. gets ttJets[SL, Had, DiLep] or ttJets_PS ("sl_tau" adds two tokens, the others one)
                ttJets_lumi_topo = ("_".join(tt_cat.split("_")[:-2]) if "sl_tau" in tt_cat
                                    else "_".join(tt_cat.split("_")[:-1]))
                ttJets_eff_lumi = lumi_correction[ttJets_lumi_topo]
                lumi_correction.update({tt_cat: ttJets_eff_lumi})

        histo = rebin_histo.copy()
        histo.scale(lumi_correction, axis="dataset")
        histo = histo.group(process_cat, process, process_groups)[:, :, :, lep, :, :].integrate("leptype")

        #set_trace()
        systs = sorted(set(key[1] for key in histo.values().keys()))
        systs.insert(0, systs.pop(systs.index("nosys")))  # move "nosys" to the front
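        # e.g. ["btagDOWN", "btagUP", "nosys"] -> ["nosys", "btagDOWN", "btagUP"]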

        # loop over each jet multiplicity
        for jmult in njets_to_run:
            lepdir = orig_lepdir.replace("NJETS", jmult.lower())

            # get sideband and signal region hists
            cen_sb_histo = Plotter.linearize_hist(
                histo[:, "nosys", jmult, btag_reg_names_dict["Central"]["reg"]].integrate("jmult").integrate("btag").integrate("sys"))
            #up_sb_histo = histo[:, "nosys", jmult, btag_reg_names_dict["Up"]["reg"]].integrate("jmult").integrate("btag")
            #dw_sb_histo = histo[:, "nosys", jmult, btag_reg_names_dict["Down"]["reg"]].integrate("jmult").integrate("btag")
            sig_histo = Plotter.linearize_hist(
                histo[:, :, jmult, btag_reg_names_dict["Signal"]["reg"]].integrate("jmult").integrate("btag"))

            # loop over each systematic
            for sys in systs:
                if sys not in systematics.template_sys_to_name[args.year].keys():
                    continue

                # ttbar-specific systematics are taken directly from the signal region;
                # everything else goes through the data-driven background estimate
                sys_histo = (sig_histo[:, sys].integrate("sys")
                             if sys in systematics.ttJets_sys.values()
                             else Plotter.BKG_Est(sig_reg=sig_histo[:, sys].integrate("sys"),
                                                  sb_reg=cen_sb_histo,
                                                  norm_type="SigMC",
                                                  sys=sys,
                                                  ignore_uncs=True))

                ## write nominal and systematic variations for each topology to file
                #for proc in sorted(set([key[0] for key in sig_histo.values().keys()])):
                for proc in sorted(set(key[0] for key in sys_histo.values().keys())):
                    if ("tt" not in proc) and (sys in systematics.ttJets_sys.values()):
                        continue
                    #if (proc != "tt") and (sys in systematics.ttJets_sys.values()): continue
                    if (proc == "data_obs") and (sys != "nosys"): continue
                    if not sys_histo[proc].values().keys():
                        #if not sig_histo[proc, sys].values().keys():
                        print(f"Systematic {sys} for {lep} {jmult} {proc} not found, skipping")
                        continue

                    print(args.year, lep, jmult, sys, proc)
                    #set_trace()
                    outhname = "_".join(filter(None, [
                        proc,
                        systematics.template_sys_to_name[args.year][sys][0],
                        lepdir,
                        (args.year)[-2:],
                    ]))
                    if "LEP" in outhname:
                        outhname = outhname.replace("LEP", "muon" if lep == "Muon" else "electron")

                    template_histo = sys_histo[proc].integrate("process")
                    #template_histo = sig_histo[proc, sys].integrate("process").integrate("sys")

                    #set_trace()
                    ## save template histos to coffea dict
                    if jmult == "3Jets":
                        histo_dict_3j[lep][
                            f"{proc}_{sys}"] = template_histo.copy()
                    if jmult == "4PJets":
                        histo_dict_4pj[lep][
                            f"{proc}_{sys}"] = template_histo.copy()

                        ## save template histo to root file
                    upfout[outhname] = hist.export1d(template_histo)

    if "3Jets" in njets_to_run:
        coffea_out_3j = os.path.join(
            outdir,
            f"test_raw_templates_lj_3Jets_bkg_{args.year}_{jobid}.coffea")
        save(histo_dict_3j, coffea_out_3j)
        print(f"{coffea_out_3j} written")
    if "4PJets" in njets_to_run:
        coffea_out_4pj = os.path.join(
            outdir,
            f"test_raw_templates_lj_4PJets_bkg_{args.year}_{jobid}.coffea")
        save(histo_dict_4pj, coffea_out_4pj)
        print(f"{coffea_out_4pj} written")

    upfout.close()
    print(f"{tmp_rname} written")
Example #2
    def process(self, file, analysis_type):
        output = self.accumulator.identity()
        acc = load(file)

        Muon_lead_acc = acc['Muon_lead']
        Muon_trail_acc = acc['Muon_trail']
        Dimu_acc = acc['Dimu']
        D0_acc = acc['D0']
        D0_trk_acc = acc['D0_trk']
        Dstar_acc = acc['Dstar']
        Dstar_trk_acc = acc['Dstar_trk']
        DimuDstar_acc = acc['DimuDstar']
        Primary_vertex_acc = acc['Primary_vertex']

        DimuDstar_p4 = build_p4(DimuDstar_acc)
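
        # build_p4 (helper defined elsewhere) presumably assembles Lorentz four-vectors
        # from the DimuDstar accumulator's kinematic columns; its .mass is used below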

        if (analysis_type == 'mc'):
            Gen_Part_acc = acc['Gen_particles']
            Gen_Jpsi_acc = acc['Gen_Jpsi']

        ########## Filling histograms
        if (analysis_type == 'mc'):
            ## Gen Particles
            print(Gen_Jpsi_acc['mass'].value)
            output['GenPart_pdgId'].fill(pdgId=Gen_Part_acc['pdgId'].value)
            output['GenJpsi_mass'].fill(mass=Gen_Jpsi_acc['mass'].value)
            output['GenJpsi_p'].fill(pt=Gen_Jpsi_acc['pt'].value,
                                     eta=Gen_Jpsi_acc['eta'].value,
                                     phi=Gen_Jpsi_acc['phi'].value)

            #print(dir(Gen_Jpsi_acc))
            #output['GenJpsi_mass'].fill(pdgId=Gen_Jpsi_acc['pdgId'].value)

        # Primary vertex
        output['Primary_vertex_npvs'].fill(
            npvs=Primary_vertex_acc['npvs'].value)

        #Muon
        output['Muon_lead_p'].fill(pt=Muon_lead_acc['pt'].value,
                                   eta=Muon_lead_acc['eta'].value,
                                   phi=Muon_lead_acc['phi'].value)
        output['Muon_trail_p'].fill(pt=Muon_trail_acc['pt'].value,
                                    eta=Muon_trail_acc['eta'].value,
                                    phi=Muon_trail_acc['phi'].value)

        # Upsilon
        output['Upsilon_mass'].fill(
            mass=Dimu_acc['mass'].value[Dimu_acc['is_ups'].value])
        output['Upsilon_p'].fill(
            pt=Dimu_acc['pt'].value[Dimu_acc['is_ups'].value],
            eta=Dimu_acc['eta'].value[Dimu_acc['is_ups'].value],
            phi=Dimu_acc['phi'].value[Dimu_acc['is_ups'].value])
        output['Upsilon_rap'].fill(
            rap=Dimu_acc['rap'].value[Dimu_acc['is_ups'].value])
        output['Upsilon_dl'].fill(
            dl=Dimu_acc['dl'].value[Dimu_acc['is_ups'].value])
        output['Upsilon_dlSig'].fill(
            dlSig=Dimu_acc['dlSig'].value[Dimu_acc['is_ups'].value])
        output['Upsilon_chi2'].fill(
            chi2=Dimu_acc['chi2'].value[Dimu_acc['is_ups'].value])
        output['Upsilon_cosphi'].fill(
            cosphi=Dimu_acc['cosphi'].value[Dimu_acc['is_ups'].value])

        # Jpsi
        output['Jpsi_mass'].fill(
            mass=Dimu_acc['mass'].value[Dimu_acc['is_jpsi'].value])
        output['Jpsi_p'].fill(
            pt=Dimu_acc['pt'].value[Dimu_acc['is_jpsi'].value],
            eta=Dimu_acc['eta'].value[Dimu_acc['is_jpsi'].value],
            phi=Dimu_acc['phi'].value[Dimu_acc['is_jpsi'].value])
        output['Jpsi_rap'].fill(
            rap=Dimu_acc['rap'].value[Dimu_acc['is_jpsi'].value])
        output['Jpsi_dl'].fill(
            dl=Dimu_acc['dl'].value[Dimu_acc['is_jpsi'].value])
        output['Jpsi_dlSig'].fill(
            dlSig=Dimu_acc['dlSig'].value[Dimu_acc['is_jpsi'].value])
        output['Jpsi_chi2'].fill(
            chi2=Dimu_acc['chi2'].value[Dimu_acc['is_jpsi'].value])
        output['Jpsi_cosphi'].fill(
            cosphi=Dimu_acc['cosphi'].value[Dimu_acc['is_jpsi'].value])

        # Psi
        output['Psi_mass'].fill(
            mass=Dimu_acc['mass'].value[Dimu_acc['is_psi'].value])
        output['Psi_p'].fill(
            pt=Dimu_acc['pt'].value[Dimu_acc['is_psi'].value],
            eta=Dimu_acc['eta'].value[Dimu_acc['is_psi'].value],
            phi=Dimu_acc['phi'].value[Dimu_acc['is_psi'].value])
        output['Psi_rap'].fill(
            rap=Dimu_acc['rap'].value[Dimu_acc['is_psi'].value])
        output['Psi_dl'].fill(
            dl=Dimu_acc['dl'].value[Dimu_acc['is_psi'].value])
        output['Psi_dlSig'].fill(
            dlSig=Dimu_acc['dlSig'].value[Dimu_acc['is_psi'].value])
        output['Psi_chi2'].fill(
            chi2=Dimu_acc['chi2'].value[Dimu_acc['is_psi'].value])
        output['Psi_cosphi'].fill(
            cosphi=Dimu_acc['cosphi'].value[Dimu_acc['is_psi'].value])

        # D0
        output['D0_mass12'].fill(mass=D0_acc['mass12'].value)
        output['D0_mass21'].fill(mass=D0_acc['mass21'].value)
        output['D0_p'].fill(pt=D0_acc['pt'].value,
                            eta=D0_acc['eta'].value,
                            phi=D0_acc['phi'].value)
        output['D0_rap'].fill(rap=D0_acc['rap'].value)
        output['D0_dl'].fill(dl=D0_acc['dl'].value)
        output['D0_dlSig'].fill(dlSig=D0_acc['dlSig'].value)
        output['D0_chi2'].fill(chi2=D0_acc['chi2'].value)
        output['D0_cosphi'].fill(cosphi=D0_acc['cosphi'].value)
        output['D0_eta_mass'].fill(eta=D0_acc['eta'].value,
                                   mass=D0_acc['mass'].value)

        # D0 trks
        output['D0_trk_p'].fill(pt=D0_trk_acc['t1_pt'].value,
                                eta=D0_trk_acc['t1_eta'].value,
                                phi=D0_trk_acc['t1_phi'].value)
        output['D0_trk_p'].fill(pt=D0_trk_acc['t2_pt'].value,
                                eta=D0_trk_acc['t2_eta'].value,
                                phi=D0_trk_acc['t2_phi'].value)
        output['D0_trk_chindof'].fill(chindof=D0_trk_acc['t1_chindof'].value)
        output['D0_trk_chindof'].fill(chindof=D0_trk_acc['t2_chindof'].value)
        output['D0_trk_nValid'].fill(nValid=D0_trk_acc['t1_nValid'].value)
        output['D0_trk_nValid'].fill(nValid=D0_trk_acc['t2_nValid'].value)
        output['D0_trk_nPix'].fill(nPix=D0_trk_acc['t1_nPix'].value)
        output['D0_trk_nPix'].fill(nPix=D0_trk_acc['t2_nPix'].value)
        output['D0_trk_dxy'].fill(dxy=D0_trk_acc['t1_dxy'].value)
        output['D0_trk_dxy'].fill(dxy=D0_trk_acc['t2_dxy'].value)
        output['D0_trk_dz'].fill(dz=D0_trk_acc['t1_dz'].value)
        output['D0_trk_dz'].fill(dz=D0_trk_acc['t2_dz'].value)

        # Dstar
        output['Dstar_p'].fill(
            chg='right charge',
            pt=Dstar_acc['pt'].value[~Dstar_acc['wrg_chg'].value],
            eta=Dstar_acc['eta'].value[~Dstar_acc['wrg_chg'].value],
            phi=Dstar_acc['phi'].value[~Dstar_acc['wrg_chg'].value])
        output['Dstar_p'].fill(
            chg='wrong charge',
            pt=Dstar_acc['pt'].value[Dstar_acc['wrg_chg'].value],
            eta=Dstar_acc['eta'].value[Dstar_acc['wrg_chg'].value],
            phi=Dstar_acc['phi'].value[Dstar_acc['wrg_chg'].value])
        output['Dstar_rap'].fill(
            chg='right charge',
            rap=Dstar_acc['rap'].value[~Dstar_acc['wrg_chg'].value])
        output['Dstar_rap'].fill(
            chg='wrong charge',
            rap=Dstar_acc['rap'].value[Dstar_acc['wrg_chg'].value])
        output['Dstar_deltamr'].fill(
            chg='right charge',
            deltamr=Dstar_acc['deltamr'].value[~Dstar_acc['wrg_chg'].value])
        output['Dstar_deltamr'].fill(
            chg='wrong charge',
            deltamr=Dstar_acc['deltamr'].value[Dstar_acc['wrg_chg'].value])
        output['Dstar_deltam'].fill(
            chg='right charge',
            deltam=Dstar_acc['deltam'].value[~Dstar_acc['wrg_chg'].value])
        output['Dstar_deltam'].fill(
            chg='wrong charge',
            deltam=Dstar_acc['deltam'].value[Dstar_acc['wrg_chg'].value])

        # Dstar trks
        output['Dstar_K_p'].fill(
            pt=Dstar_trk_acc['K_pt'].value[~Dstar_acc['wrg_chg'].value],
            eta=Dstar_trk_acc['K_eta'].value[~Dstar_acc['wrg_chg'].value],
            phi=Dstar_trk_acc['K_phi'].value[~Dstar_acc['wrg_chg'].value])
        output['Dstar_K_chindof'].fill(
            chindof=Dstar_trk_acc['K_chindof'].value[~Dstar_acc['wrg_chg'].value])
        output['Dstar_K_nValid'].fill(
            nValid=Dstar_trk_acc['K_nValid'].value[~Dstar_acc['wrg_chg'].value])
        output['Dstar_K_nPix'].fill(
            nPix=Dstar_trk_acc['K_nPix'].value[~Dstar_acc['wrg_chg'].value])
        output['Dstar_K_dxy'].fill(
            dxy=Dstar_trk_acc['K_dxy'].value[~Dstar_acc['wrg_chg'].value])
        output['Dstar_K_dz'].fill(
            dz=Dstar_trk_acc['K_dz'].value[~Dstar_acc['wrg_chg'].value])
        output['Dstar_K_pt_eta'].fill(
            pt=Dstar_trk_acc['K_pt'].value[~Dstar_acc['wrg_chg'].value],
            eta=Dstar_trk_acc['K_eta'].value[~Dstar_acc['wrg_chg'].value])

        output['Dstar_pi_p'].fill(
            pt=Dstar_trk_acc['pi_pt'].value[~Dstar_acc['wrg_chg'].value],
            eta=Dstar_trk_acc['pi_eta'].value[~Dstar_acc['wrg_chg'].value],
            phi=Dstar_trk_acc['pi_phi'].value[~Dstar_acc['wrg_chg'].value])
        output['Dstar_pi_chindof'].fill(
            chindof=Dstar_trk_acc['pi_chindof'].value[~Dstar_acc['wrg_chg'].value])
        output['Dstar_pi_nValid'].fill(
            nValid=Dstar_trk_acc['pi_nValid'].value[~Dstar_acc['wrg_chg'].value])
        output['Dstar_pi_nPix'].fill(
            nPix=Dstar_trk_acc['pi_nPix'].value[~Dstar_acc['wrg_chg'].value])
        output['Dstar_pi_dxy'].fill(
            dxy=Dstar_trk_acc['pi_dxy'].value[~Dstar_acc['wrg_chg'].value])
        output['Dstar_pi_dz'].fill(
            dz=Dstar_trk_acc['pi_dz'].value[~Dstar_acc['wrg_chg'].value])
        output['Dstar_pi_pt_eta'].fill(
            pt=Dstar_trk_acc['pi_pt'].value[~Dstar_acc['wrg_chg'].value],
            eta=Dstar_trk_acc['pi_eta'].value[~Dstar_acc['wrg_chg'].value])

        output['Dstar_pis_p'].fill(
            pt=Dstar_trk_acc['pis_pt'].value[~Dstar_acc['wrg_chg'].value],
            eta=Dstar_trk_acc['pis_eta'].value[~Dstar_acc['wrg_chg'].value],
            phi=Dstar_trk_acc['pis_phi'].value[~Dstar_acc['wrg_chg'].value])
        output['Dstar_pis_chindof'].fill(
            chindof=Dstar_trk_acc['pis_chindof'].value[~Dstar_acc['wrg_chg'].value])
        output['Dstar_pis_nValid'].fill(
            nValid=Dstar_trk_acc['pis_nValid'].value[~Dstar_acc['wrg_chg'].value])
        output['Dstar_pis_nPix'].fill(
            nPix=Dstar_trk_acc['pis_nPix'].value[~Dstar_acc['wrg_chg'].value])
        output['Dstar_pis_dxy'].fill(
            dxy=Dstar_trk_acc['pis_dxy'].value[~Dstar_acc['wrg_chg'].value])
        output['Dstar_pis_dz'].fill(
            dz=Dstar_trk_acc['pis_dz'].value[~Dstar_acc['wrg_chg'].value])

        ############# DimuDstar
        is_ups = DimuDstar_acc['Dimu']['is_ups'].value
        is_jpsi = DimuDstar_acc['Dimu']['is_jpsi'].value
        is_psi = DimuDstar_acc['Dimu']['is_psi'].value
        wrg_chg = DimuDstar_acc['Dstar']['wrg_chg'].value
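        # boolean masks over DimuDstar candidates; combinations such as
        # (is_jpsi & ~wrg_chg) select right-charge D* candidates paired with a J/psi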

        # Upsilon
        output['UpsilonDstar']['Upsilon_mass'].fill(
            mass=DimuDstar_acc['Dimu']['mass'].value[is_ups & ~wrg_chg])
        output['UpsilonDstar']['Upsilon_p'].fill(
            pt=DimuDstar_acc['Dimu']['pt'].value[is_ups & ~wrg_chg],
            eta=DimuDstar_acc['Dimu']['eta'].value[is_ups & ~wrg_chg],
            phi=DimuDstar_acc['Dimu']['phi'].value[is_ups & ~wrg_chg])
        output['UpsilonDstar']['Upsilon_rap'].fill(
            rap=DimuDstar_acc['Dimu']['rap'].value[is_ups & ~wrg_chg])

        output['UpsilonDstar']['Dstar_deltamr'].fill(
            chg='right charge',
            deltamr=DimuDstar_acc['Dstar']['deltamr'].value[is_ups & ~wrg_chg])
        output['UpsilonDstar']['Dstar_deltamr'].fill(
            chg='wrong charge',
            deltamr=DimuDstar_acc['Dstar']['deltamr'].value[is_ups & wrg_chg])
        output['UpsilonDstar']['Dstar_deltam'].fill(
            chg='right charge',
            deltam=DimuDstar_acc['Dstar']['deltam'].value[is_ups & ~wrg_chg])
        output['UpsilonDstar']['Dstar_deltam'].fill(
            chg='wrong charge',
            deltam=DimuDstar_acc['Dstar']['deltam'].value[is_ups & wrg_chg])
        output['UpsilonDstar']['Dstar_p'].fill(
            chg='right charge',
            pt=DimuDstar_acc['Dstar']['pt'].value[is_ups & ~wrg_chg],
            eta=DimuDstar_acc['Dstar']['eta'].value[is_ups & ~wrg_chg],
            phi=DimuDstar_acc['Dstar']['phi'].value[is_ups & ~wrg_chg])
        output['UpsilonDstar']['Dstar_p'].fill(
            chg='wrong charge',
            pt=DimuDstar_acc['Dstar']['pt'].value[is_ups & wrg_chg],
            eta=DimuDstar_acc['Dstar']['eta'].value[is_ups & wrg_chg],
            phi=DimuDstar_acc['Dstar']['phi'].value[is_ups & wrg_chg])
        output['UpsilonDstar']['Dstar_rap'].fill(
            chg='right charge',
            rap=DimuDstar_acc['Dstar']['rap'].value[is_ups & ~wrg_chg])
        output['UpsilonDstar']['Dstar_rap'].fill(
            chg='wrong charge',
            rap=DimuDstar_acc['Dstar']['rap'].value[is_ups & wrg_chg])

        output['UpsilonDstar']['UpsilonDstar_deltarap'].fill(
            deltarap=DimuDstar_acc['deltarap'].value[is_ups & ~wrg_chg])
        output['UpsilonDstar']['UpsilonDstar_mass'].fill(
            mass=DimuDstar_p4.mass[is_ups & ~wrg_chg])

        # Jpsi
        output['JpsiDstar']['Jpsi_mass'].fill(
            mass=DimuDstar_acc['Dimu']['mass'].value[is_jpsi & ~wrg_chg])
        output['JpsiDstar']['Jpsi_p'].fill(
            pt=DimuDstar_acc['Dimu']['pt'].value[is_jpsi & ~wrg_chg],
            eta=DimuDstar_acc['Dimu']['eta'].value[is_jpsi & ~wrg_chg],
            phi=DimuDstar_acc['Dimu']['phi'].value[is_jpsi & ~wrg_chg])
        output['JpsiDstar']['Jpsi_rap'].fill(
            rap=DimuDstar_acc['Dimu']['rap'].value[is_jpsi & ~wrg_chg])

        output['JpsiDstar']['Dstar_deltamr'].fill(
            chg='right charge',
            deltamr=DimuDstar_acc['Dstar']['deltamr'].value[is_jpsi & ~wrg_chg])
        output['JpsiDstar']['Dstar_deltamr'].fill(
            chg='wrong charge',
            deltamr=DimuDstar_acc['Dstar']['deltamr'].value[is_jpsi & wrg_chg])
        output['JpsiDstar']['Dstar_deltam'].fill(
            chg='right charge',
            deltam=DimuDstar_acc['Dstar']['deltam'].value[is_jpsi & ~wrg_chg])
        output['JpsiDstar']['Dstar_deltam'].fill(
            chg='wrong charge',
            deltam=DimuDstar_acc['Dstar']['deltam'].value[is_jpsi & wrg_chg])
        output['JpsiDstar']['Dstar_p'].fill(
            chg='right charge',
            pt=DimuDstar_acc['Dstar']['pt'].value[is_jpsi & ~wrg_chg],
            eta=DimuDstar_acc['Dstar']['eta'].value[is_jpsi & ~wrg_chg],
            phi=DimuDstar_acc['Dstar']['phi'].value[is_jpsi & ~wrg_chg])
        output['JpsiDstar']['Dstar_p'].fill(
            chg='wrong charge',
            pt=DimuDstar_acc['Dstar']['pt'].value[is_jpsi & wrg_chg],
            eta=DimuDstar_acc['Dstar']['eta'].value[is_jpsi & wrg_chg],
            phi=DimuDstar_acc['Dstar']['phi'].value[is_jpsi & wrg_chg])
        output['JpsiDstar']['Dstar_rap'].fill(
            chg='right charge',
            rap=DimuDstar_acc['Dstar']['rap'].value[is_jpsi & ~wrg_chg])
        output['JpsiDstar']['Dstar_rap'].fill(
            chg='wrong charge',
            rap=DimuDstar_acc['Dstar']['rap'].value[is_jpsi & wrg_chg])

        output['JpsiDstar']['JpsiDstar_deltarap'].fill(
            deltarap=DimuDstar_acc['deltarap'].value[is_jpsi & ~wrg_chg])
        output['JpsiDstar']['JpsiDstar_mass'].fill(
            mass=DimuDstar_p4.mass[is_jpsi & ~wrg_chg])

        # Psi
        output['PsiDstar']['Psi_mass'].fill(
            mass=DimuDstar_acc['Dimu']['mass'].value[is_psi & ~wrg_chg])
        output['PsiDstar']['Psi_p'].fill(
            pt=DimuDstar_acc['Dimu']['pt'].value[is_psi & ~wrg_chg],
            eta=DimuDstar_acc['Dimu']['eta'].value[is_psi & ~wrg_chg],
            phi=DimuDstar_acc['Dimu']['phi'].value[is_psi & ~wrg_chg])
        output['PsiDstar']['Psi_rap'].fill(
            rap=DimuDstar_acc['Dimu']['rap'].value[is_psi & ~wrg_chg])

        output['PsiDstar']['Dstar_deltamr'].fill(
            chg='right charge',
            deltamr=DimuDstar_acc['Dstar']['deltamr'].value[is_psi & ~wrg_chg])
        output['PsiDstar']['Dstar_deltamr'].fill(
            chg='wrong charge',
            deltamr=DimuDstar_acc['Dstar']['deltamr'].value[is_psi & wrg_chg])
        output['PsiDstar']['Dstar_deltam'].fill(
            chg='right charge',
            deltam=DimuDstar_acc['Dstar']['deltam'].value[is_psi & ~wrg_chg])
        output['PsiDstar']['Dstar_deltam'].fill(
            chg='wrong charge',
            deltam=DimuDstar_acc['Dstar']['deltam'].value[is_psi & wrg_chg])
        output['PsiDstar']['Dstar_p'].fill(
            chg='right charge',
            pt=DimuDstar_acc['Dstar']['pt'].value[is_psi & ~wrg_chg],
            eta=DimuDstar_acc['Dstar']['eta'].value[is_psi & ~wrg_chg],
            phi=DimuDstar_acc['Dstar']['phi'].value[is_psi & ~wrg_chg])
        output['PsiDstar']['Dstar_p'].fill(
            chg='wrong charge',
            pt=DimuDstar_acc['Dstar']['pt'].value[is_psi & wrg_chg],
            eta=DimuDstar_acc['Dstar']['eta'].value[is_psi & wrg_chg],
            phi=DimuDstar_acc['Dstar']['phi'].value[is_psi & wrg_chg])
        output['PsiDstar']['Dstar_rap'].fill(
            chg='right charge',
            rap=DimuDstar_acc['Dstar']['rap'].value[is_psi & ~wrg_chg])
        output['PsiDstar']['Dstar_rap'].fill(
            chg='wrong charge',
            rap=DimuDstar_acc['Dstar']['rap'].value[is_psi & wrg_chg])

        output['PsiDstar']['PsiDstar_deltarap'].fill(
            deltarap=DimuDstar_acc['deltarap'].value[is_psi & ~wrg_chg])
        output['PsiDstar']['PsiDstar_mass'].fill(
            mass=DimuDstar_p4.mass[is_psi & ~wrg_chg])

        return output
Example #3
def make_trees(args):

    filelists = files_by_dataset(args.files)
    # The output for each dataset will be written into a separate file
    for dataset, files in filelists.items():
        # Find region and branch names
        datatypes = {}
        tree_by_variable = {}
        variables = []
        regions = []

        # Scout out what branches there are
        for fname in files:
            acc = load(fname)

            treenames = [
                x for x in map(str, acc.keys()) if x.startswith("tree")
            ]

            for tn in treenames:
                datatype = tn.split("_")[-1]
                for region in acc[tn].keys():
                    branches = acc[tn][region].keys()
                    regions.append(region)
                    variables.extend(branches)
                    for v in branches:
                        datatypes[v] = np.float64  #getattr(np, datatype)
                        tree_by_variable[v] = tn

        # Combine
        with uproot.recreate(pjoin(args.outdir, f"tree_{dataset}.root"),
                             compression=uproot.ZLIB(4)) as f:
            for region, fname in tqdm(list(itertools.product(set(regions), files)), desc=dataset):
                acc = load(fname)
                d = {
                    x: acc[tree_by_variable[x]][region][x].value
                    for x in variables
                }

                # Remove empty entries
                to_remove = []
                for k, v in d.items():
                    if not len(v):
                        to_remove.append(k)
                for k in to_remove:
                    d.pop(k)

                if not len(d):
                    continue
                if region not in [re.sub(";.*", "", x.decode("utf-8")) for x in f.keys()]:
                    f[region] = uproot.newtree({x: np.float64 for x in d.keys()})

                # every branch written to a tree must have the same number of entries
                lengths = {len(v) for v in d.values()}
                assert len(lengths) == 1
                # write
                f[region].extend(d)
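
# A minimal driver sketch (assumption, not from the source): make_trees expects an
# object with .files and .outdir attributes, which argparse can provide.
if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("files", nargs="+", help="input .coffea accumulator files")
    parser.add_argument("--outdir", default=".", help="destination for tree_<dataset>.root files")
    make_trees(parser.parse_args())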
Example #4
	## Json file reader
	with open(metadata) as fin:
		datadict = json.load(fin)

	filelist = glob.glob(datadict[data_sample])

	if isFake:
		sample_name = "Fake_Photon"
	else:
		sample_name = data_sample.split("_")[0]

	corr_file = "../Corrections/corrections.coffea"
	# corr_file = "corrections.coffea" # Condor-batch

	corrections = load(corr_file)

	## Read PU weight file

	if not isdata:
		pu_path_dict = {
			"DY": "mcPileupDist_DYToEE_M-50_NNPDF31_TuneCP5_13TeV-powheg-pythia8.npy",
			"TTWJets": "mcPileupDist_TTWJetsToLNu_TuneCP5_13TeV-amcatnloFXFX-madspin-pythia8.npy",
			"TTZtoLL": "mcPileupDist_TTZToLLNuNu_M-10_TuneCP5_13TeV-amcatnlo-pythia8.npy",
			"WW": "mcPileupDist_WW_TuneCP5_DoubleScattering_13TeV-pythia8.npy",
			"WZ": "mcPileupDist_WZ_TuneCP5_13TeV-pythia8.npy",
			"ZZ": "mcPileupDist_ZZ_TuneCP5_13TeV-pythia8.npy",
			"tZq": "mcPileupDist_tZq_ll_4f_ckm_NLO_TuneCP5_13TeV-amcatnlo-pythia8.npy",
			"WZG": "mcPileupDist_wza_UL18.npy",
			"ZGToLLG": "mcPileupDist_ZGToLLG_01J_5f_TuneCP5_13TeV-amcatnloFXFX-pythia8.npy",
			"TTGJets": "mcPileupDist_TTGJets_TuneCP5_13TeV-amcatnloFXFX-madspin-pythia8.npy",
Example #5
triggers = ["HLT_Mu7_IP4", "HLT_Mu9_IP5", "HLT_Mu9_IP6", "HLT_Mu12_IP6"]
btypes = ["Bu", "Bs", "Bd"]
btype_longnames = {
    "Bu": "Bu2KJpsi2KMuMu",
    "Bs": "Bs2PhiJpsi2KKMuMu",
    "Bd": "Bd2KsJpsi2KPiMuMu"
}
btype_shortnames = {"Bu": "BuToKMuMu", "Bs": "BsToKKMuMu", "Bd": "BdToKPiMuMu"}

vars = ["pt", "y"]

# MC probefilter efficiency
coffea_files = {}
for btype in btypes:
    coffea_files[btype] = util.load(
        f"{btype_longnames[btype]}/MCEfficiencyHistograms.coffea")

axes = {}
axes["pt"] = {
    "probe":
    hist.Bin("pt", r"$p_{T}$ [GeV]", np.array([8., 13., 18., 23., 28., 33.])),
    "tag":
    hist.Bin(
        "pt", r"$p_{T}$ [GeV]",
        np.array([
            10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 18.0, 20.0, 23.0, 26.0,
            29.0, 34.0, 45.0
        ]))
}
axes["y"] = {
    "probe": hist.Bin("y", r"$|y|$", np.array(np.arange(0., 2.25 + 0.25,
Example #6
    }
    for year in years_to_run
}

if args.construct_btag:
    from copy import deepcopy
    btag_contructs_dict = deepcopy(flav_effs)

jet_mults = {
    '3Jets': '3 jets',
    '4PJets': '4+ jets',
}

flav_to_name = {'bjet': 'bottom', 'cjet': 'charm', 'ljet': 'light'}
hname = 'Jets_pt_eta'
lumi_correction = load(
    os.path.join(proj_dir, 'Corrections', jobid, 'MC_LumiWeights.coffea'))

#pt_binning = np.array([30.0, 35.0, 40.0, 45.0, 50.0, 55.0, 60.0, 65.0, 70.0, 75.0, 80.0, 85.0, 90.0, 95.0, 100.0, 105.0, 110.0, 125.0, 150.0,170.0, 200.0, 250.0, 1000.0])
#eta_binning = np.array([-2.5, -1.5, -0.5, 0.0, 0.5, 1.5, 2.5])
pt_binning = np.array([
    30.0, 35.0, 40.0, 45.0, 50.0, 55.0, 60.0, 65.0, 70.0, 75.0, 80.0, 85.0,
    90.0, 95.0, 100.0, 105.0, 110.0, 125.0, 150.0, 170.0, 200.0, 1000.0
])
eta_binning = np.array(
    [-2.5, -2., -1.5, -1.0, -0.5, 0.0, 0.5, 1.0, 1.5, 2.0, 2.5])
pt_bins = hist.Bin('pt', 'pt', pt_binning)
eta_bins = hist.Bin('eta', 'eta', eta_binning)

working_points = []

Example #7
def test_hist_compat():
    from coffea.util import load

    test = load("tests/samples/old_hist_format.coffea")

    # bin edges: underflow, 0 to 1200 in steps of 20, overflow, and a NaN flow bin
    expected_bins = np.array([-np.inf] + list(np.arange(0.0, 1201.0, 20.0)) + [np.inf, np.nan])
    assert np.all(test._axes[2]._interval_bins[:-1] == expected_bins[:-1])
    assert np.isnan(test._axes[2]._interval_bins[-1])
Example #8
parser.add_argument('lepton', choices=['Electron', 'Muon'], help='Choose which lepton to make plots for')
args = parser.parse_args()

proj_dir = os.environ['PROJECT_DIR']
jobid = os.environ['jobid']
base_jobid = os.environ['base_jobid']
analyzer = 'data_hem_comp'

input_dir = os.path.join(proj_dir, 'results', '%s_%s' % (args.year, jobid), analyzer)
f_ext = 'TOT.coffea'
outdir = os.path.join(proj_dir, 'plots', '%s_%s' % (args.year, jobid), analyzer)
if not os.path.isdir(outdir):
    os.makedirs(outdir)

fnames = sorted(['%s/%s' % (input_dir, fname) for fname in os.listdir(input_dir) if fname.endswith(f_ext)])
hdict = plt_tools.add_coffea_files(fnames) if len(fnames) > 1 else load(fnames[0])

jet_mults = {
    '3Jets' : '3 jets',
    '4PJets' : '4+ jets'
}

objtypes = {
    'Jets' : 'jets',
    'Lep' :  {
        'Muon' : '$\\mu$',
        'Electron' : '$e$',
    }
}

btag_cats = {
Example #9
    def process(self, ds):
        output = self.accumulator.identity()
        acc = load(ds["file"])

        ############ Histogram definition
        # Muons
        hist_muon_lead = bh.Histogram(
            bh.axis.Regular(100, 0, 50, metadata=r"$p_{T,\mu}$ [GeV]"),
            bh.axis.Regular(60, -2.5, 2.5, metadata=r"$\eta_{\mu}$"),
            bh.axis.Regular(70, -3.5, 3.5, metadata=r"$\phi_{\mu}$"),
        )

        hist_muon_trail = bh.Histogram(
            bh.axis.Regular(100, 0, 50, metadata=r"$p_{T,\mu}$ [GeV]"),
            bh.axis.Regular(60, -2.5, 2.5, metadata=r"$\eta_{\mu}$"),
            bh.axis.Regular(70, -3.5, 3.5, metadata=r"$\phi_{\mu}$"),
        )

        #Dimu
        hist_dimu = bh.Histogram(
            bh.axis.Regular(100, 0, 50, metadata=r"$p_{T,\mu^+\mu^-}$ [GeV]"),
            bh.axis.Regular(80, -2.5, 2.5, metadata=r"$\eta_{\mu^+\mu^-}$"),
            bh.axis.Regular(70, -3.5, 3.5, metadata=r"$\phi_{\mu^+\mu^-}$"),
        )

        hist_dimu_mass = bh.Histogram(
            bh.axis.Regular(100, 8.6, 11, metadata=r"$m_{\mu^+\mu^-}$ [GeV]"))

        # D0
        hist_D0 = bh.Histogram(
            bh.axis.Regular(100, 0, 50, metadata=r"$p_{T,D^0}$ [GeV]"),
            bh.axis.Regular(80, -2.5, 2.5, metadata=r"$\eta_{D^0}$"),
            bh.axis.Regular(70, -3.5, 3.5, metadata=r"$\phi_{D^0}$"),
        )

        hist_D0_mass = bh.Histogram(
            bh.axis.Regular(100, 1.7, 2.0, metadata=r"$m_{D^0}$ [GeV]"))

        hist_D0_eta_mass = bh.Histogram(
            bh.axis.Regular(80, -2.5, 2.5, metadata=r"$\eta_{D^0}$"),
            bh.axis.Regular(100, 1.7, 2.0, metadata=r"$m_{D^0}$ [GeV]"))

        hist_D0_trk = bh.Histogram(
            bh.axis.Regular(100, 0, 50, metadata=r"$p_{T,D^0 trks}$ [GeV]"),
            bh.axis.Regular(80, -2.5, 2.5, metadata=r"$\eta_{D^0 trks}$"),
            bh.axis.Regular(70, -3.5, 3.5, metadata=r"$\phi_{D^0 trks}$"),
        )

        # Dstar
        hist_Dstar = bh.Histogram(
            bh.axis.Regular(100, 0, 50, metadata=r"$p_{T,D*}$ [GeV]"),
            bh.axis.Regular(60, -2.5, 2.5, metadata=r"$\eta_{D*}$"),
            bh.axis.Regular(70, -3.5, 3.5, metadata=r"$\phi_{D*}$"),
        )

        hist_Dstar_K = bh.Histogram(
            bh.axis.Regular(100, 0, 30, metadata=r"$p_{T,D* K}$ [GeV]"),
            bh.axis.Regular(60, -2.5, 2.5, metadata=r"$\eta_{D* K}$"),
            bh.axis.Regular(70, -3.5, 3.5, metadata=r"$\phi_{D* K}$"),
        )

        hist_Dstar_pi = bh.Histogram(
            bh.axis.Regular(100, 0, 30, metadata=r"$p_{T,D* \pi}$ [GeV]"),
            bh.axis.Regular(60, -2.5, 2.5, metadata=r"$\eta_{D* \pi}$"),
            bh.axis.Regular(70, -3.5, 3.5, metadata=r"$\phi_{D* \pi}$"),
        )

        hist_Dstar_pis = bh.Histogram(
            bh.axis.Regular(100, 0, 20, metadata=r"$p_{T,\pi_s}$ [GeV]"),
            bh.axis.Regular(60, -2.5, 2.5, metadata=r"$\eta_{\pi_s}$"),
            bh.axis.Regular(70, -3.5, 3.5, metadata=r"$\phi_{\pi_s}$"),
        )

        hist_Dstar_mass = bh.Histogram(
            bh.axis.Regular(100, 1.8, 2.2, metadata=r"$m_{D*}$ [GeV]"))
        hist_Dstar_mass_refit = bh.Histogram(
            bh.axis.Regular(100, 1.8, 2.2, metadata=r"$m_{D* refit}$ [GeV]"))
        hist_Dstar_deltamr = bh.Histogram(
            bh.axis.Regular(50,
                            0.138,
                            0.162,
                            metadata=r"$\Delta m_{refit}$ [GeV]"))
        hist_Dstar_deltam = bh.Histogram(
            bh.axis.Regular(50, 0.138, 0.162, metadata=r"$\Delta m$ [GeV]"))

        # Filling histograms
        hist_muon_lead.fill(acc["Muon_lead"]["__fast_pt"].value,
                            acc["Muon_lead"]["__fast_eta"].value,
                            acc["Muon_lead"]["__fast_phi"].value)

        hist_muon_trail.fill(acc["Muon_trail"]["__fast_pt"].value,
                             acc["Muon_trail"]["__fast_eta"].value,
                             acc["Muon_trail"]["__fast_phi"].value)

        hist_dimu.fill(acc["Dimu"]["__fast_pt"].value,
                       acc["Dimu"]["__fast_eta"].value,
                       acc["Dimu"]["__fast_phi"].value)

        hist_dimu_mass.fill(acc["Dimu"]["__fast_mass"].value)

        hist_D0.fill(acc["D0"]["__fast_pt"].value,
                     acc["D0"]["__fast_eta"].value,
                     acc["D0"]["__fast_phi"].value)

        hist_D0_mass.fill(acc["D0"]["__fast_mass"].value)

        hist_D0_eta_mass.fill(acc["D0"]["__fast_eta"].value,
                              acc["D0"]["__fast_mass"].value)

        hist_D0_trk.fill(acc["D0_trk"]["t1_pt"].value,
                         acc["D0_trk"]["t1_eta"].value,
                         acc["D0_trk"]["t1_phi"].value)

        hist_D0_trk.fill(acc["D0_trk"]["t2_pt"].value,
                         acc["D0_trk"]["t2_eta"].value,
                         acc["D0_trk"]["t2_phi"].value)

        hist_Dstar.fill(acc["Dstar"]["__fast_pt"].value,
                        acc["Dstar"]["__fast_eta"].value,
                        acc["Dstar"]["__fast_phi"].value)

        hist_Dstar_mass.fill(acc["Dstar"]["__fast_mass"].value)

        hist_Dstar_mass_refit.fill(acc["Dstar"]["deltamr"].value +
                                   acc["Dstar_D0"]["D0_mass"].value)

        hist_Dstar_K.fill(acc["Dstar_trk"]["K_pt"].value,
                          acc["Dstar_trk"]["K_eta"].value,
                          acc["Dstar_trk"]["K_phi"].value)

        hist_Dstar_pi.fill(acc["Dstar_trk"]["pi_pt"].value,
                           acc["Dstar_trk"]["pi_eta"].value,
                           acc["Dstar_trk"]["pi_phi"].value)

        hist_Dstar_pis.fill(acc["Dstar_trk"]["pis_pt"].value,
                            acc["Dstar_trk"]["pis_eta"].value,
                            acc["Dstar_trk"]["pis_phi"].value)

        hist_Dstar_deltamr.fill(acc["Dstar"]["deltamr"].value)
        hist_Dstar_deltam.fill(acc["Dstar"]["deltam"].value)

        # Saving histograms
        save(hist_muon_lead,
             "output/" + ds['analyzer_name'] + "/hist/hist_Muon_lead.hist")
        save(hist_muon_trail,
             "output/" + ds['analyzer_name'] + "/hist/hist_Muon_trail.hist")
        save(hist_dimu,
             "output/" + ds['analyzer_name'] + "/hist/hist_Dimu.hist")
        save(hist_dimu_mass,
             "output/" + ds['analyzer_name'] + "/hist/hist_Dimu_mass.hist")
        save(hist_D0, "output/" + ds['analyzer_name'] + "/hist/hist_D0.hist")
        save(hist_D0_mass,
             "output/" + ds['analyzer_name'] + "/hist/hist_D0_mass.hist")
        save(hist_Dstar,
             "output/" + ds['analyzer_name'] + "/hist/hist_Dstar.hist")
        save(hist_Dstar_mass,
             "output/" + ds['analyzer_name'] + "/hist/hist_Dstar_mass.hist")
        save(
            hist_Dstar_mass_refit, "output/" + ds['analyzer_name'] +
            "/hist/hist_Dstar_mass_refit.hist")
        save(hist_Dstar_deltamr,
             "output/" + ds['analyzer_name'] + "/hist/hist_Dstar_deltamr.hist")
        save(hist_Dstar_deltam,
             "output/" + ds['analyzer_name'] + "/hist/hist_Dstar_deltam.hist")

        # Creating plots 1D
        plots_path = "plots/" + ds['analyzer_name'] + "/"
        create_plot1d(hist_muon_lead[:, sum, sum],
                      plots_path + "Muon_lead_pt.png",
                      log=True)
        create_plot1d(hist_muon_lead[sum, :, sum],
                      plots_path + "Muon_lead_eta.png")
        create_plot1d(hist_muon_lead[sum, sum, :],
                      plots_path + "Muon_lead_phi.png")

        create_plot1d(hist_muon_trail[:, sum, sum],
                      plots_path + "Muon_trail_pt.png",
                      log=True)
        create_plot1d(hist_muon_trail[sum, :, sum],
                      plots_path + "Muon_trail_eta.png")
        create_plot1d(hist_muon_trail[sum, sum, :],
                      plots_path + "Muon_trail_phi.png")

        create_plot1d(hist_dimu[:, sum, sum],
                      plots_path + "Dimu_pt.png",
                      log=True)
        create_plot1d(hist_dimu[sum, :, sum], plots_path + "Dimu_eta.png")
        create_plot1d(hist_dimu[sum, sum, :], plots_path + "Dimu_phi.png")
        create_plot1d(hist_dimu_mass, plots_path + "Dimu_mass.png")

        create_plot1d(hist_D0[:, sum, sum], plots_path + "D0_pt.png", log=True)
        create_plot1d(hist_D0[sum, :, sum], plots_path + "D0_eta.png")
        create_plot1d(hist_D0[sum, sum, :], plots_path + "D0_phi.png")
        create_plot1d(hist_D0_mass, plots_path + "D0_mass.png")

        create_plot1d(hist_D0_trk[:, sum, sum],
                      plots_path + "D0_trk_pt.png",
                      log=True)
        create_plot1d(hist_D0_trk[sum, :, sum], plots_path + "D0_trk_eta.png")
        create_plot1d(hist_D0_trk[sum, sum, :], plots_path + "D0_trk_phi.png")

        create_plot1d(hist_Dstar[:, sum, sum],
                      plots_path + "Dstar_pt.png",
                      log=True)
        create_plot1d(hist_Dstar[sum, :, sum], plots_path + "Dstar_eta.png")
        create_plot1d(hist_Dstar[sum, sum, :], plots_path + "Dstar_phi.png")
        create_plot1d(hist_Dstar_mass, plots_path + "Dstar_mass.png")
        create_plot1d(hist_Dstar_mass_refit,
                      plots_path + "Dstar_mass_refit.png")
        create_plot1d(hist_Dstar_deltamr, plots_path + "Dstar_deltamr.png")
        create_plot1d(hist_Dstar_deltam, plots_path + "Dstar_deltam.png")

        create_plot1d(hist_Dstar_K[:, sum, sum],
                      plots_path + "Dstar_K_pt.png",
                      log=True)
        create_plot1d(hist_Dstar_K[sum, :, sum],
                      plots_path + "Dstar_K_eta.png")
        create_plot1d(hist_Dstar_K[sum, sum, :],
                      plots_path + "Dstar_K_phi.png")

        create_plot1d(hist_Dstar_pi[:, sum, sum],
                      plots_path + "Dstar_pi_pt.png",
                      log=True)
        create_plot1d(hist_Dstar_pi[sum, :, sum],
                      plots_path + "Dstar_pi_eta.png")
        create_plot1d(hist_Dstar_pi[sum, sum, :],
                      plots_path + "Dstar_pi_phi.png")

        create_plot1d(hist_Dstar_pis[:, sum, sum],
                      plots_path + "Dstar_pis_pt.png",
                      log=True)
        create_plot1d(hist_Dstar_pis[sum, :, sum],
                      plots_path + "Dstar_pis_eta.png")
        create_plot1d(hist_Dstar_pis[sum, sum, :],
                      plots_path + "Dstar_pis_phi.png")

        # Creating plots 2D
        create_plot2d(hist_muon_lead[:, sum, :],
                      plots_path + "Muon_lead_ptXphi")
        create_plot2d(hist_muon_trail[:, sum, :],
                      plots_path + "Muon_trail_ptXphi")

        create_plot2d(hist_D0[:, :, sum], plots_path + "D0_ptXeta.png")
        create_plot2d(hist_D0[sum, :, :], plots_path + "D0_etaXphi.png")
        create_plot2d(hist_D0_eta_mass, plots_path + "D0_etaXmass.png")

        # return dummy accumulator
        return output
Example #10
    def __init__(self, mcEventYields=None, jetSyst='nominal'):
        ################################
        # INITIALIZE COFFEA PROCESSOR
        ################################

        self.mcEventYields = mcEventYields

        if jetSyst not in ['nominal', 'JERUp', 'JERDown', 'JESUp', 'JESDown']:
            raise Exception(
                f'{jetSyst} is not in acceptable jet systematic types [nominal, JERUp, JERDown, JESUp, JESDown]'
            )

        self.jetSyst = jetSyst

        dataset_axis = hist.Cat("dataset", "Dataset")
        lep_axis = hist.Cat("lepFlavor", "Lepton Flavor")

        systematic_axis = hist.Cat("systematic", "Systematic Uncertainty")

        m3_axis = hist.Bin("M3", r"$M_3$ [GeV]", 200, 0., 1000)
        mass_axis = hist.Bin("mass", r"$m_{\ell\gamma}$ [GeV]", 400, 0., 400)
        pt_axis = hist.Bin("pt", r"$p_{T}$ [GeV]", 200, 0., 1000)
        eta_axis = hist.Bin("eta", r"$\eta_{\gamma}$", 300, -1.5, 1.5)
        chIso_axis = hist.Bin("chIso", r"Charged Hadron Isolation",
                              np.arange(-0.1, 20.001, .05))

        ## Define axis to keep track of photon category
        phoCategory_axis = hist.Bin("category", r"Photon Category",
                                    [1, 2, 3, 4, 5])
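        # edges [1, 2, 3, 4, 5] define four unit-width category bins, labeled below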
        phoCategory_axis.identifiers()[0].label = "Genuine Photon"
        phoCategory_axis.identifiers()[1].label = "Misidentified Electron"
        phoCategory_axis.identifiers()[2].label = "Hadronic Photon"
        phoCategory_axis.identifiers()[3].label = "Hadronic Fake"

        ### Accumulator for holding histograms
        self._accumulator = processor.dict_accumulator({
            'photon_pt':
            hist.Hist("Counts", dataset_axis, pt_axis, phoCategory_axis,
                      lep_axis, systematic_axis),
            'photon_eta':
            hist.Hist("Counts", dataset_axis, eta_axis, phoCategory_axis,
                      lep_axis, systematic_axis),
            'photon_chIso':
            hist.Hist("Counts", dataset_axis, chIso_axis, phoCategory_axis,
                      lep_axis, systematic_axis),
            'photon_lepton_mass_3j0t':
            hist.Hist("Counts", dataset_axis, mass_axis, phoCategory_axis,
                      lep_axis, systematic_axis),
            'M3':
            hist.Hist("Counts", dataset_axis, m3_axis, phoCategory_axis,
                      lep_axis, systematic_axis),
            # 3. ADD HISTOGRAMS
            ## book histograms for photon pt, eta, and charged hadron isolation
            #'photon_pt':
            #'photon_eta':
            #'photon_chIso':

            ## book histogram for photon/lepton mass in a 3j0t region
            #'photon_lepton_mass_3j0t':

            ## book histogram for M3 variable
            #'M3':
            'EventCount':
            processor.value_accumulator(int)
        })

        ext = extractor()
        ext.add_weight_sets([
            f"btag2016 * {cwd}/ScaleFactors/Btag/DeepCSV_2016LegacySF_V1.btag.csv"
        ])
        ext.finalize()
        self.evaluator = ext.make_evaluator()
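
        # make_evaluator() returns a dict-like set of lookup functions built from the
        # weight sets registered above (here, the DeepCSV b-tag SF csv)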

        self.ele_id_sf = util.load(
            f'{cwd}/ScaleFactors/MuEGammaScaleFactors/ele_id_sf.coffea')
        self.ele_id_err = util.load(
            f'{cwd}/ScaleFactors/MuEGammaScaleFactors/ele_id_err.coffea')

        self.ele_reco_sf = util.load(
            f'{cwd}/ScaleFactors/MuEGammaScaleFactors/ele_reco_sf.coffea')
        self.ele_reco_err = util.load(
            f'{cwd}/ScaleFactors/MuEGammaScaleFactors/ele_reco_err.coffea')

        self.mu_id_sf = util.load(
            f'{cwd}/ScaleFactors/MuEGammaScaleFactors/mu_id_sf.coffea')
        self.mu_id_err = util.load(
            f'{cwd}/ScaleFactors/MuEGammaScaleFactors/mu_id_err.coffea')

        self.mu_iso_sf = util.load(
            f'{cwd}/ScaleFactors/MuEGammaScaleFactors/mu_iso_sf.coffea')
        self.mu_iso_err = util.load(
            f'{cwd}/ScaleFactors/MuEGammaScaleFactors/mu_iso_err.coffea')

        self.mu_trig_sf = util.load(
            f'{cwd}/ScaleFactors/MuEGammaScaleFactors/mu_trig_sf.coffea')
        self.mu_trig_err = util.load(
            f'{cwd}/ScaleFactors/MuEGammaScaleFactors/mu_trig_err.coffea')
Example #11
                if ch in ['eeeSSonZ', 'eeeSSoffZ']:
                  values = values.eta[:,2]
                  hout[var].fill(e2eta=values, sample=dataset, channel=ch, cut=lev, weight=weights_flat, eft_coeff=eft_coeffs_cut, eft_err_coeff=eft_w2_coeffs_cut)
              elif var == 'm1pt':
                if ch in ['eeSSonZ', 'eeSSoffZ', 'eeeSSoffZ', 'eeeSSonZ', 'eemSSonZ', 'eemSSoffZ', 'emSS']: continue
                values = values.pt[:,1]
                hout[var].fill(m1pt=values, sample=dataset, channel=ch, cut=lev, weight=weights_flat, eft_coeff=eft_coeffs_cut, eft_err_coeff=eft_w2_coeffs_cut)
              elif var == 'm1eta':
                if ch in ['eeSSonZ', 'eeSSoffZ', 'eeeSSoffZ', 'eeeSSonZ', 'eemSSonZ', 'eemSSoffZ', 'emSS']: continue
                values = values.eta[:,1]
                hout[var].fill(m1eta=values, sample=dataset, channel=ch, cut=lev, weight=weights_flat, eft_coeff=eft_coeffs_cut, eft_err_coeff=eft_w2_coeffs_cut)
              elif var == 'm2pt':
                if ch in ['mmmSSonZ', 'mmmSSoffZ']:
                  values = values.pt[:,2]
                  hout[var].fill(m2pt=values, sample=dataset, channel=ch, cut=lev, weight=weights_flat, eft_coeff=eft_coeffs_cut, eft_err_coeff=eft_w2_coeffs_cut)
              elif var == 'm2eta':
                if ch in ['mmmSSonZ', 'mmmSSoffZ']:
                  values = values.eta[:,2]
                  hout[var].fill(m2eta=values, sample=dataset, channel=ch, cut=lev, weight=weights_flat, eft_coeff=eft_coeffs_cut, eft_err_coeff=eft_w2_coeffs_cut)
        return hout

    def postprocess(self, accumulator):
        return accumulator

if __name__ == '__main__':
    # Load the .coffea files
    outpath= './coffeaFiles/'
    samples     = load(outpath+'samples.coffea')
    topprocessor = AnalysisProcessor(samples)

Example #12
from .utils.crossSections import *
from .utils.efficiencies import getMuSF, getEleSF

from .utils.genParentage import maxHistoryPDGID
from .utils.updateJets import updateJetP4

import os.path
cwd = os.path.dirname(__file__)

#load lookup tool for btagging efficiencies
with open(f'{cwd}/utils/taggingEfficienciesDenseLookup.pkl', 'rb') as _file:
    taggingEffLookup = pickle.load(_file)

#load lookup tools for pileup scale factors
puLookup = util.load(f'{cwd}/ScaleFactors/puLookup.coffea')
puLookup_Down = util.load(f'{cwd}/ScaleFactors/puLookup_Down.coffea')
puLookup_Up = util.load(f'{cwd}/ScaleFactors/puLookup_Up.coffea')

#create and load jet extractor
Jetext = extractor()
Jetext.add_weight_sets([
    f"* * {cwd}/ScaleFactors/JEC/Summer16_07Aug2017_V11_MC_L1FastJet_AK4PFchs.jec.txt",
    f"* * {cwd}/ScaleFactors/JEC/Summer16_07Aug2017_V11_MC_L2Relative_AK4PFchs.jec.txt",
    f"* * {cwd}/ScaleFactors/JEC/Summer16_07Aug2017_V11_MC_Uncertainty_AK4PFchs.junc.txt",
    f"* * {cwd}/ScaleFactors/JEC/Summer16_25nsV1_MC_PtResolution_AK4PFchs.jr.txt",
    f"* * {cwd}/ScaleFactors/JEC/Summer16_25nsV1_MC_SF_AK4PFchs.jersf.txt",
])
Jetext.finalize()
Jetevaluator = Jetext.make_evaluator()
Example #13
import numpy as np
import awkward as ak
from coffea.util import load

compiled = load(__file__.replace('.py', '.coffea'))


def _msoftdrop_weight(pt, eta):
    gpar = np.array([1.00626, -1.06161, 0.0799900, 1.20454])
    cpar = np.array([1.09302, -0.000150068, 3.44866e-07, -2.68100e-10, 8.67440e-14, -1.00114e-17])
    fpar = np.array([1.27212, -0.000571640, 8.37289e-07, -5.20433e-10, 1.45375e-13, -1.50389e-17])
    genw = gpar[0] + gpar[1]*np.power(pt*gpar[2], -gpar[3])
    ptpow = np.power.outer(pt, np.arange(cpar.size))
    cenweight = np.dot(ptpow, cpar)
    forweight = np.dot(ptpow, fpar)
    weight = np.where(np.abs(eta) < 1.3, cenweight, forweight)
    return genw*weight


def corrected_msoftdrop(fatjets):
    if not isinstance(fatjets, ak.JaggedArray):
        raise ValueError
    sf_flat = _msoftdrop_weight(fatjets.p4.pt.flatten(), fatjets.p4.eta.flatten())
    sf_flat = np.maximum(1e-5, sf_flat)
    return fatjets.msoftdrop * fatjets.copy(content=sf_flat)


def n2ddt_shift(fatjets, year='2017'):
    return compiled[f'{year}_n2ddt_rho_pt'](fatjets.rho, fatjets.p4.pt)
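
# Usage sketch (assumption: fatjets also carries an `n2` column, not shown in this snippet):
#     msd_corr = corrected_msoftdrop(fatjets)
#     n2ddt = fatjets.n2 - n2ddt_shift(fatjets, year='2017')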

Example #14
# ---- Reiterate categories ---- #
ttagcats = ["at"]  #, "0t", "1t", "It", "2t"]
btagcats = ["0b", "1b", "2b"]
ycats = ['cen', 'fwd']

list_of_cats = [
    t + b + y for t, b, y in itertools.product(ttagcats, btagcats, ycats)
]

from Filesets import filesets

outputs_unweighted = {}
for name, files in filesets.items():
    outputs_unweighted[name] = util.load(
        'TTbarAllHadUproot/CoffeaOutputs/UnweightedOutputs/TTbarResCoffea_' +
        name + '_unweighted_output.coffea')
outputs_unweighted
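# NB: the bare `outputs_unweighted` above only echoes the dict in a notebook; it is a no-op in a script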
""" ---------------- CREATE RAW MISTAG PLOTS ---------------- """
# ---- Only Use This When LookUp Tables Were Not In Use for Previous Uproot Job (i.e. UseLookUpTables = False) ---- #
# ---- This Creates Mistag plots for every dataset in every category for debugging if necessary or for curiosity ---- #
# ---- Look up tables are a bit more sophisticated and much more useful to the analysis ---- #

SaveDirectory = maindirectory + '/TTbarAllHadUproot/MistagPlots/'
DoesDirectoryExist(
    SaveDirectory)  # no need to create the directory several times


# Function sqrt(x)
def forward(x):
    return x**(1 / 2)
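

# Assumed companion (not in the source): matplotlib "function" scales take a
# (forward, inverse) pair, so the inverse of the sqrt above would be squaring.
def inverse(x):
    return x**2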
Example #15
        2017 : 41.53,
        2018 : 59.74,
    }

    file_kind = "CC"
    if args.output is None:
        template_file = f"templates_{args.identifier}_{file_kind}.root"
        template_mu_file = f"templatesmuCR_{args.identifier}_{file_kind}.root"
    else:
        _base_name = args.output.split(".root")[0]
        template_file = f"{_base_name}.root"
        template_mu_file = f"{_base_name}_mu.root"

    # Load info
    print(f'Processing coffea output from: hists_{args.identifier}.coffea')
    output = load(f'hists_{args.identifier}.coffea')

    xsecs = xSecReader('metadata/xSections_manual.dat')
    sumw = getSumW(output)

    if args.mergemap is not None:
        print(f'Processing with mergemap from: {args.mergemap}')
        with open(args.mergemap) as json_file:
            merge_map = json.load(json_file)
    else:
        merge_map = None

    if args.cvl not in output[list(output.keys())[0]]['templates'].axes['ddc'].edges:
        raise ValueError(
            f"args.cvl = {args.cvl} not available. Axis edges are {output[list(output.keys())[0]]['templates'].axes['ddc'].edges}"
        )
Example #16
# clip the pileup weights at 5 for both years and their up/down variations
for key in ['2017_pileupweight', '2017_pileupweight_puUp', '2017_pileupweight_puDown',
            '2018_pileupweight', '2018_pileupweight_puUp', '2018_pileupweight_puDown']:
    compiled[key]._values = np.minimum(5, compiled[key]._values)

with importlib.resources.path("boostedhiggs.data",
                              'powhegToMinloPtCC.coffea') as filename:
    compiled['powheg_to_nnlops'] = util.load(filename)


class SoftDropWeight(lookup_base):
    def _evaluate(self, pt, eta):
        gpar = np.array([1.00626, -1.06161, 0.0799900, 1.20454])
        cpar = np.array([
            1.09302, -0.000150068, 3.44866e-07, -2.68100e-10, 8.67440e-14,
            -1.00114e-17
        ])
        fpar = np.array([
            1.27212, -0.000571640, 8.37289e-07, -5.20433e-10, 1.45375e-13,
            -1.50389e-17
        ])
        genw = gpar[0] + gpar[1] * np.power(pt * gpar[2], -gpar[3])
        cenweight = np.polyval(cpar[::-1], pt)
Example #17
import os
import numpy as np
from coffea.util import load, save
import matplotlib.pyplot as plt
import coffea.hist as hist
import time

#import mplhep
#plt.style.use(mplhep.style.CMS)

filename = "WZ_Run2018_40000.futures"
histo = load(filename)

h1_mass = histo['mass']
h1_Nele = histo['nElectrons']
nWZ = histo['sumw']['WZ']

if not isinstance(h1_mass, hist.Hist):
    raise TypeError("'mass' histogram is not a coffea hist.Hist")

if not isinstance(h1_Nele, hist.Hist):
    raise TypeError("'nElectrons' histogram is not a coffea hist.Hist")

hist.plot1d(h1_mass, overlay='dataset')
plt.show()

plt.close()
hist.plot1d(h1_Nele, overlay='dataset')
plt.show()
def plot_bkg_templates(fnames_to_run):
    """
    Runs LOWESS smoothing algorithm ntoys times and finds 1 and 2 sigma bands for interpolation
    """

    for bkg_file in fnames_to_run:
        hdict = load(bkg_file)
        jmult = "3Jets" if "3Jets" in os.path.basename(bkg_file) else "4PJets"
        for tname, orig_template in hdict[args.lepton].items():

            proc = tname.split(
                "_")[0] if "data_obs" not in tname else "data_obs"
            sys = sorted(filter(None, tname.split(f"{proc}_")))[0]

            if proc == "BKG": continue
            #if sys not in ["hdampUP", "hdampDOWN", "mtop1665", "mtop1695", "mtop1715", "mtop1735", "mtop1755", "mtop1785", "ueUP", "ueDOWN"]: continue
            if sys == "nosys": continue
            print(args.lepton, jmult, sys, proc)

            nosys_hist = hdict[args.lepton][f"{proc}_nosys"].copy()
            orig_smooth_hist = Plotter.smoothing_mttbins(
                nosys=nosys_hist,
                systematic=orig_template,
                mtt_centers=mtt_centers,
                nbinsx=nbinsx,
                nbinsy=nbinsy)

            x_lims = (0, nosys_hist.dense_axes()[0].centers().size)

            # get vals and errors of systematic variation
            sys_histo_vals, sys_histo_sumw2 = orig_template.values(
                sumw2=True)[()]
            sys_histo_errs = np.sqrt(sys_histo_sumw2)

            # make toys based on Gaussian distribution of mu=bin_val, sigma=bin_error
            toy_arrays = np.zeros((nbins, ntoys))
            for idx in range(nbins):
                toy_arrays[idx] = np.random.normal(sys_histo_vals[idx],
                                                   sys_histo_errs[idx],
                                                   size=ntoys)
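            # NB: the loop above is equivalent to a single broadcasted draw
            # (sketch; same distributions, different random stream):
            # toy_arrays = np.random.normal(sys_histo_vals[:, None],
            #                               sys_histo_errs[:, None],
            #                               size=(nbins, ntoys))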

            # get smoothed relative deviation distributions from toys
            smoothed_rel_dev_arrays = np.zeros((ntoys, nbins))
            chi2_pvals = np.zeros((ntoys, 2))
            for idx in range(ntoys):
                smoothed_array = Plotter.smoothing_mttbins(
                    nosys=nosys_hist,
                    systematic=(toy_arrays.T)[idx],
                    mtt_centers=mtt_centers,
                    nbinsx=nbinsx,
                    nbinsy=nbinsy)
                chi2_pval = chisquare(
                    f_obs=smoothed_array, f_exp=orig_smooth_hist.values()[()]
                )  # convert to expected yields so inputs are greater than 5
                chi2_pvals[idx] = np.array(
                    [chi2_pval.statistic, chi2_pval.pvalue])
                smoothed_rel_dev_arrays[idx] = (
                    smoothed_array -
                    nosys_hist.values()[()]) / nosys_hist.values()[()]

            ## find 68% and 95% intervals
            plus_one_sigma_smooth_vals, minus_one_sigma_smooth_vals = np.zeros(
                nbins), np.zeros(nbins)
            plus_two_sigma_smooth_vals, minus_two_sigma_smooth_vals = np.zeros(
                nbins), np.zeros(nbins)
            for bin_idx in range(nbins):
                plus_one_sigma_smooth_vals[bin_idx] = np.sort(
                    smoothed_rel_dev_arrays[:, bin_idx])[plus_one_sigma_ind]
                minus_one_sigma_smooth_vals[bin_idx] = np.sort(
                    smoothed_rel_dev_arrays[:, bin_idx])[minus_one_sigma_ind]
                plus_two_sigma_smooth_vals[bin_idx] = np.sort(
                    smoothed_rel_dev_arrays[:, bin_idx])[plus_two_sigma_ind]
                minus_two_sigma_smooth_vals[bin_idx] = np.sort(
                    smoothed_rel_dev_arrays[:, bin_idx])[minus_two_sigma_ind]
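            # The four per-bin sorts above can be collapsed into one sort along
            # the toy axis (sketch; yields the same band values):
            # sorted_devs = np.sort(smoothed_rel_dev_arrays, axis=0)
            # plus_one_sigma_smooth_vals = sorted_devs[plus_one_sigma_ind, :]
            # minus_one_sigma_smooth_vals = sorted_devs[minus_one_sigma_ind, :]
            # plus_two_sigma_smooth_vals = sorted_devs[plus_two_sigma_ind, :]
            # minus_two_sigma_smooth_vals = sorted_devs[minus_two_sigma_ind, :]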

            # plot relative deviation
            fig, ax = plt.subplots()
            fig.subplots_adjust(hspace=.07)

            # original relative deviations
            orig_masked_vals, orig_masked_bins = Plotter.get_ratio_arrays(
                num_vals=orig_template.values()[()] - nosys_hist.values()[()],
                denom_vals=nosys_hist.values()[()],
                input_bins=nosys_hist.dense_axes()[0].edges())
            ax.step(orig_masked_bins,
                    orig_masked_vals,
                    where="post",
                    **{
                        "color": "k",
                        "linestyle": "-",
                        "label": "Original"
                    })
            # original smoothing relative deviations
            orig_smoothed_masked_vals, orig_smoothed_masked_bins = Plotter.get_ratio_arrays(
                num_vals=orig_smooth_hist.values()[()] -
                nosys_hist.values()[()],
                denom_vals=nosys_hist.values()[()],
                input_bins=nosys_hist.dense_axes()[0].edges())
            ax.step(orig_smoothed_masked_bins,
                    orig_smoothed_masked_vals,
                    where="post",
                    **{
                        "color": "r",
                        "linestyle": "-",
                        "label": "Original Smoothing"
                    })
            # plot 68 and 95% intervals for yields
            ax.fill_between(nosys_hist.dense_axes()[0].edges(),
                            np.r_[minus_one_sigma_smooth_vals,
                                  minus_one_sigma_smooth_vals[-1]],
                            np.r_[plus_one_sigma_smooth_vals,
                                  plus_one_sigma_smooth_vals[-1]],
                            where=np.r_[plus_one_sigma_smooth_vals,
                                        plus_one_sigma_smooth_vals[-1]] >
                            np.r_[minus_one_sigma_smooth_vals,
                                  minus_one_sigma_smooth_vals[-1]],
                            step="post",
                            **{
                                "label": "68%",
                                "facecolor": "#00cc00",
                                "alpha": 0.5
                            })
            ax.fill_between(nosys_hist.dense_axes()[0].edges(),
                            np.r_[minus_two_sigma_smooth_vals,
                                  minus_two_sigma_smooth_vals[-1]],
                            np.r_[plus_two_sigma_smooth_vals,
                                  plus_two_sigma_smooth_vals[-1]],
                            where=np.r_[plus_two_sigma_smooth_vals,
                                        plus_two_sigma_smooth_vals[-1]] >
                            np.r_[minus_two_sigma_smooth_vals,
                                  minus_two_sigma_smooth_vals[-1]],
                            step="post",
                            **{
                                "label": "95%",
                                "facecolor": "#ffcc00",
                                "alpha": 0.5
                            })

            ax.legend(loc="upper right", title=f"{sys}, {proc}")
            ax.axhline(
                0, **{
                    "linestyle": "--",
                    "color": (0, 0, 0, 0.5),
                    "linewidth": 1
                })
            ax.autoscale()
            ax.set_ylim(ax.get_ylim()[0], ax.get_ylim()[1] * 1.15)
            ax.set_xlim(x_lims)
            ax.set_xlabel(
                "$m_{t\\bar{t}}$ $\\otimes$ |cos($\\theta^{*}_{t_{l}}$)|")
            ax.set_ylabel("Rel. Deviation from Nominal")

            # add lepton/jet multiplicity label
            ax.text(0.02,
                    0.94,
                    f"{leptypes[args.lepton]}, {jet_mults[jmult]}",
                    fontsize=rcParams["font.size"] * 0.9,
                    horizontalalignment="left",
                    verticalalignment="bottom",
                    transform=ax.transAxes)
            ## draw vertical lines for distinguishing different ctstar bins
            vlines = [x_lims[1] * ybin / 5 for ybin in range(1, 5)]
            for vline in vlines:
                ax.axvline(vline, color="k", linestyle="--")
            hep.cms.label(ax=ax,
                          data=False,
                          paper=False,
                          year=args.year,
                          lumi=round(data_lumi_year[f"{args.lepton}s"] / 1000.,
                                     1))

            #set_trace()
            pltdir = os.path.join(outdir, args.lepton, jmult, sys)
            if not os.path.isdir(pltdir):
                os.makedirs(pltdir)

            figname = os.path.join(
                pltdir, "_".join([
                    jmult, args.lepton, sys, proc,
                    "SmoothingConfidenceIntervals"
                ]))
            fig.savefig(figname)
            print(f"{figname} written")
            plt.close()
Example #19
coffeapath = './coffeaFiles/'
outname = 'plotsTopEFT'

mocapath = 'moca'
mocaScripts = ['corrections', 'functions', 'objects', 'samples', 'selection']
#analysis =
#treeName

nworkers = 8

### (Re)produce inputs...

### Produce/load analysis object
#print("Executing python analysis/topEFT/topeft.py...")
#os.system('python analysis/topEFT/topeft.py')
processor_instance = load(coffeapath + nameProcessor + '.coffea')

### Load samples
samplesdict = load(coffeapath + nameSamples + '.coffea')
flist = {}
xsec = {}
sow = {}
isData = {}
for k in samplesdict.keys():
    flist[k] = samplesdict[k]['files']
    xsec[k] = samplesdict[k]['xsec']
    sow[k] = samplesdict[k]['nSumOfWeights']
    isData[k] = samplesdict[k]['isData']

# Run the processor and get the output
tstart = time.time()
Example #20
def merge(folder, _dataset):
    """Merge per-chunk .pkl.gz outputs in `folder` into per-dataset .coffea
    files, then sum those into a single condor_hists_<folder>.coffea file."""

    filelist = {}
    pd = []
    for filename in os.listdir(folder):
        if '.pkl.gz' in filename:
            if filename.split("____")[0] not in pd:
                pd.append(filename.split("____")[0])

    for pdi in pd:
        files = []
        for filename in os.listdir(folder):
            if pdi not in filename: continue
            if '.pkl.gz' not in filename: continue
            files.append(filename)
        #print(pdi,'length:',len(files))
        split_files = split(files, 100)
        #print(pdi,'number of lists:',len(split_files))
        for i in range(0, len(split_files)):
            filelist[pdi + '___' + str(i) + '_'] = split_files[i]

    coffealist = []
    for pdi in filelist.keys():
        if _dataset != 'None' and _dataset not in pdi: continue
        print(pdi)
        #print(filelist[pdi])
        hists = {}
        for filename in filelist[pdi]:
            fin = gzip.open(folder + '/' + filename)
            print('Opening:', folder + '/' + filename)
            hin = cloudpickle.load(fin)
            #print('before',hin['recoil'].integrate('dataset',filename.split(".")[0]).integrate('region','isoneE').integrate('jet_selection','baggy').values())
            for k in hin.keys():
                if k not in hists: hists[k] = hin[k]
                else: hists[k] += hin[k]
            #print('middle',hists['recoil'].integrate('dataset',filename.split(".")[0]).integrate('region','isoneE').integrate('jet_selection','baggy').values())
            fin.close()
            del hin
        dataset = hist.Cat("dataset", "dataset", sorting='placement')
        dataset_cats = ("dataset", )
        dataset_map = OrderedDict()
        dataset_map[pdi] = (pdi.split("___")[0] + "*", )
        for key in hists.keys():
            hists[key] = hists[key].group(dataset_cats, dataset, dataset_map)
        #print('after',hists['recoil'].integrate('dataset',pdi).integrate('region','isoneE').integrate('jet_selection','baggy').values())
        save(hists, folder + '/' + pdi + '.coffea')
        del hists
        #coffealist.append(folder+'/'+pdi+'.coffea')

    for coffeafile in os.listdir(folder):
        if '.coffea' not in coffeafile: continue
        coffealist.append(folder + '/' + coffeafile)
    print('coffealist', coffealist)

    htot = {}
    for coffeafile in coffealist:
        print('Opening', coffeafile)
        hists = load(coffeafile)
        #print(hists)
        #print(coffeafile.split("/")[1].split(".")[0])
        #print('before',hists['recoil'].integrate('dataset',coffeafile.split("/")[1].split(".")[0]).integrate('region','isoneE').integrate('jet_selection','baggy').values())
        for k in hists:
            if k not in htot: htot[k] = hists[k]
            else: htot[k] += hists[k]
        #print('after',htot['recoil'].integrate('dataset',coffeafile.split("/")[1].split(".")[0]).integrate('region','isoneE').integrate('jet_selection','baggy').values())
        del hists
    if _dataset == 'None': _dataset = ''
    save(htot, 'condor_hists_' + folder + '.coffea')
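
# `split` used in merge() is not defined in this snippet; a chunking helper
# with the assumed (list, chunk size) signature could look like this
# (hypothetical sketch):
def split(lst, n):
    """Split lst into consecutive chunks of at most n items."""
    return [lst[i:i + n] for i in range(0, len(lst), n)]

# Example invocation (folder name is a placeholder):
# merge('hists_2018', 'None')  # merge every dataset found under hists_2018/
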
def get_bkg_templates(fnames_to_run):
    """
    Function that writes linearized mtt vs costheta distributions to root file.
    """

    #set_trace()
    for bkg_file in fnames_to_run:
        hdict = load(bkg_file)
        jmult = "3Jets" if "3Jets" in os.path.basename(bkg_file) else "4PJets"
        for lep in hdict.keys():
            for tname, orig_template in hdict[lep].items():

                proc = tname.split(
                    "_")[0] if "data_obs" not in tname else "data_obs"
                sys = sorted(filter(None, tname.split(f"{proc}_")))[0]

                #if not ((sys == "ueDOWN") and (proc == "ttJets")): continue
                if sys == "nosys": continue
                print(lep, jmult, sys, proc)

                nominal_hist = hdict[lep][f"{proc}_nosys"].copy()

                x_lims = (0, nominal_hist.dense_axes()[0].centers().size)

                # perform smoothing
                smoothed_histos_list = [(Plotter.smoothing_mttbins(
                    nosys=nominal_hist,
                    systematic=orig_template,
                    mtt_centers=mtt_centers,
                    nbinsx=len(linearize_binning[0]) - 1,
                    nbinsy=len(linearize_binning[1]) - 1,
                    **{"frac": frac_val / 10.}), frac_val / 10.)
                                        for frac_val in np.arange(2, 7, 2)]  # frac = 0.2, 0.4, 0.6
                #smoothed_histos_chi2 = {frac_val :  find_chi2(h_fitted=smooth_histo, h_unc=orig_template) for smooth_histo, frac_val in smoothed_histos_list}
                # perform flattening
                flattened_histo = Plotter.flatten(nosys=nominal_hist,
                                                  systematic=orig_template)
                #flat_chi2 = find_chi2(h_fitted=flattened_histo, h_unc=orig_template)

                # plot relative deviation
                fig, ax = plt.subplots()
                fig.subplots_adjust(hspace=.07)

                # plot original dist
                orig_masked_vals, orig_masked_bins = Plotter.get_ratio_arrays(
                    num_vals=orig_template.values()[()] -
                    nominal_hist.values()[()],
                    denom_vals=nominal_hist.values()[()],
                    input_bins=nominal_hist.dense_axes()[0].edges())
                ax.fill_between(orig_masked_bins,
                                orig_masked_vals,
                                facecolor="k",
                                step="post",
                                alpha=0.5,
                                label="Unsmoothed")

                # plot smoothed versions
                for smooth_histo, frac_val in smoothed_histos_list:
                    smooth_masked_vals, smooth_masked_bins = Plotter.get_ratio_arrays(
                        num_vals=smooth_histo.values()[()] -
                        nominal_hist.values()[()],
                        denom_vals=nominal_hist.values()[()],
                        input_bins=nominal_hist.dense_axes()[0].edges())
                    ax.step(smooth_masked_bins,
                            smooth_masked_vals,
                            where="post",
                            **{
                                "linestyle": "-",
                                "label": f"Frac={frac_val}",
                                "linewidth": 2
                            })

                # plot flattened val
                flat_masked_vals, flat_masked_bins = Plotter.get_ratio_arrays(
                    num_vals=flattened_histo.values()[()] -
                    nominal_hist.values()[()],
                    denom_vals=nominal_hist.values()[()],
                    input_bins=nominal_hist.dense_axes()[0].edges())
                ax.step(flat_masked_bins,
                        flat_masked_vals,
                        where="post",
                        **{
                            "linestyle": "-",
                            "label": "Flat",
                            "linewidth": 2
                        })

                ax.legend(loc="upper right", title=f"{sys}, {proc}")
                ax.axhline(
                    0, **{
                        "linestyle": "--",
                        "color": (0, 0, 0, 0.5),
                        "linewidth": 1
                    })
                ax.autoscale()
                ax.set_xlim(x_lims)
                ax.set_xlabel(
                    "$m_{t\\bar{t}}$ $\\otimes$ |cos($\\theta^{*}_{t_{l}}$)|")
                ax.set_ylabel("Rel. Deviation from Nominal")

                # add lepton/jet multiplicity label
                ax.text(0.02,
                        0.94,
                        f"{leptypes[lep]}, {jet_mults[jmult]}",
                        fontsize=rcParams["font.size"] * 0.9,
                        horizontalalignment="left",
                        verticalalignment="bottom",
                        transform=ax.transAxes)
                ## draw vertical lines for distinguishing different ctstar bins
                vlines = [x_lims[1] * ybin / 5 for ybin in range(1, 5)]
                for vline in vlines:
                    ax.axvline(vline, color="k", linestyle="--")
                hep.cms.label(ax=ax,
                              data=False,
                              paper=False,
                              year=args.year,
                              lumi=round(data_lumi_year[f"{lep}s"] / 1000., 1))

                #set_trace()
                pltdir = os.path.join(outdir, lep, jmult, sys)
                if not os.path.isdir(pltdir):
                    os.makedirs(pltdir)

                #figname = os.path.join(pltdir, "_".join([jmult, lep, sys, proc, "BinWidths_Comp"]))
                #figname = os.path.join(pltdir, "_".join([jmult, lep, sys, proc, "SmoothValues_Comp"]))
                #figname = os.path.join(pltdir, "_".join([jmult, lep, sys, proc, "MttBinWidths_SmoothValues_Comp"]))
                figname = os.path.join(
                    pltdir,
                    "_".join([jmult, lep, sys, proc, "SmoothedFlatVals_Comp"]))
                fig.savefig(figname)
                print(f"{figname} written")
                plt.close()
Example #22
def get_sig_templates(tmp_rname):
    '''
    Function that writes linearized mtt vs costheta distributions to root file.
    '''
    from rootpy.plotting import Hist2D

    widthTOname = lambda width : str(width).replace('.', 'p')
    nameTOwidth = lambda width : str(width).replace('p', '.')

    ## variables that only need to be defined/evaluated once
    hdict = plt_tools.add_coffea_files(sig_fnames) if len(sig_fnames) > 1 else load(sig_fnames[0])

        ## get data lumi and scale MC by lumi
    data_lumi_year = prettyjson.loads(open('%s/inputs/lumis_data.json' % proj_dir).read())[args.year]

        # get correct hist and rebin
    hname_to_use = 'mtt_vs_tlep_ctstar_abs'
    if hname_to_use not in hdict.keys():
        raise ValueError("%s not found in file" % hname_to_use)
    xrebinning, yrebinning = mtt_ctstar_2d_binning
    #xrebinning, yrebinning = 2, 1
    histo = hdict[hname_to_use] # process, sys, jmult, leptype, btag, lepcat

    #set_trace()    
    xaxis_name = histo.dense_axes()[0].name
    yaxis_name = histo.dense_axes()[1].name
        ## rebin x axis
    if isinstance(xrebinning, np.ndarray):
        new_xbins = hist.Bin(xaxis_name, xaxis_name, xrebinning)
    elif isinstance(xrebinning, float) or isinstance(xrebinning, int):
        new_xbins = xrebinning
    histo = histo.rebin(xaxis_name, new_xbins)

        ## rebin y axis
    if isinstance(yrebinning, np.ndarray):
        new_ybins = hist.Bin(yaxis_name, yaxis_name, yrebinning)
    elif isinstance(yrebinning, float) or isinstance(yrebinning, int):
        new_ybins = yrebinning
    #set_trace()
    histo = histo.rebin(yaxis_name, new_ybins)
    rebin_histo = histo[:, :, :, :, 'btagPass', 'Tight'].integrate('lepcat').integrate('btag')

    signals = sorted(set([key[0] for key in rebin_histo.values().keys()]))    

        # create 2D signal hists and write to temp file        
    with root_open(tmp_rname, 'w') as out:
        #for lep in ['Muon']:
        for lep in ['Muon', 'Electron']:
            lepdir = 'mujets' if lep == 'Muon' else 'ejets'

                # scale by lumi
            lumi_correction = load('%s/Corrections/%s/MC_LumiWeights_IgnoreSigEvts.coffea' % (proj_dir, jobid))[args.year]['%ss' % lep]
            scaled_histo = rebin_histo.copy()
            scaled_histo.scale(lumi_correction, axis='dataset')
    
            for jmult in njets_to_run:
                histo = scaled_histo[:, :, jmult, lep].integrate('jmult').integrate('leptype')
    
                for signal in signals:
                    boson, mass, width, pI, wt = tuple(signal.split('_'))
                    samtype = 'int' if pI == 'Int' else 'sgn'
                    bostype = 'ggA' if boson == 'AtoTT' else 'ggH'
    
                    sub_name = '%s_%s-%s-%s-%s' % (bostype, wt, samtype, widthTOname(width).split('W')[-1]+'pc', mass) if pI == 'Int' else '%s_pos-%s-%s-%s' % (bostype, samtype, widthTOname(width).split('W')[-1]+'pc', mass)
    
                    #set_trace()
                    for sys in sys_to_use.keys():
                        sysname, onlyTT = sys_to_use[sys]
                        if onlyTT: continue
                        if sys not in histo.axis('sys')._sorted:
                            print('\n\n   Systematic %s not available, skipping\n\n' % sys)
                            continue
                        #set_trace()
                        if 'LEP' in sysname: sysname = sysname.replace('LEP', lepdir[0])
    
                        template_histo = histo[signal, sys].integrate('dataset').integrate('sys')
                        if wt == 'neg':
                            template_histo.scale(-1.)
                        #if (pI == 'Int') and (wt == 'pos'): continue
                        print(lep, jmult, sub_name, sys)
                        sumw, sumw2 = template_histo.values(sumw2=True, overflow='all')[()] # get vals and errors for all bins (including under/overflow)
                        #if args.smooth:
                        #    set_trace()

                            ## create rootpy hist and rename
                        rtpy_h2d = Hist2D(template_histo.dense_axes()[0].edges(), template_histo.dense_axes()[1].edges())
                        outhname = '_'.join([jmult, lepdir, sub_name]) if sys == 'nosys' else '_'.join([jmult, lepdir, sub_name, sysname])
                        rtpy_h2d.name = outhname
                            # set bin content for rootpy hist
                        for binx in range(0, rtpy_h2d.GetNbinsX()+2):
                            for biny in range(0, rtpy_h2d.GetNbinsY()+2):
                                rtpy_h2d[binx, biny] = sumw[binx, biny], sumw2[binx, biny]
                        #set_trace()
                        rtpy_h2d.Write()
        
    print('%s written' % tmp_rname)
input_dir = '/'.join(
    [proj_dir, 'results',
     '%s_%s' % (args.year, jobid), analyzer])
f_ext = 'TOT.coffea'
outdir = '/'.join([proj_dir, 'plots', '%s_%s' % (args.year, jobid), analyzer])
if not os.path.isdir(outdir):
    os.makedirs(outdir)

fnames = sorted([
    '%s/%s' % (input_dir, fname) for fname in os.listdir(input_dir)
    if fname.endswith(f_ext)
])

#set_trace()
hdict = plt_tools.add_coffea_files(fnames) if len(fnames) > 1 else load(
    fnames[0])

jet_mults = {'3Jets': '3 jets', '4PJets': '4+ jets'}

objtypes = {
    'Jets': 'jets',
    'Lep': {
        'Muon': '$\\mu$',
        'Electron': '$e$',
    }
}

btag_cats = {
    'btagFail': '0 btags',
    'btagPass': '1+ btags',  # label masked ('******') in the scraped source; assumed complement of btagFail
}
Example #24
def get_bkg_templates(tmp_rname):
    '''
    Function that writes linearized mtt vs costheta distributions to root file.
    '''
    ## variables that only need to be defined/evaluated once
    hdict = plt_tools.add_coffea_files(bkg_fnames) if len(bkg_fnames) > 1 else load(bkg_fnames[0])

        ## get data lumi and scale MC by lumi
    data_lumi_year = prettyjson.loads(open('%s/inputs/lumis_data.json' % proj_dir).read())[args.year]

        # get correct hist and rebin
    hname_to_use = 'mtt_vs_tlep_ctstar_abs'
    if hname_to_use not in hdict.keys():
        raise ValueError("%s not found in file" % hname_to_use)
    xrebinning, yrebinning = linearize_binning
    histo = hdict[hname_to_use] # process, sys, jmult, leptype, btag, lepcat
    
    xaxis_name = histo.dense_axes()[0].name
    yaxis_name = histo.dense_axes()[1].name
        ## rebin x axis
    if isinstance(xrebinning, np.ndarray):
        new_xbins = hist.Bin(xaxis_name, xaxis_name, xrebinning)
    elif isinstance(xrebinning, float) or isinstance(xrebinning, int):
        new_xbins = xrebinning
    histo = histo.rebin(xaxis_name, new_xbins)
        ## rebin y axis
    if isinstance(yrebinning, np.ndarray):
        new_ybins = hist.Bin(yaxis_name, yaxis_name, yrebinning)
    elif isinstance(yrebinning, float) or isinstance(yrebinning, int):
        new_ybins = yrebinning
    rebin_histo = histo.rebin(yaxis_name, new_ybins)
    
    nbins = (len(xrebinning)-1)*(len(yrebinning)-1)
    
        ## scale ttJets events, split by reconstruction type, by normal ttJets lumi correction
    ttJets_permcats = ['*right', '*matchable', '*unmatchable', '*other']
    names = [dataset for dataset in sorted(set([key[0] for key in hdict[hname_to_use].values().keys()]))] # get dataset names in hists
    ttJets_cats = [name for name in names if any([fnmatch.fnmatch(name, cat) for cat in ttJets_permcats])] # gets ttJets(_PS)_other, ...

        # use ttJets events that don't have PS weights for dedicated sys samples in 2016    
    if bkg_ttJets_fname is not None:
        ttJets_hdict = load(bkg_ttJets_fname)
        ttJets_histo = ttJets_hdict[hname_to_use] # process, sys, jmult, leptype, btag, lepcat
        
            ## rebin x axis
        ttJets_histo = ttJets_histo.rebin(xaxis_name, new_xbins)
            ## rebin y axis
        ttJets_histo = ttJets_histo.rebin(yaxis_name, new_ybins)
        
        only_ttJets_names = [dataset for dataset in sorted(set([key[0] for key in ttJets_hdict[hname_to_use].values().keys()]))] # get dataset names in hists
        only_ttJets_cats = [name for name in only_ttJets_names if any([fnmatch.fnmatch(name, cat) for cat in ttJets_permcats])] # gets ttJets(_PS)_other, ...


        ## make groups based on process
    process = hist.Cat("process", "Process", sorting='placement')
    process_cat = "dataset"

        # need to save coffea hist objects to file so they can be opened by uproot in the proper format
    upfout = uproot.recreate(tmp_rname, compression=uproot.ZLIB(4)) if os.path.isfile(tmp_rname) else uproot.create(tmp_rname)

    if '3Jets' in njets_to_run:
        histo_dict_3j = processor.dict_accumulator({'Muon' : {}, 'Electron' :{}})
    if '4PJets' in njets_to_run:
        histo_dict_4pj = processor.dict_accumulator({'Muon' : {}, 'Electron' :{}})

    for lep in ['Muon', 'Electron']:
        lepdir = 'mujets' if lep == 'Muon' else 'ejets'
    
        ## make groups based on process
        process_groups = plt_tools.make_dataset_groups(lep, args.year, samples=names, gdict='templates')
        
        lumi_correction = load('%s/Corrections/%s/MC_LumiWeights_IgnoreSigEvts.coffea' % (proj_dir, jobid))[args.year]['%ss' % lep]
                # scale ttJets events, split by reconstruction type, by normal ttJets lumi correction
        if len(ttJets_cats) > 0:
            for tt_cat in ttJets_cats:
                ttJets_lumi_topo = '_'.join(tt_cat.split('_')[:-1]) # gets ttJets[SL, Had, DiLep] or ttJets_PS
                ttJets_eff_lumi = lumi_correction[ttJets_lumi_topo]
                lumi_correction.update({tt_cat: ttJets_eff_lumi})
    
        histo = rebin_histo.copy()
        histo.scale(lumi_correction, axis='dataset')
        histo = histo.group(process_cat, process, process_groups)[:, :, :, lep, :, :].integrate('leptype')

            # use ttJets events that don't have PS weights for dedicated sys samples in 2016    
        if bkg_ttJets_fname is not None:
            if len(only_ttJets_cats) > 0:
                for tt_cat in only_ttJets_cats:
                    ttJets_lumi_topo = '_'.join(tt_cat.split('_')[:-1]) # gets ttJets[SL, Had, DiLep] or ttJets_PS
                    ttJets_eff_lumi = lumi_correction[ttJets_lumi_topo]
                    lumi_correction.update({tt_cat: ttJets_eff_lumi})

            tt_histo = ttJets_histo.copy()
            tt_histo.scale(lumi_correction, axis='dataset')
            tt_histo = tt_histo.group(process_cat, process, {'TT' : ['ttJets_right', 'ttJets_matchable', 'ttJets_unmatchable', 'ttJets_other']})[:, :, :, lep, :, :].integrate('leptype')


        for jmult in njets_to_run:
            iso_sb    = Plotter.linearize_hist(histo[:, 'nosys', jmult, 'btagPass', 'Loose'].integrate('sys').integrate('jmult').integrate('lepcat').integrate('btag'))
            btag_sb   = Plotter.linearize_hist(histo[:, 'nosys', jmult, 'btagFail', 'Tight'].integrate('sys').integrate('jmult').integrate('lepcat').integrate('btag'))
            double_sb = Plotter.linearize_hist(histo[:, 'nosys', jmult, 'btagFail', 'Loose'].integrate('sys').integrate('jmult').integrate('lepcat').integrate('btag'))
            sig_histo = Plotter.linearize_hist(histo[:, :, jmult, 'btagPass', 'Tight'].integrate('jmult').integrate('lepcat').integrate('btag'))
        
            for sys in sys_to_use.keys():
                if sys not in histo.axis('sys')._sorted:
                    print('\n\n   Systematic %s not available, skipping\n\n' % sys)
                    continue

                #set_trace()
                sysname, onlyTT = sys_to_use[sys]
                if 'LEP' in sysname: sysname = sysname.replace('LEP', lepdir[0])
        
                qcd_est_histo = Plotter.QCD_Est(sig_reg=sig_histo, iso_sb=iso_sb, btag_sb=btag_sb, double_sb=double_sb, norm_type='Sideband', shape_region='BTAG', norm_region='BTAG', sys=sys)

                    ## write nominal and systematic variations for each topology to file
                for proc in sorted(set([key[0] for key in qcd_est_histo.values().keys()])):
                    if (proc != 'TT') and onlyTT: continue
                    if (proc == 'data_obs') and not (sys == 'nosys'): continue
                    name = proc+lepdir if proc == 'QCD' else proc
                    print(lep, jmult, sys, name)
                    outhname = '_'.join([jmult, lepdir, name]) if sys == 'nosys' else '_'.join([jmult, lepdir, name, sysname])
                    template_histo = qcd_est_histo[proc].integrate('process')
                    if (('ue' in sys) or ('hdamp' in sys) or ('mtop' in sys)) and (bkg_ttJets_fname is not None):
                        tt_lin_histo = Plotter.linearize_hist(tt_histo['TT', 'nosys', jmult, 'btagPass', 'Tight'].integrate('jmult').integrate('lepcat').integrate('btag'))
                        tt_lin_histo = tt_lin_histo['TT', 'nosys'].integrate('process').integrate('sys')
                        template_histo = substitute_ttJets(sys_histo=template_histo, ttJets_histo=tt_lin_histo, ttJets_PS_histo=sig_histo['TT', 'nosys'].integrate('process').integrate('sys'))

                    if ((sys == 'mtop1695') or (sys == 'mtop1755')) and (templates_to_smooth[proc]):
                        template_histo = scale_mtop3gev(nominal=histo_dict_3j[lep][proc] if jmult == '3Jets' else histo_dict_4pj[lep][proc], template=template_histo)
                        #set_trace()

                    if (sys != 'nosys') and (args.smooth) and (templates_to_smooth[proc]):
                        template_histo = smoothing(nominal=histo_dict_3j[lep][proc] if jmult == '3Jets' else histo_dict_4pj[lep][proc], template=template_histo, nbinsx=len(xrebinning)-1, nbinsy=len(yrebinning)-1)#, debug=True if proc=='VV' else False)
                        #set_trace()

                        ## save template histos to coffea dict
                    if jmult == '3Jets':
                        histo_dict_3j[lep][proc if sys == 'nosys' else '%s_%s' % (proc, sys)] = template_histo
                    if jmult == '4PJets':
                        histo_dict_4pj[lep][proc if sys == 'nosys' else '%s_%s' % (proc, sys)] = template_histo

                        ## save template histo to root file
                    upfout[outhname] = hist.export1d(template_histo)

    if '3Jets' in njets_to_run:
        coffea_out_3j = '%s/templates_lj_3Jets_bkg_smoothed_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year) if args.smooth else '%s/templates_lj_3Jets_bkg_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year)
        save(histo_dict_3j, coffea_out_3j)
        print("%s written" % coffea_out_3j)
    if '4PJets' in njets_to_run:
        coffea_out_4pj = '%s/templates_lj_4PJets_bkg_smoothed_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year) if args.smooth else '%s/templates_lj_4PJets_bkg_%s_QCD_Est_%s.coffea' % (outdir, jobid, args.year)
        save(histo_dict_4pj, coffea_out_4pj)
        print("%s written" % coffea_out_4pj)

    
    upfout.close()
    print('%s written' % tmp_rname)
Example #25
        # (snippet truncated above: closing line of a coffea processor method)
        return accumulator

samples = {"default": args.files}
#with open('files_prev.json') as fin:
#    samples = json.load(fin)

output = processor.run_uproot_job(samples,
                                  treename='Events',
                                  processor_instance=GenVisualizer(),
                                  executor=processor.futures_executor,
                                  executor_args={'workers': 4},
                                  chunksize=500000,
                                 )
save(output, 'genstuff.coffea')

output = load("genstuff.coffea")

#hmass = output["hmass"]
#bin_contents = hmass.values()[('ZPrimeToQQ_DMsimp_HT400_M50',)]
#edges = hmass.axis('mass').edges()
#edge_pairs = [(edges[i], edges[i+1]) for i in range(len(edges)-1)]
#histd = zip(edge_pairs, bin_contents)
#for thing in histd:
#    print(thing)


# In[13]:

for hname, axisname in [("hmass", "mass"), ("hpt", "pt")]:
    print(hname)
    hists = {k[0]: v for k,v in output[hname].values(sumw2=True, overflow='over').items()}
Example #26
def getRoc(args):
    """Plot each input variable per lepton/jet selection and collect the ROC
    inputs (vals, rocs, labels)."""

    # variables to scan (renamed from `vars` to avoid shadowing the built-in)
    variables = args.vars.split(',')

    # open hists
    hists_unmapped = load('%s.coffea' % args.hists)
    print(hists_unmapped)

    # map to hists
    hists_mapped = {}
    for key, val in hists_unmapped.items():
        if isinstance(val, hist.Hist):
            hists_mapped[key] = processmap.apply(val)
    print('hists mapped ', hists_mapped)

    # build roc for all vars
    vals = {}
    rocs = {}
    labels = {}

    for lep in ['ele', 'mu']:
        for jet in ['jet0', 'jet1']:
            vals['%s_%s' % (lep, jet)] = {}
            rocs['%s_%s' % (lep, jet)] = {}

    for var in variables:
        for lep in ['ele', 'mu']:
            for jet in ['jet0', 'jet1']:
                print('getting roc for ', var, lep, jet)
                hist_name = 'roc_%ssel%s' % (lep, jet)
                if 'lsf' in var:
                    var_name = jet + '_' + var
                    var_cut_dir = -1
                else:
                    var_name = lep + '0_' + var
                    var_cut_dir = 1

                # get hist
                h = hists_mapped[hist_name]
                print(h)
                print([
                    ax for ax in h.axes()
                    if ax.name not in {'process', var_name}
                ])
                x = h.sum(*[
                    ax for ax in h.axes()
                    if ax.name not in {'process', var_name}
                ])

                bkg = 'qcd'
                sig = 'h125'

                #vals['%s_%s'%(lep,jet)][var], rocs['%s_%s'%(lep,jet)][var] = roc(x, bkg, sig, direction=var_cut_dir)
                labels[var] = var

                # plot variable
                fig, ax = plt.subplots(1, 1, figsize=(8, 8))
                print(x)
                hist.plot1d(x,
                            ax=ax,
                            overlay='process',
                            clear=False,
                            density=True)
                fig.savefig("plots/rocs/lsf_%s_%s_%s.png" % (var, lep, jet))

    return vals, rocs, labels
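
# The roc(...) helper referenced (commented out) inside getRoc is not defined
# in this snippet; a minimal stand-in that traces signal vs background
# efficiency from two arrays of bin counts might look like this (hypothetical
# name and signature; assumes numpy imported as np, as elsewhere):
def roc_from_counts(sig_counts, bkg_counts, direction=1):
    """Cumulate bin counts into (signal eff, background eff) per threshold.

    direction=1 keeps events above the cut, direction=-1 keeps those below.
    """
    sig = np.asarray(sig_counts, dtype=float)
    bkg = np.asarray(bkg_counts, dtype=float)
    if direction == 1:
        sig_pass = np.cumsum(sig[::-1])[::-1]
        bkg_pass = np.cumsum(bkg[::-1])[::-1]
    else:
        sig_pass = np.cumsum(sig)
        bkg_pass = np.cumsum(bkg)
    return sig_pass / sig.sum(), bkg_pass / bkg.sum()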
Example #27
import mplhep
plt.style.use(mplhep.style.ROOT)
plt.tight_layout()
from brazil.aguapreta import *

figure_directory = "/home/dryu/BFrag/data/figures/"

input_files = [
    "/home/dryu/BFrag/data/histograms/condor/job20200513_215445/DataHistograms_Run2018.coffea",
]
hists = {}
subjob_cutflows = {}
for i, input_file in enumerate(input_files):
    print(input_file)
    this_hists = util.load(input_file)

    # Integrate dataset to save space
    for key in this_hists.keys():
        obj = this_hists[key]
        if isinstance(obj, hist.hist_tools.Hist):
            if "dataset" in obj.axes():
                this_hists[key] = obj.integrate("dataset")
                del obj
    #if i == 0:
    #    pprint(this_hists.keys())
    for item_name, item in this_hists.items():
        if isinstance(item, hist.Hist):
            if item_name in hists:
                hists[item_name].add(item)
            else:
Example #28
    # (snippet truncated above: mc_pu and the data_pu* arrays are derived
    #  earlier from fin_pileup)
    mask = mc_pu > 0.
    corr = data_pu.copy()
    corr_puUp = data_pu_puUp.copy()
    corr_puDown = data_pu_puDown.copy()
    corr[mask] /= mc_pu[mask]
    corr_puUp[mask] /= mc_pu[mask]
    corr_puDown[mask] /= mc_pu[mask]
    pileup_corr = lookup_tools.dense_lookup.dense_lookup(corr, fin_pileup["pileup"].edges)
    pileup_corr_puUp = lookup_tools.dense_lookup.dense_lookup(corr_puUp, fin_pileup["pileup"].edges)
    pileup_corr_puDown = lookup_tools.dense_lookup.dense_lookup(corr_puDown, fin_pileup["pileup"].edges)

corrections['2016_pileupweight'] = pileup_corr
corrections['2016_pileupweight_puUp'] = pileup_corr_puUp
corrections['2016_pileupweight_puDown'] = pileup_corr_puDown
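
# A dense_lookup behaves like a callable correction: evaluating it on an array
# of true pileup interaction counts yields per-event weights (sketch; the
# event access pattern is an assumption):
# pu_weight = corrections['2016_pileupweight'](events.Pileup.nTrueInt)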

pileup_corr = load('correction_files/pileup_mc.coffea')
with uproot.open("correction_files/pileup_Cert_294927-306462_13TeV_PromptReco_Collisions17_withVar.root") as fin_pileup:
    norm = lambda x: x / x.sum()
    data_pu = norm(fin_pileup["pileup"].values)
    data_pu_puUp = norm(fin_pileup["pileup_plus"].values)
    data_pu_puDown = norm(fin_pileup["pileup_minus"].values)

    pileup_corr_puUp = {}
    pileup_corr_puDown = {}
    for k in pileup_corr.keys():
        mc_pu = norm(pileup_corr[k].value)
        mask = mc_pu > 0.
        corr = data_pu.copy()
        corr_puUp = data_pu_puUp.copy()
        corr_puDown = data_pu_puDown.copy()
        corr[mask] /= mc_pu[mask]
Example #29
def acc_from_dir(indir):
    """Load Coffea accumulator from directory with *.coffea files

    :param indir: Directory to search for coffea files
    :type indir: string
    :return: Sum of all found accumulators
    :rtype: dict
    """
    files = filter(lambda x: x.endswith(".coffea") and not ('cache' in x),
                   os.listdir(indir))
    files = list(map(lambda x: os.path.abspath(pjoin(indir, x)), files))
    listhash = sha256sum(files)
    cache = pjoin(indir, f'merged_cache_{listhash}.coffea')
    if os.path.exists(cache):
        return load(cache)
    else:
        # Progress bar
        t = tqdm(total=len(files), desc='Merging input files')

        # Recursive merging
        to_merge = files

        # Use temporary files to store intermediate
        # merger results
        tmp_files = []

        def load_and_remove(path):
            data = load(path)
            os.remove(path)
            return data

        def next_item():
            '''Get next item to merge (renamed from `next` to avoid shadowing
            the built-in)'''
            x = to_merge.pop(0)
            if isinstance(x, str):
                if x in tmp_files:
                    tmp_files.remove(x)
                    x = load_and_remove(x)
                else:
                    x = load(x)
            return x

        while len(to_merge) > 1:
            # Remove first two items from list,
            # merge them and insert in the back
            t.update()

            x = next_item()
            y = next_item()

            tmp = "/tmp/tmp_bucoffea_merge_" + "".join(
                random.sample(string.ascii_uppercase + string.digits, 24))
            merged = x + y
            # clean up to save memory
            x = None
            y = None
            save(merged, tmp)
            merged = None
            to_merge.append(tmp)
            tmp_files.append(tmp)

        t.update()
        assert len(to_merge) == 1

        shutil.copy(to_merge[0], cache)
        return load(cache)
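
# sha256sum(...) and pjoin used above are not defined in this snippet; pjoin
# is presumably os.path.join, and a plausible sha256sum hashing the contents
# of the listed files (assumed semantics) is:
import hashlib

def sha256sum(filelist):
    """Hex digest over the concatenated contents of the given files."""
    h = hashlib.sha256()
    for fname in sorted(filelist):
        with open(fname, 'rb') as f:
            for chunk in iter(lambda: f.read(65536), b''):
                h.update(chunk)
    return h.hexdigest()
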
def get_sig_templates(tmp_rname):
    """
    Function that writes linearized mtt vs costheta distributions to root file.
    """
    widthTOname = lambda width: str(width).replace(".", "p")
    nameTOwidth = lambda width: str(width).replace("p", ".")

    ## variables that only need to be defined/evaluated once
    hdict = plt_tools.add_coffea_files(
        sig_fnames) if len(sig_fnames) > 1 else load(sig_fnames[0])

    # get correct hist and rebin
    hname_to_use = "mtt_vs_tlep_ctstar_abs"
    if hname_to_use not in hdict.keys():
        raise ValueError(f"{hname_to_use} not found in file")
    xrebinning, yrebinning = linearize_binning
    #xrebinning, yrebinning = mtt_ctstar_2d_binning
    histo = hdict[hname_to_use]  # process, sys, jmult, leptype, btag, lepcat

    #set_trace()
    xaxis_name = histo.dense_axes()[0].name
    yaxis_name = histo.dense_axes()[1].name
    ## rebin x axis
    if isinstance(xrebinning, np.ndarray):
        new_xbins = hist.Bin(xaxis_name, xaxis_name, xrebinning)
    elif isinstance(xrebinning, float) or isinstance(xrebinning, int):
        new_xbins = xrebinning
    histo = histo.rebin(xaxis_name, new_xbins)

    ## rebin y axis
    if isinstance(yrebinning, np.ndarray):
        new_ybins = hist.Bin(yaxis_name, yaxis_name, yrebinning)
    elif isinstance(yrebinning, float) or isinstance(yrebinning, int):
        new_ybins = yrebinning
    histo = histo.rebin(yaxis_name, new_ybins)
    rebin_histo = histo[Plotter.signal_samples, :, :, :,
                        "btagPass"].integrate("btag")

    names = sorted(
        set(key[0] for key in
            rebin_histo.values().keys()))  # dataset names in hists

    signals = [sig for sig in names
               if "TTJetsSL" in sig]  # only use SL decays

    systs = sorted(set([key[1] for key in rebin_histo.values().keys()]))
    systs.insert(0,
                 systs.pop(systs.index("nosys")))  # move "nosys" to the front

    # need to save coffea hist objects to file so they can be opened by uproot in the proper format
    upfout = uproot3.recreate(tmp_rname, compression=uproot3.ZLIB(
        4)) if os.path.isfile(tmp_rname) else uproot3.create(tmp_rname)

    if "3Jets" in njets_to_run:
        histo_dict_3j = processor.dict_accumulator({
            "Muon": {},
            "Electron": {}
        })
    if "4PJets" in njets_to_run:
        histo_dict_4pj = processor.dict_accumulator({
            "Muon": {},
            "Electron": {}
        })

        # write signal dists to temp file
    for lep in ["Muon", "Electron"]:
        orig_lepdir = "muNJETS" if lep == "Muon" else "eNJETS"

        # scale by lumi
        lumi_correction = lumi_corr_dict[args.year]["%ss" % lep]
        histo = rebin_histo.copy()
        histo.scale(lumi_correction, axis="dataset")
        process_groups = plt_tools.make_dataset_groups(lep,
                                                       args.year,
                                                       samples=names,
                                                       gdict="templates")
        histo = histo.group(
            "dataset", hist.Cat("process", "Process", sorting="placement"),
            process_groups)

        for jmult in njets_to_run:
            lepdir = orig_lepdir.replace("NJETS", jmult.lower())

            #set_trace()
            lin_histo = Plotter.linearize_hist(
                histo[:, :, jmult,
                      lep].integrate("jmult").integrate("leptype"))
            for signal in signals:
                if "Int" in signal:
                    boson, mass, width, pI, wt = tuple(signal.split("_"))
                else:
                    boson, mass, width, pI = tuple(signal.split("_"))
                sub_name = "_".join([
                    "%s%s" % (boson[0], mass[1:]),
                    "relw%s" % widthTOname(width).split("W")[-1],
                    pI.lower(), wt
                ]) if pI == "Int" else "_".join([
                    "%s%s" % (boson[0], mass[1:]),
                    "relw%s" % widthTOname(width).split("W")[-1],
                    pI.lower()
                ])

                #set_trace()
                for sys in systs:
                    if sys not in systematics.template_sys_to_name[
                            args.year].keys():
                        continue
                    if not lin_histo[signal, sys].values().keys():
                        print(
                            f"Systematic {sys} for {lep} {jmult} {signal} not found, skipping"
                        )
                        continue

                    print(args.year, lep, jmult, sub_name, sys)
                    outhname = "_".join(
                        list(
                            filter(None, [
                                sub_name, systematics.template_sys_to_name[
                                    args.year][sys][0], lepdir,
                                (args.year)[-2:]
                            ])))
                    if "LEP" in outhname:
                        outhname = outhname.replace(
                            "LEP",
                            "muon") if lep == "Muon" else outhname.replace(
                                "LEP", "electron")

                    template_histo = lin_histo[signal, sys].integrate(
                        "process").integrate("sys")

                    ## save template histos to coffea dict
                    if jmult == "3Jets":
                        histo_dict_3j[lep][
                            f"{signal}_{sys}"] = template_histo.copy()
                    if jmult == "4PJets":
                        histo_dict_4pj[lep][
                            f"{signal}_{sys}"] = template_histo.copy()

                        ## save template histo to root file
                    upfout[outhname] = hist.export1d(template_histo)

    if "3Jets" in njets_to_run:
        coffea_out_3j = os.path.join(
            outdir,
            f"test_raw_templates_lj_3Jets_sig_{args.year}_{jobid}.coffea")
        save(histo_dict_3j, coffea_out_3j)
        print(f"{coffea_out_3j} written")
    if "4PJets" in njets_to_run:
        coffea_out_4pj = os.path.join(
            outdir,
            f"test_raw_templates_lj_4PJets_sig_{args.year}_{jobid}.coffea")
        save(histo_dict_4pj, coffea_out_4pj)
        print(f"{coffea_out_4pj} written")

    upfout.close()
    print(f"{tmp_rname} written")