Ejemplo n.º 1
0
    def prepare_data_mc_mcgen(self):
        """Load the data, MC-reco and MC-gen frames and restrict them to the bin.

        If both "applied" reco pickles exist and the
        ``preparemlsamples_data_mc_mcgen`` step is reported as done, those
        pickles are loaded unchanged.  Otherwise the plain reco pickles are
        loaded and passed through ``selectdfquery`` with the event selection
        (``p_evtsel``) and the respective trigger selection.  The generated
        MC frame is always read from ``f_gen_mc`` and receives the event,
        trigger and ``p_presel_gen_eff`` selections.

        Sets ``df_data``, ``df_mc``, ``df_mcgen`` and ``arraydf``.
        ``arraydf`` references the data and MC frames as they are *before*
        the ``v_bin`` range selection at the end of this method.
        """

        def load_frame(path):
            # Close the handle as soon as the pickle has been read rather
            # than leaving it open until garbage collection.
            handle = openfile(path, "rb")
            try:
                return pickle.load(handle)
            finally:
                handle.close()

        self.logger.info("Prepare data reco as well as MC reco and gen")
        if os.path.exists(self.f_reco_applieddata) \
                and os.path.exists(self.f_reco_appliedmc) \
                and self.step_done("preparemlsamples_data_mc_mcgen"):
            self.df_data = load_frame(self.f_reco_applieddata)
            self.df_mc = load_frame(self.f_reco_appliedmc)
        else:
            self.df_data = load_frame(self.f_reco_data)
            self.df_mc = load_frame(self.f_reco_mc)
            self.df_data = selectdfquery(self.df_data, self.p_evtsel)
            self.df_mc = selectdfquery(self.df_mc, self.p_evtsel)

            self.df_data = selectdfquery(self.df_data, self.p_triggersel_data)
            self.df_mc = selectdfquery(self.df_mc, self.p_triggersel_mc)

        self.df_mcgen = load_frame(self.f_gen_mc)
        self.df_mcgen = selectdfquery(self.df_mcgen, self.p_evtsel)
        self.df_mcgen = selectdfquery(self.df_mcgen, self.p_triggersel_mc)
        self.df_mcgen = self.df_mcgen.query(self.p_presel_gen_eff)

        # Keep references to the frames before the bin restriction below.
        self.arraydf = [self.df_data, self.df_mc]
        self.df_mc = seldf_singlevar(self.df_mc, self.v_bin, self.p_binmin,
                                     self.p_binmax)
        self.df_mcgen = seldf_singlevar(self.df_mcgen, self.v_bin,
                                        self.p_binmin, self.p_binmax)
        self.df_data = seldf_singlevar(self.df_data, self.v_bin, self.p_binmin,
                                       self.p_binmax)
Ejemplo n.º 2
0
    def process_histomass(self):
        """Write invariant-mass histograms per (pt, second-variable) bin.

        For every final pt bin the merged pickle of the matching analysis
        bin is loaded, the ML selection ``l_selml`` and the optional
        ``s_evtsel`` / ``s_trigger`` queries are applied and the frame is
        restricted to the pt bin.  For every bin of the second variable a
        ``TH1F`` of ``inv_mass`` is written to ``n_filemass``; if the frame
        has a ``pt_jet`` column, a ``TH2F`` of ``inv_mass`` versus the value
        returned by ``z_calc`` is written as well.
        """
        myfile = TFile.Open(self.n_filemass, "recreate")

        for ipt in range(self.p_nptfinbins):
            bin_id = self.bin_matching[ipt]
            # Read the pickle and release the handle right away instead of
            # leaving it open for the rest of the loop.
            handle = openfile(self.lpt_recodecmerged[bin_id], "rb")
            try:
                df = pickle.load(handle)
            finally:
                handle.close()
            df = df.query(self.l_selml[bin_id])
            if self.s_evtsel is not None:
                df = df.query(self.s_evtsel)
            if self.s_trigger is not None:
                df = df.query(self.s_trigger)
            df = seldf_singlevar(df, self.v_var_binning, \
                                 self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])
            for ibin2 in range(len(self.lvar2_binmin)):
                # The suffix encodes both bin ranges and the probability cut
                # so that every histogram name in the file is unique.
                suffix = "%s%d_%d_%.2f%s_%.2f_%.2f" % \
                         (self.v_var_binning, self.lpt_finbinmin[ipt],
                          self.lpt_finbinmax[ipt], self.lpt_probcutfin[bin_id],
                          self.v_var2_binning, self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
                h_invmass = TH1F("hmass" + suffix, "", self.p_num_bins,
                                 self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
                df_bin = seldf_singlevar(df, self.v_var2_binning,
                                         self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
                fill_hist(h_invmass, df_bin.inv_mass)
                myfile.cd()
                h_invmass.Write()

                # The 2D histogram is only produced when jet columns exist.
                if "pt_jet" in df_bin.columns:
                    zarray = z_calc(df_bin.pt_jet, df_bin.phi_jet, df_bin.eta_jet,
                                    df_bin.pt_cand, df_bin.phi_cand, df_bin.eta_cand)
                    h_zvsinvmass = TH2F("hzvsmass" + suffix, "", 5000, 1.00, 6.00, 2000, -0.5, 1.5)
                    # One (inv_mass, z) row per candidate.
                    zvsinvmass = np.vstack((df_bin.inv_mass, zarray)).T
                    fill_hist(h_zvsinvmass, zvsinvmass)
                    h_zvsinvmass.Write()
Ejemplo n.º 3
0
 def skim(self, file_index):
     """Skim one reco file (and, for MC, its gen file) into per-pt-bin pickles.

     The reco frame ``l_reco[file_index]`` is restricted to each analysis
     pt bin, filtered with ``s_reco_skim[ipt]``, re-indexed and dumped to
     ``mptfiles_recosk[ipt][file_index]``.  When ``mcordata`` is ``"mc"``
     the same is done for the gen frame with ``s_gen_skim`` and
     ``mptfiles_gensk``.

     A failure to load an input is reported with ``print``.  If the reco
     frame cannot be loaded nothing is written; if only the gen frame
     cannot be loaded the reco skims are still written and the gen skims
     are skipped.
     """
     try:
         handle = openfile(self.l_reco[file_index], "rb")
         try:
             dfreco = pickle.load(handle)
         finally:
             handle.close()
     except Exception as e:  # pylint: disable=broad-except
         print('failed to open file', self.l_reco[file_index], str(e))
         # Without the reco frame there is nothing to skim; carrying on
         # would only fail later on the unbound ``dfreco``.
         return

     # The gen frame does not depend on the pt bin, so it is read once here
     # instead of once per iteration.  ``None`` marks "not available".
     dfgen = None
     if self.mcordata == "mc":
         try:
             handle = openfile(self.l_gen[file_index], "rb")
             try:
                 dfgen = pickle.load(handle)
             finally:
                 handle.close()
         except Exception as e:  # pylint: disable=broad-except
             print('failed to open MC file', self.l_gen[file_index],
                   str(e))

     for ipt in range(self.p_nptbins):
         dfrecosk = seldf_singlevar(dfreco, self.v_var_binning,
                                    self.lpt_anbinmin[ipt],
                                    self.lpt_anbinmax[ipt])
         dfrecosk = selectdfquery(dfrecosk, self.s_reco_skim[ipt])
         dfrecosk = dfrecosk.reset_index(drop=True)
         f = openfile(self.mptfiles_recosk[ipt][file_index], "wb")
         try:
             pickle.dump(dfrecosk, f, protocol=4)
         finally:
             f.close()
         if dfgen is not None:
             dfgensk = seldf_singlevar(dfgen, self.v_var_binning,
                                       self.lpt_anbinmin[ipt],
                                       self.lpt_anbinmax[ipt])
             dfgensk = selectdfquery(dfgensk, self.s_gen_skim[ipt])
             dfgensk = dfgensk.reset_index(drop=True)
             # Close the output explicitly so the pickle is flushed to disk
             # before the next bin is processed.
             f = openfile(self.mptfiles_gensk[ipt][file_index], "wb")
             try:
                 pickle.dump(dfgensk, f, protocol=4)
             finally:
                 f.close()
Ejemplo n.º 4
0
    def preparesample(self):
        """Build the signal/background ML sample and its train/test split.

        Loads the data, MC-reco and MC-gen frames, restricts them to the
        ``v_bin`` range, picks the signal and background sources from
        ``[data, mc]`` via ``p_tagsig`` / ``p_tagbkg``, applies the
        ``s_selsigml`` / ``s_selbkgml`` queries, caps the sample sizes at
        ``p_nsig`` / ``p_nbkg`` (lowering those attributes if fewer
        candidates are available), labels the rows in column ``v_sig`` and
        splits the concatenated sample with ``train_test_split``.

        Sets, among others, ``df_ml``, ``df_mltrain``, ``df_mltest``,
        ``df_sigtrain``, ``df_bkgtrain``, ``df_sigtest``, ``df_bkgtest``,
        ``df_xtrain``, ``df_ytrain``, ``df_xtest`` and ``df_ytest``.
        """

        def load_frame(path):
            # Release the file handle as soon as the pickle is read.
            handle = openfile(path, "rb")
            try:
                return pickle.load(handle)
            finally:
                handle.close()

        self.logger.info("Prepare Sample")
        self.df_data = load_frame(self.f_reco_data)
        self.df_mc = load_frame(self.f_reco_mc)
        self.df_mcgen = load_frame(self.f_gen_mc)
        self.df_mcgen = self.df_mcgen.query(self.p_presel_gen_eff)
        # Captured before the bin restriction; signal and background are
        # therefore restricted to the bin again further down.
        arraydf = [self.df_data, self.df_mc]
        self.df_mc = seldf_singlevar(self.df_mc, self.v_bin, self.p_binmin, self.p_binmax)
        self.df_mcgen = seldf_singlevar(self.df_mcgen, self.v_bin, self.p_binmin, self.p_binmax)
        self.df_data = seldf_singlevar(self.df_data, self.v_bin, self.p_binmin, self.p_binmax)


        self.df_sig, self.df_bkg = arraydf[self.p_tagsig], arraydf[self.p_tagbkg]
        self.df_sig = seldf_singlevar(self.df_sig, self.v_bin, self.p_binmin, self.p_binmax)
        self.df_bkg = seldf_singlevar(self.df_bkg, self.v_bin, self.p_binmin, self.p_binmax)
        self.df_sig = self.df_sig.query(self.s_selsigml)
        self.df_bkg = self.df_bkg.query(self.s_selbkgml)
        # All MC truth flags of the background sample are set to zero.
        self.df_bkg["ismcsignal"] = 0
        self.df_bkg["ismcprompt"] = 0
        self.df_bkg["ismcfd"] = 0
        self.df_bkg["ismcbkg"] = 0


        if self.p_nsig > len(self.df_sig):
            self.logger.warning("There are not enough signal events")
        if self.p_nbkg > len(self.df_bkg):
            self.logger.warning("There are not enough background events")

        # Never request more candidates than are available.
        self.p_nsig = min(len(self.df_sig), self.p_nsig)
        self.p_nbkg = min(len(self.df_bkg), self.p_nbkg)

        self.logger.info("Used number of signal events is %d", self.p_nsig)
        self.logger.info("Used number of background events is %d", self.p_nbkg)

        self.df_ml = pd.DataFrame()
        # Shuffle first so that the truncation below picks a random subset.
        self.df_sig = shuffle(self.df_sig, random_state=self.rnd_shuffle)
        self.df_bkg = shuffle(self.df_bkg, random_state=self.rnd_shuffle)
        self.df_sig = self.df_sig[:self.p_nsig]
        self.df_bkg = self.df_bkg[:self.p_nbkg]
        # Target column: 1 for signal, 0 for background.
        self.df_sig[self.v_sig] = 1
        self.df_bkg[self.v_sig] = 0
        self.df_ml = pd.concat([self.df_sig, self.df_bkg])
        self.df_mltrain, self.df_mltest = train_test_split(self.df_ml, \
                                           test_size=self.test_frac, random_state=self.rnd_splt)
        self.df_mltrain = self.df_mltrain.reset_index(drop=True)
        self.df_mltest = self.df_mltest.reset_index(drop=True)
        self.df_sigtrain, self.df_bkgtrain = split_df_sigbkg(self.df_mltrain, self.v_sig)
        self.df_sigtest, self.df_bkgtest = split_df_sigbkg(self.df_mltest, self.v_sig)
        self.logger.info("Total number of candidates: train %d and test %d", len(self.df_mltrain),
                         len(self.df_mltest))
        self.logger.info("Number of signal candidates: train %d and test %d",
                         len(self.df_sigtrain), len(self.df_sigtest))
        self.logger.info("Number of bkg candidates: %d and test %d", len(self.df_bkgtrain),
                         len(self.df_bkgtest))

        # Feature matrices and target vectors for training and testing.
        self.df_xtrain = self.df_mltrain[self.v_train]
        self.df_ytrain = self.df_mltrain[self.v_sig]
        self.df_xtest = self.df_mltest[self.v_train]
        self.df_ytest = self.df_mltest[self.v_sig]
 def process_histomass_single(self, index):
     """Write normalisation and invariant-mass histograms for one file.

     First the event frame ``l_evtorig[index]`` is loaded, filtered with
     the optional trigger query and run list, and a two-bin ``hEvForNorm``
     histogram is written: bin 1 holds the value of ``getnormforselevt``,
     bin 2 the number of events with ``is_ev_rej==0`` (both 0 for an empty
     frame).  Then, for every final pt bin, the candidate frame is loaded,
     selected and its ``inv_mass`` histogram written to
     ``l_histomass[index]``.  For ``mcordata == "mc"`` separate histograms
     for signal and reflection candidates are written as well.
     """

     def load_frame(path):
         # Release the file handle as soon as the pickle is read.
         handle = openfile(path, "rb")
         try:
             return pickle.load(handle)
         finally:
             handle.close()

     myfile = TFile.Open(self.l_histomass[index], "recreate")
     dfevtorig = load_frame(self.l_evtorig[index])
     if self.s_trigger is not None:
         dfevtorig = dfevtorig.query(self.s_trigger)
     if self.runlistrigger is not None:
         dfevtorig = selectdfrunlist(dfevtorig, \
                          self.run_param[self.runlistrigger], "run_number")
     hNorm = TH1F("hEvForNorm", "hEvForNorm", 2, 0.5, 2.5)
     hNorm.GetXaxis().SetBinLabel(1, "normsalisation factor")
     hNorm.GetXaxis().SetBinLabel(2, "selected events")
     nselevt = 0
     norm = 0
     # Both numbers stay at 0 when no event survives the filters above.
     if not dfevtorig.empty:
         nselevt = len(dfevtorig.query("is_ev_rej==0"))
         norm = getnormforselevt(dfevtorig)
     hNorm.SetBinContent(1, norm)
     hNorm.SetBinContent(2, nselevt)
     hNorm.Write()
     for ipt in range(self.p_nptfinbins):
         bin_id = self.bin_matching[ipt]
         df = load_frame(self.mptfiles_recoskmldec[bin_id][index])
         if self.doml is True:
             df = df.query(self.l_selml[bin_id])
         if self.s_evtsel is not None:
             df = df.query(self.s_evtsel)
         if self.s_trigger is not None:
             df = df.query(self.s_trigger)
         df = seldf_singlevar(df, self.v_var_binning, \
                              self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])
         suffix = "%s%d_%d" % \
                  (self.v_var_binning, self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])
         h_invmass = TH1F("hmass" + suffix, "", self.p_num_bins,
                          self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
         if self.runlistrigger is not None:
             df = selectdfrunlist(df, \
                      self.run_param[self.runlistrigger], "run_number")
         fill_hist(h_invmass, df.inv_mass)
         myfile.cd()
         h_invmass.Write()
         if self.mcordata == "mc":
             # Tag reflections from the bit variable, then split the frame
             # into signal and reflection candidates.
             df[self.v_ismcrefl] = np.array(tag_bit_df(
                 df, self.v_bitvar, self.b_mcrefl),
                                            dtype=int)
             df_sig = df[df[self.v_ismcsignal] == 1]
             df_refl = df[df[self.v_ismcrefl] == 1]
             h_invmass_sig = TH1F("hmass_sig" + suffix, "", self.p_num_bins,
                                  self.p_mass_fit_lim[0],
                                  self.p_mass_fit_lim[1])
             h_invmass_refl = TH1F("hmass_refl" + suffix, "",
                                   self.p_num_bins, self.p_mass_fit_lim[0],
                                   self.p_mass_fit_lim[1])
             fill_hist(h_invmass_sig, df_sig.inv_mass)
             fill_hist(h_invmass_refl, df_refl.inv_mass)
             myfile.cd()
             h_invmass_sig.Write()
             h_invmass_refl.Write()
         # NOTE(review): printed once per pt bin, not once per file --
         # confirm whether this was meant to sit after the loop.
         print("FINISHED")
Ejemplo n.º 6
0
 def skim(self, file_index):
     """Skim one reco file (and, for MC, its gen file) into per-pt-bin pickles.

     The reco frame ``l_reco[file_index]`` is restricted to each analysis
     pt bin, filtered with ``s_reco_skim[ipt]``, re-indexed and written to
     ``mptfiles_recosk[ipt][file_index]``.  When ``mcordata`` is ``"mc"``
     the gen frame ``l_gen[file_index]`` is treated the same way with
     ``s_gen_skim`` and ``mptfiles_gensk``.
     """
     with open(self.l_reco[file_index], "rb") as reco_file:
         dfreco = pickle.load(reco_file)
     # The gen frame is identical for all pt bins: read it on first use
     # only, instead of re-reading the same pickle in every iteration.
     dfgen = None
     for ipt in range(self.p_nptbins):
         dfrecosk = seldf_singlevar(dfreco, self.v_var_binning,
                                    self.lpt_anbinmin[ipt],
                                    self.lpt_anbinmax[ipt])
         dfrecosk = selectdfquery(dfrecosk, self.s_reco_skim[ipt])
         dfrecosk = dfrecosk.reset_index(drop=True)
         dfrecosk.to_pickle(self.mptfiles_recosk[ipt][file_index])
         if self.mcordata == "mc":
             if dfgen is None:
                 with open(self.l_gen[file_index], "rb") as gen_file:
                     dfgen = pickle.load(gen_file)
             dfgensk = seldf_singlevar(dfgen, self.v_var_binning,
                                       self.lpt_anbinmin[ipt],
                                       self.lpt_anbinmax[ipt])
             dfgensk = selectdfquery(dfgensk, self.s_gen_skim[ipt])
             dfgensk = dfgensk.reset_index(drop=True)
             dfgensk.to_pickle(self.mptfiles_gensk[ipt][file_index])
Ejemplo n.º 7
0
    def process_histomass(self):
        """Write invariant-mass histograms per (pt, second-variable) bin.

        For every final pt bin the merged pickle of the matching analysis
        bin is loaded, the ML selection ``l_selml`` and the optional
        ``s_evtsel`` / ``s_trigger`` queries are applied and the frame is
        restricted to the pt bin.  For every bin of the second variable a
        ``TH1F`` of ``inv_mass`` is written to ``n_filemass``; if the frame
        has a ``pt_jet`` column, a ``TH2F`` of ``inv_mass`` versus the value
        returned by ``z_calc`` is written as well.  Candidate weights are
        passed to ``fill_hist`` but are currently always ``None``.
        """
        myfile = TFile.Open(self.n_filemass, "recreate")

        for ipt in range(self.p_nptfinbins):
            bin_id = self.bin_matching[ipt]
            # Read the pickle and release the handle right away instead of
            # leaving it open for the rest of the loop.
            handle = openfile(self.lpt_recodecmerged[bin_id], "rb")
            try:
                df = pickle.load(handle)
            finally:
                handle.close()
            df = df.query(self.l_selml[bin_id])
            if self.s_evtsel is not None:
                df = df.query(self.s_evtsel)
            if self.s_trigger is not None:
                df = df.query(self.s_trigger)
            df = seldf_singlevar(df, self.v_var_binning, \
                                 self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])
            for ibin2 in range(len(self.lvar2_binmin)):
                # The suffix encodes both bin ranges and the probability cut
                # so that every histogram name in the file is unique.
                suffix = "%s%d_%d_%.2f%s_%.2f_%.2f" % \
                         (self.v_var_binning, self.lpt_finbinmin[ipt],
                          self.lpt_finbinmax[ipt], self.lpt_probcutfin[bin_id],
                          self.v_var2_binning, self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
                h_invmass = TH1F("hmass" + suffix, "", self.p_num_bins,
                                 self.p_mass_fit_lim[0],
                                 self.p_mass_fit_lim[1])
                df_bin = seldf_singlevar(df, self.v_var2_binning,
                                         self.lvar2_binmin[ibin2],
                                         self.lvar2_binmax[ibin2])
                # Weighting is disabled for now; the sketch below shows the
                # intended lookup of per-candidate weights.
                weights = None
                #apply_weights = self.datap["analysis"][self.typean]["triggersel"]["weights"]
                #if apply_weights is not None:
                #    filenorm = TFile.Open("norm.root", "read")
                #    hnorm = filenorm.Get("hnorm_" + apply_weights[0] + "_" + apply_weights[1])
                #    weights = [hnorm.GetBinContent(hnorm.FindBin(_bin)) \
                #               for _bin in df_bin[apply_weights[0]]]
                fill_hist(h_invmass, df_bin.inv_mass, weights=weights)
                myfile.cd()
                h_invmass.Write()

                # The 2D histogram is only produced when jet columns exist.
                if "pt_jet" in df_bin.columns:
                    zarray = z_calc(df_bin.pt_jet, df_bin.phi_jet,
                                    df_bin.eta_jet, df_bin.pt_cand,
                                    df_bin.phi_cand, df_bin.eta_cand)
                    h_zvsinvmass = TH2F("hzvsmass" + suffix, "", 5000, 1.00,
                                        6.00, 2000, -0.5, 1.5)
                    # One (inv_mass, z) row per candidate.
                    zvsinvmass = np.vstack((df_bin.inv_mass, zarray)).T
                    fill_hist(h_zvsinvmass, zvsinvmass)
                    h_zvsinvmass.Write()
Ejemplo n.º 8
0
    def process_histomass(self):
        """Write one invariant-mass histogram per (pt, second-variable) bin.

        For every final pt bin the merged pickle of the matching analysis
        bin is loaded, filtered with the ML selection ``l_selml`` and
        restricted to the pt bin.  For every bin of the second variable a
        ``TH1F`` of ``inv_mass`` is filled and written to ``n_filemass``.
        """
        myfile = TFile.Open(self.n_filemass, "recreate")

        for ipt in range(self.p_nptfinbins):
            bin_id = self.bin_matching[ipt]
            # Read the pickle and release the handle right away instead of
            # leaving it open for the rest of the loop.
            handle = openfile(self.lpt_recodecmerged[bin_id], "rb")
            try:
                df = pickle.load(handle)
            finally:
                handle.close()
            df = df.query(self.l_selml[bin_id])
            df = seldf_singlevar(df, self.v_var_binning, \
                                 self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])
            for ibin2 in range(len(self.lvar2_binmin)):
                # The suffix encodes both bin ranges and the probability cut
                # so that every histogram name in the file is unique.
                suffix = "%s%d_%d_%.2f%s_%d_%d" % \
                         (self.v_var_binning, self.lpt_finbinmin[ipt],
                          self.lpt_finbinmax[ipt], self.lpt_probcutfin[bin_id],
                          self.v_var2_binning, self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
                h_invmass = TH1F("hmass" + suffix, "", self.p_num_bins,
                                 self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
                df_bin = seldf_singlevar(df, self.v_var2_binning,
                                         self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
                fill_hist(h_invmass, df_bin.inv_mass)
                myfile.cd()
                h_invmass.Write()
Ejemplo n.º 9
0
    def create_df_closure(self, df_):
        """Return the gen-, reco- and gen+reco-selected versions of ``df_``.

        * gen selection: shape variable ``v_varshape_binning_gen`` and
          ``pt_gen_jet`` inside their full generated ranges;
        * reco selection: ``pt_jet`` and shape variable
          ``v_varshape_binning`` inside their full reconstructed ranges;
        * gen+reco selection: the reco selection applied on top of the
          gen-selected frame.

        The full range of each variable runs from the lower edge of its
        first bin to the upper edge of its last bin.  The input frame is
        copied before any selection is applied.
        """
        def within_reco_range(frame):
            # Jet pt first, then the shape variable.
            frame = seldf_singlevar(frame, "pt_jet",
                                    self.lvar2_binmin_reco[0],
                                    self.lvar2_binmax_reco[-1])
            return seldf_singlevar(frame, self.v_varshape_binning,
                                   self.lvarshape_binmin_reco[0],
                                   self.lvarshape_binmax_reco[-1])

        # Generated-level window: shape variable first, then jet pt.
        sel_gen = seldf_singlevar(df_.copy(), self.v_varshape_binning_gen,
                                  self.lvarshape_binmin_gen[0],
                                  self.lvarshape_binmax_gen[-1])
        sel_gen = seldf_singlevar(sel_gen, "pt_gen_jet",
                                  self.lvar2_binmin_gen[0],
                                  self.lvar2_binmax_gen[-1])

        sel_reco = within_reco_range(df_.copy())
        sel_recogen = within_reco_range(sel_gen.copy())

        return sel_gen, sel_reco, sel_recogen
Ejemplo n.º 10
0
    def process_efficiency_single(self, index):
        """Fill the efficiency counting histograms for one input file.

        For every bin of the second binning variable six ``TH1F``
        histograms over the final pt bins are created -- generated,
        pre-selected and selected candidates, each for prompt
        (``ismcprompt == 1``) and feed-down (``ismcfd == 1``) -- and
        written to ``l_histoeff[index]``.  Each pt bin holds the number of
        candidates passing the respective selection, with the square root
        of that count as bin error.

        The "selected" frames apply ``l_selml`` only when ``doml`` is
        ``True``; otherwise they are the pre-selected frames.  Note that
        the generated frame is cut on the second variable with the *reco*
        bin limits.
        """

        def load_frame(path):
            # Release the file handle as soon as the pickle is read.
            handle = openfile(path, "rb")
            try:
                return pickle.load(handle)
            finally:
                handle.close()

        out_file = TFile.Open(self.l_histoeff[index], "recreate")
        for ibin2 in range(self.p_nbin2_reco):
            stringbin2 = "_%s_%.2f_%.2f" % (self.v_var2_binning, \
                                        self.lvar2_binmin_reco[ibin2], \
                                        self.lvar2_binmax_reco[ibin2])
            # Variable-width pt axis: all lower edges plus the last upper one.
            n_bins = self.p_nptfinbins
            analysis_bin_lims_temp = self.lpt_finbinmin.copy()
            analysis_bin_lims_temp.append(self.lpt_finbinmax[n_bins-1])
            analysis_bin_lims = array.array('f', analysis_bin_lims_temp)
            h_gen_pr = TH1F("h_gen_pr" + stringbin2, "Prompt Generated in acceptance |y|<0.5", \
                            n_bins, analysis_bin_lims)
            h_presel_pr = TH1F("h_presel_pr" + stringbin2, "Prompt Reco in acc |#eta|<0.8 and sel", \
                               n_bins, analysis_bin_lims)
            h_sel_pr = TH1F("h_sel_pr" + stringbin2, "Prompt Reco and sel in acc |#eta|<0.8 and sel", \
                            n_bins, analysis_bin_lims)
            h_gen_fd = TH1F("h_gen_fd" + stringbin2, "FD Generated in acceptance |y|<0.5", \
                            n_bins, analysis_bin_lims)
            h_presel_fd = TH1F("h_presel_fd" + stringbin2, "FD Reco in acc |#eta|<0.8 and sel", \
                               n_bins, analysis_bin_lims)
            h_sel_fd = TH1F("h_sel_fd" + stringbin2, "FD Reco and sel in acc |#eta|<0.8 and sel", \
                            n_bins, analysis_bin_lims)

            for ipt in range(self.p_nptfinbins):
                bin_id = self.bin_matching[ipt]
                df_mc_reco = load_frame(self.mptfiles_recoskmldec[bin_id][index])
                if self.s_evtsel is not None:
                    df_mc_reco = df_mc_reco.query(self.s_evtsel)
                if self.s_jetsel_reco is not None:
                    df_mc_reco = df_mc_reco.query(self.s_jetsel_reco)
                if self.s_trigger is not None:
                    df_mc_reco = df_mc_reco.query(self.s_trigger)
                df_mc_reco = selectdfrunlist(df_mc_reco, \
                         self.run_param[self.runlistrigger[self.triggerbit]], "run_number")
                df_mc_gen = load_frame(self.mptfiles_gensk[bin_id][index])
                df_mc_gen = df_mc_gen.query(self.s_jetsel_gen)
                df_mc_gen = selectdfrunlist(df_mc_gen, \
                         self.run_param[self.runlistrigger[self.triggerbit]], "run_number")
                df_mc_reco = seldf_singlevar(df_mc_reco, self.v_var_binning, \
                                     self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])
                df_mc_gen = seldf_singlevar(df_mc_gen, self.v_var_binning, \
                                     self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])
                df_mc_reco = seldf_singlevar(df_mc_reco, self.v_var2_binning, \
                                             self.lvar2_binmin_reco[ibin2], self.lvar2_binmax_reco[ibin2])
                df_mc_gen = seldf_singlevar(df_mc_gen, self.v_var2_binning, \
                                            self.lvar2_binmin_reco[ibin2], self.lvar2_binmax_reco[ibin2])

                df_gen_sel_pr = df_mc_gen[df_mc_gen.ismcprompt == 1]
                df_reco_presel_pr = df_mc_reco[df_mc_reco.ismcprompt == 1]
                df_gen_sel_fd = df_mc_gen[df_mc_gen.ismcfd == 1]
                df_reco_presel_fd = df_mc_reco[df_mc_reco.ismcfd == 1]
                if self.doml is True:
                    df_reco_sel_pr = df_reco_presel_pr.query(self.l_selml[bin_id])
                    df_reco_sel_fd = df_reco_presel_fd.query(self.l_selml[bin_id])
                else:
                    # Only the row count is used below, so no copy is needed.
                    df_reco_sel_pr = df_reco_presel_pr
                    df_reco_sel_fd = df_reco_presel_fd

                # Histogram bins are 1-based, pt bins 0-based.
                counted = (
                    (h_gen_pr, df_gen_sel_pr),
                    (h_presel_pr, df_reco_presel_pr),
                    (h_sel_pr, df_reco_sel_pr),
                    (h_gen_fd, df_gen_sel_fd),
                    (h_presel_fd, df_reco_presel_fd),
                    (h_sel_fd, df_reco_sel_fd),
                )
                for histo, frame in counted:
                    val = len(frame)
                    histo.SetBinContent(ipt + 1, val)
                    histo.SetBinError(ipt + 1, math.sqrt(val))

            out_file.cd()
            h_gen_pr.Write()
            h_presel_pr.Write()
            h_sel_pr.Write()
            h_gen_fd.Write()
            h_presel_fd.Write()
            h_sel_fd.Write()
Ejemplo n.º 11
0
    def process_histomass_single(self, index):
        """Write invariant-mass histograms for one input file.

        For every final pt bin the candidate frame is loaded and filtered
        (ML selection when ``doml`` is ``True``, then the optional event,
        reco-jet and trigger queries).  An ``inv_mass`` histogram named
        ``hmass_<ipt>`` is written before the pt restriction.  After the pt
        restriction, for every reco bin of the second variable, the frame
        is restricted to that bin and to the trigger run list, a ``z``
        column is added from ``z_calc`` and two histograms are written: a
        ``TH1F`` of ``inv_mass`` and a ``TH2F`` of ``inv_mass`` versus
        ``v_varshape_binning``.  For ``mcordata == "mc"`` separate
        ``inv_mass`` histograms for signal and reflection candidates are
        written too.
        """
        myfile = TFile.Open(self.l_histomass[index], "recreate")

        # The axis edges of the 2D histograms do not depend on the bin, so
        # they are built once instead of once per (pt, second-variable) bin.
        massarray = [1.0 + i * (5.0 / 5000.0) for i in range(5001)] # 5000 bins in range 1.0-6.0
        massarray_reco = array.array('d', massarray)
        zarray_reco = array.array('d', self.varshaperanges_reco)

        for ipt in range(self.p_nptfinbins):
            bin_id = self.bin_matching[ipt]
            # Read the pickle and release the handle right away.
            handle = openfile(self.mptfiles_recoskmldec[bin_id][index], "rb")
            try:
                df = pickle.load(handle)
            finally:
                handle.close()
            if self.doml is True:
                df = df.query(self.l_selml[bin_id])
            if self.s_evtsel is not None:
                df = df.query(self.s_evtsel)
            if self.s_jetsel_reco is not None:
                df = df.query(self.s_jetsel_reco)
            if self.s_trigger is not None:
                df = df.query(self.s_trigger)

            # Filled before the pt restriction below.
            h_invmass_all = TH1F("hmass_%d" % ipt, "", self.p_num_bins,
                                 self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
            fill_hist(h_invmass_all, df.inv_mass)
            myfile.cd()
            h_invmass_all.Write()

            df = seldf_singlevar(df, self.v_var_binning, \
                                 self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])
            for ibin2 in range(self.p_nbin2_reco):
                suffix = "%s%d_%d_%.2f%s_%.2f_%.2f" % \
                         (self.v_var_binning, self.lpt_finbinmin[ipt],
                          self.lpt_finbinmax[ipt], self.lpt_probcutfin[bin_id],
                          self.v_var2_binning, self.lvar2_binmin_reco[ibin2],
                          self.lvar2_binmax_reco[ibin2])
                df_bin = seldf_singlevar(df, self.v_var2_binning,
                                         self.lvar2_binmin_reco[ibin2],
                                         self.lvar2_binmax_reco[ibin2])
                df_bin = selectdfrunlist(df_bin, \
                         self.run_param[self.runlistrigger[self.triggerbit]], "run_number")

                # add the z column
                df_bin["z"] = z_calc(df_bin.pt_jet, df_bin.phi_jet, df_bin.eta_jet,
                                     df_bin.pt_cand, df_bin.phi_cand, df_bin.eta_cand)

                h_invmass = TH1F("hmass" + suffix, "", self.p_num_bins,
                                 self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
                fill_hist(h_invmass, df_bin.inv_mass)
                myfile.cd()
                h_invmass.Write()

                h_zvsinvmass = TH2F("hzvsmass" + suffix, "", \
                    5000, massarray_reco, self.p_nbinshape_reco, zarray_reco)
                h_zvsinvmass.Sumw2()
                fill2dhist(df_bin, h_zvsinvmass, "inv_mass", self.v_varshape_binning)
                h_zvsinvmass.Write()

                if self.mcordata == "mc":
                    # Tag reflections from the bit variable, then split the
                    # frame into signal and reflection candidates.
                    df_bin[self.v_ismcrefl] = np.array(tag_bit_df(df_bin, self.v_bitvar,
                                                                  self.b_mcrefl), dtype=int)
                    df_bin_sig = df_bin[df_bin[self.v_ismcsignal] == 1]
                    df_bin_refl = df_bin[df_bin[self.v_ismcrefl] == 1]
                    h_invmass_sig = TH1F("hmass_sig" + suffix, "", self.p_num_bins,
                                         self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
                    h_invmass_refl = TH1F("hmass_refl" + suffix, "", self.p_num_bins,
                                          self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
                    fill_hist(h_invmass_sig, df_bin_sig.inv_mass)
                    fill_hist(h_invmass_refl, df_bin_refl.inv_mass)
                    myfile.cd()
                    h_invmass_sig.Write()
                    h_invmass_refl.Write()
Ejemplo n.º 12
0
    def process_efficiency_single(self, index):
        """Fill the efficiency counting histograms for one input file.

        For every bin of the second binning variable a set of ``TH1F``
        histograms over the final pt bins is created (generated,
        pre-selected and selected candidates for prompt and feed-down, plus
        with/without-TOF variants for prompt) and written to
        ``l_histoeff[index]``.

        Each pt bin normally holds the number of rows of the corresponding
        frame with the square root of that number as error; if
        ``corr_eff_mult`` is ``True`` for the second-variable bin, value and
        error come from ``get_reweighted_count`` instead.  The cut on the
        second variable is only applied when ``mc_cut_on_binning2`` is set.
        """

        def load_frame(path):
            # Release the file handle as soon as the pickle is read.
            handle = openfile(path, "rb")
            try:
                return pickle.load(handle)
            finally:
                handle.close()

        out_file = TFile.Open(self.l_histoeff[index], "recreate")
        # Histograms of the current second-variable bin, in creation order.
        h_list = []
        for ibin2 in range(len(self.lvar2_binmin)):
            stringbin2 = "_%s_%.2f_%.2f" % (self.v_var2_binning_gen,
                                            self.lvar2_binmin[ibin2],
                                            self.lvar2_binmax[ibin2])
            # Variable-width pt axis: all lower edges plus the last upper one.
            n_bins = len(self.lpt_finbinmin)
            analysis_bin_lims_temp = self.lpt_finbinmin.copy()
            analysis_bin_lims_temp.append(self.lpt_finbinmax[n_bins - 1])
            analysis_bin_lims = array.array('f', analysis_bin_lims_temp)

            # The loop-dependent values are bound as defaults so that the
            # helper uses the values of this iteration.
            def make_histo(name,
                           title,
                           name_extra=stringbin2,
                           bins=n_bins,
                           binning=analysis_bin_lims):
                histo = TH1F(name + name_extra, title, bins, binning)
                h_list.append(histo)
                return histo

            h_gen_pr = make_histo("h_gen_pr",
                                  "Prompt Generated in acceptance |y|<0.5")
            h_presel_pr = make_histo("h_presel_pr",
                                     "Prompt Reco in acc |#eta|<0.8 and sel")
            h_presel_pr_wotof = make_histo(
                "h_presel_pr_wotof",
                "Prompt Reco in acc woTOF |#eta|<0.8 and pre-sel")
            h_presel_pr_wtof = make_histo(
                "h_presel_pr_wtof",
                "Prompt Reco in acc wTOF |#eta|<0.8 and pre-sel")
            h_sel_pr = make_histo(
                "h_sel_pr", "Prompt Reco and sel in acc |#eta|<0.8 and sel")
            h_sel_pr_wotof = make_histo(
                "h_sel_pr_wotof",
                "Prompt Reco and sel woTOF in acc |#eta|<0.8")
            h_sel_pr_wtof = make_histo(
                "h_sel_pr_wtof", "Prompt Reco and sel wTOF in acc |#eta|<0.8")
            h_gen_fd = make_histo("h_gen_fd",
                                  "FD Generated in acceptance |y|<0.5")
            h_presel_fd = make_histo("h_presel_fd",
                                     "FD Reco in acc |#eta|<0.8 and sel")
            h_sel_fd = make_histo("h_sel_fd",
                                  "FD Reco and sel in acc |#eta|<0.8 and sel")

            bincounter = 0
            for ipt in range(self.p_nptfinbins):
                bin_id = self.bin_matching[ipt]
                df_mc_reco = load_frame(
                    self.mptfiles_recoskmldec[bin_id][index])
                if self.s_evtsel is not None:
                    df_mc_reco = df_mc_reco.query(self.s_evtsel)
                if self.s_trigger is not None:
                    df_mc_reco = df_mc_reco.query(self.s_trigger)
                if self.runlistrigger is not None:
                    df_mc_reco = selectdfrunlist(df_mc_reco, \
                         self.run_param[self.runlistrigger], "run_number")
                df_mc_gen = load_frame(self.mptfiles_gensk[bin_id][index])
                df_mc_gen = df_mc_gen.query(self.s_presel_gen_eff)
                if self.runlistrigger is not None:
                    df_mc_gen = selectdfrunlist(df_mc_gen, \
                             self.run_param[self.runlistrigger], "run_number")
                df_mc_reco = seldf_singlevar(df_mc_reco, self.v_var_binning, \
                                     self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])
                df_mc_gen = seldf_singlevar(df_mc_gen, self.v_var_binning, \
                                     self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])
                # Whether or not to cut on the 2nd binning variable
                if self.mc_cut_on_binning2:
                    df_mc_reco = seldf_singlevar_inclusive(df_mc_reco, self.v_var2_binning_gen, \
                                                 self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
                    df_mc_gen = seldf_singlevar_inclusive(df_mc_gen, self.v_var2_binning_gen, \
                                                self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
                df_gen_sel_pr = df_mc_gen[df_mc_gen.ismcprompt == 1]
                df_reco_presel_pr = df_mc_reco[df_mc_reco.ismcprompt == 1]
                df_reco_sel_pr = None
                if self.doml is True:
                    df_reco_sel_pr = df_reco_presel_pr.query(
                        self.l_selml[bin_id])
                else:
                    df_reco_sel_pr = df_reco_presel_pr.copy()
                df_gen_sel_fd = df_mc_gen[df_mc_gen.ismcfd == 1]
                df_reco_presel_fd = df_mc_reco[df_mc_reco.ismcfd == 1]
                df_reco_sel_fd = None
                if self.doml is True:
                    df_reco_sel_fd = df_reco_presel_fd.query(
                        self.l_selml[bin_id])
                else:
                    df_reco_sel_fd = df_reco_presel_fd.copy()

                # ``i_b`` and ``b_c`` are bound as defaults so the helper
                # works on the current second-variable bin and pt bin.
                def set_content(df_to_use,
                                histogram,
                                i_b=ibin2,
                                b_c=bincounter):
                    if self.corr_eff_mult[i_b] is True:
                        val, err = self.get_reweighted_count(df_to_use, i_b)
                    else:
                        val = len(df_to_use)
                        err = math.sqrt(val)
                    # Histogram bins are 1-based.
                    histogram.SetBinContent(b_c + 1, val)
                    histogram.SetBinError(b_c + 1, err)

                set_content(df_gen_sel_pr, h_gen_pr)
                # The TOF split is only possible if the column is present;
                # values below -998 go to the "woTOF" histograms
                # (presumably a "no TOF information" marker -- TODO confirm).
                if "nsigTOF_Pr_0" in df_reco_presel_pr:
                    set_content(
                        df_reco_presel_pr[
                            df_reco_presel_pr.nsigTOF_Pr_0 < -998],
                        h_presel_pr_wotof)
                    set_content(
                        df_reco_presel_pr[
                            df_reco_presel_pr.nsigTOF_Pr_0 > -998],
                        h_presel_pr_wtof)
                set_content(df_reco_presel_pr, h_presel_pr)
                set_content(df_reco_sel_pr, h_sel_pr)
                if "nsigTOF_Pr_0" in df_reco_sel_pr:
                    set_content(
                        df_reco_sel_pr[df_reco_sel_pr.nsigTOF_Pr_0 < -998],
                        h_sel_pr_wotof)
                    set_content(
                        df_reco_sel_pr[df_reco_sel_pr.nsigTOF_Pr_0 > -998],
                        h_sel_pr_wtof)
                set_content(df_gen_sel_fd, h_gen_fd)
                set_content(df_reco_presel_fd, h_presel_fd)
                set_content(df_reco_sel_fd, h_sel_fd)

                bincounter = bincounter + 1

            out_file.cd()
            for h in h_list:
                h.Write()
            # Start the next second-variable bin with an empty list.
            h_list = []
Ejemplo n.º 13
0
    def process_histomass_single(self, index):
        """Write normalisation and invariant-mass histograms for one input file.

        The ROOT file ``self.l_histomass[index]`` is recreated and filled with:

        * ``histonorm``: event counts after each selection step, followed by
          one count per bin of the second binning variable;
        * the three histograms returned by ``gethistonormforselevt_mult``
          (and a second, weighted set for data when
          ``self.usetriggcorrfunc`` is set);
        * for every (first-variable bin, second-variable bin) pair an
          invariant-mass histogram and a weighted one, plus signal and
          reflection mass histograms when ``self.mcordata == "mc"``;
        * event/candidate validation histograms when
          ``self.event_cand_validation`` is True.

        Args:
            index: position of the input file in the per-file lists
                (``self.l_histomass``, ``self.l_evtorig`` and
                ``self.mptfiles_recoskmldec[bin_id]``).
        """
        myfile = TFile.Open(self.l_histomass[index], "recreate")
        dfevtorig = pickle.load(openfile(self.l_evtorig[index], "rb"))
        neventsorig = len(dfevtorig)
        if self.s_trigger is not None:
            dfevtorig = dfevtorig.query(self.s_trigger)
        neventsaftertrigger = len(dfevtorig)
        if self.runlistrigger is not None:
            dfevtorig = selectdfrunlist(dfevtorig, \
                             self.run_param[self.runlistrigger], "run_number")
        neventsafterrunsel = len(dfevtorig)
        # The candidate loop below treats a missing event selection as
        # "keep everything"; do the same here instead of passing None to query().
        if self.s_evtsel is not None:
            dfevtevtsel = dfevtorig.query(self.s_evtsel)
        else:
            dfevtevtsel = dfevtorig

        # Validation plot for the event selection.
        # NOTE(review): histonorm has 10 bins and the per-bin counts start at
        # bin 5, so more than six second-variable bins would not fit - confirm.
        neventsafterevtsel = len(dfevtevtsel)
        histonorm = TH1F("histonorm", "histonorm", 10, 0, 10)
        histonorm.SetBinContent(1, neventsorig)
        histonorm.GetXaxis().SetBinLabel(1, "tot events")
        histonorm.SetBinContent(2, neventsaftertrigger)
        histonorm.GetXaxis().SetBinLabel(2, "tot events after trigger")
        histonorm.SetBinContent(3, neventsafterrunsel)
        histonorm.GetXaxis().SetBinLabel(3, "tot events after run sel")
        histonorm.SetBinContent(4, neventsafterevtsel)
        histonorm.GetXaxis().SetBinLabel(4, "tot events after evt sel")
        for ibin2 in range(len(self.lvar2_binmin)):
            binneddf = seldf_singlevar_inclusive(dfevtevtsel, self.v_var2_binning_gen, \
                self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
            histonorm.SetBinContent(5 + ibin2, len(binneddf))
            histonorm.GetXaxis().SetBinLabel(5 + ibin2, \
                        "tot events after mult sel %d - %d" % \
                        (self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2]))
        histonorm.Write()
        labeltrigger = "hbit%svs%s" % (self.triggerbit,
                                       self.v_var2_binning_gen)

        myfile.cd()
        hsel, hnovtxmult, hvtxoutmult = \
            self.gethistonormforselevt_mult(dfevtorig, dfevtevtsel, \
                                       labeltrigger, self.v_var2_binning_gen)

        # Weighted versions are only produced for data with a configured
        # trigger-correction function.
        if self.usetriggcorrfunc is not None and self.mcordata == "data":
            hselweight, hnovtxmultweight, hvtxoutmultweight = \
                self.gethistonormforselevt_mult(dfevtorig, dfevtevtsel, \
                    labeltrigger, self.v_var2_binning_gen, self.usetriggcorrfunc)
            hselweight.Write()
            hnovtxmultweight.Write()
            hvtxoutmultweight.Write()

        hsel.Write()
        hnovtxmult.Write()
        hvtxoutmult.Write()

        # Selected candidates of every bin, kept for the validation step below.
        list_df_recodtrig = []

        for ipt in range(self.p_nptfinbins):  # pylint: disable=too-many-nested-blocks
            bin_id = self.bin_matching[ipt]
            df = pickle.load(
                openfile(self.mptfiles_recoskmldec[bin_id][index], "rb"))
            if self.s_evtsel is not None:
                df = df.query(self.s_evtsel)
            if self.s_trigger is not None:
                df = df.query(self.s_trigger)
            if self.runlistrigger is not None:
                df = selectdfrunlist(df, \
                    self.run_param[self.runlistrigger], "run_number")
            if self.doml is True:
                df = df.query(self.l_selml[bin_id])
            # Stored before the first-variable binning and custom cuts are applied.
            list_df_recodtrig.append(df)
            df = seldf_singlevar(df, self.v_var_binning, \
                                 self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])

            if self.do_custom_analysis_cuts:
                df = self.apply_cuts_ptbin(df, ipt)

            for ibin2 in range(len(self.lvar2_binmin)):
                suffix = "%s%d_%d_%.2f%s_%.2f_%.2f" % \
                         (self.v_var_binning, self.lpt_finbinmin[ipt],
                          self.lpt_finbinmax[ipt], self.lpt_probcutfin[bin_id],
                          self.v_var2_binning, self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
                curr_dir = myfile.mkdir(f"bin1_{ipt}_bin2_{ibin2}")
                meta_info = create_meta_info(
                    self.v_var_binning, self.lpt_finbinmin[ipt],
                    self.lpt_finbinmax[ipt], self.v_var2_binning,
                    self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2],
                    self.lpt_probcutfin[bin_id])
                write_meta_info(curr_dir, meta_info)
                h_invmass = TH1F("hmass" + suffix, "", self.p_num_bins,
                                 self.p_mass_fit_lim[0],
                                 self.p_mass_fit_lim[1])
                h_invmass_weight = TH1F("h_invmass_weight" + suffix, "",
                                        self.p_num_bins,
                                        self.p_mass_fit_lim[0],
                                        self.p_mass_fit_lim[1])
                df_bin = seldf_singlevar_inclusive(df, self.v_var2_binning, \
                                         self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
                fill_hist(h_invmass, df_bin.inv_mass)
                if self.usetriggcorrfunc is not None and self.mcordata == "data":
                    weights = self.make_weights(
                        df_bin[self.v_var2_binning_gen], self.weightfunc,
                        self.weighthist, self.usetriggcorrfunc)

                    # Candidates are filled with the inverse of the returned weights.
                    weightsinv = [1. / weight for weight in weights]
                    fill_hist(h_invmass_weight,
                              df_bin.inv_mass,
                              weights=weightsinv)
                # Both histograms are written to the top level of the file; the
                # weighted one stays empty when no correction is applied.
                myfile.cd()
                h_invmass.Write()
                h_invmass_weight.Write()

                if self.mcordata == "mc":
                    df_bin[self.v_ismcrefl] = np.array(tag_bit_df(
                        df_bin, self.v_bitvar, self.b_mcrefl),
                                                       dtype=int)
                    df_bin_sig = df_bin[df_bin[self.v_ismcsignal] == 1]
                    df_bin_refl = df_bin[df_bin[self.v_ismcrefl] == 1]
                    h_invmass_sig = TH1F("hmass_sig" + suffix, "",
                                         self.p_num_bins,
                                         self.p_mass_fit_lim[0],
                                         self.p_mass_fit_lim[1])
                    h_invmass_refl = TH1F("hmass_refl" + suffix, "",
                                          self.p_num_bins,
                                          self.p_mass_fit_lim[0],
                                          self.p_mass_fit_lim[1])
                    fill_hist(h_invmass_sig, df_bin_sig.inv_mass)
                    fill_hist(h_invmass_refl, df_bin_refl.inv_mass)
                    myfile.cd()
                    h_invmass_sig.Write()
                    h_invmass_refl.Write()

        if self.event_cand_validation is True:
            df_recodtrig = pd.concat(list_df_recodtrig)
            # Keep only candidates within +-0.15 of self.mass.
            df_recodtrig = df_recodtrig.query("inv_mass>%f and inv_mass<%f" % \
                                              (self.mass - 0.15, self.mass + 0.15))
            dfevtwithd = pd.merge(dfevtevtsel,
                                  df_recodtrig,
                                  on=self.v_evtmatch)
            label = "h%s" % self.v_var2_binning_gen
            histomult = TH1F(label, label, self.nbinshisto, self.minvaluehisto,
                             self.maxvaluehisto)
            fill_hist(histomult, dfevtevtsel[self.v_var2_binning_gen])
            histomult.Write()
            labelwithd = "h%s_withd" % self.v_var2_binning_gen
            histomultwithd = TH1F(labelwithd, labelwithd, self.nbinshisto,
                                  self.minvaluehisto, self.maxvaluehisto)
            # The merge suffixes the event-side copy of the column with "_x".
            fill_hist(histomultwithd,
                      dfevtwithd["%s_x" % self.v_var2_binning_gen])
            histomultwithd.Write()
            # Validation histograms
            fill_validation_vertex(dfevtorig, dfevtevtsel,
                                   df_recodtrig).write()
            fill_validation_multiplicity(dfevtorig, dfevtevtsel,
                                         df_recodtrig).write()
            fill_validation_candidates(df_recodtrig).write()
            if self.mcordata == "mc":
                fill_validation_candidates(
                    df_recodtrig[df_recodtrig[self.v_ismcsignal] == 1],
                    "MC").write()
Ejemplo n.º 14
0
    def cutvariation_efficiencies(self, min_cv_cut, max_cv_cut):
        """Fill generated/selected count histograms for every cut variation.

        For each analysis bin the reconstructed MC frame is scanned with
        ``2 * self.p_ncutvar + 1`` thresholds on the ``y_test_prob<model>``
        column: ``self.p_ncutvar`` equally spaced values starting at
        ``min_cv_cut[ipt]``, the nominal ``self.lpt_probcutfin[bin_id]``, and
        ``self.p_ncutvar`` equally spaced values up to ``max_cv_cut[ipt]``.
        For every (trial, second-variable bin) pair four histograms binned in
        the first variable are filled (prompt/feed-down, generated/selected)
        with the candidate counts and their square-root errors, and finally
        written to ``self.n_fileeff_cutvar``.

        Args:
            min_cv_cut: lowest cut value of the scan, indexed by ``ipt``.
            max_cv_cut: highest cut value of the scan, indexed by ``ipt``.
        """
        myfile = TFile.Open(self.n_fileeff_cutvar, "recreate")

        h_gen_pr = []
        h_sel_pr = []
        h_gen_fd = []
        h_sel_fd = []

        idx = 0
        for ipt in range(self.p_nptfinbins):
            bin_id = self.bin_matching[ipt]
            df = pickle.load(openfile(self.lpt_recodecmerged_mc[bin_id], "rb"))

            if self.s_evtsel is not None:
                df = df.query(self.s_evtsel)
            if self.s_trigger_mc is not None:
                df = df.query(self.s_trigger_mc)
            print("Using run selection for eff histo", self.runlistrigger[self.triggerbit], \
                  "for period", self.period)
            df = selectdfrunlist(df, \
                                 self.run_param[self.runlistrigger[self.triggerbit]], "run_number")
            df = seldf_singlevar(df, self.v_var_binning, self.lpt_finbinmin[ipt], \
                                 self.lpt_finbinmax[ipt])

            df_gen = pickle.load(openfile(self.lpt_gendecmerged[bin_id], "rb"))
            df_gen = df_gen.query(self.s_presel_gen_eff)
            df_gen = selectdfrunlist(df_gen, \
                         self.run_param[self.runlistrigger[self.triggerbit]], "run_number")
            df_gen = seldf_singlevar(df_gen, self.v_var_binning, self.lpt_finbinmin[ipt], \
                                     self.lpt_finbinmax[ipt])

            stepsmin = (self.lpt_probcutfin[bin_id] - min_cv_cut[ipt]) / self.p_ncutvar
            stepsmax = (max_cv_cut[ipt] - self.lpt_probcutfin[bin_id]) / self.p_ncutvar
            ntrials = 2 * self.p_ncutvar + 1
            icvmax = 1

            if ipt == 0:
                # Histograms are booked while processing the first bin only, so
                # the bin edges (all lower edges plus the last upper edge) are
                # computed once here rather than for every trial.
                n_bins = len(self.lpt_finbinmin)
                analysis_bin_lims_temp = self.lpt_finbinmin.copy()
                analysis_bin_lims_temp.append(self.lpt_finbinmax[n_bins-1])
                analysis_bin_lims = array('f', analysis_bin_lims_temp)

            # idx runs over (trial, second-variable bin) and addresses the same
            # histogram for every analysis bin.
            idx = 0
            for icv in range(ntrials):
                if icv < self.p_ncutvar:
                    selml_cvval = min_cv_cut[ipt] + icv * stepsmin
                elif icv == self.p_ncutvar:
                    selml_cvval = self.lpt_probcutfin[bin_id]
                else:
                    selml_cvval = self.lpt_probcutfin[bin_id] + icvmax * stepsmax
                    icvmax = icvmax + 1
                selml_cv = "y_test_prob%s>%s" % (self.p_modelname, selml_cvval)

                print("Cutting on: ", selml_cv)
                # Cut the uncut frame on every trial: overwriting `df` would make
                # each trial inherit all previous cuts, which is only correct
                # when the thresholds happen to be strictly increasing.
                df_cut = df.query(selml_cv)

                for ibin2 in range(len(self.lvar2_binmin)):
                    stringbin2 = "_%d_%s_%.2f_%.2f" % (icv, \
                                                self.v_var2_binning, \
                                                self.lvar2_binmin[ibin2], \
                                                self.lvar2_binmax[ibin2])

                    if ipt == 0:
                        h_gen_pr.append(TH1F("h_gen_pr" + stringbin2, "Prompt Generated in acceptance |y|<0.5", \
                                        n_bins, analysis_bin_lims))
                        h_sel_pr.append(TH1F("h_sel_pr" + stringbin2, "Prompt Reco and sel in acc |#eta|<0.8 and sel", \
                                        n_bins, analysis_bin_lims))
                        h_gen_fd.append(TH1F("h_gen_fd" + stringbin2, "FD Generated in acceptance |y|<0.5", \
                                        n_bins, analysis_bin_lims))
                        h_sel_fd.append(TH1F("h_sel_fd" + stringbin2, "FD Reco and sel in acc |#eta|<0.8 and sel", \
                                        n_bins, analysis_bin_lims))

                    df_bin = seldf_singlevar(df_cut, self.v_var2_binning, self.lvar2_binmin[ibin2], \
                                         self.lvar2_binmax[ibin2])
                    df_gen_bin = seldf_singlevar(df_gen, self.v_var2_binning, self.lvar2_binmin[ibin2], \
                                             self.lvar2_binmax[ibin2])

                    df_sel_pr = df_bin[df_bin.ismcprompt == 1]
                    df_gen_pr = df_gen_bin[df_gen_bin.ismcprompt == 1]
                    df_sel_fd = df_bin[df_bin.ismcfd == 1]
                    df_gen_fd = df_gen_bin[df_gen_bin.ismcfd == 1]

                    # Bin content is the candidate count, bin error its square root.
                    for histos, frame in ((h_gen_pr, df_gen_pr),
                                          (h_sel_pr, df_sel_pr),
                                          (h_gen_fd, df_gen_fd),
                                          (h_sel_fd, df_sel_fd)):
                        nentries = len(frame)
                        histos[idx].SetBinContent(ipt + 1, nentries)
                        histos[idx].SetBinError(ipt + 1, math.sqrt(nentries))
                    idx = idx + 1

        myfile.cd()
        for i in range(idx):
            h_gen_pr[i].Write()
            h_sel_pr[i].Write()
            h_gen_fd[i].Write()
            h_sel_fd[i].Write()
Ejemplo n.º 15
0
    def preparesample(self):
        """Build (or reload) the labelled train and test samples.

        If both pickled samples exist in ``self.dirmlout`` and
        ``self.step_done("preparemlsamples")`` is true they are loaded from
        disk. Otherwise signal and background candidates are taken from
        ``self.arraydf`` (filled by ``prepare_data_mc_mcgen``), restricted to
        the ``[self.p_binmin, self.p_binmax]`` bin of ``self.v_bin``, filtered
        with ``self.s_selsigml`` / ``self.s_selbkgml``, shuffled, truncated to
        ``self.p_nsig`` / ``self.p_nbkg`` rows, labelled in column
        ``self.v_sig`` (1 for signal, 0 for background), split into train and
        test frames and pickled.

        In both cases the method then logs the sample sizes, optionally masks
        values via ``mask_df`` and sets ``df_xtrain``, ``df_ytrain``,
        ``df_xtest`` and ``df_ytest``.
        """

        self.logger.info("Prepare Sample")

        filename_train = \
                os.path.join(self.dirmlout, f"df_train_{self.p_binmin}_{self.p_binmax}.pkl")
        filename_test = \
                os.path.join(self.dirmlout, f"df_test_{self.p_binmin}_{self.p_binmax}.pkl")

        if os.path.exists(filename_train) \
                and os.path.exists(filename_test) \
                and self.step_done("preparemlsamples"):
            self.df_mltrain = pickle.load(openfile(filename_train, "rb"))
            self.df_mltest = pickle.load(openfile(filename_test, "rb"))

        else:

            self.prepare_data_mc_mcgen()

            self.df_sig, self.df_bkg = self.arraydf[
                self.p_tagsig], self.arraydf[self.p_tagbkg]
            self.df_sig = seldf_singlevar(self.df_sig, self.v_bin,
                                          self.p_binmin, self.p_binmax)
            self.df_bkg = seldf_singlevar(self.df_bkg, self.v_bin,
                                          self.p_binmin, self.p_binmax)
            self.df_sig = self.df_sig.query(self.s_selsigml)
            self.df_bkg = self.df_bkg.query(self.s_selbkgml)

            if self.p_equalise_sig_bkg:
                # Cap both requested sizes at the smaller of the two samples.
                self.p_nsig = min(len(self.df_sig), len(self.df_bkg),
                                  self.p_nsig)
                self.p_nbkg = min(len(self.df_sig), len(self.df_bkg),
                                  self.p_nbkg)

            self.df_ml = pd.DataFrame()
            self.df_sig = shuffle(self.df_sig, random_state=self.rnd_shuffle)
            self.df_bkg = shuffle(self.df_bkg, random_state=self.rnd_shuffle)
            # Take explicit copies of the truncated samples so the column
            # assignments below act on independent frames rather than on
            # slices of the shuffled ones (pandas chained assignment).
            self.df_sig = self.df_sig[:self.p_nsig].copy()
            self.df_bkg = self.df_bkg[:self.p_nbkg].copy()
            # Background candidates carry no MC truth: zero all MC flags. The
            # flags are constant, so setting them after the truncation gives
            # the same rows as setting them before the shuffle.
            # NOTE(review): assumes `shuffle` permutes rows independently of
            # the column contents (as sklearn.utils.shuffle does) - confirm.
            for mcflag in ("ismcsignal", "ismcprompt", "ismcfd", "ismcbkg"):
                self.df_bkg[mcflag] = 0
            self.df_sig[self.v_sig] = 1
            self.df_bkg[self.v_sig] = 0
            self.df_ml = pd.concat([self.df_sig, self.df_bkg])
            self.df_mltrain, self.df_mltest = train_test_split(self.df_ml, \
                                               test_size=self.test_frac, random_state=self.rnd_splt)
            self.df_mltrain = self.df_mltrain.reset_index(drop=True)
            self.df_mltest = self.df_mltest.reset_index(drop=True)

            # Write for later usage
            pickle.dump(self.df_mltrain,
                        openfile(filename_train, "wb"),
                        protocol=4)
            pickle.dump(self.df_mltest,
                        openfile(filename_test, "wb"),
                        protocol=4)

        # Now continue with extracting signal and background stats and report
        self.df_sigtrain, self.df_bkgtrain = split_df_sigbkg(
            self.df_mltrain, self.v_sig)
        self.df_sigtest, self.df_bkgtest = split_df_sigbkg(
            self.df_mltest, self.v_sig)
        self.logger.info("Total number of candidates: train %d and test %d",
                         len(self.df_mltrain), len(self.df_mltest))
        self.logger.info("Number of signal candidates: train %d and test %d",
                         len(self.df_sigtrain), len(self.df_sigtest))
        # The first count is the training sample; label it like the lines above.
        self.logger.info("Number of bkg candidates: train %d and test %d",
                         len(self.df_bkgtrain), len(self.df_bkgtest))

        self.logger.info("Aim for number of signal events: %d", self.p_nsig)
        self.logger.info("Aim for number of background events: %d",
                         self.p_nbkg)

        if self.p_nsig > (len(self.df_sigtrain) + len(self.df_sigtest)):
            self.logger.warning("There are not enough signal events")
        if self.p_nbkg > (len(self.df_bkgtrain) + len(self.df_bkgtest)):
            self.logger.warning("There are not enough background events")

        if self.p_mask_values:
            self.logger.info("Maksing values for training and testing")
            mask_df(self.df_mltrain, self.p_mask_values)
            mask_df(self.df_mltest, self.p_mask_values)
        # Final preparation of signal and background samples for training and testing
        self.df_xtrain = self.df_mltrain[self.v_train]
        self.df_ytrain = self.df_mltrain[self.v_sig]
        self.df_xtest = self.df_mltest[self.v_train]
        self.df_ytest = self.df_mltest[self.v_sig]

        # NOTE(review): return value unused here; presumably this call records
        # the step as done - confirm against step_done().
        self.step_done("preparemlsamples")
Ejemplo n.º 16
0
    def process_efficiency(self):
        """Fill and write the efficiency input histograms.

        For every bin of the second binning variable six histograms binned in
        the first variable are booked: generated, preselected and selected
        counts, each for prompt and feed-down candidates. Every analysis bin
        contributes one histogram bin whose content is the number of
        candidates passing the respective selection and whose error is the
        square root of that number. The histograms are written to
        ``self.n_fileeff``.
        """
        out_file = TFile.Open(self.n_fileeff, "recreate")
        for ibin2 in range(len(self.lvar2_binmin)):
            stringbin2 = "_%s_%.2f_%.2f" % (self.v_var2_binning, \
                                        self.lvar2_binmin[ibin2], \
                                        self.lvar2_binmax[ibin2])
            print(stringbin2)
            # Bin edges: all lower edges plus the upper edge of the last bin.
            n_bins = len(self.lpt_finbinmin)
            analysis_bin_lims_temp = self.lpt_finbinmin.copy()
            analysis_bin_lims_temp.append(self.lpt_finbinmax[n_bins - 1])
            analysis_bin_lims = array.array('f', analysis_bin_lims_temp)
            # Each histogram is booked exactly once; booking a second object
            # under the same name would only replace the first one.
            h_gen_pr = TH1F("h_gen_pr" + stringbin2, "Prompt Generated in acceptance |y|<0.5", \
                            n_bins, analysis_bin_lims)
            h_presel_pr = TH1F("h_presel_pr" + stringbin2, "Prompt Reco in acc |#eta|<0.8 and sel", \
                               n_bins, analysis_bin_lims)
            h_sel_pr = TH1F("h_sel_pr" + stringbin2, "Prompt Reco and sel in acc |#eta|<0.8 and sel", \
                            n_bins, analysis_bin_lims)
            h_gen_fd = TH1F("h_gen_fd" + stringbin2, "FD Generated in acceptance |y|<0.5", \
                            n_bins, analysis_bin_lims)
            h_presel_fd = TH1F("h_presel_fd" + stringbin2, "FD Reco in acc |#eta|<0.8 and sel", \
                               n_bins, analysis_bin_lims)
            h_sel_fd = TH1F("h_sel_fd" + stringbin2, "FD Reco and sel in acc |#eta|<0.8 and sel", \
                            n_bins, analysis_bin_lims)

            for ipt in range(self.p_nptfinbins):
                bin_id = self.bin_matching[ipt]
                df_mc_reco = pickle.load(
                    openfile(self.lpt_recodecmerged[bin_id], "rb"))
                if self.s_evtsel is not None:
                    df_mc_reco = df_mc_reco.query(self.s_evtsel)
                if self.s_trigger is not None:
                    df_mc_reco = df_mc_reco.query(self.s_trigger)
                df_mc_gen = pickle.load(
                    openfile(self.lpt_gendecmerged[bin_id], "rb"))
                df_mc_gen = df_mc_gen.query(self.s_presel_gen_eff)
                # Restrict both frames to the current bin of each binning variable.
                df_mc_reco = seldf_singlevar(df_mc_reco, self.v_var_binning, \
                                     self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])
                df_mc_gen = seldf_singlevar(df_mc_gen, self.v_var_binning, \
                                     self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])
                df_mc_reco = seldf_singlevar(df_mc_reco, self.v_var2_binning, \
                                             self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
                df_mc_gen = seldf_singlevar(df_mc_gen, self.v_var2_binning, \
                                            self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
                df_gen_sel_pr = df_mc_gen[df_mc_gen.ismcprompt == 1]
                df_reco_presel_pr = df_mc_reco[df_mc_reco.ismcprompt == 1]
                df_reco_sel_pr = df_reco_presel_pr.query(self.l_selml[bin_id])
                df_gen_sel_fd = df_mc_gen[df_mc_gen.ismcfd == 1]
                df_reco_presel_fd = df_mc_reco[df_mc_reco.ismcfd == 1]
                df_reco_sel_fd = df_reco_presel_fd.query(self.l_selml[bin_id])

                # Histogram bin ipt + 1 holds the count of analysis bin ipt.
                for histo, frame in ((h_gen_pr, df_gen_sel_pr),
                                     (h_presel_pr, df_reco_presel_pr),
                                     (h_sel_pr, df_reco_sel_pr),
                                     (h_gen_fd, df_gen_sel_fd),
                                     (h_presel_fd, df_reco_presel_fd),
                                     (h_sel_fd, df_reco_sel_fd)):
                    nentries = len(frame)
                    histo.SetBinContent(ipt + 1, nentries)
                    histo.SetBinError(ipt + 1, math.sqrt(nentries))
            out_file.cd()
            h_gen_pr.Write()
            h_presel_pr.Write()
            h_sel_pr.Write()
            h_gen_fd.Write()
            h_presel_fd.Write()
            h_sel_fd.Write()
Ejemplo n.º 17
0
    def cutvariation_masshistos(self, min_cv_cut, max_cv_cut):
        """Write invariant-mass histograms for every cut variation.

        For each analysis bin the data frame is scanned with
        ``2 * self.p_ncutvar + 1`` thresholds on the ``y_test_prob<model>``
        column: ``self.p_ncutvar`` equally spaced values starting at
        ``min_cv_cut[ipt]``, the nominal ``self.lpt_probcutfin[bin_id]``, and
        ``self.p_ncutvar`` equally spaced values up to ``max_cv_cut[ipt]``.
        For every (trial, second-variable bin) pair an invariant-mass
        histogram and a weighted one are written to
        ``self.n_filemass_cutvar``. The weighted histogram is only filled when
        ``"INT7"`` is not part of ``self.triggerbit`` and the correction
        function is found in ``correctionsweights.root``.

        Args:
            min_cv_cut: lowest cut value of the scan, indexed by ``ipt``.
            max_cv_cut: highest cut value of the scan, indexed by ``ipt``.
        """
        myfile = TFile.Open(self.n_filemass_cutvar, "recreate")

        # The correction function does not depend on any loop variable: open
        # the weights file once instead of once per histogram.
        fileweight = None
        funcweighttrig = None
        if "INT7" not in self.triggerbit:
            fileweight_name = "%s/correctionsweights.root" % self.d_val
            fileweight = TFile.Open(fileweight_name, "read")
            namefunction = "funcnorm_%s_%s" % (self.triggerbit, self.v_var2_binning)
            funcweighttrig = fileweight.Get(namefunction)
            # Opening a file makes it ROOT's current directory; switch back so
            # the histograms below are booked in the output file.
            myfile.cd()

        for ipt in range(self.p_nptfinbins):
            bin_id = self.bin_matching[ipt]
            df = pickle.load(openfile(self.lpt_recodecmerged_data[bin_id], "rb"))

            stepsmin = (self.lpt_probcutfin[bin_id] - min_cv_cut[ipt]) / self.p_ncutvar
            stepsmax = (max_cv_cut[ipt] - self.lpt_probcutfin[bin_id]) / self.p_ncutvar
            ntrials = 2 * self.p_ncutvar + 1
            icvmax = 1

            if self.s_evtsel is not None:
                df = df.query(self.s_evtsel)
            if self.s_trigger_data is not None:
                df = df.query(self.s_trigger_data)
            df = seldf_singlevar(df, self.v_var_binning, \
                                 self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])
            print("Using run selection for mass histo", self.runlistrigger[self.triggerbit], \
                  "for period", self.period)
            df = selectdfrunlist(df, self.run_param[self.runlistrigger[self.triggerbit]], \
                                 "run_number")

            for icv in range(ntrials):
                if icv < self.p_ncutvar:
                    selml_cvval = min_cv_cut[ipt] + icv * stepsmin
                elif icv == self.p_ncutvar:
                    selml_cvval = self.lpt_probcutfin[bin_id]
                else:
                    selml_cvval = self.lpt_probcutfin[bin_id] + icvmax * stepsmax
                    icvmax = icvmax + 1
                selml_cv = "y_test_prob%s>%s" % (self.p_modelname, selml_cvval)

                print("Cutting on: ", selml_cv)
                # Cut the uncut frame on every trial: overwriting `df` would make
                # each trial inherit all previous cuts, which is only correct
                # when the thresholds happen to be strictly increasing.
                df_cut = df.query(selml_cv)

                for ibin2 in range(len(self.lvar2_binmin)):
                    suffix = "%s%d_%d_%d_%s%.2f_%.2f" % \
                             (self.v_var_binning, self.lpt_finbinmin[ipt],
                              self.lpt_finbinmax[ipt], icv,
                              self.v_var2_binning, self.lvar2_binmin[ibin2],
                              self.lvar2_binmax[ibin2])
                    h_invmass = TH1F("hmass" + suffix, "", self.p_num_bins,
                                     self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
                    h_invmass_weight = TH1F("h_invmass_weight" + suffix, "", self.p_num_bins,
                                            self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])

                    df_bin = seldf_singlevar(df_cut, self.v_var2_binning,
                                             self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])

                    fill_hist(h_invmass, df_bin.inv_mass)

                    # Falsy both when no correction applies and when the function
                    # is missing from the weights file.
                    if funcweighttrig:
                        weights = evaluate(funcweighttrig, df_bin[self.v_var2_binning])
                        weightsinv = [1./weight for weight in weights]
                        fill_hist(h_invmass_weight, df_bin.inv_mass, weights=weightsinv)
                    myfile.cd()
                    h_invmass.Write()
                    h_invmass_weight.Write()

        if fileweight is not None:
            fileweight.Close()
Ejemplo n.º 18
0
    def process_histomass(self):
        """Write the invariant-mass histograms for every analysis bin.

        For each (first-variable bin, second-variable bin) pair the selected
        candidates fill an invariant-mass histogram and a weighted one, both
        written to ``self.n_filemass``. The weighted histogram is only filled
        for data when ``"INT7"`` is not part of the configured trigger bit.
        If the frame has a ``pt_jet`` column a 2D histogram of ``z`` versus
        invariant mass is written as well, and for MC the signal and
        reflection mass histograms are added.
        """
        myfile = TFile.Open(self.n_filemass, "recreate")

        # Whether trigger weights apply does not depend on any loop variable:
        # decide once and open the weights file once instead of once per
        # histogram.
        triggerbit = self.datap["analysis"][self.typean]["triggerbit"]
        use_trigger_weights = "INT7" not in triggerbit and self.mcordata == "data"
        fileweight = None
        funcweighttrig = None
        if use_trigger_weights:
            fileweight_name = "%s/correctionsweights.root" % self.d_val
            fileweight = TFile.Open(fileweight_name, "read")
            namefunction = "funcnorm_%s" % self.triggerbit
            funcweighttrig = fileweight.Get(namefunction)
            # Opening a file makes it ROOT's current directory; switch back so
            # the histograms below are booked in the output file.
            myfile.cd()

        for ipt in range(self.p_nptfinbins):
            bin_id = self.bin_matching[ipt]
            df = pickle.load(openfile(self.lpt_recodecmerged[bin_id], "rb"))
            df = df.query(self.l_selml[bin_id])
            if self.s_evtsel is not None:
                df = df.query(self.s_evtsel)
            if self.s_trigger is not None:
                df = df.query(self.s_trigger)
            df = seldf_singlevar(df, self.v_var_binning, \
                                 self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])
            for ibin2 in range(len(self.lvar2_binmin)):
                suffix = "%s%d_%d_%.2f%s_%.2f_%.2f" % \
                         (self.v_var_binning, self.lpt_finbinmin[ipt],
                          self.lpt_finbinmax[ipt], self.lpt_probcutfin[bin_id],
                          self.v_var2_binning, self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
                h_invmass = TH1F("hmass" + suffix, "", self.p_num_bins,
                                 self.p_mass_fit_lim[0],
                                 self.p_mass_fit_lim[1])
                h_invmass_weight = TH1F("h_invmass_weight" + suffix, "",
                                        self.p_num_bins,
                                        self.p_mass_fit_lim[0],
                                        self.p_mass_fit_lim[1])
                df_bin = seldf_singlevar(df, self.v_var2_binning,
                                         self.lvar2_binmin[ibin2],
                                         self.lvar2_binmax[ibin2])
                fill_hist(h_invmass, df_bin.inv_mass)
                if use_trigger_weights:
                    weights = evaluate(funcweighttrig,
                                       df_bin[self.v_var2_binning])
                    # Candidates are filled with the inverse of the evaluated weights.
                    weightsinv = [1. / weight for weight in weights]
                    fill_hist(h_invmass_weight,
                              df_bin.inv_mass,
                              weights=weightsinv)
                myfile.cd()
                h_invmass.Write()
                h_invmass_weight.Write()

                if "pt_jet" in df_bin.columns:
                    zarray = z_calc(df_bin.pt_jet, df_bin.phi_jet,
                                    df_bin.eta_jet, df_bin.pt_cand,
                                    df_bin.phi_cand, df_bin.eta_cand)
                    h_zvsinvmass = TH2F("hzvsmass" + suffix, "", 5000, 1.00,
                                        6.00, 2000, -0.5, 1.5)
                    # One (inv_mass, z) row per candidate.
                    zvsinvmass = np.vstack((df_bin.inv_mass, zarray)).T
                    fill_hist(h_zvsinvmass, zvsinvmass)
                    h_zvsinvmass.Write()

                if self.mcordata == "mc":
                    df_bin[self.v_ismcrefl] = np.array(tag_bit_df(
                        df_bin, self.v_bitvar, self.b_mcrefl),
                                                       dtype=int)
                    df_bin_sig = df_bin[df_bin[self.v_ismcsignal] == 1]
                    df_bin_refl = df_bin[df_bin[self.v_ismcrefl] == 1]
                    h_invmass_sig = TH1F("hmass_sig" + suffix, "",
                                         self.p_num_bins,
                                         self.p_mass_fit_lim[0],
                                         self.p_mass_fit_lim[1])
                    h_invmass_refl = TH1F("hmass_refl" + suffix, "",
                                          self.p_num_bins,
                                          self.p_mass_fit_lim[0],
                                          self.p_mass_fit_lim[1])
                    fill_hist(h_invmass_sig, df_bin_sig.inv_mass)
                    fill_hist(h_invmass_refl, df_bin_refl.inv_mass)
                    myfile.cd()
                    h_invmass_sig.Write()
                    h_invmass_refl.Write()

        if fileweight is not None:
            fileweight.Close()
    def process_efficiency_single(self, index):
        """Count generated and reconstructed MC candidates per pt bin.

        For each final pt bin the reco and gen dataframes belonging to file
        ``index`` are loaded and filtered, and the number of prompt and
        feed-down (FD) candidates at three levels (generated, reco after
        preselection, reco after the final selection) is stored in six
        histograms. The bin error is set to sqrt(count). The histograms are
        written to ``self.l_histoeff[index]``, which is recreated.

        Args:
            index: position of the file to process in the per-file lists
                (``self.l_histoeff``, ``self.mptfiles_recoskmldec[bin_id]``
                and ``self.mptfiles_gensk[bin_id]``).
        """
        #TO UPDATE TO DHADRON_MULT VERSION
        print("step1")
        out_file = TFile.Open(self.l_histoeff[index], "recreate")
        n_bins = len(self.lpt_finbinmin)
        # Bin edges: every lower edge plus the upper edge of the last bin.
        analysis_bin_lims_temp = self.lpt_finbinmin.copy()
        analysis_bin_lims_temp.append(self.lpt_finbinmax[n_bins - 1])
        analysis_bin_lims = array.array('f', analysis_bin_lims_temp)
        h_gen_pr = TH1F("h_gen_pr", "Prompt Generated in acceptance |y|<0.5",
                        n_bins, analysis_bin_lims)
        h_presel_pr = TH1F("h_presel_pr", "Prompt Reco in acc |#eta|<0.8 and sel",
                           n_bins, analysis_bin_lims)
        h_sel_pr = TH1F("h_sel_pr", "Prompt Reco and sel in acc |#eta|<0.8 and sel",
                        n_bins, analysis_bin_lims)
        h_gen_fd = TH1F("h_gen_fd", "FD Generated in acceptance |y|<0.5",
                        n_bins, analysis_bin_lims)
        h_presel_fd = TH1F("h_presel_fd", "FD Reco in acc |#eta|<0.8 and sel",
                           n_bins, analysis_bin_lims)
        h_sel_fd = TH1F("h_sel_fd", "FD Reco and sel in acc |#eta|<0.8 and sel",
                        n_bins, analysis_bin_lims)
        print("step2")

        def set_count(hist, ibin, dframe):
            # Store the number of rows of dframe in bin ibin, error sqrt(N).
            val = len(dframe)
            hist.SetBinContent(ibin, val)
            hist.SetBinError(ibin, math.sqrt(val))

        for ipt in range(self.p_nptfinbins):
            print("step2a")
            # Histogram bins are 1-based, pt bins are 0-based.
            hbin = ipt + 1
            bin_id = self.bin_matching[ipt]

            # Reconstructed candidates: optional event, trigger and run-list
            # selections.
            df_mc_reco = pickle.load(
                openfile(self.mptfiles_recoskmldec[bin_id][index], "rb"))
            if self.s_evtsel is not None:
                df_mc_reco = df_mc_reco.query(self.s_evtsel)
            if self.s_trigger is not None:
                df_mc_reco = df_mc_reco.query(self.s_trigger)
            if self.runlistrigger is not None:
                df_mc_reco = selectdfrunlist(
                    df_mc_reco, self.run_param[self.runlistrigger], "run_number")

            # Generated candidates: generator-level preselection and the same
            # optional run-list selection.
            df_mc_gen = pickle.load(
                openfile(self.mptfiles_gensk[bin_id][index], "rb"))
            df_mc_gen = df_mc_gen.query(self.s_presel_gen_eff)
            print("step2b")
            if self.runlistrigger is not None:
                df_mc_gen = selectdfrunlist(
                    df_mc_gen, self.run_param[self.runlistrigger], "run_number")

            # Restrict both samples to the current final pt bin.
            df_mc_reco = seldf_singlevar(df_mc_reco, self.v_var_binning,
                                         self.lpt_finbinmin[ipt],
                                         self.lpt_finbinmax[ipt])
            df_mc_gen = seldf_singlevar(df_mc_gen, self.v_var_binning,
                                        self.lpt_finbinmin[ipt],
                                        self.lpt_finbinmax[ipt])

            # Prompt candidates; the final selection is the ML cut only when
            # ML is enabled, otherwise it equals the preselection.
            df_gen_sel_pr = df_mc_gen[df_mc_gen.ismcprompt == 1]
            df_reco_presel_pr = df_mc_reco[df_mc_reco.ismcprompt == 1]
            if self.doml is True:
                df_reco_sel_pr = df_reco_presel_pr.query(self.l_selml[bin_id])
            else:
                df_reco_sel_pr = df_reco_presel_pr.copy()

            # Feed-down candidates, same logic as for prompt.
            df_gen_sel_fd = df_mc_gen[df_mc_gen.ismcfd == 1]
            df_reco_presel_fd = df_mc_reco[df_mc_reco.ismcfd == 1]
            print("step2d")
            if self.doml is True:
                df_reco_sel_fd = df_reco_presel_fd.query(self.l_selml[bin_id])
            else:
                df_reco_sel_fd = df_reco_presel_fd.copy()

            set_count(h_gen_pr, hbin, df_gen_sel_pr)
            set_count(h_presel_pr, hbin, df_reco_presel_pr)
            set_count(h_sel_pr, hbin, df_reco_sel_pr)
            print("step2e")

            set_count(h_gen_fd, hbin, df_gen_sel_fd)
            set_count(h_presel_fd, hbin, df_reco_presel_fd)
            set_count(h_sel_fd, hbin, df_reco_sel_fd)
            print("step2f")

        out_file.cd()
        h_gen_pr.Write()
        h_presel_pr.Write()
        h_sel_pr.Write()
        h_gen_fd.Write()
        h_presel_fd.Write()
        h_sel_fd.Write()
        # Close explicitly so the output is flushed to disk here rather than
        # whenever the file object happens to be destroyed.
        out_file.Close()
        print("FINALISED")
Ejemplo n.º 20
0
    def process_response(self):
        """
        Build the response histograms and RooUnfold response matrices for
        feed-down (non-prompt) and prompt candidates.

        The MC gen and reco files, skimmed in bins of HF candidate pt, are
        loaded and selected, then merged into one gen and one reco dataframe
        covering all candidate pt bins. Gen candidates are selected with the
        run list and the gen jet selection; reco candidates with the optional
        event selection, reco jet selection, trigger selection and, when ML
        is enabled, the per-bin ML selection.

        From the merged samples the method fills and writes, into
        ``self.n_fileeff`` (opened in "update" mode):
          * gen-level shape histograms with and without the reco-range cuts,
          * 2D shape-vs-jet-pt histograms at reco and gen level,
          * normalised gen-vs-reco correlation histograms,
          * fractional-difference (resolution) histograms,
          * the inputs for the closure test, built from a train/test split
            of the prompt sample with ``test_size=self.closure_frac``,
          * the response matrices "response_matrix_nonprompt",
            "response_matrix" and "response_matrix_closure".
        """
        nzbin_reco = self.p_nbinshape_reco
        zbinarray_reco = array.array('d', self.varshaperanges_reco)

        nzbin_gen = self.p_nbinshape_gen
        zbinarray_gen = array.array('d', self.varshaperanges_gen)

        njetptbin_reco = self.p_nbin2_reco
        jetptbinarray_reco = array.array('d', self.var2ranges_reco)

        njetptbin_gen = self.p_nbin2_gen
        jetptbinarray_gen = array.array('d', self.var2ranges_gen)

        # Candidate pt edges: every lower edge plus the last upper edge.
        candptbin = self.lpt_finbinmin.copy()
        candptbin.append(self.lpt_finbinmax[-1])
        candptbinarray = array.array('d', candptbin)

        def normalise(hist):
            # Scale a 2D histogram to unit integral. An empty histogram is
            # left untouched instead of dividing by zero.
            norm = hist.Integral(1, -1, 1, -1)
            if norm > 0:
                hist.Scale(1.0 / norm)

        out_file = TFile.Open(self.n_fileeff, "update")
        list_df_mc_reco = []
        list_df_mc_gen = []

        for iptskim, _ in enumerate(self.lpt_anbinmin):

            df_mc_gen = pickle.load(openfile(self.lpt_gendecmerged[iptskim], "rb"))
            df_mc_gen = selectdfrunlist(df_mc_gen, \
                    self.run_param[self.runlistrigger[self.triggerbit]], "run_number")
            df_mc_gen = df_mc_gen.query(self.s_jetsel_gen)
            list_df_mc_gen.append(df_mc_gen)

            df_mc_reco = pickle.load(openfile(self.lpt_recodecmerged[iptskim], "rb"))
            if self.s_evtsel is not None:
                df_mc_reco = df_mc_reco.query(self.s_evtsel)
            if self.s_jetsel_reco is not None:
                df_mc_reco = df_mc_reco.query(self.s_jetsel_reco)
            if self.s_trigger is not None:
                df_mc_reco = df_mc_reco.query(self.s_trigger)
            if self.doml is True:
                df_mc_reco = df_mc_reco.query(self.l_selml[iptskim])
            list_df_mc_reco.append(df_mc_reco)

        # Merge the dataframes of the different HF pt bins into a single one
        # for gen and one for reco.

        df_gen = pd.concat(list_df_mc_gen)
        df_mc_reco = pd.concat(list_df_mc_reco)

        # add the z columns
        df_gen["z"] = z_calc(df_gen.pt_jet, df_gen.phi_jet, df_gen.eta_jet,
                             df_gen.pt_cand, df_gen.phi_cand, df_gen.eta_cand)

        df_mc_reco["z"] = z_calc(df_mc_reco.pt_jet, df_mc_reco.phi_jet, df_mc_reco.eta_jet,
                                 df_mc_reco.pt_cand, df_mc_reco.phi_cand, df_mc_reco.eta_cand)

        df_mc_reco["z_gen"] = z_gen_calc(df_mc_reco.pt_gen_jet, df_mc_reco.phi_gen_jet,
                                         df_mc_reco.eta_gen_jet, df_mc_reco.pt_gen_cand,
                                         df_mc_reco.delta_phi_gen_jet, df_mc_reco.delta_eta_gen_jet)

        # Split both samples into feed-down (non-prompt) and prompt parts.
        df_gen_nonprompt = df_gen[df_gen.ismcfd == 1]
        df_gen_prompt = df_gen[df_gen.ismcprompt == 1]
        df_mc_reco_merged_nonprompt = df_mc_reco[df_mc_reco.ismcfd == 1]
        df_mc_reco_merged_prompt = df_mc_reco[df_mc_reco.ismcprompt == 1]

        # The following plots are 3d plots all at generated level of z,
        # pt_jet and pt_cand. This was used in the first version of the feeddown
        # subtraction, currently is obsolete

        hzvsjetpt_gen_unmatched = TH2F("hzvsjetpt_gen_unmatched", "hzvsjetpt_gen_unmatched", \
            nzbin_gen, zbinarray_gen, njetptbin_gen, jetptbinarray_gen)
        df_zvsjetpt_gen_unmatched = df_gen_prompt.loc[:, [self.v_varshape_binning, "pt_jet"]]
        fill_hist(hzvsjetpt_gen_unmatched, df_zvsjetpt_gen_unmatched)
        hzvsjetpt_gen_unmatched.Write()
        titlehist = "hzvsjetptvscandpt_gen_nonprompt"
        hzvsjetptvscandpt_gen_nonprompt = makefill3dhist(df_gen_nonprompt, titlehist, \
            zbinarray_gen, jetptbinarray_gen, candptbinarray, self.v_varshape_binning, "pt_jet", "pt_cand")
        hzvsjetptvscandpt_gen_nonprompt.Write()

        # hz_gen_nocuts is the distribution of generated z values in
        # bins of gen_jet pt before the reco z and jetpt selection. hz_gen_cuts
        # also includes cut on z reco and jet pt reco. These are used for overall
        # efficiency correction to estimate the fraction of candidates that are
        # in the reco range but outside the gen range and viceversa

        for ibin2 in range(self.p_nbin2_gen):
            suffix = "%s_%.2f_%.2f" % \
                (self.v_var2_binning, self.lvar2_binmin_gen[ibin2], self.lvar2_binmax_gen[ibin2])
            hz_gen_nocuts = TH1F("hz_gen_nocuts_nonprompt" + suffix, \
                "hz_gen_nocuts_nonprompt" + suffix, nzbin_gen, zbinarray_gen)
            hz_gen_nocuts.Sumw2()
            hz_gen_cuts = TH1F("hz_gen_cuts_nonprompt" + suffix,
                               "hz_gen_cuts_nonprompt" + suffix, nzbin_gen, zbinarray_gen)
            hz_gen_cuts.Sumw2()

            df_tmp = seldf_singlevar(df_mc_reco_merged_nonprompt, "pt_gen_jet", \
                                     self.lvar2_binmin_gen[ibin2], self.lvar2_binmax_gen[ibin2])
            df_tmp = seldf_singlevar(df_tmp, self.v_varshape_binning_gen, \
                                     self.lvarshape_binmin_gen[0], self.lvarshape_binmax_gen[-1])
            fill_hist(hz_gen_nocuts, df_tmp[self.v_varshape_binning_gen])
            df_tmp = seldf_singlevar(df_tmp, "pt_jet",
                                     self.lvar2_binmin_reco[0], self.lvar2_binmax_reco[-1])
            df_tmp = seldf_singlevar(df_tmp, self.v_varshape_binning,
                                     self.lvarshape_binmin_reco[0], self.lvarshape_binmax_reco[-1])
            fill_hist(hz_gen_cuts, df_tmp[self.v_varshape_binning_gen])
            hz_gen_cuts.Write()
            hz_gen_nocuts.Write()

            # Addendum for unfolding
            hz_gen_nocuts_pr = TH1F("hz_gen_nocuts" + suffix, \
                "hz_gen_nocuts" + suffix, nzbin_gen, zbinarray_gen)
            hz_gen_nocuts_pr.Sumw2()
            hz_gen_cuts_pr = TH1F("hz_gen_cuts" + suffix,
                                  "hz_gen_cuts" + suffix, nzbin_gen, zbinarray_gen)
            hz_gen_cuts_pr.Sumw2()
            df_tmp_pr = seldf_singlevar(df_mc_reco_merged_prompt, "pt_gen_jet", \
                                     self.lvar2_binmin_gen[ibin2], self.lvar2_binmax_gen[ibin2])
            df_tmp_pr = seldf_singlevar(df_tmp_pr, self.v_varshape_binning_gen, \
                                     self.lvarshape_binmin_gen[0], self.lvarshape_binmax_gen[-1])
            fill_hist(hz_gen_nocuts_pr, df_tmp_pr[self.v_varshape_binning_gen])
            df_tmp_pr = seldf_singlevar(df_tmp_pr, "pt_jet",
                                        self.lvar2_binmin_reco[0], self.lvar2_binmax_reco[-1])
            df_tmp_pr = seldf_singlevar(df_tmp_pr, self.v_varshape_binning,
                                        self.lvarshape_binmin_reco[0], self.lvarshape_binmax_reco[-1])
            fill_hist(hz_gen_cuts_pr, df_tmp_pr[self.v_varshape_binning_gen])
            hz_gen_cuts_pr.Write()
            hz_gen_nocuts_pr.Write()
            # End addendum for unfolding


        # create_df_closure returns three dataframes, used below as the
        # gen-selected, reco-selected and reco+gen-selected samples.
        df_tmp_selgen, df_tmp_selreco, df_tmp_selrecogen = \
                self.create_df_closure(df_mc_reco_merged_nonprompt)

        df_tmp_selgen_pr, df_tmp_selreco_pr, df_tmp_selrecogen_pr = \
                self.create_df_closure(df_mc_reco_merged_prompt)

        # histograms for response of feeddown
        hzvsjetpt_reco_nocuts = \
            build2dhisto("hzvsjetpt_reco_nocuts_nonprompt", zbinarray_reco, jetptbinarray_reco)
        hzvsjetpt_reco_cuts = \
            build2dhisto("hzvsjetpt_reco_cuts_nonprompt", zbinarray_reco, jetptbinarray_reco)
        hzvsjetpt_gen_nocuts = \
            build2dhisto("hzvsjetpt_gen_nocuts_nonprompt", zbinarray_gen, jetptbinarray_gen)
        hzvsjetpt_gen_cuts = \
            build2dhisto("hzvsjetpt_gen_cuts_nonprompt", zbinarray_gen, jetptbinarray_gen)

        # The clones are taken while the histograms are still empty; they
        # only provide the binning of the feed-down response matrix.
        hzvsjetpt_reco = hzvsjetpt_reco_nocuts.Clone("hzvsjetpt_reco_nonprompt")
        hzvsjetpt_gen = hzvsjetpt_gen_nocuts.Clone("hzvsjetpt_genv")
        response_matrix = RooUnfoldResponse(hzvsjetpt_reco, hzvsjetpt_gen)

        fill2dhist(df_tmp_selreco, hzvsjetpt_reco_nocuts, self.v_varshape_binning, "pt_jet")
        fill2dhist(df_tmp_selgen, hzvsjetpt_gen_nocuts, self.v_varshape_binning_gen, "pt_gen_jet")
        fill2dhist(df_tmp_selrecogen, hzvsjetpt_reco_cuts, self.v_varshape_binning, "pt_jet")
        fill2dhist(df_tmp_selrecogen, hzvsjetpt_gen_cuts, self.v_varshape_binning_gen, "pt_gen_jet")

        hzvsjetpt_reco_nocuts.Write()
        hzvsjetpt_gen_nocuts.Write()
        hzvsjetpt_reco_cuts.Write()
        hzvsjetpt_gen_cuts.Write()

        # histograms for unfolding
        hzvsjetpt_reco_nocuts_pr = \
            build2dhisto("hzvsjetpt_reco_nocuts", zbinarray_reco, jetptbinarray_reco)
        hzvsjetpt_reco_cuts_pr = \
            build2dhisto("hzvsjetpt_reco_cuts", zbinarray_reco, jetptbinarray_reco)
        hzvsjetpt_gen_nocuts_pr = \
            build2dhisto("hzvsjetpt_gen_nocuts", zbinarray_gen, jetptbinarray_gen)
        hzvsjetpt_gen_cuts_pr = \
            build2dhisto("hzvsjetpt_gen_cuts", zbinarray_gen, jetptbinarray_gen)

        fill2dhist(df_tmp_selreco_pr, hzvsjetpt_reco_nocuts_pr, self.v_varshape_binning, "pt_jet")
        fill2dhist(df_tmp_selgen_pr, hzvsjetpt_gen_nocuts_pr, self.v_varshape_binning_gen, "pt_gen_jet")
        fill2dhist(df_tmp_selrecogen_pr, hzvsjetpt_reco_cuts_pr, self.v_varshape_binning, "pt_jet")
        fill2dhist(df_tmp_selrecogen_pr, hzvsjetpt_gen_cuts_pr, self.v_varshape_binning_gen, "pt_gen_jet")
        hzvsjetpt_reco_nocuts_pr.Write()
        hzvsjetpt_gen_nocuts_pr.Write()
        hzvsjetpt_reco_cuts_pr.Write()
        hzvsjetpt_gen_cuts_pr.Write()

        hzvsjetpt_reco_closure_pr = \
            build2dhisto("hzvsjetpt_reco_closure", zbinarray_reco, jetptbinarray_reco)
        # The gen closure input is filled with generator-level variables
        # further down, so it is booked with the gen binning like every other
        # gen-level histogram here (it used to be booked with reco binning).
        hzvsjetpt_gen_closure_pr = \
            build2dhisto("hzvsjetpt_gen_closure", zbinarray_gen, jetptbinarray_gen)
        hzvsjetpt_reco_pr = \
            build2dhisto("hzvsjetpt_reco", zbinarray_reco, jetptbinarray_reco)
        hzvsjetpt_gen_pr = \
            build2dhisto("hzvsjetpt_gen", zbinarray_gen, jetptbinarray_gen)
        # Both prompt response matrices are booked before the two template
        # histograms are filled.
        response_matrix_pr = RooUnfoldResponse(hzvsjetpt_reco_pr, hzvsjetpt_gen_pr)
        response_matrix_closure_pr = RooUnfoldResponse(hzvsjetpt_reco_pr, hzvsjetpt_gen_pr)

        fill2dhist(df_tmp_selreco_pr, hzvsjetpt_reco_pr, self.v_varshape_binning, "pt_jet")
        fill2dhist(df_tmp_selgen_pr, hzvsjetpt_gen_pr, self.v_varshape_binning_gen, "pt_gen_jet")
        hzvsjetpt_reco_pr.Write()
        hzvsjetpt_gen_pr.Write()

        hjetpt_gen_nocuts_pr = TH1F("hjetpt_gen_nocuts", \
            "hjetpt_gen_nocuts", njetptbin_gen, jetptbinarray_gen)
        hjetpt_gen_cuts_pr = TH1F("hjetpt_gen_cuts", \
            "hjetpt_gen_cuts", njetptbin_gen, jetptbinarray_gen)
        hjetpt_gen_nocuts_closure = TH1F("hjetpt_gen_nocuts_closure", \
            "hjetpt_gen_nocuts_closure", njetptbin_gen, jetptbinarray_gen)
        hjetpt_gen_cuts_closure = TH1F("hjetpt_gen_cuts_closure", \
            "hjetpt_gen_cuts_closure", njetptbin_gen, jetptbinarray_gen)
        hjetpt_gen_nocuts_pr.Sumw2()
        hjetpt_gen_cuts_pr.Sumw2()
        hjetpt_gen_nocuts_closure.Sumw2()
        # Sumw2 used to be called twice on the "nocuts" closure histogram and
        # never on the "cuts" one.
        hjetpt_gen_cuts_closure.Sumw2()

        fill_hist(hjetpt_gen_nocuts_pr, df_tmp_selgen_pr["pt_gen_jet"])
        fill_hist(hjetpt_gen_cuts_pr, df_tmp_selrecogen_pr["pt_gen_jet"])
        hjetpt_gen_nocuts_pr.Write()
        hjetpt_gen_cuts_pr.Write()
        # end of histograms for unfolding

        # Finely binned (100 sub-bins per analysis bin) gen-vs-reco
        # correlations over the full range, normalised to unit integral.
        hjetpt_genvsreco_full = \
            TH2F("hjetpt_genvsreco_full_nonprompt", "hjetpt_genvsreco_full_nonprompt", \
            njetptbin_gen * 100, self.lvar2_binmin_gen[0], self.lvar2_binmax_gen[-1], \
            njetptbin_reco * 100, self.lvar2_binmin_reco[0], self.lvar2_binmax_reco[-1])

        hz_genvsreco_full = \
            TH2F("hz_genvsreco_full_nonprompt", "hz_genvsreco_full_nonprompt", \
                 nzbin_gen * 100, self.lvarshape_binmin_gen[0], self.lvarshape_binmax_gen[-1],
                 nzbin_reco * 100, self.lvarshape_binmin_reco[0], self.lvarshape_binmax_reco[-1])

        fill2dhist(df_tmp_selrecogen, hjetpt_genvsreco_full, "pt_gen_jet", "pt_jet")
        normalise(hjetpt_genvsreco_full)
        hjetpt_genvsreco_full.Write()
        fill2dhist(df_tmp_selrecogen, hz_genvsreco_full, self.v_varshape_binning_gen, self.v_varshape_binning)
        normalise(hz_genvsreco_full)
        hz_genvsreco_full.Write()
        # Feed-down response matrix: filled without weights.
        for row in df_tmp_selrecogen.itertuples():
            response_matrix.Fill(getattr(row, self.v_varshape_binning), row.pt_jet,
                                 getattr(row, self.v_varshape_binning_gen), row.pt_gen_jet)
        response_matrix.Write("response_matrix_nonprompt")

        # histograms for unfolding
        hjetpt_genvsreco_full_pr = \
            TH2F("hjetpt_genvsreco_full", "hjetpt_genvsreco_full", \
            njetptbin_gen * 100, self.lvar2_binmin_gen[0], self.lvar2_binmax_gen[-1], \
            njetptbin_reco * 100, self.lvar2_binmin_reco[0], self.lvar2_binmax_reco[-1])

        hz_genvsreco_full_pr = \
            TH2F("hz_genvsreco_full", "hz_genvsreco_full", \
                 nzbin_gen * 100, self.lvarshape_binmin_gen[0], self.lvarshape_binmax_gen[-1],
                 nzbin_reco * 100, self.lvarshape_binmin_reco[0], self.lvarshape_binmax_reco[-1])
        fill2dhist(df_tmp_selrecogen_pr, hjetpt_genvsreco_full_pr, "pt_gen_jet", "pt_jet")
        normalise(hjetpt_genvsreco_full_pr)
        hjetpt_genvsreco_full_pr.Write()
        fill2dhist(df_tmp_selrecogen_pr, hz_genvsreco_full_pr, self.v_varshape_binning_gen, self.v_varshape_binning)
        normalise(hz_genvsreco_full_pr)
        hz_genvsreco_full_pr.Write()


        # Gen-level prompt distribution; its bin contents are used as prior
        # weights for the prompt response matrices when self.doprior is True.
        hzvsjetpt_prior_weights = build2dhisto("hzvsjetpt_prior_weights", \
            zbinarray_gen, jetptbinarray_gen)
        fill2dhist(df_tmp_selrecogen_pr, hzvsjetpt_prior_weights, self.v_varshape_binning_gen, "pt_gen_jet")
        # end of histograms for unfolding

        # Gen-vs-reco shape correlation in bins of reco jet pt.
        for ibin2 in range(self.p_nbin2_reco):
            df_tmp_selrecogen_jetbin = seldf_singlevar(df_tmp_selrecogen, "pt_jet", \
                self.lvar2_binmin_reco[ibin2], self.lvar2_binmax_reco[ibin2])
            suffix = "%s_%.2f_%.2f" % (self.v_var2_binning, \
                self.lvar2_binmin_reco[ibin2], self.lvar2_binmax_reco[ibin2])
            hz_genvsreco = TH2F("hz_genvsreco_nonprompt" + suffix, "hz_genvsreco_nonprompt" + suffix, \
                nzbin_gen * 100, self.lvarshape_binmin_gen[0], self.lvarshape_binmax_gen[-1], \
                nzbin_reco*100, self.lvarshape_binmin_reco[0], self.lvarshape_binmax_reco[-1])
            fill2dhist(df_tmp_selrecogen_jetbin, hz_genvsreco, self.v_varshape_binning_gen, self.v_varshape_binning)
            normalise(hz_genvsreco)
            hz_genvsreco.Write()

            df_tmp_selrecogen_pr_jetbin = seldf_singlevar(df_tmp_selrecogen_pr, "pt_jet", \
                self.lvar2_binmin_reco[ibin2], self.lvar2_binmax_reco[ibin2])
            hz_genvsreco_pr = TH2F("hz_genvsreco" + suffix, "hz_genvsreco" + suffix, \
                nzbin_gen * 100, self.lvarshape_binmin_gen[0], self.lvarshape_binmax_gen[-1], \
                nzbin_reco*100, self.lvarshape_binmin_reco[0], self.lvarshape_binmax_reco[-1])
            fill2dhist(df_tmp_selrecogen_pr_jetbin, hz_genvsreco_pr, self.v_varshape_binning_gen, self.v_varshape_binning)
            normalise(hz_genvsreco_pr)
            hz_genvsreco_pr.Write()

        # Gen-vs-reco jet pt correlation in bins of the reco shape variable.
        for ibinshape in range(len(self.lvarshape_binmin_reco)):
            df_tmp_selrecogen_zbin = seldf_singlevar(df_tmp_selrecogen, self.v_varshape_binning, \
                self.lvarshape_binmin_reco[ibinshape], self.lvarshape_binmax_reco[ibinshape])
            suffix = "%s_%.2f_%.2f" % \
                (self.v_varshape_binning, self.lvarshape_binmin_reco[ibinshape], self.lvarshape_binmax_reco[ibinshape])
            hjetpt_genvsreco = TH2F("hjetpt_genvsreco_nonprompt" + suffix, \
                "hjetpt_genvsreco_nonprompt" + suffix, njetptbin_gen * 100, self.lvar2_binmin_gen[0], \
                self.lvar2_binmax_gen[-1], njetptbin_reco * 100, self.lvar2_binmin_reco[0], \
                self.lvar2_binmax_reco[-1])
            fill2dhist(df_tmp_selrecogen_zbin, hjetpt_genvsreco, "pt_gen_jet", "pt_jet")
            normalise(hjetpt_genvsreco)
            hjetpt_genvsreco.Write()

            df_tmp_selrecogen_pr_zbin = seldf_singlevar(df_tmp_selrecogen_pr, self.v_varshape_binning, \
                self.lvarshape_binmin_reco[ibinshape], self.lvarshape_binmax_reco[ibinshape])
            hjetpt_genvsreco_pr = TH2F("hjetpt_genvsreco" + suffix, \
                "hjetpt_genvsreco" + suffix, njetptbin_gen * 100, self.lvar2_binmin_gen[0], \
                self.lvar2_binmax_gen[-1], njetptbin_reco * 100, self.lvar2_binmin_reco[0], \
                self.lvar2_binmax_reco[-1])
            fill2dhist(df_tmp_selrecogen_pr_zbin, hjetpt_genvsreco_pr, "pt_gen_jet", "pt_jet")
            normalise(hjetpt_genvsreco_pr)
            hjetpt_genvsreco_pr.Write()

        # Fractional difference (reco - gen) / gen of the shape variable in
        # bins of the gen shape variable.
        for ibinshape in range(len(self.lvarshape_binmin_gen)):
            dtmp_nonprompt_zgen = seldf_singlevar(df_mc_reco_merged_nonprompt, \
                self.v_varshape_binning_gen, self.lvarshape_binmin_gen[ibinshape], self.lvarshape_binmax_gen[ibinshape])
            suffix = "%s_%.2f_%.2f" % \
                     (self.v_varshape_binning, self.lvarshape_binmin_gen[ibinshape], self.lvarshape_binmax_gen[ibinshape])
            hz_fracdiff = TH1F("hz_fracdiff_nonprompt" + suffix,
                               "hz_fracdiff_nonprompt" + suffix, 100, -2, 2)
            fill_hist(hz_fracdiff, (dtmp_nonprompt_zgen[self.v_varshape_binning] - \
                    dtmp_nonprompt_zgen[self.v_varshape_binning_gen])/dtmp_nonprompt_zgen[self.v_varshape_binning_gen])
            norm = hz_fracdiff.Integral(1, -1)
            if norm:
                hz_fracdiff.Scale(1.0 / norm)
            hz_fracdiff.Write()

            dtmp_prompt_zgen = seldf_singlevar(df_mc_reco_merged_prompt, \
                self.v_varshape_binning_gen, self.lvarshape_binmin_gen[ibinshape], self.lvarshape_binmax_gen[ibinshape])
            hz_fracdiff_pr = TH1F("hz_fracdiff_prompt" + suffix,
                                  "hz_fracdiff_prompt" + suffix, 100, -2, 2)
            fill_hist(hz_fracdiff_pr, (dtmp_prompt_zgen[self.v_varshape_binning] - \
                    dtmp_prompt_zgen[self.v_varshape_binning_gen])/dtmp_prompt_zgen[self.v_varshape_binning_gen])
            norm_pr = hz_fracdiff_pr.Integral(1, -1)
            if norm_pr:
                hz_fracdiff_pr.Scale(1.0 / norm_pr)
            hz_fracdiff_pr.Write()

        # Fractional difference (reco - gen) / gen of the jet pt in bins of
        # gen jet pt.
        for ibin2 in range(self.p_nbin2_gen):
            dtmp_nonprompt_jetptgen = seldf_singlevar(df_mc_reco_merged_nonprompt, \
                "pt_gen_jet", self.lvar2_binmin_gen[ibin2], self.lvar2_binmax_gen[ibin2])
            suffix = "%s_%.2f_%.2f" % (self.v_var2_binning,
                                       self.lvar2_binmin_gen[ibin2], self.lvar2_binmax_gen[ibin2])
            hjetpt_fracdiff = TH1F("hjetpt_fracdiff_nonprompt" + suffix,
                                   "hjetpt_fracdiff_nonprompt" + suffix, 100, -2, 2)
            fill_hist(hjetpt_fracdiff, (dtmp_nonprompt_jetptgen["pt_jet"] - \
                dtmp_nonprompt_jetptgen["pt_gen_jet"])/dtmp_nonprompt_jetptgen["pt_gen_jet"])
            norm = hjetpt_fracdiff.Integral(1, -1)
            if norm:
                hjetpt_fracdiff.Scale(1.0 / norm)
            hjetpt_fracdiff.Write()

            dtmp_prompt_jetptgen = seldf_singlevar(df_mc_reco_merged_prompt, \
                "pt_gen_jet", self.lvar2_binmin_gen[ibin2], self.lvar2_binmax_gen[ibin2])
            hjetpt_fracdiff_pr = TH1F("hjetpt_fracdiff_prompt" + suffix,
                                      "hjetpt_fracdiff_prompt" + suffix, 100, -2, 2)
            fill_hist(hjetpt_fracdiff_pr, (dtmp_prompt_jetptgen["pt_jet"] - \
                dtmp_prompt_jetptgen["pt_gen_jet"])/dtmp_prompt_jetptgen["pt_gen_jet"])
            norm_pr = hjetpt_fracdiff_pr.Integral(1, -1)
            if norm_pr:
                hjetpt_fracdiff_pr.Scale(1.0 / norm_pr)
            hjetpt_fracdiff_pr.Write()

        # Closure test: the prompt sample is split in two. The test part
        # provides the closure inputs, the train part fills the closure
        # response matrix.
        df_mc_reco_merged_prompt_train, df_mc_reco_merged_prompt_test = \
                train_test_split(df_mc_reco_merged_prompt, test_size=self.closure_frac)
        df_tmp_selgen_pr_test, df_tmp_selreco_pr_test, df_tmp_selrecogen_pr_test = \
                self.create_df_closure(df_mc_reco_merged_prompt_test)
        _, _, df_tmp_selrecogen_pr_train = \
                self.create_df_closure(df_mc_reco_merged_prompt_train)

        fill2dhist(df_tmp_selreco_pr_test, hzvsjetpt_reco_closure_pr, self.v_varshape_binning, "pt_jet")
        fill2dhist(df_tmp_selgen_pr_test, hzvsjetpt_gen_closure_pr, self.v_varshape_binning_gen, "pt_gen_jet")
        hzvsjetpt_reco_closure_pr.Write("input_closure_reco")
        hzvsjetpt_gen_closure_pr.Write("input_closure_gen")


        for ibin2 in range(self.p_nbin2_gen):
            suffix = "%s_%.2f_%.2f" % \
                (self.v_var2_binning, self.lvar2_binmin_gen[ibin2], self.lvar2_binmax_gen[ibin2])
            hz_gen_nocuts_closure = TH1F("hz_gen_nocuts_closure" + suffix,
                                         "hz_gen_nocuts_closure" + suffix,
                                         nzbin_gen, zbinarray_gen)
            hz_gen_nocuts_closure.Sumw2()
            hz_gen_cuts_closure = TH1F("hz_gen_cuts_closure" + suffix,
                                       "hz_gen_cuts_closure" + suffix,
                                       nzbin_gen, zbinarray_gen)
            hz_gen_cuts_closure.Sumw2()
            df_tmp_selgen_pr_test_bin = seldf_singlevar(df_tmp_selgen_pr_test, \
                "pt_gen_jet", self.lvar2_binmin_gen[ibin2], self.lvar2_binmax_gen[ibin2])
            df_tmp_selrecogen_pr_test_bin = seldf_singlevar(df_tmp_selrecogen_pr_test, \
                "pt_gen_jet", self.lvar2_binmin_gen[ibin2], self.lvar2_binmax_gen[ibin2])
            fill_hist(hz_gen_nocuts_closure, df_tmp_selgen_pr_test_bin[self.v_varshape_binning_gen])
            fill_hist(hz_gen_cuts_closure, df_tmp_selrecogen_pr_test_bin[self.v_varshape_binning_gen])
            hz_gen_cuts_closure.Write()
            hz_gen_nocuts_closure.Write()

        fill_hist(hjetpt_gen_nocuts_closure, df_tmp_selgen_pr_test["pt_gen_jet"])
        fill_hist(hjetpt_gen_cuts_closure, df_tmp_selrecogen_pr_test["pt_gen_jet"])
        hjetpt_gen_nocuts_closure.Write()
        hjetpt_gen_cuts_closure.Write()

        hzvsjetpt_reco_nocuts_closure = TH2F("hzvsjetpt_reco_nocuts_closure",
                                             "hzvsjetpt_reco_nocuts_closure",
                                             nzbin_reco, zbinarray_reco,
                                             njetptbin_reco, jetptbinarray_reco)
        hzvsjetpt_reco_nocuts_closure.Sumw2()
        hzvsjetpt_reco_cuts_closure = TH2F("hzvsjetpt_reco_cuts_closure",
                                           "hzvsjetpt_reco_cuts_closure",
                                           nzbin_reco, zbinarray_reco,
                                           njetptbin_reco, jetptbinarray_reco)
        hzvsjetpt_reco_cuts_closure.Sumw2()

        fill2dhist(df_tmp_selreco_pr_test, hzvsjetpt_reco_nocuts_closure, self.v_varshape_binning, "pt_jet")
        fill2dhist(df_tmp_selrecogen_pr_test, hzvsjetpt_reco_cuts_closure, self.v_varshape_binning, "pt_jet")
        hzvsjetpt_reco_nocuts_closure.Write()
        hzvsjetpt_reco_cuts_closure.Write()

        def fill_weighted_response(response, dframe):
            # Fill a response matrix row by row. With self.doprior enabled,
            # each entry is weighted by the inverse content of its gen-level
            # bin in hzvsjetpt_prior_weights; bins with non-positive content
            # keep weight 1.
            for row in dframe.itertuples():
                response_matrix_weight = 1.0
                if self.doprior is True:
                    binx = hzvsjetpt_prior_weights.GetXaxis().FindBin(
                        getattr(row, self.v_varshape_binning_gen))
                    biny = hzvsjetpt_prior_weights.GetYaxis().FindBin(row.pt_gen_jet)
                    weight = hzvsjetpt_prior_weights.GetBinContent(binx, biny)

                    if weight > 0.0:
                        response_matrix_weight = 1.0/weight
                response.Fill(getattr(row, self.v_varshape_binning), row.pt_jet,
                              getattr(row, self.v_varshape_binning_gen), row.pt_gen_jet,
                              response_matrix_weight)

        # Full prompt sample -> nominal response; train part -> closure.
        fill_weighted_response(response_matrix_pr, df_tmp_selrecogen_pr)
        fill_weighted_response(response_matrix_closure_pr, df_tmp_selrecogen_pr_train)
        response_matrix_pr.Write("response_matrix")
        response_matrix_closure_pr.Write("response_matrix_closure")

        out_file.Close()
Ejemplo n.º 21
0
    def process_histomass_single(self, index):
        """Fill and write the mass/multiplicity histograms for one input file.

        The output ROOT file ``self.l_histomass[index]`` is recreated and
        filled in two passes:

        1. For every second-variable bin, an event-normalisation histogram
           ``hEvForNorm_mult<ibin2>`` is written (bin 1: value returned by
           ``getnormforselevt``, bin 2: number of events with
           ``is_ev_rej==0``).
        2. For every (pt bin, second-variable bin) pair, the candidate
           dataframe is selected and the invariant-mass, weighted
           invariant-mass, multiplicity and V0M-vs-tracklets histograms are
           written.  If the dataframe has a ``pt_jet`` column a z-vs-mass
           histogram is added, and for ``self.mcordata == "mc"`` the signal
           and reflection mass histograms are added as well.

        Args:
            index: position of the file to process in the per-file lists
                (``self.l_histomass``, ``self.l_evtorig``,
                ``self.mptfiles_recoskmldec[bin_id]``).
        """
        myfile = TFile.Open(self.l_histomass[index], "recreate")
        dfevtorig = pickle.load(openfile(self.l_evtorig[index], "rb"))
        if self.s_trigger is not None:
            dfevtorig = dfevtorig.query(self.s_trigger)

        # Loop-invariant lookups, resolved once instead of in every iteration.
        runlist = self.run_param[self.runlistrigger[self.triggerbit]]
        var2_bins = list(zip(self.lvar2_binmin, self.lvar2_binmax))
        use_trigger_weights = "INT7" not in self.triggerbit and self.mcordata == "data"
        fileweight_name = "%s/correctionsweights.root" % self.d_val
        namefunction = "funcnorm_%s_%s" % (self.triggerbit, self.v_var2_binning_gen)

        dfevtorig = selectdfrunlist(dfevtorig, runlist, "run_number")
        for ibin2, (var2min, var2max) in enumerate(var2_bins):
            mybindfevtorig = seldf_singlevar(dfevtorig, self.v_var2_binning_gen,
                                             var2min, var2max)
            hNorm = TH1F("hEvForNorm_mult%d" % ibin2, "hEvForNorm_mult%d" % ibin2, 2, 0.5, 2.5)
            # Label text is stored in the output file, so it is kept verbatim.
            hNorm.GetXaxis().SetBinLabel(1, "normsalisation factor")
            hNorm.GetXaxis().SetBinLabel(2, "selected events")
            nselevt = 0
            norm = 0
            if not mybindfevtorig.empty:
                nselevt = len(mybindfevtorig.query("is_ev_rej==0"))
                norm = getnormforselevt(mybindfevtorig)
            hNorm.SetBinContent(1, norm)
            hNorm.SetBinContent(2, nselevt)
            hNorm.Write()

        for ipt in range(self.p_nptfinbins):
            bin_id = self.bin_matching[ipt]
            df = pickle.load(openfile(self.mptfiles_recoskmldec[bin_id][index], "rb"))
            if self.doml is True:
                df = df.query(self.l_selml[bin_id])
            if self.s_evtsel is not None:
                df = df.query(self.s_evtsel)
            if self.s_trigger is not None:
                df = df.query(self.s_trigger)
            df = seldf_singlevar(df, self.v_var_binning, \
                                 self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])
            for ibin2, (var2min, var2max) in enumerate(var2_bins):
                suffix = "%s%d_%d_%.2f%s_%.2f_%.2f" % \
                         (self.v_var_binning, self.lpt_finbinmin[ipt],
                          self.lpt_finbinmax[ipt], self.lpt_probcutfin[bin_id],
                          self.v_var2_binning, var2min, var2max)
                h_invmass = TH1F("hmass" + suffix, "", self.p_num_bins,
                                 self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
                h_invmass_weight = TH1F("h_invmass_weight" + suffix, "", self.p_num_bins,
                                        self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
                df_bin = seldf_singlevar(df, self.v_var2_binning, var2min, var2max)
                df_bin = selectdfrunlist(df_bin, runlist, "run_number")
                fill_hist(h_invmass, df_bin.inv_mass)
                if use_trigger_weights:
                    fileweight = TFile.Open(fileweight_name, "read")
                    try:
                        funcweighttrig = fileweight.Get(namefunction)
                        # The weighted histogram stays empty when the
                        # correction function is not present in the file.
                        if funcweighttrig:
                            weights = evaluate(funcweighttrig, df_bin[self.v_var2_binning])
                            weightsinv = [1./weight for weight in weights]
                            fill_hist(h_invmass_weight, df_bin.inv_mass, weights=weightsinv)
                    finally:
                        # Close the correction file once its function has been
                        # used; previously one handle was left open for every
                        # (pt, var2) bin.
                        fileweight.Close()
                # Opening/closing the weights file changes ROOT's current
                # directory, so point it back at the output file before writing.
                myfile.cd()
                h_invmass.Write()
                h_invmass_weight.Write()
                histmult = TH1F("hmultpt%dmult%d" % (ipt, ibin2),
                                "hmultpt%dmult%d"  % (ipt, ibin2), 1000, 0, 1000)
                fill_hist(histmult, df_bin.n_tracklets_corr)
                histmult.Write()
                h_v0m_ntrackletsD = TH2F("h_v0m_ntrackletsD%d%d" % (ibin2, ipt),
                                         "h_v0m_ntrackletsD%d%d" % (ibin2, ipt),
                                         200, 0, 200, 200, -0.5, 1999.5)
                v_v0m_ntrackletsD = np.vstack((df_bin.n_tracklets_corr,
                                               df_bin.v0m_corr)).T
                fill_hist(h_v0m_ntrackletsD, v_v0m_ntrackletsD)
                h_v0m_ntrackletsD.Write()
                if "pt_jet" in df_bin.columns:
                    zarray = z_calc(df_bin.pt_jet, df_bin.phi_jet, df_bin.eta_jet,
                                    df_bin.pt_cand, df_bin.phi_cand, df_bin.eta_cand)
                    h_zvsinvmass = TH2F("hzvsmass" + suffix, "", 5000, 1.00, 6.00, 2000, -0.5, 1.5)
                    zvsinvmass = np.vstack((df_bin.inv_mass, zarray)).T
                    fill_hist(h_zvsinvmass, zvsinvmass)
                    h_zvsinvmass.Write()

                if self.mcordata == "mc":
                    # Tag reflections from the bit variable, then split the
                    # bin into signal and reflection candidates.
                    df_bin[self.v_ismcrefl] = np.array(tag_bit_df(df_bin, self.v_bitvar,
                                                                  self.b_mcrefl), dtype=int)
                    df_bin_sig = df_bin[df_bin[self.v_ismcsignal] == 1]
                    df_bin_refl = df_bin[df_bin[self.v_ismcrefl] == 1]
                    h_invmass_sig = TH1F("hmass_sig" + suffix, "", self.p_num_bins,
                                         self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
                    h_invmass_refl = TH1F("hmass_refl" + suffix, "", self.p_num_bins,
                                          self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
                    fill_hist(h_invmass_sig, df_bin_sig.inv_mass)
                    fill_hist(h_invmass_refl, df_bin_refl.inv_mass)
                    myfile.cd()
                    h_invmass_sig.Write()
                    h_invmass_refl.Write()
Ejemplo n.º 22
0
    def define_cutvariation_limits(self):
        """Determine, per pt bin, the ML-probability cut-variation limits.

        For every pt bin the prompt reconstructed and generated MC samples
        are loaded and restricted to the pt bin and to the second-variable
        bin ``self.use_var2_bin``.  The central prompt efficiency is the
        number of reconstructed prompt candidates passing the central
        probability cut divided by the number of generated prompt candidates.

        The lower limit is found by scanning thresholds upward from
        ``self.p_cutvar_minrange[bin_id]`` in ``2 * self.p_ncutvar`` steps
        towards the central cut, stopping at the first threshold whose
        efficiency ratio to the central one is below
        ``1 + self.p_maxperccutvar``.  The upper limit is found by scanning
        from the central cut towards ``self.p_cutvar_maxrange[bin_id]``,
        stopping when the ratio drops below ``1 - self.p_maxperccutvar``.

        A pt bin without generated prompt candidates is treated as having
        zero efficiency (instead of raising ``ZeroDivisionError``); it then
        follows the same early-exit path as a zero central efficiency.

        Returns:
            Tuple ``(min_cv_cut, max_cv_cut)`` of lists with one threshold
            per pt bin.

        Raises:
            ZeroDivisionError: if ``self.p_ncutvar`` is 0, because the scan
                step size divides by ``2 * self.p_ncutvar``.
        """

        def efficiency(n_sel, n_gen):
            """Return n_sel / n_gen, or 0.0 when there is no generated entry."""
            return n_sel / n_gen if n_gen > 0 else 0.0

        min_cv_cut = []
        max_cv_cut = []
        ncutvar_temp = self.p_ncutvar * 2
        for ipt in range(self.p_nptfinbins):

            print("Systematics pt-bin: ", ipt)

            bin_id = self.bin_matching[ipt]
            df_mc_reco = pickle.load(openfile(self.lpt_recodecmerged_mc[bin_id], "rb"))
            if self.s_evtsel is not None:
                df_mc_reco = df_mc_reco.query(self.s_evtsel)
            if self.s_trigger_mc is not None:
                df_mc_reco = df_mc_reco.query(self.s_trigger_mc)

            df_mc_gen = pickle.load(openfile(self.lpt_gendecmerged[bin_id], "rb"))
            df_mc_gen = df_mc_gen.query(self.s_presel_gen_eff)

            df_mc_reco = seldf_singlevar(df_mc_reco, self.v_var_binning, \
                                self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])
            df_mc_gen = seldf_singlevar(df_mc_gen, self.v_var_binning, \
                                 self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])

            df_mc_reco = seldf_singlevar(df_mc_reco, self.v_var2_binning, \
                                         self.lvar2_binmin[self.use_var2_bin], \
                                         self.lvar2_binmax[self.use_var2_bin])
            df_mc_gen = seldf_singlevar(df_mc_gen, self.v_var2_binning, \
                                        self.lvar2_binmin[self.use_var2_bin], \
                                        self.lvar2_binmax[self.use_var2_bin])

            df_gen_sel_pr = df_mc_gen[df_mc_gen.ismcprompt == 1]
            df_reco_presel_pr = df_mc_reco[df_mc_reco.ismcprompt == 1]

            selml_cent = "y_test_prob%s>%s" % (self.p_modelname, self.lpt_probcutfin[bin_id])
            df_reco_sel_pr = df_reco_presel_pr.query(selml_cent)
            len_gen_pr = len(df_gen_sel_pr)
            if len_gen_pr == 0:
                print("Warning: no generated prompt candidates in pt-bin", ipt)
            eff_cent = efficiency(len(df_reco_sel_pr), len_gen_pr)
            print("Central efficiency pt-bin", ipt, ": ", eff_cent)

            # Lower limit: start from the loosest allowed cut and tighten it.
            stepsmin = \
              (self.lpt_probcutfin[bin_id] - self.p_cutvar_minrange[bin_id]) / ncutvar_temp
            min_cv_cut.append(self.lpt_probcutfin[bin_id])
            df_reco_cvmin_pr = df_reco_presel_pr
            for icv in range(ncutvar_temp):
                min_cv_cut[ipt] = self.p_cutvar_minrange[bin_id] + icv * stepsmin
                selml_min = "y_test_prob%s>%s" % (self.p_modelname, min_cv_cut[ipt])
                # The selection is applied cumulatively to the already
                # filtered frame.
                df_reco_cvmin_pr = df_reco_cvmin_pr.query(selml_min)
                eff_min = efficiency(len(df_reco_cvmin_pr), len_gen_pr)
                if eff_cent == 0:
                    break
                if eff_min / eff_cent < 1 + self.p_maxperccutvar:
                    break

            eff_min = efficiency(len(df_reco_cvmin_pr), len_gen_pr)
            print("Minimal efficiency pt-bin", ipt, ": ", eff_min)

            # Upper limit: start from the central cut and tighten it further.
            stepsmax = \
              (self.p_cutvar_maxrange[bin_id] - self.lpt_probcutfin[bin_id]) / ncutvar_temp
            max_cv_cut.append(self.lpt_probcutfin[bin_id])
            df_reco_cvmax_pr = df_reco_sel_pr
            for icv in range(ncutvar_temp):
                max_cv_cut[ipt] = self.lpt_probcutfin[bin_id] + icv * stepsmax
                selml_max = "y_test_prob%s>%s" % (self.p_modelname, max_cv_cut[ipt])
                df_reco_cvmax_pr = df_reco_cvmax_pr.query(selml_max)
                eff_max = efficiency(len(df_reco_cvmax_pr), len_gen_pr)
                if eff_cent == 0:
                    break
                if eff_max / eff_cent < 1 - self.p_maxperccutvar:
                    break

            eff_max = efficiency(len(df_reco_cvmax_pr), len_gen_pr)
            print("Maximal efficiency pt-bin", ipt, ": ", eff_max)

        return min_cv_cut, max_cv_cut
    def process_histomass_single(self, index):
        """Write event-count and invariant-mass histograms for one input file.

        Recreates the ROOT file ``self.l_histomass[index]`` and stores in it:

        * ``histonorm``: event counts after each event-selection stage;
        * the three histograms returned by ``gethistonormforselevt_varsel``;
        * one invariant-mass histogram per pt bin, plus signal, background
          and reflection mass histograms when ``self.mcordata == "mc"``.

        Args:
            index: position of the file to process in the per-file lists
                (``self.l_histomass``, ``self.l_evtorig``,
                ``self.mptfiles_recoskmldec[bin_id]``).
        """
        # NOTE(review): myfile is not explicitly closed in the lines visible
        # here - confirm it is closed/flushed elsewhere.
        myfile = TFile.Open(self.l_histomass[index], "recreate")
        dfevtorig = pickle.load(openfile(self.l_evtorig[index], "rb"))
        # Record the number of events after each successive selection stage.
        neventsorig = len(dfevtorig)
        if self.s_trigger is not None:
            dfevtorig = dfevtorig.query(self.s_trigger)
        neventsaftertrigger = len(dfevtorig)
        if self.runlistrigger is not None:
            dfevtorig = selectdfrunlist(dfevtorig, \
                             self.run_param[self.runlistrigger], "run_number")
        neventsafterrunsel = len(dfevtorig)
        # The event selection goes into a separate frame: dfevtorig (trigger
        # and run selection only) is still passed on below.
        if self.s_evtsel is not None:
            dfevtevtsel = dfevtorig.query(self.s_evtsel)
        else:
            dfevtevtsel = dfevtorig
        neventsafterevtsel = len(dfevtevtsel)

        # Validation histogram for the event selection: of its 10 bins only
        # the first four are filled and labelled.
        histonorm = TH1F("histonorm", "histonorm", 10, 0, 10)
        histonorm.SetBinContent(1, neventsorig)
        histonorm.GetXaxis().SetBinLabel(1, "tot events")
        histonorm.SetBinContent(2, neventsaftertrigger)
        histonorm.GetXaxis().SetBinLabel(2, "tot events after trigger")
        histonorm.SetBinContent(3, neventsafterrunsel)
        histonorm.GetXaxis().SetBinLabel(3, "tot events after run sel")
        histonorm.SetBinContent(4, neventsafterevtsel)
        histonorm.GetXaxis().SetBinLabel(4, "tot events after evt sel")
        # NOTE(review): written before myfile.cd(); presumably myfile is
        # already ROOT's current directory because it was just opened - confirm.
        histonorm.Write()

        myfile.cd()
        labeltrigger = "hbit%s" % (self.triggerbit)
        # Normalisation histograms built from the events before and after the
        # event selection; their content is defined by the helper.
        hsel, hnovtxmult, hvtxoutmult = gethistonormforselevt_varsel(dfevtorig, dfevtevtsel, \
                                                              labeltrigger, self.s_var_evt_sel)
        hsel.Write()
        hnovtxmult.Write()
        hvtxoutmult.Write()

        for ipt in range(self.p_nptfinbins):
            # bin_matching maps the analysis pt bin onto the index used for
            # the per-bin input files and ML selections.
            bin_id = self.bin_matching[ipt]
            df = pickle.load(
                openfile(self.mptfiles_recoskmldec[bin_id][index], "rb"))
            # Apply the same event, trigger and run-list selections as above
            # to the candidate dataframe.
            if self.s_evtsel is not None:
                df = df.query(self.s_evtsel)
            if self.s_trigger is not None:
                df = df.query(self.s_trigger)
            if self.runlistrigger is not None:
                df = selectdfrunlist(df, \
                    self.run_param[self.runlistrigger], "run_number")

            # The ML selection is applied only when self.doml is exactly True.
            if self.doml is True:
                df = df.query(self.l_selml[bin_id])
            # Restrict to the [lpt_finbinmin, lpt_finbinmax] range of this bin.
            df = seldf_singlevar(df, self.v_var_binning, \
                                 self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])

            if self.do_custom_analysis_cuts:
                df = self.apply_cuts_ptbin(df, ipt)

            # Histogram-name suffix: binning variable, bin edges and the
            # probability cut(s); multi-class models contribute two cut values.
            # NOTE(review): the cut is indexed with ipt here while the ML
            # selection above uses bin_id - confirm this is intended.
            if self.mltype == "MultiClassification":
                suffix = "%s%d_%d_%.2f%.2f" % \
                         (self.v_var_binning, self.lpt_finbinmin[ipt],
                          self.lpt_finbinmax[ipt], self.lpt_probcutfin[ipt][0],
                          self.lpt_probcutfin[ipt][1])
            else:
                suffix = "%s%d_%d_%.2f" % \
                         (self.v_var_binning, self.lpt_finbinmin[ipt],
                          self.lpt_finbinmax[ipt], self.lpt_probcutfin[ipt])

            h_invmass = TH1F("hmass" + suffix, "", self.p_num_bins,
                             self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
            fill_hist(h_invmass, df[self.v_invmass])
            myfile.cd()
            h_invmass.Write()

            if self.mcordata == "mc":
                # Add the reflection flag computed from the bit variable, then
                # split the candidates into signal, background and reflections.
                df[self.v_ismcrefl] = np.array(tag_bit_df(
                    df, self.v_bitvar, self.b_mcrefl),
                                               dtype=int)
                df_sig = df[df[self.v_ismcsignal] == 1]
                df_bkg = df[df[self.v_ismcbkg] == 1]
                df_refl = df[df[self.v_ismcrefl] == 1]
                h_invmass_sig = TH1F("hmass_sig" + suffix, "", self.p_num_bins,
                                     self.p_mass_fit_lim[0],
                                     self.p_mass_fit_lim[1])
                h_invmass_bkg = TH1F("hmass_bkg" + suffix, "", self.p_num_bins,
                                     self.p_mass_fit_lim[0],
                                     self.p_mass_fit_lim[1])
                h_invmass_refl = TH1F("hmass_refl" + suffix, "",
                                      self.p_num_bins, self.p_mass_fit_lim[0],
                                      self.p_mass_fit_lim[1])
                fill_hist(h_invmass_sig, df_sig[self.v_invmass])
                fill_hist(h_invmass_bkg, df_bkg[self.v_invmass])
                fill_hist(h_invmass_refl, df_refl[self.v_invmass])
                myfile.cd()
                h_invmass_sig.Write()
                h_invmass_bkg.Write()
                h_invmass_refl.Write()