def gethistonormforselevt_mult(self, df_evt, dfevtevtsel, label, var, weightfunc=None):
        """Book and fill three histograms of ``var`` used for event normalisation.

        Args:
            df_evt: event dataframe; it is filtered on its 'is_ev_rej' column
                through filter_bit_df / tag_bit_df.
            dfevtevtsel: dataframe of selected events; fills the 'sel_' histogram.
            label: base name of the histograms; "_weight" is appended when a
                weight function is supplied.
            var: name of the column that is histogrammed.
            weightfunc: optional object passed to ``evaluate``; when given,
                every entry is filled with weight 1/weightfunc(value).

        Returns:
            Tuple ``(sel, novtx, vtxout)`` of TH1F histograms.
        """
        weighted = weightfunc is not None
        if weighted:
            label += "_weight"

        def book(prefix):
            # Name and title are identical; binning comes from the instance.
            name = prefix + label
            return TH1F(name, name, self.nbinshisto,
                        self.minvaluehisto, self.maxvaluehisto)

        h_sel = book('sel_')
        h_novtx = book('novtx_')
        h_vtxout = book('vtxout_')

        df_kept = filter_bit_df(df_evt, 'is_ev_rej', [[], [0, 5, 6, 10, 11]])
        # Rows of df_kept that tag_bit_df does NOT tag for bits 1, 2, 7, 12
        # (per the original comment: events without a reconstructed vertex).
        vtx_mask = tag_bit_df(df_kept, 'is_ev_rej', [[], [1, 2, 7, 12]])
        df_without_vtx = df_kept[~vtx_mask.values]
        # Per the original comment: events with reconstructed zvtx > 10 cm.
        df_zvtx_out = filter_bit_df(df_kept, 'is_ev_rej', [[3], [1, 2, 7, 12]])

        samples = ((h_sel, dfevtevtsel),
                   (h_novtx, df_without_vtx),
                   (h_vtxout, df_zvtx_out))
        for histo, frame in samples:
            column = frame[var]
            if weighted:
                inverse = [1./w for w in evaluate(weightfunc, column)]
                fill_hist(histo, column, weights=inverse)
            else:
                fill_hist(histo, column)

        return h_sel, h_novtx, h_vtxout
Exemple #2
0
def test_evaluate_graph():
    """Check rnp.evaluate on a TGraph and a TSpline3, plus its error cases."""
    sample_x = [0, .5, 1]

    graph = ROOT.TGraph(2)
    for i_point, (x, y) in enumerate(((0, 1), (1, 2))):
        graph.SetPoint(i_point, x, y)
    assert_array_equal(rnp.evaluate(graph, sample_x), [1, 1.5, 2])

    spline = ROOT.TSpline3("spline", graph)
    assert_array_equal(rnp.evaluate(spline, sample_x),
                       [spline.Eval(x) for x in sample_x])

    # Error cases: unsupported object type, then 2D input for 1D evaluables.
    wrong_shape = RNG.rand(5, 2)
    assert_raises(TypeError, rnp.evaluate, object(), [1, 2, 3])
    for evaluable in (graph, spline):
        assert_raises(ValueError, rnp.evaluate, evaluable, wrong_shape)
Exemple #3
0
 def corrections(self, rec):
     """Return the posterior trigger correction weights for ``rec``.

     Returns ``None`` when ``self.posterior_trigger_correction`` is falsy;
     otherwise a one-element list holding the result of evaluating
     ``self.trigger_correct`` on the 'tau1_pt' / 'tau2_pt' fields of ``rec``.
     """
     if self.posterior_trigger_correction:
         tau_pts = rec2array(rec[['tau1_pt', 'tau2_pt']])
         return [evaluate(self.trigger_correct, tau_pts)]
     return None
Exemple #4
0
 def corrections(self, rec):
     """Return the posterior trigger correction weights for ``rec``.

     Returns ``None`` when ``self.posterior_trigger_correction`` is falsy;
     otherwise a one-element list holding the result of evaluating
     ``self.trigger_correct`` on the 'tau1_pt' / 'tau2_pt' fields of ``rec``.
     """
     if self.posterior_trigger_correction:
         tau_pts = rec2array(rec[['tau1_pt', 'tau2_pt']])
         return [evaluate(self.trigger_correct, tau_pts)]
     return None
Exemple #5
0
    def make_weights(col, func, hist, use_func):
        """Compute one weight per entry of ``col``.

        Args:
            col: np.array
                values to compute weights for
            func: ROOT.TF1
                function evaluated on ``col`` when ``use_func`` is true
            hist: TH1
                histogram whose bin contents serve as weights otherwise
            use_func: bool
                choose ``func`` (True) or ``hist`` (False) as weight source

        Returns:
            iterable: the result of ``evaluate(func, col)``, or a list of
            bin contents in which zeros are replaced by 1.
        """
        if not use_func:
            weights = []
            for entry in col:
                content = hist.GetBinContent(hist.FindBin(entry))
                # The histogram has empty bins at high multiplicity
                # (>125 ntrkl); substitute 1 so that callers inverting the
                # weight never divide by zero.
                weights.append(1. if content == 0. else content)
            return weights

        return evaluate(func, col)
def test_evaluate_func():
    """Check rnp.evaluate on TF1/TF2/TF3, formula strings and a 4D TFormula."""
    func_1d = ROOT.TF1("f1", "x")
    func_2d = ROOT.TF2("f2", "x*y")
    func_3d = ROOT.TF3("f3", "x*y*z")

    # Random sample points, one row per point.
    pts_1d = RNG.rand(5)
    pts_2d = RNG.rand(5, 2)
    pts_3d = RNG.rand(5, 3)
    pts_4d = RNG.rand(5, 4)

    # Each function is evaluated both as an object and through its title,
    # which is the formula string it was built from.
    reference = (
        (func_1d, pts_1d, [func_1d.Eval(x) for x in pts_1d]),
        (func_2d, pts_2d, [func_2d.Eval(*x) for x in pts_2d]),
        (func_3d, pts_3d, [func_3d.Eval(*x) for x in pts_3d]),
    )
    for func, points, expected in reference:
        assert_array_equal(rnp.evaluate(func, points), expected)
        assert_array_equal(rnp.evaluate(func.GetTitle(), points), expected)

    # 4d formula
    formula_4d = ROOT.TFormula('test', 'x*y+z*t')
    assert_array_equal(rnp.evaluate(formula_4d, pts_4d),
                       [formula_4d.Eval(*x) for x in pts_4d])

    # Dimension mismatches and bad formula strings must raise ValueError.
    invalid_calls = (
        (func_1d, pts_2d),
        (func_2d, pts_3d),
        (func_2d, pts_1d),
        (func_3d, pts_1d),
        (func_3d, pts_2d),
        ("f", pts_1d),
        ("x*y", pts_1d),
        ("x", pts_2d),
        ("x*y", pts_3d),
    )
    for evaluable, points in invalid_calls:
        assert_raises(ValueError, rnp.evaluate, evaluable, points)
Exemple #7
0
def _calcPulls_TH1(hist, modelFunc):
    """
    Calculate pulls between a histogram and a model function.

    The model is evaluated at the x positions returned by ``getXvalues``
    (the bin centers, per the original description).
    :param hist: histogram providing bin contents and bin errors
    :param modelFunc: model passed to ``root_numpy.evaluate``
    :return: result of ``mathFuncs.calcPulls(contents, errors, model values)``
    """
    binContents = root_numpy.hist2array(hist)
    binErrors = []
    # ROOT bins are 1-based; bin 0 is the underflow bin and is skipped.
    for iBin in range(1, hist.GetNbinsX() + 1):
        binErrors.append(hist.GetBinError(iBin))
    modelValues = root_numpy.evaluate(modelFunc, getXvalues(hist))
    return mathFuncs.calcPulls(binContents, binErrors, modelValues)
Exemple #8
0
def _calcPulls_graphErrors(graphErrors, modelFunc):
    """
    Calculate pulls between the points of a graph and a model function.

    The model is evaluated at the x positions returned by ``getXvalues``.
    :param graphErrors: graph providing y values (GetY) and y errors (GetEY)
    :param modelFunc: model passed to ``root_numpy.evaluate``
    :return: result of ``mathFuncs.calcPulls(y values, y errors, model values)``
    """
    measured = list(graphErrors.GetY())
    errors = list(graphErrors.GetEY())

    modelValues = root_numpy.evaluate(modelFunc, getXvalues(graphErrors))
    return mathFuncs.calcPulls(measured, errors, modelValues)
Exemple #9
0
def test_evaluate_hist():
    """Check rnp.evaluate on 1D/2D/3D histograms and its shape validation."""
    # FillRandom looks the functions up by name ("f1", "f2", "f3"); they are
    # not created here -- presumably registered by another test, verify.
    hist_1d = ROOT.TH1D("h1", "", 10, 0, 1)
    hist_1d.FillRandom("f1")
    hist_2d = ROOT.TH2D("h2", "", 10, 0, 1, 10, 0, 1)
    hist_2d.FillRandom("f2")
    hist_3d = ROOT.TH3D("h3", "", 10, 0, 1, 10, 0, 1, 10, 0, 1)
    hist_3d.FillRandom("f3")

    pts_1d = RNG.rand(5)
    pts_2d = RNG.rand(5, 2)
    pts_3d = RNG.rand(5, 3)

    # evaluate() must return the content of the bin each point falls into.
    assert_array_equal(
        rnp.evaluate(hist_1d, pts_1d),
        [hist_1d.GetBinContent(hist_1d.FindBin(x)) for x in pts_1d])
    for hist, points in ((hist_2d, pts_2d), (hist_3d, pts_3d)):
        assert_array_equal(
            rnp.evaluate(hist, points),
            [hist.GetBinContent(hist.FindBin(*x)) for x in points])

    # Points whose dimension differs from the histogram's must be rejected.
    mismatched = (
        (hist_1d, pts_2d),
        (hist_2d, pts_3d),
        (hist_2d, pts_1d),
        (hist_3d, pts_1d),
        (hist_3d, pts_2d),
    )
    for hist, points in mismatched:
        assert_raises(ValueError, rnp.evaluate, hist, points)
Exemple #10
0
 def value(self, inputs, map_positions):
     """Return, per entry, the output of the function selected by the index map.

     ``map_positions`` is clipped just below the upper bound of each axis of
     ``self.function_index_map``, the map is evaluated at the clipped
     positions to get one function index per entry, and the matching
     function's output for that entry is returned.
     """
     # Overflow removal: cap every coordinate slightly below the upper edge.
     caps = []
     for axis in range(len(self.function_index_map.axes)):
         caps.append(self.function_index_map.bounds(axis)[1]-1e-3)

     def clip(row):
         return np.minimum(row, caps)

     clipped = np.apply_along_axis(clip, 1, map_positions)
     # evaluate() on a 1D histogram expects a flat array
     if self.dim==1:
         clipped = clipped.ravel()
     indices = evaluate(self.function_index_map, clipped).astype(np.int32)

     # Run only the functions listed in self.indices; the others get an
     # empty placeholder so list positions still match function indices.
     # NOTE(review): membership is tested against self.indices, not the
     # local ``indices`` computed above -- confirm this is intended.
     per_function = []
     for i_func, function in enumerate(self.functions):
         if i_func in self.indices:
             per_function.append(function(inputs))
         else:
             per_function.append(np.array([]))

     # Pick, for each entry, the output of its selected function.
     result = np.zeros(len(indices))
     for i_entry, selected in enumerate(indices):
         result[i_entry] = per_function[selected][i_entry]
     return result
Exemple #11
0
    def process_histomass_single(self, index):
        """Produce the normalisation and invariant-mass histograms for one input file.

        Opens ``self.l_histomass[index]`` in "recreate" mode, loads the event
        dataframe pickled at ``self.l_evtorig[index]``, and writes:
          * ``histonorm``: event counts after each selection step and per
            bin of the second binning variable,
          * the histograms returned by ``gethistonormforselevt_mult``
            (plus weighted versions for weighted data),
          * one invariant-mass histogram (and a weighted companion) per
            pt bin and second-variable bin, with signal/reflection
            histograms for MC,
          * optional validation histograms when
            ``self.event_cand_validation`` is True.

        Args:
            index: position of the file to process in the per-file lists
                (``l_histomass``, ``l_evtorig``, ``mptfiles_recoskmldec``).

        NOTE(review): inputs are read with ``pickle.load``; only use on
        trusted files. The output file is not closed in this method --
        confirm this is handled elsewhere.
        """
        myfile = TFile.Open(self.l_histomass[index], "recreate")
        dfevtorig = pickle.load(openfile(self.l_evtorig[index], "rb"))
        neventsorig = len(dfevtorig)
        # Successive event selections; the count is recorded after each step,
        # whether or not the step was actually applied.
        if self.s_trigger is not None:
            dfevtorig = dfevtorig.query(self.s_trigger)
        neventsaftertrigger = len(dfevtorig)
        if self.runlistrigger is not None:
            dfevtorig = selectdfrunlist(dfevtorig, \
                             self.run_param[self.runlistrigger], "run_number")
        neventsafterrunsel = len(dfevtorig)
        # NOTE(review): s_evtsel is used without a None check here, while the
        # candidate loop below guards it -- confirm it can never be None.
        dfevtevtsel = dfevtorig.query(self.s_evtsel)

        # validation plot for event selection
        neventsafterevtsel = len(dfevtevtsel)
        histonorm = TH1F("histonorm", "histonorm", 10, 0, 10)
        histonorm.SetBinContent(1, neventsorig)
        histonorm.GetXaxis().SetBinLabel(1, "tot events")
        histonorm.SetBinContent(2, neventsaftertrigger)
        histonorm.GetXaxis().SetBinLabel(2, "tot events after trigger")
        histonorm.SetBinContent(3, neventsafterrunsel)
        histonorm.GetXaxis().SetBinLabel(3, "tot events after run sel")
        histonorm.SetBinContent(4, neventsafterevtsel)
        histonorm.GetXaxis().SetBinLabel(4, "tot events after evt sel")
        # Bins 5 and up hold the number of selected events in each bin of the
        # second binning variable.
        # NOTE(review): histonorm is booked with 10 bins, so this presumably
        # assumes at most 6 second-variable bins -- confirm.
        for ibin2 in range(len(self.lvar2_binmin)):
            binneddf = seldf_singlevar_inclusive(dfevtevtsel, self.v_var2_binning_gen, \
                self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
            histonorm.SetBinContent(5 + ibin2, len(binneddf))
            histonorm.GetXaxis().SetBinLabel(5 + ibin2, \
                        "tot events after mult sel %d - %d" % \
                        (self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2]))
        histonorm.Write()
        labeltrigger = "hbit%svs%s" % (self.triggerbit,
                                       self.v_var2_binning_gen)

        myfile.cd()
        # Unweighted normalisation histograms of the second binning variable.
        hsel, hnovtxmult, hvtxoutmult = \
            self.gethistonormforselevt_mult(dfevtorig, dfevtevtsel, \
                                       labeltrigger, self.v_var2_binning_gen)

        # Weighted versions are produced for data only.
        if self.apply_weights is True and self.mcordata == "data":
            hselweight, hnovtxmultweight, hvtxoutmultweight = \
                self.gethistonormforselevt_mult(dfevtorig, dfevtevtsel, \
                    labeltrigger, self.v_var2_binning_gen, self.weightfunc)
            hselweight.Write()
            hnovtxmultweight.Write()
            hvtxoutmultweight.Write()

        hsel.Write()
        hnovtxmult.Write()
        hvtxoutmult.Write()

        # Candidate dataframes after all selections but before pt binning;
        # collected for the validation step at the end.
        list_df_recodtrig = []
        for ipt in range(self.p_nptfinbins):
            bin_id = self.bin_matching[ipt]
            df = pickle.load(
                openfile(self.mptfiles_recoskmldec[bin_id][index], "rb"))
            if self.s_evtsel is not None:
                df = df.query(self.s_evtsel)
            if self.s_trigger is not None:
                df = df.query(self.s_trigger)
            if self.runlistrigger is not None:
                df = selectdfrunlist(df, \
                    self.run_param[self.runlistrigger], "run_number")
            if self.doml is True:
                df = df.query(self.l_selml[bin_id])
            list_df_recodtrig.append(df)
            # Restrict to the current bin of the first binning variable.
            df = seldf_singlevar(df, self.v_var_binning, \
                                 self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])
            for ibin2 in range(len(self.lvar2_binmin)):
                # Suffix encodes both binning variables, their bin edges and
                # the probability cut of this bin.
                suffix = "%s%d_%d_%.2f%s_%.2f_%.2f" % \
                         (self.v_var_binning, self.lpt_finbinmin[ipt],
                          self.lpt_finbinmax[ipt], self.lpt_probcutfin[bin_id],
                          self.v_var2_binning, self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
                h_invmass = TH1F("hmass" + suffix, "", self.p_num_bins,
                                 self.p_mass_fit_lim[0],
                                 self.p_mass_fit_lim[1])
                h_invmass_weight = TH1F("h_invmass_weight" + suffix, "",
                                        self.p_num_bins,
                                        self.p_mass_fit_lim[0],
                                        self.p_mass_fit_lim[1])
                df_bin = seldf_singlevar_inclusive(df, self.v_var2_binning, \
                                         self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
                fill_hist(h_invmass, df_bin.inv_mass)
                # Weighted histogram: each candidate gets 1/weightfunc(value).
                # It is written below even when left empty (MC, or weights
                # disabled).
                if self.apply_weights is True and self.mcordata == "data":
                    weights = evaluate(self.weightfunc,
                                       df_bin[self.v_var2_binning_gen])
                    weightsinv = [1. / weight for weight in weights]
                    fill_hist(h_invmass_weight,
                              df_bin.inv_mass,
                              weights=weightsinv)
                myfile.cd()
                h_invmass.Write()
                h_invmass_weight.Write()

                # MC only: separate histograms for signal and reflections.
                if self.mcordata == "mc":
                    df_bin[self.v_ismcrefl] = np.array(tag_bit_df(
                        df_bin, self.v_bitvar, self.b_mcrefl),
                                                       dtype=int)
                    df_bin_sig = df_bin[df_bin[self.v_ismcsignal] == 1]
                    df_bin_refl = df_bin[df_bin[self.v_ismcrefl] == 1]
                    h_invmass_sig = TH1F("hmass_sig" + suffix, "",
                                         self.p_num_bins,
                                         self.p_mass_fit_lim[0],
                                         self.p_mass_fit_lim[1])
                    h_invmass_refl = TH1F("hmass_refl" + suffix, "",
                                          self.p_num_bins,
                                          self.p_mass_fit_lim[0],
                                          self.p_mass_fit_lim[1])
                    fill_hist(h_invmass_sig, df_bin_sig.inv_mass)
                    fill_hist(h_invmass_refl, df_bin_refl.inv_mass)
                    myfile.cd()
                    h_invmass_sig.Write()
                    h_invmass_refl.Write()

        if self.event_cand_validation is True:
            # Keep candidates within +-0.15 of self.mass in invariant mass and
            # match them to the selected events.
            df_recodtrig = pd.concat(list_df_recodtrig)
            df_recodtrig = df_recodtrig.query("inv_mass>%f and inv_mass<%f" % \
                                              (self.mass - 0.15, self.mass + 0.15))
            dfevtwithd = pd.merge(dfevtevtsel,
                                  df_recodtrig,
                                  on=self.v_evtmatch)
            label = "h%s" % self.v_var2_binning_gen
            histomult = TH1F(label, label, self.nbinshisto, self.minvaluehisto,
                             self.maxvaluehisto)
            fill_hist(histomult, dfevtevtsel[self.v_var2_binning_gen])
            histomult.Write()
            labelwithd = "h%s_withd" % self.v_var2_binning_gen
            histomultwithd = TH1F(labelwithd, labelwithd, self.nbinshisto,
                                  self.minvaluehisto, self.maxvaluehisto)
            # "_x" is the suffix pandas gives the left-hand (event) column
            # when both merged frames contain it.
            fill_hist(histomultwithd,
                      dfevtwithd["%s_x" % self.v_var2_binning_gen])
            histomultwithd.Write()
            # Validation histograms
            fill_validation_vertex(dfevtorig, dfevtevtsel,
                                   df_recodtrig).write()
            fill_validation_multiplicity(dfevtorig, dfevtevtsel,
                                         df_recodtrig).write()
            fill_validation_candidates(df_recodtrig).write()
            if self.mcordata == "mc":
                fill_validation_candidates(
                    df_recodtrig[df_recodtrig[self.v_ismcsignal] == 1],
                    "MC").write()
Exemple #12
0
def test_evaluate():
    """Check rnp.evaluate on functions, formulas, histograms, graphs and splines."""
    # functions
    func_1d = TF1("f1", "x")
    func_2d = TF2("f2", "x*y")
    func_3d = TF3("f3", "x*y*z")
    # histograms, filled from the functions above (looked up by name)
    hist_1d = TH1D("h1", "", 10, 0, 1)
    hist_1d.FillRandom("f1")
    hist_2d = TH2D("h2", "", 10, 0, 1, 10, 0, 1)
    hist_2d.FillRandom("f2")
    hist_3d = TH3D("h3", "", 10, 0, 1, 10, 0, 1, 10, 0, 1)
    hist_3d.FillRandom("f3")
    # random sample points, one row per point
    pts_1d = RNG.rand(5)
    pts_2d = RNG.rand(5, 2)
    pts_3d = RNG.rand(5, 3)
    pts_4d = RNG.rand(5, 4)

    # Functions: evaluated as objects and through their title, which is the
    # formula string they were built from.
    func_reference = (
        (func_1d, pts_1d, [func_1d.Eval(x) for x in pts_1d]),
        (func_2d, pts_2d, [func_2d.Eval(*x) for x in pts_2d]),
        (func_3d, pts_3d, [func_3d.Eval(*x) for x in pts_3d]),
    )
    for func, points, expected in func_reference:
        assert_array_equal(rnp.evaluate(func, points), expected)
        assert_array_equal(rnp.evaluate(func.GetTitle(), points), expected)

    # 4d formula
    formula_4d = TFormula('test', 'x*y+z*t')
    assert_array_equal(rnp.evaluate(formula_4d, pts_4d),
                       [formula_4d.Eval(*x) for x in pts_4d])

    # Histograms: the value is the content of the bin holding each point.
    assert_array_equal(
        rnp.evaluate(hist_1d, pts_1d),
        [hist_1d.GetBinContent(hist_1d.FindBin(x)) for x in pts_1d])
    for hist, points in ((hist_2d, pts_2d), (hist_3d, pts_3d)):
        assert_array_equal(
            rnp.evaluate(hist, points),
            [hist.GetBinContent(hist.FindBin(*x)) for x in points])

    # Graph through (0, 1) and (1, 2), and a spline built on it.
    sample_x = [0, .5, 1]
    graph = TGraph(2)
    graph.SetPoint(0, 0, 1)
    graph.SetPoint(1, 1, 2)
    assert_array_equal(rnp.evaluate(graph, sample_x), [1, 1.5, 2])
    from ROOT import TSpline3
    spline = TSpline3("spline", graph)
    assert_array_equal(rnp.evaluate(spline, sample_x),
                       [spline.Eval(x) for x in sample_x])

    # Exceptions: unsupported type first, then dimension mismatches and
    # invalid formula strings.
    assert_raises(TypeError, rnp.evaluate, object(), [1, 2, 3])
    invalid_calls = (
        (hist_1d, pts_2d),
        (hist_2d, pts_3d),
        (hist_2d, pts_1d),
        (hist_3d, pts_1d),
        (hist_3d, pts_2d),
        (func_1d, pts_2d),
        (func_2d, pts_3d),
        (func_2d, pts_1d),
        (func_3d, pts_1d),
        (func_3d, pts_2d),
        (graph, pts_2d),
        (spline, pts_2d),
        ("f", pts_1d),
        ("x*y", pts_1d),
        ("x", pts_2d),
        ("x*y", pts_3d),
    )
    for evaluable, points in invalid_calls:
        assert_raises(ValueError, rnp.evaluate, evaluable, points)
Exemple #13
0
def test_evaluate():
    """Check rnp.evaluate on functions, formulas, histograms, graphs and splines.

    Expected values are built as lists. The previous ``map(f.Eval, arr)``
    form only works on Python 2: on Python 3 ``map`` returns a lazy iterator,
    which ``assert_array_equal`` cannot compare element-wise.
    """
    # create functions and histograms
    f1 = TF1("f1", "x")
    f2 = TF2("f2", "x*y")
    f3 = TF3("f3", "x*y*z")
    h1 = TH1D("h1", "", 10, 0, 1)
    h1.FillRandom("f1")
    h2 = TH2D("h2", "", 10, 0, 1, 10, 0, 1)
    h2.FillRandom("f2")
    h3 = TH3D("h3", "", 10, 0, 1, 10, 0, 1, 10, 0, 1)
    h3.FillRandom("f3")
    # generate random arrays
    arr_1d = np.random.rand(5)
    arr_2d = np.random.rand(5, 2)
    arr_3d = np.random.rand(5, 3)
    arr_4d = np.random.rand(5, 4)
    # evaluate the functions, both as objects and through their title
    # (the formula string they were built from)
    assert_array_equal(rnp.evaluate(f1, arr_1d),
                       [f1.Eval(x) for x in arr_1d])
    assert_array_equal(rnp.evaluate(f1.GetTitle(), arr_1d),
                       [f1.Eval(x) for x in arr_1d])
    assert_array_equal(rnp.evaluate(f2, arr_2d),
                       [f2.Eval(*x) for x in arr_2d])
    assert_array_equal(rnp.evaluate(f2.GetTitle(), arr_2d),
                       [f2.Eval(*x) for x in arr_2d])
    assert_array_equal(rnp.evaluate(f3, arr_3d),
                       [f3.Eval(*x) for x in arr_3d])
    assert_array_equal(rnp.evaluate(f3.GetTitle(), arr_3d),
                       [f3.Eval(*x) for x in arr_3d])
    # 4d formula
    f4 = TFormula('test', 'x*y+z*t')
    assert_array_equal(rnp.evaluate(f4, arr_4d),
                       [f4.Eval(*x) for x in arr_4d])
    # evaluate the histograms: the value is the content of the bin that
    # holds each point
    assert_array_equal(rnp.evaluate(h1, arr_1d),
                       [h1.GetBinContent(h1.FindBin(x)) for x in arr_1d])
    assert_array_equal(rnp.evaluate(h2, arr_2d),
                       [h2.GetBinContent(h2.FindBin(*x)) for x in arr_2d])
    assert_array_equal(rnp.evaluate(h3, arr_3d),
                       [h3.GetBinContent(h3.FindBin(*x)) for x in arr_3d])
    # create a graph
    g = TGraph(2)
    g.SetPoint(0, 0, 1)
    g.SetPoint(1, 1, 2)
    assert_array_equal(rnp.evaluate(g, [0, .5, 1]), [1, 1.5, 2])
    from ROOT import TSpline3
    s = TSpline3("spline", g)
    assert_array_equal(rnp.evaluate(s, [0, .5, 1]),
                       [s.Eval(x) for x in [0, .5, 1]])
    # test exceptions: unsupported type, dimension mismatches, bad formulas
    assert_raises(TypeError, rnp.evaluate, object(), [1, 2, 3])
    assert_raises(ValueError, rnp.evaluate, h1, arr_2d)
    assert_raises(ValueError, rnp.evaluate, h2, arr_3d)
    assert_raises(ValueError, rnp.evaluate, h2, arr_1d)
    assert_raises(ValueError, rnp.evaluate, h3, arr_1d)
    assert_raises(ValueError, rnp.evaluate, h3, arr_2d)
    assert_raises(ValueError, rnp.evaluate, f1, arr_2d)
    assert_raises(ValueError, rnp.evaluate, f2, arr_3d)
    assert_raises(ValueError, rnp.evaluate, f2, arr_1d)
    assert_raises(ValueError, rnp.evaluate, f3, arr_1d)
    assert_raises(ValueError, rnp.evaluate, f3, arr_2d)
    assert_raises(ValueError, rnp.evaluate, g, arr_2d)
    assert_raises(ValueError, rnp.evaluate, s, arr_2d)
    assert_raises(ValueError, rnp.evaluate, "f", arr_1d)
    assert_raises(ValueError, rnp.evaluate, "x*y", arr_1d)
    assert_raises(ValueError, rnp.evaluate, "x", arr_2d)
    assert_raises(ValueError, rnp.evaluate, "x*y", arr_3d)
Exemple #14
0
import matplotlib as mat
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import ROOT
import root_numpy as rnp
import pylandau

# Seaborn and LaTeX configuration: figure size, paper context, white grid,
# LaTeX text rendering with a serif (palatino) font and a custom preamble.
sns.set(rc={"figure.figsize":(8,4)})
sns.set_context('paper',font_scale=1.0,rc={'lines.linewidth':1.0})
sns.set_style('whitegrid')
mat.rc('text',usetex=True)
mat.rc('font',family='serif',serif='palatino')
# NOTE(review): the preamble is given as a list of strings; newer matplotlib
# releases presumably expect a single string -- confirm against the version in use.
mat.rcParams['text.latex.preamble']=[r'\usepackage[utf8]{inputenc}',r'\usepackage[T1]{fontenc}',r'\usepackage[spanish]{babel}',r'\usepackage{amsmath,amsfonts,amssymb}',r'\usepackage{siunitx}']

# Evaluate a Landau PDF on a regular grid "time" (400 to 700 in steps of 0.01).
# The Landau shape is useful to model the single photoelectron response of a PMT.
time=np.arange(400,700,0.01)
dtau=ROOT.TF1('tau0','TMath::Landau(x,492.145,7.59229,1)')
tau=rnp.evaluate(dtau,time) # PDF values on the grid -- may also be generated with pylandau

# Draw Nevents random samples from the same formula, restricted to [400, 700].
# NOTE(review): this second TF1 reuses the name 'tau0' of the one above --
# confirm that replacing the first function is intended.
Nevents=1000000
rnd_tau=rnp.random_sample(ROOT.TF1('tau0','TMath::Landau(x,492.145,7.59229,1)',400,700),Nevents,seed=1)
c=sns.color_palette(sns.cubehelix_palette(8,start=.25,rot=-.75,reverse=True))
fig,ax=plt.subplots(nrows=1,ncols=1)
plt.plot(time,tau,color=c[0]) # PDF curve; the normalised histogram of the samples is overlaid below
sns.distplot(rnd_tau,hist=True,kde=False,rug=False,ax=ax,norm_hist=True,
hist_kws={'histtype':'stepfilled','alpha':0.9},color=c[1])
plt.show()
    def process_histomass_single(self, index):
        """Produce the normalisation and invariant-mass histograms for one input file.

        Opens ``self.l_histomass[index]`` in "recreate" mode, loads the event
        dataframe pickled at ``self.l_evtorig[index]``, and writes:
          * one ``hEvForNorm_mult<i>`` histogram per bin of the second
            binning variable (normalisation factor and number of selected
            events),
          * per pt bin and second-variable bin: the invariant-mass histogram,
            a trigger-weighted companion, a multiplicity histogram, a
            tracklets-vs-V0M 2D histogram, an optional z-vs-mass 2D histogram
            when jet columns are present, and signal/reflection histograms
            for MC.

        Args:
            index: position of the file to process in the per-file lists
                (``l_histomass``, ``l_evtorig``, ``mptfiles_recoskmldec``).

        NOTE(review): inputs are read with ``pickle.load``; only use on
        trusted files. Neither the output file nor the weight files opened
        in the loop are closed in this method -- confirm this is intended.
        """
        myfile = TFile.Open(self.l_histomass[index], "recreate")
        dfevtorig = pickle.load(openfile(self.l_evtorig[index], "rb"))
        if self.s_trigger is not None:
            dfevtorig = dfevtorig.query(self.s_trigger)
        dfevtorig = selectdfrunlist(dfevtorig, \
                         self.run_param[self.runlistrigger[self.triggerbit]], "run_number")
        # One normalisation histogram per bin of the second binning variable.
        for ibin2 in range(len(self.lvar2_binmin)):
            mybindfevtorig = seldf_singlevar(dfevtorig, self.v_var2_binning_gen, \
                                        self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
            hNorm = TH1F("hEvForNorm_mult%d" % ibin2, "hEvForNorm_mult%d" % ibin2, 2, 0.5, 2.5)
            # NOTE(review): the first label is spelled "normsalisation"; it is
            # a runtime string, so readers of the file may depend on it.
            hNorm.GetXaxis().SetBinLabel(1, "normsalisation factor")
            hNorm.GetXaxis().SetBinLabel(2, "selected events")
            # Both values stay 0 when the bin contains no events.
            nselevt = 0
            norm = 0
            if not mybindfevtorig.empty:
                nselevt = len(mybindfevtorig.query("is_ev_rej==0"))
                norm = getnormforselevt(mybindfevtorig)
            hNorm.SetBinContent(1, norm)
            hNorm.SetBinContent(2, nselevt)
            hNorm.Write()
            # The commented-out lines below are disabled per-bin multiplicity
            # and V0M-vs-tracklets histograms, kept from the original.
#            histmultevt = TH1F("hmultevtmult%d" % ibin2,
#                               "hmultevtmult%d"  % ibin2, 100, 0, 100)
            mybindfevtorig = mybindfevtorig.query("is_ev_rej==0")
#            fill_hist(histmultevt, mybindfevtorig.n_tracklets_corr)
#            histmultevt.Write()
#            h_v0m_ntracklets = TH2F("h_v0m_ntracklets%d" % ibin2,
#                                    "h_v0m_ntracklets%d" % ibin2,
#                                    200, 0, 200, 200, -0.5, 1999.5)
#            v_v0m_ntracklets = np.vstack((mybindfevtorig.n_tracklets_corr,
#                                          mybindfevtorig.v0m_corr)).T
#            fill_hist(h_v0m_ntracklets, v_v0m_ntracklets)
#            h_v0m_ntracklets.Write()

        for ipt in range(self.p_nptfinbins):
            bin_id = self.bin_matching[ipt]
            df = pickle.load(openfile(self.mptfiles_recoskmldec[bin_id][index], "rb"))
            # Candidate selections: ML cut, event selection, trigger.
            if self.doml is True:
                df = df.query(self.l_selml[bin_id])
            if self.s_evtsel is not None:
                df = df.query(self.s_evtsel)
            if self.s_trigger is not None:
                df = df.query(self.s_trigger)
            # Restrict to the current bin of the first binning variable.
            df = seldf_singlevar(df, self.v_var_binning, \
                                 self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])
            for ibin2 in range(len(self.lvar2_binmin)):
                # Suffix encodes both binning variables, their bin edges and
                # the probability cut of this bin.
                suffix = "%s%d_%d_%.2f%s_%.2f_%.2f" % \
                         (self.v_var_binning, self.lpt_finbinmin[ipt],
                          self.lpt_finbinmax[ipt], self.lpt_probcutfin[bin_id],
                          self.v_var2_binning, self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
                h_invmass = TH1F("hmass" + suffix, "", self.p_num_bins,
                                 self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
                h_invmass_weight = TH1F("h_invmass_weight" + suffix, "", self.p_num_bins,
                                        self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
                df_bin = seldf_singlevar(df, self.v_var2_binning,
                                         self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
                df_bin = selectdfrunlist(df_bin, \
                         self.run_param[self.runlistrigger[self.triggerbit]], "run_number")
                fill_hist(h_invmass, df_bin.inv_mass)
                # Trigger weights apply only to data taken with a trigger whose
                # name does not contain "INT7". The weight file is reopened on
                # every iteration; the weighted histogram stays empty (but is
                # still written) when the function is not found in the file.
                # NOTE(review): the function name is built from
                # v_var2_binning_gen while the weights are evaluated on the
                # v_var2_binning column -- confirm this is intended.
                if "INT7" not in self.triggerbit and self.mcordata == "data":
                    fileweight_name = "%s/correctionsweights.root" % self.d_val
                    fileweight = TFile.Open(fileweight_name, "read")
                    namefunction = "funcnorm_%s_%s" % (self.triggerbit, self.v_var2_binning_gen)
                    funcweighttrig = fileweight.Get(namefunction)
                    if funcweighttrig:
                        weights = evaluate(funcweighttrig, df_bin[self.v_var2_binning])
                        weightsinv = [1./weight for weight in weights]
                        fill_hist(h_invmass_weight, df_bin.inv_mass, weights=weightsinv)
                # Make the output file the current directory again before
                # writing.
                myfile.cd()
                h_invmass.Write()
                h_invmass_weight.Write()
                # Corrected tracklet multiplicity of the candidates in this bin.
                histmult = TH1F("hmultpt%dmult%d" % (ipt, ibin2),
                                "hmultpt%dmult%d"  % (ipt, ibin2), 1000, 0, 1000)
                fill_hist(histmult, df_bin.n_tracklets_corr)
                histmult.Write()
                # 2D histogram filled with (n_tracklets_corr, v0m_corr) pairs.
                h_v0m_ntrackletsD = TH2F("h_v0m_ntrackletsD%d%d" % (ibin2, ipt),
                                         "h_v0m_ntrackletsD%d%d" % (ibin2, ipt),
                                         200, 0, 200, 200, -0.5, 1999.5)
                v_v0m_ntrackletsD = np.vstack((df_bin.n_tracklets_corr,
                                               df_bin.v0m_corr)).T
                fill_hist(h_v0m_ntrackletsD, v_v0m_ntrackletsD)
                h_v0m_ntrackletsD.Write()
                # Jet columns present: also store the z_calc result against
                # the invariant mass.
                if "pt_jet" in df_bin.columns:
                    zarray = z_calc(df_bin.pt_jet, df_bin.phi_jet, df_bin.eta_jet,
                                    df_bin.pt_cand, df_bin.phi_cand, df_bin.eta_cand)
                    h_zvsinvmass = TH2F("hzvsmass" + suffix, "", 5000, 1.00, 6.00, 2000, -0.5, 1.5)
                    zvsinvmass = np.vstack((df_bin.inv_mass, zarray)).T
                    fill_hist(h_zvsinvmass, zvsinvmass)
                    h_zvsinvmass.Write()

                # MC only: separate histograms for signal and reflections.
                if self.mcordata == "mc":
                    df_bin[self.v_ismcrefl] = np.array(tag_bit_df(df_bin, self.v_bitvar,
                                                                  self.b_mcrefl), dtype=int)
                    df_bin_sig = df_bin[df_bin[self.v_ismcsignal] == 1]
                    df_bin_refl = df_bin[df_bin[self.v_ismcrefl] == 1]
                    h_invmass_sig = TH1F("hmass_sig" + suffix, "", self.p_num_bins,
                                         self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
                    h_invmass_refl = TH1F("hmass_refl" + suffix, "", self.p_num_bins,
                                          self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
                    fill_hist(h_invmass_sig, df_bin_sig.inv_mass)
                    fill_hist(h_invmass_refl, df_bin_refl.inv_mass)
                    myfile.cd()
                    h_invmass_sig.Write()
                    h_invmass_refl.Write()
def main(parameters):
    """Train, store and validate the e/g isolation working points.

    The function builds the list of target efficiencies, resolves the version
    sub-directory, trains one isolation cut per efficiency, writes the cuts as
    TH2s, optionally tests them against offline variables, combines them with
    the eta/et efficiency shape, compresses the result into a TH3 and finally
    tests the compressed cuts. Everything is written to
    ``<working_directory>/<version>/<name>.root``.

    Args:
        parameters: configuration object. The attributes read here are
            ``version``, ``steps``, ``working_directory``, ``variables``
            (``ieta``, ``ntt``, ``iso``, ``rho``, ``et``), ``signal_file``,
            ``signal_tree``, ``name``, ``eta_pt_optimization`` and
            ``compression`` (``eta``, ``ntt``, ``et``).

    Raises:
        RuntimeError: if the version is 'automatic', training is not
            requested and no previously trained version can be found.
    """
    # Compute isolation cuts for efficiencies from 0.2 to 1 with smaller steps for larger efficiencies
    # TODO: put this in parameters
    effs = np.concatenate((
        np.arange(0.2, 0.5, 0.05),
        np.arange(0.5, 0.85, 0.02),
        np.arange(0.85, 0.999, 0.01),
    ))
    # if no version specified, automatically set version number
    # (strings are compared with ==; identity comparison only works by
    # accident of interning and fails e.g. for unicode or computed strings)
    if parameters.version == 'automatic':
        if parameters.steps.train_workingpoints:
            # training of the working points requested: create a new version
            version = batch_launcher.job_version(parameters.working_directory)
        else:
            # otherwise use the last version available
            version = batch_launcher.latest_version(
                parameters.working_directory)
            if version == '':
                # RuntimeError derives from StandardError on Python 2 and
                # still exists on Python 3, so existing handlers keep working.
                raise RuntimeError(
                    'Cannot find already trained working points')
    else:
        version = parameters.version
    workingdir = parameters.working_directory + '/' + version
    inputs = [
        parameters.variables.ieta,
        parameters.variables.ntt,
    ]
    target = parameters.variables.iso
    pileupref = parameters.variables.rho
    # Column layout of the arrays handed to the test functions below:
    # first the inputs, then the target.
    input_columns = list(range(len(inputs)))
    target_column = [len(inputs)]
    # Train isolation cuts
    eg_isolations = train_isolation_workingpoints(parameters.steps, effs,
                                                  parameters.signal_file,
                                                  parameters.signal_tree,
                                                  parameters.working_directory,
                                                  version, parameters.name,
                                                  inputs, target, pileupref)
    with root_open(workingdir + '/' + parameters.name + '.root', 'recreate'):
        # Save isolation cuts in TH2s
        for eff, eg_isolation_cuts in zip(effs, eg_isolations):
            histo = function2th2(eg_isolation_cuts.predict,
                                 quantile_regression.binning[inputs[0]],
                                 quantile_regression.binning[inputs[1]])
            histo.SetName(parameters.name + '_' + str(eff))
            histo.Write()
        # Test isolation cuts vs offline variables
        if parameters.steps.test_workingpoints:
            print('> Checking efficiencies vs offline variables')
            # `isolation=iso` binds each working point at definition time
            # (avoids the late-binding closure pitfall).
            pass_functions = [
                (lambda x, isolation=iso: np.less(
                    x[:, target_column].ravel(),
                    isolation.predict(x[:, input_columns])))
                for iso in eg_isolations
            ]
            graphs = test_efficiency(
                functions=pass_functions,
                function_inputs=inputs + [target],
                # TODO: define these variables in parameters
                variables=['offl_eta', 'offl_pt', 'rho', 'npv'],
                inputfile=parameters.signal_file,
                tree=parameters.signal_tree,
                # TODO: Define the selection in parameters
                selection='et>0')
            for graph in graphs:
                graph.Write()
        print('> Applying eta/et efficiency shape')
        # TODO: Add the possibility to perform automatic optimization of the efficiency shape
        # An extra working point at efficiency 1 is appended, with a constant
        # cut value of 9999.
        eg_isolation_eta_et = CombinedWorkingPoints(
            np.append(effs, [1.]),
            [iso.predict for iso in eg_isolations] +
            [lambda x: np.full(x.shape[0], 9999.)],
            parameters.eta_pt_optimization.eta_pt_efficiency_shapes)
        print('> Compress input variables')
        branches = [
            parameters.variables.ieta,
            parameters.variables.ntt,
            parameters.variables.et,
        ]
        data = root2array(
            parameters.signal_file,
            treename=parameters.signal_tree,
            branches=branches,
            # TODO: Define the selection in parameters
            selection='et>0')
        # Turn the structured array into a plain 2D float32 array
        # (one column per branch, in the order of `branches`).
        data = data.view(
            (np.float64, len(data.dtype.names))).astype(np.float32)
        # Columns [0, 1] are (ieta, ntt), columns [0, 2] are (ieta, et).
        iso_cuts = eg_isolation_eta_et.value(data[:, [0, 1]], data[:, [0, 2]])
        eg_isolation_compressed = events2th3(data, iso_cuts,
                                             (parameters.compression.eta, ),
                                             (parameters.compression.ntt, ),
                                             (parameters.compression.et, ))
        eg_isolation_compressed.SetName('isolation_compressed_')
        eg_isolation_compressed.Write()
        # NOTE(review): `functions` is a single callable here, whereas the
        # call above passes a list - confirm test_efficiency accepts both.
        graphs_compressed = test_efficiency(
            functions=(lambda x: np.less(
                x[:, [3]].ravel(),
                evaluate(eg_isolation_compressed, x[:, [0, 1, 2]]))),
            function_inputs=branches + [parameters.variables.iso],
            # TODO: define these variables in parameters
            variables=['offl_eta', 'offl_pt', 'rho', 'npv'],
            inputfile=parameters.signal_file,
            tree=parameters.signal_tree,
            # TODO: Define the selection in parameters
            selection='et>0')
        for graph in graphs_compressed:
            graph.Write()
    def cutvariation_masshistos(self, min_cv_cut, max_cv_cut):
        """Write invariant-mass histograms for a scan of the ML probability cut.

        For each fine bin of the binning variable, 2 * p_ncutvar + 1 cut values
        are tried in increasing trial order: p_ncutvar equally spaced values
        starting at min_cv_cut[ipt], the nominal cut lpt_probcutfin[bin_id],
        and p_ncutvar equally spaced values ending at max_cv_cut[ipt]. Each
        trial filters the dataframe left over by the previous trial. For every
        trial and every bin of the second binning variable, a plain and a
        trigger-weighted mass histogram are written to n_filemass_cutvar.

        Args:
            min_cv_cut: lowest cut value of the scan, indexed by fine bin.
            max_cv_cut: highest cut value of the scan, indexed by fine bin.
        """
        outfile = TFile.Open(self.n_filemass_cutvar, "recreate")

        for ipt in range(self.p_nptfinbins):
            bin_id = self.bin_matching[ipt]
            df = pickle.load(openfile(self.lpt_recodecmerged_data[bin_id], "rb"))

            # Scan layout: n_side values below the nominal cut, the nominal
            # cut itself, then n_side values above it.
            n_side = self.p_ncutvar
            nominal_cut = self.lpt_probcutfin[bin_id]
            step_below = (nominal_cut - min_cv_cut[ipt]) / n_side
            step_above = (max_cv_cut[ipt] - nominal_cut) / n_side
            n_trials = 2 * n_side + 1

            # Optional event and trigger selections.
            for selection in (self.s_evtsel, self.s_trigger_data):
                if selection is not None:
                    df = df.query(selection)
            df = seldf_singlevar(df, self.v_var_binning,
                                 self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])
            runlist_key = self.runlistrigger[self.triggerbit]
            print("Using run selection for mass histo", runlist_key, \
                  "for period", self.period)
            df = selectdfrunlist(df, self.run_param[runlist_key], "run_number")

            for icv in range(n_trials):
                # Signed distance, in trials, from the nominal cut.
                offset = icv - n_side
                if offset < 0:
                    cut_value = min_cv_cut[ipt] + icv * step_below
                elif offset == 0:
                    cut_value = nominal_cut
                else:
                    cut_value = nominal_cut + offset * step_above
                cut_query = "y_test_prob%s>%s" % (self.p_modelname, cut_value)

                print("Cutting on: ", cut_query)
                # Cumulative on purpose of the original code: df is rebound.
                df = df.query(cut_query)

                for ibin2, var2_min in enumerate(self.lvar2_binmin):
                    var2_max = self.lvar2_binmax[ibin2]
                    suffix = "%s%d_%d_%d_%s%.2f_%.2f" % (
                        self.v_var_binning, self.lpt_finbinmin[ipt],
                        self.lpt_finbinmax[ipt], icv,
                        self.v_var2_binning, var2_min, var2_max)
                    mass_lo = self.p_mass_fit_lim[0]
                    mass_hi = self.p_mass_fit_lim[1]
                    h_mass = TH1F("hmass" + suffix, "", self.p_num_bins,
                                  mass_lo, mass_hi)
                    h_mass_weighted = TH1F("h_invmass_weight" + suffix, "",
                                           self.p_num_bins, mass_lo, mass_hi)

                    df_var2 = seldf_singlevar(df, self.v_var2_binning,
                                              var2_min, var2_max)

                    fill_hist(h_mass, df_var2.inv_mass)

                    if "INT7" not in self.triggerbit:
                        # Weighted histogram: each entry gets the inverse of
                        # the normalisation function evaluated at the second
                        # binning variable.
                        weight_file = TFile.Open(
                            "%s/correctionsweights.root" % self.d_val, "read")
                        norm_func = weight_file.Get(
                            "funcnorm_%s_%s" % (self.triggerbit, self.v_var2_binning))
                        if norm_func:
                            norm_values = evaluate(norm_func,
                                                   df_var2[self.v_var2_binning])
                            inverse_weights = [1./value for value in norm_values]
                            fill_hist(h_mass_weighted, df_var2.inv_mass,
                                      weights=inverse_weights)
                    # Opening the weight file changes ROOT's current
                    # directory; switch back before writing.
                    outfile.cd()
                    h_mass.Write()
                    h_mass_weighted.Write()
def main(parameters):
    """Train, store and validate the e/g isolation working points.

    Steps, in order: build the list of target efficiencies, resolve the
    version sub-directory, train one isolation cut per efficiency, write the
    cuts as TH2s, optionally test them against offline variables, combine
    them with the eta/et efficiency shape, compress the result into a TH3 and
    test the compressed cuts. The output file is
    ``<working_directory>/<version>/<name>.root``.

    Args:
        parameters: configuration object. The attributes read here are
            ``version``, ``steps``, ``working_directory``, ``variables``
            (``ieta``, ``ntt``, ``iso``, ``rho``, ``et``), ``signal_file``,
            ``signal_tree``, ``name``, ``eta_pt_optimization`` and
            ``compression`` (``eta``, ``ntt``, ``et``).

    Raises:
        RuntimeError: if the version is 'automatic', training is not
            requested and no previously trained version can be found.
    """
    # Compute isolation cuts for efficiencies from 0.2 to 1 with smaller steps for larger efficiencies
    # TODO: put this in parameters
    effs = np.concatenate((
        np.arange(0.2, 0.5, 0.05),
        np.arange(0.5, 0.85, 0.02),
        np.arange(0.85, 0.999, 0.01),
    ))
    # if no version specified, automatically set version number.
    # Strings must be compared by value: `is` tests object identity and only
    # matches when the interpreter happens to intern both strings.
    if parameters.version == 'automatic':
        if parameters.steps.train_workingpoints:
            # training of the working points requested: create a new version
            version = batch_launcher.job_version(parameters.working_directory)
        else:
            # otherwise use the last version available
            version = batch_launcher.latest_version(parameters.working_directory)
            if version == '':
                # RuntimeError is a StandardError subclass on Python 2 and is
                # also available on Python 3 (StandardError is not).
                raise RuntimeError('Cannot find already trained working points')
    else:
        version = parameters.version
    workingdir = parameters.working_directory + '/' + version
    inputs = [
        parameters.variables.ieta,
        parameters.variables.ntt,
    ]
    target = parameters.variables.iso
    pileupref = parameters.variables.rho
    # Column layout of the arrays handed to the test functions below:
    # first the inputs, then the target.
    input_columns = list(range(len(inputs)))
    target_column = [len(inputs)]
    # Train isolation cuts
    eg_isolations = train_isolation_workingpoints(parameters.steps,
                                                  effs,
                                                  parameters.signal_file,
                                                  parameters.signal_tree,
                                                  parameters.working_directory,
                                                  version,
                                                  parameters.name,
                                                  inputs,
                                                  target,
                                                  pileupref)
    with root_open(workingdir + '/' + parameters.name + '.root', 'recreate'):
        # Save isolation cuts in TH2s
        for eff, eg_isolation_cuts in zip(effs, eg_isolations):
            histo = function2th2(eg_isolation_cuts.predict,
                                 quantile_regression.binning[inputs[0]],
                                 quantile_regression.binning[inputs[1]])
            histo.SetName(parameters.name + '_' + str(eff))
            histo.Write()
        # Test isolation cuts vs offline variables
        if parameters.steps.test_workingpoints:
            print('> Checking efficiencies vs offline variables')
            # `isolation=iso` freezes the working point for each lambda;
            # without it every lambda would see the last `iso`.
            pass_functions = [
                (lambda x, isolation=iso: np.less(
                    x[:, target_column].ravel(),
                    isolation.predict(x[:, input_columns])))
                for iso in eg_isolations
            ]
            graphs = test_efficiency(functions=pass_functions,
                                     function_inputs=inputs + [target],
                                     # TODO: define these variables in parameters
                                     variables=['offl_eta', 'offl_pt', 'rho', 'npv'],
                                     inputfile=parameters.signal_file,
                                     tree=parameters.signal_tree,
                                     # TODO: Define the selection in parameters
                                     selection='et>0')
            for graph in graphs:
                graph.Write()
        print('> Applying eta/et efficiency shape')
        # TODO: Add the possibility to perform automatic optimization of the efficiency shape
        # An extra working point at efficiency 1 is appended, with a constant
        # cut value of 9999.
        eg_isolation_eta_et = CombinedWorkingPoints(
            np.append(effs, [1.]),
            [iso.predict for iso in eg_isolations] + [lambda x: np.full(x.shape[0], 9999.)],
            parameters.eta_pt_optimization.eta_pt_efficiency_shapes)
        print('> Compress input variables')
        branches = [
            parameters.variables.ieta,
            parameters.variables.ntt,
            parameters.variables.et,
        ]
        data = root2array(parameters.signal_file,
                          treename=parameters.signal_tree,
                          branches=branches,
                          # TODO: Define the selection in parameters
                          selection='et>0')
        # Turn the structured array into a plain 2D float32 array
        # (one column per branch, in the order of `branches`).
        data = data.view((np.float64, len(data.dtype.names))).astype(np.float32)
        # Columns [0, 1] are (ieta, ntt), columns [0, 2] are (ieta, et).
        iso_cuts = eg_isolation_eta_et.value(data[:, [0, 1]], data[:, [0, 2]])
        eg_isolation_compressed = events2th3(data, iso_cuts,
                                             (parameters.compression.eta,),
                                             (parameters.compression.ntt,),
                                             (parameters.compression.et,))
        eg_isolation_compressed.SetName('isolation_compressed_')
        eg_isolation_compressed.Write()
        # NOTE(review): `functions` is a single callable here, whereas the
        # call above passes a list - confirm test_efficiency accepts both.
        graphs_compressed = test_efficiency(
            functions=(lambda x: np.less(
                x[:, [3]].ravel(),
                evaluate(eg_isolation_compressed, x[:, [0, 1, 2]]))),
            function_inputs=branches + [parameters.variables.iso],
            # TODO: define these variables in parameters
            variables=['offl_eta', 'offl_pt', 'rho', 'npv'],
            inputfile=parameters.signal_file,
            tree=parameters.signal_tree,
            # TODO: Define the selection in parameters
            selection='et>0')
        for graph in graphs_compressed:
            graph.Write()
Exemple #19
0
    def process_histomass(self):
        """Fill and write the invariant-mass histograms to self.n_filemass.

        For every fine bin of the binning variable the pickled dataframe is
        loaded and filtered with the ML selection and, when set, with the
        event and trigger selections. For every bin of the second binning
        variable the following histograms are then written:

        * "hmass<suffix>": invariant mass;
        * "h_invmass_weight<suffix>": invariant mass weighted by the inverse
          of the trigger normalisation function (filled only for data with a
          trigger bit not containing "INT7", but always written);
        * "hzvsmass<suffix>": z versus invariant mass, only if the dataframe
          has a "pt_jet" column;
        * "hmass_sig<suffix>" / "hmass_refl<suffix>": invariant mass of
          signal and reflection candidates, only for MC.
        """
        # NOTE(review): myfile is not closed within this method - confirm it
        # is flushed/closed elsewhere.
        myfile = TFile.Open(self.n_filemass, "recreate")

        for ipt in range(self.p_nptfinbins):
            # bin_matching maps the fine bin index to the index used by the
            # per-bin lists (lpt_recodecmerged, l_selml, lpt_probcutfin).
            bin_id = self.bin_matching[ipt]
            # NOTE(review): the handle returned by openfile is never closed
            # explicitly.
            df = pickle.load(openfile(self.lpt_recodecmerged[bin_id], "rb"))
            df = df.query(self.l_selml[bin_id])
            if self.s_evtsel is not None:
                df = df.query(self.s_evtsel)
            if self.s_trigger is not None:
                df = df.query(self.s_trigger)
            # presumably keeps rows with v_var_binning inside the fine bin
            # range - verify against seldf_singlevar
            df = seldf_singlevar(df, self.v_var_binning, \
                                 self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])
            for ibin2 in range(len(self.lvar2_binmin)):
                # Suffix encodes: binning variable, fine bin limits, ML
                # probability cut, second variable and its bin limits.
                suffix = "%s%d_%d_%.2f%s_%.2f_%.2f" % \
                         (self.v_var_binning, self.lpt_finbinmin[ipt],
                          self.lpt_finbinmax[ipt], self.lpt_probcutfin[bin_id],
                          self.v_var2_binning, self.lvar2_binmin[ibin2], self.lvar2_binmax[ibin2])
                h_invmass = TH1F("hmass" + suffix, "", self.p_num_bins,
                                 self.p_mass_fit_lim[0],
                                 self.p_mass_fit_lim[1])
                h_invmass_weight = TH1F("h_invmass_weight" + suffix, "",
                                        self.p_num_bins,
                                        self.p_mass_fit_lim[0],
                                        self.p_mass_fit_lim[1])
                df_bin = seldf_singlevar(df, self.v_var2_binning,
                                         self.lvar2_binmin[ibin2],
                                         self.lvar2_binmax[ibin2])
                fill_hist(h_invmass, df_bin.inv_mass)
                # NOTE(review): the condition below uses this configured
                # trigger bit while the function name uses self.triggerbit -
                # confirm both always hold the same value.
                triggerbit = self.datap["analysis"][self.typean]["triggerbit"]
                if "INT7" not in triggerbit and self.mcordata == "data":
                    # NOTE(review): the weights file is reopened for every
                    # second-variable bin and never closed here; neither the
                    # file nor the retrieved function is checked before use.
                    fileweight_name = "%s/correctionsweights.root" % self.d_val
                    fileweight = TFile.Open(fileweight_name, "read")
                    namefunction = "funcnorm_%s" % self.triggerbit
                    funcweighttrig = fileweight.Get(namefunction)
                    # Each candidate is weighted by the inverse of the
                    # function evaluated at its second-variable value.
                    weights = evaluate(funcweighttrig,
                                       df_bin[self.v_var2_binning])
                    weightsinv = [1. / weight for weight in weights]
                    fill_hist(h_invmass_weight,
                              df_bin.inv_mass,
                              weights=weightsinv)
                # Make the output file the current ROOT directory again before
                # writing (opening the weights file above may have changed it).
                myfile.cd()
                h_invmass.Write()
                # Written unconditionally; left empty when no weighting was
                # applied above.
                h_invmass_weight.Write()

                if "pt_jet" in df_bin.columns:
                    # Jet case: z computed from jet and candidate kinematics,
                    # stored against the invariant mass.
                    zarray = z_calc(df_bin.pt_jet, df_bin.phi_jet,
                                    df_bin.eta_jet, df_bin.pt_cand,
                                    df_bin.phi_cand, df_bin.eta_cand)
                    h_zvsinvmass = TH2F("hzvsmass" + suffix, "", 5000, 1.00,
                                        6.00, 2000, -0.5, 1.5)
                    # One (inv_mass, z) row per candidate.
                    zvsinvmass = np.vstack((df_bin.inv_mass, zarray)).T
                    fill_hist(h_zvsinvmass, zvsinvmass)
                    h_zvsinvmass.Write()

                if self.mcordata == "mc":
                    # Tag reflections from the bit variable, then split the
                    # bin into signal and reflection candidates.
                    # NOTE(review): this assigns a column on a dataframe
                    # returned by seldf_singlevar - if that is a slice, pandas
                    # may emit SettingWithCopyWarning; confirm.
                    df_bin[self.v_ismcrefl] = np.array(tag_bit_df(
                        df_bin, self.v_bitvar, self.b_mcrefl),
                                                       dtype=int)
                    df_bin_sig = df_bin[df_bin[self.v_ismcsignal] == 1]
                    df_bin_refl = df_bin[df_bin[self.v_ismcrefl] == 1]
                    h_invmass_sig = TH1F("hmass_sig" + suffix, "",
                                         self.p_num_bins,
                                         self.p_mass_fit_lim[0],
                                         self.p_mass_fit_lim[1])
                    h_invmass_refl = TH1F("hmass_refl" + suffix, "",
                                          self.p_num_bins,
                                          self.p_mass_fit_lim[0],
                                          self.p_mass_fit_lim[1])
                    fill_hist(h_invmass_sig, df_bin_sig.inv_mass)
                    fill_hist(h_invmass_refl, df_bin_refl.inv_mass)
                    myfile.cd()
                    h_invmass_sig.Write()
                    h_invmass_refl.Write()