Example #1
0
def VarCrossCheck(Sig, Bkg, SigW, BkgW, name, xmin, xmax, bins):
    """Overlay the signal and background distributions of one variable.

    The signal histogram is rescaled to the summed background weight so the
    two shapes can be compared, and the canvas is saved to
    ``./plots/VarCrossCeck.png``.

    Args:
        Sig: signal values passed to ``fill_hist``.
        Bkg: background values passed to ``fill_hist``.
        SigW: per-entry weights of the signal values.
        BkgW: per-entry weights of the background values.
        name: title given to both histograms.
        xmin: lower edge of the histogram range.
        xmax: upper edge of the histogram range.
        bins: number of bins.
    """
    hSig = ROOT.TH1F("hSig", name, bins, xmin, xmax)
    fill_hist(hSig, Sig, weights=SigW)
    hSig.SetLineColor(2)
    hSig.SetLineWidth(3)
    SetOverflow(hSig)
    # Rescale the signal to the background yield. An empty signal histogram
    # has a zero integral and is left unscaled instead of dividing by zero.
    sig_integral = hSig.Integral()
    if sig_integral != 0:
        hSig.Scale(np.sum(BkgW) / sig_integral)

    hBkg = ROOT.TH1F("hBkg", name, bins, xmin, xmax)
    fill_hist(hBkg, Bkg, weights=BkgW)
    hBkg.SetLineWidth(3)
    SetOverflow(hBkg)

    c1 = ROOT.TCanvas("c1", "c1", 800, 600)
    ROOT.gStyle.SetOptStat(0)
    hBkg.Draw("Hist")
    hSig.Draw("SameHist")

    # The background histogram defines the frame, so enlarge its y-range
    # when the (rescaled) signal peaks higher.
    if hSig.GetMaximum() > hBkg.GetMaximum():
        hBkg.SetMaximum(int(round(hSig.GetMaximum() * 1.1)))
    hBkg.GetXaxis().SetTitle("Jet multiplicity")
    hBkg.GetYaxis().SetTitle("Yield")

    leg = ROOT.TLegend(0.7, 0.7, 0.9, 0.9)
    leg.AddEntry(hSig, "Sig (Yield: {:04.2f})".format(np.sum(SigW)))
    leg.AddEntry(hBkg, "Bkg (Yield: {:04.2f})".format(np.sum(BkgW)))
    leg.Draw()

    c1.Update()
    c1.SaveAs("./plots/VarCrossCeck.png")
Example #2
0
def makePlot(variable):
    """Draw the normalised loose and tight distributions of ``variable``.

    Reads the module-level ``data``, ``data_tight`` and ``process`` names.
    A ``totalWeight`` column is written into both ``data`` and ``data_tight``
    as a side effect, and the canvas is saved under ``plots/``.

    Args:
        variable: name of the column to histogram.
    """
    # Loose selection: event weight times the three lepton and the tau
    # frWeight columns.
    loose_values = np.array(data[variable])
    data["totalWeight"] = (data.evtWeight * data.lep1_frWeight *
                           data.lep2_frWeight * data.lep3_frWeight *
                           data.tau_frWeight)
    loose_weights = np.array(data['totalWeight'])

    # Tight selection: same product without the tau frWeight column.
    tight_values = np.array(data_tight[variable])
    data_tight["totalWeight"] = (data_tight.evtWeight *
                                 data_tight.lep1_frWeight *
                                 data_tight.lep2_frWeight *
                                 data_tight.lep3_frWeight)
    tight_weights = np.array(data_tight['totalWeight'])

    canvas = TCanvas()
    canvas.SetFillColor(10)
    canvas.SetBorderSize(2)
    canvas.SetLeftMargin(0.12)
    canvas.SetBottomMargin(0.12)
    canvas.SetRightMargin(0.05)
    canvas.SetLogy()

    # All three histograms share the range spanned by the loose values.
    x_low = np.nanmin(loose_values)
    x_high = np.nanmax(loose_values)

    # Empty frame histogram that fixes the axes and their titles.
    frame = TH1F("histogram_base", "", 100, x_low, x_high)
    frame.SetTitle("")
    frame.SetStats(False)
    frame.SetMinimum(0.001)
    frame.SetMaximum(10.0)
    frame.GetXaxis().SetTitle(variable)
    frame.GetYaxis().SetTitle("Events")
    frame.Draw("hist")

    hist_loose = TH1F("hist_loose", "", 100, x_low, x_high)
    hist_tight = TH1F("hist_tight", "", 100, x_low, x_high)
    root_numpy.fill_hist(hist_loose, loose_values, weights=loose_weights)
    root_numpy.fill_hist(hist_tight, tight_values, weights=tight_weights)

    for hist, colour, fill_style in ((hist_loose, 2, 3004),
                                     (hist_tight, 4, 3005)):
        hist.SetLineColor(colour)
        hist.SetFillColor(colour)
        hist.SetFillStyle(fill_style)

    legend = TLegend(0.2, 0.65, 0.5, 0.9)
    legend.SetBorderSize(0)
    legend.SetFillColor(10)
    legend.SetLineColor(0)
    legend.SetFillStyle(0)
    legend.SetTextSize(0.04)
    legend.SetTextFont(42)
    legend.AddEntry(hist_loose, "loose", "F")
    legend.AddEntry(hist_tight, "loose_genTau_matched", "F")

    hist_loose.DrawNormalized("histsame")
    hist_tight.DrawNormalized("histsame")
    legend.Draw()

    canvas.SaveAs("plots/" + variable + "_" + process + "_loose_vs_genTau.png")
Example #3
0
def Make_Binned_ROC_histograms(title, DDT, kNN, pT, bins, sample_weights=None):
    """Histogram the DDT and kNN values in bins of pT and save them to a file.

    For every consecutive pair of edges in ``bins`` one DDT and one kNN
    histogram (100 bins on [0, 1]) is filled with the entries whose pT lies
    strictly between the two edges. All histograms are written to
    ``ROC_doublechecks/<title>_ROC_histograms.root``.

    Args:
        title: prefix of the output file name.
        DDT: array of DDT values, indexable with a boolean mask.
        kNN: array of kNN values, indexable with a boolean mask.
        pT: array of pT values, same length as ``DDT`` and ``kNN``.
        bins: sequence of pT bin edges.
        sample_weights: optional per-entry weights, same length as ``DDT``.
    """
    rt.gROOT.SetBatch(True)
    N = len(DDT)
    assert len(pT) == N and len(kNN) == N
    if sample_weights is not None:
        assert len(sample_weights) == N

    nbins = 100
    DDT_hist_list = []
    kNN_hist_list = []
    for low_edge, high_edge in zip(bins[:-1], bins[1:]):
        label = str(low_edge) + "_" + str(high_edge)
        ddt_hist = rt.TH1D("DDT_" + label, "DDT_" + label, nbins, 0, 1)
        knn_hist = rt.TH1D("kNN_" + label, "kNN_" + label, nbins, 0, 1)
        DDT_hist_list.append(ddt_hist)
        kNN_hist_list.append(knn_hist)

        in_bin = np.logical_and(pT > low_edge, pT < high_edge)
        # The weights must be restricted with the same mask as the values;
        # passing the full-length weights would not match the selected entries.
        bin_weights = None
        if sample_weights is not None:
            bin_weights = np.asarray(sample_weights)[in_bin]
        root_numpy.fill_hist(ddt_hist, DDT[in_bin], weights=bin_weights)
        root_numpy.fill_hist(knn_hist, kNN[in_bin], weights=bin_weights)

    tfile = rt.TFile("ROC_doublechecks/{}_ROC_histograms.root".format(title),
                     "recreate")
    for hist in DDT_hist_list:
        hist.Write()
    for hist in kNN_hist_list:
        hist.Write()
    # Close explicitly so the content is flushed to disk deterministically.
    tfile.Close()
    print("saved histograms in ROC_doublechecks/{}_ROC_histograms.root".format(
        title))
Example #4
0
    def _diff_plot1D_numpy(self,
                           data,
                           bins,
                           weights=None,
                           option='',
                           **kwargs):
        """Histogram a pair of arrays and hand both to ``_diff_plot1D``.

        Args:
            data: pair of arrays; ``data[0]`` fills the ``h_num_*`` histogram
                and ``data[1]`` the ``h_den_*`` histogram.
            bins: bin edges shared by both histograms (at least two).
            weights: optional pair of weight arrays matching ``data``.
            option: passed through to ``_diff_plot1D``.
            **kwargs: passed through to ``_diff_plot1D``.

        Returns:
            Whatever ``_diff_plot1D`` returns, or ``None`` if ``bins`` is
            missing or too short.
        """

        # Check(s)
        if bins is None:
            warning(
                "You need to specify 'bins' when plotting a numpy-type input.")
            return

        if len(bins) < 2:
            warning("Number of bins {} is not accepted".format(len(bins)))
            return

        # 'weights' is optional: without this the default None would fail on
        # the indexing below.
        if weights is None:
            weights = (None, None)

        # Fill histogram
        h1 = ROOT.TH1F('h_num_{}'.format(id(data)), "", len(bins) - 1, bins)
        h2 = ROOT.TH1F('h_den_{}'.format(id(data)), "", len(bins) - 1, bins)
        fill_hist(h1, data[0], weights=weights[0])
        fill_hist(h2, data[1], weights=weights[1])

        # NOTE(review): called as a free function rather than as a method on
        # self -- confirm that a module-level _diff_plot1D exists.
        return _diff_plot1D((h1, h2), option, **kwargs)
Example #5
0
    def test_project_3d_to_2d(self):
        """Check 2D projections of a 3D histogram against numpy sums.

        Each projection must equal the 3D contents summed over the dropped
        axis, with errors added in quadrature; projections that name the axes
        in swapped order are compared to the transposed sum.
        """
        hist_3d = _get_hist(3)
        # populate overflow bins to make sure that they are treated as expected
        for low, high in ((-1, 0), (1, 2)):
            fill_hist(hist_3d, np.random.uniform(low, high, (100, 3)))
        val3d = hu.get_array(hist_3d)
        err3d = hu.get_array(hist_3d, errors=True)

        # (projection, axis summed over, whether the expectation is transposed)
        cases = (
            ('xy', 2, False),
            ('yz', 0, False),
            ('zx', 1, True),
            ('yx', 2, True),
        )
        for projection, summed_axis, transposed in cases:
            projected = hu.project(hist_3d, projection)
            val = hu.get_array(projected)
            err = hu.get_array(projected, errors=True)

            expected_val = np.sum(val3d, axis=summed_axis)
            expected_err = np.sqrt(np.sum(err3d**2, axis=summed_axis))
            if transposed:
                expected_val = expected_val.T
                expected_err = expected_err.T

            npt.assert_equal(val, expected_val)
            npt.assert_equal(err, expected_err)
Example #6
0
def plotRatio(input_df, inputFile):
    """Overlay the predicted and corrected pT-ratio distributions.

    The spread of each column around its mean is quoted in the histogram
    titles, which become the legend entries. The canvas is saved as
    ``plotsDir + inputFile + '_ratio_predicted_gen.pdf'``.

    Args:
        input_df: dataframe with the columns ``bc_ptRatio_predictedGen`` and
            ``bc_ptRatio_correctedGen``.
        inputFile: string inserted into the output file name.
    """
    gStyle.SetOptStat(0)
    predicted = input_df['bc_ptRatio_predictedGen']
    corrected = input_df['bc_ptRatio_correctedGen']

    # Root-mean-square deviation of each column from its own mean.
    rmse1 = ((predicted.mean() - predicted)**2).mean()**.5
    rmse2 = ((corrected.mean() - corrected)**2).mean()**.5

    label1 = "NN prediction, RMS=%3.3f" % rmse1
    label2 = "Colinear correction, RMS=%3.3f" % rmse2
    hPtRatioPredicted = TH1F('hPtRatioPredicted', label1, 40, 0., 2.)
    # Each histogram needs its own ROOT name: reusing 'hPtRatioPredicted'
    # here would make ROOT replace the first histogram in the current
    # directory.
    hPtRatioCorrected = TH1F('hPtRatioCorrected', label2, 40, 0., 2.)

    fill_hist(hPtRatioPredicted, predicted.to_numpy())
    fill_hist(hPtRatioCorrected, corrected.to_numpy())

    c1 = TCanvas('c1', 'c1', 700, 500)
    hPtRatioPredicted.SetLineColor(ROOT.kAzure)
    hPtRatioCorrected.SetLineColor(ROOT.kRed)
    # Titles are hidden on the pad; BuildLegend still uses them as labels.
    gStyle.SetOptTitle(0)
    hPtRatioPredicted.GetYaxis().SetTitle("Events/50 MeV")
    hPtRatioPredicted.GetXaxis().SetTitle(
        "pT_{corrected}(B_{c}^{+})/pT_{gen}(B_{c}^{+})")
    hPtRatioPredicted.Draw("")
    hPtRatioCorrected.Draw('same')
    gPad.BuildLegend()
    c1.SaveAs(plotsDir + inputFile + '_ratio_predicted_gen.pdf')
Example #7
0
def plotProfile(input_df):
    """Draw the X-profiles of both pT-ratio columns versus ``gen_b_pt``.

    Two TH2F histograms (``gen_b_pt`` on x, the ratio on y) are filled and
    profiled along x; both profiles are drawn on one canvas that is saved as
    ``plotsDir + 'profile.png'``.

    Args:
        input_df: dataframe with the columns ``gen_b_pt``,
            ``bc_ptRatio_predictedGen`` and ``bc_ptRatio_correctedGen``.

    Returns:
        Always 0.
    """
    gStyle.SetOptStat(0)

    # (histogram name, title, ratio column, marker style)
    specs = (
        ('hPtGenVsPtRatioPredictedGen', 'NN prediction',
         'bc_ptRatio_predictedGen', ROOT.kFullCircle),
        ('hPtGenVsPtRatioCorrectedGen', 'Jonas correction',
         'bc_ptRatio_correctedGen', ROOT.kFullSquare),
    )
    maps_2d = []  # keeps the 2D histograms referenced until the plot is saved
    profiles = []
    for hist_name, hist_title, ratio_column, marker in specs:
        hist_2d = TH2F(hist_name, hist_title, 80, 0., 80., 40, 0., 2.)
        fill_hist(hist_2d, input_df[['gen_b_pt', ratio_column]].to_numpy())
        profile = hist_2d.ProfileX()
        profile.SetMarkerStyle(marker)
        maps_2d.append(hist_2d)
        profiles.append(profile)
    predicted_profile, corrected_profile = profiles

    canvas = TCanvas('c1', 'c1', 700, 500)
    predicted_profile.SetLineColor(ROOT.kAzure)
    corrected_profile.SetLineColor(ROOT.kOrange)
    gStyle.SetOptTitle(0)
    # The first profile drawn defines the frame, so it carries the axis titles.
    predicted_profile.GetXaxis().SetTitle("pT_{gen}(Bc) [GeV]")
    predicted_profile.GetYaxis().SetTitle(
        "pT_{corrected}(B_{c}^{+})/pT_{gen}(B_{c}^{+})")
    predicted_profile.Draw("")
    corrected_profile.Draw('same')
    gPad.BuildLegend()
    canvas.SaveAs(plotsDir + 'profile.png')
    return 0
Example #8
0
def Plot_variable_from_data_2D(out_file,
                               data_path,
                               x_title,
                               x_var,
                               x_bins,
                               x_range,
                               y_title,
                               y_var,
                               y_bins,
                               y_range,
                               test=False,
                               weights=None):
    """Fill a 2D histogram of two data columns and write its canvas to a file.

    Args:
        out_file: path of the ROOT file to (re)create.
        data_path: passed to ``load_data``.
        x_title: x-axis title.
        x_var: key of the x values in the loaded data.
        x_bins: number of x bins.
        x_range: (low, high) pair for the x-axis.
        y_title: y-axis title.
        y_var: key of the y values in the loaded data.
        y_bins: number of y bins.
        y_range: (low, high) pair for the y-axis.
        test: selects ``test_full_signal=True`` instead of
            ``train_full_signal=True`` in the ``load_data`` call.
        weights: optional key of the weights in the loaded data.
    """
    selector = 'test_full_signal' if test else 'train_full_signal'
    data, _, _ = load_data(data_path, **{selector: True})
    if weights is not None:
        # From here on 'weights' holds the weight values, not the key.
        weights = data[weights]

    output = ROOT.TFile(out_file, "RECREATE")
    x_low, x_high = x_range[0], x_range[1]
    y_low, y_high = y_range[0], y_range[1]
    hist = ROOT.TH2D('hist', 'hist', x_bins, x_low, x_high, y_bins, y_low,
                     y_high)

    # One (x, y) row per entry, as expected for a 2D fill.
    points = np.vstack((data[x_var], data[y_var])).T
    root_numpy.fill_hist(hist, points, weights=weights)

    canvas = ROOT.TCanvas('canv', 'canv', 600, 600)
    hist.SetContour(256)
    hist.GetXaxis().SetTitle(x_title)
    hist.GetYaxis().SetTitle(y_title)
    hist.Draw("COLZ")
    canvas.Write()
    output.Close()
Example #9
0
def create_TH1D(x,
                name='h',
                title=None,
                binning=None,
                weights=None,
                h2clone=None):
    """Create a TH1D, optionally with automatic binning, and fill it with ``x``.

    Args:
        x: values to fill.
        name: histogram name.
        title: histogram title; defaults to ``name``.
        binning: ``[nbins, low, high]``; any entry left as ``None`` is derived
            from ``x``. ``None`` (the default) derives all three. The caller's
            sequence is never modified.
        weights: optional per-entry weights passed to ``fill_hist``.
        h2clone: optional histogram whose binning is cloned instead of using
            ``binning``.

    Returns:
        The filled histogram; the binning actually used is attached as
        ``h.binning``.
    """
    if title is None:
        title = name
    # Work on a private copy: filling in the automatic values used to mutate
    # the shared default list, so one call's binning leaked into the next.
    binning = [None, None, None] if binning is None else list(binning)

    if h2clone is None:
        if binning[1] is None:
            binning[1] = min(x)
        if binning[2] is None:
            # With a long upper tail, cut the range at the 90th percentile.
            if ((np.percentile(x, 95) - np.percentile(x, 50)) < 0.2 *
                (max(x) - np.percentile(x, 95))):
                binning[2] = np.percentile(x, 90)
            else:
                binning[2] = max(x)
        if binning[0] is None:
            # Bin width from the interquartile range: 4 * IQR / n^(1/3).
            bin_w = 4 * (np.percentile(x, 75) -
                         np.percentile(x, 25)) / (len(x))**(1. / 3.)
            # Fall back when the interquartile range collapses to zero, which
            # would otherwise divide by zero below.
            if bin_w == 0:
                bin_w = 0.5 * np.std(x)
            if bin_w == 0:
                bin_w = 1
            binning[0] = int((binning[2] - binning[1]) / bin_w)

        h = rt.TH1D(name, title, binning[0], binning[1], binning[2])
    else:
        h = h2clone.Clone(name)
        h.SetTitle(title)
        h.Reset()

    rtnp.fill_hist(h, x, weights=weights)
    h.binning = binning
    return h
Example #10
0
def scatterplot(dfevt, nvar1, nvar2, nbins1, min1, max1, nbins2, min2, max2):
    """Return a TH2F filled with the columns ``nvar1`` (x) and ``nvar2`` (y).

    The histogram name and title are both ``nvar1 + nvar2``.
    """
    label = nvar1 + nvar2
    hist = TH2F(label, label, nbins1, min1, max1, nbins2, min2, max2)
    fill_hist(hist, dfevt[[nvar1, nvar2]].values)
    return hist
def create_TH2D(sample,
                name='h',
                title=None,
                binning=None,
                weights=None,
                axis_title=['', '', '']):
    """Create a TH2D, optionally with automatic binning, and fill it.

    Args:
        sample: 2D array; column 0 holds the x values, column 1 the y values.
        name: histogram name.
        title: histogram title; defaults to ``name``.
        binning: either ``[nx, xlow, xhigh, ny, ylow, yhigh]``, where entries
            left as ``None`` are derived from ``sample``, or a longer sequence
            holding the x edges followed by the y edges, the number of x
            edges and the number of y edges. ``None`` (the default) derives
            all six values. The caller's sequence is never modified.
        weights: optional per-entry weights passed to ``fill_hist``.
        axis_title: titles of the x, y and z axes.

    Returns:
        The filled histogram; the binning actually used is attached as
        ``h.binning``.
    """
    if title is None:
        title = name
    # Work on a private copy: filling in the automatic values used to mutate
    # the shared default list, so one call's binning leaked into the next.
    binning = [None] * 6 if binning is None else list(binning)

    def _auto_width(values):
        # Bin width from the interquartile range, 4 * IQR / n^(1/3), with
        # fallbacks for a degenerate spread.
        width = 4 * (np.percentile(values, 75) -
                     np.percentile(values, 25)) / (len(values))**(1. / 3.)
        if width == 0:
            width = 0.5 * np.std(values)
        if width == 0:
            width = 1
        return width

    if sample.shape[0] == 0:
        # Nothing to derive the binning from: use 1 for every missing entry.
        binning = [1 if entry is None else entry for entry in binning]
    else:
        x_values = sample[:, 0]
        y_values = sample[:, 1]

        if binning[1] is None:
            binning[1] = min(x_values)
        if binning[2] is None:
            binning[2] = max(x_values)
        if binning[0] is None:
            binning[0] = int(
                (binning[2] - binning[1]) / _auto_width(x_values))

        if binning[4] is None:
            binning[4] = min(y_values)
        if binning[5] is None:
            binning[5] = max(y_values)
        if binning[3] is None:
            binning[3] = int(
                (binning[5] - binning[4]) / _auto_width(y_values))

    if len(binning) == 6:
        h = rt.TH2D(name, title, binning[0], binning[1], binning[2],
                    binning[3], binning[4], binning[5])
    else:
        # Variable-width bins: the last two entries give the number of x and
        # y edges, the entries before them are the edges themselves.
        h = rt.TH2D(name, title, binning[-2] - 1,
                    array('f', binning[:binning[-2]]), binning[-1] - 1,
                    array('f', binning[binning[-2]:-2]))

    rtnp.fill_hist(h, sample, weights=weights)
    h.SetXTitle(axis_title[0])
    h.SetYTitle(axis_title[1])
    h.SetZTitle(axis_title[2])
    h.binning = binning
    return h
Example #12
0
def SigBkgHist(Sample, XTitle, bins, xmin, xmax, tag=''):
    """Draw the signal and background distributions of ``Sample.Events``.

    Entries are split on ``Sample.OutTrue`` (1 for signal, 0 for background)
    and the canvas is saved to ``./plots/SigBkg<tag>.png``.

    Args:
        Sample: object providing ``Events``, ``Weights`` and ``OutTrue``.
        XTitle: x-axis title.
        bins: number of bins.
        xmin: lower edge of the histogram range.
        xmax: upper edge of the histogram range.
        tag: suffix inserted into the output file name.
    """
    values = Sample.Events
    is_signal = Sample.OutTrue == 1
    is_background = Sample.OutTrue == 0
    signal_values = values[is_signal]
    signal_weights = Sample.Weights[is_signal]
    background_values = values[is_background]
    background_weights = Sample.Weights[is_background]

    canvas = ROOT.TCanvas("c1", "Canvas", 800, 600)
    ROOT.gStyle.SetOptStat(0)

    hSig = ROOT.TH1F("hSig", "", bins, xmin, xmax)
    fill_hist(hSig, signal_values, weights=signal_weights)
    SetOverflow(hSig)
    SetUnderflow(hSig)

    hBkg = ROOT.TH1F("hBkg", "", bins, xmin, xmax)
    fill_hist(hBkg, background_values, weights=background_weights)
    SetOverflow(hBkg)
    SetUnderflow(hBkg)
    hBkg.SetLineColor(2)

    # The signal histogram is drawn first and defines the frame, so its
    # y-range is set from whichever histogram peaks higher.
    sig_peak = hSig.GetMaximum()
    bkg_peak = hBkg.GetMaximum()
    peak = sig_peak if sig_peak > bkg_peak else bkg_peak
    hSig.GetYaxis().SetRangeUser(0, peak * 1.4)
    hSig.GetXaxis().SetTitle(XTitle)
    hSig.Draw("Hist")
    hBkg.Draw("SameHist")

    canvas.SaveAs("./plots/SigBkg" + tag + ".png")
Example #13
0
def makefill1dhist(df_, h_name, h_tit, arrayx, nvar1):
    """Build a histogram with ``buildhisto`` and fill it from one column.

    ``h_name``, ``h_tit`` and ``arrayx`` are forwarded to ``buildhisto``;
    the column ``nvar1`` of ``df_`` supplies the values.
    """
    filled = buildhisto(h_name, h_tit, arrayx)
    column = df_[nvar1]
    fill_hist(filled, column)
    return filled
Example #14
0
    def fillHistograms(self, cut, path, category, weight = Weight("1.0",[]) ):
        """Build one unrolled histogram per entry of ``self.uncerts``.

        The data selected by ``cut`` is read from ``path``; each histogram is
        filled with the branches of ``self.variable`` using the weights at
        the matching position in the result of ``getFFWeights``.

        Returns:
            A deep copy of the dict mapping histogram name to histogram.
        """
        data_content = self.read_data_content(cut, path, weight)
        ff_weights = self.getFFWeights(data_content)

        FFHistos = {}
        for index, uncert in enumerate(self.uncerts):
            # Names containing "jetFakes" are used as they are, all others
            # go through convertSystematicName.
            if "jetFakes" in uncert:
                name = uncert
            else:
                name = self.convertSystematicName(uncert)

            hist = getEmptyHist(name, self.variable, category)
            hist.Sumw2(True)

            if len(data_content):
                rn.fill_hist(
                    hist,
                    array=data_content[self.variable.getBranches()].values,
                    weights=ff_weights[index].values)

            FFHistos[name] = self.unroll2D(hist)

        # Empty the dataframe in place once every histogram is filled.
        data_content.drop(data_content.index, inplace=True)
        return copy.deepcopy(FFHistos)
Example #15
0
    def setup(self):
        """Prepare the output directory and build the RooFit objects.

        Entries in ``self.output_dir`` whose name contains ``self.tag`` are
        removed, or the directory is created if it does not exist. The
        ``ggMass`` values of ``self.events`` are then histogrammed (320 bins
        on [100, 180]) and converted into a RooDataHist and an extended pdf.
        """
        import glob
        import shutil

        # Clean up with the standard library instead of shell commands built
        # from strings, which break on paths containing spaces or shell
        # metacharacters.
        if os.path.exists(self.output_dir):
            pattern = os.path.join(self.output_dir, "*" + self.tag + "*")
            for stale in glob.glob(pattern):
                if os.path.isdir(stale) and not os.path.islink(stale):
                    shutil.rmtree(stale)
                else:
                    os.remove(stale)
        else:
            os.makedirs(self.output_dir)

        # Initialize workspace
        self.w = ROOT.RooWorkspace("w")
        self.rooVar = "mgg"

        # Initialize and fill histogram
        self.h = ROOT.TH1F("h_mgg", "h_mgg", 320, 100, 180)
        self.h.Sumw2()
        root_numpy.fill_hist(self.h, self.events["ggMass"], weights = self.events["weight"])

        # Convert to RooDataHist
        # NOTE(review): the workspace is created empty just above, so
        # w.var(...) and w.pdf(...) only resolve if the variable and pdf are
        # added to it somewhere this method does not show -- confirm.
        self.d = ROOT.RooDataHist("d_mgg_" + self.tag, "", ROOT.RooArgList(self.w.var(self.rooVar)), self.h, 1)
        self.norm = self.d.sumEntries()
        self.rooVarNorm = ROOT.RooRealVar(self.tag + "_norm", "", self.norm)
        self.pdf = ROOT.RooExtendPdf(self.tag + "_pdf", "", self.w.pdf(self.tag), self.rooVarNorm)

        if not self.resonant:
            # Named ranges: "SL" and "SU" leave out 120-130, "full" spans
            # the whole histogram range.
            self.w.var(self.rooVar).setRange("SL", 100, 120)
            self.w.var(self.rooVar).setRange("SU", 130, 180)
            self.w.var(self.rooVar).setRange("full", 100, 180)

        return
Example #16
0
    def process_histomass(self):
        """Write invariant-mass histograms per (pt, second-variable) bin.

        For every final pt bin the merged dataframe is loaded, filtered with
        the configured selections and split into the bins of the second
        variable. Each slice yields a TH1F of ``inv_mass`` and, if the
        dataframe has a ``pt_jet`` column, a TH2F of z versus ``inv_mass``.
        All histograms are written to ``self.n_filemass``.
        """
        myfile = TFile.Open(self.n_filemass, "recreate")
        try:
            for ipt in range(self.p_nptfinbins):
                bin_id = self.bin_matching[ipt]
                # NOTE(review): pickle.load can execute arbitrary code from
                # the file -- only use with trusted inputs.
                df = pickle.load(openfile(self.lpt_recodecmerged[bin_id], "rb"))
                df = df.query(self.l_selml[bin_id])
                if self.s_evtsel is not None:
                    df = df.query(self.s_evtsel)
                if self.s_trigger is not None:
                    df = df.query(self.s_trigger)
                df = seldf_singlevar(df, self.v_var_binning,
                                     self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])
                for var2_min, var2_max in zip(self.lvar2_binmin, self.lvar2_binmax):
                    suffix = "%s%d_%d_%.2f%s_%.2f_%.2f" % \
                             (self.v_var_binning, self.lpt_finbinmin[ipt],
                              self.lpt_finbinmax[ipt], self.lpt_probcutfin[bin_id],
                              self.v_var2_binning, var2_min, var2_max)
                    h_invmass = TH1F("hmass" + suffix, "", self.p_num_bins,
                                     self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
                    df_bin = seldf_singlevar(df, self.v_var2_binning,
                                             var2_min, var2_max)
                    fill_hist(h_invmass, df_bin.inv_mass)
                    myfile.cd()
                    h_invmass.Write()

                    if "pt_jet" in df_bin.columns:
                        zarray = z_calc(df_bin.pt_jet, df_bin.phi_jet, df_bin.eta_jet,
                                        df_bin.pt_cand, df_bin.phi_cand, df_bin.eta_cand)
                        h_zvsinvmass = TH2F("hzvsmass" + suffix, "", 5000, 1.00, 6.00, 2000, -0.5, 1.5)
                        # One (inv_mass, z) row per candidate.
                        zvsinvmass = np.vstack((df_bin.inv_mass, zarray)).T
                        fill_hist(h_zvsinvmass, zvsinvmass)
                        h_zvsinvmass.Write()
        finally:
            # Close explicitly so the output is flushed even if a bin fails.
            myfile.Close()
Example #17
0
    def plot_hist(self, values, bins, **params):
        """Plot ``values`` as a step histogram and add the page to ``self.pdf``.

        Args:
            values: values to histogram.
            bins: passed to ``ax.hist`` as the binning.
            **params: optional settings. The presence of ``root`` also writes
                a normalised ROOT histogram called ``name``; ``label`` adds a
                legend entry; the presence of ``norm`` normalises the
                matplotlib histogram; the presence of ``ylog`` switches the
                y-axis to a log scale (``symlog`` when not normalised);
                ``x_label`` sets the x-axis label.
        """
        name = params.get('name')

        if 'root' in params:
            histo = r.TH1F(name, name, len(bins), np.amin(bins), np.amax(bins))
            rnp.fill_hist(histo, values)
            # An empty histogram has a zero integral and is left unscaled
            # instead of dividing by zero.
            integral = histo.Integral()
            if integral != 0:
                histo.Scale(1/integral, 'width')
            histo.Write()

        fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15, 10))

        label = params.get('label')
        norm = 'norm' in params

        if 'ylog' in params:
            if norm: ax.set_yscale('log')
            else: ax.set_yscale('symlog')

        if 'x_label' in params:
            ax.set_xlabel(params['x_label'])

        # NOTE(review): 'normed' was removed from Axes.hist in recent
        # matplotlib releases in favour of 'density' -- confirm the
        # matplotlib version this runs against.
        ax.hist(values, bins, histtype='step', lw=1.5, label=label, normed=norm)

        # The legend has to be built after the labelled histogram is drawn;
        # calling it beforehand produced an empty legend.
        if 'label' in params:
            ax.legend()

        self.pdf.savefig(bbox_inches='tight')
        plt.close()
Example #18
0
    def _plot1D_numpy(self, data, bins, weights=None, option='', **kwargs):
        """Turn numpy-type input into a ROOT object and plot it via ``_plot1D``.

        How ``data`` is interpreted depends on its length relative to ``bins``:
        equal lengths give a TGraph of (bins, data) points, one element fewer
        than ``bins`` is taken as bin contents, and anything else as values to
        be filled into a histogram with edges ``bins``.

        Args:
            data: array-like input, interpreted as described above.
            bins: x-coordinates or bin edges.
            weights: optional weights, used only when ``data`` is filled.
            option: passed through to ``_plot1D``.
            **kwargs: passed through to ``_plot1D``.

        Returns:
            Whatever ``_plot1D`` returns, or ``None`` if ``bins`` is missing
            or too short.
        """

        # Check(s)
        if bins is None:
            warning(
                "You need to specify 'bins' when plotting a numpy-type input.")
            return

        if len(data) != len(bins) and len(bins) < 2:
            warning("Number of bins {} is not accepted".format(len(bins)))
            return

        # The builtin float replaces the np.float alias, which no longer
        # exists in current NumPy releases; the resulting dtype is the same.
        edges = np.array(bins, dtype=float)

        # Fill histogram
        if len(data) == len(bins):
            # Assuming 'data' and 'bins' are sets of (x,y)-points
            h = ROOT.TGraph(len(bins), edges, np.array(data, dtype=float))
        else:
            h = ROOT.TH1F('h_{:d}'.format(int(time.time() * 1E+06)), "",
                          len(bins) - 1, edges)
            if len(data) == len(bins) - 1:
                # Assuming 'data' are bin values
                array2hist(data, h)
            else:
                # Assuming 'data' are values to be filled
                fill_hist(h, data, weights=weights)

        # Plot histogram
        return self._plot1D(h, option, **kwargs)
Example #19
0
    def _analyze(self, file):
        """Fill the jet histograms from the ``Events`` tree of ``file``.

        Only events with more than one jet are used. Fills the ``ptj``,
        ``ht``, ``lead_jet_eta``, ``trail_jet_eta`` and ``jet_eta_2d``
        entries of ``self._histos``.

        Args:
            file: passed to ``ur.open``.
        """
        tree = ur.open(file)["Events"]
        # NOTE(review): Jet_phi and MET_pt are read here but not used below.
        arrays = tree.arrays(
            ['Jet_pt', 'nJet', 'Jet_eta', 'Jet_phi', 'MET_pt'])

        # Select events with more than one jet
        mask = (arrays[b"nJet"] > 1)
        jet_pt = arrays[b"Jet_pt"][mask]
        jet_eta = arrays[b"Jet_eta"][mask]

        # Computed once and reused; it used to be calculated a second time
        # for the fill while the first result was discarded.
        jet_ht = calculate_jet_ht(jet_pt)

        fill_hist(self._histos["ptj"], jet_pt.flatten())
        fill_hist(self._histos["ht"], jet_ht.flatten())
        fill_hist(self._histos["lead_jet_eta"], jet_eta[:, 0])
        fill_hist(self._histos["trail_jet_eta"], jet_eta[:, 1])

        # First two jets of each event as (eta, eta) pairs for the 2D fill.
        twod = jet_eta[:, 0:2].tolist()
        fill_hist(self._histos["jet_eta_2d"], twod)
Example #20
0
def write_score_hists(f, mass, scores_list, hist_template, no_neg_bins=True):
    """Fill one score histogram per sample and systematic and write them to ``f``.

    Args:
        f: output file; made the current directory before writing.
        mass: value inserted into each histogram name.
        scores_list: iterable of ``(sample, scores_dict)`` pairs, where
            ``scores_dict`` maps a systematic term to ``(scores, weights)``.
        hist_template: histogram cloned for every sample/systematic pair.
        no_neg_bins: if true, bins whose content summed over all samples of
            a systematic is negative are set to zero in every histogram of
            that systematic.
    """
    hists_by_sys = {}
    for samp, scores_dict in scores_list:
        for sys_term, (scores, weights) in scores_dict.items():
            # The nominal term adds no suffix; any other term is joined
            # with underscores.
            suffix = '' if sys_term == 'NOMINAL' else '_' + '_'.join(sys_term)
            hist = hist_template.Clone(
                name=samp.name + ('_{0}'.format(mass)) + suffix)
            fill_hist(hist, scores, weights)
            hists_by_sys.setdefault(sys_term, []).append(hist)

    f.cd()
    for sys_term, hists in hists_by_sys.items():
        negative_bins = []
        if no_neg_bins:
            # check for negative bins over all systematics and zero them out
            # negative bins cause lots of problem in the limit setting
            # negative bin contents effectively means
            # the same as "no events here..."
            total_hist = sum(hists)
            for index, content in enumerate(total_hist):
                if content < 0:
                    log.warning("Found negative bin %d (%f) for "
                                "systematic %s" % (
                                    index, content, sys_term))
                    negative_bins.append(index)
        for hist in hists:
            for index in negative_bins:
                # zero out bad bins
                hist[index] = 0.
            hist.Write()
Example #21
0
def efficiency_graph(pass_function,
                     function_inputs,
                     xs,
                     bins=None,
                     error=0.005):
    """Return the efficiency of ``pass_function`` as a function of ``xs``.

    Args:
        pass_function: callable applied to ``function_inputs``; its result is
            used as the per-event weight of the "pass" histogram.
        function_inputs: argument handed to ``pass_function``.
        xs: per-event values of the variable the efficiency is binned in.
        bins: bin edges; if ``None`` they are chosen automatically.
        error: target uncertainty on the efficiency per bin, used only for
            the automatic binning.

    Returns:
        A ``Graph`` holding the ratio of the passed to the total histogram.
    """
    pass_results = pass_function(function_inputs)
    if bins is None:  # Automatic binning
        # Compute the number of bins such that the error on the efficiency is
        # equal to 'error' in each bin. The calculation is based on binomial
        # errors and assumes that the efficiency is flat (that the
        # distributions of all and selected events are the same).
        k = float(np.count_nonzero(pass_results))
        n = float(len(pass_results))
        percentiles = [0., 100.]
        # When no event or every event passes, the binomial error vanishes
        # and the formula below would divide by zero: keep a single bin.
        if 0 < k < n:
            nbins = (error * n)**2 / k / (1 - k / n)
            # Compute the bin boundaries with the same number of events in
            # all bins
            candidate = np.arange(0., 100., 100. / nbins)
            # At most one requested bin yields a single boundary, which
            # cannot define a histogram: keep the [0, 100] fallback.
            if len(candidate) >= 2:
                candidate[-1] = 100.
                percentiles = candidate
        bins = np.unique(np.percentile(xs, percentiles))
    # Fill histograms of selected and all events and compute efficiency
    histo_pass = Hist(bins)
    histo_total = Hist(bins)
    fill_hist(histo_pass, xs, pass_results)
    fill_hist(histo_total, xs)
    efficiency = Graph()
    efficiency.Divide(histo_pass, histo_total)
    return efficiency
def main ():
    """Convert every tree of the ISRgamma input files into an mJ histogram.

    For each ``ISRgamma_*.root`` file in the input directory a
    ``hist_ISRgamma_*.root`` file is created in the output directory, holding
    one weighted TH1F of ``mJ`` (32 bins on [100, 260]) per key of the input
    file.
    """

    inputdir  = '/eos/atlas/user/a/asogaard/Analysis/2016/BoostedJetISR/StatsInputs/2017-06-28/'
    outputdir = '/eos/atlas/user/a/asogaard/Analysis/2016/BoostedJetISR/StatsInputs/2017-07-10/'

    inputpaths  = glob.glob(inputdir + '/ISRgamma_*.root')
    outputpaths = [p.replace(inputdir, outputdir).replace('ISRgamma', 'hist_ISRgamma') for p in inputpaths]

    for inputpath, outputpath in zip(inputpaths, outputpaths):
        # Single-argument print calls behave the same on Python 2 and 3;
        # the former print statements were a syntax error on Python 3.
        print("Processing '%s'" % inputpath)

        infile  = ROOT.TFile(inputpath,  'READ')
        outfile = ROOT.TFile(outputpath, 'RECREATE')
        categories = [key.GetName() for key in infile.GetListOfKeys()]

        for category in categories:
            print("-- '%s'" % category)
            tree = infile.Get(category)
            records = tree2array(tree)
            hist = ROOT.TH1F(category, "", 32, 100, 260)
            fill_hist(hist, records['mJ'], weights=records['weight'])

            # TF shape/norm ...

            outfile.cd()
            hist.Write()

        outfile.Write()
        outfile.Close()
        infile.Close()

    return
Example #23
0
    def fillHistos(self, content, histname, cat, cut, add_systematics):
        """Write the nominal and, optionally, the systematic histograms.

        The rows of ``content`` passing ``cut`` are histogrammed in
        ``self.var`` with the ``event_weight`` column as weight and written
        to the directory of category ``cat`` in ``self.DCfile``. With
        ``add_systematics`` set, one more histogram is written per entry of
        ``self.systematics``, weighted by evaluating that entry's expression
        on the selected rows.
        """
        cat_index = int(cat)
        binning = self.var.bins(cat_index)
        selected = content.query(cut)
        values = selected[self.var.name].values

        nominal = R.TH1D(histname, histname, *binning)
        nominal.GetXaxis().SetTitle(self.var.name)
        nominal.Sumw2()
        rn.fill_hist(nominal,
                     array=values,
                     weights=selected["event_weight"].values)

        target = self.target_names[cat_index]
        self.DCfile.cd(self.d(target))
        nominal.Write()

        if not add_systematics:
            return

        for rw in self.systematics:
            # Fill the placeholders of the systematic's name template.
            rwname = rw.replace("reweight", histname)
            rwname = rwname.replace("CHAN", self.channel)
            rwname = rwname.replace("CAT", target)

            shifted = R.TH1D(rwname, rwname, *binning)
            rn.fill_hist(shifted,
                         array=values,
                         weights=selected.eval(self.systematics[rw]).values)
            shifted.Write()
Example #24
0
def create_TH2D(sample,
                name='h',
                title=None,
                binning=None,
                weights=None,
                axis_title=['', '']):
    """Create a TH2D, optionally with automatic binning, and fill it.

    Args:
        sample: 2D array; column 0 holds the x values, column 1 the y values.
        name: histogram name.
        title: histogram title; defaults to ``name``.
        binning: ``[nx, xlow, xhigh, ny, ylow, yhigh]``; entries left as
            ``None`` are derived from ``sample``. ``None`` (the default)
            derives all six. The caller's sequence is never modified.
        weights: optional per-entry weights passed to ``fill_hist``.
        axis_title: titles of the x and y axes.

    Returns:
        The filled histogram; the binning actually used is attached as
        ``h.binning``.
    """
    if title is None:
        title = name
    # Work on a private copy: filling in the automatic values used to mutate
    # the shared default list, so one call's binning leaked into the next.
    binning = [None] * 6 if binning is None else list(binning)

    def _auto_width(values):
        # Bin width from the interquartile range, 4 * IQR / n^(1/3), with
        # fallbacks so that a degenerate spread cannot divide by zero.
        width = 4 * (np.percentile(values, 75) -
                     np.percentile(values, 25)) / (len(values))**(1. / 3.)
        if width == 0:
            width = 0.5 * np.std(values)
        if width == 0:
            width = 1
        return width

    x_values = sample[:, 0]
    y_values = sample[:, 1]

    if binning[1] is None:
        binning[1] = min(x_values)
    if binning[2] is None:
        binning[2] = max(x_values)
    if binning[0] is None:
        binning[0] = int((binning[2] - binning[1]) / _auto_width(x_values))

    if binning[4] is None:
        binning[4] = min(y_values)
    if binning[5] is None:
        binning[5] = max(y_values)
    if binning[3] is None:
        binning[3] = int((binning[5] - binning[4]) / _auto_width(y_values))

    h = rt.TH2D(name, title, binning[0], binning[1], binning[2], binning[3],
                binning[4], binning[5])
    rtnp.fill_hist(h, sample, weights=weights)
    h.SetXTitle(axis_title[0])
    h.SetYTitle(axis_title[1])
    h.binning = binning
    return h
Example #25
0
def create_histogram(var, hist_sett, **kwargs):
    """
    Create a ROOT histogram from the passed variable(s)

    Args:
        var (np.array): Array with maximum of 3 columns containing the variables
            to plot.
        hist_sett (tuple): Histogram settings, that are directly unpacked into
            the constructor of the ROOT histogram

    Keyword Args:
        name (str, optional): Name to be used for the histogram
        weights (np.array, optional): weight array with the same number of
             events as the var array. Each entry corresponds to the weight of
             the event
        {x,y,z}_axis (str): axis labels to be set for the histogram

    Returns:
         ROOT.TH{1,2,3}D: The histogram with the dimension corresponding to the
             number of columns of var

    Raises:
        TypeError: If var does not have between 1 and 3 columns, or if the
            histogram cannot be constructed from hist_sett.
    """
    name = kwargs.pop('name', '')
    if not name:
        name = create_random_str()
    # use the number of dimensions from the var to determine which sort of
    # histogram to use
    shape = var.shape
    ndim = 1 if len(shape) == 1 else shape[1]

    # A column count is never negative, so the lower bound has to reject 0;
    # otherwise an array without columns would ask for a 'TH0D' class.
    if ndim > 3 or ndim < 1:
        logging.error('Dimension of histogram is {}. Cannot create histogram'
                      .format(ndim))
        raise TypeError('Invalid number of dimensions in create_histograms')

    hist_type = 'TH{}D'.format(ndim)
    try:
        hist = getattr(r, hist_type)(name, '', *hist_sett)
    except TypeError:
        logging.error('Could not construct TH{}D with passed hist_sett: {}'
                      .format(ndim, hist_sett))
        # bare raise keeps the original traceback
        raise

    set_hist_opts(hist)

    # set axis labels
    xax, yax, zax = (kwargs.pop(a, '') for a in ['x_axis', 'y_axis', 'z_axis'])
    if xax:
        hist.SetXTitle(xax)
    if yax:
        hist.SetYTitle(yax)
    if zax:
        hist.SetZTitle(zax)

    fill_hist(hist, var, weights=kwargs.pop('weights', None))

    return hist
Example #26
0
def HistNW(hist, Events, Weights, Color):
    """Fill ``hist`` with weighted events, style it and normalise it to unit area.

    The histogram is modified in place and left unscaled when its integral
    is zero.
    """
    fill_hist(hist, Events, weights=Weights)
    hist.SetLineColor(Color)
    hist.SetLineWidth(1)
    SetOverflow(hist)
    SetUnderflow(hist)
    total = hist.Integral()
    if total == 0:
        return
    hist.Scale(1 / total)
Example #27
0
def fill2dhist(df_, histo, nvar1, nvar2):
    """
    Fill the TH2 ``histo`` with the columns ``nvar1`` and ``nvar2`` of the
    dataframe ``df_`` and return it.
    """
    pairs = df_[[nvar1, nvar2]].values
    fill_hist(histo, pairs)
    return histo
Example #28
0
def draw_hist(hist, xarr, fields, weight=None):
    """Fill ``hist`` from the named fields of ``xarr``.

    Args:
        hist: histogram to fill.
        xarr: container indexable by field name.
        fields: field names; a single name fills from that field directly,
            several names are stacked into one row per entry.
        weight: optional name of the field holding the weights.

    Returns:
        The result of ``rnp.fill_hist``.
    """
    warr = None
    if weight:
        warr = xarr[weight]

    if len(fields) == 1:
        values = xarr[fields[0]]
    else:
        # One row per field, transposed to one row per entry.
        values = np.array([xarr[f] for f in fields]).transpose()
    return rnp.fill_hist(hist=hist, array=values, weights=warr)
Example #29
0
def create_TH1D(x,
                name='h',
                title=None,
                binning=None,
                weights=None,
                h2clone=None,
                axis_title=None,
                opt='',
                color=0):
    """Create a ``TH1D``, fill it with ``x`` and return it.

    Args:
        x: 1D numpy array of values to histogram.
        name: histogram name.
        title: histogram title; defaults to ``name``.
        binning: either a numpy array of bin edges, or a 3-element sequence
            ``[nbins, xmin, xmax]`` in which any ``None`` entry is derived
            from ``x``. ``None`` (the default) means ``[None, None, None]``.
        weights: optional per-entry weights passed to ``rtnp.fill_hist``.
        h2clone: optional histogram to clone (and reset) instead of
            building a new one.
        axis_title: ``[x_title, y_title]``; defaults to two empty strings.
        opt: option string; if it contains ``'underflow'`` / ``'overflow'``
            values outside the axis are clamped to the centre of the
            first / last bin before filling.
        color: ROOT line colour.

    Returns:
        The filled histogram, with the resolved binning stored on it as
        ``h.binning``.

    Raises:
        RuntimeError: if ``binning`` is neither a numpy array nor a
            3-element sequence.
    """
    if title is None:
        title = name
    # The original used mutable list defaults and wrote the resolved values
    # back into them, so automatic binning leaked from one call to the next.
    if binning is None:
        binning = [None, None, None]
    if axis_title is None:
        axis_title = ['', '']

    if x.shape[0] == 0:
        print('Empty sample')
        h = rt.TH1D(name, title, 1, 0, 1)
    elif h2clone is not None:
        h = h2clone.Clone(name)
        h.SetTitle(title)
        h.Reset()
    elif isinstance(binning, np.ndarray):
        h = rt.TH1D(name, title, len(binning) - 1, binning)
    elif len(binning) == 3:
        # Work on a private copy so the caller's sequence is not modified.
        binning = list(binning)
        if binning[1] is None:
            binning[1] = min(x)
        if binning[2] is None:
            # With a long upper tail, cut the axis at the 90th percentile.
            if ((np.percentile(x, 95) - np.percentile(x, 50)) < 0.2 *
                    (max(x) - np.percentile(x, 95))):
                binning[2] = np.percentile(x, 90)
            else:
                binning[2] = max(x)
        if binning[0] is None:
            # Bin width from the inter-quartile range and the sample size.
            bin_w = 4 * (np.percentile(x, 75) -
                         np.percentile(x, 25)) / (len(x))**(1. / 3.)
            if bin_w == 0:
                bin_w = 0.5 * np.std(x)
            if bin_w == 0:
                bin_w = 1
            binning[0] = int((binning[2] - binning[1]) / bin_w) + 5

        h = rt.TH1D(name, title, binning[0], binning[1], binning[2])
    else:
        # The original used a bare ``raise`` with no active exception.
        raise RuntimeError('Binning not recognized')

    if 'underflow' in opt:
        m = h.GetBinCenter(1)
        x = np.copy(x)
        x[x < m] = m
    if 'overflow' in opt:
        M = h.GetBinCenter(h.GetNbinsX())
        x = np.copy(x)
        x[x > M] = M

    rtnp.fill_hist(h, x, weights=weights)
    h.SetXTitle(axis_title[0])
    h.SetYTitle(axis_title[1])
    h.SetLineColor(color)
    h.binning = binning
    return h
Example #30
0
def check_sample_category(analysis, sample, category):
    """Check that histogrammed classifier scores reproduce the sample yield.

    The nominal scores of ``sample`` in ``TARGET_REGION`` are filled into a
    20-bin histogram whose integral must agree, to 3 decimal places, with
    the value reported by ``sample.events``.
    """
    clf = analysis.get_clf(category, mass=125, load=True)
    all_scores = sample.scores(
        clf, category, TARGET_REGION, systematics=False)
    scores, weights = all_scores['NOMINAL']

    # Pad the range slightly so the extreme scores fall inside the axis.
    lower_edge = scores.min() - 1E-5
    upper_edge = scores.max() + 1E-5
    hist = Hist(20, lower_edge, upper_edge)
    fill_hist(hist, scores, weights)

    expected = sample.events(category, TARGET_REGION)[1].value
    assert_almost_equal(expected, hist.integral(), 3)
Example #31
0
def makefill2dhist(df_, titlehist, arrayx, arrayy, nvar1, nvar2):
    """Build a 2D histogram and fill it with two dataframe columns.

    The histogram is created by ``build2dhisto(titlehist, arrayx, arrayy)``
    and filled with the ``nvar1`` / ``nvar2`` columns of ``df_``. The filled
    histogram is returned.
    """
    histo = build2dhisto(titlehist, arrayx, arrayy)
    fill_hist(histo, df_[[nvar1, nvar2]].to_numpy())
    return histo
Example #32
0
def GetScoreTH(ClassNum, OutPreOther, Weights, MultiClass, xmin, xmax):
    """Return a scaled 20-bin score histogram for one class.

    Entries of ``OutPreOther`` (with their ``Weights``) are kept where
    ``MultiClass == ClassNum`` and filled into a ``TH1F`` named
    ``"h<ClassNum>"`` spanning ``[xmin, xmax]``. The histogram is then
    scaled by ``1 / GetSumOfWeights() / Getdx(hist)``.
    """
    in_class = MultiClass == ClassNum
    class_scores = OutPreOther[in_class]
    class_weights = Weights[in_class]

    hist = ROOT.TH1F("h" + str(ClassNum), "", 20, xmin, xmax)
    fill_hist(hist, class_scores, weights=class_weights)

    # Getdx is defined elsewhere; presumably the bin width -- confirm.
    scale = 1 / hist.GetSumOfWeights() / Getdx(hist)
    hist.Scale(scale)
    return hist
Example #33
0
 def cutBasedAMS(self):
     """Compute the AMS from the stored signal and background mass arrays.

     Both masses are histogrammed with 160 bins on [110, 150]. The signal
     count is 68% of the signal integral; the background count comes from
     ``self.computeBkg`` using the width given by ``self.getRealSigma``.
     """
     # Shared title and binning of the two temporary histograms.
     hist_args = (';m_{ee};Entries', 160, 110, 150)

     sig_hist = r.TH1F('sigHistTemp', *hist_args)
     fill_hist(sig_hist, self.sigMass, weights=self.sigWeights)
     N_sig = sig_hist.Integral() * 0.68
     sig_width = self.getRealSigma(sig_hist)

     bkg_hist = r.TH1F('bkgHistTemp', *hist_args)
     fill_hist(bkg_hist, self.bkgMass, weights=self.bkgWeights)
     N_bkg = self.computeBkg(bkg_hist, sig_width)

     return self.getAMS(N_sig, N_bkg)
Example #34
0
def get_background_signal(nbins=100, scale=1, sig_events=1000, bkg_events=1000):
    """Generate toy background and signal score histograms.

    Scores are drawn from normal distributions (mean -0.1 for background,
    +0.1 for signal, sigma 0.2), passed through ``transform`` and filled
    into two histograms with ``nbins`` bins on [-1, 1].

    Args:
        nbins: number of bins of each histogram.
        scale: forwarded to ``transform`` as ``scale``.
        sig_events: number of signal scores to draw.
        bkg_events: number of background scores to draw.

    Returns:
        Tuple ``(bkg_hist, sig_hist)``.
    """
    bkg_scores = transform(np.random.normal(-.1, .2, size=bkg_events),
                           scale=scale)
    sig_scores = transform(np.random.normal(.1, .2, size=sig_events),
                           scale=scale)
    bkg_hist = Hist(nbins, -1, 1)
    sig_hist = Hist(nbins, -1, 1)
    # fill_hist needs the target histogram as its first argument; the
    # original passed only the scores, so the histograms were never filled.
    fill_hist(bkg_hist, bkg_scores)
    fill_hist(sig_hist, sig_scores)
    return bkg_hist, sig_hist
Example #35
0
def fillHistFromNumpy(np_arr, histList=None, histname=""):
    """
        Fill a 1D or 2D hist from a np array
        Inputs: The numpy array and a list with [nbins, min, max] for 1D or
                [nbinsX, minX, maxX, nbinsY, minY, maxY] for 2D.
                An empty histname falls back to "hist".
        Return: The hist
        Raises: ValueError if histList does not have 3 or 6 entries.
    """
    settings = [] if histList is None else list(histList)
    # Validate before touching ROOT: the original tested len(hist) on the
    # integer placeholder (always a TypeError) and otherwise fell through
    # to fill_hist(0, ...).
    if len(settings) not in (3, 6):
        raise ValueError(
            'histList must hold 3 (1D) or 6 (2D) entries, got %d'
            % len(settings))

    from ROOT import TH1F, TH2F
    from root_numpy import fill_hist

    if histname == "":
        histname = "hist"

    # Use the classes imported above; the original referenced ROOT.TH1F
    # without importing the ROOT module in this function.
    if len(settings) == 3:
        hist = TH1F(histname, histname,
                    int(settings[0]), settings[1], settings[2])
    else:
        hist = TH2F(histname, histname,
                    int(settings[0]), settings[1], settings[2],
                    int(settings[3]), settings[4], settings[5])

    info('(fillHistFromNumpy) filling hist with name %s' % histname)
    fill_hist(hist, np_arr)
    return hist
Example #36
0
    def plot_hists(self, values, bins, **params):
        """Overlay step histograms of several arrays on one PDF page.

        Args:
            values: sequence of arrays, one histogram each.
            bins: bin edges passed to ``ax.hist``.
            **params: optional settings. The mere presence of ``norm``,
                ``ylog`` or ``root`` enables the option (the value is not
                inspected). ``x_label`` sets the axis label, ``labels``
                gives one legend entry per array and ``label_loc`` is the
                legend location.

        With ``root`` set, each array is also written as a width-normalised
        ``TH1F`` named after its label.

        Raises:
            ValueError: if ``root`` is requested without ``labels``.
        """
        labels = params.get('labels')
        write_root = 'root' in params
        # Fail before drawing anything: the labels name the ROOT histograms.
        if write_root and labels is None:
            raise ValueError("'labels' are required when 'root' is requested")

        _fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15, 10))

        norm = 'norm' in params

        if 'ylog' in params:
            ax.set_yscale('log' if norm else 'symlog')

        if 'x_label' in params:
            ax.set_xlabel(params['x_label'])

        label_loc = params.get('label_loc', 0)

        # The original zipped against None and crashed without labels.
        plot_labels = labels if labels is not None else [None] * len(values)
        for x_arr, label in izip(values, plot_labels):
            # NOTE(review): `normed` was replaced by `density` in newer
            # Matplotlib releases; kept for the version this file targets.
            ax.hist(x_arr, bins, histtype='step', lw=1.5, normed=norm, label=label)

        if labels:
            ax.legend(framealpha=0.0, frameon=False, loc=label_loc)

        self.pdf.savefig(bbox_inches='tight')
        plt.close()

        if write_root:
            # `bins` holds edges, so there is one bin fewer than edges.
            n_bins = len(bins) - 1
            for x_arr, label in izip(values, labels):
                histo = r.TH1F(label, label, n_bins, np.amin(bins), np.amax(bins))
                rnp.fill_hist(histo, x_arr)
                integral = histo.Integral()
                # An empty selection cannot be normalised.
                if integral != 0:
                    histo.Scale(1/integral, 'width')
                histo.Write()
Example #37
0
def efficiency_graph(pass_function, function_inputs, xs, bins=None, error=0.005):
    """Build an efficiency graph of ``pass_function`` as a function of ``xs``.

    Args:
        pass_function: callable applied to ``function_inputs``; its result
            is used as the per-event weight of the "pass" histogram.
        function_inputs: argument handed to ``pass_function``.
        xs: values histogrammed on the x axis.
        bins: bin edges; when ``None`` they are derived automatically from
            percentiles of ``xs``.
        error: target efficiency uncertainty per bin for automatic binning.

    Returns:
        A ``Graph`` holding pass / total.
    """
    pass_results = pass_function(function_inputs)
    if bins is None: # Automatic binning
        # Compute the number of bins such that the error on the efficiency is equal to 'error' in each bin
        # The calculation is based on binomial errors and assumes that the efficiency is flat (that the distributions of all and selected events are the same)
        k = float(np.count_nonzero(pass_results))
        n = float(len(pass_results))
        percentiles = [0., 100.]
        # k == n makes (1 - k/n) zero; keep the single full-range bin then.
        if 0 < k < n:
            nbins = (error*n)**2/k / (1-k/n)
            # Compute the bin boundaries with the same number of events in all bins
            candidate = np.arange(0., 100., 100./nbins)
            # With nbins <= 1 only the 0th percentile is produced, which
            # would collapse to one edge; keep [0, 100] in that case.
            if len(candidate) >= 2:
                candidate[-1] = 100.
                percentiles = candidate
        bins = np.unique(np.percentile(xs, percentiles))
    # Fill histograms of selected and all events and compute efficiency
    histo_pass = Hist(bins)
    histo_total = Hist(bins)
    fill_hist(histo_pass, xs, pass_results)
    fill_hist(histo_total, xs)
    efficiency = Graph()
    efficiency.Divide(histo_pass, histo_total)
    return efficiency
Example #38
0
    def calculateBkgRej(self, discriminant, signal_idx, bkg_idx, weights=None):
        '''
        This does essentially the same thing as the plotDiscriminant method, except that it does it for
        an arbitrary discriminant and doesn't save the histograms. It just calculates the score.

        discriminant: array of discriminant values for all events.
        signal_idx / bkg_idx: indexers selecting signal / background entries.
        weights: optional per-event weights, indexed the same way.

        Returns 1 / (1 - x), where x is the value given by fn.GetBGRej50 for
        the ROC curve, or -1.0 when that value equals 1.
        '''
        import ROOT as root
        from ROOT import TH1F
        import numpy as np
        from root_numpy import fill_hist
        import functions as fn

        # stop showing plots to screen
        root.gROOT.SetBatch(True)

        bins = 100
        # histogram over the full range of the discriminant
        lo = np.min(discriminant)
        hi = np.max(discriminant)
        hist_bkg = TH1F("Background Discriminant", "Discriminant", bins, lo, hi)
        hist_sig = TH1F("Signal Discriminant", "Discriminant", bins, lo, hi)

        # fill and normalise both histograms; weights=None means unweighted
        for hist, idx in ((hist_bkg, bkg_idx), (hist_sig, signal_idx)):
            selected_weights = weights[idx] if weights is not None else None
            fill_hist(hist, discriminant[idx], selected_weights)
            integral = hist.Integral()
            if integral != 0:
                hist.Scale(1/integral)

        # the side with the larger median decides whether the ROC curve uses a right or a left cut
        sig_median = np.median(discriminant[signal_idx])
        bkg_median = np.median(discriminant[bkg_idx])
        roc_cut = 'R' if sig_median > bkg_median else 'L'

        roc_graph = fn.RocCurve_SingleSided(hist_sig, hist_bkg, self.sig_eff,self.bkg_eff, roc_cut)
        fpr_05 = fn.GetBGRej50(roc_graph)

        if fpr_05 != 1:
            return float(1/(1-fpr_05))

        return -1.0
Example #39
0
def check_events(analysis, sample, category, region):
    """Cross-check event yields of ``sample`` obtained in different ways.

    Compares the weighted classifier-score histogram, the merged records,
    ``sample.events`` and ``sample.draw_array``, then doubles
    ``sample.scale`` and checks that the yields double as well. The
    original scale is always restored, even when an assertion fails.
    """
    clf = analysis.get_clf(category, mass=125, load=True)
    scores, weights = sample.scores(
        clf, category, region,
        systematics=False)['NOMINAL']
    rec = sample.merged_records(
        category, region)
    sample_events = sample.events(category, region)[1].value
    hist = Hist(5, scores.min() - 1, scores.max() + 1)
    fill_hist(hist, scores, weights)
    clf_events = hist.integral()

    # test events consistency
    assert_equal(weights.shape[0], rec['weight'].shape[0])
    assert_array_equal(weights, rec['weight'])
    assert_almost_equal(clf_events, weights.sum(), 1)
    assert_almost_equal(sample_events, rec['weight'].sum(), 1)
    assert_almost_equal(sample_events, clf_events, 1)

    # test draw_array
    hist = Hist(1, -1000, 1000)
    sample.draw_array({'tau1_charge': hist}, category, region)
    assert_almost_equal(hist.integral(), sample_events, 1)

    # test scaling
    orig_scale = sample.scale
    sample.scale *= 2.
    try:
        scores, weights = sample.scores(
            clf, category, region,
            systematics=False)['NOMINAL']
        hist.Reset()
        fill_hist(hist, scores, weights)
        scale_clf_events = hist.integral()
        assert_almost_equal(scale_clf_events, weights.sum(), 1)
        assert_almost_equal(scale_clf_events, 2. * clf_events, 1)
        scale_sample_events = sample.events(category, region)[1].value
        assert_almost_equal(scale_sample_events, 2. * sample_events, 1)
    finally:
        # do not leave the shared sample with a doubled scale on failure
        sample.scale = orig_scale
Example #40
0
def test_fill_hist():
    """Exercise ``rnp.fill_hist`` on 1D/2D/3D histograms and its error cases."""
    np.random.seed(0)
    # Array sizes must be integers: float literals such as 1E6 are rejected
    # by current NumPy releases.
    n_large = 1000000
    n_small = 10000
    data1D = np.random.randn(n_large)
    w1D = np.empty(n_large)
    w1D.fill(2.)
    data2D = np.random.randn(n_large, 2)
    data3D = np.random.randn(n_small, 3)

    a = TH1D('th1d', 'test', 1000, -5, 5)
    rnp.fill_hist(a, data1D)
    # one element lies beyond hist range; that's why it's not 1e6
    assert_almost_equal(a.Integral(), 999999.0)

    a_w = TH1D('th1dw', 'test', 1000, -5, 5)
    rnp.fill_hist(a_w, data1D, w1D)
    assert_almost_equal(a_w.Integral(), 999999.0 * 2)

    b = TH2D('th2d', 'test', 100, -5, 5, 100, -5, 5)
    rnp.fill_hist(b, data2D)
    assert_almost_equal(b.Integral(), 999999.0)

    c = TH3D('th3d', 'test', 10, -5, 5, 10, -5, 5, 10, -5, 5)
    rnp.fill_hist(c, data3D)
    assert_almost_equal(c.Integral(), 10000.0)

    # array and weights lengths do not match
    assert_raises(ValueError, rnp.fill_hist, c, data3D, np.ones(10))

    # weights is not 1D
    assert_raises(ValueError, rnp.fill_hist, c, data3D,
        np.ones((data3D.shape[0], 1)))

    # array not 2-d when filling 2D/3D histogram
    for h in (b, c):
        assert_raises(ValueError, rnp.fill_hist, h, np.random.randn(n_small))

    # length of second axis does not match dimensionality of histogram
    for h in (a, b, c):
        assert_raises(ValueError, rnp.fill_hist, h, np.random.randn(n_small, 4))

    # wrong type
    h = list()
    a = np.random.randn(100)
    assert_raises(TypeError, rnp.fill_hist, h, a)
Example #41
0
def test_fill_hist():
    """Check ``rnp.fill_hist`` for 1D, 2D and 3D histograms and bad inputs."""
    n_samples = 1000
    sample_1d = RNG.randn(n_samples)
    weights_1d = np.full(n_samples, 2.)
    sample_2d = RNG.randn(n_samples, 2)
    sample_3d = RNG.randn(n_samples, 3)

    # unweighted 1D fill
    hist_1d = TH1D('th1d', 'test', 100, -5, 5)
    rnp.fill_hist(hist_1d, sample_1d)
    assert_almost_equal(hist_1d.Integral(), n_samples)

    # weighted 1D fill: every entry counts twice
    hist_1d_weighted = TH1D('th1dw', 'test', 100, -5, 5)
    rnp.fill_hist(hist_1d_weighted, sample_1d, weights_1d)
    assert_almost_equal(hist_1d_weighted.Integral(), n_samples * 2)

    # 2D fill
    hist_2d = TH2D('th2d', 'test', 100, -5, 5, 100, -5, 5)
    rnp.fill_hist(hist_2d, sample_2d)
    assert_almost_equal(hist_2d.Integral(), n_samples)

    # 3D fill
    hist_3d = TH3D('th3d', 'test', 10, -5, 5, 10, -5, 5, 10, -5, 5)
    rnp.fill_hist(hist_3d, sample_3d)
    assert_almost_equal(hist_3d.Integral(), n_samples)

    # weights shorter than the data
    assert_raises(ValueError, rnp.fill_hist, hist_3d, sample_3d, np.ones(10))

    # weights with a second axis
    column_weights = np.ones((sample_3d.shape[0], 1))
    assert_raises(ValueError, rnp.fill_hist, hist_3d, sample_3d,
                  column_weights)

    # flat array given to a 2D/3D histogram
    for target in (hist_2d, hist_3d):
        assert_raises(ValueError, rnp.fill_hist, target, RNG.randn(10))

    # four columns match none of the histograms
    for target in (hist_1d, hist_2d, hist_3d):
        assert_raises(ValueError, rnp.fill_hist, target, RNG.randn(10, 4))

    # a list is not a histogram
    not_a_hist = list()
    values = RNG.randn(10)
    assert_raises(TypeError, rnp.fill_hist, not_a_hist, values)
Example #42
0
    def decisionFunctionCanvas(self):
        '''
        Build normalised signal and background histograms of the decision
        function and store clones of them on self.df_sig and self.df_bkg.

        This is only really defined for the BDT, not AGILE NN since that doesn't
        give a "score".

        Returns False when no decision function output was set, True otherwise.
        '''
        import ROOT as root
        from ROOT import TH2D, TCanvas, TFile, TNamed, TH1F, TLegend
        import numpy as np
        from root_numpy import fill_hist
        import functions as fn
        import os

        # nothing to draw without decision function output
        if len(self.decision_function) == 0:
            return False

        df_sig = TH1F("Signal Decision Function", "Score", 100, -1.0, 1.0)
        df_bkg = TH1F("Background Decision Function", "Score", 100, -1.0, 1.0)
        selections = ((df_sig, self.df_sig_idx), (df_bkg, self.df_bkg_idx))

        # fill signal first, then background, weighted when weights exist
        for hist, idx in selections:
            if self.df_weights is None:
                fill_hist(hist, self.decision_function[idx])
            else:
                fill_hist(hist, self.decision_function[idx], self.df_weights[idx])

        # normalise whatever is non-empty
        for hist, _idx in selections:
            if hist.Integral() != 0:
                hist.Scale(1./hist.Integral())

        # drawing options and colours
        df_sig.SetLineColor(4)
        df_sig.SetFillStyle(3004)
        df_bkg.SetLineColor(2)
        df_bkg.SetFillStyle(3005)

        # common y axis maximum with some headroom
        max_y = max(df_sig.GetMaximum(), df_bkg.GetMaximum())
        df_sig.SetMaximum(max_y*1.2)
        df_bkg.SetMaximum(max_y*1.2)

        # keep detached clones on the instance
        self.df_sig = df_sig.Clone()
        self.df_sig.SetDirectory(0)
        self.df_bkg = df_bkg.Clone()
        self.df_bkg.SetDirectory(0)
        return True
Example #43
0
def hist_scores(hist, scores, systematic="NOMINAL"):
    """Accumulate the scores of every sample into ``hist``.

    ``scores`` is iterated as ``(sample, scores_dict)`` pairs; for each
    pair the ``(values, weights)`` entry stored under ``systematic`` is
    filled into ``hist``.
    """
    for _sample, per_systematic in scores:
        values, weights = per_systematic[systematic]
        fill_hist(hist, values, weights)
Example #44
0
def _fill_sample_hists(hist_template, sample_scores):
    """Return one filled, decorated histogram (with systematics) per sample."""
    hists = []
    for sample, scores_dict in sample_scores:
        hist = hist_template.Clone(title=sample.label)
        scores, weight = scores_dict["NOMINAL"]
        fill_hist(hist, scores, weight)
        hist.decorate(**sample.hist_decor)
        # one extra histogram per non-nominal systematic term
        hist.systematics = {}
        for sys_term in scores_dict.keys():
            if sys_term == "NOMINAL":
                continue
            sys_hist = hist_template.Clone()
            scores, weight = scores_dict[sys_term]
            fill_hist(sys_hist, scores, weight)
            hist.systematics[sys_term] = sys_hist
        hists.append(hist)
    return hists


def plot_clf(
    background_scores,
    category,
    signal_scores=None,
    signal_scale=1.0,
    data_scores=None,
    name=None,
    draw_histograms=True,
    draw_data=False,
    save_histograms=False,
    hist_template=None,
    bins=10,
    min_score=0,
    max_score=1,
    signal_colors=cm.spring,
    systematics=None,
    unblind=False,
    **kwargs
):
    """Histogram classifier scores of backgrounds, signals and data and draw them.

    Args:
        background_scores: iterable of ``(sample, scores_dict)`` pairs;
            ``scores_dict`` maps a systematic term to ``(scores, weight)``
            and must contain ``"NOMINAL"``.
        category: forwarded to ``draw``.
        signal_scores: same structure as ``background_scores``, or ``None``.
        signal_scale: forwarded to ``draw``.
        data_scores: ``(data_sample, scores)`` pair, or ``None``.
        name: optional suffix of the output name.
        draw_histograms: call ``draw`` (linear and log y) when true.
        draw_data: data are only histogrammed when this is true and
            ``unblind`` is not ``False``.
        save_histograms: not used in this function.
        hist_template: histogram cloned for every sample; built from
            ``bins`` / ``min_score`` / ``max_score`` when ``None``.
        bins: number of bins, or an iterable of bin edges.
        min_score, max_score: axis range for fixed-width bins.
        signal_colors, systematics, **kwargs: forwarded to ``draw``.
        unblind: ``False`` hides data; a float restricts data to scores
            below a signal-efficiency cut; ``True`` or a value >= 1 also
            logs the yields.

    Returns:
        Tuple ``(bkg_hists, sig_hists, data_hist)``; the last two may be
        ``None``.
    """
    if hist_template is None:
        if hasattr(bins, "__iter__"):
            # variable width bins
            hist_template = Hist(bins)
            min_score = min(bins)
            max_score = max(bins)
        else:
            hist_template = Hist(bins, min_score, max_score)

    bkg_hists = _fill_sample_hists(hist_template, background_scores)

    if signal_scores is not None:
        sig_hists = _fill_sample_hists(hist_template, signal_scores)
    else:
        sig_hists = None

    if data_scores is not None and draw_data and unblind is not False:
        data, data_scores = data_scores
        if isinstance(unblind, float):
            if sig_hists is not None:
                # unblind up to `unblind` % signal efficiency
                # NOTE(review): the efficiency passed below is the constant
                # 0.3, not `unblind` -- confirm which one is intended.
                sum_sig = sum(sig_hists)
                cut = efficiency_cut(sum_sig, 0.3)
                data_scores = data_scores[data_scores < cut]
        data_hist = hist_template.Clone(title=data.label)
        data_hist.decorate(**data.hist_decor)
        fill_hist(data_hist, data_scores)
        if unblind >= 1 or unblind is True:
            log.info("Data events: %d" % sum(data_hist))
            log.info("Model events: %f" % sum(sum(bkg_hists)))
            for hist in bkg_hists:
                log.info("{0} {1}".format(hist.GetTitle(), sum(hist)))
            log.info("Data / Model: %f" % (sum(data_hist) / sum(sum(bkg_hists))))
    else:
        data_hist = None

    if draw_histograms:
        output_name = "event_bdt_score"
        if name is not None:
            output_name += "_" + name
        # one plot with a linear and one with a logarithmic y axis
        for logy in (False, True):
            draw(
                data=data_hist,
                model=bkg_hists,
                signal=sig_hists,
                signal_scale=signal_scale,
                category=category,
                name="BDT Score",
                output_name=output_name,
                show_ratio=data_hist is not None,
                model_colors=None,
                signal_colors=signal_colors,
                systematics=systematics,
                logy=logy,
                **kwargs
            )
    return bkg_hists, sig_hists, data_hist
Example #45
0
def histogram_scores(hist_template, scores,
                     min_score=None, max_score=None,
                     inplace=False):
    """Fill a histogram with classifier scores, optionally windowed.

    Args:
        hist_template: histogram to fill (``inplace=True``) or to clone
            and reset first (``inplace=False``).
        scores: an ``np.ndarray`` of scores, a ``(scores, weight)`` tuple
            (all weights must equal 1), or a dict mapping systematic terms
            to ``(scores, weight)`` pairs including ``'NOMINAL'``.
        min_score: if given, only scores strictly above it are kept.
        max_score: if given, only scores strictly below it are kept.
        inplace: fill ``hist_template`` itself instead of a clone.

    Returns:
        The filled histogram. For dict input its ``systematics`` attribute
        holds one histogram per non-nominal term.

    Raises:
        TypeError: if ``scores`` is none of the supported types.
    """
    def _window(values, weights):
        # Apply the lower and then the upper cut to both arrays.
        if min_score is not None:
            keep = values > min_score
            values, weights = values[keep], weights[keep]
        if max_score is not None:
            keep = values < max_score
            values, weights = values[keep], weights[keep]
        return values, weights

    if inplace:
        hist = hist_template
    else:
        hist = hist_template.Clone(name=hist_template.name + "_scores")
        hist.Reset()

    if min_score is not None:
        log.info("cutting out scores below %f" % min_score)
    if max_score is not None:
        log.info("cutting out scores above %f" % max_score)

    if isinstance(scores, dict):
        # non-data with possible systematics; nominal first
        nom_scores, nom_weight = _window(*scores['NOMINAL'])
        fill_hist(hist, nom_scores, nom_weight)

        variations = {}
        for sys_term, (sys_scores, sys_weight) in scores.items():
            if sys_term == 'NOMINAL':
                continue
            sys_scores, sys_weight = _window(sys_scores, sys_weight)
            sys_hist = hist.Clone(
                name=hist.name + "_" + systematic_name(sys_term))
            sys_hist.Reset()
            fill_hist(sys_hist, sys_scores, sys_weight)
            variations[sys_term] = sys_hist
        hist.systematics = variations
    elif isinstance(scores, tuple):
        # data: weights are only checked, never used for filling
        data_scores, data_weight = _window(*scores)
        assert (data_weight == 1).all()
        fill_hist(hist, data_scores)
    elif isinstance(scores, np.ndarray):
        selected = scores
        if min_score is not None:
            selected = selected[selected > min_score]
        if max_score is not None:
            selected = selected[selected < max_score]
        fill_hist(hist, selected)
    else:
        raise TypeError("scores not an np.array, tuple or dict")
    return hist

# Plot the error on the evaluation data without normalization.

# Undo the normalization by scaling every error with max_outputs[0].
evaluation_error_no_norm = [x * max_outputs[0] for x in evaluation_error]
evaluation_errors_last_epoch_no_norm = [
    x * max_outputs[0] for x in evaluation_errors_last_epoch]

titlename="eta="+str(eta)+", mini_batch_size="+str(mini_batch_size)+ ", lambda="+str(lm_da)+",\n Cost="+cost_function+", weight_initialization="+weight_initialization
titlename_no_norm = titlename + "_without_normalization"

# Total error per epoch (epochs counted from 1).
plt.figure(2)
plt.title(titlename_no_norm)
plt.xlabel("epochs")
plt.ylabel("total error validation data")
x_range = list(range(1, epochs + 1))
plt.plot(x_range, evaluation_error_no_norm)
plt.savefig("error_on_val_data_" + titlename_no_norm + ".png")

# Single-argument print() behaves the same on Python 2 and Python 3.
print("total error on validation_data set after last training")
print(evaluation_error_no_norm[-1])

# Distribution of the individual errors after the last epoch.
hist_no_norm = TH1D('hist_no_norm', titlename_no_norm, 50, -100, 100)
fill_hist(hist_no_norm, evaluation_errors_last_epoch_no_norm)
canvas = TCanvas()
hist_no_norm.GetXaxis().SetTitle("desired Chi^2- outputted Chi^2")
hist_no_norm.Draw()
canvas.SaveAs('error_on_val_data_' + titlename_no_norm + '_hist.png')
#-----------------------------------------------------
Example #47
0
def apply_tri_selection(rec, lumi):
    """Apply the trident selection cuts cumulatively and write the plots.

    For every cut in turn the surviving entries of several quantities are
    collected, then drawn as overlaid histograms into
    'trident_selection.pdf'. The invariant mass of the entries passing all
    cuts is written as a histogram to 'invariant_mass.root'.

    Args:
        rec: container indexable by field name (e.g. rec['electron_p']).
        lumi: if truthy, every mass entry is weighted by
            1 / (bin_width * lumi); otherwise the weight is 1.

    NOTE(review): uses dict.iteritems(), so this runs on Python 2 only.
    """

    electron_p      = rec['electron_p']
    electron_px     = rec['electron_px']
    electron_py     = rec['electron_py']
    electron_chi2   = rec['electron_chi2']
    electron_has_l1 = rec['electron_has_l1']
    # transverse momentum from the x and y components
    electron_pt = np.sqrt(np.power(electron_px, 2) + np.power(electron_py, 2))
    
    positron_p      = rec['positron_p']
    positron_px     = rec['positron_px']
    positron_py     = rec['positron_py']
    positron_chi2   = rec['positron_chi2']
    positron_d0     = rec['positron_d0']
    positron_has_l1 = rec['positron_has_l1']
    positron_has_l2 = rec['positron_has_l2']

    positron_pt = np.sqrt(np.power(positron_px, 2) + np.power(positron_py, 2))

    top_cluster_time  = rec['top_cluster_time']
    bot_cluster_time  = rec['bot_cluster_time']
    cluster_time_diff = top_cluster_time - bot_cluster_time
    
    top_time = rec['top_time']
    
    bot_time = rec['bot_time']

    mass = rec['invariant_mass']
    v0_p = rec["v0_p"]

    # cluster time minus track time, measured relative to an offset of 43
    top_track_cluster_dt = top_cluster_time - top_time
    abs_top_track_cluster_dt = np.absolute(top_track_cluster_dt - 43)
    bot_track_cluster_dt = bot_cluster_time - bot_time
    abs_bot_track_cluster_dt = np.absolute(bot_track_cluster_dt - 43)
    track_cluster_dt_cut = ((abs_top_track_cluster_dt < 4.5) 
                            & (abs_bot_track_cluster_dt < 4.5))

    # transverse momentum asymmetry between electron and positron
    asym = (electron_pt - positron_pt)/(electron_pt + positron_pt)
    #
    # Define cuts
    #
    # ordered, because the cuts are applied cumulatively in this order and
    # the keys double as legend labels
    cuts = collections.OrderedDict()

    # Base cuts used to reduce accidentals
    cuts['Radiative cut'] = v0_p > 0.8*1.056 # GeV
    cuts['abs(Ecal clust time - trk time) - 43 ns < 4.5'] = track_cluster_dt_cut
    cuts['$p(V_0) < 1.2 E_{beam}$'] = v0_p < 1.2*1.056 # GeV
    cuts['trk $\chi^2$ < 40'] = (electron_chi2 < 40) & (positron_chi2 < 40)
    cuts['Ecal clust pair dt < 2 ns'] = np.absolute(cluster_time_diff) < 2
    cuts['l1 & l2 hit'] = (positron_has_l1 == 1) & (positron_has_l2 == 1)
    cuts['$d_{0}(e^+) < 1.1$'] = positron_d0 < 1.1
    cuts['$p_t(e^-) - p_t(e^+)/p_t(e^-) + p_t(e^+)$'] = asym < .47
    
    # first entry of every list is the uncut distribution
    labels = ['Opp. Ecal clusters, trk-cluster match $\chi^2 < 10$, $p(e^-)<0.75E_{beam}$']
    clust_dt_arr = [cluster_time_diff]
    v0_p_arr = [v0_p]
    electron_p_arr = [electron_p]
    positron_d0_arr = [positron_d0]
    electron_chi2_arr = [electron_chi2]
    asym_arr = [asym]
    abs_top_cluster_dt_arr = [abs_top_track_cluster_dt]
    
    # running AND of all cuts applied so far
    cut = np.ones(len(v0_p), dtype=bool)
    for key, value in cuts.iteritems():
        cut = cut & value
        clust_dt_arr.append(cluster_time_diff[cut])
        v0_p_arr.append(v0_p[cut])
        electron_p_arr.append(electron_p[cut])
        positron_d0_arr.append(positron_d0[cut])
        electron_chi2_arr.append(electron_chi2[cut])
        abs_top_cluster_dt_arr.append(abs_top_track_cluster_dt[cut])
        asym_arr.append(asym[cut])
        labels.append(key)

    # NOTE: inside this function `plt` is the project's Plotter object
    plt = Plotter.Plotter('trident_selection.pdf')
    
    # only this call passes root=True
    plt.plot_hists(clust_dt_arr, 
                   np.linspace(-10, 10, 201),
                   labels=labels,
                   x_label='Top cluster time - Bottom cluster time (ns)',
                   label_loc=2, 
                   norm=True,
                   ylog=True,
                   root=True)

    # no `norm` is passed for this plot
    plt.plot_hists(v0_p_arr, 
                   np.linspace(0, 1.5, 151),
                   labels=labels,
                   label_loc=2,
                   x_label='$V_{0}(p)$ (GeV)',
                   ylog=True)
    
    plt.plot_hists(electron_p_arr, 
                   np.linspace(0, 1.5, 151), 
                   labels=labels, 
                   x_label='$p(e^-)$ (GeV)',
                   norm=True,
                   label_loc=4, 
                   ylog=True)

    plt.plot_hists(positron_d0_arr, 
                   np.linspace(-20, 20, 201),
                   labels=labels, 
                   x_label='$d_{0}(e^{+})$', 
                   norm=True,
                   label_loc=2, 
                   ylog=True)

    plt.plot_hists(electron_chi2_arr, 
                   np.linspace(0, 100, 201),
                   labels=labels, 
                   x_label='Track $\chi^2$', 
                   norm=True,
                   label_loc=1, 
                   ylog=True)

    plt.plot_hists(asym_arr, 
                   np.linspace(-1, 1, 201),
                   labels=labels, 
                   x_label='$p_t(e^-) - p_t(e^+)/p_t(e^-) + p_t(e^+)$',
                   norm=True,
                   label_loc=2, 
                   ylog=True)

    plt.plot_hists(abs_top_cluster_dt_arr,
                   np.linspace(0, 60, 121),
                   labels=labels,
                   x_label='abs(ECal cluster time - track time - 43) ns',
                   norm=True,
                   label_loc=1,
                   ylog=True)

    plt.close()

    # NOTE: `file` shadows the Python 2 builtin of the same name
    file = r.TFile("invariant_mass.root", "recreate")

    mass_histo = r.TH1F("invariant_mass", "invariant_mass", 2000, 0., 0.1)
    #mass_histo = r.TH1F("invariant_mass", "invariant_mass", 50, 0., 0.1)
    mass_histo.GetXaxis().SetTitle("m(e^+e^-) (GeV)")
    mass_histo.GetYaxis().SetTitle("#sigma(#mub)")
    bin_width = mass_histo.GetXaxis().GetBinWidth(1)
        
    # one weight per entry surviving all cuts (`cut` is the final mask)
    weights = np.empty(len(mass[cut]))
    if lumi: weights.fill(1/(bin_width*float(lumi)))
    else: weights.fill(1)
        #rnp.fill_hist(mass_histo, mass[selection], weights=weights)
    rnp.fill_hist(mass_histo, mass[cut], weights=weights)    
    mass_histo.Write()
    file.Close()
def Hist_comp_ratios (dec, bkg) :
    """Compare the weighted mean class probability versus m_T with a stored histogram.

    Loads weights, class probabilities, inputs and classes from pickle
    files in the working directory, keeps the rows of class
    GetClassIndex(bkg) - 1 and decay channel `dec`, and histograms
    sum(prob * weight) (h1) and sum(weight) (h2) versus the first input
    column. Their ratio h3 is drawn on top of a histogram read from the
    file given by GetClassProbaPath(bkg). For dec == 0.0 the same is also
    done for rows whose channel value is 1.0. The canvas is saved under
    plots/ as a PNG.

    Args:
        dec: decay channel value compared against the second input column.
        bkg: background name, used for the class lookup and in file names.

    NOTE(review): uses Python 2 print statements and relies on filter()
    returning a list.
    NOTE(review): pickle.load must only be used on trusted files; the file
    handles opened here are never closed explicitly.
    """
#    weights = pickle.load( open( "weights_train.pck", "rb" ) ) 
#    probas = pickle.load( open( "class_proba.pck", "rb" ) ) 
#    mt_dec = pickle.load( open( "inputs_train.pck", "rb" ) )    
#    classes = pickle.load( open( "targets_train.pck", "rb" ) )

    weights = pickle.load( open( "weights.pck", "rb" ) ) 
    probas = pickle.load( open( "class_probaWholeSample.pck", "rb" ) ) 
    mt_dec = pickle.load( open( "inputs.pck", "rb" ) )    
    classes = pickle.load( open( "classes.pck", "rb" ) )
 

   
    
    #put mt_dec weights and the RELEVANT class probability together
    # columns of all_data: 0 = mt_dec[:,0], 1 = mt_dec[:,1], 2 = class,
    # 3 = weight, 4 = probability of the selected class
    class_num = (GetClassIndex(bkg)-1)
    all_data = np.transpose(np.vstack((mt_dec[:,0],mt_dec[:,1], classes, weights, probas[:,class_num])))
    all_data = np.array(filter(lambda x: x[2] == class_num, all_data))
    
    
    #extract the relevant values for the specific decay channel
    Filter = np.array(filter(lambda x: x[1] == dec, all_data))
    MT = Filter[:,0]    
    Weight = Filter[:,3]
    Prob_bkg = Filter[:,4] 

           
    
    SumProbXweight = np.multiply(Prob_bkg,Weight)
    
   
    # each histogram is created first, then a new ROOT file is opened and
    # the histogram is written; the order of these statements matters
    h1 = TH1D("h1","SumProbXweight_"+bkg+str(dec), 25 , 0.0 ,250.)
    root_open("plots/ROOTfiles/H1_SumProbXweight_"+bkg+str(dec)+"Samptot.root", 'recreate')
    fill_hist(h1,MT, weights=SumProbXweight)
    h1.Write()
    h2 = TH1D("h2","SumWeight_"+bkg+str(dec), 25 , 0.0 ,250.)
    root_open("plots/ROOTfiles/H1_SumWeight_"+bkg+str(dec)+"Samptot.root", 'recreate')
    fill_hist(h2,MT,weights=Weight)
    h2.Write()
    
    # h3 = h1 / h2: weighted mean class probability per bin
    h3 = h1.Clone("h3")
    h3.Divide(h2)
    
    # for dec == 0.0 repeat the procedure for rows with channel value 1.0
    if (dec == 0.0) :
        Filter2 = np.array(filter(lambda x: x[1] == 1.0, all_data))
        MT2 = Filter2[:,0]    
        Weight2 = Filter2[:,3]
        Prob_bkg2 = Filter2[:,4]
        SumProbXweight2 = np.multiply(Prob_bkg2,Weight2)
        h12 = TH1D("h12","SumProbXweight_"+bkg+str(1.0), 25 , 0.0 ,250.)
        root_open("plots/ROOTfiles/H1_SumProbXweight_"+bkg+str(1.0)+"Samptot.root", 'recreate')
        fill_hist(h12,MT2, weights=SumProbXweight2)
        h12.Write()
        h22 = TH1D("h22","SumWeight_"+bkg+str(1.0), 25 , 0.0 ,250.)
        root_open("plots/ROOTfiles/H1_SumWeight_"+bkg+str(1.0)+"Samptot.root", 'recreate')
        fill_hist(h22,MT2,weights=Weight2)
        h22.Write()
        
        h32 = h12.Clone("h32")
        h32.Divide(h22)
        
        h12.SetStats(0)
        h22.SetStats(0)
        h32.SetStats(0)
        h12.SetLineColor(2)
        h22.SetLineColor(2)
       
        h32.SetLineColor(0)
        h32.SetMarkerStyle(23)
        h32.SetMarkerColor(2)
        h32.SetMarkerSize(1.2)
        
        
    #create Canvas and save the plot as png
    c = Canvas()
    c.Divide(2,2)
    # pad 1: sum(prob * weight); the y range is set on h12, which is drawn first
    c.cd(1)
    h1.SetStats(0)
    if (dec == 0.0) :
        if (h1.GetMaximum() > h12.GetMaximum()) :
            print "h1"
            h12.GetYaxis().SetRangeUser(0.,h1.GetMaximum())
        else :
            print "h12"
            h12.GetYaxis().SetRangeUser(0.,h12.GetMaximum())
        h12.Draw("HIST")
    h1.Draw("HIST SAME")

    # pad 2: sum(weight), same scheme as pad 1
    c.cd(2)
    h2.SetStats(0)         
    if (dec == 0.0) :
        if (h2.GetMaximum() > h22.GetMaximum()) :
            h22.GetYaxis().SetRangeUser(0.,h2.GetMaximum())
        else :
            h22.GetYaxis().SetRangeUser(0.,h22.GetMaximum())
        h22.Draw("HIST")
    h2.Draw("HIST SAME")
    # pad 3: stored histogram with the ratio histograms on top
    c.cd(3)
         
    f1 = root_open(GetClassProbaPath(bkg))
    #get the 2 histograms for the 2 decay channels: 1 track & 3 tracks
    H1 = f1.Get("h_w_2d")
    if (dec == 10.0) :
        #3 tracks
        h_data = Hist(list(H1.xedges()))
        h_data[:] = H1[:,2]
    else :
        #1 track
        h_data = Hist(list(H1.xedges()))
        h_data[:] = H1[:,1]
     
    h_data.GetXaxis().SetRangeUser(0.,250.)
    h_data.GetYaxis().SetRangeUser(0.,1.)
    h_data.fillstyle = '/'
    h_data.fillcolor = (255,255,0) #yellow
    h_data.SetStats(0)    
    h_data.Draw("HIST")
     
#    h3.SetFillColor(4) #blue
#    h3.SetFillStyle(3005)
    h3.SetLineColor(0)
    h3.SetMarkerStyle(21)
    h3.SetMarkerColor(4)
    h3.SetMarkerSize(1.2)
    h3.SetStats(0)    
    h3.SetTitle(bkg+str(dec))
    h3.GetXaxis().SetTitle("m_{T}")
    h3.GetYaxis().SetTitle("Class probability")        
         
         
    h3.Draw("HIST P SAME")
    if (dec == 0.0) :
        h32.Draw("HIST P SAME")
    c.Update()    
    
    
    # three legend entries when the second ratio histogram exists, else two
    if (dec == 0.0) :        
        legend = Legend(3, leftmargin=0.45, margin=0.3)
        legend.AddEntry(h3, "training, no #pi^{0}", style='P')
        legend.AddEntry(h32, "training, with #pi^{0}", style='P')
        legend.AddEntry(h_data, "data", style='F')
        legend.Draw()
    else :    
        legend = Legend(2, leftmargin=0.45, margin=0.3)
        legend.AddEntry(h3, "training", style='P')
        legend.AddEntry(h_data, "data", style='F')
        legend.Draw()
    
    c.SaveAs("plots/H1_"+bkg+str(dec)+"_RatioCompSamptot.png")
# Cross-validated evaluation of an XGBoost regressor, followed by two
# diagnostic plots (predicted-vs-true scatter and relative-residual histogram).
#clf = linear_model.SGDRegressor()
clf = xgb.XGBRegressor(max_depth=4, learning_rate=0.1, n_estimators=400)

# One score per fold.  With scoring='neg_mean_squared_error' scikit-learn
# returns the *negated* MSE, so the values printed here are <= 0.
scores = cross_val_score(clf, data_reduced_n, data_target, cv=5, scoring='neg_mean_squared_error')
print(scores)
print("MSE: %0.3f (+/- %0.3f)" % (scores.mean(), scores.std() * 2))

# Out-of-fold prediction for every sample.
predicted = cross_val_predict(clf, data_reduced_n, data_target, cv=5)

from sklearn import metrics
# mean_squared_error returns a single number here, so report it as such: the
# previous code re-printed the per-fold `scores` and quoted a spread that was
# always zero.
scores2 = metrics.mean_squared_error(data_target, predicted)
print(scores2)
print("MSE 2: %0.3f" % scores2)

# Predicted vs. true scatter with the ideal y = x line, both axes padded by
# 10% around the range of the true values.
target_lo = data_target.min()
target_hi = data_target.max()
fig, ax = plt.subplots()
ax.scatter(data_target, predicted)
ax.plot([target_lo, target_hi], [target_lo, target_hi], 'k--', lw=4)
ax.set_xlabel('True KE [GeV]')
ax.set_ylabel('Predicted KE [GeV]')
ax.set_xlim([target_lo*0.9, target_hi*1.1])
ax.set_ylim([target_lo*0.9, target_hi*1.1])
plt.savefig("xgb_cross_val_comparision.pdf")

# Relative residual (true - predicted) / true, histogrammed in [-1, 1].
# NOTE(review): entries with data_target == 0 would divide by zero -- confirm
# the targets are strictly non-zero.
ROOT.gStyle.SetOptStat(1)
hist = ROOT.TH1D("hist","hist", 100, -1, 1)
diff = (data_target - predicted)/data_target
fill_hist(hist, diff)
hist.GetXaxis().SetTitle("#DeltaE/E")
canvas = ROOT.TCanvas()
hist.Draw()
canvas.SaveAs("xgb_cross_val_DeltaE.pdf")
Example #50
0
# Two side-by-side, log-scale charge spectra: `q` on the left, `q_ped` on the
# right, each with a vertical line at `threshold`.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12, 6))
axes[0].set_yscale('log')
axes[1].set_yscale('log')
# Both panels use the same bin edges, so build them once.
charge_bins = np.arange(loC, hiC + width, width)
q.hist(histtype='step', bins=charge_bins, color='r', ax=axes[0])
#q_good.hist(histtype='step', bins = np.arange(loC, hiC + width, width), color='g', ax = axes[0])
q_ped.hist(histtype='step', bins=charge_bins, color='b', ax=axes[1])
axes[0].plot((threshold, threshold), (0, 1e5), 'k-')
axes[1].plot((threshold, threshold), (0, 1e5), 'k-')


# Persist the same spectra as ROOT histograms.
from ROOT import TH1F, TFile
from root_numpy import fill_hist
q_file = TFile('plots/' + sys.argv[1] + '_' + sys.argv[2] + '_charge_spectrum.root', "recreate")
q_hist1 = TH1F('hist1', 'title', nBins, loC, hiC)
q_hist2 = TH1F('hist2', 'title', 55, -10, 100)
# `.values` replaces `.as_matrix()`, which was deprecated and then removed
# from pandas; both hand root_numpy the underlying numpy array.
# (presumably `q` and `max_voltages` are pandas objects -- verify upstream)
fill_hist(q_hist1, q.values)
fill_hist(q_hist2, max_voltages['minus_voltage'].values)
q_hist1.Write()
q_hist2.Write()
q_file.Close()


max_q = q.max()

# uncomment these lines if you want to plot the fitted Gaussian
#x = np.linspace(loC, hiC, nBins)
#pdf = n_events*norm.pdf(x, mu, std)/nBins
#plt.plot(x, pdf, 'k', linewidth = 2)

axes[0].set_xlabel("charge [pC]", fontsize = 20)
axes[0].set_ylabel("Entries / (%0.2f pC)" % width, fontsize = 20)
plt.ylabel("recoKE - trueKE [MeV]")


# In[12]:

# Relative residual (reco - true) / true on the test sample.  Computed once and
# reused below; the model used to be evaluated twice for the same numbers.
rel_residual_SGD = (clf.predict(test_data_reduced_n) - test_data_trueKE) / test_data_trueKE

plt.scatter(test_data_trueKE, rel_residual_SGD)
plt.ylim((0,1))
plt.xlabel("trueKE [MeV]")
plt.ylabel("DeltaE/E")
# Pair every true KE with its residual.
# NOTE(review): assumes both inputs are 1-D, in which case np.dstack yields
# shape (1, N, 2) and element [0] is the (N, 2) table filled below -- confirm.
res_twod_SGD = np.dstack((test_data_trueKE, rel_residual_SGD))


# In[13]:

# 2-D histogram: x = true KE in [0, 5000], y = relative residual in [-1, 10].
hist_SGD = ROOT.TH2D('name', 'title', 100, 0, 5000, 100, -1, 10)
fill_hist(hist_SGD, res_twod_SGD[0])
hist_SGD.Draw()
ROOT.gPad.Draw()


# In[14]:

# Profile along x of the 2-D histogram above.
profile_SGD = hist_SGD.ProfileX()
profile_SGD.SetLineColor(ROOT.kBlue)
profile_SGD.Draw()
ROOT.gPad.Draw()


# In[15]:

params = {'n_estimators': 1000, 'max_depth': 10, 'min_samples_split': 1,
Example #52
0
# Produce 100 bootstrap replicas of `array`; each one is written out as a
# TTree and drawn as one frame of an animated GIF.
for bootstrap_idx in range(100):
    # single-line progress indicator (carriage return, no newline)
    sys.stdout.write("bootstrap {0} ...\r".format(bootstrap_idx))
    sys.stdout.flush()

    # resample with replacement: len(array) random row indices
    # http://docs.scipy.org/doc/numpy-dev/reference/generated/numpy.random.choice.html
    sample_idx = np.random.choice(len(array), size=len(array), replace=True)
    array_bootstrapped = array[sample_idx]

    # convert the replica back to a TTree, write it out, then drop it
    tree_bootstrapped = array2tree(
        array_bootstrapped, name='bootstrap_{0}'.format(bootstrap_idx))
    tree_bootstrapped.Write()
    tree_bootstrapped.Delete()

    # refill the (reused) ROOT histogram from the replica and draw it
    hist.Reset()
    fill_hist(hist, rec2array(array_bootstrapped))
    hist.Draw()

    # axis cosmetics, grouped per axis
    hist.xaxis.title = 'x'
    hist.xaxis.limits = (-2.5, 2.5)
    hist.xaxis.divisions = 5

    hist.yaxis.title = 'y'
    hist.yaxis.limits = (-2.5, 2.5)
    hist.yaxis.divisions = 5

    hist.zaxis.title = 'Events'
    hist.zaxis.range_user = (0, 60)
    hist.zaxis.divisions = 5

    # append this canvas as the next frame of the animated GIF
    # (the "+50" suffix is ROOT's per-frame delay syntax)
    canvas.Print('bootstrap.gif+50')

# loop the gif
canvas.Print('bootstrap.gif++')
output.Close()
Example #53
0
    def plotDiscriminant(self, discriminant, signal_idx, bkg_idx, weights = None, save_disc = True, rejection_power=True):
        '''
        Plot the discriminants and the resulting ROC curve derived from them.

        Everything is written to the ROOT file
        <output_path>/<output_prefix><job_id>.root, which is (re)created here
        and closed before returning.

        Keyword args:
        discriminant --- The score of the BDT (set in the setProbas method)
        signal_idx --- The true indices of all signal events
        bkg_idx --- The true indices of all background events
        weights --- Optional per-event weights, indexed the same way as
                    discriminant. When None the histograms are unweighted.
        save_disc --- Flag indicating if the discriminant plots should be saved
                      (as disc_plots/discriminants_<job_id>.png).
        rejection_power --- Whether or not to calculate bkg power: 1/eff in addition to 1-eff

        Sets self.roc_graph, self.ROC_sig_efficiency, self.ROC_bkg_rejection,
        self.hist_sig and self.hist_bkg, plus self.roc_graph_power when
        rejection_power is true. Returns nothing.
        '''
        import ROOT as root
        from ROOT import TH2D, TCanvas, TFile, TNamed, TH1F, TLegend
        import numpy as np
        from root_numpy import fill_hist
        import functions as fn
        import os


        # stop showing plots to screen
        root.gROOT.SetBatch(True)

        if not os.path.exists(self.output_path):
            os.makedirs(self.output_path)
        fo = TFile.Open(self.output_path+"/"+self.output_prefix+str(self.job_id)+'.root','RECREATE')

        bins = 100
        # when creating the plots do it over the range of all probas (scores)
        disc_min = np.min(discriminant)
        disc_max = np.max(discriminant)

        hist_bkg = TH1F("Background Discriminant","Discriminant",bins, disc_min, disc_max)
        hist_sig = TH1F("Signal Discriminant","Discriminant",bins, disc_min, disc_max)

        # fill the signal and background histograms
        if weights is not None:
            print('weights is not none******************')
            fill_hist(hist_bkg,discriminant[bkg_idx], weights[bkg_idx])
            fill_hist(hist_sig,discriminant[signal_idx], weights[signal_idx])
        else:
            fill_hist(hist_bkg,discriminant[bkg_idx])
            fill_hist(hist_sig,discriminant[signal_idx])

        # normalise each histogram to unit area; an empty one is left untouched
        # to avoid dividing by zero
        for hist in (hist_bkg, hist_sig):
            integral = hist.Integral()
            if integral != 0:
                hist.Scale(1.0/integral)

        hist_sig.SetLineColor(4)
        hist_bkg.SetLineColor(2)
        #hist_sig.SetFillColorAlpha(4, 0.5);
        hist_sig.SetFillStyle(3004)
        #hist_bkg.SetFillColorAlpha(2, 0.5);
        hist_bkg.SetFillStyle(3005)
        hist_sig.Write()
        hist_bkg.Write()
        c = TCanvas()
        leg = TLegend(0.8,0.55,0.9,0.65)
        leg.SetFillColor(root.kWhite)
        leg.AddEntry(hist_sig, "Signal","l")
        leg.AddEntry(hist_bkg, "Background", "l")
        # give both histograms the same y-range with 20% headroom
        max_y = max(hist_sig.GetMaximum(), hist_bkg.GetMaximum())
        hist_sig.SetMaximum(max_y*1.2)
        hist_bkg.SetMaximum(max_y*1.2)
        hist_sig.Draw('hist')
        hist_bkg.Draw('histsame')
        # the legend was built but never drawn, so the stored canvas and the
        # PNG came out without it
        leg.Draw('same')
        c.Write()
        if save_disc:
            if not os.path.exists('disc_plots'):
                os.makedirs('disc_plots')
            c.SaveAs('disc_plots/discriminants_'+str(self.job_id)+'.png')

        # before deciding whether to do a left or right cut for the roc curve we have to find the median.
        sig_median = np.median(discriminant[signal_idx])
        bkg_median = np.median(discriminant[bkg_idx])
        if sig_median > bkg_median:
            roc_cut = 'R'
        else:
            roc_cut = 'L'

        # create the single sided roccurve with the code from Sam
        self.roc_graph = fn.RocCurve_SingleSided(hist_sig, hist_bkg, self.sig_eff,self.bkg_eff, roc_cut)
        self.roc_graph.SetName('BackgroundRejection')
        self.roc_graph.SetTitle('BackgroundRejection')
        self.roc_graph.Write()

        # get the background rejection power at 50% signal efficiency
        # store the efficiencies first
        self.ROC_sig_efficiency, self.ROC_bkg_rejection = fn.getEfficiencies(self.roc_graph)
        # presumably this is what sets self.ROC_rej_power_05 -- verify in bkgRejectionPower
        self.bkgRejectionPower()
        # write the roc score as a string to the output file
        rej_string = 'rejection_power_'+str(self.ROC_rej_power_05)
        rej_n = TNamed(rej_string,rej_string)
        rej_n.Write()


        if rejection_power:
            c.SetLogy()
            self.roc_graph_power = fn.RocCurve_SingleSided(hist_sig, hist_bkg, self.sig_eff,self.bkg_eff, roc_cut, rejection=False)
            c.cd()
            self.roc_graph_power.SetName('BackgroundPower')
            self.roc_graph_power.SetTitle('BackgroundPower')
            self.roc_graph_power.Write()

        # write the decision function to the root file as well, if it is defined.
        if len(self.decision_function) > 0:
            # presumably this fills self.df_sig / self.df_bkg used below -- verify
            self.decisionFunctionCanvas()
            # add the legends
            leg2 = TLegend(0.8,0.55,0.9,0.65)
            leg2.SetFillColor(root.kWhite)
            leg2.AddEntry(self.df_sig, "Signal","l")
            leg2.AddEntry(self.df_bkg, "Background", "l")
            # canvas to draw them on
            c2 = TCanvas('Decision Functions')
            self.df_sig.Draw('hist')
            self.df_bkg.Draw('histsame')
            leg2.Draw('same')
            c2.Write()
            # now write the df histograms as well
            self.df_sig.Write()
            self.df_bkg.Write()

        # keep copies that survive closing the output file: SetDirectory(0)
        # detaches them from it
        self.hist_sig = hist_sig.Clone()
        self.hist_sig.SetDirectory(0)
        self.hist_bkg = hist_bkg.Clone()
        self.hist_bkg.SetDirectory(0)

        fo.Close()
Example #54
0
def _ieta_histogram_and_efficiencies(rootfile, treename, isolations, inputnames, targetname, cut, ieta_binning):
    """Load one sample and return its ieta histogram and efficiency table.

    Reads ``inputnames + [targetname]`` from ``treename`` in ``rootfile``
    (with ``cut`` applied) and returns ``(histo, efficiencies)``: ``histo`` is
    the first input branch histogrammed in ``ieta_binning``, and
    ``efficiencies`` is a 2D array with one row per ieta bin and one column
    per entry of ``isolations``.
    """
    ninputs = len(inputnames)
    branches = copy.deepcopy(inputnames)
    branches.append(targetname)
    data = root2array(rootfile, treename=treename, branches=branches, selection=cut)
    # reinterpret the record array as a plain (n_events, n_branches) float64
    # table; this only works if every branch is stored as float64
    data = data.view((np.float64, len(data.dtype.names)))
    inputs = data[:, :ninputs].astype(np.float32)
    targets = data[:, ninputs].astype(np.float32)
    # the first input branch is the one the efficiencies are binned in
    xs = data[:, 0].astype(np.float32)

    histo = Hist(ieta_binning)
    fill_hist(histo, xs)

    per_working_point = []
    for iso in isolations:
        # an event passes when its target value is below the predicted threshold
        graph = efficiency.efficiency_graph(
            pass_function=(lambda x: np.less(x[1], iso.predict(x[0]))),
            function_inputs=(inputs, targets),
            xs=xs,
            bins=ieta_binning)
        # column 1 of the graph array holds the efficiency values
        per_working_point.append(graph2array(graph)[:, [1]].ravel())
    return histo, np.column_stack(per_working_point)


def optimize_background_rejection_vs_ieta(effs, isolations, signalfile, signaltree, backgroundfile, backgroundtree, inputnames=['abs(ieta)','ntt'], targetname='iso', cut='et>10'):
    """Find the best working point in every ieta bin.

    Args:
        effs: efficiency working points, forwarded to
            ``find_best_working_point``.
        isolations: one object per working point, each exposing
            ``predict(inputs)``.
        signalfile, signaltree: ROOT file and tree name of the signal sample.
        backgroundfile, backgroundtree: same for the background sample.
        inputnames: branches fed to ``predict``; the first one is also the
            variable the result is binned in. The default list is copied,
            never modified.
        targetname: branch compared against the predicted threshold.
        cut: selection string applied when reading both trees.

    Returns:
        A 4-tuple ``(signal_graphs, background_graphs, optimal_points_graphs,
        optimal_points_histo)``: three lists with one graph per ieta bin, and
        a histogram of the optimal point per ieta bin.
    """
    ieta_binning = [0.5, 3.5, 6.5, 9.5, 13.5, 18.5, 24.5, 27.5]

    # Efficiency tables: first dimension = ieta bin, second = working point.
    histo_signal, signal_efficiencies = _ieta_histogram_and_efficiencies(
        signalfile, signaltree, isolations, inputnames, targetname, cut, ieta_binning)
    histo_background, background_efficiencies = _ieta_histogram_and_efficiencies(
        backgroundfile, backgroundtree, isolations, inputnames, targetname, cut, ieta_binning)

    # The totals do not depend on the ieta bin, so compute them once.
    n_tot_signal = histo_signal.integral(overflow=True)
    n_tot_background = histo_background.integral(overflow=True)

    signal_efficiencies_diff_graphs = []
    background_efficiencies_diff_graphs = []
    optimal_points_graphs = []
    optimal_points = []
    # compute best working point for each ieta (loop on ieta)
    for i, (signal_effs, background_effs) in enumerate(zip(signal_efficiencies, background_efficiencies)):
        # Probability of signal in this ieta bin for the different efficiency points.
        # It is assumed that the cut is applied only in this bin, all the other
        # bins keep the same number of entries. Histogram bin 0 is skipped,
        # hence the i+1.
        n_i = histo_signal[i+1].value
        proba_signal = n_i*signal_effs/(n_tot_signal-n_i*(1.-signal_effs))
        # Same as above, but for background
        n_i = histo_background[i+1].value
        proba_background = n_i*background_effs/(n_tot_background-n_i*(1.-background_effs))

        signal_efficiencies_diff_graph, background_efficiencies_diff_graph, optimal_points_graph, optimal_point = find_best_working_point(effs, signal_effs, background_effs, proba_signal, proba_background)
        signal_efficiencies_diff_graph.SetName('efficiencies_signal_ieta_{}'.format(i))
        background_efficiencies_diff_graph.SetName('efficiencies_background_ieta_{}'.format(i))
        optimal_points_graph.SetName('signal_background_optimal_points_ieta_{}'.format(i))
        signal_efficiencies_diff_graphs.append(signal_efficiencies_diff_graph)
        background_efficiencies_diff_graphs.append(background_efficiencies_diff_graph)
        optimal_points_graphs.append(optimal_points_graph)
        optimal_points.append(optimal_point)

    optimal_points_histo = Hist(ieta_binning)
    array2hist(optimal_points, optimal_points_histo)
    return signal_efficiencies_diff_graphs, background_efficiencies_diff_graphs, optimal_points_graphs, optimal_points_histo
Example #55
0
    def histfactory(self, sample, category, systematics=False,
                    rec=None, weights=None, mva=False,
                    uniform=False, nominal=None):
        """Attach normalisation factors and systematics to ``sample``.

        Does nothing unless ``systematics`` is true. Otherwise adds, in order:
        the isolation systematic, the QCD-scale / UE / PDF-acceptance overall
        systematics matching this mode and ``category``, the PDF-acceptance
        shape systematic (``mva`` only), the mixing norm factor, the tautau
        branching-ratio terms, the cross-section norm factor, the PDF
        uncertainty, the VBF-only NLO EW term, and, for ``mva`` gg samples in
        the 'vbf' category, the QCDscale_ggH3in shape systematic built from
        ``rec``, ``weights`` and ``nominal``.

        Raises TypeError when the sample has more than one production mode or
        mass point, and ValueError when ``self.year`` is neither 2011 nor 2012.
        """
        if not systematics:
            return
        if len(self.modes) != 1:
            raise TypeError(
                'histfactory sample only valid for single production mode')
        if len(self.masses) != 1:
            raise TypeError(
                'histfactory sample only valid for single mass point')

        # isolation systematic: flat 6% up/down
        isolation_term = 'ATLAS_ANA_HH_{0:d}_Isolation'.format(self.year)
        sample.AddOverallSys(isolation_term, 1. - 0.06, 1. + 0.06)

        mode = self.modes[0]
        # Z and W share the 'VH' rows of the uncertainty tables
        _uncert_mode = 'VH' if mode in ('Z', 'W') else self.MODES_WORKSPACE[mode]

        # collision energy for the data-taking year
        energy = {2011: 7, 2012: 8}.get(self.year)
        if energy is None:
            raise ValueError(
                "collision energy is unknown for year {0:d}".format(self.year))

        def add_matching_overall_sys(table):
            # rows are (term, mode, category name, "high/low"); only rows for
            # this mode and category are applied
            for term, term_mode, term_category, values in table:
                if term_mode != _uncert_mode or term_category != category.name:
                    continue
                high, low = map(float, values.split('/'))
                sample.AddOverallSys(term, low, high)

        # QCD_SCALE
        add_matching_overall_sys(self.QCD_SCALE)
        # UE UNCERTAINTY
        add_matching_overall_sys(self.UE_UNCERT)
        # PDF ACCEPTANCE UNCERTAINTY (OverallSys)
        add_matching_overall_sys(self.PDF_ACCEPT_NORM_UNCERT)

        sample_nom = sample.hist

        # PDF ACCEPTANCE UNCERTAINTY (HistoSys) ONLY FOR MVA
        if mva:
            for pdf_term, pdf_mode, pdf_category, hist_names in self.PDF_ACCEPT_SHAPE_UNCERT:
                if not (pdf_mode == _uncert_mode and pdf_category == category.name):
                    continue
                high_name, low_name = hist_names.format(energy).split('/')
                high_shape = self.PDF_ACCEPT_file[high_name]
                low_shape = self.PDF_ACCEPT_file[low_name]
                if len(high_shape) != len(sample.hist):
                    log.warning("skipping pdf acceptance shape systematic "
                                "since histograms are not compatible")
                    continue
                # scale copies of the nominal histogram by the up/down shapes
                high = sample_nom.Clone(
                    shallow=True,
                    name=sample_nom.name + '_{0}_UP'.format(pdf_term))
                low = sample_nom.Clone(
                    shallow=True,
                    name=sample_nom.name + '_{0}_DOWN'.format(pdf_term))
                high *= high_shape
                low *= low_shape
                sample.AddHistoSys(
                    histfactory.HistoSys(pdf_term, low=low, high=high))

        # mixing norms
        if self.SM:
            log.info('adding norm factor')
            sample.AddNormFactor('ATLAS_epsilon', 1., -200., 200., False)
        elif self.BSM:
            log.info('adding norm factor')
            sample.AddNormFactor('ATLAS_epsilon_rejected', 1., -200., 200., False)
        else:
            log.info('no norms for {0}'.format(self.name))

        # BR_tautau
        _, (br_up, br_down) = yellowhiggs.br(
            self.mass, 'tautau', error_type='factor')
        sample.AddOverallSys('ATLAS_BR_tautau', br_down, br_up)

        # <NormFactor Name="mu_BR_tautau" Val="1" Low="0" High="200" />
        sample.AddNormFactor('mu_BR_tautau', 1., 0., 200., True)

        #mu_XS[energy]_[mode]
        #_, (xs_up, xs_down) = yellowhiggs.xs(
        #    energy, self.mass, self.MODES_DICT[self.mode][0],
        #    error_type='factor')
        #sample.AddOverallSys(
        #    'mu_XS{0:d}_{1}'.format(energy, self.MODES_WORKSPACE[self.mode]),
        #    xs_down, xs_up)
        # NOTE(review): this reads self.mode while the rest of the method uses
        # the local `mode` (= self.modes[0]) -- confirm both always agree.
        xs_factor_name = 'mu_XS{0:d}_{1}'.format(
            energy, self.MODES_WORKSPACE[self.mode])
        sample.AddNormFactor(xs_factor_name, 1., 0., 200., True)

        # https://twiki.cern.ch/twiki/bin/viewauth/AtlasProtected/HSG4Uncertainties
        # pdf uncertainty; anything that is not 8 TeV is treated as 7 TeV
        if mode == 'gg':
            pdf_low = 0.93 if energy == 8 else 0.92
            sample.AddOverallSys('pdf_Higgs_gg', pdf_low, 1.08)
        else:
            pdf_low = 0.97 if energy == 8 else 0.98
            sample.AddOverallSys('pdf_Higgs_qq', pdf_low, 1.03)

        # EWK NLO CORRECTION FOR VBF ONLY
        if mode == 'VBF':
            sample.AddOverallSys('NLO_EW_Higgs', 0.98, 1.02)

        # QCDscale_ggH3in HistoSys ONLY FOR MVA
        # also see ggH3in script
        if not (mva and mode == 'gg' and category.name == 'vbf'):
            return

        Rel_Error_2j = 0.215
        Error_exc = 0.08613046469238815 # Abs error on the exclusive xsec
        xsec_exc = 0.114866523583739 # Exclusive Xsec
        Error_3j = sqrt(Error_exc**2 - (Rel_Error_2j*xsec_exc)**2)
        rel_error = Error_3j / xsec_exc

        dphi = rec['true_dphi_jj_higgs_no_overlap']
        scores = rec['classifier']

        # split events with dphi >= 0 at pi - dphi = 0.2
        has_dphi = dphi >= 0
        idx_2j = ((pi - dphi) < 0.2) & has_dphi
        idx_3j = ((pi - dphi) >= 0.2) & has_dphi

        # get normalization factor
        dphi_2j = weights[idx_2j].sum()
        dphi_3j = weights[idx_3j].sum()

        # per-event scale factors; the two populations are shifted in opposite
        # directions
        shift_2j = (dphi_3j / dphi_2j) * rel_error
        weight_up = np.ones(len(weights))
        weight_dn = np.ones(len(weights))
        weight_up[idx_2j] -= shift_2j
        weight_dn[idx_2j] += shift_2j
        weight_up[idx_3j] += rel_error
        weight_dn[idx_3j] -= rel_error
        weight_up *= weights
        weight_dn *= weights

        # empty copies of the nominal histogram, refilled with varied weights
        up_hist = nominal.clone(
            shallow=True, name=sample_nom.name + '_QCDscale_ggH3in_UP')
        up_hist.Reset()
        dn_hist = nominal.clone(
            shallow=True, name=sample_nom.name + '_QCDscale_ggH3in_DOWN')
        dn_hist.Reset()
        fill_hist(up_hist, scores, weight_up)
        fill_hist(dn_hist, scores, weight_dn)

        if uniform:
            up_hist = uniform_hist(up_hist)
            dn_hist = uniform_hist(dn_hist)

        shape = histfactory.HistoSys(
            'QCDscale_ggH3in', low=dn_hist, high=up_hist)
        # only the shape component is attached; the norm part is discarded
        _, shape = histfactory.split_norm_shape(shape, sample_nom)
        sample.AddHistoSys(shape)
#printing#
#        #


# Epoch axis shared by both curves below: 1 .. N_epochs.
x_range = [x + 1 for x in range(0, N_epochs)]

# Figure 1: cost function on the training data, per epoch.
plt.figure(1)
plt.title("Costfunction of (modified) Training-data")
plt.xlabel("epochs")
plt.ylabel("cost function")
plt.plot(x_range, cost_training_data)
plt.savefig("cost_on_training_data.png")

# Figure 2: total error on the validation data, per epoch.
plt.figure(2)
plt.title("f data")
plt.xlabel("epochs")
plt.ylabel("total error on validation data")
plt.plot(x_range, error_validation_data)
plt.savefig("error_on_val_data.png")


# Histogram of the last entry of diff_validation, reshaped to a single row of
# validation_set.get_N() values.
n_validation = validation_set.get_N()
error_on_validation_data_after_training = diff_validation[-1].reshape((1, n_validation))

hist = TH1D('hist', "Errors on val data after last training epoch", 200, -10000, 10000)
hist.GetXaxis().SetTitle("desired Chi^2- outputted Chi^2")
fill_hist(hist, error_on_validation_data_after_training[0])

canvas = TCanvas()
hist.Draw()
canvas.SaveAs('error_on_val_data_hist.png')