def VarCrossCheck(Sig, Bkg, SigW, BkgW, name, xmin, xmax, bins):
    hSig = ROOT.TH1F("hSig", name, bins, xmin, xmax)
    fill_hist(hSig, Sig, weights=SigW)
    hSig.SetLineColor(2)
    hSig.SetLineWidth(3)
    SetOverflow(hSig)
    # normalize the signal to the background yield for shape comparison
    hSig.Scale(np.sum(BkgW) / hSig.Integral())
    hBkg = ROOT.TH1F("hBkg", name, bins, xmin, xmax)
    fill_hist(hBkg, Bkg, weights=BkgW)
    hBkg.SetLineWidth(3)
    SetOverflow(hBkg)
    c1 = ROOT.TCanvas("c1", "c1", 800, 600)
    ROOT.gStyle.SetOptStat(0)
    hBkg.Draw("Hist")
    hSig.Draw("SameHist")
    if hSig.GetMaximum() > hBkg.GetMaximum():
        hBkg.SetMaximum(int(round(hSig.GetMaximum() * 1.1)))
    hBkg.GetXaxis().SetTitle("Jet multiplicity")
    hBkg.GetYaxis().SetTitle("Yield")
    leg = ROOT.TLegend(0.7, 0.7, 0.9, 0.9)
    leg.AddEntry(hSig, "Sig (Yield: {:04.2f})".format(np.sum(SigW)))
    leg.AddEntry(hBkg, "Bkg (Yield: {:04.2f})".format(np.sum(BkgW)))
    leg.Draw()
    c1.Update()
    c1.SaveAs("./plots/VarCrossCheck.png")
def makePlot(variable):
    data_var = np.array(data[variable])
    data["totalWeight"] = (data.evtWeight * data.lep1_frWeight *
                           data.lep2_frWeight * data.lep3_frWeight *
                           data.tau_frWeight)
    data_weights = np.array(data['totalWeight'])
    data_tight_var = np.array(data_tight[variable])
    data_tight["totalWeight"] = (data_tight.evtWeight * data_tight.lep1_frWeight *
                                 data_tight.lep2_frWeight * data_tight.lep3_frWeight)
    data_tight_weights = np.array(data_tight['totalWeight'])
    c1 = TCanvas()
    c1.SetFillColor(10)
    c1.SetBorderSize(2)
    c1.SetLeftMargin(0.12)
    c1.SetBottomMargin(0.12)
    c1.SetRightMargin(0.05)
    c1.SetLogy()
    histogram_base = TH1F("histogram_base", "", 100,
                          np.nanmin(data_var), np.nanmax(data_var))
    histogram_base.SetTitle("")
    histogram_base.SetStats(False)
    histogram_base.SetMinimum(0.001)
    histogram_base.SetMaximum(10.0)
    histogram_base.GetXaxis().SetTitle(variable)
    histogram_base.GetYaxis().SetTitle("Events")
    histogram_base.Draw("hist")
    hist_loose = TH1F("hist_loose", "", 100, np.nanmin(data_var), np.nanmax(data_var))
    hist_tight = TH1F("hist_tight", "", 100, np.nanmin(data_var), np.nanmax(data_var))
    root_numpy.fill_hist(hist_loose, data_var, weights=data_weights)
    root_numpy.fill_hist(hist_tight, data_tight_var, weights=data_tight_weights)
    hist_loose.SetLineColor(2)
    hist_tight.SetLineColor(4)
    hist_loose.SetFillColor(2)
    hist_tight.SetFillColor(4)
    hist_loose.SetFillStyle(3004)
    hist_tight.SetFillStyle(3005)
    leg = TLegend(0.2, 0.65, 0.5, 0.9)
    leg.SetBorderSize(0)
    leg.SetFillColor(10)
    leg.SetLineColor(0)
    leg.SetFillStyle(0)
    leg.SetTextSize(0.04)
    leg.SetTextFont(42)
    leg.AddEntry(hist_loose, "loose", "F")
    leg.AddEntry(hist_tight, "loose_genTau_matched", "F")
    hist_loose.DrawNormalized("histsame")
    hist_tight.DrawNormalized("histsame")
    leg.Draw()
    c1.SaveAs("plots/" + variable + "_" + process + "_loose_vs_genTau.png")
def Make_Binned_ROC_histograms(title, DDT, kNN, pT, bins, sample_weights=None):
    rt.gROOT.SetBatch(True)
    N = len(DDT)
    assert len(pT) == N and len(kNN) == N
    if sample_weights is not None:
        assert len(sample_weights) == N
    nbins = 100
    DDT_hist_list = []
    kNN_hist_list = []
    for bin_ in range(len(bins) - 1):
        DDT_hist_list.append(
            rt.TH1D("DDT_" + str(bins[bin_]) + "_" + str(bins[bin_ + 1]),
                    "DDT_" + str(bins[bin_]) + "_" + str(bins[bin_ + 1]),
                    nbins, 0, 1))
        kNN_hist_list.append(
            rt.TH1D("kNN_" + str(bins[bin_]) + "_" + str(bins[bin_ + 1]),
                    "kNN_" + str(bins[bin_]) + "_" + str(bins[bin_ + 1]),
                    nbins, 0, 1))
        condition = np.logical_and(pT > bins[bin_], pT < bins[bin_ + 1])
        # the weights must be masked consistently with the data
        bin_weights = sample_weights[condition] if sample_weights is not None else None
        root_numpy.fill_hist(DDT_hist_list[bin_], DDT[condition], weights=bin_weights)
        root_numpy.fill_hist(kNN_hist_list[bin_], kNN[condition], weights=bin_weights)
    tfile = rt.TFile("ROC_doublechecks/{}_ROC_histograms.root".format(title), "recreate")
    for hist in DDT_hist_list:
        hist.Write()
    for hist in kNN_hist_list:
        hist.Write()
    print "saved histograms in ROC_doublechecks/{}_ROC_histograms.root".format(title)
def _diff_plot1D_numpy(self, data, bins, weights=None, option='', **kwargs):
    """ ... """
    # Check(s)
    if bins is None:
        warning("You need to specify 'bins' when plotting a numpy-type input.")
        return
    if len(bins) < 2:
        warning("Number of bins {} is not accepted".format(len(bins)))
        return

    # Fill numerator and denominator histograms
    h1 = ROOT.TH1F('h_num_{}'.format(id(data)), "", len(bins) - 1, bins)
    h2 = ROOT.TH1F('h_den_{}'.format(id(data)), "", len(bins) - 1, bins)
    fill_hist(h1, data[0], weights=weights[0] if weights is not None else None)
    fill_hist(h2, data[1], weights=weights[1] if weights is not None else None)

    return _diff_plot1D((h1, h2), option, **kwargs)
def test_project_3d_to_2d(self):
    hist_3d = _get_hist(3)
    # populate overflow bins to make sure that they are treated as expected
    fill_hist(hist_3d, np.random.uniform(-1, 0, (100, 3)))
    fill_hist(hist_3d, np.random.uniform(1, 2, (100, 3)))
    val3d, err3d = hu.get_array(hist_3d), hu.get_array(hist_3d, errors=True)

    hist_xy = hu.project(hist_3d, 'xy')
    val, err = hu.get_array(hist_xy), hu.get_array(hist_xy, errors=True)
    npt.assert_equal(val, np.sum(val3d, axis=2))
    npt.assert_equal(err, np.sqrt(np.sum(err3d**2, axis=2)))

    hist_yz = hu.project(hist_3d, 'yz')
    val, err = hu.get_array(hist_yz), hu.get_array(hist_yz, errors=True)
    npt.assert_equal(val, np.sum(val3d, axis=0))
    npt.assert_equal(err, np.sqrt(np.sum(err3d**2, axis=0)))

    hist_zx = hu.project(hist_3d, 'zx')
    val, err = hu.get_array(hist_zx), hu.get_array(hist_zx, errors=True)
    npt.assert_equal(val, np.sum(val3d, axis=1).T)
    npt.assert_equal(err, np.sqrt(np.sum(err3d**2, axis=1)).T)

    hist_yx = hu.project(hist_3d, 'yx')
    val, err = hu.get_array(hist_yx), hu.get_array(hist_yx, errors=True)
    npt.assert_equal(val, np.sum(val3d, axis=2).T)
    npt.assert_equal(err, np.sqrt(np.sum(err3d**2, axis=2)).T)
def plotRatio(input_df, inputFile):
    gStyle.SetOptStat(0)
    rmse1 = ((input_df['bc_ptRatio_predictedGen'].mean() -
              input_df['bc_ptRatio_predictedGen'])**2).mean()**.5
    rmse2 = ((input_df['bc_ptRatio_correctedGen'].mean() -
              input_df['bc_ptRatio_correctedGen'])**2).mean()**.5
    label1 = "NN prediction, RMS=%3.3f" % rmse1
    label2 = "Colinear correction, RMS=%3.3f" % rmse2
    hPtRatioPredicted = TH1F('hPtRatioPredicted', label1, 40, 0., 2.)
    hPtRatioCorrected = TH1F('hPtRatioCorrected', label2, 40, 0., 2.)
    fill_hist(hPtRatioPredicted, input_df['bc_ptRatio_predictedGen'].to_numpy())
    fill_hist(hPtRatioCorrected, input_df['bc_ptRatio_correctedGen'].to_numpy())
    c1 = TCanvas('c1', 'c1', 700, 500)
    hPtRatioPredicted.SetLineColor(ROOT.kAzure)
    hPtRatioCorrected.SetLineColor(ROOT.kRed)
    gStyle.SetOptTitle(0)
    hPtRatioPredicted.GetYaxis().SetTitle("Events / 0.05")
    hPtRatioPredicted.GetXaxis().SetTitle(
        "pT_{corrected}(B_{c}^{+})/pT_{gen}(B_{c}^{+})")
    hPtRatioPredicted.Draw("")
    hPtRatioCorrected.Draw('same')
    gPad.BuildLegend()
    c1.SaveAs(plotsDir + inputFile + '_ratio_predicted_gen.pdf')
def plotProfile(input_df):
    gStyle.SetOptStat(0)
    hPtGenVsPtRatioPredictedGen = TH2F('hPtGenVsPtRatioPredictedGen', 'NN prediction',
                                       80, 0., 80., 40, 0., 2.)
    fill_hist(hPtGenVsPtRatioPredictedGen,
              input_df[['gen_b_pt', 'bc_ptRatio_predictedGen']].to_numpy())
    profilePtGenVsPtRatioPredictedGen = hPtGenVsPtRatioPredictedGen.ProfileX()
    profilePtGenVsPtRatioPredictedGen.SetMarkerStyle(ROOT.kFullCircle)

    hPtGenVsPtRatioCorrectedGen = TH2F('hPtGenVsPtRatioCorrectedGen', 'Jonas correction',
                                       80, 0., 80., 40, 0., 2.)
    fill_hist(hPtGenVsPtRatioCorrectedGen,
              input_df[['gen_b_pt', 'bc_ptRatio_correctedGen']].to_numpy())
    profilePtGenVsPtRatioCorrectedGen = hPtGenVsPtRatioCorrectedGen.ProfileX()
    profilePtGenVsPtRatioCorrectedGen.SetMarkerStyle(ROOT.kFullSquare)

    c1 = TCanvas('c1', 'c1', 700, 500)
    profilePtGenVsPtRatioPredictedGen.SetLineColor(ROOT.kAzure)
    profilePtGenVsPtRatioCorrectedGen.SetLineColor(ROOT.kOrange)
    gStyle.SetOptTitle(0)
    profilePtGenVsPtRatioPredictedGen.GetXaxis().SetTitle("pT_{gen}(Bc) [GeV]")
    profilePtGenVsPtRatioPredictedGen.GetYaxis().SetTitle(
        "pT_{corrected}(B_{c}^{+})/pT_{gen}(B_{c}^{+})")
    profilePtGenVsPtRatioPredictedGen.Draw("")
    profilePtGenVsPtRatioCorrectedGen.Draw('same')
    gPad.BuildLegend()
    c1.SaveAs(plotsDir + 'profile.png')
    return 0
def Plot_variable_from_data_2D(out_file, data_path, x_title, x_var, x_bins,
                               x_range, y_title, y_var, y_bins, y_range,
                               test=False, weights=None):
    if test:
        data, features, _ = load_data(data_path, test_full_signal=True)
    else:
        data, features, _ = load_data(data_path, train_full_signal=True)
    if weights is not None:
        weights = data[weights]
    f1 = ROOT.TFile(out_file, "RECREATE")
    hist = ROOT.TH2D('hist', 'hist', x_bins, x_range[0], x_range[1],
                     y_bins, y_range[0], y_range[1])
    X = data[x_var]
    Y = data[y_var]
    root_numpy.fill_hist(hist, np.vstack((X, Y)).T, weights=weights)
    canv = ROOT.TCanvas('canv', 'canv', 600, 600)
    hist.SetContour(256)
    hist.GetXaxis().SetTitle(x_title)
    hist.GetYaxis().SetTitle(y_title)
    hist.Draw("COLZ")
    canv.Write()
    f1.Close()
def create_TH1D(x, name='h', title=None, binning=[None, None, None],
                weights=None, h2clone=None):
    if title is None:
        title = name
    if h2clone is None:
        if binning[1] is None:
            binning[1] = min(x)
        if binning[2] is None:
            # cap the upper edge at the 90th percentile when the
            # distribution has a long right tail
            if (np.percentile(x, 95) - np.percentile(x, 50)) < 0.2 * (max(x) - np.percentile(x, 95)):
                binning[2] = np.percentile(x, 90)
            else:
                binning[2] = max(x)
        if binning[0] is None:
            bin_w = 4 * (np.percentile(x, 75) - np.percentile(x, 25)) / (len(x))**(1. / 3.)
            binning[0] = int((binning[2] - binning[1]) / bin_w)
        h = rt.TH1D(name, title, binning[0], binning[1], binning[2])
    else:
        h = h2clone.Clone(name)
        h.SetTitle(title)
        h.Reset()
    rtnp.fill_hist(h, x, weights=weights)
    h.binning = binning
    return h
def scatterplot(dfevt, nvar1, nvar2, nbins1, min1, max1, nbins2, min2, max2):
    hmult1_mult2 = TH2F(nvar1 + nvar2, nvar1 + nvar2, nbins1, min1, max1,
                        nbins2, min2, max2)
    dfevt_rd = dfevt[[nvar1, nvar2]]
    arr2 = dfevt_rd.values
    fill_hist(hmult1_mult2, arr2)
    return hmult1_mult2
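A minimal usage sketch for scatterplot, assuming a pandas DataFrame that contains the two named columns (the DataFrame and column names below are hypothetical):

import pandas as pd
df = pd.DataFrame({'n_tracklets': [10, 25, 40], 'n_contrib': [8, 20, 35]})
h2 = scatterplot(df, 'n_tracklets', 'n_contrib', 50, 0., 100., 50, 0., 100.)
h2.Draw('COLZ')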
def create_TH2D(sample, name='h', title=None,
                binning=[None, None, None, None, None, None],
                weights=None, axis_title=['', '', '']):
    if title is None:
        title = name
    if sample.shape[0] == 0:
        # no entries: fall back to trivial binning
        for i in range(len(binning)):
            if binning[i] is None:
                binning[i] = 1
    else:
        if binning[1] is None:
            binning[1] = min(sample[:, 0])
        if binning[2] is None:
            binning[2] = max(sample[:, 0])
        if binning[0] is None:
            bin_w = 4 * (np.percentile(sample[:, 0], 75) -
                         np.percentile(sample[:, 0], 25)) / (len(sample[:, 0]))**(1. / 3.)
            if bin_w == 0:
                bin_w = 0.5 * np.std(sample[:, 0])
            if bin_w == 0:
                bin_w = 1
            binning[0] = int((binning[2] - binning[1]) / bin_w)
        if binning[4] is None:
            binning[4] = min(sample[:, 1])
        if binning[5] is None:
            binning[5] = max(sample[:, 1])
        if binning[3] is None:
            bin_w = 4 * (np.percentile(sample[:, 1], 75) -
                         np.percentile(sample[:, 1], 25)) / (len(sample[:, 1]))**(1. / 3.)
            if bin_w == 0:
                bin_w = 0.5 * np.std(sample[:, 1])
            if bin_w == 0:
                bin_w = 1
            binning[3] = int((binning[5] - binning[4]) / bin_w)
    if len(binning) == 6:
        h = rt.TH2D(name, title, binning[0], binning[1], binning[2],
                    binning[3], binning[4], binning[5])
    else:
        # variable-width bins: binning holds the x edges, then the y edges,
        # then the number of x and y edges in the last two slots
        h = rt.TH2D(name, title,
                    binning[-2] - 1, array('f', binning[:binning[-2]]),
                    binning[-1] - 1, array('f', binning[binning[-2]:-2]))
    rtnp.fill_hist(h, sample, weights=weights)
    h.SetXTitle(axis_title[0])
    h.SetYTitle(axis_title[1])
    h.SetZTitle(axis_title[2])
    h.binning = binning
    return h
def SigBkgHist(Sample, XTitle, bins, xmin, xmax, tag=''):
    x = Sample.Events
    Sig = x[Sample.OutTrue == 1]                # Signal values of each event
    wSig = Sample.Weights[Sample.OutTrue == 1]  # Signal weights of each event
    Bkg = x[Sample.OutTrue == 0]                # Background values of each event
    wBkg = Sample.Weights[Sample.OutTrue == 0]  # Background weights of each event
    c1 = ROOT.TCanvas("c1", "Canvas", 800, 600)
    ROOT.gStyle.SetOptStat(0)
    hSig = ROOT.TH1F("hSig", "", bins, xmin, xmax)
    fill_hist(hSig, Sig, weights=wSig)
    SetOverflow(hSig)
    SetUnderflow(hSig)
    hBkg = ROOT.TH1F("hBkg", "", bins, xmin, xmax)
    fill_hist(hBkg, Bkg, weights=wBkg)
    SetOverflow(hBkg)
    SetUnderflow(hBkg)
    hBkg.SetLineColor(2)
    if hSig.GetMaximum() > hBkg.GetMaximum():
        hSig.GetYaxis().SetRangeUser(0, hSig.GetMaximum() * 1.4)
    else:
        hSig.GetYaxis().SetRangeUser(0, hBkg.GetMaximum() * 1.4)
    hSig.GetXaxis().SetTitle(XTitle)
    hSig.Draw("Hist")
    hBkg.Draw("SameHist")
    c1.SaveAs("./plots/SigBkg" + tag + ".png")
def makefill1dhist(df_, h_name, h_tit, arrayx, nvar1):
    """
    Create a TH1F histogram and fill it with one variable from a dataframe.
    """
    histo = buildhisto(h_name, h_tit, arrayx)
    fill_hist(histo, df_[nvar1])
    return histo
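A minimal usage sketch for makefill1dhist, assuming buildhisto (from the same module) returns a TH1F built from the bin-edge array; the dataframe and column name are hypothetical:

import numpy as np
import pandas as pd
df = pd.DataFrame({'inv_mass': np.random.normal(1.87, 0.01, 1000)})
edges = np.linspace(1.8, 1.94, 71)
h = makefill1dhist(df, 'hmass', 'invariant mass', edges, 'inv_mass')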
def fillHistograms(self, cut, path, category, weight=Weight("1.0", [])):
    data_content = self.read_data_content(cut, path, weight)
    ff_weights = self.getFFWeights(data_content)
    FFHistos = {}
    for i, uncert in enumerate(self.uncerts):
        if "jetFakes" not in uncert:
            name = self.convertSystematicName(uncert)
        else:
            name = uncert
        FFHistos[name] = getEmptyHist(name, self.variable, category)
        FFHistos[name].Sumw2(True)
        if len(data_content):
            rn.fill_hist(FFHistos[name],
                         array=data_content[self.variable.getBranches()].values,
                         weights=ff_weights[i].values)
        FFHistos[name] = self.unroll2D(FFHistos[name])
    data_content.drop(data_content.index, inplace=True)
    return copy.deepcopy(FFHistos)
def setup(self):
    if os.path.exists(self.output_dir):
        os.system("rm -rf %s" % (self.output_dir + "/*" + self.tag + "*"))
    else:
        os.system("mkdir %s" % self.output_dir)

    # Initialize workspace
    self.w = ROOT.RooWorkspace("w")
    self.rooVar = "mgg"

    # Initialize and fill histogram
    self.h = ROOT.TH1F("h_mgg", "h_mgg", 320, 100, 180)
    self.h.Sumw2()
    root_numpy.fill_hist(self.h, self.events["ggMass"], weights=self.events["weight"])

    # Convert to RooDataHist
    self.d = ROOT.RooDataHist("d_mgg_" + self.tag, "",
                              ROOT.RooArgList(self.w.var(self.rooVar)), self.h, 1)
    self.norm = self.d.sumEntries()
    self.rooVarNorm = ROOT.RooRealVar(self.tag + "_norm", "", self.norm)
    self.pdf = ROOT.RooExtendPdf(self.tag + "_pdf", "", self.w.pdf(self.tag),
                                 self.rooVarNorm)

    if not self.resonant:
        self.w.var(self.rooVar).setRange("SL", 100, 120)
        self.w.var(self.rooVar).setRange("SU", 130, 180)
        self.w.var(self.rooVar).setRange("full", 100, 180)
    return
def process_histomass(self):
    myfile = TFile.Open(self.n_filemass, "recreate")
    for ipt in range(self.p_nptfinbins):
        bin_id = self.bin_matching[ipt]
        df = pickle.load(openfile(self.lpt_recodecmerged[bin_id], "rb"))
        df = df.query(self.l_selml[bin_id])
        if self.s_evtsel is not None:
            df = df.query(self.s_evtsel)
        if self.s_trigger is not None:
            df = df.query(self.s_trigger)
        df = seldf_singlevar(df, self.v_var_binning,
                             self.lpt_finbinmin[ipt], self.lpt_finbinmax[ipt])
        for ibin2 in range(len(self.lvar2_binmin)):
            suffix = "%s%d_%d_%.2f%s_%.2f_%.2f" % \
                     (self.v_var_binning, self.lpt_finbinmin[ipt],
                      self.lpt_finbinmax[ipt], self.lpt_probcutfin[bin_id],
                      self.v_var2_binning, self.lvar2_binmin[ibin2],
                      self.lvar2_binmax[ibin2])
            h_invmass = TH1F("hmass" + suffix, "", self.p_num_bins,
                             self.p_mass_fit_lim[0], self.p_mass_fit_lim[1])
            df_bin = seldf_singlevar(df, self.v_var2_binning,
                                     self.lvar2_binmin[ibin2],
                                     self.lvar2_binmax[ibin2])
            fill_hist(h_invmass, df_bin.inv_mass)
            myfile.cd()
            h_invmass.Write()
            if "pt_jet" in df_bin.columns:
                zarray = z_calc(df_bin.pt_jet, df_bin.phi_jet, df_bin.eta_jet,
                                df_bin.pt_cand, df_bin.phi_cand, df_bin.eta_cand)
                h_zvsinvmass = TH2F("hzvsmass" + suffix, "",
                                    5000, 1.00, 6.00, 2000, -0.5, 1.5)
                zvsinvmass = np.vstack((df_bin.inv_mass, zarray)).T
                fill_hist(h_zvsinvmass, zvsinvmass)
                h_zvsinvmass.Write()
def plot_hist(self, values, bins, **params):
    name = None
    if 'name' in params:
        name = params['name']
    if 'root' in params:
        histo = r.TH1F(name, name, len(bins), np.amin(bins), np.amax(bins))
        rnp.fill_hist(histo, values)
        histo.Scale(1 / histo.Integral(), 'width')
        histo.Write()
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15, 10))
    label = None
    if 'label' in params:
        label = params['label']
    norm = False
    if 'norm' in params:
        norm = True
    if 'ylog' in params:
        if norm:
            ax.set_yscale('log')
        else:
            ax.set_yscale('symlog')
    if 'x_label' in params:
        ax.set_xlabel(params['x_label'])
    ax.hist(values, bins, histtype='step', lw=1.5, label=label, normed=norm)
    # build the legend after the histogram is drawn so the label is picked up
    ax.legend()
    self.pdf.savefig(bbox_inches='tight')
    plt.close()
def _plot1D_numpy(self, data, bins, weights=None, option='', **kwargs):
    """ ... """
    # Check(s)
    if bins is None:
        warning("You need to specify 'bins' when plotting a numpy-type input.")
        return
    if len(data) != len(bins) and len(bins) < 2:
        warning("Number of bins {} is not accepted".format(len(bins)))
        return

    # Fill histogram
    if len(data) == len(bins):
        # Assuming 'data' and 'bins' are sets of (x,y)-points
        h = ROOT.TGraph(len(bins), np.array(bins, dtype=np.float),
                        np.array(data, dtype=np.float))
    else:
        h = ROOT.TH1F('h_{:d}'.format(int(time.time() * 1E+06)), "",
                      len(bins) - 1, np.array(bins, dtype=np.float))
        if len(data) == len(bins) - 1:
            # Assuming 'data' are bin values
            array2hist(data, h)
        else:
            # Assuming 'data' are values to be filled
            fill_hist(h, data, weights=weights)

    # Plot histogram
    return self._plot1D(h, option, **kwargs)
def _analyze(self, file):
    tree = ur.open(file)["Events"]
    arrays = tree.arrays(['Jet_pt', 'nJet', 'Jet_eta', 'Jet_phi', 'MET_pt'])
    jet_pt = arrays[b"Jet_pt"]
    jet_eta = arrays[b"Jet_eta"]
    jet_phi = arrays[b"Jet_phi"]
    met_pt = arrays[b"MET_pt"]
    n_jet = arrays[b"nJet"]

    # Select events with more than one jet
    mask = (n_jet > 1)
    jet_pt = jet_pt[mask]
    jet_phi = jet_phi[mask]
    jet_eta = jet_eta[mask]
    jet_ht = calculate_jet_ht(jet_pt)

    fill_hist(self._histos["ptj"], jet_pt.flatten())
    fill_hist(self._histos["ht"], jet_ht.flatten())
    fill_hist(self._histos["lead_jet_eta"], jet_eta[:, 0])
    fill_hist(self._histos["trail_jet_eta"], jet_eta[:, 1])
    twod = jet_eta[:, 0:2].tolist()
    fill_hist(self._histos["jet_eta_2d"], twod)
def write_score_hists(f, mass, scores_list, hist_template, no_neg_bins=True):
    sys_hists = {}
    for samp, scores_dict in scores_list:
        for sys_term, (scores, weights) in scores_dict.items():
            if sys_term == 'NOMINAL':
                suffix = ''
            else:
                suffix = '_' + '_'.join(sys_term)
            hist = hist_template.Clone(
                name=samp.name + ('_{0}'.format(mass)) + suffix)
            fill_hist(hist, scores, weights)
            if sys_term not in sys_hists:
                sys_hists[sys_term] = []
            sys_hists[sys_term].append(hist)
    f.cd()
    for sys_term, hists in sys_hists.items():
        bad_bins = []
        if no_neg_bins:
            # check for negative bins over all systematics and zero them out;
            # negative bins cause lots of problems in the limit setting, and a
            # negative bin content effectively means "no events here..."
            total_hist = sum(hists)
            for bin, content in enumerate(total_hist):
                if content < 0:
                    log.warning("Found negative bin %d (%f) for "
                                "systematic %s" % (bin, content, sys_term))
                    bad_bins.append(bin)
        for hist in hists:
            for bin in bad_bins:
                # zero out bad bins
                hist[bin] = 0.
            hist.Write()
def efficiency_graph(pass_function, function_inputs, xs, bins=None, error=0.005):
    pass_results = pass_function(function_inputs)
    if bins is None:
        # Automatic binning:
        # compute the number of bins such that the error on the efficiency is
        # equal to 'error' in each bin. The calculation is based on binomial
        # errors and assumes that the efficiency is flat (that the
        # distributions of all and selected events are the same)
        k = float(np.count_nonzero(pass_results))
        n = float(len(pass_results))
        percentiles = [0., 100.]
        if k > 0:
            nbins = (error * n)**2 / k / (1 - k / n)
            # Compute the bin boundaries with the same number of events in all bins
            percentiles = np.arange(0., 100., 100. / nbins)
            percentiles[-1] = 100.
        bins = np.unique(np.percentile(xs, percentiles))
    # Fill histograms of selected and all events and compute efficiency
    histo_pass = Hist(bins)
    histo_total = Hist(bins)
    fill_hist(histo_pass, xs, pass_results)
    fill_hist(histo_total, xs)
    efficiency = Graph()
    efficiency.Divide(histo_pass, histo_total)
    return efficiency
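A minimal usage sketch for efficiency_graph with a toy threshold selection; the arrays and the cut value are hypothetical, and Hist/Graph come from rootpy as in the function body:

import numpy as np
xs = np.random.exponential(20., 10000)           # binning variable, e.g. offline pT
inputs = xs + np.random.normal(0., 2., xs.size)  # quantity the selection acts on
graph = efficiency_graph(pass_function=lambda v: v > 15.,
                         function_inputs=inputs,
                         xs=xs)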
def main():
    inputdir = '/eos/atlas/user/a/asogaard/Analysis/2016/BoostedJetISR/StatsInputs/2017-06-28/'
    outputdir = '/eos/atlas/user/a/asogaard/Analysis/2016/BoostedJetISR/StatsInputs/2017-07-10/'

    inputpaths = glob.glob(inputdir + '/ISRgamma_*.root')
    outputpaths = [p.replace(inputdir, outputdir).replace('ISRgamma', 'hist_ISRgamma')
                   for p in inputpaths]

    for inputpath, outputpath in zip(inputpaths, outputpaths):
        print "Processing '%s'" % inputpath
        infile = ROOT.TFile(inputpath, 'READ')
        outfile = ROOT.TFile(outputpath, 'RECREATE')
        categories = [key.GetName() for key in infile.GetListOfKeys()]
        for category in categories:
            print "-- '%s'" % category
            tree = infile.Get(category)
            array = tree2array(tree)
            #hist = ROOT.TH1F(category, "", 30, 100, 250)
            hist = ROOT.TH1F(category, "", 32, 100, 260)
            fill_hist(hist, array['mJ'], weights=array['weight'])
            # TF shape/norm ...
            outfile.cd()
            hist.Write()
            pass
        outfile.Write()
        outfile.Close()
        infile.Close()
        pass
    return
def fillHistos(self, content, histname, cat, cut, add_systematics):
    binning = self.var.bins(int(cat))
    tmpCont = content.query(cut)
    tmpHist = R.TH1D(histname, histname, *binning)
    tmpHist.GetXaxis().SetTitle(self.var.name)
    tmpHist.Sumw2()
    rn.fill_hist(tmpHist, array=tmpCont[self.var.name].values,
                 weights=tmpCont["event_weight"].values)
    self.DCfile.cd(self.d(self.target_names[int(cat)]))
    tmpHist.Write()
    if add_systematics:
        for rw in self.systematics:
            rwname = rw.replace("reweight", histname).replace(
                "CHAN", self.channel).replace("CAT", self.target_names[int(cat)])
            tmpHist = R.TH1D(rwname, rwname, *binning)
            rn.fill_hist(tmpHist, array=tmpCont[self.var.name].values,
                         weights=tmpCont.eval(self.systematics[rw]).values)
            tmpHist.Write()
def create_TH2D(sample, name='h', title=None,
                binning=[None, None, None, None, None, None],
                weights=None, axis_title=['', '']):
    if title is None:
        title = name
    if binning[1] is None:
        binning[1] = min(sample[:, 0])
    if binning[2] is None:
        binning[2] = max(sample[:, 0])
    if binning[0] is None:
        bin_w = 4 * (np.percentile(sample[:, 0], 75) -
                     np.percentile(sample[:, 0], 25)) / (len(sample[:, 0]))**(1. / 3.)
        binning[0] = int((binning[2] - binning[1]) / bin_w)
    if binning[4] is None:
        binning[4] = min(sample[:, 1])
    if binning[5] is None:
        binning[5] = max(sample[:, 1])
    if binning[3] is None:
        bin_w = 4 * (np.percentile(sample[:, 1], 75) -
                     np.percentile(sample[:, 1], 25)) / (len(sample[:, 1]))**(1. / 3.)
        binning[3] = int((binning[5] - binning[4]) / bin_w)
    h = rt.TH2D(name, title, binning[0], binning[1], binning[2],
                binning[3], binning[4], binning[5])
    rtnp.fill_hist(h, sample, weights=weights)
    h.SetXTitle(axis_title[0])
    h.SetYTitle(axis_title[1])
    h.binning = binning
    return h
def create_histogram(var, hist_sett, **kwargs):
    """
    Create a ROOT histogram from the passed variable(s)

    Args:
        var (np.array): Array with a maximum of 3 columns containing the
            variables to plot.
        hist_sett (tuple): Histogram settings, that are directly unpacked
            into the constructor of the ROOT histogram

    Keyword Args:
        name (str, optional): Name to be used for the histogram
        weights (np.array, optional): weight array with the same number of
            events as the var array. Each entry corresponds to the weight
            of the event
        {x,y,z}_axis (str): axis labels to be set for the histogram

    Returns:
        ROOT.TH{1,2,3}D: The histogram with the dimension corresponding to
            the number of columns of var
    """
    name = kwargs.pop('name', '')
    if not name:
        name = create_random_str()

    # use the number of dimensions from the var to determine which sort of
    # histogram to use
    ndim = var.shape
    if len(ndim) == 1:
        ndim = 1
    else:
        ndim = ndim[1]

    if ndim > 3 or ndim < 0:
        logging.error('Dimension of histogram is {}. Cannot create histogram'
                      .format(ndim))
        raise TypeError('Invalid number of dimensions in create_histograms')

    hist_type = 'TH{}D'.format(ndim)
    try:
        hist = getattr(r, hist_type)(name, '', *hist_sett)
    except TypeError as exc:
        logging.error('Could not construct TH{}D with passed hist_sett: {}'
                      .format(ndim, hist_sett))
        raise exc

    set_hist_opts(hist)

    # set axis labels
    xax, yax, zax = (kwargs.pop(a, '') for a in ['x_axis', 'y_axis', 'z_axis'])
    if xax:
        hist.SetXTitle(xax)
    if yax:
        hist.SetYTitle(yax)
    if zax:
        hist.SetZTitle(zax)

    fill_hist(hist, var, weights=kwargs.pop('weights', None))
    return hist
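A minimal usage sketch for create_histogram; since the settings tuple is unpacked into the TH1D constructor for one-dimensional input, it follows (nbins, xmin, xmax):

import numpy as np
var = np.random.normal(0., 1., 5000)
h = create_histogram(var, (50, -3., 3.), name='h_gauss',
                     x_axis='x', y_axis='entries')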
def HistNW(hist, Events, Weights, Color):
    fill_hist(hist, Events, weights=Weights)
    hist.SetLineColor(Color)
    hist.SetLineWidth(1)
    SetOverflow(hist)
    SetUnderflow(hist)
    if hist.Integral() != 0:
        hist.Scale(1 / hist.Integral())
def fill2dhist(df_, histo, nvar1, nvar2):
    """
    Fill a TH2 histogram with two variables from a dataframe.
    """
    df_rd = df_[[nvar1, nvar2]]
    arr2 = df_rd.values
    fill_hist(histo, arr2)
    return histo
def draw_hist(hist, xarr, fields, weight=None):
    warr = xarr[weight] if weight else None
    if len(fields) == 1:
        return rnp.fill_hist(hist=hist, array=xarr[fields[0]], weights=warr)
    else:
        # stack the requested fields into an (n_events, n_fields) array
        varr = np.array([xarr[f] for f in fields])
        varr = varr.transpose()
        return rnp.fill_hist(hist=hist, array=varr, weights=warr)
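A minimal usage sketch for draw_hist with a NumPy structured array; the field names and binning are hypothetical:

import numpy as np
import ROOT
xarr = np.zeros(1000, dtype=[('pt', 'f8'), ('eta', 'f8'), ('w', 'f8')])
xarr['pt'] = np.random.exponential(30., 1000)
xarr['eta'] = np.random.normal(0., 2., 1000)
xarr['w'] = 1.
h1 = ROOT.TH1F('h_pt', '', 50, 0., 150.)
draw_hist(h1, xarr, ['pt'], weight='w')
h2 = ROOT.TH2F('h_pt_eta', '', 50, 0., 150., 50, -5., 5.)
draw_hist(h2, xarr, ['pt', 'eta'], weight='w')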
def create_TH1D(x, name='h', title=None, binning=[None, None, None],
                weights=None, h2clone=None, axis_title=['', ''],
                opt='', color=0):
    if title is None:
        title = name
    if x.shape[0] == 0:
        print('Empty sample')
        h = rt.TH1D(name, title, 1, 0, 1)
    elif h2clone is not None:
        h = h2clone.Clone(name)
        h.SetTitle(title)
        h.Reset()
    elif isinstance(binning, np.ndarray):
        h = rt.TH1D(name, title, len(binning) - 1, binning)
    elif len(binning) == 3:
        if binning[1] is None:
            binning[1] = min(x)
        if binning[2] is None:
            # cap the upper edge at the 90th percentile when the
            # distribution has a long right tail
            if (np.percentile(x, 95) - np.percentile(x, 50)) < 0.2 * (max(x) - np.percentile(x, 95)):
                binning[2] = np.percentile(x, 90)
            else:
                binning[2] = max(x)
        if binning[0] is None:
            bin_w = 4 * (np.percentile(x, 75) - np.percentile(x, 25)) / (len(x))**(1. / 3.)
            if bin_w == 0:
                bin_w = 0.5 * np.std(x)
            if bin_w == 0:
                bin_w = 1
            binning[0] = int((binning[2] - binning[1]) / bin_w) + 5
        h = rt.TH1D(name, title, binning[0], binning[1], binning[2])
    else:
        print('Binning not recognized')
        raise
    if 'underflow' in opt:
        # clip entries below range into the first bin
        m = h.GetBinCenter(1)
        x = np.copy(x)
        x[x < m] = m
    if 'overflow' in opt:
        # clip entries above range into the last bin
        M = h.GetBinCenter(h.GetNbinsX())
        x = np.copy(x)
        x[x > M] = M
    rtnp.fill_hist(h, x, weights=weights)
    h.SetXTitle(axis_title[0])
    h.SetYTitle(axis_title[1])
    h.SetLineColor(color)
    h.binning = binning
    return h
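The automatic bin width above is twice the Freedman-Diaconis width (4*IQR*n^(-1/3) rather than 2*IQR*n^(-1/3)). A minimal usage sketch, assuming rt and rtnp alias ROOT and root_numpy as in the function body:

import numpy as np
x = np.random.normal(5., 1., 2000)
h = create_TH1D(x, name='h_x', binning=[None, None, None],
                axis_title=['x', 'events'], opt='overflow')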
def check_sample_category(analysis, sample, category):
    clf = analysis.get_clf(category, mass=125, load=True)
    scores, weights = sample.scores(
        clf, category, TARGET_REGION, systematics=False)['NOMINAL']
    hist = Hist(20, scores.min() - 1E-5, scores.max() + 1E-5)
    fill_hist(hist, scores, weights)
    assert_almost_equal(sample.events(category, TARGET_REGION)[1].value,
                        hist.integral(), 3)
def makefill2dhist(df_, titlehist, arrayx, arrayy, nvar1, nvar2):
    """
    Create a TH2F histogram and fill it with two variables from a dataframe.
    """
    histo = build2dhisto(titlehist, arrayx, arrayy)
    df_rd = df_[[nvar1, nvar2]]
    arr2 = df_rd.to_numpy()
    fill_hist(histo, arr2)
    return histo
def GetScoreTH(ClassNum, OutPreOther, Weights, MultiClass, xmin, xmax):
    Pre = OutPreOther[MultiClass == ClassNum]
    Weight = Weights[MultiClass == ClassNum]
    hist = ROOT.TH1F("h" + str(ClassNum), "", 20, xmin, xmax)
    fill_hist(hist, Pre, weights=Weight)
    # normalize to unit area, accounting for the bin width
    hist.Scale(1 / hist.GetSumOfWeights() / Getdx(hist))
    return hist
def cutBasedAMS(self):
    sig_hist = r.TH1F('sigHistTemp', ';m_{ee};Entries', 160, 110, 150)
    fill_hist(sig_hist, self.sigMass, weights=self.sigWeights)
    N_sig = 0.68 * sig_hist.Integral()
    sig_width = self.getRealSigma(sig_hist)
    bkg_hist = r.TH1F('bkgHistTemp', ';m_{ee};Entries', 160, 110, 150)
    fill_hist(bkg_hist, self.bkgMass, weights=self.bkgWeights)
    N_bkg = self.computeBkg(bkg_hist, sig_width)
    return self.getAMS(N_sig, N_bkg)
def get_background_signal(nbins=100, scale=1, sig_events=1000, bkg_events=1000):
    bkg_scores = transform(np.random.normal(-.1, .2, size=bkg_events), scale=scale)
    sig_scores = transform(np.random.normal(.1, .2, size=sig_events), scale=scale)
    bkg_hist = Hist(nbins, -1, 1)
    sig_hist = Hist(nbins, -1, 1)
    fill_hist(bkg_hist, bkg_scores)
    fill_hist(sig_hist, sig_scores)
    return bkg_hist, sig_hist
def fillHistFromNumpy(np_arr, histList=[], histname=""):
    """
    Fill a 1D or 2D hist from a np array
    Inputs: The numpy array and a list with [nbins, min, max] for 1D
            or [nbinsX, minX, maxX, nbinsY, minY, maxY] for 2D
    Return: The hist
    """
    from ROOT import TH1F, TH2F
    from root_numpy import fill_hist
    hist = 0
    if histname == "":
        histname = "hist"
    if len(histList) == 0:
        pass
    elif len(histList) == 3:
        hist = TH1F(histname, histname,
                    int(histList[0]), histList[1], histList[2])
    elif len(histList) == 6:
        hist = TH2F(histname, histname,
                    int(histList[0]), histList[1], histList[2],
                    int(histList[3]), histList[4], histList[5])
    info('(fillHistFromNumpy) filling hist with name %s' % histname)
    fill_hist(hist, np_arr)
    return hist
def plot_hists(self, values, bins, **params):
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15, 10))
    label = None
    norm = False
    if 'norm' in params:
        norm = True
    if 'ylog' in params:
        if norm:
            ax.set_yscale('log')
        else:
            ax.set_yscale('symlog')
    if 'x_label' in params:
        ax.set_xlabel(params['x_label'])
    labels = None
    if 'labels' in params:
        labels = params['labels']
    label_loc = 0
    if 'label_loc' in params:
        label_loc = params['label_loc']
    for x_arr, label in izip(values, labels):
        ax.hist(x_arr, bins, histtype='step', lw=1.5, normed=norm, label=label)
    if labels:
        ax.legend(framealpha=0.0, frameon=False, loc=label_loc)
    self.pdf.savefig(bbox_inches='tight')
    plt.close()
    if 'root' in params:
        for x_arr, label in izip(values, labels):
            histo = r.TH1F(label, label, len(bins), np.amin(bins), np.amax(bins))
            rnp.fill_hist(histo, x_arr)
            histo.Scale(1 / histo.Integral(), 'width')
            histo.Write()
def efficiency_graph(pass_function, function_inputs, xs, bins=None, error=0.005):
    pass_results = pass_function(function_inputs)
    if bins is None:
        # Automatic binning:
        # compute the number of bins such that the error on the efficiency is
        # equal to 'error' in each bin. The calculation is based on binomial
        # errors and assumes that the efficiency is flat (that the
        # distributions of all and selected events are the same)
        k = float(np.count_nonzero(pass_results))
        n = float(len(pass_results))
        percentiles = [0., 100.]
        if k > 0:
            nbins = (error * n)**2 / k / (1 - k / n)
            # Compute the bin boundaries with the same number of events in all bins
            percentiles = np.arange(0., 100., 100. / nbins)
            percentiles[-1] = 100.
        bins = np.unique(np.percentile(xs, percentiles))
    # Fill histograms of selected and all events and compute efficiency
    histo_pass = Hist(bins)
    histo_total = Hist(bins)
    fill_hist(histo_pass, xs, pass_results)
    fill_hist(histo_total, xs)
    efficiency = Graph()
    efficiency.Divide(histo_pass, histo_total)
    return efficiency
def calculateBkgRej(self, discriminant, signal_idx, bkg_idx, weights=None):
    '''
    This does essentially the same thing as the plotDiscriminant method,
    except that it does it for an arbitrary discriminant and doesn't save
    the histograms. It just calculates the score.
    '''
    import ROOT as root
    from ROOT import TH2D, TCanvas, TFile, TNamed, TH1F, TLegend
    import numpy as np
    from root_numpy import fill_hist
    import functions as fn
    import os

    # stop showing plots to screen
    root.gROOT.SetBatch(True)

    bins = 100
    # when creating the plots do it over the range of all probas (scores)
    discriminant_bins = np.linspace(np.min(discriminant), np.max(discriminant), bins)
    hist_bkg = TH1F("Background Discriminant", "Discriminant", bins,
                    np.min(discriminant), np.max(discriminant))
    hist_sig = TH1F("Signal Discriminant", "Discriminant", bins,
                    np.min(discriminant), np.max(discriminant))

    # fill the signal and background histograms
    if weights is not None:
        fill_hist(hist_bkg, discriminant[bkg_idx], weights[bkg_idx])
        fill_hist(hist_sig, discriminant[signal_idx], weights[signal_idx])
    else:
        fill_hist(hist_bkg, discriminant[bkg_idx])
        fill_hist(hist_sig, discriminant[signal_idx])
    if hist_bkg.Integral() != 0:
        hist_bkg.Scale(1 / hist_bkg.Integral())
    if hist_sig.Integral() != 0:
        hist_sig.Scale(1 / hist_sig.Integral())

    # before deciding whether to do a left or right cut for the roc curve
    # we have to find the median.
    sig_median = np.median(discriminant[signal_idx])
    bkg_median = np.median(discriminant[bkg_idx])
    if sig_median > bkg_median:
        roc_cut = 'R'
    else:
        roc_cut = 'L'

    roc_graph = fn.RocCurve_SingleSided(hist_sig, hist_bkg, self.sig_eff,
                                        self.bkg_eff, roc_cut)
    # return the background rejection power 1/(1-eff) at 50% signal efficiency
    fpr_05 = fn.GetBGRej50(roc_graph)
    if fpr_05 != 1:
        return float(1 / (1 - fpr_05))
    return -1.0
def check_events(analysis, sample, category, region):
    clf = analysis.get_clf(category, mass=125, load=True)
    scores, weights = sample.scores(
        clf, category, region, systematics=False)['NOMINAL']
    rec = sample.merged_records(category, region)
    sample_events = sample.events(category, region)[1].value
    hist = Hist(5, scores.min() - 1, scores.max() + 1)
    fill_hist(hist, scores, weights)
    clf_events = hist.integral()

    # test events consistency
    assert_equal(weights.shape[0], rec['weight'].shape[0])
    assert_array_equal(weights, rec['weight'])
    assert_almost_equal(clf_events, weights.sum(), 1)
    assert_almost_equal(sample_events, rec['weight'].sum(), 1)
    assert_almost_equal(sample_events, clf_events, 1)

    # test draw_array
    hist = Hist(1, -1000, 1000)
    sample.draw_array({'tau1_charge': hist}, category, region)
    assert_almost_equal(hist.integral(), sample_events, 1)

    # test scaling
    orig_scale = sample.scale
    sample.scale *= 2.
    scores, weights = sample.scores(
        clf, category, region, systematics=False)['NOMINAL']
    hist.Reset()
    fill_hist(hist, scores, weights)
    scale_clf_events = hist.integral()
    assert_almost_equal(scale_clf_events, weights.sum(), 1)
    assert_almost_equal(scale_clf_events, 2. * clf_events, 1)
    scale_sample_events = sample.events(category, region)[1].value
    assert_almost_equal(scale_sample_events, 2. * sample_events, 1)
    sample.scale = orig_scale
def test_fill_hist():
    np.random.seed(0)
    data1D = np.random.randn(int(1E6))
    w1D = np.empty(int(1E6))
    w1D.fill(2.)
    data2D = np.random.randn(int(1E6), 2)
    data3D = np.random.randn(int(1E4), 3)

    a = TH1D('th1d', 'test', 1000, -5, 5)
    rnp.fill_hist(a, data1D)
    # one element lies beyond the hist range; that's why it's not 1e6
    assert_almost_equal(a.Integral(), 999999.0)

    a_w = TH1D('th1dw', 'test', 1000, -5, 5)
    rnp.fill_hist(a_w, data1D, w1D)
    assert_almost_equal(a_w.Integral(), 999999.0 * 2)

    b = TH2D('th2d', 'test', 100, -5, 5, 100, -5, 5)
    rnp.fill_hist(b, data2D)
    assert_almost_equal(b.Integral(), 999999.0)

    c = TH3D('th3d', 'test', 10, -5, 5, 10, -5, 5, 10, -5, 5)
    rnp.fill_hist(c, data3D)
    assert_almost_equal(c.Integral(), 10000.0)

    # array and weights lengths do not match
    assert_raises(ValueError, rnp.fill_hist, c, data3D, np.ones(10))

    # weights is not 1D
    assert_raises(ValueError, rnp.fill_hist, c, data3D,
                  np.ones((data3D.shape[0], 1)))

    # array not 2-d when filling 2D/3D histogram
    for h in (b, c):
        assert_raises(ValueError, rnp.fill_hist, h, np.random.randn(int(1E4)))

    # length of second axis does not match dimensionality of histogram
    for h in (a, b, c):
        assert_raises(ValueError, rnp.fill_hist, h, np.random.randn(int(1E4), 4))

    # wrong type
    h = list()
    a = np.random.randn(100)
    assert_raises(TypeError, rnp.fill_hist, h, a)
def test_fill_hist():
    n_samples = 1000
    data1D = RNG.randn(n_samples)
    w1D = np.empty(n_samples)
    w1D.fill(2.)
    data2D = RNG.randn(n_samples, 2)
    data3D = RNG.randn(n_samples, 3)

    a = TH1D('th1d', 'test', 100, -5, 5)
    rnp.fill_hist(a, data1D)
    assert_almost_equal(a.Integral(), n_samples)

    a_w = TH1D('th1dw', 'test', 100, -5, 5)
    rnp.fill_hist(a_w, data1D, w1D)
    assert_almost_equal(a_w.Integral(), n_samples * 2)

    b = TH2D('th2d', 'test', 100, -5, 5, 100, -5, 5)
    rnp.fill_hist(b, data2D)
    assert_almost_equal(b.Integral(), n_samples)

    c = TH3D('th3d', 'test', 10, -5, 5, 10, -5, 5, 10, -5, 5)
    rnp.fill_hist(c, data3D)
    assert_almost_equal(c.Integral(), n_samples)

    # array and weights lengths do not match
    assert_raises(ValueError, rnp.fill_hist, c, data3D, np.ones(10))

    # weights is not 1D
    assert_raises(ValueError, rnp.fill_hist, c, data3D,
                  np.ones((data3D.shape[0], 1)))

    # array not 2-d when filling 2D/3D histogram
    for h in (b, c):
        assert_raises(ValueError, rnp.fill_hist, h, RNG.randn(10))

    # length of second axis does not match dimensionality of histogram
    for h in (a, b, c):
        assert_raises(ValueError, rnp.fill_hist, h, RNG.randn(10, 4))

    # wrong type
    h = list()
    a = RNG.randn(10)
    assert_raises(TypeError, rnp.fill_hist, h, a)
def decisionFunctionCanvas(self):
    '''
    Create two histograms which are then drawn onto the same canvas.
    This is only really defined for the BDT, not the AGILE NN, since
    that doesn't give a "score".
    '''
    import ROOT as root
    from ROOT import TH2D, TCanvas, TFile, TNamed, TH1F, TLegend
    import numpy as np
    from root_numpy import fill_hist
    import functions as fn
    import os

    # check that the decision function output was set
    if len(self.decision_function) == 0:
        return False

    df_sig = TH1F("Signal Decision Function", "Score", 100, -1.0, 1.0)
    df_bkg = TH1F("Background Decision Function", "Score", 100, -1.0, 1.0)

    # fill the histograms with the df
    if self.df_weights is not None:
        fill_hist(df_sig, self.decision_function[self.df_sig_idx],
                  self.df_weights[self.df_sig_idx])
        fill_hist(df_bkg, self.decision_function[self.df_bkg_idx],
                  self.df_weights[self.df_bkg_idx])
    else:
        fill_hist(df_sig, self.decision_function[self.df_sig_idx])
        fill_hist(df_bkg, self.decision_function[self.df_bkg_idx])

    # normalise
    if df_sig.Integral() != 0:
        df_sig.Scale(1. / df_sig.Integral())
    if df_bkg.Integral() != 0:
        df_bkg.Scale(1. / df_bkg.Integral())

    # set up drawing options and colours
    df_sig.SetLineColor(4)
    df_sig.SetFillStyle(3004)
    df_bkg.SetLineColor(2)
    df_bkg.SetFillStyle(3005)

    # set the y axis
    max_y = max(df_sig.GetMaximum(), df_bkg.GetMaximum())
    df_sig.SetMaximum(max_y * 1.2)
    df_bkg.SetMaximum(max_y * 1.2)

    # clone these things
    self.df_sig = df_sig.Clone()
    self.df_sig.SetDirectory(0)
    self.df_bkg = df_bkg.Clone()
    self.df_bkg.SetDirectory(0)
    return True
def hist_scores(hist, scores, systematic="NOMINAL"):
    for sample, scores_dict in scores:
        # avoid shadowing the 'scores' iterable inside the loop
        sample_scores, weight = scores_dict[systematic]
        fill_hist(hist, sample_scores, weight)
def plot_clf(
        background_scores,
        category,
        signal_scores=None,
        signal_scale=1.0,
        data_scores=None,
        name=None,
        draw_histograms=True,
        draw_data=False,
        save_histograms=False,
        hist_template=None,
        bins=10,
        min_score=0,
        max_score=1,
        signal_colors=cm.spring,
        systematics=None,
        unblind=False,
        **kwargs):

    if hist_template is None:
        if hasattr(bins, "__iter__"):
            # variable width bins
            hist_template = Hist(bins)
            min_score = min(bins)
            max_score = max(bins)
        else:
            hist_template = Hist(bins, min_score, max_score)

    bkg_hists = []
    for bkg, scores_dict in background_scores:
        hist = hist_template.Clone(title=bkg.label)
        scores, weight = scores_dict["NOMINAL"]
        fill_hist(hist, scores, weight)
        hist.decorate(**bkg.hist_decor)
        hist.systematics = {}
        for sys_term in scores_dict.keys():
            if sys_term == "NOMINAL":
                continue
            sys_hist = hist_template.Clone()
            scores, weight = scores_dict[sys_term]
            fill_hist(sys_hist, scores, weight)
            hist.systematics[sys_term] = sys_hist
        bkg_hists.append(hist)

    if signal_scores is not None:
        sig_hists = []
        for sig, scores_dict in signal_scores:
            sig_hist = hist_template.Clone(title=sig.label)
            scores, weight = scores_dict["NOMINAL"]
            fill_hist(sig_hist, scores, weight)
            sig_hist.decorate(**sig.hist_decor)
            sig_hist.systematics = {}
            for sys_term in scores_dict.keys():
                if sys_term == "NOMINAL":
                    continue
                sys_hist = hist_template.Clone()
                scores, weight = scores_dict[sys_term]
                fill_hist(sys_hist, scores, weight)
                sig_hist.systematics[sys_term] = sys_hist
            sig_hists.append(sig_hist)
    else:
        sig_hists = None

    if data_scores is not None and draw_data and unblind is not False:
        data, data_scores = data_scores
        if isinstance(unblind, float):
            if sig_hists is not None:
                # unblind up to `unblind` % signal efficiency
                sum_sig = sum(sig_hists)
                cut = efficiency_cut(sum_sig, 0.3)
                data_scores = data_scores[data_scores < cut]
        data_hist = hist_template.Clone(title=data.label)
        data_hist.decorate(**data.hist_decor)
        fill_hist(data_hist, data_scores)
        if unblind >= 1 or unblind is True:
            log.info("Data events: %d" % sum(data_hist))
            log.info("Model events: %f" % sum(sum(bkg_hists)))
            for hist in bkg_hists:
                log.info("{0} {1}".format(hist.GetTitle(), sum(hist)))
            log.info("Data / Model: %f" % (sum(data_hist) / sum(sum(bkg_hists))))
    else:
        data_hist = None

    if draw_histograms:
        output_name = "event_bdt_score"
        if name is not None:
            output_name += "_" + name
        for logy in (False, True):
            draw(
                data=data_hist,
                model=bkg_hists,
                signal=sig_hists,
                signal_scale=signal_scale,
                category=category,
                name="BDT Score",
                output_name=output_name,
                show_ratio=data_hist is not None,
                model_colors=None,
                signal_colors=signal_colors,
                systematics=systematics,
                logy=logy,
                **kwargs)

    return bkg_hists, sig_hists, data_hist
def histogram_scores(hist_template, scores,
                     min_score=None, max_score=None,
                     inplace=False):
    if not inplace:
        hist = hist_template.Clone(name=hist_template.name + "_scores")
        hist.Reset()
    else:
        hist = hist_template
    if min_score is not None:
        log.info("cutting out scores below %f" % min_score)
    if max_score is not None:
        log.info("cutting out scores above %f" % max_score)
    if isinstance(scores, np.ndarray):
        if min_score is not None:
            scores = scores[scores > min_score]
        if max_score is not None:
            scores = scores[scores < max_score]
        fill_hist(hist, scores)
    elif isinstance(scores, tuple):
        # data
        scores, weight = scores
        if min_score is not None:
            scores_idx = scores > min_score
            scores = scores[scores_idx]
            weight = weight[scores_idx]
        if max_score is not None:
            scores_idx = scores < max_score
            scores = scores[scores_idx]
            weight = weight[scores_idx]
        assert (weight == 1).all()
        fill_hist(hist, scores)
    elif isinstance(scores, dict):
        # non-data with possible systematics
        # nominal case:
        nom_scores, nom_weight = scores['NOMINAL']
        if min_score is not None:
            scores_idx = nom_scores > min_score
            nom_scores = nom_scores[scores_idx]
            nom_weight = nom_weight[scores_idx]
        if max_score is not None:
            scores_idx = nom_scores < max_score
            nom_scores = nom_scores[scores_idx]
            nom_weight = nom_weight[scores_idx]
        fill_hist(hist, nom_scores, nom_weight)
        # systematics
        sys_hists = {}
        for sys_term, (sys_scores, sys_weight) in scores.items():
            if sys_term == 'NOMINAL':
                continue
            if min_score is not None:
                scores_idx = sys_scores > min_score
                sys_scores = sys_scores[scores_idx]
                sys_weight = sys_weight[scores_idx]
            if max_score is not None:
                scores_idx = sys_scores < max_score
                sys_scores = sys_scores[scores_idx]
                sys_weight = sys_weight[scores_idx]
            sys_hist = hist.Clone(
                name=hist.name + "_" + systematic_name(sys_term))
            sys_hist.Reset()
            fill_hist(sys_hist, sys_scores, sys_weight)
            sys_hists[sys_term] = sys_hist
        hist.systematics = sys_hists
    else:
        raise TypeError("scores not an np.array, tuple or dict")
    return hist
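A minimal usage sketch for histogram_scores with the dict form of scores; the template uses rootpy's Hist as elsewhere in this module, and the score/weight arrays are hypothetical:

import numpy as np
from rootpy.plotting import Hist
template = Hist(20, 0., 1., name='bdt')
scores = {'NOMINAL': (np.random.uniform(0., 1., 500), np.ones(500))}
h = histogram_scores(template, scores)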
# plot error on evaluation_data without normalization
evaluation_error_no_norm = [x * max_outputs[0] for x in evaluation_error]
evaluation_errors_last_epoch_no_norm = [x * max_outputs[0]
                                        for x in evaluation_errors_last_epoch]
titlename = ("eta=" + str(eta) + ", mini_batch_size=" + str(mini_batch_size) +
             ", lambda=" + str(lm_da) + ",\n Cost=" + cost_function +
             ", weight_initialization=" + weight_initialization)
titlename_no_norm = titlename + "_without_normalization"
plt.figure(2)
plt.title(titlename_no_norm)
plt.xlabel("epochs")
plt.ylabel("total error validation data")
x_range = [x + 1 for x in range(0, epochs)]
plt.plot(x_range, evaluation_error_no_norm)
plt.savefig("error_on_val_data_" + titlename_no_norm + ".png")
print "total error on validation_data set after last training"
print evaluation_error_no_norm[-1]
hist_no_norm = TH1D('hist_no_norm', titlename_no_norm, 50, -100, 100)
fill_hist(hist_no_norm, evaluation_errors_last_epoch_no_norm)
canvas = TCanvas()
hist_no_norm.GetXaxis().SetTitle("desired Chi^2 - outputted Chi^2")
hist_no_norm.Draw()
canvas.SaveAs('error_on_val_data_' + titlename_no_norm + '_hist.png')
# -----------------------------------------------------
def apply_tri_selection(rec, lumi):
    electron_p = rec['electron_p']
    electron_px = rec['electron_px']
    electron_py = rec['electron_py']
    electron_chi2 = rec['electron_chi2']
    electron_has_l1 = rec['electron_has_l1']
    electron_pt = np.sqrt(np.power(electron_px, 2) + np.power(electron_py, 2))

    positron_p = rec['positron_p']
    positron_px = rec['positron_px']
    positron_py = rec['positron_py']
    positron_chi2 = rec['positron_chi2']
    positron_d0 = rec['positron_d0']
    positron_has_l1 = rec['positron_has_l1']
    positron_has_l2 = rec['positron_has_l2']
    positron_pt = np.sqrt(np.power(positron_px, 2) + np.power(positron_py, 2))

    top_cluster_time = rec['top_cluster_time']
    bot_cluster_time = rec['bot_cluster_time']
    cluster_time_diff = top_cluster_time - bot_cluster_time

    top_time = rec['top_time']
    bot_time = rec['bot_time']
    mass = rec['invariant_mass']
    v0_p = rec["v0_p"]

    top_track_cluster_dt = top_cluster_time - top_time
    abs_top_track_cluster_dt = np.absolute(top_track_cluster_dt - 43)
    bot_track_cluster_dt = bot_cluster_time - bot_time
    abs_bot_track_cluster_dt = np.absolute(bot_track_cluster_dt - 43)
    track_cluster_dt_cut = ((abs_top_track_cluster_dt < 4.5) &
                            (abs_bot_track_cluster_dt < 4.5))

    asym = (electron_pt - positron_pt) / (electron_pt + positron_pt)

    #
    # Define cuts
    #
    cuts = collections.OrderedDict()
    # Base cuts used to reduce accidentals
    cuts['Radiative cut'] = v0_p > 0.8 * 1.056  # GeV
    cuts['abs(Ecal clust time - trk time) - 43 ns < 4.5'] = track_cluster_dt_cut
    cuts['$p(V_0) < 1.2 E_{beam}$'] = v0_p < 1.2 * 1.056  # GeV
    cuts['trk $\chi^2$ < 40'] = (electron_chi2 < 40) & (positron_chi2 < 40)
    cuts['Ecal clust pair dt < 2 ns'] = np.absolute(cluster_time_diff) < 2
    cuts['l1 & l2 hit'] = (positron_has_l1 == 1) & (positron_has_l2 == 1)
    cuts['$d_{0}(e^+) < 1.1$'] = positron_d0 < 1.1
    cuts['$p_t(e^-) - p_t(e^+)/p_t(e^-) + p_t(e^+)$'] = asym < .47

    labels = ['Opp. Ecal clusters, trk-cluster match $\chi^2 < 10$, $p(e^-)<0.75E_{beam}$']
    clust_dt_arr = [cluster_time_diff]
    v0_p_arr = [v0_p]
    electron_p_arr = [electron_p]
    positron_d0_arr = [positron_d0]
    electron_chi2_arr = [electron_chi2]
    asym_arr = [asym]
    abs_top_cluster_dt_arr = [abs_top_track_cluster_dt]

    # Apply the cuts sequentially and keep the arrays after each stage
    cut = np.ones(len(v0_p), dtype=bool)
    for key, value in cuts.iteritems():
        cut = cut & value
        clust_dt_arr.append(cluster_time_diff[cut])
        v0_p_arr.append(v0_p[cut])
        electron_p_arr.append(electron_p[cut])
        positron_d0_arr.append(positron_d0[cut])
        electron_chi2_arr.append(electron_chi2[cut])
        abs_top_cluster_dt_arr.append(abs_top_track_cluster_dt[cut])
        asym_arr.append(asym[cut])
        labels.append(key)

    plt = Plotter.Plotter('trident_selection.pdf')
    plt.plot_hists(clust_dt_arr, np.linspace(-10, 10, 201), labels=labels,
                   x_label='Top cluster time - Bottom cluster time (ns)',
                   label_loc=2, norm=True, ylog=True, root=True)
    plt.plot_hists(v0_p_arr, np.linspace(0, 1.5, 151), labels=labels,
                   label_loc=2, x_label='$V_{0}(p)$ (GeV)', ylog=True)
    plt.plot_hists(electron_p_arr, np.linspace(0, 1.5, 151), labels=labels,
                   x_label='$p(e^-)$ (GeV)', norm=True, label_loc=4, ylog=True)
    plt.plot_hists(positron_d0_arr, np.linspace(-20, 20, 201), labels=labels,
                   x_label='$d_{0}(e^{+})$', norm=True, label_loc=2, ylog=True)
    plt.plot_hists(electron_chi2_arr, np.linspace(0, 100, 201), labels=labels,
                   x_label='Track $\chi^2$', norm=True, label_loc=1, ylog=True)
    plt.plot_hists(asym_arr, np.linspace(-1, 1, 201), labels=labels,
                   x_label='$p_t(e^-) - p_t(e^+)/p_t(e^-) + p_t(e^+)$',
                   norm=True, label_loc=2, ylog=True)
    plt.plot_hists(abs_top_cluster_dt_arr, np.linspace(0, 60, 121), labels=labels,
                   x_label='abs(ECal cluster time - track time - 43) ns',
                   norm=True, label_loc=1, ylog=True)
    plt.close()

    file = r.TFile("invariant_mass.root", "recreate")
    mass_histo = r.TH1F("invariant_mass", "invariant_mass", 2000, 0., 0.1)
    mass_histo.GetXaxis().SetTitle("m(e^+e^-) (GeV)")
    mass_histo.GetYaxis().SetTitle("#sigma(#mub)")
    bin_width = mass_histo.GetXaxis().GetBinWidth(1)
    # weight entries to a cross section if a luminosity is given
    weights = np.empty(len(mass[cut]))
    if lumi:
        weights.fill(1 / (bin_width * float(lumi)))
    else:
        weights.fill(1)
    rnp.fill_hist(mass_histo, mass[cut], weights=weights)
    mass_histo.Write()
    file.Close()
def Hist_comp_ratios(dec, bkg):
    # weights = pickle.load(open("weights_train.pck", "rb"))
    # probas = pickle.load(open("class_proba.pck", "rb"))
    # mt_dec = pickle.load(open("inputs_train.pck", "rb"))
    # classes = pickle.load(open("targets_train.pck", "rb"))
    weights = pickle.load(open("weights.pck", "rb"))
    probas = pickle.load(open("class_probaWholeSample.pck", "rb"))
    mt_dec = pickle.load(open("inputs.pck", "rb"))
    classes = pickle.load(open("classes.pck", "rb"))

    # put mt_dec, the weights and the relevant class probability together
    class_num = GetClassIndex(bkg) - 1
    all_data = np.transpose(np.vstack((mt_dec[:, 0], mt_dec[:, 1], classes,
                                       weights, probas[:, class_num])))
    all_data = np.array(filter(lambda x: x[2] == class_num, all_data))

    # extract the relevant values for the specific decay channel
    Filter = np.array(filter(lambda x: x[1] == dec, all_data))
    MT = Filter[:, 0]
    Weight = Filter[:, 3]
    Prob_bkg = Filter[:, 4]
    SumProbXweight = np.multiply(Prob_bkg, Weight)

    h1 = TH1D("h1", "SumProbXweight_" + bkg + str(dec), 25, 0.0, 250.)
    root_open("plots/ROOTfiles/H1_SumProbXweight_" + bkg + str(dec) + "Samptot.root", 'recreate')
    fill_hist(h1, MT, weights=SumProbXweight)
    h1.Write()

    h2 = TH1D("h2", "SumWeight_" + bkg + str(dec), 25, 0.0, 250.)
    root_open("plots/ROOTfiles/H1_SumWeight_" + bkg + str(dec) + "Samptot.root", 'recreate')
    fill_hist(h2, MT, weights=Weight)
    h2.Write()

    h3 = h1.Clone("h3")
    h3.Divide(h2)

    if dec == 0.0:
        Filter2 = np.array(filter(lambda x: x[1] == 1.0, all_data))
        MT2 = Filter2[:, 0]
        Weight2 = Filter2[:, 3]
        Prob_bkg2 = Filter2[:, 4]
        SumProbXweight2 = np.multiply(Prob_bkg2, Weight2)

        h12 = TH1D("h12", "SumProbXweight_" + bkg + str(1.0), 25, 0.0, 250.)
        root_open("plots/ROOTfiles/H1_SumProbXweight_" + bkg + str(1.0) + "Samptot.root", 'recreate')
        fill_hist(h12, MT2, weights=SumProbXweight2)
        h12.Write()

        h22 = TH1D("h22", "SumWeight_" + bkg + str(1.0), 25, 0.0, 250.)
        root_open("plots/ROOTfiles/H1_SumWeight_" + bkg + str(1.0) + "Samptot.root", 'recreate')
        fill_hist(h22, MT2, weights=Weight2)
        h22.Write()

        h32 = h12.Clone("h32")
        h32.Divide(h22)

        h12.SetStats(0)
        h22.SetStats(0)
        h32.SetStats(0)
        h12.SetLineColor(2)
        h22.SetLineColor(2)
        h32.SetLineColor(0)
        h32.SetMarkerStyle(23)
        h32.SetMarkerColor(2)
        h32.SetMarkerSize(1.2)

    # create Canvas and save the plot as png
    c = Canvas()
    c.Divide(2, 2)

    c.cd(1)
    h1.SetStats(0)
    if dec == 0.0:
        if h1.GetMaximum() > h12.GetMaximum():
            print "h1"
            h12.GetYaxis().SetRangeUser(0., h1.GetMaximum())
        else:
            print "h12"
            h12.GetYaxis().SetRangeUser(0., h12.GetMaximum())
        h12.Draw("HIST")
    h1.Draw("HIST SAME")

    c.cd(2)
    h2.SetStats(0)
    if dec == 0.0:
        if h2.GetMaximum() > h22.GetMaximum():
            h22.GetYaxis().SetRangeUser(0., h2.GetMaximum())
        else:
            h22.GetYaxis().SetRangeUser(0., h22.GetMaximum())
        h22.Draw("HIST")
    h2.Draw("HIST SAME")

    c.cd(3)
    f1 = root_open(GetClassProbaPath(bkg))
    # get the 2 histograms for the 2 decay channels: 1 track & 3 tracks
    H1 = f1.Get("h_w_2d")
    if dec == 10.0:
        # 3 tracks
        h_data = Hist(list(H1.xedges()))
        h_data[:] = H1[:, 2]
    else:
        # 1 track
        h_data = Hist(list(H1.xedges()))
        h_data[:] = H1[:, 1]
    h_data.GetXaxis().SetRangeUser(0., 250.)
    h_data.GetYaxis().SetRangeUser(0., 1.)
    h_data.fillstyle = '/'
    h_data.fillcolor = (255, 255, 0)  # yellow
    h_data.SetStats(0)
    h_data.Draw("HIST")

    h3.SetLineColor(0)
    h3.SetMarkerStyle(21)
    h3.SetMarkerColor(4)
    h3.SetMarkerSize(1.2)
    h3.SetStats(0)
    h3.SetTitle(bkg + str(dec))
    h3.GetXaxis().SetTitle("m_{T}")
    h3.GetYaxis().SetTitle("Class probability")
    h3.Draw("HIST P SAME")
    if dec == 0.0:
        h32.Draw("HIST P SAME")
    c.Update()

    if dec == 0.0:
        legend = Legend(3, leftmargin=0.45, margin=0.3)
        legend.AddEntry(h3, "training, no #pi^{0}", style='P')
        legend.AddEntry(h32, "training, with #pi^{0}", style='P')
        legend.AddEntry(h_data, "data", style='F')
        legend.Draw()
    else:
        legend = Legend(2, leftmargin=0.45, margin=0.3)
        legend.AddEntry(h3, "training", style='P')
        legend.AddEntry(h_data, "data", style='F')
        legend.Draw()

    c.SaveAs("plots/H1_" + bkg + str(dec) + "_RatioCompSamptot.png")
#clf = linear_model.SGDRegressor()
clf = xgb.XGBRegressor(max_depth=4, learning_rate=0.1, n_estimators=400)
scores = cross_val_score(clf, data_reduced_n, data_target, cv=5,
                         scoring='neg_mean_squared_error')
print scores
print("MSE: %0.3f (+/- %0.3f)" % (scores.mean(), scores.std() * 2))

predicted = cross_val_predict(clf, data_reduced_n, data_target, cv=5)
from sklearn import metrics
scores2 = metrics.mean_squared_error(data_target, predicted)
print scores2
print("MSE 2: %0.3f" % scores2)

fig, ax = plt.subplots()
ax.scatter(data_target, predicted)
ax.plot([data_target.min(), data_target.max()],
        [data_target.min(), data_target.max()], 'k--', lw=4)
ax.set_xlabel('True KE [GeV]')
ax.set_ylabel('Predicted KE [GeV]')
ax.set_xlim([data_target.min() * 0.9, data_target.max() * 1.1])
ax.set_ylim([data_target.min() * 0.9, data_target.max() * 1.1])
plt.savefig("xgb_cross_val_comparison.pdf")

ROOT.gStyle.SetOptStat(1)
hist = ROOT.TH1D("hist", "hist", 100, -1, 1)
diff = (data_target - predicted) / data_target
fill_hist(hist, diff)
hist.GetXaxis().SetTitle("#DeltaE/E")
canvas = ROOT.TCanvas()
hist.Draw()
canvas.SaveAs("xgb_cross_val_DeltaE.pdf")
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12, 6))
axes[0].set_yscale('log')
axes[1].set_yscale('log')
q.hist(histtype='step', bins=np.arange(loC, hiC + width, width), color='r', ax=axes[0])
#q_good.hist(histtype='step', bins=np.arange(loC, hiC + width, width), color='g', ax=axes[0])
q_ped.hist(histtype='step', bins=np.arange(loC, hiC + width, width), color='b', ax=axes[1])
axes[0].plot((threshold, threshold), (0, 1e5), 'k-')
axes[1].plot((threshold, threshold), (0, 1e5), 'k-')

from ROOT import TH1F, TFile
from root_numpy import fill_hist

q_file = TFile('plots/' + sys.argv[1] + '_' + sys.argv[2] + '_charge_spectrum.root', "recreate")
q_hist1 = TH1F('hist1', 'title', nBins, loC, hiC)
q_hist2 = TH1F('hist2', 'title', 55, -10, 100)
fill_hist(q_hist1, q.as_matrix())
fill_hist(q_hist2, max_voltages['minus_voltage'].as_matrix())
q_hist1.Write()
q_hist2.Write()
q_file.Close()

max_q = q.max()

# uncomment these lines if you want to plot the fitted Gaussian
#x = np.linspace(loC, hiC, nBins)
#pdf = n_events*norm.pdf(x, mu, std)/nBins
#plt.plot(x, pdf, 'k', linewidth = 2)

axes[0].set_xlabel("charge [pC]", fontsize=20)
axes[0].set_ylabel("Entries / (%0.2f pC)" % width, fontsize=20)
plt.ylabel("recoKE - trueKE [MeV]") # In[12]: plt.scatter(test_data_trueKE,(clf.predict(test_data_reduced_n)-test_data_trueKE)/test_data_trueKE) plt.ylim((0,1)) plt.xlabel("trueKE [MeV]") plt.ylabel("DeltaE/E") res_twod_SGD = np.dstack((test_data_trueKE, (clf.predict(test_data_reduced_n)-test_data_trueKE)/test_data_trueKE)) # In[13]: hist_SGD = ROOT.TH2D('name', 'title', 100, 0, 5000, 100, -1, 10) fill_hist(hist_SGD, res_twod_SGD[0]) hist_SGD.Draw() ROOT.gPad.Draw() # In[14]: profile_SGD = hist_SGD.ProfileX() profile_SGD.SetLineColor(ROOT.kBlue) profile_SGD.Draw() ROOT.gPad.Draw() # In[15]: params = {'n_estimators': 1000, 'max_depth': 10, 'min_samples_split': 1,
for bootstrap_idx in range(100):
    sys.stdout.write("bootstrap {0} ...\r".format(bootstrap_idx))
    sys.stdout.flush()
    # resample with replacement
    # http://docs.scipy.org/doc/numpy-dev/reference/generated/numpy.random.choice.html
    sample_idx = np.random.choice(len(array), size=len(array), replace=True)
    array_bootstrapped = array[sample_idx]
    # convert back to a TTree and write it out
    tree_bootstrapped = array2tree(
        array_bootstrapped, name='bootstrap_{0}'.format(bootstrap_idx))
    tree_bootstrapped.Write()
    tree_bootstrapped.Delete()
    # fill the ROOT histogram with the numpy array
    hist.Reset()
    fill_hist(hist, rec2array(array_bootstrapped))
    hist.Draw()
    hist.xaxis.title = 'x'
    hist.yaxis.title = 'y'
    hist.zaxis.title = 'Events'
    hist.xaxis.limits = (-2.5, 2.5)
    hist.yaxis.limits = (-2.5, 2.5)
    hist.zaxis.range_user = (0, 60)
    hist.xaxis.divisions = 5
    hist.yaxis.divisions = 5
    hist.zaxis.divisions = 5
    canvas.Print('bootstrap.gif+50')

# loop the gif
canvas.Print('bootstrap.gif++')
output.Close()
def plotDiscriminant(self, discriminant, signal_idx, bkg_idx, weights = None, save_disc = True, rejection_power=True): ''' Plot the discriminants and the resulting ROC curve derived from them. Keyword args: discriminant --- The score of the BDT (set in the setProbas method) signal_idx --- The true indices of all signal events bkg_idx ---The true indices of all background events save_disc --- Flag indicating if the discriminant plots should be saved. rejection_power --- Whether or not to calculate bkg power: 1/eff in addtion to 1-eff ''' import ROOT as root from ROOT import TH2D, TCanvas, TFile, TNamed, TH1F, TLegend import numpy as np from root_numpy import fill_hist import functions as fn import os # stop showing plots to screen root.gROOT.SetBatch(True) if not os.path.exists(self.output_path): os.makedirs(self.output_path) fo = TFile.Open(self.output_path+"/"+self.output_prefix+str(self.job_id)+'.root','RECREATE') bins = 100 # when creating the plots do it over the range of all probas (scores) discriminant_bins = np.linspace(0,1,bins)#np.min(discriminant), np.max(discriminant), bins) hist_bkg = TH1F("Background Discriminant","Discriminant",bins, np.min(discriminant), np.max(discriminant)) hist_sig = TH1F("Signal Discriminant","Discriminant",bins, np.min(discriminant), np.max(discriminant)) # fill the signal and background histograms if weights is not None: print 'weights is not none******************' fill_hist(hist_bkg,discriminant[bkg_idx], weights[bkg_idx]) fill_hist(hist_sig,discriminant[signal_idx], weights[signal_idx]) else: fill_hist(hist_bkg,discriminant[bkg_idx]) fill_hist(hist_sig,discriminant[signal_idx]) if hist_bkg.Integral() != 0: hist_bkg.Scale(1/hist_bkg.Integral()) if hist_sig.Integral() != 0: hist_sig.Scale(1/hist_sig.Integral()) hist_sig.SetLineColor(4) hist_bkg.SetLineColor(2) #hist_sig.SetFillColorAlpha(4, 0.5); hist_sig.SetFillStyle(3004) #hist_bkg.SetFillColorAlpha(2, 0.5); hist_bkg.SetFillStyle(3005) hist_sig.Write() hist_bkg.Write() c = TCanvas() leg = TLegend(0.8,0.55,0.9,0.65);leg.SetFillColor(root.kWhite) leg.AddEntry(hist_sig, "Signal","l") leg.AddEntry(hist_bkg, "Background", "l") max_y = max(hist_sig.GetMaximum(), hist_bkg.GetMaximum()) hist_sig.SetMaximum(max_y*1.2) hist_bkg.SetMaximum(max_y*1.2) hist_sig.Draw('hist') hist_bkg.Draw('histsame') c.Write() if save_disc == True: if not os.path.exists('disc_plots'): os.makedirs('disc_plots') c.SaveAs('disc_plots/discriminants_'+str(self.job_id)+'.png') # before deciding whether to do a left or right cut for the roc curve we have to find the median. 
    sig_median = np.median(discriminant[signal_idx])
    bkg_median = np.median(discriminant[bkg_idx])
    if sig_median > bkg_median:
        roc_cut = 'R'
    else:
        roc_cut = 'L'

    # create the single-sided ROC curve (using the code from Sam)
    self.roc_graph = fn.RocCurve_SingleSided(
        hist_sig, hist_bkg, self.sig_eff, self.bkg_eff, roc_cut)
    self.roc_graph.SetName('BackgroundRejection')
    self.roc_graph.SetTitle('BackgroundRejection')
    self.roc_graph.Write()

    # get the background rejection power at 50% signal efficiency;
    # store the efficiencies first
    self.ROC_sig_efficiency, self.ROC_bkg_rejection = fn.getEfficiencies(self.roc_graph)
    self.bkgRejectionPower()

    # write the roc score as a string to the output file
    rej_string = 'rejection_power_' + str(self.ROC_rej_power_05)
    rej_n = TNamed(rej_string, rej_string)
    rej_n.Write()

    if rejection_power:
        c.SetLogy()
        self.roc_graph_power = fn.RocCurve_SingleSided(
            hist_sig, hist_bkg, self.sig_eff, self.bkg_eff, roc_cut,
            rejection=False)
        c.cd()
        self.roc_graph_power.SetName('BackgroundPower')
        self.roc_graph_power.SetTitle('BackgroundPower')
        self.roc_graph_power.Write()

    # write the decision function to the root file as well, if it is defined
    if len(self.decision_function) > 0:
        self.decisionFunctionCanvas()
        # add the legends
        leg2 = TLegend(0.8, 0.55, 0.9, 0.65)
        leg2.SetFillColor(root.kWhite)
        leg2.AddEntry(self.df_sig, "Signal", "l")
        leg2.AddEntry(self.df_bkg, "Background", "l")
        # canvas to draw them on
        c2 = TCanvas('Decision Functions')
        self.df_sig.Draw('hist')
        self.df_bkg.Draw('histsame')
        leg2.Draw('same')
        c2.Write()
        # now write the df histograms as well
        self.df_sig.Write()
        self.df_bkg.Write()

    self.hist_sig = hist_sig.Clone()
    self.hist_sig.SetDirectory(0)
    self.hist_bkg = hist_bkg.Clone()
    self.hist_bkg.SetDirectory(0)
    fo.Close()
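# Hypothetical usage sketch for plotDiscriminant with toy scores; `analysis`
# stands for whatever object carries this method (e.g. the BDT wrapper class
# it belongs to) and is an assumption, not part of the original code:
import numpy as np

discriminant = np.concatenate([np.random.beta(5, 2, 1000),   # signal-like scores near 1
                               np.random.beta(2, 5, 1000)])  # background-like scores near 0
signal_idx = np.arange(1000)        # first half is signal
bkg_idx = np.arange(1000, 2000)     # second half is background
weights = np.ones(2000)
analysis.plotDiscriminant(discriminant, signal_idx, bkg_idx, weights=weights)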
def optimize_background_rejection_vs_ieta(effs, isolations,
                                          signalfile, signaltree,
                                          backgroundfile, backgroundtree,
                                          inputnames=['abs(ieta)', 'ntt'],
                                          targetname='iso',
                                          cut='et>10'):
    ieta_binning = [0.5, 3.5, 6.5, 9.5, 13.5, 18.5, 24.5, 27.5]

    # Compute signal efficiencies
    ninputs = len(inputnames)
    branches = copy.deepcopy(inputnames)
    branches.append(targetname)
    data = root2array(signalfile, treename=signaltree,
                      branches=branches, selection=cut)
    data = data.view((np.float64, len(data.dtype.names)))
    inputs = data[:, range(ninputs)].astype(np.float32)
    targets = data[:, [ninputs]].astype(np.float32).ravel()
    xs = data[:, [0]].astype(np.float32).ravel()
    # fill the signal ieta histogram
    histo_signal = Hist(ieta_binning)
    fill_hist(histo_signal, xs)
    # signal_efficiencies is a 2D array:
    # the first dimension corresponds to different ieta values,
    # the second dimension corresponds to different working points
    signal_efficiencies = [
        graph2array(efficiency.efficiency_graph(
            pass_function=(lambda x: np.less(x[1], iso.predict(x[0]))),
            function_inputs=(inputs, targets),
            xs=xs,
            bins=ieta_binning))[:, [1]].ravel()
        for iso in isolations]
    signal_efficiencies = np.column_stack(signal_efficiencies)

    # Compute background efficiencies (same procedure on the background tree)
    ninputs = len(inputnames)
    branches = copy.deepcopy(inputnames)
    branches.append(targetname)
    data = root2array(backgroundfile, treename=backgroundtree,
                      branches=branches, selection=cut)
    data = data.view((np.float64, len(data.dtype.names)))
    inputs = data[:, range(ninputs)].astype(np.float32)
    targets = data[:, [ninputs]].astype(np.float32).ravel()
    xs = data[:, [0]].astype(np.float32).ravel()
    # fill the background ieta histogram
    histo_background = Hist(ieta_binning)
    fill_hist(histo_background, xs)
    # background_efficiencies has the same layout as signal_efficiencies
    background_efficiencies = [
        graph2array(efficiency.efficiency_graph(
            pass_function=(lambda x: np.less(x[1], iso.predict(x[0]))),
            function_inputs=(inputs, targets),
            xs=xs,
            bins=ieta_binning))[:, [1]].ravel()
        for iso in isolations]
    background_efficiencies = np.column_stack(background_efficiencies)

    signal_efficiencies_diff_graphs = []
    background_efficiencies_diff_graphs = []
    optimal_points_graphs = []
    optimal_points = []
    # compute the best working point in each ieta bin
    for i, (signal_effs, background_effs) in enumerate(
            zip(signal_efficiencies, background_efficiencies)):
        # Compute the probability of signal in this ieta bin for the different
        # efficiency points. The cut is assumed to be applied only in this bin;
        # all other bins keep the same number of entries.
        n_i = histo_signal[i + 1].value
        n_tot = histo_signal.integral(overflow=True)
        proba_signal = np.array(
            [n_i * eff / (n_tot - n_i * (1. - eff)) for eff in signal_effs])
        # Same as above, but for background
        n_i = histo_background[i + 1].value
        n_tot = histo_background.integral(overflow=True)
        proba_background = np.array(
            [n_i * eff / (n_tot - n_i * (1. - eff)) for eff in background_effs])
        (signal_efficiencies_diff_graph, background_efficiencies_diff_graph,
         optimal_points_graph, optimal_point) = find_best_working_point(
            effs, signal_effs, background_effs, proba_signal, proba_background)
        signal_efficiencies_diff_graph.SetName('efficiencies_signal_ieta_{}'.format(i))
        background_efficiencies_diff_graph.SetName('efficiencies_background_ieta_{}'.format(i))
        optimal_points_graph.SetName('signal_background_optimal_points_ieta_{}'.format(i))
        signal_efficiencies_diff_graphs.append(signal_efficiencies_diff_graph)
        background_efficiencies_diff_graphs.append(background_efficiencies_diff_graph)
        optimal_points_graphs.append(optimal_points_graph)
        optimal_points.append(optimal_point)
    # store the optimal working point of each ieta bin in a histogram
    optimal_points_histo = Hist(ieta_binning)
    array2hist(optimal_points, optimal_points_histo)
    return (signal_efficiencies_diff_graphs, background_efficiencies_diff_graphs,
            optimal_points_graphs, optimal_points_histo)
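# Hypothetical call sketch: `isolations` would be a list of trained isolation
# regressors, one per target efficiency in `effs`; the file and tree names
# below are placeholders, not taken from the original code.
effs = np.arange(0.2, 1.0, 0.05)
(sig_graphs, bkg_graphs,
 opt_graphs, opt_histo) = optimize_background_rejection_vs_ieta(
    effs, isolations,
    'signal.root', 'ntuple',
    'background.root', 'ntuple')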
def histfactory(self, sample, category, systematics=False,
                rec=None, weights=None, mva=False, uniform=False, nominal=None):
    if not systematics:
        return
    if len(self.modes) != 1:
        raise TypeError(
            'histfactory sample only valid for single production mode')
    if len(self.masses) != 1:
        raise TypeError(
            'histfactory sample only valid for single mass point')

    # isolation systematic
    sample.AddOverallSys(
        'ATLAS_ANA_HH_{0:d}_Isolation'.format(self.year),
        1. - 0.06, 1. + 0.06)

    mode = self.modes[0]
    if mode in ('Z', 'W'):
        _uncert_mode = 'VH'
    else:
        _uncert_mode = self.MODES_WORKSPACE[mode]

    if self.year == 2011:
        energy = 7
    elif self.year == 2012:
        energy = 8
    else:
        raise ValueError(
            "collision energy is unknown for year {0:d}".format(self.year))

    # QCD scale
    for qcd_scale_term, qcd_scale_mode, qcd_scale_category, values in self.QCD_SCALE:
        if qcd_scale_mode == _uncert_mode and qcd_scale_category == category.name:
            high, low = map(float, values.split('/'))
            sample.AddOverallSys(qcd_scale_term, low, high)

    # underlying event uncertainty
    for ue_term, ue_mode, ue_category, values in self.UE_UNCERT:
        if ue_mode == _uncert_mode and ue_category == category.name:
            high, low = map(float, values.split('/'))
            sample.AddOverallSys(ue_term, low, high)

    # PDF acceptance uncertainty (OverallSys)
    for pdf_term, pdf_mode, pdf_category, values in self.PDF_ACCEPT_NORM_UNCERT:
        if pdf_mode == _uncert_mode and pdf_category == category.name:
            high, low = map(float, values.split('/'))
            sample.AddOverallSys(pdf_term, low, high)

    sample_nom = sample.hist

    # PDF acceptance uncertainty (HistoSys), only for the MVA analysis
    if mva:
        for pdf_term, pdf_mode, pdf_category, hist_names in self.PDF_ACCEPT_SHAPE_UNCERT:
            if pdf_mode == _uncert_mode and pdf_category == category.name:
                high_name, low_name = hist_names.format(energy).split('/')
                high_shape, low_shape = self.PDF_ACCEPT_file[high_name], self.PDF_ACCEPT_file[low_name]
                if len(high_shape) != len(sample.hist):
                    log.warning("skipping pdf acceptance shape systematic "
                                "since histograms are not compatible")
                    continue
                high = sample_nom.Clone(
                    shallow=True, name=sample_nom.name + '_{0}_UP'.format(pdf_term))
                low = sample_nom.Clone(
                    shallow=True, name=sample_nom.name + '_{0}_DOWN'.format(pdf_term))
                high *= high_shape
                low *= low_shape
                histsys = histfactory.HistoSys(pdf_term, low=low, high=high)
                sample.AddHistoSys(histsys)

    # mixing norm factors
    if self.SM:
        log.info('adding norm factor')
        sample.AddNormFactor('ATLAS_epsilon', 1., -200., 200., False)
    elif self.BSM:
        log.info('adding norm factor')
        sample.AddNormFactor('ATLAS_epsilon_rejected', 1., -200., 200., False)
    else:
        log.info('no norms for {0}'.format(self.name))

    # BR_tautau
    _, (br_up, br_down) = yellowhiggs.br(
        self.mass, 'tautau', error_type='factor')
    sample.AddOverallSys('ATLAS_BR_tautau', br_down, br_up)
    # <NormFactor Name="mu_BR_tautau" Val="1" Low="0" High="200" />
    sample.AddNormFactor('mu_BR_tautau', 1., 0., 200., True)

    # mu_XS[energy]_[mode]
    sample.AddNormFactor(
        'mu_XS{0:d}_{1}'.format(energy, self.MODES_WORKSPACE[self.mode]),
        1., 0., 200., True)

    # pdf uncertainty
    # https://twiki.cern.ch/twiki/bin/viewauth/AtlasProtected/HSG4Uncertainties
    if mode == 'gg':
        if energy == 8:
            sample.AddOverallSys('pdf_Higgs_gg', 0.93, 1.08)
        else:  # 7 TeV
            sample.AddOverallSys('pdf_Higgs_gg', 0.92, 1.08)
    else:
        if energy == 8:
            sample.AddOverallSys('pdf_Higgs_qq', 0.97, 1.03)
        else:  # 7 TeV
            sample.AddOverallSys('pdf_Higgs_qq', 0.98, 1.03)

    # EWK NLO correction for VBF only
    if mode == 'VBF':
        sample.AddOverallSys('NLO_EW_Higgs', 0.98, 1.02)

    # QCDscale_ggH3in HistoSys, only for the MVA analysis
    # (also see the ggH3in script)
    if mva and mode == 'gg' and category.name == 'vbf':
        Rel_Error_2j = 0.215
        Error_exc = 0.08613046469238815  # absolute error on the exclusive xsec
        xsec_exc = 0.114866523583739     # exclusive xsec
        Error_3j = sqrt(Error_exc**2 - (Rel_Error_2j * xsec_exc)**2)
        rel_error = Error_3j / xsec_exc

        dphi = rec['true_dphi_jj_higgs_no_overlap']
        scores = rec['classifier']
        idx_2j = ((pi - dphi) < 0.2) & (dphi >= 0)
        idx_3j = ((pi - dphi) >= 0.2) & (dphi >= 0)

        # get the normalization factor
        dphi_2j = weights[idx_2j].sum()
        dphi_3j = weights[idx_3j].sum()

        weight_up = np.ones(len(weights))
        weight_dn = np.ones(len(weights))
        weight_up[idx_2j] -= (dphi_3j / dphi_2j) * rel_error
        weight_dn[idx_2j] += (dphi_3j / dphi_2j) * rel_error
        weight_up[idx_3j] += rel_error
        weight_dn[idx_3j] -= rel_error
        weight_up *= weights
        weight_dn *= weights

        up_hist = nominal.clone(
            shallow=True, name=sample_nom.name + '_QCDscale_ggH3in_UP')
        up_hist.Reset()
        dn_hist = nominal.clone(
            shallow=True, name=sample_nom.name + '_QCDscale_ggH3in_DOWN')
        dn_hist.Reset()
        fill_hist(up_hist, scores, weight_up)
        fill_hist(dn_hist, scores, weight_dn)

        if uniform:
            up_hist = uniform_hist(up_hist)
            dn_hist = uniform_hist(dn_hist)

        shape = histfactory.HistoSys(
            'QCDscale_ggH3in', low=dn_hist, high=up_hist)
        norm, shape = histfactory.split_norm_shape(shape, sample_nom)
        sample.AddHistoSys(shape)
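# Standalone numeric check (plain float arithmetic) of the 3-jet relative
# scale uncertainty derived in the QCDscale_ggH3in block above:
from math import sqrt
Rel_Error_2j = 0.215
Error_exc = 0.08613046469238815
xsec_exc = 0.114866523583739
Error_3j = sqrt(Error_exc**2 - (Rel_Error_2j * xsec_exc)**2)
print Error_3j / xsec_exc  # ~0.72, i.e. a ~72% relative error on the 3-jet bin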
# plotting
plt.figure(1)
plt.title("Cost function of (modified) training data")
plt.xlabel("epochs")
plt.ylabel("cost function")
x_range = [x + 1 for x in range(0, N_epochs)]
plt.plot(x_range, cost_training_data)
plt.savefig("cost_on_training_data.png")

plt.figure(2)
plt.title("Error on validation data")
plt.xlabel("epochs")
plt.ylabel("total error on validation data")
x_range = [x + 1 for x in range(0, N_epochs)]
plt.plot(x_range, error_validation_data)
plt.savefig("error_on_val_data.png")

error_on_validation_data_after_training = diff_validation[-1].reshape(
    (1, validation_set.get_N()))
hist = TH1D('hist', "Errors on validation data after the last training epoch",
            200, -10000, 10000)
fill_hist(hist, error_on_validation_data_after_training[0])
canvas = TCanvas()
hist.GetXaxis().SetTitle("desired Chi^2 - outputted Chi^2")
hist.Draw()
canvas.SaveAs('error_on_val_data_hist.png')
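# Optional (an assumption about the intended workflow, not in the original
# script): when this runs non-interactively, putting ROOT in batch mode
# before the TCanvas is created suppresses the on-screen window, as the
# other snippets in this collection do with gROOT.SetBatch(True).
from ROOT import gROOT
gROOT.SetBatch(True)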