def getEfficiency(passes, passesError, total, totalError):
    """Compute passes/total as an efficiency with asymmetric errors.

    The two counts (and their errors) are packed into one-bin histograms
    and divided via TGraphAsymmErrors, which provides the asymmetric
    binomial uncertainties.

    Returns a tuple (efficiency, errorLow, errorHigh).
    """
    numHist = TH1D("passes", "", 1, 0.0, 1.0)
    denHist = TH1D("total", "", 1, 0.0, 1.0)
    # Fill each single-bin histogram with its count and error.
    for hist, content, error in ((numHist, passes, passesError),
                                 (denHist, total, totalError)):
        hist.SetBinContent(1, content)
        hist.SetBinError(1, error)
    ratio = TGraphAsymmErrors(numHist, denHist)
    xVal = Double(0.0)
    yVal = Double(0.0)
    ratio.GetPoint(0, xVal, yVal)  # yVal receives the efficiency
    return (yVal, ratio.GetErrorYlow(0), ratio.GetErrorYhigh(0))
def getEfficiency(nominatorHisto, denominatorHisto, cutString):
    """Summarize the efficiency of nominatorHisto w.r.t. denominatorHisto.

    The central value is the plain ratio of the two integrals; the
    asymmetric uncertainties come from a Clopper-Pearson ("cp")
    TGraphAsymmErrors division. Returns a dict keyed by
    Nominator/Denominator/Efficiency/UncertaintyUp/UncertaintyDown/cut.

    NOTE: this shadows any earlier definition of getEfficiency in the
    same module.
    """
    graph = TGraphAsymmErrors(nominatorHisto, denominatorHisto, "cp")
    yValue = ROOT.Double(0.)
    xDummy = ROOT.Double(0.)
    # GetPoint fills xDummy/yValue in place; the return code is unused.
    graph.GetPoint(0, xDummy, yValue)
    numIntegral = nominatorHisto.Integral()
    denIntegral = denominatorHisto.Integral()
    result = {
        "Nominator": numIntegral,
        "Denominator": denIntegral,
        # Raises ZeroDivisionError if the denominator integral is 0.
        "Efficiency": numIntegral / denIntegral,
        "UncertaintyUp": graph.GetErrorYhigh(0),
        "UncertaintyDown": graph.GetErrorYlow(0),
    }
    result["cut"] = cutString
    return result
# Collect per-bin efficiency errors for the "Medium 1" working point.
# NOTE(review): nfile, file, name, TGraphAsymmErrors and ROOT are defined
# elsewhere in the module — confirm against the surrounding script.
err_sigeff_M1 = []
xM1 = ROOT.Double(0)  # in/out arguments for TGraphAsymmErrors.GetPoint
yM1 = ROOT.Double(0)
for i in range(nfile):
    preselect = file.Get("h_preselect_"+str(i))
    med1 = file.Get("h_med1_"+str(i))
    # Binomial efficiency med1/preselect with 68.3% CL Bayesian errors.
    effM1 = TGraphAsymmErrors(med1,preselect,"cl=0.683 b(1,1) mode")
    effM1.SetTitle("Medium 1")
    binM1 = effM1.GetN()
    for j in range(binM1):
        effM1.GetPoint(j, xM1, yM1)
        # Skip points with zero efficiency; keep the error otherwise.
        if (yM1 == 0):
            continue
        else:
            errM1 = effM1.GetErrorY(j)
            err_sigeff_M1.append(errM1)
            #print "errM1_"+str(name[i]), errM1
'''
print "####################################"
print "err_sigeff_M1", err_sigeff_M1
print "####################################"
'''
#Punzi Significance errors
print bin_list[i] h1 = TH1F('h1', 'h1', 1, bin_list[i], bin_list[i + 1]) h2 = TH1F('h2', 'h2', 1, bin_list[i], bin_list[i + 1]) h1.SetBinContent(1, dfx.recum.iloc[i]) h1.SetBinContent(1, 0) #h1.SetBinError(1,dfx.rc_w2_sq.iloc[i]) h2.SetBinContent(1, w_s) h2.SetBinError(1, 0.008) #np.sqrt(w2_s)) g_tpr = GAE() g_tpr.Divide(h1, h2, "cl=0.683 b(1,1) mode") x_s = Double() y_s = Double() g_tpr.GetPoint(0, x_s, y_s) xl.append(x_s) yl.append(y_s) buffer_l_s = g_tpr.GetEYlow() buffer_l_s.SetSize(1) arr_l_s = np.array(buffer_l_s, copy=True) buffer_h_s = g_tpr.GetEYhigh() buffer_h_s.SetSize(1) arr_h_s = np.array(buffer_h_s, copy=True) hl.append(np.array(arr_h_s)[0]) ll.append(np.array(arr_l_s)[0]) #print arr_h_s
def makeplot_single(
    h1_sig=None,        # list of signal TH1s
    h1_bkg=None,        # list of background TH1s (stacked)
    h1_data=None,       # data TH1, or None for MC-only plots
    sig_legends_=None,
    bkg_legends_=None,
    sig_colors_=None,   # optional overrides for the default color lists
    bkg_colors_=None,
    hist_name_=None,    # fallback output file stem
    sig_scale_=1.0,     # multiplicative scale applied to every signal histogram
    dir_name_="plots",
    output_name_=None,  # output file stem (without extension)
    extraoptions=None   # dict: stack_signal / ratio_range / xaxis_label / lumi_value
):
    """Draw one stacked signal/background (+ optional data) plot.

    Produces linear- and log-Y .png/.pdf/.C canvases, a text yield table,
    and a .root file with the input histograms and the ratio, all under
    dir_name_. The lower pad shows data/MC when data is given, otherwise
    S/sqrt(B).

    NOTE(review): mutates the input histograms (scaling, styling) and
    relies on module-level r (ROOT), np, math, os, TGraphAsymmErrors,
    Double, TMath, rightMargin, leftMargin — confirm against the module
    preamble.
    """
    if h1_sig == None or h1_bkg == None:
        print("nothing to plot...")
        return
    # Output directory (index.php makes it browsable on EOS/web areas).
    os.system("mkdir -p "+dir_name_)
    os.system("cp index.php "+dir_name_)

    # Default ROOT color indices; overridable via the *_colors_ args.
    s_color = [632, 617, 839, 800, 1]
    b_color = [920, 2007, 2005, 2003, 2001, 2011]
    if sig_colors_:
        s_color = sig_colors_
    if bkg_colors_:
        b_color = bkg_colors_
    for idx in range(len(h1_sig)):
        h1_sig[idx].SetLineWidth(3)
        h1_sig[idx].SetLineColor(s_color[idx])
    for idx in range(len(h1_bkg)):
        h1_bkg[idx].SetLineWidth(2)
        h1_bkg[idx].SetLineColor(b_color[idx])
        h1_bkg[idx].SetFillColorAlpha(b_color[idx], 1)
    if h1_data:
        h1_data.SetBinErrorOption(1)  # Poisson errors
        h1_data.SetLineColor(1)
        h1_data.SetLineWidth(2)
        h1_data.SetMarkerColor(1)
        h1_data.SetMarkerStyle(20)

    # Canvas with an upper (distributions) and lower (ratio) pad.
    myC = r.TCanvas("myC", "myC", 600, 600)
    myC.SetTicky(1)
    pad1 = r.TPad("pad1", "pad1", 0.05, 0.33, 0.95, 0.97)
    pad1.SetBottomMargin(0.027)
    pad1.SetRightMargin( rightMargin )
    pad1.SetLeftMargin( leftMargin )
    pad2 = r.TPad("pad2", "pad2", 0.05, 0.04, 0.95, 0.31)
    pad2.SetBottomMargin(0.4)
    pad2.SetTopMargin(0.05)
    pad2.SetRightMargin( rightMargin )
    pad2.SetLeftMargin( leftMargin )
    pad2.Draw()
    pad1.Draw()
    pad1.cd()

    # Scale the signals (in place) for visibility.
    for idx in range(len(h1_sig)):
        print("before signal scaling", h1_sig[idx].Integral())
        h1_sig[idx].Scale(sig_scale_)
        print("after signal scaling", h1_sig[idx].Integral())

    stack = r.THStack("stack", "stack")
    # nS/eS: per-bin total prediction and its quadrature error.
    nS = np.zeros(h1_bkg[0].GetNbinsX())
    eS = np.zeros(h1_bkg[0].GetNbinsX())
    #hist_all is used to make the data/mc ratio. remove signal for the moment due to signal is scaled right now
    hist_all = h1_sig[0].Clone("hist_all")
    hist_all.Scale(0.0)
    hist_s = h1_sig[0].Clone("hist_s")  # summed signal
    hist_b = h1_bkg[0].Clone("hist_b")  # summed background
    for idx in range(len(h1_bkg)):
        stack.Add(h1_bkg[idx])
        for ib in range(h1_bkg[0].GetNbinsX()):
            nS[ib] += h1_bkg[idx].GetBinContent(ib+1)
            eS[ib] = math.sqrt(eS[ib]*eS[ib] + h1_bkg[idx].GetBinError(ib+1)*h1_bkg[idx].GetBinError(ib+1))
        hist_all.Add(h1_bkg[idx])
        if idx > 0:
            hist_b.Add(h1_bkg[idx])
    for idx in range(len(h1_sig)):
        print("ggH signal yield: ", hist_s.Integral())
        if idx > 0:
            hist_temp = h1_sig[idx].Clone(h1_sig[idx].GetName()+"_temp")
            #hist_all.Add(hist_temp)
            hist_s.Add(h1_sig[idx])
    print("all signal yield: ", hist_s.Integral())

    stack.SetTitle("")
    maxY = 0.0
    if "stack_signal" in extraoptions and extraoptions["stack_signal"]:
        # Signals are stacked on top of the backgrounds and folded into
        # the total prediction nS/eS.
        for idx in range(len(h1_sig)):
            h1_sig[idx].SetFillColorAlpha(s_color[idx], 1)
            stack.Add(h1_sig[idx])
            for ib in range(h1_bkg[0].GetNbinsX()):
                nS[ib] += h1_sig[idx].GetBinContent(ib+1)
                eS[ib] = math.sqrt(eS[ib]*eS[ib] + h1_sig[idx].GetBinError(ib+1)*h1_sig[idx].GetBinError(ib+1))
        if stack.GetMaximum() > maxY:
            maxY = stack.GetMaximum()
        #if "SR" in h.GetTitle():
        stack.Draw("hist")
    else:
        # Signals drawn as overlaid lines (summed, SR plots only).
        stack.Draw("hist")
        if stack.GetMaximum() > maxY:
            maxY = stack.GetMaximum()
        for idx in range(len(h1_sig)):
            if h1_sig[idx].GetMaximum() > maxY:
                maxY = h1_sig[idx].GetMaximum()
            if "SR" in h1_bkg[0].GetTitle():
                #h1_sig[idx].Draw("samehist")
                hist_s.Draw("samehist")

    ##draw stack total unc on top of total histogram
    box = r.TBox(0, 0, 1, 1,)
    box.SetFillStyle(3002)
    box.SetLineWidth(0)
    box.SetFillColor(r.kBlack)
    for idx in range(h1_bkg[0].GetNbinsX()):
        box.DrawBox(h1_bkg[0].GetBinCenter(idx+1)-0.5*h1_bkg[0].GetBinWidth(idx+1),
                    nS[idx]-eS[idx],
                    h1_bkg[0].GetBinCenter(idx+1)+0.5*h1_bkg[0].GetBinWidth(idx+1),
                    nS[idx]+eS[idx])

    if h1_data:
        if h1_data.GetMaximum() > maxY:
            maxY = h1_data.GetMaximum()+np.sqrt(h1_data.GetMaximum())
        #if not "SR" in h1_data.GetTitle() or "fail" in h1_data.GetTitle():
        if True:
            #print("debug h1_data.GetName()",h1_data.GetName(), h1_data.GetTitle())
            # Redraw the data as a graph with Garwood (chi-square quantile)
            # Poisson intervals; empty bins are moved off-canvas (y=-1).
            TGraph_data = TGraphAsymmErrors(h1_data)
            for i in range(TGraph_data.GetN()):
                #data point
                var_x, var_y = Double(0.), Double(0.)
                TGraph_data.GetPoint(i, var_x, var_y)
                if np.fabs(var_y) < 1e-5:
                    TGraph_data.SetPoint(i, var_x, -1.0)
                    TGraph_data.SetPointEYlow(i, -1)
                    TGraph_data.SetPointEYhigh(i, -1)
                    #print("zero bins in the data TGraph: bin",i+1)
                else:
                    TGraph_data.SetPoint(i, var_x, var_y)
                    err_low = var_y - (0.5*TMath.ChisquareQuantile(0.1586555, 2.*var_y))
                    TGraph_data.SetPointEYlow(i, var_y - (0.5*TMath.ChisquareQuantile(0.1586555, 2.*var_y)))
                    TGraph_data.SetPointEYhigh(i, (0.5*TMath.ChisquareQuantile(1.-0.1586555, 2.*(var_y+1))) - var_y)
            TGraph_data.SetMarkerColor(1)
            TGraph_data.SetMarkerSize(1)
            TGraph_data.SetMarkerStyle(20)
            TGraph_data.Draw("same P")

    stack.GetYaxis().SetTitle("Events")
    stack.GetYaxis().SetTitleOffset(1.05)
    stack.GetYaxis().SetTitleSize(0.08)
    stack.GetYaxis().SetLabelSize(0.06)
    #stack.GetYaxis().CenterTitle()
    stack.GetXaxis().SetLabelSize(0.)  # x labels live on the ratio pad
    #stack.GetXaxis().SetLabelOffset(0.013)
    #if "xaxis_range" in extraoptions:
    #    stack.GetXaxis().SetRangeUser(float(extraoptions["xaxis_range"][0]),float(extraoptions["xaxis_range"][1]))

    leg = r.TLegend(0.2, 0.60, 0.9, 0.88)
    leg.SetNColumns(3)
    leg.SetFillStyle(0)
    leg.SetBorderSize(0)
    leg.SetTextFont(42)
    leg.SetTextSize(0.05)
    for idx in range(len(h1_bkg)):
        leg.AddEntry(h1_bkg[idx], bkg_legends_[idx], "F")
    if "SR" in hist_s.GetTitle():
        leg.AddEntry(hist_s, 'HH #times {:1.2}'.format(sig_scale_), "L")
    leg.AddEntry(box, "Total unc", "F")
    if h1_data:
        leg.AddEntry(h1_data, "Data", "ep")
    leg.Draw()

    # ----- lower pad: data/MC ratio, or S/sqrt(B) when no data -----
    pad2.cd()
    pad2.SetGridy(1)
    ratio = None
    ratio_Low = 0.0
    ratio_High = 4
    if h1_data:
        ratio = TGraphAsymmErrors(h1_data)
        for i in range(ratio.GetN()):
            #bkg prediction
            imc = Double(hist_all.GetBinContent(i+1))
            #data point
            var_x, var_y = Double(0.), Double(0.)
            # Skip (blind) bins 6-8 of SR plots — the Higgs peak region.
            if not ("SR" in h1_data.GetTitle() and (i > 5 and i < 9)):
                ratio.GetPoint(i, var_x, var_y)
                if var_y == 0.:
                    ratio.SetPoint(i, var_x, -1.0)
                    ratio.SetPointEYlow(i, -1)
                    ratio.SetPointEYhigh(i, -1)
                    continue
                ratio.SetPoint(i, var_x, var_y/imc)
                err_low = (var_y - (0.5*TMath.ChisquareQuantile(0.1586555, 2.*var_y)))/imc
                err_high = ((0.5*TMath.ChisquareQuantile(1.-0.1586555, 2.*(var_y+1))) - var_y)/imc
                ratio.SetPointEYlow(i, err_low)
                ratio.SetPointEYhigh(i, err_high)
        ratio.SetMarkerColor(1)
        ratio.SetMarkerSize(1)
        ratio.SetMarkerStyle(20)
        ratio.GetXaxis().SetTitle("j_{2} regressed mass [GeV]")
        #myC.Update()
        if "ratio_range" in extraoptions:
            ratio_Low = extraoptions["ratio_range"][0]
            ratio_High = extraoptions["ratio_range"][1]
        ratio.GetYaxis().SetTitle("data/mc")
        ratio.GetYaxis().SetRangeUser(ratio_Low, ratio_High)
        ratio.GetXaxis().SetRangeUser(50, 220)
        ratio.SetTitle("")
        ratio.Draw("same AP")
        pad2.Update()
        print(ratio.GetTitle(), ratio.GetName(), "debug")
    else:
        # MC-only: per-bin significance proxy S/sqrt(B).
        ratio = h1_sig[0].Clone("ratio")
        ratio_High = 0.0
        for ibin in range(1, ratio.GetNbinsX()+1):
            s = hist_s.GetBinContent(ibin)
            b = hist_b.GetBinContent(ibin)
            L = 0.0
            if b > 0.0:
                L = s/math.sqrt(b)
            if L > ratio_High:
                ratio_High = L
            ratio.SetBinContent(ibin, L)
        if ratio_High > 1.0:
            ratio_High = 1.0  # cap the y range
        ratio.GetYaxis().SetRangeUser(ratio_Low, ratio_High*1.2)
        ratio.GetYaxis().SetTitle("S/#sqrt{B}")
        ratio.Draw("samehist")

    # Common ratio styling (applies to both branches above).
    ratio.SetLineColor(1)
    ratio.SetLineWidth(2)
    ratio.SetMarkerStyle(20)
    ratio.SetMarkerColor(1)
    ratio.SetFillColorAlpha(1, 0)
    ratio.GetXaxis().SetTitleOffset(0.94)
    ratio.GetXaxis().SetTitleSize(0.18)
    ratio.GetXaxis().SetLabelSize(0.12)
    ratio.GetXaxis().SetLabelOffset(0.013)
    ratio.GetYaxis().SetTitleOffset(0.40)
    ratio.GetYaxis().SetTitleSize(0.17)
    ratio.GetYaxis().SetLabelSize(0.13)
    ratio.GetYaxis().SetTickLength(0.01)
    ratio.GetYaxis().SetNdivisions(505)
    #if "xaxis_range" in extraoptions:
    #    ratio.GetXaxis().SetRangeUser(float(extraoptions["xaxis_range"][0]),float(extraoptions["xaxis_range"][1]))

    #draw stack total unc on the ratio plot to present the background uncertainty
    box_ratio = r.TBox(0, 0, 1, 1,)
    box_ratio.SetFillStyle(3002)
    box_ratio.SetLineWidth(0)
    box_ratio.SetFillColor(r.kBlack)
    for idx in range(h1_bkg[0].GetNbinsX()):
        if np.fabs(nS[idx]) > 1e-06:
            box_ratio.DrawBox(h1_bkg[0].GetBinCenter(idx+1)-0.5*h1_bkg[0].GetBinWidth(idx+1),
                              (nS[idx]-eS[idx])/nS[idx],
                              h1_bkg[0].GetBinCenter(idx+1)+0.5*h1_bkg[0].GetBinWidth(idx+1),
                              (nS[idx]+eS[idx])/nS[idx])
        else:
            print("blinded Higgs peak region")

    if "xaxis_label" in extraoptions and extraoptions["xaxis_label"] != None:
        x_title = extraoptions["xaxis_label"]
        ratio.GetXaxis().SetTitle(x_title)
    ratio.GetYaxis().CenterTitle()

    ##########draw CMS preliminary
    pad1.cd()
    tex1 = r.TLatex(leftMargin, 0.91, "CMS")
    tex1.SetNDC()
    tex1.SetTextFont(61)
    tex1.SetTextSize(0.070)
    tex1.SetLineWidth(2)
    tex1.Draw()
    tex2 = r.TLatex(leftMargin+0.12, 0.912, "Internal")
    tex2.SetNDC()
    tex2.SetTextFont(52)
    tex2.SetTextSize(0.055)
    tex2.SetLineWidth(2)
    tex2.Draw()
    lumi_value = 137
    if "lumi_value" in extraoptions:
        lumi_value = extraoptions["lumi_value"]
    tex3 = r.TLatex(0.72, 0.912, "%d"%lumi_value+" fb^{-1} (13 TeV)")
    tex3.SetNDC()
    tex3.SetTextFont(42)
    tex3.SetTextSize(0.055)
    tex3.SetLineWidth(2)
    tex3.Draw()

    outFile = dir_name_
    if output_name_:
        outFile = outFile + "/" + output_name_
    else:
        outFile = outFile + "/" + hist_name_
    #print("maxY = "+str(maxY))
    stack.SetMaximum(maxY*1.7)

    #print everything into txt file
    text_file = open(outFile+"_linY.txt", "w")
    text_file.write("bin | x ")
    for idx in range(len(h1_bkg)):
        text_file.write(" | %21s"%bkg_legends_[idx])
    text_file.write(" | %21s"%("total B"))
    for idx in range(len(sig_legends_)):
        text_file.write(" | %25s"%sig_legends_[idx])
    if h1_data:
        text_file.write(" | data | data/mc")
    text_file.write("\n-------------")
    for idx in range(24*(len(h1_bkg) + 1) + 29*len(sig_legends_)):
        text_file.write("-")
    if h1_data:
        text_file.write("-------")
    text_file.write("\n")
    # One row per bin (bin 0 = underflow).
    # NOTE(review): the bin-center column reads h1_data unconditionally and
    # would raise if h1_data is None — confirm callers always pass data
    # when a table is wanted.
    for ibin in range(0, h1_sig[0].GetNbinsX()+1):
        text_file.write("%3d"%ibin+" ")
        text_file.write(" | %6.3f"%h1_data.GetBinCenter(ibin)+" ")
        for idx in range(len(h1_bkg)):
            text_file.write(" | %7.3f "%h1_bkg[idx].GetBinContent(ibin)+"$\\pm$"+ " %7.3f"%h1_bkg[idx].GetBinError(ibin))
        text_file.write(" | %7.3f "%hist_b.GetBinContent(ibin)+"$\\pm$"+ " %7.3f"%hist_b.GetBinError(ibin))
        for idx in range(len(sig_legends_)):
            text_file.write(" | %9.3f "%h1_sig[idx].GetBinContent(ibin)+"$\\pm$"+ " %9.3f"%h1_sig[idx].GetBinError(ibin))
        if h1_data:
            text_file.write(" | %d"%h1_data.GetBinContent(ibin) + " | %7.3f "%h1_data.GetBinContent(ibin) +"$\\pm$"+ " %7.3f"%h1_data.GetBinError(ibin))
        text_file.write("\n\n")

    #print yield table for AN
    # Yields summed over bins 7-9 (the signal-enriched window).
    text_file.write("print yield table for AN\n")
    bkg_all = 0
    bkg_all_errsq = 0
    for idx in range(len(h1_bkg)):
        bkg_tmp = h1_bkg[idx].GetBinContent(7)+h1_bkg[idx].GetBinContent(8)+h1_bkg[idx].GetBinContent(9)
        bkg_errsq_tmp = h1_bkg[idx].GetBinError(7)*h1_bkg[idx].GetBinError(7)+h1_bkg[idx].GetBinError(8)*h1_bkg[idx].GetBinError(8)+h1_bkg[idx].GetBinError(9)*h1_bkg[idx].GetBinError(9)
        bkg_all += bkg_tmp
        bkg_all_errsq += bkg_errsq_tmp
        text_file.write("%s"%(bkg_legends_[idx])+"& %7.2f"%(bkg_tmp)+"$\\pm$"+ "%7.2f"%np.sqrt(bkg_errsq_tmp)+"\n")
    text_file.write("total background & %7.2f"%(bkg_all)+"$\\pm$"+ "%7.2f"%np.sqrt(bkg_all_errsq)+"\n")
    # Signal yields are divided by sig_scale_ to undo the display scaling.
    # NOTE(review): the quoted error is *multiplied* by sig_scale_ — looks
    # inconsistent with the /sig_scale_ on the yield; confirm intent.
    text_file.write("\ggHH SM ($\kapl=1$) & %7.2f"%((h1_sig[0].GetBinContent(7)+h1_sig[0].GetBinContent(8)+h1_sig[0].GetBinContent(9))/sig_scale_)+"$\\pm$"+ "%7.1f"%(sig_scale_*np.sqrt(h1_sig[0].GetBinError(7)*h1_sig[0].GetBinError(7)+h1_sig[0].GetBinError(8)*h1_sig[0].GetBinError(8)+h1_sig[0].GetBinError(9)*h1_sig[0].GetBinError(9)))+"\n")
    text_file.write("\VBFHH SM ($\kapl=1$) & %7.2f"%((h1_sig[1].GetBinContent(7)+h1_sig[1].GetBinContent(8)+h1_sig[1].GetBinContent(9))/sig_scale_)+"$\\pm$"+ "%7.1f"%(sig_scale_*np.sqrt(h1_sig[1].GetBinError(7)*h1_sig[1].GetBinError(7)+h1_sig[1].GetBinError(8)*h1_sig[1].GetBinError(8)+h1_sig[1].GetBinError(9)*h1_sig[1].GetBinError(9)))+"\n")
    text_file.write("HH bin 8 value %s"%h1_sig[0].GetBinContent(8)+"\n")
    text_file.write("HH bin 9 value %s"%h1_sig[0].GetBinContent(9)+"\n")
    text_file.write("HH bin 7 value %s"%h1_sig[0].GetBinContent(7)+"\n")
    text_file.write("HH bin 8 error %s"%h1_sig[0].GetBinError(8)+"\n")
    text_file.write("HH bin 9 error %s"%h1_sig[0].GetBinError(9)+"\n")
    text_file.write("HH bin 7 error %s"%h1_sig[0].GetBinError(7)+"\n")
    text_file.write("total & %7.2f"%(bkg_all+(h1_sig[0].GetBinContent(7)+h1_sig[0].GetBinContent(8)+h1_sig[0].GetBinContent(9)+h1_sig[1].GetBinContent(7)+h1_sig[1].GetBinContent(8)+h1_sig[1].GetBinContent(9))/sig_scale_)+"$\\pm$"+ "%7.2f"%(np.sqrt((h1_sig[0].GetBinError(7)*h1_sig[0].GetBinError(7)+h1_sig[0].GetBinError(8)*h1_sig[0].GetBinError(8)+h1_sig[0].GetBinError(9)*h1_sig[0].GetBinError(9))/(sig_scale_*sig_scale_)+(h1_sig[1].GetBinError(7)*h1_sig[1].GetBinError(7)+h1_sig[1].GetBinError(8)*h1_sig[1].GetBinError(8)+h1_sig[1].GetBinError(9)*h1_sig[1].GetBinError(9))/(sig_scale_*sig_scale_)+bkg_all_errsq))+"\n")
    text_file.close()
    os.system("cp "+outFile+"_linY.txt "+outFile+"_logY.txt")

    # Save linear-Y canvases.
    pad1.RedrawAxis()
    myC.SaveAs(outFile+"_linY.png")
    myC.SaveAs(outFile+"_linY.pdf")
    myC.SaveAs(outFile+"_linY.C")
    # Switch the top pad to log-Y and save again.
    pad1.cd()
    stack.SetMaximum(maxY*100.0)
    stack.SetMinimum(0.5)
    pad1.SetLogy()
    pad1.RedrawAxis()
    myC.SaveAs(outFile+"_logY.png")
    myC.SaveAs(outFile+"_logY.pdf")
    myC.SaveAs(outFile+"_logY.C")

    #save histogram and ratio to root file
    outFile_root = r.TFile(outFile+".root", "recreate")
    outFile_root.cd()
    for idx in range(len(h1_bkg)):
        h1_bkg[idx].Write()
    for idx in range(len(sig_legends_)):
        h1_sig[idx].Write()
    if h1_data:
        h1_data.Write()
    ratio.Write()
    #outFile_root.Write()
    outFile_root.Close()
# Copy the points of g_efficiency and g_tpr into numpy arrays.
# NOTE(review): g_tpr, g_size, g_efficiency, Double are defined earlier in
# the enclosing scope — confirm against the surrounding script.
g_size_s = g_tpr.GetN()
x = Double()    # in/out arguments for GetPoint
y = Double()
x_s = Double()
y_s = Double()
arr_x = np.zeros(g_size)
arr_y = np.zeros(g_size)
arr_x_s = np.zeros(g_size_s)
arr_y_s = np.zeros(g_size_s)
for i in range( g_size ):
    g_efficiency.GetPoint(i, x, y)
    arr_x[i] = x
    arr_y[i] = y
#print arr_y
# if g_size is always equal to g_size_s we can put these loops together
for i in range( g_size_s ):
    g_tpr.GetPoint(i, x_s, y_s)
    arr_x_s[i] = x_s
    arr_y_s[i] = y_s
# GetEYhigh() work as the following 3 ways(presumably the 'copy' version works most consistently):
#----------------------------------------------V1
#buffer_l = g_efficiency.GetEYlow()
#arr_l = np.ndarray(g_size, 'f', buffer_l)
#----------------------------------------------V2
#buffer_h = g_efficiency.GetEYhigh()
def SetBin(load, n_bins, bin_list):
    """Per-bin cumulative efficiency with Bayesian asymmetric errors.

    load: DataFrame with 'weight' and 'signal' columns. The weights are
    grouped into bin_list intervals of 'signal', reverse-cumulated, and
    each cumulative yield is divided by the total weight via a
    TGraphAsymmErrors (68.3% CL, Beta(1,1) prior) divide.

    Returns a dict with lists 'x', 'y' (points) and 'eh', 'el'
    (high/low errors), one entry per bin.

    NOTE(review): relies on module-level pd, np, TH1F, GAE, Double,
    xrange (Python 2) — confirm against the module preamble.
    """
    df = load.copy()
    # Per-event squared weights (for the total-sum error).
    df['w2'] = df['weight'].copy().apply(np.square)
    # Sum weights per 'signal' bin.
    group_w = df.groupby(pd.cut(df['signal'], bin_list))
    df_g_w_s = (group_w.sum()).copy()
    df1 = df_g_w_s[['weight']].copy()
    df1.fillna(0, inplace=True)
    # Square of the summed weight per bin (not the sum of squares).
    df1['w2'] = df1['weight'].apply(np.square)
    # Reverse-cumulative sums: yield above each threshold.
    # NOTE(review): df2 is a reversed view of df1; the .loc writes may also
    # touch df1 — confirm this is intended (pandas SettingWithCopy).
    df2 = df1.iloc[::-1]
    df2.loc[:, 'recum'] = df2['weight'].cumsum()
    df2.loc[:, 'rc_w2'] = df2['w2'].cumsum()
    df3 = df2.iloc[::-1]
    df1['recum'] = df3['recum']
    df1['rc_w2_sq'] = df3['rc_w2'].apply(np.sqrt)
    w_s = df1['weight'].sum()             # total weight (denominator)
    sqr_s_w2 = np.sqrt(df['w2'].sum())    # its statistical error
    xl, yl, hl, ll = [], [], [], []
    for i in xrange(n_bins):
        # One-bin histograms: numerator = cumulative yield, denominator = total.
        h_post = TH1F('h_post', 'h_post', 1, bin_list[i], bin_list[i + 1])
        h_pre = TH1F('h_pre', 'h_pre', 1, bin_list[i], bin_list[i + 1])
        h_post.SetBinContent(1, df1.recum.iloc[i])
        h_post.SetBinError(1, df1.rc_w2_sq.iloc[i])
        h_pre.SetBinContent(1, w_s)
        h_pre.SetBinError(1, sqr_s_w2)
        g_eff = GAE()
        g_eff.Divide(h_post, h_pre, "cl=0.683 b(1,1) mode")
        x = Double()
        y = Double()
        g_eff.GetPoint(0, x, y)
        xl.append(x)
        yl.append(y)
        # Copy the raw error buffers (size 1) into numpy before the graph dies.
        buffer_l = g_eff.GetEYlow()
        buffer_l.SetSize(1)
        arr_l = np.array(buffer_l, copy=True)
        buffer_h = g_eff.GetEYhigh()
        buffer_h.SetSize(1)
        arr_h = np.array(buffer_h, copy=True)
        hl.append(np.array(arr_h)[0])
        ll.append(np.array(arr_l)[0])
        # Delete the histograms so the fixed names can be reused next pass.
        h_post.Delete()
        h_pre.Delete()
    #print xl
    #print yl
    #print hl
    #print ll
    out_dict = {}
    out_dict['x'] = xl
    out_dict['y'] = yl
    out_dict['eh'] = hl
    out_dict['el'] = ll
    return out_dict
def CutBaseROC(df_sg, df_bg, df_pos_sgn, df_pos_bkg):
    """Single ROC point (TPR, FPR) with Bayesian asymmetric errors.

    Each input is a weight Series: all signal, all background, selected
    signal, selected background. The weights are filled into one-bin
    histograms and divided (selected/all) with a 68.3% CL Beta(1,1)
    TGraphAsymmErrors divide.

    Returns a dict: tpr, fpr, tpr_e_l, fpr_e_l, tpr_e_h, fpr_e_h.

    NOTE(review): bin_i, bin_f, rnp (root_numpy), GAE, TH1F, Double and
    xrange come from the module scope — confirm against the preamble.
    """
    h_cut_pre_tpr = TH1F('h_cut_pre_tpr', 'hist_cut_pre_tpr', 1, bin_i, bin_f)
    h_cut_pos_tpr = TH1F('h_cut_pos_tpr', 'hist_cut_pos_tpr', 1, bin_i, bin_f)
    h_cut_pre_fpr = TH1F('h_cut_pre_fpr', 'hist_cut_pre_fpr', 1, bin_i, bin_f)
    h_cut_pos_fpr = TH1F('h_cut_pos_fpr', 'hist_cut_pos_fpr', 1, bin_i, bin_f)
    # Fill value == weight, so each histogram integrates the weight sum.
    rnp.fill_hist(h_cut_pre_tpr, df_sg, df_sg)
    rnp.fill_hist(h_cut_pos_tpr, df_pos_sgn, df_pos_sgn)
    rnp.fill_hist(h_cut_pre_fpr, df_bg, df_bg)
    rnp.fill_hist(h_cut_pos_fpr, df_pos_bkg, df_pos_bkg)
    g_cut_tpr = GAE()
    g_cut_fpr = GAE()
    g_cut_tpr.Divide(h_cut_pos_tpr, h_cut_pre_tpr, "cl=0.683 b(1,1) mode")
    g_cut_fpr.Divide(h_cut_pos_fpr, h_cut_pre_fpr, "cl=0.683 b(1,1) mode")
    g_size_cut = 1  # single-bin histograms -> a single graph point
    x = Double()
    y = Double()
    x_s = Double()
    y_s = Double()
    arr_x = np.zeros(g_size_cut)
    arr_y = np.zeros(g_size_cut)
    arr_x_s = np.zeros(g_size_cut)
    arr_y_s = np.zeros(g_size_cut)
    for i in xrange(g_size_cut):
        g_cut_fpr.GetPoint(i, x, y)
        arr_x[i] = x
        arr_y[i] = y
        g_cut_tpr.GetPoint(i, x_s, y_s)
        arr_x_s[i] = x_s
        arr_y_s[i] = y_s
    # Copy the raw error buffers into numpy arrays before the graphs die.
    buffer_l = g_cut_fpr.GetEYlow()
    buffer_l.SetSize(g_size_cut)
    arr_l = np.array(buffer_l, copy=True)
    buffer_h = g_cut_fpr.GetEYhigh()
    buffer_h.SetSize(g_size_cut)
    arr_h = np.array(buffer_h, copy=True)
    buffer_l_s = g_cut_tpr.GetEYlow()
    buffer_l_s.SetSize(g_size_cut)
    arr_l_s = np.array(buffer_l_s, copy=True)
    buffer_h_s = g_cut_tpr.GetEYhigh()
    buffer_h_s.SetSize(g_size_cut)
    arr_h_s = np.array(buffer_h_s, copy=True)
    print len(arr_h)
    print len(arr_l)
    print 'TPR: ', arr_y_s
    print 'FPR: ', arr_y
    print arr_l_s
    print arr_l
    print arr_h_s
    print arr_h
    out_dict = {}
    out_dict['tpr'] = arr_y_s[0]
    out_dict['fpr'] = arr_y[0]
    out_dict['tpr_e_l'] = arr_l_s[0]
    out_dict['fpr_e_l'] = arr_l[0]
    out_dict['tpr_e_h'] = arr_h_s[0]
    out_dict['fpr_e_h'] = arr_h[0]
    return out_dict
# Collect per-bin efficiency errors for the Loose / Medium working points.
# NOTE(review): nfile, file, xL, yL, xM1, yM1, err_sigeff_L, name and
# TGraphAsymmErrors are defined earlier in the script — confirm there.
xM2 = ROOT.Double(0)
yM2 = ROOT.Double(0)
for i in range(nfile):
    preselect = file.Get("h_preselect_"+str(i))
    loo = file.Get("h_loo_"+str(i))
    med1 = file.Get("h_med1_"+str(i))
    med2 = file.Get("h_med2_"+str(i))
    #gPad.Modified()
    # Binomial efficiency loose/preselect, 68.3% CL Bayesian errors.
    effL = TGraphAsymmErrors(loo,preselect,"cl=0.683 b(1,1) mode")
    effL.SetTitle("Loose")
    binL = effL.GetN()
    for j in range(binL):
        effL.GetPoint(j, xL, yL)
        if (yL == 0):
            continue
        else:
            errL = effL.GetErrorY(j)
            err_sigeff_L.append(errL)
            #print "errL_"+str(name[i]), errL
    effM1 = TGraphAsymmErrors(med1,preselect,"cl=0.683 b(1,1) mode")
    effM1.SetTitle("Medium 1")
    binM1 = effM1.GetN()
    for j in range(binM1):
        effM1.GetPoint(j, xM1, yM1)
        if (yM1 == 0):
            continue
        else:
            # NOTE(review): this chunk is truncated here — the else body
            # (presumably GetErrorY/append as in the Loose loop above)
            # continues outside this excerpt.
def ROC_GEN(load_s, load_b):
    """Build a weighted ROC curve (TPR/FPR vs. threshold) from two DataFrames.

    load_s / load_b: signal / background DataFrames with 'weight' and
    'signal' (classifier score) columns. The score axis is split into
    n_bins thresholds; for each threshold the cumulative weight passing
    the cut is divided by the total weight with a Bayesian (68.3% CL,
    Beta(1,1)) TGraphAsymmErrors divide.

    Returns a dict with 'param', 'tpr', 'fpr', the four error arrays,
    'threshold' (bin-center map) and an empty 'cut_based' placeholder.

    NOTE(review): relies on module-level pd, np, rnp (root_numpy), TH1F,
    GAE, Double, time and Python 2 xrange/print — confirm in preamble.
    """
    n_scan = 10000000   # max number of background events to scan
    n_bins = 300  #100
    bin_i = 0           # score axis range
    bin_f = 1
    draw = 1
    param = {}
    param['n_scan'] = n_scan
    param['n_bins'] = n_bins
    param['bin_i'] = bin_i
    param['bin_f'] = bin_f
    print '>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> Loading data...'
    #load_s = joblib.load(pth+'/dumps/s.pkl')
    #load_b = joblib.load(pth+'/dumps/b.pkl')
    print len(load_b)
    # Calculate the bin vector:
    bin_width = float(bin_f - bin_i) / n_bins
    bin_value_dict = {}
    for j in xrange(n_bins):
        bin_value_dict[j] = bin_i + (0.5 + j) * bin_width  # bin centers
    #print bin_value_dict
    # if you want to use the GetCumulative() of a histogram and set its error: do not call the Sumw2() of it
    h_after_selection = TH1F('h_after_selection', 'hist_after_selection', n_bins, bin_i, bin_f)
    #h_after_selection.Sumw2()
    h_before_selection = TH1F('h_before_selection', 'hist_before_selection', n_bins, bin_i, bin_f)
    #h_before_selection.Sumw2()
    h_true_positive = TH1F('h_true_positive', 'True_Positives', n_bins, bin_i, bin_f)
    h_true = TH1F('h_true', 'Trues', n_bins, bin_i, bin_f)
    # see if reversing the bin_min and bin_max will cause the histogram axis to reverse(no)
    #h_true = TH1F('h_true' ,'Trues' , n_bins, bin_f, bin_i)
    h_c_b = TH1F('h_c_b', 'hist_after_selection_cum_rev', n_bins, bin_i, bin_f)
    #h_c_b.Sumw2()
    h_c_s = TH1F('h_c_s', 'hist_true_positives_cum_rev', n_bins, bin_i, bin_f)
    #h_c_s.Sumw2()
    ####################################
    ####################################
    print '>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> Filling histogram...'
    zeitA = time()
    df_b = load_b.copy()[:n_scan]
    df_b['bin'] = df_b['signal']
    #df_s = load_s.copy()
    #df_s['bin'] = df_s['signal']
    df_list_pre_sel = []
    df_list_pos_sel = []

    def Appnd(tupl_in):
        # Collect the (pre-selection, post-selection) frame pair.
        df_list_pre_sel.append(tupl_in[0])
        df_list_pos_sel.append(tupl_in[1])

    def DataFrameExpand(k):
        # For the background events whose score falls in bin k, replicate
        # their weights into every threshold bin (pre-selection) and into
        # the bins kk <= k they pass (post-selection).
        mask_k_b = (bin_value_dict[k] - 0.5 * bin_width <= df_b['bin']) & (
            bin_value_dict[k] + 0.5 * bin_width > df_b['bin'])
        df_b['bin'][mask_k_b] = k  # relabel scores in bin k by the index k
        #mask_k_s = ( bin_value_dict[k] - 0.5*bin_width <= df_s['bin'] ) & ( bin_value_dict[k] + 0.5*bin_width > df_s['bin'] )
        #df_s['bin'][mask_k_s] = k
        mask_k = df_b.bin == k
        df_b_w_k = df_b.weight[mask_k]
        df_list_k_pre_sel = []
        df_list_k_pos_sel = []
        for kk in xrange(n_bins):
            df_tmp_kk_pre_sel = pd.DataFrame()
            df_tmp_kk_pre_sel['weight'] = df_b_w_k
            df_tmp_kk_pre_sel['bin'] = bin_value_dict[kk]
            df_list_k_pre_sel.append(df_tmp_kk_pre_sel)
            if kk > k:
                continue
            df_tmp_kk_pos_sel = pd.DataFrame()
            df_tmp_kk_pos_sel['weight'] = df_b_w_k
            df_tmp_kk_pos_sel['bin'] = bin_value_dict[kk]
            df_list_k_pos_sel.append(df_tmp_kk_pos_sel)
        df_tmp_k_pre_sel = pd.concat(df_list_k_pre_sel)
        df_tmp_k_pos_sel = pd.concat(df_list_k_pos_sel)
        return df_tmp_k_pre_sel, df_tmp_k_pos_sel

    # Parallelization:
    '''
    pool_dfe = mp.Pool()
    for i in xrange(n_bins):
        pool_dfe.apply_async(DataFrameExpand, args=(i, ), callback=Appnd)
    pool_dfe.close()
    pool_dfe.join()
    '''
    # Serial fallback of the pool loop above.
    for ii in xrange(n_bins):
        callback = DataFrameExpand(ii)
        Appnd(callback)
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~alternatives:
    #process = mp.Process(target=F, args=(k,))
    #manager = mp.Manager()
    #pool.map(FT,L)
    df_before_selection = pd.concat(df_list_pre_sel)
    df_after_selection = pd.concat(df_list_pos_sel)
    rnp.fill_hist(h_before_selection, df_before_selection.bin, df_before_selection.weight)
    rnp.fill_hist(h_c_b, df_after_selection.bin, df_after_selection.weight)
    zeitB = time()
    print 'Time taken for filling histogram(for #events: ' + str( n_scan) + '): ', str(zeitB - zeitA)
    ####################################
    ####################################
    print '>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> Filling histogram (for tpr)...'
    zeitA = time()
    # Signal side: every event contributes to all denominator bins, and to
    # the cumulative numerator bins below its score bin.
    for index, row in load_s.iterrows():
        tmp_weight = row['weight']
        tmp_signal = row['signal']
        for k in xrange(n_bins):
            h_true.Fill(bin_value_dict[k], tmp_weight)
            if bin_value_dict[
                    k] - 0.5 * bin_width <= tmp_signal and bin_value_dict[
                    k] + 0.5 * bin_width > tmp_signal:
                for kk in xrange(k):
                    h_c_s.Fill(bin_value_dict[kk], tmp_weight)
    g_fpr = GAE()
    g_tpr = GAE()
    g_fpr.Divide(h_c_b, h_before_selection, "cl=0.683 b(1,1) mode")
    g_tpr.Divide(h_c_s, h_true, "cl=0.683 b(1,1) mode")
    g_size = g_fpr.GetN()
    x = Double()
    y = Double()
    x_s = Double()
    y_s = Double()
    arr_x = np.zeros(g_size)
    arr_y = np.zeros(g_size)
    arr_x_s = np.zeros(g_size)
    arr_y_s = np.zeros(g_size)
    for i in xrange(g_size):
        g_fpr.GetPoint(i, x, y)
        arr_x[i] = x
        arr_y[i] = y
        g_tpr.GetPoint(i, x_s, y_s)
        arr_x_s[i] = x_s
        arr_y_s[i] = y_s
    # Copy raw error buffers into numpy before the graphs go out of scope.
    buffer_l = g_fpr.GetEYlow()
    buffer_l.SetSize(g_size)
    arr_l = np.array(buffer_l, copy=True)
    buffer_h = g_fpr.GetEYhigh()
    buffer_h.SetSize(g_size)
    arr_h = np.array(buffer_h, copy=True)
    #print arr_h
    #print arr_l
    buffer_l_s = g_tpr.GetEYlow()
    buffer_l_s.SetSize(g_size)
    arr_l_s = np.array(buffer_l_s, copy=True)
    buffer_h_s = g_tpr.GetEYhigh()
    buffer_h_s.SetSize(g_size)
    arr_h_s = np.array(buffer_h_s, copy=True)
    print len(arr_h)
    print len(arr_l)
    #######################
    #    Calculate AOC    #
    #######################
    '''
    x = np.array(arr_y_s)
    y = np.array(arr_y)
    exl = np.array(arr_l_s)
    eyl = np.array(arr_l)
    exh = np.array(arr_h_s)
    eyh = np.array(arr_h)
    '''
    #######################
    # Export ROC Position #
    #######################
    roc_dict = {}
    roc_dict['param'] = param
    roc_dict['tpr'] = np.array(arr_y_s)
    roc_dict['fpr'] = np.array(arr_y)
    roc_dict['e_tpr_l'] = np.array(arr_l_s)
    roc_dict['e_fpr_l'] = np.array(arr_l)
    roc_dict['e_tpr_h'] = np.array(arr_h_s)
    roc_dict['e_fpr_h'] = np.array(arr_h)
    roc_dict['threshold'] = bin_value_dict
    roc_dict['cut_based'] = {}
    #roc_dict['cut_based']['lc'] = LC_dict
    #roc_dict['cut_based']['hc'] = HC_dict
    #roc_dict['aoc'] = aoc
    #roc_dict['aoc_l'] = aoc_l
    #roc_dict['aoc_h'] = aoc_h
    #raw_data = {}
    #raw_data['load_s'] = load_s
    #raw_data['load_b'] = load_b
    #roc_dict['raw'] = raw_data
    '''
    path_dump = '/beegfs/desy/user/hezhiyua/2bBacked/roc_data/'
    name_dump = 'roc.pkl'
    joblib.dump(roc_dict, path_dump+name_dump)
    '''
    return roc_dict
class HLTRate:
    """Extract HLT trigger rates from CRAB cmsRun log tarballs.

    Parses "HLT-Report" lines to accumulate total processed events
    (denominator) and per-ETM-seed pass counts, then converts a seed's
    efficiency into a rate via the sample cross section.

    NOTE(review): pass counts live in a module-level dict `numerator`,
    keyed by label then seed — confirm it is defined in the module.
    """
    # Class-level defaults (each instance rebinds them via the setters).
    _denominator = 0          # total events processed
    _crossSectionInPb = 0.0   # sample cross section [pb]
    _nFiles = 0               # number of log files found
    _iFile = 0                # index of the file being processed
    _label = ""               # key into the module-level `numerator` dict
    _denominatorHist = None
    _numeratorHist = None
    _rateGraph = None

    def addFile (self, fileName):
        """Parse one CRAB cmsRun log tarball and accumulate counts."""
        if not fileName:
            return
        if not os.path.isfile (fileName):
            print "\"" + fileName + "\" does not exist or is not a file!"
            return
        # Only cmsRun_*.log.tar.gz files are CRAB logs.
        if not re.match (r".*\/cmsRun_.*\.log\.tar\.gz", fileName):
            print "\"" + fileName + "\" does not look like a CRAB log file!"
            return
        print " (" + str (self._iFile) + " / " + str (self._nFiles) + ") Processing file \"" + re.sub (r".*\/([^/]+)$", r"\1", fileName) + "\"..."
        # Job number, used to locate the stdout file inside the tarball.
        n = re.sub (r".*\/cmsRun_(.*)\.log\.tar\.gz", r"\1", fileName)
        print " Extracting log file..."
        tin = tarfile.open (fileName)
        fin = tin.extractfile ("cmsRun-stdout-" + n + ".log")
        print " Parsing log file..."
        for line in fin:
            line = line.rstrip ()
            if not re.match (r"HLT-Report.*", line):
                continue
            # Summary line: add the total event count to the denominator.
            if re.match (r".*Events total = .* wasrun = .* passed = .* errors = .*", line):
                self._denominator += int (re.sub (r".*Events total = (.*) wasrun = .* passed = .* errors = .*", r"\1", line))
            # Per-path line for ETM*_MET75_IsoTrk50: accumulate pass count
            # (5th whitespace-separated field) under the seed's threshold.
            if re.match (r".*ETM[^_]*_MET75_IsoTrk50.*", line):
                seed = re.sub (r".*ETM([^_]*)_MET75_IsoTrk50.*", r"\1", line)
                if seed not in numerator[self._label]:
                    numerator[self._label][seed] = 0
                numerator[self._label][seed] += int (re.sub (r"HLT-Report *[^ ]* *[^ ]* *[^ ]* *[^ ]* *([^ ]*) *[^ ]* *[^ ]* *[^ ]* *[^ ]* *[^ ]*", r"\1", line))
        fin.close ()
        tin.close ()

    def addDir (self, dirName):
        """Process every cmsRun_*.log.tar.gz file in dirName."""
        if not dirName:
            return
        if not os.path.isdir (dirName):
            print "\"" + dirName + "\" does not exist or is not a directory!"
            return
        print "Processing directory \"" + dirName + "\"..."
        logFiles = glob.glob (dirName + "/cmsRun_*.log.tar.gz")
        self._nFiles = len (logFiles)
        self._iFile = 0
        for logFile in logFiles:
            self._iFile += 1
            self.addFile (logFile)

    def setCrossSectionInPb (self, crossSectionInPb):
        self._crossSectionInPb = crossSectionInPb

    def setLabel (self, label):
        # Registers this label in the shared module-level numerator dict.
        self._label = label
        numerator[self._label] = {}

    def __init__ (self, label = "", dirName = "", crossSectionInPb = 0.0):
        self.setLabel (label)
        self.addDir (dirName)
        self.setCrossSectionInPb (crossSectionInPb)

    def getRateInPb (self, seed):
        """Return (rate, errLow, errHigh) in pb for an ETM seed threshold.

        Rate = (passed/total efficiency from a TGraphAsymmErrors divide)
        scaled by the cross section. Returns None if the seed is unknown.
        """
        if seed not in numerator[self._label]:
            print "ETM" + seed + " not found in results!"
            return
        self._denominatorHist = TH1D ("total", "", 1, -0.5, 0.5)
        self._numeratorHist = TH1D ("pass", "", 1, -0.5, 0.5)
        self._denominatorHist.SetBinContent (1, self._denominator)
        self._denominatorHist.SetBinError (1, math.sqrt (self._denominator))
        self._numeratorHist.SetBinContent (1, numerator[self._label][seed])
        self._numeratorHist.SetBinError (1, math.sqrt (numerator[self._label][seed]))
        self._rateGraph = TGraphAsymmErrors (self._numeratorHist, self._denominatorHist)
        x = Double (0.0)
        y = Double (0.0)
        self._rateGraph.GetPoint (0, x, y)  # y receives the efficiency
        eLow = self._rateGraph.GetErrorYlow (0)
        eHigh = self._rateGraph.GetErrorYhigh (0)
        y *= self._crossSectionInPb
        eLow *= self._crossSectionInPb
        eHigh *= self._crossSectionInPb
        return (y, eLow, eHigh)

    def getSeeds (self):
        """Return the known seed thresholds, numerically sorted, as strings."""
        return map (str, sorted (map (int, numerator[self._label].keys ())))
#print g_size_s x = Double() y = Double() #x_s = Double() #y_s = Double() arr_x = np.zeros(g_size) arr_y = np.zeros(g_size) #arr_x_s = np.zeros(g_size_s) #arr_y_s = np.zeros(g_size_s) for i in xrange( g_size ): g_efficiency.GetPoint(i,x,y) arr_x[i] = x arr_y[i] = y #print arr_y # if g_size is always equal to g_size_s we can put these loops together #for i in xrange( g_size_s ): # g_tpr.GetPoint(i,x_s,y_s) # arr_x_s[i] = x_s # arr_y_s[i] = y_s # GetEYhigh() work as the following 3 ways(presumably the 'copy' version works most consistently): #----------------------------------------------V1 #buffer_l = g_efficiency.GetEYlow() #arr_l = np.ndarray(g_size, 'f', buffer_l) #----------------------------------------------V2 #buffer_h = g_efficiency.GetEYhigh()
def CutBaseBenchmarkNew(df_test_orig, inDict, JetPrfx_bkg, refAttr='pt', isSigAttrStr='is_signal', weightAttrStr='weight'):
    """Evaluate a cut-based benchmark selection and its ROC point.

    inDict maps attribute names to ('<' or '>', threshold) cuts; each is
    applied on the 'J1'-prefixed, JetPrfx_bkg-qualified column. Weighted
    TPR/FPR are printed, then a single ROC point with Bayesian (68.3% CL,
    Beta(1,1)) asymmetric errors is computed and returned as a dict with
    keys tpr, fpr, tpr_e_l, fpr_e_l, tpr_e_h, fpr_e_h.

    NOTE(review): the second half duplicates CutBaseROC() inline (with
    root_numpy instead of the rnp alias) — candidates for consolidation.
    Relies on module-level np, root_numpy, TH1F, GAE, Double and Python 2
    iteritems/xrange/print.
    """
    refAttrLabel = JetPrfx_bkg + refAttr
    tt = df_test_orig.copy()
    sg = tt[isSigAttrStr] == 1   # boolean masks: signal / background rows
    bg = tt[isSigAttrStr] == 0
    BA_l = {}
    #pick out events that satisfiy the cut
    for iAttr, iList in inDict.iteritems():
        iAttr = 'J1' + iAttr
        if iList[0] == '<':
            BA_l[iAttr] = tt[JetPrfx_bkg + iAttr] < iList[1]
        elif iList[0] == '>':
            BA_l[iAttr] = tt[JetPrfx_bkg + iAttr] > iList[1]
    pos = tt[refAttrLabel]
    pos_sgn = tt[weightAttrStr]
    pos_bkg = tt[weightAttrStr]
    n_pos = tt[weightAttrStr]
    # Apply every cut in sequence (logical AND of all selections).
    for iAttr, iList in inDict.iteritems():
        iAttr = 'J1' + iAttr
        pos = pos[BA_l[iAttr]]  #events that pass the selection(all the cuts)
        pos_sgn = pos_sgn[
            BA_l[iAttr]]  #signal events that pass the selection(all the cuts)
        pos_bkg = pos_bkg[BA_l[
            iAttr]]  #background events that pass the selection(all the cuts)
        n_pos = n_pos[BA_l[iAttr]]  #see below
    pos_sgn = pos_sgn[sg]
    pos_bkg = pos_bkg[bg]
    n_pos = float(n_pos.sum())  #sum up the weights
    n_sgn = float(tt[weightAttrStr][sg].sum())  #sum of weights from all signal
    n_bkg = float(
        tt[weightAttrStr][bg].sum())  #sum of weights from all background
    n_pos_sgn = float(pos_sgn.sum(
    ))  #sum of weights of signal events that pass the selection
    n_pos_bkg = float(pos_bkg.sum(
    ))  #sum of weights of background events that pass the selection
    sgn_eff = np.divide(n_pos_sgn, n_sgn)   # weighted TPR
    fls_eff = np.divide(n_pos_bkg, n_bkg)   # weighted FPR
    print '>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>Benchmark:'
    print 'num of total test events: ', tt[refAttrLabel].count()
    print "num of signals : ", n_sgn
    print 'num of background : ', n_bkg
    print "num of pos events : ", n_pos
    print "num of pos bkg : ", n_pos_bkg
    print "num of pos sgn : ", n_pos_sgn
    print "true positive rate : ", sgn_eff
    print "false positive rate : ", fls_eff
    #return tt[weightAttrStr][sg], tt[weightAttrStr][bg], pos_sgn, pos_bkg#sgn_eff, fls_eff
    df_sg = tt[weightAttrStr][sg]
    df_bg = tt[weightAttrStr][bg]
    df_pos_sgn = pos_sgn
    df_pos_bkg = pos_bkg
    bin_i = 0
    bin_f = 1
    #def CutBaseROC(df_sg, df_bg, df_pos_sgn, df_pos_bkg):
    # --- inlined copy of CutBaseROC: single ROC point with errors ---
    h_cut_pre_tpr = TH1F('h_cut_pre_tpr', 'hist_cut_pre_tpr', 1, bin_i, bin_f)
    h_cut_pos_tpr = TH1F('h_cut_pos_tpr', 'hist_cut_pos_tpr', 1, bin_i, bin_f)
    h_cut_pre_fpr = TH1F('h_cut_pre_fpr', 'hist_cut_pre_fpr', 1, bin_i, bin_f)
    h_cut_pos_fpr = TH1F('h_cut_pos_fpr', 'hist_cut_pos_fpr', 1, bin_i, bin_f)
    # Fill value == weight, so each histogram integrates the weight sum.
    root_numpy.fill_hist(h_cut_pre_tpr, df_sg, df_sg)
    root_numpy.fill_hist(h_cut_pos_tpr, df_pos_sgn, df_pos_sgn)
    root_numpy.fill_hist(h_cut_pre_fpr, df_bg, df_bg)
    root_numpy.fill_hist(h_cut_pos_fpr, df_pos_bkg, df_pos_bkg)
    g_cut_tpr = GAE()
    g_cut_fpr = GAE()
    g_cut_tpr.Divide(h_cut_pos_tpr, h_cut_pre_tpr, "cl=0.683 b(1,1) mode")
    g_cut_fpr.Divide(h_cut_pos_fpr, h_cut_pre_fpr, "cl=0.683 b(1,1) mode")
    g_size_cut = 1  # single-bin histograms -> a single graph point
    x = Double()
    y = Double()
    x_s = Double()
    y_s = Double()
    arr_x = np.zeros(g_size_cut)
    arr_y = np.zeros(g_size_cut)
    arr_x_s = np.zeros(g_size_cut)
    arr_y_s = np.zeros(g_size_cut)
    for i in xrange(g_size_cut):
        g_cut_fpr.GetPoint(i, x, y)
        arr_x[i] = x
        arr_y[i] = y
        g_cut_tpr.GetPoint(i, x_s, y_s)
        arr_x_s[i] = x_s
        arr_y_s[i] = y_s
    # Copy the raw error buffers into numpy arrays before the graphs die.
    buffer_l = g_cut_fpr.GetEYlow()
    buffer_l.SetSize(g_size_cut)
    arr_l = np.array(buffer_l, copy=True)
    buffer_h = g_cut_fpr.GetEYhigh()
    buffer_h.SetSize(g_size_cut)
    arr_h = np.array(buffer_h, copy=True)
    buffer_l_s = g_cut_tpr.GetEYlow()
    buffer_l_s.SetSize(g_size_cut)
    arr_l_s = np.array(buffer_l_s, copy=True)
    buffer_h_s = g_cut_tpr.GetEYhigh()
    buffer_h_s.SetSize(g_size_cut)
    arr_h_s = np.array(buffer_h_s, copy=True)
    print len(arr_h)
    print len(arr_l)
    print 'TPR: ', arr_y_s
    print 'FPR: ', arr_y
    print arr_l_s
    print arr_l
    print arr_h_s
    print arr_h
    out_dict = {}
    out_dict['tpr'] = arr_y_s[0]
    out_dict['fpr'] = arr_y[0]
    out_dict['tpr_e_l'] = arr_l_s[0]
    out_dict['fpr_e_l'] = arr_l[0]
    out_dict['tpr_e_h'] = arr_h_s[0]
    out_dict['fpr_e_h'] = arr_h[0]
    return out_dict