def writeFullUnc(pred_file):
    '''
    Update the input root file, add a hist with total prediction and full uncertainty.

    Opens ``pred_file`` in UPDATE mode and writes (overwriting existing keys):
      * ``bkgtotal_unc_sr``        total prediction, y-errors taken from ``fullUnc``
      * ``bkgtotal_syst_unc_sr``   total prediction, y-errors taken from ``systUnc``
      * ``<sample>_unc_sr``        per-sample graph, y-errors from ``fullUnc_pieces``
      * ``<sample>_syst_up`` / ``<sample>_syst_dn``
                                   histograms filled from ``systUnc_rel_pieces``

    As a side effect the module-level ``allVals`` dict gets one entry per bin,
    mapping ``'bkg'`` and every sample name to ``(value, err_low, err_up)``.

    Relies on module-level names defined outside this function:
    ``pred_total_name``, ``graph_names``, ``all_samples``, ``binlist``,
    ``fullUnc``, ``systUnc``, ``yields``, ``fullUnc_pieces``,
    ``systUnc_rel_pieces`` and ``allVals``.
    '''
    f = rt.TFile(pred_file, 'UPDATE')
    try:
        h = TGraphAsymmErrors(f.Get(pred_total_name).Clone('bkgtotal_unc_sr'))
        h_syst = TGraphAsymmErrors(
            f.Get(pred_total_name).Clone('bkgtotal_syst_unc_sr'))
        h_pieces = {}
        h_syst_pieces = {}
        for hname, sample in zip(graph_names, all_samples):
            h_pieces[sample] = TGraphAsymmErrors(
                f.Get(hname).Clone(sample + '_unc_sr'))
            # NOTE(review): the 183-bin axis is hard-coded; presumably it
            # matches the number of search bins in binlist -- confirm.
            h_syst_pieces[sample + "_up"] = TH1F(
                sample + '_syst_up', sample + '_syst_up', 183, 0, 183)
            h_syst_pieces[sample + "_dn"] = TH1F(
                sample + '_syst_dn', sample + '_syst_dn', 183, 0, 183)

        print("%30s %10s %16s" % ('bin', 'total pred', 'total unc.'))
        for ibin in range(0, h.GetN()):
            bin_name = binlist[ibin]

            # Total prediction with the full uncertainty.
            val = h.GetY()[ibin]
            e_low, e_up = fullUnc[bin_name]
            h.SetPointEYlow(ibin, e_low)
            h.SetPointEYhigh(ibin, e_up)
            print("%30s %10.4f +%8.4f -%8.4f" % (bin_name, val, e_up, e_low))
            allVals[bin_name] = {'bkg': (val, e_low, e_up)}

            # Total prediction with the systematic-only uncertainty.
            e_low, e_up = systUnc[bin_name]
            h_syst.SetPointEYlow(ibin, e_low)
            h_syst.SetPointEYhigh(ibin, e_up)

            for sample in all_samples:
                val = yields[bin_name][sample]
                e_low, e_up = fullUnc_pieces[sample][bin_name]
                h_pieces[sample].SetPointEYlow(ibin, e_low)
                h_pieces[sample].SetPointEYhigh(ibin, e_up)
                allVals[bin_name][sample] = (val, e_low, e_up)

                # Per-sample systematic pieces go into histograms whose
                # bins are 1-based, hence ibin + 1.
                e_low, e_up = systUnc_rel_pieces[sample][bin_name]
                h_syst_pieces[sample + "_up"].SetBinContent(ibin + 1, e_up)
                h_syst_pieces[sample + "_dn"].SetBinContent(ibin + 1, e_low)
                h_syst_pieces[sample + "_up"].SetBinError(ibin + 1, 0)
                h_syst_pieces[sample + "_dn"].SetBinError(ibin + 1, 0)

        h.Write('bkgtotal_unc_sr', rt.TObject.kOverwrite)
        # Fix: the syst-only graph must be written here. Previously `h` was
        # written a second time under this key and `h_syst` was never stored.
        h_syst.Write('bkgtotal_syst_unc_sr', rt.TObject.kOverwrite)
        for sample in all_samples:
            h_pieces[sample].Write(sample + '_unc_sr', rt.TObject.kOverwrite)
            h_syst_pieces[sample + "_up"].Write(sample + '_syst_up',
                                                rt.TObject.kOverwrite)
            h_syst_pieces[sample + "_dn"].Write(sample + '_syst_dn',
                                                rt.TObject.kOverwrite)
    finally:
        # Close the file even if a lookup above fails.
        f.Close()
def writeFullUnc(pred_file):
    '''
    Update the input root file, add a hist with total prediction and full uncertainty.

    The file is opened in UPDATE mode. The total-prediction graph is cloned
    as ``bkgtotal_unc_sr`` and each per-sample graph as ``<sample>_unc_sr``;
    their y-errors are replaced with the values from ``fullUnc`` and
    ``fullUnc_pieces`` and the graphs are written back, overwriting existing
    keys. The module-level ``allVals`` dict is filled per bin with
    ``(value, err_low, err_up)`` tuples for ``'bkg'`` and every sample.
    '''
    root_file = rt.TFile(pred_file, 'UPDATE')
    total_graph = TGraphAsymmErrors(
        root_file.Get(pred_total_name).Clone('bkgtotal_unc_sr'))

    sample_graphs = {}
    for graph_name, sample in zip(graph_names, all_samples):
        cloned = root_file.Get(graph_name).Clone(sample + '_unc_sr')
        sample_graphs[sample] = TGraphAsymmErrors(cloned)

    print("%30s %10s %16s" % ('bin', 'total pred', 'total unc.'))
    for point in range(total_graph.GetN()):
        bin_name = binlist[point]

        # Total prediction for this bin.
        total_val = total_graph.GetY()[point]
        total_low, total_up = fullUnc[bin_name]
        total_graph.SetPointEYlow(point, total_low)
        total_graph.SetPointEYhigh(point, total_up)
        print("%30s %10.2f +%8.2f -%8.2f" % (bin_name, total_val, total_up,
                                             total_low))
        allVals[bin_name] = {'bkg': (total_val, total_low, total_up)}

        # Individual background samples for this bin.
        for sample in all_samples:
            sample_val = yields[bin_name][sample]
            sample_low, sample_up = fullUnc_pieces[sample][bin_name]
            graph = sample_graphs[sample]
            graph.SetPointEYlow(point, sample_low)
            graph.SetPointEYhigh(point, sample_up)
            allVals[bin_name][sample] = (sample_val, sample_low, sample_up)

    total_graph.Write('bkgtotal_unc_sr', rt.TObject.kOverwrite)
    for sample in all_samples:
        sample_graphs[sample].Write(sample + '_unc_sr', rt.TObject.kOverwrite)
    root_file.Close()
def obsOverPredWithDataStat(data_hist, bkg_total):
    '''
    Build the observed/predicted ratio graph carrying only the data errors.

    A TGraphAsymmErrors is created from ``data_hist``; each point is divided
    by the matching bin content of ``bkg_total`` and its y-errors are set to
    the data bin errors divided by the same content. Bins whose background
    content is not above 1e-8 in absolute value get a ratio of 0 with zero
    errors. X-errors are always set to 0.

    Returns the ratio graph.
    '''
    ratio_graph = TGraphAsymmErrors(data_hist)
    n_bins = data_hist.GetNbinsX()
    for ibin in range(1, n_bins + 1):
        point = ibin - 1  # graph points are 0-based, histogram bins 1-based
        expected = bkg_total.GetBinContent(ibin)

        if not abs(expected) > 1e-8:
            # No usable background prediction: park the point at zero.
            ratio_graph.GetY()[point] = 0
            ratio_graph.SetPointError(point, 0., 0., 0, 0)
            continue

        # Divide data by background.
        y_values = ratio_graph.GetY()
        y_values[point] = y_values[point] / expected
        err_low = data_hist.GetBinErrorLow(ibin) / expected
        err_up = data_hist.GetBinErrorUp(ibin) / expected
        ratio_graph.SetPointError(point, 0., 0., err_low, err_up)
    return ratio_graph
def calculate(self, histo):
    '''
    Compute an efficiency and its asymmetric errors via BayesDivide.

    The numerator and denominator histograms are obtained from
    ``self.__getWeightOneHisto`` for ``self.__name`` and ``self.__norm``.

    Returns ``(efficiency, (error_low, error_high))`` read from the first
    point of the resulting graph.

    Raises StandardError if the numerator has more entries than the
    denominator, or if BayesDivide leaves the graph without any point.
    '''
    from ROOT import TGraphAsymmErrors

    passed = self.__getWeightOneHisto(histo, self.__name)
    reference = self.__getWeightOneHisto(histo, self.__norm)

    if passed.GetEntries() > reference.GetEntries():
        message = " comparing '%s' to '%s' in '%s' makes no sense eff > 1!" % (
            self.__name, self.__norm, histo.GetName())
        raise StandardError(message)

    graph = TGraphAsymmErrors(1)
    graph.BayesDivide(passed, reference)
    if graph.GetN() < 1:
        message = "Efficiency cannot be calculated '%s' in '%s'" % (
            self.__name, histo.GetName())
        raise StandardError(message)

    efficiency = graph.GetY()[0]
    errors = (graph.GetEYlow()[0], graph.GetEYhigh()[0])
    return (efficiency, errors)
def obsOverPredWithDataStat(data_hist, bkg_total):
    '''
    Build the observed/predicted ratio graph carrying only the data errors.

    Each point of a TGraphAsymmErrors made from ``data_hist`` is divided by
    the matching bin content of ``bkg_total``, and the data bin errors are
    scaled the same way. Bins whose background content is not above 1e-8 in
    absolute value get ratio 0 and zero errors. X-errors are set to 0.

    When the data histogram has no positive sum of weights (only the
    expected distribution is plotted), every point is shifted by 1e30 so the
    data points do not show up in the ratio panel.

    Returns the ratio graph.
    '''
    ratio = TGraphAsymmErrors(data_hist)
    bin_numbers = range(1, data_hist.GetNbinsX() + 1)

    for bin_number in bin_numbers:
        idx = bin_number - 1
        bkg_content = bkg_total.GetBinContent(bin_number)
        if abs(bkg_content) > 1e-8:
            # Divide data by background.
            y_buffer = ratio.GetY()
            y_buffer[idx] = y_buffer[idx] / bkg_content
            scaled_low = data_hist.GetBinErrorLow(bin_number) / bkg_content
            scaled_up = data_hist.GetBinErrorUp(bin_number) / bkg_content
            ratio.SetPointError(idx, 0., 0., scaled_low, scaled_up)
        else:
            ratio.GetY()[idx] = 0
            ratio.SetPointError(idx, 0., 0., 0, 0)

    # With no data at all, push the points far off-scale instead of drawing
    # a row of markers at zero.
    data_is_empty = data_hist.GetSumOfWeights() <= 0
    if data_is_empty:
        for bin_number in range(1, data_hist.GetNbinsX() + 1):
            ratio.GetY()[bin_number - 1] += 1e30
    return ratio
def fixData(hist, useGarwood=True, cutGrass=False, maxPoisson=False):
    '''
    Convert a data histogram into a TGraphAsymmErrors for plotting.

    One point is created per bin, at the bin centre, with the bin content
    clipped to be non-negative and zero x-errors.

    Parameters:
      hist        -- input histogram; ``None`` makes the function return None.
      useGarwood  -- if True, y-errors are the 68.27% Garwood (gamma-quantile)
                     interval around the content; otherwise sqrt(content).
      cutGrass    -- accepted for backward compatibility; in this function it
                     does not influence the returned graph.
      maxPoisson  -- with useGarwood, take the larger of the Garwood error
                     and the histogram's own asymmetric bin error.

    Returns the new graph, with marker colour/style/size copied from ``hist``.
    '''
    # NOTE(review): `== None` is kept rather than `is None` because PyROOT
    # null-object proxies are understood to compare equal to None -- confirm
    # before tightening this check.
    if hist == None:
        return

    varBins = False
    data = TGraphAsymmErrors()
    alpha = 1 - 0.6827

    # Points are filled from the last bin to the first, as in the original.
    for i in reversed(range(0, hist.GetNbinsX())):
        ibin = i + 1  # histogram bins are 1-based, graph points 0-based
        N = max(hist.GetBinContent(ibin), 0.)  # Avoid unphysical bins
        data.SetPoint(i, hist.GetXaxis().GetBinCenter(ibin), N)

        # X error bars to 0 - do not move this, otherwise the first bin
        # will disappear.
        if not varBins:
            data.SetPointEXlow(i, 0)
            data.SetPointEXhigh(i, 0)

        if useGarwood:
            # Garwood confidence intervals
            L = ROOT.Math.gamma_quantile(alpha / 2, N, 1.) if N > 0 else 0.
            U = ROOT.Math.gamma_quantile_c(alpha / 2, N + 1, 1)
            EL = N - L
            EU = U - N
            if maxPoisson:
                # Maximum between Poisson and Sumw2 error bars.
                # Fix: use the 1-based bin number (the point index was passed
                # before) and GetBinErrorUp (TH1 has no GetBinErrorHigh).
                EL = max(EL, hist.GetBinErrorLow(ibin))
                EU = max(EU, hist.GetBinErrorUp(ibin))
            data.SetPointEYlow(i, EL)
            data.SetPointEYhigh(i, EU)
        else:
            data.SetPointEYlow(i, math.sqrt(N))
            data.SetPointEYhigh(i, math.sqrt(N))

        # Cut grass: the flag is cleared at the first non-empty bin seen
        # from the right, but nothing below reads it any more.
        if cutGrass and data.GetY()[i] > 0.:
            cutGrass = False

    data.SetMarkerColor(hist.GetMarkerColor())
    data.SetMarkerStyle(hist.GetMarkerStyle())
    data.SetMarkerSize(hist.GetMarkerSize())
    return data
def prepareData(data):
    '''
    Switch ``data`` to Poisson bin errors and build a graph for plotting it.

    The graph gets zero x-errors and the histogram's asymmetric bin errors
    as y-errors (zero for bins with negative content). Points of bins whose
    content is below 1e-8 are shifted far above the plot so they are not
    displayed.

    Returns the tuple ``(data, data_graph)``.
    '''
    # Set Poisson errors on the histogram itself.
    data.SetBinErrorOption(ROOT.TH1.kPoisson)

    # Build the TGraphAsymmErrors used for plotting.
    data_graph = TGraphAsymmErrors(data)
    for ibin in range(1, data.GetNbinsX() + 1):
        if data.GetBinContent(ibin) >= 0.:
            err_low = data.GetBinErrorLow(ibin)
            err_up = data.GetBinErrorUp(ibin)
        else:
            err_low = 0.
            err_up = 0.
        data_graph.SetPointError(ibin - 1, 0, 0, err_low, err_up)

    # Hack for not displaying points at zero: move them well beyond the
    # top of the highest bin including its upper error.
    top_bin = data.GetMaximumBin()
    maximum = data.GetBinContent(top_bin) + data.GetBinErrorUp(top_bin)
    offset = maximum * 1e8
    for ibin in range(1, data.GetNbinsX() + 1):
        if data.GetBinContent(ibin) < 1e-8:
            data_graph.GetY()[ibin - 1] += offset

    return data, data_graph
def plotDataOverMCEff(hist_mc_tight, hist_mc_loose, hist_data_tight, hist_data_loose, plot_name='fakerate.pdf'):
    '''
    Plot the tight/loose rate for MC and data plus their ratio.

    Two graphs are built with ``TGraphAsymmErrors.Divide(tight, loose)``,
    one from the MC histograms and one from the data histograms. They are
    drawn in the upper pad of the canvas from ``HistDrawer.buildCanvas()``,
    and the data/MC ratio with relative errors added in quadrature goes in
    the ratio pad. The canvas is printed to ``plot_name``.
    '''
    def _rate_graph(tight, loose):
        # Tight/loose graph with the common axis cosmetics.
        graph = TGraphAsymmErrors(tight)
        graph.Divide(tight, loose)
        graph.GetYaxis().SetTitle('Fake rate')
        graph.GetXaxis().SetTitle(tight.GetXaxis().GetTitle())
        graph.GetYaxis().SetTitleOffset(1.2)
        graph.GetYaxis().SetTitleOffset(1.3)  # overrides the 1.2 just set
        return graph

    g = _rate_graph(hist_mc_tight, hist_mc_loose)
    g.SetLineColor(2)
    g.SetMarkerColor(2)

    g_data = _rate_graph(hist_data_tight, hist_data_loose)
    g_data.SetMarkerColor(1)

    mc_rates = g.GetY()
    data_rates = g_data.GetY()

    g_ratio = g_data.Clone('ratio')
    for ipoint in range(g_data.GetN()):
        mc_rate = mc_rates[ipoint]
        data_rate = data_rates[ipoint]

        if mc_rate:
            ratio = data_rate / mc_rate
        else:
            ratio = 0.
        g_ratio.SetPoint(ipoint, g.GetX()[ipoint], ratio)

        # Relative errors of data and MC added in quadrature; only
        # meaningful when both rates are positive.
        both_positive = data_rate > 0. and mc_rate > 0.

        rel_low = 0.
        if both_positive:
            rel_low = math.sqrt(
                (g_data.GetErrorYlow(ipoint) / data_rate)**2 +
                (g.GetErrorYlow(ipoint) / mc_rate)**2)
        g_ratio.SetPointEYlow(ipoint, rel_low * ratio)

        rel_high = 0.
        if both_positive:
            rel_high = math.sqrt(
                (g_data.GetErrorYhigh(ipoint) / data_rate)**2 +
                (g.GetErrorYhigh(ipoint) / mc_rate)**2)
        g_ratio.SetPointEYhigh(ipoint, rel_high * ratio)

    # Gymnastics to get same label sizes etc in ratio and main plot
    ytp_ratio = 2.
    xtp_ratio = 2.

    main_x, main_y = g.GetXaxis(), g.GetYaxis()
    g_ratio.GetYaxis().SetTitleSize(main_y.GetTitleSize() * xtp_ratio)
    g_ratio.GetXaxis().SetTitleSize(main_x.GetTitleSize() * ytp_ratio)
    g_ratio.GetYaxis().SetTitleOffset(main_y.GetTitleOffset() / xtp_ratio)
    g_ratio.GetXaxis().SetTitleOffset(main_x.GetTitleOffset())
    g_ratio.GetYaxis().SetLabelSize(main_y.GetLabelSize() * xtp_ratio)
    g_ratio.GetXaxis().SetLabelSize(main_x.GetLabelSize() * ytp_ratio)

    # The x labels live on the ratio pad only.
    for upper_graph in (g_data, g):
        upper_graph.GetXaxis().SetLabelColor(0)
        upper_graph.GetXaxis().SetLabelSize(0)

    g_ratio.GetXaxis().SetTitle(g.GetXaxis().GetTitle())

    g.GetYaxis().SetRangeUser(0.001, 0.2)

    cv, pad, padr = HistDrawer.buildCanvas()

    pad.cd()
    g.Draw('AP')
    g_data.Draw('P')

    legend = TLegend(0.23, 0.73, 0.43, 0.91)
    legend.SetFillColor(0)
    legend.SetFillStyle(0)
    legend.SetLineColor(0)
    legend.SetLineWidth(0)
    for graph, label in ((g, 'MC'), (g_data, 'Observed')):
        legend.AddEntry(graph.GetName(), label, 'lep')
    legend.Draw()

    padr.cd()
    g_ratio.GetYaxis().SetRangeUser(0.51, 1.49)
    g_ratio.GetYaxis().SetTitle('Obs/MC')
    g_ratio.Draw('AP')

    drawRatioLines(g_ratio)

    cv.Print(plot_name)
',', "_").replace(' ', "") + "_rebin1_" + u + "Down" print histname2 histname3 = 'QCD#chi' + str(massbins[massbin]).strip("()").replace( ',', "_").replace(' ', "") + "_rebin1" print histname3 up = fsys.Get(histname1) down = fsys.Get(histname2) central = fsys.Get(histname3) uncertainties += [[up, down, central]] h2new = h14.Clone("down" + str(massbins[massbin])) h3new = h14.Clone("up" + str(massbins[massbin])) chi2 = 0 for b in range(h14.GetXaxis().GetNbins()): if b == 0: # or b==h14G.GetXaxis().GetNbins()-1: print massbins[massbin], b, "stat", h14G.GetErrorYlow( b) / h14G.GetY()[b], h14G.GetErrorYhigh(b) / h14G.GetY()[b] exp_sumdown = 0 exp_sumup = 0 theory_sumdown = 0 theory_sumup = 0 for up, down, central in uncertainties: if b == 0: # or b==h14G.GetXaxis().GetNbins()-1: print massbins[massbin], b, uncertaintynames[ uncertainties.index([up, down, central])], abs( up.GetBinContent(b + 1) - central.GetBinContent( b + 1)) / central.GetBinContent(b + 1), abs( down.GetBinContent(b + 1) - central.GetBinContent(b + 1) ) / central.GetBinContent(b + 1) addup = pow( max(
def plotDataOverMCEff(hist_mc_tight, hist_mc_loose, hist_data_tight, hist_data_loose, plot_name='fakerate.pdf', mc_leg='MC', obs_leg='Observed', ratio_leg='Obs/MC'):
    '''
    Plot the tight/loose rate for MC and data plus their ratio.

    Two graphs are built with ``TGraphAsymmErrors.Divide(tight, loose)``,
    one from the MC histograms and one from the data histograms. They are
    drawn in the upper pad of the canvas from ``HistDrawer.buildCanvas()``,
    and the data/MC ratio with relative errors added in quadrature goes in
    the ratio pad.

    Parameters:
      hist_*_tight, hist_*_loose -- numerator / denominator histograms.
      plot_name  -- output file for the linear-scale plot.
      mc_leg, obs_leg -- legend labels of the MC and data graphs.
      ratio_leg  -- y-axis title of the ratio pad.

    Outputs, for ``plot_name = <stem><ext>``:
      * ``<stem><ext>``       linear y scale
      * ``<stem>_log<ext>``   logarithmic y scale, range 0.0001 to 1
      * ``<stem>_log.root``   both graphs and the canvas
    '''
    import os  # function-scope: only needed for the output names below

    g = TGraphAsymmErrors(hist_mc_tight)
    g.Divide(hist_mc_tight, hist_mc_loose)
    g.GetYaxis().SetTitle('Misidentification rate')
    g.GetXaxis().SetTitle(hist_mc_tight.GetXaxis().GetTitle())
    g.GetYaxis().SetTitleOffset(1.3)
    g.SetLineColor(2)
    g.SetMarkerColor(2)

    g_data = TGraphAsymmErrors(hist_data_tight)
    g_data.Divide(hist_data_tight, hist_data_loose)
    g_data.GetYaxis().SetTitle('Misidentification rate')
    g_data.GetXaxis().SetTitle(hist_data_tight.GetXaxis().GetTitle())
    g_data.GetYaxis().SetTitleOffset(1.3)
    g_data.SetMarkerColor(1)

    g_vals = g.GetY()
    g_data_vals = g_data.GetY()

    g_ratio = g_data.Clone('ratio')

    for i in range(g_data.GetN()):
        ratio = g_data_vals[i] / g_vals[i] if g_vals[i] else 0.
        g_ratio.SetPoint(i, g.GetX()[i], ratio)

        # Relative errors of data and MC added in quadrature; only
        # meaningful when both rates are positive.
        both_positive = g_data_vals[i] > 0. and g_vals[i] > 0.
        if both_positive:
            rel_y_low = math.sqrt((g_data.GetErrorYlow(i) / g_data_vals[i])**2 +
                                  (g.GetErrorYlow(i) / g_vals[i])**2)
            rel_y_high = math.sqrt((g_data.GetErrorYhigh(i) / g_data_vals[i])**2 +
                                   (g.GetErrorYhigh(i) / g_vals[i])**2)
        else:
            rel_y_low = 0.
            rel_y_high = 0.
        g_ratio.SetPointEYlow(i, rel_y_low * ratio)
        g_ratio.SetPointEYhigh(i, rel_y_high * ratio)

    # Gymnastics to get same label sizes etc in ratio and main plot
    ytp_ratio = 2.
    xtp_ratio = 2.

    g_ratio.GetYaxis().SetTitleSize(g.GetYaxis().GetTitleSize() * xtp_ratio)
    g_ratio.GetXaxis().SetTitleSize(g.GetXaxis().GetTitleSize() * ytp_ratio)
    g_ratio.GetYaxis().SetTitleOffset(g.GetYaxis().GetTitleOffset() / xtp_ratio)
    g_ratio.GetXaxis().SetTitleOffset(g.GetXaxis().GetTitleOffset())
    g_ratio.GetYaxis().SetLabelSize(g.GetYaxis().GetLabelSize() * xtp_ratio)
    g_ratio.GetXaxis().SetLabelSize(g.GetXaxis().GetLabelSize() * ytp_ratio)

    # The x labels live on the ratio pad only.
    g_data.GetXaxis().SetLabelColor(0)
    g_data.GetXaxis().SetLabelSize(0)
    g.GetXaxis().SetLabelColor(0)
    g.GetXaxis().SetLabelSize(0)

    g_ratio.GetXaxis().SetTitle(g.GetXaxis().GetTitle())

    # Fix: TGraph.GetMaximum() returns the stored axis-maximum attribute
    # (-1111 unless SetMaximum was called), not the largest point, so the
    # range used to collapse to 1.3 * 0.05. Use the point values instead.
    point_rates = [g_vals[i] for i in range(g.GetN())]
    point_rates += [g_data_vals[i] for i in range(g_data.GetN())]
    maxy = 1.3 * max(point_rates + [0.05])
    g.GetYaxis().SetRangeUser(0.0011, maxy)

    cv, pad, padr = HistDrawer.buildCanvas()

    pad.cd()

    g.Draw('AP')
    g_data.Draw('P')

    legend = TLegend(0.23, 0.73, 0.43, 0.91)
    legend.SetFillColor(0)
    legend.SetFillStyle(0)
    legend.SetLineColor(0)
    legend.SetLineWidth(0)

    legend.AddEntry(g.GetName(), mc_leg, 'lep')
    legend.AddEntry(g_data.GetName(), obs_leg, 'lep')

    legend.Draw()

    padr.cd()
    g_ratio.GetYaxis().SetRangeUser(0.01, 1.99)
    g_ratio.GetYaxis().SetTitle(ratio_leg)
    g_ratio.Draw('AP')

    drawRatioLines(g_ratio)

    cv.Print(plot_name)

    # Fix: derive the extra output names from the extension only. The old
    # str.replace('.', '_log.') rewrote every dot in the path (e.g. './'),
    # and a non-pdf plot name was reused as the ROOT file name, overwriting
    # the plot that had just been printed.
    stem, ext = os.path.splitext(plot_name)
    log_plot_name = stem + '_log' + ext
    root_file_name = stem + '_log.root'

    g.GetYaxis().SetRangeUser(0.0001, 1)
    pad.SetLogy(True)
    cv.Print(log_plot_name)

    f = ROOT.TFile(root_file_name, 'RECREATE')
    g.Write()
    g_data.Write()
    cv.Write()
    f.Close()
def fillAcceptance(self):
    '''
    This is to calculate the acceptance of the sample

    A TChain is built from ``self.infileNames``. One-bin numerator and
    denominator histograms of ``hnl_pt`` are filled, inclusively and per
    entry of ``self.Bspecies``, and the efficiency of bin 1 is stored:

      * ``self.acc``, ``self.acc_errup``, ``self.acc_errdn`` from TEfficiency
      * ``self.acc_tg``, ``self.acc_errup_tg``, ``self.acc_errdn_tg`` from
        TGraphAsymmErrors.BayesDivide
      * ``self.accB[b]``, ``self.accB_errup[b]``, ``self.accB_errdn[b]`` per
        species (set to 0 when the histograms are not consistent)

    ``self.num`` / ``self.den`` hold the entry counts of the inclusive
    histograms. The ``accB*`` containers are not created here and must
    already exist on the instance.
    '''
    if debug:
        print('Info in Sample.fillAcceptance()')

    chain = TChain(self.treeName)
    for file_name in self.infileNames:
        chain.Add(file_name)

    # Book the one-bin histograms: inclusive first, then one per B species.
    self.effnum = ROOT.TH1F('effnum', 'effnum', 1, 0, 13000)
    self.effden = ROOT.TH1F('effden', 'effden', 1, 0, 13000)
    self.effnumB = {
        b: ROOT.TH1F('effnum_%s' % b, 'effnum_%s' % b, 1, 0, 13000)
        for b in self.Bspecies
    }
    self.effdenB = {
        b: ROOT.TH1F('effden_%s' % b, 'effden_%s' % b, 1, 0, 13000)
        for b in self.Bspecies
    }

    # Numerator selection: trigger-muon cuts plus whichever optional
    # requirements are enabled by the module-level switches.
    cutsnum = '(l0_pt>{mp} && abs(l0_eta)<1.5'.format(mp=muTrigPt)
    if not doSkipDispl:
        cutsnum += '&& Lxy < 500'
    if doDisplZ:
        cutsnum += '&& Lz < 20'
    if not doSkipHNLptEta:
        cutsnum += '&& l1_pt>3 && abs(l1_eta)<2.5 && pi1_pt>0.8 && abs(pi1_eta)<2.5'
    cutsnum += ')'

    # Denominator selection: trigger-muon cuts only.
    cutsden = '(l0_pt>{mp} && abs(l0_eta)<1.5)'.format(mp=muTrigPt)

    ###### fill the total acceptance
    chain.Draw('hnl_pt>>effnum', cutsnum + '*' + self.evt_w, 'goff')
    chain.Draw('hnl_pt>>effden', cutsden, 'goff')

    if TEfficiency.CheckConsistency(self.effnum, self.effden):
        total_eff = TEfficiency(self.effnum, self.effden)
        self.acc = total_eff.GetEfficiency(1)
        self.acc_errup = total_eff.GetEfficiencyErrorUp(1)
        self.acc_errdn = total_eff.GetEfficiencyErrorLow(1)

        # Cross-check of the same numbers with TGraphAsymmErrors.
        cross_check = TGraphAsymmErrors()
        cross_check.BayesDivide(self.effnum, self.effden)
        self.acc_tg = cross_check.GetY()[0]
        self.acc_errup_tg = cross_check.GetErrorYhigh(0)
        self.acc_errdn_tg = cross_check.GetErrorYlow(0)

    # for debugging purposes
    self.num = self.effnum.GetEntries()
    if self.num == 0:
        print('**** 0 entries for mass={}'.format(self.mass))
    self.den = self.effden.GetEntries()

    ###### fill the partial acceptances
    for index, species in enumerate(self.Bspecies):
        species_sel = '(abs(b_pdgid)=={bid})'.format(bid=self.BpdgIds[index])
        selnum = ''.join(['(', cutsnum, '&&', species_sel, ')'])
        selden = ''.join(['(', cutsden, '&&', species_sel, ')'])
        chain.Draw('hnl_pt>>effnum_{b}'.format(b=species),
                   selnum + '*' + self.evt_w, 'goff')
        chain.Draw('hnl_pt>>effden_{b}'.format(b=species), selden, 'goff')

        num_hist = self.effnumB[species]
        den_hist = self.effdenB[species]
        if not TEfficiency.CheckConsistency(num_hist, den_hist):
            self.accB[species] = 0
            self.accB_errup[species] = 0
            self.accB_errdn[species] = 0
            continue

        species_eff = TEfficiency(num_hist, den_hist)
        self.accB[species] = species_eff.GetEfficiency(1)
        self.accB_errup[species] = species_eff.GetEfficiencyErrorUp(1)
        self.accB_errdn[species] = species_eff.GetEfficiencyErrorLow(1)