def BackgroundFit(datafileName="hist_data.root",
                  topfileName="hist_ttbar.root",
                  zjetfileName="hist_Zjets.root",
                  distributionName="leadHCand_Mass",
                  n_trkjet=["4", "3", "2"],
                  n_btag=["4", "3", "2"],
                  btag_WP="77",
                  NRebin=1,
                  use_one_top_nuis=False,
                  use_scale_top_0b=False,
                  nbtag_top_shape_for4b=None,
                  makePlots=True,
                  whichFunc="SLAC",
                  output="",
                  verbose=True):
    """Fit the QCD and top normalisations of each region in the "SB" histograms.

    Region labels are built as ``n_trkjet[i] + n_btag[i]`` (e.g. "4" + "4"
    gives "44").  For every such region the QCD template is the data
    histogram of the matching ``<n_trkjet>0`` region with the top and Z+jets
    histograms of that same ``<n_trkjet>0`` region subtracted.  The templates
    are written into the module-level dictionaries ``h_data``, ``h_qcd``,
    ``h_top``, ``h_top_0b``, ``h_zjet`` and ``h_zjet_0b`` keyed by region
    label.  Those dictionaries are filled in place, so they must already
    exist at module level.  ``Fit`` (defined elsewhere) is then run on a
    ``TMinuit`` object whose FCN is ``NegLogL``.

    Args:
        datafileName: ROOT file with the data histograms.
        topfileName: ROOT file with the ttbar histograms.
        zjetfileName: ROOT file with the Z+jets histograms, or None to open
            no Z+jets file (``CheckAndGet`` then receives None).
        distributionName: histogram name handed to ``HistLocStr``.
        n_trkjet: track-jet label(s), one per region (string or sequence).
        n_btag: b-tag label(s), one per region; must match ``n_trkjet`` in
            length.
        btag_WP: working-point label handed to ``HistLocStr``.
        NRebin: rebin factor applied to the stored templates when > 1.
        use_one_top_nuis: if True only one top parameter is counted for
            Minuit and ``results["topscale"][0]`` is used for every region.
        use_scale_top_0b: copied to the global ``scaleTop0b``; not used
            otherwise in this function.
        nbtag_top_shape_for4b: region label whose top histogram is used as
            the top shape for every fitted region, rescaled to that region's
            own top integral.  None keeps each region's own top histogram.
        makePlots: call ``MakePlot`` for each region and print the fitted
            values.
        whichFunc: accepted for interface compatibility; unused here.
        output: copied to the global ``Output``.
        verbose: TMinuit print level is 1 when True, -1 otherwise.

    Returns:
        The dictionary returned by ``Fit`` with the extra keys ``"pnom"``,
        ``"pvars"``, ``"inputhist_0b_data"`` and ``"inputhist_0b_ttbar"``.
    """
    global h_qcd, h_top, h_top_0b, h_zjet, h_zjet_0b, h_data
    global useOneTopNuis, scaleTop0b, regions, dist_name, Output

    ################### Parse #########################
    dist_name = distributionName

    # Accept either a single label or a sequence of labels.
    num_trkjet = np.asarray(n_trkjet)
    if num_trkjet.shape == ():
        num_trkjet = np.asarray([n_trkjet])
    num_btag = np.asarray(n_btag)
    if num_btag.shape == ():
        num_btag = np.asarray([n_btag])

    if num_btag.shape != num_trkjet.shape:
        print("Must have same number of track jet and b-tag regions specified")
        # This is a configuration error, so report failure to the shell
        # (the exit status used to be 0).
        sys.exit(1)

    useOneTopNuis = use_one_top_nuis
    scaleTop0b = use_scale_top_0b
    regions = [trk + btag for trk, btag in zip(num_trkjet, num_btag)]

    datafile = R.TFile(datafileName, "READ")
    topfile = R.TFile(topfileName, "READ")
    zjetfile = (R.TFile(zjetfileName, "READ")
                if zjetfileName is not None else None)
    Output = output

    ################### Setup Minuit ###################
    # One QCD parameter per region plus either one shared top parameter or
    # one top parameter per region.
    n_param = len(regions) + (1 if useOneTopNuis else len(regions))
    minuit = R.TMinuit(n_param)
    minuit.SetPrintLevel(1 if verbose else -1)
    minuit.SetErrorDef(0.5)
    minuit.SetFCN(NegLogL)

    ################### Get Histograms ###################
    histos = {}  # region label -> {"data", "top", "zjet"} histograms
    for r in ["44", "33", "22", "40", "30", "20"]:
        folder_r = HistLocStr(dist_name, r[0], r[1], btag_WP, "SB")
        data_r = datafile.Get(folder_r).Clone("data_" + r)
        top_r = topfile.Get(folder_r).Clone("top_" + r)
        zjet_r = CheckAndGet(zjetfile, folder_r, top_r).Clone("zjet_" + r)

        # Negative top bins are set to zero content and zero error.
        for ibin in range(1, top_r.GetNbinsX() + 1):
            if top_r.GetBinContent(ibin) < 0:
                top_r.SetBinContent(ibin, 0)
                top_r.SetBinError(ibin, 0)

        histos[r] = {"data": data_r, "top": top_r, "zjet": zjet_r}

    # Put the relevant histograms in the global dictionaries.
    for r in regions:
        r_0b = r[0] + "0"

        hd = histos[r]["data"].Clone("h_data_" + r)
        hq = histos[r_0b]["data"].Clone("h_qcd_" + r)
        ht0 = histos[r_0b]["top"].Clone("h_top_0b_" + r)

        if nbtag_top_shape_for4b is not None:
            # Borrow the shape from another region, keep this region's norm.
            ht = histos[nbtag_top_shape_for4b]["top"].Clone("h_top_" + r)
            ht.Scale(histos[r]["top"].Integral() / ht.Integral())
        else:
            ht = histos[r]["top"].Clone("h_top_" + r)

        hz = histos[r]["zjet"].Clone("h_zjet_" + r)
        # The 0b clone used to be named "h_zjet_" + r as well, colliding
        # with ``hz`` above; give it its own name like ``h_top_0b_``.
        hz0 = histos[r_0b]["zjet"].Clone("h_zjet_0b_" + r)

        # QCD template = 0b data - 0b top - 0b Z+jets.
        hq.Add(ht0, -1.0)
        hq.Add(hz0, -1.0)

        h_data[r] = hd
        h_qcd[r] = hq
        h_top[r] = ht
        h_top_0b[r] = ht0
        h_zjet[r] = hz
        h_zjet_0b[r] = hz0

        if NRebin > 1:
            for hist in (hd, hq, ht, ht0, hz, hz0):
                hist.Rebin(NRebin)

    ################### Fit ###################
    results = Fit(minuit)

    evars = GetEigenVariations(results["cov_m"])
    pnom = np.asarray(results["muqcd"] + results["topscale"])
    pvars = [[pnom + evar, pnom - evar] for evar in evars]
    results["pnom"] = pnom
    results["pvars"] = pvars

    # Store the 0b input histograms of the LAST fitted region only.  The
    # original code relied on the loop variable leaking out of the loops
    # above (which is "20" when no region was given); this keeps that
    # behaviour but makes it explicit.
    store_region = regions[-1] if regions else "20"
    store_0b = store_region[0] + "0"

    h_store_0b_data = histos[store_0b]["data"].Clone()
    h_store_0b_data.SetDirectory(0)  # detach so it survives the file close
    results["inputhist_0b_data"] = h_store_0b_data

    h_store_0b_ttbar = histos[store_0b]["top"].Clone()
    h_store_0b_ttbar.SetDirectory(0)
    results["inputhist_0b_ttbar"] = h_store_0b_ttbar

    if makePlots:
        for i, region in enumerate(regions):
            i_top = 0 if useOneTopNuis else i
            MakePlot(region, results["muqcd"][i], results["topscale"][i_top])

            # Single-argument print(...) emits the same text as the former
            # Python 2 multi-item print statements.
            print("")
            print(" ".join(str(v) for v in (
                "muqcd=", results["muqcd"][i],
                "+", results["muqcd_e_up"][i],
                "-", results["muqcd_e_dw"][i])))
            print(" ".join(str(v) for v in (
                "topscale=", results["topscale"][i_top],
                "+", results["topscale_e_up"][i_top],
                "-", results["topscale_e_dw"][i_top])))
            print(" ".join(str(v) for v in ("corr=", results["corr_m"])))
            print("")

    datafile.Close()
    topfile.Close()
    if zjetfile is not None:
        zjetfile.Close()

    return results
def BackgroundFit(
        datafileName="hist_data.root",
        topfileName="hist_ttbar.root",
        zjetfileName="hist_Zjets.root",
        distributionName=["LeadCaloJetM"],
        n_trkjet=["4", "3", "2", "1"],
        n_btag=["4", "3", "2s"],
        a_ttbar=1.06,  # prescale of the ttbar normalisation
        btag_WP="77",  # not useful for Xhh Framework
        NRebin=1,
        BKG_model="s",  # define the bkg models
        BKG_lst=[],  # baseline fits
        BKG_dic={},  # baseline background estimations
        Weight_dic={},  # baseline rescale keys
        use_one_top_nuis=False,  # True to fix one top parameter
        use_scale_top_model=False,
        nbtag_top_shape=True,  # fix 4b and 3b shape to be the same
        makePlots=True,
        whichFunc="XhhBoosted",
        output="",
        fitzjets=False,
        verbose=False):
    """Fit the QCD and ttbar normalisations in the "Sideband" histograms.

    The regions to fit are the entries of ``BKG_lst``.  For each region ``r``
    and each distribution ``h`` the QCD template is the data histogram of the
    model region ``BKG_dic[r]`` minus ``a_ttbar`` times its top histogram
    (and minus its Z+jets histogram when ``fitzjets`` is True), optionally
    reweighted bin by bin (see ``Weight_dic``) and then passed through
    ``ClearNegBin``.  The templates are written into the module-level
    dictionaries ``h_data``, ``h_qcd``, ``h_top``, ``h_top_model``,
    ``h_zjet`` and ``h_zjet_model`` as ``d[region][distribution]``; these
    dictionaries are filled in place and must already exist at module level.
    ``Fit`` (defined elsewhere) is run on a ``TMinuit`` object whose FCN is
    ``NegLogL``.

    Args:
        datafileName: ROOT file with the data histograms.
        topfileName: ROOT file with the ttbar histograms.
        zjetfileName: ROOT file with the Z+jets histograms (always opened).
        distributionName: sequence of histogram names; it is iterated, so a
            bare string would be walked character by character.
        n_trkjet: unused in this function.
        n_btag: only its length is used, as ``nfit`` for ``WriteFitResult``.
        a_ttbar: factor applied to the model-region top histogram when it is
            subtracted from the model-region data.
        btag_WP: unused in this function.
        NRebin: rebin factor applied to every loaded histogram when > 1.
        BKG_model: printed and copied to the global ``Bkg_model``.
        BKG_lst: region names to fit (copied, never modified).
        BKG_dic: maps each fitted region to the region providing its QCD
            model; must have a key for every entry of ``BKG_lst``.
        Weight_dic: optional, maps a fitted region to a ``(base, model)``
            pair of regions.  The QCD template is multiplied by
            ``model / base``, where both are data minus top (minus Z+jets if
            ``fitzjets``) and ``model`` is first normalised to the integral
            of ``base``.
        use_one_top_nuis: if True only one top parameter is counted for
            Minuit and ``results["muttbar"][0]`` is used for every plot.
        use_scale_top_model: copied to the global ``scaleTop_model``.
        nbtag_top_shape: when not None, the "FourTag" region takes its top
            shape from "ThreeTag" (rescaled to the "FourTag" top integral),
            which requires "ThreeTag" to be among the loaded regions.
        makePlots: call ``MakePlot`` for every fitted region.
        whichFunc: forwarded to ``HistLocStr``.
        output: prefix of the output paths; also copied to global ``Output``.
        fitzjets: if True, Z+jets is subtracted when building QCD templates;
            also copied to the global ``Fitzjets``.
        verbose: TMinuit print level is 1 when True, -1 otherwise.

    Returns:
        The dictionary returned by ``Fit`` with the extra keys ``"pnom"``
        and ``"pvars"``.
    """
    global h_qcd, h_top, h_top_model, h_zjet, h_zjet_model, h_data
    global dist_name, useOneTopNuis, scaleTop_model, regions
    global Output, Fitzjets, Bkg_model

    # Same text as the former ``print BKG_model, " is the background model!"``.
    print(str(BKG_model) + "  is the background model!")

    ################### Parse #########################
    useOneTopNuis = use_one_top_nuis
    scaleTop_model = use_scale_top_model
    dist_name = distributionName
    Fitzjets = fitzjets
    Bkg_model = BKG_model
    regions = BKG_lst[:]  # copy so the caller's list is never touched

    datafile = R.TFile(datafileName, "READ")
    topfile = R.TFile(topfileName, "READ")
    zjetfile = R.TFile(zjetfileName, "READ")
    Output = output

    ################### Setup Minuit ###################
    # One QCD parameter per region plus either one shared top parameter or
    # one top parameter per region.
    n_param = len(regions) + (1 if useOneTopNuis else len(regions))
    minuit = R.TMinuit(n_param)
    minuit.SetPrintLevel(1 if verbose else -1)
    minuit.SetErrorDef(0.5)
    minuit.SetFCN(NegLogL)

    ################### Get Histograms ###################
    # Regions to load: the fitted regions, their model regions and any
    # regions needed for reweighting.
    hist_region_lst = BKG_lst[:]
    hist_region_lst += [BKG_dic[bkg] for bkg in BKG_lst]
    for bkg in BKG_lst:
        if bkg in Weight_dic:
            for extra in (Weight_dic[bkg][0], Weight_dic[bkg][1]):
                if extra not in hist_region_lst:
                    hist_region_lst.append(extra)
    print("list of hists and fits: " + str(hist_region_lst))

    histos = {}  # region -> {"data"/"top"/"zjet" -> {distribution -> hist}}
    for r in hist_region_lst:
        data_r = {}
        top_r = {}
        zjet_r = {}
        for h in dist_name:
            hist_fullpath = HistLocStr(
                h, massRegion="Sideband", whichFunc=whichFunc, folderName=r)
            data_r[h] = datafile.Get(hist_fullpath).Clone("data_" + r + h)
            top_r[h] = topfile.Get(hist_fullpath).Clone("top_" + r + h)
            # Pass the top *histogram* as third argument.  The whole
            # ``top_r`` dict used to be passed here, while the other
            # CheckAndGet call sites hand over a histogram.
            # NOTE(review): CheckAndGet is defined elsewhere - confirm it
            # expects a histogram here.
            zjet_r[h] = CheckAndGet(zjetfile, hist_fullpath,
                                    top_r[h]).Clone("zjet_" + r + h)
            if NRebin > 1:
                data_r[h].Rebin(NRebin)
                top_r[h].Rebin(NRebin)
                zjet_r[h].Rebin(NRebin)
        histos[r] = {"data": data_r, "top": top_r, "zjet": zjet_r}

    def _data_minus_bkg(source, region, dist, suffix):
        """Return data - top (- Z+jets if Fitzjets) of region ``source``."""
        stem = region + dist + suffix
        hq_x = histos[source]["data"][dist].Clone("h_qcd_" + stem)
        ht_x = histos[source]["top"][dist].Clone("h_top_" + stem)
        hz_x = histos[source]["zjet"][dist].Clone("h_zjet_" + stem)
        # NOTE(review): unlike the main template below, a_ttbar is NOT
        # applied here; this mirrors the original code - confirm intent.
        hq_x.Add(ht_x, -1.0)
        if Fitzjets:
            hq_x.Add(hz_x, -1.0)
        return hq_x

    # Put the relevant histograms in the global dictionaries.
    for r in regions:
        h_data[r] = {}       # data to fit
        h_qcd[r] = {}        # QCD template
        h_top[r] = {}        # top template
        h_top_model[r] = {}
        h_zjet[r] = {}
        h_zjet_model[r] = {}

        for h in dist_name:
            hd = histos[r]["data"][h].Clone("h_data_" + r + h)

            if nbtag_top_shape is not None and r == "FourTag":
                # Take the top shape from ThreeTag, keep the FourTag norm.
                # (Previously a FourTag clone was made first and then
                # replaced by a second clone carrying the same name.)
                ht = histos["ThreeTag"]["top"][h].Clone("h_top_" + r + h)
                ht.Scale(histos[r]["top"][h].Integral() / ht.Integral())
            else:
                ht = histos[r]["top"][h].Clone("h_top_" + r + h)

            hz = histos[r]["zjet"][h].Clone("h_zjet_" + r + h)

            # Start background modelling from the model region.
            bkg_model = BKG_dic[r]
            hq = histos[bkg_model]["data"][h].Clone("h_qcd_" + r + h)
            ht2 = histos[bkg_model]["top"][h].Clone("h_top_model_" + r + h)
            # Used to be named "h_zjet_" + r + h, colliding with ``hz``.
            hz2 = histos[bkg_model]["zjet"][h].Clone(
                "h_zjet_model_" + r + h)

            # Subtract top (and, only when requested, Z+jets) from data.
            hq.Add(ht2, -1.0 * a_ttbar)
            if Fitzjets:
                hq.Add(hz2, -1.0)

            # Optional bin-by-bin reweighting of the QCD template.
            if r in Weight_dic:
                hq_base = _data_minus_bkg(Weight_dic[r][0], r, h, "_base")
                hq_model = _data_minus_bkg(Weight_dic[r][1], r, h, "_model")
                hq_model.Scale(hq_base.Integral() / hq_model.Integral())
                hq_model.Divide(hq_base)
                hq.Multiply(hq_model)
                print("reweight region:{:>12}: base:{:>12}: model:{:>12}:"
                      .format(r, Weight_dic[r][0], Weight_dic[r][1]))

            ClearNegBin(hq)

            h_data[r][h] = hd
            h_qcd[r][h] = hq
            h_top[r][h] = ht
            h_top_model[r][h] = ht2
            h_zjet[r][h] = hz
            h_zjet_model[r][h] = hz2

    ################### Fit ###################
    results = Fit(minuit)

    ################### Gather the results ###################
    evars = GetEigenVariations(results["cov_m"])
    pnom = np.asarray(results["muqcd"] + results["muttbar"])
    pvars = [[pnom + evar, pnom - evar] for evar in evars]
    results["pnom"] = pnom
    results["pvars"] = pvars

    texoutpath = Output + "Tables/"
    if not os.path.exists(texoutpath):
        os.makedirs(texoutpath)
    # The context manager guarantees the table file is closed; closing an
    # already closed file is harmless should WriteFitResult close it too.
    with open(texoutpath + "normfit.tex", "w") as fit_outtex:
        WriteFitResult(results, fit_outtex, nfit=len(n_btag))

    outroot = R.TFile.Open(Output + "fitNorm.root", "recreate")
    if makePlots:
        for i, region in enumerate(regions):
            MakePlot(region, results["muqcd"][i],
                     results["muttbar"][0 if useOneTopNuis else i])

    # Finish and clean up.
    outroot.Close()
    datafile.Close()
    topfile.Close()
    zjetfile.Close()

    return results
def QCDSystematics(datafileName="hist_data.root",
                   topfileName="hist_ttbar.root",
                   zjetfileName="hist_Zjets.root",
                   distributionName="mHH_l",
                   n_trkjet=["4", "3", "2"],
                   n_btag=["4", "3", "2"],
                   btag_WP="77",
                   mu_qcd_vals=[1.0, 1.0],
                   topscale_vals=[1.0, 1.0],
                   NRebin=1,
                   use_one_top_nuis=False,
                   use_scale_top_0b=False,
                   nbtag_top_shape_for4b=None,
                   makePlots=False,
                   verbose=False,
                   outfileNameBase="QCDSysfit.root"):
    """Derive QCD shape and scale systematics from the "CR" histograms.

    Region labels are built as ``n_trkjet[i] + n_btag[i]``.  For each region
    the background prediction is ``mu_qcd * (0b data - 0b top - 0b Z+jets)
    + top_scale * top + Z+jets``.  The ratio data / prediction is fitted with
    ``LinearFit`` (defined elsewhere) from 1000 up to the upper edge of the
    last non-empty ratio bin, and up/down variations of the fitted line are
    built from the slope uncertainty ``sqrt(cov[1, 1])``.

    Args:
        datafileName: ROOT file with the data histograms.
        topfileName: ROOT file with the ttbar histograms.
        zjetfileName: ROOT file with the Z+jets histograms, or None to open
            no Z+jets file (``CheckAndGet`` then receives None).
        distributionName: histogram name handed to ``HistLocStr``.
        n_trkjet: track-jet label(s), one per region (string or sequence).
        n_btag: b-tag label(s), one per region; must match ``n_trkjet`` in
            length.
        btag_WP: working-point label handed to ``HistLocStr``.
        mu_qcd_vals: QCD normalisation per region; needs one entry per
            region (the 2-entry default does NOT cover the 3 default regions).
        topscale_vals: top scale per region, or a single usable first entry
            when ``use_one_top_nuis`` is True.
        NRebin: rebin factor applied to every loaded histogram.
        use_one_top_nuis: use ``topscale_vals[0]`` for every region.
        use_scale_top_0b: also scale the 0b top histogram by the top scale
            before subtracting it from the 0b data.
        nbtag_top_shape_for4b: region label whose top histogram is used as
            the top shape for every region, rescaled to that region's own
            top integral.  None keeps each region's own top histogram.
        makePlots: save a canvas with the ratio and the fitted functions.
        verbose: print the region being fitted; forwarded to ``LinearFit``.
        outfileNameBase: base name for the fit name and the plot files.

    Returns:
        dict with ``"Shape_<region>"`` mapped to ``{"f", "fup", "fdw"}``
        (``TF1`` objects) and ``"Scale_<region>"`` mapped to a number.

    Raises:
        IndexError: if ``mu_qcd_vals`` or ``topscale_vals`` has too few
            entries for the requested regions.
    """
    ##### Parse Inputs ############################################
    dist_name = distributionName

    # Accept either a single label or a sequence of labels.
    num_trkjet = np.asarray(n_trkjet)
    if num_trkjet.shape == ():
        num_trkjet = np.asarray([n_trkjet])
    num_btag = np.asarray(n_btag)
    if num_btag.shape == ():
        num_btag = np.asarray([n_btag])

    if num_btag.shape != num_trkjet.shape:
        print("Must have same number of track jet and b-tag regions specified")
        # This is a configuration error, so report failure to the shell
        # (the exit status used to be 0).
        sys.exit(1)

    n_channels = num_trkjet.shape[0]
    regions = [num_trkjet[i] + num_btag[i] for i in range(n_channels)]

    # Fail early and clearly instead of hitting a bare "list index out of
    # range" halfway through the region loop (same exception type).
    n_top_needed = min(n_channels, 1) if use_one_top_nuis else n_channels
    if len(mu_qcd_vals) < n_channels or len(topscale_vals) < n_top_needed:
        raise IndexError(
            "QCDSystematics: %d region(s) requested but only %d mu_qcd_vals "
            "and %d topscale_vals given"
            % (n_channels, len(mu_qcd_vals), len(topscale_vals)))

    # "x.root" -> "x"; names without the suffix are now left untouched
    # instead of blindly losing their last five characters.
    if outfileNameBase.endswith(".root"):
        fitName = "fit_" + outfileNameBase[:-5]
    else:
        fitName = "fit_" + outfileNameBase

    ##### Get Control Region Histograms ################################
    datafile = R.TFile(datafileName, "READ")
    topfile = R.TFile(topfileName, "READ")
    zjetfile = (R.TFile(zjetfileName, "READ")
                if zjetfileName is not None else None)

    histos = {}  # region label -> {"data", "top", "zjet"} histograms
    try:
        for r in ["44", "33", "22", "40", "30", "20"]:
            folder_r = HistLocStr(dist_name, r[0], r[1], btag_WP, "CR")

            # Every histogram is detached from its file so that it stays
            # valid after the files are closed below.
            data_r = datafile.Get(folder_r).Clone("data_" + r)
            data_r.SetDirectory(0)
            top_r = topfile.Get(folder_r).Clone("top_" + r)
            top_r.SetDirectory(0)
            zjet_r = CheckAndGet(zjetfile, folder_r, top_r).Clone("zjet_" + r)
            zjet_r.SetDirectory(0)

            # Negative top bins are set to zero content and zero error.
            for ibin in range(1, top_r.GetNbinsX() + 1):
                if top_r.GetBinContent(ibin) < 0:
                    top_r.SetBinContent(ibin, 0)
                    top_r.SetBinError(ibin, 0)

            data_r.Rebin(NRebin)
            top_r.Rebin(NRebin)
            zjet_r.Rebin(NRebin)

            histos[r] = {"data": data_r, "top": top_r, "zjet": zjet_r}
    finally:
        # Close the inputs exactly once, also when reading fails.  (They
        # used to be closed here and a second time before returning.)
        datafile.Close()
        topfile.Close()
        if zjetfile is not None:
            zjetfile.Close()

    ####### output object ###################
    QCDSyst_Dict = {}

    ##### scaling and subtractions #################################
    for ir, r in enumerate(regions):
        r_0b = r[0] + "0"
        top_scale = topscale_vals[0] if use_one_top_nuis else topscale_vals[ir]
        mu_qcd = mu_qcd_vals[ir]

        top_0b = histos[r_0b]["top"].Clone("top_0b__" + r)
        if use_scale_top_0b:
            top_0b.Scale(top_scale)
        zjet_0b = histos[r_0b]["zjet"].Clone("zjet_0b__" + r)

        # QCD estimate = 0b data - 0b top - 0b Z+jets.  The top is still
        # subtracted because its fraction is increasing in Run 2 (Qi).
        qcd_r = histos[r_0b]["data"].Clone("qcd__" + r)
        qcd_r.Add(top_0b, -1)
        qcd_r.Add(zjet_0b, -1)

        if nbtag_top_shape_for4b is not None:
            # Borrow the shape from another region, keep this region's norm.
            top_r = histos[nbtag_top_shape_for4b]["top"].Clone("top__" + r)
            top_r.Scale(histos[r]["top"].Integral() / top_r.Integral())
        else:
            top_r = histos[r]["top"].Clone("top__" + r)

        zjet_r = histos[r]["zjet"].Clone("zjet__" + r)

        qcd_r.Scale(mu_qcd)
        top_r.Scale(top_scale)

        # Total background prediction.
        bkg_r = qcd_r.Clone("bkg__" + r)
        bkg_r.Add(top_r)
        bkg_r.Add(zjet_r)
        N_bkg_r = bkg_r.Integral()

        ratio = histos[r]["data"].Clone("ratio__" + r)
        ratio.SetDirectory(0)

        # Data yield and its error, taken before the division.
        # NOTE(review): this integral runs over bins 0..N+1 whereas
        # N_bkg_r above uses Integral() without arguments - confirm that
        # the two ranges are meant to differ.
        Err_N_data_CR_r = R.Double(0)
        N_data_CR_r = ratio.IntegralAndError(0, ratio.GetNbinsX() + 1,
                                             Err_N_data_CR_r)

        ratio.Divide(bkg_r)

        # Last non-empty bin defines the upper edge of the fit range.
        lastbin = 0
        for ibin in reversed(range(ratio.GetNbinsX() + 1)):
            if ratio.GetBinContent(ibin) != 0:
                lastbin = ibin
                break
        lastbin_Xval = (ratio.GetBinLowEdge(lastbin)
                        + ratio.GetBinWidth(lastbin))
        fitRange = [1000, lastbin_Xval]

        ## fitting
        if verbose:
            print("QCD Ratio fit in SR= " + str(r))
        _, fitChoice, npar, params, cov = LinearFit(
            ratio, fitName, fitRange, verbose)

        ## Make fit variations, for shape uncertainties.  The slope is
        ## shifted by its uncertainty and the offset is moved in the
        ## opposite direction by slope_err times the fit-range midpoint.
        outRange = [500, 3500]
        slope_err = np.sqrt(cov[1, 1])
        pivot = (fitRange[1] + fitRange[0]) / 2.0

        fcen = R.TF1("QCDShape_f_" + r, fitChoice,
                     outRange[0], outRange[1], npar)
        fcen.SetParameters(params)

        fup = R.TF1("QCDShape_fup_" + r, fitChoice,
                    outRange[0], outRange[1], npar)
        fup.SetParameters(params[0] - slope_err * pivot,
                          params[1] + slope_err)
        fup.SetLineColor(R.kBlue)

        fdw = R.TF1("QCDShape_fdw_" + r, fitChoice,
                    outRange[0], outRange[1], npar)
        fdw.SetParameters(params[0] + slope_err * pivot,
                          params[1] - slope_err)
        fdw.SetLineColor(R.kBlue)

        QCDSyst_Dict["Shape_" + r] = {"f": fcen, "fup": fup, "fdw": fdw}

        # Scale is the max of ratio non-unity, CR stat error and the extra
        # per-region value from _extraNormCRSysDict (0 if absent).
        rel_norm_diff = (N_bkg_r - N_data_CR_r) / N_bkg_r
        rel_stat_err = Err_N_data_CR_r / N_data_CR_r
        QCDSyst_Dict["Scale_" + r] = np.max(
            np.abs([rel_norm_diff, rel_stat_err,
                    _extraNormCRSysDict.get(r, 0.)]))

        # Same text as the former Python 2 multi-item print statement.
        print(" ".join(str(v) for v in (
            "Scale_" + r, QCDSyst_Dict["Scale_" + r], N_bkg_r, N_data_CR_r,
            Err_N_data_CR_r, rel_norm_diff, rel_stat_err)))

        if makePlots:
            c = R.TCanvas()
            leg = R.TLegend(0.1, 0.7, 0.48, 0.9)
            leg.SetFillColor(0)
            leg.AddEntry(ratio, "Ratio", "LP")
            leg.AddEntry(fcen, "Ratio Fit", "L")
            leg.AddEntry(fup, "Ratio Fit Variations", "L")
            ratio.SetLineColor(R.kBlack)
            ratio.SetXTitle("m_{JJ} [GeV]")
            ratio.SetYTitle("Ratio Data/Prediction")
            ratio.Draw()
            fcen.Draw("same")
            fup.Draw("same")
            fdw.Draw("same")
            leg.Draw("same")
            c.SaveAs(outfileNameBase.split(".root")[0] + "_" + r + ".root")

    return QCDSyst_Dict
def ttbarShapeSysSR(topfileName="hist_ttbar.root",
                    distributionName="mHH_l",
                    signal_region="33",
                    compare_region="44",
                    btag_WP="77",
                    makePlots=False,
                    verbose=False,
                    outfileNameBase="TopShapeSRSysfit.root"):
    """Compare the ttbar "SR" shape of two regions and fit their ratio.

    Both top histograms are rebinned by 5, have negative bins set to zero,
    and are normalised to unit integral.  The ratio ``compare / signal`` is
    fitted with ``LinearFit`` (defined elsewhere) over the fixed range
    [500, 1600], and variations of the fitted line are built from the
    fitted parameters and the slope uncertainty ``sqrt(cov[1, 1])``.

    Args:
        topfileName: ROOT file with the ttbar histograms.
        distributionName: histogram name handed to ``HistLocStr``.
        signal_region: two-character region label; its characters are passed
            separately to ``HistLocStr``.
        compare_region: two-character label of the region compared against
            ``signal_region``.
        btag_WP: working-point label handed to ``HistLocStr``.
        makePlots: save a canvas with the ratio and the fitted functions.
        verbose: forwarded to ``LinearFit``.
        outfileNameBase: base name for the fit name and the plot file.

    Returns:
        dict with the ``TF1`` objects ``"f"`` (nominal), ``"frev"``
        (parameters ``2 - p0, -p1``), ``"fup"`` and ``"fdw"`` (slope shifted
        up/down by its uncertainty).
    """
    regions_tag = "sig" + signal_region + "_comp" + compare_region

    # "x.root" -> "x"; names without the suffix are now left untouched
    # instead of blindly losing their last five characters.
    if outfileNameBase.endswith(".root"):
        fitName = "fit_" + outfileNameBase[:-5]
    else:
        fitName = "fit_" + outfileNameBase

    topfile = R.TFile(topfileName, "READ")
    try:
        ## get top SR shape
        folder_sig = HistLocStr(distributionName, signal_region[0],
                                signal_region[1], btag_WP, "SR")
        top_sig = topfile.Get(folder_sig).Clone("top_sig_" + signal_region)
        top_sig.SetDirectory(0)
        top_sig.Rebin(5)

        ## get top comparison shape
        folder_comp = HistLocStr(distributionName, compare_region[0],
                                 compare_region[1], btag_WP, "SR")
        top_comp = topfile.Get(folder_comp).Clone(
            "top_comp_" + compare_region)
        top_comp.SetDirectory(0)
        top_comp.Rebin(5)

        ## remove negative values
        ## assume same binning, else division won't work later
        for ibin in range(1, top_sig.GetNbinsX() + 1):
            for hist in (top_sig, top_comp):
                if hist.GetBinContent(ibin) < 0:
                    hist.SetBinContent(ibin, 0)
                    hist.SetBinError(ibin, 0)

        ## normalize to same area
        top_sig.Scale(1.0 / top_sig.Integral())
        top_comp.Scale(1.0 / top_comp.Integral())

        ## compute ratio
        top_ratio = top_comp.Clone("top_ratio_" + regions_tag)
        top_ratio.Divide(top_sig)

        # Fixed, "reasonable range of bins with data".  The former search
        # for the last non-empty bin was removed: its result was never used.
        fitRange = [500, 1600]

        ## fitting
        _, fitChoice, npar, params, cov = LinearFit(
            top_ratio, fitName, fitRange, verbose)

        ## Make fit variations, for shape uncertainties.  For fup/fdw the
        ## slope is shifted by its uncertainty and the offset is moved in
        ## the opposite direction by slope_err times the fit-range midpoint.
        outRange = [500, 3500]
        slope_err = np.sqrt(cov[1, 1])
        pivot = (fitRange[1] + fitRange[0]) / 2.0

        fcen = R.TF1("ttbarShapeSR_f_" + regions_tag, fitChoice,
                     outRange[0], outRange[1], npar)
        fcen.SetParameters(params)

        frev = R.TF1("ttbarShapeSR_frev_" + regions_tag, fitChoice,
                     outRange[0], outRange[1], npar)
        frev.SetParameters(2 - params[0], -params[1])
        frev.SetLineColor(R.kOrange)

        # fneg is neither returned nor drawn; it is still created as in the
        # original code.
        # NOTE(review): drop it if nothing looks it up by name.
        fneg = R.TF1("ttbarShapeSR_fneg_" + regions_tag, fitChoice,
                     outRange[0], outRange[1], npar)
        fneg.SetParameters(params[0], -params[1])
        fneg.SetLineColor(R.kMagenta)

        fup = R.TF1("ttbarShapeSR_fup_" + regions_tag, fitChoice,
                    outRange[0], outRange[1], npar)
        fup.SetParameters(params[0] - slope_err * pivot,
                          params[1] + slope_err)
        fup.SetLineColor(R.kBlue)

        fdw = R.TF1("ttbarShapeSR_fdw_" + regions_tag, fitChoice,
                    outRange[0], outRange[1], npar)
        fdw.SetParameters(params[0] + slope_err * pivot,
                          params[1] - slope_err)
        fdw.SetLineColor(R.kBlue)

        ttbarShapeSRSyst_Dict = {"f": fcen, "frev": frev,
                                 "fup": fup, "fdw": fdw}

        if makePlots:
            c = R.TCanvas()
            leg = R.TLegend(0.1, 0.7, 0.48, 0.9)
            leg.SetFillColor(0)
            leg.AddEntry(top_ratio, "Ratio", "LP")
            leg.AddEntry(fcen, "Ratio Fit", "L")
            leg.AddEntry(fup, "Slope Variations", "L")
            top_ratio.SetLineColor(R.kBlack)
            top_ratio.SetXTitle("m_{JJ} [GeV]")
            top_ratio.SetYTitle("Ratio Data/Prediction")
            top_ratio.Draw()
            fcen.Draw("same")
            fup.Draw("same")
            fdw.Draw("same")
            leg.Draw("same")
            c.SaveAs(outfileNameBase.split(".root")[0] + "_" + regions_tag
                     + ".root")

        return ttbarShapeSRSyst_Dict
    finally:
        # Close the input file on every exit path, including errors.
        topfile.Close()
def HistoAnalysis(datafileName="/afs/cern.ch/user/b/btong/work/bbbb/MoriondAnalysis/Output/Moriond/data_test/hist-MiniNTuple.root", topfileName="/afs/cern.ch/user/b/btong/work/bbbb/MoriondAnalysis/Output/Moriond/ttbar_comb_test/hist-MiniNTuple.root", zjetfileName="/afs/cern.ch/user/b/btong/work/bbbb/MoriondAnalysis/Output/Moriond/zjets_test/hist-MiniNTuple.root", distributionName= "mHH_l", n_trkjet = ["4","3","2"], n_btag = ["4","3","2"], btag_WP = "70", NRebin = 20, use_one_top_nuis = False, use_scale_top_0b = False, nbtag_top_shape_SRPred_for4b = "33", rebinFinal = None, smoothing_func = "Dijet", top_smoothing_func = "Dijet", inputFitResult = None, inputQCDSyst_Dict = None, doSmoothing = True, addSmoothErrorBin = False, qcdSmoothRange = (1200, 3000), #(1200, 3000), topSmoothRange = (1200, 3000), #(1200, 3000), isSystematicVariation = False, verbose = False, makeOutputFiles = True, MassRegionName = "SR", do_variable_rebin = False ): ##### Parse Inputs ############################################ fitzjets = False dist_name = distributionName print "the chosen hist is: ", dist_name if "pole" in distributionName:#change the smoothing range if pole distributions qcdSmoothRange = (1200, 3000) topSmoothRange = (1200, 3000) num_trkjet = np.asarray(n_trkjet) if num_trkjet.shape==(): num_trkjet = np.asarray([n_trkjet]) num_btag = np.asarray(n_btag) if num_btag.shape==(): num_btag = np.asarray([n_btag]) if num_btag.shape!=num_trkjet.shape: print "Must have same number of track jet and b-tag regions specified" sys.exit(0) btag_WP = btag_WP n_rebin = NRebin nbtag_top_shape_for4b = nbtag_top_shape_SRPred_for4b topShape_nbtag_for4b = nbtag_top_shape_for4b if nbtag_top_shape_for4b == None: topShape_nbtag_for4b = num_btag+num_btag useOneTopNuis = use_one_top_nuis scaleTop0b = use_scale_top_0b n_channels = num_trkjet.shape[0] regions = [ num_trkjet[i]+num_btag[i] for i in range(n_channels) ] ##for outputing isMhhDistribution = (distributionName=="mHH_l" or 
distributionName=="mHH_pole") do_smoothing = (doSmoothing if isMhhDistribution else False) # qi ################################################################## ##### Storage Variables ############################################ output_Dict = { } Nbkg_dict = { } Nbkg_SysList = { } for ir in regions: Nbkg_dict[ir] = { "qcd":0, "top":0, "zjet":0, "bkg":0, "data":0 } Nbkg_SysList[ir] = { "qcd":[], "top":[], "zjet":[], "bkg":[], "data":[] } vartxt = '' ################################################################## ##### Do Background Fits ############################################ ##### This is just the same fitting procedure if inputFitResult == None: bkgFitResults = BkgFit.BackgroundFit(datafileName=datafileName, topfileName=topfileName, zjetfileName=zjetfileName, distributionName = ["leadHCand_Mass"], whichFunc = "XhhBoosted", n_trkjet = n_trkjet, n_btag = n_btag, btag_WP = btag_WP, NRebin = 2,#NRebin, #this is reset to be fine binned use_one_top_nuis = use_one_top_nuis, makePlots = True, BKG_lst = ["FourTag", "ThreeTag", "TwoTag_split"], BKG_dic = {"FourTag":"NoTag_4Trk", "ThreeTag":"NoTag_3Trk", "TwoTag_split":"NoTag_2Trk_split", "TwoTag":"OneTag", "OneTag":"NoTag"}, fitzjets = fitzjets) else: bkgFitResults = inputFitResult pvars = bkgFitResults["pvars"] output_Dict["fitResults"] = bkgFitResults ################################################################## ##### Get QCD Shape Systematics from CR ############################## print "STEP: Get QCD Shape Systematics from CR" ##### This is smoothing the CR region distributions if MassRegionName == "SR": # should only affect SR if inputQCDSyst_Dict == None and isMhhDistribution: # qi #or this option QCDSyst_Dict = SystTools.QCDSystematics(datafileName=datafileName QCDSyst_Dict = SystToolsSmooth.QCDSystematics(datafileName=datafileName, topfileName=topfileName, zjetfileName=zjetfileName, distributionName= "mHH_l", # this has been decided to fix on DiJetMass n_trkjet = n_trkjet, n_btag = n_btag, btag_WP = 
btag_WP, mu_qcd_vals = bkgFitResults["muqcd"], topscale_vals = bkgFitResults["muttbar"], NRebin = 5, #this used to be 5, incease to 10 just like SR smoothing_func = smoothing_func, SmoothRange = (1100, 3000),# (100, 2500), #this is fixed... use_one_top_nuis = use_one_top_nuis, use_scale_top_0b = use_scale_top_0b, nbtag_top_shape_for4b = nbtag_top_shape_SRPred_for4b, makePlots = True, verbose = False, outfileNameBase="QCDSysfitSmooth.root") elif inputQCDSyst_Dict != None: QCDSyst_Dict = inputQCDSyst_Dict else: QCDSyst_Dict = None else: QCDSyst_Dict = None output_Dict["QCDSystCR"] = QCDSyst_Dict ################################################################## ##### Get Signal Region Histograms ################################ print "STEP: Get Signal Region Histograms" ##### This is loding input file histograms datafile = R.TFile(datafileName,"READ") topfile = R.TFile(topfileName,"READ") zjetfile = ( R.TFile(zjetfileName,"READ") if fitzjets is True else None) histos = {} # collect all histograms for r in ["44","33","22","40","30","20"]: folder_r = HistLocStr(dist_name, r[0], r[1], btag_WP, MassRegionName) #folder( r[0], r[1], btag_WP) data_r = datafile.Get(folder_r).Clone("data_"+r) data_r.SetDirectory(0) top_r = topfile.Get(folder_r).Clone("top_"+r) top_r.SetDirectory(0) zjet_r = CheckAndGet(zjetfile, folder_r, top_r).Clone("zjet_"+r) zjet_r.SetDirectory(0) #clear the negative weight bins for ttbar ClearNegBin(top_r) if do_variable_rebin: data_r = smoothfit_Ultimate.VariableRebin(data_r,5,2000).Clone() top_r = smoothfit_Ultimate.VariableRebin(top_r,5,2000).Clone() zjet_r = smoothfit_Ultimate.VariableRebin(zjet_r,5,2000).Clone() else: data_r.Rebin(n_rebin) top_r.Rebin(n_rebin) zjet_r.Rebin(n_rebin) histos[r] = {"data": data_r, "top": top_r, "zjet":zjet_r} datafile.Close() topfile.Close() if zjetfile != None: zjetfile.Close() ################################################################## ##### scaling and subtractions ################################# print 
"STEP: scaling and subtractions" ##### This is loding input file histograms for ir in range(len(regions)): # print ir r = regions[ir] output_Dict[r] = {"qcd":{}, "ttbar":{}, "zjet":{}} if makeOutputFiles: cut_lst = {"44":"FourTag", "33":"ThreeTag", "22":"TwoTag_split"} outfileStat = R.TFile("outfile_boosted_"+cut_lst[r]+".root","RECREATE") r_0b = r[0]+"0" #r_3b = r[0]+"3" top_0b = histos[r_0b]["top"].Clone("top_0b__"+r) if scaleTop0b: top_0b.Scale( (bkgFitResults["muttbar"][0] if use_one_top_nuis else bkgFitResults["muttbar"][ir]) ) zjet_0b = histos[r_0b]["zjet"].Clone("zjet_0b__"+r) qcd_r = histos[r_0b]["data"].Clone("qcd__"+r) qcd_r.Add( top_0b, -1) qcd_r.Add( zjet_0b, -1) qcd_int = qcd_r.Integral() #clear the negative weight bins for qcd as well ClearNegBin(qcd_r) top_r = histos[r]["top"].Clone("top__"+r) if (nbtag_top_shape_for4b == "33") and (r == "44") and (MassRegionName == "SR"): # the 3b top shape is only used during the SR prediction for 44 region temp_scaler = top_r.Integral() / histos[nbtag_top_shape_for4b]["top"].Integral() top_r = histos[nbtag_top_shape_for4b]["top"].Clone("top__"+r) top_r.Scale( temp_scaler ) top_int = top_r.Integral() #print top_r.Integral(), "here! 1" zjet_r = histos[r]["zjet"].Clone("zjet__"+r) mu_qcd = bkgFitResults["muqcd"][ir] top_scale = (bkgFitResults["muttbar"][0] if use_one_top_nuis else bkgFitResults["muttbar"][ir]) qcd_r.Scale( mu_qcd ) top_r.Scale( top_scale ) print "top total:", top_r.Integral(), " ; qcd total:", qcd_r.Integral(), "here! 
2" bkg_r = qcd_r.Clone("bkg__" + r) bkg_r.Add( top_r, 1) bkg_r.Add( zjet_r, 1) # store some numbers for the output table later e_qcd = R.Double(0.0) e_top = R.Double(0.0) e_bkg = R.Double(0.0) e_data = R.Double(0.0) Nbkg_dict[r]["qcd"] = qcd_r.IntegralAndError(0, qcd_r.GetNbinsX()+1, e_qcd) Nbkg_dict[r]["top"] = top_r.IntegralAndError(0, top_r.GetNbinsX()+1, e_top) Nbkg_dict[r]["bkg"] = bkg_r.IntegralAndError(0, bkg_r.GetNbinsX()+1, e_bkg) Nbkg_dict[r]["data"] = histos[r]["data"].IntegralAndError(0, histos[r]["data"].GetNbinsX()+1, e_data) Nbkg_SysList[r]["qcd"].append( float(e_qcd) ) Nbkg_SysList[r]["top"].append( float(e_top) ) Nbkg_SysList[r]["bkg"].append( float(e_bkg) ) # Qi Question; Tony Question as well... Nbkg_SysList[r]["data"].append( float(e_data) ) ## Now do smoothing ########################################################################################### print "start smoothing: ", ir if do_smoothing: qcd_sm = smoothfit.smoothfit(qcd_r, fitFunction = smoothing_func, fitRange = qcdSmoothRange, makePlots = True, verbose = False, outfileName="qcd_smoothfit_"+r+".root") top_sm = smoothfit.smoothfit(top_r, fitFunction = top_smoothing_func, fitRange = topSmoothRange, makePlots = True, verbose = False, outfileName="top_smoothfit_"+r+".root") print "top total:", top_r.Integral(), " ; qcd total:", qcd_r.Integral(), "here! 2.5" if addSmoothErrorBin: qcd_final = smoothfit.MakeSmoothHistoWithError(qcd_r, qcd_sm) top_final = smoothfit.MakeSmoothHistoWithError(top_r, top_sm) else: qcd_final = smoothfit.MakeSmoothHisto(qcd_r, qcd_sm["nom"]) top_final = smoothfit.MakeSmoothHisto(top_r, top_sm["nom"]) qcd_final.SetNameTitle("qcd_hh_"+r+"__clone", "qcd_hh_"+r+"__clone") top_final.SetNameTitle("ttbar_hh_"+r+"__clone", "ttbar_hh_"+r+"__clone") else: qcd_final = qcd_r.Clone("qcd_hh_"+r+"__clone") top_final = top_r.Clone("ttbar_hh_"+r+"__clone") print "top total:", top_final.Integral(), " ; qcd total:", qcd_final.Integral(), "here! 
3" zjet_final = zjet_r.Clone("zjet_hh_"+r+"__clone") if rebinFinal is not None: qcd_final = qcd_final.Rebin(len(rebinFinal)-1, qcd_final.GetName()+"_rebinFinal", rebinFinal) top_final = top_final.Rebin(len(rebinFinal)-1, top_final.GetName()+"_rebinFinal", rebinFinal) zjet_final = zjet_final.Rebin(len(rebinFinal)-1, zjet_final.GetName()+"_rebinFinal", rebinFinal) if makeOutputFiles: outfileStat.WriteTObject(qcd_final, "qcd_hh","Overwrite") outfileStat.WriteTObject(top_final, "ttbar_hh","Overwrite") outfileStat.WriteTObject(zjet_final, "zjet_hh","Overwrite") qcd_final.SetDirectory(0) top_final.SetDirectory(0) zjet_final.SetDirectory(0) output_Dict[r]["qcd"]["nom"] = qcd_final output_Dict[r]["ttbar"]["nom"] = top_final output_Dict[r]["zjet"]["nom"] = zjet_final # for systematics, don't need anything after this in loop if isSystematicVariation: continue ################################################################################################################################## ### propagate correlated systematics from the smoothing procedure---> these "replace" the stat error on the bins ############# ################################################################################################################################## ##### This is adding smoothing systematics if do_smoothing: ## qcd smoothing variations################################################################# if not addSmoothErrorBin: for ivar in range(len(qcd_sm["vars"])): qup = qcd_sm["vars"][ivar][0] qdw = qcd_sm["vars"][ivar][1] qcd_r_qup = smoothfit.MakeSmoothHisto(qcd_r, qup) qcd_r_qdw = smoothfit.MakeSmoothHisto(qcd_r, qdw) qcd_r_qup.SetNameTitle("qcd_hh_"+r+"_smoothQ"+str(ivar)+"up__clone", "qcd_hh_"+r+"_smoothQ"+str(ivar)+"up__clone") qcd_r_qdw.SetNameTitle("qcd_hh_"+r+"_smoothQ"+str(ivar)+"down__clone", "qcd_hh_"+r+"_smoothQ"+str(ivar)+"down__clone") if rebinFinal is not None: qcd_r_qup = qcd_r_qup.Rebin(len(rebinFinal)-1, qcd_r_qup.GetName()+"_rebinFinal", rebinFinal) qcd_r_qdw = 
qcd_r_qdw.Rebin(len(rebinFinal)-1, qcd_r_qdw.GetName()+"_rebinFinal", rebinFinal) if makeOutputFiles: outfileStat.WriteTObject(qcd_r_qup, "qcd_hh_smoothQ"+str(ivar)+"up","Overwrite") outfileStat.WriteTObject(qcd_r_qdw, "qcd_hh_smoothQ"+str(ivar)+"down","Overwrite") qcd_r_qup.SetDirectory(0) qcd_r_qdw.SetDirectory(0) output_Dict[r]["qcd"]["smoothQ"+str(ivar)+"up"] = qcd_r_qup output_Dict[r]["qcd"]["smoothQ"+str(ivar)+"down"] = qcd_r_qdw ## qcd smoothing function variations ################################################################# if smoothing_func == "ExpModGauss": smoothFuncCompSyst = EMGSmoothSyst.smoothFuncCompare(qcd_r, fitFunction = smoothing_func, fitRange = qcdSmoothRange, funcCompareRange=(900, qcdSmoothRange[1]), makePlots = True, verbose = False, outfileName="EMGSmoothFuncCompare_"+r+".root", plotExtra=False) # Qi else: # smoothFuncCompSyst = smoothfit.smoothFuncCompare(qcd_r, fitRange = (900, qcdSmoothRange[1]), smoothFuncCompSyst = smoothfit.smoothFuncCompare(qcd_r, fitRange = qcdSmoothRange, # qi makePlots = True, verbose = False, outfileName="smoothFuncCompare_"+r+".root", plotExtra=False) # Qi qcd_r_func_up = smoothFuncCompSyst["up"] qcd_r_func_dw = smoothFuncCompSyst["dw"] qcd_r_func_up_super = smoothFuncCompSyst["up_super"] qcd_r_func_dw_super = smoothFuncCompSyst["dw_super"] if rebinFinal is not None: qcd_r_func_up = qcd_r_func_up.Rebin(len(rebinFinal)-1, qcd_r_func_up.GetName()+"_rebinFinal", rebinFinal) qcd_r_func_dw = qcd_r_func_dw.Rebin(len(rebinFinal)-1, qcd_r_func_dw.GetName()+"_rebinFinal", rebinFinal) if makeOutputFiles: outfileStat.WriteTObject(qcd_r_func_up, "qcd_hh_smoothFuncup","Overwrite") outfileStat.WriteTObject(qcd_r_func_dw, "qcd_hh_smoothFuncdown","Overwrite") outfileStat.WriteTObject(qcd_r_func_up_super, "qcd_hh_smoothFuncSuperup","Overwrite") outfileStat.WriteTObject(qcd_r_func_dw_super, "qcd_hh_smoothFuncSuperdown","Overwrite") # treat negative bin ClearNegBin(qcd_r_func_up) ClearNegBin(qcd_r_func_dw) 
ClearNegBin(qcd_r_func_up_super) ClearNegBin(qcd_r_func_dw_super) qcd_r_func_up.SetDirectory(0) qcd_r_func_dw.SetDirectory(0) output_Dict[r]["qcd"]["smoothFuncup"] = qcd_r_func_up output_Dict[r]["qcd"]["smoothFuncdown"] = qcd_r_func_dw qcd_r_func_up_super.SetDirectory(0) qcd_r_func_dw_super.SetDirectory(0) output_Dict[r]["qcd"]["smoothFuncup_super"] = qcd_r_func_up_super output_Dict[r]["qcd"]["smoothFuncdown_super"] = qcd_r_func_dw_super stepped_min_vals = [] stepped_max_vals = [] stepped_fitting = True if stepped_fitting == True: starting_bin = qcd_r_qup.FindBin(qcdSmoothRange[0]) stepped_min_vals.append(str(qcdSmoothRange[0])) for step in range(0, 4): current_starting_bin = starting_bin + step*1 current_starting_mass = qcd_r_qup.GetBinCenter(current_starting_bin) stepped_min_vals.append(str(current_starting_mass)) stepped_max_vals = ["3000"] else: stepped_max_vals = ["1850","2000","2250","2500"] stepped_min_vals = [str(qcdSmoothRange[0]),"1300","1400"] print "MAX AND MIN ARE:" print stepped_max_vals print stepped_min_vals smoothfit.smoothFuncRangeCompare(qcd_r, fitFunction = smoothing_func, fitRange = qcdSmoothRange, fitMaxVals = stepped_max_vals, fitMinVals=stepped_min_vals, makePlots = True, plotExtra = False, verbose = False, outfileName="smoothFuncRangeCompare_"+r+".root") # Qi ## ttbar smoothing variations############################################################################## if not addSmoothErrorBin: for ivar in range(len(top_sm["vars"])): tup = top_sm["vars"][ivar][0] tdw = top_sm["vars"][ivar][1] top_r_tup = smoothfit.MakeSmoothHisto(top_r, tup) top_r_tdw = smoothfit.MakeSmoothHisto(top_r, tdw) top_r_tup.SetNameTitle("ttbar_hh_"+r+"_smoothQ"+str(ivar)+"up__clone", "ttbar_hh_"+r+"_smoothQ"+str(ivar)+"up__clone") top_r_tdw.SetNameTitle("ttbar_hh_"+r+"_smoothQ"+str(ivar)+"down__clone", "ttbar_hh_"+r+"_smoothQ"+str(ivar)+"down__clone") if rebinFinal is not None: top_r_tup = top_r_tup.Rebin(len(rebinFinal)-1, top_r_tup.GetName()+"_rebinFinal", 
rebinFinal) top_r_tdw = top_r_tdw.Rebin(len(rebinFinal)-1, top_r_tdw.GetName()+"_rebinFinal", rebinFinal) if makeOutputFiles: outfileStat.WriteTObject(top_r_tup, "ttbar_hh_smoothQ"+str(ivar)+"up","Overwrite") outfileStat.WriteTObject(top_r_tdw, "ttbar_hh_smoothQ"+str(ivar)+"down","Overwrite") ClearNegBin(top_r_tup) ClearNegBin(top_r_tdw) top_r_tup.SetDirectory(0) top_r_tdw.SetDirectory(0) output_Dict[r]["ttbar"]["smoothQ"+str(ivar)+"up"] = top_r_tup output_Dict[r]["ttbar"]["smoothQ"+str(ivar)+"down"] = top_r_tdw ######################################################################################################## ### propagate correlated systematics from normalization fits for mu_qcd and top_scale ############### ######################################################################################################## ##### This is adding systematics from the fit #print pvars for ivar in range(len(pvars)): sys_qcd = [] sys_top = [] sys_bkg = [] for iUD in range(2): upDw = ("up" if iUD ==0 else "down") mu_qcd_var = pvars[ivar][iUD][ir] top_scale_var = pvars[ivar][iUD][n_channels + (0 if use_one_top_nuis else ir) ] qvar = qcd_r.Clone("qvar") qvar.Scale( mu_qcd_var * qcd_int / qvar.Integral() ) ## for ibin in range(1, qvar.GetNbinsX()+1): ## if qvar.GetBinError(ibin) > qvar.GetBinContent(ibin): ## qvar.SetBinError(ibin, qvar.GetBinContent(ibin)) tvar = top_r.Clone("tvar") tvar.Scale( top_scale_var * top_int / tvar.Integral() ) ### store some numbers for table sys_qcd.append( qvar.Integral() - Nbkg_dict[r]["qcd"] ) sys_top.append( tvar.Integral() - Nbkg_dict[r]["top"] ) sys_bkg.append( qvar.Integral() + tvar.Integral() - Nbkg_dict[r]["bkg"] ) #vartxt = vartxt + str(r) + ' ' + str(ivar) + ' ' + str(iUD) + ' ' + str(qvar.Integral()) + ' ' + str(tvar.Integral()) + ' ' + str( (qvar.Integral() + tvar.Integral())) + '\n' ## Now do smoothing ####### if do_smoothing: qvar_sm = smoothfit.smoothfit(qvar, fitFunction = smoothing_func, fitRange = qcdSmoothRange, makePlots = False, 
verbose = verbose, outfileName="qcd_smoothfit_"+r+"_Norm"+str(ivar)+str(iUD)+".root") tvar_sm = smoothfit.smoothfit(tvar, fitFunction = top_smoothing_func, fitRange = topSmoothRange, makePlots = False, verbose = verbose, outfileName="top_smoothfit_"+r+"_Norm"+str(ivar)+str(iUD)+".root") if addSmoothErrorBin: qvar_final = smoothfit.MakeSmoothHistoWithError(qvar, qvar_sm) tvar_final = smoothfit.MakeSmoothHistoWithError(tvar, tvar_sm) else: qvar_final = smoothfit.MakeSmoothHisto(qvar, qvar_sm["nom"]) tvar_final = smoothfit.MakeSmoothHisto(tvar, tvar_sm["nom"]) qvar_final.SetNameTitle("qcd_hh_"+r+"_normY"+str(ivar)+upDw+"__clone", "qcd_hh_"+r+"_normY"+str(ivar)+upDw+"__clone") tvar_final.SetNameTitle("ttbar_hh_"+r+"_normY"+str(ivar)+upDw+"__clone", "ttbar_hh_"+r+"_normY"+str(ivar)+upDw+"__clone") else: qvar_final = qvar.Clone("qcd_hh_"+r+"_normY"+str(ivar)+upDw+"__clone") tvar_final = tvar.Clone("ttbar_hh_"+r+"_normY"+str(ivar)+upDw+"__clone") if rebinFinal is not None: qvar_final = qvar_final.Rebin(len(rebinFinal)-1, qvar_final.GetName()+"_rebinFinal", rebinFinal) tvar_final = tvar_final.Rebin(len(rebinFinal)-1, tvar_final.GetName()+"_rebinFinal", rebinFinal) if makeOutputFiles: outfileStat.WriteTObject(qvar_final, "qcd_hh_normY"+str(ivar)+upDw,"Overwrite") outfileStat.WriteTObject(tvar_final, "ttbar_hh_normY"+str(ivar)+upDw,"Overwrite") qvar_final.SetDirectory(0) tvar_final.SetDirectory(0) output_Dict[r]["qcd"]["normY"+str(ivar)+upDw] = qvar_final output_Dict[r]["ttbar"]["normY"+str(ivar)+upDw] = tvar_final # store some numbers for table later e_qcd_i = np.max( np.abs(sys_qcd) ) e_top_i = np.max( np.abs(sys_top) ) e_bkg_i = np.max( np.abs(sys_bkg) ) Nbkg_SysList[r]["qcd"].append( e_qcd_i ) Nbkg_SysList[r]["top"].append( e_top_i ) Nbkg_SysList[r]["bkg"].append( e_bkg_i ) ######################################################################################################## ####### QCD Shape and Norm estimated from CR ################################################ 
######################################################################################################## ##### This is adding systematics from the CR if QCDSyst_Dict!=None and isMhhDistribution: # qi original_norm = qcd_r.Integral() qvar_shape_up = qcd_r.Clone("qvar_QCDshape_up") qvar_shape_dw = qcd_r.Clone("qvar_QCDshape_dw") ClearNegBin(qvar_shape_up) ClearNegBin(qvar_shape_dw) ## Now do smoothing if do_smoothing: qvar_shape_up_sm = smoothfit.smoothfit(qvar_shape_up, fitFunction = smoothing_func, fitRange = qcdSmoothRange, makePlots = False, verbose = verbose, outfileName="qcd_smoothfit_"+r+"_QCDShapeup.root") qvar_shape_dw_sm = smoothfit.smoothfit(qvar_shape_dw, fitFunction = smoothing_func, fitRange = qcdSmoothRange, makePlots = False, verbose = verbose, outfileName="qcd_smoothfit_"+r+"_QCDShapedown.root") if addSmoothErrorBin: qvar_shape_up_final = smoothfit.MakeSmoothHistoWithError(qvar_shape_up, qvar_shape_up_sm) qvar_shape_dw_final = smoothfit.MakeSmoothHistoWithError(qvar_shape_dw, qvar_shape_dw_sm) else: qvar_shape_up_final = smoothfit.MakeSmoothHisto(qvar_shape_up, qvar_shape_up_sm["nom"]) qvar_shape_dw_final = smoothfit.MakeSmoothHisto(qvar_shape_dw, qvar_shape_dw_sm["nom"]) qvar_shape_up_final.Multiply( QCDSyst_Dict["Shape_"+r] ) qvar_shape_dw_final.Divide( QCDSyst_Dict["Shape_"+r] ) qvar_shape_up_final.SetNameTitle("qcd_hh_"+r+"_QCDShapeCRup__clone", "qcd_hh_"+r+"_QCDShapeCRup__clone") qvar_shape_dw_final.SetNameTitle("qcd_hh_"+r+"_QCDShapeCRdown__clone", "qcd_hh_"+r+"_QCDShapeCRdown__clone") else: qvar_shape_up_final = qvar_shape_up.Clone("qcd_hh_"+r+"_QCDShapeCRup__clone") qvar_shape_dw_final = qvar_shape_dw.Clone("qcd_hh_"+r+"_QCDShapeCRdown__clone") #make sure normalization is correct! 
qvar_shape_up_final.Scale( original_norm/qvar_shape_up_final.Integral() ) qvar_shape_dw_final.Scale( original_norm/qvar_shape_dw_final.Integral() ) if rebinFinal is not None: qvar_shape_up_final = qvar_shape_up_final.Rebin(len(rebinFinal)-1, qvar_shape_up_final.GetName()+"_rebinFinal", rebinFinal) qvar_shape_dw_final = qvar_shape_dw_final.Rebin(len(rebinFinal)-1, qvar_shape_dw_final.GetName()+"_rebinFinal", rebinFinal) if makeOutputFiles: outfileStat.WriteTObject(qvar_shape_up_final, "qcd_hh_QCDShapeCRup") outfileStat.WriteTObject(qvar_shape_dw_final, "qcd_hh_QCDShapeCRdown") qvar_shape_up_final.SetDirectory(0) qvar_shape_dw_final.SetDirectory(0) output_Dict[r]["qcd"]["QCDShapeCRup"] = qvar_shape_up_final output_Dict[r]["qcd"]["QCDShapeCRdown"] = qvar_shape_dw_final ########################################################################################### ### Norm comparison in CR ############################################################ ########################################################################################### if QCDSyst_Dict != None: qvar_normCR_up = qcd_final.Clone("qcd_hh_"+r+"_QCDnormCRup__clone") qvar_normCR_up.Scale( 1.0 + QCDSyst_Dict["Scale_"+r] ) qvar_normCR_dw = qcd_final.Clone("qcd_hh_"+r+"_QCDnormCRdown__clone") qvar_normCR_dw.Scale( 1.0 - QCDSyst_Dict["Scale_"+r] ) if rebinFinal is not None: qvar_normCR_up = qvar_normCR_up.Rebin(len(rebinFinal)-1, qvar_normCR_up.GetName()+"_rebinFinal", rebinFinal) qvar_normCR_dw = qvar_normCR_dw.Rebin(len(rebinFinal)-1, qvar_normCR_dw.GetName()+"_rebinFinal", rebinFinal) if makeOutputFiles: outfileStat.WriteTObject(qvar_normCR_up, "qcd_hh_QCDNormCRup") outfileStat.WriteTObject(qvar_normCR_dw, "qcd_hh_QCDNormCRdown") qvar_normCR_up.SetDirectory(0) qvar_normCR_dw.SetDirectory(0) output_Dict[r]["qcd"]["QCDNormCRup"] = qvar_normCR_up output_Dict[r]["qcd"]["QCDNormCRdown"] = qvar_normCR_dw 
##################################################################################################################### ### top shape systematics in 4b region, if using 3b shape ########################################################### ##################################################################################################################### if r == "44" and nbtag_top_shape_SRPred_for4b == "33" and MassRegionName == "SR" and isMhhDistribution: # qi ## ttbarShapeSRSyst_Dict = SystTools.ttbarShapeSysSR(topfileName, ## distributionName, ## signal_region = "22", ## compare_region = "33", ## btag_WP = btag_WP, ## makePlots = True, ## verbose = False, ## outfileNameBase="TopShapeSRSysfit.root") ttbarShapeSRSyst_Dict = SystToolsSmooth.ttbarShapeSysSR(topfileName, distributionName, signal_region = "33", compare_region = "22", btag_WP = btag_WP, smoothing_func = top_smoothing_func, SmoothRange = topSmoothRange,# (100, 2500), makePlots = True, verbose = False, outfileNameBase="TopShapeSRSysfitSmooth.root") tvar_shape_up = top_r.Clone("tvar_ttbarShapeSR_up") #tvar_shape_up.Multiply( ttbarShapeSRSyst_Dict["fup"] ) tvar_shape_dw = top_r.Clone("tvar_ttbarShapeSR_dw") #tvar_shape_dw.Multiply( ttbarShapeSRSyst_Dict["fdw"] ) ClearNegBin(tvar_shape_up) ClearNegBin(tvar_shape_dw) tvar_shape_up.Scale( top_r.Integral() / tvar_shape_up.Integral() ) tvar_shape_dw.Scale( top_r.Integral() / tvar_shape_dw.Integral() ) ## Now do smoothing ########################## if do_smoothing: tvar_shape_up_sm = smoothfit.smoothfit(tvar_shape_up, fitFunction = top_smoothing_func, fitRange = topSmoothRange, makePlots = False, verbose = verbose, outfileName="top_smoothfit_"+r+"_ttbarShapeSRup.root") tvar_shape_dw_sm = smoothfit.smoothfit(tvar_shape_dw, fitFunction = top_smoothing_func, fitRange = topSmoothRange, makePlots = False, verbose = verbose, outfileName="top_smoothfit_"+r+"_ttbarShapeSRedown.root") if addSmoothErrorBin: tvar_shape_up_final = 
smoothfit.MakeSmoothHistoWithError(tvar_shape_up, tvar_shape_up_sm) tvar_shape_dw_final = smoothfit.MakeSmoothHistoWithError(tvar_shape_dw, tvar_shape_dw_sm) else: tvar_shape_up_final = smoothfit.MakeSmoothHisto(tvar_shape_up, tvar_shape_up_sm["nom"]) tvar_shape_dw_final = smoothfit.MakeSmoothHisto(tvar_shape_dw, tvar_shape_dw_sm["nom"]) tvar_shape_up_final.Multiply( ttbarShapeSRSyst_Dict["Shape"] ) tvar_shape_dw_final.Divide( ttbarShapeSRSyst_Dict["Shape"] ) tvar_shape_up_final.SetNameTitle("ttbar_hh_"+r+"_ttbarShapeSRup__clone", "ttbar_hh_"+r+"_ttbarShapeSRup__clone") tvar_shape_dw_final.SetNameTitle("ttbar_hh_"+r+"_ttbarShapeSRdown__clone", "ttbar_hh_"+r+"_ttbarShapeSRdown__clone") else: tvar_shape_up_final = tvar_shape_up.Clone("ttbar_hh_"+r+"_ttbarShapeSRup__clone") tvar_shape_dw_final = tvar_shape_dw.Clone("ttbar_hh_"+r+"_ttbarShapeSRdown__clone") if rebinFinal is not None: tvar_shape_up_final = tvar_shape_up_final.Rebin(len(rebinFinal)-1, tvar_shape_up_final.GetName()+"_rebinFinal", rebinFinal) tvar_shape_dw_final = tvar_shape_dw_final.Rebin(len(rebinFinal)-1, tvar_shape_dw_final.GetName()+"_rebinFinal", rebinFinal) if makeOutputFiles: outfileStat.WriteTObject(tvar_shape_up_final, "ttbar_hh_ttbarShapeSRup") outfileStat.WriteTObject(tvar_shape_dw_final, "ttbar_hh_ttbarShapeSRdown") tvar_shape_up_final.SetDirectory(0) tvar_shape_dw_final.SetDirectory(0) output_Dict[r]["ttbar"]["ttbarShapeSRup"] = tvar_shape_up_final output_Dict[r]["ttbar"]["ttbarShapeSRdown"] = tvar_shape_dw_final ### close outfiles, if used ### if makeOutputFiles: outfileStat.Close() ### Print tables ### PrintTable( Nbkg_dict, Nbkg_SysList, regions) #print vartxt output_Dict['regions'] = regions #print output_Dict return
def HistoAnalysis( datafileName="data/hist_data.root", topfileName="data/hist_ttbar.root", zjetfileName=None, distributionName="mHH_pole", n_trkjet=["4", "3", "2"], n_btag=["4", "3", "2"], btag_WP="77", NRebin=10, use_one_top_nuis=False, use_scale_top_0b=False, nbtag_top_shape_normFit_for4b="33", nbtag_top_shape_SRPred_for4b="33", rebinFinal=None, smoothing_func="Dijet", top_smoothing_func="Dijet", inputFitResult=None, inputQCDSyst_Dict=None, doSmoothing=True, addSmoothErrorBin=False, qcdSmoothRange=(1200, 3000), # (100, 2500), topSmoothRange=(1200, 3000), #(100, 2000), isSystematicVariation=False, verbose=False, makeOutputFiles=True, MassRegionName="SR"): global func1 global func2 ##### Parse Inputs ############################################ dist_name = distributionName num_trkjet = np.asarray(n_trkjet) if num_trkjet.shape == (): num_trkjet = np.asarray([n_trkjet]) num_btag = np.asarray(n_btag) if num_btag.shape == (): num_btag = np.asarray([n_btag]) if num_btag.shape != num_trkjet.shape: print "Must have same number of track jet and b-tag regions specified" sys.exit(0) btag_WP = btag_WP n_rebin = NRebin nbtag_top_shape_for4b = nbtag_top_shape_SRPred_for4b topShape_nbtag_for4b = nbtag_top_shape_for4b if nbtag_top_shape_for4b == None: topShape_nbtag_for4b = num_btag + num_btag useOneTopNuis = use_one_top_nuis scaleTop0b = use_scale_top_0b n_channels = num_trkjet.shape[0] regions = [num_trkjet[i] + num_btag[i] for i in range(n_channels)] ##for outputing isMhhDistribution = (distributionName == "mHH_l" or distributionName == "mHH_pole") do_smoothing = (doSmoothing if isMhhDistribution else False) # qi ################################################################## ##### Storage Variables ############################################ output_Dict = {} Nbkg_dict = {} Nbkg_SysList = {} for ir in regions: Nbkg_dict[ir] = {"qcd": 0, "top": 0, "zjet": 0, "bkg": 0} Nbkg_SysList[ir] = {"qcd": [], "top": [], "zjet": [], "bkg": []} vartxt = '' 
################################################################## ##### Do Background Fits ############################################ if inputFitResult == None: bkgFitResults = BkgFit.BackgroundFit( datafileName=datafileName, topfileName=topfileName, zjetfileName=zjetfileName, distributionName="leadHCand_Mass", n_trkjet=n_trkjet, n_btag=n_btag, btag_WP=btag_WP, NRebin=2, #NRebin, use_one_top_nuis=use_one_top_nuis, use_scale_top_0b=use_scale_top_0b, nbtag_top_shape_for4b=nbtag_top_shape_normFit_for4b, makePlots=True, verbose=verbose) else: bkgFitResults = inputFitResult pvars = bkgFitResults["pvars"] output_Dict["fitResults"] = bkgFitResults ################################################################## ##### Get QCD Shape Systematics from CR ############################## if MassRegionName == "SR": # should only affect SR if inputQCDSyst_Dict == None and isMhhDistribution: # qi QCDSyst_Dict = SystToolsSmooth.QCDSystematics( datafileName=datafileName, topfileName=topfileName, zjetfileName=zjetfileName, distributionName= "mHH_l", # this has been decided to fix on DiJetMass n_trkjet=n_trkjet, n_btag=n_btag, btag_WP=btag_WP, mu_qcd_vals=bkgFitResults["muqcd"], topscale_vals=bkgFitResults["topscale"], NRebin=5, smoothing_func=smoothing_func, SmoothRange=(1100, 3000), # (100, 2500), use_one_top_nuis=use_one_top_nuis, use_scale_top_0b=use_scale_top_0b, nbtag_top_shape_for4b=nbtag_top_shape_SRPred_for4b, makePlots=True, verbose=False, outfileNameBase="QCDSysfitSmooth.root") ## QCDSyst_Dict = SystTools.QCDSystematics(datafileName=datafileName, ## topfileName=topfileName, ## zjetfileName=zjetfileName, ## distributionName= "mHH_l", # this has been decided to fix on DiJetMass ## n_trkjet = n_trkjet, ## n_btag = n_btag, ## btag_WP = btag_WP, ## mu_qcd_vals = bkgFitResults["muqcd"], ## topscale_vals = bkgFitResults["topscale"], ## NRebin = NRebin, ## use_one_top_nuis = use_one_top_nuis, ## use_scale_top_0b = use_scale_top_0b, ## nbtag_top_shape_for4b = 
nbtag_top_shape_SRPred_for4b, ## makePlots = True, ## verbose = False, ## outfileNameBase="QCDSysfit.root") elif inputQCDSyst_Dict != None: QCDSyst_Dict = inputQCDSyst_Dict else: QCDSyst_Dict = None else: QCDSyst_Dict = None output_Dict["QCDSystCR"] = QCDSyst_Dict ################################################################## ##### Get Signal Region Histograms ################################ datafile = R.TFile(datafileName, "READ") topfile = R.TFile(topfileName, "READ") zjetfile = (R.TFile(zjetfileName, "READ") if zjetfileName != None else None) histos = {} # collect all histograms for r in ["44", "33", "22", "40", "30", "20"]: # folder_r = HistLocStr(dist_name, r[0], r[1], btag_WP, "SR") #folder( r[0], r[1], btag_WP) folder_r = HistLocStr(dist_name, r[0], r[1], btag_WP, MassRegionName) #folder( r[0], r[1], btag_WP) data_r = datafile.Get(folder_r).Clone("data_" + r) data_r.SetDirectory(0) # if (r == "42") and (MassRegionName == "SR") and blindData2bSR and ( (distributionName == "DiJetMass") or (distributionName == "DiJetMassPrime") ): # data_r = BlindData2bSR(data_r) top_r = topfile.Get(folder_r).Clone("top_" + r) top_r.SetDirectory(0) zjet_r = CheckAndGet(zjetfile, folder_r, top_r).Clone("zjet_" + r) zjet_r.SetDirectory(0) for ibin in range(1, top_r.GetNbinsX() + 1): if top_r.GetBinContent(ibin) < 0: top_r.SetBinContent(ibin, 0) top_r.SetBinError(ibin, 0) data_r.Rebin(n_rebin) top_r.Rebin(n_rebin) zjet_r.Rebin(n_rebin) histos[r] = {"data": data_r, "top": top_r, "zjet": zjet_r} datafile.Close() topfile.Close() if zjetfile != None: zjetfile.Close() ################################################################## ##### scaling and subtractions ################################# for ir in range(len(regions)): r = regions[ir] output_Dict[r] = {"qcd": {}, "ttbar": {}, "zjet": {}} if makeOutputFiles: outfileStat = R.TFile("outfile_boosted_" + r + ".root", "RECREATE") r_0b = r[0] + "0" #r_3b = r[0]+"3" top_0b = histos[r_0b]["top"].Clone("top_0b__" + r) if 
scaleTop0b: top_0b.Scale((bkgFitResults["topscale"][0] if use_one_top_nuis else bkgFitResults["topscale"][ir])) zjet_0b = histos[r_0b]["zjet"].Clone("zjet_0b__" + r) qcd_r = histos[r_0b]["data"].Clone("qcd__" + r) qcd_r.Add( top_0b, -1 ) # added by Qi --- we still want top to be subtracted, given that their fraction is increasing in Run 2. qcd_r.Add(zjet_0b, -1) qcd_int = qcd_r.Integral() for ibin in range(1, qcd_r.GetNbinsX() + 1): if qcd_r.GetBinContent(ibin) < 0: qcd_r.SetBinContent(ibin, 0) qcd_r.SetBinError(ibin, 0) top_r = histos[r]["top"].Clone("top__" + r) if (nbtag_top_shape_for4b == "33") and (r == "44") and ( MassRegionName == "SR" ): # the 3b top shape is only used during the SR prediction for 44 region temp_scaler = top_r.Integral( ) / histos[nbtag_top_shape_for4b]["top"].Integral() top_r = histos[nbtag_top_shape_for4b]["top"].Clone("top__" + r) top_r.Scale(temp_scaler) top_int = top_r.Integral() zjet_r = histos[r]["zjet"].Clone("zjet__" + r) mu_qcd = bkgFitResults["muqcd"][ir] top_scale = (bkgFitResults["topscale"][0] if use_one_top_nuis else bkgFitResults["topscale"][ir]) qcd_r.Scale(mu_qcd) top_r.Scale(top_scale) # store some numbers for table later e_qcd = R.Double(0.0) e_top = R.Double(0.0) Nbkg_dict[r]["qcd"] = qcd_r.IntegralAndError(0, qcd_r.GetNbinsX() + 1, e_qcd) Nbkg_dict[r]["top"] = top_r.IntegralAndError(0, top_r.GetNbinsX() + 1, e_top) Nbkg_dict[r]["bkg"] = Nbkg_dict[r]["qcd"] + Nbkg_dict[r]["top"] Nbkg_SysList[r]["qcd"].append(float(e_qcd)) Nbkg_SysList[r]["top"].append(float(e_top)) Nbkg_SysList[r]["bkg"].append( np.sqrt(float(e_qcd)**2 + float(e_top)**2)) # Qi Question ## Now do smoothing ########################################################################################### if do_smoothing: ## qcd_normed = qcd_r.Clone("normed") ## qcd_normed.SetDirectory(0) ## qcd_normed.Scale(1.0 / qcd_normed.Integral()) ## qcd_normed_sm = smoothfit.smoothfit(qcd_normed, fitFunction = smoothing_func, fitRange = qcdSmoothRange, makePlots = True, 
verbose = True, outfileName="qcd_normed_smoothfit_"+r+".root") qcd_sm = smoothfit.smoothfit(qcd_r, fitFunction=smoothing_func, fitRange=qcdSmoothRange, makePlots=True, verbose=False, outfileName="qcd_smoothfit_" + r + ".root") top_sm = smoothfit.smoothfit(top_r, fitFunction=top_smoothing_func, fitRange=topSmoothRange, makePlots=True, verbose=False, outfileName="top_smoothfit_" + r + ".root") if addSmoothErrorBin: qcd_final = smoothfit.MakeSmoothHistoWithError(qcd_r, qcd_sm) top_final = smoothfit.MakeSmoothHistoWithError(top_r, top_sm) else: qcd_final = smoothfit.MakeSmoothHisto(qcd_r, qcd_sm["nom"]) top_final = smoothfit.MakeSmoothHisto(top_r, top_sm["nom"]) qcd_final.SetNameTitle("qcd_hh_" + r + "__clone", "qcd_hh_" + r + "__clone") top_final.SetNameTitle("ttbar_hh_" + r + "__clone", "ttbar_hh_" + r + "__clone") else: qcd_final = qcd_r.Clone("qcd_hh_" + r + "__clone") top_final = top_r.Clone("ttbar_hh_" + r + "__clone") zjet_final = zjet_r.Clone("zjet_hh_" + r + "__clone") if rebinFinal is not None: qcd_final = qcd_final.Rebin( len(rebinFinal) - 1, qcd_final.GetName() + "_rebinFinal", rebinFinal) top_final = top_final.Rebin( len(rebinFinal) - 1, top_final.GetName() + "_rebinFinal", rebinFinal) zjet_final = zjet_final.Rebin( len(rebinFinal) - 1, zjet_final.GetName() + "_rebinFinal", rebinFinal) if makeOutputFiles: outfileStat.WriteTObject(qcd_final, "qcd_hh", "Overwrite") outfileStat.WriteTObject(top_final, "ttbar_hh", "Overwrite") outfileStat.WriteTObject(zjet_final, "zjet_hh", "Overwrite") qcd_final.SetDirectory(0) top_final.SetDirectory(0) zjet_final.SetDirectory(0) output_Dict[r]["qcd"]["nom"] = qcd_final output_Dict[r]["ttbar"]["nom"] = top_final output_Dict[r]["zjet"]["nom"] = zjet_final # for systematics, don't need anything after this in loop if isSystematicVariation: continue ################################################################################################################################## ### propagate correlated systematics from the 
smoothing procedure---> these "replace" the stat error on the bins ############# ################################################################################################################################## if do_smoothing: ## qcd smoothing variations################################################################# if not addSmoothErrorBin: for ivar in range(len(qcd_sm["vars"])): qup = qcd_sm["vars"][ivar][0] qdw = qcd_sm["vars"][ivar][1] qcd_r_qup = smoothfit.MakeSmoothHisto(qcd_r, qup) qcd_r_qdw = smoothfit.MakeSmoothHisto(qcd_r, qdw) qcd_r_qup.SetNameTitle( "qcd_hh_" + r + "_smoothQ" + str(ivar) + "Up__clone", "qcd_hh_" + r + "_smoothQ" + str(ivar) + "Up__clone") qcd_r_qdw.SetNameTitle( "qcd_hh_" + r + "_smoothQ" + str(ivar) + "Down__clone", "qcd_hh_" + r + "_smoothQ" + str(ivar) + "Down__clone") if rebinFinal is not None: qcd_r_qup = qcd_r_qup.Rebin( len(rebinFinal) - 1, qcd_r_qup.GetName() + "_rebinFinal", rebinFinal) qcd_r_qdw = qcd_r_qdw.Rebin( len(rebinFinal) - 1, qcd_r_qdw.GetName() + "_rebinFinal", rebinFinal) if makeOutputFiles: outfileStat.WriteTObject( qcd_r_qup, "qcd_hh_smoothQ" + str(ivar) + "Up", "Overwrite") outfileStat.WriteTObject( qcd_r_qdw, "qcd_hh_smoothQ" + str(ivar) + "Down", "Overwrite") qcd_r_qup.SetDirectory(0) qcd_r_qdw.SetDirectory(0) output_Dict[r]["qcd"]["smoothQ" + str(ivar) + "Up"] = qcd_r_qup output_Dict[r]["qcd"]["smoothQ" + str(ivar) + "Down"] = qcd_r_qdw ## qcd smoothing function variations ################################################################# if smoothing_func == "ExpModGauss": smoothFuncCompSyst = EMGSmoothSyst.smoothFuncCompare( qcd_r, fitFunction=smoothing_func, fitRange=qcdSmoothRange, funcCompareRange=(900, qcdSmoothRange[1]), makePlots=True, verbose=False, outfileName="EMGSmoothFuncCompare_" + r + ".root", plotExtra=False) # Qi else: # smoothFuncCompSyst = smoothfit.smoothFuncCompare(qcd_r, fitRange = (900, qcdSmoothRange[1]), smoothFuncCompSyst = smoothfit.smoothFuncCompare( qcd_r, 
fitRange=(qcdSmoothRange[0], qcdSmoothRange[1]), # qi makePlots=True, verbose=False, outfileName="smoothFuncCompare_" + r + ".root", plotExtra=False) # Qi qcd_r_func_up = smoothFuncCompSyst["up"] qcd_r_func_dw = smoothFuncCompSyst["dw"] qcd_r_func_up_super = smoothFuncCompSyst["up_super"] qcd_r_func_dw_super = smoothFuncCompSyst["dw_super"] if rebinFinal is not None: qcd_r_func_up = qcd_r_func_up.Rebin( len(rebinFinal) - 1, qcd_r_func_up.GetName() + "_rebinFinal", rebinFinal) qcd_r_func_dw = qcd_r_func_dw.Rebin( len(rebinFinal) - 1, qcd_r_func_dw.GetName() + "_rebinFinal", rebinFinal) if makeOutputFiles: outfileStat.WriteTObject(qcd_r_func_up, "qcd_hh_smoothFuncUp", "Overwrite") outfileStat.WriteTObject(qcd_r_func_dw, "qcd_hh_smoothFuncDown", "Overwrite") outfileStat.WriteTObject(qcd_r_func_up_super, "qcd_hh_smoothFuncSuperUp", "Overwrite") outfileStat.WriteTObject(qcd_r_func_dw_super, "qcd_hh_smoothFuncSuperDown", "Overwrite") # treat negative bin for ibin in range(1, qcd_r_func_up.GetNbinsX() + 1): if qcd_r_func_up.GetBinContent(ibin) < 0: qcd_r_func_up.SetBinContent(ibin, 0) qcd_r_func_up.SetBinError(ibin, 0) if qcd_r_func_dw.GetBinContent(ibin) < 0: qcd_r_func_dw.SetBinContent(ibin, 0) qcd_r_func_dw.SetBinError(ibin, 0) if qcd_r_func_up_super.GetBinContent(ibin) < 0: qcd_r_func_up_super.SetBinContent(ibin, 0) qcd_r_func_up_super.SetBinError(ibin, 0) if qcd_r_func_dw_super.GetBinContent(ibin) < 0: qcd_r_func_dw_super.SetBinContent(ibin, 0) qcd_r_func_dw_super.SetBinError(ibin, 0) qcd_r_func_up.SetDirectory(0) qcd_r_func_dw.SetDirectory(0) output_Dict[r]["qcd"]["smoothFuncUp"] = qcd_r_func_up output_Dict[r]["qcd"]["smoothFuncDown"] = qcd_r_func_dw qcd_r_func_up_super.SetDirectory(0) qcd_r_func_dw_super.SetDirectory(0) output_Dict[r]["qcd"]["smoothFuncUp_super"] = qcd_r_func_up_super output_Dict[r]["qcd"]["smoothFuncDown_super"] = qcd_r_func_dw_super #smoothfit.smoothFuncRangeCompare(qcd_r, fitRange = (900, qcdSmoothRange[1]), makePlots = True, verbose = False, 
outfileName="smoothFuncRangeCompare_"+r+".root") smoothfit.smoothFuncRangeCompare( qcd_r, fitFunction=smoothing_func, fitRange=qcdSmoothRange, fitMaxVals=["1750", "2000", "2500"], fitMinVals=[str(qcdSmoothRange[0]), "1200", "1500"], makePlots=True, plotExtra=False, verbose=False, outfileName="smoothFuncRangeCompare_" + r + ".root") # Qi ## ttbar smoothing variations############################################################################## if not addSmoothErrorBin: for ivar in range(len(top_sm["vars"])): tup = top_sm["vars"][ivar][0] tdw = top_sm["vars"][ivar][1] top_r_tup = smoothfit.MakeSmoothHisto(top_r, tup) top_r_tdw = smoothfit.MakeSmoothHisto(top_r, tdw) top_r_tup.SetNameTitle( "ttbar_hh_" + r + "_smoothT" + str(ivar) + "Up__clone", "ttbar_hh_" + r + "_smoothT" + str(ivar) + "Up__clone") top_r_tdw.SetNameTitle( "ttbar_hh_" + r + "_smoothT" + str(ivar) + "Down__clone", "ttbar_hh_" + r + "_smoothT" + str(ivar) + "Down__clone") if rebinFinal is not None: top_r_tup = top_r_tup.Rebin( len(rebinFinal) - 1, top_r_tup.GetName() + "_rebinFinal", rebinFinal) top_r_tdw = top_r_tdw.Rebin( len(rebinFinal) - 1, top_r_tdw.GetName() + "_rebinFinal", rebinFinal) if makeOutputFiles: outfileStat.WriteTObject( top_r_tup, "ttbar_hh_smoothT" + str(ivar) + "Up", "Overwrite") outfileStat.WriteTObject( top_r_tdw, "ttbar_hh_smoothT" + str(ivar) + "Down", "Overwrite") top_r_tup.SetDirectory(0) top_r_tdw.SetDirectory(0) output_Dict[r]["ttbar"]["smoothT" + str(ivar) + "Up"] = top_r_tup output_Dict[r]["ttbar"]["smoothT" + str(ivar) + "Down"] = top_r_tdw ######################################################################################################## ### propagate correlated systematics from normalization fits for mu_qcd and top_scale ############### ######################################################################################################## for ivar in range(len(pvars)): sys_qcd = [] sys_top = [] sys_bkg = [] for iUD in range(2): UpDw = ("Up" if iUD == 0 else 
"Down") mu_qcd_var = pvars[ivar][iUD][ir] top_scale_var = pvars[ivar][iUD][ n_channels + (0 if use_one_top_nuis else ir)] qvar = qcd_r.Clone("qvar") qvar.Scale(mu_qcd_var * qcd_int / qvar.Integral()) ## for ibin in range(1, qvar.GetNbinsX()+1): ## if qvar.GetBinError(ibin) > qvar.GetBinContent(ibin): ## qvar.SetBinError(ibin, qvar.GetBinContent(ibin)) tvar = top_r.Clone("tvar") tvar.Scale(top_scale_var * top_int / tvar.Integral()) ### store some numbers for table sys_qcd.append(qvar.Integral() - Nbkg_dict[r]["qcd"]) sys_top.append(tvar.Integral() - Nbkg_dict[r]["top"]) sys_bkg.append(qvar.Integral() + tvar.Integral() - Nbkg_dict[r]["bkg"]) #vartxt = vartxt + str(r) + ' ' + str(ivar) + ' ' + str(iUD) + ' ' + str(qvar.Integral()) + ' ' + str(tvar.Integral()) + ' ' + str( (qvar.Integral() + tvar.Integral())) + '\n' ## Now do smoothing ####### if do_smoothing: qvar_sm = smoothfit.smoothfit( qvar, fitFunction=smoothing_func, fitRange=qcdSmoothRange, makePlots=False, verbose=verbose, outfileName="qcd_smoothfit_" + r + "_Norm" + str(ivar) + str(iUD) + ".root") tvar_sm = smoothfit.smoothfit( tvar, fitFunction=top_smoothing_func, fitRange=topSmoothRange, makePlots=False, verbose=verbose, outfileName="top_smoothfit_" + r + "_Norm" + str(ivar) + str(iUD) + ".root") if addSmoothErrorBin: qvar_final = smoothfit.MakeSmoothHistoWithError( qvar, qvar_sm) tvar_final = smoothfit.MakeSmoothHistoWithError( tvar, tvar_sm) else: qvar_final = smoothfit.MakeSmoothHisto( qvar, qvar_sm["nom"]) tvar_final = smoothfit.MakeSmoothHisto( tvar, tvar_sm["nom"]) qvar_final.SetNameTitle( "qcd_hh_" + r + "_normY" + str(ivar) + UpDw + "__clone", "qcd_hh_" + r + "_normY" + str(ivar) + UpDw + "__clone") tvar_final.SetNameTitle( "ttbar_hh_" + r + "_normY" + str(ivar) + UpDw + "__clone", "ttbar_hh_" + r + "_normY" + str(ivar) + UpDw + "__clone") else: qvar_final = qvar.Clone("qcd_hh_" + r + "_normY" + str(ivar) + UpDw + "__clone") tvar_final = tvar.Clone("ttbar_hh_" + r + "_normY" + str(ivar) + UpDw + 
"__clone") if rebinFinal is not None: qvar_final = qvar_final.Rebin( len(rebinFinal) - 1, qvar_final.GetName() + "_rebinFinal", rebinFinal) tvar_final = tvar_final.Rebin( len(rebinFinal) - 1, tvar_final.GetName() + "_rebinFinal", rebinFinal) if makeOutputFiles: outfileStat.WriteTObject(qvar_final, "qcd_hh_normY" + str(ivar) + UpDw, "Overwrite") outfileStat.WriteTObject( tvar_final, "ttbar_hh_normY" + str(ivar) + UpDw, "Overwrite") qvar_final.SetDirectory(0) tvar_final.SetDirectory(0) output_Dict[r]["qcd"]["normY" + str(ivar) + UpDw] = qvar_final output_Dict[r]["ttbar"]["normY" + str(ivar) + UpDw] = tvar_final # store some numbers for table later e_qcd_i = np.max(np.abs(sys_qcd)) e_top_i = np.max(np.abs(sys_top)) e_bkg_i = np.max(np.abs(sys_bkg)) Nbkg_SysList[r]["qcd"].append(e_qcd_i) Nbkg_SysList[r]["top"].append(e_top_i) Nbkg_SysList[r]["bkg"].append(e_bkg_i) ######################################################################################################## ####### QCD Shape and Norm estimated from CR ################################################ ######################################################################################################## if QCDSyst_Dict != None and isMhhDistribution: # qi qvar_shape_up = qcd_r.Clone("qvar_QCDshape_up") #qvar_shape_up.Multiply( QCDSyst_Dict["Shape_"+r]["fup"] ) qvar_shape_dw = qcd_r.Clone("qvar_QCDshape_dw") #qvar_shape_dw.Multiply( QCDSyst_Dict["Shape_"+r]["fdw"] ) for ibinX in range(1, qvar_shape_up.GetNbinsX() + 1): if (qvar_shape_up.GetBinContent(ibinX) < 0): qvar_shape_up.SetBinContent(ibinX, 0) qvar_shape_up.SetBinError(ibinX, 0) if (qvar_shape_dw.GetBinContent(ibinX) < 0): qvar_shape_dw.SetBinContent(ibinX, 0) qvar_shape_dw.SetBinError(ibinX, 0) qvar_shape_up.Scale(qcd_r.Integral() / qvar_shape_up.Integral()) qvar_shape_dw.Scale(qcd_r.Integral() / qvar_shape_dw.Integral()) ## Now do smoothing if do_smoothing: qvar_shape_up_sm = smoothfit.smoothfit( qvar_shape_up, fitFunction=smoothing_func, 
fitRange=qcdSmoothRange, makePlots=False, verbose=verbose, outfileName="qcd_smoothfit_" + r + "_QCDShapeUp.root") qvar_shape_dw_sm = smoothfit.smoothfit( qvar_shape_dw, fitFunction=smoothing_func, fitRange=qcdSmoothRange, makePlots=False, verbose=verbose, outfileName="qcd_smoothfit_" + r + "_QCDShapeDown.root") if addSmoothErrorBin: qvar_shape_up_final = smoothfit.MakeSmoothHistoWithError( qvar_shape_up, qvar_shape_up_sm) qvar_shape_dw_final = smoothfit.MakeSmoothHistoWithError( qvar_shape_dw, qvar_shape_dw_sm) else: qvar_shape_up_final = smoothfit.MakeSmoothHisto( qvar_shape_up, qvar_shape_up_sm["nom"]) qvar_shape_dw_final = smoothfit.MakeSmoothHisto( qvar_shape_dw, qvar_shape_dw_sm["nom"]) qvar_shape_up_final.Multiply(QCDSyst_Dict["Shape_" + r]) qvar_shape_dw_final.Divide(QCDSyst_Dict["Shape_" + r]) qvar_shape_up_final.SetNameTitle( "qcd_hh_" + r + "_QCDShapeCRUp__clone", "qcd_hh_" + r + "_QCDShapeCRUp__clone") qvar_shape_dw_final.SetNameTitle( "qcd_hh_" + r + "_QCDShapeCRDown__clone", "qcd_hh_" + r + "_QCDShapeCRDown__clone") else: qvar_shape_up_final = qvar_shape_up.Clone( "qcd_hh_" + r + "_QCDShapeCRUp__clone") qvar_shape_dw_final = qvar_shape_dw.Clone( "qcd_hh_" + r + "_QCDShapeCRDown__clone") if rebinFinal is not None: qvar_shape_up_final = qvar_shape_up_final.Rebin( len(rebinFinal) - 1, qvar_shape_up_final.GetName() + "_rebinFinal", rebinFinal) qvar_shape_dw_final = qvar_shape_dw_final.Rebin( len(rebinFinal) - 1, qvar_shape_dw_final.GetName() + "_rebinFinal", rebinFinal) if makeOutputFiles: outfileStat.WriteTObject(qvar_shape_up_final, "qcd_hh_QCDShapeCRUp") outfileStat.WriteTObject(qvar_shape_dw_final, "qcd_hh_QCDShapeCRDown") qvar_shape_up_final.SetDirectory(0) qvar_shape_dw_final.SetDirectory(0) output_Dict[r]["qcd"]["QCDShapeCRUp"] = qvar_shape_up_final output_Dict[r]["qcd"]["QCDShapeCRDown"] = qvar_shape_dw_final ########################################################################################### ### Norm comparison in CR 
############################################################ ########################################################################################### if QCDSyst_Dict != None: qvar_normCR_up = qcd_final.Clone("qcd_hh_" + r + "_QCDnormCRUp__clone") qvar_normCR_up.Scale(1.0 + QCDSyst_Dict["Scale_" + r]) qvar_normCR_dw = qcd_final.Clone("qcd_hh_" + r + "_QCDnormCRDown__clone") qvar_normCR_dw.Scale(1.0 - QCDSyst_Dict["Scale_" + r]) if rebinFinal is not None: qvar_normCR_up = qvar_normCR_up.Rebin( len(rebinFinal) - 1, qvar_normCR_up.GetName() + "_rebinFinal", rebinFinal) qvar_normCR_dw = qvar_normCR_dw.Rebin( len(rebinFinal) - 1, qvar_normCR_dw.GetName() + "_rebinFinal", rebinFinal) if makeOutputFiles: outfileStat.WriteTObject(qvar_normCR_up, "qcd_hh_QCDNormCRUp") outfileStat.WriteTObject(qvar_normCR_dw, "qcd_hh_QCDNormCRDown") qvar_normCR_up.SetDirectory(0) qvar_normCR_dw.SetDirectory(0) output_Dict[r]["qcd"]["QCDNormCRUp"] = qvar_normCR_up output_Dict[r]["qcd"]["QCDNormCRDown"] = qvar_normCR_dw ##################################################################################################################### ### top shape systematics in 4b region, if using 3b shape ########################################################### ##################################################################################################################### if r == "44" and nbtag_top_shape_SRPred_for4b == "33" and MassRegionName == "SR" and isMhhDistribution: # qi ## ttbarShapeSRSyst_Dict = SystTools.ttbarShapeSysSR(topfileName, ## distributionName, ## signal_region = "22", ## compare_region = "33", ## btag_WP = btag_WP, ## makePlots = True, ## verbose = False, ## outfileNameBase="TopShapeSRSysfit.root") ttbarShapeSRSyst_Dict = SystToolsSmooth.ttbarShapeSysSR( topfileName, distributionName, signal_region="33", compare_region="22", btag_WP=btag_WP, smoothing_func=top_smoothing_func, SmoothRange=topSmoothRange, # (100, 2500), makePlots=True, verbose=False, 
outfileNameBase="TopShapeSRSysfitSmooth.root") tvar_shape_up = top_r.Clone("tvar_ttbarShapeSR_up") #tvar_shape_up.Multiply( ttbarShapeSRSyst_Dict["fup"] ) tvar_shape_dw = top_r.Clone("tvar_ttbarShapeSR_dw") #tvar_shape_dw.Multiply( ttbarShapeSRSyst_Dict["fdw"] ) for ibinX in range(1, tvar_shape_up.GetNbinsX() + 1): if (tvar_shape_up.GetBinContent(ibinX) < 0): tvar_shape_up.SetBinContent(ibinX, 0) tvar_shape_up.SetBinError(ibinX, 0) if (tvar_shape_dw.GetBinContent(ibinX) < 0): tvar_shape_dw.SetBinContent(ibinX, 0) tvar_shape_dw.SetBinError(ibinX, 0) tvar_shape_up.Scale(top_r.Integral() / tvar_shape_up.Integral()) tvar_shape_dw.Scale(top_r.Integral() / tvar_shape_dw.Integral()) ## Now do smoothing ########################## if do_smoothing: tvar_shape_up_sm = smoothfit.smoothfit( tvar_shape_up, fitFunction=top_smoothing_func, fitRange=topSmoothRange, makePlots=False, verbose=verbose, outfileName="top_smoothfit_" + r + "_ttbarShapeSRUp.root") tvar_shape_dw_sm = smoothfit.smoothfit( tvar_shape_dw, fitFunction=top_smoothing_func, fitRange=topSmoothRange, makePlots=False, verbose=verbose, outfileName="top_smoothfit_" + r + "_ttbarShapeSReDown.root") if addSmoothErrorBin: tvar_shape_up_final = smoothfit.MakeSmoothHistoWithError( tvar_shape_up, tvar_shape_up_sm) tvar_shape_dw_final = smoothfit.MakeSmoothHistoWithError( tvar_shape_dw, tvar_shape_dw_sm) else: tvar_shape_up_final = smoothfit.MakeSmoothHisto( tvar_shape_up, tvar_shape_up_sm["nom"]) tvar_shape_dw_final = smoothfit.MakeSmoothHisto( tvar_shape_dw, tvar_shape_dw_sm["nom"]) tvar_shape_up_final.Multiply(ttbarShapeSRSyst_Dict["Shape"]) tvar_shape_dw_final.Divide(ttbarShapeSRSyst_Dict["Shape"]) tvar_shape_up_final.SetNameTitle( "ttbar_hh_" + r + "_ttbarShapeSRUp__clone", "ttbar_hh_" + r + "_ttbarShapeSRUp__clone") tvar_shape_dw_final.SetNameTitle( "ttbar_hh_" + r + "_ttbarShapeSRDown__clone", "ttbar_hh_" + r + "_ttbarShapeSRDown__clone") else: tvar_shape_up_final = tvar_shape_up.Clone( "ttbar_hh_" + r + 
"_ttbarShapeSRUp__clone") tvar_shape_dw_final = tvar_shape_dw.Clone( "ttbar_hh_" + r + "_ttbarShapeSRDown__clone") if rebinFinal is not None: tvar_shape_up_final = tvar_shape_up_final.Rebin( len(rebinFinal) - 1, tvar_shape_up_final.GetName() + "_rebinFinal", rebinFinal) tvar_shape_dw_final = tvar_shape_dw_final.Rebin( len(rebinFinal) - 1, tvar_shape_dw_final.GetName() + "_rebinFinal", rebinFinal) if makeOutputFiles: outfileStat.WriteTObject(tvar_shape_up_final, "ttbar_hh_ttbarShapeSRUp") outfileStat.WriteTObject(tvar_shape_dw_final, "ttbar_hh_ttbarShapeSRDown") tvar_shape_up_final.SetDirectory(0) tvar_shape_dw_final.SetDirectory(0) output_Dict[r]["ttbar"]["ttbarShapeSRUp"] = tvar_shape_up_final output_Dict[r]["ttbar"]["ttbarShapeSRDown"] = tvar_shape_dw_final ### close outfiles, if used ### if makeOutputFiles: outfileStat.Close() ### Print tables ### #PrintTable( Nbkg_dict, Nbkg_SysList, regions) #print vartxt #print output_Dict output_Dict['regions'] = regions return output_Dict
def HistoAnalysis(datafileName="hist_data.root",
                  topfileName="hist_ttbar.root",
                  distributionName="DiJetMass",
                  n_trkjet=["4", "4"],
                  n_btag=["4", "3"],
                  btag_WP="77",
                  NRebin=1,
                  use_one_top_nuis=False,
                  use_scale_top_2b=False,
                  nbtag_top_shape_normFit=None,
                  nbtag_top_shape_SRPred=None,
                  rebinFinal=None,
                  verbose=False):
    """Write smoothed QCD and ttbar signal-region templates, one ROOT file per region.

    Steps, in order:
      1. Run ``BkgFit.BackgroundFit`` on the "LeadCaloJetM" distribution and keep
         its "muqcd", "topscale" and "pvars" entries.
      2. Read the "SR" data and top histograms of ``distributionName`` for the
         regions 44, 43, 42, 33 and 32 (negative top bins are zeroed).
      3. For every requested region build QCD = mu_qcd * (2b data - 2b top) and
         top = top_scale * top, smooth both with ``smoothfit`` and write the
         nominal, smoothing-variation and normalisation-variation histograms
         to ``outfile_boosted_<region>.root``.

    Parameters:
        datafileName, topfileName: ROOT files holding the data / ttbar histograms.
        distributionName: distribution looked up through ``HistLocStr``.
        n_trkjet, n_btag: per-region strings (or one string each); entry i of
            both is concatenated into the region label, e.g. "4" + "3" -> "43".
        btag_WP: passed to ``HistLocStr`` and to the background fit.
        NRebin: only forwarded to the background fit.
        use_one_top_nuis: when true the single top scale at index 0 is used for
            every region, otherwise the scale at the region index.
        use_scale_top_2b: when true the 2b top template is scaled by the fitted
            top scale before being subtracted from the 2b data.
        nbtag_top_shape_normFit: forwarded to the background fit as
            ``nbtag_top_shape``.
        nbtag_top_shape_SRPred: when equal to "3", region "44" takes its top
            shape from the "43" histogram, normalised to the "44" top integral.
        rebinFinal: when not None every written histogram is first rebinned with
            ``Rebin(len(rebinFinal) - 1, name, rebinFinal)`` - presumably an
            array of bin edges; TODO confirm against the callers.
        verbose: forwarded to the background fit and to ``smoothfit``.

    Returns:
        None. All results are written to the per-region output files.

    Exits through ``sys.exit(0)`` when ``n_trkjet`` and ``n_btag`` differ in length.
    """
    ##### Parse Inputs ############################################
    num_trkjet = np.asarray(n_trkjet)
    if num_trkjet.shape == ():
        num_trkjet = np.asarray([n_trkjet])
    num_btag = np.asarray(n_btag)
    if num_btag.shape == ():
        num_btag = np.asarray([n_btag])
    if num_btag.shape != num_trkjet.shape:
        # Single parenthesised string: prints identically under Python 2 and 3.
        print("Must have same number of track jet and b-tag regions specified")
        # NOTE(review): status 0 reports success to the shell although this is
        # an error path; kept because the file's other entry points do the same.
        sys.exit(0)

    n_channels = num_trkjet.shape[0]
    regions = [num_trkjet[i] + num_btag[i] for i in range(n_channels)]

    ##### Do Background Fits ############################################
    # NOTE(review): the keyword names use_scale_top_2b / nbtag_top_shape must
    # match the signature of BkgFit.BackgroundFit - confirm against that module.
    bkgFitResults = BkgFit.BackgroundFit(
        datafileName=datafileName,
        topfileName=topfileName,
        distributionName="LeadCaloJetM",
        n_trkjet=n_trkjet,
        n_btag=n_btag,
        btag_WP=btag_WP,
        NRebin=NRebin,
        use_one_top_nuis=use_one_top_nuis,
        use_scale_top_2b=use_scale_top_2b,
        nbtag_top_shape=nbtag_top_shape_normFit,
        makePlots=True,
        verbose=verbose)
    pvars = bkgFitResults["pvars"]

    ##### Get Signal Region Histograms ################################
    datafile = R.TFile(datafileName, "READ")
    topfile = R.TFile(topfileName, "READ")
    histos = {}  # region label -> {"data": hist, "top": hist}
    try:
        for r in ["44", "43", "42", "33", "32"]:
            folder_r = HistLocStr(dist_name_or(distributionName), r[0], r[1], btag_WP, "SR") \
                if False else HistLocStr(distributionName, r[0], r[1], btag_WP, "SR")
            data_r = datafile.Get(folder_r).Clone("data_" + r)
            data_r.SetDirectory(0)  # detach so the clone survives Close()
            top_r = topfile.Get(folder_r).Clone("top_" + r)
            top_r.SetDirectory(0)
            for ibin in range(1, top_r.GetNbinsX() + 1):
                if top_r.GetBinContent(ibin) < 0:
                    top_r.SetBinContent(ibin, 0)
                    top_r.SetBinError(ibin, 0)
            histos[r] = {"data": data_r, "top": top_r}
    finally:
        # Close the inputs even when a histogram is missing.
        datafile.Close()
        topfile.Close()

    # Smoothing configuration shared by the nominal fit and every variation.
    smooth_func = "Exp"
    qcd_fit_range = (900, 2000)
    top_fit_range = (850, 1200)

    def rebinned(hist):
        """Return hist in the final binning, or hist itself when rebinFinal is None."""
        if rebinFinal is None:
            return hist
        return hist.Rebin(len(rebinFinal) - 1,
                          hist.GetName() + "_rebinFinal", rebinFinal)

    ##### scaling and subtractions #################################
    for ir, r in enumerate(regions):
        outfileStat = R.TFile("outfile_boosted_" + r + ".root", "RECREATE")
        try:
            r_2b = r[0] + "2"
            r_3b = r[0] + "3"
            top_scale = (bkgFitResults["topscale"][0] if use_one_top_nuis
                         else bkgFitResults["topscale"][ir])

            top_2b = histos[r_2b]["top"].Clone("top_2b__" + r)
            if use_scale_top_2b:
                top_2b.Scale(top_scale)

            # QCD template: 2b data with the top contribution subtracted.
            qcd_r = histos[r_2b]["data"].Clone("qcd__" + r)
            qcd_r.Add(top_2b, -1)
            # Take the reference integral AFTER the subtraction.  It used to be
            # taken before, so the norm variations below were scaled to
            # mu * N(data) while the nominal is mu * N(data - top).
            qcd_int = qcd_r.Integral()

            top_r = histos[r]["top"].Clone("top__" + r)
            if nbtag_top_shape_SRPred == "3" and r == "44":
                # the 3b top shape is only used during the SR prediction for 44 region
                temp_scaler = top_r.Integral() / histos[r_3b]["top"].Integral()
                top_r = histos[r_3b]["top"].Clone("top__" + r)
                top_r.Scale(temp_scaler)
            top_int = top_r.Integral()

            qcd_r.Scale(bkgFitResults["muqcd"][ir])
            top_r.Scale(top_scale)

            ## Now do smoothing
            qcd_sm = smoothfit.smoothfit(
                qcd_r,
                fitFunction=smooth_func,
                fitRange=qcd_fit_range,
                makePlots=True,
                verbose=verbose,
                outfileName="qcd_smoothfit_" + r + ".root")
            top_sm = smoothfit.smoothfit(
                top_r,
                fitFunction=smooth_func,
                fitRange=top_fit_range,
                makePlots=True,
                verbose=verbose,
                outfileName="top_smoothfit_" + r + ".root")

            qcd_final = rebinned(smoothfit.MakeSmoothHisto(qcd_r, qcd_sm["nom"]))
            top_final = rebinned(smoothfit.MakeSmoothHisto(top_r, top_sm["nom"]))
            outfileStat.WriteTObject(qcd_final, "qcd_hh", "Overwrite")
            outfileStat.WriteTObject(top_final, "ttbar_hh", "Overwrite")

            ### propagate correlated systematics from the smoothing procedure
            ### ---> these "replace" the stat error on the bins
            for ivar, var_pair in enumerate(qcd_sm["vars"]):
                qcd_r_qup = rebinned(smoothfit.MakeSmoothHisto(qcd_r, var_pair[0]))
                qcd_r_qdw = rebinned(smoothfit.MakeSmoothHisto(qcd_r, var_pair[1]))
                outfileStat.WriteTObject(
                    qcd_r_qup, "qcd_hh_smoothQ" + str(ivar) + "Up", "Overwrite")
                outfileStat.WriteTObject(
                    qcd_r_qdw, "qcd_hh_smoothQ" + str(ivar) + "Down", "Overwrite")

            for ivar, var_pair in enumerate(top_sm["vars"]):
                top_r_tup = rebinned(smoothfit.MakeSmoothHisto(top_r, var_pair[0]))
                top_r_tdw = rebinned(smoothfit.MakeSmoothHisto(top_r, var_pair[1]))
                outfileStat.WriteTObject(
                    top_r_tup, "ttbar_hh_smoothT" + str(ivar) + "Up", "Overwrite")
                outfileStat.WriteTObject(
                    top_r_tdw, "ttbar_hh_smoothT" + str(ivar) + "Down", "Overwrite")

            ### propagate correlated systematics from normalization fits
            ### for mu_qcd and top_scale
            for ivar, pvar in enumerate(pvars):
                for iUD, UpDw in enumerate(("Up", "Down")):
                    mu_qcd_var = pvar[iUD][ir]
                    # Top scales follow the n_channels QCD entries in each vector.
                    top_scale_var = pvar[iUD][
                        n_channels + (0 if use_one_top_nuis else ir)]

                    qvar = qcd_r.Clone("qvar")
                    qvar.Scale(mu_qcd_var * qcd_int / qvar.Integral())
                    tvar = top_r.Clone("tvar")
                    tvar.Scale(top_scale_var * top_int / tvar.Integral())

                    ## Now do smoothing
                    norm_tag = "_Norm" + str(ivar) + str(iUD) + ".root"
                    qvar_sm = smoothfit.smoothfit(
                        qvar,
                        fitFunction=smooth_func,
                        fitRange=qcd_fit_range,
                        makePlots=False,
                        verbose=verbose,
                        outfileName="qcd_smoothfit_" + r + norm_tag)
                    tvar_sm = smoothfit.smoothfit(
                        tvar,
                        fitFunction=smooth_func,
                        fitRange=top_fit_range,
                        makePlots=False,
                        verbose=verbose,
                        outfileName="top_smoothfit_" + r + norm_tag)

                    qvar_final = rebinned(
                        smoothfit.MakeSmoothHisto(qvar, qvar_sm["nom"]))
                    tvar_final = rebinned(
                        smoothfit.MakeSmoothHisto(tvar, tvar_sm["nom"]))
                    outfileStat.WriteTObject(
                        qvar_final, "qcd_hh_normY" + str(ivar) + UpDw, "Overwrite")
                    outfileStat.WriteTObject(
                        tvar_final, "ttbar_hh_normY" + str(ivar) + UpDw, "Overwrite")
        finally:
            # Always release the output file, also when a fit raises.
            outfileStat.Close()
def ttbarShapeSysSR(
        topfileName="hist_ttbar.root",
        distributionName="mHH_l",
        signal_region="33",
        compare_region="44",
        btag_WP="77",
        smoothing_func="Exp",
        SmoothRange=(1200, 3000),  # (100, 2500),
        makePlots=False,
        verbose=False,
        outfileNameBase="TopShapeSRSysfitSmooth.root"):
    """Compare smoothed ttbar shapes of two SR regions and return their ratio function.

    The "SR" top histograms of ``signal_region`` and ``compare_region`` are
    read from ``topfileName``, rebinned by 5, cleaned of negative bins, and the
    signal histogram is normalised to the comparison integral.  Both are then
    smoothed with ``smoothfit.smoothfit``.

    Parameters:
        topfileName: ROOT file holding the ttbar histograms.
        distributionName: distribution looked up through ``HistLocStr``.
        signal_region, compare_region: two-character region labels; character 0
            and character 1 are passed separately to ``HistLocStr``.
        btag_WP: passed to ``HistLocStr``.
        smoothing_func, SmoothRange: fit function and fit range given to
            ``smoothfit.smoothfit``; ``SmoothRange[0]`` is also the lower edge
            of the returned ratio function.
        makePlots, verbose: accepted but never read - the plots are always
            produced and the fits always run with ``verbose=False``.
        outfileNameBase: stem for the saved canvases (text from ".root" on is
            dropped).

    Returns:
        dict with the single key "Shape": a ``TF1`` named "ratio_topsys_sm"
        built from ``rfunc_ratio`` on [SmoothRange[0], upper edge of the last
        comparison bin].

    Side effects:
        Sets the module globals ``rfunc1`` (comparison fit) and ``rfunc2``
        (signal fit), and saves two canvases as .root and .pdf.
        NOTE(review): ``rfunc_ratio`` is defined elsewhere; presumably it reads
        ``rfunc1``/``rfunc2`` when evaluated, in which case the returned TF1
        changes meaning as soon as another routine reassigns them - confirm.
    """
    global rfunc1
    global rfunc2

    colorlist = [
        R.kGreen, R.kOrange, R.kMagenta, R.kCyan, R.kPink, (R.kAzure + 1),
        R.kGreen + 2, R.kOrange + 5
    ]
    plot_stem = (outfileNameBase.split(".root")[0] + "_sig" + signal_region +
                 "_comp" + compare_region)

    topfile = R.TFile(topfileName, "READ")
    try:
        ## get top SR shape
        folder_sig = HistLocStr(distributionName, signal_region[0],
                                signal_region[1], btag_WP, "SR")
        top_sig = topfile.Get(folder_sig).Clone("top_sig_" + signal_region)
        top_sig.SetDirectory(0)
        top_sig.Rebin(5)

        ## get top comparison shape
        folder_comp = HistLocStr(distributionName, compare_region[0],
                                 compare_region[1], btag_WP, "SR")
        top_comp = topfile.Get(folder_comp).Clone("top_comp_" + compare_region)
        top_comp.SetDirectory(0)
        top_comp.Rebin(5)

        ## remove negative values
        ## assume same binning, else division won't work later
        for ibin in range(1, top_sig.GetNbinsX() + 1):
            if top_sig.GetBinContent(ibin) < 0:
                top_sig.SetBinContent(ibin, 0)
                top_sig.SetBinError(ibin, 0)
            if top_comp.GetBinContent(ibin) < 0:
                top_comp.SetBinContent(ibin, 0)
                top_comp.SetBinError(ibin, 0)

        ## normalize to same area
        top_sig.Scale(top_comp.Integral() / top_sig.Integral())

        c = R.TCanvas("c1_topsys", "c1_topsys")
        xleg, yleg = 0.52, 0.7
        leg = R.TLegend(xleg, yleg, xleg + 0.3, yleg + 0.2)
        leg.SetFillColor(0)
        leg.SetBorderSize(0)
        leg.SetMargin(0.3)

        top_comp.SetXTitle("m_{JJ} [GeV]")
        top_comp.SetYTitle("Entries")
        top_comp.Draw("E1")
        leg.AddEntry(top_comp, "Top Comparison Distribution", "LP")

        ## smooth bkg and data
        top_comp_sm = smoothfit.smoothfit(
            top_comp,
            fitFunction=smoothing_func,
            fitRange=SmoothRange,
            makePlots=False,
            verbose=False,
            outfileName="top_comp_smoothfit_TopShape4b.root")
        top_comp_sm_h = smoothfit.MakeSmoothHisto(top_comp, top_comp_sm["nom"])
        top_comp_sm["nom"].SetLineColor(R.kBlack)
        top_comp_sm["nom"].Draw("same")
        leg.AddEntry(top_comp_sm["nom"], "Top Comparison Distribution Smooth",
                     "L")

        top_sig_sm = smoothfit.smoothfit(
            top_sig,
            fitFunction=smoothing_func,
            fitRange=SmoothRange,
            makePlots=False,
            verbose=False,
            outfileName="top_sig_smoothfit_TopShape4.root")
        # The histogram itself is not used below; the call and the binding are
        # kept in case MakeSmoothHisto has side effects.
        top_sig_sm_h = smoothfit.MakeSmoothHisto(top_sig, top_sig_sm["nom"])
        top_sig_sm["nom"].SetLineColor(R.kBlue)
        top_sig_sm["nom"].Draw("same")
        leg.AddEntry(top_sig_sm["nom"], "Top Nominal Distribution Smooth", "L")

        rfunc1 = top_comp_sm["nom"]
        rfunc2 = top_sig_sm["nom"]
        xMax = top_comp.GetXaxis().GetBinUpEdge(top_comp.GetXaxis().GetNbins())
        ratio_sm = R.TF1("ratio_topsys_sm", rfunc_ratio, SmoothRange[0], xMax,
                         0)

        for ivar, var_pair in enumerate(top_comp_sm["vars"]):
            dup = var_pair[0]
            ddw = var_pair[1]
            # Wrap around the palette so extra variations cannot raise IndexError.
            color = colorlist[ivar % len(colorlist)]
            dup.SetLineColor(color)
            ddw.SetLineColor(color)
            dup.Draw("same")
            ddw.Draw("same")
            leg.AddEntry(dup, "Top Comparison Smooth Variation", "L")

            top_comp_r_qup = smoothfit.MakeSmoothHisto(top_comp, dup)
            top_comp_r_qdw = smoothfit.MakeSmoothHisto(top_comp, ddw)
            # Add the larger up/down deviation in quadrature to the bin error.
            for ibin in range(1, top_comp_sm_h.GetNbinsX() + 1):
                nominal = top_comp_sm_h.GetBinContent(ibin)
                err_val = np.max(
                    np.abs([
                        nominal - top_comp_r_qup.GetBinContent(ibin),
                        nominal - top_comp_r_qdw.GetBinContent(ibin)
                    ]))
                top_comp_sm_h.SetBinError(
                    ibin,
                    np.sqrt(top_comp_sm_h.GetBinError(ibin)**2 + err_val**2))

        c.SetLogy(1)
        leg.Draw("same")
        c.SaveAs(plot_stem + ".root")
        c.SaveAs(plot_stem + ".pdf")
        c.Close()

        # Error band: smoothed comparison histogram divided by its own fit.
        h_ratio_cr_nom = top_comp_sm_h.Clone("top_comp_sm_h_TopShape4b")
        h_ratio_cr_nom.Divide(top_comp_sm["nom"])
        h_ratio_cr_nom.SetDirectory(0)

        # Comparison fit histogram divided by the signal fit.
        h_ratio_cr = top_comp_sm["nom"].GetHistogram()
        h_ratio_cr.Divide(top_sig_sm["nom"])
        h_ratio_cr.SetDirectory(0)

        ttbarShapeSRSyst_Dict = {"Shape": ratio_sm}

        c2 = R.TCanvas("c2_topsys", "c2_topsys")
        ratio_leg = R.TLegend(0.2, 0.7, 0.5, 0.9)
        ratio_leg.SetFillColor(0)
        h_ratio_cr_nom.SetFillColor(R.kBlack)
        h_ratio_cr_nom.SetFillStyle(3004)
        h_ratio_cr_nom.SetMarkerSize(0)
        h_ratio_cr_nom.GetXaxis().SetRangeUser(1000, 3000)
        h_ratio_cr_nom.GetYaxis().SetRangeUser(0, 3)
        h_ratio_cr_nom.GetXaxis().SetLabelSize(0.04)
        h_ratio_cr_nom.GetYaxis().SetLabelSize(0.04)
        h_ratio_cr_nom.SetXTitle("m_{JJ} [GeV]")
        h_ratio_cr_nom.SetYTitle("Ratio")
        h_ratio_cr_nom.Draw("E2")
        ratio_leg.AddEntry(h_ratio_cr_nom, "Nominal", "LF")
        h_ratio_cr.SetLineColor(R.kBlue)
        h_ratio_cr.Draw("same")
        ratio_leg.AddEntry(h_ratio_cr, "Predicted", "L")
        # ratio_sm itself is deliberately not drawn on this canvas.
        ratio_leg.Draw("same")
        c2.SaveAs(plot_stem + "_ratio.root")
        c2.SaveAs(plot_stem + "_ratio.pdf")
        c2.Close()
    finally:
        # Closed last, as before, but now also when a step above raises.
        topfile.Close()

    return ttbarShapeSRSyst_Dict
def QCDSystematics(
        datafileName="hist_data.root",
        topfileName="hist_ttbar.root",
        zjetfileName="hist_Zjets.root",
        distributionName="mHH_l",
        n_trkjet=["4", "3", "2"],
        n_btag=["4", "3", "2"],
        btag_WP="77",
        mu_qcd_vals=[1.0, 1.0],
        topscale_vals=[1.0, 1.0],
        NRebin=1,
        smoothing_func="Dijet",
        SmoothRange=(1100, 3000),  # (100, 2500),
        use_one_top_nuis=False,
        use_scale_top_0b=False,
        nbtag_top_shape_for4b=None,
        makePlots=False,
        verbose=False,
        outfileNameBase="QCDSysfitSmooth.root"):
    """Derive QCD shape and normalisation systematics from the "CR" histograms.

    For every requested region the background prediction
    mu_qcd * (0b data - 0b top - 0b Z+jets) + top_scale * top + Z+jets is built,
    compared with the CR data of that region, and both are smoothed with
    ``smoothfit.smoothfit``.

    Parameters:
        datafileName, topfileName: ROOT files holding data / ttbar histograms.
        zjetfileName: ROOT file with Z+jets histograms, or None (then
            ``CheckAndGet`` receives None as file).
        distributionName: distribution looked up through ``HistLocStr``.
        n_trkjet, n_btag: per-region strings (or one string each); entry i of
            both is concatenated into the region label.
        btag_WP: passed to ``HistLocStr``.
        mu_qcd_vals: QCD scale per region, indexed by region position.
        topscale_vals: top scale per region; only index 0 is read when
            ``use_one_top_nuis`` is true.
        NRebin: rebin factor applied to every input histogram.
        smoothing_func, SmoothRange: fit function and range for ``smoothfit``;
            ``SmoothRange[0]`` is also the lower edge of the ratio functions.
        use_scale_top_0b: when true the 0b top template is scaled by the top
            scale before it is subtracted from the 0b data.
        nbtag_top_shape_for4b: when not None, a region key of the loaded
            histograms whose top shape replaces the region's own, normalised to
            the region's top integral.  NOTE(review): despite the name this is
            applied to every region in the loop, not only to 4b.
        makePlots, verbose: accepted but never read.
        outfileNameBase: stem for the saved canvases (text from ".root" on is
            dropped).

    Returns:
        dict with, per region r, "Shape_<r>" (a ``TF1`` built from
        ``rfunc_ratio``) and "Scale_<r>" (the largest absolute value of the
        relative prediction/data yield difference, the relative data error and
        ``_extraNormCRSysDict.get(r, 0.)``).

    Raises:
        IndexError: ``mu_qcd_vals`` or ``topscale_vals`` has too few entries
            for the requested regions.  The defaults carry two entries for
            three default regions; this is now reported before any file is
            opened, where it used to surface only after the first regions had
            been processed.

    Exits through ``sys.exit(0)`` when ``n_trkjet`` and ``n_btag`` differ in length.

    Side effects:
        Sets the module globals ``rfunc1`` (data fit) and ``rfunc2``
        (prediction fit) for each region in turn, prints one summary line per
        region and saves two canvases per region as .root and .pdf.
        NOTE(review): ``rfunc_ratio`` is defined elsewhere; presumably it reads
        ``rfunc1``/``rfunc2`` when evaluated, in which case every stored
        "Shape_*" function ends up following the last region processed -
        confirm.
    """
    global rfunc1
    global rfunc2

    ##### Parse Inputs ############################################
    num_trkjet = np.asarray(n_trkjet)
    if num_trkjet.shape == ():
        num_trkjet = np.asarray([n_trkjet])
    num_btag = np.asarray(n_btag)
    if num_btag.shape == ():
        num_btag = np.asarray([n_btag])
    if num_btag.shape != num_trkjet.shape:
        # Single parenthesised string: prints identically under Python 2 and 3.
        print("Must have same number of track jet and b-tag regions specified")
        # NOTE(review): status 0 reports success to the shell although this is
        # an error path; kept because the file's other entry points do the same.
        sys.exit(0)

    n_channels = num_trkjet.shape[0]
    regions = [num_trkjet[i] + num_btag[i] for i in range(n_channels)]

    # Fail fast on scale lists that are too short for the region list.
    n_top_scales = 1 if use_one_top_nuis else n_channels
    if len(mu_qcd_vals) < n_channels or len(topscale_vals) < n_top_scales:
        raise IndexError(
            "QCDSystematics needs %d mu_qcd_vals and %d topscale_vals, "
            "got %d and %d" % (n_channels, n_top_scales, len(mu_qcd_vals),
                               len(topscale_vals)))

    colorlist = [
        R.kGreen, R.kOrange, R.kMagenta, R.kCyan, R.kPink, (R.kAzure + 1),
        R.kGreen + 2, R.kOrange + 5
    ]
    plot_stem = outfileNameBase.split(".root")[0]

    ##### Get Control Region Histograms ################################
    datafile = R.TFile(datafileName, "READ")
    topfile = R.TFile(topfileName, "READ")
    zjetfile = (R.TFile(zjetfileName, "READ")
                if zjetfileName is not None else None)
    histos = {}  # region label -> {"data": hist, "top": hist, "zjet": hist}
    try:
        for r in ["44", "33", "22", "40", "30", "20"]:
            folder_r = HistLocStr(distributionName, r[0], r[1], btag_WP, "CR")
            data_r = datafile.Get(folder_r).Clone("data_" + r)
            data_r.SetDirectory(0)  # detach so the clone survives Close()
            top_r = topfile.Get(folder_r).Clone("top_" + r)
            top_r.SetDirectory(0)
            zjet_r = CheckAndGet(zjetfile, folder_r, top_r).Clone("zjet_" + r)
            zjet_r.SetDirectory(0)
            for ibin in range(1, top_r.GetNbinsX() + 1):
                if top_r.GetBinContent(ibin) < 0:
                    top_r.SetBinContent(ibin, 0)
                    top_r.SetBinError(ibin, 0)
            data_r.Rebin(NRebin)
            top_r.Rebin(NRebin)
            zjet_r.Rebin(NRebin)
            histos[r] = {"data": data_r, "top": top_r, "zjet": zjet_r}
    finally:
        # The inputs are closed exactly once, also when a histogram is missing.
        datafile.Close()
        topfile.Close()
        if zjetfile is not None:
            zjetfile.Close()

    ####### output object ###################
    QCDSyst_Dict = {}

    ##### scaling and subtractions #################################
    for ir, r in enumerate(regions):
        r_0b = r[0] + "0"
        data_cr = histos[r]["data"]
        top_scale = (topscale_vals[0]
                     if use_one_top_nuis else topscale_vals[ir])

        top_0b = histos[r_0b]["top"].Clone("top_0b__" + r)
        if use_scale_top_0b:
            top_0b.Scale(top_scale)
        zjet_0b = histos[r_0b]["zjet"].Clone("zjet_0b__" + r)

        # QCD template: 0b data with top and Z+jets subtracted.
        qcd_r = histos[r_0b]["data"].Clone("qcd__" + r)
        qcd_r.Add(top_0b, -1)
        qcd_r.Add(zjet_0b, -1)

        if nbtag_top_shape_for4b is not None:
            top_r = histos[nbtag_top_shape_for4b]["top"].Clone("top__" + r)
            # scale to correct norm for region
            top_r.Scale(histos[r]["top"].Integral() / top_r.Integral())
        else:
            top_r = histos[r]["top"].Clone("top__" + r)
        zjet_r = histos[r]["zjet"].Clone("zjet__" + r)

        qcd_r.Scale(mu_qcd_vals[ir])
        top_r.Scale(top_scale)

        # now do ratio
        bkg_r = qcd_r.Clone("bkg__" + r)
        bkg_r.Add(top_r)
        bkg_r.Add(zjet_r)
        N_bkg_r = bkg_r.Integral()
        Err_N_data_CR_r = R.Double(0)
        # NOTE(review): bins 0..N+1 are summed for the data yield while
        # N_bkg_r comes from the default Integral() call - confirm the two
        # ranges are meant to differ.
        N_data_CR_r = data_cr.IntegralAndError(0, data_cr.GetNbinsX() + 1,
                                               Err_N_data_CR_r)
        # From here on the prediction is normalised to the data.
        bkg_r.Scale(data_cr.Integral() / bkg_r.Integral())

        c = R.TCanvas("c1_cr_" + r, "c1_cr_" + r)
        xleg, yleg = 0.52, 0.7
        leg = R.TLegend(xleg, yleg, xleg + 0.3, yleg + 0.2)
        leg.SetFillColor(0)
        leg.SetBorderSize(0)
        leg.SetMargin(0.3)

        data_cr.SetXTitle("m_{JJ} [GeV]")
        data_cr.SetYTitle("Entries")
        data_cr.Draw("E1")
        leg.AddEntry(data_cr, "CR data", "LP")

        ## smooth bkg and data
        data_sm = smoothfit.smoothfit(
            data_cr,
            fitFunction=smoothing_func,
            fitRange=SmoothRange,
            makePlots=False,
            verbose=False,
            useLikelihood=True,
            outfileName="data_smoothfit_CRsyst_" + r + ".root")
        data_sm_h = smoothfit.MakeSmoothHisto(data_cr, data_sm["nom"])
        data_sm["nom"].SetNameTitle("data_smoothfit_CRsyst_" + r,
                                    "data_smoothfit_CRsyst_" + r)
        data_sm["nom"].SetLineColor(R.kBlack)
        data_sm["nom"].Draw("same")
        leg.AddEntry(data_sm["nom"], "CR data smoothed", "L")

        bkg_sm = smoothfit.smoothfit(
            bkg_r,
            fitFunction=smoothing_func,
            fitRange=SmoothRange,
            makePlots=False,
            verbose=False,
            outfileName="bkg_smoothfit_CRsyst_" + r + ".root")
        # The histogram itself is not used below; the call and the binding are
        # kept in case MakeSmoothHisto has side effects.
        bkg_sm_h = smoothfit.MakeSmoothHisto(bkg_r, bkg_sm["nom"])
        bkg_sm["nom"].SetNameTitle("bkg_smoothfit_CRsyst_" + r,
                                   "bkg_smoothfit_CRsyst_" + r)
        bkg_sm["nom"].SetLineColor(R.kBlue)
        bkg_sm["nom"].Draw("same")
        leg.AddEntry(bkg_sm["nom"], "CR Prediction smoothed", "L")

        rfunc1 = data_sm["nom"]
        rfunc2 = bkg_sm["nom"]
        xMax = data_cr.GetXaxis().GetBinUpEdge(data_cr.GetXaxis().GetNbins())
        ratio_sm = R.TF1("ratio_crsys_sm" + r, rfunc_ratio, SmoothRange[0],
                         xMax, 0)

        for ivar, var_pair in enumerate(data_sm["vars"]):
            dup = var_pair[0]
            ddw = var_pair[1]
            # Wrap around the palette so extra variations cannot raise IndexError.
            color = colorlist[ivar % len(colorlist)]
            dup.SetLineColor(color)
            ddw.SetLineColor(color)
            dup.Draw("same")
            ddw.Draw("same")
            leg.AddEntry(dup, "CR data smoothed variation", "L")

            data_r_qup = smoothfit.MakeSmoothHisto(data_cr, dup)
            data_r_qdw = smoothfit.MakeSmoothHisto(data_cr, ddw)
            # Add the larger up/down deviation in quadrature to the bin error.
            for ibin in range(1, data_sm_h.GetNbinsX() + 1):
                nominal = data_sm_h.GetBinContent(ibin)
                err_val = np.max(
                    np.abs([
                        nominal - data_r_qup.GetBinContent(ibin),
                        nominal - data_r_qdw.GetBinContent(ibin)
                    ]))
                data_sm_h.SetBinError(
                    ibin, np.sqrt(data_sm_h.GetBinError(ibin)**2 + err_val**2))

        c.SetLogy(1)
        leg.Draw("same")
        c.SaveAs(plot_stem + "_" + r + ".root")
        c.SaveAs(plot_stem + "_" + r + ".pdf")
        c.Close()

        # Error band: smoothed data histogram divided by its own fit.
        h_ratio_cr_nom = data_sm_h.Clone("data_sm_h_CRsyst_" + r)
        h_ratio_cr_nom.Divide(data_sm["nom"])
        h_ratio_cr_nom.SetDirectory(0)

        # Data fit histogram divided by the prediction fit.
        h_ratio_cr = data_sm["nom"].GetHistogram()
        h_ratio_cr.Divide(bkg_sm["nom"])
        h_ratio_cr.SetDirectory(0)

        QCDSyst_Dict["Shape_" + r] = ratio_sm

        # scale is max of ratio non-unity and CR stat error
        QCDSyst_Dict["Scale_" + r] = np.max(
            np.abs([(N_bkg_r - N_data_CR_r) / N_bkg_r,
                    (Err_N_data_CR_r / N_data_CR_r),
                    _extraNormCRSysDict.get(r, 0.)]))

        # Space-joined str() values reproduce the output of the former
        # multi-argument print statement under both Python 2 and 3.
        print(" ".join(
            str(val)
            for val in ("Scale_" + r, QCDSyst_Dict["Scale_" + r], N_bkg_r,
                        N_data_CR_r, Err_N_data_CR_r,
                        (N_bkg_r - N_data_CR_r) / N_bkg_r,
                        Err_N_data_CR_r / N_data_CR_r)))

        c2 = R.TCanvas("c2_cr_" + r, "c2_cr_" + r)
        ratio_leg = R.TLegend(0.2, 0.7, 0.5, 0.9)
        ratio_leg.SetFillColor(0)
        h_ratio_cr_nom.SetFillColor(R.kBlack)
        h_ratio_cr_nom.SetFillStyle(3004)
        h_ratio_cr_nom.SetMarkerSize(0)
        h_ratio_cr_nom.GetXaxis().SetRangeUser(1000, 3000)
        h_ratio_cr_nom.GetYaxis().SetRangeUser(0, 3)
        h_ratio_cr_nom.GetXaxis().SetLabelSize(0.04)
        h_ratio_cr_nom.GetYaxis().SetLabelSize(0.04)
        h_ratio_cr_nom.SetXTitle("m_{JJ} [GeV]")
        h_ratio_cr_nom.SetYTitle("Ratio")
        h_ratio_cr_nom.Draw("E2")
        ratio_leg.AddEntry(h_ratio_cr_nom, "CR data", "LF")
        h_ratio_cr.SetLineColor(R.kBlue)
        h_ratio_cr.Draw("same")
        ratio_leg.AddEntry(h_ratio_cr, "CR Prediction", "L")
        ratio_sm.Draw("same")
        ratio_leg.Draw("same")
        c2.SaveAs(plot_stem + "_ratio_" + r + ".root")
        c2.SaveAs(plot_stem + "_ratio_" + r + ".pdf")
        c2.Close()

    return QCDSyst_Dict