def Direct_Estimator(var, cut, year):
    """Plot ``var`` for MC with events weighted by per-jet tagging rates.

    For every background and signal sample the events passing ``cut`` are
    read with ``root2array``.  Each event is weighted by ``eventWeightLumi``
    times the product of two per-jet rates: ``MANtag_eff`` (jet flavour 5) or
    ``MANtag_mis`` (any other flavour), both evaluated at the jet pT.  The
    weighted histograms are stacked, drawn together with the signals and an
    (empty) data placeholder, and the canvas is saved in batch mode.

    Args:
        var: key of the module-level ``variable`` dict; also the branch that
            is read and histogrammed.
        cut: selection string or alias name; aliases are expanded unless the
            value is one of the hard-coded channel names below.
        year: ``"run2"`` to use every file, otherwise a string that must be
            contained in a file name for that file to be used; also the key
            into ``LUMI``.

    Side effects:
        Prints progress, writes ``plots/MANtag_study/<channel>/...`` (batch
        mode only) and, when ``var == "jj_mass_widejet"`` and
        ``options.save`` is set, writes the arrays with both tagger weights
        appended to ``NTUPLEDIR + "MANtag/"``.

    Relies on module-level configuration (``variable``, ``sample``, ``back``,
    ``sign``, ``data``, ``alias``, ``options``, ...) and plotting helpers
    defined elsewhere in the file.
    """
    from root_numpy import root2array, fill_hist, array2root
    import numpy.lib.recfunctions as rfn

    def pair_weight(events, eff_fn, mis_fn):
        """Return eventWeightLumi times the flavour-dependent two-jet rate."""
        eff1 = np.array([eff_fn(pt) for pt in events["jpt_1"]])
        eff2 = np.array([eff_fn(pt) for pt in events["jpt_2"]])
        mis1 = np.array([mis_fn(pt) for pt in events["jpt_1"]])
        mis2 = np.array([mis_fn(pt) for pt in events["jpt_2"]])
        # Exactly one of the four flavour flags is set per event, so the sum
        # selects the matching product of rates.
        return events["eventWeightLumi"] * (
            events['bb'] * (eff1 * eff2) + events['bq'] * (eff1 * mis2) +
            events['qb'] * (mis1 * eff2) + events['qq'] * (mis1 * mis2))

    ### Preliminary Operations ###
    # These channel names are not expanded through the alias tables.
    treeRead = cut not in [
        "nnqq", "en", "enqq", "mn", "mnqq", "ee", "eeqq", "mm", "mmqq", "em",
        "emqq", "qqqq"
    ]
    channel = cut
    vdef = variable[var]
    unit = ' GeV' if "GeV" in vdef['title'] else ''
    isBlind = BLIND and 'SR' in channel
    isAH = False  # legacy switch, permanently disabled
    showSignal = not ('SB' in cut or 'TR' in cut)
    stype = "HVT model B"
    if len(sign) > 0 and 'AZh' in sign[0]:
        stype = "2HDM"
    elif len(sign) > 0 and 'monoH' in sign[0]:
        stype = "Z'-2HDM m_{A}=300 GeV"

    if treeRead:
        # Longest alias names are substituted first.
        for k in sorted(alias, key=len, reverse=True):
            if k not in cut:
                continue
            if BTAGGING == 'semimedium':
                cut = cut.replace(k, aliasSM[k])
            else:
                cut = cut.replace(
                    k, alias[k].format(WP=working_points[BTAGGING]))

    print("Plotting from %s %s in %s channel with:" %
          ("tree" if treeRead else "file", var, channel))
    print(" cut : %s" % (cut, ))

    if var == 'jj_deltaEta_widejet':
        # The deltaEta requirement is dropped when deltaEta itself is drawn.
        if "jj_deltaEta_widejet<1.1 && " in cut:
            print("")
            print("omitting jj_deltaEta_widejet<1.1 cut to draw the deltaEta distribution")
            print("")
            cut = cut.replace("jj_deltaEta_widejet<1.1 && ", "")
        else:
            print("")
            print("no 'jj_deltaEta_widejet<1.1 && ' in the cut string detected, so it cannot be ommited explicitly")
            print("")

    ### Create and fill MC histograms ###
    hist = {}
    logFlag = 'log' if vdef['log'] else ''
    flavourBranches = [
        "TMath::Abs(jflavour_1)==5 && TMath::Abs(jflavour_2)==5",
        "TMath::Abs(jflavour_1)==5 && TMath::Abs(jflavour_2)!=5",
        "TMath::Abs(jflavour_1)!=5 && TMath::Abs(jflavour_2)==5",
        "TMath::Abs(jflavour_1)!=5 && TMath::Abs(jflavour_2)!=5"
    ]

    for s in back + sign:
        if vdef['nbins'] > 0:
            binWidth = str((vdef['max'] - vdef['min']) / vdef['nbins'])
            hist[s] = TH1F(
                s, ";" + vdef['title'] + ";Events / ( " + binWidth + unit +
                " );" + logFlag, vdef['nbins'], vdef['min'], vdef['max'])
        else:
            # The ';' keeps the log flag in the third title slot instead of
            # gluing it onto the y-axis title.
            hist[s] = TH1F(s, ";" + vdef['title'] + ";Events;" + logFlag,
                           len(vdef['bins']) - 1, array('f', vdef['bins']))
        hist[s].Sumw2()

        for ss in sample[s]['files']:
            if 'data' in s or not (year == "run2" or year in ss):
                continue
            arr = root2array(
                NTUPLEDIR + ss + ".root",
                branches=[var, "jpt_1", "jpt_2", "eventWeightLumi"] +
                flavourBranches,
                selection=cut)
            print("imported " + NTUPLEDIR + ss + ".root")
            # Give the flavour expressions short column names.
            arr.dtype.names = [
                var, "jpt_1", "jpt_2", "eventWeightLumi", "bb", "bq", "qb",
                "qq"
            ]
            MANtag_weight = pair_weight(arr, MANtag_eff, MANtag_mis)
            fill_hist(hist[s], arr[var], weights=MANtag_weight)

            if var == "jj_mass_widejet" and options.save and "data" not in ss:
                # The deepCSV weights are only stored, never plotted, so they
                # are evaluated only when the array is actually saved.
                deepCSV_weight = pair_weight(arr, deepCSV_eff, deepCSV_mis)
                arr = rfn.append_fields(arr, "MANtag_weight", MANtag_weight,
                                        usemask=False)
                arr = rfn.append_fields(arr, "deepCSV_weight", deepCSV_weight,
                                        usemask=False)
                outName = NTUPLEDIR + "MANtag/" + ss + "_" + BTAGGING + ".root"
                array2root(arr, outName, treename="tree", mode='recreate')
                print("saved as %s" % (outName, ))
            arr = None  # drop the event array before reading the next file

        hist[s].Scale(sample[s]['weight'] if hist[s].Integral() >= 0 else 0)
        hist[s].SetFillColor(sample[s]['fillcolor'])
        hist[s].SetFillStyle(sample[s]['fillstyle'])
        hist[s].SetLineColor(sample[s]['linecolor'])
        hist[s].SetLineStyle(sample[s]['linestyle'])

    region = channel.replace('TR', '')
    if channel.endswith('TR') and region in topSF:
        hist['TTbarSL'].Scale(topSF[region][0])
        hist['ST'].Scale(topSF[region][0])

    ### Background sum ###
    if 'data_obs' in hist:
        hist['BkgSum'] = hist['data_obs'].Clone("BkgSum")
    else:
        hist['BkgSum'] = hist[back[0]].Clone("BkgSum")
    hist['BkgSum'].Reset("MICES")
    hist['BkgSum'].SetFillStyle(3003)
    hist['BkgSum'].SetFillColor(1)
    for s in back:
        hist['BkgSum'].Add(hist[s])

    # Data are never read in this function, so an empty placeholder is always
    # needed; without it every later hist['data_obs'] access raises KeyError
    # outside blinded / SR channels.
    if options.blind or 'SR' in channel or 'data_obs' not in hist:
        hist['data_obs'] = hist['BkgSum'].Clone("data_obs")
        hist['data_obs'].Reset("MICES")
    hist['data_obs'].SetMarkerStyle(20)
    hist['data_obs'].SetMarkerSize(1.25)

    for s in back + sign + ['BkgSum']:
        addOverflow(hist[s], False)
    for s in sign:
        hist[s].SetLineWidth(3)
        sample[s]['plot'] = True

    if isAH:
        for s in back:
            hist[s].SetFillStyle(3005)
            hist[s].SetLineWidth(2)
        if not var == "Events":
            sfnorm = hist[data[0]].Integral() / hist['BkgSum'].Integral()
            print("Applying SF: %s" % (sfnorm, ))
            for s in back + ['BkgSum']:
                hist[s].Scale(sfnorm)
        if BLIND and var.endswith("Mass"):
            for s in data + back + ['BkgSum']:
                first, last = hist[s].FindBin(65), hist[s].FindBin(135)
                for j in range(first, last):
                    hist[s].SetBinContent(j, -1.e-4)
        if BLIND and var.endswith("Tau21"):
            for s in data:
                first, last = hist[s].FindBin(0), hist[s].FindBin(0.6)
                for j in range(first, last):
                    hist[s].SetBinContent(j, -1.e-4)

    ### Stack ###
    xTitle = hist['BkgSum'].GetXaxis().GetTitle()
    if vdef['nbins'] > 0:
        bkg = THStack(
            "Bkg", ";" + xTitle + ";Events / ( " + str(
                (vdef['max'] - vdef['min']) / vdef['nbins']) + unit + " )")
    else:
        bkg = THStack("Bkg", ";" + xTitle + ";Events; ")
    for s in back:
        bkg.Add(hist[s])

    ### Legend ###
    leg = TLegend(0.65, 0.6, 0.95, 0.9)
    leg.SetBorderSize(0)
    leg.SetFillStyle(0)
    leg.SetFillColor(0)
    if len(data) > 0:
        leg.AddEntry(hist[data[0]], sample[data[0]]['label'], "pe")
    for s in reversed(['BkgSum'] + back):
        leg.AddEntry(hist[s], sample[s]['label'], "f")
    if showSignal:
        for s in sign:
            if sample[s]['plot']:
                leg.AddEntry(hist[s], sample[s]['label'], "fl")
    leg.SetY1(0.9 - leg.GetNRows() * 0.05)

    ### Display ###
    c1 = TCanvas("c1", list(hist.values())[0].GetXaxis().GetTitle(), 800,
                 800 if RATIO else 600)
    if RATIO:
        c1.Divide(1, 2)
        setTopPad(c1.GetPad(1), RATIO)
        setBotPad(c1.GetPad(2), RATIO)
    c1.cd(1)
    # Pad 1 when a ratio panel exists, otherwise pad 0 (the canvas itself).
    mainPad = c1.GetPad(bool(RATIO))
    mainPad.SetTopMargin(0.06)
    mainPad.SetRightMargin(0.05)
    mainPad.SetTicks(1, 1)

    log = vdef['log']
    if log:
        mainPad.SetLogy()

    bkg.Draw("HIST")  # stack
    hist['BkgSum'].Draw("SAME, E2")  # sum of bkg
    if not isBlind and len(data) > 0:
        hist['data_obs'].Draw("SAME, PE")  # data
    if showSignal:
        smagn = 1.
        for s in sign:
            hist[s].Scale(smagn)
            hist[s].Draw("SAME, HIST")
        # Return value kept in a local, as in the original code.
        textS = drawText(0.80, 0.9 - leg.GetNRows() * 0.05 - 0.02,
                         stype + " (x%d)" % smagn, True)

    bkg.GetYaxis().SetTitleOffset(0.9)
    dataMaxBin = hist['data_obs'].GetMaximumBin()
    dataPeak = (hist['data_obs'].GetBinContent(dataMaxBin) +
                hist['data_obs'].GetBinError(dataMaxBin))
    bkg.SetMaximum((5. if log else 1.25) * max(bkg.GetMaximum(), dataPeak))
    if log:
        lowest = min(
            hist['BkgSum'].GetBinContent(hist['BkgSum'].GetMinimumBin()),
            hist['data_obs'].GetMinimum())
        bkg.SetMinimum(max(lowest, 5.e-1))
        bkg.GetYaxis().SetNoExponent(bkg.GetMaximum() < 1.e4)
    else:
        bkg.SetMinimum(0.)
    bkg.GetXaxis().SetRangeUser(vdef['min'], vdef['max'])

    leg.Draw()
    drawCMS(LUMI[year], "Work in Progress", suppressCMS=True)
    drawRegion('XVH' + channel, True)
    drawAnalysis(channel)

    setHistStyle(bkg, 1.2 if RATIO else 1.1)
    setHistStyle(hist['BkgSum'], 1.2 if RATIO else 1.1)

    if RATIO:
        c1.cd(2)
        bsum = hist['BkgSum']
        # Unit band carrying the relative uncertainty of the background sum.
        err = bsum.Clone("BkgErr;")
        err.SetTitle("")
        err.GetYaxis().SetTitle("Data / MC")
        err.GetYaxis().SetTitleOffset(0.9)
        err.GetXaxis().SetRangeUser(vdef['min'], vdef['max'])
        for i in range(1, err.GetNbinsX() + 1):
            err.SetBinContent(i, 1)
            if bsum.GetBinContent(i) > 0:
                err.SetBinError(i, bsum.GetBinError(i) / bsum.GetBinContent(i))
        setBotStyle(err)
        errLine = err.Clone("errLine")
        errLine.SetLineWidth(1)
        errLine.SetFillStyle(0)
        # Data divided by the background sum, bin by bin.
        res = hist['data_obs'].Clone("Residues")
        for i in range(0, res.GetNbinsX() + 1):
            if bsum.GetBinContent(i) > 0:
                res.SetBinContent(i,
                                  res.GetBinContent(i) / bsum.GetBinContent(i))
                res.SetBinError(i, res.GetBinError(i) / bsum.GetBinContent(i))
        setBotStyle(res)
        err.Draw("E2")
        errLine.Draw("SAME, HIST")
        if not isBlind and len(data) > 0:
            res.Draw("SAME, PE0")
            if len(err.GetXaxis().GetBinLabel(1)) == 0:  # no bin labels
                drawRatio(hist['data_obs'], hist['BkgSum'])
                drawStat(hist['data_obs'], hist['BkgSum'])

    c1.Update()

    if gROOT.IsBatch():
        if channel == "":
            channel = "nocut"
        varname = var.replace('.', '_').replace('()', '')
        # Create the directory that is actually written to (the original
        # created "plots/<channel>" but printed into "plots/MANtag_study/").
        outDir = "plots/MANtag_study/" + channel
        if not os.path.exists(outDir):
            os.makedirs(outDir)
        suffix = ''
        if "b" in channel or 'mu' in channel:
            suffix += "_" + BTAGGING
        c1.Print(outDir + "/" + varname + "_" + year + suffix + ".png")
        c1.Print(outDir + "/" + varname + "_" + year + suffix + ".pdf")

    # Print table
    printTable(hist, sign)
def plot(var, cut, year, norm=False, nm1=False):
    """Draw the data / background / signal comparison of ``var``.

    Histograms are either projected from the ``tree`` TChains of each sample
    or, for the hard-coded channel names below, read as ready-made histograms
    ``<cut>/<var>`` from the sample files.  The backgrounds are stacked,
    data and signals are overlaid, an optional ratio panel is added, and the
    canvas is saved under ``plots/<channel>/`` in batch mode.

    Args:
        var: key of the module-level ``variable`` dict and the expression
            that is projected.
        cut: selection string or alias name; also used as the channel label.
        year: ``"run2"`` to use every file, otherwise a string that must be
            contained in a file name for it to be used; also the key into
            ``LUMI``.
        norm: scale the backgrounds to the data integral.  The command-line
            flag ``options.norm`` has the same effect; either one enables it.
        nm1: accepted for interface compatibility; not used.

    Relies on module-level configuration (``variable``, ``sample``, ``data``,
    ``back``, ``sign``, ``alias``, ``options``, ...) and plotting helpers
    defined elsewhere in the file.
    """
    ### Preliminary Operations ###
    # These channel names select histograms stored in the files.
    treeRead = cut not in [
        "nnqq", "en", "enqq", "mn", "mnqq", "ee", "eeqq", "mm", "mmqq", "em",
        "emqq", "qqqq"
    ]
    channel = cut
    vdef = variable[var]
    unit = ' GeV' if "GeV" in vdef['title'] else ''
    isBlind = BLIND and 'SR' in channel
    isAH = False  # legacy switch, permanently disabled
    showSignal = not ('SB' in cut or 'TR' in cut)
    stype = "HVT model B"
    if len(sign) > 0 and 'AZh' in sign[0]:
        stype = "2HDM"
    elif len(sign) > 0 and 'monoH' in sign[0]:
        stype = "Z'-2HDM m_{A}=300 GeV"

    if treeRead:
        # Longest alias names are substituted first.
        for k in sorted(alias, key=len, reverse=True):
            if k not in cut:
                continue
            if BTAGGING == 'semimedium':
                expanded = aliasSM[k]
            else:
                expanded = alias[k].format(WP=working_points[BTAGGING])
            if ADDSELECTION:
                expanded += SELECTIONS[options.selection]
            cut = cut.replace(k, expanded)

    # Determine Primary Dataset
    pd = sample['data_obs']['files']

    print("Plotting from %s %s in %s channel with:" %
          ("tree" if treeRead else "file", var, channel))
    print(" dataset: %s" % (pd, ))
    print(" cut : %s" % (cut, ))

    if var == 'jj_deltaEta_widejet':
        # The deltaEta requirement is dropped when deltaEta itself is drawn.
        if "jj_deltaEta_widejet<1.1 && " in cut:
            print("")
            print("omitting jj_deltaEta_widejet<1.1 cut to draw the deltaEta distribution")
            print("")
            cut = cut.replace("jj_deltaEta_widejet<1.1 && ", "")
        else:
            print("")
            print("no 'jj_deltaEta_widejet<1.1 && ' in the cut string detected, so it cannot be ommited explicitly")
            print("")

    ### Create and fill histograms ###
    file = {}  # opened TFiles are kept referenced here
    tree = {}
    hist = {}
    logFlag = 'log' if vdef['log'] else ''
    cutstring = "(eventWeightLumi)" + ("*(" + cut + ")"
                                       if len(cut) > 0 else "")

    for s in data + back + sign:
        if treeRead:  # Project from tree
            tree[s] = TChain("tree")
            for ss in sample[s]['files']:
                if ('data' not in s or ss in pd) and (year == "run2"
                                                      or year in ss):
                    tree[s].Add(NTUPLEDIR + ss + ".root")
            if vdef['nbins'] > 0:
                binWidth = str((vdef['max'] - vdef['min']) / vdef['nbins'])
                hist[s] = TH1F(
                    s, ";" + vdef['title'] + ";Events / ( " + binWidth +
                    unit + " );" + logFlag, vdef['nbins'], vdef['min'],
                    vdef['max'])
            else:
                # The ';' keeps the log flag in the third title slot instead
                # of gluing it onto the y-axis title.
                hist[s] = TH1F(s, ";" + vdef['title'] + ";Events;" + logFlag,
                               len(vdef['bins']) - 1,
                               array('f', vdef['bins']))
            hist[s].Sumw2()
            tree[s].Project(s, var, cutstring)
            # '== None' is kept from the original; NOTE(review): PyROOT null
            # proxies compare equal to None without being the None object.
            if not tree[s].GetTree() == None:
                hist[s].SetOption("%s" % tree[s].GetTree().GetEntriesFast())
        else:  # Histogram written to file
            for ss in sample[s]['files']:
                if 'data' in s and ss not in pd:
                    continue
                file[ss] = TFile(NTUPLEDIR + ss + ".root", "R")
                if file[ss].IsZombie():
                    print("WARNING: file %s does not exist" %
                          (NTUPLEDIR + ss + ".root", ))
                    continue
                tmphist = file[ss].Get(cut + "/" + var)
                if tmphist == None:
                    continue
                if s not in hist:
                    hist[s] = tmphist
                else:
                    hist[s].Add(tmphist)
        hist[s].Scale(sample[s]['weight'] if hist[s].Integral() >= 0 else 0)
        hist[s].SetFillColor(sample[s]['fillcolor'])
        hist[s].SetFillStyle(sample[s]['fillstyle'])
        hist[s].SetLineColor(sample[s]['linecolor'])
        hist[s].SetLineStyle(sample[s]['linestyle'])

    region = channel.replace('TR', '')
    if channel.endswith('TR') and region in topSF:
        hist['TTbarSL'].Scale(topSF[region][0])
        hist['ST'].Scale(topSF[region][0])

    ### Background sum ###
    if 'data_obs' in hist:
        hist['BkgSum'] = hist['data_obs'].Clone("BkgSum")
    else:
        hist['BkgSum'] = hist[back[0]].Clone("BkgSum")
    hist['BkgSum'].Reset("MICES")
    hist['BkgSum'].SetFillStyle(3003)
    hist['BkgSum'].SetFillColor(1)
    for s in back:
        hist['BkgSum'].Add(hist[s])

    if norm or options.norm:
        # One common factor for every background and their sum; skipped when
        # the background is empty instead of raising ZeroDivisionError.
        bkgIntegral = hist['BkgSum'].Integral()
        if bkgIntegral != 0:
            dataOverBkg = hist[data[0]].Integral() / bkgIntegral
            for s in back + ['BkgSum']:
                hist[s].Scale(dataOverBkg)
        else:
            print("WARNING: background sum is empty, normalisation skipped")

    # Blinded or signal-region plots get an empty data histogram.
    if options.blind or 'SR' in channel:
        hist['data_obs'] = hist['BkgSum'].Clone("data_obs")
        hist['data_obs'].Reset("MICES")
    hist['data_obs'].SetMarkerStyle(20)
    hist['data_obs'].SetMarkerSize(1.25)

    for s in data + back + sign + ['BkgSum']:
        addOverflow(hist[s], False)
    for s in sign:
        hist[s].SetLineWidth(3)
        sample[s]['plot'] = True

    if isAH:
        for s in back:
            hist[s].SetFillStyle(3005)
            hist[s].SetLineWidth(2)
        if not var == "Events":
            sfnorm = hist[data[0]].Integral() / hist['BkgSum'].Integral()
            print("Applying SF: %s" % (sfnorm, ))
            for s in back + ['BkgSum']:
                hist[s].Scale(sfnorm)
        if BLIND and var.endswith("Mass"):
            for s in data + back + ['BkgSum']:
                first, last = hist[s].FindBin(65), hist[s].FindBin(135)
                for j in range(first, last):
                    hist[s].SetBinContent(j, -1.e-4)
        if BLIND and var.endswith("Tau21"):
            for s in data:
                first, last = hist[s].FindBin(0), hist[s].FindBin(0.6)
                for j in range(first, last):
                    hist[s].SetBinContent(j, -1.e-4)

    # Reference histogram for the synchronisation check: file and object
    # name per year.
    syncInputs = {
        '2016': ("sync/2016/2016_07Aug2017_1246_1p1.root", "h_mjj_data"),
        '2017':
        ("sync/2017/histos_Run2017BCDEF_17Nov2017_JEC2017_mjj1530_cemf_lt_0p8_deltaETA_lt_1p1.root",
         "h_mjj_data"),
        '2018':
        ("sync/2018/Double_sideband_inputs_18v10_preliminary_v2.root",
         "h_mjj"),
    }
    if SYNC and var == "jj_mass_widejet" and year in syncInputs:
        syncPath, syncName = syncInputs[year]
        iFile = TFile(syncPath, "READ")
        hist['sync'] = iFile.Get(syncName)
        hist['sync'].SetMarkerStyle(31)
        hist['sync'].SetMarkerSize(1.25)
        hist['sync'].SetMarkerColor(2)
        print("Imported and drawing sync file")

    ### Stack ###
    xTitle = hist['BkgSum'].GetXaxis().GetTitle()
    if vdef['nbins'] > 0:
        bkg = THStack(
            "Bkg", ";" + xTitle + ";Events / ( " + str(
                (vdef['max'] - vdef['min']) / vdef['nbins']) + unit + " )")
    else:
        bkg = THStack("Bkg", ";" + xTitle + ";Events; ")
    for s in back:
        bkg.Add(hist[s])

    ### Legend ###
    leg = TLegend(0.65, 0.6, 0.95, 0.9)
    leg.SetBorderSize(0)
    leg.SetFillStyle(0)
    leg.SetFillColor(0)
    if len(data) > 0:
        leg.AddEntry(hist[data[0]], sample[data[0]]['label'], "pe")
    for s in reversed(['BkgSum'] + back):
        leg.AddEntry(hist[s], sample[s]['label'], "f")
    if showSignal:
        for s in sign:
            if sample[s]['plot']:
                leg.AddEntry(hist[s], sample[s]['label'], "fl")
    leg.SetY1(0.9 - leg.GetNRows() * 0.05)

    ### Display ###
    c1 = TCanvas("c1", list(hist.values())[0].GetXaxis().GetTitle(), 800,
                 800 if RATIO else 600)
    if RATIO:
        c1.Divide(1, 2)
        setTopPad(c1.GetPad(1), RATIO)
        setBotPad(c1.GetPad(2), RATIO)
    c1.cd(1)
    # Pad 1 when a ratio panel exists, otherwise pad 0 (the canvas itself).
    mainPad = c1.GetPad(bool(RATIO))
    mainPad.SetTopMargin(0.06)
    mainPad.SetRightMargin(0.05)
    mainPad.SetTicks(1, 1)

    log = vdef['log']
    if log:
        mainPad.SetLogy()

    bkg.Draw("HIST")  # stack
    hist['BkgSum'].Draw("SAME, E2")  # sum of bkg
    if not isBlind and len(data) > 0:
        hist['data_obs'].Draw("SAME, PE")  # data
    if 'sync' in hist:
        hist['sync'].Draw("SAME, PE")
    if showSignal:
        smagn = 1.
        for s in sign:
            hist[s].Scale(smagn)
            hist[s].Draw("SAME, HIST")
        # Return value kept in a local, as in the original code.
        textS = drawText(0.80, 0.9 - leg.GetNRows() * 0.05 - 0.02,
                         stype + " (x%d)" % smagn, True)

    bkg.GetYaxis().SetTitleOffset(0.9)
    dataMaxBin = hist['data_obs'].GetMaximumBin()
    dataPeak = (hist['data_obs'].GetBinContent(dataMaxBin) +
                hist['data_obs'].GetBinError(dataMaxBin))
    bkg.SetMaximum((5. if log else 1.25) * max(bkg.GetMaximum(), dataPeak))
    if log:
        lowest = min(
            hist['BkgSum'].GetBinContent(hist['BkgSum'].GetMinimumBin()),
            hist['data_obs'].GetMinimum())
        bkg.SetMinimum(max(lowest, 5.e-1))
        bkg.GetYaxis().SetNoExponent(bkg.GetMaximum() < 1.e4)
    else:
        bkg.SetMinimum(0.)
    bkg.GetXaxis().SetRangeUser(vdef['min'], vdef['max'])

    leg.Draw()
    drawCMS(LUMI[year], "", suppressCMS=True)
    drawRegion('XVH' + channel, True)
    drawAnalysis(channel)

    setHistStyle(bkg, 1.2 if RATIO else 1.1)
    setHistStyle(hist['BkgSum'], 1.2 if RATIO else 1.1)

    if RATIO:
        c1.cd(2)
        bsum = hist['BkgSum']
        # Unit band carrying the relative uncertainty of the background sum.
        err = bsum.Clone("BkgErr;")
        err.SetTitle("")
        err.GetYaxis().SetTitle("Nano/Mini" if SYNC else "Data / MC")
        err.GetYaxis().SetTitleOffset(0.9)
        err.GetXaxis().SetRangeUser(vdef['min'], vdef['max'])
        for i in range(1, err.GetNbinsX() + 1):
            err.SetBinContent(i, 1)
            if bsum.GetBinContent(i) > 0:
                err.SetBinError(i, bsum.GetBinError(i) / bsum.GetBinContent(i))
        setBotStyle(err)
        errLine = err.Clone("errLine")
        errLine.SetLineWidth(1)
        errLine.SetFillStyle(0)
        # Data divided by the background sum, bin by bin.
        res = hist['data_obs'].Clone("Residues")
        for i in range(0, res.GetNbinsX() + 1):
            if bsum.GetBinContent(i) > 0:
                res.SetBinContent(i,
                                  res.GetBinContent(i) / bsum.GetBinContent(i))
                res.SetBinError(i, res.GetBinError(i) / bsum.GetBinContent(i))
        if 'sync' in hist:
            # With a sync reference the panel shows data / reference instead;
            # bins are matched through the bin centre because the two
            # histograms need not share a binning.
            dataH, syncH = hist['data_obs'], hist['sync']
            res.SetMarkerColor(1)
            res.SetMarkerStyle(20)
            res.Reset()
            for i in range(0, res.GetNbinsX() + 1):
                x = dataH.GetXaxis().GetBinCenter(i)
                reference = syncH.GetBinContent(syncH.FindBin(x))
                if reference > 0:
                    res.SetBinContent(
                        i,
                        dataH.GetBinContent(dataH.FindBin(x)) / reference)
                    res.SetBinError(
                        i,
                        dataH.GetBinError(dataH.FindBin(x)) / reference)
        setBotStyle(res)
        err.Draw("E2")
        errLine.Draw("SAME, HIST")
        if not isBlind and len(data) > 0:
            res.Draw("SAME, PE0")
            if len(err.GetXaxis().GetBinLabel(1)) == 0:  # no bin labels
                drawRatio(hist['data_obs'], hist['BkgSum'])
                drawStat(hist['data_obs'], hist['BkgSum'])
        if SYNC:
            err.GetYaxis().SetRangeUser(0.9, 1.1)

    c1.Update()

    if gROOT.IsBatch():
        if channel == "":
            channel = "nocut"
        varname = var.replace('.', '_').replace('()', '')
        outDir = "plots/" + channel
        if not os.path.exists(outDir):
            os.makedirs(outDir)
        suffix = ''
        if "b" in channel or 'mu' in channel:
            suffix += "_" + BTAGGING
        if ADDSELECTION:
            suffix += "_" + options.selection
        c1.Print(outDir + "/" + varname + "_" + year + suffix + ".png")
        c1.Print(outDir + "/" + varname + "_" + year + suffix + ".pdf")

    # Print table
    printTable(hist, sign)

    if not gROOT.IsBatch():
        raw_input("Press Enter to continue...")
def plot(var, cut, nm1=False):
    """Draw the data / background / signal comparison of ``var``.

    Every sample is projected from an ``Events`` TChain built from all files
    found in ``NTUPLEDIR/<sample file>/``; data files are restricted to the
    primary dataset implied by the trigger flag found in ``cut``.  The
    backgrounds are stacked, data and signals overlaid, an optional ratio
    panel is added, and the canvas is always saved under ``plots/<channel>/``.

    Args:
        var: key of the module-level ``variable`` dict and the expression
            that is projected.
        cut: selection string or alias name; also used as the channel label.
        nm1: accepted for interface compatibility; not used.

    Raises:
        SystemExit: when the primary dataset cannot be derived from ``cut``.

    Relies on module-level configuration (``variable``, ``sample``, ``data``,
    ``back``, ``sign``, ``alias``, ``options``, ``YEAR``, ...) and plotting
    helpers defined elsewhere in the file.
    """
    ### Preliminary Operations ###
    treeRead = not FILE
    channel = cut
    isBlind = BLIND
    showSignal = not ('SB' in cut or 'TR' in cut)

    # Determine explicit cut (longest alias names are substituted first)
    if treeRead:
        for k in sorted(alias, key=len, reverse=True):
            if k in cut:
                cut = cut.replace(k, alias[k])

    # Determine Primary Dataset
    if "isSingleMuonPhotonTrigger" in cut:
        pd = [x for x in sample['data_obs']['files'] if "MuonEG" in x]
    elif "isJPsiTrigger" in cut:
        pd = [x for x in sample['data_obs']['files'] if "Charmonium" in x]
    else:
        print("Cannot determine Primary Dataset.")
        # Explicit SystemExit instead of the interactive exit() helper, with
        # a non-zero status so that the failure is visible to the caller.
        raise SystemExit(1)

    print("Plotting from %s %s in %s channel with:" %
          ("tree" if treeRead else "file", var, channel))
    print(" dataset: %s" % (pd, ))
    print(" cut : %s" % (cut, ))

    # Data (not MC) are blinded in two H_mass windows for the signal region.
    if isBlind and "SR" in channel and var in ["H_mass"]:
        cut += " && ( isMC ? 1 : !(H_mass > 86 && H_mass < 96) && !(H_mass > 120 && H_mass < 130) )"

    ### Create and fill histograms ###
    tree = {}
    hist = {}
    vdef = variable[var]
    cutstring = "(eventWeightLumi)" + ("*(" + cut + ")"
                                       if len(cut) > 0 else "")
    # Substrings a file name must contain to belong to a given YEAR.
    eraTags = {
        2016: ('Run2016', 'Summer16'),
        2017: ('Run2017', 'Fall17'),
        2018: ('Run2018', 'Autumn18'),
    }

    for s in data + back + sign:
        # The log flags travel in the third title slot and are read back
        # from the z-axis title when the canvas is set up.
        hist[s] = TH1F(
            s, ";" + vdef['title'] + ";Events;" +
            ('logx' if vdef['logx'] else '') +
            ('logy' if vdef['logy'] else ''), vdef['nbins'], vdef['min'],
            vdef['max'])
        hist[s].Sumw2()
        tree[s] = TChain("Events")
        for ss in sample[s]['files']:
            if s in data and ss not in pd:
                continue
            if YEAR in eraTags and not any(tag in ss
                                           for tag in eraTags[YEAR]):
                continue
            for f in os.listdir(NTUPLEDIR + '/' + ss):
                tree[s].Add(NTUPLEDIR + '/' + ss + '/' + f)
        tree[s].Project(s, var, cutstring)
        # '== None' is kept from the original; NOTE(review): PyROOT null
        # proxies compare equal to None without being the None object.
        if not tree[s].GetTree() == None:
            hist[s].SetOption("%s" % tree[s].GetTree().GetEntriesFast())

    # Histogram style
    for s in data + back + sign:
        hist[s].Scale(sample[s]['weight'] if hist[s].Integral() >= 0 else 0)
        hist[s].SetFillColor(sample[s]['fillcolor'])
        hist[s].SetFillStyle(sample[s]['fillstyle'] if not options.norm else 0)
        hist[s].SetLineColor(sample[s]['linecolor'])
        hist[s].SetLineStyle(sample[s]['linestyle'])
        hist[s].SetLineWidth(sample[s]['linewidth'])

    ### Create Bkg Sum histogram ###
    if 'data_obs' in hist:
        hist['BkgSum'] = hist['data_obs'].Clone("BkgSum")
    else:
        hist['BkgSum'] = hist[back[0]].Clone("BkgSum")
    hist['BkgSum'].Reset("MICES")
    hist['BkgSum'].SetFillStyle(3003)
    hist['BkgSum'].SetFillColor(1)
    for s in back:
        hist['BkgSum'].Add(hist[s])

    if options.norm:
        # Backgrounds share one factor (data / background sum); each signal
        # is scaled to the data integral on its own.  Empty histograms are
        # skipped instead of raising ZeroDivisionError.
        dataIntegral = hist[data[0]].Integral()
        bkgIntegral = hist['BkgSum'].Integral()
        if bkgIntegral != 0:
            for s in back + ['BkgSum']:
                hist[s].Scale(dataIntegral / bkgIntegral)
        else:
            print("WARNING: background sum is empty, normalisation skipped")
        for s in sign:
            signalIntegral = hist[s].Integral()
            if signalIntegral != 0:
                hist[s].Scale(dataIntegral / signalIntegral)
            else:
                print("WARNING: %s is empty, normalisation skipped" % (s, ))

    # Set histogram style
    hist['data_obs'].SetMarkerStyle(20)
    hist['data_obs'].SetMarkerSize(1.25)
    for s in sign:
        hist[s].SetLineWidth(3)
        sample[s]['plot'] = True

    ### Stack ###
    bkg = THStack("Bkg",
                  ";" + hist['BkgSum'].GetXaxis().GetTitle() + ";Events")
    for s in back:
        bkg.Add(hist[s])

    ### Legend ###
    leg = TLegend(0.65, 0.6, 0.95, 0.9)
    leg.SetBorderSize(0)
    leg.SetFillStyle(0)
    leg.SetFillColor(0)
    if len(data) > 0:
        leg.AddEntry(hist[data[0]], sample[data[0]]['label'], "pe")
    for s in reversed(['BkgSum'] + back):
        leg.AddEntry(hist[s], sample[s]['label'], "f")
    if showSignal:
        for s in sign:
            if sample[s]['plot']:
                leg.AddEntry(hist[s], sample[s]['label'], "fl")
    leg.SetY1(0.9 - leg.GetNRows() * 0.04)

    ### Display ###
    c1 = TCanvas("c1", list(hist.values())[0].GetXaxis().GetTitle(), 800,
                 800 if RATIO else 600)
    if RATIO:
        c1.Divide(1, 2)
        setTopPad(c1.GetPad(1), RATIO)
        setBotPad(c1.GetPad(2), RATIO)
    c1.cd(1)
    # Pad 1 when a ratio panel exists, otherwise pad 0 (the canvas itself).
    mainPad = c1.GetPad(bool(RATIO))
    mainPad.SetTopMargin(0.06)
    mainPad.SetRightMargin(0.05)
    mainPad.SetTicks(1, 1)

    flags = hist['BkgSum'].GetZaxis().GetTitle()
    logX, logY = "logx" in flags, "logy" in flags
    if logY:
        mainPad.SetLogy()
    if logX:
        mainPad.SetLogx()

    bkg.Draw("HIST")  # stack
    hist['BkgSum'].Draw("SAME, E2")  # sum of bkg
    if len(data) > 0:
        hist['data_obs'].Draw("SAME, PE")  # data
    for s in sign:
        if sample[s]['plot']:
            hist[s].Draw("SAME, HIST")

    bkg.GetYaxis().SetTitleOffset(bkg.GetYaxis().GetTitleOffset() * 1.075)
    dataMaxBin = hist['data_obs'].GetMaximumBin()
    dataPeak = (hist['data_obs'].GetBinContent(dataMaxBin) +
                hist['data_obs'].GetBinError(dataMaxBin))
    bkg.SetMaximum((5. if logY else 1.25) * max(bkg.GetMaximum(), dataPeak))
    if logY:
        lowest = min(
            hist['BkgSum'].GetBinContent(hist['BkgSum'].GetMinimumBin()),
            hist['data_obs'].GetMinimum())
        bkg.SetMinimum(max(lowest, 5.e-1))
        bkg.GetYaxis().SetNoExponent(bkg.GetMaximum() < 1.e4)
        bkg.GetYaxis().SetMoreLogLabels(True)
    else:
        bkg.SetMinimum(0.)

    leg.Draw()
    drawCMS(LUMI[YEAR], "Preliminary")
    if channel in aliasNames:
        drawRegion(aliasNames[channel], True)

    setHistStyle(bkg, 1.2 if RATIO else 1.1)
    setHistStyle(hist['BkgSum'], 1.2 if RATIO else 1.1)

    if RATIO:
        c1.cd(2)
        if logX:
            c1.GetPad(2).SetLogx()
        bsum = hist['BkgSum']
        # Unit band carrying the relative uncertainty of the background sum.
        err = bsum.Clone("BkgErr;")
        err.SetTitle("")
        err.GetYaxis().SetTitle("Data / Bkg")
        for i in range(1, err.GetNbinsX() + 1):
            err.SetBinContent(i, 1)
            if bsum.GetBinContent(i) > 0:
                err.SetBinError(i, bsum.GetBinError(i) / bsum.GetBinContent(i))
        setBotStyle(err)
        errLine = err.Clone("errLine")
        errLine.SetLineWidth(1)
        errLine.SetFillStyle(0)
        # Data divided by the background sum, bin by bin.
        res = hist['data_obs'].Clone("Residues")
        for i in range(0, res.GetNbinsX() + 1):
            if bsum.GetBinContent(i) > 0:
                res.SetBinContent(i,
                                  res.GetBinContent(i) / bsum.GetBinContent(i))
                res.SetBinError(i, res.GetBinError(i) / bsum.GetBinContent(i))
        setBotStyle(res)
        err.Draw("E2")
        errLine.Draw("SAME, HIST")
        if len(data) > 0:
            res.Draw("SAME, PE0")
            if len(err.GetXaxis().GetBinLabel(1)) == 0:  # no bin labels
                drawRatio(hist['data_obs'], hist['BkgSum'])
                drawStat(hist['data_obs'], hist['BkgSum'])

    if var in ["H_mass"]:
        # Mark the Z and H windows on the main pad; the returned boxes are
        # kept in locals, as in the original code.
        c1.cd(bool(RATIO))
        boxZ = drawBox(XZMIN, hist['data_obs'].GetMinimum(), XZMAX,
                       hist['data_obs'].GetMaximum() / 1.30, "Z")
        boxH = drawBox(XHMIN, hist['data_obs'].GetMinimum(), XHMAX,
                       hist['data_obs'].GetMaximum() / 1.30, "H")

    c1.Update()

    # Plots are written in interactive mode as well.
    varname = var.replace('.', '_').replace('()', '')
    outDir = "plots/" + channel
    if not os.path.exists(outDir):
        os.makedirs(outDir)
    c1.Print(outDir + "/" + varname + ".png")
    c1.Print(outDir + "/" + varname + ".pdf")

    # Print table
    printTable(hist, sign)

    if not gROOT.IsBatch():
        raw_input("Press Enter to continue...")
def btag_efficiency(cut, year, pT_range=None):
    """Draw ROC curves (b-tag efficiency vs. mistag rate) of the taggers.

    For every discriminator in ``BTAGGEFFVARS`` the values of both jets
    (branch suffixes ``_1`` and ``_2``) are histogrammed: b jets
    (``abs(jflavour)==5``) from the ``ZpBB_M*`` signal samples, and jets with
    ``abs(jflavour)`` below 4, equal to 9 or equal to 21 from the background
    samples.  The two binned distributions are passed to
    ``sklearn.metrics.roc_curve`` and the curves are saved to
    ``plots/btag_eff/ROC_<year>_<pT suffix>.png/.pdf``.

    Args:
        cut: selection string or alias name (expanded through ``aliasSM``).
        year: ``"run2"`` to use every file, otherwise a string that must be
            contained in a file name for it to be used.
        pT_range: optional ``(low, high)`` pair; jets are required to have
            ``low <= jpt < high``.  ``None`` means no pT requirement, and the
            pT label is then left off the plot.

    Relies on module-level configuration (``sample``, ``back``, ``alias``,
    ``aliasSM``, ``btag_colors``, ``btag_titles``, ``NTUPLEDIR``, ...)
    defined elsewhere in the file.
    """
    import os
    from root_numpy import hist2array
    from sklearn.metrics import roc_curve
    import numpy as np

    genPoints = [
        1600, 1800, 2000, 2500, 3000, 3500, 4000, 4500, 5000, 5500, 6000,
        7000, 8000
    ]
    signal = ['ZpBB_M' + str(x) for x in genPoints]
    btag_vars = BTAGGEFFVARS
    nbins = 100
    min_, max_ = 0., 1.
    axisTitles = ";efficiency; mistag rate"

    # Longest alias names are substituted first.
    for k in sorted(alias, key=len, reverse=True):
        if k in cut:
            cut = cut.replace(k, aliasSM[k])

    ### Fill one discriminator histogram per sample and tagger ###
    tree = {}
    hist = {}
    for s in back + signal:
        tree[s] = TChain("tree")
        for ss in sample[s]['files']:
            if year == "run2" or year in ss:
                tree[s].Add(NTUPLEDIR + ss + ".root")  # skimmed ntuples

        for var in btag_vars:
            key = s + "_" + var
            hist[key] = TH1F(key, axisTitles, nbins, min_, max_)
            for suf in ["_1", "_2"]:
                temp_hist = TH1F(key + suf, axisTitles, nbins, min_, max_)
                temp_hist.Sumw2()
                if pT_range is not None:
                    pT_cut = (" && jpt" + suf + ">=" + str(pT_range[0]) +
                              " && jpt" + suf + "<" + str(pT_range[1]))
                else:
                    pT_cut = ""
                if s in signal:
                    flavourcut = " && abs(jflavour" + suf + ")==5"
                else:
                    ## FIXME this currently excludes charms
                    flavourcut = (" && (abs(jflavour" + suf +
                                  ")<4 || abs(jflavour" + suf +
                                  ")==9 || abs(jflavour" + suf + ")==21)")
                if len(cut) == 0:
                    # No leading selection: drop the " && " connector.
                    flavourcut = flavourcut[4:]
                cutstring = ("(eventWeightLumi)" + "*(" + cut + flavourcut +
                             pT_cut + ")")
                tree[s].Project(key + suf, var + suf, cutstring)
                # '== None' is kept from the original; NOTE(review): PyROOT
                # null proxies compare equal to None without being None.
                if not tree[s].GetTree() == None:
                    hist[key].SetOption(
                        "%s" % tree[s].GetTree().GetEntriesFast())
                hist[key].Add(temp_hist)
                temp_hist.Delete()

    ### ROC curves from the binned distributions ###
    fpr = {}
    tpr = {}
    thr = {}
    for var in btag_vars:
        hist_sig = TH1F(var + "_sig", axisTitles, nbins, min_, max_)
        hist_bkg = TH1F(var + "_bkg", axisTitles, nbins, min_, max_)
        for s in signal:
            hist_sig.Add(hist[s + "_" + var])
        for s in back:
            hist_bkg.Add(hist[s + "_" + var])
        sig_arr, sig_edges = hist2array(hist_sig, return_edges=True)
        bkg_arr, bkg_edges = hist2array(hist_bkg, return_edges=True)
        assert len(sig_edges[0]) == len(bkg_edges[0])

        # Each bin becomes one pseudo-sample at the bin centre, weighted by
        # the bin content, once as signal (label 1) and once as background
        # (label 0).
        edges = np.asarray(sig_edges[0])
        centres = 0.5 * (edges[:-1] + edges[1:])
        ncentres = len(centres)
        labels = np.concatenate((np.ones(ncentres), np.zeros(ncentres)))
        scores = np.concatenate((centres, centres))
        weights = np.concatenate((sig_arr[:ncentres], bkg_arr[:ncentres]))
        fpr[var], tpr[var], thr[var] = roc_curve(labels, scores,
                                                 sample_weight=weights)

    ### Draw ###
    canv = TCanvas('c', 'c', 500, 650)
    canv.SetGrid()

    graphs = {}
    for var in btag_vars:
        graphs[var] = TGraph(len(tpr[var]), tpr[var], fpr[var])
        graphs[var].SetLineColor(btag_colors[var])
        graphs[var].SetMarkerStyle(1)
        graphs[var].SetMarkerColor(btag_colors[var])
        graphs[var].SetLineWidth(2)
        graphs[var].SetTitle(";b tagging efficiency;mistag rate (udsg jets)")
        graphs[var].GetXaxis().SetLimits(0., 1.)
        graphs[var].GetHistogram().SetMinimum(1e-4)
        graphs[var].GetHistogram().SetMaximum(1.)
        graphs[var].GetYaxis().SetTitleOffset(1.4)

    leg = TLegend(0.65, 0.15, 0.9, 0.35)
    for j, var in enumerate(btag_vars):
        leg.AddEntry(graphs[var], btag_titles[var])
        # The first graph also draws the axes.
        graphs[var].Draw("APL" if j == 0 else "PL SAME")

    if pT_range is not None:
        # Only label the pT window when one was requested; the default
        # pT_range=None used to raise a TypeError here.
        latex = TLatex(0.05, 0.5,
                       str(pT_range[0]) + '<p_{T}<' + str(pT_range[1]) +
                       ' GeV')
        latex.SetTextSize(0.043)
        latex.Draw()
        pt_suff = "pT{}to{}".format(pT_range[0], pT_range[1])
    else:
        pt_suff = "incl_pT"

    leg.Draw()
    canv.SetLogy()

    # Make sure the output directory exists before printing into it.
    if not os.path.exists("plots/btag_eff"):
        os.makedirs("plots/btag_eff")
    canv.Print("plots/btag_eff/ROC_{}_{}.png".format(year, pt_suff))
    canv.Print("plots/btag_eff/ROC_{}_{}.pdf".format(year, pt_suff))