def readAndFillHist(filename, bnn, c1, h1, m4lmela,
                    step=10000, SR=False,
                    treename="HZZ4LeptonsAnalysisReduced"):
    """Fill ``h1`` with the weighted discriminant ``bnn`` for one ntuple file.

    Events flagged as outliers, or whose ``f_mass4l`` lies outside
    [MASSMIN, MASSMAX], are ignored.  Selected events are skipped until
    the running count reaches ``ntrain`` (50000 if the file name contains
    'SM', otherwise 3000) -- presumably these are the events used for
    training; confirm against the training script.

    Parameters:
        filename  name of the ntuple file (also used to choose ``ntrain``)
        bnn       callable evaluated on the vector of input variables
        c1, h1    canvas and histogram that receive the discriminant
        m4lmela   callable ``(f_mass4l, f_D_bkg_kin)``; only used when
                  ``SR`` is true
        step      print progress / redraw every ``step`` selected events
        SR        if true, keep only events with ``m4lmela(...) >= BNNCUT``
        treename  name of the tree inside the file

    The histogram is normalised to unit integral at the end, unless its
    integral is zero.
    """
    # ---------------------------------------
    # open ntuple file
    # ---------------------------------------
    data = Ntuple(filename, treename)
    print(filename)

    # Substring test instead of find(...) > 0, which silently missed a
    # match located at the very start of the file name.
    ntrain = 50000 if 'SM' in filename else 3000
    print('ntrain: %s' % ntrain)

    fields = [VARNAME]
    inputs = vector('double')(len(fields))

    count = 0
    for row, event in enumerate(data):
        if event.f_outlier:
            continue
        if event.f_mass4l < MASSMIN:
            continue
        if event.f_mass4l > MASSMAX:
            continue

        if count % step == 0:
            print("%10d\t%10d" % (row, count))
        count += 1

        # skip the leading block of selected events
        if count < ntrain:
            continue

        if SR:
            d = m4lmela(event.f_mass4l, event.f_D_bkg_kin)
            if d < BNNCUT:
                continue

        w = event.f_weight
        for ii, field in enumerate(fields):
            inputs[ii] = getattr(event, field)
        D = bnn(inputs)
        h1.Fill(D, w)

        # periodic refresh so the canvas follows the filling
        if count % step == 0:
            c1.cd()
            h1.Draw("hist")
            c1.Update()
            gSystem.ProcessEvents()

    c1.cd()
    total = h1.Integral()
    if total != 0:
        h1.Scale(1.0 / total)
    else:
        # nothing was filled: dividing by the integral would raise
        print('** warning ** empty histogram for %s: not normalised'
              % filename)
    h1.Draw("hist")
    c1.Update()
    gSystem.ProcessEvents()
def readAndFillHist(filename, bnn,
                    c1, h1, c2, h2, c3, h3, c4, h4,
                    treename='HZZ4LeptonsAnalysisReduced'):
    """Fill four histograms from one ntuple file and draw them.

    For every event that is not an outlier and whose ``f_mass4l`` lies in
    [MASSMIN, MASSMAX], with weight ``f_weight``:

        h1  <- bnn(f_mass4l, f_D_bkg_kin)
        h2  <- (f_mass4l, f_D_bkg_kin)
        h3  <- f_mass4l
        h4  <- f_D_bkg_kin

    ``c1`` .. ``c4`` are the canvases on which ``h1`` .. ``h4`` are drawn.
    A file whose name contains 'data' is drawn with option 'ep' and left
    unnormalised; any other file is normalised to unit integral and drawn
    with option 'hist'.
    """
    # ---------------------------------------
    # open ntuple file
    # ---------------------------------------
    # Substring test instead of find(...) > 0, which treated a file whose
    # name *starts* with 'data' as simulation.
    isData = 'data' in filename
    option = 'ep' if isData else 'hist'

    ntuple = Ntuple(filename, treename)
    for row, event in enumerate(ntuple):
        if event.f_outlier:
            continue
        x = event.f_mass4l
        if x < MASSMIN:
            continue
        if x > MASSMAX:
            continue

        w = event.f_weight
        y = event.f_D_bkg_kin
        D = bnn(x, y)

        h1.Fill(D, w)
        h2.Fill(x, y, w)
        h3.Fill(x, w)
        h4.Fill(y, w)

        # periodic refresh of all four canvases
        if row % 10000 == 0:
            for c, h in [(c1, h1), (c3, h3), (c4, h4)]:
                c.cd()
                h.Draw(option)
                c.Update()
                gSystem.ProcessEvents()
            c2.cd()
            h2.Draw("p")
            c2.Update()

    for c, h in [(c1, h1), (c2, h2), (c3, h3), (c4, h4)]:
        c.cd()
        if not isData:
            total = h.Integral()
            # an empty histogram cannot be normalised (division by zero)
            if total != 0:
                h.Scale(1.0 / total)
        h.Draw(option)
        c.Update()
        gSystem.ProcessEvents()
def main():
    """Build an unweighted bootstrap sample from the weighted 4mu ntuples.

    1. load the events of d_4mu_{gg,VV,bkg}.root into memory
    2. create a bootstrap sample by randomly selecting events with a
       probability proportional to event weight; the sample size is the
       total weight rounded to the nearest integer
    3. write the selected events, each with weight 1, to
       d_4mu_simdata.root

    Exits with an error message if no events could be read.
    """
    print("\n\tmakesimdata.py\n")

    treename = "HZZ4LeptonsAnalysisReduced"

    # source names
    srcnames = ['gg', 'VV', 'bkg']

    # ntuple variable names
    varnames = ['D_VVgg_MLP', 'D_VVgg_BDT', 'D_bkg', 'weight']

    # ---------------------------------------------------
    # 1. load data into memory
    # ---------------------------------------------------
    records = []
    weight = 0.0
    for name in srcnames:
        filename = 'd_4mu_%s.root' % name
        print('read %s' % filename)
        ntuple = Ntuple(filename, treename)
        for row in ntuple:
            rec = [row(varname) for varname in varnames]
            records.append(rec)
            weight += rec[-1]  # weight is last column
    print("Total weight (300/fb): %8.2f" % weight)

    if not records:
        # the cdf below cannot be built from an empty sample
        sys.exit("**error** no events read from input ntuples")

    # ---------------------------------------------------
    # 2. cumulative distribution of the event weights
    # ---------------------------------------------------
    print("\tcompute cdf of weights")
    wcdf = []
    running = 0.0
    for rec in records:
        running += rec[-1]
        wcdf.append(running)
    sumw = wcdf[-1]
    # sanity check: both sums are accumulated in the same order
    if sumw != weight:
        sys.exit("huh?")

    # randomly select "N" events according to event weight
    N = int(sumw + 0.5)  # number of events to select
    print("\tselecting %d events" % N)

    outrecords = []
    for i in xrange(N):
        w = uniform(0, sumw)
        k = binsearch(wcdf, w)
        if k < 0:
            sys.exit("**error** not found %f *** should not happen!" % w)
        # copy, so that resetting the weight does not modify the
        # record stored in the input sample
        selected = list(records[k])
        selected[-1] = 1.0
        outrecords.append(selected)

    # ---------------------------------------------------
    # 3. write out records to an ntuple
    # ---------------------------------------------------
    filename = 'd_4mu_simdata.root'
    makeTree(filename, treename, outrecords)
def wrout(filename, name):
    """Write a shuffled text training sample ``<name>_weighted.txt``.

    Reads the tree HZZ4LeptonsAnalysisReduced from ``filename``, keeps the
    events that are not outliers and whose ``f_mass4l`` lies in
    [MASSMIN, MASSMAX], and stops after ``ntrain`` selected events
    (50000 if the file name contains 'SM', otherwise 3000).  The output
    has a header line of column names followed by the selected events in
    random order.  A short summary, including the effective sample size
    (sum w)^2 / sum w^2, is printed.
    """
    treename = "HZZ4LeptonsAnalysisReduced"  # name of Root tree

    # single source of truth for header and row layout
    columns = ['f_weight', 'f_mass4l', 'f_D_bkg_kin', 'f_pt4l']

    ntuple = Ntuple(filename, treename)
    size = len(ntuple)

    # Substring test instead of find(...) > 0, which silently missed a
    # match located at the very start of the file name.
    ntrain = 50000 if 'SM' in filename else 3000

    print("\n\n==> filename: %s" % filename)

    total1 = 0.0   # number of selected events
    total2 = 0.0   # sum of weights
    etotal2 = 0.0  # sum of squared weights
    records = []
    fmt = ' %15.3e' * len(columns) + '\n'
    count = 0
    for ii, event in enumerate(ntuple):
        if ii % 10000 == 0:
            # progress indicator
            sys.stdout.write('. ')
            sys.stdout.flush()

        if event.f_outlier:
            continue
        if event.f_mass4l < MASSMIN:
            continue
        if event.f_mass4l > MASSMAX:
            continue

        total1 += 1.0
        total2 += event.f_weight
        etotal2 += event.f_weight**2

        values = tuple([getattr(event, column) for column in columns])
        records.append(fmt % values)

        count += 1
        if count >= ntrain:
            break

    shuffle(records)

    header = (' %s' * len(columns) + '\n') % tuple(columns)
    records.insert(0, header)

    outfilename = '%s_weighted.txt' % name
    print("")
    print(outfilename)
    # context manager guarantees the file is flushed and closed
    with open(outfilename, 'w') as out:
        out.writelines(records)

    # no selected event (or only zero weights) -> effective count is zero
    ecount = total2**2 / etotal2 if etotal2 > 0 else 0.0

    print("")
    print('training sample from file %s' % filename)
    print("\ttotal: %10d events" % size)
    print("\ttotal (without outliers): %10.0f events" % total1)
    print("\teffective total: %10.0f events" % ecount)
    print("")
def _book_hist1d(cname, xpos, ypos, hname, xtitle, nbins, xmin, xmax, color):
    """Create a 500x500 canvas and a filled, weighted 1-D histogram."""
    canvas = TCanvas(cname, cname, xpos, ypos, 500, 500)
    hist = TH1F(hname, '', nbins, xmin, xmax)
    hist.SetLineWidth(1)
    hist.SetFillColor(color)
    hist.SetFillStyle(3001)
    hist.GetXaxis().SetTitle(xtitle)
    hist.GetXaxis().SetNdivisions(505)
    hist.Sumw2()  # needed to handle weights correctly
    hist.SetMinimum(0)
    return canvas, hist


def _draw_and_save(canvas, hist, option, iPeriod, iPos):
    """Draw ``hist`` on ``canvas``, add the CMS label and save a .png."""
    canvas.cd()
    hist.Draw(option)
    CMS_lumi.CMS_lumi(canvas, iPeriod, iPos)
    canvas.Update()
    gSystem.ProcessEvents()
    canvas.SaveAs('.png')


def main():
    """Fill, plot and save the analysis histograms for the given ntuples.

    Usage: makeHists.py ntuple-file ...

    Events with ``f_mass4l`` in [MASSMIN, MASSMAX] that are not flagged as
    outliers fill histograms of m4l, (m4l, D_bkg_kin), MET and the two
    discriminants ``m4lmela`` and ``m4lmelamet``; the HistBag objects are
    filled for ``Dbnn > BNNCUT`` (fillSR) or otherwise (fillCR).  The
    histograms are then scaled by (sum of weights with outliers) /
    (sum of weights without outliers), a text summary is written to
    ``<plotname>.txt`` and canvases are saved as .png files and to
    ``histos/<name>.root``.
    """
    # ---------------------------------------
    # set up some standard graphics style
    # ---------------------------------------
    tdrstyle.setTDRStyle()
    gStyle.SetPadRightMargin(0.12)
    gStyle.SetOptStat('ei')
    gStyle.SetStatFont(42)
    gStyle.SetStatFontSize(0.03)
    gStyle.SetStatBorderSize(1)
    gStyle.SetStatH(0.2)
    gStyle.SetStatW(0.3)
    gStyle.SetStatX(0.83)
    gStyle.SetStatY(0.93)

    bnn = m4lmela
    bnn3 = m4lmelamet

    # change the CMS_lumi variables (see CMS_lumi.py)
    iPeriod = 4
    iPos = 0
    CMS_lumi.relPosX = 0.12
    CMS_lumi.lumi_13TeV = "36 fb^{-1}"
    CMS_lumi.writeExtraText = 1
    CMS_lumi.extraText = "Simulation"

    if len(sys.argv) > 1:
        filenames = sys.argv[1:]
    else:
        sys.exit(''' Usage: makeHists.py ntuple-file ... ''')

    # name of graphics file
    name = nameonly(filenames[0])
    plotname = replace(name, 'ntuple_', 'fig_')
    plotnameSRCR = replace(name, 'ntuple_', 'fig_regions_')
    histfname = replace(name, 'ntuple_', 'histos_')
    if len(filenames) > 1:
        plotname = stripit.sub('', plotname)
        histfname = stripit.sub('', histfname)
    outfilename = 'histos/%s.root' % histfname

    # change marker size and color depending on filename
    isData = False
    if find(plotname, 'data') > -1:
        msize = 0.8
        mcolor = kBlack
        isData = True
        CMS_lumi.extraText = "Preliminary"
    elif find(plotname, 'bkg') > -1:
        msize = 0.01
        mcolor = kMagenta+1
    elif find(plotname, 'higgs') > -1:
        msize = 0.01
        mcolor = kCyan+1
    else:
        msize = 0.01
        mcolor = kCyan+2

    # draw option shared by all 1-D histograms
    option = 'ep' if isData else 'hist'

    # ---------------------------------------
    # open ntuple file
    # ---------------------------------------
    ntuple = Ntuple(filenames, treename='HZZ4LeptonsAnalysisReduced')
    nevents = ntuple.size()

    print('='*80)
    print('number of entries: %d' % nevents)
    print('output file: %s' % outfilename)
    print('='*80)

    # open output root file for histograms
    hfile = TFile(outfilename, 'recreate')
    hfile.cd()

    # ---------------------------------------
    # book histograms
    # ---------------------------------------
    # 2-D plot in (f_mass4l, f_D_bkg_kin) space
    cname = 'histos/%s' % plotname
    cm4lD = TCanvas(cname, cname, 10, 10, 500, 500)
    hm4lD = TH2F('hm4lD', '', MASSBINS, MASSMIN, MASSMAX, DBINS, 0, 1)
    hm4lD.SetMarkerSize(msize)
    hm4lD.SetMarkerColor(mcolor)
    hm4lD.GetXaxis().SetTitle('#font[12]{m}_{4l} (GeV)')
    hm4lD.GetYaxis().SetTitle('#font[12]{D}_{bkg}^{kin}')
    hm4lD.Sumw2()  # needed to handle weights correctly
    hm4lD.SetMinimum(0)
    hm4lD.GetXaxis().SetNdivisions(505)
    hm4lD.GetYaxis().SetNdivisions(505)

    # 1-D plot in D(f_mass4l, f_D_bkg_kin) space
    cbnn, hbnn = _book_hist1d(
        'histos/%s_bnn' % plotname, 520, 10, 'hbnn',
        'D(#font[12]{m}_{4l}, '
        '#font[12]{D}_{bkg}^{kin})',
        DBINS, 0, 1, mcolor)

    # 1-D plot in f_mass4l
    cm4l, hm4l = _book_hist1d(
        'histos/%s_m4l' % plotname, 1040, 10, 'hm4l',
        '#font[12]{m}_{4l} (GeV)',
        MASSBINS, MASSMIN, MASSMAX, mcolor)

    # 1-D plot in f_pfmet
    cmet, hmet = _book_hist1d(
        'histos/%s_met' % plotname, 10, 510, 'hmet',
        '#font[12]{E}_{T}^{miss} (GeV)',
        METBINS, METMIN, METMAX, mcolor)

    # 1-D plot in D(f_mass4l, f_D_bkg_kin, f_pfmet)
    cbnn3, hbnn3 = _book_hist1d(
        'histos/%s_bnn3' % plotname, 510, 510, 'hbnn3',
        'D(#font[12]{m}_{4l}, '
        '#font[12]{D}_{bkg}^{kin}, '
        '#font[12]{E}_{T}^{miss})',
        DBINS, 0, 1, mcolor)

    hbag = []
    for (hname, xtitle, xoff, yoff, xbins, xmin, xmax) in PLOTS:
        hbag.append(HistBag(ntuple, plotnameSRCR,
                            hname, xtitle, xoff, yoff,
                            xbins, xmin, xmax))

    for (hname, xtitle, xoff, yoff, xbins, xmin, xmax) in PLOTSZOOM:
        hbag.append(HistBag(ntuple, plotnameSRCR,
                            hname, xtitle, xoff, yoff,
                            xbins, xmin, xmax,
                            ymin=1.e-9,
                            postfix='_zoom'))

    # ---------------------------------------
    # Loop over events
    # ---------------------------------------
    t1 = 0.0  # sum of weights, outliers included
    t2 = 0.0  # sum of squared weights, outliers included
    w1 = 0.0  # sum of weights, outliers removed
    w2 = 0.0  # sum of squared weights, outliers removed
    passed = 0
    for index, event in enumerate(ntuple):
        m4l = event.f_mass4l
        if m4l < MASSMIN:
            continue
        if m4l > MASSMAX:
            continue

        w = event.f_weight
        t1 += w
        t2 += w*w

        if event.f_outlier:
            continue

        w1 += w
        w2 += w*w

        Dbkg = event.f_D_bkg_kin
        met = event.f_pfmet
        Dbnn = bnn(m4l, Dbkg)
        Dbnn3 = bnn3(m4l, Dbkg, met)

        hm4l.Fill(m4l, w)
        hm4lD.Fill(m4l, Dbkg, w)
        hbnn.Fill(Dbnn, w)
        hbnn3.Fill(Dbnn3, w)
        hmet.Fill(met, w)

        if Dbnn > BNNCUT:
            for h in hbag:
                h.fillSR()
        else:
            for h in hbag:
                h.fillCR()

        # periodic progress report and canvas refresh
        if passed % SKIP == 0:
            print('%10d %10d' % (passed, index))

            cm4lD.cd()
            hm4lD.Draw('p')
            cm4lD.Update()
            gSystem.ProcessEvents()

            cbnn.cd()
            hbnn.Draw(option)
            cbnn.Update()
            gSystem.ProcessEvents()

            for h in hbag:
                h.draw()
        passed += 1

    # now re-scale histograms to compensate for removal of
    # outlier events.
    if w1 != 0:
        scaleFactor = t1 / w1
    else:
        # no selected non-outlier event: t1 / w1 would raise, and there
        # is nothing to rescale anyway
        print('** warning ** no selected events: scale factor set to 1')
        scaleFactor = 1.0
    t2 = sqrt(t2)
    w2 = sqrt(w2)

    for hist in (hm4l, hm4lD, hbnn, hbnn3, hmet):
        hist.Scale(scaleFactor)
    for h in hbag:
        h.scale(scaleFactor)

    s1, s2, c1, c2 = hintegral(hbnn, BNNCUT)

    print('='*80)
    txtfilename = '%s.txt' % plotname
    print(txtfilename)

    summary = [
        'number of entries: %d' % nevents,
        "number of events (with outliers): %10.2e +/- %-10.1e" % (t1, t2),
        "number of events (without outliers): %10.2e +/- %-10.1e" % (w1, w2),
        ' ',
        'histograms scaled by factor %10.3f' % scaleFactor,
        "number of events (BNN > %3.1f): %10.2e +/- %-10.1e" %
        (BNNCUT, s1, s2),
        "number of events (BNN <= %3.1f): %10.2e +/- %-10.1e" %
        (BNNCUT, c1, c2),
    ]
    # context manager closes the file even if a write fails
    with open(txtfilename, 'w') as out:
        for record in summary:
            print(record)
            out.write('%s\n' % record)
    print('='*80)

    # final plots; SaveAs('.png') is called with the extension only
    _draw_and_save(cm4lD, hm4lD, 'p', iPeriod, iPos)
    _draw_and_save(cbnn, hbnn, option, iPeriod, iPos)
    _draw_and_save(cbnn3, hbnn3, option, iPeriod, iPos)
    _draw_and_save(cmet, hmet, option, iPeriod, iPos)
    _draw_and_save(cm4l, hm4l, option, iPeriod, iPos)

    for h in hbag:
        h.draw(True)

    hfile.cd()
    for canvas in (cm4lD, cbnn, cbnn3, cmet, cm4l):
        canvas.Write()
    for h in hbag:
        h.write()
    hfile.Write()
    hfile.Close()

    sleep(2)
def wrout(filename, name):
    """Write a shuffled 13-column text sample ``<name>_weighted.txt``.

    Reads the tree HZZ4LeptonsAnalysisReduced from ``filename`` and keeps
    every event that is not an outlier and whose ``f_mass4l`` lies in
    [MASSMIN, MASSMAX].  The output has a header line of column names
    followed by the selected events in random order.  A short summary,
    including the effective sample size (sum w)^2 / sum w^2, is printed.
    """
    treename = "HZZ4LeptonsAnalysisReduced"  # name of Root tree

    # single source of truth for header and row layout
    columns = ['f_weight',
               'f_D_bkg_kin',
               'f_pt4l',
               'f_pfmet',
               'f_mT',
               'f_mass4l',
               'f_Z1mass',
               'f_Z2mass',
               'f_angle_costhetastar',
               'f_angle_costheta1',
               'f_angle_costheta2',
               'f_angle_phi',
               'f_angle_phistar1']

    ntuple = Ntuple(filename, treename)
    size = len(ntuple)

    print("\n\n==> filename: %s" % filename)

    total1 = 0.0   # number of selected events
    total2 = 0.0   # sum of weights
    etotal2 = 0.0  # sum of squared weights
    records = []
    fmt = ' %15.3e' * len(columns) + '\n'
    for ii, event in enumerate(ntuple):
        if ii % 10000 == 0:
            # progress indicator
            sys.stdout.write('. ')
            sys.stdout.flush()

        if event.f_outlier:
            continue
        if event.f_mass4l < MASSMIN:
            continue
        if event.f_mass4l > MASSMAX:
            continue

        total1 += 1.0
        total2 += event.f_weight
        etotal2 += event.f_weight**2

        values = tuple([getattr(event, column) for column in columns])
        records.append(fmt % values)

    shuffle(records)

    header = (' %s' * len(columns) + '\n') % tuple(columns)
    records.insert(0, header)

    outfilename = '%s_weighted.txt' % name
    print("")
    print(outfilename)
    # context manager guarantees the file is flushed and closed
    with open(outfilename, 'w') as out:
        out.writelines(records)

    # no selected event (or only zero weights) -> effective count is zero
    ecount = total2**2 / etotal2 if etotal2 > 0 else 0.0

    print("")
    print('training sample from file %s' % filename)
    print("\ttotal: %10d events" % size)
    print("\ttotal (without outliers): %10.0f events" % total1)
    print("\teffective total: %10.0f events" % ecount)
    print("")