Example #1
0
def readAndFillHist(filename,
                    bnn,
                    c1,
                    h1,
                    m4lmela,
                    step=10000,
                    SR=False,
                    treename="HZZ4LeptonsAnalysisReduced"):
    # ---------------------------------------
    # open ntuple file
    # ---------------------------------------
    data = Ntuple(filename, treename)
    print filename
    size = data.size()
    if find(filename, 'SM') > 0:
        ntrain = 50000
    else:
        ntrain = 3000

    print 'ntrain: %s' % ntrain

    fields = [VARNAME]
    inputs = vector('double')(len(fields))

    count = 0
    for row, event in enumerate(data):

        if event.f_outlier: continue
        if event.f_mass4l < MASSMIN: continue
        if event.f_mass4l > MASSMAX: continue

        if count % step == 0:
            print "%10d\t%10d" % (row, count)

        count += 1
        if count < ntrain: continue

        if SR:
            d = m4lmela(event.f_mass4l, event.f_D_bkg_kin)
            if d < BNNCUT: continue

        w = event.f_weight
        for ii in xrange(len(fields)):
            inputs[ii] = event.__getattr__(fields[ii])
        D = bnn(inputs)

        h1.Fill(D, w)
        if count % step == 0:
            c1.cd()
            h1.Draw("hist")
            c1.Update()
            gSystem.ProcessEvents()
    c1.cd()
    h1.Scale(1.0 / h1.Integral())
    h1.Draw("hist")
    c1.Update()
    gSystem.ProcessEvents()
Example #2
0
def readAndFillHist(filename, bnn,
                    c1, h1,
                    c2, h2,
                    c3, h3,
                    c4, h4,
                    treename='HZZ4LeptonsAnalysisReduced'):
    """Fill four histograms from an ntuple and draw them on their canvases.

    For every event that is not flagged ``f_outlier`` and whose
    ``f_mass4l`` lies in [MASSMIN, MASSMAX], with x = f_mass4l,
    y = f_D_bkg_kin and w = f_weight:

        h1 <- bnn(x, y)   (weight w)
        h2 <- (x, y)      (weight w)
        h3 <- x           (weight w)
        h4 <- y           (weight w)

    Canvases are refreshed whenever the row index of a selected event is
    a multiple of 10000.  A file whose name contains 'data' is drawn with
    option 'ep' and is not normalized; any other file is drawn with
    option 'hist' and each non-empty histogram is normalized to unit
    integral at the end.
    """
    # ---------------------------------------
    # open ntuple file
    # ---------------------------------------
    # find() returns 0 when the name *starts* with 'data', so the test
    # must be >= 0; with > 0 such a file was treated as simulation.
    isData = find(filename, 'data') >= 0
    option = 'ep' if isData else "hist"

    ntuple = Ntuple(filename, treename)

    for row, event in enumerate(ntuple):

        o = event.f_outlier
        x = event.f_mass4l

        if o: continue
        if x < MASSMIN: continue
        if x > MASSMAX: continue

        w = event.f_weight
        y = event.f_D_bkg_kin

        D = bnn(x, y)

        h1.Fill(D, w)
        h2.Fill(x, y, w)
        h3.Fill(x, w)
        h4.Fill(y, w)

        if row % 10000 == 0:
            for c, h in [(c1, h1),
                         (c3, h3),
                         (c4, h4)]:
                c.cd()
                h.Draw(option)
                c.Update()
                gSystem.ProcessEvents()

            # the 2-D histogram is always shown as a scatter plot here
            c2.cd()
            h2.Draw("p")
            c2.Update()

    for c, h in [(c1, h1),
                 (c2, h2),
                 (c3, h3),
                 (c4, h4)]:
        c.cd()
        if not isData:
            # Skip normalization of an empty histogram: its integral is
            # zero and 1.0/0 would raise ZeroDivisionError.
            total = h.Integral()
            if total != 0:
                h.Scale(1.0 / total)
        h.Draw(option)
        c.Update()
        gSystem.ProcessEvents()
Example #3
0
def main():
    print "\n\tmakesimdata.py\n"

    treename = "HZZ4LeptonsAnalysisReduced"
    # source names
    srcnames = ['gg', 'VV', 'bkg']
    # ntuple variable names
    varnames = ['D_VVgg_MLP', 'D_VVgg_BDT', 'D_bkg', 'weight']

    # ---------------------------------------------------
    # 1. load data into memory
    # 2. create a bootstrap sample of a given size
    #    by randomly selecting events with a
    #    probability proportional to event weight
    # 3. write out events to an ntuple
    # ---------------------------------------------------
    records = []
    weight = 0.0
    for name in srcnames:
        filename = 'd_4mu_%s.root' % name
        print 'read %s' % filename
        ntuple = Ntuple(filename, treename)
        for row in ntuple:
            rec = []
            for varname in varnames:
                rec.append(row(varname))
            records.append(rec)
            weight += records[-1][-1]  # weight is last column

    print "Total weight (300/fb): %8.2f" % weight
    print "\tcompute cdf of weights"
    wcdf = len(records) * [0]
    wcdf[0] = records[0][-1]
    for i in xrange(1, len(records)):
        wcdf[i] = wcdf[i - 1] + records[i][-1]
    sumw = wcdf[-1]
    if sumw != weight:
        sys.exit("huh?")

    # randomly select "N" events according to event weight
    N = int(sumw + 0.5)  # number of events to select
    print "\tselecting %d events" % N
    outrecords = []
    for i in xrange(N):
        w = uniform(0, sumw)
        k = binsearch(wcdf, w)
        if k < 0:
            sys.exit("**error** not found %f *** should not happen!" % w)
        outrecords.append(records[k])
        outrecords[-1][-1] = 1.0

    # write out records to an ntuple
    filename = 'd_4mu_simdata.root'
    makeTree(filename, treename, outrecords)
Example #4
0
def wrout(filename, name):
    treename = "HZZ4LeptonsAnalysisReduced"  # name of Root tree
    weightname = "f_weight"  # name of event weight variable

    ntuple = Ntuple(filename, treename)
    size = len(ntuple)
    if find(filename, 'SM') > 0:
        ntrain = 50000
    else:
        ntrain = 3000

    print "\n\n==> filename: %s" % filename

    total1 = 0.0
    total2 = 0.0
    etotal2 = 0.0
    records = []
    fmt = ' %15.3e' * 4 + '\n'

    count = 0
    for ii, event in enumerate(ntuple):
        if ii % 10000 == 0:
            print '.',
            sys.stdout.flush()

        if event.f_outlier: continue
        if event.f_mass4l < MASSMIN: continue
        if event.f_mass4l > MASSMAX: continue

        total1 += 1.0
        total2 += event.f_weight
        etotal2 += event.f_weight**2

        record = fmt % \
          (event.f_weight,
           event.f_mass4l,
           event.f_D_bkg_kin,
           event.f_pt4l)
        records.append(record)

        count += 1
        if count >= ntrain: break

    shuffle(records)
    fmt = ' %s' * 4 + '\n'

    record = fmt % ('f_weight', 'f_mass4l', 'f_D_bkg_kin', 'f_pt4l')
    records.insert(0, record)
    outfilename = '%s_weighted.txt' % name
    print
    print outfilename
    open(outfilename, 'w').writelines(records)

    ecount = total2**2 / etotal2
    print
    print 'training sample from file %s' % filename
    print "\ttotal:                       %10d events" % size
    print "\ttotal (without outliers):    %10.0f events" % total1
    print "\teffective total:             %10.0f events" % ecount

    print
Example #5
0
def main():
    # ---------------------------------------
    # set up some standard graphics style
    # ---------------------------------------
    tdrstyle.setTDRStyle()
    gStyle.SetPadRightMargin(0.12)
    gStyle.SetOptStat('ei')
    gStyle.SetStatFont(42)
    gStyle.SetStatFontSize(0.03)
    gStyle.SetStatBorderSize(1)
    gStyle.SetStatH(0.2)
    gStyle.SetStatW(0.3)
    gStyle.SetStatX(0.83)
    gStyle.SetStatY(0.93)    

    bnn  = m4lmela
    bnn3 = m4lmelamet
    
    #change the CMS_lumi variables (see CMS_lumi.py)
    iPeriod = 4
    iPos    = 0
    CMS_lumi.relPosX = 0.12
    CMS_lumi.lumi_13TeV = "36 fb^{-1}"
    CMS_lumi.writeExtraText = 1
    CMS_lumi.extraText = "Simulation"
                
    if len(sys.argv) > 1:
        filenames = sys.argv[1:]
    else:
        sys.exit('''
    Usage:
        makeHists.py ntuple-file ...
        ''')

    # name of graphics file
    name = nameonly(filenames[0])
    plotname  = replace(name, 'ntuple_', 'fig_')
    plotnameSRCR  = replace(name, 'ntuple_', 'fig_regions_')
    histfname = replace(name, 'ntuple_', 'histos_')
    if len(filenames) > 1:
        plotname = stripit.sub('', plotname)
        histfname= stripit.sub('', histfname)
    outfilename = 'histos/%s.root' % histfname
        
    # change marker size and color depending on filename
    isData = False
    isHiggs= False
    if find(plotname, 'data') > -1:
        msize  = 0.8
        mcolor = kBlack
        isData = True
        CMS_lumi.extraText = "Preliminary"
        
    elif find(plotname, 'bkg') > -1:
        msize  = 0.01
        mcolor = kMagenta+1
        
    elif find(plotname, 'higgs') > -1:
        msize  = 0.01
        mcolor = kCyan+1
        isHiggs = True
                
    else:
        msize  = 0.01
        mcolor = kCyan+2

    # ---------------------------------------
    # open ntuple file
    # ---------------------------------------        
    ntuple  = Ntuple(filenames, treename='HZZ4LeptonsAnalysisReduced')
    nevents = ntuple.size()

    print '='*80            
    print 'number of entries: %d' % nevents
    print 'output file: %s' % outfilename    
    print '='*80

    # open output root file for histograms
    hfile = TFile(outfilename, 'recreate')
    hfile.cd()


    # ---------------------------------------
    # book histograms
    # ---------------------------------------
    # 2-D plot in (f_mass4l, f_D_bkg_kin) space
    cname = 'histos/%s' % plotname 
    cm4lD = TCanvas(cname, cname, 10, 10, 500, 500)    
    hm4lD = TH2F('hm4lD', '', MASSBINS, MASSMIN, MASSMAX, DBINS, 0, 1)
    hm4lD.SetMarkerSize(msize)
    hm4lD.SetMarkerColor(mcolor)
    hm4lD.GetXaxis().SetTitle('#font[12]{m}_{4l} (GeV)')
    hm4lD.GetYaxis().SetTitle('#font[12]{D}_{bkg}^{kin}')
    hm4lD.Sumw2()     # needed to handle weights correctly
    hm4lD.SetMinimum(0)
    hm4lD.GetXaxis().SetNdivisions(505)
    hm4lD.GetYaxis().SetNdivisions(505)
    
    # 1-D plot in D(f_mass4l, f_D_bkg_kin) space
    cname = 'histos/%s_bnn' % plotname
    cbnn  = TCanvas(cname, cname, 520, 10, 500, 500)    
    hbnn  = TH1F('hbnn', '', DBINS, 0, 1)
    hbnn.SetLineWidth(1)
    hbnn.SetFillColor(mcolor)
    hbnn.SetFillStyle(3001)
    hbnn.GetXaxis().SetTitle('D(#font[12]{m}_{4l}, '\
                             '#font[12]{D}_{bkg}^{kin})')
    hbnn.GetXaxis().SetNdivisions(505)
    hbnn.Sumw2()
    hbnn.SetMinimum(0)

    # 1-D plot in f_mass4l
    cname = 'histos/%s_m4l' % plotname
    cm4l  = TCanvas(cname, cname, 1040, 10, 500, 500)    
    hm4l  = TH1F('hm4l', '', MASSBINS, MASSMIN, MASSMAX)
    hm4l.SetLineWidth(1)
    hm4l.SetFillColor(mcolor)
    hm4l.SetFillStyle(3001)
    hm4l.GetXaxis().SetTitle('#font[12]{m}_{4l} (GeV)')
    hm4l.GetXaxis().SetNdivisions(505)
    hm4l.Sumw2()
    hm4l.SetMinimum(0)


    # 1-D plot in f_pfmet
    cname = 'histos/%s_met' % plotname
    cmet  = TCanvas(cname, cname, 10, 510, 500, 500)    
    hmet  = TH1F('hmet', '', METBINS, METMIN, METMAX)
    hmet.SetLineWidth(1)
    hmet.SetFillColor(mcolor)
    hmet.SetFillStyle(3001)
    hmet.GetXaxis().SetTitle('#font[12]{E}_{T}^{miss} (GeV)')
    hmet.GetXaxis().SetNdivisions(505)
    hmet.Sumw2()
    hmet.SetMinimum(0)    

    
    # 1-D plot in D(f_mass4l, f_D_bkg_kin, f_pfmet)
    cname = 'histos/%s_bnn3' % plotname
    cbnn3  = TCanvas(cname, cname, 510, 510, 500, 500)    
    hbnn3  = TH1F('hbnn3', '', DBINS, 0, 1)
    hbnn3.SetLineWidth(1)
    hbnn3.SetFillColor(mcolor)
    hbnn3.SetFillStyle(3001)
    hbnn3.GetXaxis().SetTitle('D(#font[12]{m}_{4l}, '\
                             '#font[12]{D}_{bkg}^{kin}, '\
                                  '#font[12]{E}_{T}^{miss})')
    hbnn3.GetXaxis().SetNdivisions(505)
    hbnn3.Sumw2()
    hbnn3.SetMinimum(0)    


    hbag = []
    for ii, (name, xtitle, xoff, yoff, xbins, xmin, xmax) in enumerate(PLOTS):
        hbag.append(HistBag(ntuple, plotnameSRCR, name, xtitle, xoff, yoff,
                                xbins, xmin, xmax))

    for ii,(name,xtitle, xoff, yoff, xbins, xmin, xmax) in enumerate(PLOTSZOOM):
        hbag.append(HistBag(ntuple, plotnameSRCR, name, xtitle, xoff, yoff,
                            xbins, xmin, xmax,
                            ymin=1.e-9,
                            postfix='_zoom'))
 
    # ---------------------------------------
    # Loop over events
    # ---------------------------------------

    t1 = 0.0
    t2 = 0.0
    w1 = 0.0
    w2 = 0.0
    passed = 0
    for index, event in enumerate(ntuple):
        
        m4l  = event.f_mass4l

        if m4l  < MASSMIN: continue
        if m4l  > MASSMAX: continue

        #if (m4l >= 100) and (m4l <=150): continue
                        
        w = event.f_weight
        t1 += w
        t2 += w*w
        
        if event.f_outlier: continue
        w1 += w
        w2 += w*w

        Dbkg = event.f_D_bkg_kin
        met  = event.f_pfmet
        
        Dbnn = bnn(m4l, Dbkg)
        Dbnn3= bnn3(m4l, Dbkg, met)
        
        hm4l.Fill(m4l, w)
        hm4lD.Fill(m4l, Dbkg, w)
        hbnn.Fill(Dbnn, w)
        hbnn3.Fill(Dbnn3, w)
        hmet.Fill(met, w)
        
        if Dbnn > BNNCUT:
            for h in hbag:
                h.fillSR()            
        else:
            for h in hbag:
                h.fillCR()               
            
        if passed % SKIP == 0:
            print '%10d %10d' % (passed, index)
            
            cm4lD.cd()
            hm4lD.Draw('p')
            cm4lD.Update()
            gSystem.ProcessEvents()
            
            cbnn.cd()
            if isData:
                hbnn.Draw('ep')
            else:
                hbnn.Draw('hist')
            cbnn.Update()
            gSystem.ProcessEvents()
            
            for h in hbag:
                h.draw()      
        passed += 1
        
    # now re-scale histograms to compensate for removal of
    # outlier events.
    scaleFactor = t1 / w1
    t2 = sqrt(t2)
    w2 = sqrt(w2)
    
    hm4l.Scale(scaleFactor)
    hm4lD.Scale(scaleFactor)
    hbnn.Scale(scaleFactor)
    hbnn3.Scale(scaleFactor)
    hmet.Scale(scaleFactor)
    for h in hbag:
        h.scale(scaleFactor)

    s1, s2, c1, c2 = hintegral(hbnn, BNNCUT)

    print '='*80
    txtfilename = '%s.txt' % plotname
    print txtfilename
    out = open(txtfilename, 'w')
    record = 'number of entries: %d' % nevents
    print record
    out.write('%s\n' % record)

    record = "number of events (with outliers):     %10.2e +/- %-10.1e" % \
      (t1, t2)
    print record
    out.write('%s\n' % record)

    record = "number of events (without outliers):  %10.2e +/- %-10.1e" % \
      (w1, w2)
    print record
    out.write('%s\n' % record)    

    record = ' '
    print record
    out.write('%s\n' % record)    
    
    record = 'histograms scaled by factor %10.3f' % scaleFactor
    print record
    out.write('%s\n' % record)
    
    record = "number of events (BNN >  %3.1f):        %10.2e +/- %-10.1e" % \
      (BNNCUT, s1, s2)
    print record
    out.write('%s\n' % record)
          
    record = "number of events (BNN <= %3.1f):        %10.2e +/- %-10.1e" % \
      (BNNCUT, c1, c2)
    print record
    out.write('%s\n' % record)

    out.close()            
    print '='*80

    
    cm4lD.cd()
    hm4lD.Draw('p')
    CMS_lumi.CMS_lumi(cm4lD, iPeriod, iPos)
    cm4lD.Update()
    gSystem.ProcessEvents()
    cm4lD.SaveAs('.png')

    cbnn.cd()
    if isData:
        hbnn.Draw('ep')
    else:
        hbnn.Draw('hist')
    CMS_lumi.CMS_lumi(cbnn, iPeriod, iPos)            
    cbnn.Update()
    gSystem.ProcessEvents()
    cbnn.SaveAs('.png')

    cbnn3.cd()
    if isData:
        hbnn3.Draw('ep')
    else:
        hbnn3.Draw('hist')
    CMS_lumi.CMS_lumi(cbnn3, iPeriod, iPos)            
    cbnn3.Update()
    gSystem.ProcessEvents()
    cbnn3.SaveAs('.png')    

    cmet.cd()
    if isData:
        hmet.Draw('ep')
    else:
        hmet.Draw('hist')
    CMS_lumi.CMS_lumi(cmet, iPeriod, iPos)            
    cmet.Update()
    gSystem.ProcessEvents()
    cmet.SaveAs('.png')    

    
    cm4l.cd()
    if isData:
        hm4l.Draw('ep')
    else:
        hm4l.Draw('hist')
    CMS_lumi.CMS_lumi(cm4l, iPeriod, iPos)            
    cm4l.Update()
    gSystem.ProcessEvents()
    cm4l.SaveAs('.png')        

    for h in hbag:
        h.draw(True)      
    hfile.cd()
    cm4lD.Write()
    cbnn.Write()
    cbnn3.Write()
    cmet.Write()
    cm4l.Write()
    
    for h in hbag:
        h.write()      

    hfile.Write()
    hfile.Close()
    
    sleep(2)
Example #6
0
def wrout(filename, name):
    treename   = "HZZ4LeptonsAnalysisReduced"  # name of Root tree 
    weightname = "f_weight"  # name of event weight variable
        
    ntuple = Ntuple(filename, treename)
    size   = len(ntuple)
    print "\n\n==> filename: %s" % filename
    
    total1  = 0.0
    total2  = 0.0
    etotal2 = 0.0
    records = []
    fmt = ' %15.3e' * 13 + '\n'

    count = 0
    for ii, event in enumerate(ntuple):
        if ii % 10000 == 0:
            print '.',
            sys.stdout.flush()
        
        if event.f_outlier: continue
        if event.f_mass4l < MASSMIN: continue
        if event.f_mass4l > MASSMAX: continue
            
        total1  += 1.0
        total2  += event.f_weight
        etotal2 += event.f_weight**2
        
        record = fmt % \
          (event.f_weight,
           event.f_D_bkg_kin,
           event.f_pt4l,
           event.f_pfmet,
           event.f_mT,
           event.f_mass4l,
           event.f_Z1mass,
           event.f_Z2mass,
           event.f_angle_costhetastar,
           event.f_angle_costheta1,
           event.f_angle_costheta2,
           event.f_angle_phi,
           event.f_angle_phistar1)
        records.append(record)
    shuffle(records)
    fmt = ' %s' * 13 + '\n'
    
    record = fmt % ('f_weight',
                    'f_D_bkg_kin',
                    'f_pt4l',
                    'f_pfmet',
                    'f_mT',
                    'f_mass4l',
                    'f_Z1mass',
                    'f_Z2mass',
                    'f_angle_costhetastar',
                    'f_angle_costheta1',
                    'f_angle_costheta2',
                    'f_angle_phi',
                    'f_angle_phistar1')
    records.insert(0, record)
    outfilename = '%s_weighted.txt' % name
    print
    print outfilename
    open(outfilename, 'w').writelines(records)

    ecount = total2**2/etotal2
    print
    print 'training sample from file %s' % filename
    print "\ttotal:                       %10d events" % size
    print "\ttotal (without outliers):    %10.0f events" % total1
    print "\teffective total:             %10.0f events" % ecount

    print