#!/usr/bin/python
"""
Build the Zinv + ttZ MC histogram that is compared to the data prediction.
"""
import RA2b

## luminosity of the MC samples
doLumi = 24.5

## keyword arguments shared by both histogram requests
histArgs = dict(applyPuWeight=True, doLumi=doLumi)

## Zinv and ttZ histograms (the Zinv request also passes removeZkfactor=False)
zinv = RA2b.getHist('zinv', removeZkfactor=False, **histArgs)
ttzvv = RA2b.getHist('ttzvv', **histArgs)

## sum the two samples in place (ttZ is added onto the Zinv histogram)
zinv.Add(ttzvv)

## write the summed histogram to disk
zinv.SaveAs('../plots/histograms/ZinvMCttzMC174bin.root')
## NOTE(review): `proc`, `hsEE`, `hsMC`, `hData`, `MCfile`, `jbGroup` and
## `plotNameRoot` are defined before this point; the first statements
## presumably sit inside a loop over MC processes -- confirm the nesting.
##
## What the code below does, in order:
##  * builds the "ee" histogram name from histNameRoot + proc;
##  * when removeDYkfactor is set and the name contains 'dy', scales that
##    histogram by 1 / 1.23;
##  * adds the histogram to hsEE and appends hsEE to hsMC;
##  * prints the "loose" and "sig" entries of every hData element and every
##    histogram in hsMC;
##  * calls RA2b.getZmassFitPlot (doLumi=35.9) and prints elements [0] and
##    [1] of each returned entry as "purity = <[0]> +/- <[1]>";
##  * for i in 1..4 looks up the object named "canvas<jbGroup + i>" and, only
##    if it is a ROOT.TCanvas, saves it as <plotNameRoot>mm.pdf (i == 2) or
##    <plotNameRoot>ee.pdf (i == 4).
hName = str(histNameRoot) + str(proc) + "ee" if (removeDYkfactor and 'dy' in hName): MCfile.Get(hName).Scale(1 / 1.23) hsEE.Add(MCfile.Get(hName)) hsMC.append(hsEE) print '' for i in range(len(hData)): hData[i]["loose"].Print() hData[i]["sig"].Print() for hist in hsMC: hist.Print() purityList = RA2b.getZmassFitPlot(doDiMu=doMumu, doDiEl=doEe, dataSet=hData, mcSet=hsMC, doLumi=35.9) for i in range(len(purityList)): print "purity = " + str(purityList[i][0]) + " +/- " + str( purityList[i][1]) for i in range(1, 5): cIter = jbGroup + i canv = ROOT.gROOT.FindObject("canvas" + str(cIter)) if (type(canv) == ROOT.TCanvas): if (i == 2): canv.SaveAs(str(plotNameRoot) + "mm.pdf") elif (i == 4): canv.SaveAs(str(plotNameRoot) + "ee.pdf")
## define kinematic range ##
## qcd binning (ldp / hdp) puts bins 11-13 at the beginning of kinRange
## and turns the MHT cut off
isQcdBinning = sample in ('ldp', 'hdp')
applyMHTCut = not isQcdBinning
kinRange = list(range(11, 14)) if isQcdBinning else []
## every sample uses the 10 nominal kinematic bins
kinRange.extend(range(1, 11))
############################

## the individual dilepton yields per bin (same selection for both channels)
yieldArgs = dict(dphiCut=sample, kinRange=kinRange, applyMHTCut=applyMHTCut)
zee = RA2b.getHist('zee', **yieldArgs)
zmm = RA2b.getHist('zmm', **yieldArgs)

## the stat uncertainty on the extrapolation factors
zll_Extrap = RA2b.getExtrapolation(
    ['zee', 'zmm'],
    njSplit=False,
    doFactorization=True,
    kinRange=-1,
    dphiCut=sample,
    applyMHTCut=applyMHTCut,
)

## relative bin error for bins 1..19; an empty bin gets an error of 1.
statErr = []
for i in range(1, 20):
    binContent = zll_Extrap.GetBinContent(i)
    if binContent != 0:
        statErr.append(zll_Extrap.GetBinError(i) / binContent)
    else:
        statErr.append(1.)

####################################################################
## get the purity uncertainties from the effHists.root file ########
####################################################################
effFile = ROOT.TFile("../plots/histograms/effHists.root")
import RA2b import ROOT ########## trigger effs from manuel ############## trig_m = (0.988, 0.004) trig_e = (0.988, 0.004) ########## run fits to get purity ################ fit_2j = [] fit_3to4j = [] fit_5jplus = [] # nb=12 is nb>=2 for nb in [0, 1, 12]: fit_2j.append(RA2b.getZmassFitPlot(bJetBin=nb, nJetBin=1)) fit_3to4j.append(RA2b.getZmassFitPlot(bJetBin=nb, nJetBin=2)) fit_5jplus.append(RA2b.getZmassFitPlot(bJetBin=nb, extraCuts='NJets>=5')) fits = [fit_2j, fit_3to4j, fit_5jplus, fit_5jplus, fit_5jplus] ########## get the efficiency file ################ effFile = ROOT.TFile("../plots/histograms/effHists.root", "UPDATE") ######### set the purities found above ############ h_pur_m = effFile.Get("h_pur_m") h_pur_e = effFile.Get("h_pur_e") Bin = 1 for nj in range(1, 6): for nb in range(4):
## Draw every numerator histogram against the denominator histogram at the
## same index, with a ratio panel, and save one PDF per pair.
for i, nhName in enumerate(histsByLep['N']):
    ## numerator histogram, renamed with an "N" suffix
    print(str(nhName))
    hNumer = Nfile.Get(nhName)
    hNumer.SetName(str(nhName) + "N")

    ## denominator histogram, renamed with a "D" suffix and scaled by fScale
    dhName = histsByLep['D'][i]
    print(str(dhName))
    hDen = Dfile.Get(dhName)
    hDen.SetName(str(dhName) + "D")
    hDen.Scale(fScale)

    ## "hZmass" histograms are drawn on a linear scale, all others on a
    ## log scale; a non-zero MZmmMax / MZeeMax overrides the y maximum
    doLogy = "hZmass" not in nhName
    if not doLogy:
        if "mm" in nhName and MZmmMax != 0:
            hNumer.SetMaximum(MZmmMax)
        elif "ee" in nhName and MZeeMax != 0:
            hNumer.SetMaximum(MZeeMax)

    ## axis titles are taken over from the numerator histogram
    xTitle = hNumer.GetXaxis().GetTitle()
    yTitle = hNumer.GetYaxis().GetTitle()

    canvas = RA2b.getPlotAndRatio(
        numHists=hNumer,
        denomHists=hDen,
        doRatio=True,
        doLogy=doLogy,
        doCMSlumi=True,
        iPeriod=8,
        drawHorizontalLine=True,
        xTitle=xTitle,
        yTitle=yTitle,
        ratioMin=ratioMin,
        ratioMax=ratioMax,
        legList=legList,
    )
    canvas.SaveAs(str(nhName) + ".pdf")

# Reference signature:
# def getPlotAndRatio(numHists, denomHists=None, bottomPlots=None,
#     doStack=None, Title=None, xTitle=None, yTitle=None, doCMSlumi=None,
#     iPos=None, iPeriod=None, extraText=None, ratioTitle=None,
#     ratioMin=None, ratioMax=None, doLogy=None, doFlip=None, doDiff=None,
#     doPull=None, makeLeg=None, legList=None, legCoords=None,
#     textCoords=None, canvasSize=None, canvasName=None, numColors=None,
#     denomColor=None, numMarkers=None, denomMarker=None, markerSize=None,
#     lineWidth=None, numDrawStyles=None, denomDrawStyle=None,
#     drawErrorBand=None, stackColors=None, axisTitleSize=None,
#     drawVerticalLines=None, drawHorizontalLine=None, statBox=None,
#     drawText=None, text=None, setMax=None, setMin=None,
#     doClosureStyle=None, errorBandColor=None, errorBandFillStyle=None,
#     legHeader=None, nDivRatio=None, doNumFill=None, hLineVal=None,
#     hLineColors=None, nDivX=None, ratioGridx=None, ratioGridy=None,
#     topGridx=None, topGridy=None, doRatio=None, numFillStyles=None,
#     numFillColors=None)
## NOTE(review): the `if` condition that selects between the two nj_binning
## lists is not shown here -- confirm which case drops the 9.5 edge.
##
## What the code below does, in order:
##  * picks the NJets bin edges (1.5 .. 8.5, or 1.5 .. 9.5 in the else case);
##  * picks the MHT bin edges: for sample 'ldp' or 'hdp' the edges start at
##    250. (sideband included) and mhtCut is False; otherwise they start at
##    300. and mhtCut is True;
##  * builds double-ratio graphs for 'NJets', 'MHT' and 'HT' with
##    applyPuWeight=True, dphiCut=sample and applyMHTCut=mhtCut ('HT' is
##    requested without an explicit binning);
##  * passes the three graphs to RA2b.getDoubleRatioPlot and keeps the
##    result in dr_out.
nj_binning = [1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5] else: nj_binning = [1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5] ## include sideband if ldp or hdp if (sample == 'ldp' or sample == 'hdp'): mht_binning = [250., 300., 350., 400., 450., 600., 750, 900.] mhtCut = False else: mhtCut = True mht_binning = [300., 350., 400., 450., 600., 750, 900.] ## get the double ratio graphs nj_dr = RA2b.getDoubleRatioGraph('NJets', applyPuWeight=True, dphiCut=sample, binning=nj_binning, applyMHTCut=mhtCut) mht_dr = RA2b.getDoubleRatioGraph('MHT', applyPuWeight=True, dphiCut=sample, binning=mht_binning, applyMHTCut=mhtCut) ht_dr = RA2b.getDoubleRatioGraph('HT', applyPuWeight=True, dphiCut=sample, applyMHTCut=mhtCut) ## get the double ratio plots with values and uncertainties dr_out = RA2b.getDoubleRatioPlot([nj_dr, mht_dr, ht_dr])
def trainAndTest(self):
    """Book the enabled TMVA methods, then train, test and evaluate them.

    Configuration is read from the instance: ``outFile``, ``nTrees``,
    ``maxDepth``, ``minNodeSize``, ``signalSample``, ``bkgSet``,
    ``variableSet``, ``useMethod``, ``dPhiCut``, ``mhtCut`` and
    ``extraCuts``.

    Side effects:
        * If ``self.outFile`` is ``None`` a default base name is built from
          the configuration and today's date (``%m-%d``).
        * A counter is appended to the base name and incremented until
          ``<base><counter>.root`` does not exist, so earlier results are
          not overwritten.
        * ``self.outFile`` is replaced by the ``ROOT.TFile`` opened in
          ``RECREATE`` mode; that file is closed before returning.
    """
    ## set output file name if undefined
    if self.outFile is None:
        ## the *number* of configured NTrees / MaxDepth values goes into
        ## the name, not the values themselves
        self.outFile = ('NT' + str(len(self.nTrees)) +
                        '_MD' + str(len(self.maxDepth)))
        ## minNodeSize without its last character
        ## (presumably a trailing '%' -- confirm against the caller)
        self.outFile += ('_MNS' + self.minNodeSize[:-1] +
                         '_' + self.signalSample)
        self.outFile += '_' + time.strftime('%m-%d') + '_'

    ## prevent overwriting: take the first unused counter
    baseName = self.outFile
    fileIter = 0
    while os.path.isfile(baseName + str(fileIter) + '.root'):
        fileIter += 1

    ## transform to ROOT.TFile type
    self.outFile = ROOT.TFile(baseName + str(fileIter) + '.root',
                              'RECREATE')

    ## Declare the TMVA Factory object
    factory = ROOT.TMVA.Factory(
        'TMVAMulticlass_' + self.signalSample,
        self.outFile,
        '!V:!Silent:Color:!DrawProgressBar:'
        'Transformations=I;D;P;G,D:'
        'AnalysisType=multiclass')

    ## Tell the Factory which features to learn with
    for variable in self.variableSet:
        factory.AddVariable(variable)

    ## declare dictionaries to store sample related info
    ## (the TFile / TTree objects are kept in these dictionaries for the
    ## duration of the call; presumably so they stay alive while the
    ## factory uses them -- confirm)
    fileList = {}
    TFileList = {}
    TTreeList = {}

    ## Load up the training and testing samples (1 sig + all bkgs)
    for sample in [self.signalSample] + self.bkgSet:
        ## get file list from RA2b module
        fileList[sample] = RA2b.getFileList(sample)
        TFileList[sample] = []
        TTreeList[sample] = []

        ## for every file: get its weight, open it, fetch the tree named
        ## 'tree' and register it with the factory under the sample name
        for fileName in fileList[sample]:
            w = RA2b.getTreeWeight(fileName)
            tFile = ROOT.TFile(fileName)
            tree = tFile.Get('tree')
            TFileList[sample].append(tFile)
            TTreeList[sample].append(tree)
            factory.AddTree(tree, sample, w)

    ## get the cuts to apply before training
    cuts = RA2b.getCuts('sig',
                        dphiCut=self.dPhiCut,
                        applyMHTCut=self.mhtCut,
                        extraCuts=self.extraCuts)

    ## Prepare the factory for training and testing
    ## by normalizing to equal number of events
    factory.PrepareTrainingAndTestTree(
        cuts,
        'SplitMode=Random:'
        'NormMode=EqualNumEvents:!V')

    # Boosted Decision Trees with gradient boosting:
    # one method is booked per (NTrees, MaxDepth) combination
    if self.useMethod['BDTG']:
        for nT in self.nTrees:
            for maxD in self.maxDepth:
                factory.BookMethod(
                    ROOT.TMVA.Types.kBDT,
                    'BDTG_NTrees' + str(nT) + '_MaxDepth' + str(maxD),
                    '!H:!V:NTrees=' + str(nT) +
                    ':BoostType=Grad:Shrinkage=0.10:'
                    'MinNodeSize=' + self.minNodeSize +
                    ':GradBaggingFraction=0.50:'
                    'nCuts=20:MaxDepth=' + str(maxD))

    # Multi-layer perceptron (ANN)
    if self.useMethod['MLP']:
        factory.BookMethod(
            'MLP', 'MLP',
            '!H:!V:NeuronType=tanh:'
            'NCycles=1000:HiddenLayers=N+5,5:'
            'TestRate=5:EstimatorType=MSE')

    # functional discriminant with GA minimizer
    if self.useMethod['FDA_GA']:
        factory.BookMethod(
            ROOT.TMVA.Types.kFDA, 'FDA_GA',
            'H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:'
            'ParRanges=(-1,1);(-10,10);'
            '(-10,10);(-10,10);(-10,10):'
            'FitMethod=GA:PopSize=300:Cycles=3:'
            'Steps=20:Trim=True:SaveBestGen=1')

    # PDE-Foam approach
    if self.useMethod['PDEFoam']:
        factory.BookMethod(
            ROOT.TMVA.Types.kPDEFoam, 'PDEFoam',
            '!H:!V:TailCut=0.001:VolFrac=0.0666:'
            'nActiveCells=500:nSampl=2000:nBin=5:'
            'Nmin=100:Kernel=None:Compress=T')

    ## train, test, and evaluate
    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()

    ## Delete factory to prevent memory leak
    factory.Delete()

    ## close file before finishing script
    self.outFile.Close()