def dofit(self):
    """Estimate the mode and effective sigma with bootstrap uncertainties.

    The half-sample mode and half of the modal-interval length (at sigma
    level 1) are evaluated once on ``self.data`` and then on
    ``self.nboot`` replicas drawn by a ``Resampler``.  Each evaluation is
    appended to ``self.bootdata``.  Finally ``self.mode`` and
    ``self.effsigma`` are set to the ``bootdata`` means and their errors
    to the values returned by ``bootdata.rmsVar``.
    """
    def record_estimates(sample):
        # Evaluate both estimators on `sample`, store them in the fit
        # parameters and append the current `self.bootset` to the
        # bookkeeping dataset.
        interval = ModalInterval(sample)
        interval.setSigmaLevel(1)
        self.mode.setVal(interval.halfSampleMode())
        self.effsigma.setVal(0.5 * interval.length())
        self.bootdata.add(self.bootset)

    # The first recorded entry comes from the original sample itself.
    record_estimates(self.data)

    resampler = Resampler(self.data)
    for _ in range(self.nboot):
        record_estimates(resampler.bootstrap())

    # Central values first, then the errors.
    estimates = (self.mode, self.effsigma)
    for parameter in estimates:
        parameter.setVal(self.bootdata.mean(parameter))
    for parameter in estimates:
        parameter.setError(self.bootdata.rmsVar(parameter).getVal())
def bootstrap(self, repeat=10):
    """Bootstrap the width-ratio graph and summarize the spread per point.

    ``repeat`` replicas of ``self.data`` are drawn with a ``Resampler``;
    for each one ``self.get_width_ratio(replica, self.fractions)`` is
    evaluated and its points are collected per x value.

    Returns a tuple ``(errors_rms, errors_mi1, errors_mi2)``:

    * a ``ROOT.TProfile`` (booked with the ``'s'`` option) filled with
      all replica points,
    * two ``ROOT.TGraphAsymmErrors`` whose points sit at the median of
      the replica values and whose y errors are the distances from the
      median to the ``ModalInterval`` bounds at sigma level 1 and 2,
      respectively.  The x errors are zero.
    """
    fractions = self.fractions
    npoints = len(fractions)

    # The axis starts half a first-fraction below the first point and
    # extends the same amount beyond the last one.
    margin = 0.5 * fractions[0]
    profile = ROOT.TProfile(self.name + '_errors_rms', self.title,
                            npoints, margin, fractions[-1] + margin, 's')
    graph_1sigma = ROOT.TGraphAsymmErrors(npoints)
    graph_2sigma = ROOT.TGraphAsymmErrors(npoints)
    graph_1sigma.SetTitle(self.title)
    graph_2sigma.SetTitle(self.title)

    ## Collect the replica values for each x
    resampler = Resampler(self.data)
    samples_by_x = {}
    for _ in range(repeat):
        ratio = self.get_width_ratio(resampler.bootstrap(), fractions)
        xbuffer = ratio.GetX()
        ybuffer = ratio.GetY()
        for ipoint in range(ratio.GetN()):
            xval = xbuffer[ipoint]
            yval = ybuffer[ipoint]
            profile.Fill(xval, yval)
            samples_by_x.setdefault(xval, []).append(yval)

    ## Turn the collected distributions into median +/- interval bounds
    no_xerror = 0.
    for ipoint, (xval, yvalues) in enumerate(sorted(samples_by_x.items())):
        count = len(yvalues)
        ybuffer = array.array('d', yvalues)
        median = ROOT.TMath.Median(count, ybuffer)
        graph_1sigma.SetPoint(ipoint, xval, median)
        graph_2sigma.SetPoint(ipoint, xval, median)

        interval = ModalInterval(count, ybuffer)
        for nsigma, graph in ((1, graph_1sigma), (2, graph_2sigma)):
            interval.setSigmaLevel(nsigma)
            below = median - interval.lowerBound()
            above = interval.upperBound() - median
            graph.SetPointError(ipoint, no_xerror, no_xerror, below, above)

    return profile, graph_1sigma, graph_2sigma
def makegraph(self):
    """Build ``self.graph``: modal-interval length versus sample fraction.

    The first variable of ``self.data`` is read out of the dataset tree,
    a ``ModalInterval`` is built from the values, and its ``length()`` is
    evaluated for every entry of ``self.fractions``.  The result is
    stored as a ``ROOT.TGraph`` named and titled after the variable.

    Raises:
        RuntimeError: if the dataset row contains no variable.
    """
    row = self.data.get()
    if row.getSize() < 1:
        # Call form of raise: valid on both Python 2 and Python 3.
        raise RuntimeError('Dataset must contain at least one variable!')
    variable = row.first()

    ## Read the values of the variable through the tree draw buffer
    # NOTE(review): TTree::Draw keeps at most GetEstimate() values in the
    # V1 buffer -- confirm the datasets stay below that limit or call
    # SetEstimate() first.
    tree = self.data.tree()
    tree.Draw(variable.GetName(), '', 'goff')
    size = tree.GetSelectedRows()
    first = tree.GetV1()

    ## Scan the interval length over the requested fractions
    modalinterval = ModalInterval(size, first)
    widths = []
    for fraction in self.fractions:
        modalinterval.setFraction(fraction)
        widths.append(modalinterval.length())

    xvalues = array.array('d', self.fractions)
    yvalues = array.array('d', widths)
    self.graph = ROOT.TGraph(len(self.fractions), xvalues, yvalues)

    ## Decorate the graph
    self.graph.SetName(variable.GetName())
    self.graph.SetTitle(variable.GetTitle())
    self.graph.GetXaxis().SetTitle('Sample Fraction')
    ytitle = 'Subsample Width'
    if variable.getUnit():
        ytitle += ' (%s)' % variable.getUnit()
    self.graph.GetYaxis().SetTitle(ytitle)
def get_data(self):
    """Get the RooDataSet with deltaE data and derive the fit samples.

    Reads ``self.emtype``, ``self.src``, ``self.cat``, ``self.numentries``,
    ``self.debuglevel``, ``self.fitmode`` and ``self.numentries_train_max``
    and sets:

    * ``self.filenames`` -- input file names for the chosen ``emtype``,
    * ``self.data``, ``self.data_half_odd``, ``self.data_half_even`` --
      the full dataset and its two entry-parity halves,
    * ``self.modal_interval`` -- ``ModalInterval`` of the full dataset,
    * ``self.train_data``, ``self.fit_data`` -- chosen by ``fitmode``,
    * ``self.modal_interval_training`` -- ``ModalInterval`` of the
      (possibly prescaled) training dataset.

    The title of ``self.deltaE`` is modified several times on the way and
    ends up as the plot axis label.

    Raises:
        RuntimeError: for an ``emtype`` other than ``'pho'``/``'ele'`` or
            an unsupported ``fitmode``.
        KeyError: for a ``src`` or ``cat`` that has no cut defined.
    """
    if self.emtype not in ('pho', 'ele'):
        raise RuntimeError("Illegal emtype: `%s'!" % str(self.emtype))
    is_photon = (self.emtype == 'pho')

    chain = ROOT.TChain('Analysis')
    datapath = '/raid2/veverka/yyTrees/tworeg'

    if is_photon:
        self.filenames = '''
            testSelection.v3.PhotonRun2011AandB30Nov2011v1AOD.preselcut3.sel0.n1cut0.smear0.phtcorr219.phtid1.merged.root
            testSelection.v3.GluGluToHToGG_M-140_7TeV-powheg-pythia6Fall11-PU_S6_START42_V14B-v1AODSIM.preselcut3.sel0.n1cut0.smear3.phtcorr219.phtid1.r1.root
            testSelection.v3.TTH_HToGG_M-140_7TeV-pythia6Fall11-PU_S6_START42_V14B-v1AODSIM.preselcut3.sel0.n1cut0.smear3.phtcorr219.phtid1.r1.root
            testSelection.v3.VBF_HToGG_M-140_7TeV-powheg-pythia6Fall11-PU_S6_START42_V14B-v1AODSIM.preselcut3.sel0.n1cut0.smear3.phtcorr219.phtid1.r1.root
            testSelection.v3.WH_ZH_HToGG_M-140_7TeV-pythia6Fall11-PU_S6_START42_V14B-v1AODSIM.preselcut3.sel0.n1cut0.smear3.phtcorr219.phtid1.r1.root
            '''.split()
    else:
        self.filenames = '''
            testSelectionZeev1.v3.DoubleElectronRun2011A30Nov2011v1AOD.etcut25.corr216.eleid1.datapu0.mcpu0.r*.scale1.root
            testSelectionZeev1.v3.DoubleElectronRun2011B30Nov2011v1AOD.etcut25.corr216.eleid1.datapu0.mcpu0.r*.scale1.root
            testSelectionZeev1.v3.DYJetsToLL_TuneZ2_M50_7TeVmadgraphtauolaFall11PU_S6_START42_V14Bv1AODSIM.etcut25.corr216.eleid1.datapu6.mcpu1.r*.scale0.root
            '''.split()

    for filename in self.filenames:
        chain.Add(os.path.join(datapath, filename))

    ## Selection
    if is_photon:
        cuts = ['100 <= mpair & mpair <= 180']
    else:
        cuts = ['80 <= mpair & mpair <= 100']

    cuts.append({
        'mc': 'runNumber == 1',
        'data': 'runNumber > 1',
        }[self.src])

    cuts.extend({
        'cat0': ['scr9 > 0.94', 'fabs(sceta) < 1.48'],
        'cat1': ['scr9 <= 0.94', 'fabs(sceta) < 1.48'],
        'cat2': ['scr9 > 0.94', 'fabs(sceta) >= 1.48'],
        'cat3': ['scr9 <= 0.94', 'fabs(sceta) >= 1.48'],
        'calcat0': ['scr9 > 0.94', 'fabs(sceta) < 1'],
        'calcat1': ['scr9 < 0.94', 'fabs(sceta) < 1'],
        'calcat2': ['scr9 > 0.94', '1 < fabs(sceta) & fabs(sceta) < 1.48'],
        'calcat3': ['scr9 < 0.94', '1 < fabs(sceta) & fabs(sceta) < 1.48'],
        'calcat4': ['scr9 > 0.94', '1.48 < fabs(sceta) & fabs(sceta) < 2'],
        'calcat5': ['scr9 < 0.94', '1.48 < fabs(sceta) & fabs(sceta) < 2'],
        'calcat6': ['scr9 > 0.94', '2 < fabs(sceta) & fabs(sceta) < 2.5'],
        'calcat7': ['scr9 < 0.94', '2 < fabs(sceta) & fabs(sceta) < 2.5'],
        }[self.cat])

    if self.numentries > 0:
        cuts.append('Entry$ < %d' % self.numentries)

    # presumably dataset.get() takes the expression to evaluate from the
    # title of the variable -- verify against the dataset module
    self.deltaE.SetTitle('200*(scen_bendavid - scen_yangyong)/'
                         ' (scen_bendavid + scen_yangyong)')
    self.data = dataset.get(tree=chain, variable=self.deltaE,
                            cuts=cuts[:],
                            name=self.name + '_data')

    # NOTE: the attribute names are swapped with respect to the entry
    # parity they select ("odd" holds the even entries and vice versa).
    # The two halves are still disjoint; the cuts are left untouched so
    # that existing results stay reproducible.
    self.data_half_odd = dataset.get(tree=chain, variable=self.deltaE,
                                     cuts=cuts[:] + ['Entry$ % 2 == 0'],
                                     name=self.name + '_data_half_odd')
    self.data_half_even = dataset.get(tree=chain, variable=self.deltaE,
                                      cuts=cuts[:] + ['Entry$ % 2 == 1'],
                                      name=self.name + '_data_half_even')

    if self.debuglevel > 0:
        ## Speed things up while debugging
        reduced_range = roo.EventRange(0, 5000)
        self.data = self.data.reduce(reduced_range)
        self.data_half_odd = self.data_half_odd.reduce(reduced_range)
        self.data_half_even = self.data_half_even.reduce(reduced_range)

    # NOTE(review): TTree::Draw keeps at most GetEstimate() values in the
    # V1 buffer -- confirm the datasets stay below that limit or call
    # SetEstimate() first.
    data_tree = self.data.tree()
    nentries = data_tree.Draw('deltaE', '', 'goff')
    self.modal_interval = ModalInterval(nentries, data_tree.GetV1(), 1.)

    ## Pick the training and the fitted sample
    if self.fitmode == 'odd-even':
        self.train_data = self.data_half_odd
        self.fit_data = self.data_half_even
    elif self.fitmode in ('even-odd', 'event-odd'):
        # 'event-odd' is the historical (misspelled) name; it is still
        # accepted so that existing configurations keep working.
        self.train_data = self.data_half_even
        self.fit_data = self.data_half_odd
    elif self.fitmode == 'full-full':
        self.train_data = self.data
        self.fit_data = self.data
    else:
        raise RuntimeError("Fit mode `%s' not supported!" % self.fitmode)

    ## Make sure that the training dataset isn't too large
    if self.train_data.numEntries() > self.numentries_train_max:
        # Floor division keeps the prescale an integer on Python 2 and 3.
        prescale = (
            self.train_data.numEntries() // self.numentries_train_max + 1
            )
        self.deltaE.SetTitle('deltaE')
        self.train_data = dataset.get(
            tree=self.train_data.tree(), variable=self.deltaE,
            cuts=['Entry$ %% %d == 0' % prescale],
            name=self.name + '_train_data'
            )

    train_tree = self.train_data.tree()
    nentries = train_tree.Draw('deltaE', '', 'goff')
    self.modal_interval_training = ModalInterval(
        nentries, train_tree.GetV1(), 0.99
        )

    ## Set a nice title for the x-axis of plots
    if is_photon:
        self.deltaE.SetTitle('Photon #DeltaE_{two regr.}/E')
    else:
        self.deltaE.SetTitle('Electron #DeltaE_{two regr.}/E')
def test():
    """Test the RooRhoKeysPdf class on a toy Breit-Wigner sample.

    For each toy a workspace with a Breit-Wigner model is created, 1000
    events are generated and a ``RooRhoKeysPdf`` is plotted for rho = 1,
    2 and 3.  The sample is then split in two with ``Resampler.prescale``
    and the summed cross negative log-likelihood (pdf built on one half,
    evaluated on the other) is scanned over rho.  A parabola is fitted
    around the scan minimum; its vertex and curvature fill the module
    globals ``hrhoval`` and ``hrhoerr``.

    The objects of interest (``gnlls``, ``hrhoval``, ``hrhoerr``, ``w``,
    ``mi``) are published as module globals so that they remain
    accessible after the function returns.
    """
    global gnlls, hrhoval, hrhoerr, w, mi

    import FWLite.Tools.canvases as canvases
    # presumably imported for its side effects only -- it is not
    # referenced below
    import FWLite.Tools.cmsstyle as cmsstyle

    ROOT.RooRandom.randomGenerator().SetSeed(2)

    hrhoval = ROOT.TH1F('hrhoval', 'hrhoval', 100, 0, 5)
    hrhoerr = ROOT.TH1F('hrhoerr', 'hrhoerr', 100, 0, 1)
    gnlls = []

    for itoy in range(1):
        w = ROOT.RooWorkspace('w', 'w')
        model = w.factory('BreitWigner::model(x[-5, 5], mean[0], sigma[1])')
        x = w.var('x')
        oset = ROOT.RooArgSet(x)
        data = model.generate(oset, 1000)
        w.Import(data)

        # The dataset is referenced by the name 'modelData' under which
        # it is expected to sit in the workspace after the import above.
        testpdf = w.factory(
            'RooRhoKeysPdf::testpdf(x, rho[1, 0, 100], modelData)'
            )
        rho = w.var('rho')

        ## Overlay the data, the true model and the pdf for several rho
        plot = x.frame()
        data.plotOn(plot)
        model.plotOn(plot)
        testpdf.plotOn(plot, roo.LineColor(ROOT.kRed))
        for rhoval, color in [(2, ROOT.kGreen), (3, ROOT.kBlack)]:
            rho.setVal(rhoval)
            # The dataset is reloaded after every change of rho.
            testpdf.LoadDataSet(data)
            testpdf.plotOn(plot, roo.LineColor(color))

        canvases.next('RooRhoKeysPdf_Test%d' % itoy)
        plot.Draw()
        canvases.update()

        ## Split the sample in two halves for the cross validation
        resampler = Resampler(data)
        data0 = resampler.prescale(2, [0], 'data0')
        data1 = resampler.prescale(2, [1], 'data1')
        w.Import(data0)
        w.Import(data1)
        testpdf0 = w.factory('RooRhoKeysPdf::testpdf0(x, rho, data0)')
        testpdf1 = w.factory('RooRhoKeysPdf::testpdf1(x, rho, data1)')

        ## Scan the cross negative log-likelihood over rho
        gnll = ROOT.TGraph()
        for rhoval in [0.5 + 0.05 * i for i in range(50)]:
            rho.setVal(rhoval)
            testpdf0.LoadDataSet(data0)
            testpdf1.LoadDataSet(data1)
            nll = 0
            nll += testpdf0.createNLL(data1).getVal()
            nll += testpdf1.createNLL(data0).getVal()
            gnll.SetPoint(gnll.GetN(), rhoval, nll)

        ## Fit a parabola to +/- 5 scan points around the minimum
        locmin = ROOT.TMath.LocMin(gnll.GetN(), gnll.GetY())
        xmin = gnll.GetX()[max(locmin - 5, 0)]
        xmax = gnll.GetX()[min(locmin + 5, gnll.GetN() - 1)]
        fres = gnll.Fit('pol2', 's', '', xmin, xmax)
        params = fres.Get().GetParams()
        p1 = params[1]
        p2 = params[2]

        if p2 > 0:
            # Vertex of p0 + p1*x + p2*x^2 and the half-width at which
            # the parabola rises by 0.5 above its minimum.
            hrhoval.Fill(-0.5 * p1 / p2)
            hrhoerr.Fill(1 / ROOT.TMath.Sqrt(2 * p2))
        else:
            # A non-positive curvature means the parabola has no minimum;
            # dividing by it would raise or yield NaN.
            print('Toy %d: parabola fit has no minimum (p2 = %g), '
                  'skipping.' % (itoy, p2))

        canvases.next('gnll%d' % itoy)
        gnll.Draw('ap')
        gnll.GetXaxis().SetTitle('#rho')
        gnll.GetYaxis().SetTitle('- log L')
        gnlls.append(gnll)

    canvases.next('rhoerr')
    hrhoerr.Draw()
    canvases.next('rhoval')
    hrhoval.Draw()
    canvases.update()

    from FWLite.Tools.modalinterval import ModalInterval
    mi = ModalInterval(w.data('data0'))
    # Single-argument call form prints identically on Python 2 and 3.
    print(mi.halfSampleMode())