Beispiel #1
0
 def dofit(self):
     """Estimate the mode and effective sigma with bootstrap uncertainties.

     The estimate from the full dataset and from each of the
     ``self.nboot`` bootstrap replicas is written into ``self.mode`` and
     ``self.effsigma`` and recorded by adding ``self.bootset`` to
     ``self.bootdata``.  Afterwards the values are set to the means over
     ``self.bootdata`` and the errors to ``self.bootdata.rmsVar(...)``.
     """
     def record_estimate(sample):
         # Mode from the half-sample method; effective sigma is half the
         # length of the modal interval at sigma level 1.
         interval = ModalInterval(sample)
         interval.setSigmaLevel(1)
         self.mode.setVal(interval.halfSampleMode())
         self.effsigma.setVal(0.5 * interval.length())
         # NOTE(review): presumably bootset holds mode and effsigma, so
         # this stores the values just set -- confirm where it is built.
         self.bootdata.add(self.bootset)

     # The full sample is recorded first, followed by the replicas.
     record_estimate(self.data)
     resampler = Resampler(self.data)
     for _ in range(self.nboot):
         record_estimate(resampler.bootstrap())

     # Central values first, then the errors, for both parameters.
     parameters = (self.mode, self.effsigma)
     for parameter in parameters:
         parameter.setVal(self.bootdata.mean(parameter))
     for parameter in parameters:
         parameter.setError(self.bootdata.rmsVar(parameter).getVal())
Beispiel #2
0
 def bootstrap(self, repeat=10):
     """Bootstrap the width ratio and summarise its spread per fraction.

     Draws ``repeat`` bootstrap replicas of ``self.data``, evaluates
     ``self.get_width_ratio`` on each of them and groups the resulting
     y values by their x value.

     Returns a tuple ``(errors_rms, errors_mi1, errors_mi2)``:

     * ``errors_rms`` -- a ``ROOT.TProfile`` (option 's') filled with
       every (x, y) pair of every replica,
     * ``errors_mi1`` / ``errors_mi2`` -- ``ROOT.TGraphAsymmErrors`` with
       points at the median of y and asymmetric y errors reaching the
       bounds of the modal interval at sigma level 1 / 2.
     """
     fractions = self.fractions
     npoints = len(fractions)
     # The axis extends by half of the first fraction on the upper side
     # and starts at half of the first fraction.
     margin = 0.5 * fractions[0]
     upper_edge = fractions[-1] + margin
     errors_rms = ROOT.TProfile(self.name + '_errors_rms', self.title,
                                npoints, margin, upper_edge, 's')
     errors_mi1 = ROOT.TGraphAsymmErrors(npoints)
     errors_mi2 = ROOT.TGraphAsymmErrors(npoints)
     errors_mi1.SetTitle(self.title)
     errors_mi2.SetTitle(self.title)

     # Collect the bootstrapped y values, keyed by x.
     resampler = Resampler(self.data)
     samples = {}
     for _ in range(repeat):
         ratio = self.get_width_ratio(resampler.bootstrap(), fractions)
         for ipoint in range(ratio.GetN()):
             x = ratio.GetX()[ipoint]
             y = ratio.GetY()[ipoint]
             # Disabled alternative kept from the original:
             # y = ratio.GetY()[ipoint] - self.width_ratio.GetY()[ipoint]
             errors_rms.Fill(x, y)
             samples.setdefault(x, []).append(y)

     # One graph point per x value, in increasing x order.
     for ipoint, (x, ydist) in enumerate(sorted(samples.items())):
         nvalues = len(ydist)
         ybuffer = array.array('d', ydist)
         median = ROOT.TMath.Median(nvalues, ybuffer)
         errors_mi1.SetPoint(ipoint, x, median)
         errors_mi2.SetPoint(ipoint, x, median)
         interval = ModalInterval(nvalues, ybuffer)
         for graph, nsigma in ((errors_mi1, 1), (errors_mi2, 2)):
             interval.setSigmaLevel(nsigma)
             # No x errors; y errors are measured from the median.
             graph.SetPointError(ipoint, 0., 0.,
                                 median - interval.lowerBound(),
                                 interval.upperBound() - median)
     return errors_rms, errors_mi1, errors_mi2
Beispiel #3
0
 def makegraph(self):
     """Build ``self.graph``: subsample width versus sample fraction.

     Takes the first variable of ``self.data``, reads its values from the
     dataset tree and, for every entry of ``self.fractions``, stores the
     length of the ``ModalInterval`` set to that fraction.  The result is
     a ``ROOT.TGraph`` named and titled after the variable.

     Raises:
         RuntimeError: if the dataset row contains no variable.
     """
     row = self.data.get()
     if row.getSize() < 1:
         # Call form of raise: valid on both Python 2 and Python 3.
         raise RuntimeError('Dataset must contain at least one variable!')
     variable = row.first()

     # Fetch the tree once so that Draw and the read-back below are
     # guaranteed to address the same object.
     tree = self.data.tree()
     # 'goff' is the TTree::Draw option that suppresses graphics; the
     # selected values are read back through GetSelectedRows / GetV1.
     tree.Draw(variable.GetName(), '', 'goff')
     modalinterval = ModalInterval(tree.GetSelectedRows(), tree.GetV1())

     widths = []
     for fraction in self.fractions:
         modalinterval.setFraction(fraction)
         widths.append(modalinterval.length())

     self.graph = ROOT.TGraph(len(self.fractions),
                              array.array('d', self.fractions),
                              array.array('d', widths))

     ## Decorate the graph
     self.graph.SetName(variable.GetName())
     self.graph.SetTitle(variable.GetTitle())
     self.graph.GetXaxis().SetTitle('Sample Fraction')
     ytitle = 'Subsample Width'
     unit = variable.getUnit()
     if unit:
         # Append the unit only when the variable declares one.
         ytitle += ' (%s)' % unit
     self.graph.GetYaxis().SetTitle(ytitle)
Beispiel #4
0
 def makegraph(self):
     """Create ``self.graph`` showing subsample width against fraction.

     The first variable of ``self.data`` is drawn (without graphics) from
     the dataset tree; a ``ModalInterval`` over the selected values is
     then asked for its length at every fraction in ``self.fractions``.
     The widths are stored in a ``ROOT.TGraph`` that takes its name and
     title from the variable.

     Raises:
         RuntimeError: if the dataset row contains no variable.
     """
     row = self.data.get()
     if row.getSize() < 1:
         # Parenthesised raise works on Python 2 and Python 3 alike.
         raise RuntimeError('Dataset must contain at least one variable!')
     variable = row.first()

     # A single tree handle is used for Draw and for reading the result.
     tree = self.data.tree()
     # 'goff' is the TTree::Draw option that suppresses graphics; the
     # values are picked up afterwards via GetSelectedRows / GetV1.
     tree.Draw(variable.GetName(), '', 'goff')
     size = tree.GetSelectedRows()
     first = tree.GetV1()
     modalinterval = ModalInterval(size, first)

     widths = []
     for fraction in self.fractions:
         modalinterval.setFraction(fraction)
         widths.append(modalinterval.length())

     xvalues = array.array('d', self.fractions)
     yvalues = array.array('d', widths)
     self.graph = ROOT.TGraph(len(self.fractions), xvalues, yvalues)

     ## Decorate the graph
     self.graph.SetName(variable.GetName())
     self.graph.SetTitle(variable.GetTitle())
     self.graph.GetXaxis().SetTitle('Sample Fraction')
     ytitle = 'Subsample Width'
     unit = variable.getUnit()
     if unit:
         # The unit is appended only when the variable declares one.
         ytitle += ' (%s)' % unit
     self.graph.GetYaxis().SetTitle(ytitle)
Beispiel #5
0
 def bootstrap(self, repeat=10):
     """Estimate the spread of the width ratio by bootstrapping.

     ``repeat`` replicas of ``self.data`` are drawn; for each one
     ``self.get_width_ratio`` is evaluated and its (x, y) points are
     accumulated, grouped by x.

     Returns a tuple ``(errors_rms, errors_mi1, errors_mi2)``:

     * ``errors_rms`` -- ``ROOT.TProfile`` (option 's') filled with all
       (x, y) pairs,
     * ``errors_mi1`` / ``errors_mi2`` -- ``ROOT.TGraphAsymmErrors`` whose
       points are the medians of y, with asymmetric y errors extending to
       the modal-interval bounds at sigma level 1 / 2.
     """
     fractions = self.fractions
     npoints = len(fractions)
     # Lower edge is half of the first fraction; the same amount is added
     # above the last fraction.
     margin = 0.5 * fractions[0]
     upper_edge = fractions[-1] + margin
     errors_rms = ROOT.TProfile(self.name + '_errors_rms', self.title,
                                npoints, margin, upper_edge, 's')
     errors_mi1 = ROOT.TGraphAsymmErrors(npoints)
     errors_mi2 = ROOT.TGraphAsymmErrors(npoints)
     errors_mi1.SetTitle(self.title)
     errors_mi2.SetTitle(self.title)

     # Accumulate the replica results, keyed by x.
     resampler = Resampler(self.data)
     samples = {}
     for _ in range(repeat):
         ratio = self.get_width_ratio(resampler.bootstrap(), fractions)
         for ipoint in range(ratio.GetN()):
             x = ratio.GetX()[ipoint]
             y = ratio.GetY()[ipoint]
             # Disabled alternative kept from the original:
             # y = ratio.GetY()[ipoint] - self.width_ratio.GetY()[ipoint]
             errors_rms.Fill(x, y)
             samples.setdefault(x, []).append(y)

     # Fill the graphs in increasing x order.
     for ipoint, (x, ydist) in enumerate(sorted(samples.items())):
         nvalues = len(ydist)
         ybuffer = array.array('d', ydist)
         median = ROOT.TMath.Median(nvalues, ybuffer)
         errors_mi1.SetPoint(ipoint, x, median)
         errors_mi2.SetPoint(ipoint, x, median)
         interval = ModalInterval(nvalues, ybuffer)
         for graph, nsigma in ((errors_mi1, 1), (errors_mi2, 2)):
             interval.setSigmaLevel(nsigma)
             # x errors are zero; y errors are relative to the median.
             graph.SetPointError(ipoint, 0., 0.,
                                 median - interval.lowerBound(),
                                 interval.upperBound() - median)
     return errors_rms, errors_mi1, errors_mi2
Beispiel #6
0
    def get_data(self):
        """Load the deltaE datasets and derive the training/fit samples.

        Chains the 'Analysis' trees of the input files for ``self.emtype``
        ('pho' or 'ele') and builds the selection from the mass window,
        ``self.src`` ('mc' or 'data'), ``self.cat`` and, when positive,
        ``self.numentries``.  The following attributes are set:

        * ``self.filenames`` -- input file names, relative to the data path,
        * ``self.data`` -- all selected entries,
        * ``self.data_half_odd`` / ``self.data_half_even`` -- selected
          entries with ``Entry$ % 2 == 0`` / ``Entry$ % 2 == 1``,
        * ``self.modal_interval`` -- ``ModalInterval`` over deltaE in
          ``self.data``,
        * ``self.train_data`` / ``self.fit_data`` -- chosen by
          ``self.fitmode`` ('odd-even', 'even-odd' or 'full-full'); the
          training set is prescaled when it exceeds
          ``self.numentries_train_max``,
        * ``self.modal_interval_training`` -- ``ModalInterval`` over deltaE
          in the training data.

        The title of ``self.deltaE`` is used as the expression passed to
        ``dataset.get`` and is finally replaced by an axis label.

        Raises:
            RuntimeError: for an unsupported ``self.emtype`` or
                ``self.fitmode``.
            KeyError: for an unknown ``self.src`` or ``self.cat``.
        """
        chain = ROOT.TChain('Analysis')
        datapath = '/raid2/veverka/yyTrees/tworeg'

        # Everything that depends on the e/gamma type is decided once here:
        # the input files, the mass window and the final axis label.
        if self.emtype == 'pho':
            self.filenames = [
                'testSelection.v3.PhotonRun2011AandB30Nov2011v1AOD.preselcut3.sel0.n1cut0.smear0.phtcorr219.phtid1.merged.root',
                'testSelection.v3.GluGluToHToGG_M-140_7TeV-powheg-pythia6Fall11-PU_S6_START42_V14B-v1AODSIM.preselcut3.sel0.n1cut0.smear3.phtcorr219.phtid1.r1.root',
                'testSelection.v3.TTH_HToGG_M-140_7TeV-pythia6Fall11-PU_S6_START42_V14B-v1AODSIM.preselcut3.sel0.n1cut0.smear3.phtcorr219.phtid1.r1.root',
                'testSelection.v3.VBF_HToGG_M-140_7TeV-powheg-pythia6Fall11-PU_S6_START42_V14B-v1AODSIM.preselcut3.sel0.n1cut0.smear3.phtcorr219.phtid1.r1.root',
                'testSelection.v3.WH_ZH_HToGG_M-140_7TeV-pythia6Fall11-PU_S6_START42_V14B-v1AODSIM.preselcut3.sel0.n1cut0.smear3.phtcorr219.phtid1.r1.root',
            ]
            cuts = ['100 <= mpair & mpair <= 180']
            axis_title = 'Photon #DeltaE_{two regr.}/E'
        elif self.emtype == 'ele':
            self.filenames = [
                'testSelectionZeev1.v3.DoubleElectronRun2011A30Nov2011v1AOD.etcut25.corr216.eleid1.datapu0.mcpu0.r*.scale1.root',
                'testSelectionZeev1.v3.DoubleElectronRun2011B30Nov2011v1AOD.etcut25.corr216.eleid1.datapu0.mcpu0.r*.scale1.root',
                'testSelectionZeev1.v3.DYJetsToLL_TuneZ2_M50_7TeVmadgraphtauolaFall11PU_S6_START42_V14Bv1AODSIM.etcut25.corr216.eleid1.datapu6.mcpu1.r*.scale0.root',
            ]
            cuts = ['80 <= mpair & mpair <= 100']
            axis_title = 'Electron #DeltaE_{two regr.}/E'
        else:
            raise RuntimeError("Illegal emtype: `%s'!" % str(self.emtype))

        for filename in self.filenames:
            chain.Add(os.path.join(datapath, filename))

        ## Selection
        cuts.append({
            'mc': 'runNumber == 1',
            'data': 'runNumber >  1'
        }[self.src])
        # NOTE(review): the calcat* categories use strict inequalities on
        # both sides of each boundary, so entries sitting exactly on
        # scr9 == 0.94, |sceta| == 1 or |sceta| == 1.48 fall in no calcat.
        cuts.extend({
            'cat0': ['scr9 >  0.94', 'fabs(sceta) <  1.48'],
            'cat1': ['scr9 <= 0.94', 'fabs(sceta) <  1.48'],
            'cat2': ['scr9 >  0.94', 'fabs(sceta) >= 1.48'],
            'cat3': ['scr9 <= 0.94', 'fabs(sceta) >= 1.48'],
            'calcat0': ['scr9 >  0.94', 'fabs(sceta) <  1'],
            'calcat1': ['scr9 <  0.94', 'fabs(sceta) <  1'],
            'calcat2':
            ['scr9 >  0.94', '1 < fabs(sceta) & fabs(sceta) <  1.48'],
            'calcat3':
            ['scr9 <  0.94', '1 < fabs(sceta) & fabs(sceta) <  1.48'],
            'calcat4':
            ['scr9 >  0.94', '1.48 < fabs(sceta) & fabs(sceta) <  2'],
            'calcat5':
            ['scr9 <  0.94', '1.48 < fabs(sceta) & fabs(sceta) <  2'],
            'calcat6': ['scr9 >  0.94', '2 < fabs(sceta) & fabs(sceta) < 2.5'],
            'calcat7': ['scr9 <  0.94', '2 < fabs(sceta) & fabs(sceta) < 2.5'],
        }[self.cat])

        if self.numentries > 0:
            cuts.append('Entry$ < %d' % self.numentries)

        # The variable's title doubles as the tree expression to evaluate.
        self.deltaE.SetTitle('200*(scen_bendavid - scen_yangyong)/'
                             '    (scen_bendavid + scen_yangyong)')
        # Each call receives its own list so that dataset.get cannot alter
        # the selection used by the other calls.
        self.data = dataset.get(tree=chain,
                                variable=self.deltaE,
                                cuts=cuts[:],
                                name=self.name + '_data')
        self.data_half_odd = dataset.get(tree=chain,
                                         variable=self.deltaE,
                                         cuts=cuts + ['Entry$ % 2 == 0'],
                                         name=self.name + '_data_half_odd')
        self.data_half_even = dataset.get(tree=chain,
                                          variable=self.deltaE,
                                          cuts=cuts + ['Entry$ % 2 == 1'],
                                          name=self.name + '_data_half_even')
        if self.debuglevel > 0:
            # Debugging: keep only the event range 0..5000 of each dataset.
            reduced_range = roo.EventRange(0, 5000)
            self.data = self.data.reduce(reduced_range)
            self.data_half_odd = self.data_half_odd.reduce(reduced_range)
            self.data_half_even = self.data_half_even.reduce(reduced_range)

        nentries = self.data.tree().Draw('deltaE', '', 'goff')
        self.modal_interval = ModalInterval(nentries,
                                            self.data.tree().GetV1(), 1.)

        if self.fitmode == 'odd-even':
            self.train_data = self.data_half_odd
            self.fit_data = self.data_half_even
        elif self.fitmode in ('even-odd', 'event-odd'):
            # 'event-odd' is the original (misspelled) name; it is still
            # accepted so that existing configurations keep working.
            self.train_data = self.data_half_even
            self.fit_data = self.data_half_odd
        elif self.fitmode == 'full-full':
            self.train_data = self.data
            self.fit_data = self.data
        else:
            raise RuntimeError("Fit mode `%s' not supported!" % self.fitmode)

        ## Make sure that the training dataset isn't too large
        ntrain = self.train_data.numEntries()
        if ntrain > self.numentries_train_max:
            # Explicit floor division: same result as Python 2 integer '/'.
            prescale = ntrain // self.numentries_train_max + 1
            # The dataset tree already holds the computed value as 'deltaE'.
            self.deltaE.SetTitle('deltaE')
            self.train_data = dataset.get(
                tree=self.train_data.tree(),
                variable=self.deltaE,
                cuts=['Entry$ %% %d == 0' % prescale],
                name=self.name + '_train_data')
        nentries = self.train_data.tree().Draw('deltaE', '', 'goff')
        self.modal_interval_training = ModalInterval(
            nentries,
            self.train_data.tree().GetV1(), 0.99)

        ## Set a nice title for the x-axis of plots
        self.deltaE.SetTitle(axis_title)
Beispiel #7
0
 def dofit(self):
     """Fit mode and effective sigma, with errors from the bootstrap.

     The full dataset and ``self.nboot`` bootstrap replicas are each
     turned into one estimate: the values are written into ``self.mode``
     and ``self.effsigma`` and ``self.bootset`` is added to
     ``self.bootdata``.  The final values are the means over
     ``self.bootdata``; the errors come from ``self.bootdata.rmsVar``.
     """
     def record_estimate(sample):
         # Half-sample mode, and half the modal-interval length at sigma
         # level 1 as the effective sigma.
         interval = ModalInterval(sample)
         interval.setSigmaLevel(1)
         self.mode.setVal(interval.halfSampleMode())
         self.effsigma.setVal(0.5 * interval.length())
         # NOTE(review): presumably bootset holds mode and effsigma, so
         # this stores the values just set -- confirm where it is built.
         self.bootdata.add(self.bootset)

     # Full sample first, then one entry per replica.
     record_estimate(self.data)
     resampler = Resampler(self.data)
     for _ in range(self.nboot):
         record_estimate(resampler.bootstrap())

     # Set both central values before either error.
     parameters = (self.mode, self.effsigma)
     for parameter in parameters:
         parameter.setVal(self.bootdata.mean(parameter))
     for parameter in parameters:
         parameter.setError(self.bootdata.rmsVar(parameter).getVal())
Beispiel #8
0
def test():
    '''
    Tests the RooRhoKeysPdf class.

    Generates 1000 events from a Breit-Wigner model, overlays the
    RooRhoKeysPdf for rho = 1, 2 and 3 on the data, and then scans rho
    from 0.5 to 2.95 in steps of 0.05.  At each scan point the dataset
    halves (prescale 2, offsets 0 and 1) are cross-evaluated: the NLL of
    each half under the pdf built from the other half is summed.  A pol2
    fit around the minimum gives the rho estimate and its error, which
    are filled into histograms.  Finally the half-sample mode of the
    first half is printed.

    Objects are published as module globals (gnlls, hrhoval, hrhoerr, w,
    mi), presumably so they stay available for interactive inspection.
    '''
    import FWLite.Tools.canvases as canvases
    import FWLite.Tools.cmsstyle as cmsstyle

    global gnlls, hrhoval, hrhoerr, w, mi

    ROOT.RooRandom.randomGenerator().SetSeed(2)
    hrhoval = ROOT.TH1F('hrhoval', 'hrhoval', 100, 0, 5)
    hrhoerr = ROOT.TH1F('hrhoerr', 'hrhoerr', 100, 0, 1)
    gnlls = []
    for itoy in range(1):
        w = ROOT.RooWorkspace('w', 'w')
        # model = w.factory('Gaussian::model(x[-50, 50], mean[0], sigma[1])')
        model = w.factory('BreitWigner::model(x[-5, 5], mean[0], sigma[1])')
        x = w.var('x')
        oset = ROOT.RooArgSet(x)
        data = model.generate(oset, 1000)
        w.Import(data)
        # rho = w.factory('rho[1, 0, 100]')
        # testpdf = RooRhoKeysPdf('testpdf', 'testpdf', x, rho, data)
        # w.Import(testpdf)
        testpdf = w.factory('RooRhoKeysPdf::testpdf(x, rho[1, 0, 100], modelData)')
        rho = w.var('rho')
        plot = x.frame()
        data.plotOn(plot)
        model.plotOn(plot)
        testpdf.plotOn(plot, roo.LineColor(ROOT.kRed))

        # The dataset is reloaded after each change of rho before the
        # pdf is plotted again.
        for rhoval, color in ((2, ROOT.kGreen), (3, ROOT.kBlack)):
            rho.setVal(rhoval)
            testpdf.LoadDataSet(data)
            testpdf.plotOn(plot, roo.LineColor(color))

        canvases.next('RooRhoKeysPdf_Test%d' % itoy)
        plot.Draw()
        canvases.update()

        # Split the sample into two halves for the cross-validation.
        resampler = Resampler(data)
        data0 = resampler.prescale(2, [0], 'data0')
        data1 = resampler.prescale(2, [1], 'data1')
        w.Import(data0)
        w.Import(data1)
        testpdf0 = w.factory('RooRhoKeysPdf::testpdf0(x, rho, data0)')
        testpdf1 = w.factory('RooRhoKeysPdf::testpdf1(x, rho, data1)')

        gnll = ROOT.TGraph()
        for rhoval in [0.5 + 0.05 * i for i in range(50)]:
            rho.setVal(rhoval)
            testpdf0.LoadDataSet(data0)
            testpdf1.LoadDataSet(data1)
            # Each pdf is evaluated on the half it was NOT built from.
            nll = 0
            nll += testpdf0.createNLL(data1).getVal()
            nll += testpdf1.createNLL(data0).getVal()
            gnll.SetPoint(gnll.GetN(), rhoval, nll)

        # Parabola fit over up to 5 scan points on either side of the
        # minimum; its vertex is -p1 / (2 p2) and the error estimate is
        # 1 / sqrt(2 p2).
        locmin = ROOT.TMath.LocMin(gnll.GetN(), gnll.GetY())
        xmin = gnll.GetX()[max(locmin - 5, 0)]
        xmax = gnll.GetX()[min(locmin + 5, gnll.GetN() - 1)]
        fres = gnll.Fit('pol2', 's', '', xmin, xmax)
        p1 = fres.Get().GetParams()[1]
        p2 = fres.Get().GetParams()[2]
        rhoval = - 0.5 * p1 / p2
        rhoerr = 1 / ROOT.TMath.Sqrt(2 * p2)
        hrhoval.Fill(rhoval)
        hrhoerr.Fill(rhoerr)

        canvases.next('gnll%d' % itoy)
        gnll.Draw('ap')
        gnll.GetXaxis().SetTitle('#rho')
        gnll.GetYaxis().SetTitle('- log L')
        gnlls.append(gnll)
    canvases.next('rhoerr')
    hrhoerr.Draw()
    canvases.next('rhoval')
    hrhoval.Draw()
    canvases.update()

    from FWLite.Tools.modalinterval import ModalInterval
    mi = ModalInterval(w.data('data0'))
    # Call form of print: identical output on Python 2 for a single
    # argument, and valid on Python 3.
    print(mi.halfSampleMode())