def get_dataset(data_name, varname, max_entries=-1, option='merged'):
    '''Return one dataset of `varname` read from the tree of `data_name`.

    A dataset is made for every (expression, selection) pair of
    config_map[varname].  The second and later datasets are appended to the
    first one, which is then returned.  If max_entries > 0, the cut
    'Entry$ < max_entries' is added to every selection.
    '''
    tree = trees.get(data_name, option)
    cfg = config_map[varname]
    pieces = []
    for expression, selection in zip(cfg.expressions, cfg.selections):
        ## Only a binning string with exactly three comma-separated fields
        ## (bins, lower edge, upper edge) is used to set range and bins.
        if not hasattr(cfg, 'binning') or len(cfg.binning.split(',')) != 3:
            xvar = ROOT.RooRealVar(cfg.name, expression)
        else:
            nbins, xlow, xhigh = [float(token)
                                  for token in cfg.binning.split(',')]
            xvar = ROOT.RooRealVar(cfg.name, expression, xlow, xhigh)
            xvar.setBins(int(nbins))
        selection_cuts = [selection]
        if max_entries > 0:
            selection_cuts.append('Entry$ < %d' % max_entries)
        piece = datasetly.get(tree=tree, variable=xvar, cuts=selection_cuts)
        ## The variable was created with the expression in place of the
        ## title; give the one stored in the dataset its title and unit.
        stored = piece.get().first()
        stored.SetTitle(cfg.title)
        stored.setUnit(cfg.unit)
        pieces.append(piece)
    ## Merge everything into the first dataset.
    merged = pieces[0]
    for extra in pieces[1:]:
        merged.append(extra)
    return merged
def get_dataset(data_name, varname, max_entries=-1, option='merged'):
    '''Return one dataset of `varname` read from the tree of `data_name`.

    A dataset is made for every (expression, selection) pair of
    config_map[varname].  The second and later datasets are appended to the
    first one, which is then returned.  If max_entries > 0, the cut
    'Entry$ < max_entries' is added to every selection.
    '''
    tree = trees.get(data_name, option)
    cfg = config_map[varname]
    pieces = []
    for expression, selection in zip(cfg.expressions, cfg.selections):
        ## Only a binning string with exactly three comma-separated fields
        ## (bins, lower edge, upper edge) is used to set range and bins.
        if not hasattr(cfg, 'binning') or len(cfg.binning.split(',')) != 3:
            xvar = ROOT.RooRealVar(cfg.name, expression)
        else:
            nbins, xlow, xhigh = [float(token)
                                  for token in cfg.binning.split(',')]
            xvar = ROOT.RooRealVar(cfg.name, expression, xlow, xhigh)
            xvar.setBins(int(nbins))
        selection_cuts = [selection]
        if max_entries > 0:
            selection_cuts.append('Entry$ < %d' % max_entries)
        piece = datasetly.get(tree=tree, variable=xvar, cuts=selection_cuts)
        ## The variable was created with the expression in place of the
        ## title; give the one stored in the dataset its title and unit.
        stored = piece.get().first()
        stored.SetTitle(cfg.title)
        stored.setUnit(cfg.unit)
        pieces.append(piece)
    ## Merge everything into the first dataset.
    merged = pieces[0]
    for extra in pieces[1:]:
        merged.append(extra)
    return merged
Example #3
0
 def __init__(self,
              name,
              varname,
              option,
              max_entries,
              prescale,
              prescale_phase=0):
     '''Read the raw dataset of `varname` from the tree of `name`.

     A dataset is made for every (expression, selection) pair of
     config_map[varname].  They are merged into the first one, which is
     stored as self.data; its first variable is stored as self.xvar.

     The tree can be prescaled in two mutually exclusive ways:
       * max_entries > 0 sets prescale = ceil(tree entries / max_entries);
       * prescale > 1 is used as given.
     With a prescale > 1 only entries passing
     'Entry$ % prescale == prescale_phase' are read.

     Raises RuntimeError if both max_entries > 0 and prescale > 1 are given.
     '''
     print('DEBUG %s __init__' % self.__class__.__name__)
     ## max_entries overrides prescale below, so asking for both at once is
     ## contradictory.  The check used to be just `prescale > 1', which
     ## rejected every explicit prescale and left the argument unusable.
     if max_entries > 0 and prescale > 1:
         msg = ', '.join(
             ['max_entries=%d' % max_entries,
              'prescale=%d' % prescale])
         raise RuntimeError('Illegal arguments ' + msg)
     self.name = name
     self.varname = varname
     tree = trees.get(name, option)
     cfg = config_map[varname]
     ## The prescale does not depend on the expression-selection pair, so
     ## it is worked out once, before the loop.
     if max_entries > 0:
         all_entries = float(tree.GetEntries())
         prescale = ROOT.TMath.CeilNint(all_entries / max_entries)
     prescale_cut = None
     if prescale > 1:
         prescale_cut = 'Entry$ %% %d == %d' % (prescale, prescale_phase)
     datasets = []
     ## Get a dataset for each expression-selection pair
     for expr, selection in zip(cfg.expressions, cfg.selections):
         if hasattr(cfg, 'qqbinning') and len(
                 cfg.qqbinning.split(',')) == 3:
             nbins, varmin, varmax = map(float, cfg.qqbinning.split(','))
             variable = ROOT.RooRealVar(cfg.name, expr, varmin, varmax)
             variable.setBins(int(nbins))
         else:
             variable = ROOT.RooRealVar(cfg.name, expr, 0.)
         cuts = [selection]
         ## Adds an appropriate prescale
         if prescale_cut is not None:
             print('Prescaling %s: %s' % (name, prescale_cut))
             cuts.append(prescale_cut)
         dataset = datasetly.get(tree=tree, variable=variable, cuts=cuts)
         ## The variable was created with the expression in place of the
         ## title; give the one stored in the dataset its title and unit.
         variable = dataset.get().first()
         variable.SetTitle(cfg.title)
         variable.setUnit(cfg.unit)
         datasets.append(dataset)
     ## End of loop over expressions and selections
     dataset = datasets[0]
     for further_dataset in datasets[1:]:
         dataset.append(further_dataset)
     ## Name and title use the part of `name' before the first dash.
     dataset.SetTitle('Raw ' + name.split('-')[0].capitalize())
     dataset.SetName('raw_' + name.split('-')[0])
     print('max_entries, numEntries %s %s' % (max_entries,
                                              dataset.numEntries()))
     self.data = dataset
     self.xvar = dataset.get().first()
Example #4
0
    def get_samples(self):
        '''Read the data and MC datasets and import both in the workspace.

        While the trees are read, the titles of the workspace variables are
        replaced by the TTree expressions used to calculate them.  The
        original titles are put back before the datasets are imported.
        '''
        ## Yong's trees with the default CMSSW photon cluster corrections
        chains = esChains.getChains('v13')

        ## Variable name -> TTree expression that calculates it.
        expression_map = {
            'mmg': 'mmg',
            'mm': 'mm',
            'gpt': 'gamenergy/cosh(gameta)',
            'geta': 'gameta',
            'r9': 'gamr9',
            'sihih': '100*gamsigmaIetaIeta',
            'weight': 'evtweight',
            'sphi': 'gamscphiWidth',
            'seta': 'gamscetaWidth',
        }

        ## Swap in the expressions and remember the titles they replace.
        saved_titles = self.replace_variable_titles(expression_map)

        ## The weight is passed on its own, not among the variables.
        variables = []
        for xname in expression_map:
            if xname == 'weight':
                continue
            variables.append(self.w.var(xname))
        weight = self.w.var('weight')

        ## (key of the chain, name of the dataset); data first, then MC.
        samples = []
        for chain_key, sample_name in (('data', 'data'), ('z', 'mc')):
            samples.append(dataset.get(tree=chains[chain_key],
                                       variables=variables,
                                       weight=weight,
                                       cuts=self.cuts[:],
                                       name=sample_name))

        ## Put the original titles back.
        self.replace_variable_titles(saved_titles)

        for sample in samples:
            self.w.Import(sample)
def getdata():
    '''
    Gets a dataset of the module-level variable for each tree of
    get_trees(tree_version), using the weight of the same source and the
    module-level cuts.  Returns a dictionary mapping sources to datasets.
    '''
    source_trees = get_trees(tree_version)
    return dict(
        (source, dataset.get(tree=tree, variable=variable,
                             weight=weight[source], cuts=cuts))
        for source, tree in source_trees.items()
    )
Example #6
0
 def get_merged_dataset(self):
     '''Read all the sources from self.tree into self.merged_dataset.

     Every source gets a clone of self.variable named after the source.
     The title of a clone is the source name while the dataset is read and
     the source title afterwards.  The clones are kept in
     self.merged_variables.
     '''
     clones = self.merged_variables = []
     for source in self.sources:
         clone = self.variable.Clone(source.name)
         clone.SetTitle(source.name)
         clones.append(clone)
     self.merged_dataset = dataset.get(tree=self.tree,
                                       variables=self.merged_variables)
     ## The dataset is read; switch the titles to the ones of the sources.
     for source, clone in zip(self.sources, self.merged_variables):
         clone.SetTitle(source.title)
Example #7
0
 def get_merged_dataset(self):
     '''Read all the sources from self.tree into self.merged_dataset.

     Every source gets a clone of self.variable named after the source.
     The title of a clone is the source name while the dataset is read and
     the source title afterwards.  The clones are kept in
     self.merged_variables.
     '''
     clones = self.merged_variables = []
     for source in self.sources:
         clone = self.variable.Clone(source.name)
         clone.SetTitle(source.name)
         clones.append(clone)
     self.merged_dataset = dataset.get(tree=self.tree,
                                       variables=self.merged_variables)
     ## The dataset is read; switch the titles to the ones of the sources.
     for source, clone in zip(self.sources, self.merged_variables):
         clone.SetTitle(source.title)
Example #8
0
 def __init__(self, name, varname, option, max_entries, prescale,
              prescale_phase=0):
     '''Read the raw dataset of `varname` from the tree of `name`.

     A dataset is made for every (expression, selection) pair of
     config_map[varname].  They are merged into the first one, which is
     stored as self.data; its first variable is stored as self.xvar.

     The tree can be prescaled in two mutually exclusive ways:
       * max_entries > 0 sets prescale = ceil(tree entries / max_entries);
       * prescale > 1 is used as given.
     With a prescale > 1 only entries passing
     'Entry$ % prescale == prescale_phase' are read.

     Raises RuntimeError if both max_entries > 0 and prescale > 1 are given.
     '''
     print('DEBUG %s __init__' % self.__class__.__name__)
     ## max_entries overrides prescale below, so asking for both at once is
     ## contradictory.  The check used to be just `prescale > 1', which
     ## rejected every explicit prescale and left the argument unusable.
     if max_entries > 0 and prescale > 1:
         msg = ', '.join(['max_entries=%d' % max_entries,
                           'prescale=%d' % prescale])
         raise RuntimeError('Illegal arguments ' + msg)
     self.name = name
     self.varname = varname
     tree = trees.get(name, option)
     cfg = config_map[varname]
     ## The prescale does not depend on the expression-selection pair, so
     ## it is worked out once, before the loop.
     if max_entries > 0:
         all_entries = float(tree.GetEntries())
         prescale = ROOT.TMath.CeilNint(all_entries / max_entries)
     prescale_cut = None
     if prescale > 1:
         prescale_cut = 'Entry$ %% %d == %d' % (prescale, prescale_phase)
     datasets = []
     ## Get a dataset for each expression-selection pair
     for expr, selection in zip(cfg.expressions, cfg.selections):
         if hasattr(cfg, 'qqbinning') and len(cfg.qqbinning.split(',')) == 3:
             nbins, varmin, varmax = map(float, cfg.qqbinning.split(','))
             variable = ROOT.RooRealVar(cfg.name, expr, varmin, varmax)
             variable.setBins(int(nbins))
         else:
             variable = ROOT.RooRealVar(cfg.name, expr, 0.)
         cuts = [selection]
         ## Adds an appropriate prescale
         if prescale_cut is not None:
             print('Prescaling %s: %s' % (name, prescale_cut))
             cuts.append(prescale_cut)
         dataset = datasetly.get(tree=tree, variable=variable, cuts=cuts)
         ## The variable was created with the expression in place of the
         ## title; give the one stored in the dataset its title and unit.
         variable = dataset.get().first()
         variable.SetTitle(cfg.title)
         variable.setUnit(cfg.unit)
         datasets.append(dataset)
     ## End of loop over expressions and selections
     dataset = datasets[0]
     for further_dataset in datasets[1:]:
         dataset.append(further_dataset)
     ## Name and title use the part of `name' before the first dash.
     dataset.SetTitle('Raw ' + name.split('-')[0].capitalize())
     dataset.SetName('raw_' + name.split('-')[0])
     print('max_entries, numEntries %s %s' % (max_entries,
                                              dataset.numEntries()))
     self.data = dataset
     self.xvar = dataset.get().first()
Example #9
0
    def get_samples(self):
        '''Read the data and MC datasets and import both in the workspace.

        While the trees are read, the titles of the workspace variables are
        replaced by the TTree expressions used to calculate them.  The
        original titles are put back before the datasets are imported.
        '''
        ## Yong's trees with the default CMSSW photon cluster corrections
        chains = esChains.getChains('v13')

        ## Variable name -> TTree expression that calculates it.
        expression_map = {
            'mmg': 'mmg',
            'mm': 'mm',
            'gpt': 'gamenergy/cosh(gameta)',
            'geta': 'gameta',
            'r9': 'gamr9',
            'sihih': '100*gamsigmaIetaIeta',
            'weight': 'evtweight',
            'sphi': 'gamscphiWidth',
            'seta': 'gamscetaWidth',
        }

        ## Swap in the expressions and remember the titles they replace.
        saved_titles = self.replace_variable_titles(expression_map)

        ## The weight is passed on its own, not among the variables.
        variables = []
        for xname in expression_map:
            if xname == 'weight':
                continue
            variables.append(self.w.var(xname))
        weight = self.w.var('weight')

        ## (key of the chain, name of the dataset); data first, then MC.
        samples = []
        for chain_key, sample_name in (('data', 'data'), ('z', 'mc')):
            samples.append(dataset.get(tree=chains[chain_key],
                                       variables=variables,
                                       weight=weight,
                                       cuts=self.cuts[:],
                                       name=sample_name))

        ## Put the original titles back.
        self.replace_variable_titles(saved_titles)

        for sample in samples:
            self.w.Import(sample)
Example #10
0
    def get_data(self):
        'Gets the RooDataSet with deltaE data.'
        chain = ROOT.TChain('Analysis')
        datapath = '/raid2/veverka/yyTrees/tworeg'

        if self.emtype == 'pho':
            self.filenames = '''
testSelection.v3.PhotonRun2011AandB30Nov2011v1AOD.preselcut3.sel0.n1cut0.smear0.phtcorr219.phtid1.merged.root

testSelection.v3.GluGluToHToGG_M-140_7TeV-powheg-pythia6Fall11-PU_S6_START42_V14B-v1AODSIM.preselcut3.sel0.n1cut0.smear3.phtcorr219.phtid1.r1.root

testSelection.v3.TTH_HToGG_M-140_7TeV-pythia6Fall11-PU_S6_START42_V14B-v1AODSIM.preselcut3.sel0.n1cut0.smear3.phtcorr219.phtid1.r1.root

testSelection.v3.VBF_HToGG_M-140_7TeV-powheg-pythia6Fall11-PU_S6_START42_V14B-v1AODSIM.preselcut3.sel0.n1cut0.smear3.phtcorr219.phtid1.r1.root

testSelection.v3.WH_ZH_HToGG_M-140_7TeV-pythia6Fall11-PU_S6_START42_V14B-v1AODSIM.preselcut3.sel0.n1cut0.smear3.phtcorr219.phtid1.r1.root
'''.split()
        elif self.emtype == 'ele':
            self.filenames = '''
testSelectionZeev1.v3.DoubleElectronRun2011A30Nov2011v1AOD.etcut25.corr216.eleid1.datapu0.mcpu0.r*.scale1.root

testSelectionZeev1.v3.DoubleElectronRun2011B30Nov2011v1AOD.etcut25.corr216.eleid1.datapu0.mcpu0.r*.scale1.root
  
testSelectionZeev1.v3.DYJetsToLL_TuneZ2_M50_7TeVmadgraphtauolaFall11PU_S6_START42_V14Bv1AODSIM.etcut25.corr216.eleid1.datapu6.mcpu1.r*.scale0.root
'''.split()
        else:
            raise RuntimeError, "Illegal emtype: `%s'!" % str(self.emtype)

        for f in self.filenames:
            chain.Add(os.path.join(datapath, f))

        ## Selection
        if self.emtype == 'pho':
            cuts = ['100 <= mpair & mpair <= 180']
        elif self.emtype == 'ele':
            cuts = ['80 <= mpair & mpair <= 100']
        else:
            raise RuntimeError, "Illegal emtype: `%s'!" % str(self.emtype)

        cuts.append({
            'mc': 'runNumber == 1',
            'data': 'runNumber >  1'
        }[self.src])
        cuts.extend({
            'cat0': ['scr9 >  0.94', 'fabs(sceta) <  1.48'],
            'cat1': ['scr9 <= 0.94', 'fabs(sceta) <  1.48'],
            'cat2': ['scr9 >  0.94', 'fabs(sceta) >= 1.48'],
            'cat3': ['scr9 <= 0.94', 'fabs(sceta) >= 1.48'],
            'calcat0': ['scr9 >  0.94', 'fabs(sceta) <  1'],
            'calcat1': ['scr9 <  0.94', 'fabs(sceta) <  1'],
            'calcat2':
            ['scr9 >  0.94', '1 < fabs(sceta) & fabs(sceta) <  1.48'],
            'calcat3':
            ['scr9 <  0.94', '1 < fabs(sceta) & fabs(sceta) <  1.48'],
            'calcat4':
            ['scr9 >  0.94', '1.48 < fabs(sceta) & fabs(sceta) <  2'],
            'calcat5':
            ['scr9 <  0.94', '1.48 < fabs(sceta) & fabs(sceta) <  2'],
            'calcat6': ['scr9 >  0.94', '2 < fabs(sceta) & fabs(sceta) < 2.5'],
            'calcat7': ['scr9 <  0.94', '2 < fabs(sceta) & fabs(sceta) < 2.5'],
        }[self.cat])

        if self.numentries > 0:
            cuts.append('Entry$ < %d' % self.numentries)

        self.deltaE.SetTitle('200*(scen_bendavid - scen_yangyong)/'
                             '    (scen_bendavid + scen_yangyong)')
        self.data = dataset.get(tree=chain,
                                variable=self.deltaE,
                                cuts=cuts[:],
                                name=self.name + '_data')
        self.data_half_odd = dataset.get(tree=chain,
                                         variable=self.deltaE,
                                         cuts=cuts[:] + ['Entry$ % 2 == 0'],
                                         name=self.name + '_data_half_odd')
        self.data_half_even = dataset.get(tree=chain,
                                          variable=self.deltaE,
                                          cuts=cuts[:] + ['Entry$ % 2 == 1'],
                                          name=self.name + '_data_half_even')
        if self.debuglevel > 0:
            reduced_range = roo.EventRange(0, 5000)
            self.data = self.data.reduce(reduced_range)
            self.data_half_odd = self.data_half_odd.reduce(reduced_range)
            self.data_half_even = self.data_half_even.reduce(reduced_range)

        nentries = self.data.tree().Draw('deltaE', '', 'goff')
        self.modal_interval = ModalInterval(nentries,
                                            self.data.tree().GetV1(), 1.)
        if self.fitmode == 'odd-even':
            self.train_data = self.data_half_odd
            self.fit_data = self.data_half_even
        elif self.fitmode == 'event-odd':
            self.train_data = self.data_half_even
            self.fit_data = self.data_half_odd
        elif self.fitmode == 'full-full':
            self.train_data = self.data
            self.fit_data = self.data
        else:
            raise RuntimeError, "Fit mode `%s' not supported!" % self.fitmode

        ## Make sure that the trainining dataset isn't too large
        if self.train_data.numEntries() > self.numentries_train_max:
            prescale = (
                self.train_data.numEntries() / self.numentries_train_max + 1)
            self.deltaE.SetTitle('deltaE')
            self.train_data = dataset.get(
                tree=self.train_data.tree(),
                variable=self.deltaE,
                cuts=['Entry$ %% %d == 0' % prescale],
                name=self.name + '_train_data')
        nentries = self.train_data.tree().Draw('deltaE', '', 'goff')
        self.modal_interval_training = ModalInterval(
            nentries,
            self.train_data.tree().GetV1(), 0.99)

        ## Set a nice title for the x-axis of plots
        if self.emtype == 'pho':
            self.deltaE.SetTitle('Photon #DeltaE_{two regr.}/E')
        elif self.emtype == 'ele':
            self.deltaE.SetTitle('Electron #DeltaE_{two regr.}/E')
        else:
            raise RuntimeError, "Unsupported emtype `%s'!" % self.emtype
    def get_data(self):
        "Gets the RooDataSet with deltaE data."
        chain = ROOT.TChain("Analysis")
        datapath = "/raid2/veverka/yyTrees/tworeg"

        if self.emtype == "pho":
            self.filenames = """
testSelection.v3.PhotonRun2011AandB30Nov2011v1AOD.preselcut3.sel0.n1cut0.smear0.phtcorr219.phtid1.merged.root

testSelection.v3.GluGluToHToGG_M-140_7TeV-powheg-pythia6Fall11-PU_S6_START42_V14B-v1AODSIM.preselcut3.sel0.n1cut0.smear3.phtcorr219.phtid1.r1.root

testSelection.v3.TTH_HToGG_M-140_7TeV-pythia6Fall11-PU_S6_START42_V14B-v1AODSIM.preselcut3.sel0.n1cut0.smear3.phtcorr219.phtid1.r1.root

testSelection.v3.VBF_HToGG_M-140_7TeV-powheg-pythia6Fall11-PU_S6_START42_V14B-v1AODSIM.preselcut3.sel0.n1cut0.smear3.phtcorr219.phtid1.r1.root

testSelection.v3.WH_ZH_HToGG_M-140_7TeV-pythia6Fall11-PU_S6_START42_V14B-v1AODSIM.preselcut3.sel0.n1cut0.smear3.phtcorr219.phtid1.r1.root
""".split()
        elif self.emtype == "ele":
            self.filenames = """
testSelectionZeev1.v3.DoubleElectronRun2011A30Nov2011v1AOD.etcut25.corr216.eleid1.datapu0.mcpu0.r*.scale1.root

testSelectionZeev1.v3.DoubleElectronRun2011B30Nov2011v1AOD.etcut25.corr216.eleid1.datapu0.mcpu0.r*.scale1.root
  
testSelectionZeev1.v3.DYJetsToLL_TuneZ2_M50_7TeVmadgraphtauolaFall11PU_S6_START42_V14Bv1AODSIM.etcut25.corr216.eleid1.datapu6.mcpu1.r*.scale0.root
""".split()
        else:
            raise RuntimeError, "Illegal emtype: `%s'!" % str(self.emtype)

        for f in self.filenames:
            chain.Add(os.path.join(datapath, f))

        ## Selection
        if self.emtype == "pho":
            cuts = ["100 <= mpair & mpair <= 180"]
        elif self.emtype == "ele":
            cuts = ["80 <= mpair & mpair <= 100"]
        else:
            raise RuntimeError, "Illegal emtype: `%s'!" % str(self.emtype)

        cuts.append({"mc": "runNumber == 1", "data": "runNumber >  1"}[self.src])
        cuts.extend(
            {
                "cat0": ["scr9 >  0.94", "fabs(sceta) <  1.48"],
                "cat1": ["scr9 <= 0.94", "fabs(sceta) <  1.48"],
                "cat2": ["scr9 >  0.94", "fabs(sceta) >= 1.48"],
                "cat3": ["scr9 <= 0.94", "fabs(sceta) >= 1.48"],
                "calcat0": ["scr9 >  0.94", "fabs(sceta) <  1"],
                "calcat1": ["scr9 <  0.94", "fabs(sceta) <  1"],
                "calcat2": ["scr9 >  0.94", "1 < fabs(sceta) & fabs(sceta) <  1.48"],
                "calcat3": ["scr9 <  0.94", "1 < fabs(sceta) & fabs(sceta) <  1.48"],
                "calcat4": ["scr9 >  0.94", "1.48 < fabs(sceta) & fabs(sceta) <  2"],
                "calcat5": ["scr9 <  0.94", "1.48 < fabs(sceta) & fabs(sceta) <  2"],
                "calcat6": ["scr9 >  0.94", "2 < fabs(sceta) & fabs(sceta) < 2.5"],
                "calcat7": ["scr9 <  0.94", "2 < fabs(sceta) & fabs(sceta) < 2.5"],
            }[self.cat]
        )

        if self.numentries > 0:
            cuts.append("Entry$ < %d" % self.numentries)

        self.deltaE.SetTitle("200*(scen_bendavid - scen_yangyong)/" "    (scen_bendavid + scen_yangyong)")
        self.data = dataset.get(tree=chain, variable=self.deltaE, cuts=cuts[:], name=self.name + "_data")
        self.data_half_odd = dataset.get(
            tree=chain, variable=self.deltaE, cuts=cuts[:] + ["Entry$ % 2 == 0"], name=self.name + "_data_half_odd"
        )
        self.data_half_even = dataset.get(
            tree=chain, variable=self.deltaE, cuts=cuts[:] + ["Entry$ % 2 == 1"], name=self.name + "_data_half_even"
        )
        if self.debuglevel > 0:
            reduced_range = roo.EventRange(0, 5000)
            self.data = self.data.reduce(reduced_range)
            self.data_half_odd = self.data_half_odd.reduce(reduced_range)
            self.data_half_even = self.data_half_even.reduce(reduced_range)

        nentries = self.data.tree().Draw("deltaE", "", "goff")
        self.modal_interval = ModalInterval(nentries, self.data.tree().GetV1(), 1.0)
        if self.fitmode == "odd-even":
            self.train_data = self.data_half_odd
            self.fit_data = self.data_half_even
        elif self.fitmode == "event-odd":
            self.train_data = self.data_half_even
            self.fit_data = self.data_half_odd
        elif self.fitmode == "full-full":
            self.train_data = self.data
            self.fit_data = self.data
        else:
            raise RuntimeError, "Fit mode `%s' not supported!" % self.fitmode

        ## Make sure that the trainining dataset isn't too large
        if self.train_data.numEntries() > self.numentries_train_max:
            prescale = self.train_data.numEntries() / self.numentries_train_max + 1
            self.deltaE.SetTitle("deltaE")
            self.train_data = dataset.get(
                tree=self.train_data.tree(),
                variable=self.deltaE,
                cuts=["Entry$ %% %d == 0" % prescale],
                name=self.name + "_train_data",
            )
        nentries = self.train_data.tree().Draw("deltaE", "", "goff")
        self.modal_interval_training = ModalInterval(nentries, self.train_data.tree().GetV1(), 0.99)

        ## Set a nice title for the x-axis of plots
        if self.emtype == "pho":
            self.deltaE.SetTitle("Photon #DeltaE_{two regr.}/E")
        elif self.emtype == "ele":
            self.deltaE.SetTitle("Electron #DeltaE_{two regr.}/E")
        else:
            raise RuntimeError, "Unsupported emtype `%s'!" % self.emtype