def get_dataset(data_name, varname, max_entries=-1, option='merged'):
    '''
    Build one dataset of the variable `varname` from the tree `data_name`.

    The tree is obtained with trees.get(data_name, option) and the variable
    configuration with config_map[varname].  One dataset is extracted for
    each (expression, selection) pair of the configuration; all further
    datasets are appended to the first one, which is returned.

    data_name   -- key passed to trees.get to select the source tree
    varname     -- key of config_map selecting the variable configuration
    max_entries -- if positive, the cut 'Entry$ < max_entries' is added to
                   the selection of every expression-selection pair
    option      -- forwarded to trees.get

    Raises IndexError if the configuration yields no expression-selection
    pair.
    '''
    tree = trees.get(data_name, option)
    cfg = config_map[varname]

    ## The optional binning has the form "nbins,min,max".  It does not depend
    ## on the expression, so it is parsed only once.
    binning = None
    if hasattr(cfg, 'binning'):
        fields = cfg.binning.split(',')
        if len(fields) == 3:
            binning = [float(field) for field in fields]

    datasets = []
    ## Get a dataset for each expression-selection pair
    for expr, selection in zip(cfg.expressions, cfg.selections):
        ## The expression is passed as the title of the variable.
        if binning is not None:
            nbins, varmin, varmax = binning
            variable = ROOT.RooRealVar(cfg.name, expr, varmin, varmax)
            variable.setBins(int(nbins))
        else:
            ## RooRealVar has no (name, title) constructor, an initial value
            ## is required for a variable without a range.
            variable = ROOT.RooRealVar(cfg.name, expr, 0.)
        cuts = [selection]
        if max_entries > 0:
            cuts.append('Entry$ < %d' % max_entries)
        dataset = datasetly.get(tree=tree, variable=variable, cuts=cuts)
        ## Decorate the variable owned by the dataset with the configured
        ## title and unit.
        variable = dataset.get().first()
        variable.SetTitle(cfg.title)
        variable.setUnit(cfg.unit)
        datasets.append(dataset)
    ## End of loop over expressions and selections

    dataset = datasets[0]
    for further_dataset in datasets[1:]:
        dataset.append(further_dataset)
    return dataset
def get_dataset(data_name, varname, max_entries=-1, option='merged'):
    '''
    Build one dataset of the variable `varname` from the tree `data_name`.

    The tree is obtained with trees.get(data_name, option) and the variable
    configuration with config_map[varname].  One dataset is extracted for
    each (expression, selection) pair of the configuration; all further
    datasets are appended to the first one, which is returned.

    data_name   -- key passed to trees.get to select the source tree
    varname     -- key of config_map selecting the variable configuration
    max_entries -- if positive, the cut 'Entry$ < max_entries' is added to
                   the selection of every expression-selection pair
    option      -- forwarded to trees.get

    Raises IndexError if the configuration yields no expression-selection
    pair.
    '''
    tree = trees.get(data_name, option)
    cfg = config_map[varname]

    ## The optional binning has the form "nbins,min,max".  It does not depend
    ## on the expression, so it is parsed only once.
    binning = None
    if hasattr(cfg, 'binning'):
        fields = cfg.binning.split(',')
        if len(fields) == 3:
            binning = [float(field) for field in fields]

    datasets = []
    ## Get a dataset for each expression-selection pair
    for expr, selection in zip(cfg.expressions, cfg.selections):
        ## The expression is passed as the title of the variable.
        if binning is not None:
            nbins, varmin, varmax = binning
            variable = ROOT.RooRealVar(cfg.name, expr, varmin, varmax)
            variable.setBins(int(nbins))
        else:
            ## RooRealVar has no (name, title) constructor, an initial value
            ## is required for a variable without a range.
            variable = ROOT.RooRealVar(cfg.name, expr, 0.)
        cuts = [selection]
        if max_entries > 0:
            cuts.append('Entry$ < %d' % max_entries)
        dataset = datasetly.get(tree=tree, variable=variable, cuts=cuts)
        ## Decorate the variable owned by the dataset with the configured
        ## title and unit.
        variable = dataset.get().first()
        variable.SetTitle(cfg.title)
        variable.setUnit(cfg.unit)
        datasets.append(dataset)
    ## End of loop over expressions and selections

    dataset = datasets[0]
    for further_dataset in datasets[1:]:
        dataset.append(further_dataset)
    return dataset
def __init__(self, name, varname, option, max_entries, prescale,
             prescale_phase=0):
    '''
    Extract the raw dataset of the variable `varname` from the tree `name`.

    The tree is obtained with trees.get(name, option) and the variable
    configuration with config_map[varname].  One dataset is extracted for
    each (expression, selection) pair of the configuration; all further
    datasets are appended to the first one.  The result is stored in
    self.data and its first variable in self.xvar.

    name           -- key passed to trees.get; its part before the first '-'
                      is used for the name and title of the dataset
    varname        -- key of config_map selecting the variable configuration
    option         -- forwarded to trees.get
    max_entries    -- if positive, a prescale equal to
                      ceil(tree entries / max_entries) is calculated and,
                      if it is greater than 1, only the entries with
                      Entry$ % prescale == prescale_phase are selected
    prescale       -- values greater than 1 are rejected
    prescale_phase -- remainder selected by the prescale cut

    Raises RuntimeError if prescale > 1 and IndexError if the configuration
    yields no expression-selection pair.
    '''
    print('DEBUG %s __init__' % self.__class__.__name__)
    if prescale > 1:
        ## NOTE(review): the message reports both max_entries and prescale,
        ## so the intended condition may have been
        ## `max_entries > 0 and prescale > 1` -- confirm.  Behavior is kept.
        msg = ', '.join(['max_entries=%d' % max_entries,
                         'prescale=%d' % prescale])
        raise RuntimeError('Illegal arguments ' + msg)
    self.name = name
    self.varname = varname

    tree = trees.get(name, option)
    cfg = config_map[varname]

    ## The optional binning has the form "nbins,min,max".  It does not depend
    ## on the expression, so it is parsed only once.
    binning = None
    if hasattr(cfg, 'qqbinning'):
        fields = cfg.qqbinning.split(',')
        if len(fields) == 3:
            binning = [float(field) for field in fields]

    ## The prescale depends only on the tree and max_entries, so the entries
    ## are counted once instead of once per expression.
    prescale_cut = None
    if max_entries > 0:
        all_entries = float(tree.GetEntries())
        prescale = ROOT.TMath.CeilNint(all_entries / max_entries)
        if prescale > 1:
            prescale_cut = 'Entry$ %% %d == %d' % (prescale, prescale_phase)

    datasets = []
    ## Get a dataset for each expression-selection pair
    for expr, selection in zip(cfg.expressions, cfg.selections):
        ## The expression is passed as the title of the variable.
        if binning is not None:
            nbins, varmin, varmax = binning
            variable = ROOT.RooRealVar(cfg.name, expr, varmin, varmax)
            variable.setBins(int(nbins))
        else:
            variable = ROOT.RooRealVar(cfg.name, expr, 0.)
        cuts = [selection]
        ## Adds an appropriate prescale
        if prescale_cut is not None:
            print('Prescaling %s: %s' % (name, prescale_cut))
            cuts.append(prescale_cut)
        dataset = datasetly.get(tree=tree, variable=variable, cuts=cuts)
        ## Decorate the variable owned by the dataset with the configured
        ## title and unit.
        variable = dataset.get().first()
        variable.SetTitle(cfg.title)
        variable.setUnit(cfg.unit)
        datasets.append(dataset)
    ## End of loop over expressions and selections

    dataset = datasets[0]
    for further_dataset in datasets[1:]:
        dataset.append(further_dataset)

    label = name.split('-')[0]
    dataset.SetTitle('Raw ' + label.capitalize())
    dataset.SetName('raw_' + label)
    print('max_entries, numEntries %s %s' % (max_entries,
                                             dataset.numEntries()))

    self.data = dataset
    self.xvar = dataset.get().first()
def get_samples(self):
    '''
    Get the MC and data samples and import them in the workspace.

    The titles of the workspace variables are temporarily replaced by the
    corresponding TTree expressions while the datasets 'data' and 'mc' are
    read from the chains 'data' and 'z'.  The original titles are restored
    even if reading a dataset fails.
    '''
    ## Yong's trees with the default CMSSW photon cluster corrections
    chains = esChains.getChains('v13')

    ## Map of variable names and corresponding TTree expressions to
    ## calculate it.
    expression_map = {
        'mmg': 'mmg',
        'mm': 'mm',
        'gpt': 'gamenergy/cosh(gameta)',
        'geta': 'gameta',
        'r9': 'gamr9',
        'sihih': '100*gamsigmaIetaIeta',
        'weight': 'evtweight',
        'sphi': 'gamscphiWidth',
        'seta': 'gamscetaWidth',
    }

    # Change titles to TTree expressions while saving the original titles.
    title_map = self.replace_variable_titles(expression_map)
    try:
        variables = [self.w.var(xname) for xname in expression_map
                     if xname != 'weight']
        weight = self.w.var('weight')
        ## Each call gets its own copy of the list of cuts.
        data = dataset.get(tree=chains['data'], variables=variables,
                           weight=weight, cuts=self.cuts[:], name='data')
        mc = dataset.get(tree=chains['z'], variables=variables,
                         weight=weight, cuts=self.cuts[:], name='mc')
    finally:
        # Change the titles back to their original values, also on failure,
        # so that the workspace variables do not keep the TTree expressions.
        self.replace_variable_titles(title_map)

    self.w.Import(data)
    self.w.Import(mc)
def getdata():
    '''
    Build one dataset per source tree and return them as a dictionary.

    The trees are obtained with get_trees(tree_version).  For each
    (source, tree) pair a dataset of `variable` is extracted from the tree
    with weight[source] passed as the weight and `cuts` as the selection.
    Returns a dictionary mapping each source to its dataset.
    '''
    ## NOTE(review): the same `cuts` list object is handed to every call of
    ## dataset.get -- confirm that dataset.get does not modify it.
    return dict(
        (source, dataset.get(tree=tree, variable=variable,
                             weight=weight[source], cuts=cuts))
        for source, tree in get_trees(tree_version).items()
    )
def get_merged_dataset(self):
    '''
    Build self.merged_dataset with one variable per source.

    For each source in self.sources a clone of self.variable named after
    the source is stored in self.merged_variables.  The dataset is read
    from self.tree with these variables; afterwards each variable gets the
    title of its source.
    '''
    clones = self.merged_variables = []
    for source in self.sources:
        clone = self.variable.Clone(source.name)
        ## The title holds the source name only while the dataset is read
        ## (presumably dataset.get uses it as the TTree expression -- TODO
        ## confirm).
        clone.SetTitle(source.name)
        clones.append(clone)

    self.merged_dataset = dataset.get(tree=self.tree,
                                      variables=self.merged_variables)

    ## Switch to the final titles.
    for clone, source in zip(self.merged_variables, self.sources):
        clone.SetTitle(source.title)
def get_merged_dataset(self):
    '''
    Build self.merged_dataset with one variable per source.

    For each source in self.sources a clone of self.variable named after
    the source is stored in self.merged_variables.  The dataset is read
    from self.tree with these variables; afterwards each variable gets the
    title of its source.
    '''
    clones = self.merged_variables = []
    for source in self.sources:
        clone = self.variable.Clone(source.name)
        ## The title holds the source name only while the dataset is read
        ## (presumably dataset.get uses it as the TTree expression -- TODO
        ## confirm).
        clone.SetTitle(source.name)
        clones.append(clone)

    self.merged_dataset = dataset.get(tree=self.tree,
                                      variables=self.merged_variables)

    ## Switch to the final titles.
    for clone, source in zip(self.merged_variables, self.sources):
        clone.SetTitle(source.title)
def __init__(self, name, varname, option, max_entries, prescale,
             prescale_phase=0):
    '''
    Extract the raw dataset of the variable `varname` from the tree `name`.

    The tree is obtained with trees.get(name, option) and the variable
    configuration with config_map[varname].  One dataset is extracted for
    each (expression, selection) pair of the configuration; all further
    datasets are appended to the first one.  The result is stored in
    self.data and its first variable in self.xvar.

    name           -- key passed to trees.get; its part before the first '-'
                      is used for the name and title of the dataset
    varname        -- key of config_map selecting the variable configuration
    option         -- forwarded to trees.get
    max_entries    -- if positive, a prescale equal to
                      ceil(tree entries / max_entries) is calculated and,
                      if it is greater than 1, only the entries with
                      Entry$ % prescale == prescale_phase are selected
    prescale       -- values greater than 1 are rejected
    prescale_phase -- remainder selected by the prescale cut

    Raises RuntimeError if prescale > 1 and IndexError if the configuration
    yields no expression-selection pair.
    '''
    print('DEBUG %s __init__' % self.__class__.__name__)
    if prescale > 1:
        ## NOTE(review): the message reports both max_entries and prescale,
        ## so the intended condition may have been
        ## `max_entries > 0 and prescale > 1` -- confirm.  Behavior is kept.
        msg = ', '.join(['max_entries=%d' % max_entries,
                         'prescale=%d' % prescale])
        raise RuntimeError('Illegal arguments ' + msg)
    self.name = name
    self.varname = varname

    tree = trees.get(name, option)
    cfg = config_map[varname]

    ## The optional binning has the form "nbins,min,max".  It does not depend
    ## on the expression, so it is parsed only once.
    binning = None
    if hasattr(cfg, 'qqbinning'):
        fields = cfg.qqbinning.split(',')
        if len(fields) == 3:
            binning = [float(field) for field in fields]

    ## The prescale depends only on the tree and max_entries, so the entries
    ## are counted once instead of once per expression.
    prescale_cut = None
    if max_entries > 0:
        all_entries = float(tree.GetEntries())
        prescale = ROOT.TMath.CeilNint(all_entries / max_entries)
        if prescale > 1:
            prescale_cut = 'Entry$ %% %d == %d' % (prescale, prescale_phase)

    datasets = []
    ## Get a dataset for each expression-selection pair
    for expr, selection in zip(cfg.expressions, cfg.selections):
        ## The expression is passed as the title of the variable.
        if binning is not None:
            nbins, varmin, varmax = binning
            variable = ROOT.RooRealVar(cfg.name, expr, varmin, varmax)
            variable.setBins(int(nbins))
        else:
            variable = ROOT.RooRealVar(cfg.name, expr, 0.)
        cuts = [selection]
        ## Adds an appropriate prescale
        if prescale_cut is not None:
            print('Prescaling %s: %s' % (name, prescale_cut))
            cuts.append(prescale_cut)
        dataset = datasetly.get(tree=tree, variable=variable, cuts=cuts)
        ## Decorate the variable owned by the dataset with the configured
        ## title and unit.
        variable = dataset.get().first()
        variable.SetTitle(cfg.title)
        variable.setUnit(cfg.unit)
        datasets.append(dataset)
    ## End of loop over expressions and selections

    dataset = datasets[0]
    for further_dataset in datasets[1:]:
        dataset.append(further_dataset)

    label = name.split('-')[0]
    dataset.SetTitle('Raw ' + label.capitalize())
    dataset.SetName('raw_' + label)
    print('max_entries, numEntries %s %s' % (max_entries,
                                             dataset.numEntries()))

    self.data = dataset
    self.xvar = dataset.get().first()
def get_samples(self):
    '''
    Get the MC and data samples and import them in the workspace.

    The titles of the workspace variables are temporarily replaced by the
    corresponding TTree expressions while the datasets 'data' and 'mc' are
    read from the chains 'data' and 'z'.  The original titles are restored
    even if reading a dataset fails.
    '''
    ## Yong's trees with the default CMSSW photon cluster corrections
    chains = esChains.getChains('v13')

    ## Map of variable names and corresponding TTree expressions to
    ## calculate it.
    expression_map = {
        'mmg': 'mmg',
        'mm': 'mm',
        'gpt': 'gamenergy/cosh(gameta)',
        'geta': 'gameta',
        'r9': 'gamr9',
        'sihih': '100*gamsigmaIetaIeta',
        'weight': 'evtweight',
        'sphi': 'gamscphiWidth',
        'seta': 'gamscetaWidth',
    }

    # Change titles to TTree expressions while saving the original titles.
    title_map = self.replace_variable_titles(expression_map)
    try:
        variables = [self.w.var(xname) for xname in expression_map
                     if xname != 'weight']
        weight = self.w.var('weight')
        ## Each call gets its own copy of the list of cuts.
        data = dataset.get(tree=chains['data'], variables=variables,
                           weight=weight, cuts=self.cuts[:], name='data')
        mc = dataset.get(tree=chains['z'], variables=variables,
                         weight=weight, cuts=self.cuts[:], name='mc')
    finally:
        # Change the titles back to their original values, also on failure,
        # so that the workspace variables do not keep the TTree expressions.
        self.replace_variable_titles(title_map)

    self.w.Import(data)
    self.w.Import(mc)
def get_data(self):
    '''
    Gets the RooDataSet with deltaE data.

    Chains the input files for self.emtype ('pho' or 'ele'), builds the
    selection from self.emtype, self.src, self.cat and self.numentries and
    extracts the following datasets of self.deltaE:
      self.data           -- all selected entries
      self.data_half_odd  -- selected entries with Entry$ % 2 == 0
      self.data_half_even -- selected entries with Entry$ % 2 == 1
    If self.debuglevel > 0, all three are reduced to the event range
    (0, 5000).  self.train_data and self.fit_data are then chosen according
    to self.fitmode ('odd-even', 'even-odd' or 'full-full'), and the
    training data is prescaled if it has more than
    self.numentries_train_max entries.  Also sets self.filenames,
    self.modal_interval and self.modal_interval_training, and leaves
    self.deltaE with a title suitable for plot axes.

    Raises RuntimeError for an unsupported self.emtype or self.fitmode and
    KeyError for an unknown self.src or self.cat.
    '''
    chain = ROOT.TChain('Analysis')
    datapath = '/raid2/veverka/yyTrees/tworeg'

    ## Resolve everything that depends on the type of the EM object once.
    if self.emtype == 'pho':
        self.filenames = '''
            testSelection.v3.PhotonRun2011AandB30Nov2011v1AOD.preselcut3.sel0.n1cut0.smear0.phtcorr219.phtid1.merged.root
            testSelection.v3.GluGluToHToGG_M-140_7TeV-powheg-pythia6Fall11-PU_S6_START42_V14B-v1AODSIM.preselcut3.sel0.n1cut0.smear3.phtcorr219.phtid1.r1.root
            testSelection.v3.TTH_HToGG_M-140_7TeV-pythia6Fall11-PU_S6_START42_V14B-v1AODSIM.preselcut3.sel0.n1cut0.smear3.phtcorr219.phtid1.r1.root
            testSelection.v3.VBF_HToGG_M-140_7TeV-powheg-pythia6Fall11-PU_S6_START42_V14B-v1AODSIM.preselcut3.sel0.n1cut0.smear3.phtcorr219.phtid1.r1.root
            testSelection.v3.WH_ZH_HToGG_M-140_7TeV-pythia6Fall11-PU_S6_START42_V14B-v1AODSIM.preselcut3.sel0.n1cut0.smear3.phtcorr219.phtid1.r1.root
            '''.split()
        mass_window = '100 <= mpair & mpair <= 180'
        axis_title = 'Photon #DeltaE_{two regr.}/E'
    elif self.emtype == 'ele':
        self.filenames = '''
            testSelectionZeev1.v3.DoubleElectronRun2011A30Nov2011v1AOD.etcut25.corr216.eleid1.datapu0.mcpu0.r*.scale1.root
            testSelectionZeev1.v3.DoubleElectronRun2011B30Nov2011v1AOD.etcut25.corr216.eleid1.datapu0.mcpu0.r*.scale1.root
            testSelectionZeev1.v3.DYJetsToLL_TuneZ2_M50_7TeVmadgraphtauolaFall11PU_S6_START42_V14Bv1AODSIM.etcut25.corr216.eleid1.datapu6.mcpu1.r*.scale0.root
            '''.split()
        mass_window = '80 <= mpair & mpair <= 100'
        axis_title = 'Electron #DeltaE_{two regr.}/E'
    else:
        raise RuntimeError("Illegal emtype: `%s'!" % str(self.emtype))

    for filename in self.filenames:
        chain.Add(os.path.join(datapath, filename))

    ## Selection
    cuts = [mass_window]
    cuts.append({
        'mc': 'runNumber == 1',
        'data': 'runNumber > 1',
    }[self.src])
    cuts.extend({
        'cat0': ['scr9 > 0.94', 'fabs(sceta) < 1.48'],
        'cat1': ['scr9 <= 0.94', 'fabs(sceta) < 1.48'],
        'cat2': ['scr9 > 0.94', 'fabs(sceta) >= 1.48'],
        'cat3': ['scr9 <= 0.94', 'fabs(sceta) >= 1.48'],
        'calcat0': ['scr9 > 0.94', 'fabs(sceta) < 1'],
        'calcat1': ['scr9 < 0.94', 'fabs(sceta) < 1'],
        'calcat2': ['scr9 > 0.94', '1 < fabs(sceta) & fabs(sceta) < 1.48'],
        'calcat3': ['scr9 < 0.94', '1 < fabs(sceta) & fabs(sceta) < 1.48'],
        'calcat4': ['scr9 > 0.94', '1.48 < fabs(sceta) & fabs(sceta) < 2'],
        'calcat5': ['scr9 < 0.94', '1.48 < fabs(sceta) & fabs(sceta) < 2'],
        'calcat6': ['scr9 > 0.94', '2 < fabs(sceta) & fabs(sceta) < 2.5'],
        'calcat7': ['scr9 < 0.94', '2 < fabs(sceta) & fabs(sceta) < 2.5'],
    }[self.cat])
    if self.numentries > 0:
        cuts.append('Entry$ < %d' % self.numentries)

    ## The title of deltaE holds the expression while reading the chain.
    self.deltaE.SetTitle('200*(scen_bendavid - scen_yangyong)/'
                         ' (scen_bendavid + scen_yangyong)')
    self.data = dataset.get(tree=chain, variable=self.deltaE, cuts=cuts[:],
                            name=self.name + '_data')
    ## NOTE(review): the half named "odd" selects Entry$ % 2 == 0 and the
    ## half named "even" selects Entry$ % 2 == 1.  Kept as is.
    self.data_half_odd = dataset.get(tree=chain, variable=self.deltaE,
                                     cuts=cuts + ['Entry$ % 2 == 0'],
                                     name=self.name + '_data_half_odd')
    self.data_half_even = dataset.get(tree=chain, variable=self.deltaE,
                                      cuts=cuts + ['Entry$ % 2 == 1'],
                                      name=self.name + '_data_half_even')

    if self.debuglevel > 0:
        reduced_range = roo.EventRange(0, 5000)
        self.data = self.data.reduce(reduced_range)
        self.data_half_odd = self.data_half_odd.reduce(reduced_range)
        self.data_half_even = self.data_half_even.reduce(reduced_range)

    nentries = self.data.tree().Draw('deltaE', '', 'goff')
    self.modal_interval = ModalInterval(nentries,
                                        self.data.tree().GetV1(), 1.)

    if self.fitmode == 'odd-even':
        self.train_data = self.data_half_odd
        self.fit_data = self.data_half_even
    elif self.fitmode in ('even-odd', 'event-odd'):
        ## 'event-odd' is a historical misspelling of 'even-odd'; it is
        ## still accepted so that existing callers keep working.
        self.train_data = self.data_half_even
        self.fit_data = self.data_half_odd
    elif self.fitmode == 'full-full':
        self.train_data = self.data
        self.fit_data = self.data
    else:
        raise RuntimeError("Fit mode `%s' not supported!" % self.fitmode)

    ## Make sure that the trainining dataset isn't too large
    if self.train_data.numEntries() > self.numentries_train_max:
        ## Explicit floor division keeps the prescale an integer count.
        prescale = (self.train_data.numEntries() //
                    self.numentries_train_max + 1)
        ## The dataset is re-read from the tree of the training data, so
        ## the expression is now just 'deltaE'.
        self.deltaE.SetTitle('deltaE')
        self.train_data = dataset.get(
            tree=self.train_data.tree(), variable=self.deltaE,
            cuts=['Entry$ %% %d == 0' % prescale],
            name=self.name + '_train_data')

    nentries = self.train_data.tree().Draw('deltaE', '', 'goff')
    self.modal_interval_training = ModalInterval(
        nentries, self.train_data.tree().GetV1(), 0.99)

    ## Set a nice title for the x-axis of plots
    self.deltaE.SetTitle(axis_title)
def get_data(self):
    """
    Gets the RooDataSet with deltaE data.

    Chains the input files for self.emtype ("pho" or "ele"), builds the
    selection from self.emtype, self.src, self.cat and self.numentries and
    extracts the following datasets of self.deltaE:
      self.data           -- all selected entries
      self.data_half_odd  -- selected entries with Entry$ % 2 == 0
      self.data_half_even -- selected entries with Entry$ % 2 == 1
    If self.debuglevel > 0, all three are reduced to the event range
    (0, 5000).  self.train_data and self.fit_data are then chosen according
    to self.fitmode ("odd-even", "even-odd" or "full-full"), and the
    training data is prescaled if it has more than
    self.numentries_train_max entries.  Also sets self.filenames,
    self.modal_interval and self.modal_interval_training, and leaves
    self.deltaE with a title suitable for plot axes.

    Raises RuntimeError for an unsupported self.emtype or self.fitmode and
    KeyError for an unknown self.src or self.cat.
    """
    chain = ROOT.TChain("Analysis")
    datapath = "/raid2/veverka/yyTrees/tworeg"

    ## Resolve everything that depends on the type of the EM object once.
    if self.emtype == "pho":
        self.filenames = """
            testSelection.v3.PhotonRun2011AandB30Nov2011v1AOD.preselcut3.sel0.n1cut0.smear0.phtcorr219.phtid1.merged.root
            testSelection.v3.GluGluToHToGG_M-140_7TeV-powheg-pythia6Fall11-PU_S6_START42_V14B-v1AODSIM.preselcut3.sel0.n1cut0.smear3.phtcorr219.phtid1.r1.root
            testSelection.v3.TTH_HToGG_M-140_7TeV-pythia6Fall11-PU_S6_START42_V14B-v1AODSIM.preselcut3.sel0.n1cut0.smear3.phtcorr219.phtid1.r1.root
            testSelection.v3.VBF_HToGG_M-140_7TeV-powheg-pythia6Fall11-PU_S6_START42_V14B-v1AODSIM.preselcut3.sel0.n1cut0.smear3.phtcorr219.phtid1.r1.root
            testSelection.v3.WH_ZH_HToGG_M-140_7TeV-pythia6Fall11-PU_S6_START42_V14B-v1AODSIM.preselcut3.sel0.n1cut0.smear3.phtcorr219.phtid1.r1.root
            """.split()
        mass_window = "100 <= mpair & mpair <= 180"
        axis_title = "Photon #DeltaE_{two regr.}/E"
    elif self.emtype == "ele":
        self.filenames = """
            testSelectionZeev1.v3.DoubleElectronRun2011A30Nov2011v1AOD.etcut25.corr216.eleid1.datapu0.mcpu0.r*.scale1.root
            testSelectionZeev1.v3.DoubleElectronRun2011B30Nov2011v1AOD.etcut25.corr216.eleid1.datapu0.mcpu0.r*.scale1.root
            testSelectionZeev1.v3.DYJetsToLL_TuneZ2_M50_7TeVmadgraphtauolaFall11PU_S6_START42_V14Bv1AODSIM.etcut25.corr216.eleid1.datapu6.mcpu1.r*.scale0.root
            """.split()
        mass_window = "80 <= mpair & mpair <= 100"
        axis_title = "Electron #DeltaE_{two regr.}/E"
    else:
        raise RuntimeError("Illegal emtype: `%s'!" % str(self.emtype))

    for filename in self.filenames:
        chain.Add(os.path.join(datapath, filename))

    ## Selection
    cuts = [mass_window]
    cuts.append({"mc": "runNumber == 1", "data": "runNumber > 1"}[self.src])
    cuts.extend(
        {
            "cat0": ["scr9 > 0.94", "fabs(sceta) < 1.48"],
            "cat1": ["scr9 <= 0.94", "fabs(sceta) < 1.48"],
            "cat2": ["scr9 > 0.94", "fabs(sceta) >= 1.48"],
            "cat3": ["scr9 <= 0.94", "fabs(sceta) >= 1.48"],
            "calcat0": ["scr9 > 0.94", "fabs(sceta) < 1"],
            "calcat1": ["scr9 < 0.94", "fabs(sceta) < 1"],
            "calcat2": ["scr9 > 0.94", "1 < fabs(sceta) & fabs(sceta) < 1.48"],
            "calcat3": ["scr9 < 0.94", "1 < fabs(sceta) & fabs(sceta) < 1.48"],
            "calcat4": ["scr9 > 0.94", "1.48 < fabs(sceta) & fabs(sceta) < 2"],
            "calcat5": ["scr9 < 0.94", "1.48 < fabs(sceta) & fabs(sceta) < 2"],
            "calcat6": ["scr9 > 0.94", "2 < fabs(sceta) & fabs(sceta) < 2.5"],
            "calcat7": ["scr9 < 0.94", "2 < fabs(sceta) & fabs(sceta) < 2.5"],
        }[self.cat]
    )
    if self.numentries > 0:
        cuts.append("Entry$ < %d" % self.numentries)

    ## The title of deltaE holds the expression while reading the chain.
    self.deltaE.SetTitle("200*(scen_bendavid - scen_yangyong)/"
                         " (scen_bendavid + scen_yangyong)")
    self.data = dataset.get(tree=chain, variable=self.deltaE, cuts=cuts[:],
                            name=self.name + "_data")
    ## NOTE(review): the half named "odd" selects Entry$ % 2 == 0 and the
    ## half named "even" selects Entry$ % 2 == 1.  Kept as is.
    self.data_half_odd = dataset.get(
        tree=chain, variable=self.deltaE,
        cuts=cuts + ["Entry$ % 2 == 0"],
        name=self.name + "_data_half_odd",
    )
    self.data_half_even = dataset.get(
        tree=chain, variable=self.deltaE,
        cuts=cuts + ["Entry$ % 2 == 1"],
        name=self.name + "_data_half_even",
    )

    if self.debuglevel > 0:
        reduced_range = roo.EventRange(0, 5000)
        self.data = self.data.reduce(reduced_range)
        self.data_half_odd = self.data_half_odd.reduce(reduced_range)
        self.data_half_even = self.data_half_even.reduce(reduced_range)

    nentries = self.data.tree().Draw("deltaE", "", "goff")
    self.modal_interval = ModalInterval(nentries,
                                        self.data.tree().GetV1(), 1.0)

    if self.fitmode == "odd-even":
        self.train_data = self.data_half_odd
        self.fit_data = self.data_half_even
    elif self.fitmode in ("even-odd", "event-odd"):
        ## "event-odd" is a historical misspelling of "even-odd"; it is
        ## still accepted so that existing callers keep working.
        self.train_data = self.data_half_even
        self.fit_data = self.data_half_odd
    elif self.fitmode == "full-full":
        self.train_data = self.data
        self.fit_data = self.data
    else:
        raise RuntimeError("Fit mode `%s' not supported!" % self.fitmode)

    ## Make sure that the trainining dataset isn't too large
    if self.train_data.numEntries() > self.numentries_train_max:
        ## Explicit floor division keeps the prescale an integer count.
        prescale = (self.train_data.numEntries() //
                    self.numentries_train_max + 1)
        ## The dataset is re-read from the tree of the training data, so
        ## the expression is now just "deltaE".
        self.deltaE.SetTitle("deltaE")
        self.train_data = dataset.get(
            tree=self.train_data.tree(),
            variable=self.deltaE,
            cuts=["Entry$ %% %d == 0" % prescale],
            name=self.name + "_train_data",
        )

    nentries = self.train_data.tree().Draw("deltaE", "", "goff")
    self.modal_interval_training = ModalInterval(
        nentries, self.train_data.tree().GetV1(), 0.99)

    ## Set a nice title for the x-axis of plots
    self.deltaE.SetTitle(axis_title)