def test_BranchList(self):
    """Check that BranchList finds every branch name used in a long cut string.

    The cut string combines array indexing, nested indices, a namespaced
    function call (``VHbb::deltaPhi``) and the ``&`` / ``||`` operators. Every
    name listed in ``branchesWhichShouldExist`` has to be contained in the
    list returned by ``getListOfBranches()``.
    """
    longCutString = "((V_new_mass>75.&V_new_mass<105.&Jet_btagCMVAV2[hJCMVAV2idx[0]]<0.9432&Jet_btagCMVAV2[hJCMVAV2idx[1]]<-0.5884&abs(VHbb::deltaPhi(HCMVAV2_reg_phi_corrJERUp,V_new_phi))>2.5&BasicCutsCMVA&(hJetCMVAV2_pt_reg_0_corrJERUp>20&hJetCMVAV2_pt_reg_1_corrJERUp>20))&V_new_pt>50)&(Vtype_new==1&(abs(vLeptons_new_eta[0])>=1.57||abs(vLeptons_new_eta[0])<=1.44)&(abs(vLeptons_new_eta[1])>=1.57||abs(vLeptons_new_eta[1])<=1.44))&(V_new_pt>50&V_new_pt<150)"
    branchList = BranchList(longCutString)
    branchesWhichShouldExist = [
        'hJetCMVAV2_pt_reg_0_corrJERUp',
        'V_new_mass',
        'Jet_btagCMVAV2',
        'hJetCMVAV2_pt_reg_1_corrJERUp',
        'V_new_pt',
        'HCMVAV2_reg_phi_corrJERUp',
        'V_new_phi',
        'Vtype_new',
        'hJCMVAV2idx',
        'BasicCutsCMVA',
        'vLeptons_new_eta',
    ]
    # query the list once; assertIn reports the missing name together with the
    # actual list on failure, which assertTrue(x in y) does not
    listOfBranches = branchList.getListOfBranches()
    for branchName in branchesWhichShouldExist:
        self.assertIn(branchName, listOfBranches)
def test_BranchList(self):
    # A long selection string, mixing indexed arrays, a namespaced function
    # call and logical operators, is passed to BranchList; afterwards each
    # expected branch name has to show up in the list of branches.
    cut = "((V_new_mass>75.&V_new_mass<105.&Jet_btagCMVAV2[hJCMVAV2idx[0]]<0.9432&Jet_btagCMVAV2[hJCMVAV2idx[1]]<-0.5884&abs(VHbb::deltaPhi(HCMVAV2_reg_phi_corrJERUp,V_new_phi))>2.5&BasicCutsCMVA&(hJetCMVAV2_pt_reg_0_corrJERUp>20&hJetCMVAV2_pt_reg_1_corrJERUp>20))&V_new_pt>50)&(Vtype_new==1&(abs(vLeptons_new_eta[0])>=1.57||abs(vLeptons_new_eta[0])<=1.44)&(abs(vLeptons_new_eta[1])>=1.57||abs(vLeptons_new_eta[1])<=1.44))&(V_new_pt>50&V_new_pt<150)"
    parsed = BranchList(cut)
    expectedNames = (
        'hJetCMVAV2_pt_reg_0_corrJERUp',
        'V_new_mass',
        'Jet_btagCMVAV2',
        'hJetCMVAV2_pt_reg_1_corrJERUp',
        'V_new_pt',
        'HCMVAV2_reg_phi_corrJERUp',
        'V_new_phi',
        'Vtype_new',
        'hJCMVAV2idx',
        'BasicCutsCMVA',
        'vLeptons_new_eta',
    )
    for expectedName in expectedNames:
        isPresent = expectedName in parsed.getListOfBranches()
        self.assertTrue(isPresent)
def run(self):
    """Cache the training and evaluation trees of ``self.sampleIdentifier``.

    One ``TreeCache`` is created per subsample, training region and cut
    (``self.TrainCut`` / ``self.EvalCut``). Parts which are not cached yet, or
    all parts if ``self.force`` is set, are connected to a single shared
    ``SampleTree`` which is processed once at the end.
    """
    # ----------------------------------------------------------------------------------------------------------------------
    # cache samples
    # ----------------------------------------------------------------------------------------------------------------------
    for sampleToCache in [self.sampleIdentifier]:
        print('*' * 80)
        print(' ', sampleToCache)
        print('*' * 80)
        # prepare caches for training and evaluation samples
        treeCaches = []
        self.sampleTree = None

        # use all (sub)samples which come from the same files (sampleIdentifier)
        subsamples = [x for x in self.samples if x.identifier == sampleToCache]

        # list of branches to keep for use as MVA input variables
        # The expressions depend only on the training region, so they are added
        # once per region instead of once per sample and train/eval cut.
        # .values()/.items() are used because dict.iteritems() does not exist in Python 3.
        branchListOfMVAVars = BranchList()
        for trainingRegionInfo in self.trainingRegionsDict.values():
            branchListOfMVAVars.addCut(trainingRegionInfo['vars'])
            for weightVar in trainingRegionInfo['weightVars']:
                branchListOfMVAVars.addCut(weightVar)
        branchListOfMVAVars.addCut(self.config.get('Weights', 'weightF'))
        mvaBranches = branchListOfMVAVars.getListOfBranches()

        # loop over all samples
        for sample in subsamples:
            # add cuts for all training regions
            for trainingRegion, trainingRegionInfo in self.trainingRegionsDict.items():
                # add cuts for training and evaluation
                for additionalCut in [self.TrainCut, self.EvalCut]:
                    # cuts
                    sampleCuts = [sample.subcut]
                    if additionalCut:
                        sampleCuts.append(additionalCut)
                    if trainingRegionInfo['cut']:
                        sampleCuts.append(trainingRegionInfo['cut'])

                    # add cache object
                    tc = TreeCache.TreeCache(
                        name='{region}_{sample}_{tr}'.format(
                            region=trainingRegion,
                            sample=sample.name,
                            tr='TRAIN' if additionalCut == self.TrainCut else 'EVAL'),
                        sample=sample.name,
                        cutList=sampleCuts,
                        inputFolder=self.samplesPath,
                        splitFilesChunks=self.splitFilesChunks,
                        chunkNumber=self.chunkNumber,
                        splitFilesChunkSize=self.splitFilesChunkSize,
                        branches=mvaBranches,
                        config=self.config,
                        debug=True
                    )

                    # check if this part of the sample is already cached
                    isCached = tc.partIsCached()
                    if not isCached or self.force:
                        if isCached:
                            # forced re-run: remove the existing files of this chunk first
                            tc.deleteCachedFiles(chunkNumber=self.chunkNumber)
                        # for the first sample which comes from this files, load the tree
                        if not self.sampleTree:
                            self.sampleTree = SampleTree(
                                {'name': sample.identifier, 'folder': self.samplesPath},
                                splitFilesChunkSize=self.splitFilesChunkSize,
                                chunkNumber=self.chunkNumber,
                                config=self.config,
                                saveMemory=True)
                        treeCaches.append(tc.setSampleTree(self.sampleTree).cache())

        if treeCaches:
            # run on the tree
            self.sampleTree.process()
        else:
            print("nothing to do!")
# Scan one specific run/event in all data samples of the region and write the
# requested expressions to a text file.
# NOTE: eval() executes the config value as Python code, the config has to be trusted.
dataSamples = eval(config.get('dc:' + region, 'data'))
for dataSample in dataSamples:
    sampleTree = SampleTree({'name': dataSample, 'folder': inputFolder}, config=config)

    # one output file per region, data sample, run and event
    outputFileName = ''.join([
        logFolder, '/', region, '_', dataSample, '_', opts.run, '_', opts.event, '.txt'
    ])
    print("save event list to:", outputFileName)

    # send the output of the scan to the file instead of the terminal
    scanPlayer = sampleTree.tree.GetPlayer()
    scanPlayer.SetScanRedirect(True)
    scanPlayer.SetScanFileName(outputFileName)

    branchList = BranchList(["run", "event"])

    # the name of the cut is either given explicitly in the datacard section
    # or it is identical to the region name
    datacardSection = 'dc:' + region
    if config.has_option(datacardSection, 'cut'):
        cutName = config.get(datacardSection, 'cut')
    else:
        cutName = region
    regionCut = config.get('Cuts', cutName)
    branchList.addCut(regionCut)

    # without user defined expressions, all branches collected so far are printed
    if len(opts.expressions) < 1:
        expressions = ":".join(branchList.getListOfBranches())
    else:
        expressions = opts.expressions
    branchList.addCut(expressions)

    sampleTree.enableBranches(branchList.getListOfBranches())
    eventSelection = "run==" + opts.run + "&&event==" + opts.event
    sampleTree.tree.Scan(expressions, eventSelection, "colsize=16")
def run(self):
    """Cache the trees needed for plotting ``self.sampleIdentifier``.

    First the list of branches to keep is built from the optional config
    options ``Branches.keep_branches_plot`` and ``Branches.keep_branches``, the
    ``relPath`` of all ``plotDef:`` sections, ``Weights.weightF`` and the cuts
    of all plot regions. Then one ``TreeCache`` per subsample and region is
    created; parts which are not cached yet (or all, if ``self.forceRedo`` is
    set) are connected to one shared ``SampleTree`` that is processed once.

    Raises:
        Exception: "CreationOfSampleTreeFailed" if the sample tree could not be
            created, "SampleFilesHaveChanged" if ``self.fileList`` differs from
            the files found for this chunk.
    """
    # keep additional branches for plotting
    # NOTE: eval() executes the config values as Python code, the config has to be trusted.
    # Both options are optional, a missing or broken option is ignored on purpose.
    try:
        keepBranchesPlot = eval(self.config.get('Branches', 'keep_branches_plot'))
    except Exception:
        keepBranchesPlot = []
    try:
        keepBranchesPlot += eval(self.config.get('Branches', 'keep_branches'))
    except Exception:
        pass

    # also keep some branches which might be used later in variables definition and weights
    try:
        for section in self.config.sections():
            try:
                if section.startswith('plotDef:') and self.config.has_option(section, 'relPath'):
                    keepBranchesPlot.append(self.config.get(section, 'relPath'))
            except Exception as e:
                print("\x1b[31mWARNING: config error in:", section, "=>", e, "\x1b[0m")
    except Exception as e2:
        print("\x1b[31mERROR: config file contains an error! automatic selection of branches to keep will not work!\x1b[0m")
        print(e2)

    # the weight is optional as well
    try:
        keepBranchesPlot.append(self.config.get('Weights', 'weightF'))
    except Exception:
        pass

    # plotting region cut (.values(): dict.iteritems() does not exist in Python 3)
    for regionInfo in self.regionsDict.values():
        keepBranchesPlot.append(regionInfo['cut'])

    keepBranchesPlotFinal = BranchList(keepBranchesPlot).getListOfBranches()
    print("KEEP:", keepBranchesPlotFinal)

    # ----------------------------------------------------------------------------------------------------------------------
    # cache samples
    # ----------------------------------------------------------------------------------------------------------------------
    for sampleToCache in [self.sampleIdentifier]:
        print('*' * 80)
        print(' ', sampleToCache)
        print('*' * 80)
        # prepare caches for training and evaluation samples
        treeCaches = []
        # reset the attribute which is tested below (before, only an unused local was reset)
        self.sampleTree = None

        # for all (sub)samples which come from the same files (sampleIdentifier)
        subsamples = [x for x in self.samples if x.identifier == sampleToCache]
        for sample in subsamples:

            # add cuts for all training regions
            for region, regionInfo in self.regionsDict.items():

                configSection = 'Plot:%s' % region

                # cuts
                sampleCuts = [sample.subcut]
                if regionInfo['cut']:
                    sampleCuts.append(regionInfo['cut'])
                if self.config.has_option(configSection, 'Datacut'):
                    sampleCuts.append(self.config.get(configSection, 'Datacut'))
                if self.config.has_option('Plot_general', 'addBlindingCut'):
                    # append the configured cut itself, not the boolean returned by has_option()
                    sampleCuts.append(self.config.get('Plot_general', 'addBlindingCut'))

                # arbitrary (optional) name for the output tree, used for print-out (the TreeCache object has no idea what it is doing, e.g. dc, plot etc.)
                cacheName = 'plot:{region}_{sample}'.format(region=region, sample=sample.name)

                # add cache object
                tc = TreeCache.TreeCache(
                    name=cacheName,
                    sample=sample.name,
                    cutList=sampleCuts,
                    inputFolder=self.samplesPath,
                    splitFilesChunks=self.splitFilesChunks,
                    chunkNumber=self.chunkNumber,
                    splitFilesChunkSize=self.splitFilesChunkSize,
                    fileList=self.fileList,
                    branches=keepBranchesPlotFinal,
                    config=self.config,
                    debug=True
                )

                # check if this part of the sample is already cached
                isCached = tc.partIsCached()
                if not isCached or self.forceRedo:
                    if isCached:
                        # forced re-run: remove the existing files of this chunk first
                        tc.deleteCachedFiles(chunkNumber=self.chunkNumber)

                    # for the first sample which comes from this files, load the tree
                    if not self.sampleTree:
                        self.sampleTree = SampleTree(
                            {'name': sample.identifier, 'folder': self.samplesPath},
                            splitFilesChunkSize=self.splitFilesChunkSize,
                            chunkNumber=self.chunkNumber,
                            config=self.config,
                            saveMemory=True)
                        if not self.sampleTree or not self.sampleTree.tree:
                            print("\x1b[31mERROR: creation of sample tree failed!!\x1b[0m")
                            raise Exception("CreationOfSampleTreeFailed")
                        # consistency check on the file list at submission time and now
                        fileListNow = self.sampleTree.getSampleFileNameChunk(self.chunkNumber)
                        if self.fileList and (sorted(self.fileList) != sorted(fileListNow)):
                            print("\x1b[31mERROR: sample files have changed between submission and run of the job!\x1b[0m")
                            raise Exception("SampleFilesHaveChanged")

                    treeCaches.append(tc.setSampleTree(self.sampleTree).cache())
                else:
                    print("INFO: already cached!", tc, "(", tc.hash, ")")

        if treeCaches:
            # run on the tree
            self.sampleTree.process()
        else:
            print("nothing to do!")
def prepare(self):
    """Set up the tree caches for all datacard regions of ``self.sampleToCache``.

    For every datacard maker and every subsample with the identifier
    ``self.sampleToCache`` a ``TreeCache`` is created. Parts which are not
    cached yet (or all, if ``self.forceRedo`` is set) are connected to one
    shared ``SampleTree`` and collected in ``self.treeCaches``. The tree itself
    is not processed here.

    Returns:
        self, also if no datacard regions have been added.

    Raises:
        Exception: "CreationOfSampleTreeFailed" if the sample tree could not be
            created, "SampleFilesHaveChanged" if ``self.fileList`` differs from
            the files found for this chunk.
    """
    if len(self.dcMakers) < 1:
        print("WARNING: no datacard regions added, nothing to do.")
        return self

    self.treeCaches = []
    self.sampleTree = None

    # cuts
    allSamples = self.getAllSamples()
    subsamples = [x for x in allSamples if x.identifier == self.sampleToCache]

    # loop over all datacard regions
    for dcMaker in self.dcMakers:

        # loop over all subsamples (which come from the same root tree files)
        for sample in subsamples:

            # combine subcut and systematics cut with logical AND
            # systematics cuts are combined with logical OR, such that 1 cache file can be used for all the systematics
            isData = (sample.type == 'DATA')
            systematicsCuts = sorted({x['cachecut'] for x in dcMaker.getSystematicsList(isData=isData)})
            sampleCuts = {'AND': [sample.subcut, {'OR': systematicsCuts}]}
            if self.verbose:
                print(json.dumps(sampleCuts, sort_keys=True, indent=8, default=str))

            # make list of branches to keep in root file
            # (the full systematics list is requested once and reused for all four keys)
            allSystematics = dcMaker.getSystematicsList()
            branchList = BranchList(sample.subcut)
            for key in ('cachecut', 'cut', 'var', 'weight'):
                branchList.addCut([x[key] for x in allSystematics])
            branchList.addCut(self.config.get('Weights', 'weightF'))
            # NOTE: eval() executes the config value as Python code, the config has to be trusted.
            branchList.addCut(eval(self.config.get('Branches', 'keep_branches')))
            branchesToKeep = branchList.getListOfBranches()

            # arbitrary (optional) name for the output tree, used for print-out (the TreeCache object has no idea what it is doing, e.g. dc, plot etc.)
            cacheName = 'dc:{region}_{sample}'.format(region=dcMaker.getRegion(), sample=sample.name)

            # add cache object
            tc = TreeCache.TreeCache(
                name=cacheName,
                sample=sample.name,
                cutList=sampleCuts,
                cutSequenceMode='TREE',
                branches=branchesToKeep,
                inputFolder=dcMaker.path,
                splitFilesChunks=self.splitFilesChunks,
                chunkNumber=self.chunkNumber,
                splitFilesChunkSize=self.splitFilesChunkSize,
                fileList=self.fileList,
                config=self.config,
                debug=self.verbose
            )

            # check if this part of the sample is already cached
            isCached = tc.partIsCached()
            print("check if sample \x1b[34m{sample}\x1b[0m part {part} is cached:".format(sample=sample.name, part=self.chunkNumber), isCached)
            if not isCached or self.forceRedo:
                if isCached:
                    # forced re-run: remove the existing files of this chunk first
                    tc.deleteCachedFiles(chunkNumber=self.chunkNumber)

                # for the first sample which comes from this files, load the tree
                if not self.sampleTree:
                    self.sampleTree = SampleTree(
                        {'name': sample.identifier, 'folder': dcMaker.path},
                        splitFilesChunkSize=self.splitFilesChunkSize,
                        chunkNumber=self.chunkNumber,
                        config=self.config,
                        saveMemory=True)
                    if not self.sampleTree or not self.sampleTree.tree:
                        print("\x1b[31mERROR: creation of sample tree failed!!\x1b[0m")
                        raise Exception("CreationOfSampleTreeFailed")
                    # consistency check on the file list at submission time and now
                    fileListNow = self.sampleTree.getSampleFileNameChunk(self.chunkNumber)
                    if self.fileList and (sorted(self.fileList) != sorted(fileListNow)):
                        print("\x1b[31mERROR: sample files have changed between submission and run of the job!\x1b[0m")
                        raise Exception("SampleFilesHaveChanged")

                # connect the TreeCache object to the input sampleTree and add it to the list of cached trees
                self.treeCaches.append(tc.setSampleTree(self.sampleTree).cache())
    return self
def run(self):
    """Cache the training and evaluation trees of ``self.sampleIdentifier``.

    One ``TreeCache`` is created per subsample, training region and cut
    (``self.TrainCut`` / ``self.EvalCut``). Parts which are not cached yet, or
    all parts if ``self.force`` is set, are connected to a single shared
    ``SampleTree`` which is processed once at the end.
    """
    # ----------------------------------------------------------------------------------------------------------------------
    # cache samples
    # ----------------------------------------------------------------------------------------------------------------------
    for sampleToCache in [self.sampleIdentifier]:
        print('*' * 80)
        print(' ', sampleToCache)
        print('*' * 80)
        # prepare caches for training and evaluation samples
        treeCaches = []
        self.sampleTree = None

        # use all (sub)samples which come from the same files (sampleIdentifier)
        subsamples = [x for x in self.samples if x.identifier == sampleToCache]

        # list of branches to keep for use as MVA input variables
        # The variables depend only on the training region, so they are added
        # once per region instead of once per sample and train/eval cut.
        # .values()/.items() are used because dict.iteritems() does not exist in Python 3.
        branchListOfMVAVars = BranchList()
        for trainingRegionInfo in self.trainingRegionsDict.values():
            branchListOfMVAVars.addCut(trainingRegionInfo['vars'])
        branchListOfMVAVars.addCut(self.config.get('Weights', 'weightF'))
        mvaBranches = branchListOfMVAVars.getListOfBranches()

        # loop over all samples
        for sample in subsamples:
            # add cuts for all training regions
            for trainingRegion, trainingRegionInfo in self.trainingRegionsDict.items():
                # add cuts for training and evaluation
                for additionalCut in [self.TrainCut, self.EvalCut]:
                    # cuts
                    sampleCuts = [sample.subcut]
                    if additionalCut:
                        sampleCuts.append(additionalCut)
                    if trainingRegionInfo['cut']:
                        sampleCuts.append(trainingRegionInfo['cut'])

                    # add cache object
                    tc = TreeCache.TreeCache(
                        name='{region}_{sample}_{tr}'.format(
                            region=trainingRegion,
                            sample=sample.name,
                            tr='TRAIN' if additionalCut == self.TrainCut else 'EVAL'),
                        sample=sample.name,
                        cutList=sampleCuts,
                        inputFolder=self.samplesPath,
                        splitFilesChunks=self.splitFilesChunks,
                        chunkNumber=self.chunkNumber,
                        splitFilesChunkSize=self.splitFilesChunkSize,
                        branches=mvaBranches,
                        config=self.config,
                        debug=True)

                    # check if this part of the sample is already cached
                    isCached = tc.partIsCached()
                    if not isCached or self.force:
                        if isCached:
                            # forced re-run: remove the existing files of this chunk first
                            tc.deleteCachedFiles(chunkNumber=self.chunkNumber)
                        # for the first sample which comes from this files, load the tree
                        if not self.sampleTree:
                            self.sampleTree = SampleTree(
                                {'name': sample.identifier, 'folder': self.samplesPath},
                                splitFilesChunkSize=self.splitFilesChunkSize,
                                chunkNumber=self.chunkNumber,
                                config=self.config,
                                saveMemory=True)
                        treeCaches.append(tc.setSampleTree(self.sampleTree).cache())

        if treeCaches:
            # run on the tree
            self.sampleTree.process()
        else:
            print("nothing to do!")
parser.add_option("-R", "--regions", dest="regions", default="", help="regions")
(opts, args) = parser.parse_args(argv)

config = XbbConfigTools(XbbConfigReader.read(opts.tag))
inputFolder = config.get('Directories', 'dcSamples')
logFolder = config.get('Directories', 'tagDir')
config.loadNamespaces()

# without the -R option, all datacard regions of the config are used
if len(opts.regions) < 1:
    regions = config.getDatacardRegions()
else:
    regions = config.parseCommaSeparatedList(opts.regions)

# write the run and event numbers of all selected data events to one text file
# per region and data sample
for region in regions:
    datacardSection = 'dc:' + region
    # NOTE: eval() executes the config value as Python code, the config has to be trusted.
    dataSamples = eval(config.get(datacardSection, 'data'))
    for dataSample in dataSamples:
        sampleTree = SampleTree({'name': dataSample, 'folder': inputFolder}, config=config)

        outputFileName = ''.join([logFolder, '/', region, '_', dataSample, '.txt'])
        print("save event list to:", outputFileName)

        # send the output of the scan to the file instead of the terminal
        scanPlayer = sampleTree.tree.GetPlayer()
        scanPlayer.SetScanRedirect(True)
        scanPlayer.SetScanFileName(outputFileName)

        branchList = BranchList(["run", "event"])

        # the name of the cut is either given explicitly in the datacard
        # section or it is identical to the region name
        if config.has_option(datacardSection, 'cut'):
            cutName = config.get(datacardSection, 'cut')
        else:
            cutName = region
        regionCut = config.get('Cuts', cutName)
        branchList.addCut(regionCut)

        sampleTree.enableBranches(branchList.getListOfBranches())
        sampleTree.tree.Scan("run:event", regionCut, "colsize=16")
'sample': sample, 'folder': directory }, config=config) #raw_input() # since we load all trees, we can compute the factor to scale cross section to luminosity directly (otherwise write it to ntuples # first and then use it as branch, or compute it with full set of trees before) scaleXStoLumi = sampleTree.getScale(sample) # enable only used branches! # this will speed up processing a lot sampleTree.enableBranches( BranchList([ signalRegionSelection, weightExpression_DeepCSV, weightExpression_DeepJet, taggerExpression_DeepCSV, taggerExpression_DeepJet ]).getListOfBranches() + ['Jet*']) # this will create the TTreeformula objects sampleTree.addFormula(signalRegionSelection) sampleTree.addFormula(signalRegionSelection_roc) sampleTree.addFormula(weightExpression_DeepCSV) sampleTree.addFormula(weightExpression_DeepJet) sampleTree.addFormula(weightExpression_DeepCSV_nosf) sampleTree.addFormula(weightExpression_DeepJet_nosf) sampleTree.addFormula(taggerExpression_DeepCSV) sampleTree.addFormula(taggerExpression_DeepJet) isSignal = 1 if sample.name in signalNames else 0
def prepare(self):
    """Set up the tree caches for all datacard regions of ``self.sampleToCache``.

    For every datacard maker and every subsample with the identifier
    ``self.sampleToCache`` a ``TreeCache`` is created. Parts which are not
    cached yet (or all, if ``self.forceRedo`` is set) are connected to one
    shared ``SampleTree`` and collected in ``self.treeCaches``. The tree itself
    is not processed here.

    Returns:
        self, also if no datacard regions have been added.

    Raises:
        Exception: "CreationOfSampleTreeFailed" if the sample tree could not be
            created, "SampleFilesHaveChanged" if ``self.fileList`` differs from
            the files found for this chunk.
    """
    if len(self.dcMakers) < 1:
        print("WARNING: no datacard regions added, nothing to do.")
        return self

    self.treeCaches = []
    self.sampleTree = None

    # cuts
    allSamples = self.getAllSamples()
    subsamples = [x for x in allSamples if x.identifier == self.sampleToCache]

    # loop over all datacard regions
    for dcMaker in self.dcMakers:

        # loop over all subsamples (which come from the same root tree files)
        for sample in subsamples:

            # combine subcut and systematics cut with logical AND
            # systematics cuts are combined with logical OR, such that 1 cache file can be used for all the systematics
            isData = (sample.type == 'DATA')
            systematicsCuts = sorted({x['cachecut'] for x in dcMaker.getSystematicsList(isData=isData)})
            sampleCuts = {'AND': [sample.subcut, {'OR': systematicsCuts}]}
            # print the cut structure only in verbose mode (a "True or" debugging
            # override forced this output before)
            if self.verbose:
                print(json.dumps(sampleCuts, sort_keys=True, indent=8, default=str))

            # make list of branches to keep in root file
            # (the full systematics list is requested once and reused for all four keys)
            allSystematics = dcMaker.getSystematicsList()
            branchList = BranchList(sample.subcut)
            for key in ('cachecut', 'cut', 'var', 'weight'):
                branchList.addCut([x[key] for x in allSystematics])
            branchList.addCut(self.config.get('Weights', 'weightF'))
            # NOTE: eval() executes the config value as Python code, the config has to be trusted.
            branchList.addCut(eval(self.config.get('Branches', 'keep_branches')))
            branchesToKeep = branchList.getListOfBranches()

            # arbitrary (optional) name for the output tree, used for print-out (the TreeCache object has no idea what it is doing, e.g. dc, plot etc.)
            cacheName = 'dc:{region}_{sample}'.format(region=dcMaker.getRegion(), sample=sample.name)

            # add cache object
            tc = TreeCache.TreeCache(
                name=cacheName,
                sample=sample.name,
                cutList=sampleCuts,
                cutSequenceMode='TREE',
                branches=branchesToKeep,
                inputFolder=dcMaker.path,
                splitFilesChunks=self.splitFilesChunks,
                chunkNumber=self.chunkNumber,
                splitFilesChunkSize=self.splitFilesChunkSize,
                fileList=self.fileList,
                config=self.config,
                debug=self.verbose
            )

            # check if this part of the sample is already cached
            isCached = tc.partIsCached()
            print("check if sample \x1b[34m{sample}\x1b[0m part {part} is cached:".format(sample=sample.name, part=self.chunkNumber), isCached)
            if not isCached or self.forceRedo:
                if isCached:
                    # forced re-run: remove the existing files of this chunk first
                    tc.deleteCachedFiles(chunkNumber=self.chunkNumber)

                # for the first sample which comes from this files, load the tree
                if not self.sampleTree:
                    self.sampleTree = SampleTree(
                        {'name': sample.identifier, 'folder': dcMaker.path},
                        splitFilesChunkSize=self.splitFilesChunkSize,
                        chunkNumber=self.chunkNumber,
                        config=self.config,
                        saveMemory=True)
                    if not self.sampleTree or not self.sampleTree.tree:
                        print("\x1b[31mERROR: creation of sample tree failed!!\x1b[0m")
                        raise Exception("CreationOfSampleTreeFailed")
                    # consistency check on the file list at submission time and now
                    fileListNow = self.sampleTree.getSampleFileNameChunk(self.chunkNumber)
                    if self.fileList and (sorted(self.fileList) != sorted(fileListNow)):
                        print("\x1b[31mERROR: sample files have changed between submission and run of the job!\x1b[0m")
                        raise Exception("SampleFilesHaveChanged")

                # connect the TreeCache object to the input sampleTree and add it to the list of cached trees
                self.treeCaches.append(tc.setSampleTree(self.sampleTree).cache())
    return self
def test_BanchListDuplicates(self):
    """Branch names which are added several times must be listed only once."""
    initialBranches = ['a', 'b', 'c', 'c', 'c']
    additionalBranches = ['c', 'd', 'd']
    branchList = BranchList(initialBranches)
    branchList.addCut(additionalBranches)
    # the distinct names are a, b, c and d
    numberOfBranches = len(branchList.getListOfBranches())
    self.assertEqual(numberOfBranches, 4)
def test_BanchListDuplicates(self):
    # 'c' is passed four times and 'd' twice, the resulting list of branches is
    # nevertheless expected to have just the four entries a, b, c, d
    uniqueBranches = BranchList(['a', 'b', 'c', 'c', 'c'])
    uniqueBranches.addCut(['c', 'd', 'd'])
    resultingBranches = uniqueBranches.getListOfBranches()
    self.assertEqual(len(resultingBranches), 4)
from __future__ import print_function
import ROOT
ROOT.gROOT.SetBatch(True)
from myutils.XbbConfig import XbbConfigReader, XbbConfigTools
from myutils.sampleTree import SampleTree as SampleTree
from myutils.BranchList import BranchList

# Example: print the values of some expressions for the first events of the
# 'MET' sample of the "Zvv2017" configuration.
config = XbbConfigTools(XbbConfigReader.read("Zvv2017"))
sampleDefinition = {'name': 'MET', 'folder': config.get('Directories', 'dcSamples')}
sampleTree = SampleTree(sampleDefinition, config=config)

variables = ["H_pt", "MET_Pt", "H_pt/MET_Pt"]

# enable only explicitly used branches
usedBranches = BranchList(variables).getListOfBranches()
sampleTree.enableBranches(usedBranches)

# create TTreeFormula's
for variable in variables:
    sampleTree.addFormula(variable)

# loop over events
for event in sampleTree:
    entryNumber = sampleTree.tree.GetReadEntry()
    formattedValues = []
    for x in variables:
        formattedValues.append(x + "=%1.4f" % sampleTree.evaluate(x))
    print(entryNumber, ", ".join(formattedValues))
    # stop once the current entry number exceeds 98
    if sampleTree.tree.GetReadEntry() > 98:
        break