class XbbRun(object):
    """Run the configured 'collection' modules over the input files of one sample.

    The constructor reads the configuration, resolves the sample selected by
    ``opts.sampleIdentifier`` and builds the list of sub-jobs: one per input
    file, or a single one covering all files if ``opts.join`` is set.
    ``run()`` then processes every sub-job whose output file is missing or
    broken (or every sub-job if ``opts.force`` is set).

    All ``print`` calls use a single pre-formatted string argument, so the
    output is the same whether or not ``print_function`` is in effect.
    """

    def __init__(self, opts):
        """Set up configuration, sample and the list of sub-jobs.

        opts: parsed command line options. Attributes read here: fileList,
            config, inputDir, outputDir, sampleIdentifier, addCollections,
            join. ``run()`` additionally reads force and friend.

        Raises SystemExit(1) if the sample identifier does not match exactly
        one sample.
        """
        # get file list; an empty --fileList gives an empty list (not None),
        # so that len() and iteration below are always safe
        self.filelist = FileList.decompress(opts.fileList) if opts.fileList else []
        if len(self.filelist) > 0:
            print("len(filelist) %d filelist[0]: %s" % (len(self.filelist), self.filelist[0]))
        else:
            print("len(filelist) 0")

        # config
        self.debug = 'XBBDEBUG' in os.environ
        self.verifyCopy = True
        self.opts = opts
        self.config = BetterConfigParser()
        self.config.read(opts.config)
        self.channel = self.config.get('Configuration', 'channel')

        # load namespace, TODO
        VHbbNameSpace = self.config.get('VHbbNameSpace', 'library')
        ROOT.gSystem.Load(VHbbNameSpace)

        # directories
        self.pathIN = self.config.get('Directories', opts.inputDir)
        self.pathOUT = self.config.get('Directories', opts.outputDir)
        self.tmpDir = self.config.get('Directories', 'scratch')
        print('INput samples:\t%s' % self.pathIN)
        print('OUTput samples:\t%s' % self.pathOUT)

        self.fileLocator = FileLocator(config=self.config)

        # check if given sample identifier uniquely matches a sample from config
        matchingSamples = ParseInfo(samples_path=self.pathIN, config=self.config).find(identifier=opts.sampleIdentifier)
        if len(matchingSamples) != 1:
            print("ERROR: need exactly 1 sample identifier as input with -S !!")
            print(matchingSamples)
            raise SystemExit(1)
        self.sample = matchingSamples[0]

        # collections: comma separated list, empty entries are dropped
        self.collections = [x.strip() for x in opts.addCollections.split(',') if len(x.strip()) > 0]
        if len(self.collections) < 1:
            print("\x1b[31mWARNING: no collections added! Specify the collections to add with the --addCollections option!\x1b[0m")
        print('collections to add: %s' % (self.collections,))
        self.collections = self.parseCollectionList(self.collections)
        print('after parsing: %s' % (self.collections,))

        # temporary folder to save the files of this job on the scratch
        temporaryName = self.sample.identifier + '/' + uuid.uuid4().hex

        # input files
        self.subJobs = []
        if opts.join:
            print("INFO: join input files! This is an experimental feature!")
            # nothing to join for an empty file list (the output name is derived from the first file)
            if self.filelist:
                self.subJobs.append(self._makeSubJob(self.filelist, temporaryName))
        else:
            # create separate subjob for all files (default!)
            for inputFileName in self.filelist:
                self.subJobs.append(self._makeSubJob([inputFileName], temporaryName))

    def _makeSubJob(self, inputFileNames, temporaryName):
        """Return the sub-job dict (input, output and scratch file names) for the given input files."""
        # translate naming convention of .txt file to imported files after the prep step
        inputFileNamesAfterPrep = [self.fileLocator.getFilenameAfterPrep(x) for x in inputFileNames]
        pathTemplate = "{path}/{subfolder}/{filename}"
        return {
            'inputFileNames': inputFileNames,
            'localInputFileNames': [
                pathTemplate.format(path=self.pathIN, subfolder=self.sample.identifier, filename=localFileName)
                for localFileName in inputFileNamesAfterPrep
            ],
            # output and scratch file are named after the first input file
            'outputFileName': pathTemplate.format(path=self.pathOUT, subfolder=self.sample.identifier, filename=inputFileNamesAfterPrep[0]),
            'tmpFileName': pathTemplate.format(path=self.tmpDir, subfolder=temporaryName, filename=inputFileNamesAfterPrep[0]),
        }

    # lists of single modules can be given instead of a module, "--addCollections Sys.all"
    # [Sys]
    # all = ['Sys.Vtype', 'Sys.Leptons', ...]
    # TODO: make it fully recursive
    def parseCollectionList(self, collections):
        """Expand collections which refer to a list in the config (one level only).

        collections: list of strings, usually of the form 'Section.option'.
        Returns a new list in which every entry whose config value looks like
        a Python list ('[...]') is replaced by the elements of that list.
        Entries without a matching config option (e.g. Python code given
        directly like 'Module.Class()') are passed through unchanged, so that
        run() can interpret them or report the error.
        """
        collectionsListsReplaced = []
        for collection in collections:
            if '.' in collection:
                parts = collection.split('.')
                section = parts[0]
                key = parts[1]
                if self.config.has_section(section) and self.config.has_option(section, key):
                    listExpression = self.config.get(section, key).strip()
                    if listExpression.startswith('[') and listExpression.endswith(']'):
                        # NOTE(review): eval() of a config value, the config file has to be trusted
                        collectionsListsReplaced.extend(eval(listExpression))
                        continue
            collectionsListsReplaced.append(collection)
        return collectionsListsReplaced

    def _loadSampleTree(self, subJob):
        """Open the input of a sub-job, return the SampleTree or None if it could not be read."""
        sampleTree = SampleTree(subJob['localInputFileNames'], config=self.config)
        if sampleTree.tree:
            return sampleTree

        print("trying fallback... %d" % len(subJob['inputFileNames']))
        if len(subJob['inputFileNames']) != 1:
            print("\x1b[31mERROR: file does not exist or is broken, will be SKIPPED! (old naming scheme not supported for joining multipel files)\x1b[0m")
            return None

        # try original naming scheme if reading directly from Heppy/Nano ntuples (without prep)
        fileNameOriginal = self.pathIN + '/' + subJob['inputFileNames'][0]
        print("FO: %s" % fileNameOriginal)
        xrootdRedirector = self.fileLocator.getRedirector(fileNameOriginal)
        sampleTree = SampleTree([fileNameOriginal], config=self.config, xrootdRedirector=xrootdRedirector)
        if not sampleTree.tree:
            print("\x1b[31mERROR: file does not exist or is broken, will be SKIPPED!\x1b[0m")
            return None
        return sampleTree

    def _copyOutput(self, subJob):
        """Copy the scratch file to its final destination, return True if this succeeded.

        If self.verifyCopy is set, the copied file is checked and the copy is
        repeated once if the file at the destination is broken.
        """
        tmpFileName = subJob['tmpFileName']
        outputFileName = subJob['outputFileName']
        try:
            self.fileLocator.cp(tmpFileName, outputFileName, force=True)
            print('copy  %s %s' % (tmpFileName, outputFileName))
            if self.verifyCopy and not self.fileLocator.isValidRootFile(outputFileName):
                print('INFO: output at final destination broken, try to copy again from scratch disk to final destination...')
                self.fileLocator.cp(tmpFileName, outputFileName, force=True)
                print('INFO: second attempt copy done!')
                if not self.fileLocator.isValidRootFile(outputFileName):
                    print('\x1b[31mERROR: output still broken!\x1b[0m')
                    raise Exception("FileCopyError")
                print('INFO: file is good after second attempt!')
        except Exception as e:
            print(e)
            print("\x1b[31mERROR: copy from scratch to final destination failed!!\x1b[0m")
            return False
        return True

    # run all subjobs
    def run(self):
        """Process all sub-jobs.

        Raises Exception("ConfigError") if a collection cannot be resolved and
        Exception("ProcessingIncomplete") at the end if at least one file
        could not be read or its output could not be copied.
        """
        nFilesFailed = 0
        for subJob in self.subJobs:
            # only process if output is non-existing/broken or --force was used
            if not self.opts.force and self.fileLocator.isValidRootFile(subJob['outputFileName']):
                print('SKIP: %s' % (subJob['inputFileNames'],))
                continue

            # create directories
            outputFolder = '/'.join(subJob['outputFileName'].split('/')[:-1])
            tmpFolder = '/'.join(subJob['tmpFileName'].split('/')[:-1])
            self.fileLocator.makedirs(outputFolder)
            self.fileLocator.makedirs(tmpFolder)

            # load sample tree
            sampleTree = self._loadSampleTree(subJob)
            if sampleTree is None:
                nFilesFailed += 1
                continue

            # to use this syntax, use "--addCollections Sys.Vtype" for a config file entry like this:
            # [Sys]
            # Vtype = VtypeCorrector.VtypeCorrector(channel='Zll')
            # (instead of passing the tree in the constructor, the setTree method can be used)
            # The modules are created here (and not in a helper) so that the evaluated
            # code sees the same local names (self, subJob, sampleTree, ...) as before.
            pyModules = []
            versionTable = []
            for collection in self.collections:
                if '.' not in collection:
                    print("\x1b[31mERROR: config option not found: %s  the format should be: [Section].[Option]\x1b[0m" % collection)
                    raise Exception("ConfigError")

                section = collection.split('.')[0]
                key = collection.split('.')[1]
                if self.config.has_section(section) and self.config.has_option(section, key):
                    pyCode = self.config.get(section, key)
                elif '(' in collection and collection.endswith(')'):
                    print("WARNING: config option %s  not found, interpreting it as Python code!" % collection)
                    pyCode = collection
                else:
                    print("\x1b[31mERROR: config option not found: %s . To specify Python code directly, pass a complete constructor, e.g. --addCollections 'Module.Class()'. Module has to be placed in python/myutils/ folder.\x1b[0m" % collection)
                    raise Exception("ConfigError")

                # import module from myutils
                moduleName = pyCode.split('(')[0].split('.')[0].strip()
                if self.debug:
                    print("DEBUG: import module: %s" % moduleName)
                    print("\x1b[33mDEBUG: " + collection + ": run PYTHON code:\n" + pyCode + "\x1b[0m")
                globals()[moduleName] = importlib.import_module(".{module}".format(module=moduleName), package="myutils")

                # get object
                # NOTE(review): eval() of code from the config file / command line, both have to be trusted
                wObject = eval(pyCode)

                # pass the tree and other variables if needed to finalize initialization
                if callable(getattr(wObject, "customInit", None)):
                    wObject.customInit({
                        'config': self.config,
                        'sampleTree': sampleTree,
                        'tree': sampleTree.tree,
                        'sample': self.sample,
                        'channel': self.channel,
                        'pathIN': self.pathIN,
                        'pathOUT': self.pathOUT,
                    })

                # add callbacks if the object provides any
                if callable(getattr(wObject, "processEvent", None)):
                    sampleTree.addCallback('event', wObject.processEvent)
                for cb in ["finish", "prepareOutput"]:
                    if callable(getattr(wObject, cb, None)):
                        sampleTree.addCallback(cb, getattr(wObject, cb))

                # add branches
                if callable(getattr(wObject, "getBranches", None)):
                    sampleTree.addOutputBranches(wObject.getBranches())

                pyModules.append(wObject)
                versionTable.append([moduleName, wObject.getVersion() if hasattr(wObject, "getVersion") else 0])

            for moduleName, moduleVersion in versionTable:
                print(" > {m}:{v}".format(m=moduleName, v=moduleVersion))

            # NOTE(review): the loop above raises for every collection without a '.', so the two
            # legacy keywords below can currently not be reached; the code is kept for reference.
            # DEPRECATED, do not use anymore ---> use BranchTools.TreeFormulas()
            if 'addbranches' in self.collections:
                writeNewVariables = eval(self.config.get("Regression", "writeNewVariablesDict"))
                sampleTree.addOutputBranches(writeNewVariables)

            # DEPRECATED, do not use anymore ---> use BranchTools.Drop()
            if 'removebranches' in self.collections:
                for option in ['useless_branch', 'useless_after_sys']:
                    for br in eval(self.config.get('Branches', option)):
                        sampleTree.addBranchToBlacklist(br)

            # define output file
            sampleTree.addOutputTree(subJob['tmpFileName'], cut='1', branches='*', friend=self.opts.friend)

            # run processing
            for pyModule in pyModules:
                if hasattr(pyModule, "beforeProcessing"):
                    pyModule.beforeProcessing()

            sampleTree.process()

            for pyModule in pyModules:
                if hasattr(pyModule, "afterProcessing"):
                    pyModule.afterProcessing()

            # if output trees have been produced: copy temporary file to output folder
            if sampleTree.getNumberOfOutputTrees() > 0:
                # every failed copy counts as failed file, not only the failed verification
                if not self._copyOutput(subJob):
                    nFilesFailed += 1

            # delete temporary file
            try:
                self.fileLocator.rm(subJob['tmpFileName'])
            except Exception as e:
                print(e)
                print("WARNING: could not delete file on scratch!")

            # clean up all modules of this sub-job (not only the one created last)
            for pyModule in pyModules:
                if callable(getattr(pyModule, "cleanUp", None)):
                    pyModule.cleanUp()

        if nFilesFailed > 0:
            raise Exception("ProcessingIncomplete")
matchingSamples = [x for x in info if x.identifier==opts.sampleIdentifier and not x.subsample] if len(matchingSamples) != 1: print ("need exactly 1 sample identifier as input with -S !!", matchingSamples) exit(1) sample = matchingSamples[0] for fileName in filelist: localFileName = fileLocator.getFilenameAfterPrep(fileName) inputFileName = "{path}/{subfolder}/{filename}".format(path=INpath, subfolder=sample.identifier, filename=localFileName) outputFileName = "{path}/{subfolder}/{filename}".format(path=OUTpath, subfolder=sample.identifier, filename=localFileName) tmpFileName = "{path}/{subfolder}/{filename}".format(path=tmpDir, subfolder=sample.identifier, filename=localFileName) outputFolder = '/'.join(outputFileName.split('/')[:-1]) tmpFolder = '/'.join(tmpFileName.split('/')[:-1]) fileLocator.makedirs(tmpFolder) fileLocator.makedirs(outputFolder) if not fileLocator.isValidRootFile(outputFileName) or opts.force: # load sample tree sampleTree = SampleTree([inputFileName], config=config) if not sampleTree.tree: print ("\x1b[31mERROR: file does not exist or is broken, will be SKIPPED!\x1b[0m") continue # Set branch adress for all vars for i in range(0, len(theMVAs)): theMVAs[i].setVariables(sampleTree.tree, sample) mvaBranches = [] for i in range(0, len(theMVAs)): mvaBranches.append({ 'name': MVAinfos[i].MVAname, 'length': len(systematics.split()), 'formula': theMVAs[i].evaluate, 'leaflist': ':'.join(systematics.split())+'/F',
if len(collections) < 1: print "\x1b[31mWARNING: no collections added! Specify the collections to add with the --addCollections option!\x1b[0m" print 'collections to add:', collections for fileName in filelist: localFileName = fileLocator.getFilenameAfterPrep(fileName) inputFileName = "{path}/{subfolder}/{filename}".format(path=pathIN, subfolder=sample.identifier, filename=localFileName) outputFileName = "{path}/{subfolder}/{filename}".format(path=pathOUT, subfolder=sample.identifier, filename=localFileName) tmpFileName = "{path}/{subfolder}/{filename}".format(path=tmpDir, subfolder=sample.identifier, filename=localFileName) outputFolder = '/'.join(outputFileName.split('/')[:-1]) tmpFolder = '/'.join(tmpFileName.split('/')[:-1]) fileLocator.makedirs(tmpFolder) fileLocator.makedirs(outputFolder) if opts.force or not fileLocator.isValidRootFile(outputFileName): # load sample tree and initialize vtype corrector sampleTree = SampleTree([inputFileName], config=config) if not sampleTree.tree: # try original naming scheme if reading directly from Heppy/Nano ntuples (without prep) fileNameOriginal = pathIN + '/' + fileName print "FO:", fileNameOriginal xrootdRedirector = fileLocator.getRedirector(fileNameOriginal) sampleTree = SampleTree([fileNameOriginal], config=config, xrootdRedirector=xrootdRedirector) if not sampleTree.tree: print "\x1b[31mERROR: file does not exist or is broken, will be SKIPPED!\x1b[0m" continue # lists of single modules can be given instead of a module, "--addCollections Sys.all" # [Sys] # all = ['Sys.Vtype', 'Sys.Leptons', ...]
class SkimsHelper(object):
    """Write a skim of all cached plot-region trees of the selected samples into one file.

    Usage: ``SkimsHelper(config, region, sampleIdentifier, opts).prepare().run()``
    """

    def __init__(self, config, region, sampleIdentifier=None, opts=None):
        """Read paths, cuts and sample lists for the given plot region.

        config: config parser object, used via get() and has_option()
        region: name of the plot region, the config section 'Plot:<region>' is used
        sampleIdentifier: optional comma separated list of sample identifiers,
            if given only these samples are used
        opts: command line options, the attributes inputDir and outputDir are
            read (required although the parameter has a default)

        Raises ValueError if opts is not given.
        """
        if opts is None:
            # opts.inputDir / opts.outputDir are needed below, fail with a clear message
            raise ValueError("SkimsHelper: 'opts' with attributes inputDir and outputDir is required")

        self.config = config
        self.region = region
        self.sampleIdentifiers = sampleIdentifier.split(',') if sampleIdentifier and len(sampleIdentifier) > 0 else None
        # filled by prepare(), defined here so that run() without prepare() sees an empty list
        self.fileNames = []

        # VHbb namespace
        VHbbNameSpace = config.get('VHbbNameSpace', 'library')
        returnCode = ROOT.gSystem.Load(VHbbNameSpace)
        if returnCode != 0:
            print ("\x1b[31mERROR: loading VHbbNameSpace failed with code %d\x1b[0m"%returnCode)
        else:
            print ("INFO: loaded VHbbNameSpace: %s"%VHbbNameSpace)

        # input/output paths
        self.fileLocator = FileLocator(config=self.config)
        self.pathIN = self.config.get('Directories', opts.inputDir)
        self.pathOUT = self.config.get('Directories', opts.outputDir)
        self.tmpDir = self.config.get('Directories', 'scratch')

        self.samplesPath = config.get('Directories', 'plottingSamples')
        self.samplesDefinitions = config.get('Directories', 'samplesinfo')
        self.samplesInfo = ParseInfo(self.samplesDefinitions, self.samplesPath)
        self.sampleFilesFolder = config.get('Directories', 'samplefiles')
        self.plotPath = config.get('Directories', 'plotpath')

        # plot regions
        self.configSection = 'Plot:%s' % region

        # additional cut to only plot a subset of the region
        self.subcut = None
        if self.config.has_option(self.configSection, 'subcut'):
            self.subcut = self.config.get(self.configSection, 'subcut')
            print("INFO: use cut:", self.subcut)

        # additional global blinding cut:
        self.addBlindingCut = None
        if self.config.has_option('Plot_general', 'addBlindingCut'):  # contained in plots, cut on the event number
            self.addBlindingCut = self.config.get('Plot_general', 'addBlindingCut')
            print ('adding add. blinding cut:', self.addBlindingCut)

        # load samples
        # NOTE(review): eval() of config values, the config file has to be trusted
        self.data = eval(self.config.get(self.configSection, 'Datas'))  # read the data corresponding to each CR (section)
        self.mc = eval(self.config.get('Plot_general', 'samples'))  # read the list of mc samples
        self.total_lumi = eval(self.config.get('General', 'lumi'))
        self.signalRegion = False
        if self.config.has_option(self.configSection, 'Signal'):
            self.mc.append(self.config.get(self.configSection, 'Signal'))
            self.signalRegion = True
        self.dataSamples = self.samplesInfo.get_samples(self.data)
        self.mcSamples = self.samplesInfo.get_samples(self.mc)

        # filter samples used in the plot
        if self.sampleIdentifiers:
            self.dataSamples = [x for x in self.dataSamples if x.identifier in self.sampleIdentifiers]
            self.mcSamples = [x for x in self.mcSamples if x.identifier in self.sampleIdentifiers]

    def prepare(self):
        """Collect the names of the cached files of all DATA and MC samples in self.fileNames.

        Returns self, so that calls can be chained.
        """
        # add DATA + MC samples
        self.fileNames = []
        for sample in self.dataSamples + self.mcSamples:
            print(sample.identifier)

            # cuts
            sampleCuts = [sample.subcut]
            if self.config.has_option('Cuts', self.region):
                sampleCuts.append(self.config.get('Cuts', self.region))
            if self.config.has_option(self.configSection, 'Datacut'):
                sampleCuts.append(self.config.get(self.configSection, 'Datacut'))
            if self.addBlindingCut:
                sampleCuts.append(self.addBlindingCut)

            # get sample tree from cache
            # (the config of this object is used, not a module level 'config' which might not exist)
            self.fileNames += TreeCache.TreeCache(
                sample=sample,
                cutList=sampleCuts,
                inputFolder=self.samplesPath,
                config=self.config
            ).findCachedFileNames()

        if len(self.fileNames) < 1:
            print("\x1b[31mERROR: no files found, run cacheplot!\x1b[0m")
        return self

    def run(self):
        """Process all files found by prepare() and copy the resulting skim to the output folder.

        The file is written to the scratch folder first. The temporary file is
        only removed if the copy at the final destination is a valid file.
        """
        name = self.config.get('Configuration', 'channel') if self.config.has_option('Configuration', 'channel') else '_'
        timestamp = datetime.datetime.now().strftime("%y%m%d")
        # the region of this object is used, not a module level 'region' which might not exist
        baseName = '/skim_' + name + '_' + self.region + '_' + timestamp
        tmpName = self.tmpDir + baseName + '_tmp.root'
        destName = self.pathOUT + baseName + '.root'

        sampleTree = SampleTree(self.fileNames, config=self.config)

        if self.config.has_option('Plot_general', 'controlSample'):
            # NOTE(review): eval() of a config value, the config file has to be trusted
            controlSampleDict = eval(self.config.get('Plot_general', 'controlSample'))
            controlSample = controlSampleDict[self.region] if self.region in controlSampleDict else -1
            sampleTree.addOutputBranch("controlSample", lambda x: controlSample, branchType="i")
            print("INFO: setting controlSample to", controlSample)

        sampleTree.addOutputTree(tmpName, cut='1', branches='*', friend=False)
        sampleTree.process()

        # copy to final destination
        if sampleTree.getNumberOfOutputTrees() > 0:
            try:
                self.fileLocator.cp(tmpName, destName, force=True)
                print('copy ', tmpName, destName)

                if not self.fileLocator.isValidRootFile(destName):
                    print("\x1b[31mERROR: copy failed, output is broken!\x1b[0m")
                else:
                    try:
                        self.fileLocator.rm(tmpName)
                    except Exception as e:
                        print(e)
            except Exception as e:
                print("\x1b[31mERROR: copy failed!", e, "\x1b[0m")
class XbbRun(object):
    """Run the configured 'collection' modules over the input files of one sample.

    The constructor reads the configuration, resolves the sample selected by
    ``opts.sampleIdentifier`` and builds the list of sub-jobs: one per input
    file, or a single one covering all files if ``opts.join`` is set.
    ``run()`` then processes every sub-job whose output file is missing or
    broken (or every sub-job if ``opts.force`` is set).

    All ``print`` calls use a single pre-formatted string argument, so the
    output is the same whether or not ``print_function`` is in effect.
    """

    def __init__(self, opts):
        """Set up configuration, sample and the list of sub-jobs.

        opts: parsed command line options. Attributes read here: fileList,
            config, inputDir, outputDir, sampleIdentifier, addCollections,
            join. ``run()`` additionally reads force and friend.

        Raises SystemExit(1) if the sample identifier does not match exactly
        one sample.
        """
        # get file list; an empty --fileList gives an empty list (not None),
        # so that len() and iteration below are always safe
        self.filelist = FileList.decompress(opts.fileList) if opts.fileList else []
        if len(self.filelist) > 0:
            print("len(filelist) %d filelist[0]: %s" % (len(self.filelist), self.filelist[0]))
        else:
            print("len(filelist) 0")

        # config
        self.debug = 'XBBDEBUG' in os.environ
        self.verifyCopy = True
        self.opts = opts
        self.config = BetterConfigParser()
        self.config.read(opts.config)
        samplesinfo = self.config.get('Directories', 'samplesinfo')
        self.channel = self.config.get('Configuration', 'channel')

        # load namespace, TODO
        VHbbNameSpace = self.config.get('VHbbNameSpace', 'library')
        ROOT.gSystem.Load(VHbbNameSpace)

        # directories
        self.pathIN = self.config.get('Directories', opts.inputDir)
        self.pathOUT = self.config.get('Directories', opts.outputDir)
        self.tmpDir = self.config.get('Directories', 'scratch')
        print('INput samples:\t%s' % self.pathIN)
        print('OUTput samples:\t%s' % self.pathOUT)

        self.fileLocator = FileLocator(config=self.config)

        # check if given sample identifier uniquely matches a sample from config
        matchingSamples = ParseInfo(samplesinfo, self.pathIN).find(identifier=opts.sampleIdentifier)
        if len(matchingSamples) != 1:
            print("ERROR: need exactly 1 sample identifier as input with -S !!")
            print(matchingSamples)
            raise SystemExit(1)
        self.sample = matchingSamples[0]

        # collections: comma separated list, empty entries are dropped
        self.collections = [x.strip() for x in opts.addCollections.split(',') if len(x.strip()) > 0]
        if len(self.collections) < 1:
            print("\x1b[31mWARNING: no collections added! Specify the collections to add with the --addCollections option!\x1b[0m")
        print('collections to add: %s' % (self.collections,))
        self.collections = self.parseCollectionList(self.collections)
        print('after parsing: %s' % (self.collections,))

        # temporary folder to save the files of this job on the scratch
        temporaryName = self.sample.identifier + '/' + uuid.uuid4().hex

        # input files
        self.subJobs = []
        if opts.join:
            print("INFO: join input files! This is an experimental feature!")
            # nothing to join for an empty file list (the output name is derived from the first file)
            if self.filelist:
                self.subJobs.append(self._makeSubJob(self.filelist, temporaryName))
        else:
            # create separate subjob for all files (default!)
            for inputFileName in self.filelist:
                self.subJobs.append(self._makeSubJob([inputFileName], temporaryName))

    def _makeSubJob(self, inputFileNames, temporaryName):
        """Return the sub-job dict (input, output and scratch file names) for the given input files."""
        # translate naming convention of .txt file to imported files after the prep step
        inputFileNamesAfterPrep = [self.fileLocator.getFilenameAfterPrep(x) for x in inputFileNames]
        pathTemplate = "{path}/{subfolder}/{filename}"
        return {
            'inputFileNames': inputFileNames,
            'localInputFileNames': [
                pathTemplate.format(path=self.pathIN, subfolder=self.sample.identifier, filename=localFileName)
                for localFileName in inputFileNamesAfterPrep
            ],
            # output and scratch file are named after the first input file
            'outputFileName': pathTemplate.format(path=self.pathOUT, subfolder=self.sample.identifier, filename=inputFileNamesAfterPrep[0]),
            'tmpFileName': pathTemplate.format(path=self.tmpDir, subfolder=temporaryName, filename=inputFileNamesAfterPrep[0]),
        }

    # lists of single modules can be given instead of a module, "--addCollections Sys.all"
    # [Sys]
    # all = ['Sys.Vtype', 'Sys.Leptons', ...]
    # TODO: make it fully recursive
    def parseCollectionList(self, collections):
        """Expand collections which refer to a list in the config (one level only).

        collections: list of strings, either 'Section.option' or a plain keyword.
        Returns a new list in which every 'Section.option' entry whose config
        value looks like a Python list ('[...]') is replaced by the elements
        of that list; all other entries are kept. The config option of a
        'Section.option' entry has to exist, otherwise the error raised by
        config.get() is passed on.
        """
        collectionsListsReplaced = []
        for collection in collections:
            if '.' in collection:
                parts = collection.split('.')
                listExpression = self.config.get(parts[0], parts[1]).strip()
                if listExpression.startswith('[') and listExpression.endswith(']'):
                    # NOTE(review): eval() of a config value, the config file has to be trusted
                    collectionsListsReplaced.extend(eval(listExpression))
                    continue
            collectionsListsReplaced.append(collection)
        return collectionsListsReplaced

    def _loadSampleTree(self, subJob):
        """Open the input of a sub-job, return the SampleTree or None if it could not be read."""
        sampleTree = SampleTree(subJob['localInputFileNames'], config=self.config)
        if sampleTree.tree:
            return sampleTree

        print("trying fallback... %d" % len(subJob['inputFileNames']))
        if len(subJob['inputFileNames']) != 1:
            print("\x1b[31mERROR: file does not exist or is broken, will be SKIPPED! (old naming scheme not supported for joining multipel files)\x1b[0m")
            return None

        # try original naming scheme if reading directly from Heppy/Nano ntuples (without prep)
        fileNameOriginal = self.pathIN + '/' + subJob['inputFileNames'][0]
        print("FO: %s" % fileNameOriginal)
        xrootdRedirector = self.fileLocator.getRedirector(fileNameOriginal)
        sampleTree = SampleTree([fileNameOriginal], config=self.config, xrootdRedirector=xrootdRedirector)
        if not sampleTree.tree:
            print("\x1b[31mERROR: file does not exist or is broken, will be SKIPPED!\x1b[0m")
            return None
        return sampleTree

    def _copyOutput(self, subJob):
        """Copy the scratch file to its final destination, return True if this succeeded.

        If self.verifyCopy is set, the copied file is checked and the copy is
        repeated once if the file at the destination is broken.
        """
        tmpFileName = subJob['tmpFileName']
        outputFileName = subJob['outputFileName']
        try:
            self.fileLocator.cp(tmpFileName, outputFileName, force=True)
            print('copy  %s %s' % (tmpFileName, outputFileName))
            if self.verifyCopy and not self.fileLocator.isValidRootFile(outputFileName):
                print('INFO: output at final destination broken, try to copy again from scratch disk to final destination...')
                self.fileLocator.cp(tmpFileName, outputFileName, force=True)
                print('INFO: second attempt copy done!')
                if not self.fileLocator.isValidRootFile(outputFileName):
                    print('\x1b[31mERROR: output still broken!\x1b[0m')
                    raise Exception("FileCopyError")
                print('INFO: file is good after second attempt!')
        except Exception as e:
            print(e)
            print("\x1b[31mERROR: copy from scratch to final destination failed!!\x1b[0m")
            return False
        return True

    # run all subjobs
    def run(self):
        """Process all sub-jobs.

        Raises Exception("ProcessingIncomplete") at the end if at least one
        file could not be read or its output could not be copied.
        """
        nFilesFailed = 0
        for subJob in self.subJobs:
            # only process if output is non-existing/broken or --force was used
            if not self.opts.force and self.fileLocator.isValidRootFile(subJob['outputFileName']):
                print('SKIP: %s' % (subJob['inputFileNames'],))
                continue

            # create directories
            outputFolder = '/'.join(subJob['outputFileName'].split('/')[:-1])
            tmpFolder = '/'.join(subJob['tmpFileName'].split('/')[:-1])
            self.fileLocator.makedirs(outputFolder)
            self.fileLocator.makedirs(tmpFolder)

            # load sample tree
            sampleTree = self._loadSampleTree(subJob)
            if sampleTree is None:
                nFilesFailed += 1
                continue

            # to use this syntax, use "--addCollections Sys.Vtype" for a config file entry like this:
            # [Sys]
            # Vtype = VtypeCorrector.VtypeCorrector(channel='Zll')
            # (instead of passing the tree in the constructor, the setTree method can be used)
            # The modules are created here (and not in a helper) so that the evaluated
            # code sees the same local names (self, subJob, sampleTree, ...) as before.
            pyModules = []
            for collection in self.collections:
                # entries without '.' are keywords (see the deprecated options below), not modules
                if '.' not in collection:
                    continue

                section = collection.split('.')[0]
                key = collection.split('.')[1]
                pyCode = self.config.get(section, key)

                # import module from myutils
                moduleName = pyCode.split('(')[0].split('.')[0].strip()
                if self.debug:
                    print("DEBUG: import module: %s" % moduleName)
                    print("\x1b[33mDEBUG: " + collection + ": run PYTHON code:\n" + pyCode + "\x1b[0m")
                globals()[moduleName] = importlib.import_module(".{module}".format(module=moduleName), package="myutils")

                # get object
                # NOTE(review): eval() of code from the config file, the config file has to be trusted
                wObject = eval(pyCode)

                # pass the tree and other variables if needed to finalize initialization
                if callable(getattr(wObject, "customInit", None)):
                    wObject.customInit({
                        'config': self.config,
                        'sampleTree': sampleTree,
                        'tree': sampleTree.tree,
                        'sample': self.sample,
                        'channel': self.channel,
                        'pathIN': self.pathIN,
                        'pathOUT': self.pathOUT,
                    })

                # add callbacks if the object provides any
                if callable(getattr(wObject, "processEvent", None)):
                    sampleTree.addCallback('event', wObject.processEvent)

                # add branches
                if callable(getattr(wObject, "getBranches", None)):
                    sampleTree.addOutputBranches(wObject.getBranches())

                pyModules.append(wObject)

            # DEPRECATED, do not use anymore ---> use BranchTools.TreeFormulas()
            if 'addbranches' in self.collections:
                writeNewVariables = eval(self.config.get("Regression", "writeNewVariablesDict"))
                sampleTree.addOutputBranches(writeNewVariables)

            # DEPRECATED, do not use anymore ---> use BranchTools.Drop()
            if 'removebranches' in self.collections:
                # the config of this object has to be used, there is no local 'config'
                for option in ['useless_branch', 'useless_after_sys']:
                    for br in eval(self.config.get('Branches', option)):
                        sampleTree.addBranchToBlacklist(br)

            # define output file
            sampleTree.addOutputTree(subJob['tmpFileName'], cut='1', branches='*', friend=self.opts.friend)

            # run processing
            for pyModule in pyModules:
                if hasattr(pyModule, "beforeProcessing"):
                    pyModule.beforeProcessing()

            sampleTree.process()

            for pyModule in pyModules:
                if hasattr(pyModule, "afterProcessing"):
                    pyModule.afterProcessing()

            # if output trees have been produced: copy temporary file to output folder
            if sampleTree.getNumberOfOutputTrees() > 0:
                # every failed copy counts as failed file, not only the failed verification
                if not self._copyOutput(subJob):
                    nFilesFailed += 1

            # delete temporary file
            try:
                self.fileLocator.rm(subJob['tmpFileName'])
            except Exception as e:
                print(e)
                print("WARNING: could not delete file on scratch!")

            # clean up all modules of this sub-job (not only the one created last)
            for pyModule in pyModules:
                if callable(getattr(pyModule, "cleanUp", None)):
                    pyModule.cleanUp()

        if nFilesFailed > 0:
            raise Exception("ProcessingIncomplete")
class SkimsHelper(object):
    """Write a skim of all cached plot-region trees of the selected samples into one file.

    Usage: ``SkimsHelper(config, region, sampleIdentifier, opts).prepare().run()``
    """

    def __init__(self, config, region, sampleIdentifier=None, opts=None):
        """Read paths, cuts and sample lists for the given plot region.

        config: config parser object, used via get() and has_option()
        region: name of the plot region, the config section 'Plot:<region>' is used
        sampleIdentifier: optional comma separated list of sample identifiers,
            if given only these samples are used
        opts: command line options, the attributes inputDir and outputDir are
            read (required although the parameter has a default)

        Raises ValueError if opts is not given.
        """
        if opts is None:
            # opts.inputDir / opts.outputDir are needed below, fail with a clear message
            raise ValueError("SkimsHelper: 'opts' with attributes inputDir and outputDir is required")

        self.config = config
        self.region = region
        self.sampleIdentifiers = sampleIdentifier.split(',') if sampleIdentifier and len(sampleIdentifier) > 0 else None
        # filled by prepare(), defined here so that run() without prepare() sees an empty list
        self.fileNames = []

        # VHbb namespace
        VHbbNameSpace = config.get('VHbbNameSpace', 'library')
        returnCode = ROOT.gSystem.Load(VHbbNameSpace)
        if returnCode != 0:
            print("\x1b[31mERROR: loading VHbbNameSpace failed with code %d\x1b[0m" % returnCode)
        else:
            print("INFO: loaded VHbbNameSpace: %s" % VHbbNameSpace)

        # input/output paths
        self.fileLocator = FileLocator(config=self.config)
        self.pathIN = self.config.get('Directories', opts.inputDir)
        self.pathOUT = self.config.get('Directories', opts.outputDir)
        self.tmpDir = self.config.get('Directories', 'scratch')

        self.samplesPath = config.get('Directories', 'plottingSamples')
        self.samplesInfo = ParseInfo(samples_path=self.samplesPath, config=self.config)
        self.sampleFilesFolder = config.get('Directories', 'samplefiles')
        self.plotPath = config.get('Directories', 'plotpath')

        # plot regions
        self.configSection = 'Plot:%s' % region

        # additional cut to only plot a subset of the region
        self.subcut = None
        if self.config.has_option(self.configSection, 'subcut'):
            self.subcut = self.config.get(self.configSection, 'subcut')
            print("INFO: use cut:", self.subcut)

        # additional global blinding cut:
        self.addBlindingCut = None
        if self.config.has_option('Plot_general', 'addBlindingCut'):  # contained in plots, cut on the event number
            self.addBlindingCut = self.config.get('Plot_general', 'addBlindingCut')
            print('adding add. blinding cut:', self.addBlindingCut)

        # load samples
        # NOTE(review): eval() of config values, the config file has to be trusted
        self.data = eval(self.config.get(self.configSection, 'Datas'))  # read the data corresponding to each CR (section)
        self.mc = eval(self.config.get('Plot_general', 'samples'))  # read the list of mc samples
        self.total_lumi = eval(self.config.get('General', 'lumi'))
        self.signalRegion = False
        if self.config.has_option(self.configSection, 'Signal'):
            self.mc.append(self.config.get(self.configSection, 'Signal'))
            self.signalRegion = True
        self.dataSamples = self.samplesInfo.get_samples(self.data)
        self.mcSamples = self.samplesInfo.get_samples(self.mc)

        # filter samples used in the plot
        if self.sampleIdentifiers:
            self.dataSamples = [x for x in self.dataSamples if x.identifier in self.sampleIdentifiers]
            self.mcSamples = [x for x in self.mcSamples if x.identifier in self.sampleIdentifiers]

    def prepare(self):
        """Collect the names of the cached files of all DATA and MC samples in self.fileNames.

        Returns self, so that calls can be chained.
        Raises Exception("NotCached") if the trees of a sample are not cached.
        """
        # add DATA + MC samples
        self.fileNames = []
        for sample in self.dataSamples + self.mcSamples:
            print(sample.identifier)

            # cuts
            sampleCuts = [sample.subcut]
            if self.config.has_option('Cuts', self.region):
                sampleCuts.append(self.config.get('Cuts', self.region))
            if self.config.has_option(self.configSection, 'Datacut'):
                sampleCuts.append(self.config.get(self.configSection, 'Datacut'))
            if self.addBlindingCut:
                sampleCuts.append(self.addBlindingCut)

            # get sample tree from cache
            # (the config of this object is used, not a module level 'config' which might not exist)
            tc = TreeCache.TreeCache(
                sample=sample,
                cutList=sampleCuts,
                inputFolder=self.samplesPath,
                config=self.config
            )
            if not tc.isCached():
                print("ERROR: not cached, run cacheplot again")
                raise Exception("NotCached")
            self.fileNames += tc.findCachedFileNames()

        if len(self.fileNames) < 1:
            print("\x1b[31mERROR: no files found, run cacheplot!\x1b[0m")
        return self

    def run(self):
        """Process all files found by prepare() and copy the resulting skim to the output folder.

        The file is written to the scratch folder first. The temporary file is
        only removed if the copy at the final destination is a valid file.
        """
        name = self.config.get('Configuration', 'channel') if self.config.has_option('Configuration', 'channel') else '_'
        timestamp = datetime.datetime.now().strftime("%y%m%d")
        # the region of this object is used, not a module level 'region' which might not exist
        baseName = '/skim_' + name + '_' + self.region + '_' + timestamp
        tmpName = self.tmpDir + baseName + '_tmp.root'
        destName = self.pathOUT + baseName + '.root'

        sampleTree = SampleTree(self.fileNames, config=self.config)

        if self.config.has_option('Plot_general', 'controlSample'):
            # NOTE(review): eval() of a config value, the config file has to be trusted
            controlSampleDict = eval(self.config.get('Plot_general', 'controlSample'))
            controlSample = controlSampleDict[self.region] if self.region in controlSampleDict else -1
            sampleTree.addOutputBranch("controlSample", lambda x: controlSample, branchType="i")
            print("INFO: setting controlSample to", controlSample)

        sampleTree.addOutputTree(tmpName, cut='1', branches='*', friend=False)
        sampleTree.process()

        # copy to final destination
        if sampleTree.getNumberOfOutputTrees() > 0:
            try:
                self.fileLocator.cp(tmpName, destName, force=True)
                print('copy ', tmpName, destName)

                if not self.fileLocator.isValidRootFile(destName):
                    print("\x1b[31mERROR: copy failed, output is broken!\x1b[0m")
                else:
                    try:
                        self.fileLocator.rm(tmpName)
                    except Exception as e:
                        print(e)
            except Exception as e:
                print("\x1b[31mERROR: copy failed!", e, "\x1b[0m")
print 'collections to add:', collections for fileName in filelist: localFileName = fileLocator.getFilenameAfterPrep(fileName) inputFileName = "{path}/{subfolder}/{filename}".format( path=pathIN, subfolder=sample.identifier, filename=localFileName) outputFileName = "{path}/{subfolder}/{filename}".format( path=pathOUT, subfolder=sample.identifier, filename=localFileName) tmpFileName = "{path}/{subfolder}/{filename}".format( path=tmpDir, subfolder=sample.identifier, filename=localFileName) outputFolder = '/'.join(outputFileName.split('/')[:-1]) tmpFolder = '/'.join(tmpFileName.split('/')[:-1]) fileLocator.makedirs(tmpFolder) fileLocator.makedirs(outputFolder) if opts.force or not fileLocator.isValidRootFile(outputFileName): # load sample tree and initialize vtype corrector sampleTree = SampleTree([inputFileName], config=config) if not sampleTree.tree: # try original naming scheme if reading directly from Heppy/Nano ntuples (without prep) fileNameOriginal = pathIN + '/' + fileName print "FO:", fileNameOriginal xrootdRedirector = fileLocator.getRedirector(fileNameOriginal) sampleTree = SampleTree([fileNameOriginal], config=config, xrootdRedirector=xrootdRedirector) if not sampleTree.tree: print "\x1b[31mERROR: file does not exist or is broken, will be SKIPPED!\x1b[0m" continue # lists of single modules can be given instead of a module, "--addCollections Sys.all"
# process all given files for fileName in filelist: localFileName = fileName.split('/')[ -1] #TODO! # fileLocator.getFilenameAfterPrep(fileName) inputFileName = "{path}/{subfolder}/{filename}".format( path=INpath, subfolder=sample.identifier, filename=localFileName) outputFileName = "{path}/{subfolder}/{filename}".format( path=OUTpath, subfolder=sample.identifier, filename=localFileName) tmpFileName = "{path}/{subfolder}/{filename}".format( path=tmpDir, subfolder=sample.identifier, filename=localFileName) outputFolder = '/'.join(outputFileName.split('/')[:-1]) tmpFolder = '/'.join(tmpFileName.split('/')[:-1]) fileLocator.makedirs(tmpFolder) fileLocator.makedirs(outputFolder) if not fileLocator.isValidRootFile(outputFileName) or opts.force: # load sample tree sampleTree = SampleTree([inputFileName], config=config) if not sampleTree.tree: print( "\x1b[31mERROR: file does not exist or is broken, will be SKIPPED!\x1b[0m" ) continue systematics = config.get('systematics', 'systematics').split(' ') print("systematics:", systematics) classifiers = opts.discr.split(',') for classifier in classifiers: varset = config.get(classifier, 'treeVarSet') if sample.type == 'DATA':