def __init__(self, config, sampleIdentifier, trainingRegions, splitFilesChunks=1, chunkNumber=1, splitFilesChunkSize=-1, force=False):
    """Read samples, cuts and input variables for a set of training regions.

    Args:
        config: object providing ``get(section, option)``.
        sampleIdentifier: stored unchanged on the instance.
        trainingRegions: re-iterable collection of config section names; each
            section is read for 'backgrounds', 'signals', 'treeCut' and
            'treeVarSet'.
        splitFilesChunks: stored unchanged on the instance.
        chunkNumber: stored unchanged on the instance.
        splitFilesChunkSize: stored unchanged on the instance.
        force: stored unchanged on the instance.
    """
    self.config = config
    self.force = force
    self.sampleIdentifier = sampleIdentifier
    self.trainingRegions = trainingRegions
    self.sampleTree = None

    # input folder and sample definitions
    self.samplesPath = self.config.get('Directories', 'MVAin')
    self.samplesDefinitions = self.config.get('Directories', 'samplesinfo')
    self.samplesInfo = ParseInfo(self.samplesDefinitions, self.samplesPath)
    self.sampleFilesFolder = self.config.get('Directories', 'samplefiles')

    # Concatenate the per-region lists, then remove duplicates.
    # NOTE: eval() executes the option text as Python, so the config must be trusted.
    allBackgrounds = []
    for regionName in self.trainingRegions:
        allBackgrounds = allBackgrounds + eval(self.config.get(regionName, 'backgrounds'))
    self.backgroundSampleNames = list(set(allBackgrounds))

    allSignals = []
    for regionName in self.trainingRegions:
        allSignals = allSignals + eval(self.config.get(regionName, 'signals'))
    self.signalSampleNames = list(set(allSignals))

    uniqueSampleNames = set(self.backgroundSampleNames + self.signalSampleNames)
    self.samples = self.samplesInfo.get_samples(list(uniqueSampleNames))

    # per region: the selection cut and the concatenated variable lists of all systematics
    self.trainingRegionsDict = {}
    for regionName in self.trainingRegions:
        cutName = config.get(regionName, 'treeCut')
        varSetSection = config.get(regionName, 'treeVarSet').strip()
        systematicNames = []
        for token in config.get('systematics', 'systematics').split(' '):
            if token.strip():
                systematicNames.append(token)

        regionVars = []
        for systematicName in systematicNames:
            regionVars.extend(config.get(varSetSection, systematicName).strip().split(' '))

        self.trainingRegionsDict[regionName] = {
            'cut': config.get('Cuts', cutName),
            'vars': regionVars,
        }

    self.TrainCut = config.get('Cuts', 'TrainCut')
    self.EvalCut = config.get('Cuts', 'EvalCut')

    self.splitFilesChunks = splitFilesChunks
    self.chunkNumber = chunkNumber
    self.splitFilesChunkSize = splitFilesChunkSize

    # load the shared library named in the config
    ROOT.gSystem.Load(config.get('VHbbNameSpace', 'library'))
def customInit(self, initVars):
    """Register group-membership, sample-index and (optionally) unique-event branches.

    Args:
        initVars: mapping with the keys 'sample', 'sampleTree' and 'config'.

    Raises:
        Exception: if event counts are configured and the sample tree does not
            consist of exactly one file.
    """
    self.sample = initVars['sample']
    self.sampleTree = initVars['sampleTree']
    self.config = initVars['config']

    samplesFolder = self.config.get('Directories', 'dcSamples')
    self.samplesInfo = ParseInfo(samples_path=samplesFolder, config=self.config)

    # subsamples sharing the identifier of the current sample
    currentIdentifier = self.sample.identifier
    self.subsamples = [
        candidate for candidate in self.samplesInfo
        if candidate.identifier == currentIdentifier and candidate.subsample
    ]
    print("INFO: subsamples/cut")
    for subsample in self.subsamples:
        print(" >", subsample.name, subsample.subcut)
        self.sampleTree.addFormula(subsample.subcut)

    # fall back to the group definition from the config if none was set before
    if not self.groupDict:
        self.groupDict = eval(self.config.get('LimitGeneral', 'Group'))
    self.groupNames = list(set(self.groupDict.values()))

    # invert the sample -> group mapping into group -> [samples]
    groupMembers = {}
    for group in self.groupNames:
        groupMembers[group] = [
            member for member in self.groupDict if self.groupDict[member] == group
        ]
    self.groups = groupMembers

    # one branch per group
    for group in self.groups:
        groupBranch = {
            'name': self.prefix + group,
            'formula': self.isInGroup,
            'arguments': group
        }
        self.branches.append(groupBranch)
    self.branches.append({'name': 'sampleIndex', 'formula': self.getSampleIndex, 'type': 'i'})

    if not self.eventCountsDict:
        return

    self.branches.append({'name': 'event_unique', 'formula': self.getEventNumber, 'type': 'l'})
    fileNames = self.sampleTree.sampleFileNames
    if len(fileNames) != 1:
        print("ERROR: adding unique event numbers for chains is not implemented!")
        raise Exception("SampleGroup__customInit__not_implemented")
    self.eventNumberOffset = self.eventCountsDict[self.sample.identifier][fileNames[0]]
def __init__(self, config, mvaName):
    """Collect factory settings, variables, samples and cuts for one MVA training.

    Args:
        config: object providing ``get(section, option)``.
        mvaName: name of the config section describing the training; also
            used in the name of the training output file.
    """
    self.config = config

    # factory configuration
    self.factoryname = config.get('factory', 'factoryname')
    self.factorysettings = config.get('factory', 'factorysettings')

    # input folder and sample definitions
    self.samplesPath = config.get('Directories', 'MVAin')
    self.samplesDefinitions = config.get('Directories', 'samplesinfo')
    self.samplesInfo = ParseInfo(self.samplesDefinitions, self.samplesPath)
    self.sampleFilesFolder = config.get('Directories', 'samplefiles')

    # settings of this particular training
    self.treeVarSet = config.get(mvaName, 'treeVarSet')
    self.MVAtype = config.get(mvaName, 'MVAtype')
    self.MVAsettings = config.get(mvaName, 'MVAsettings')
    self.mvaName = mvaName

    libraryPath = config.get('VHbbNameSpace', 'library')
    ROOT.gSystem.Load(libraryPath)

    # input variables: space separated list in the variable-set section
    nominalVariables = config.get(self.treeVarSet, 'Nominal').strip().split(' ')
    self.MVA_Vars = {'Nominal': nominalVariables}

    # samples (eval() executes the option text as Python: the config must be trusted)
    backgroundNames = eval(config.get(mvaName, 'backgrounds'))
    signalNames = eval(config.get(mvaName, 'signals'))
    backgroundSamples = self.samplesInfo.get_samples(backgroundNames)
    signalSamples = self.samplesInfo.get_samples(signalNames)
    self.samples = {'BKG': backgroundSamples, 'SIG': signalSamples}

    # region cut and train/eval split cuts
    self.treeCutName = config.get(mvaName, 'treeCut')
    self.treeCut = config.get('Cuts', self.treeCutName)
    self.TrainCut = config.get('Cuts', 'TrainCut')
    self.EvalCut = config.get('Cuts', 'EvalCut')
    print("TRAINING CUT:", self.TrainCut)
    print("EVAL CUT:", self.EvalCut)

    self.globalRescale = 2.0
    self.trainingOutputFileName = 'mvatraining_{factoryname}_{region}.root'.format(
        region=mvaName,
        factoryname=self.factoryname,
    )
    print("INFO: MvaTrainingHelper class created.")
def __init__(self, config, sampleIdentifier, regions, splitFilesChunks=1, chunkNumber=1, splitFilesChunkSize=-1, forceRedo=False, fileList=None):
    """Read the samples and region cuts and load the VHbb namespace library.

    Args:
        config: object providing ``get(section, option)``.
        sampleIdentifier: stored unchanged on the instance.
        regions: iterable of option names in the 'Cuts' section; duplicates
            are removed.
        splitFilesChunks: stored unchanged on the instance.
        chunkNumber: stored unchanged on the instance.
        splitFilesChunkSize: stored unchanged on the instance.
        forceRedo: stored unchanged on the instance.
        fileList: optional compressed file list, expanded with
            ``FileList.decompress``; any falsy value results in ``None``.
    """
    self.config = config
    self.sampleIdentifier = sampleIdentifier
    self.regions = list(set(regions))
    self.forceRedo = forceRedo
    self.sampleTree = None

    # sample definitions
    self.samplesPath = self.config.get('Directories', 'plottingSamples')
    self.samplesInfo = ParseInfo(samples_path=self.samplesPath, config=self.config)
    self.sampleFilesFolder = self.config.get('Directories', 'samplefiles')

    # MC and data sample names (eval() executes the option text: trusted config only)
    self.sampleNames = list(eval(self.config.get('Plot_general', 'samples')))
    self.dataNames = list(eval(self.config.get('Plot_general', 'Data')))
    allNames = self.sampleNames + self.dataNames
    self.samples = self.samplesInfo.get_samples(allNames)

    # one cut per region
    self.regionsDict = {}
    for regionName in self.regions:
        self.regionsDict[regionName] = {'cut': config.get('Cuts', regionName)}

    self.splitFilesChunkSize = splitFilesChunkSize
    self.splitFilesChunks = splitFilesChunks
    self.chunkNumber = chunkNumber

    if fileList:
        self.fileList = FileList.decompress(fileList)
    else:
        self.fileList = None

    # load the shared library; a non-zero return code is reported but not fatal
    libraryPath = config.get('VHbbNameSpace', 'library')
    loadStatus = ROOT.gSystem.Load(libraryPath)
    if loadStatus == 0:
        print("INFO: loaded VHbbNameSpace: %s" % libraryPath)
    else:
        print("\x1b[31mERROR: loading VHbbNameSpace failed with code %d\x1b[0m" % loadStatus)
config = BetterConfigParser() config.read(opts.config) #namelist=opts.names.split(',') #print "namelist:",namelist pathIN = config.get('Directories', 'PREPin') pathOUT = config.get('Directories', 'PREPout') samplesinfo = config.get('Directories', 'samplesinfo') sampleconf = BetterConfigParser() sampleconf.read(samplesinfo) prefix = sampleconf.get('General', 'prefix') info = ParseInfo(samples_path=pathIN, config=config) print "samplesinfo:", samplesinfo cross_sections = {} samples = [] for job in info: if not job.identifier in samples: if type(job.xsec) is list: job.xsec = job.xsec[0] cross_sections[job.identifier] = job.xsec samples.append(job.identifier) for sample in samples: print sample, "\t", cross_sections[sample] # print dir(job) # print "job.name:",job.name," job.cross_section:",job.xsec # print "job.prefix:",job.prefix # if not job.name in namelist:
def __init__(self, config, region, vars=None, title=None):
    """Read everything needed to plot one region: variables, samples and groups.

    Args:
        config: object providing ``get``/``has_option``.
        region: region name; the options are read from section 'Plot:<region>'.
        vars: optional list of variable names; if empty or not given, the
            comma separated 'vars' option of the region section is used.
        title: optional plot title; an empty title is stored as ``None``.
    """
    self.config = config
    self.region = region
    self.vars = vars
    self.title = title if title else None

    # VHbb namespace: a failed load is reported but not fatal
    libraryPath = config.get('VHbbNameSpace', 'library')
    loadStatus = ROOT.gSystem.Load(libraryPath)
    if loadStatus == 0:
        print("INFO: loaded VHbbNameSpace: %s" % libraryPath)
    else:
        print("\x1b[31mERROR: loading VHbbNameSpace failed with code %d\x1b[0m" % loadStatus)

    # additional blinding cut (optional)
    if self.config.has_option('Plot_general', 'addBlindingCut'):
        self.addBlindingCut = self.config.get('Plot_general', 'addBlindingCut')
        print('adding add. blinding cut:', self.addBlindingCut)
    else:
        self.addBlindingCut = None

    # input/output paths
    self.samplesPath = config.get('Directories', 'plottingSamples')
    self.samplesDefinitions = config.get('Directories', 'samplesinfo')
    self.samplesInfo = ParseInfo(self.samplesDefinitions, self.samplesPath)
    self.sampleFilesFolder = config.get('Directories', 'samplefiles')
    self.plotPath = config.get('Directories', 'plotpath')

    # plot regions
    self.configSection = 'Plot:%s' % region

    # variables given by the caller: strip them and drop empty entries
    if self.vars and type(self.vars) == list:
        self.vars = [name.strip() for name in self.vars if name.strip()]

    # otherwise take the variables from the region section
    if not self.vars:
        varListFromConfig = self.config.get(self.configSection, 'vars').split(',')
        print("VARS::", self.configSection, " => ", varListFromConfig)
        self.vars = [name.strip() for name in varListFromConfig if name.strip()]

    # load samples (eval() executes the option text as Python: trusted config only)
    self.data = eval(self.config.get(self.configSection, 'Datas'))
    self.mc = eval(self.config.get('Plot_general', 'samples'))
    self.total_lumi = eval(self.config.get('General', 'lumi'))

    # a region with a 'Signal' option is a signal region; the signal is added to the MC list
    self.signalRegion = self.config.has_option(self.configSection, 'Signal')
    if self.signalRegion:
        self.mc.append(self.config.get(self.configSection, 'Signal'))

    self.dataSamples = self.samplesInfo.get_samples(self.data)
    self.mcSamples = self.samplesInfo.get_samples(self.mc)

    self.groupDict = eval(self.config.get('Plot_general', 'Group'))
    self.subcutPlotName = ''
    self.histogramStacks = {}
def __init__(self, config, region, vars=None, title=None, sampleIdentifier=None):
    """Read everything needed to plot one region: variables, cuts, samples and groups.

    Args:
        config: object providing ``get``/``has_option``/``has_section``.
        region: region name; region options are read from section 'Plot:<region>'.
        vars: optional list of variable names; if empty or not given, the
            comma separated 'vars' option of the region section is used.
        title: optional plot title; an empty title is stored as ``None``.
        sampleIdentifier: optional comma separated list of sample identifiers;
            if given, only data/MC samples with these identifiers are kept.
    """
    self.debug = 'XBBDEBUG' in os.environ
    self.config = config
    self.region = region
    self.vars = vars
    self.title = title if title and len(title) > 0 else None
    self.sampleIdentifiers = sampleIdentifier.split(',') if sampleIdentifier and len(sampleIdentifier) > 0 else None

    # VHbb namespace: a failed load is reported but not fatal
    VHbbNameSpace = config.get('VHbbNameSpace', 'library')
    returnCode = ROOT.gSystem.Load(VHbbNameSpace)
    if returnCode != 0:
        print("\x1b[31mERROR: loading VHbbNameSpace failed with code %d\x1b[0m" % returnCode)
    else:
        print("INFO: loaded VHbbNameSpace: %s" % VHbbNameSpace)

    # input/output paths
    self.samplesPath = config.get('Directories', 'plottingSamples')
    self.samplesInfo = ParseInfo(samples_path=self.samplesPath, config=self.config)
    self.sampleFilesFolder = config.get('Directories', 'samplefiles')
    self.plotPath = config.get('Directories', 'plotpath')

    # plot regions
    self.configSection = 'Plot:%s' % region
    self.dataOverBackground = self.config.has_option('Plot_general', 'plotDataOverBackground') and eval(
        self.config.get('Plot_general', 'plotDataOverBackground'))

    # variables given by the caller: strip them and drop empty entries
    if self.vars and isinstance(self.vars, list):
        self.vars = [x.strip() for x in self.vars if len(x.strip()) > 0]

    # if variables not specified in command line, read from config
    if not self.vars or len(self.vars) < 1:
        varListFromConfig = self.config.get(self.configSection, 'vars').split(',')
        print("VARS::", self.configSection, " => ", varListFromConfig)
        self.vars = [x.strip() for x in varListFromConfig if len(x.strip()) > 0]

    # resolve plot variables (find plot section name if ROOT expression is given).
    # This must iterate self.vars, not the raw 'vars' argument: the argument may be
    # None (default) and does not contain the cleaned / config-provided list.
    self.vars = [XbbTools.resolvePlotVariable(var, self.config) for var in self.vars]

    # additional cut to only plot a subset of the region
    self.subcut = None
    if self.config.has_option(self.configSection, 'subcut'):
        self.subcut = self.config.get(self.configSection, 'subcut')
        print("INFO: use cut:", self.subcut)

    # additional global blinding cut
    self.addBlindingCut = None
    if self.config.has_option('Plot_general', 'addBlindingCut'):
        self.addBlindingCut = self.config.get('Plot_general', 'addBlindingCut')
        print('adding add. blinding cut:', self.addBlindingCut)

    # load samples
    # NOTE: eval() executes the option text as Python, so the config must be trusted.
    if self.config.has_section(self.configSection):
        # read data from region definition, falling back to the general default
        if self.config.has_option(self.configSection, 'Datas'):
            self.data = eval(self.config.get(self.configSection, 'Datas'))
        elif self.config.has_option(self.configSection, 'Data'):
            self.data = eval(self.config.get(self.configSection, 'Data'))
        else:
            self.data = eval(self.config.get('Plot_general', 'Data'))
    else:
        # use default datasets
        self.data = eval(self.config.get('Plot_general', 'Data'))
    self.mc = eval(self.config.get('Plot_general', 'samples'))
    self.total_lumi = eval(self.config.get('General', 'lumi'))
    self.signalRegion = False

    self.dataSamples = self.samplesInfo.get_samples(self.data)
    self.mcSamples = self.samplesInfo.get_samples(self.mc)

    # filter samples used in the plot
    if self.sampleIdentifiers:
        self.dataSamples = [x for x in self.dataSamples if x.identifier in self.sampleIdentifiers]
        self.mcSamples = [x for x in self.mcSamples if x.identifier in self.sampleIdentifiers]

    self.groupDict = eval(self.config.get('Plot_general', 'Group'))
    self.subcutPlotName = ''
    self.histogramStacks = {}
train_list = (config.get('MVALists', 'List_for_submitscript')).split(',') print train_list for item in train_list: submit(item, repDict) if opts.task == 'dc': DC_vars = (config.get('LimitGeneral', 'List')).split(',') print DC_vars Plot_vars = [''] if opts.task == 'plot' or opts.task == 'singleplot' or opts.task == 'mergesingleplot' or opts.task == 'checksingleplot': Plot_vars = (config.get('Plot_general', 'List')).split(',') if not opts.task == 'prep': path = config.get("Directories", "samplepath") info = ParseInfo(samplesinfo, path) if opts.task == 'plot': repDict['queue'] = 'all.q' for item in Plot_vars: submit(item, repDict) if opts.task == 'trainReg': repDict['queue'] = 'all.q' submit('trainReg', repDict) elif opts.task == 'dc': repDict['queue'] = 'all.q' for item in DC_vars: # item here contains the dc name submit(item, repDict)
def __init__(self, config, mvaName):
    """Collect variables, samples, cuts and classifier parameters for a scikit training.

    The 'MVAsettings' option of the training section is parsed as a
    ':'-separated list of ``name=value`` pairs; every value is evaluated as a
    Python expression and overrides the default of the same name.

    Args:
        config: object providing ``get(section, option)``.
        mvaName: name of the config section describing the training.

    Raises:
        ValueError: if a non-empty 'MVAsettings' entry contains no '='.
    """
    self.dataRepresentationVersion = 2
    self.config = config
    self.samplesPath = config.get('Directories', 'MVAin')
    self.samplesInfo = ParseInfo(samples_path=self.samplesPath, config=self.config)
    self.sampleFilesFolder = config.get('Directories', 'samplefiles')
    self.logpath = config.get('Directories', 'logpath')
    self.treeVarSet = config.get(mvaName, 'treeVarSet')
    self.mvaName = mvaName
    self.MVAsettings = config.get(mvaName, 'MVAsettings')
    self.factoryname = 'scikit-test1'

    VHbbNameSpace = config.get('VHbbNameSpace', 'library')
    ROOT.gSystem.Load(VHbbNameSpace)

    # variables
    self.MVA_Vars = {}
    self.MVA_Vars['Nominal'] = config.get(self.treeVarSet, 'Nominal').strip().split(' ')

    # samples
    # NOTE: eval() executes the option text as Python, so the config must be trusted.
    self.backgroundSampleNames = eval(config.get(mvaName, 'backgrounds'))
    self.signalSampleNames = eval(config.get(mvaName, 'signals'))
    self.samples = {
        'BKG': self.samplesInfo.get_samples(self.backgroundSampleNames),
        'SIG': self.samplesInfo.get_samples(self.signalSampleNames),
    }

    # MVA signal region cuts
    self.treeCutName = config.get(mvaName, 'treeCut')
    self.treeCut = config.get('Cuts', self.treeCutName)

    # split in train/test samples
    self.datasets = ['train', 'test']
    self.varsets = ['X', 'y', 'sample_weight']
    self.trainCut = config.get('Cuts', 'TrainCut')
    self.evalCut = config.get('Cuts', 'EvalCut')

    print("TRAINING CUT:", self.trainCut)
    print("TEST CUT:", self.evalCut)

    self.globalRescale = 2.0

    # default parameters
    self.parameters = {
        'factoryname': self.factoryname,
        'mvaName': self.mvaName,
        'MVAregionCut': self.treeCutName + ': ' + self.treeCut,
        #'classifier': 'GradientBoostingClassifier',
        'classifier': 'RandomForestClassifier',
        #'classifier': 'ExtraTreesClassifier',
        #'classifier': 'FT_GradientBoostingClassifier',
        'max_depth': None,
        'max_leaf_nodes': None,
        'class_weight': 'balanced',
        #'criterion': 'friedman_mse',
        'criterion': 'gini',
        #'n_estimators': 3000,
        'n_estimators': 400,
        #'learning_rate': 0.1,
        'algorithm': 'SAMME.R',
        #'min_samples_leaf': 100,
        'splitter': 'best',
        'max_features': 4,
        'subsample': 0.6,
        'limit': -1,
        'additional_signal_weight': 1.0,
        'min_impurity_split': 0.0,
        'bootstrap': True,
    }

    # load parameters from config in a format similar to Root TMVA parameter string
    evaluatedSettings = []
    for mvaSetting in self.MVAsettings.split(':'):
        # tolerate empty entries, e.g. from a trailing ':' or an empty option
        if not mvaSetting.strip():
            continue
        # split at the FIRST '=' only, so that values may contain '=' themselves
        name, separator, expression = mvaSetting.partition('=')
        if not separator:
            raise ValueError("MVAsettings entry without '=': %r" % mvaSetting)
        name = name.strip()
        # NOTE: the value is evaluated as Python code; the config must be trusted
        self.parameters[name] = eval(expression.strip())
        # repr() instead of '%r' % value: the latter fails for tuple values
        evaluatedSettings.append(name + '=' + repr(self.parameters[name]))
    self.MVAsettingsEvaluated = ':'.join(evaluatedSettings)
def __init__(self, config, mvaName):
    """Collect cuts, variables, b-tag weight variations and samples for one MVA.

    Args:
        config: object providing ``get(section, option)``.
        mvaName: name of the config section describing the MVA.
    """
    self.mvaName = mvaName
    ROOT.gSystem.Load(config.get('VHbbNameSpace', 'library'))

    self.dataFormatVersion = 2
    self.sampleTrees = []
    self.config = config

    # input folder and sample definitions
    self.samplesPath = config.get('Directories', 'MVAin')
    self.samplesDefinitions = config.get('Directories', 'samplesinfo')
    self.samplesInfo = ParseInfo(self.samplesDefinitions, self.samplesPath)

    # region
    self.treeCutName = config.get(mvaName, 'treeCut')
    self.treeCut = config.get('Cuts', self.treeCutName)

    # split in train/eval sets
    self.trainCut = config.get('Cuts', 'TrainCut')
    self.evalCut = config.get('Cuts', 'EvalCut')

    # rescale MC by 2 because of train/eval split
    self.globalRescale = 2.0

    # variables and systematics: one non-empty token list per variation
    self.treeVarSet = config.get(mvaName, 'treeVarSet')
    self.systematics = config.get('systematics', 'systematics').strip().split(' ')
    self.MVA_Vars = {}
    for variation in ['Nominal'] + self.systematics:
        tokens = config.get(self.treeVarSet, variation).strip().split(' ')
        self.MVA_Vars[variation] = [token for token in tokens if token.strip()]

    # b-tag weight variations: all 'Up' entries first, then all 'Down' entries
    self.weightSYS = []
    self.weightWithoutBtag = self.config.get('Weights', 'weight_noBTag')
    self.weightSYSweights = {}
    btagSources = ['HFStats1', 'HFStats2', 'LF', 'HF', 'LFStats1', 'LFStats2', 'cErr2', 'cErr1', 'JES']
    for direction in ['Up', 'Down']:
        for source in btagSources:
            fullName = "btag_" + source + "_" + direction
            branchName = "bTagWeightCMVAV2_Moriond_" + source + direction
            self.weightSYSweights[fullName] = self.weightWithoutBtag + '*' + branchName
            self.weightSYS.append(fullName)

    # samples: only the inclusive signal and background categories are used
    # (eval() executes the option text as Python: trusted config only)
    self.sampleNames = {
        'SIG_ALL': eval(self.config.get('Plot_general', 'allSIG')),
        'BKG_ALL': eval(self.config.get('Plot_general', 'allBKG')),
    }
    self.samples = {}
    for category in self.sampleNames:
        self.samples[category] = self.samplesInfo.get_samples(self.sampleNames[category])
def __init__(self, config, region, sampleIdentifier=None, opts=None):
    """Read paths, cuts and samples needed to process one plot region.

    Args:
        config: object providing ``get``/``has_option``.
        region: region name; region options are read from section 'Plot:<region>'.
        sampleIdentifier: optional comma separated list of sample identifiers;
            if given, only data/MC samples with these identifiers are kept.
        opts: object whose ``inputDir`` and ``outputDir`` attributes name
            options of the 'Directories' section. NOTE(review): the default
            of ``None`` cannot work, since both attributes are read unconditionally.
    """
    self.config = config
    self.region = region
    if sampleIdentifier and len(sampleIdentifier) > 0:
        self.sampleIdentifiers = sampleIdentifier.split(',')
    else:
        self.sampleIdentifiers = None

    # VHbb namespace: a failed load is reported but not fatal
    libraryPath = config.get('VHbbNameSpace', 'library')
    loadStatus = ROOT.gSystem.Load(libraryPath)
    if loadStatus == 0:
        print("INFO: loaded VHbbNameSpace: %s" % libraryPath)
    else:
        print("\x1b[31mERROR: loading VHbbNameSpace failed with code %d\x1b[0m" % loadStatus)

    # input/output paths
    self.fileLocator = FileLocator(config=self.config)
    self.pathIN = self.config.get('Directories', opts.inputDir)
    self.pathOUT = self.config.get('Directories', opts.outputDir)
    self.tmpDir = self.config.get('Directories', 'scratch')
    self.samplesPath = config.get('Directories', 'plottingSamples')
    self.samplesInfo = ParseInfo(samples_path=self.samplesPath, config=self.config)
    self.sampleFilesFolder = config.get('Directories', 'samplefiles')
    self.plotPath = config.get('Directories', 'plotpath')

    # plot regions
    self.configSection = 'Plot:%s' % region

    # additional cut to only plot a subset of the region (optional)
    if self.config.has_option(self.configSection, 'subcut'):
        self.subcut = self.config.get(self.configSection, 'subcut')
        print("INFO: use cut:", self.subcut)
    else:
        self.subcut = None

    # additional global blinding cut (optional)
    if self.config.has_option('Plot_general', 'addBlindingCut'):
        self.addBlindingCut = self.config.get('Plot_general', 'addBlindingCut')
        print('adding add. blinding cut:', self.addBlindingCut)
    else:
        self.addBlindingCut = None

    # load samples (eval() executes the option text as Python: trusted config only)
    self.data = eval(self.config.get(self.configSection, 'Datas'))
    self.mc = eval(self.config.get('Plot_general', 'samples'))
    self.total_lumi = eval(self.config.get('General', 'lumi'))

    # a region with a 'Signal' option is a signal region; the signal is added to the MC list
    self.signalRegion = self.config.has_option(self.configSection, 'Signal')
    if self.signalRegion:
        self.mc.append(self.config.get(self.configSection, 'Signal'))

    self.dataSamples = self.samplesInfo.get_samples(self.data)
    self.mcSamples = self.samplesInfo.get_samples(self.mc)

    # filter samples used in the plot
    if self.sampleIdentifiers:
        wanted = self.sampleIdentifiers
        self.dataSamples = [sample for sample in self.dataSamples if sample.identifier in wanted]
        self.mcSamples = [sample for sample in self.mcSamples if sample.identifier in wanted]
from ROOT import TAxis, TLorentzVector, TMath, TLegend
from ROOT import gStyle, gPad
from ROOT import TCanvas, TColor, TGaxis, TH1F, TPad
from ROOT import kBlack, kBlue, kRed, kViolet

# load configuration and list of used samples
config = XbbConfigReader.read('Zll2018')
path = "Zll2018config/samples_nosplit.ini"
sampleInfo = ParseInfo(config, path, config=config)
mcSampleNames = XbbConfigTools(config).getMC()
usedSamples = sampleInfo.get_samples(mcSampleNames)

# Several samples can share one identifier; keep each identifier only once.
usedSampleIdentifiers = list(set(sample.identifier for sample in usedSamples))
print('usedSampleIdentifiers', usedSampleIdentifiers)

# folder (processing step) from which the ROOT trees are taken
directory = config.get('Directories', 'sysOUT4')
'folder': config.get('Directories', args.fromFolder).strip() }, config=config) h1 = ROOT.TH1D("h1", "h1", 1, 0, 2) scaleToXs = sampleTree.getScale(sample) #nEvents = sampleTree.tree.Draw("1>>h1", "(" + cut + ")*genWeight*%1.6f"%scaleToXs, "goff") nEvents = sampleTree.tree.Draw("1>>h1", cut, "goff") nEventsWeighted = h1.GetBinContent(1) #print("DEBUG:", sampleIdentifier, cut, " MC events:", nEvents, " (weighted:", nEventsWeighted, ")") h1.Delete() return nEvents # load config config = XbbConfigReader.read(args.tag) sampleInfo = ParseInfo(samples_path=config.get('Directories', args.fromFolder), config=config) mcSamples = sampleInfo.get_samples(XbbConfigTools(config).getMC()) pruneThreshold = float(args.prune) sampleGroups = [] for x in args.samples.split(','): sampleGroups.append(x.split('+')) sampleCuts = args.cuts.strip().split(',') if args.fc != '': cutGroups = [ x.strip(',').split(',') for x in args.fc.strip(';').split(';') ] # cartesian product sampleCuts = list(
# Copy (and skim) the trees of all selected samples.
(opts, args) = parser.parse_args(argv)

config = BetterConfigParser()
config.read(opts.config)

# optional explicit file list
fileList = None
if len(opts.fileList) > 0:
    fileList = FileList.decompress(opts.fileList)

pathOUT = config.get('Directories', 'PREPout')
samplesinfo = config.get('Directories', 'samplesinfo')
samplefiles = config.get('Directories', 'samplefiles')
sampleconf = BetterConfigParser()
sampleconf.read(samplesinfo)

whereToLaunch = config.get('Configuration', 'whereToLaunch')

# all non-subsamples, optionally restricted to the comma separated identifiers given
info = ParseInfo(samplesinfo, None)
samples = []
for candidate in info:
    if candidate.subsample:
        continue
    if len(opts.sampleIdentifier) == 0 or candidate.identifier in opts.sampleIdentifier.split(','):
        samples.append(candidate)

treeCopier = copytreePSI.CopyTreePSI(config=config)

# optionally limit the number of processed samples
if opts.limit:
    maxSamples = int(opts.limit)
    if len(samples) > maxSamples:
        samples = samples[:maxSamples]

for sample in samples:
    treeCopier.copytreePSI(
        pathIN=samplefiles,
        pathOUT=pathOUT,
        folderName=sample.identifier,
        skimmingCut=sample.addtreecut,
        fileList=fileList,
    )
print "Compile external macros" print "=======================\n" #get locations: Wdir = config.get('Directories', 'Wdir') # working direcoty containing the ouput samplesinfo = config.get('Directories', 'samplesinfo') # samples_nosplit.cfg path = config.get('Directories', 'plottingSamples') # from which samples to plot section = 'Plot:%s' % region info = ParseInfo( samplesinfo, path ) #creates a list of Samples by reading the info in samples_nosplit.cfg and the conentent of the path. import os if os.path.exists("../interface/DrawFunctions_C.so"): print 'ROOT.gROOT.LoadMacro("../interface/DrawFunctions_C.so")' ROOT.gROOT.LoadMacro("../interface/DrawFunctions_C.so") if os.path.exists("../interface/VHbbNameSpace_h.so"): print 'ROOT.gROOT.LoadMacro("../interface/VHbbNameSpace_h.so")' ROOT.gROOT.LoadMacro("../interface/VHbbNameSpace_h.so") #----------Histo from trees------------ #Get the selections and the samples def doPlot():
default=None, help="max number of files to process") (opts, args) = parser.parse_args(argv) config = BetterConfigParser() config.read(opts.config) fileList = FileList.decompress( opts.fileList) if len(opts.fileList) > 0 else None pathOUT = config.get('Directories', 'PREPout') samplefiles = config.get('Directories', 'samplefiles') sampleconf = config whereToLaunch = config.get('Configuration', 'whereToLaunch') info = ParseInfo(samples_path=None, config=config) samples = [ x for x in info if not x.subsample and (len(opts.sampleIdentifier) == 0 or x.identifier in opts.sampleIdentifier.split(',')) ] treeCopier = copytreePSI.CopyTreePSI(config=config) if opts.limit and len(samples) > int(opts.limit): samples = samples[:int(opts.limit)] for sample in samples: treeCopier.copytreePSI(pathIN=samplefiles, pathOUT=pathOUT, folderName=sample.identifier, skimmingCut=sample.addtreecut, fileList=fileList)
def __init__(self, opts): # get file list self.filelist = FileList.decompress(opts.fileList) if len(opts.fileList) > 0 else None print "len(filelist)",len(self.filelist), if len(self.filelist) > 0: print "filelist[0]:", self.filelist[0] else: print '' # config self.debug = 'XBBDEBUG' in os.environ self.verifyCopy = True self.opts = opts self.config = BetterConfigParser() self.config.read(opts.config) self.channel = self.config.get('Configuration', 'channel') # load namespace, TODO VHbbNameSpace = self.config.get('VHbbNameSpace', 'library') ROOT.gSystem.Load(VHbbNameSpace) # directories self.pathIN = self.config.get('Directories', opts.inputDir) self.pathOUT = self.config.get('Directories', opts.outputDir) self.tmpDir = self.config.get('Directories', 'scratch') print 'INput samples:\t%s'%self.pathIN print 'OUTput samples:\t%s'%self.pathOUT self.fileLocator = FileLocator(config=self.config) # check if given sample identifier uniquely matches a samples from config matchingSamples = ParseInfo(samples_path=self.pathIN, config=self.config).find(identifier=opts.sampleIdentifier) if len(matchingSamples) != 1: print "ERROR: need exactly 1 sample identifier as input with -S !!" print matchingSamples exit(1) self.sample = matchingSamples[0] # collections self.collections = [x.strip() for x in opts.addCollections.split(',') if len(x.strip()) > 0] if len(opts.addCollections.strip())>0 else [] if len(self.collections) < 1: print "\x1b[31mWARNING: no collections added! Specify the collections to add with the --addCollections option!\x1b[0m" print 'collections to add:', self.collections self.collections = self.parseCollectionList(self.collections) print 'after parsing:', self.collections # temorary folder to save the files of this job on the scratch temporaryName = self.sample.identifier + '/' + uuid.uuid4().hex # input files self.subJobs = [] if opts.join: print("INFO: join input files! 
This is an experimental feature!") # translate naming convention of .txt file to imported files after the prep step inputFileNamesAfterPrep = [self.fileLocator.getFilenameAfterPrep(x) for x in self.filelist] self.subJobs.append({ 'inputFileNames': self.filelist, 'localInputFileNames': ["{path}/{subfolder}/{filename}".format(path=self.pathIN, subfolder=self.sample.identifier, filename=localFileName) for localFileName in inputFileNamesAfterPrep], 'outputFileName': "{path}/{subfolder}/{filename}".format(path=self.pathOUT, subfolder=self.sample.identifier, filename=inputFileNamesAfterPrep[0]), 'tmpFileName': "{path}/{subfolder}/{filename}".format(path=self.tmpDir, subfolder=temporaryName, filename=inputFileNamesAfterPrep[0]), }) else: # create separate subjob for all files (default!) for inputFileName in self.filelist: inputFileNamesAfterPrep = [self.fileLocator.getFilenameAfterPrep(inputFileName)] self.subJobs.append({ 'inputFileNames': [inputFileName], 'localInputFileNames': ["{path}/{subfolder}/{filename}".format(path=self.pathIN, subfolder=self.sample.identifier, filename=localFileName) for localFileName in inputFileNamesAfterPrep], 'outputFileName': "{path}/{subfolder}/{filename}".format(path=self.pathOUT, subfolder=self.sample.identifier, filename=inputFileNamesAfterPrep[0]), 'tmpFileName': "{path}/{subfolder}/{filename}".format(path=self.tmpDir, subfolder=temporaryName, filename=inputFileNamesAfterPrep[0]), })
def __init__(self, config, sampleIdentifier, trainingRegions, splitFilesChunks=1, chunkNumber=1, splitFilesChunkSize=-1, force=False):
    """Read samples, cuts, input variables and weight variations for training regions.

    Args:
        config: object providing ``get``/``has_option``.
        sampleIdentifier: stored unchanged on the instance.
        trainingRegions: re-iterable collection of config section names. Each
            section is read for 'backgrounds', 'signals' and 'treeVarSet', and
            optionally for 'data', 'treeCut' and 'systematics'.
        splitFilesChunks: stored unchanged on the instance.
        chunkNumber: stored unchanged on the instance.
        splitFilesChunkSize: stored unchanged on the instance.
        force: stored unchanged on the instance.
    """
    self.config = config
    self.force = force
    self.sampleIdentifier = sampleIdentifier
    self.trainingRegions = trainingRegions
    self.sampleTree = None

    # prefer the dedicated training folder if it is configured
    samplesOption = 'trainingSamples' if config.has_option('Directories', 'trainingSamples') else 'MVAin'
    self.samplesPath = self.config.get('Directories', samplesOption)
    self.samplesInfo = ParseInfo(samples_path=self.samplesPath, config=self.config)
    self.sampleFilesFolder = self.config.get('Directories', 'samplefiles')

    # Concatenate the per-region lists, then remove duplicates.
    # NOTE: eval() executes the option text as Python, so the config must be trusted.
    allBackgrounds = []
    for regionName in self.trainingRegions:
        allBackgrounds = allBackgrounds + eval(self.config.get(regionName, 'backgrounds'))
    self.backgroundSampleNames = list(set(allBackgrounds))

    allSignals = []
    for regionName in self.trainingRegions:
        allSignals = allSignals + eval(self.config.get(regionName, 'signals'))
    self.signalSampleNames = list(set(allSignals))

    # can include DATA in the .h5 files for training
    allData = []
    for regionName in self.trainingRegions:
        if self.config.has_option(regionName, 'data'):
            allData = allData + eval(self.config.get(regionName, 'data'))
        else:
            allData = allData + []
    self.dataSampleNames = list(set(allData))

    uniqueSampleNames = set(self.backgroundSampleNames + self.signalSampleNames + self.dataSampleNames)
    self.samples = self.samplesInfo.get_samples(list(uniqueSampleNames))

    self.trainingRegionsDict = {}
    for regionName in self.trainingRegions:
        # without an explicit 'treeCut' the region name itself is the cut name
        if config.has_option(regionName, 'treeCut'):
            cutName = config.get(regionName, 'treeCut')
        else:
            cutName = regionName
        varSetSection = config.get(regionName, 'treeVarSet').strip()

        # systematics: either a Python list literal or a space separated list
        systematicNames = []
        if config.has_option(regionName, 'systematics'):
            systematicsText = config.get(regionName, 'systematics').strip()
            if systematicsText.startswith('['):
                systematicNames = eval(systematicsText)
            else:
                systematicNames = systematicsText.split(' ')

        regionVars = config.get(varSetSection, 'Nominal').split(' ')

        # For each systematic and direction, prefer the upper-case option name and
        # fall back to the capitalised one; skip variations without a weight option.
        regionWeights = []
        for systematicName in systematicNames:
            for upperSuffix, mixedSuffix in (('_UP', '_Up'), ('_DOWN', '_Down')):
                optionName = systematicName + upperSuffix
                if not self.config.has_option('Weights', optionName):
                    optionName = systematicName + mixedSuffix
                if self.config.has_option('Weights', optionName):
                    regionWeights.append(self.config.get('Weights', optionName))

        self.trainingRegionsDict[regionName] = {
            'cut': config.get('Cuts', cutName),
            'vars': regionVars,
            'weightVars': regionWeights,
        }

    self.TrainCut = config.get('Cuts', 'TrainCut')
    self.EvalCut = config.get('Cuts', 'EvalCut')

    self.splitFilesChunks = splitFilesChunks
    self.chunkNumber = chunkNumber
    self.splitFilesChunkSize = splitFilesChunkSize

    # load the shared library named in the config
    ROOT.gSystem.Load(config.get('VHbbNameSpace', 'library'))
# Read the configuration and the command line selection for the MVA evaluation.
config = BetterConfigParser()
config.read(opts.config)
anaTag = config.get("Analysis", "tag")

# get locations:
Wdir = config.get('Directories', 'Wdir')
samplesinfo = config.get('Directories', 'samplesinfo')

# read shape systematics
systematics = config.get('systematics', 'systematics')

# input/output folders and sample definitions
INpath = config.get('Directories', 'MVAin')
OUTpath = config.get('Directories', 'MVAout')
info = ParseInfo(samplesinfo, INpath)

# Use the discriminators from the command line, unless a BDT is being
# evaluated for optimisation, in which case 'weight' is used instead.
arglist = weight if evaluate_optimisation else opts.discr

namelistIN = opts.names
namelist = namelistIN.split(',')
print('namelist', namelist)
'name': opts.sampleIdentifier, 'folder': config.get('Directories', 'MVAin') }, countOnly=True, splitFilesChunkSize=-1, config=config).getSampleFileNameChunks()[0] print("INFO: no file list given, use all files!") print(len(filelist), filelist) # read paths and sample info samplesinfo = config.get('Directories', 'samplesinfo') systematics = config.get('systematics', 'systematics') INpath = config.get('Directories', 'MVAin') OUTpath = config.get('Directories', 'MVAout') tmpDir = config.get('Directories', 'scratch') info = ParseInfo(samples_path=INpath, config=config) #load the namespace VHbbNameSpace = config.get('VHbbNameSpace', 'library') ROOT.gSystem.Load(VHbbNameSpace) # ------------------------------------------------------------------------------------------ # helper class to evaluate scikit classifiers and write MVA score as new branch # ------------------------------------------------------------------------------------------ class SciKitEvaluator(object): def __init__(self, name, sampleTree, classifier, variables,