예제 #1
0
    def __init__(self,
                 config,
                 sampleIdentifier,
                 trainingRegions,
                 splitFilesChunks=1,
                 chunkNumber=1,
                 splitFilesChunkSize=-1,
                 force=False):
        """Collect samples, cuts and input variables for a set of training regions.

        Args:
            config: configuration object queried with ``get(section, option)``.
            sampleIdentifier: stored unchanged on the instance.
            trainingRegions: iterable of config section names, one per region.
            splitFilesChunks: stored unchanged on the instance.
            chunkNumber: stored unchanged on the instance.
            splitFilesChunkSize: stored unchanged on the instance.
            force: stored unchanged on the instance.
        """
        # plain bookkeeping of the constructor arguments
        self.config = config
        self.force = force
        self.sampleIdentifier = sampleIdentifier
        self.trainingRegions = trainingRegions
        self.splitFilesChunks = splitFilesChunks
        self.chunkNumber = chunkNumber
        self.splitFilesChunkSize = splitFilesChunkSize
        self.sampleTree = None

        # sample definitions and locations
        self.samplesPath = self.config.get('Directories', 'MVAin')
        self.samplesDefinitions = self.config.get('Directories', 'samplesinfo')
        self.samplesInfo = ParseInfo(self.samplesDefinitions, self.samplesPath)
        self.sampleFilesFolder = self.config.get('Directories', 'samplefiles')

        # every region contributes a list of names; the lists are
        # concatenated and duplicates are removed
        backgroundLists = [
            eval(self.config.get(regionName, 'backgrounds'))
            for regionName in self.trainingRegions
        ]
        self.backgroundSampleNames = list(set(sum(backgroundLists, [])))
        signalLists = [
            eval(self.config.get(regionName, 'signals'))
            for regionName in self.trainingRegions
        ]
        self.signalSampleNames = list(set(sum(signalLists, [])))
        allSampleNames = self.backgroundSampleNames + self.signalSampleNames
        self.samples = self.samplesInfo.get_samples(list(set(allSampleNames)))

        # per region: the cut expression and the variables of all systematics
        self.trainingRegionsDict = {}
        for regionName in self.trainingRegions:
            cutName = config.get(regionName, 'treeCut')
            varSetSection = config.get(regionName, 'treeVarSet').strip()
            systematicTokens = config.get('systematics',
                                          'systematics').split(' ')
            regionVars = []
            for token in systematicTokens:
                # consecutive blanks in the option produce empty tokens
                if len(token.strip()) > 0:
                    regionVars += config.get(varSetSection,
                                             token).strip().split(' ')
            regionInfo = {
                'cut': config.get('Cuts', cutName),
                'vars': regionVars,
            }
            self.trainingRegionsDict[regionName] = regionInfo

        self.TrainCut = config.get('Cuts', 'TrainCut')
        self.EvalCut = config.get('Cuts', 'EvalCut')

        # load the shared library named in the configuration
        libraryPath = config.get('VHbbNameSpace', 'library')
        ROOT.gSystem.Load(libraryPath)
예제 #2
0
    def customInit(self, initVars):
        """Register group-membership, sample-index and event-number branches.

        Args:
            initVars: mapping providing the keys 'sample', 'sampleTree' and
                'config'.

        Raises:
            Exception: if ``self.eventCountsDict`` is set and the sample tree
                does not consist of exactly one file.
        """
        self.sample = initVars['sample']
        self.sampleTree = initVars['sampleTree']
        self.config = initVars['config']

        dcSamplesPath = self.config.get('Directories', 'dcSamples')
        self.samplesInfo = ParseInfo(samples_path=dcSamplesPath,
                                     config=self.config)

        # subsamples which share the identifier of the current sample
        matchingSubsamples = []
        for candidate in self.samplesInfo:
            if candidate.identifier == self.sample.identifier and candidate.subsample:
                matchingSubsamples.append(candidate)
        self.subsamples = matchingSubsamples

        print("INFO: subsamples/cut")
        for subsample in self.subsamples:
            print(" >", subsample.name, subsample.subcut)
            self.sampleTree.addFormula(subsample.subcut)

        # fall back to the group definition from the configuration
        if not self.groupDict:
            self.groupDict = eval(self.config.get('LimitGeneral', 'Group'))

        # invert the sample -> group mapping into group -> list of samples
        self.groupNames = list(set(self.groupDict.values()))
        membersByGroup = {}
        for groupName in self.groupNames:
            membersByGroup[groupName] = [
                sampleName
                for sampleName, assignedGroup in self.groupDict.iteritems()
                if assignedGroup == groupName
            ]
        self.groups = membersByGroup

        # one branch per group
        for groupName in self.groups:
            groupBranch = {
                'name': self.prefix + groupName,
                'formula': self.isInGroup,
                'arguments': groupName
            }
            self.branches.append(groupBranch)

        sampleIndexBranch = {
            'name': 'sampleIndex',
            'formula': self.getSampleIndex,
            'type': 'i'
        }
        self.branches.append(sampleIndexBranch)

        if self.eventCountsDict:
            uniqueEventBranch = {
                'name': 'event_unique',
                'formula': self.getEventNumber,
                'type': 'l'
            }
            self.branches.append(uniqueEventBranch)

            # the offset is looked up per file, so only single-file trees work
            fileNames = self.sampleTree.sampleFileNames
            if len(fileNames) != 1:
                print(
                    "ERROR: adding unique event numbers for chains is not implemented!"
                )
                raise Exception("SampleGroup__customInit__not_implemented")
            countsForSample = self.eventCountsDict[self.sample.identifier]
            self.eventNumberOffset = countsForSample[fileNames[0]]
예제 #3
0
    def __init__(self, config, mvaName):
        """Read the training setup of one MVA from the configuration.

        Args:
            config: configuration object queried with ``get(section, option)``.
            mvaName: name of the config section describing the MVA; also used
                as the region part of the training output file name.
        """
        self.config = config

        # factory options
        self.factoryname = config.get('factory', 'factoryname')
        self.factorysettings = config.get('factory', 'factorysettings')

        # sample definitions and locations
        self.samplesPath = config.get('Directories', 'MVAin')
        self.samplesDefinitions = config.get('Directories', 'samplesinfo')
        self.samplesInfo = ParseInfo(self.samplesDefinitions, self.samplesPath)
        self.sampleFilesFolder = config.get('Directories', 'samplefiles')

        # options of the MVA section itself
        self.treeVarSet = config.get(mvaName, 'treeVarSet')
        self.MVAtype = config.get(mvaName, 'MVAtype')
        self.MVAsettings = config.get(mvaName, 'MVAsettings')
        self.mvaName = mvaName

        # load the shared library named in the configuration
        libraryPath = config.get('VHbbNameSpace', 'library')
        ROOT.gSystem.Load(libraryPath)

        # input variables: blank-separated list in the 'Nominal' option
        nominalVariables = config.get(self.treeVarSet, 'Nominal')
        self.MVA_Vars = {'Nominal': nominalVariables.strip().split(' ')}

        # background and signal samples
        bkgNames = eval(config.get(mvaName, 'backgrounds'))
        sigNames = eval(config.get(mvaName, 'signals'))
        self.samples = {}
        self.samples['BKG'] = self.samplesInfo.get_samples(bkgNames)
        self.samples['SIG'] = self.samplesInfo.get_samples(sigNames)

        # the MVA section names the cut, the 'Cuts' section defines it
        self.treeCutName = config.get(mvaName, 'treeCut')
        self.treeCut = config.get('Cuts', self.treeCutName)

        self.TrainCut = config.get('Cuts', 'TrainCut')
        self.EvalCut = config.get('Cuts', 'EvalCut')
        print("TRAINING CUT:", self.TrainCut)
        print("EVAL CUT:", self.EvalCut)

        self.globalRescale = 2.0

        outputNameTemplate = 'mvatraining_{factoryname}_{region}.root'
        self.trainingOutputFileName = outputNameTemplate.format(
            factoryname=self.factoryname, region=mvaName)
        print("INFO: MvaTrainingHelper class created.")
예제 #4
0
파일: cache_plot.py 프로젝트: acalandr/Xbb
    def __init__(self,
                 config,
                 sampleIdentifier,
                 regions,
                 splitFilesChunks=1,
                 chunkNumber=1,
                 splitFilesChunkSize=-1,
                 forceRedo=False,
                 fileList=None):
        """Set up samples and region cuts for caching the plot inputs.

        Args:
            config: configuration object queried with ``get(section, option)``.
            sampleIdentifier: stored unchanged on the instance.
            regions: iterable of region names; duplicates are removed.
            splitFilesChunks: stored unchanged on the instance.
            chunkNumber: stored unchanged on the instance.
            splitFilesChunkSize: stored unchanged on the instance.
            forceRedo: stored unchanged on the instance.
            fileList: optional compressed file list, expanded with
                ``FileList.decompress`` when it is truthy.
        """
        self.config = config
        self.sampleIdentifier = sampleIdentifier
        self.regions = list(set(regions))
        self.forceRedo = forceRedo
        self.sampleTree = None

        # sample definitions and locations
        self.samplesPath = self.config.get('Directories', 'plottingSamples')
        self.samplesInfo = ParseInfo(samples_path=self.samplesPath,
                                     config=self.config)
        self.sampleFilesFolder = self.config.get('Directories', 'samplefiles')

        # simulated samples and data, both configured in 'Plot_general'
        configuredSamples = eval(self.config.get('Plot_general', 'samples'))
        self.sampleNames = list(configuredSamples)
        configuredData = eval(self.config.get('Plot_general', 'Data'))
        self.dataNames = list(configuredData)
        self.samples = self.samplesInfo.get_samples(self.sampleNames +
                                                    self.dataNames)

        # the cut of a region is stored in the 'Cuts' section under its name
        self.regionsDict = {}
        for regionName in self.regions:
            self.regionsDict[regionName] = {
                'cut': config.get('Cuts', regionName)
            }

        self.splitFilesChunkSize = splitFilesChunkSize
        self.splitFilesChunks = splitFilesChunks
        self.chunkNumber = chunkNumber
        if fileList:
            self.fileList = FileList.decompress(fileList)
        else:
            self.fileList = None

        # load the shared library and report the outcome
        libraryPath = config.get('VHbbNameSpace', 'library')
        loadStatus = ROOT.gSystem.Load(libraryPath)
        if loadStatus == 0:
            print("INFO: loaded VHbbNameSpace: %s" % libraryPath)
        else:
            print(
                "\x1b[31mERROR: loading VHbbNameSpace failed with code %d\x1b[0m"
                % loadStatus)
예제 #5
0
# Print the cross section of every distinct sample identifier.
# `opts` is defined outside this excerpt; opts.config is passed to config.read().
config = BetterConfigParser()
config.read(opts.config)

#namelist=opts.names.split(',')
#print "namelist:",namelist

pathIN = config.get('Directories', 'PREPin')
pathOUT = config.get('Directories', 'PREPout')
samplesinfo = config.get('Directories', 'samplesinfo')
# the samples definition file is parsed a second time with its own parser
sampleconf = BetterConfigParser()
sampleconf.read(samplesinfo)

prefix = sampleconf.get('General', 'prefix')

info = ParseInfo(samples_path=pathIN, config=config)
print "samplesinfo:", samplesinfo
cross_sections = {}
# identifiers in order of first appearance
samples = []
for job in info:
    # only the first entry seen for an identifier is recorded
    if not job.identifier in samples:
        # a list-valued xsec is reduced to its first element (this modifies the job object)
        if type(job.xsec) is list: job.xsec = job.xsec[0]
        cross_sections[job.identifier] = job.xsec
        samples.append(job.identifier)

# one line per identifier: name, tab, cross section
for sample in samples:
    print sample, "\t", cross_sections[sample]
#    print dir(job)
#    print "job.name:",job.name," job.cross_section:",job.xsec
#    print "job.prefix:",job.prefix
#    if not job.name in namelist:
예제 #6
0
    def __init__(self, config, region, vars=None, title=None):
        """Read samples, variables and grouping for plotting one region.

        Args:
            config: configuration object providing ``get`` and ``has_option``.
            region: region name; its settings are read from the section
                ``'Plot:<region>'``.
            vars: optional list of variable names; when empty or missing the
                comma-separated 'vars' option of the region section is used.
            title: optional plot title; empty values are stored as None.
        """
        self.config = config
        self.region = region
        self.vars = vars
        if title and len(title) > 0:
            self.title = title
        else:
            self.title = None

        # VHbb namespace: load the shared library and report the outcome
        libraryPath = config.get('VHbbNameSpace', 'library')
        loadStatus = ROOT.gSystem.Load(libraryPath)
        if loadStatus == 0:
            print("INFO: loaded VHbbNameSpace: %s" % libraryPath)
        else:
            print(
                "\x1b[31mERROR: loading VHbbNameSpace failed with code %d\x1b[0m"
                % loadStatus)

        # optional additional blinding cut (contained in plots, cut on the
        # event number)
        self.addBlindingCut = None
        hasBlindingCut = self.config.has_option('Plot_general',
                                                'addBlindingCut')
        if hasBlindingCut:
            self.addBlindingCut = self.config.get('Plot_general',
                                                  'addBlindingCut')
            print('adding add. blinding cut:', self.addBlindingCut)

        # sample definitions and input/output locations
        self.samplesPath = config.get('Directories', 'plottingSamples')
        self.samplesDefinitions = config.get('Directories', 'samplesinfo')
        self.samplesInfo = ParseInfo(self.samplesDefinitions, self.samplesPath)
        self.sampleFilesFolder = config.get('Directories', 'samplefiles')
        self.plotPath = config.get('Directories', 'plotpath')

        # plot regions
        self.configSection = 'Plot:%s' % region

        # clean up variables given by the caller
        if self.vars and type(self.vars) == list:
            cleanedVars = []
            for name in self.vars:
                if len(name.strip()) > 0:
                    cleanedVars.append(name.strip())
            self.vars = cleanedVars

        # nothing usable given: read the variables from the region section
        if not self.vars or len(self.vars) < 1:
            varListFromConfig = self.config.get(self.configSection,
                                                'vars').split(',')
            print("VARS::", self.configSection, " => ", varListFromConfig)
            configuredVars = []
            for name in varListFromConfig:
                if len(name.strip()) > 0:
                    configuredVars.append(name.strip())
            self.vars = configuredVars

        # data of this region and the list of simulated samples
        self.data = eval(self.config.get(self.configSection, 'Datas'))
        self.mc = eval(self.config.get('Plot_general', 'samples'))
        self.total_lumi = eval(self.config.get('General', 'lumi'))

        # a 'Signal' option adds one more sample and marks a signal region
        self.signalRegion = False
        if self.config.has_option(self.configSection, 'Signal'):
            signalName = self.config.get(self.configSection, 'Signal')
            self.mc.append(signalName)
            self.signalRegion = True

        self.dataSamples = self.samplesInfo.get_samples(self.data)
        self.mcSamples = self.samplesInfo.get_samples(self.mc)

        self.groupDict = eval(self.config.get('Plot_general', 'Group'))
        self.subcutPlotName = ''
        self.histogramStacks = {}
예제 #7
0
파일: run_plot.py 프로젝트: acalandr/Xbb
    def __init__(self,
                 config,
                 region,
                 vars=None,
                 title=None,
                 sampleIdentifier=None):
        """Read samples, variables and cuts needed to plot one region.

        Args:
            config: configuration object providing ``get``, ``has_option`` and
                ``has_section``.
            region: region name; its settings are read from the section
                ``'Plot:<region>'``.
            vars: optional list of variable names; when empty or missing the
                comma-separated 'vars' option of the region section is used.
            title: optional plot title; empty values are stored as None.
            sampleIdentifier: optional comma-separated list of sample
                identifiers; when given, only matching samples are kept.
        """
        self.debug = 'XBBDEBUG' in os.environ
        self.config = config
        self.region = region
        self.vars = vars
        self.title = title if title and len(title) > 0 else None
        self.sampleIdentifiers = sampleIdentifier.split(
            ',') if sampleIdentifier and len(sampleIdentifier) > 0 else None

        # VHbb namespace
        VHbbNameSpace = config.get('VHbbNameSpace', 'library')
        returnCode = ROOT.gSystem.Load(VHbbNameSpace)
        if returnCode != 0:
            print(
                "\x1b[31mERROR: loading VHbbNameSpace failed with code %d\x1b[0m"
                % returnCode)
        else:
            print("INFO: loaded VHbbNameSpace: %s" % VHbbNameSpace)

        # input/output paths
        self.samplesPath = config.get('Directories', 'plottingSamples')
        self.samplesInfo = ParseInfo(samples_path=self.samplesPath,
                                     config=self.config)
        self.sampleFilesFolder = config.get('Directories', 'samplefiles')
        self.plotPath = config.get('Directories', 'plotpath')

        # plot regions
        self.configSection = 'Plot:%s' % region
        self.dataOverBackground = self.config.has_option(
            'Plot_general', 'plotDataOverBackground') and eval(
                self.config.get('Plot_general', 'plotDataOverBackground'))

        # variables: drop empty entries and surrounding whitespace
        if self.vars and isinstance(self.vars, list):
            self.vars = [x.strip() for x in self.vars if len(x.strip()) > 0]

        # if variables not specified in command line, read from config
        if not self.vars or len(self.vars) < 1:
            varListFromConfig = self.config.get(self.configSection,
                                                'vars').split(',')
            print("VARS::", self.configSection, " => ", varListFromConfig)
            self.vars = [
                x.strip() for x in varListFromConfig if len(x.strip()) > 0
            ]

        # resolve plot variables (find plot section name if ROOT expression is
        # given). This must iterate self.vars, not the raw `vars` argument:
        # the argument may be None and does not contain the cleaned-up list or
        # the fallback read from the configuration.
        self.vars = [
            XbbTools.resolvePlotVariable(var, self.config)
            for var in self.vars
        ]

        # additional cut to only plot a subset of the region
        self.subcut = None
        if self.config.has_option(self.configSection, 'subcut'):
            self.subcut = self.config.get(self.configSection, 'subcut')
            print("INFO: use cut:", self.subcut)

        # additional global blinding cut (contained in plots, cut on the event
        # number)
        self.addBlindingCut = None
        if self.config.has_option('Plot_general', 'addBlindingCut'):
            self.addBlindingCut = self.config.get('Plot_general',
                                                  'addBlindingCut')
            print('adding add. blinding cut:', self.addBlindingCut)

        # load samples
        if self.config.has_section(self.configSection):
            # read data from region definition; 'Datas' takes precedence over
            # 'Data', the global default is used when neither is present
            if self.config.has_option(self.configSection, 'Datas'):
                self.data = eval(
                    self.config.get(self.configSection, 'Datas'))
            elif self.config.has_option(self.configSection, 'Data'):
                self.data = eval(self.config.get(self.configSection, 'Data'))
            else:
                self.data = eval(self.config.get('Plot_general', 'Data'))
        else:
            # use default datasets
            self.data = eval(self.config.get('Plot_general', 'Data'))
        # read the list of mc samples
        self.mc = eval(self.config.get('Plot_general', 'samples'))
        self.total_lumi = eval(self.config.get('General', 'lumi'))
        self.signalRegion = False

        self.dataSamples = self.samplesInfo.get_samples(self.data)
        self.mcSamples = self.samplesInfo.get_samples(self.mc)

        # filter samples used in the plot
        if self.sampleIdentifiers:
            self.dataSamples = [
                x for x in self.dataSamples
                if x.identifier in self.sampleIdentifiers
            ]
            self.mcSamples = [
                x for x in self.mcSamples
                if x.identifier in self.sampleIdentifiers
            ]

        self.groupDict = eval(self.config.get('Plot_general', 'Group'))
        self.subcutPlotName = ''
        self.histogramStacks = {}
예제 #8
0
    train_list = (config.get('MVALists', 'List_for_submitscript')).split(',')
    print train_list
    for item in train_list:
        submit(item, repDict)

# datacard names are only read for the 'dc' task
if opts.task == 'dc':
    DC_vars = config.get('LimitGeneral', 'List').split(',')
    print(DC_vars)

# plotting-related tasks read the list from the configuration, every other
# task keeps a single empty entry
if opts.task in ('plot', 'singleplot', 'mergesingleplot', 'checksingleplot'):
    Plot_vars = config.get('Plot_general', 'List').split(',')
else:
    Plot_vars = ['']

# sample information is parsed for every task except 'prep'
if opts.task != 'prep':
    path = config.get("Directories", "samplepath")
    info = ParseInfo(samplesinfo, path)

# one submission per plot entry
if opts.task == 'plot':
    repDict['queue'] = 'all.q'
    for item in Plot_vars:
        submit(item, repDict)

if opts.task == 'trainReg':
    repDict['queue'] = 'all.q'
    submit('trainReg', repDict)
elif opts.task == 'dc':
    repDict['queue'] = 'all.q'
    # item here contains the dc name
    for item in DC_vars:
        submit(item, repDict)
예제 #9
0
    def __init__(self, config, mvaName):
        """Read the scikit training setup of one MVA from the configuration.

        The classifier parameters start from the defaults defined below and
        are overridden by the 'MVAsettings' option of the MVA section, given
        as ``key=value`` pairs separated by ':'.

        Args:
            config: configuration object queried with ``get(section, option)``.
            mvaName: name of the config section describing the MVA.
        """
        self.dataRepresentationVersion = 2
        self.config = config
        self.samplesPath = config.get('Directories', 'MVAin')
        self.samplesInfo = ParseInfo(samples_path=self.samplesPath, config=self.config)
        self.sampleFilesFolder = config.get('Directories', 'samplefiles')
        self.logpath = config.get('Directories', 'logpath')
        self.treeVarSet = config.get(mvaName, 'treeVarSet')
        self.mvaName = mvaName
        self.MVAsettings = config.get(mvaName, 'MVAsettings')
        self.factoryname = 'scikit-test1'

        VHbbNameSpace = config.get('VHbbNameSpace', 'library')
        ROOT.gSystem.Load(VHbbNameSpace)

        # variables
        self.MVA_Vars = {}
        self.MVA_Vars['Nominal'] = config.get(self.treeVarSet, 'Nominal').strip().split(' ')

        # samples
        self.backgroundSampleNames = eval(config.get(mvaName, 'backgrounds'))
        self.signalSampleNames = eval(config.get(mvaName, 'signals'))
        self.samples = {
            'BKG': self.samplesInfo.get_samples(self.backgroundSampleNames),
            'SIG': self.samplesInfo.get_samples(self.signalSampleNames),
        }

        # MVA signal region cuts
        self.treeCutName = config.get(mvaName, 'treeCut')
        self.treeCut = config.get('Cuts', self.treeCutName)

        # split in train/test samples
        self.datasets = ['train', 'test']
        self.varsets = ['X', 'y', 'sample_weight']
        self.trainCut = config.get('Cuts', 'TrainCut')
        self.evalCut = config.get('Cuts', 'EvalCut')

        print("TRAINING CUT:", self.trainCut)
        print("TEST CUT:", self.evalCut)

        self.globalRescale = 2.0

        # default parameters
        self.parameters = {
                'factoryname': self.factoryname,
                'mvaName': self.mvaName,
                'MVAregionCut': self.treeCutName + ': ' + self.treeCut,
                #'classifier': 'GradientBoostingClassifier',
                'classifier': 'RandomForestClassifier',
                #'classifier': 'ExtraTreesClassifier',
                #'classifier': 'FT_GradientBoostingClassifier',
                'max_depth': None,
                'max_leaf_nodes': None,
                'class_weight': 'balanced',
                #'criterion': 'friedman_mse',
                'criterion': 'gini',
                #'n_estimators': 3000,
                'n_estimators': 400,
                #'learning_rate': 0.1,
                'algorithm': 'SAMME.R',
                #'min_samples_leaf': 100,
                'splitter': 'best',
                'max_features': 4,
                'subsample': 0.6,
                'limit': -1,
                'additional_signal_weight': 1.0,
                'min_impurity_split': 0.0,
                'bootstrap': True,
                }

        # load parameters from config in a format similar to Root TMVA parameter string
        evaluatedSettings = []
        for mvaSetting in self.MVAsettings.split(':'):
            # an empty option or a stray ':' yields empty segments; skip them
            if not mvaSetting.strip():
                continue
            parts = mvaSetting.split('=')
            if len(parts) < 2:
                # not a key=value pair: report it and pass it through unchanged
                print("???:", mvaSetting)
                evaluatedSettings.append(mvaSetting)
                continue
            key = parts[0].strip()
            # NOTE(review): eval() executes text from the configuration file;
            # only use configuration files from trusted sources.
            value = eval(parts[1].strip())
            self.parameters[key] = value
            # an explicit argument tuple keeps tuple-valued settings intact
            # ('%r' % value would unpack them as format arguments)
            evaluatedSettings.append('%s=%r' % (key, value))

        self.MVAsettingsEvaluated = ':'.join(evaluatedSettings)
예제 #10
0
    def __init__(self, config, mvaName):
        """Read cuts, variables, b-tag weight variations and samples for one MVA.

        Args:
            config: configuration object queried with ``get(section, option)``.
            mvaName: name of the config section describing the MVA.
        """
        self.mvaName = mvaName
        libraryPath = config.get('VHbbNameSpace', 'library')
        ROOT.gSystem.Load(libraryPath)
        self.dataFormatVersion = 2
        self.sampleTrees = []
        self.config = config
        self.samplesPath = config.get('Directories', 'MVAin')
        self.samplesDefinitions = config.get('Directories', 'samplesinfo')
        self.samplesInfo = ParseInfo(self.samplesDefinitions, self.samplesPath)

        # region: the MVA section names the cut, 'Cuts' defines it
        self.treeCutName = config.get(mvaName, 'treeCut')
        self.treeCut = config.get('Cuts', self.treeCutName)

        # split in train/eval sets
        self.trainCut = config.get('Cuts', 'TrainCut')
        self.evalCut = config.get('Cuts', 'EvalCut')
        # rescale MC by 2 because of train/eval split
        self.globalRescale = 2.0

        # variables and systematics
        self.treeVarSet = config.get(mvaName, 'treeVarSet')
        systematicsOption = config.get('systematics', 'systematics')
        self.systematics = systematicsOption.strip().split(' ')

        def readVariableList(option):
            # blank-separated variable list without empty tokens
            rawTokens = config.get(self.treeVarSet, option).strip().split(' ')
            return [token for token in rawTokens if len(token.strip()) > 0]

        self.MVA_Vars = {'Nominal': readVariableList('Nominal')}
        for systematicName in self.systematics:
            self.MVA_Vars[systematicName] = readVariableList(systematicName)

        # b-tag weight variations: the weight without b-tag multiplied by the
        # varied b-tag weight, for every source and both directions
        self.weightSYS = []
        self.weightWithoutBtag = self.config.get('Weights', 'weight_noBTag')
        self.weightSYSweights = {}
        btagSources = [
            'HFStats1', 'HFStats2', 'LF', 'HF', 'LFStats1', 'LFStats2',
            'cErr2', 'cErr1', 'JES'
        ]
        for direction in ['Up', 'Down']:
            for source in btagSources:
                variationName = "btag_" + source + "_" + direction
                variedWeight = "bTagWeightCMVAV2_Moriond_" + source + direction
                fullWeight = self.weightWithoutBtag + '*' + variedWeight
                self.weightSYSweights[variationName] = fullWeight
                self.weightSYS.append(variationName)

        # samples
        self.sampleNames = {
            #                   'BKG_TT': eval(self.config.get('Plot_general', 'TT')),
            #                   'BKG_ST': eval(self.config.get('Plot_general', 'ST')),
            #                   'BKG_VV': eval(self.config.get('Plot_general', 'VV')),
            #                   'BKG_DY2b': eval(self.config.get('Plot_general', 'DY2b')),
            #                   'BKG_DY1b': eval(self.config.get('Plot_general', 'DY1b')),
            #                   'BKG_DY0b': eval(self.config.get('Plot_general', 'DYlight')),
            #                   'SIG_ggZH': eval(self.config.get('Plot_general', 'ggZH')),
            #                   'SIG_qqZH': eval(self.config.get('Plot_general', 'qqZH')),
            'SIG_ALL': eval(self.config.get('Plot_general', 'allSIG')),
            'BKG_ALL': eval(self.config.get('Plot_general', 'allBKG')),
        }
        samplesByCategory = {}
        for category, names in self.sampleNames.iteritems():
            samplesByCategory[category] = self.samplesInfo.get_samples(names)
        self.samples = samplesByCategory
예제 #11
0
    def __init__(self, config, region, sampleIdentifier=None, opts=None):
        """Read paths, cuts and samples needed to process one plot region.

        Args:
            config: configuration object providing ``get`` and ``has_option``.
            region: region name; its settings are read from the section
                ``'Plot:<region>'``.
            sampleIdentifier: optional comma-separated list of sample
                identifiers; when given, only matching samples are kept.
            opts: options object; ``opts.inputDir`` and ``opts.outputDir``
                name options of the 'Directories' section.
                NOTE(review): the default is None, but the attributes are
                read unconditionally, so None fails with AttributeError.
        """
        self.config = config
        self.region = region
        if sampleIdentifier and len(sampleIdentifier) > 0:
            self.sampleIdentifiers = sampleIdentifier.split(',')
        else:
            self.sampleIdentifiers = None

        # VHbb namespace: load the shared library and report the outcome
        libraryPath = config.get('VHbbNameSpace', 'library')
        loadStatus = ROOT.gSystem.Load(libraryPath)
        if loadStatus == 0:
            print("INFO: loaded VHbbNameSpace: %s" % libraryPath)
        else:
            print(
                "\x1b[31mERROR: loading VHbbNameSpace failed with code %d\x1b[0m"
                % loadStatus)

        # input/output paths
        self.fileLocator = FileLocator(config=self.config)
        self.pathIN = self.config.get('Directories', opts.inputDir)
        self.pathOUT = self.config.get('Directories', opts.outputDir)
        self.tmpDir = self.config.get('Directories', 'scratch')

        self.samplesPath = config.get('Directories', 'plottingSamples')
        self.samplesInfo = ParseInfo(samples_path=self.samplesPath,
                                     config=self.config)
        self.sampleFilesFolder = config.get('Directories', 'samplefiles')
        self.plotPath = config.get('Directories', 'plotpath')

        # plot regions
        self.configSection = 'Plot:%s' % region

        # additional cut to only plot a subset of the region
        self.subcut = None
        if self.config.has_option(self.configSection, 'subcut'):
            self.subcut = self.config.get(self.configSection, 'subcut')
            print("INFO: use cut:", self.subcut)

        # additional global blinding cut (contained in plots, cut on the event
        # number)
        self.addBlindingCut = None
        hasBlindingCut = self.config.has_option('Plot_general',
                                                'addBlindingCut')
        if hasBlindingCut:
            self.addBlindingCut = self.config.get('Plot_general',
                                                  'addBlindingCut')
            print('adding add. blinding cut:', self.addBlindingCut)

        # data of this region and the list of simulated samples
        self.data = eval(self.config.get(self.configSection, 'Datas'))
        self.mc = eval(self.config.get('Plot_general', 'samples'))
        self.total_lumi = eval(self.config.get('General', 'lumi'))

        # a 'Signal' option adds one more sample and marks a signal region
        self.signalRegion = False
        if self.config.has_option(self.configSection, 'Signal'):
            signalName = self.config.get(self.configSection, 'Signal')
            self.mc.append(signalName)
            self.signalRegion = True

        self.dataSamples = self.samplesInfo.get_samples(self.data)
        self.mcSamples = self.samplesInfo.get_samples(self.mc)

        # filter samples used in the plot
        if self.sampleIdentifiers:
            selectedData = []
            for sample in self.dataSamples:
                if sample.identifier in self.sampleIdentifiers:
                    selectedData.append(sample)
            self.dataSamples = selectedData

            selectedMc = []
            for sample in self.mcSamples:
                if sample.identifier in self.sampleIdentifiers:
                    selectedMc.append(sample)
            self.mcSamples = selectedMc
예제 #12
0
from ROOT import TAxis
from ROOT import TLorentzVector
from ROOT import TMath
from ROOT import TLegend
#from ROOT import cmath

from ROOT import gStyle
from ROOT import gPad

from ROOT import TCanvas, TColor, TGaxis, TH1F, TPad
from ROOT import kBlack, kBlue, kRed, kViolet

# load configuration and list of used samples
# (the configuration tag 'Zll2018' and the samples file path are hard-coded here)
config = XbbConfigReader.read('Zll2018')
path = "Zll2018config/samples_nosplit.ini"
# NOTE(review): config is passed both as first positional argument and as
# keyword `config` — confirm this matches the ParseInfo signature
sampleInfo = ParseInfo(config, path, config=config)

# sample objects for the names returned by XbbConfigTools(config).getMC()
usedSamples = sampleInfo.get_samples(XbbConfigTools(config).getMC())
#usedSamples = sampleInfo.get_samples(['ZJetsHT100', 'ZH_Znunu'])

# distinct identifiers of the used samples (set() removes duplicates, so the
# order of the resulting list is not defined)
usedSampleIdentifiers = list(set([x.identifier for x in usedSamples]))
print('usedSampleIdentifiers', usedSampleIdentifiers)

# some samples come from same set of ROOT trees (=have same identifier)
# -> find list of unique identifiers to avoid to process same tree file twice
#sampleIdentifiers = sampleInfo.getSampleIdentifiers()
#usedSampleIdentifiers = ParseInfo.filterIdentifiers(sampleIdentifiers, usedSamples)

# from which step to take the root trees
directory = config.get('Directories', 'sysOUT4')
예제 #13
0
                'folder': config.get('Directories', args.fromFolder).strip()
            },
            config=config)
    h1 = ROOT.TH1D("h1", "h1", 1, 0, 2)
    scaleToXs = sampleTree.getScale(sample)
    #nEvents = sampleTree.tree.Draw("1>>h1", "(" + cut + ")*genWeight*%1.6f"%scaleToXs, "goff")
    nEvents = sampleTree.tree.Draw("1>>h1", cut, "goff")
    nEventsWeighted = h1.GetBinContent(1)
    #print("DEBUG:", sampleIdentifier, cut, " MC events:", nEvents, " (weighted:", nEventsWeighted, ")")
    h1.Delete()
    return nEvents


# load config
# (`args` is defined outside this excerpt; args.tag selects the configuration
# and args.fromFolder names an option of the 'Directories' section)
config = XbbConfigReader.read(args.tag)
sampleInfo = ParseInfo(samples_path=config.get('Directories', args.fromFolder),
                       config=config)
# sample objects for the names returned by XbbConfigTools(config).getMC()
mcSamples = sampleInfo.get_samples(XbbConfigTools(config).getMC())

pruneThreshold = float(args.prune)

# args.samples is a comma-separated list of groups; the members of one group
# are joined by '+', so every entry of sampleGroups is a list of names
sampleGroups = []
for x in args.samples.split(','):
    sampleGroups.append(x.split('+'))

# comma-separated list of cuts
sampleCuts = args.cuts.strip().split(',')
if args.fc != '':
    cutGroups = [
        x.strip(',').split(',') for x in args.fc.strip(';').split(';')
    ]
    # cartesian product
    sampleCuts = list(
예제 #14
0
# parse the command line and read the configuration
(opts, args) = parser.parse_args(argv)
config = BetterConfigParser()
config.read(opts.config)

# optional compressed file list given on the command line
if len(opts.fileList) > 0:
    fileList = FileList.decompress(opts.fileList)
else:
    fileList = None

pathOUT = config.get('Directories', 'PREPout')
samplesinfo = config.get('Directories', 'samplesinfo')
samplefiles = config.get('Directories', 'samplefiles')
sampleconf = BetterConfigParser()
sampleconf.read(samplesinfo)

whereToLaunch = config.get('Configuration', 'whereToLaunch')

info = ParseInfo(samplesinfo, None)

# keep samples which are not subsamples; a non-empty --sampleIdentifier
# additionally restricts them to the listed identifiers
samples = []
for x in info:
    if x.subsample:
        continue
    if (len(opts.sampleIdentifier) == 0
            or x.identifier in opts.sampleIdentifier.split(',')):
        samples.append(x)

treeCopier = copytreePSI.CopyTreePSI(config=config)

# optional upper limit on the number of samples
if opts.limit:
    if len(samples) > int(opts.limit):
        samples = samples[:int(opts.limit)]

# copy the trees sample by sample, applying the sample's skimming cut
for sample in samples:
    treeCopier.copytreePSI(pathIN=samplefiles,
                           pathOUT=pathOUT,
                           folderName=sample.identifier,
                           skimmingCut=sample.addtreecut,
                           fileList=fileList)
예제 #15
0
파일: tree_stack.py 프로젝트: jmduarte/Xbb
print("Compile external macros")
print("=======================\n")

# locations read from the configuration
# working directory containing the output
Wdir = config.get('Directories', 'Wdir')
# samples_nosplit.cfg
samplesinfo = config.get('Directories', 'samplesinfo')
# from which samples to plot
path = config.get('Directories', 'plottingSamples')

section = 'Plot:%s' % region

# creates a list of samples by reading the info in samples_nosplit.cfg and the
# content of the path
info = ParseInfo(samplesinfo, path)

import os

# each compiled macro is loaded only if the shared object exists
if os.path.exists("../interface/DrawFunctions_C.so"):
    print('ROOT.gROOT.LoadMacro("../interface/DrawFunctions_C.so")')
    ROOT.gROOT.LoadMacro("../interface/DrawFunctions_C.so")

if os.path.exists("../interface/VHbbNameSpace_h.so"):
    print('ROOT.gROOT.LoadMacro("../interface/VHbbNameSpace_h.so")')
    ROOT.gROOT.LoadMacro("../interface/VHbbNameSpace_h.so")


#----------Histo from trees------------
#Get the selections and the samples
def doPlot():
                  default=None,
                  help="max number of files to process")
opts, args = parser.parse_args(argv)

config = BetterConfigParser()
config.read(opts.config)

# an empty opts.fileList means that no explicit file list is used
if len(opts.fileList) > 0:
    fileList = FileList.decompress(opts.fileList)
else:
    fileList = None

pathOUT = config.get('Directories', 'PREPout')
samplefiles = config.get('Directories', 'samplefiles')

# the sample definitions are taken from the main config object
sampleconf = config

whereToLaunch = config.get('Configuration', 'whereToLaunch')

info = ParseInfo(samples_path=None, config=config)

# Keep only entries that are not subsamples. An empty opts.sampleIdentifier
# selects all of them, otherwise it is a comma separated list of identifiers.
samples = []
for x in info:
    if x.subsample:
        continue
    if len(opts.sampleIdentifier) == 0 or x.identifier in opts.sampleIdentifier.split(','):
        samples.append(x)

treeCopier = copytreePSI.CopyTreePSI(config=config)

# optional cap on the number of samples to process
if opts.limit:
    samples = samples[:int(opts.limit)]

# copy the trees of every selected sample, applying its additional tree cut
for sample in samples:
    treeCopier.copytreePSI(
        pathIN=samplefiles,
        pathOUT=pathOUT,
        folderName=sample.identifier,
        skimmingCut=sample.addtreecut,
        fileList=fileList,
    )
예제 #17
0
    def __init__(self, opts):

        # get file list
        self.filelist = FileList.decompress(opts.fileList) if len(opts.fileList) > 0 else None
        print "len(filelist)",len(self.filelist),
        if len(self.filelist) > 0:
            print "filelist[0]:", self.filelist[0]
        else:
            print ''

        # config
        self.debug = 'XBBDEBUG' in os.environ
        self.verifyCopy = True
        self.opts = opts
        self.config = BetterConfigParser()
        self.config.read(opts.config)
        self.channel = self.config.get('Configuration', 'channel')

        # load namespace, TODO
        VHbbNameSpace = self.config.get('VHbbNameSpace', 'library')
        ROOT.gSystem.Load(VHbbNameSpace)

        # directories
        self.pathIN = self.config.get('Directories', opts.inputDir)
        self.pathOUT = self.config.get('Directories', opts.outputDir)
        self.tmpDir = self.config.get('Directories', 'scratch')
        print 'INput samples:\t%s'%self.pathIN
        print 'OUTput samples:\t%s'%self.pathOUT

        self.fileLocator = FileLocator(config=self.config)

        # check if given sample identifier uniquely matches a samples from config
        matchingSamples = ParseInfo(samples_path=self.pathIN, config=self.config).find(identifier=opts.sampleIdentifier)
        if len(matchingSamples) != 1:
            print "ERROR: need exactly 1 sample identifier as input with -S !!"
            print matchingSamples
            exit(1)
        self.sample = matchingSamples[0]

        # collections
        self.collections = [x.strip() for x in opts.addCollections.split(',') if len(x.strip()) > 0] if len(opts.addCollections.strip())>0  else []
        if len(self.collections) < 1:
            print "\x1b[31mWARNING: no collections added! Specify the collections to add with the --addCollections option!\x1b[0m"
        print 'collections to add:', self.collections
        self.collections = self.parseCollectionList(self.collections)
        print 'after parsing:', self.collections

        # temorary folder to save the files of this job on the scratch
        temporaryName = self.sample.identifier + '/' + uuid.uuid4().hex

        # input files
        self.subJobs = []
        if opts.join:
            print("INFO: join input files! This is an experimental feature!")

            # translate naming convention of .txt file to imported files after the prep step
            inputFileNamesAfterPrep = [self.fileLocator.getFilenameAfterPrep(x) for x in self.filelist]

            self.subJobs.append({
                'inputFileNames': self.filelist,
                'localInputFileNames': ["{path}/{subfolder}/{filename}".format(path=self.pathIN, subfolder=self.sample.identifier, filename=localFileName) for localFileName in inputFileNamesAfterPrep],
                'outputFileName': "{path}/{subfolder}/{filename}".format(path=self.pathOUT, subfolder=self.sample.identifier, filename=inputFileNamesAfterPrep[0]),
                'tmpFileName': "{path}/{subfolder}/{filename}".format(path=self.tmpDir, subfolder=temporaryName, filename=inputFileNamesAfterPrep[0]),
                })

        else:
            
            # create separate subjob for all files (default!)
            for inputFileName in self.filelist:
                inputFileNamesAfterPrep = [self.fileLocator.getFilenameAfterPrep(inputFileName)]

                self.subJobs.append({
                    'inputFileNames': [inputFileName],
                    'localInputFileNames': ["{path}/{subfolder}/{filename}".format(path=self.pathIN, subfolder=self.sample.identifier, filename=localFileName) for localFileName in inputFileNamesAfterPrep],
                    'outputFileName': "{path}/{subfolder}/{filename}".format(path=self.pathOUT, subfolder=self.sample.identifier, filename=inputFileNamesAfterPrep[0]),
                    'tmpFileName': "{path}/{subfolder}/{filename}".format(path=self.tmpDir, subfolder=temporaryName, filename=inputFileNamesAfterPrep[0]),
                    })
예제 #18
0
    def __init__(self,
                 config,
                 sampleIdentifier,
                 trainingRegions,
                 splitFilesChunks=1,
                 chunkNumber=1,
                 splitFilesChunkSize=-1,
                 force=False):
        """Collect samples, cuts and input variables of the training regions.

        Args:
            config: configuration object, queried with get()/has_option().
            sampleIdentifier: stored as self.sampleIdentifier.
            trainingRegions: iterable of config section names, one per
                training region.
            splitFilesChunks: stored as self.splitFilesChunks.
            chunkNumber: stored as self.chunkNumber.
            splitFilesChunkSize: stored as self.splitFilesChunkSize.
            force: stored as self.force.
        """
        self.config = config
        self.force = force
        self.sampleIdentifier = sampleIdentifier
        self.trainingRegions = trainingRegions

        self.sampleTree = None

        # use the dedicated 'trainingSamples' directory if configured, else 'MVAin'
        if config.has_option('Directories', 'trainingSamples'):
            samplesPathOption = 'trainingSamples'
        else:
            samplesPathOption = 'MVAin'
        self.samplesPath = self.config.get('Directories', samplesPathOption)
        self.samplesInfo = ParseInfo(samples_path=self.samplesPath, config=self.config)
        self.sampleFilesFolder = self.config.get('Directories', 'samplefiles')

        # The sample name options hold Python list literals, which are
        # eval'ed; the lists of all regions are concatenated and de-duplicated.
        backgroundNames = []
        for region in self.trainingRegions:
            backgroundNames = backgroundNames + eval(self.config.get(region, 'backgrounds'))
        self.backgroundSampleNames = list(set(backgroundNames))

        signalNames = []
        for region in self.trainingRegions:
            signalNames = signalNames + eval(self.config.get(region, 'signals'))
        self.signalSampleNames = list(set(signalNames))

        # can include DATA in the .h5 files for training (the option is optional)
        dataNames = []
        for region in self.trainingRegions:
            if self.config.has_option(region, 'data'):
                dataNames = dataNames + eval(self.config.get(region, 'data'))
        self.dataSampleNames = list(set(dataNames))

        allSampleNames = self.backgroundSampleNames + self.signalSampleNames + self.dataSampleNames
        self.samples = self.samplesInfo.get_samples(list(set(allSampleNames)))

        self.trainingRegionsDict = {}
        for trainingRegion in self.trainingRegions:
            # the cut name defaults to the name of the region itself
            if config.has_option(trainingRegion, 'treeCut'):
                treeCutName = config.get(trainingRegion, 'treeCut')
            else:
                treeCutName = trainingRegion
            treeVarSet = config.get(trainingRegion, 'treeVarSet').strip()

            # systematics: either a Python list literal or a space separated string
            systematics = []
            if config.has_option(trainingRegion, 'systematics'):
                systematicsString = config.get(trainingRegion, 'systematics').strip()
                if systematicsString.startswith('['):
                    systematics = eval(systematicsString)
                else:
                    systematics = systematicsString.split(' ')

            mvaVars = config.get(treeVarSet, 'Nominal').split(' ')

            # For each systematic, look up the up and down variations in the
            # 'Weights' section: the upper-case suffix is tried first, then the
            # mixed-case one; variations found in neither form are skipped.
            weightVars = []
            for syst in systematics:
                for upperSuffix, mixedSuffix in (('_UP', '_Up'), ('_DOWN', '_Down')):
                    weightName = syst + upperSuffix
                    if not self.config.has_option('Weights', weightName):
                        weightName = syst + mixedSuffix
                    if self.config.has_option('Weights', weightName):
                        weightVars.append(self.config.get('Weights', weightName))

            self.trainingRegionsDict[trainingRegion] = {
                'cut': config.get('Cuts', treeCutName),
                'vars': mvaVars,
                'weightVars': weightVars,
            }

        self.TrainCut = config.get('Cuts', 'TrainCut')
        self.EvalCut = config.get('Cuts', 'EvalCut')

        self.splitFilesChunks = splitFilesChunks
        self.chunkNumber = chunkNumber
        self.splitFilesChunkSize = splitFilesChunkSize

        # load the shared library configured as VHbbNameSpace
        VHbbNameSpace = config.get('VHbbNameSpace', 'library')
        ROOT.gSystem.Load(VHbbNameSpace)
예제 #19
0
파일: evaluateMVA.py 프로젝트: jmduarte/Xbb
config = BetterConfigParser()
config.read(opts.config)
anaTag = config.get("Analysis", "tag")

# locations
Wdir = config.get('Directories', 'Wdir')
samplesinfo = config.get('Directories', 'samplesinfo')

# shape systematics
systematics = config.get('systematics', 'systematics')

# MVAin / MVAout directories
INpath = config.get('Directories', 'MVAin')
OUTpath = config.get('Directories', 'MVAout')

info = ParseInfo(samplesinfo, INpath)

# opts.discr (e.g. RTight_blavla,bsbsb) is used unless an optimisation is
# evaluated, in which case 'weight' is used instead
arglist = weight if evaluate_optimisation else opts.discr

# comma separated names from the command line
namelistIN = opts.names
namelist = namelistIN.split(',')

print('namelist', namelist)
# sys.exit(1)
예제 #20
0
            'name': opts.sampleIdentifier,
            'folder': config.get('Directories', 'MVAin')
        },
        countOnly=True,
        splitFilesChunkSize=-1,
        config=config).getSampleFileNameChunks()[0]
    print("INFO: no file list given, use all files!")
    print(len(filelist), filelist)

# read paths and sample info
# (all values come from the already loaded 'config' object)
samplesinfo = config.get('Directories', 'samplesinfo')
systematics = config.get('systematics', 'systematics')
INpath = config.get('Directories', 'MVAin')
OUTpath = config.get('Directories', 'MVAout')
tmpDir = config.get('Directories', 'scratch')
# sample information is built from the MVAin directory and the config
info = ParseInfo(samples_path=INpath, config=config)

# load the namespace: the library configured as VHbbNameSpace is loaded into ROOT
VHbbNameSpace = config.get('VHbbNameSpace', 'library')
ROOT.gSystem.Load(VHbbNameSpace)


# ------------------------------------------------------------------------------------------
# helper class to evaluate scikit classifiers and write MVA score as new branch
# ------------------------------------------------------------------------------------------
class SciKitEvaluator(object):
    def __init__(self,
                 name,
                 sampleTree,
                 classifier,
                 variables,