def customInit(self, initVars):
    """Reset the event counters and, for selected samples, load the list of events to filter.

    Args:
        initVars: dict providing at least the keys 'sample' (an object
            with an ``identifier`` attribute) and 'config'.

    If the sample identifier is listed in ``self.applyToSamples``, the
    (run, event) pairs read from ``self.excludeTreeFileName`` are counted
    into ``self.excludedEvents`` as ``{run: {event: occurrences}}``.
    Otherwise only a notice is printed and ``self.excludedEvents`` is not
    touched.
    """
    self.n_excluded = 0
    self.n_kept = 0
    self.n_skipped = 0
    self.sample = initVars['sample']
    self.config = initVars['config']

    if self.sample.identifier not in self.applyToSamples:
        print("INFO: event number filter disabled for this sample")
        return

    self.excludedEvents = {}
    excludedSampleTree = SampleTree([self.excludeTreeFileName], config=self.config)
    excludedSampleTree.enableBranches(['run', 'event'])
    # single-argument print(...) produces the same output on Python 2 and 3
    print("INFO: loading list of events to filter")
    for ev in excludedSampleTree:
        eventCounts = self.excludedEvents.setdefault(ev.run, {})
        eventCounts[ev.event] = eventCounts.get(ev.event, 0) + 1

    # every key of the inner dicts is one distinct (run, event) pair
    n_events = sum(len(eventCounts) for eventCounts in self.excludedEvents.values())
    # pairs that appear more than once in the provided list itself
    n_duplicated = sum(1
                       for eventCounts in self.excludedEvents.values()
                       for count in eventCounts.values()
                       if count > 1)
    print("INFO: done =>  %d distinct events will be filtered out of %s" % (n_events, self.applyToSamples))
    if n_duplicated > 0:
        print("INFO: the event list provided contains %d duplicates itself!" % n_duplicated)
Beispiel #2
0
    def customInit(self, initVars):
        """Reset the event counters and, for selected samples, load the list of events to filter.

        Args:
            initVars: dict providing at least the keys 'sample' (an object
                with an ``identifier`` attribute) and 'config'.

        If the sample identifier is listed in ``self.applyToSamples``, the
        (run, event) pairs read from ``self.excludeTreeFileName`` are
        counted into ``self.excludedEvents`` as
        ``{run: {event: occurrences}}``. Otherwise only a notice is printed
        and ``self.excludedEvents`` is not touched.
        """
        self.n_excluded = 0
        self.n_kept = 0
        self.n_skipped = 0
        self.sample = initVars['sample']
        self.config = initVars['config']

        if self.sample.identifier not in self.applyToSamples:
            print("INFO: event number filter disabled for this sample")
            return

        self.excludedEvents = {}
        excludedSampleTree = SampleTree([self.excludeTreeFileName],
                                        config=self.config)
        excludedSampleTree.enableBranches(['run', 'event'])
        # single-argument print(...) produces the same output on Python 2 and 3
        print("INFO: loading list of events to filter")
        for ev in excludedSampleTree:
            eventCounts = self.excludedEvents.setdefault(ev.run, {})
            eventCounts[ev.event] = eventCounts.get(ev.event, 0) + 1

        # every key of the inner dicts is one distinct (run, event) pair
        n_events = sum(
            len(eventCounts) for eventCounts in self.excludedEvents.values())
        # pairs that appear more than once in the provided list itself
        n_duplicated = sum(1
                           for eventCounts in self.excludedEvents.values()
                           for count in eventCounts.values()
                           if count > 1)
        print("INFO: done =>  %d distinct events will be filtered out of %s" %
              (n_events, self.applyToSamples))
        if n_duplicated > 0:
            print("INFO: the event list provided contains %d duplicates itself!"
                  % n_duplicated)
Beispiel #3
0
    def run(self):
        """Write all events of the input files to a dated skim file and copy it to the output path.

        The tree built from ``self.fileNames`` is written (cut '1', all
        branches) to a temporary file below ``self.tmpDir`` and then copied
        to ``self.pathOUT``. If the config has the option
        ``Plot_general.controlSample``, an output branch ``controlSample``
        is added whose value is looked up by ``self.region`` (-1 if the
        region is not listed).

        Copy problems are only reported on stdout; no exception is raised.
        """
        if self.config.has_option('Configuration', 'channel'):
            name = self.config.get('Configuration', 'channel')
        else:
            name = '_'
        timestamp = datetime.datetime.now().strftime("%y%m%d")
        # use the region of this instance (as the controlSample lookup below
        # does) instead of relying on a bare `region` name from outside
        baseName = 'skim_' + name + '_' + self.region + '_' + timestamp
        tmpName = self.tmpDir + '/' + baseName + '_tmp.root'
        destName = self.pathOUT + '/' + baseName + '.root'

        sampleTree = SampleTree(self.fileNames, config=self.config)

        if self.config.has_option('Plot_general', 'controlSample'):
            # NOTE: eval() of a config value -- only safe for trusted config files
            controlSampleDict = eval(self.config.get('Plot_general', 'controlSample'))
            controlSample = controlSampleDict[self.region] if self.region in controlSampleDict else -1
            sampleTree.addOutputBranch("controlSample", lambda x: controlSample, branchType="i")
            print("INFO: setting controlSample to", controlSample)

        sampleTree.addOutputTree(tmpName, cut='1', branches='*', friend=False)
        sampleTree.process()

        # copy to final destination
        if sampleTree.getNumberOfOutputTrees() > 0:
            try:
                self.fileLocator.cp(tmpName, destName, force=True)
                print('copy ', tmpName, destName)

                if not self.fileLocator.isValidRootFile(destName):
                    # keep the temporary file so the output is not lost
                    print("\x1b[31mERROR: copy failed, output is broken!\x1b[0m")
                else:
                    try:
                        self.fileLocator.rm(tmpName)
                    except Exception as e:
                        print(e)
            except Exception as e:
                print("\x1b[31mERROR: copy failed!", e, "\x1b[0m")
Beispiel #4
0
    def run(self):
        """Merge the prepared input files of one sample and copy the result to its final place.

        With ``self.useChain`` set, the inputs are processed through a
        ``SampleTree`` (branches listed in ``General.remove_branches`` are
        blacklisted on the way); otherwise the command built from
        ``self.commandTemplate`` is executed via ``self.fileLocator``.

        Raises:
            Exception: "HaddError" if the merge step reports a non-zero
                result, "FileCopyError" if copying to the final
                destination fails.
        """
        inputFileNames = []
        for fileName in self.fileNames:
            inputFileNames.append("{path}/{sample}/{fileName}".format(
                path=self.config.get('Directories', 'HADDin'),
                sample=self.sampleIdentifier,
                fileName=self.fileLocator.getFilenameAfterPrep(fileName)))

        outputFileName = self.getTemporaryFileName()
        # everything before the last '/' is the folder of the temporary file
        self.fileLocator.makedirs(outputFileName.rpartition('/')[0])

        forceFlag = "-f" if self.force else ""
        command = self.commandTemplate.format(
            output=outputFileName,
            inputs=' '.join(inputFileNames),
            f=forceFlag)
        if self.debug:
            print("DEBUG: run \x1b[34m", command, "\x1b[0m")

        if not self.useChain:
            # standard hadd
            result = self.fileLocator.runCommand(command)
        else:
            # use sampleTree class (can e.g. drop branches at the same time)
            sampleTree = SampleTree(inputFileNames, config=self.config)
            try:
                for removeBranch in eval(
                        self.config.get('General', 'remove_branches')):
                    sampleTree.addBranchToBlacklist(removeBranch)
                    print("DEBUG: disable branch ", removeBranch)
            except Exception as err:
                print("DEBUG: could not disable branch:", err)
            sampleTree.addOutputTree(outputFileName, cut='1', branches='*')
            sampleTree.process()
            result = 0

        print("INFO: hadd returned ", result)
        if result != 0:
            raise Exception("HaddError")

        finalOutputFileName = self.getOutputFileName()
        print("move file to final destination: \x1b[34m",
              finalOutputFileName, "\x1b[0m")
        self.fileLocator.makedirs(finalOutputFileName.rpartition('/')[0])
        copied = self.fileLocator.cp(outputFileName, finalOutputFileName,
                                     self.force)
        if not copied:
            print("\x1b[31mERROR: copy failed\n from:", outputFileName,
                  "\n to:", finalOutputFileName, "\n force:", self.force,
                  "\x1b[0m")
            raise Exception("FileCopyError")

        # removing the temporary file is best effort only
        try:
            self.fileLocator.rm(outputFileName)
        except Exception as err:
            print("ERROR: could not delete temporary file:",
                  outputFileName, " => ", err)
        print("INFO: done.")
Beispiel #5
0
    def test_MultiOutput(self):
        """Write one skimmed tree per cut in a single pass and compare entry counts with direct Draw() results."""
        sampleTree = self.getTree()

        # hand-written selections
        cuts = [
            "nJet==5&&Sum$(Jet)>500",
            "nJet==6&&Sum$(Jet)>600",
            "nJet==7&&Sum$(Jet)>700",
            "nJet==8&&Sum$(Jet)>800",
            "nJet==9 && Sum$(Jet)>800 && a<0 && (b>30 || b > 50)",
            "nJet==9 && Sum$(Jet)>800 && (a<0 && (b>30 || b > 50)) || (a>0 && (b>10 || b > 90)) || (a>0.8 && (b>5 || b > 50))",
        ]

        # five more selections, each the OR of 50 randomly generated terms
        for _ in range(5):
            terms = []
            for _ in range(50):
                terms.append("(a<%f && (b>%f || c > %f))" % (
                    random.gauss(0, 0.5),
                    random.uniform(0, 50),
                    random.uniform(0, 2)))
            cuts.append('||'.join(terms))

        # one output file per cut, all filled in the same processing pass
        outputNames = [
            TestSampleTreeMethods.scratchDirectory + '/tree_skimmed_%d.root' % index
            for index in range(len(cuts))
        ]
        for outputName, cut in zip(outputNames, cuts):
            sampleTree.addOutputTree(outputName, cut, '')
        sampleTree.process()

        # method A: entries found in the written files
        newSampleTrees = [SampleTree([outputName]) for outputName in outputNames]
        resultsMethodA = [skimmed.tree.GetEntries() for skimmed in newSampleTrees]

        # method B: value returned by Draw() on the input tree for each cut
        resultsMethodB = [sampleTree.tree.Draw("a", cut, "goff") for cut in cuts]

        print(resultsMethodA)
        print(resultsMethodB)

        self.assertTrue(all(countA == countB for countA, countB in zip(resultsMethodA, resultsMethodB)))
Beispiel #6
0
    def test_SampleTree_Callback_1(self):
        """Check that the number of events seen by the callbacks equals the entries written for the first cut."""
        sampleTree = self.getTree()

        # selections, one output tree each
        cuts = [
            "b>444.4&&b<444.5",
            "nJet==5&&Sum$(Jet)>500",
            "nJet==6&&Sum$(Jet)>600",
            "nJet==7&&Sum$(Jet)>700",
            "nJet==8&&Sum$(Jet)>800",
        ]
        scratch = TestSampleTreeCallbacksMethods.scratchDirectory

        # register the skimmed outputs, each with its own callback dict
        for index, cut in enumerate(cuts):
            treeCallbacks = {
                'beforeLoop': self.callback_before_loop,
                'afterWrite': self.callback_after_write,
            }
            sampleTree.addOutputTree(
                outputFileName=scratch + '/tree_test_%d.root' % index,
                cut=cut,
                callbacks=treeCallbacks,
                branches='*',
            )
        sampleTree.setCallback('event', self.event_callback)
        sampleTree.process()

        # check output: read back the file written for the first cut
        sampleTree2 = SampleTree([scratch + '/tree_test_0.root'])
        resultsMethodB = sampleTree2.tree.GetEntries()
        print(sampleTree2.tree)
        print("events which triggered callback:", self.nEventsFound)
        print("events in tree 0:", resultsMethodB)
        self.assertEqual(self.nEventsFound, resultsMethodB)
        self.assertTrue(self.nEventsFound > 0)
Beispiel #7
0
def getEventCount(config, sampleIdentifier, cut="1"):
    """Return the event count of a sample for the given cut and print it.

    Args:
        config: configuration object; ``Directories.PREPout`` gives the
            folder the sample is read from.
        sampleIdentifier: name of the sample to load.
        cut: selection string passed to ``tree.Draw`` (default "1").

    Returns:
        The value returned by ``tree.Draw("1", cut, "goff")``.
    """
    sampleTree = SampleTree(
        {
            'name': sampleIdentifier,
            'folder': config.get('Directories', 'PREPout').strip()
        },
        config=config)
    nEvents = sampleTree.tree.Draw("1", cut, "goff")
    # single-argument print(...) gives the same output on Python 2 and 3
    print("%s  => %s" % (sampleIdentifier, nEvents))
    return nEvents
Beispiel #8
0
def getEventCount(config,
                  sampleIdentifier,
                  cut="1",
                  sampleTree=None,
                  sample=None):
    """Return the value of ``tree.Draw`` for the given cut on a sample tree.

    Args:
        config: configuration object, used to locate the sample folder
            when no tree is passed in.
        sampleIdentifier: sample name used when the tree has to be loaded.
        cut: selection string (default "1").
        sampleTree: tree wrapper to use; a new one is created from the
            folder ``Directories.<args.fromFolder>`` if this is falsy.
        sample: passed on to ``sampleTree.getScale``.

    Returns:
        The value returned by ``tree.Draw("1>>h1", cut, "goff")``.
    """
    if not sampleTree:
        folder = config.get('Directories', args.fromFolder).strip()
        sampleTree = SampleTree({'name': sampleIdentifier, 'folder': folder},
                                config=config)

    histogram = ROOT.TH1D("h1", "h1", 1, 0, 2)
    # the scale and the bin content are queried but currently not used; the
    # weighted variant of the Draw call would need them:
    # "(" + cut + ")*genWeight*%1.6f"%scaleToXs
    sampleTree.getScale(sample)
    selectedCount = sampleTree.tree.Draw("1>>h1", cut, "goff")
    histogram.GetBinContent(1)
    histogram.Delete()
    return selectedCount
def getEventCount(config, sampleIdentifier, cut="1"):
    """Return the event count over all files of a sample in the SYSout folder and print it.

    The ``Directories.SYSout`` path is stripped of the two known xrootd
    prefixes to obtain a path usable with ``glob``; every matching
    ``<path>/<sample>/*.root`` file is then opened through the
    ``t3dcachedb`` prefix.

    Args:
        config: configuration object providing ``Directories.SYSout``.
        sampleIdentifier: name of the sample sub-folder.
        cut: selection string passed to ``tree.Draw`` (default "1").

    Returns:
        The value returned by ``tree.Draw("1", cut, "goff")``.
    """
    sysOut = config.get('Directories', 'SYSout').strip()
    t3proto = 'root://t3dcachedb.psi.ch:1094'
    sysOutMountedPath = sysOut.replace(t3proto, '').replace('root://t3dcachedb03.psi.ch:1094', '')
    fileMask = "{path}/{sample}/{tree}.root".format(path=sysOutMountedPath, sample=sampleIdentifier, tree='*')
    sampleFiles = [t3proto + x for x in glob.glob(fileMask)]

    sampleTree = SampleTree(sampleFiles, config=config)
    nEvents = sampleTree.tree.Draw("1", cut, "goff")
    # single-argument print(...) gives the same output on Python 2 and 3
    print("%s ( %s files) => %s" % (sampleIdentifier, len(sampleFiles), nEvents))
    return nEvents
Beispiel #10
0
    def test_TreeCutDict(self):
        """Skim with a nested AND/OR cut dictionary and compare against the equivalent flat cut string."""

        def flattenDict(cutDict):
            # a plain string is a leaf of the cut tree
            if type(cutDict) == str:
                return cutDict
            # anything that is neither str nor dict yields None
            if type(cutDict) != dict:
                return None
            # 'OR' takes precedence over 'AND' if both keys are present
            for key, glue in (('OR', '||'), ('AND', '&&')):
                if key in cutDict:
                    return glue.join(['(%s)' % flattenDict(sub) for sub in cutDict[key]])
            raise Exception('BadTreeTypeCutDict')

        sampleTree = self.getTree()
        cutDict = {
            'OR': [
                {'AND': ['nJet>8', 'a>0', 'a>c']},
                'nJet==6&&Sum$(Jet)>600',
                'c>1',
                {
                    'OR': [
                        'Sum$(Jet)>1200',
                        'a>0.8',
                        'b>80',
                        {'AND': ['a>0.9', 'b>80', 'c>0.9']},
                    ],
                },
            ],
        }
        cutFlat = flattenDict(cutDict)
        print("flat:", cutFlat)

        outputName = TestSampleTreeMethods.scratchDirectory + '/tree_dummy.root'
        sampleTree.addOutputTree(outputName, cutDict, cutSequenceMode='TREE')
        sampleTree.process()

        # method A: entries written to the output file
        skimmedSampleTree = SampleTree([outputName])
        resultsMethodA = skimmedSampleTree.tree.GetEntries()
        # method B: value returned by Draw() with the flattened cut string
        resultsMethodB = sampleTree.tree.Draw("a", cutFlat, "goff")
        self.assertEqual(resultsMethodA, resultsMethodB)
Beispiel #11
0
    def run(self):
        """Write all events of the input files to a dated skim file and copy it to the output path.

        The tree built from ``self.fileNames`` is written (cut '1', all
        branches) to a temporary file below ``self.tmpDir`` and then copied
        to ``self.pathOUT``. If the config has the option
        ``Plot_general.controlSample``, an output branch ``controlSample``
        is added whose value is looked up by ``self.region`` (-1 if the
        region is not listed).

        Copy problems are only reported on stdout; no exception is raised.
        """
        if self.config.has_option('Configuration', 'channel'):
            name = self.config.get('Configuration', 'channel')
        else:
            name = '_'
        timestamp = datetime.datetime.now().strftime("%y%m%d")
        # use the region of this instance (as the controlSample lookup below
        # does) instead of relying on a bare `region` name from outside
        baseName = 'skim_' + name + '_' + self.region + '_' + timestamp
        tmpName = self.tmpDir + '/' + baseName + '_tmp.root'
        destName = self.pathOUT + '/' + baseName + '.root'

        sampleTree = SampleTree(self.fileNames, config=self.config)

        if self.config.has_option('Plot_general', 'controlSample'):
            # NOTE: eval() of a config value -- only safe for trusted config files
            controlSampleDict = eval(
                self.config.get('Plot_general', 'controlSample'))
            controlSample = controlSampleDict[
                self.region] if self.region in controlSampleDict else -1
            sampleTree.addOutputBranch("controlSample",
                                       lambda x: controlSample,
                                       branchType="i")
            print("INFO: setting controlSample to", controlSample)

        sampleTree.addOutputTree(tmpName, cut='1', branches='*', friend=False)
        sampleTree.process()

        # copy to final destination
        if sampleTree.getNumberOfOutputTrees() > 0:
            try:
                self.fileLocator.cp(tmpName, destName, force=True)
                print('copy ', tmpName, destName)

                if not self.fileLocator.isValidRootFile(destName):
                    # keep the temporary file so the output is not lost
                    print(
                        "\x1b[31mERROR: copy failed, output is broken!\x1b[0m")
                else:
                    try:
                        self.fileLocator.rm(tmpName)
                    except Exception as e:
                        print(e)
            except Exception as e:
                print("\x1b[31mERROR: copy failed!", e, "\x1b[0m")
Beispiel #12
0
    def run(self):
        """Merge the prepared input files of one sample and copy the result to its final place.

        With ``self.useChain`` set, the inputs are processed through a
        ``SampleTree`` (branches listed in ``General.remove_branches`` are
        blacklisted on the way); otherwise the command built from
        ``self.commandTemplate`` is executed via ``self.fileLocator``.

        Raises:
            Exception: "HaddError" if the merge step reports a non-zero
                result, "FileCopyError" if copying to the final
                destination fails.
        """
        inputFileNames = []
        for fileName in self.fileNames:
            inputFileNames.append(
                "{path}/{sample}/{fileName}".format(
                    path=self.config.get('Directories', 'HADDin'),
                    sample=self.sampleIdentifier,
                    fileName=self.fileLocator.getFilenameAfterPrep(fileName),
                )
            )

        outputFileName = self.getTemporaryFileName()
        # everything before the last '/' is the folder of the temporary file
        temporaryFolder = outputFileName.rpartition('/')[0]
        self.fileLocator.makedirs(temporaryFolder)

        forceFlag = "-f" if self.force else ""
        command = self.commandTemplate.format(
            output=outputFileName,
            inputs=' '.join(inputFileNames),
            f=forceFlag,
        )
        if self.debug:
            print("DEBUG: run \x1b[34m", command, "\x1b[0m")

        if not self.useChain:
            # standard hadd
            result = self.fileLocator.runCommand(command)
        else:
            # use sampleTree class (can e.g. drop branches at the same time)
            sampleTree = SampleTree(inputFileNames, config=self.config)
            try:
                branchesToDrop = eval(self.config.get('General', 'remove_branches'))
                for branchName in branchesToDrop:
                    sampleTree.addBranchToBlacklist(branchName)
                    print("DEBUG: disable branch ", branchName)
            except Exception as err:
                print("DEBUG: could not disable branch:", err)
            sampleTree.addOutputTree(outputFileName, cut='1', branches='*')
            sampleTree.process()
            result = 0

        print("INFO: hadd returned ", result)
        if result != 0:
            raise Exception("HaddError")

        finalOutputFileName = self.getOutputFileName()
        print("move file to final destination: \x1b[34m", finalOutputFileName, "\x1b[0m")
        finalFolder = finalOutputFileName.rpartition('/')[0]
        self.fileLocator.makedirs(finalFolder)
        if not self.fileLocator.cp(outputFileName, finalOutputFileName, self.force):
            print("\x1b[31mERROR: copy failed\n from:", outputFileName, "\n to:", finalOutputFileName, "\n force:", self.force, "\x1b[0m")
            raise Exception("FileCopyError")

        # removing the temporary file is best effort only
        try:
            self.fileLocator.rm(outputFileName)
        except Exception as err:
            print("ERROR: could not delete temporary file:", outputFileName, " => ", err)
        print("INFO: done.")
Beispiel #13
0
    def run(self):
        """Process all sub-jobs: load the inputs, run the configured modules and store the output.

        A sub-job is skipped if its output file is already a valid ROOT
        file and ``self.opts.force`` is not set. Otherwise its input tree
        is loaded (with a fallback to the original file name below
        ``self.pathIN`` for single-file jobs), the modules named in
        ``self.collections`` as ``Section.key`` are instantiated from the
        config and hooked into the tree, the tree is written to the
        temporary file and the result is copied to the output file.

        Raises:
            Exception: "ProcessingIncomplete" if at least one sub-job was
                counted as failed (unreadable input, or output still
                broken after the second copy attempt).
        """
        nFilesFailed = 0

        for subJob in self.subJobs:

            # only process if output is non-existing/broken or --force was used
            if not self.opts.force and self.fileLocator.isValidRootFile(subJob['outputFileName']):
                print("SKIP: %s" % (subJob['inputFileNames'],))
                continue

            # create directories
            outputFolder = '/'.join(subJob['outputFileName'].split('/')[:-1])
            tmpFolder = '/'.join(subJob['tmpFileName'].split('/')[:-1])
            self.fileLocator.makedirs(outputFolder)
            self.fileLocator.makedirs(tmpFolder)

            # load sample tree
            sampleTree = SampleTree(subJob['localInputFileNames'], config=self.config)
            if not sampleTree.tree:
                print("trying fallback... %s" % (len(subJob['inputFileNames']),))

                if len(subJob['inputFileNames']) == 1:
                    # try original naming scheme if reading directly from Heppy/Nano ntuples (without prep)
                    fileNameOriginal = self.pathIN + '/' + subJob['inputFileNames'][0]
                    print("FO: %s" % (fileNameOriginal,))
                    xrootdRedirector = self.fileLocator.getRedirector(fileNameOriginal)
                    sampleTree = SampleTree([fileNameOriginal], config=self.config, xrootdRedirector=xrootdRedirector)
                    if not sampleTree.tree:
                        print("\x1b[31mERROR: file does not exist or is broken, will be SKIPPED!\x1b[0m")
                        nFilesFailed += 1
                        continue
                else:
                    print("\x1b[31mERROR: file does not exist or is broken, will be SKIPPED! (old naming scheme not supported for joining multipel files)\x1b[0m")
                    nFilesFailed += 1
                    continue

            # to use this syntax, use "--addCollections Sys.Vtype" for a config file entry like this:
            # [Sys]
            # Vtype = VtypeCorrector.VtypeCorrector(channel='Zll')
            # (instead of passing the tree in the constructor, the setTree method can be used)
            pyModules = []
            for collection in self.collections:
                if '.' not in collection:
                    continue
                section = collection.split('.')[0]
                key = collection.split('.')[1]
                pyCode = self.config.get(section, key)

                # import module from myutils
                moduleName = pyCode.split('(')[0].split('.')[0].strip()
                if self.debug:
                    print("DEBUG: import module: %s" % (moduleName,))
                    print("\x1b[33mDEBUG: " + collection + ": run PYTHON code:\n" + pyCode + "\x1b[0m")
                globals()[moduleName] = importlib.import_module(".{module}".format(module=moduleName), package="myutils")

                # get object
                # NOTE: eval() of a config value -- only safe for trusted config files
                wObject = eval(pyCode)

                # pass the tree and other variables if needed to finalize initialization
                if hasattr(wObject, "customInit") and callable(getattr(wObject, "customInit")):
                    wObject.customInit({'config': self.config,
                                        'sampleTree': sampleTree,
                                        'tree': sampleTree.tree,
                                        'sample': self.sample,
                                        'channel': self.channel,
                                        'pathIN': self.pathIN,
                                        'pathOUT': self.pathOUT,
                                        })

                # add callbacks if the objects provides any
                if hasattr(wObject, "processEvent") and callable(getattr(wObject, "processEvent")):
                    sampleTree.addCallback('event', wObject.processEvent)

                # add branches
                if hasattr(wObject, "getBranches") and callable(getattr(wObject, "getBranches")):
                    sampleTree.addOutputBranches(wObject.getBranches())

                pyModules.append(wObject)

            # DEPRECATED, do not use anymore ---> use BranchTools.TreeFormulas()
            if 'addbranches' in self.collections:
                writeNewVariables = eval(self.config.get("Regression", "writeNewVariablesDict"))
                sampleTree.addOutputBranches(writeNewVariables)

            # DEPRECATED, do not use anymore ---> use BranchTools.Drop()
            if 'removebranches' in self.collections:
                # read from self.config like everywhere else in this method
                # (a bare `config` name is not defined here)
                for option in ('useless_branch', 'useless_after_sys'):
                    for br in eval(self.config.get('Branches', option)):
                        sampleTree.addBranchToBlacklist(br)

            # define output file
            sampleTree.addOutputTree(subJob['tmpFileName'], cut='1', branches='*', friend=self.opts.friend)

            # run processing
            for pyModule in pyModules:
                if hasattr(pyModule, "beforeProcessing"):
                    getattr(pyModule, "beforeProcessing")()

            sampleTree.process()

            for pyModule in pyModules:
                if hasattr(pyModule, "afterProcessing"):
                    getattr(pyModule, "afterProcessing")()

            # if output trees have been produced: copy temporary file to output folder
            if sampleTree.getNumberOfOutputTrees() > 0:
                try:
                    self.fileLocator.cp(subJob['tmpFileName'], subJob['outputFileName'], force=True)
                    print("copy  %s %s" % (subJob['tmpFileName'], subJob['outputFileName']))

                    if self.verifyCopy:
                        if not self.fileLocator.isValidRootFile(subJob['outputFileName']):
                            print('INFO: output at final destination broken, try to copy again from scratch disk to final destination...')
                            self.fileLocator.cp(subJob['tmpFileName'], subJob['outputFileName'], force=True)
                            print('INFO: second attempt copy done!')
                            if not self.fileLocator.isValidRootFile(subJob['outputFileName']):
                                print('\x1b[31mERROR: output still broken!\x1b[0m')
                                nFilesFailed += 1
                                raise Exception("FileCopyError")
                            else:
                                print('INFO: file is good after second attempt!')
                except Exception as e:
                    print(e)
                    print("\x1b[31mERROR: copy from scratch to final destination failed!!\x1b[0m")

                # delete temporary file
                try:
                    self.fileLocator.rm(subJob['tmpFileName'])
                except Exception as e:
                    print(e)
                    print("WARNING: could not delete file on scratch!")

            # clean up every module created for this sub-job (not only the
            # last one, and without failing when no module was configured)
            for pyModule in pyModules:
                if hasattr(pyModule, "cleanUp") and callable(getattr(pyModule, "cleanUp")):
                    getattr(pyModule, "cleanUp")()

        if nFilesFailed > 0:
            raise Exception("ProcessingIncomplete")
Beispiel #14
0
class CachePlot(object):
    """Cache, for one sample identifier, the events passing the plot-region cuts.

    One ``TreeCache`` is created per (subsample, region) pair that is not yet
    cached; all of them are attached to a single ``SampleTree`` which is then
    processed once in ``run``.
    """

    def __init__(self,
                 config,
                 sampleIdentifier,
                 regions,
                 splitFilesChunks=1,
                 chunkNumber=1,
                 splitFilesChunkSize=-1,
                 forceRedo=False,
                 fileList=None):
        """Read directories, sample definitions and region cuts from ``config``.

        Args:
            config: configuration object offering ``get``, ``has_option`` and
                ``sections``.
            sampleIdentifier: identifier of the sample whose files are cached.
            regions: iterable of region names; duplicates are dropped and each
                name has to be an option of the ``[Cuts]`` section.
            splitFilesChunks: forwarded to ``TreeCache``.
            chunkNumber: number of the chunk handled by this instance.
            splitFilesChunkSize: forwarded to ``TreeCache`` and ``SampleTree``.
            forceRedo: if true, existing cache files are deleted and redone.
            fileList: optional compressed file list (decoded with
                ``FileList.decompress``), compared in ``run`` against the
                files found at run time.
        """
        self.config = config
        self.sampleIdentifier = sampleIdentifier
        self.regions = list(set(regions))
        self.forceRedo = forceRedo

        self.sampleTree = None
        self.samplesPath = self.config.get('Directories', 'plottingSamples')
        self.samplesDefinitions = self.config.get('Directories', 'samplesinfo')
        self.samplesInfo = ParseInfo(self.samplesDefinitions, self.samplesPath)
        self.sampleFilesFolder = self.config.get('Directories', 'samplefiles')

        # NOTE(review): eval() of config values is only safe for trusted config files
        self.sampleNames = eval(self.config.get('Plot_general', 'samples'))
        self.dataNames = eval(self.config.get('Plot_general', 'Data'))
        self.samples = self.samplesInfo.get_samples(self.sampleNames +
                                                    self.dataNames)

        # region name -> {'cut': cut expression taken from the [Cuts] section}
        self.regionsDict = {}
        for region in self.regions:
            treeCut = config.get('Cuts', region)
            self.regionsDict[region] = {'cut': treeCut}
        self.splitFilesChunkSize = splitFilesChunkSize
        self.splitFilesChunks = splitFilesChunks
        self.chunkNumber = chunkNumber
        self.fileList = FileList.decompress(fileList) if fileList else None

        # a failure to load the library is only reported, not raised
        VHbbNameSpace = config.get('VHbbNameSpace', 'library')
        returnCode = ROOT.gSystem.Load(VHbbNameSpace)
        if returnCode != 0:
            print(
                "\x1b[31mERROR: loading VHbbNameSpace failed with code %d\x1b[0m"
                % returnCode)
        else:
            print("INFO: loaded VHbbNameSpace: %s" % VHbbNameSpace)

    def printInfo(self):
        """Print every region name together with its cut."""
        print("REGION:".ljust(24), "CUT:")
        # items() instead of iteritems(): works on Python 2 and Python 3
        for region, regionInfo in self.regionsDict.items():
            print(" > ", region.ljust(20), regionInfo['cut'])

    def run(self):
        """Create the missing caches for all subsamples and regions.

        Raises:
            Exception: "CreationOfSampleTreeFailed" if the sample tree cannot
                be created, "SampleFilesHaveChanged" if the files found now
                differ from ``self.fileList``.
        """

        # keep additional branches for plotting (both options are optional)
        try:
            keepBranchesPlot = eval(
                self.config.get('Branches', 'keep_branches_plot'))
        except Exception:
            keepBranchesPlot = []
        try:
            keepBranchesPlot += eval(
                self.config.get('Branches', 'keep_branches'))
        except Exception:
            pass

        # also keep some branches which might be used later in variables definition and weights
        try:
            for section in self.config.sections():
                if section.startswith('plotDef:') and self.config.has_option(
                        section, 'relPath'):
                    keepBranchesPlot.append(self.config.get(
                        section, 'relPath'))
        except Exception as e:
            print(e)
        try:
            keepBranchesPlot.append(self.config.get('Weights', 'weightF'))
        except Exception:
            pass
        # plotting region cut
        for regionInfo in self.regionsDict.values():
            keepBranchesPlot.append(regionInfo['cut'])
        keepBranchesPlotFinal = BranchList(
            keepBranchesPlot).getListOfBranches()

        # ----------------------------------------------------------------------------------------------------------------------
        # cache samples
        # ----------------------------------------------------------------------------------------------------------------------
        for sampleToCache in [self.sampleIdentifier]:
            print('*' * 80)
            print(' ', sampleToCache)
            print('*' * 80)
            # caches which still have to be written
            treeCaches = []

            # for all (sub)samples which come from the same files (sampleIdentifier)
            subsamples = [
                x for x in self.samples if x.identifier == sampleToCache
            ]
            for sample in subsamples:

                # add cuts for all plotting regions
                for region, regionInfo in self.regionsDict.items():

                    configSection = 'Plot:%s' % region

                    # cuts
                    sampleCuts = [sample.subcut]
                    if regionInfo['cut']:
                        sampleCuts.append(regionInfo['cut'])
                    if self.config.has_option(configSection, 'Datacut'):
                        sampleCuts.append(
                            self.config.get(configSection, 'Datacut'))
                    if self.config.has_option('Plot_general',
                                              'addBlindingCut'):
                        # append the cut expression itself; the result of
                        # has_option() would only be the boolean True
                        sampleCuts.append(
                            self.config.get('Plot_general',
                                            'addBlindingCut'))

                    # arbitrary (optional) name for the output tree, used for print-out (the TreeCache object has no idea what it is doing, e.g. dc, plot etc.)
                    cacheName = 'plot:{region}_{sample}'.format(
                        region=region, sample=sample.name)

                    # add cache object
                    tc = TreeCache.TreeCache(
                        name=cacheName,
                        sample=sample.name,
                        cutList=sampleCuts,
                        inputFolder=self.samplesPath,
                        splitFilesChunks=self.splitFilesChunks,
                        chunkNumber=self.chunkNumber,
                        splitFilesChunkSize=self.splitFilesChunkSize,
                        fileList=self.fileList,
                        branches=keepBranchesPlotFinal,
                        config=self.config,
                        debug=True)

                    # check if this part of the sample is already cached
                    isCached = tc.partIsCached()
                    if not isCached or self.forceRedo:
                        if isCached:
                            tc.deleteCachedFiles(chunkNumber=self.chunkNumber)

                        # for the first sample which comes from this files, load the tree
                        if not self.sampleTree:
                            self.sampleTree = SampleTree(
                                {
                                    'name': sample.identifier,
                                    'folder': self.samplesPath
                                },
                                splitFilesChunkSize=self.splitFilesChunkSize,
                                chunkNumber=self.chunkNumber,
                                config=self.config,
                                saveMemory=True)
                            if not self.sampleTree or not self.sampleTree.tree:
                                print(
                                    "\x1b[31mERROR: creation of sample tree failed!!\x1b[0m"
                                )
                                raise Exception("CreationOfSampleTreeFailed")
                            # consistency check on the file list at submission time and now
                            fileListNow = self.sampleTree.getSampleFileNameChunk(
                                self.chunkNumber)
                            if self.fileList and (sorted(self.fileList) !=
                                                  sorted(fileListNow)):
                                print(
                                    "\x1b[31mERROR: sample files have changed between submission and run of the job!\x1b[0m"
                                )
                                raise Exception("SampleFilesHaveChanged")

                        treeCaches.append(
                            tc.setSampleTree(self.sampleTree).cache())
                    else:
                        print("INFO: already cached!", tc, "(", tc.hash, ")")

            if len(treeCaches) > 0:
                # run on the tree
                self.sampleTree.process()
            else:
                print("nothing to do!")
Beispiel #15
0
# For every sample identifier of every group: build a SampleTree and get the
# event count for each cut in sampleCuts.
for sampleGroup in sampleGroups:
    count = 0  # NOTE(review): not used in the visible part of this loop
    for sampleIdentifier in sampleGroup:
        print "\x1b[32m", sampleIdentifier, "\x1b[0m"
        # per-identifier dictionary, keyed by cut (see the duplicate check below)
        countDict[sampleIdentifier] = {}

        # only the first MC sample matching the identifier is used
        samples_matching = [
            x for x in mcSamples if x.identifier == sampleIdentifier
        ]
        if len(samples_matching) > 0:
            sample = samples_matching[0]

            # one tree per sample, shared by all cuts below
            sampleTree = SampleTree(
                {
                    'sample': sample,
                    'folder': config.get('Directories',
                                         args.fromFolder).strip()
                },
                config=config)
            print "CUT=", sampleCuts, ":"
            for sampleCut in sampleCuts:
                sampleCount = getEventCount(config,
                                            sampleIdentifier,
                                            sampleCut,
                                            sampleTree=sampleTree,
                                            sample=sample)
                print sampleIdentifier, sampleCut, "\x1b[34m=>", sampleCount, "\x1b[0m"
                # the same cut must not be counted twice for one identifier
                if sampleCut in countDict[sampleIdentifier]:
                    print "duplicate!!", sampleIdentifier, sampleCut, countDict[
                        sampleIdentifier][sampleCut]
                    raise Exception("duplicate")
Beispiel #16
0
print 'collections to add:', collections


# Process the input files one by one: derive input/output/scratch file names,
# make sure the folders exist and open the input tree.
for fileName in filelist:
    localFileName = fileLocator.getFilenameAfterPrep(fileName)
    # all three locations use the layout <path>/<sample identifier>/<file name>
    inputFileName = "{path}/{subfolder}/{filename}".format(path=pathIN, subfolder=sample.identifier, filename=localFileName)
    outputFileName = "{path}/{subfolder}/{filename}".format(path=pathOUT, subfolder=sample.identifier, filename=localFileName)
    tmpFileName = "{path}/{subfolder}/{filename}".format(path=tmpDir, subfolder=sample.identifier, filename=localFileName)
    # folder = file name without its last path component
    outputFolder = '/'.join(outputFileName.split('/')[:-1])
    tmpFolder = '/'.join(tmpFileName.split('/')[:-1])
    fileLocator.makedirs(tmpFolder)
    fileLocator.makedirs(outputFolder)

    # a valid output file is only redone when --force is given
    if opts.force or not fileLocator.isValidRootFile(outputFileName):
        # load sample tree and initialize vtype corrector
        sampleTree = SampleTree([inputFileName], config=config)
        if not sampleTree.tree:
            # try original naming scheme if reading directly from Heppy/Nano ntuples (without prep)
            fileNameOriginal = pathIN + '/' + fileName
            print "FO:", fileNameOriginal
            xrootdRedirector = fileLocator.getRedirector(fileNameOriginal)
            sampleTree = SampleTree([fileNameOriginal], config=config, xrootdRedirector=xrootdRedirector)
            if not sampleTree.tree:
                print "\x1b[31mERROR: file does not exist or is broken, will be SKIPPED!\x1b[0m"
                continue

        # lists of single modules can be given instead of a module, "--addCollections Sys.all"
        # [Sys]
        # all = ['Sys.Vtype', 'Sys.Leptons', ...]
        collectionsListsReplaced = []
        for collection in collections:
Beispiel #17
0
 def getTree(self):
     """Return a SampleTree over tree_0.root ... tree_9.root in the scratch directory."""
     treeFiles = []
     for index in range(10):
         treeFiles.append(self.scratchDirectory + '/tree_%d.root' % index)
     return SampleTree(treeFiles)
Beispiel #18
0
    def prepare(self):
        """Set up a TreeCache for every datacard region and subsample not yet cached.

        The caches are collected in ``self.treeCaches`` and connected to one
        shared ``self.sampleTree``, which is created on first use.

        Returns:
            self, so that calls can be chained.

        Raises:
            Exception: "CreationOfSampleTreeFailed" if the sample tree cannot
                be created, "SampleFilesHaveChanged" if the files found now
                differ from ``self.fileList``.
        """
        # without datacard regions there is nothing to set up
        if len(self.dcMakers) == 0:
            print("WARNING: no datacard regions added, nothing to do.")
            return self

        self.treeCaches = []
        self.sampleTree = None

        # subsamples which come from the root files of the sample to cache
        subsamples = [
            candidate for candidate in self.getAllSamples()
            if candidate.identifier == self.sampleToCache
        ]

        # every datacard region is combined with every subsample
        for dcMaker in self.dcMakers:
            for sample in subsamples:

                # subcut AND (OR of all systematics cuts): this way a single
                # cache file can be used for all the systematics
                isData = (sample.type == 'DATA')
                distinctCacheCuts = set()
                for systematic in dcMaker.getSystematicsList(isData=isData):
                    distinctCacheCuts.add(systematic['cachecut'])
                systematicsCuts = sorted(distinctCacheCuts)
                sampleCuts = {'AND': [sample.subcut, {'OR': systematicsCuts}]}
                if self.verbose:
                    cutsAsJson = json.dumps(sampleCuts,
                                            sort_keys=True,
                                            indent=8,
                                            default=str)
                    print(cutsAsJson)

                # branches to keep in the root file
                branchList = BranchList(sample.subcut)
                for key in ('cachecut', 'cut', 'var', 'weight'):
                    branchList.addCut(
                        [entry[key] for entry in dcMaker.getSystematicsList()])
                branchList.addCut(self.config.get('Weights', 'weightF'))
                keepBranches = eval(self.config.get('Branches', 'keep_branches'))
                branchList.addCut(keepBranches)
                branchesToKeep = branchList.getListOfBranches()

                # optional name of the output tree, only used for print-out
                # (the TreeCache object does not know what it is used for)
                cacheName = 'dc:{region}_{sample}'.format(
                    region=dcMaker.getRegion(), sample=sample.name)

                # the cache object itself
                tc = TreeCache.TreeCache(
                    name=cacheName,
                    sample=sample.name,
                    cutList=sampleCuts,
                    cutSequenceMode='TREE',
                    branches=branchesToKeep,
                    inputFolder=dcMaker.path,
                    splitFilesChunks=self.splitFilesChunks,
                    chunkNumber=self.chunkNumber,
                    splitFilesChunkSize=self.splitFilesChunkSize,
                    fileList=self.fileList,
                    config=self.config,
                    debug=self.verbose)

                # is this part of the sample already cached?
                isCached = tc.partIsCached()
                statusMessage = "check if sample \x1b[34m{sample}\x1b[0m part {part} is cached:".format(
                    sample=sample.name, part=self.chunkNumber)
                print(statusMessage, isCached)
                if isCached and not self.forceRedo:
                    continue

                # forced redo: remove what is there first
                if isCached:
                    tc.deleteCachedFiles(chunkNumber=self.chunkNumber)

                # the tree is loaded once, for the first sample that needs it
                if not self.sampleTree:
                    treeDefinition = {
                        'name': sample.identifier,
                        'folder': dcMaker.path
                    }
                    self.sampleTree = SampleTree(
                        treeDefinition,
                        splitFilesChunkSize=self.splitFilesChunkSize,
                        chunkNumber=self.chunkNumber,
                        config=self.config,
                        saveMemory=True)
                    if not (self.sampleTree and self.sampleTree.tree):
                        print(
                            "\x1b[31mERROR: creation of sample tree failed!!\x1b[0m"
                        )
                        raise Exception("CreationOfSampleTreeFailed")

                    # the files found now have to match the list from submission time
                    fileListNow = self.sampleTree.getSampleFileNameChunk(
                        self.chunkNumber)
                    if self.fileList and (sorted(self.fileList) !=
                                          sorted(fileListNow)):
                        print(
                            "\x1b[31mERROR: sample files have changed between submission and run of the job!\x1b[0m"
                        )
                        raise Exception("SampleFilesHaveChanged")

                # connect the cache to the input tree and remember it
                connectedCache = tc.setSampleTree(self.sampleTree).cache()
                self.treeCaches.append(connectedCache)
        return self
Beispiel #19
0
 def getTree(self, path):
     """Return a SampleTree for one fixed remote ntuple file; ``path`` is not used."""
     remoteTreeFile = 'root://xrootd-cms.infn.it//store/group/phys_higgs/hbb/ntuples/V25/TT_TuneCUETP8M2T4_13TeV-powheg-pythia8/VHBB_HEPPY_V25_TT_TuneCUETP8M2T4_13TeV-powheg-Py8__RunIISummer16MAv2-PUMoriond17_80r2as_2016_TrancheIV_v6-v1/170202_212737/0000/tree_100.root'
     return SampleTree([remoteTreeFile])
Beispiel #20
0
import sys
import os
from myutils.sampleTree import SampleTree

# input: file with one tree filename per line, e.g.
# /path/to/tree_1.root
# /path/to/tree_2.root

# output: txt file with json compatible list of [run, ls]
# [[304292, 29], [304663, 510], [302163, 561], ... ]

# the usage line is printed on every run; the script name is now filled in
print("usage: %s outputfile.txt inputfile.txt [redirector]" % sys.argv[0])

# fail with a clear message instead of an IndexError when arguments are missing
if len(sys.argv) < 3:
    raise Exception("Not enough arguments!", sys.argv[1:])

if os.path.isfile(sys.argv[2]):
    outputFileName = sys.argv[1]
    # the optional third argument is passed on as xrootd redirector
    sampleTree = SampleTree(sys.argv[2], 'Events', xrootdRedirector=sys.argv[3] if len(sys.argv) > 3 else '')
else:
    raise Exception("Input file not found!", sys.argv[2])

# only the two branches needed below are read
sampleTree.tree.SetBranchStatus("*", 0)
sampleTree.tree.SetBranchStatus("run", 1)
sampleTree.tree.SetBranchStatus("luminosityBlock", 1)

# distinct (run, luminosityBlock) pairs over all events
runLumi = set()
for event in sampleTree:
    runLumi.add((event.run, event.luminosityBlock))

# written as a list of [run, ls] pairs, sorted so that the output is reproducible
with open(outputFileName, 'w') as f:
    f.write("%r" % [list(x) for x in sorted(runLumi)])
Beispiel #21
0
        print("LENGTH NOT 1")

    #print("\x1b[41m\x1b[32m")

    # in case the distinction between subsamples is needed, one could access the cut definitions for the subsamples
    # with: subsample.subcut for subsample in subsamples
    #sample     = sampleInfo.getFullSample(sampleIdentifier)
    #subsamples = sampleInfo.getSubsamples(sampleIdentifier)

    #print('subsample_pirmin', subsamples)
    #print('sample_pirmin', sample)
    #print("\x1b[41m\x1b[0m")

    # tree of the sample, read from the given directory
    sampleTree = SampleTree({
        'sample': sample,
        'folder': directory
    },
                            config=config)
    #raw_input()

    # since we load all trees, we can compute the factor to scale cross section to luminosity directly (otherwise write it to
    # the ntuples first and then use it as branch, or compute it with the full set of trees before)
    scaleXStoLumi = sampleTree.getScale(sample)

    # enable only used branches!
    # this will speed up processing a lot
    sampleTree.enableBranches(
        BranchList([
            signalRegionSelection, weightExpression_DeepCSV,
            weightExpression_DeepJet, taggerExpression_DeepCSV,
            taggerExpression_DeepJet
Beispiel #22
0
    def run(self):
        """Create the missing caches for all subsamples and plotting regions.

        One ``TreeCache`` is set up per (subsample, region) pair that is not
        cached yet (or for all pairs if ``self.forceRedo`` is set); they share
        one ``self.sampleTree`` which is processed once at the end.

        Raises:
            Exception: "CreationOfSampleTreeFailed" if the sample tree cannot
                be created, "SampleFilesHaveChanged" if the files found now
                differ from ``self.fileList``.
        """

        # keep additional branches for plotting (both options are optional)
        try:
            keepBranchesPlot = eval(self.config.get('Branches', 'keep_branches_plot'))
        except Exception:
            keepBranchesPlot = []
        try:
            keepBranchesPlot += eval(self.config.get('Branches', 'keep_branches'))
        except Exception:
            pass

        # also keep some branches which might be used later in variables definition and weights
        try:
            for section in self.config.sections():
                if section.startswith('plotDef:') and self.config.has_option(section, 'relPath'):
                    keepBranchesPlot.append(self.config.get(section, 'relPath'))
        except Exception as e:
            print("\x1b[31mERROR: config file contains an error! automatic selection of branches to keep will not work!\x1b[0m")
            print(e)
        try:
            keepBranchesPlot.append(self.config.get('Weights', 'weightF'))
        except Exception:
            pass
        # plotting region cut
        # values()/items() instead of iteritems(): works on Python 2 and Python 3
        for regionInfo in self.regionsDict.values():
            keepBranchesPlot.append(regionInfo['cut'])
        keepBranchesPlotFinal = BranchList(keepBranchesPlot).getListOfBranches()
        print("KEEP:", keepBranchesPlotFinal)


        # ----------------------------------------------------------------------------------------------------------------------
        # cache samples
        # ----------------------------------------------------------------------------------------------------------------------
        for sampleToCache in [self.sampleIdentifier]:
            print ('*'*80)
            print (' ',sampleToCache)
            print ('*'*80)
            # caches which still have to be written
            treeCaches = []

            # for all (sub)samples which come from the same files (sampleIdentifier)
            subsamples = [x for x in self.samples if x.identifier == sampleToCache]
            for sample in subsamples:

                # add cuts for all plotting regions
                for region,regionInfo in self.regionsDict.items():

                    configSection = 'Plot:%s'%region

                    # cuts
                    sampleCuts = [sample.subcut]
                    if regionInfo['cut']:
                        sampleCuts.append(regionInfo['cut'])
                    if self.config.has_option(configSection, 'Datacut'):
                        sampleCuts.append(self.config.get(configSection, 'Datacut'))
                    if self.config.has_option('Plot_general','addBlindingCut'):
                        # append the cut expression itself; the result of
                        # has_option() would only be the boolean True
                        sampleCuts.append(self.config.get('Plot_general', 'addBlindingCut'))

                    # arbitrary (optional) name for the output tree, used for print-out (the TreeCache object has no idea what it is doing, e.g. dc, plot etc.)
                    cacheName = 'plot:{region}_{sample}'.format(region=region, sample=sample.name)

                    # add cache object
                    tc = TreeCache.TreeCache(
                        name=cacheName,
                        sample=sample.name,
                        cutList=sampleCuts,
                        inputFolder=self.samplesPath,
                        splitFilesChunks=self.splitFilesChunks,
                        chunkNumber=self.chunkNumber,
                        splitFilesChunkSize=self.splitFilesChunkSize,
                        fileList=self.fileList,
                        branches=keepBranchesPlotFinal,
                        config=self.config,
                        debug=True
                    )

                    # check if this part of the sample is already cached
                    isCached = tc.partIsCached()
                    if not isCached or self.forceRedo:
                        if isCached:
                            tc.deleteCachedFiles(chunkNumber=self.chunkNumber)

                        # for the first sample which comes from this files, load the tree
                        if not self.sampleTree:
                            self.sampleTree = SampleTree({'name': sample.identifier, 'folder': self.samplesPath}, splitFilesChunkSize=self.splitFilesChunkSize, chunkNumber=self.chunkNumber, config=self.config, saveMemory=True)
                            if not self.sampleTree or not self.sampleTree.tree:
                                print ("\x1b[31mERROR: creation of sample tree failed!!\x1b[0m")
                                raise Exception("CreationOfSampleTreeFailed")
                            # consistency check on the file list at submission time and now
                            fileListNow = self.sampleTree.getSampleFileNameChunk(self.chunkNumber)
                            if self.fileList and (sorted(self.fileList) != sorted(fileListNow)):
                                print ("\x1b[31mERROR: sample files have changed between submission and run of the job!\x1b[0m")
                                raise Exception("SampleFilesHaveChanged")

                        treeCaches.append(tc.setSampleTree(self.sampleTree).cache())
                    else:
                        print ("INFO: already cached!",tc, "(",tc.hash,")")

            if len(treeCaches) > 0:
                # run on the tree
                self.sampleTree.process()
            else:
                print ("nothing to do!")
Beispiel #23
0
 def getTree(self):
     """Return a SampleTree over tree_0.root and tree_1.root in the test scratch directory."""
     treeFiles = []
     for index in range(2):
         treeFiles.append(TestSampleTreeCallbacksMethods.scratchDirectory + '/tree_%d.root' % index)
     return SampleTree(treeFiles)
Beispiel #24
0
# options of the MVA section read by the tensorflow evaluator
config.set(mvaName, "checkpoint", checkpoint)
config.set(mvaName, "branchName", branchName)
config.set(mvaName, "nClasses", "%d"%nClasses)
config.set(mvaName, "treeVarSet", "dnnVars")
# list of systematics and one variable set per systematic
config.add_section("systematics")
config.set("systematics", "systematics", " ".join(systematics))
config.add_section("dnnVars")
for syst in systematics:
    config.set("dnnVars", syst, treeVarSet[syst])

# helper for fs operations
fileLocator = FileLocator(config=config, xrootdRedirector=xrootdRedirector)
fileLocator.mkdir(outputFolder)

# load input files
sampleTree = SampleTree([inputFile], treeName=inputTreeName, xrootdRedirector=xrootdRedirector)

# load tensorflow evaluator
tfe = tensorflowEvaluator.tensorflowEvaluator(mvaName)
tfe.customInit({'config': config, 'sample': sample, 'sampleTree': sampleTree})

# register callbacks for processing
sampleTree.addCallback('event', tfe.processEvent)

# define new branches to add
sampleTree.addOutputBranches(tfe.getBranches())

# best effort: also create the output folder with os.makedirs; only file
# system errors (e.g. the folder already exists) are ignored, anything else
# is a programming error and is no longer hidden
try:
    os.makedirs(outputFolder)
except OSError:
    pass
Beispiel #25
0
# options of the MVA section read by the tensorflow evaluator
config.set(mvaName, "scalerDump", scalerDump) 
config.set(mvaName, "checkpoint", checkpoint) 
config.set(mvaName, "branchName", branchName)
config.set(mvaName, "treeVarSet", "dnnVars") 
# list of systematics and one variable set per systematic
config.add_section("systematics")
config.set("systematics", "systematics", " ".join(systematics))
config.add_section("dnnVars")
for syst in systematics:
    config.set("dnnVars", syst, treeVarSet[syst])

# helper for fs operations
fileLocator = FileLocator(config=config, xrootdRedirector=xrootdRedirector)
fileLocator.mkdir(outputFolder)

# load input files
sampleTree = SampleTree([inputFile], treeName="tree", xrootdRedirector=xrootdRedirector)

# load tensorflow evaluator
tfe = tensorflowEvaluator.tensorflowEvaluator(mvaName)
tfe.customInit({'config': config, 'sample': sample, 'sampleTree': sampleTree})

# register callbacks for processing
sampleTree.addCallback('event', tfe.processEvent)

# define new branches to add
sampleTree.addOutputBranches(tfe.getBranches())

# define output file 
# (the tree is written to scratch first; both names reuse the input file's base name)
tmpFileName = scratch + '/' + inputFile.split('/')[-1]
outputFileName = outputFolder + '/' + inputFile.split('/')[-1]
sampleTree.addOutputTree(tmpFileName, cut='1', branches='*')
Beispiel #26
0
    #parser.add_argument('-v', action='store', dest='variable', default='Max$(Jet_pt)', help='variable to compute the efficiency of (differentially)')
    #parser.add_argument('-r', action='store', dest='range', default='0,1000,100', help='min,max,nbins of variable to create histogram')
    parser.add_argument('-o',
                        action='store',
                        dest='output',
                        default='trigeff.root',
                        help='output .root file')
    #parser.add_argument('-l', action='store', dest='loose', default='((nJet>0)&&HLT_BIT_HLT_PFJet80_v)*HLT_BIT_HLT_PFJet80_v_Prescale', help='loose cut')
    #parser.add_argument('-t', action='store', dest='tight', default='((nJet>0)&&HLT_BIT_HLT_PFJet140_v)*HLT_BIT_HLT_PFJet140_v_Prescale', help='tight cut')
    args = parser.parse_args()

    # number of input files to read, passed to SampleTree as limitFiles
    limitTrees = int(args.limit)
    print('ARGS:', args)

    # read samples
    sampleTree = SampleTree(args.sample, limitFiles=limitTrees)
    if not sampleTree:
        print('creating sample tree failed!')
        # NOTE(review): exits with status 0 although this is a failure -- confirm this is intended
        exit(0)

    # trigger efficiency histograms
    # each entry holds: name, range, a 'loose' and a 'tight' selection
    # expression and the variable they are evaluated against
    triggerEfficiencyHistograms = [
        {
            'name': 'HLT_PFJet140',
            'range': [0, 1000, 100],
            'loose':
            '((nJet>0)&&HLT_BIT_HLT_PFJet80_v)*HLT_BIT_HLT_PFJet80_v_Prescale',
            'tight':
            '((nJet>0)&&HLT_BIT_HLT_PFJet140_v)*HLT_BIT_HLT_PFJet140_v_Prescale',
            'variable': 'Max$(Jet_pt)'
        },
Beispiel #27
0
    print ("need exactly 1 sample identifier as input with -S !!", matchingSamples)
    exit(1)
sample = matchingSamples[0]

# Process the input files one by one: derive input/output/scratch file names,
# make sure the folders exist and set up the MVA branches for the input tree.
for fileName in filelist:
    localFileName = fileLocator.getFilenameAfterPrep(fileName)
    # all three locations use the layout <path>/<sample identifier>/<file name>
    inputFileName = "{path}/{subfolder}/{filename}".format(path=INpath, subfolder=sample.identifier, filename=localFileName)
    outputFileName = "{path}/{subfolder}/{filename}".format(path=OUTpath, subfolder=sample.identifier, filename=localFileName)
    tmpFileName = "{path}/{subfolder}/{filename}".format(path=tmpDir, subfolder=sample.identifier, filename=localFileName)
    # folder = file name without its last path component
    outputFolder = '/'.join(outputFileName.split('/')[:-1])
    tmpFolder = '/'.join(tmpFileName.split('/')[:-1])
    fileLocator.makedirs(tmpFolder)
    fileLocator.makedirs(outputFolder)
    # a valid output file is only redone when --force is given
    if not fileLocator.isValidRootFile(outputFileName) or opts.force:
        # load sample tree
        sampleTree = SampleTree([inputFileName], config=config)
        if not sampleTree.tree:
            print ("\x1b[31mERROR: file does not exist or is broken, will be SKIPPED!\x1b[0m")
            continue
        # Set branch address for all vars
        for i in range(0, len(theMVAs)):
            theMVAs[i].setVariables(sampleTree.tree, sample)
        # one branch definition per MVA, with one leaf per systematic
        mvaBranches = []
        for i in range(0, len(theMVAs)):
            mvaBranches.append({
                    'name': MVAinfos[i].MVAname,
                    'length': len(systematics.split()),
                    'formula': theMVAs[i].evaluate,
                    'leaflist': ':'.join(systematics.split())+'/F',
                    # force 'array-style' filling = passing the pointer to the array to the function instead of using the return value, even when the branch is a scalar, e.g. when only nominal systematic is selected
                    'arrayStyle': True,
Beispiel #28
0
class CachePlot(object):
    """Cache the trees of one sample identifier for a set of plot regions.

    For every (sub)sample sharing the sample identifier and every requested
    region a ``TreeCache`` is configured with the combined cuts. The input
    files are only opened if at least one of these caches is missing or
    ``forceRedo`` is set.
    """

    def __init__(self, config, sampleIdentifier, regions, splitFilesChunks=1, chunkNumber=1, splitFilesChunkSize=-1, forceRedo=False, fileList=None):
        """Read the plotting configuration and build the per-region cut table.

        Args:
            config: configuration object; its ``get``, ``has_option`` and
                ``sections`` methods are used.
            sampleIdentifier: identifier of the sample whose files are cached.
            regions: iterable of region names (duplicates are dropped); each
                name is looked up as an option of the ``Cuts`` section.
            splitFilesChunks: forwarded unchanged to ``TreeCache``.
            chunkNumber: forwarded to ``TreeCache`` and ``SampleTree``.
            splitFilesChunkSize: forwarded to ``TreeCache`` and ``SampleTree``.
            forceRedo: if true, existing cache parts are deleted and redone.
            fileList: compressed file list (decoded with
                ``FileList.decompress``) or ``None``.
        """
        self.config = config
        self.sampleIdentifier = sampleIdentifier
        self.regions = list(set(regions))
        self.forceRedo = forceRedo

        self.sampleTree = None
        self.samplesPath = self.config.get('Directories', 'plottingSamples')
        self.samplesDefinitions = self.config.get('Directories', 'samplesinfo')
        self.samplesInfo = ParseInfo(self.samplesDefinitions, self.samplesPath)
        self.sampleFilesFolder = self.config.get('Directories', 'samplefiles')

        # NOTE(review): eval() executes whatever the config contains; this is
        # only acceptable as long as the configuration files are trusted.
        self.sampleNames = eval(self.config.get('Plot_general', 'samples'))
        self.dataNames = eval(self.config.get('Plot_general', 'Data'))
        self.samples = self.samplesInfo.get_samples(self.sampleNames + self.dataNames)

        self.regionsDict = {}
        for region in self.regions:
            treeCut = config.get('Cuts', region)
            self.regionsDict[region] = {'cut': treeCut}
        self.splitFilesChunkSize = splitFilesChunkSize
        self.splitFilesChunks = splitFilesChunks
        self.chunkNumber = chunkNumber
        self.fileList = FileList.decompress(fileList) if fileList else None

        VHbbNameSpace = config.get('VHbbNameSpace', 'library')
        returnCode = ROOT.gSystem.Load(VHbbNameSpace)
        if returnCode != 0:
            print ("\x1b[31mERROR: loading VHbbNameSpace failed with code %d\x1b[0m"%returnCode)
        else:
            print ("INFO: loaded VHbbNameSpace: %s"%VHbbNameSpace)

    def printInfo(self):
        """Print the table of regions and their cuts."""
        print ("REGION:".ljust(24),"CUT:")
        # items() instead of iteritems(): works on Python 2 and Python 3
        for region, regionInfo in self.regionsDict.items():
            print (" > ",region.ljust(20), regionInfo['cut'])

    def _getSampleCuts(self, sample, region, regionInfo):
        """Return the list of cuts to apply to `sample` in plot region `region`.

        The list starts with the sample's subcut, followed by the region cut
        (if not empty), the optional ``Datacut`` of section ``Plot:<region>``
        and the optional ``addBlindingCut`` of section ``Plot_general``.
        """
        configSection = 'Plot:%s' % region
        sampleCuts = [sample.subcut]
        if regionInfo['cut']:
            sampleCuts.append(regionInfo['cut'])
        if self.config.has_option(configSection, 'Datacut'):
            sampleCuts.append(self.config.get(configSection, 'Datacut'))
        if self.config.has_option('Plot_general', 'addBlindingCut'):
            # append the cut expression itself; has_option() only yields a
            # boolean, which must not end up in the cut list
            sampleCuts.append(self.config.get('Plot_general', 'addBlindingCut'))
        return sampleCuts

    def run(self):
        """Create all missing plot caches for the configured sample identifier.

        Raises:
            Exception: "CreationOfSampleTreeFailed" if the input tree cannot
                be created, "SampleFilesHaveChanged" if the files found now
                differ from the file list passed at construction time.
        """
        # keep additional branches for plotting (optional config entry)
        try:
            keepBranchesPlot = eval(self.config.get('Branches', 'keep_branches_plot'))
        except Exception:
            keepBranchesPlot = []

        # also keep some branches which might be used later in variables definition and weights
        try:
            for section in self.config.sections():
                if section.startswith('plotDef:') and self.config.has_option(section, 'relPath'):
                    keepBranchesPlot.append(self.config.get(section, 'relPath'))
        except Exception as e:
            print(e)
        try:
            keepBranchesPlot.append(self.config.get('Weights', 'weightF'))
        except Exception:
            # the weight expression is optional for the branch selection
            pass
        keepBranchesPlotFinal = BranchList(keepBranchesPlot).getListOfBranches()

        # ----------------------------------------------------------------------------------------------------------------------
        # cache samples
        # ----------------------------------------------------------------------------------------------------------------------
        sampleToCache = self.sampleIdentifier
        print ('*'*80)
        print (' ',sampleToCache)
        print ('*'*80)
        treeCaches = []

        # for all (sub)samples which come from the same files (sampleIdentifier)
        subsamples = [x for x in self.samples if x.identifier == sampleToCache]
        for sample in subsamples:

            # add cuts for all plot regions
            for region, regionInfo in self.regionsDict.items():
                sampleCuts = self._getSampleCuts(sample, region, regionInfo)

                # arbitrary (optional) name for the output tree, used for print-out (the TreeCache object has no idea what it is doing, e.g. dc, plot etc.)
                cacheName = 'plot:{region}_{sample}'.format(region=region, sample=sample.name)

                # add cache object
                tc = TreeCache.TreeCache(
                    name=cacheName,
                    sample=sample.name,
                    cutList=sampleCuts,
                    inputFolder=self.samplesPath,
                    splitFilesChunks=self.splitFilesChunks,
                    chunkNumber=self.chunkNumber,
                    splitFilesChunkSize=self.splitFilesChunkSize,
                    fileList=self.fileList,
                    branches=keepBranchesPlotFinal,
                    config=self.config,
                    debug=True
                )

                # check if this part of the sample is already cached
                isCached = tc.partIsCached()
                if isCached and not self.forceRedo:
                    print ("INFO: already cached!",tc, "(",tc.hash,")")
                    continue
                if isCached:
                    tc.deleteCachedFiles(chunkNumber=self.chunkNumber)

                # for the first sample which comes from these files, load the tree
                if not self.sampleTree:
                    self.sampleTree = SampleTree({'name': sample.identifier, 'folder': self.samplesPath}, splitFilesChunkSize=self.splitFilesChunkSize, chunkNumber=self.chunkNumber, config=self.config, saveMemory=True)
                    if not self.sampleTree or not self.sampleTree.tree:
                        print ("\x1b[31mERROR: creation of sample tree failed!!\x1b[0m")
                        raise Exception("CreationOfSampleTreeFailed")
                    # consistency check on the file list at submission time and now
                    fileListNow = self.sampleTree.getSampleFileNameChunk(self.chunkNumber)
                    if self.fileList and (sorted(self.fileList) != sorted(fileListNow)):
                        print ("\x1b[31mERROR: sample files have changed between submission and run of the job!\x1b[0m")
                        raise Exception("SampleFilesHaveChanged")

                treeCaches.append(tc.setSampleTree(self.sampleTree).cache())

        if treeCaches:
            # run on the tree
            self.sampleTree.process()
        else:
            print ("nothing to do!")
Beispiel #29
0
#! /usr/bin/env python
from __future__ import print_function
import ROOT
ROOT.gROOT.SetBatch(True)
from myutils.XbbConfig import XbbConfigReader, XbbConfigTools
from myutils.sampleTree import SampleTree as SampleTree
from myutils.BranchList import BranchList

# Example: print the values of a few formulas for the first ~100 events of
# the 'MET' sample of the "Zvv2017" configuration.
config = XbbConfigTools(XbbConfigReader.read("Zvv2017"))
sampleDefinition = {
    'name': 'MET',
    'folder': config.get('Directories', 'dcSamples'),
}
sampleTree = SampleTree(sampleDefinition, config=config)
variables = ["H_pt", "MET_Pt", "H_pt/MET_Pt"]

# enable only explicitly used branches
usedBranches = BranchList(variables).getListOfBranches()
sampleTree.enableBranches(usedBranches)

# register one formula per variable expression
for expression in variables:
    sampleTree.addFormula(expression)

# loop over events, stop once the entry number exceeds 98
for _ in sampleTree:
    entryNumber = sampleTree.tree.GetReadEntry()
    formatted = []
    for expression in variables:
        formatted.append(expression + "=%1.4f" % sampleTree.evaluate(expression))
    print(entryNumber, ", ".join(formatted))
    if entryNumber > 98:
        break
#!/usr/bin/env python
import ROOT
from myutils.sampleTree import SampleTree

# Collect the distinct [run, luminosityBlock] pairs present in a sample and
# write them to a text file.

#pp
#sampleTree = SampleTree('VHbbPostNano2017_V2_DoubleEG.txt', 'Events', xrootdRedirector='root://t3dcachedb03.psi.ch:1094/')
sampleTree = SampleTree('VHbbPostNano2017_V2_DoubleMuon.txt',
                        'Events',
                        xrootdRedirector='root://t3dcachedb03.psi.ch:1094/')
outputFileName = 'existing_lumis_pp_DoubleMuon.txt'

#nano
#sampleTree = SampleTree('2017-94X-Nano01-DoubleEG.txt', 'Events', xrootdRedirector='root://xrootd-cms.infn.it/')
#outputFileName = 'existing_lumis_nano.txt'

# only the two branches read in the loop below are enabled
sampleTree.tree.SetBranchStatus("*", 0)
sampleTree.tree.SetBranchStatus("run", 1)
sampleTree.tree.SetBranchStatus("luminosityBlock", 1)

runLumi = []
# set of already recorded pairs: constant-time membership test instead of
# scanning the growing result list for every event
seenRunLumi = set()
for i in sampleTree:
    #if [i.run, i.luminosityBlock] not in runLumi and i.run>=302030 and i.run <= 303434:
    #    runLumi.append([i.run, i.luminosityBlock])
    runLumiPair = (i.run, i.luminosityBlock)
    if runLumiPair not in seenRunLumi:
        seenRunLumi.add(runLumiPair)
        # keep the output format: list of [run, luminosityBlock] in first-seen order
        runLumi.append(list(runLumiPair))

# single-argument print call: same output on Python 2, valid syntax on Python 3
print(runLumi)
with open(outputFileName, 'w') as f:
    f.write("%r" % runLumi)
Beispiel #31
0
    def run(self):
        """Create all missing plot caches for the configured sample identifier.

        The branches to keep are collected from the optional config entries
        ``Branches.keep_branches_plot`` and ``Branches.keep_branches``, the
        ``relPath`` of every ``plotDef:*`` section, ``Weights.weightF`` and
        the region cuts. For every (sub)sample and region a ``TreeCache`` is
        configured; the input tree is only opened if a cache part is missing
        or ``self.forceRedo`` is set.

        Raises:
            Exception: "CreationOfSampleTreeFailed" if the input tree cannot
                be created, "SampleFilesHaveChanged" if the files found now
                differ from ``self.fileList``.
        """
        # keep additional branches for plotting
        # NOTE(review): eval() executes whatever the config contains; only
        # acceptable as long as the configuration files are trusted.
        try:
            keepBranchesPlot = eval(
                self.config.get('Branches', 'keep_branches_plot'))
        except Exception:
            keepBranchesPlot = []
        try:
            keepBranchesPlot += eval(
                self.config.get('Branches', 'keep_branches'))
        except Exception:
            # optional config entry
            pass

        # also keep some branches which might be used later in variables definition and weights
        try:
            for section in self.config.sections():
                try:
                    if section.startswith(
                            'plotDef:') and self.config.has_option(
                                section, 'relPath'):
                        keepBranchesPlot.append(
                            self.config.get(section, 'relPath'))
                except Exception as e:
                    print("\x1b[31mWARNING: config error in:", section, "=>",
                          e, "\x1b[0m")
        except Exception as e2:
            print(
                "\x1b[31mERROR: config file contains an error! automatic selection of branches to keep will not work!\x1b[0m"
            )
            print(e2)
        try:
            keepBranchesPlot.append(self.config.get('Weights', 'weightF'))
        except Exception:
            # the weight expression is optional for the branch selection
            pass
        # plotting region cut
        # items() instead of iteritems(): works on Python 2 and Python 3
        for region, regionInfo in self.regionsDict.items():
            keepBranchesPlot.append(regionInfo['cut'])
        keepBranchesPlotFinal = BranchList(
            keepBranchesPlot).getListOfBranches()
        print("KEEP:", keepBranchesPlotFinal)

        # ----------------------------------------------------------------------------------------------------------------------
        # cache samples
        # ----------------------------------------------------------------------------------------------------------------------
        for sampleToCache in [self.sampleIdentifier]:
            print('*' * 80)
            print(' ', sampleToCache)
            print('*' * 80)
            treeCaches = []

            # for all (sub)samples which come from the same files (sampleIdentifier)
            subsamples = [
                x for x in self.samples if x.identifier == sampleToCache
            ]
            for sample in subsamples:

                # add cuts for all plot regions
                for region, regionInfo in self.regionsDict.items():

                    configSection = 'Plot:%s' % region

                    # cuts
                    sampleCuts = [sample.subcut]
                    if regionInfo['cut']:
                        sampleCuts.append(regionInfo['cut'])
                    if self.config.has_option(configSection, 'Datacut'):
                        sampleCuts.append(
                            self.config.get(configSection, 'Datacut'))
                    if self.config.has_option('Plot_general',
                                              'addBlindingCut'):
                        # append the cut expression itself; has_option()
                        # only yields a boolean, which is not a cut
                        sampleCuts.append(
                            self.config.get('Plot_general',
                                            'addBlindingCut'))

                    # arbitrary (optional) name for the output tree, used for print-out (the TreeCache object has no idea what it is doing, e.g. dc, plot etc.)
                    cacheName = 'plot:{region}_{sample}'.format(
                        region=region, sample=sample.name)

                    # add cache object
                    tc = TreeCache.TreeCache(
                        name=cacheName,
                        sample=sample.name,
                        cutList=sampleCuts,
                        inputFolder=self.samplesPath,
                        splitFilesChunks=self.splitFilesChunks,
                        chunkNumber=self.chunkNumber,
                        splitFilesChunkSize=self.splitFilesChunkSize,
                        fileList=self.fileList,
                        branches=keepBranchesPlotFinal,
                        config=self.config,
                        debug=True)

                    # check if this part of the sample is already cached
                    isCached = tc.partIsCached()
                    if isCached and not self.forceRedo:
                        print("INFO: already cached!", tc, "(", tc.hash, ")")
                        continue
                    if isCached:
                        tc.deleteCachedFiles(chunkNumber=self.chunkNumber)

                    # for the first sample which comes from these files, load the tree
                    if not self.sampleTree:
                        self.sampleTree = SampleTree(
                            {
                                'name': sample.identifier,
                                'folder': self.samplesPath
                            },
                            splitFilesChunkSize=self.splitFilesChunkSize,
                            chunkNumber=self.chunkNumber,
                            config=self.config,
                            saveMemory=True)
                        if not self.sampleTree or not self.sampleTree.tree:
                            print(
                                "\x1b[31mERROR: creation of sample tree failed!!\x1b[0m"
                            )
                            raise Exception("CreationOfSampleTreeFailed")
                        # consistency check on the file list at submission time and now
                        fileListNow = self.sampleTree.getSampleFileNameChunk(
                            self.chunkNumber)
                        if self.fileList and (sorted(self.fileList) !=
                                              sorted(fileListNow)):
                            print(
                                "\x1b[31mERROR: sample files have changed between submission and run of the job!\x1b[0m"
                            )
                            raise Exception("SampleFilesHaveChanged")

                    treeCaches.append(
                        tc.setSampleTree(self.sampleTree).cache())

            if treeCaches:
                # run on the tree
                self.sampleTree.process()
            else:
                print("nothing to do!")
Beispiel #32
0
    def run(self):
        """Create all missing training/evaluation caches for the sample identifier.

        For every (sub)sample, training region and each of the two additional
        cuts (``self.TrainCut``, ``self.EvalCut``) a ``TreeCache`` is
        configured. The input tree is only opened if a cache part is missing
        or ``self.force`` is set.

        Raises:
            Exception: "CreationOfSampleTreeFailed" if the input tree cannot
                be created.
        """
        # ----------------------------------------------------------------------------------------------------------------------
        # cache samples
        # ----------------------------------------------------------------------------------------------------------------------
        for sampleToCache in [self.sampleIdentifier]:
            print ('*'*80)
            print (' ',sampleToCache)
            print ('*'*80)
            # prepare caches for training and evaluation samples
            treeCaches = []
            self.sampleTree = None

            # use all (sub)samples which come from the same files (sampleIdentifier)
            subsamples = [x for x in self.samples if x.identifier == sampleToCache]

            # list of branches to keep for use as MVA input variables
            # NOTE(review): the same variables are re-added once per subsample
            # and per train/eval cut; presumably harmless if BranchList
            # de-duplicates - confirm before simplifying these loops.
            branchListOfMVAVars = BranchList()
            for sample in subsamples:
                # items() instead of iteritems(): works on Python 2 and Python 3
                for trainingRegion, trainingRegionInfo in self.trainingRegionsDict.items():
                    for additionalCut in [self.TrainCut, self.EvalCut]:
                        branchListOfMVAVars.addCut(trainingRegionInfo['vars'])
                    for weightVar in trainingRegionInfo['weightVars']:
                        branchListOfMVAVars.addCut(weightVar)
            branchListOfMVAVars.addCut(self.config.get('Weights', 'weightF'))
            mvaBranches = branchListOfMVAVars.getListOfBranches()

            # loop over all samples
            for sample in subsamples:

                # add cuts for all training regions
                for trainingRegion, trainingRegionInfo in self.trainingRegionsDict.items():

                    # add cuts for training and evaluation
                    for additionalCut in [self.TrainCut, self.EvalCut]:

                        # cuts
                        sampleCuts = [sample.subcut]
                        if additionalCut:
                            sampleCuts.append(additionalCut)
                        if trainingRegionInfo['cut']:
                            sampleCuts.append(trainingRegionInfo['cut'])

                        # add cache object
                        tc = TreeCache.TreeCache(
                            name='{region}_{sample}_{tr}'.format(region=trainingRegion, sample=sample.name, tr='TRAIN' if additionalCut==self.TrainCut else 'EVAL'),
                            sample=sample.name,
                            cutList=sampleCuts,
                            inputFolder=self.samplesPath,
                            splitFilesChunks=self.splitFilesChunks,
                            chunkNumber=self.chunkNumber,
                            splitFilesChunkSize=self.splitFilesChunkSize,
                            branches=mvaBranches,
                            config=self.config,
                            debug=True
                        )

                        # check if this part of the sample is already cached
                        isCached = tc.partIsCached()
                        if isCached and not self.force:
                            continue
                        if isCached:
                            tc.deleteCachedFiles(chunkNumber=self.chunkNumber)
                        # for the first sample which comes from these files, load the tree
                        if not self.sampleTree:
                            self.sampleTree = SampleTree({'name': sample.identifier, 'folder': self.samplesPath}, splitFilesChunkSize=self.splitFilesChunkSize, chunkNumber=self.chunkNumber, config=self.config, saveMemory=True)
                            # fail early with a clear message instead of handing a
                            # broken tree to the cache objects
                            if not self.sampleTree or not self.sampleTree.tree:
                                print ("\x1b[31mERROR: creation of sample tree failed!!\x1b[0m")
                                raise Exception("CreationOfSampleTreeFailed")
                        treeCaches.append(tc.setSampleTree(self.sampleTree).cache())

            if treeCaches:
                # run on the tree
                self.sampleTree.process()
            else:
                print ("nothing to do!")
Beispiel #33
0

for fileName in filelist:
    localFileName = fileLocator.getFilenameAfterPrep(fileName)

    # input, output and temporary file share the layout <path>/<sample identifier>/<file name>
    inputFileName = "{path}/{subfolder}/{filename}".format(path=pathIN, subfolder=sample.identifier, filename=localFileName)
    outputFileName = "{path}/{subfolder}/{filename}".format(path=pathOUT, subfolder=sample.identifier, filename=localFileName)
    tmpFileName = "{path}/{subfolder}/{filename}".format(path=tmpDir, subfolder=sample.identifier, filename=localFileName)
    # folder = everything before the last '/'
    outputFolder = '/'.join(outputFileName.split('/')[:-1])
    tmpFolder = '/'.join(tmpFileName.split('/')[:-1])
    fileLocator.makedirs(tmpFolder)
    fileLocator.makedirs(outputFolder)

    # existing output files are only redone when forced
    if not fileLocator.exists(outputFileName) or opts.force:
        # load sample tree and initialize vtype corrector
        sampleTree = SampleTree([inputFileName], config=config)
        if not sampleTree.tree:
            # single-argument print call: same output on Python 2, valid syntax on Python 3
            print("\x1b[31mERROR: file does not exist or is broken, will be SKIPPED!\x1b[0m")
            continue

        # lists of single modules can be given instead of a module, "--addCollections Sys.all"
        # [Sys]
        # all = ['Sys.Vtype', 'Sys.Leptons', ...]
        collectionsListsReplaced = []
        for collection in collections:
            if '.' in collection:
                # "<section>.<key>": split once and reuse both parts
                collectionParts = collection.split('.')
                section = collectionParts[0]
                key = collectionParts[1]
                listExpression = config.get(section, key).strip()
                if listExpression.startswith('[') and listExpression.endswith(']'):
                    # NOTE(review): eval() on a config value; only safe for trusted config files
                    listParsed = eval(listExpression)
Beispiel #34
0
    def test_AddBranches(self):
        """Add output branches in several ways and check the written tree.

        Branches are added from formula strings, a lambda and from dictionary
        definitions; the output tree is written with a trivial cut. The mean
        of ``vectorstuff[2]`` in the new tree is compared with the mean of
        ``a+b+c`` in the input tree, and the number of ``vectorstuff``
        entries is compared with ``vectorLength`` times the number of events.
        """
        scratchDir = TestSampleTreeAddBranchesMethods.scratchDirectory
        outputTreePath = scratchDir + '/tree_withaddedbranches.root'
        histogramsPath = scratchDir + '/histograms.root'

        sampleTree = self.getTree()

        # branches defined by formula strings
        sampleTree.addOutputBranch('jetSum', 'Sum$(Jet)')
        sampleTree.addOutputBranch('abcSum', 'a+b+c')

        # branch defined by a callable (here a lambda)
        sampleTree.addOutputBranch('abcSum2', lambda tree: tree.a + tree.b + tree.c)

        # branches defined by dictionaries, the last one being a vector
        vectorLength = 4
        branchDefinitions = [
            {'name': 'abcSum3', 'formula': BlablaCorrector.applyCorrection},
            {'name': 'stuff', 'formula': BlablaCorrector.applyOtherCorrection},
            {'name': 'vectorstuff', 'formula': BlablaCorrector.addVector, 'length': vectorLength},
        ]
        sampleTree.addOutputBranches(branchDefinitions)

        # write output tree and apply also a cut
        sampleTree.addOutputTree(outputTreePath, cut='1')
        sampleTree.process()

        # re-open the written tree; the histogram file is opened before the
        # histograms are created, as in the comparison below
        newSampleTree = SampleTree([outputTreePath])
        newTree = newSampleTree.tree
        outfile = ROOT.TFile.Open(histogramsPath, 'recreate')
        histNew = ROOT.TH1F('h1', 'h1', 200, 0, 200)
        histExpected = ROOT.TH1F('h2', 'h2', 200, 0, 200)

        # new branch
        newTree.Draw('vectorstuff[2]>>h1')

        # expected result
        sampleTree.tree.Draw('a+b+c>>h2')

        m1 = histNew.GetMean()
        m2 = histExpected.GetMean()
        self.assertTrue(abs(m1-m2) < 0.00001)
        self.assertTrue(abs(m1/m2) < 1.00001)
        self.assertTrue(abs(m1/m2) > 0.99999)
        print("histogram means:", m1, m2, " check histograms h1 and h2 in histograms.root")
        outfile.Write()
        outfile.Close()

        # test if we really have a vector with 4 entries in our output tree
        histVector = ROOT.TH1F('h3', 'h3', 2000, -1000, 10000)
        histScalar = ROOT.TH1F('h4', 'h4', 2000, -1000, 10000)
        newTree.Draw('vectorstuff>>h3')
        sampleTree.tree.Draw('a>>h4')
        self.assertTrue(histVector.GetEntries() > 0)
        self.assertEqual(histVector.GetEntries(), histScalar.GetEntries()*vectorLength)
Beispiel #35
0
    # Read the configuration for the tag given on the command line and look
    # up the folder of the datacard samples and the folder for the output.
    config = XbbConfigTools(XbbConfigReader.read(opts.tag))
    inputFolder = config.get('Directories', 'dcSamples')
    logFolder = config.get('Directories', 'tagDir')

    config.loadNamespaces()

    # use the regions given on the command line (comma separated), otherwise
    # all datacard regions known to the configuration
    regions = config.getDatacardRegions() if len(
        opts.regions) < 1 else config.parseCommaSeparatedList(opts.regions)
    for region in regions:

        # NOTE(review): eval() on a config value; only safe for trusted config files
        dataSamples = eval(config.get('dc:' + region, 'data'))
        for dataSample in dataSamples:
            sampleTree = SampleTree({
                'name': dataSample,
                'folder': inputFolder
            },
                                    config=config)

            # one text file per region and data sample, tagged with the
            # requested run and event
            outputFileName = logFolder + '/' + region + '_' + dataSample + '_' + opts.run + '_' + opts.event + '.txt'
            print("save event list to:", outputFileName)
            # presumably redirects the output of a later tree scan into
            # outputFileName - the scan itself is not part of this excerpt
            treePlayer = sampleTree.tree.GetPlayer()
            treePlayer.SetScanRedirect(True)
            treePlayer.SetScanFileName(outputFileName)

            branchList = BranchList(["run", "event"])
            # the name of the cut is taken from option 'cut' of section
            # 'dc:<region>' if present, otherwise the region name itself is
            # used as the key in section 'Cuts'
            regionCut = config.get(
                'Cuts',
                config.get('dc:' + region, 'cut') if config.has_option(
                    'dc:' + region, 'cut') else region)
            branchList.addCut(regionCut)
Beispiel #36
0
    def prepare(self):
        """Set up a TreeCache for every (datacard region, subsample) part that needs caching.

        For each datacard maker in ``self.dcMakers`` and each subsample of
        ``self.sampleToCache`` the cut tree and the list of branches to keep
        are built and a ``TreeCache`` is configured. Cache parts that already
        exist are skipped unless ``self.forceRedo`` is set. The resulting
        cache objects are collected in ``self.treeCaches``.

        Returns:
            self, to allow chaining.

        Raises:
            Exception: "CreationOfSampleTreeFailed" if the input tree cannot
                be created, "SampleFilesHaveChanged" if the files found now
                differ from ``self.fileList``.
        """
        if len(self.dcMakers) < 1:
            print("WARNING: no datacard regions added, nothing to do.")
            return self

        self.treeCaches = []
        self.sampleTree = None

        # cuts
        allSamples = self.getAllSamples()
        subsamples = [x for x in allSamples if x.identifier == self.sampleToCache]

        # loop over all datacard regions
        for dcMaker in self.dcMakers:

            # loop over all subsamples (which come from the same root tree files)
            for sample in subsamples:

                # combine subcut and systematics cut with logical AND
                # systematics cuts are combined with logical OR, such that 1 cache file can be used for all the systematics

                isData = (sample.type == 'DATA')
                systematicsCuts = sorted(set(x['cachecut'] for x in dcMaker.getSystematicsList(isData=isData)))
                sampleCuts = {'AND': [sample.subcut, {'OR': systematicsCuts}]}
                # the cut tree is only dumped in verbose mode (the condition
                # used to be forced to True, a debugging leftover)
                if self.verbose:
                    print (json.dumps(sampleCuts, sort_keys=True, indent=8, default=str))

                # make list of branches to keep in root file
                # (the full systematics list is fetched once and reused for all keys)
                allSystematics = dcMaker.getSystematicsList()
                branchList = BranchList(sample.subcut)
                for systematicsKey in ('cachecut', 'cut', 'var', 'weight'):
                    branchList.addCut([x[systematicsKey] for x in allSystematics])
                branchList.addCut(self.config.get('Weights', 'weightF'))
                # NOTE(review): eval() on a config value; only safe for trusted config files
                branchList.addCut(eval(self.config.get('Branches', 'keep_branches')))
                branchesToKeep = branchList.getListOfBranches()

                # arbitrary (optional) name for the output tree, used for print-out (the TreeCache object has no idea what it is doing, e.g. dc, plot etc.)
                cacheName = 'dc:{region}_{sample}'.format(region=dcMaker.getRegion(), sample=sample.name)

                # add cache object
                tc = TreeCache.TreeCache(
                    name=cacheName,
                    sample=sample.name,
                    cutList=sampleCuts,
                    cutSequenceMode='TREE',
                    branches=branchesToKeep,
                    inputFolder=dcMaker.path,
                    splitFilesChunks=self.splitFilesChunks,
                    chunkNumber=self.chunkNumber,
                    splitFilesChunkSize=self.splitFilesChunkSize,
                    fileList=self.fileList,
                    config=self.config,
                    debug=self.verbose
                )

                # check if this part of the sample is already cached
                isCached = tc.partIsCached()
                print ("check if sample \x1b[34m{sample}\x1b[0m part {part} is cached:".format(sample=sample.name, part=self.chunkNumber), isCached)
                if isCached and not self.forceRedo:
                    continue
                if isCached:
                    tc.deleteCachedFiles(chunkNumber=self.chunkNumber)

                # for the first sample which comes from these files, load the tree
                if not self.sampleTree:
                    self.sampleTree = SampleTree({'name': sample.identifier, 'folder': dcMaker.path}, splitFilesChunkSize=self.splitFilesChunkSize, chunkNumber=self.chunkNumber, config=self.config, saveMemory=True)
                    if not self.sampleTree or not self.sampleTree.tree:
                        print ("\x1b[31mERROR: creation of sample tree failed!!\x1b[0m")
                        raise Exception("CreationOfSampleTreeFailed")

                    # consistency check on the file list at submission time and now
                    fileListNow = self.sampleTree.getSampleFileNameChunk(self.chunkNumber)
                    if self.fileList and (sorted(self.fileList) != sorted(fileListNow)):
                        print ("\x1b[31mERROR: sample files have changed between submission and run of the job!\x1b[0m")
                        raise Exception("SampleFilesHaveChanged")

                # connect the TreeCache object to the input sampleTree and add it to the list of cached trees
                self.treeCaches.append(tc.setSampleTree(self.sampleTree).cache())
        return self
Beispiel #37
0
#!/usr/bin/env python
import ROOT
from myutils.sampleTree import SampleTree

# Collect the distinct [run, luminosityBlock] pairs present in a sample and
# write them to a text file.

#pp
#sampleTree = SampleTree('VHbbPostNano2017_V2_DoubleEG.txt', 'Events', xrootdRedirector='root://t3dcachedb03.psi.ch:1094/')
#sampleTree = SampleTree('VHbbPostNano2017_V2_DoubleMuon.txt', 'Events', xrootdRedirector='root://t3dcachedb03.psi.ch:1094/')
#outputFileName = 'existing_lumis_pp_DoubleMuon.txt'

#nano
sampleTree = SampleTree('DoubleEG_RunII2017ReReco17Nov17-94X-Nano01_300122to300237.txt', 'Events', xrootdRedirector='root://xrootd-cms.infn.it/')
outputFileName = 'existing_lumis_nano_realDoubleEG_300122to300237.txt'

# only the two branches read in the loop below are enabled
sampleTree.tree.SetBranchStatus("*", 0)
sampleTree.tree.SetBranchStatus("run", 1)
sampleTree.tree.SetBranchStatus("luminosityBlock", 1)

runLumi = []
# set of already recorded pairs: constant-time membership test instead of
# scanning the growing result list for every event
seenRunLumi = set()
for i in sampleTree:
    #if [i.run, i.luminosityBlock] not in runLumi and i.run>=302030 and i.run <= 303434:
    #    runLumi.append([i.run, i.luminosityBlock])
    runLumiPair = (i.run, i.luminosityBlock)
    if runLumiPair not in seenRunLumi:
        seenRunLumi.add(runLumiPair)
        # keep the output format: list of [run, luminosityBlock] in first-seen order
        runLumi.append(list(runLumiPair))

# single-argument print call: same output on Python 2, valid syntax on Python 3
print(runLumi)
with open(outputFileName, 'w') as f:
    f.write("%r"%runLumi)


Beispiel #38
0
 def getTree(self, path):
     """Return a SampleTree for the single file `path`, using the tree named 'Events'."""
     return SampleTree([path], treeName='Events')
Beispiel #39
0
class CacheTraining(object):
    """Cache the training and evaluation selections of one sample for MVA training.

    For every training region and every sub-sample sharing the given sample
    identifier, two TreeCache objects (training and evaluation cut) are
    attached to a single SampleTree, which is then processed once.
    """

    def __init__(self, config, sampleIdentifier, trainingRegions, splitFilesChunks=1, chunkNumber=1, splitFilesChunkSize=-1, force=False):
        """Read the region, cut and sample definitions from the configuration.

        Args:
            config: configuration object; ``get`` and ``has_option`` are used.
            sampleIdentifier: identifier of the sample whose files are cached.
            trainingRegions: list of config section names, one per region.
            splitFilesChunks: forwarded to TreeCache.
            chunkNumber: forwarded to TreeCache and SampleTree.
            splitFilesChunkSize: forwarded to TreeCache and SampleTree.
            force: if True, existing cached parts are deleted and redone.
        """
        self.config = config
        self.force = force
        self.sampleIdentifier = sampleIdentifier
        self.trainingRegions = trainingRegions

        self.sampleTree = None
        self.samplesPath = self.config.get('Directories', 'MVAin')
        self.samplesDefinitions = self.config.get('Directories','samplesinfo')
        self.samplesInfo = ParseInfo(self.samplesDefinitions, self.samplesPath)
        self.sampleFilesFolder = self.config.get('Directories', 'samplefiles')

        # NOTE(review): the sample lists are obtained by eval() of config
        # values, so the configuration must come from a trusted source.
        self.backgroundSampleNames = list(set(sum([eval(self.config.get(trainingRegion, 'backgrounds')) for trainingRegion in self.trainingRegions], [])))
        self.signalSampleNames = list(set(sum([eval(self.config.get(trainingRegion, 'signals')) for trainingRegion in self.trainingRegions], [])))
        self.samples = self.samplesInfo.get_samples(list(set(self.backgroundSampleNames + self.signalSampleNames)))

        # per region: selection cut, MVA input variables and systematic weights
        self.trainingRegionsDict = {}
        for trainingRegion in self.trainingRegions:
            treeCutName = config.get(trainingRegion, 'treeCut')
            treeVarSet = config.get(trainingRegion, 'treeVarSet').strip()
            # 'systematics' is optional for a region (also eval'd, see note above)
            systematics = eval(config.get(trainingRegion, 'systematics')) if config.has_option(trainingRegion, 'systematics') else []
            mvaVars = config.get(treeVarSet, 'Nominal').split(' ')
            weightVars = []
            for syst in systematics:
                # both spellings of the suffix are accepted: _UP/_DOWN is
                # preferred, _Up/_Down is the fallback
                systNameUp   = syst+'_UP'   if self.config.has_option('Weights',syst+'_UP')   else syst+'_Up'
                systNameDown = syst+'_DOWN' if self.config.has_option('Weights',syst+'_DOWN') else syst+'_Down'
                weightVars += [self.config.get('Weights',systNameUp), self.config.get('Weights',systNameDown)]

            self.trainingRegionsDict[trainingRegion] = {
                    'cut': config.get('Cuts', treeCutName),
                    'vars': mvaVars,
                    'weightVars': weightVars,
                    }

        self.TrainCut = config.get('Cuts', 'TrainCut')
        self.EvalCut = config.get('Cuts', 'EvalCut')

        self.splitFilesChunks = splitFilesChunks
        self.chunkNumber = chunkNumber
        self.splitFilesChunkSize = splitFilesChunkSize

        # load the library named in the configuration into ROOT
        VHbbNameSpace=config.get('VHbbNameSpace','library')
        ROOT.gSystem.Load(VHbbNameSpace)

    def printInfo(self):
        """Print the selection cut of every training region."""
        print ("REGION:".ljust(24),"CUT:")
        # items() instead of iteritems(): works on Python 2 and Python 3
        for trainingRegion,trainingRegionInfo in self.trainingRegionsDict.items():
            print (" > ",trainingRegion.ljust(20), trainingRegionInfo['cut'])

    def _labelledCuts(self):
        """Return the (label, cut) pairs for which caches are produced.

        The label is attached explicitly instead of being derived from a
        comparison with TrainCut, so the two caches keep distinct names even
        if TrainCut and EvalCut are configured to the same string.
        """
        return [('TRAIN', self.TrainCut), ('EVAL', self.EvalCut)]

    def run(self):
        """Create all missing (or, with force, all) caches and process the tree once."""
        # ----------------------------------------------------------------------------------------------------------------------
        # cache samples
        # ----------------------------------------------------------------------------------------------------------------------
        for sampleToCache in [self.sampleIdentifier]:
            print ('*'*80)
            print (' ',sampleToCache)
            print ('*'*80)
            # prepare caches for training and evaluation samples
            treeCaches = []
            self.sampleTree = None

            # use all (sub)samples which come from the same files (sampleIdentifier)
            subsamples = [x for x in self.samples if x.identifier == sampleToCache]

            # list of branches to keep for use as MVA input variables.
            # The expressions do not depend on the sample or on the
            # train/eval cut, so each one is registered once per region.
            # NOTE(review): assumes BranchList.addCut only accumulates
            # expressions, i.e. the former repeated identical calls did not
            # change the result of getListOfBranches() -- confirm.
            branchListOfMVAVars = BranchList()
            for trainingRegionInfo in self.trainingRegionsDict.values():
                branchListOfMVAVars.addCut(trainingRegionInfo['vars'])
                for weightVar in trainingRegionInfo['weightVars']:
                    branchListOfMVAVars.addCut(weightVar)
            branchListOfMVAVars.addCut(self.config.get('Weights', 'weightF'))
            mvaBranches = branchListOfMVAVars.getListOfBranches()

            # loop over all samples
            for sample in subsamples:

                # add cuts for all training regions
                for trainingRegion,trainingRegionInfo in self.trainingRegionsDict.items():

                    # add cuts for training and evaluation
                    for cutLabel, additionalCut in self._labelledCuts():

                        # cuts: sample sub-cut, then train/eval cut, then region cut
                        # (empty cuts are left out)
                        sampleCuts = [sample.subcut]
                        if additionalCut:
                            sampleCuts.append(additionalCut)
                        if trainingRegionInfo['cut']:
                            sampleCuts.append(trainingRegionInfo['cut'])

                        # add cache object
                        tc = TreeCache.TreeCache(
                            name='{region}_{sample}_{tr}'.format(region=trainingRegion, sample=sample.name, tr=cutLabel),
                            sample=sample.name,
                            cutList=sampleCuts,
                            inputFolder=self.samplesPath,
                            splitFilesChunks=self.splitFilesChunks,
                            chunkNumber=self.chunkNumber,
                            splitFilesChunkSize=self.splitFilesChunkSize,
                            branches=mvaBranches,
                            config=self.config,
                            debug=True
                        )

                        # check if this part of the sample is already cached
                        isCached = tc.partIsCached()
                        if not isCached or self.force:
                            if isCached:
                                # only reached with force: redo an existing cache
                                tc.deleteCachedFiles(chunkNumber=self.chunkNumber)
                            # for the first sample which comes from this files, load the tree
                            if not self.sampleTree:
                                self.sampleTree = SampleTree({'name': sample.identifier, 'folder': self.samplesPath}, splitFilesChunkSize=self.splitFilesChunkSize, chunkNumber=self.chunkNumber, config=self.config, saveMemory=True)
                            treeCaches.append(tc.setSampleTree(self.sampleTree).cache())

            if len(treeCaches) > 0:
                # run on the tree (it exists whenever at least one cache was attached)
                self.sampleTree.process()
            else:
                print ("nothing to do!")
Beispiel #40
0
class CacheTraining(object):
    """Cache the training and evaluation selections of one sample for MVA training.

    For every training region and every sub-sample sharing the given sample
    identifier, two TreeCache objects (training and evaluation cut) are
    attached to a single SampleTree, which is then processed once.
    """

    def __init__(self,
                 config,
                 sampleIdentifier,
                 trainingRegions,
                 splitFilesChunks=1,
                 chunkNumber=1,
                 splitFilesChunkSize=-1,
                 force=False):
        """Read the region, cut and sample definitions from the configuration.

        Args:
            config: configuration object; only ``get`` is used.
            sampleIdentifier: identifier of the sample whose files are cached.
            trainingRegions: list of config section names, one per region.
            splitFilesChunks: forwarded to TreeCache.
            chunkNumber: forwarded to TreeCache and SampleTree.
            splitFilesChunkSize: forwarded to TreeCache and SampleTree.
            force: if True, existing cached parts are deleted and redone.
        """
        self.config = config
        self.force = force
        self.sampleIdentifier = sampleIdentifier
        self.trainingRegions = trainingRegions

        self.sampleTree = None
        self.samplesPath = self.config.get('Directories', 'MVAin')
        self.samplesDefinitions = self.config.get('Directories', 'samplesinfo')
        self.samplesInfo = ParseInfo(self.samplesDefinitions, self.samplesPath)
        self.sampleFilesFolder = self.config.get('Directories', 'samplefiles')

        # NOTE(review): the sample lists are obtained by eval() of config
        # values, so the configuration must come from a trusted source.
        self.backgroundSampleNames = list(
            set(
                sum([
                    eval(self.config.get(trainingRegion, 'backgrounds'))
                    for trainingRegion in self.trainingRegions
                ], [])))
        self.signalSampleNames = list(
            set(
                sum([
                    eval(self.config.get(trainingRegion, 'signals'))
                    for trainingRegion in self.trainingRegions
                ], [])))
        self.samples = self.samplesInfo.get_samples(
            list(set(self.backgroundSampleNames + self.signalSampleNames)))

        # per region: selection cut and the MVA input variables of all
        # systematic variations listed in [systematics] systematics
        self.trainingRegionsDict = {}
        for trainingRegion in self.trainingRegions:
            treeCutName = config.get(trainingRegion, 'treeCut')
            treeVarSet = config.get(trainingRegion, 'treeVarSet').strip()
            systematics = [
                x for x in config.get('systematics', 'systematics').split(' ')
                if len(x.strip()) > 0
            ]
            mvaVars = []
            for systematic in systematics:
                mvaVars += config.get(treeVarSet,
                                      systematic).strip().split(' ')
            self.trainingRegionsDict[trainingRegion] = {
                'cut': config.get('Cuts', treeCutName),
                'vars': mvaVars,
            }

        self.TrainCut = config.get('Cuts', 'TrainCut')
        self.EvalCut = config.get('Cuts', 'EvalCut')

        self.splitFilesChunks = splitFilesChunks
        self.chunkNumber = chunkNumber
        self.splitFilesChunkSize = splitFilesChunkSize

        # load the library named in the configuration into ROOT
        VHbbNameSpace = config.get('VHbbNameSpace', 'library')
        ROOT.gSystem.Load(VHbbNameSpace)

    def printInfo(self):
        """Print the selection cut of every training region."""
        print("REGION:".ljust(24), "CUT:")
        # items() instead of iteritems(): works on Python 2 and Python 3
        for trainingRegion, trainingRegionInfo in self.trainingRegionsDict.items():
            print(" > ", trainingRegion.ljust(20), trainingRegionInfo['cut'])

    def _labelledCuts(self):
        """Return the (label, cut) pairs for which caches are produced.

        The label is attached explicitly instead of being derived from a
        comparison with TrainCut, so the two caches keep distinct names even
        if TrainCut and EvalCut are configured to the same string.
        """
        return [('TRAIN', self.TrainCut), ('EVAL', self.EvalCut)]

    def run(self):
        """Create all missing (or, with force, all) caches and process the tree once."""
        # ----------------------------------------------------------------------------------------------------------------------
        # cache samples
        # ----------------------------------------------------------------------------------------------------------------------
        for sampleToCache in [self.sampleIdentifier]:
            print('*' * 80)
            print(' ', sampleToCache)
            print('*' * 80)
            # prepare caches for training and evaluation samples
            treeCaches = []
            self.sampleTree = None

            # use all (sub)samples which come from the same files (sampleIdentifier)
            subsamples = [
                x for x in self.samples if x.identifier == sampleToCache
            ]

            # list of branches to keep for use as MVA input variables.
            # The variables do not depend on the sample or on the train/eval
            # cut, so each region's list is registered once.
            # NOTE(review): assumes BranchList.addCut only accumulates
            # expressions, i.e. the former repeated identical calls did not
            # change the result of getListOfBranches() -- confirm.
            branchListOfMVAVars = BranchList()
            for trainingRegionInfo in self.trainingRegionsDict.values():
                branchListOfMVAVars.addCut(trainingRegionInfo['vars'])
            branchListOfMVAVars.addCut(self.config.get('Weights', 'weightF'))
            mvaBranches = branchListOfMVAVars.getListOfBranches()

            # loop over all samples
            for sample in subsamples:

                # add cuts for all training regions
                for trainingRegion, trainingRegionInfo in self.trainingRegionsDict.items():

                    # add cuts for training and evaluation
                    for cutLabel, additionalCut in self._labelledCuts():

                        # cuts: sample sub-cut, then train/eval cut, then
                        # region cut (empty cuts are left out)
                        sampleCuts = [sample.subcut]
                        if additionalCut:
                            sampleCuts.append(additionalCut)
                        if trainingRegionInfo['cut']:
                            sampleCuts.append(trainingRegionInfo['cut'])

                        # add cache object
                        tc = TreeCache.TreeCache(
                            name='{region}_{sample}_{tr}'.format(
                                region=trainingRegion,
                                sample=sample.name,
                                tr=cutLabel),
                            sample=sample.name,
                            cutList=sampleCuts,
                            inputFolder=self.samplesPath,
                            splitFilesChunks=self.splitFilesChunks,
                            chunkNumber=self.chunkNumber,
                            splitFilesChunkSize=self.splitFilesChunkSize,
                            branches=mvaBranches,
                            config=self.config,
                            debug=True)

                        # check if this part of the sample is already cached
                        isCached = tc.partIsCached()
                        if not isCached or self.force:
                            if isCached:
                                # only reached with force: redo an existing cache
                                tc.deleteCachedFiles(
                                    chunkNumber=self.chunkNumber)
                            # for the first sample which comes from this files, load the tree
                            if not self.sampleTree:
                                self.sampleTree = SampleTree(
                                    {
                                        'name': sample.identifier,
                                        'folder': self.samplesPath
                                    },
                                    splitFilesChunkSize=self.splitFilesChunkSize,
                                    chunkNumber=self.chunkNumber,
                                    config=self.config,
                                    saveMemory=True)
                            treeCaches.append(
                                tc.setSampleTree(self.sampleTree).cache())

            if len(treeCaches) > 0:
                # run on the tree (it exists whenever at least one cache was attached)
                self.sampleTree.process()
            else:
                print("nothing to do!")
Beispiel #41
0
    def run(self):

        nFilesProcessed = 0
        nFilesFailed = 0

        for subJob in self.subJobs:

            # only process if output is non-existing/broken or --force was used
            if self.opts.force or not self.fileLocator.isValidRootFile(subJob['outputFileName']):

                # create directories
                outputFolder = '/'.join(subJob['outputFileName'].split('/')[:-1])
                tmpFolder = '/'.join(subJob['tmpFileName'].split('/')[:-1])
                self.fileLocator.makedirs(outputFolder)
                self.fileLocator.makedirs(tmpFolder)

                # load sample tree
                sampleTree = SampleTree(subJob['localInputFileNames'], config=self.config)
                if not sampleTree.tree:
                    print "trying fallback...", len(subJob['inputFileNames'])

                    if len(subJob['inputFileNames']) == 1:
                        # try original naming scheme if reading directly from Heppy/Nano ntuples (without prep)
                        fileNameOriginal = self.pathIN + '/' + subJob['inputFileNames'][0]
                        print "FO:", fileNameOriginal
                        xrootdRedirector = self.fileLocator.getRedirector(fileNameOriginal)
                        sampleTree = SampleTree([fileNameOriginal], config=self.config, xrootdRedirector=xrootdRedirector)
                        if not sampleTree.tree:
                            print "\x1b[31mERROR: file does not exist or is broken, will be SKIPPED!\x1b[0m"
                            nFilesFailed += 1
                            continue
                    else:
                        print "\x1b[31mERROR: file does not exist or is broken, will be SKIPPED! (old naming scheme not supported for joining multipel files)\x1b[0m"
                        nFilesFailed += 1
                        continue

                # to use this syntax, use "--addCollections Sys.Vtype" for a config file entry like this:
                # [Sys]
                # Vtype = VtypeCorrector.VtypeCorrector(channel='Zll')
                # (instead of passing the tree in the constructor, the setTree method can be used)
                pyModules = []
                versionTable = []
                for collection in self.collections:
                    if '.' in collection:
                        section = collection.split('.')[0]
                        key = collection.split('.')[1]
                        if self.config.has_section(section) and self.config.has_option(section, key):
                            pyCode = self.config.get(section, key)
                        elif '(' in collection and collection.endswith(')'):
                            print "WARNING: config option", collection, " not found, interpreting it as Python code!"
                            pyCode = collection 
                        else:
                            print "\x1b[31mERROR: config option not found:", collection, ". To specify Python code directly, pass a complete constructor, e.g. --addCollections 'Module.Class()'. Module has to be placed in python/myutils/ folder.\x1b[0m"
                            raise Exception("ConfigError")

                        # import module from myutils
                        moduleName = pyCode.split('(')[0].split('.')[0].strip()
                        if self.debug:
                            print "DEBUG: import module:", moduleName
                            print("\x1b[33mDEBUG: " + collection + ": run PYTHON code:\n"+pyCode+"\x1b[0m")
                        globals()[moduleName] = importlib.import_module(".{module}".format(module=moduleName), package="myutils")

                        # get object
                        wObject = eval(pyCode)

                        # pass the tree and other variables if needed to finalize initialization
                        if hasattr(wObject, "customInit") and callable(getattr(wObject, "customInit")):
                            wObject.customInit({'config': self.config,
                                                'sampleTree': sampleTree,
                                                'tree': sampleTree.tree,
                                                'sample': self.sample,
                                                'channel': self.channel,
                                                'pathIN': self.pathIN,
                                                'pathOUT': self.pathOUT,
                                                })

                        # add callbacks if the objects provides any
                        if hasattr(wObject, "processEvent") and callable(getattr(wObject, "processEvent")):
                            sampleTree.addCallback('event', wObject.processEvent)
                        for cb in ["finish", "prepareOutput"]:
                            if hasattr(wObject, cb) and callable(getattr(wObject, cb)):
                                sampleTree.addCallback(cb, getattr(wObject, cb))

                        # add branches
                        if hasattr(wObject, "getBranches") and callable(getattr(wObject, "getBranches")):
                            sampleTree.addOutputBranches(wObject.getBranches())

                        pyModules.append(wObject)

                        versionTable.append([moduleName, wObject.getVersion() if hasattr(wObject, "getVersion") else 0])
                    else:
                        print "\x1b[31mERROR: config option not found:", collection, " the format should be: [Section].[Option]\x1b[0m"
                        raise Exception("ConfigError")

                for moduleName, moduleVersion in versionTable:
                    print " > {m}:{v}".format(m=moduleName, v=moduleVersion)

                # DEPRECATED, do not use anymore ---> use BranchTools.TreeFormulas()
                if 'addbranches' in self.collections:
                    writeNewVariables = eval(self.config.get("Regression", "writeNewVariablesDict"))
                    sampleTree.addOutputBranches(writeNewVariables)
                
                # DEPRECATED, do not use anymore ---> use BranchTools.Drop()
                if 'removebranches' in self.collections:
                    bl_branch = eval(config.get('Branches', 'useless_branch'))
                    for br in bl_branch:
                        sampleTree.addBranchToBlacklist(br)
                    bl_branch = eval(config.get('Branches', 'useless_after_sys'))
                    for br in bl_branch:
                        sampleTree.addBranchToBlacklist(br)

                # define output file 
                sampleTree.addOutputTree(subJob['tmpFileName'], cut='1', branches='*', friend=self.opts.friend)

                # run processing
                for pyModule in pyModules:
                    if hasattr(pyModule, "beforeProcessing"):
                        getattr(pyModule, "beforeProcessing")()

                sampleTree.process()

                for pyModule in pyModules:
                    if hasattr(pyModule, "afterProcessing"):
                        getattr(pyModule, "afterProcessing")()

                # if output trees have been produced: copy temporary file to output folder
                if sampleTree.getNumberOfOutputTrees() > 0: 
                    try:
                        self.fileLocator.cp(subJob['tmpFileName'], subJob['outputFileName'], force=True)
                        print 'copy ', subJob['tmpFileName'], subJob['outputFileName']

                        if self.verifyCopy:
                            if not self.fileLocator.isValidRootFile(subJob['outputFileName']):
                                print 'INFO: output at final destination broken, try to copy again from scratch disk to final destination...'
                                self.fileLocator.cp(subJob['tmpFileName'], subJob['outputFileName'], force=True)
                                print 'INFO: second attempt copy done!'
                                if not self.fileLocator.isValidRootFile(subJob['outputFileName']):
                                    print '\x1b[31mERROR: output still broken!\x1b[0m'
                                    nFilesFailed += 1
                                    raise Exception("FileCopyError")
                                else:
                                    print 'INFO: file is good after second attempt!'
                    except Exception as e:
                        print e
                        print "\x1b[31mERROR: copy from scratch to final destination failed!!\x1b[0m"

                    # delete temporary file
                    try:
                        self.fileLocator.rm(subJob['tmpFileName'])
                    except Exception as e:
                        print e
                        print "WARNING: could not delete file on scratch!"


                # clean up
                if hasattr(wObject, "cleanUp") and callable(getattr(wObject, "cleanUp")):
                    getattr(wObject, "cleanUp")()

            else:
                print 'SKIP:', subJob['inputFileNames']

        if nFilesFailed > 0:
            raise Exception("ProcessingIncomplete")
Beispiel #42
0
    def run(self):
        """Create all missing (or, with force, all) caches and process the tree once.

        For every sub-sample sharing ``self.sampleIdentifier`` and every
        training region, one TreeCache is created for the training cut and
        one for the evaluation cut. Caches that are not yet present (or all
        of them if ``self.force`` is set) are attached to a single
        SampleTree, which is processed at the end.
        """
        # ----------------------------------------------------------------------------------------------------------------------
        # cache samples
        # ----------------------------------------------------------------------------------------------------------------------
        for sampleToCache in [self.sampleIdentifier]:
            print('*' * 80)
            print(' ', sampleToCache)
            print('*' * 80)
            # prepare caches for training and evaluation samples
            treeCaches = []
            self.sampleTree = None

            # use all (sub)samples which come from the same files (sampleIdentifier)
            subsamples = [
                x for x in self.samples if x.identifier == sampleToCache
            ]

            # list of branches to keep for use as MVA input variables.
            # The variables do not depend on the sample or on the train/eval
            # cut, so each region's list is registered once.
            # NOTE(review): assumes BranchList.addCut only accumulates
            # expressions, i.e. the former repeated identical calls did not
            # change the result of getListOfBranches() -- confirm.
            branchListOfMVAVars = BranchList()
            # values()/items() instead of iteritems(): works on Python 2 and 3
            for trainingRegionInfo in self.trainingRegionsDict.values():
                branchListOfMVAVars.addCut(trainingRegionInfo['vars'])
            branchListOfMVAVars.addCut(self.config.get('Weights', 'weightF'))
            mvaBranches = branchListOfMVAVars.getListOfBranches()

            # The label is paired with its cut explicitly instead of being
            # derived from a comparison with TrainCut, so the two caches keep
            # distinct names even if both cuts are the same string.
            labelledCuts = [('TRAIN', self.TrainCut), ('EVAL', self.EvalCut)]

            # loop over all samples
            for sample in subsamples:

                # add cuts for all training regions
                for trainingRegion, trainingRegionInfo in self.trainingRegionsDict.items():

                    # add cuts for training and evaluation
                    for cutLabel, additionalCut in labelledCuts:

                        # cuts: sample sub-cut, then train/eval cut, then
                        # region cut (empty cuts are left out)
                        sampleCuts = [sample.subcut]
                        if additionalCut:
                            sampleCuts.append(additionalCut)
                        if trainingRegionInfo['cut']:
                            sampleCuts.append(trainingRegionInfo['cut'])

                        # add cache object
                        tc = TreeCache.TreeCache(
                            name='{region}_{sample}_{tr}'.format(
                                region=trainingRegion,
                                sample=sample.name,
                                tr=cutLabel),
                            sample=sample.name,
                            cutList=sampleCuts,
                            inputFolder=self.samplesPath,
                            splitFilesChunks=self.splitFilesChunks,
                            chunkNumber=self.chunkNumber,
                            splitFilesChunkSize=self.splitFilesChunkSize,
                            branches=mvaBranches,
                            config=self.config,
                            debug=True)

                        # check if this part of the sample is already cached
                        isCached = tc.partIsCached()
                        if not isCached or self.force:
                            if isCached:
                                # only reached with force: redo an existing cache
                                tc.deleteCachedFiles(
                                    chunkNumber=self.chunkNumber)
                            # for the first sample which comes from this files, load the tree
                            if not self.sampleTree:
                                self.sampleTree = SampleTree(
                                    {
                                        'name': sample.identifier,
                                        'folder': self.samplesPath
                                    },
                                    splitFilesChunkSize=self.splitFilesChunkSize,
                                    chunkNumber=self.chunkNumber,
                                    config=self.config,
                                    saveMemory=True)
                            treeCaches.append(
                                tc.setSampleTree(self.sampleTree).cache())

            if len(treeCaches) > 0:
                # run on the tree (it exists whenever at least one cache was attached)
                self.sampleTree.process()
            else:
                print("nothing to do!")