Exemple #1
0
    def customInit(self, initVars):
        """Register output branches and, for MC, prepare the event weight formula.

        initVars must provide 'sample', 'sampleTree', 'config' and 'pathIN'.
        For data samples only the branches are registered; for MC additionally
        the weight expression, the evaluation cut and the scale-to-cross-section
        factor are set up.
        """
        self.sample     = initVars['sample']
        self.sampleTree = initVars['sampleTree']
        self.config     = initVars['config']
        self.addBranch(self.branchName)
        self.addBranch("weightF")
        self.addBranch("weightXS")

        if not self.sample.isData():
            self.weightString = self.config.get('Weights','weightF')
            # per sample special weight
            # NOTE: eval() executes the config value as Python code -- the config file must be trusted
            if self.config.has_option('Weights', 'useSpecialWeight') and eval(self.config.get('Weights', 'useSpecialWeight')):
                specialweight = self.sample.specialweight
                self.weightString = "(({weight})*({specialweight}))".format(weight=self.weightString, specialweight=specialweight)
                print ("INFO: use specialweight: {specialweight}".format(specialweight=specialweight))

            self.evalCut = self.config.get('Cuts','EvalCut')
            self.sampleTree.addFormula(self.weightString)
            self.sampleTree.addFormula(self.evalCut)

            self.excludeTrainingSet = False

            # to compute the correct scale to cross-section, all trees of the sample have to be used!
            sampleTreeForCount = SampleTree({'sample': self.sample, 'folder': initVars['pathIN']}, config=self.config)
            self.weightScaleToXS = sampleTreeForCount.getScale(self.sample) * (2.0 if self.excludeTrainingSet else 1.0)
            # fixed: Python 2 print statement (syntax error under Python 3) -> print() function
            print("scale:", self.weightScaleToXS, self.sample)
Exemple #2
0
    def customInit(self, initVars):
        """Register output branches and, for MC, prepare the event weight formula.

        initVars must provide 'sample', 'sampleTree', 'config' and 'pathIN'.
        Data samples only get the branches registered; MC samples additionally
        get the weight expression, the evaluation cut and the scale-to-XS factor.
        """
        self.sample = initVars['sample']
        self.sampleTree = initVars['sampleTree']
        self.config = initVars['config']
        self.addBranch(self.branchName)
        self.addBranch("weightF")
        self.addBranch("weightXS")

        if not self.sample.isData():
            self.weightString = self.config.get('Weights', 'weightF')
            # per sample special weight
            # NOTE: eval() executes the config value as Python code -- config must be trusted
            if self.config.has_option('Weights', 'useSpecialWeight') and eval(
                    self.config.get('Weights', 'useSpecialWeight')):
                specialweight = self.sample.specialweight
                self.weightString = "(({weight})*({specialweight}))".format(
                    weight=self.weightString, specialweight=specialweight)
                print("INFO: use specialweight: {specialweight}".format(
                    specialweight=specialweight))

            self.evalCut = self.config.get('Cuts', 'EvalCut')
            self.sampleTree.addFormula(self.weightString)
            self.sampleTree.addFormula(self.evalCut)

            self.excludeTrainingSet = False

            # to compute the correct scale to cross-section, all trees of the sample have to be used!
            sampleTreeForCount = SampleTree(
                {
                    'sample': self.sample,
                    'folder': initVars['pathIN']
                },
                config=self.config)
            self.weightScaleToXS = sampleTreeForCount.getScale(
                self.sample) * (2.0 if self.excludeTrainingSet else 1.0)
            # fixed: Python 2 print statement (syntax error under Python 3) -> print() function
            print("scale:", self.weightScaleToXS, self.sample)
Exemple #3
0
    def __init__(self, sample, cutList='1', branches=None, inputFolder=None, tmpFolder=None, outputFolder=None, chunkNumber=-1, splitFilesChunks=-1, splitFilesChunkSize=-1, debug=False, fileList=None, cutSequenceMode='AND', name='', config=None, fileLocator=None):
        """Prepare caching of a skimmed sample tree.

        sample:       a Sample object or a plain sample-identifier string.
        cutList:      cut expression(s) used to derive the minimal cut ('1' = no cut).
        branches:     branches to keep in the skim (hash is kept independent of them).
        chunkNumber / splitFilesChunks / splitFilesChunkSize: which part of the
                      split output this instance handles and how the input is chunked.
        fileList:     accepted but not used in this constructor.
        fileLocator:  reused if given, otherwise a new FileLocator is created.
        """
        self.config = config
        self.fileLocator = fileLocator if fileLocator is not None else FileLocator(config=self.config)
        # debug can also be forced globally via the XBBDEBUG environment variable
        self.debug = debug or ('XBBDEBUG' in os.environ)

        # SAMPLE
        if isinstance(sample, Sample):
            # sample passed as Sample object
            # count number of chunks the cached data is split into
            defaultChunkSize = int(config.get('General', 'mergeCachingSize')) if config.has_option('General', 'mergeCachingSize') else 100
            splitFilesChunkSize = sample.mergeCachingSize if sample.mergeCachingSize > 0 else defaultChunkSize
            # countOnly=True: the SampleTree is only used to count parts, not to read events
            splitFilesChunks = SampleTree({'name': sample.identifier, 'folder': inputFolder}, countOnly=True, splitFilesChunkSize=splitFilesChunkSize, config=config, verbose=self.debug, fileLocator=self.fileLocator).getNumberOfParts()
            # if sample passed as object, it can be a 'subsample' and have a different name and identifier
            self.sample = sample.name
            self.sampleIdentifier = sample.identifier
            if self.debug:
                print ("INFO: use sample=", sample.name, " #parts = ", splitFilesChunks)
        else:
            # sample identifier passed as string
            self.sample = sample
            self.sampleIdentifier = sample
        self.name = name

        # CUTS
        self.cutList = cutList
        self.cutSequenceMode = cutSequenceMode
        self.minCut = SampleTree.findMinimumCut(self.cutList, cutSequenceMode=self.cutSequenceMode)

        # PATHS (fall back to config entries, then to local defaults)
        self.inputFolder = inputFolder
        self.outputFolder = (config.get('Directories', 'tmpSamples') if config else 'cache/') if outputFolder is None else outputFolder
        self.tmpFolder = (config.get('Directories', 'scratch') if config else 'tmp/') if tmpFolder is None else tmpFolder
        self.cachedFileNames = []
        self.tmpFiles = []
        # cache file names are derived from the hash and the part number
        self.outputFileNameFormat = '{outputFolder}/tmp_{hash}_{part}of{parts}.root'

        # BRANCHES and chunk information
        self.branches = branches
        self.branchesForHash = None     # for now make hash independent of selected branches
        self.hash = Hash(sample=sample, minCut=self.minCut, branches=self.branchesForHash, splitFilesChunkSize=splitFilesChunkSize, debug=False, inputPath=self.inputFolder).get()
        self.chunkNumber = chunkNumber
        self.splitFilesChunks = splitFilesChunks if splitFilesChunks > 1 else 1
        self.splitFilesChunkSize = splitFilesChunkSize
        
        # identifier is just used as an arbitrary name for print-out
        cutUsedForIdentifier = (self.minCut if len(self.minCut) < 60 else self.minCut[0:50] + '...').replace(' ', '')
        self.identifier = '{sample}[{cut}]of{parts}'.format(sample=self.sample, cut=cutUsedForIdentifier, parts=self.splitFilesChunks)
        self.sampleTree = None
        self.isCachedChecked = False

        self.createFolders()
Exemple #4
0
 def getTree(self):
     """Return the SampleTree built from the cached files.

     If the cache check has not been done yet, it is performed now; when the
     skim is not (fully) cached, the previously stored tree (possibly None)
     is returned unchanged.
     """
     # re-use an earlier positive cache check, otherwise check now
     cached = self.isCachedChecked or self.isCached()
     if cached:
         self.sampleTree = SampleTree(self.cachedFileNames,
                                      config=self.config)
         self.sampleTree.sampleIdentifier = self.sampleIdentifier
     return self.sampleTree
Exemple #5
0
    def getTree(self, chunkSize=-1, chunkNumber=-1):
        """Return the SampleTree of the cached skim, or one chunk of it.

        With both chunkSize and chunkNumber positive only that chunk of the
        cached file list is processed; with both negative (the default) all
        files are processed. Any other combination raises an exception.
        """
        # re-use an earlier positive cache check, otherwise check now
        cached = self.isCachedChecked or self.isCached()
        if cached:
            wantSingleChunk = chunkSize > 0 and chunkNumber > 0
            wantEverything = chunkSize < 0 and chunkNumber < 0
            if wantSingleChunk:
                begin = (chunkNumber - 1) * chunkSize
                fileNames = self.cachedFileNames[begin:begin + chunkSize]
            elif wantEverything:
                fileNames = self.cachedFileNames
            else:
                raise Exception("InvalidParameters")
            self.sampleTree = SampleTree(self.cachedFileNames, config=self.config, fileNamesToProcess=fileNames)
            self.sampleTree.sampleIdentifier = self.sampleIdentifier

            # even if all files exist, some might not be readable -- refuse
            # to hand back a partially loaded tree
            if not self.sampleTree.isCompleteTree():
                raise Exception("IncompleteTree")

        return self.sampleTree
Exemple #6
0
    def __init__(self, sample, cutList='1', branches=None, inputFolder=None, tmpFolder=None, outputFolder=None, chunkNumber=-1, splitFilesChunks=-1, splitFilesChunkSize=-1, debug=False, fileList=None, cutSequenceMode='AND', name='', config=None):
        """Prepare caching of a skimmed sample tree.

        sample:       a Sample object or a plain sample-identifier string.
        cutList:      cut expression(s) used to derive the minimal cut ('1' = no cut).
        branches:     branches to keep in the skim (hash is kept independent of them).
        chunkNumber / splitFilesChunks / splitFilesChunkSize: which part of the
                      split output this instance handles and how the input is chunked.
        fileList:     accepted but not used in this constructor.
        """
        self.config = config
        self.fileLocator = FileLocator(config=self.config)
        # debug can also be forced globally via the XBBDEBUG environment variable
        self.debug = debug or ('XBBDEBUG' in os.environ)

        # SAMPLE
        if isinstance(sample, Sample):
            # sample passed as Sample object
            # count number of chunks the cached data is split into
            defaultChunkSize = int(config.get('General', 'mergeCachingSize')) if config.has_option('General', 'mergeCachingSize') else 100
            splitFilesChunkSize = sample.mergeCachingSize if sample.mergeCachingSize > 0 else defaultChunkSize
            # countOnly=True: the SampleTree is only used to count parts, not to read events
            splitFilesChunks = SampleTree({'name': sample.identifier, 'folder': inputFolder}, countOnly=True, splitFilesChunkSize=splitFilesChunkSize, config=config, verbose=self.debug).getNumberOfParts()
            # if sample passed as object, it can be a 'subsample' and have a different name and identifier
            self.sample = sample.name
            self.sampleIdentifier = sample.identifier
            if self.debug:
                print ("INFO: use sample=", sample.name, " #parts = ", splitFilesChunks)
        else:
            # sample identifier passed as string
            self.sample = sample
            self.sampleIdentifier = sample
        self.name = name

        # CUTS
        self.cutList = cutList
        self.cutSequenceMode = cutSequenceMode
        self.minCut = SampleTree.findMinimumCut(self.cutList, cutSequenceMode=self.cutSequenceMode)

        # PATHS (fall back to config entries, then to local defaults)
        self.inputFolder = inputFolder
        self.outputFolder = (config.get('Directories', 'tmpSamples') if config else 'cache/') if outputFolder is None else outputFolder
        self.tmpFolder = (config.get('Directories', 'scratch') if config else 'tmp/') if tmpFolder is None else tmpFolder
        self.cachedFileNames = []
        self.tmpFiles = []
        # cache file names are derived from the hash and the part number
        self.outputFileNameFormat = '{outputFolder}/tmp_{hash}_{part}of{parts}.root'

        # BRANCHES and chunk information
        self.branches = branches
        self.branchesForHash = None     # for now make hash independent of selected branches
        self.hash = Hash(sample=sample, minCut=self.minCut, branches=self.branchesForHash, splitFilesChunkSize=splitFilesChunkSize, debug=False, inputPath=self.inputFolder).get()
        self.chunkNumber = chunkNumber
        self.splitFilesChunks = splitFilesChunks if splitFilesChunks > 1 else 1
        self.splitFilesChunkSize = splitFilesChunkSize
        
        # identifier is just used as an arbitrary name for print-out
        cutUsedForIdentifier = (self.minCut if len(self.minCut) < 60 else self.minCut[0:50] + '...').replace(' ', '')
        self.identifier = '{sample}[{cut}]of{parts}'.format(sample=self.sample, cut=cutUsedForIdentifier, parts=self.splitFilesChunks)
        self.sampleTree = None
        self.isCachedChecked = False

        self.createFolders()
Exemple #7
0
    def setStartingState(self, garbageSize, numLinear, numCircular):
        """Initialize the contig pool with equally-sized contigs.

        The N bases of the model are distributed over one dead 'garbage'
        circular contig of garbageSize bases (if > 0), numLinear linear
        contigs and numCircular circular contigs, all (modulo rounding) of
        the same size. Assumes numLinear + numCircular > 0 (otherwise the
        ratio computation divides by zero).
        """
        assert self.N > garbageSize + numLinear + numCircular
        self.pool = SampleTree()

        numGarbage = 0
        if garbageSize > 0:
            garbage = CircularContig(garbageSize)
            garbage.setDead()
            # pool entries are weighted by their number of bases
            self.pool.insert(garbage, garbage.numBases())
            numGarbage = 1
        
        # split the non-garbage bases between linear and circular contigs
        # proportionally to their counts; floor/ceil so the two parts sum
        # exactly to N - garbageSize
        lrat = float(numLinear) / (numLinear + numCircular)
        crat = float(numCircular) / (numLinear + numCircular)
        linearBases = math.floor((self.N - garbageSize) * lrat)
        circularBases = math.ceil((self.N - garbageSize) * crat)
        assert linearBases + circularBases + garbageSize == self.N

        if numLinear > 0:
            # base size per linear contig; the first 'extra' contigs get one more
            linSize = math.floor(linearBases / numLinear)
            extra = linearBases % numLinear
            added = 0
            for i in range(numLinear):
                size = linSize
                if i < extra:
                    size += 1
                # plus 1 since number of adjacencies is 1 + number of bases
                contig = LinearContig(size + 1)
                self.pool.insert(contig, contig.numBases())
                added += contig.size
            # each linear contig contributed one extra adjacency, hence + numLinear
            assert added == linearBases + numLinear
            assert self.pool.size() == numLinear + numGarbage
            assert self.pool.weight() == linearBases + garbageSize

        if numCircular > 0:
            # same distribution scheme for the circular contigs (no +1 here)
            circSize = math.floor(circularBases / numCircular)
            extra = circularBases % numCircular
            added = 0
            for i in range(numCircular):
                size = circSize
                if i < extra:
                    size += 1
                contig = CircularContig(size)
                self.pool.insert(contig, contig.numBases())
                added += contig.size
            assert added == circularBases
            assert self.pool.size() == numLinear + numCircular + numGarbage
            assert self.pool.weight() == circularBases + linearBases + \
            garbageSize
Exemple #8
0
    def getTree(self, chunkSize=-1, chunkNumber=-1):
        """Build and return the SampleTree from the cached skim files.

        chunkSize/chunkNumber both > 0 selects a single chunk of the cached
        file list; both < 0 (the default) selects all files; any other
        combination raises an exception.
        """
        # guard clause: without a (checked or fresh) positive cache result,
        # just hand back whatever tree is currently stored
        if not self.isCachedChecked and not self.isCached():
            return self.sampleTree

        if chunkSize < 0 and chunkNumber < 0:
            # default: process the complete cached file list
            fileNames = self.cachedFileNames
        elif chunkSize > 0 and chunkNumber > 0:
            fileNames = self.cachedFileNames[(chunkNumber - 1) * chunkSize:chunkNumber * chunkSize]
        else:
            raise Exception("InvalidParameters")

        self.sampleTree = SampleTree(self.cachedFileNames, config=self.config, fileNamesToProcess=fileNames)
        self.sampleTree.sampleIdentifier = self.sampleIdentifier

        # existence of all files does not guarantee readability; refuse to
        # return a partially loaded tree
        if not self.sampleTree.isCompleteTree():
            raise Exception("IncompleteTree")

        return self.sampleTree
Exemple #9
0
class TreeCache:
    """Caches skimmed (cut-applied) sample trees as ROOT files.

    A cache entry is identified by a hash built from the sample, the minimal
    cut and the chunking configuration; the skim can be split into several
    numbered parts. Skims are written to a temporary folder first and moved
    to their final location after writing completed.
    """

    def __init__(self, sample, cutList='1', branches=None, inputFolder=None, tmpFolder=None, outputFolder=None, chunkNumber=-1, splitFilesChunks=-1, splitFilesChunkSize=-1, debug=False, fileList=None, cutSequenceMode='AND', name='', config=None, fileLocator=None):
        """Prepare caching. sample may be a Sample object or an identifier string;
        fileList is accepted but not used in this constructor."""
        self.config = config
        self.fileLocator = fileLocator if fileLocator is not None else FileLocator(config=self.config)
        # debug can also be forced globally via the XBBDEBUG environment variable
        self.debug = debug or ('XBBDEBUG' in os.environ)

        # SAMPLE
        if isinstance(sample, Sample):
            # sample passed as Sample object
            # count number of chunks the cached data is split into
            defaultChunkSize = int(config.get('General', 'mergeCachingSize')) if config.has_option('General', 'mergeCachingSize') else 100
            splitFilesChunkSize = sample.mergeCachingSize if sample.mergeCachingSize > 0 else defaultChunkSize
            splitFilesChunks = SampleTree({'name': sample.identifier, 'folder': inputFolder}, countOnly=True, splitFilesChunkSize=splitFilesChunkSize, config=config, verbose=self.debug, fileLocator=self.fileLocator).getNumberOfParts()
            # if sample passed as object, it can be a 'subsample' and have a different name and identifier
            self.sample = sample.name
            self.sampleIdentifier = sample.identifier
            if self.debug:
                print ("INFO: use sample=", sample.name, " #parts = ", splitFilesChunks)
        else:
            # sample identifier passed as string
            self.sample = sample
            self.sampleIdentifier = sample
        self.name = name

        # CUTS
        self.cutList = cutList
        self.cutSequenceMode = cutSequenceMode
        self.minCut = SampleTree.findMinimumCut(self.cutList, cutSequenceMode=self.cutSequenceMode)

        # PATHS (fall back to config entries, then to local defaults)
        self.inputFolder = inputFolder
        self.outputFolder = (config.get('Directories', 'tmpSamples') if config else 'cache/') if outputFolder is None else outputFolder
        self.tmpFolder = (config.get('Directories', 'scratch') if config else 'tmp/') if tmpFolder is None else tmpFolder
        self.cachedFileNames = []
        self.tmpFiles = []
        self.outputFileNameFormat = '{outputFolder}/tmp_{hash}_{part}of{parts}.root'

        # BRANCHES and chunk information
        self.branches = branches
        self.branchesForHash = None     # for now make hash independent of selected branches
        self.hash = Hash(sample=sample, minCut=self.minCut, branches=self.branchesForHash, splitFilesChunkSize=splitFilesChunkSize, debug=False, inputPath=self.inputFolder).get()
        self.chunkNumber = chunkNumber
        self.splitFilesChunks = splitFilesChunks if splitFilesChunks > 1 else 1
        self.splitFilesChunkSize = splitFilesChunkSize

        # identifier is just used as an arbitrary name for print-out
        cutUsedForIdentifier = (self.minCut if len(self.minCut) < 60 else self.minCut[0:50] + '...').replace(' ', '')
        self.identifier = '{sample}[{cut}]of{parts}'.format(sample=self.sample, cut=cutUsedForIdentifier, parts=self.splitFilesChunks)
        self.sampleTree = None
        self.isCachedChecked = False

        self.createFolders()

    # free memory
    def deleteSampleTree(self):
        """Drop the reference to the current sample tree."""
        self.sampleTree = None

    # file, where skimmed tree is written to
    def getTmpFileName(self):
        """Return the path of the temporary file the skim is written to."""
        return self.outputFileNameFormat.format(
            outputFolder=self.tmpFolder,
            hash=self.hash,
            part=self.chunkNumber if self.chunkNumber > 0 else 1,
            parts='%d'%self.splitFilesChunks
        )

    # file, where skimmed tree is moved to after it has been written completely
    def getOutputFileName(self):
        """Return the final path of the skim file after writing completed."""
        return self.outputFileNameFormat.format(
            outputFolder=self.outputFolder,
            hash=self.hash,
            part=self.chunkNumber if self.chunkNumber > 0 else 1,
            parts='%d'%self.splitFilesChunks
        )

    # check existence of files with skimmed trees
    def findCachedFileNames(self, chunkNumber=-1):
        """Collect (and return) the names of existing cached skim files.

        chunkNumber < 1 searches for all parts, otherwise only for that part.
        The result is also stored in self.cachedFileNames, sorted by part number.
        """
        cachedFilesMaskRaw = self.outputFileNameFormat.format(
            outputFolder=self.outputFolder,
            hash=self.hash,
            part='*' if chunkNumber < 1 else '%d'%chunkNumber,
            parts=self.splitFilesChunks
        )
        cachedFilesMask = self.fileLocator.getLocalFileName(cachedFilesMaskRaw)

        # this does not work reliably on T3 worker nodes anymore 
        #self.cachedFileNames = glob.glob(cachedFilesMask)

        # workaround: use xrootd for directory listing 
        #self.cachedFileNames = self.fileLocator.glob_with_fallback(cachedFilesMaskRaw)

        # this solution uses a loop over all possible files and uses xrdfs stat instead of xrdfs ls
        self.cachedFileNames = self.fileLocator.get_numbered_file_list(cachedFilesMaskRaw, 1, self.splitFilesChunks)

        if self.debug:
            print ('DEBUG: search files:', cachedFilesMask)
            print ('\x1b[32mDEBUG: files:')
            for fileName in self.cachedFileNames:
                print (' > ', fileName)
            if len(self.cachedFileNames) < 1:
                print ('none!')
            print ('\x1b[0m(%d files found)'%len(self.cachedFileNames))

        # sort by the part number encoded in '..._<part>of<parts>.root'
        self.cachedFileNames = sorted(self.cachedFileNames, key=lambda x: int(x.split('_')[-1].split('of')[0]) if 'of' in x and '_' in x else -1)
        return self.cachedFileNames

    def getTotalNumberOfOutputFiles(self):
        """Return the number of parts the skim is split into."""
        return self.splitFilesChunks

    # check if a single part is cached, (only checks existence of the file, not validity!)
    def partIsCached(self):
        cachedFilesMaskRaw = self.outputFileNameFormat.format(
            outputFolder=self.outputFolder,
            hash=self.hash,
            part=self.chunkNumber,
            parts=self.splitFilesChunks
        )
        # this does not work reliably on T3 worker nodes anymore 
        #cachedFilesMask = self.fileLocator.getLocalFileName(cachedFilesMaskRaw)
        #return len(glob.glob(cachedFilesMask)) > 0
        return self.fileLocator.remoteFileExists(cachedFilesMaskRaw)

    # isCached == all files containing the skimmed tree found!
    def isCached(self):
        """Return True (and remember it) if all expected skim files exist."""
        self.findCachedFileNames()
        if (len(self.cachedFileNames) != self.splitFilesChunks and self.splitFilesChunks > 1) or len(self.cachedFileNames) == 0:
            if self.debug:
                print ('\x1b[32mDEBUG: not cached:', self.identifier, '\x1b[0m')
            return False
        self.isCachedChecked = True
        return True

    # check if an existing file can be opened without errors by ROOT
    def checkFileValidity(self, rawFileName):
        """Return True if ROOT can open the file cleanly; delete broken files."""
        xrootdFileName = self.fileLocator.getXrootdFileName(rawFileName)
        f = ROOT.TFile.Open(xrootdFileName, 'read')
        if not f or f.GetNkeys() == 0 or f.TestBit(ROOT.TFile.kRecovered) or f.IsZombie():
            print ('\x1b[31mWARNING: broken file:', rawFileName, ' => redo caching!\x1b[0m')
            if f:
                f.Close()
            self.deleteFile(rawFileName)
            return False
        if f:
            f.Close()
        return True

    # check if all cached files are valid
    def isCachedAndValid(self):
        """Return True if all skim files exist AND pass the ROOT validity check."""
        valid = True
        if self.isCached():
            # check file integrity
            for fileName in self.cachedFileNames:
                valid = valid and self.checkFileValidity(fileName)
        else:
            valid = False
        return valid

    # set input sampleTree object
    def setSampleTree(self, sampleTree):
        self.sampleTree = sampleTree
        return self

    # this prepares the caching by telling the sampleTree object what to write during processing of the file
    # note: does not run the caching by itself! needs an additional sampleTree.process()
    def cache(self):
        if self.sampleTree:
            outputFileName = self.getTmpFileName()
            # after the tree has been written, move it to the final location
            callbacks = {'afterWrite': self.moveFilesToFinalLocation}
            self.sampleTree.addOutputTree(outputFileName=outputFileName, cut=self.cutList, hash=self.hash, branches=self.branches, callbacks=callbacks, cutSequenceMode=self.cutSequenceMode, name=self.name)
            self.tmpFiles.append(outputFileName)
            if self.debug:
                print ('\x1b[32mDEBUG: output file for ', self.identifier, ' is ', outputFileName, '\x1b[0m')
        else:
            print ('\x1b[31mERROR: no sample tree connected!:', self.identifier, ' set the sampleTree first with "setSampleTree(sampleTree)" \x1b[0m')
        return self

    # return sample tree class of cached samples if all files found
    def getTree(self, chunkSize=-1, chunkNumber=-1):
        """Return the SampleTree of the cached skim, or one chunk of it.

        Both chunkSize and chunkNumber positive selects a single chunk; both
        negative (default) selects all files; anything else raises.
        """
        # if it has already been checked if tree is cached, then use this result directly
        isCached = self.isCachedChecked
        if not isCached:
            isCached = self.isCached()
        if isCached:
            if chunkSize > 0 and chunkNumber > 0:
                fileNames = self.cachedFileNames[(chunkNumber-1)*chunkSize:chunkNumber*chunkSize]
            elif chunkSize < 0 and chunkNumber < 0:
                fileNames = self.cachedFileNames
            else:
                raise Exception("InvalidParameters")
            self.sampleTree = SampleTree(self.cachedFileNames, config=self.config, fileNamesToProcess=fileNames)
            self.sampleTree.sampleIdentifier = self.sampleIdentifier

            # check if even though all files exist, they couldn't be accessed for some reason
            # and therefore the tree would be incomplete
            if not self.sampleTree.isCompleteTree():
                raise Exception("IncompleteTree")

        return self.sampleTree

    # delete file
    def deleteFile(self, rawFileName):
        if self.debug:
            print ('DELETE:', rawFileName)
        self.fileLocator.rm(rawFileName)

    # delete cached files
    def deleteCachedFiles(self, chunkNumber=-1):
        """Delete all (or one chunk's) existing cached skim files."""
        cachedFileNames = self.findCachedFileNames(chunkNumber=chunkNumber)
        for fileName in cachedFileNames:
            if self.fileLocator.fileExists(fileName):
                self.deleteFile(fileName)

    # create folders
    def createFolders(self):
        """Create the temporary and final output folders if they do not exist."""
        tmpfolderLocal = self.fileLocator.getLocalFileName(self.tmpFolder)
        if not os.path.isdir(tmpfolderLocal):
            print("DOES NOT EXIST:", tmpfolderLocal)
            try:
                xrootdFileName = self.fileLocator.getXrootdFileName(self.tmpFolder)
                if '://' not in xrootdFileName:
                    os.makedirs(self.tmpFolder)
                else:
                    # NOTE(review): shell=True with an interpolated path -- assumes the
                    # configured scratch path is trusted
                    command = 'gfal-mkdir %s' % (xrootdFileName)
                    returnCode = subprocess.call([command], shell=True)
                    if self.debug:
                        print(command, ' => ', returnCode)
                        print ()
            except Exception as e:
                # fixed: was a bare 'except: pass' which also swallowed SystemExit /
                # KeyboardInterrupt; keep best-effort behavior but log the failure
                print("WARNING: could not create tmp folder", self.tmpFolder, "->", e)

        if not self.fileLocator.exists(self.outputFolder):
            print("INFO: output folder does not exist and will be created:", self.outputFolder)
            self.fileLocator.makedirs(self.outputFolder)

    # move files from temporary to final location
    def moveFilesToFinalLocation(self, raiseOnFailure=True):
        """Copy all written tmp files to the output folder; retry once per file.

        Returns True on success; on failure either raises (default) or returns False.
        """
        success = True
        # free some memory for file copy command
        if self.debug:
            print('DEBUG: max mem used A:', resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)
        self.deleteSampleTree()
        if self.debug:
            print('DEBUG: max mem used B:', resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)

        for tmpFileName in self.tmpFiles:
            # re-root the tmp file path from the scratch folder into the output folder
            outputFileName = self.outputFolder + '/' + self.tmpFolder.join(tmpFileName.split(self.tmpFolder)[1:])
            print ('copy ', tmpFileName, ' to ', outputFileName)
            if self.fileLocator.fileExists(outputFileName):
                self.deleteFile(outputFileName)
            copySuccessful = self.fileLocator.cp(tmpFileName, outputFileName)
            if not copySuccessful:
                print("WARNING: first copy attempt failed! retry once!")
                self.fileLocator.debug = True
                copySuccessful = self.fileLocator.cp(tmpFileName, outputFileName)
                if not copySuccessful:
                    success = False
                    print('\x1b[31mERROR: copy failed for {tmpfile}->{outputfile} !\x1b[0m'.format(tmpfile=tmpFileName,
                                                                                                outputfile=outputFileName))
                    if raiseOnFailure:
                        raise Exception("CopyToFinalDestinationFailed")
            if success:
                # delete temporary file if copy was successful
                self.deleteFile(tmpFileName)
        return success
Exemple #10
0
    #print(config.get('Weights','weightF'))
    #config = XbbConfigReader.read('Zvv2017')

    # test input: a single skimmed file and its sample folder on the PSI T3 storage element
    inputFile = 'root://t3dcachedb03.psi.ch:1094//pnfs/psi.ch/cms/trivcat/store/user/berger_p2/VHbb/VHbbPostNano2017/V5/Zvv/rerun/v4j/eval/ggZH_HToBB_ZToNuNu_M125_13TeV_powheg_pythia8/tree_aa5e971734ef4e885512748d534e6937ff03dc61feed21b6772ba943_000000_000000_0000_9_a6c5a52b56e5e0c7ad5aec31429c8926bf32cf39adbe087f05cfb323.root'
    path = 'root://t3dcachedb03.psi.ch:1094//pnfs/psi.ch/cms/trivcat/store/user/berger_p2/VHbb/VHbbPostNano2017/V5/Zvv/rerun/v4j/eval/'
    samplefiles = '../samples/VHbbPostNano2017_V5/merged_Zvv2017/'
    samplesinfo = 'Zvv2017config/samples_nosplit.ini'
    info = ParseInfo(samples_path=path, config=config)

    # pick the ggZH signal sample from the sample info
    sample = [
        x for x in info
        if x.identifier == 'ggZH_HToBB_ZToNuNu_M125_13TeV_powheg_pythia8'
    ][0]

    # read sample
    sampleTree = SampleTree([inputFile], config=config)

    # initialize module
    w = WeightAsBranch()
    w.customInit({
        'sampleTree': sampleTree,
        'config': config,
        'sample': sample,
        'pathIN': path
    })

    #addAsBranch = True
    addAsBranch = False

    # fixed: Python 2 print statement (syntax error under Python 3) -> print() function
    print('w.getBranches()', w.getBranches())
Exemple #11
0
 def __init__(self):
     """Create an empty model: fresh contig pool, fresh event queue, counters zeroed."""
     pool = SampleTree()
     events = EventQueue()
     self.pool = pool
     self.eventQueue = events
     # counters start from zero
     self.__resetCounts()
Exemple #12
0
class Model(object):
    """Stochastic DCJ (double-cut-and-join) rearrangement simulator.

    Maintains a weighted pool of contigs (a SampleTree, weighted by number
    of bases) plus an event queue.  Three event types rearrange adjacencies
    between "live" contigs and a single "dead" (garbage) contig:
    live-live, live-dead and dead-dead DCJ operations.
    """

    def __init__(self):
        # empty contig pool, empty event queue, all counters zeroed
        self.pool = SampleTree()
        self.eventQueue = EventQueue()
        self.__resetCounts()

    ##################################################################
    # model parameters:
    # N: (fixed) number of bases in the model
    # rll: rate for dcj on the bases in the contig pool
    # rld: rate for dcj where one break is in the pool
    #      and the other break is in the garbage
    # rdd: both in garbage
    # fl: telomere loss modifier
    # fg: telomere gain modifier
    # pgain: dead gain probability
    ##################################################################
    def setParameters(self, N, rll, rld = 0, rdd = 0, fl = 0, fg = 0,
                      pgain = 0):
        """Reset the event queue and register event types with total rates
        proportional to N (per-base rate times number of bases)."""
        self.eventQueue.reset()
        self.N = N
        self.fl = fl
        self.fg = fg
        self.pgain = pgain

        # only register event types with a strictly positive rate
        if rll > 0:
            self.eventQueue.addEventType(N * rll, self.__llEvent)
        if rld > 0:
            self.eventQueue.addEventType(N * rld, self.__ldEvent)
        if rdd > 0:
            self.eventQueue.addEventType(N * rdd, self.__ddEvent)

    ##################################################################
    # initialize the starting state
    # the contigs will all have the same sizes (modulo rounding)
    # in order to satisfy the input parameters exactly
    ##################################################################
    def setStartingState(self, garbageSize, numLinear, numCircular):
        """Populate the pool with one optional dead contig plus numLinear
        linear and numCircular circular live contigs of (nearly) equal size."""
        assert self.N > garbageSize + numLinear + numCircular
        self.pool = SampleTree()

        numGarbage = 0
        if garbageSize > 0:
            # the single dead "garbage" contig is modelled as circular
            garbage = CircularContig(garbageSize)
            garbage.setDead()
            self.pool.insert(garbage, garbage.numBases())
            numGarbage = 1

        # split remaining bases proportionally between linear and circular;
        # floor/ceil keep the total exactly equal to N - garbageSize
        lrat = float(numLinear) / (numLinear + numCircular)
        crat = float(numCircular) / (numLinear + numCircular)
        linearBases = math.floor((self.N - garbageSize) * lrat)
        circularBases = math.ceil((self.N - garbageSize) * crat)
        assert linearBases + circularBases + garbageSize == self.N

        if numLinear > 0:
            # distribute the rounding remainder over the first 'extra' contigs
            linSize = math.floor(linearBases / numLinear)
            extra = linearBases % numLinear
            added = 0
            for i in range(numLinear):
                size = linSize
                if i < extra:
                    size += 1
                # plus 1 since number of adjacencies is 1 + number of bases
                contig = LinearContig(size + 1)
                self.pool.insert(contig, contig.numBases())
                added += contig.size
            assert added == linearBases + numLinear
            assert self.pool.size() == numLinear + numGarbage
            assert self.pool.weight() == linearBases + garbageSize

        if numCircular > 0:
            circSize = math.floor(circularBases / numCircular)
            extra = circularBases % numCircular
            added = 0
            for i in range(numCircular):
                size = circSize
                if i < extra:
                    size += 1
                contig = CircularContig(size)
                self.pool.insert(contig, contig.numBases())
                added += contig.size
            assert added == circularBases
            assert self.pool.size() == numLinear + numCircular + numGarbage
            assert self.pool.weight() == circularBases + linearBases + \
            garbageSize

    ##################################################################
    # run the simulation for the specified time
    ##################################################################
    def simulate(self, time):
        """Drain events from the queue until the time horizon is reached."""
        self.eventQueue.begin()
        self.__resetCounts()
        while True:
            # next() returns a bound event callback, or None past 'time'
            nextEvent = self.eventQueue.next(time)
            if nextEvent is not None:
                nextEvent()
            else:
                break

    ##################################################################
    # draw two random adjacencies and their contigs from the pool
    # (sampling is weighted by number of bases; contigs are NOT
    # removed here -- callers remove the nodes themselves)
    ##################################################################
    def __drawSamples(self):
        sampleNode1, offset1 = self.pool.uniformSample()
        sampleNode2, offset2 = self.pool.uniformSample()

        # the offset is weighted based on the number of bases
        # we want to translate this into number of edges (splitting)
        # the probability between linear and telomere edges.
        # so for linear contigs with zero offset, we flip a coin to
        # move it to the other side.
        if sampleNode1.data.isLinear() and offset1 == 0:
            if random.random() < 0.5:
                offset1 = sampleNode1.data.numBases()
        if sampleNode2 is not sampleNode1 and sampleNode2.data.isLinear() and\
           offset2 == 0:
            if random.random() < 0.5:
                offset2 = sampleNode2.data.numBases()

        assert offset1 < sampleNode1.data.size
        assert offset2 < sampleNode2.data.size

        return (sampleNode1, offset1, sampleNode2, offset2)


    ##################################################################
    # LIVE-LIVE event.  Is normal DCJ operation between two live contigs
    # unless the two breakpoints are identical or on telomeres, in which
    # case fl and fg parameters are used to use fission operations to
    # modify the number of telomeres
    ##################################################################
    def __llEvent(self):
        # nothing to do on an (effectively) empty pool
        if self.pool.size() == 0 or self.pool.weight() == 1:
            return

        # draw two random adjacencies and their contigs from the pool
        sampleNode1, offset1, sampleNode2, offset2 = self.__drawSamples()
        c1 = sampleNode1.data
        c2 = sampleNode2.data

        # don't deal with dead contigs in this event
        if c1.isDead() == True or c2.isDead() == True:
            return

        # remove the sampled contigs; they are re-inserted after the DCJ
        self.pool.remove(sampleNode1)
        if c1 is not c2:
            self.pool.remove(sampleNode2)

        # case 1) gain of telomere (both breakpoints identical)
        if sampleNode1 is sampleNode2 and offset1 == offset2:
            return self.__llGain(c1, c2, offset1, offset2)


        # case 2) loss of telomere (both breakpoints on telomere edges)
        elif c1.isLinear() and c2.isLinear() and \
                 (offset1 == 0 or offset1 == c1.size - 1) and \
                 (offset2 == 0 or offset2 == c2.size - 1):
            return self.__llLoss(c1, c2, offset1, offset2)

        # case 3) no gain or loss: plain DCJ with random orientation
        self.llCount += 1
        forward = random.randint(0, 1) == 1

        # do the dcj
        dcjResult = dcj(c1, offset1, c2, offset2, forward)

        # add the resulting contigs back to the pool
        for res in dcjResult:
            self.pool.insert(res, res.numBases())

    ##################################################################
    # Do the fission telomere gain operation (if fg check passes)
    ##################################################################
    def __llGain(self, c1, c2, offset1, offset2):
        # only fission if the breakpoint is not already on a telomere edge
        if c1.isCircular() or (offset1 != 0 and offset1 != c1.size - 1):
            # gain happens with probability fg
            forward = self.fg > random.random()
            if forward:
                self.fgCount += 1
                dcjResult = dcj(c1, offset1, c2, offset2, forward)
                # circular fission -> one linear; linear fission -> two linear
                if c1.isCircular():
                    assert len(dcjResult) == 1 and dcjResult[0].isLinear()
                else:
                    assert len(dcjResult) == 2 and dcjResult[0].isLinear() \
                           and dcjResult[1].isLinear()
                # add the resulting contigs back to the pool
                for res in dcjResult:
                    self.pool.insert(res, res.numBases())
                return

        # gain rejected: put the untouched contigs back
        self.pool.insert(c1, c1.numBases())
        if c2 is not c1:
            self.pool.insert(c2, c2.numBases())

    ##################################################################
    # Do the fission telomere loss operation (if fl check passes)
    ##################################################################
    def __llLoss(self, c1, c2, offset1, offset2):
        # same contig loses two telomere pairs, so half the probability
        if c1 is c2:
            forward = self.fl / 4.0 > random.random()
        else:
            forward = self.fl / 2.0 > random.random()
        if forward:
            # circularize first, then the DCJ fuses the circles
            c1 = c1.circularize()
            if c1 is not c2:
                c2 = c2.circularize()
            dcjResult = dcj(c1, offset1, c2, offset2, forward)
            self.flCount += 1
            assert len(dcjResult) == 1
            if c1 is not c2:
                assert dcjResult[0].isLinear()
            else:
                assert dcjResult[0].isCircular()
            # add the resulting contigs back to the pool
            for res in dcjResult:
                self.pool.insert(res, res.numBases())
        else:
            # loss rejected: put the untouched contigs back
            self.pool.insert(c1, c1.numBases())
            if c2 is not c1:
                self.pool.insert(c2, c2.numBases())


    ##################################################################
    # LIVE-DEAD (or DEAD-LIVE) event.  One contig is alive and the
    # other is the unique dead contig.  This can result in a loss of
    # live contigs and/or change in number of live bases
    ##################################################################
    def __ldEvent(self):
        if self.pool.size() == 0 or self.pool.weight() == 1:
            return

        # draw two random adjacencies and their contigs from the pool
        sampleNode1, offset1, sampleNode2, offset2 = self.__drawSamples()
        c1 = sampleNode1.data
        c2 = sampleNode2.data

        # only deal with exactly one live and one dead contig in this event
        if (c1.isDead() == c2.isDead()):
            return

        self.pool.remove(sampleNode1)
        if c1 is not c2:
            self.pool.remove(sampleNode2)

        # make sure c1 is alive and c2 is dead
        if c1.isDead():
            c1, c2 = c2, c1
            offset1, offset2 = offset2, offset1

        # do the dcj with random orientation
        dcjResult = dcj(c1, offset1, c2, offset2, random.randint(0, 1) == 1)

        # pick which result contig becomes dead, weighted by size
        deadIdx = 0;
        if len(dcjResult) == 2 and \
               random.randint(0, dcjResult[0].size + dcjResult[1].size) >= \
               dcjResult[0].size:
            deadIdx = 1
        dcjResult[deadIdx].setDead(True)

        # one result contig means the live contig was absorbed (loss)
        if len(dcjResult) == 1:
            self.ldLossCount += 1
        else:
            self.ldSwapCount += 1

        # add the resulting contigs back to the pool
        deadCount = 0
        for res in dcjResult:
            if res.isDead():
                deadCount += 1
            self.pool.insert(res, res.numBases())
        # invariant: exactly one dead contig in the model
        assert deadCount == 1

    ##################################################################
    # DEAD-DEAD event.  The dead contig rearranges with itself.  pgain
    # is used to decide how often this operation breaks off a new circular
    # live chromosome
    ##################################################################
    def __ddEvent(self):
        if self.pool.size() == 0 or self.pool.weight() == 1:
            return

        sampleNode1, offset1, sampleNode2, offset2 = self.__drawSamples()
        c1 = sampleNode1.data
        c2 = sampleNode2.data

        # only deal with dead / dead contigs in this event
        if (c1.isDead() == False or c2.isDead() == False):
            return

        # only support single dead contig
        assert c1 is c2

        # don't know what to do here
        if (offset1 == offset2):
            return

        self.pool.remove(sampleNode1)
        if c1 is not c2:
            self.pool.remove(sampleNode2)

        # forward means do not cut (cut off a new circle with prob. pgain)
        forward = random.random() > self.pgain

        # do the dcj
        dcjResult = dcj(c1, offset1, c2, offset2, forward)

        # pick which result contig stays dead, weighted by size
        deadIdx = 0;
        if len(dcjResult) == 2 and \
               random.randint(0, dcjResult[0].size + dcjResult[1].size) \
                            >= dcjResult[0].size:
                    deadIdx = 1
        dcjResult[deadIdx].setDead(True)

        if forward:
            self.ddSwapCount += 1
            assert len(dcjResult) == 1
        else:
            # a new live circular chromosome was gained
            self.ddGainCount += 1
            assert len(dcjResult) == 2
            assert not dcjResult[0].isDead() or not dcjResult[1].isDead()

        # add the resulting contigs back to the pool
        for res in dcjResult:
            self.pool.insert(res, res.numBases())


    ##################################################################
    # all counters set to zero.
    ##################################################################
    def __resetCounts(self):
        self.llCount = 0
        self.fgCount = 0
        self.flCount = 0
        self.ldLossCount = 0
        self.ldSwapCount = 0
        self.ddGainCount = 0
        self.ddSwapCount = 0
Exemple #13
0

if __name__ == '__main__':
    # Smoke test: run the JetSmearer module's initialization against a single
    # 2017 W(lv)H(bb) signal file read through the CERN xrootd redirector.
    xbbConfig = XbbConfigReader.read('Wlv2017')
    sampleInfo = ParseInfo(config=xbbConfig)

    # pick the signal sample by its identifier
    matchingSamples = [
        s for s in sampleInfo
        if s.identifier == 'WplusH_HToBB_WToLNu_M125_13TeV_powheg_pythia8'
    ]
    signalSample = matchingSamples[0]

    # open the input tree
    inputFiles = [
        '/store/group/phys_higgs/hbb/ntuples/VHbbPostNano/2017/V11/WplusH_HToBB_WToLNu_M125_13TeV_powheg_pythia8/adewit-crab_nano2017_WplusH_HT81/190606_065851/0000/tree_1.root'
    ]
    smokeTree = SampleTree(inputFiles,
                           treeName='Events',
                           xrootdRedirector="root://eoscms.cern.ch/")

    # initialize the smearing module with tree, sample and config
    smearer = JetSmearer("2017")
    smearer.customInit({
        'sampleTree': smokeTree,
        'sample': signalSample,
        'config': xbbConfig
    })
    eventCounter = 0
    # event loop kept disabled; enable to process a few events
    #for event in smokeTree:
    #    smearer.processEvent(event)
    #    eventCounter += 1
    #    if eventCounter == 3: break
Exemple #14
0
class TreeCache:
    """Caches skimmed (cut-applied) copies of a sample's trees on disk.

    Cached files are identified by a hash of (sample, minimal cut, chunk
    size, input path), written first to a temporary folder and then moved
    to the final output folder.  A sample can be split into several chunks
    ("parts"); each TreeCache instance handles one chunk.
    """

    def __init__(self, sample, cutList='1', branches=None, inputFolder=None, tmpFolder=None, outputFolder=None, chunkNumber=-1, splitFilesChunks=-1, splitFilesChunkSize=-1, debug=False, fileList=None, cutSequenceMode='AND', name='', config=None):
        self.config = config
        self.fileLocator = FileLocator(config=self.config)
        # debug output can also be switched on globally via the XBBDEBUG env var
        self.debug = debug or ('XBBDEBUG' in os.environ)

        # SAMPLE
        if isinstance(sample, Sample):
            # sample passed as Sample object
            # count number of chunks the cached data is split into
            defaultChunkSize = int(config.get('General', 'mergeCachingSize')) if config.has_option('General', 'mergeCachingSize') else 100
            splitFilesChunkSize = sample.mergeCachingSize if sample.mergeCachingSize > 0 else defaultChunkSize
            splitFilesChunks = SampleTree({'name': sample.identifier, 'folder': inputFolder}, countOnly=True, splitFilesChunkSize=splitFilesChunkSize, config=config, verbose=self.debug).getNumberOfParts()
            # if sample passed as object, it can be a 'subsample' and have different name and identifier
            self.sample = sample.name
            self.sampleIdentifier = sample.identifier
            if self.debug:
                print ("INFO: use sample=", sample.name, " #parts = ", splitFilesChunks)
        else:
            # sample identifier passed as string
            self.sample = sample
            self.sampleIdentifier = sample
        self.name = name

        # CUTS
        self.cutList = cutList
        self.cutSequenceMode = cutSequenceMode
        # the minimal cut determines what ends up in the cache (and its hash)
        self.minCut = SampleTree.findMinimumCut(self.cutList, cutSequenceMode=self.cutSequenceMode)

        # PATHS
        self.inputFolder = inputFolder
        self.outputFolder = (config.get('Directories', 'tmpSamples') if config else 'cache/') if outputFolder is None else outputFolder
        self.tmpFolder = (config.get('Directories', 'scratch') if config else 'tmp/') if tmpFolder is None else tmpFolder
        self.cachedFileNames = []
        self.tmpFiles = []
        self.outputFileNameFormat = '{outputFolder}/tmp_{hash}_{part}of{parts}.root'

        # BRANCHES and chunk information
        self.branches = branches
        self.branchesForHash = None     # for now make hash independent of selected branches
        self.hash = Hash(sample=sample, minCut=self.minCut, branches=self.branchesForHash, splitFilesChunkSize=splitFilesChunkSize, debug=False, inputPath=self.inputFolder).get()
        self.chunkNumber = chunkNumber
        self.splitFilesChunks = splitFilesChunks if splitFilesChunks > 1 else 1
        self.splitFilesChunkSize = splitFilesChunkSize

        # identifier is just used as an arbitrary name for print-out
        cutUsedForIdentifier = (self.minCut if len(self.minCut) < 60 else self.minCut[0:50] + '...').replace(' ', '')
        self.identifier = '{sample}[{cut}]of{parts}'.format(sample=self.sample, cut=cutUsedForIdentifier, parts=self.splitFilesChunks)
        self.sampleTree = None
        self.isCachedChecked = False

        self.createFolders()

    # free memory (drop the reference to the input tree)
    def deleteSampleTree(self):
        self.sampleTree = None

    # file, where skimmed tree is written to
    def getTmpFileName(self):
        return self.outputFileNameFormat.format(
            outputFolder=self.tmpFolder,
            hash=self.hash,
            part=self.chunkNumber if self.chunkNumber > 0 else 1,
            parts='%d'%self.splitFilesChunks
        )

    # file, where skimmed tree is moved to after it has been written completely
    def getOutputFileName(self):
        return self.outputFileNameFormat.format(
            outputFolder=self.outputFolder,
            hash=self.hash,
            part=self.chunkNumber if self.chunkNumber > 0 else 1,
            parts='%d'%self.splitFilesChunks
        )

    # check existence of files with skimmed trees; chunkNumber=-1 matches all parts
    def findCachedFileNames(self, chunkNumber=-1):
        cachedFilesMaskRaw = self.outputFileNameFormat.format(
            outputFolder=self.outputFolder,
            hash=self.hash,
            part='*' if chunkNumber < 1 else '%d'%chunkNumber,
            parts=self.splitFilesChunks
        )
        cachedFilesMask = self.fileLocator.getLocalFileName(cachedFilesMaskRaw)
        self.cachedFileNames = glob.glob(cachedFilesMask)
        if self.debug:
            print ('DEBUG: search files:', cachedFilesMask)
            print ('\x1b[32mDEBUG: files:')
            for fileName in self.cachedFileNames:
                print (' > ', fileName)
            if len(self.cachedFileNames) < 1:
                print ('none!')
            print ('\x1b[0m(%d files found)'%len(self.cachedFileNames))

        # sort by part number (the "<part>of<parts>" suffix in the file name)
        self.cachedFileNames = sorted(self.cachedFileNames, key=lambda x: int(x.split('_')[-1].split('of')[0]) if 'of' in x and '_' in x else -1)
        return self.cachedFileNames

    # check if a single part is cached, (only checks existence of the file, not validity!)
    def partIsCached(self):
        cachedFilesMaskRaw = self.outputFileNameFormat.format(
            outputFolder=self.outputFolder,
            hash=self.hash,
            part=self.chunkNumber,
            parts=self.splitFilesChunks
        )
        cachedFilesMask = self.fileLocator.getLocalFileName(cachedFilesMaskRaw)
        return len(glob.glob(cachedFilesMask)) > 0

    # isCached == all files containing the skimmed tree found!
    def isCached(self):
        self.findCachedFileNames()
        if (len(self.cachedFileNames) != self.splitFilesChunks and self.splitFilesChunks > 1) or len(self.cachedFileNames) == 0:
            if self.debug:
                print ('\x1b[32mDEBUG: not cached:', self.identifier, '\x1b[0m')
            return False
        self.isCachedChecked = True
        return True

    # check if an existing file can be opened without errors by ROOT;
    # broken files are deleted so they get re-cached
    def checkFileValidity(self, rawFileName):
        xrootdFileName = self.fileLocator.getXrootdFileName(rawFileName)
        f = ROOT.TFile.Open(xrootdFileName, 'read')
        if not f or f.GetNkeys() == 0 or f.TestBit(ROOT.TFile.kRecovered) or f.IsZombie():
            print ('\x1b[31mWARNING: broken file:', rawFileName, ' => redo caching!\x1b[0m')
            if f:
                f.Close()
            self.deleteFile(rawFileName)
            return False
        if f:
            f.Close()
        return True

    # check if all cached files are valid
    def isCachedAndValid(self):
        valid = True
        if self.isCached():
            # check file integrity
            for fileName in self.cachedFileNames:
                valid = valid and self.checkFileValidity(fileName)
        else:
            valid = False
        return valid

    # set input sampleTree object (returns self for chaining)
    def setSampleTree(self, sampleTree):
        self.sampleTree = sampleTree
        return self

    # this prepares the caching by telling the sampleTree object what to write during processing of the file
    # note: does not run the caching by itself! needs an additional sampleTree.process()
    def cache(self):
        if self.sampleTree:
            outputFileName = self.getTmpFileName()
            # after the tree is written, move it from tmp to the final folder
            callbacks = {'afterWrite': self.moveFilesToFinalLocation}
            self.sampleTree.addOutputTree(outputFileName=outputFileName, cut=self.cutList, hash=self.hash, branches=self.branches, callbacks=callbacks, cutSequenceMode=self.cutSequenceMode, name=self.name)
            self.tmpFiles.append(outputFileName)
            if self.debug:
                print ('\x1b[32mDEBUG: output file for ', self.identifier, ' is ', outputFileName, '\x1b[0m')
        else:
            print ('\x1b[31mERROR: no sample tree connected!:', self.identifier, ' set the sampleTree first with "setSampleTree(sampleTree)" \x1b[0m')
        return self

    # return sample tree class of cached samples if all files found
    def getTree(self, chunkSize=-1, chunkNumber=-1):
        # if it has already been checked if tree is cached, then use this result directly
        isCached = self.isCachedChecked
        if not isCached:
            isCached = self.isCached()
        if isCached:
            # either both chunk parameters are given or neither of them
            if chunkSize > 0 and chunkNumber > 0:
                fileNames = self.cachedFileNames[(chunkNumber-1)*chunkSize:chunkNumber*chunkSize]
            elif chunkSize < 0 and chunkNumber < 0:
                fileNames = self.cachedFileNames
            else:
                raise Exception("InvalidParameters")
            self.sampleTree = SampleTree(self.cachedFileNames, config=self.config, fileNamesToProcess=fileNames)
            self.sampleTree.sampleIdentifier = self.sampleIdentifier

            # check if even though all files exist, they couldn't be accessed for some reason
            # and therefore the tree would be incomplete
            if not self.sampleTree.isCompleteTree():
                raise Exception("IncompleteTree")

        return self.sampleTree

    # delete file
    def deleteFile(self, rawFileName):
        if self.debug:
            print ('DELETE:', rawFileName)
        self.fileLocator.rm(rawFileName)

    # delete cached files
    def deleteCachedFiles(self, chunkNumber=-1):
        cachedFileNames = self.findCachedFileNames(chunkNumber=chunkNumber)
        for fileName in cachedFileNames:
            if self.fileLocator.fileExists(fileName):
                self.deleteFile(fileName)

    # create tmp and output folders (locally or via gfal-mkdir for remote paths)
    def createFolders(self):
        tmpfolderLocal = self.fileLocator.getLocalFileName(self.tmpFolder)
        if not os.path.isdir(tmpfolderLocal):
            print("DOES NOT EXIST:", tmpfolderLocal)
            # best-effort creation: failures are ignored here and will
            # surface later when writing the cache files
            try:
                xrootdFileName = self.fileLocator.getXrootdFileName(self.tmpFolder)
                if '://' not in xrootdFileName:
                    os.makedirs(self.tmpFolder)
                else:
                    command = 'gfal-mkdir %s' % (xrootdFileName)
                    returnCode = subprocess.call([command], shell=True)
                    if self.debug:
                        print(command, ' => ', returnCode)
                        print ()
            except:
                pass

        if not self.fileLocator.exists(self.outputFolder):
            print("INFO: output folder does not exist and will be created:", self.outputFolder)
            self.fileLocator.makedirs(self.outputFolder)

    # move files from temporary to final location; returns False if any copy failed
    def moveFilesToFinalLocation(self):
        success = True
        # free some memory for file copy command
        if self.debug:
            print('DEBUG: max mem used A:', resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)
        self.deleteSampleTree()
        if self.debug:
            print('DEBUG: max mem used B:', resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)

        for tmpFileName in self.tmpFiles:
            # rebuild the path of the tmp file relative to tmpFolder, below outputFolder
            outputFileName = self.outputFolder + '/' + self.tmpFolder.join(tmpFileName.split(self.tmpFolder)[1:])
            print ('copy ', tmpFileName, ' to ', outputFileName)
            # overwrite any stale file at the destination
            if self.fileLocator.fileExists(outputFileName):
                self.deleteFile(outputFileName)
            copySuccessful = self.fileLocator.cp(tmpFileName, outputFileName)
            if not copySuccessful:
                success = False
                print('\x1b[31mERROR: copy failed for {tmpfile}->{outputfile} !\x1b[0m'.format(tmpfile=tmpFileName,
                                                                                                outputfile=outputFileName))
            else:
                # delete temporary file if copy was successful
                self.deleteFile(tmpFileName)
        return success
Exemple #15
0
            #   print("Processed {0} events in {1:.2f} seconds, {2:.2f} ev/s".format(self.nEvent, tot_time, self.nEvent/tot_time))


if __name__ == '__main__':

    config = XbbConfigReader.read('Zvv2018')
    info = ParseInfo(config=config)
    sample = [
        x for x in info
        if x.identifier == 'ZH_HToBB_ZToNuNu_M125_13TeV_powheg_pythia8'
    ][0]

    #sampleTree = SampleTree(['/store/group/phys_higgs/hbb/ntuples/VHbbPostNano/2018/V12/ZH_HToBB_ZToNuNu_M125_13TeV_powheg_pythia8/RunIIAutumn18NanoAODv6-Nano25O133/200221_205457/0000/tree_1.root'], treeName='Events', xrootdRedirector="root://eoscms.cern.ch/")
    sampleTree = SampleTree([
        '/store/group/phys_higgs/hbb/ntuples/VHbbPostNano/2018/V13/ZH_HToBB_ZToNuNu_M125_13TeV_powheg_pythia8/RunIIAutumn18NanoAODv7-Nano02A85/200519_095652/0000/tree_1.root'
    ],
                            treeName='Events',
                            xrootdRedirector="root://eoscms.cern.ch/")
    w = JECcorrelator("2018")
    w.customInit({
        'sampleTree': sampleTree,
        'sample': sample,
        'config': config
    })
    sampleTree.addOutputBranches(w.getBranches())
    histograms = {}
    for jec in w.JEC_reduced:
        histograms[jec] = {}

    for var in [
            "Jet_pt", "Jet_mass", "MET_pt", "MET_phi", "FatJet_pt",