Ejemplo n.º 1
0
    def customInit(self, initVars):
        """Register the weight branches and, for non-data samples, the formulas.

        Stores 'sample', 'sampleTree' and 'config' from initVars on the
        instance and adds the branches self.branchName, "weightF" and
        "weightXS". For samples where sample.isData() is false it additionally
        builds the weight string (optionally multiplied by the per-sample
        special weight), registers the weight string and the 'EvalCut' cut as
        formulas on the sample tree, and stores the result of
        SampleTree.getScale() in self.weightScaleToXS.

        Args:
            initVars: dict with the keys 'sample', 'sampleTree' and 'config';
                'pathIN' is additionally read when the sample is not data.
        """
        self.sample = initVars['sample']
        self.sampleTree = initVars['sampleTree']
        self.config = initVars['config']
        self.addBranch(self.branchName)
        self.addBranch("weightF")
        self.addBranch("weightXS")

        if not self.sample.isData():
            self.weightString = self.config.get('Weights', 'weightF')
            # per sample special weight
            # NOTE(review): eval() executes whatever the 'useSpecialWeight'
            # option contains; this is only acceptable while the configuration
            # files are trusted. Consider config.getboolean() -- TODO confirm
            # that all existing configs use plain True/False values.
            if self.config.has_option('Weights', 'useSpecialWeight') and eval(
                    self.config.get('Weights', 'useSpecialWeight')):
                specialweight = self.sample.specialweight
                self.weightString = "(({weight})*({specialweight}))".format(
                    weight=self.weightString, specialweight=specialweight)
                print("INFO: use specialweight: {specialweight}".format(
                    specialweight=specialweight))

            self.evalCut = self.config.get('Cuts', 'EvalCut')
            self.sampleTree.addFormula(self.weightString)
            self.sampleTree.addFormula(self.evalCut)

            # hard-coded to False here, so the factor 2.0 below is not applied
            self.excludeTrainingSet = False

            # to compute the correct scale to cross-section, all trees of the sample have to be used!
            sampleTreeForCount = SampleTree(
                {
                    'sample': self.sample,
                    'folder': initVars['pathIN']
                },
                config=self.config)
            self.weightScaleToXS = sampleTreeForCount.getScale(
                self.sample) * (2.0 if self.excludeTrainingSet else 1.0)
            # single pre-formatted argument: prints the same text whether
            # print is the Python 2 statement or the Python 3 function
            print("scale: {0} {1}".format(self.weightScaleToXS, self.sample))
Ejemplo n.º 2
0
 def getTree(self):
     """Return the sample tree, rebuilding it from the cached files if cached.

     A positive result of an earlier cache check (self.isCachedChecked) is
     reused directly; otherwise self.isCached() is called. When the tree is
     not cached, the current self.sampleTree is returned unchanged.
     """
     if not (self.isCachedChecked or self.isCached()):
         return self.sampleTree

     self.sampleTree = SampleTree(self.cachedFileNames, config=self.config)
     self.sampleTree.sampleIdentifier = self.sampleIdentifier
     return self.sampleTree
Ejemplo n.º 3
0
    def __init__(self, sample, cutList='1', branches=None, inputFolder=None, tmpFolder=None, outputFolder=None, chunkNumber=-1, splitFilesChunks=-1, splitFilesChunkSize=-1, debug=False, fileList=None, cutSequenceMode='AND', name='', config=None, fileLocator=None):
        """Set up the cache description for one sample and create its folders.

        Args:
            sample: either a Sample object or a sample identifier string.
            cutList: cut(s) handed to SampleTree.findMinimumCut.
            branches: stored as self.branches; not part of the hash.
            inputFolder: folder the input trees are read from.
            tmpFolder: scratch folder; defaults to the 'Directories'/'scratch'
                config option, or 'tmp/' without a config.
            outputFolder: cache folder; defaults to the
                'Directories'/'tmpSamples' config option, or 'cache/' without
                a config.
            chunkNumber: stored as self.chunkNumber.
            splitFilesChunks: number of parts; recomputed when `sample` is a
                Sample object, and clamped to at least 1.
            splitFilesChunkSize: files per part; overwritten when `sample` is
                a Sample object.
            debug: enables debug output; also switched on when the XBBDEBUG
                environment variable is set.
            fileList: accepted for interface compatibility; not used here.
            cutSequenceMode: passed on to SampleTree.findMinimumCut.
            name: stored as self.name.
            config: configuration object, may be None.
            fileLocator: FileLocator to use; a new one is created if None.
        """
        self.config = config
        self.fileLocator = fileLocator if fileLocator is not None else FileLocator(config=self.config)
        self.debug = debug or ('XBBDEBUG' in os.environ)

        # SAMPLE
        if isinstance(sample, Sample):
            # sample passed as Sample object
            # count number of chunks the cached data is split into
            # config is optional (see the PATHS section), so only query it when it was given
            hasChunkSizeOption = config is not None and config.has_option('General', 'mergeCachingSize')
            defaultChunkSize = int(config.get('General', 'mergeCachingSize')) if hasChunkSizeOption else 100
            splitFilesChunkSize = sample.mergeCachingSize if sample.mergeCachingSize > 0 else defaultChunkSize
            splitFilesChunks = SampleTree({'name': sample.identifier, 'folder': inputFolder}, countOnly=True, splitFilesChunkSize=splitFilesChunkSize, config=config, verbose=self.debug, fileLocator=self.fileLocator).getNumberOfParts()
            # if sample passed as object, it can be a 'subsample' and have a different name and identifier
            self.sample = sample.name
            self.sampleIdentifier = sample.identifier
            if self.debug:
                print ("INFO: use sample=", sample.name, " #parts = ", splitFilesChunks)
        else:
            # sample identifier passed as string
            self.sample = sample
            self.sampleIdentifier = sample
        self.name = name

        # CUTS
        self.cutList = cutList
        self.cutSequenceMode = cutSequenceMode
        self.minCut = SampleTree.findMinimumCut(self.cutList, cutSequenceMode=self.cutSequenceMode)

        # PATHS
        self.inputFolder = inputFolder
        self.outputFolder = (config.get('Directories', 'tmpSamples') if config else 'cache/') if outputFolder is None else outputFolder
        self.tmpFolder = (config.get('Directories', 'scratch') if config else 'tmp/') if tmpFolder is None else tmpFolder
        self.cachedFileNames = []
        self.tmpFiles = []
        self.outputFileNameFormat = '{outputFolder}/tmp_{hash}_{part}of{parts}.root'

        # BRANCHES and chunk information
        self.branches = branches
        self.branchesForHash = None     # for now make hash independent of selected branches
        self.hash = Hash(sample=sample, minCut=self.minCut, branches=self.branchesForHash, splitFilesChunkSize=splitFilesChunkSize, debug=False, inputPath=self.inputFolder).get()
        self.chunkNumber = chunkNumber
        self.splitFilesChunks = splitFilesChunks if splitFilesChunks > 1 else 1
        self.splitFilesChunkSize = splitFilesChunkSize

        # identifier is just used as an arbitrary name for print-out
        cutUsedForIdentifier = (self.minCut if len(self.minCut) < 60 else self.minCut[0:50] + '...').replace(' ', '')
        self.identifier = '{sample}[{cut}]of{parts}'.format(sample=self.sample, cut=cutUsedForIdentifier, parts=self.splitFilesChunks)
        self.sampleTree = None
        self.isCachedChecked = False

        self.createFolders()
Ejemplo n.º 4
0
    def getTree(self, chunkSize=-1, chunkNumber=-1):
        """Return the sample tree, rebuilt from the cached files if cached.

        Args:
            chunkSize: number of cached files per chunk; -1 together with
                chunkNumber=-1 selects all cached files.
            chunkNumber: 1-based index of the chunk to process.

        Returns:
            self.sampleTree; left unchanged when the tree is not cached.

        Raises:
            Exception: "InvalidParameters" if chunkSize and chunkNumber are
                neither both positive nor both negative (checked only when
                the tree is cached); "IncompleteTree" if the rebuilt tree
                reports that it is not complete.
        """
        # reuse the result of an earlier cache check when it was positive
        if not (self.isCachedChecked or self.isCached()):
            return self.sampleTree

        wantsSingleChunk = chunkSize > 0 and chunkNumber > 0
        wantsEverything = chunkSize < 0 and chunkNumber < 0
        if not (wantsSingleChunk or wantsEverything):
            raise Exception("InvalidParameters")

        if wantsSingleChunk:
            firstFile = (chunkNumber - 1) * chunkSize
            lastFile = chunkNumber * chunkSize
            selectedFiles = self.cachedFileNames[firstFile:lastFile]
        else:
            selectedFiles = self.cachedFileNames

        self.sampleTree = SampleTree(self.cachedFileNames, config=self.config, fileNamesToProcess=selectedFiles)
        self.sampleTree.sampleIdentifier = self.sampleIdentifier

        # all files may exist and still be unreadable, which would leave the
        # tree incomplete
        if not self.sampleTree.isCompleteTree():
            raise Exception("IncompleteTree")

        return self.sampleTree
Ejemplo n.º 5
0
    # Manual smoke test for WeightAsBranch on a single remote file.
    # `config` is created before this fragment (not visible here).
    #print(config.get('Weights','weightF'))
    #config = XbbConfigReader.read('Zvv2017')

    inputFile = 'root://t3dcachedb03.psi.ch:1094//pnfs/psi.ch/cms/trivcat/store/user/berger_p2/VHbb/VHbbPostNano2017/V5/Zvv/rerun/v4j/eval/ggZH_HToBB_ZToNuNu_M125_13TeV_powheg_pythia8/tree_aa5e971734ef4e885512748d534e6937ff03dc61feed21b6772ba943_000000_000000_0000_9_a6c5a52b56e5e0c7ad5aec31429c8926bf32cf39adbe087f05cfb323.root'
    path = 'root://t3dcachedb03.psi.ch:1094//pnfs/psi.ch/cms/trivcat/store/user/berger_p2/VHbb/VHbbPostNano2017/V5/Zvv/rerun/v4j/eval/'
    # NOTE(review): samplefiles and samplesinfo are not used in the visible
    # part of this script -- kept in case later code relies on them
    samplefiles = '../samples/VHbbPostNano2017_V5/merged_Zvv2017/'
    samplesinfo = 'Zvv2017config/samples_nosplit.ini'
    info = ParseInfo(samples_path=path, config=config)

    # pick the sample with the matching identifier; raises IndexError if
    # there is none
    sample = [
        x for x in info
        if x.identifier == 'ggZH_HToBB_ZToNuNu_M125_13TeV_powheg_pythia8'
    ][0]

    # read sample
    sampleTree = SampleTree([inputFile], config=config)

    # initialize module
    w = WeightAsBranch()
    w.customInit({
        'sampleTree': sampleTree,
        'config': config,
        'sample': sample,
        'pathIN': path
    })

    #addAsBranch = True
    addAsBranch = False

    # single pre-formatted argument: prints the same text whether print is
    # the Python 2 statement or the Python 3 function
    print('w.getBranches() {0}'.format(w.getBranches()))
Ejemplo n.º 6
0

# Manual smoke test: builds a JetSmearer for "2017" and initialises it with
# one remote file of the WplusH sample. The event loop at the end is
# commented out, so no events are processed.
if __name__ == '__main__':

    config = XbbConfigReader.read('Wlv2017')

    info = ParseInfo(config=config)

    # pick the sample with the matching identifier; raises IndexError if the
    # sample list contains no such entry
    sample = [
        x for x in info
        if x.identifier == 'WplusH_HToBB_WToLNu_M125_13TeV_powheg_pythia8'
    ][0]
    # read sample
    sampleTree = SampleTree([
        '/store/group/phys_higgs/hbb/ntuples/VHbbPostNano/2017/V11/WplusH_HToBB_WToLNu_M125_13TeV_powheg_pythia8/adewit-crab_nano2017_WplusH_HT81/190606_065851/0000/tree_1.root'
    ],
                            treeName='Events',
                            xrootdRedirector="root://eoscms.cern.ch/")
    # initialize module
    w = JetSmearer("2017")
    w.customInit({
        'sampleTree': sampleTree,
        'sample': sample,
        'config': config
    })
    # event counter, only used by the disabled loop below
    n = 0
    # disabled: would run the module on the first three events
    #for event in sampleTree:
    #    w.processEvent(event)
    #    n=n+1
    #    if n==3: break
Ejemplo n.º 7
0
            #   print("Processed {0} events in {1:.2f} seconds, {2:.2f} ev/s".format(self.nEvent, tot_time, self.nEvent/tot_time))


if __name__ == '__main__':

    config = XbbConfigReader.read('Zvv2018')
    info = ParseInfo(config=config)
    sample = [
        x for x in info
        if x.identifier == 'ZH_HToBB_ZToNuNu_M125_13TeV_powheg_pythia8'
    ][0]

    #sampleTree = SampleTree(['/store/group/phys_higgs/hbb/ntuples/VHbbPostNano/2018/V12/ZH_HToBB_ZToNuNu_M125_13TeV_powheg_pythia8/RunIIAutumn18NanoAODv6-Nano25O133/200221_205457/0000/tree_1.root'], treeName='Events', xrootdRedirector="root://eoscms.cern.ch/")
    sampleTree = SampleTree([
        '/store/group/phys_higgs/hbb/ntuples/VHbbPostNano/2018/V13/ZH_HToBB_ZToNuNu_M125_13TeV_powheg_pythia8/RunIIAutumn18NanoAODv7-Nano02A85/200519_095652/0000/tree_1.root'
    ],
                            treeName='Events',
                            xrootdRedirector="root://eoscms.cern.ch/")
    w = JECcorrelator("2018")
    w.customInit({
        'sampleTree': sampleTree,
        'sample': sample,
        'config': config
    })
    sampleTree.addOutputBranches(w.getBranches())
    histograms = {}
    for jec in w.JEC_reduced:
        histograms[jec] = {}

    for var in [
            "Jet_pt", "Jet_mass", "MET_pt", "MET_phi", "FatJet_pt",