コード例 #1
0
    def run(self):

        nFilesProcessed = 0
        nFilesFailed = 0

        for subJob in self.subJobs:

            # only process if output is non-existing/broken or --force was used
            if self.opts.force or not self.fileLocator.isValidRootFile(subJob['outputFileName']):

                # create directories
                outputFolder = '/'.join(subJob['outputFileName'].split('/')[:-1])
                tmpFolder = '/'.join(subJob['tmpFileName'].split('/')[:-1])
                self.fileLocator.makedirs(outputFolder)
                self.fileLocator.makedirs(tmpFolder)

                # load sample tree
                sampleTree = SampleTree(subJob['localInputFileNames'], config=self.config)
                if not sampleTree.tree:
                    print "trying fallback...", len(subJob['inputFileNames'])

                    if len(subJob['inputFileNames']) == 1:
                        # try original naming scheme if reading directly from Heppy/Nano ntuples (without prep)
                        fileNameOriginal = self.pathIN + '/' + subJob['inputFileNames'][0]
                        print "FO:", fileNameOriginal
                        xrootdRedirector = self.fileLocator.getRedirector(fileNameOriginal)
                        sampleTree = SampleTree([fileNameOriginal], config=self.config, xrootdRedirector=xrootdRedirector)
                        if not sampleTree.tree:
                            print "\x1b[31mERROR: file does not exist or is broken, will be SKIPPED!\x1b[0m"
                            nFilesFailed += 1
                            continue
                    else:
                        print "\x1b[31mERROR: file does not exist or is broken, will be SKIPPED! (old naming scheme not supported for joining multipel files)\x1b[0m"
                        nFilesFailed += 1
                        continue

                # to use this syntax, use "--addCollections Sys.Vtype" for a config file entry like this:
                # [Sys]
                # Vtype = VtypeCorrector.VtypeCorrector(channel='Zll')
                # (instead of passing the tree in the constructor, the setTree method can be used)
                pyModules = []
                versionTable = []
                for collection in self.collections:
                    if '.' in collection:
                        section = collection.split('.')[0]
                        key = collection.split('.')[1]
                        if self.config.has_section(section) and self.config.has_option(section, key):
                            pyCode = self.config.get(section, key)
                        elif '(' in collection and collection.endswith(')'):
                            print "WARNING: config option", collection, " not found, interpreting it as Python code!"
                            pyCode = collection 
                        else:
                            print "\x1b[31mERROR: config option not found:", collection, ". To specify Python code directly, pass a complete constructor, e.g. --addCollections 'Module.Class()'. Module has to be placed in python/myutils/ folder.\x1b[0m"
                            raise Exception("ConfigError")

                        # import module from myutils
                        moduleName = pyCode.split('(')[0].split('.')[0].strip()
                        if self.debug:
                            print "DEBUG: import module:", moduleName
                            print("\x1b[33mDEBUG: " + collection + ": run PYTHON code:\n"+pyCode+"\x1b[0m")
                        globals()[moduleName] = importlib.import_module(".{module}".format(module=moduleName), package="myutils")

                        # get object
                        wObject = eval(pyCode)

                        # pass the tree and other variables if needed to finalize initialization
                        if hasattr(wObject, "customInit") and callable(getattr(wObject, "customInit")):
                            wObject.customInit({'config': self.config,
                                                'sampleTree': sampleTree,
                                                'tree': sampleTree.tree,
                                                'sample': self.sample,
                                                'channel': self.channel,
                                                'pathIN': self.pathIN,
                                                'pathOUT': self.pathOUT,
                                                })

                        # add callbacks if the objects provides any
                        if hasattr(wObject, "processEvent") and callable(getattr(wObject, "processEvent")):
                            sampleTree.addCallback('event', wObject.processEvent)
                        for cb in ["finish", "prepareOutput"]:
                            if hasattr(wObject, cb) and callable(getattr(wObject, cb)):
                                sampleTree.addCallback(cb, getattr(wObject, cb))

                        # add branches
                        if hasattr(wObject, "getBranches") and callable(getattr(wObject, "getBranches")):
                            sampleTree.addOutputBranches(wObject.getBranches())

                        pyModules.append(wObject)

                        versionTable.append([moduleName, wObject.getVersion() if hasattr(wObject, "getVersion") else 0])
                    else:
                        print "\x1b[31mERROR: config option not found:", collection, " the format should be: [Section].[Option]\x1b[0m"
                        raise Exception("ConfigError")

                for moduleName, moduleVersion in versionTable:
                    print " > {m}:{v}".format(m=moduleName, v=moduleVersion)

                # DEPRECATED, do not use anymore ---> use BranchTools.TreeFormulas()
                if 'addbranches' in self.collections:
                    writeNewVariables = eval(self.config.get("Regression", "writeNewVariablesDict"))
                    sampleTree.addOutputBranches(writeNewVariables)
                
                # DEPRECATED, do not use anymore ---> use BranchTools.Drop()
                if 'removebranches' in self.collections:
                    bl_branch = eval(config.get('Branches', 'useless_branch'))
                    for br in bl_branch:
                        sampleTree.addBranchToBlacklist(br)
                    bl_branch = eval(config.get('Branches', 'useless_after_sys'))
                    for br in bl_branch:
                        sampleTree.addBranchToBlacklist(br)

                # define output file 
                sampleTree.addOutputTree(subJob['tmpFileName'], cut='1', branches='*', friend=self.opts.friend)

                # run processing
                for pyModule in pyModules:
                    if hasattr(pyModule, "beforeProcessing"):
                        getattr(pyModule, "beforeProcessing")()

                sampleTree.process()

                for pyModule in pyModules:
                    if hasattr(pyModule, "afterProcessing"):
                        getattr(pyModule, "afterProcessing")()

                # if output trees have been produced: copy temporary file to output folder
                if sampleTree.getNumberOfOutputTrees() > 0: 
                    try:
                        self.fileLocator.cp(subJob['tmpFileName'], subJob['outputFileName'], force=True)
                        print 'copy ', subJob['tmpFileName'], subJob['outputFileName']

                        if self.verifyCopy:
                            if not self.fileLocator.isValidRootFile(subJob['outputFileName']):
                                print 'INFO: output at final destination broken, try to copy again from scratch disk to final destination...'
                                self.fileLocator.cp(subJob['tmpFileName'], subJob['outputFileName'], force=True)
                                print 'INFO: second attempt copy done!'
                                if not self.fileLocator.isValidRootFile(subJob['outputFileName']):
                                    print '\x1b[31mERROR: output still broken!\x1b[0m'
                                    nFilesFailed += 1
                                    raise Exception("FileCopyError")
                                else:
                                    print 'INFO: file is good after second attempt!'
                    except Exception as e:
                        print e
                        print "\x1b[31mERROR: copy from scratch to final destination failed!!\x1b[0m"

                    # delete temporary file
                    try:
                        self.fileLocator.rm(subJob['tmpFileName'])
                    except Exception as e:
                        print e
                        print "WARNING: could not delete file on scratch!"


                # clean up
                if hasattr(wObject, "cleanUp") and callable(getattr(wObject, "cleanUp")):
                    getattr(wObject, "cleanUp")()

            else:
                print 'SKIP:', subJob['inputFileNames']

        if nFilesFailed > 0:
            raise Exception("ProcessingIncomplete")
コード例 #2
0
ファイル: sys_new.py プロジェクト: perrozzi/Xbb
                # get object
                wObject = eval(pyCode)

                # pass the tree and other variables if needed to finalize initialization
                if hasattr(wObject, "customInit") and callable(getattr(wObject, "customInit")):
                    wObject.customInit({'config': config,
                                        'sampleTree': sampleTree,
                                        'tree': sampleTree.tree,
                                        'sample': sample,
                                        'channel': channel,
                                        })

                # add callbacks if the objects provides any
                if hasattr(wObject, "processEvent") and callable(getattr(wObject, "processEvent")):
                    sampleTree.addCallback('event', wObject.processEvent)

                # add branches
                if hasattr(wObject, "getBranches") and callable(getattr(wObject, "getBranches")):
                    sampleTree.addOutputBranches(wObject.getBranches())

        # TODO: this can also be made a separate module
        if 'addbranches' in collections:
            writeNewVariables = eval(config.get("Regression", "writeNewVariablesDict"))
            sampleTree.addOutputBranches(writeNewVariables)

        if 'removebranches' in collections:
            bl_branch = eval(config.get('Branches', 'useless_branch'))
            for br in bl_branch:
                sampleTree.addBranchToBlacklist(br)
            bl_branch = eval(config.get('Branches', 'useless_after_sys'))
コード例 #3
0
                # get object
                wObject = eval(pyCode)

                # pass the tree and other variables if needed to finalize initialization
                if hasattr(wObject, "customInit") and callable(getattr(wObject, "customInit")):
                    wObject.customInit({'config': config,
                                        'sampleTree': sampleTree,
                                        'tree': sampleTree.tree,
                                        'sample': sample,
                                        'channel': channel,
                                        })

                # add callbacks if the objects provides any
                if hasattr(wObject, "processEvent") and callable(getattr(wObject, "processEvent")):
                    sampleTree.addCallback('event', wObject.processEvent)

                # add branches
                if hasattr(wObject, "getBranches") and callable(getattr(wObject, "getBranches")):
                    sampleTree.addOutputBranches(wObject.getBranches())

        # TODO: this can also be made a separate module
        if 'addbranches' in collections:
            writeNewVariables = eval(config.get("Regression", "writeNewVariablesDict"))
            sampleTree.addOutputBranches(writeNewVariables)

        if 'removebranches' in collections:
            bl_branch = eval(config.get('Branches', 'useless_branch'))
            for br in bl_branch:
                sampleTree.addBranchToBlacklist(br)
            bl_branch = eval(config.get('Branches', 'useless_after_sys'))
コード例 #4
0
for syst in systematics:
    config.set("dnnVars", syst, treeVarSet[syst])

# helper for fs operations
fileLocator = FileLocator(config=config, xrootdRedirector=xrootdRedirector)
fileLocator.mkdir(outputFolder)

# load input files
sampleTree = SampleTree([inputFile], treeName="tree", xrootdRedirector=xrootdRedirector)

# load tensorflow evaluator
tfe = tensorflowEvaluator.tensorflowEvaluator(mvaName)
tfe.customInit({'config': config, 'sample': sample, 'sampleTree': sampleTree})

# register callbacks for processing
sampleTree.addCallback('event', tfe.processEvent)

# define new branches to add
sampleTree.addOutputBranches(tfe.getBranches())

# define output file 
tmpFileName = scratch + '/' + inputFile.split('/')[-1]
outputFileName = outputFolder + '/' + inputFile.split('/')[-1]
sampleTree.addOutputTree(tmpFileName, cut='1', branches='*')

# process tree
sampleTree.process()

# copy to final location
try:
    fileLocator.cp(tmpFileName, outputFileName)
コード例 #5
0
ファイル: sys_new.py プロジェクト: GLP90/Xbb
    def run(self):

        nFilesProcessed = 0
        nFilesFailed = 0

        for subJob in self.subJobs:

            # only process if output is non-existing/broken or --force was used
            if self.opts.force or not self.fileLocator.isValidRootFile(subJob['outputFileName']):

                # create directories
                outputFolder = '/'.join(subJob['outputFileName'].split('/')[:-1])
                tmpFolder = '/'.join(subJob['tmpFileName'].split('/')[:-1])
                self.fileLocator.makedirs(outputFolder)
                self.fileLocator.makedirs(tmpFolder)

                # load sample tree
                sampleTree = SampleTree(subJob['localInputFileNames'], config=self.config)
                if not sampleTree.tree:
                    print "trying fallback...", len(subJob['inputFileNames'])

                    if len(subJob['inputFileNames']) == 1:
                        # try original naming scheme if reading directly from Heppy/Nano ntuples (without prep)
                        fileNameOriginal = self.pathIN + '/' + subJob['inputFileNames'][0]
                        print "FO:", fileNameOriginal
                        xrootdRedirector = self.fileLocator.getRedirector(fileNameOriginal)
                        sampleTree = SampleTree([fileNameOriginal], config=self.config, xrootdRedirector=xrootdRedirector)
                        if not sampleTree.tree:
                            print "\x1b[31mERROR: file does not exist or is broken, will be SKIPPED!\x1b[0m"
                            nFilesFailed += 1
                            continue
                    else:
                        print "\x1b[31mERROR: file does not exist or is broken, will be SKIPPED! (old naming scheme not supported for joining multipel files)\x1b[0m"
                        nFilesFailed += 1
                        continue

                # to use this syntax, use "--addCollections Sys.Vtype" for a config file entry like this:
                # [Sys]
                # Vtype = VtypeCorrector.VtypeCorrector(channel='Zll')
                # (instead of passing the tree in the constructor, the setTree method can be used)
                pyModules = []
                for collection in self.collections:
                    if '.' in collection:
                        section = collection.split('.')[0]
                        key = collection.split('.')[1]
                        pyCode = self.config.get(section, key)

                        # import module from myutils
                        moduleName = pyCode.split('(')[0].split('.')[0].strip()
                        if self.debug:
                            print "DEBUG: import module:", moduleName
                            print("\x1b[33mDEBUG: " + collection + ": run PYTHON code:\n"+pyCode+"\x1b[0m")
                        globals()[moduleName] = importlib.import_module(".{module}".format(module=moduleName), package="myutils")

                        # get object
                        wObject = eval(pyCode)

                        # pass the tree and other variables if needed to finalize initialization
                        if hasattr(wObject, "customInit") and callable(getattr(wObject, "customInit")):
                            wObject.customInit({'config': self.config,
                                                'sampleTree': sampleTree,
                                                'tree': sampleTree.tree,
                                                'sample': self.sample,
                                                'channel': self.channel,
                                                'pathIN': self.pathIN,
                                                'pathOUT': self.pathOUT,
                                                })

                        # add callbacks if the objects provides any
                        if hasattr(wObject, "processEvent") and callable(getattr(wObject, "processEvent")):
                            sampleTree.addCallback('event', wObject.processEvent)

                        # add branches
                        if hasattr(wObject, "getBranches") and callable(getattr(wObject, "getBranches")):
                            sampleTree.addOutputBranches(wObject.getBranches())

                        pyModules.append(wObject)

                # DEPRECATED, do not use anymore ---> use BranchTools.TreeFormulas()
                if 'addbranches' in self.collections:
                    writeNewVariables = eval(self.config.get("Regression", "writeNewVariablesDict"))
                    sampleTree.addOutputBranches(writeNewVariables)
                
                # DEPRECATED, do not use anymore ---> use BranchTools.Drop()
                if 'removebranches' in self.collections:
                    bl_branch = eval(config.get('Branches', 'useless_branch'))
                    for br in bl_branch:
                        sampleTree.addBranchToBlacklist(br)
                    bl_branch = eval(config.get('Branches', 'useless_after_sys'))
                    for br in bl_branch:
                        sampleTree.addBranchToBlacklist(br)

                # define output file 
                sampleTree.addOutputTree(subJob['tmpFileName'], cut='1', branches='*', friend=self.opts.friend)

                # run processing
                for pyModule in pyModules:
                    if hasattr(pyModule, "beforeProcessing"):
                        getattr(pyModule, "beforeProcessing")()

                sampleTree.process()

                for pyModule in pyModules:
                    if hasattr(pyModule, "afterProcessing"):
                        getattr(pyModule, "afterProcessing")()

                # if output trees have been produced: copy temporary file to output folder
                if sampleTree.getNumberOfOutputTrees() > 0: 
                    try:
                        self.fileLocator.cp(subJob['tmpFileName'], subJob['outputFileName'], force=True)
                        print 'copy ', subJob['tmpFileName'], subJob['outputFileName']

                        if self.verifyCopy:
                            if not self.fileLocator.isValidRootFile(subJob['outputFileName']):
                                print 'INFO: output at final destination broken, try to copy again from scratch disk to final destination...'
                                self.fileLocator.cp(subJob['tmpFileName'], subJob['outputFileName'], force=True)
                                print 'INFO: second attempt copy done!'
                                if not self.fileLocator.isValidRootFile(subJob['outputFileName']):
                                    print '\x1b[31mERROR: output still broken!\x1b[0m'
                                    nFilesFailed += 1
                                    raise Exception("FileCopyError")
                                else:
                                    print 'INFO: file is good after second attempt!'
                    except Exception as e:
                        print e
                        print "\x1b[31mERROR: copy from scratch to final destination failed!!\x1b[0m"

                    # delete temporary file
                    try:
                        self.fileLocator.rm(subJob['tmpFileName'])
                    except Exception as e:
                        print e
                        print "WARNING: could not delete file on scratch!"


                # clean up
                if hasattr(wObject, "cleanUp") and callable(getattr(wObject, "cleanUp")):
                    getattr(wObject, "cleanUp")()

            else:
                print 'SKIP:', subJob['inputFileNames']

        if nFilesFailed > 0:
            raise Exception("ProcessingIncomplete")
コード例 #6
0
for syst in systematics:
    config.set("dnnVars", syst, treeVarSet[syst])

# helper for fs operations
fileLocator = FileLocator(config=config, xrootdRedirector=xrootdRedirector)
fileLocator.mkdir(outputFolder)

# load input files
sampleTree = SampleTree([inputFile], treeName=inputTreeName, xrootdRedirector=xrootdRedirector)

# load tensorflow evaluator
tfe = tensorflowEvaluator.tensorflowEvaluator(mvaName)
tfe.customInit({'config': config, 'sample': sample, 'sampleTree': sampleTree})

# register callbacks for processing
sampleTree.addCallback('event', tfe.processEvent)

# define new branches to add
sampleTree.addOutputBranches(tfe.getBranches())

try:
    os.makedirs(outputFolder)
except:
    pass

# define output file 
tmpFileName = scratch + '/' + inputFile.split('/')[-1]
outputFileName = outputFolder + '/' + inputFile.split('/')[-1]
sampleTree.addOutputTree(tmpFileName, cut='weight<999&&weight>-999', branches='*')

# process tree