Ejemplo n.º 1
0
    def customInit(self, initVars):
        """Register the generator-level output branches for simulated samples.

        Args:
            initVars: mapping that provides at least 'sampleTree' and 'sample';
                the sample object must offer an ``isData()`` method.

        For data samples only ``self.sampleTree`` and ``self.isData`` are set
        and nothing is registered.
        """
        self.sampleTree = initVars['sampleTree']
        self.isData = initVars['sample'].isData()

        # everything below is registered only for non-data samples
        if self.isData:
            return

        # (collection name, properties, maximum size)
        for collName, collProps, collSize in (
            ('GenVbosons', ['pt', 'pdgId', 'GenPartIdx'], 40),
            ('GenTop', ['pt', 'GenPartIdx'], 4),
            ('GenHiggsBoson', ['pt', 'GenPartIdx'], 4),
        ):
            self.addCollection(Collection(collName, collProps,
                                          maxSize=collSize))

        # single integer branch, filled through self.getBranch
        self.branchBuffers['VtypeSim'] = array.array('i', [0])
        vtypeSimBranch = {
            'name': 'VtypeSim',
            'formula': self.getBranch,
            'arguments': 'VtypeSim',
            'type': 'i',
        }
        self.branches.append(vtypeSimBranch)

        for hadronCollName in ('GenBs', 'GenDs'):
            self.addCollection(
                Collection(hadronCollName,
                           ['pt', 'eta', 'phi', 'genPartIdx'],
                           maxSize=32))

        # per-jet integer vector branches: (branch name, leaflist)
        for vecName, vecLeaflist in (
            ("GenJetAK8_nBhadrons", "GenJetAK8_nBhadrons[nGenJetAK8]/i"),
            ("GenJetAK8_nBhadrons2p4",
             "GenJetAK8_nBhadrons2p4[nGenJetAK8]/i"),
            ("GenJet_nBhadrons", "GenJet_nBhadrons[nGenJet]/i"),
            ("GenJet_nBhadrons2p4", "GenJet_nBhadrons2p4[nGenJet]/i"),
        ):
            self.addVectorBranch(vecName,
                                 default=0,
                                 branchType='i',
                                 length=100,
                                 leaflist=vecLeaflist)

        # counters, e.g. Sum$(GenBs_pt>25&&abs(GenBs_eta)<2.6)
        for counterName in ("nGenBpt25eta2p6", "nGenBpt20eta2p6",
                            "nGenDpt25eta2p6", "nGenDpt20eta2p6"):
            self.addIntegerBranch(counterName)
Ejemplo n.º 2
0
    def customInit(self, initVars):
        """Read the MVA configuration, register output branches and load the model.

        Args:
            initVars: mapping that provides 'config', 'sampleTree' and 'sample'.

        The options 'tensorflowConfig', 'scalerDump', 'checkpoint' and
        'branchName' are read from the config section named ``self.mvaName``
        and stored as attributes of the same name.
        """
        self.config = initVars['config']
        self.sampleTree = initVars['sampleTree']
        self.sample = initVars['sample']

        # copy the per-MVA options into attributes of the same name
        for optionName in ('tensorflowConfig', 'scalerDump', 'checkpoint',
                           'branchName'):
            setattr(self, optionName,
                    self.config.get(self.mvaName, optionName))

        # Jet systematics
        systematicsOption = self.config.get('systematics', 'systematics')
        self.systematics = systematicsOption.split(' ')

        # create output branches
        self.dnnCollection = Collection(self.branchName,
                                        self.systematics,
                                        leaves=True)
        self.addCollection(self.dnnCollection)

        # create formulas for input variables
        self.inputVariables = {}
        for syst in self.systematics:
            varSetSection = self.config.get(self.mvaName, "treeVarSet")
            # non-MC samples always use the 'Nominal' variable list
            varSetOption = syst if self.sample.isMC() else 'Nominal'
            expressions = self.config.get(varSetSection,
                                          varSetOption).split(' ')
            self.inputVariables[syst] = expressions
            for expression in expressions:
                self.sampleTree.addFormula(expression)

        # create tensorflow graph
        self.reloadModel()
Ejemplo n.º 3
0
    def __init__(self, nano=False):
        """Set up the module and register its output collections.

        Args:
            nano: stored unchanged as ``self.nano``.
        """
        self.nano = nano
        self.debug = False
        super(FSR, self).__init__()

        kinematics = ('pt', 'eta', 'phi', 'mass')

        # corrected Higgs properties
        self.addCollection(
            Collection('HCMVAV2_reg_fsrCorr', list(kinematics)))

        # radiation jets additionally carry a 'deltaR' property
        for jetCollName in ('fsrJet', 'isrJet'):
            self.addCollection(
                Collection(jetCollName,
                           list(kinematics) + ['deltaR'],
                           maxSize=4))

        # test of new syntax
        self.addCollection(
            Collection('hJetFSRcorr', list(kinematics), maxSize=2))
Ejemplo n.º 4
0
    def customInit(self, initVars):
        """Load the b-tag calibration and register the weight branches.

        Args:
            initVars: mapping that provides at least 'sample'; the sample
                object must expose a ``type`` attribute.

        Raises:
            Exception: "BTagCalibrationStandaloneNotFound" if the library at
                ``self.btagCalibratorFileName`` does not exist.

        Status messages use single-argument ``print(...)`` calls, which emit
        the same text under Python 2 and Python 3 (the former ``print``
        statements were a syntax error on Python 3).
        """
        # std::vector<std::string> holding the systematics for the reader
        v_sys = getattr(ROOT, 'vector<string>')()
        for syst in self.systematics:
            v_sys.push_back(syst)

        print('load BTagCalibrationStandalone...')
        if os.path.isfile(self.btagCalibratorFileName):
            ROOT.gSystem.Load(self.btagCalibratorFileName)
        else:
            print(
                "\x1b[31m:ERROR: BTagCalibrationStandalone not found! Go to Xbb directory and run 'make'!\x1b[0m"
            )
            raise Exception("BTagCalibrationStandaloneNotFound")

        print('load bTag CSV files...')
        calib = ROOT.BTagCalibration(self.calibName, self.calibFile)
        self.btag_calibrators = {}
        print("[btagSF]: Loading calibrator for algo: %s" % (self.calibName, ))
        readerKey = self.calibName + "_iterative"
        # NOTE(review): 3 is presumably the operating point used for the
        # iterative/reshaping method -- confirm against BTagCalibration.
        self.btag_calibrators[readerKey] = ROOT.BTagCalibrationReader(
            3, "central", v_sys)
        # presumably one load() per jet flavour index (0, 1, 2) -- confirm
        for fl in range(3):
            self.btag_calibrators[readerKey].load(calib, fl, self.method)

        print('INFO: bTag initialization done.')

        sample = initVars['sample']
        self.isData = sample.type == 'DATA'
        if not self.isData:
            # branch suffixes: "" (nominal) followed by every systematic
            # combined with every variation
            self.systBranches = [""]  # nominal
            self.systBranches += [
                syst + sdir for syst in self.systList for sdir in self.systVars
            ]
            if self.decorrelatePtEta:
                # additional variations split into pt bins 0-4, eta bins 1-3
                self.systBranches += [
                    syst + "_pt" + str(ipt) + "_eta" + str(ieta) + sdir
                    for syst in self.systList for sdir in self.systVars
                    for ipt in range(0, 5) for ieta in range(1, 4)
                ]

            self.btagCollection = Collection(self.branchBaseName,
                                             self.systBranches,
                                             leaves=False)
            self.addCollection(self.btagCollection)
Ejemplo n.º 5
0
    def customInit(self, initVars):
        """Register the b-tag weight branches for non-data samples.

        Args:
            initVars: mapping that provides at least 'sample'; the sample
                object must expose a ``type`` attribute.

        For samples of type 'DATA' only ``self.isData`` is set. The list of
        branches is reported with a single-argument ``print(...)`` call, which
        emits the same text under Python 2 and Python 3 (the former ``print``
        statement was a syntax error on Python 3).
        """
        sample = initVars['sample']
        self.isData = sample.type == 'DATA'
        if not self.isData:
            # branch suffixes: "" (nominal) followed by every systematic
            # combined with every variation
            self.systBranches = [""]  # nominal
            self.systBranches += [
                syst + sdir for syst in self.systList for sdir in self.systVars
            ]
            if self.includeFixPtEtaBins:
                # additional variations split into pt bins 0-4, eta bins 1-3
                self.systBranches += [
                    syst + "_pt" + str(ipt) + "_eta" + str(ieta) + sdir
                    for syst in self.systList for sdir in self.systVars
                    for ipt in range(0, 5) for ieta in range(1, 4)
                ]

            self.btagCollection = Collection(self.branchBaseName,
                                             self.systBranches,
                                             leaves=False)
            self.addCollection(self.btagCollection)
            print("sys: %s" % (self.systBranches, ))
    def customInit(self, initVars):
        """Configure the DNN evaluator and register its output branches.

        Steps, in order: read options from the config section named
        ``self.mvaName``, validate the checkpoint files, determine the number
        of classes and the input features, register the output collections,
        add one formula per input variable and systematic to the sample tree,
        and finally create the ``TensorflowDNNEvaluator``.

        Args:
            initVars: mapping that provides 'config', 'sampleTree' and
                'sample'.

        Raises:
            Exception: "CheckpointError" if no checkpoint is configured or its
                '.meta' file is missing; "NoInputFeaturesDefined" if neither
                the config nor the checkpoint '.info' file lists features;
                "ConfigError" if a fixed input is not a checkpoint feature.

        NOTE(review): several options are passed through ``eval``; this is
        only acceptable as long as the configuration files are trusted.
        """
        self.config = initVars['config']
        self.sampleTree = initVars['sampleTree']
        self.sample = initVars['sample']

        def optionOrDefault(section, option, default=None):
            # value of an optional config entry, or `default` if it is absent
            if self.config.has_option(section, option):
                return self.config.get(section, option)
            return default

        def forceFeaturesFromConfig():
            # truthy if 'forceInputFeaturesFromConfig' is set and evaluates true
            return self.config.has_option(
                self.mvaName, 'forceInputFeaturesFromConfig') and eval(
                    self.config.get(self.mvaName,
                                    'forceInputFeaturesFromConfig'))

        if self.condition:
            self.sampleTree.addFormula(self.condition)

        self.hJidx = optionOrDefault('General', 'hJidx', 'hJidx')
        self.scalerDump = optionOrDefault(self.mvaName, 'scalerDump')
        self.checkpoint = optionOrDefault(self.mvaName, 'checkpoint')

        if self.config.has_option(self.mvaName, 'branchName'):
            self.branchName = self.config.get(self.mvaName, 'branchName')
        elif self.checkpoint is not None:
            # derive the branch name from the checkpoint path
            self.branchName = self.checkpoint.strip().replace(
                '/model.ckpt', '').replace('/', '_')
            # prefix names that would start with a digit; the slice keeps an
            # empty derived name from raising IndexError
            if self.branchName[:1] in tuple('0123456789'):
                self.branchName = 'DNN_' + self.branchName

        if self.config.has_section('Multi') and self.config.has_option(
                'Multi', 'evalAddDebugVariables'):
            # NOTE(review): eval of a config value
            self.addDebugVariables = eval(
                self.config.get('Multi', 'evalAddDebugVariables'))
        else:
            self.addDebugVariables = False

        if self.checkpoint is None:
            print(
                "\x1b[31mERROR: 'checkpoint' option missing for MVA config section [%s]! .../model.ckpt has to be specified to be able to restore classifier.\x1b[0m"
                % self.mvaName)
            raise Exception("CheckpointError")

        # a configured scaler dump that does not exist on disk is ignored
        if self.scalerDump is not None and not os.path.isfile(self.scalerDump):
            self.scalerDump = None

        if self.config.has_option(self.mvaName, 'fixInputs'):
            # NOTE(review): eval of a config value
            self.fixInputs = eval(self.config.get(self.mvaName, 'fixInputs'))
        elif not hasattr(self, 'fixInputs'):
            # the attribute is read unconditionally further down; make sure it
            # exists even if nothing else has set it
            self.fixInputs = None

        # a directory is interpreted as <directory>/model.ckpt
        if os.path.isdir(self.checkpoint):
            self.checkpoint += '/model.ckpt'

        if not os.path.isfile(self.checkpoint + '.meta'):
            print(
                "\x1b[31mERROR: can't restore from graph! .meta file not found in checkpoint:",
                self.checkpoint, "\x1b[0m")
            raise Exception("CheckpointError")

        # INFO file (with training parameters)
        if os.path.isfile(self.checkpoint + '.info'):
            with open(self.checkpoint + '.info', 'r') as infoFile:
                self.info = json.load(infoFile)
        else:
            print("WARNING: (optional) .info file not found in checkpoint!")
            self.info = {}

        # CLASSES
        if self.config.has_option(self.mvaName, 'classes'):
            # NOTE(review): eval of a config value
            self.classes = eval(self.config.get(self.mvaName, 'classes'))
        else:
            self.classes = None
        if self.classes:
            self.nClasses = len(self.classes)
        else:
            try:
                self.nClasses = eval(self.config.get(self.mvaName, 'nClasses'))
            except Exception:
                # no usable 'nClasses' option: fall back to the labels stored
                # in the .info file (KeyError if those are missing as well)
                self.nClasses = len(self.info["labels"])
        # fewer than 3 classes is handled as a binary classifier
        if self.nClasses < 3:
            self.nClasses = 1
        if self.nClasses > 1:
            print("INFO: multi-class checkpoint found! number of classes =",
                  self.nClasses)
        else:
            print("INFO: binary-classifier found!")

        # FEATURES
        self.featuresConfig = None
        self.featuresCheckpoint = None
        try:
            self.featuresConfig = self.config.get(
                self.config.get(self.mvaName, "treeVarSet"),
                "Nominal").strip().split(" ")
        except Exception as e:
            print("WARNING: could not get treeVarSet from config:", e)
        if 'variables' in self.info:
            self.featuresCheckpoint = self.info['variables']

        if self.featuresConfig is None and self.featuresCheckpoint is not None:
            self.features = self.featuresCheckpoint
        elif self.featuresConfig is not None and self.featuresCheckpoint is None:
            self.features = self.featuresConfig
        elif self.featuresConfig is None and self.featuresCheckpoint is None:
            raise Exception("NoInputFeaturesDefined")
        else:
            # both lists exist: the checkpoint wins unless the config list has
            # the same length and is explicitly forced
            self.features = self.featuresCheckpoint
            if len(self.featuresConfig) != len(self.featuresCheckpoint):
                print(
                    "\x1b[31mWARNING: number of input features does not match!"
                )
                print(" > classifier expects:", len(self.featuresCheckpoint))
                print(" > configuration has:", len(self.featuresConfig),
                      "\x1b[0m")
                print("INFO: => feature list from checkpoint will be used.")
            else:
                if forceFeaturesFromConfig():
                    print(
                        "INFO: forceInputFeaturesFromConfig is enabled, features from configuration will be used"
                    )
                    self.features = self.featuresConfig

            print("INFO: list of input features:")
            print("INFO:", "config".ljust(40), "---->", "checkpoint")
            match = True
            # pairwise comparison over the common length of both lists
            for configFeature, checkpointFeature in zip(
                    self.featuresConfig, self.featuresCheckpoint):
                if configFeature != checkpointFeature:
                    print("\x1b[41m\x1b[37mINFO:", configFeature.ljust(40),
                          "---->", checkpointFeature.ljust(40),
                          " => MISMATCH!\x1b[0m")
                    match = False
                else:
                    print("INFO:\x1b[32m", configFeature.ljust(40), "---->",
                          checkpointFeature, "(match)\x1b[0m")
            if match:
                print("INFO: => all input variables match!")
            else:
                print(
                    "INFO: some variables are not identically defined as for the training, please check!"
                )
                if forceFeaturesFromConfig():
                    print(
                        "\x1b[31mWARNING: forceInputFeaturesFromConfig is enabled, features from configuration will be used although they could be incompatible with the features used during training.\x1b[0m"
                    )

            # fix input features at constant values: check if features given exist in checkpoint
            if self.fixInputs:
                for feature, value in self.fixInputs.items():
                    if feature not in self.featuresCheckpoint:
                        print("ERROR: can't fix input feature '", feature,
                              "' to value ", value,
                              " => feature not found in checkpoint.")
                        raise Exception("ConfigError")

        self.featureList = XbbMvaInputsList(self.features, config=self.config)
        self.nFeatures = self.featureList.length()

        # SIGNAL definition
        self.signalIndex = 0
        if self.config.has_option(self.mvaName, 'signalIndex'):
            # NOTE(review): eval of a config value
            self.signalIndex = eval(
                self.config.get(self.mvaName, 'signalIndex'))
        self.signalClassIds = [self.signalIndex]
        if self.classes:
            # classes whose first element starts with 'SIG' count as signal
            self.signalClassIds = [
                x for x, y in enumerate(self.classes) if y[0].startswith('SIG')
            ]
            print("INFO: signals:", self.signalClassIds)

        print("INFO: number of classes:",
              self.nClasses if self.nClasses > 1 else 2)

        # systematics: the MVA section takes precedence over the global list
        if self.config.has_option(self.mvaName, 'systematics'):
            self.systematics = self.config.get(self.mvaName,
                                               'systematics').split(' ')
        else:
            self.systematics = self.config.get('systematics',
                                               'systematics').split(' ')

        # create output branches
        self.dnnCollections = []
        for i in range(self.nClasses):
            collectionName = self.branchName if self.nClasses == 1 else self.branchName + "_%d" % i
            # per-class collections are only written for binary classifiers
            # or when debug variables are requested
            if self.nClasses == 1 or self.addDebugVariables:
                self.dnnCollection = Collection(collectionName,
                                                self.systematics,
                                                leaves=True)
                self.addCollection(self.dnnCollection)
                self.dnnCollections.append(self.dnnCollection)

        # create formulas for input variables
        self.inputVariables = {}
        for syst in self.systematics:
            systBase, UD = XbbTools.splitSystVariation(syst,
                                                       sample=self.sample)
            self.inputVariables[syst] = [
                XbbTools.sanitizeExpression(self.featureList.get(i,
                                                                 syst=systBase,
                                                                 UD=UD),
                                            self.config,
                                            debug=self.debug)
                for i in range(self.nFeatures)
            ]
            for var in self.inputVariables[syst]:
                self.sampleTree.addFormula(var)

        # additional pre-computed values for multi-classifiers
        if self.nClasses > 1:
            self.dnnCollectionsMulti = {
                'default':
                Collection(self.branchName, self.systematics, leaves=True),
            }
            if self.addDebugVariables:
                self.dnnCollectionsMulti.update({
                    'argmax':
                    Collection(self.branchName + "_argmax",
                               self.systematics,
                               leaves=True),
                    'max':
                    Collection(self.branchName + "_max",
                               self.systematics,
                               leaves=True),
                    'max2':
                    Collection(self.branchName + "_max2",
                               self.systematics,
                               leaves=True),
                    'signal':
                    Collection(self.branchName + "_signal",
                               self.systematics,
                               leaves=True),
                })

            for multiCollection in self.dnnCollectionsMulti.values():
                self.addCollection(multiCollection)

        # create tensorflow graph
        self.ev = TensorflowDNNEvaluator(checkpoint=self.checkpoint,
                                         scaler=self.scalerDump)