def checkDatacardRegions(self):
    """Check every datacard region declared in the configuration.

    For each region the cut name, cut string, region type and the signal and
    background sample lists are fetched from ``self.config`` and printed.
    Each listed sample must match the ``name`` of an entry yielded by
    ``ParseInfo(config=self.config)``.

    Nothing is raised to the caller for a bad region: any exception that
    occurs while a region is processed (including the "SampleNotFound" one
    raised here) is recorded through ``self.addError`` and the loop moves on
    to the next region.
    """
    regions = self.config.getDatacardRegions()
    knownSampleNames = [entry.name for entry in ParseInfo(config=self.config)]

    for region in regions:
        try:
            cutName = self.config.getDatacardCutName(region)
            cutString = self.config.getCutString(cutName)
            print("datacard region:", region)
            print(" ->", cutName)
            print(" ->", cutString)

            regionType = self.config.getDatacardRegionType(region)
            print(" -> TYPE:", regionType)

            signals = self.config.getDatacardRegionSignals(region)
            backgrounds = self.config.getDatacardRegionBackgrounds(region)
            # ANSI colour codes: signals in blue, backgrounds in magenta.
            print(" -> SIG:\x1b[34m", signals, "\x1b[0m")
            print(" -> BKG:\x1b[35m", backgrounds, "\x1b[0m")

            for sampleName in list(signals) + list(backgrounds):
                if sampleName in knownSampleNames:
                    continue
                # Only the first unknown sample of a region is reported.
                print("ERROR: not found: sample for datacard:", sampleName)
                raise Exception("SampleNotFound")
        except Exception as e:
            self.addError('datacard region', region + ' ' + repr(e))
def checkSamples(self):
    """Verify that every sample used by the configuration is defined.

    Prints each name returned by ``self.config.getUsedSamples()`` and compares
    it with the ``name`` of the entries yielded by
    ``ParseInfo(config=self.config)``.

    Raises:
        Exception: with message "SampleNotFound" at the first used sample
            that has no matching entry.
    """
    definedNames = [entry.name for entry in ParseInfo(config=self.config)]

    for usedName in self.config.getUsedSamples():
        print(usedName)
        if usedName in definedNames:
            continue
        print("ERROR: not found sample:", usedName)
        raise Exception("SampleNotFound")
class RegressionTrainer(): def __init__(self, config): vhbb_name_space = config.get('VHbbNameSpace','library') ROOT.gSystem.Load(vhbb_name_space) self.__weight = config.get("TrainRegression","weight") self.__vars = config.get("TrainRegression","vars").split() self.__target = config.get("TrainRegression","target") self.__cut = config.get("TrainRegression","cut") self.__title = config.get("TrainRegression","name") self.__signals = config.get("TrainRegression","signals") self.__regOptions = config.get("TrainRegression","options") path = config.get('Directories','PREPout') samplesinfo=config.get('Directories','samplesinfo') self.__info = ParseInfo(samplesinfo,path) self.__samples = self.__info.get_samples(self.__signals) self.__tc = TreeCache([self.__cut],self.__samples,path,config) self.__trainCut = config.get("TrainRegression","trainCut") self.__testCut = config.get("TrainRegression","testCut") self.__config = config def train(self): signals = [] signalsTest = [] for job in self.__samples: print '\tREADING IN %s AS SIG'%job.name signals.append(self.__tc.get_tree(job,'%s & %s' %(self.__cut,self.__trainCut))) signalsTest.append(self.__tc.get_tree(job,'%s & %s'%(self.__cut,self.__testCut))) # Get the tree from signal tree = signals[0] sWeight = 1. 
fnameOutput='training_Reg_'+self.__title+'.root' output = ROOT.TFile.Open('/exports/uftrig01a/dcurry/data/bbar/13TeV/heppy/files/regr_out/'+fnameOutput, "RECREATE") print '\n----- Saving output to ', output factory = ROOT.TMVA.Factory('MVA', output, '!V:!Silent:!Color:!DrawProgressBar:Transformations=I:AnalysisType=Regression') for i, signal in enumerate(signals): factory.AddRegressionTree( signal, sWeight, ROOT.TMVA.Types.kTraining) factory.AddRegressionTree( signalsTest[i], sWeight, ROOT.TMVA.Types.kTesting) for var in self.__vars: factory.AddVariable(var,'D') factory.AddTarget(self.__target ) mycut = ROOT.TCut( self.__cut ) factory.BookMethod(ROOT.TMVA.Types.kBDT,'BDT_REG_%s'%(self.__title),self.__regOptions) factory.TrainAllMethods() factory.TestAllMethods() factory.EvaluateAllMethods() output.Write() '''
def __init__(self, config):
    """Load the configured library, read the training options and build the tree cache.

    config -- object providing ``get(section, option)``. The
        ``TrainRegression`` section supplies weight, vars, target, cut, name,
        signals, options, trainCut and testCut; ``Directories`` supplies
        PREPout and samplesinfo. The object is also passed to TreeCache and
        stored on the instance.
    """
    ROOT.gSystem.Load(config.get('VHbbNameSpace','library'))

    def trainOption(key):
        # Shorthand for a lookup in the TrainRegression section.
        return config.get("TrainRegression", key)

    self.__weight = trainOption("weight")
    self.__vars = trainOption("vars").split()
    self.__target = trainOption("target")
    self.__cut = trainOption("cut")
    self.__title = trainOption("name")
    self.__signals = trainOption("signals")
    self.__regOptions = trainOption("options")

    prepOutDir = config.get('Directories','PREPout')
    samplesInfoPath = config.get('Directories','samplesinfo')
    self.__info = ParseInfo(samplesInfoPath, prepOutDir)
    self.__samples = self.__info.get_samples(self.__signals)
    self.__tc = TreeCache([self.__cut], self.__samples, prepOutDir, config)

    self.__trainCut = trainOption("trainCut")
    self.__testCut = trainOption("testCut")
    self.__config = config
'/mnt/t3nfs01/data01/shome/krgedia/CMSSW_10_1_0/src/Xbb/python/Zvv2017config/paths.ini' ) #parent class 'ConfigParser' method configFiles = pathconfig.get('Configuration', 'List').split(' ') config = BetterConfigParser.BetterConfigParser() for configFile in configFiles: print(configFile) config.read('Zvv2017config/' + configFile) #print(config.get('Weights','weightF')) #config = XbbConfigReader.read('Zvv2017') inputFile = 'root://t3dcachedb03.psi.ch:1094//pnfs/psi.ch/cms/trivcat/store/user/berger_p2/VHbb/VHbbPostNano2017/V5/Zvv/rerun/v4j/eval/ggZH_HToBB_ZToNuNu_M125_13TeV_powheg_pythia8/tree_aa5e971734ef4e885512748d534e6937ff03dc61feed21b6772ba943_000000_000000_0000_9_a6c5a52b56e5e0c7ad5aec31429c8926bf32cf39adbe087f05cfb323.root' path = 'root://t3dcachedb03.psi.ch:1094//pnfs/psi.ch/cms/trivcat/store/user/berger_p2/VHbb/VHbbPostNano2017/V5/Zvv/rerun/v4j/eval/' samplefiles = '../samples/VHbbPostNano2017_V5/merged_Zvv2017/' samplesinfo = 'Zvv2017config/samples_nosplit.ini' info = ParseInfo(samples_path=path, config=config) sample = [ x for x in info if x.identifier == 'ggZH_HToBB_ZToNuNu_M125_13TeV_powheg_pythia8' ][0] # read sample sampleTree = SampleTree([inputFile], config=config) # initialize module w = WeightAsBranch() w.customInit({ 'sampleTree': sampleTree, 'config': config, 'sample': sample,
pathOUT = config.get('Directories', 'SYSout') elif opts.mergeeval == 'True': pathIN = config.get('Directories', 'MVAin') pathOUT = config.get('Directories', 'MVAout') else: pathIN = config.get('Directories', 'PREPin') pathOUT = config.get('Directories', 'PREPout') samplesinfo = config.get('Directories', 'samplesinfo') sampleconf = BetterConfigParser() sampleconf.read(samplesinfo) whereToLaunch = config.get( 'Configuration', 'whereToLaunch') # USEFUL IN CASE OF SITE BY SITE OPTIONS prefix = sampleconf.get('General', 'prefix') info = ParseInfo(samplesinfo, pathIN) print "samplesinfo:", samplesinfo def mergetreePSI(pathIN, pathOUT, prefix, newprefix, folderName, Aprefix, Acut, config): ''' List of variables pathIN: path of the input file containing the data pathOUT: path of the output files prefix: "prefix" variable from "samples_nosplit.cfg" newprefix: "newprefix" variable from "samples_nosplit.cfg" file: sample header (as DYJetsToLL_M-50_TuneZ2Star_8TeV-madgraph-tarball) Aprefix: empty string '' Acut: the sample cut as defined in "samples_nosplit.cfg" '''
class RegressionTrainer():
    """Train a TMVA BDT regression and write the matching application config.

    All settings come from the ``TrainRegression`` section of the config
    object given to the constructor. Input trees are served by a TreeCache
    built on the ``Directories/PREPout`` path.
    """

    def __init__(self, config):
        """Read the training settings and set up the sample tree cache.

        config -- parser-like object (``get``, ``set``, ``sections``,
            ``remove_section``, ``write`` are used). It is kept on the
            instance and modified by ``train``.
        """
        vhbb_name_space = config.get('VHbbNameSpace', 'library')
        # Load the shared library named in the configuration into ROOT.
        ROOT.gSystem.Load(vhbb_name_space)
        self.__weight = config.get("TrainRegression", "weight")
        # Whitespace-separated list of input variable expressions.
        self.__vars = config.get("TrainRegression", "vars").split()
        self.__target = config.get("TrainRegression", "target")
        self.__cut = config.get("TrainRegression", "cut")
        self.__title = config.get("TrainRegression", "name")
        self.__signals = config.get("TrainRegression", "signals")
        self.__regOptions = config.get("TrainRegression", "options")
        path = config.get('Directories', 'PREPout')
        samplesinfo = config.get('Directories', 'samplesinfo')
        self.__info = ParseInfo(samplesinfo, path)
        self.__samples = self.__info.get_samples(self.__signals)
        self.__tc = TreeCache([self.__cut], self.__samples, path, config)
        self.__trainCut = config.get("TrainRegression", "trainCut")
        self.__testCut = config.get("TrainRegression", "testCut")
        self.__config = config

    def train(self):
        """Train the regression, then write and print the application config.

        Side effects:
            * writes ``training_Reg_<name>.root`` in the working directory;
            * sets regWeight, regDict and regVars in the ``Regression``
              section of the config object and removes every other section
              from it;
            * writes that config to ``8TeVconfig/appReg`` and prints the file.

        Raises:
            IOError: if the output ROOT file cannot be opened.
        """
        signals = []
        signalsTest = []
        for job in self.__samples:
            print('\tREADING IN %s AS SIG' % job.name)
            signals.append(
                self.__tc.get_tree(job, '%s & %s' % (self.__cut, self.__trainCut)))
            signalsTest.append(
                self.__tc.get_tree(job, '%s & %s' % (self.__cut, self.__testCut)))
        sWeight = 1.

        fnameOutput = 'training_Reg_%s.root' % (self.__title)
        output = ROOT.TFile.Open(fnameOutput, "RECREATE")
        # TFile.Open hands back a null/zombie file when the target cannot be
        # created; stop here rather than train with nowhere to store results.
        if not output or output.IsZombie():
            raise IOError('cannot open output file %s' % fnameOutput)

        factory = ROOT.TMVA.Factory('MVA', output, '!V:!Silent:!Color:!DrawProgressBar:Transformations=I:AnalysisType=Regression')
        # Event weights are not applied; the call below is disabled in the source.
        #factory.SetSignalWeightExpression( self.__weight )

        # One training tree and one testing tree per sample, same unit weight.
        for trainTree, testTree in zip(signals, signalsTest):
            factory.AddRegressionTree(trainTree, sWeight, ROOT.TMVA.Types.kTraining)
            factory.AddRegressionTree(testTree, sWeight, ROOT.TMVA.Types.kTesting)

        # For every training variable also build the expression stored in the
        # application config: each ``hJet_<word>`` token gets ``[0]`` appended.
        self.__apply = []
        p = re.compile(r'hJet_\w+')
        for var in self.__vars:
            factory.AddVariable(var, 'D')
            self.__apply.append(p.sub(r'\g<0>[0]', var))
        # NOTE(review): indentation was lost in the source; the list is
        # printed once after the loop here -- confirm against the original.
        print(self.__apply)

        factory.AddTarget(self.__target)
        factory.BookMethod(ROOT.TMVA.Types.kBDT, 'BDT_REG_%s' % (self.__title), self.__regOptions)
        factory.TrainAllMethods()
        factory.TestAllMethods()
        factory.EvaluateAllMethods()
        output.Write()

        regDict = dict(zip(self.__vars, self.__apply))
        # ``set`` raises NoSectionError on a missing section; create it so a
        # finished training is not lost at the very last step.
        if not self.__config.has_section('Regression'):
            self.__config.add_section('Regression')
        self.__config.set('Regression', 'regWeight', '../data/MVA_BDT_REG_%s.weights.xml' % self.__title)
        self.__config.set('Regression', 'regDict', '%s' % regDict)
        self.__config.set('Regression', 'regVars', '%s' % self.__vars)

        # Only the Regression section is written to the application config.
        for section in self.__config.sections():
            if not section == 'Regression':
                self.__config.remove_section(section)
        with open('8TeVconfig/appReg', 'w') as configfile:
            self.__config.write(configfile)
        # Echo the written file.
        with open('8TeVconfig/appReg', 'r') as configfile:
            for line in configfile:
                print(line.strip())
hash = hashlib.sha224(minCut).hexdigest() print 'pathOUT_orig', pathOUT_orig print hash grep_hash = '' if hash != '': grep_hash = ' |grep ' + hash mc_dataset_missing_files = [] data_dataset_missing_files = [] dataset_identifiers = [] if opts.filelist == "" or opts.names == 'nosample': # print "info:",info info = ParseInfo(samplesinfo, pathOUT_orig) # if opts.names == "" for job in info: dataset_identifiers.append(job.identifier) dataset_identifiers = set(dataset_identifiers) for identifier in dataset_identifiers: sampleType = config.get(identifier, 'sampleType') print 'sampleType', sampleType # print identifier # identifier=opts.names.split(',')[0] print "identifier:", identifier pathOUT = pathOUT_orig + '/' + identifier filenames = open(samplefiles + '/' + identifier + '.txt').readlines() print 'number of files on DAS:', len(filenames), #filenames[0]
def getSamplesInfo(self):
    """Return the sample information, building it on first use.

    When ``self.samplesInfo`` is None it is set to
    ``ParseInfo(config=self.config)``; otherwise the stored object is
    returned as is.
    """
    if self.samplesInfo is not None:
        return self.samplesInfo
    self.samplesInfo = ParseInfo(config=self.config)
    return self.samplesInfo
class RegressionTrainer():
    """Train a TMVA BDT regression and write the matching application config.

    All settings come from the ``TrainRegression`` section of the config
    object given to the constructor. Input trees are served by a TreeCache
    built on the ``Directories/PREPout`` path.
    """

    def __init__(self, config):
        """Read the training settings and set up the sample tree cache.

        config -- parser-like object (``get``, ``set``, ``sections``,
            ``remove_section``, ``write`` are used). It is kept on the
            instance and modified by ``train``.
        """
        vhbb_name_space = config.get('VHbbNameSpace', 'library')
        # Load the shared library named in the configuration into ROOT.
        ROOT.gSystem.Load(vhbb_name_space)
        self.__weight = config.get("TrainRegression", "weight")
        # Whitespace-separated list of input variable expressions.
        self.__vars = config.get("TrainRegression", "vars").split()
        self.__target = config.get("TrainRegression", "target")
        self.__cut = config.get("TrainRegression", "cut")
        self.__title = config.get("TrainRegression", "name")
        self.__signals = config.get("TrainRegression", "signals")
        self.__regOptions = config.get("TrainRegression", "options")
        path = config.get('Directories', 'PREPout')
        samplesinfo = config.get('Directories', 'samplesinfo')
        self.__info = ParseInfo(samplesinfo, path)
        self.__samples = self.__info.get_samples(self.__signals)
        self.__tc = TreeCache([self.__cut], self.__samples, path, config)
        self.__trainCut = config.get("TrainRegression", "trainCut")
        self.__testCut = config.get("TrainRegression", "testCut")
        self.__config = config

    def train(self):
        """Train the regression, then write and print the application config.

        Side effects:
            * writes ``training_Reg_<name>.root`` in the working directory;
            * sets regWeight, regDict and regVars in the ``Regression``
              section of the config object and removes every other section
              from it;
            * writes that config to ``8TeVconfig/appReg`` and prints the file.

        Raises:
            IOError: if the output ROOT file cannot be opened.
        """
        signals = []
        signalsTest = []
        for job in self.__samples:
            print('\tREADING IN %s AS SIG' % job.name)
            signals.append(
                self.__tc.get_tree(job,
                                   '%s & %s' % (self.__cut, self.__trainCut)))
            signalsTest.append(
                self.__tc.get_tree(job,
                                   '%s & %s' % (self.__cut, self.__testCut)))
        sWeight = 1.

        fnameOutput = 'training_Reg_%s.root' % (self.__title)
        output = ROOT.TFile.Open(fnameOutput, "RECREATE")
        # TFile.Open hands back a null/zombie file when the target cannot be
        # created; stop here rather than train with nowhere to store results.
        if not output or output.IsZombie():
            raise IOError('cannot open output file %s' % fnameOutput)

        factory = ROOT.TMVA.Factory(
            'MVA', output,
            '!V:!Silent:!Color:!DrawProgressBar:Transformations=I:AnalysisType=Regression'
        )
        # Event weights are not applied; the call below is disabled in the source.
        #factory.SetSignalWeightExpression( self.__weight )

        # One training tree and one testing tree per sample, same unit weight.
        for trainTree, testTree in zip(signals, signalsTest):
            factory.AddRegressionTree(trainTree, sWeight,
                                      ROOT.TMVA.Types.kTraining)
            factory.AddRegressionTree(testTree, sWeight,
                                      ROOT.TMVA.Types.kTesting)

        # For every training variable also build the expression stored in the
        # application config: each ``hJet_<word>`` token gets ``[0]`` appended.
        self.__apply = []
        p = re.compile(r'hJet_\w+')
        for var in self.__vars:
            factory.AddVariable(var, 'D')
            self.__apply.append(p.sub(r'\g<0>[0]', var))

        factory.AddTarget(self.__target)
        factory.BookMethod(ROOT.TMVA.Types.kBDT,
                           'BDT_REG_%s' % (self.__title),
                           self.__regOptions)
        factory.TrainAllMethods()
        factory.TestAllMethods()
        factory.EvaluateAllMethods()
        output.Write()

        regDict = dict(zip(self.__vars, self.__apply))
        # ``set`` raises NoSectionError on a missing section; create it so a
        # finished training is not lost at the very last step.
        if not self.__config.has_section('Regression'):
            self.__config.add_section('Regression')
        self.__config.set('Regression', 'regWeight',
                          '../data/MVA_BDT_REG_%s.weights.xml' % self.__title)
        self.__config.set('Regression', 'regDict', '%s' % regDict)
        self.__config.set('Regression', 'regVars', '%s' % self.__vars)

        # Only the Regression section is written to the application config.
        for section in self.__config.sections():
            if not section == 'Regression':
                self.__config.remove_section(section)
        with open('8TeVconfig/appReg', 'w') as configfile:
            self.__config.write(configfile)
        # Echo the written file.
        with open('8TeVconfig/appReg', 'r') as configfile:
            for line in configfile:
                print(line.strip())
self.quickloadWarningShown = True print("INFO: SetQuickLoad(1) called for formula:", formulaName) print( "INFO: -> EvalInstance(0) on formulas will not re-load branches but will take values from memory, which might have been modified by this module." ) treeFormula.SetQuickLoad(1) # print("\x1b[31mERROR: this module can't be used together with others which use formulas based on branches changed inside this module!\x1b[0m") # raise Exception("NotImplemented") if __name__ == '__main__': config = XbbConfigReader.read('Wlv2017') info = ParseInfo(config=config) sample = [ x for x in info if x.identifier == 'WplusH_HToBB_WToLNu_M125_13TeV_powheg_pythia8' ][0] # read sample sampleTree = SampleTree([ '/store/group/phys_higgs/hbb/ntuples/VHbbPostNano/2017/V11/WplusH_HToBB_WToLNu_M125_13TeV_powheg_pythia8/adewit-crab_nano2017_WplusH_HT81/190606_065851/0000/tree_1.root' ], treeName='Events', xrootdRedirector="root://eoscms.cern.ch/") # initialize module w = JetSmearer("2017") w.customInit({ 'sampleTree': sampleTree,
class RegressionTrainer(): def __init__(self, config): vhbb_name_space = config.get('VHbbNameSpace', 'library') ROOT.gSystem.Load(vhbb_name_space) self.__weight = config.get("TrainRegression", "weight") self.__vars = config.get("TrainRegression", "vars").split() self.__target = config.get("TrainRegression", "target") self.__cut = config.get("TrainRegression", "cut") self.__title = config.get("TrainRegression", "name") self.__signals = config.get("TrainRegression", "signals") self.__regOptions = config.get("TrainRegression", "options") path = config.get('Directories', 'PREPout') samplesinfo = config.get('Directories', 'samplesinfo') self.__info = ParseInfo(samplesinfo, path) self.__samples = self.__info.get_samples(self.__signals) self.__tc = TreeCache([self.__cut], self.__samples, path, config) self.__trainCut = config.get("TrainRegression", "trainCut") self.__testCut = config.get("TrainRegression", "testCut") self.__config = config def train(self): signals = [] signalsTest = [] for job in self.__samples: print '\tREADING IN %s AS SIG' % job.name signals.append( self.__tc.get_tree(job, '%s & %s' % (self.__cut, self.__trainCut))) signalsTest.append( self.__tc.get_tree(job, '%s & %s' % (self.__cut, self.__testCut))) # Perform regression for the two Higgs jets independently for iJet in range(0, 2): print '\n========== Performing Regression on Jet', iJet, '==========\n' sWeight = 1. 
#fnameOutput='training_Reg_%s_Jet'+str(iJet)+'.root'%(self.__title) fnameOutput = 'training_Reg_' + self.__title + '_Jet' + str( iJet) + '.root' output = ROOT.TFile.Open( '/exports/uftrig01a/dcurry/data/bbar/13TeV/heppy/files/regr_out/' + fnameOutput, "RECREATE") print '\n----- Saving output to ', output factory = ROOT.TMVA.Factory( 'MVA', output, '!V:!Silent:!Color:!DrawProgressBar:Transformations=I:AnalysisType=Regression' ) #factory.SetSignalWeightExpression( self.__weight ) #set input trees for i, signal in enumerate(signals): #print 'signal, sWeight, ROOT.TMVA.Types.kTesting', signal, sWeight, ROOT.TMVA.Types.kTraining #print 'tree entries', signal.GetEntries() factory.AddRegressionTree(signal, sWeight, ROOT.TMVA.Types.kTraining) factory.AddRegressionTree(signalsTest[i], sWeight, ROOT.TMVA.Types.kTesting) self.__apply = [] p = re.compile(r'hJet_\w+') for var in self.__vars: if var == 'rho' or var == 'met_pt': factory.AddVariable(var, 'D') continue if iJet == 0: if 'max' in var: var = var.replace(')', '[hJidx[0]])') factory.AddVariable(var, 'D') else: var = var + '[hJidx[0]]' factory.AddVariable(var, 'D') else: if 'max' in var: var = var.replace(')', '[hJidx[1]])') factory.AddVariable(var, 'D') else: var = var + '[hJidx[1]]' factory.AddVariable(var, 'D') self.__apply.append(p.sub(r'\g<0>[0]', var)) print(self.__apply) factory.AddTarget(self.__target + '%s' % ('[' + str(iJet) + ']')) mycut = ROOT.TCut(self.__cut) #factory.BookMethod(ROOT.TMVA.Types.kBDT,'BDT_REG_%s'%(self.__title),self.__regOptions) factory.BookMethod(ROOT.TMVA.Types.kBDT, 'BDT_REG_' + self.__title + '_Jet' + str(iJet), self.__regOptions) factory.TrainAllMethods() #factory.TestAllMethods() #factory.EvaluateAllMethods() output.Write() '''