def __initializeSample(self,sample):
    """Select `sample` and build the TChain and normalisation for it.

    Every ``*.root`` file found directly inside
    ``<self.ntupleDirectory>/<sample>`` is opened once to read bin 1 of its
    ``summedWeights`` histogram and is then added to a chain named
    ``self.treeName``.

    Sets:
        self.sample: the sample name that was passed in.
        self.sampleLumi: summed weights divided by ``getXsec(sample)``, or
            ``0.`` when ``getXsec`` returns a falsy value.
        self.sampleTree: the ``ROOT.TChain`` holding all files of the sample.
    """
    self.sample = sample
    tchain = ROOT.TChain(self.treeName)
    sampleDirectory = '{0}/{1}'.format(self.ntupleDirectory,sample)
    summedWeights = 0.
    for f in glob.glob('{0}/*.root'.format(sampleDirectory)):
        tfile = ROOT.TFile.Open(f)
        summedWeights += tfile.Get("summedWeights").GetBinContent(1)
        tfile.Close()
        tchain.Add(f)
    # Look the cross section up once so the truthiness test and the divisor
    # are guaranteed to be the same value.
    xsec = getXsec(sample)
    self.sampleLumi = float(summedWeights)/xsec if xsec else 0.
    self.sampleTree = tchain
def __initializeSample(self, sample):
    """Switch the current sample and rebuild its chain and luminosity.

    Globs ``<self.ntupleDirectory>/<sample>/*.root``, accumulates bin 1 of
    each file's ``summedWeights`` histogram and chains the files under the
    tree name ``self.treeName``.

    Sets:
        self.sample: the requested sample name.
        self.sampleLumi: accumulated weights over ``getXsec(sample)``;
            ``0.`` if that cross section is falsy.
        self.sampleTree: the resulting ``ROOT.TChain``.
    """
    self.sample = sample
    tchain = ROOT.TChain(self.treeName)
    sampleDirectory = '{0}/{1}'.format(self.ntupleDirectory, sample)

    summedWeights = 0.
    for f in glob.glob('{0}/*.root'.format(sampleDirectory)):
        tfile = ROOT.TFile.Open(f)
        summedWeights += tfile.Get("summedWeights").GetBinContent(1)
        tfile.Close()
        tchain.Add(f)

    # Single lookup: the guard and the division use the same cross section.
    xsec = getXsec(sample)
    self.sampleLumi = float(summedWeights) / xsec if xsec else 0.
    self.sampleTree = tchain
def __initializeNtuple(self):
    """Collect the input files, chain them and compute the normalisation.

    The file names come from ``self.inputFileList`` (a text file with one
    path per line) when that attribute is truthy, otherwise from every
    ``*.root`` file directly inside ``self.ntupleDirectory``.

    Sets:
        self.intLumi: ``float(getLumi())``.
        self.xsec: ``getXsec(self.sample)``.
        self.sampleLumi: summed weights / ``self.xsec``, or ``0.`` when the
            cross section is falsy.
        self.sampleTree: ``ROOT.TChain`` named ``self.treeName``.
        self.files: the list of chained file names.
        self.initialized: ``True``.
    """
    tchain = ROOT.TChain(self.treeName)
    if self.inputFileList: # reading from a passed list of inputfiles
        with open(self.inputFileList, 'r') as f:
            # Blank lines (e.g. a trailing empty line) would otherwise become
            # '' entries that are handed to TFile.Open below.
            allFiles = [line.strip() for line in f if line.strip()]
    else: # reading from an input directory (all files in directory will be processed)
        allFiles = glob.glob('{0}/*.root'.format(self.ntupleDirectory))
    if not allFiles:
        logging.error('No files found for sample {0}'.format(self.sample))

    summedWeights = 0.
    for f in allFiles:
        tfile = ROOT.TFile.Open(f)
        summedWeights += tfile.Get("summedWeights").GetBinContent(1)
        tfile.Close()
        tchain.Add(f)
    if not summedWeights and not isData(self.sample):
        logging.warning('No events for sample {0}'.format(self.sample))

    self.intLumi = float(getLumi())
    self.xsec = getXsec(self.sample)
    self.sampleLumi = float(summedWeights) / self.xsec if self.xsec else 0.
    self.sampleTree = tchain
    self.files = allFiles
    self.initialized = True
    logging.debug(
        'Initialized {0}: summedWeights = {1}; xsec = {2}; sampleLumi = {3}; intLumi = {4}'
        .format(self.sample, summedWeights, self.xsec, self.sampleLumi,
                self.intLumi))
def __initializeNtuple(self):
    """Collect the input files, chain them and compute the normalisation.

    The file names come from ``self.inputFileList`` (one path per line) when
    that attribute is truthy. Otherwise ``self.ntupleDirectory`` is walked
    recursively and every ``*.root`` file is taken, except those under a
    directory whose path contains the substring ``'failed'``.

    Sets:
        self.xsec: ``getXsec(self.sample)``.
        self.sampleLumi: summed weights / ``self.xsec``, or ``0.`` when the
            cross section is falsy.
        self.sampleTree: ``ROOT.TChain`` named ``self.treeName``.
        self.files: the list of chained file names.
        self.initialized: ``True``.

    Note:
        ``self.intLumi`` is read by the final debug message but is not
        assigned here, so it must already exist on the instance.
    """
    tchain = ROOT.TChain(self.treeName)
    if self.inputFileList: # reading from a passed list of inputfiles
        with open(self.inputFileList,'r') as f:
            # Drop blank lines so that '' is never handed to TFile.Open.
            allFiles = [line.strip() for line in f if line.strip()]
    else: # reading from an input directory (all files in directory will be processed)
        allFiles = []
        for root, dirnames, fnames in os.walk(self.ntupleDirectory):
            # The test is on the full path, so it also matches when
            # ntupleDirectory itself contains 'failed'.
            if 'failed' in root: continue
            for fname in fnmatch.filter(fnames, '*.root'):
                allFiles.append(os.path.join(root,fname))
    if not allFiles:
        logging.error('No files found for sample {0}'.format(self.sample))

    summedWeights = 0.
    for f in allFiles:
        tfile = ROOT.TFile.Open(f)
        summedWeights += tfile.Get("summedWeights").GetBinContent(1)
        tfile.Close()
        tchain.Add(f)
    if not summedWeights and not isData(self.sample):
        logging.warning('No events for sample {0}'.format(self.sample))

    self.xsec = getXsec(self.sample)
    self.sampleLumi = float(summedWeights)/self.xsec if self.xsec else 0.
    self.sampleTree = tchain
    self.files = allFiles
    self.initialized = True
    logging.debug('Initialized {0}: summedWeights = {1}; xsec = {2}; sampleLumi = {3}; intLumi = {4}'.format(self.sample,summedWeights,self.xsec,self.sampleLumi,self.intLumi))
def __initializeNtuple(self):
    """Collect the input files, chain them and compute the normalisation.

    The file names come from ``self.inputFileList`` (one path per line) when
    that attribute is truthy, otherwise from every ``*.root`` file directly
    inside ``self.ntupleDirectory``.

    Sets:
        self.xsec: ``getXsec(self.sample)``; an error is logged when falsy.
        self.sampleLumi: summed weights / ``self.xsec``, or ``0.`` when the
            cross section is falsy.
        self.sampleTree: ``ROOT.TChain`` named ``self.treeName``; switched to
            PROOF via ``SetProof()`` when ``self.useProof`` is truthy.
        self.j: incremented by one.
        self.files: the list of chained file names.
        self.initialized: ``True``.
        self.fileHash: ``hashFile(*self.files)``, only when ``self.temp`` is
            falsy.

    Note:
        ``self.intLumi`` is read by the final debug message but is not
        assigned here, so it must already exist on the instance.
    """
    tchain = ROOT.TChain(self.treeName)
    if self.inputFileList: # reading from a passed list of inputfiles
        with open(self.inputFileList,'r') as f:
            # Drop blank lines so that '' is never handed to TFile.Open or
            # hashFile further down.
            allFiles = [line.strip() for line in f if line.strip()]
    else: # reading from an input directory (all files in directory will be processed)
        allFiles = glob.glob('{0}/*.root'.format(self.ntupleDirectory))
    #elif os.path.isfile(self.ntuple): # reading a single root file
    #    allFiles = [self.ntuple]
    if not allFiles:
        logging.error('No files found for sample {0}'.format(self.sample))

    summedWeights = 0.
    for f in allFiles:
        tfile = ROOT.TFile.Open(f)
        summedWeights += tfile.Get("summedWeights").GetBinContent(1)
        tfile.Close()
        tchain.Add(f)
    if not summedWeights and not isData(self.sample):
        logging.warning('No events for sample {0}'.format(self.sample))

    self.xsec = getXsec(self.sample)
    if not self.xsec:
        logging.error('No xsec for sample {0}'.format(self.sample))
    self.sampleLumi = float(summedWeights)/self.xsec if self.xsec else 0.
    self.sampleTree = tchain
    self.j += 1
    #listname = 'selList{0}'.format(self.j)
    #self.sampleTree.Draw('>>{0}'.format(listname),'1','entrylist')
    #skim = ROOT.gDirectory.Get(listname)
    #self.entryListMap['1'] = skim
    self.files = allFiles
    self.initialized = True
    if not self.temp:
        self.fileHash = hashFile(*self.files)
    if self.useProof:
        self.sampleTree.SetProof()
    logging.debug('Initialized {0}: summedWeights = {1}; xsec = {2}; sampleLumi = {3}; intLumi = {4}'.format(self.sample,summedWeights,self.xsec,self.sampleLumi,self.intLumi))
def main(argv=None):
    """Print a table of cross sections (and optionally yields) per sample.

    One row is produced for each entry found by globbing ``<ntupleDir>/*``.
    When both ``args.verbose`` and ``args.analysis`` are truthy, the row also
    holds the number of entries passing ``args.selection``, the fraction of
    those with negative ``genWeight``, the sample luminosity and the
    effective number of entries; otherwise only name and cross section.

    Args:
        argv: command-line arguments; defaults to ``sys.argv[1:]``.
    """
    if argv is None:
        argv = sys.argv[1:]

    args = parse_command_line(argv)

    if args.verbose and args.analysis:
        table = PrettyTable(['Sample','xsec [pb]','entries','ratio neg.','lumi. [/pb]','eff. entries'])
    else:
        table = PrettyTable(['Sample','xsec [pb]'])
    table.align = 'r'
    table.align['Sample'] = 'l'

    ntupleDir = getAnalysisNtupleDirectory(args.analysis,True) if args.verbose and args.analysis else getNtupleDirectory(version=args.version)
    #Odd mix of local pathnames and xrootd access
    # for sample in sorted(hdfs_ls_directory(ntupleDir)):
    # glob returns entries in arbitrary order; sort so the table rows come
    # out in a reproducible order.
    for sample in sorted(glob.glob('/'.join([ntupleDir,'*']))):
        name = os.path.basename(sample)
        logging.info('Processing {0}'.format(name))
        data = isData(name)
        xsec = getXsec(name)
        if args.verbose and args.analysis:
            print(sample)
            fnames = get_hdfs_root_files(sample)
            # get total events, total weights
            tree = ROOT.TChain(getTreeName(args.analysis))
            summedWeights = 0.
            for f in fnames:
                tfile = ROOT.TFile.Open('/hdfs'+f)
                summedWeights += tfile.Get("summedWeights").GetBinContent(1)
                tfile.Close()
                tree.Add('/hdfs'+f)
            numEntries = tree.GetEntries(args.selection)
            weightedEntries = 0.
            negevents = 0.
            seltree = tree.CopyTree(args.selection)
            for row in seltree:
                if data:
                    # data events carry unit weight
                    weightedEntries += 1.
                else:
                    weightedEntries += row.genWeight
                    if row.genWeight<0.: negevents += 1
            if data:
                sampleLumi = getLumi()
            else:
                sampleLumi = float(summedWeights)/xsec if xsec else 0.
            negratio = float(negevents)/numEntries if numEntries else 0.
            # scale the weighted yield from the sample luminosity to getLumi()
            effevents = weightedEntries*getLumi()/sampleLumi if sampleLumi else 0.
            table.add_row([name,'{0:.6f}'.format(float(xsec)),numEntries,'{0:.3}'.format(float(negratio)),'{0:.6f}'.format(float(sampleLumi)),'{0:.3f}'.format(float(effevents))])
        else:
            table.add_row([name,xsec])

    print(table.get_string())
def main(argv=None):
    """Tabulate cross sections, and yields in detailed mode, for all samples.

    Samples are the sorted entries of ``<ntupleDir>/*``. Detailed mode is
    active when both ``args.verbose`` and ``args.analysis`` are truthy; it
    adds the selected entry count, the negative-weight fraction, the sample
    luminosity and the effective entry count to every row.

    Args:
        argv: command-line arguments; defaults to ``sys.argv[1:]``.
    """
    if argv is None:
        argv = sys.argv[1:]

    args = parse_command_line(argv)
    detailed = args.verbose and args.analysis

    columns = ['Sample','xsec [pb]']
    if detailed:
        columns += ['entries','ratio neg.','lumi. [/pb]','eff. entries']
    table = PrettyTable(columns)
    table.align = 'r'
    table.align['Sample'] = 'l'

    if detailed:
        ntupleDir = getAnalysisNtupleDirectory(args.analysis)
    else:
        ntupleDir = getNtupleDirectory(version=args.version)

    for samplePath in sorted(glob.glob(os.path.join(ntupleDir,'*'))):
        name = os.path.basename(samplePath)
        logging.info('Processing {0}'.format(name))
        data = isData(name)
        xsec = getXsec(name)

        if not detailed:
            # short form: name and cross section only
            table.add_row([name,xsec])
            continue

        rootFiles = get_hdfs_root_files(samplePath)
        # chain the files and total up the generated weights
        chain = ROOT.TChain(getTreeName(args.analysis))
        summedWeights = 0.
        for rootFile in rootFiles:
            hdfsPath = '/hdfs'+rootFile
            tfile = ROOT.TFile.Open(hdfsPath)
            summedWeights += tfile.Get("summedWeights").GetBinContent(1)
            tfile.Close()
            chain.Add(hdfsPath)

        numEntries = chain.GetEntries(args.selection)
        weightedEntries = 0.
        negevents = 0.
        selected = chain.CopyTree(args.selection)
        for row in selected:
            if data:
                weightedEntries += 1.
                continue
            weightedEntries += row.genWeight
            if row.genWeight<0.:
                negevents += 1

        if data:
            sampleLumi = getLumi()
        elif xsec:
            sampleLumi = float(summedWeights)/xsec
        else:
            sampleLumi = 0.

        if numEntries:
            negratio = float(negevents)/numEntries
        else:
            negratio = 0.

        if sampleLumi:
            effevents = weightedEntries*getLumi()/sampleLumi
        else:
            effevents = 0.

        table.add_row([
            name,
            '{0:.6f}'.format(float(xsec)),
            numEntries,
            '{0:.3}'.format(float(negratio)),
            '{0:.6f}'.format(float(sampleLumi)),
            '{0:.3f}'.format(float(effevents)),
        ])

    print(table.get_string())
def __initializeNtuple(self):
    """Collect the input files, chain them and compute the normalisation.

    The file names come from ``self.inputFileList`` (one path per line) when
    that attribute is truthy, otherwise from every ``*.root`` file directly
    inside ``self.ntupleDirectory``.

    Sets:
        self.intLumi: ``float(getLumi())``.
        self.xsec: ``getXsec(self.sample)``; an error is logged when falsy.
        self.sampleLumi: summed weights / ``self.xsec``, or ``0.`` when the
            cross section is falsy.
        self.sampleTree: ``ROOT.TChain`` named ``self.treeName``; switched to
            PROOF via ``SetProof()`` when ``self.useProof`` is truthy.
        self.j: incremented by one.
        self.files: the list of chained file names.
        self.initialized: ``True``.
        self.fileHash: ``hashFile(*self.files)``, only when ``self.temp`` is
            falsy.
    """
    tchain = ROOT.TChain(self.treeName)
    if self.inputFileList:  # reading from a passed list of inputfiles
        with open(self.inputFileList, 'r') as f:
            # Drop blank lines so that '' is never handed to TFile.Open or
            # hashFile further down.
            allFiles = [line.strip() for line in f if line.strip()]
    else:  # reading from an input directory (all files in directory will be processed)
        allFiles = glob.glob('{0}/*.root'.format(self.ntupleDirectory))
    #elif os.path.isfile(self.ntuple): # reading a single root file
    #    allFiles = [self.ntuple]
    if not allFiles:
        logging.error('No files found for sample {0}'.format(self.sample))

    summedWeights = 0.
    for f in allFiles:
        tfile = ROOT.TFile.Open(f)
        summedWeights += tfile.Get("summedWeights").GetBinContent(1)
        tfile.Close()
        tchain.Add(f)
    if not summedWeights and not isData(self.sample):
        logging.warning('No events for sample {0}'.format(self.sample))

    self.intLumi = float(getLumi())
    self.xsec = getXsec(self.sample)
    if not self.xsec:
        logging.error('No xsec for sample {0}'.format(self.sample))
    self.sampleLumi = float(summedWeights) / self.xsec if self.xsec else 0.
    self.sampleTree = tchain
    self.j += 1
    #listname = 'selList{0}'.format(self.j)
    #self.sampleTree.Draw('>>{0}'.format(listname),'1','entrylist')
    #skim = ROOT.gDirectory.Get(listname)
    #self.entryListMap['1'] = skim
    self.files = allFiles
    self.initialized = True
    if not self.temp:
        self.fileHash = hashFile(*self.files)
    if self.useProof:
        self.sampleTree.SetProof()
    logging.debug(
        'Initialized {0}: summedWeights = {1}; xsec = {2}; sampleLumi = {3}; intLumi = {4}'
        .format(self.sample, summedWeights, self.xsec, self.sampleLumi,
                self.intLumi))
def __init__(self,**kwargs):
    """Configure the TMVA factory for the WZ BDT training.

    Builds one TChain per signal/background sample from
    ``ntuples/WZ/<dataset>/*.root``, registers each with the factory using
    the weight ``getLumi() / (summedWeights / xsec)``, declares the input
    variables and preselection, and books an AdaBoost BDT.

    Keyword Args:
        inputTreeName: name of the tree to chain from every input file
            (default ``'WZTree'``). All remaining keyword arguments are
            forwarded to the parent constructor.

    Raises:
        ZeroDivisionError: if ``getXsec`` returns 0 for a sample in use, or
            if a sample in use has zero summed weights (e.g. no files).
    """
    inputTreeName = kwargs.pop('inputTreeName','WZTree')
    super(WZTrainer,self).__init__(**kwargs)

    sampleDir = 'ntuples/WZ'
    sampleMap = {
        "dy10"     : "DYJetsToLL_M-10to50_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8",
        "dy50"     : "DYJetsToLL_M-50_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8",
        "ggzz2e2m" : "GluGluToContinToZZTo2e2mu_13TeV_MCFM701_pythia8",
        "ggzz2e2t" : "GluGluToContinToZZTo2e2tau_13TeV_MCFM701_pythia8",
        "ggzz2m2t" : "GluGluToContinToZZTo2mu2tau_13TeV_MCFM701_pythia8",
        "ggzz4e"   : "GluGluToContinToZZTo4e_13TeV_MCFM701_pythia8",
        "ggzz4m"   : "GluGluToContinToZZTo4mu_13TeV_MCFM701_pythia8",
        "ggzz4t"   : "GluGluToContinToZZTo4tau_13TeV_MCFM701_pythia8",
        "tt"       : "TTJets_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8",
        "ttw"      : "TTWJetsToLNu_TuneCUETP8M1_13TeV-amcatnloFXFX-madspin-pythia8",
        "w"        : "WJetsToLNu_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8",
        "ww"       : "WWTo2L2Nu_13TeV-powheg",
        "wz3lnu"   : "WZTo3LNu_TuneCUETP8M1_13TeV-powheg-pythia8",
        "wz2l2q"   : "WZTo2L2Q_13TeV_amcatnloFXFX_madspin_pythia8",
        "wzz"      : "WZZ_TuneCUETP8M1_13TeV-amcatnlo-pythia8",
        "zg"       : "ZGTo2LG_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8",
        "zz2l2n"   : "ZZTo2L2Nu_13TeV_powheg_pythia8",
        "zz2l2q"   : "ZZTo2L2Q_13TeV_amcatnloFXFX_madspin_pythia8",
        "zz4l"     : "ZZTo4L_13TeV_powheg_pythia8",
        #"tzq"      : "tZq_ll_4f_13TeV-amcatnlo-pythia8_TuneCUETP8M1",
    }
    signals = ['wz3lnu']
    backgrounds = ['dy10','dy50','ggzz2e2m','ggzz2m2t','ggzz4e','ggzz4m','ggzz4t','tt','ttw','wzz','zg','zz2l2n','zz2l2q','zz4l']

    # sample luminosity (summed weights / cross section), computed only for
    # the samples that are actually handed to the factory below
    intLumis = {}
    for s in signals + backgrounds:
        summedWeights = 0.
        for f in glob.glob('{0}/{1}/*.root'.format(sampleDir, sampleMap[s])):
            tfile = ROOT.TFile.Open(f)
            hist = tfile.Get('summedWeights')
            summedWeights += hist.GetBinContent(1)
            tfile.Close()
        intLumis[s] = float(summedWeights)/getXsec(sampleMap[s])

    # get the trees; honour the requested tree name instead of hard-coding it
    sigTrees = {}
    for sig in signals:
        sigTrees[sig] = ROOT.TChain(inputTreeName)
        for f in glob.glob('{0}/{1}/*.root'.format(sampleDir,sampleMap[sig])):
            sigTrees[sig].Add(f)
    bgTrees = {}
    for bg in backgrounds:
        bgTrees[bg] = ROOT.TChain(inputTreeName)
        for f in glob.glob('{0}/{1}/*.root'.format(sampleDir,sampleMap[bg])):
            bgTrees[bg].Add(f)

    intLumi = getLumi()

    # add to factory, each tree weighted by getLumi() / sample luminosity
    for sig in sigTrees:
        self.factory.AddSignalTree(sigTrees[sig],intLumi/intLumis[sig])
    for bg in bgTrees:
        self.factory.AddBackgroundTree(bgTrees[bg],intLumi/intLumis[bg])

    # per event weight
    self.factory.SetWeightExpression('genWeight')

    # variables
    self.factory.AddVariable('z1_pt','F')
    self.factory.AddVariable('z2_pt','F')
    self.factory.AddVariable('w1_pt','F')
    self.factory.AddVariable('z_mass','F')
    self.factory.AddVariable('met_pt','F')
    self.factory.AddVariable('numBjetsTight30','I')

    # preselection cut
    passCut = ROOT.TCut('z1_passMedium==1 && z2_passMedium==1 && w1_passTight==1')

    self.factory.PrepareTrainingAndTestTree(
        passCut,
        ":".join(
            [
                "nTrain_Signal=0",
                "nTrain_Background=0",
                "SplitMode=Random",
                "NormMode=NumEvents",
                "!V"
            ]
        )
    )
    # options:
    #    H : display help
    #    V : turn on verbosity
    #    IgnoreNegWeightsInTraining : ignore events with negative weights for training, keep for testing

    # book method
    self.factory.BookMethod(
        ROOT.TMVA.Types.kBDT,
        "BDT",
        ":".join(
            [
                "NTrees=850",
                "MaxDepth=3",
                "BoostType=AdaBoost",
                "AdaBoostBeta=0.5",
                "SeparationType=GiniIndex",
                "nCuts=20",
                "PruneMethod=NoPruning",
            ]
        )
    )
def __init__(self, **kwargs):
    """Set up the TMVA factory for the WZ BDT training.

    For every signal/background sample, the ``*.root`` files under
    ``ntuples/WZ/<dataset>/`` are chained and registered with the factory,
    weighted by ``getLumi() / (summedWeights / xsec)``. The input variables,
    the preselection cut and an AdaBoost BDT are then declared.

    Keyword Args:
        inputTreeName: name of the tree to chain from every input file
            (default ``'WZTree'``). All remaining keyword arguments are
            forwarded to the parent constructor.

    Raises:
        ZeroDivisionError: if ``getXsec`` returns 0 for a sample in use, or
            if a sample in use has zero summed weights (e.g. no files).
    """
    inputTreeName = kwargs.pop('inputTreeName', 'WZTree')
    super(WZTrainer, self).__init__(**kwargs)

    sampleDir = 'ntuples/WZ'
    sampleMap = {
        "dy10": "DYJetsToLL_M-10to50_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8",
        "dy50": "DYJetsToLL_M-50_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8",
        "ggzz2e2m": "GluGluToContinToZZTo2e2mu_13TeV_MCFM701_pythia8",
        "ggzz2e2t": "GluGluToContinToZZTo2e2tau_13TeV_MCFM701_pythia8",
        "ggzz2m2t": "GluGluToContinToZZTo2mu2tau_13TeV_MCFM701_pythia8",
        "ggzz4e": "GluGluToContinToZZTo4e_13TeV_MCFM701_pythia8",
        "ggzz4m": "GluGluToContinToZZTo4mu_13TeV_MCFM701_pythia8",
        "ggzz4t": "GluGluToContinToZZTo4tau_13TeV_MCFM701_pythia8",
        "tt": "TTJets_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8",
        "ttw": "TTWJetsToLNu_TuneCUETP8M1_13TeV-amcatnloFXFX-madspin-pythia8",
        "w": "WJetsToLNu_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8",
        "ww": "WWTo2L2Nu_13TeV-powheg",
        "wz3lnu": "WZTo3LNu_TuneCUETP8M1_13TeV-powheg-pythia8",
        "wz2l2q": "WZTo2L2Q_13TeV_amcatnloFXFX_madspin_pythia8",
        "wzz": "WZZ_TuneCUETP8M1_13TeV-amcatnlo-pythia8",
        "zg": "ZGTo2LG_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8",
        "zz2l2n": "ZZTo2L2Nu_13TeV_powheg_pythia8",
        "zz2l2q": "ZZTo2L2Q_13TeV_amcatnloFXFX_madspin_pythia8",
        "zz4l": "ZZTo4L_13TeV_powheg_pythia8",
        #"tzq"      : "tZq_ll_4f_13TeV-amcatnlo-pythia8_TuneCUETP8M1",
    }
    signals = ['wz3lnu']
    backgrounds = [
        'dy10', 'dy50', 'ggzz2e2m', 'ggzz2m2t', 'ggzz4e', 'ggzz4m',
        'ggzz4t', 'tt', 'ttw', 'wzz', 'zg', 'zz2l2n', 'zz2l2q', 'zz4l'
    ]

    # Sample luminosity (summed weights / cross section). Restricted to the
    # samples used below so unused datasets are neither opened nor able to
    # abort the constructor.
    intLumis = {}
    for s in signals + backgrounds:
        summedWeights = 0.
        for f in glob.glob('{0}/{1}/*.root'.format(sampleDir, sampleMap[s])):
            tfile = ROOT.TFile.Open(f)
            hist = tfile.Get('summedWeights')
            summedWeights += hist.GetBinContent(1)
            tfile.Close()
        intLumis[s] = float(summedWeights) / getXsec(sampleMap[s])

    # get the trees, chained under the caller-supplied tree name
    sigTrees = {}
    for sig in signals:
        sigTrees[sig] = ROOT.TChain(inputTreeName)
        for f in glob.glob('{0}/{1}/*.root'.format(sampleDir,
                                                   sampleMap[sig])):
            sigTrees[sig].Add(f)
    bgTrees = {}
    for bg in backgrounds:
        bgTrees[bg] = ROOT.TChain(inputTreeName)
        for f in glob.glob('{0}/{1}/*.root'.format(sampleDir,
                                                   sampleMap[bg])):
            bgTrees[bg].Add(f)

    intLumi = getLumi()

    # add to factory, each tree weighted by getLumi() / sample luminosity
    for sig in sigTrees:
        self.factory.AddSignalTree(sigTrees[sig], intLumi / intLumis[sig])
    for bg in bgTrees:
        self.factory.AddBackgroundTree(bgTrees[bg], intLumi / intLumis[bg])

    # per event weight
    self.factory.SetWeightExpression('genWeight')

    # variables
    self.factory.AddVariable('z1_pt', 'F')
    self.factory.AddVariable('z2_pt', 'F')
    self.factory.AddVariable('w1_pt', 'F')
    self.factory.AddVariable('z_mass', 'F')
    self.factory.AddVariable('met_pt', 'F')
    self.factory.AddVariable('numBjetsTight30', 'I')

    # preselection cut
    passCut = ROOT.TCut(
        'z1_passMedium==1 && z2_passMedium==1 && w1_passTight==1')

    self.factory.PrepareTrainingAndTestTree(
        passCut, ":".join([
            "nTrain_Signal=0", "nTrain_Background=0", "SplitMode=Random",
            "NormMode=NumEvents", "!V"
        ]))
    # options:
    #    H : display help
    #    V : turn on verbosity
    #    IgnoreNegWeightsInTraining : ignore events with negative weights for training, keep for testing

    # book method
    self.factory.BookMethod(
        ROOT.TMVA.Types.kBDT, "BDT", ":".join([
            "NTrees=850",
            "MaxDepth=3",
            "BoostType=AdaBoost",
            "AdaBoostBeta=0.5",
            "SeparationType=GiniIndex",
            "nCuts=20",
            "PruneMethod=NoPruning",
        ]))