def __rdf_from_dataset(self, dataset):
    """Build the cut/weight wrapper around an RDataFrame for ``dataset``.

    All ntuples of the dataset are added to one main TChain. Friends are
    grouped by their ``tag``: every tag gets a single friend TChain that
    collects the friend files of all ntuples, and each friend chain is
    attached to the main chain.

    Args:
        dataset: Object exposing ``ntuples``; each ntuple provides
            ``path``, ``directory`` and ``friends`` (each friend provides
            ``tag``, ``path`` and ``directory``).

    Returns:
        The ``RDataFrameCutWeight`` wrapping the ``RDataFrame`` built on
        the main chain.

    Raises:
        NameError: If the ntuples do not all share exactly one tree name
            (this includes a dataset without ntuples).
    """
    # Only the number of distinct tree names matters; the name itself is
    # never used because every file is added as '<path>/<directory>'.
    tree_names = {ntuple.directory for ntuple in dataset.ntuples}
    if len(tree_names) != 1:
        raise NameError(
            'Impossible to create RDataFrame with different tree names')

    chain = TChain()
    ftag_fchain = {}
    for ntuple in dataset.ntuples:
        chain.Add('{}/{}'.format(ntuple.path, ntuple.directory))
        for friend in ntuple.friends:
            # Create the friend chain lazily, once per tag.
            if friend.tag not in ftag_fchain:
                ftag_fchain[friend.tag] = TChain()
            ftag_fchain[friend.tag].Add('{}/{}'.format(
                friend.path, friend.directory))

    for ch in ftag_fchain.values():
        chain.AddFriend(ch)
        # Keep friend chains alive
        self.friend_tchains.append(ch)

    if self.nthreads != 1:
        EnableImplicitMT(self.nthreads)

    # Keep main chain alive
    self.tchains.append(chain)
    rdf = RDataFrame(chain)
    return RDataFrameCutWeight(rdf)
def load(self):
    """Build the chain holding the process data.

    Every entry of ``self._files`` is added to a chain named after
    ``self._tree``; afterwards one friend per entry of ``self._friends``
    is loaded through ``self._load_friend`` and attached.

    Returns:
        The assembled TChain for the process.

    Raises:
        RuntimeError: If one of the listed files does not exist.
    """
    process_chain = TChain(self._tree)

    for path in self._files:
        if isfile(path):
            process_chain.Add(path)
        else:
            raise RuntimeError('file does not exist {0}'.format(path))

    for friend_args in self._friends:
        friend_tree = self._load_friend(*friend_args)
        process_chain.AddFriend(friend_tree)

    return process_chain
def attach_friend_info_if_present(current_range: Ranges.TreeRange, chain: ROOT.TChain) -> None:
    """
    Attach the friend trees described by ``current_range`` to ``chain``.

    The cache entry range of every friend chain is set to the global start
    and end entries of ``current_range``. Nothing happens when the range
    carries no friend information or the list of friend names is empty.
    """
    friendinfo = current_range.friendinfo
    if friendinfo is None or friendinfo.fFriendNames.empty():
        return

    # One tuple per friend tree:
    #   friend is a TChain: (name, alias), (file1.root, file2.root, ...), (subname1, subname2, ...)
    #   friend is a TTree:  (name, alias), (filename.root, ), ()
    per_friend = zip(friendinfo.fFriendNames,
                     friendinfo.fFriendFileNames,
                     friendinfo.fFriendChainSubNames)

    for names, filenames, chainsubnames in per_friend:
        treename, alias = names

        # The friend is always rebuilt as a TChain named after the friend tree.
        friend_chain = ROOT.TChain(str(treename))

        # zip_longest pads the (empty) sub-name list of a plain TTree friend
        # with "", while a TChain friend has one sub-name per file.
        for filename, chainsubname in zip_longest(filenames, chainsubnames, fillvalue=""):
            friend_chain.Add(str(filename + "?#" + chainsubname))

        # Same cache range as the parent chain.
        friend_chain.SetCacheEntryRange(current_range.globalstart,
                                        current_range.globalend)

        # Attach to the parent under its alias.
        chain.AddFriend(friend_chain, alias)
def rdf_from_dataset_helper(dataset):
    """Build the main TChain of ``dataset`` together with its friend chains.

    All ntuples are added to one main chain. Friends are grouped by their
    ``tag``: each tag gets a single friend TChain collecting the friend
    files of all ntuples, and every friend chain is attached to the main
    chain.

    Args:
        dataset: Object exposing ``ntuples``; each ntuple provides
            ``path``, ``directory`` and ``friends`` (each friend provides
            ``tag``, ``path`` and ``directory``).

    Returns:
        Tuple ``(chain, friend_tchains)``. The list of friend chains is
        returned so the caller can keep them referenced alongside the
        main chain.

    Raises:
        NameError: If the ntuples do not all share exactly one tree name
            (this includes a dataset without ntuples).
    """
    # Only the number of distinct tree names matters; the name itself is
    # never used because every file is added as '<path>/<directory>'.
    tree_names = {ntuple.directory for ntuple in dataset.ntuples}
    if len(tree_names) != 1:
        raise NameError(
            'Impossible to create RDataFrame with different tree names')

    chain = TChain()
    ftag_fchain = {}
    for ntuple in dataset.ntuples:
        chain.Add('{}/{}'.format(ntuple.path, ntuple.directory))
        for friend in ntuple.friends:
            # Create the friend chain lazily, once per tag.
            if friend.tag not in ftag_fchain:
                ftag_fchain[friend.tag] = TChain()
            ftag_fchain[friend.tag].Add('{}/{}'.format(
                friend.path, friend.directory))

    friend_tchains = []
    for ch in ftag_fchain.values():
        chain.AddFriend(ch)
        # Keep friend chains alive
        friend_tchains.append(ch)
    return (chain, friend_tchains)
def getRootChain(jobpar,treeName):
    """Chain all input files and attach the matching friend files.

    ``jobpar.inputFiles`` is expanded with ``glob``. For every match the
    first bin of the 'Count' and 'CountWithPU' histograms is accumulated
    and the file is added to the main chain. The friend file
    '<basename>_Friend.root' is looked up in the directory given by the
    DATAFRIEND (data) or MCFRIEND (simulation) environment variable, or in
    the 'NewFriends' directory next to the input file when the variable is
    not set. The existence of the friend file is not checked here.

    Args:
        jobpar: Job parameters; ``inputFiles`` and ``isData`` are read.
        treeName: Name of the tree in the main chain.

    Returns:
        Tuple ``(chain, nStep1, nStep1WithPU)``.

    Raises:
        IOError: If an input file cannot be opened.
        SystemExit: If main and friend chains have different entry counts.
    """
    import glob

    InputRootFiles = glob.glob(jobpar.inputFiles)

    nStep1 = 0
    nStep1WithPU = 0

    tr = TChain(treeName)
    trFR = TChain("treeFriend")

    # The override does not depend on the file, so look it up only once.
    # `in`/`get` replace dict.has_key, which no longer exists in Python 3.
    friendOverride = os.environ.get('DATAFRIEND' if jobpar.isData else 'MCFRIEND')

    for rootfile in InputRootFiles:
        basename = os.path.basename(rootfile)
        print("Adding to chain:  %s %s" % (rootfile, rootfile.find("pnfs")))
        if rootfile.find("pnfs") > -1:
            rootfile = "dcache:" + rootfile

        f = TFile.Open(rootfile)
        # TFile.Open hands back a null pointer on failure; fail with a
        # clear message instead of a null dereference on f.Get below.
        if not f or f.IsZombie():
            raise IOError("Could not open input file %s" % (rootfile,))

        h1 = f.Get('Count')  # histogram with Step 1 event count
        nStep1 = nStep1 + h1.GetBinContent(1)

        h2 = f.Get('CountWithPU')  # histogram with pileup-weighted Step 1 event count
        nStep1WithPU = nStep1WithPU + h2.GetBinContent(1)

        f.Close()
        tr.AddFile(rootfile)

        ## now get friends
        if friendOverride is not None:
            FriendDir = friendOverride
        else:
            FriendDir = os.path.join(os.path.dirname(rootfile), "NewFriends")

        friendName = basename[:basename.find(".root")] + "_Friend.root"
        rootFriend = os.path.join(FriendDir, friendName)

        print("Adding friend  %s" % (rootFriend,))
        trFR.AddFile(rootFriend)

    if tr.GetEntries() != trFR.GetEntries():
        print("Number of entries on Main and Friend chains differ -- exiting program")
        sys.exit(1)

    tr.AddFriend(trFR)
    SetOwnership( tr, False )
    # The returned chain still points to the friend chain, so Python must
    # not delete the friend when this function's locals go out of scope.
    SetOwnership( trFR, False )

    print("Number of Step1 events:  %s" % (nStep1,))
    print("Number of Step1 events (pileup weighted):  %s" % (nStep1WithPU,))

    return tr, nStep1, nStep1WithPU
dqdsChain = TChain("dQdSAnalysis")
matchChain = TChain("MatchAnalysis")
eventChain = TChain("EventVertexTree")

# Chains attached as friends of the vertex chain, in attachment order.
# The event chain is filled below but is not made a friend.
_friendChains = (LEEChain, angleChain, shapeChain, gapChain, dqdsChain,
                 matchChain)
_allChains = (vtxChain,) + _friendChains + (eventChain,)

# Every chain reads the same list of input files.
for infile in inputFiles:
    for _chain in _allChains:
        _chain.Add(infile)

for _chain in _friendChains:
    vtxChain.AddFriend(_chain)

# ------------------------------------------------------------------------------------------------------------------------------------------------- #

# Selection switches, all enabled.
FidVolumeCut = TwoParticleCut = GapCut = TwoTrackCut = SkipManyVertCut = True

tooManyVerts = 100
binning = 36
class DataReader(Path):
    """Path that builds named RooDataSets out of a TChain."""

    def __init__(self, cfg):
        """Remember the argument set from ``cfg`` and clear all state."""
        super(DataReader, self).__init__(cfg)
        self.argset = cfg['argset']
        self.reset()

    def reset(self):
        """Reset the base class and drop chain, friend and datasets."""
        super(DataReader, self).reset()
        self.ch = None
        self.friend = None
        self.dataset = {}

    def __str__(self):
        """Print the titles of the chain's files; return an empty string."""
        files = self.ch.GetListOfFiles()
        file_iter = TIter(files)
        print("Input file list is given below.")
        remaining = files.GetEntries()
        while remaining > 0:
            # Calling the iterator yields the next element of the list.
            print("\t{0}".format(file_iter().GetTitle()))
            remaining -= 1
        print("End of the input file list.")
        return ""

    @classmethod
    def templateConfig(cls):
        """Return a fresh configuration dictionary with default values."""
        return {
            'name': "DataReader",
            'ifile': [],
            'ifriend': [],
            'ifriendIndex': ["Run", "Event"],
            'argset': [],
            'dataset': [],
            'preloadFile': None,
        }

    def createDataSet(self, dname, dcut):
        """Return the dataset called ``dname``, creating it with cut ``dcut`` if needed."""
        try:
            return self.dataset[dname]
        except KeyError:
            pass
        created = RooDataSet(dname, "", self.ch, self.argset, dcut)
        self.dataset[dname] = created
        return created

    def createDataSets(self, cfg):
        """Create every ``(name, cut)`` dataset of ``cfg``; return all datasets.

        When the preload file exists, a dataset found in it under the
        requested name is used instead of being rebuilt from the chain.
        """
        for name, cut in cfg:
            preload_path = self.cfg['preloadFile']
            if preload_path and os.path.exists(preload_path):
                file_preload = ROOT.TFile(preload_path)
                preloaded = file_preload.Get(name)
                # Equality test against None kept on purpose (not `is`).
                if not (preloaded == None):
                    self.dataset[name] = preloaded
                file_preload.Close()
            self.createDataSet(name, cut)
        return self.dataset

    def _runPath(self):
        """Build the chain (and the indexed friend chain), then the datasets."""
        self.ch = TChain("tree")
        for path in self.cfg['ifile']:
            self.ch.Add(path)

        has_friends = len(self.cfg['ifriend']) > 0
        if has_friends:
            self.friend = TChain("tree")
            for path in self.cfg['ifriend']:
                self.friend.Add(path)
            self.friend.BuildIndex(*self.cfg['ifriendIndex'])
            self.ch.AddFriend(self.friend)

        self.createDataSets(self.cfg['dataset'])

    def _addSource(self):
        """Add dataset and arguments to source pool"""
        preload_path = self.cfg['preloadFile']
        if preload_path and not os.path.exists(preload_path):
            # First run with a preload file configured: store the datasets.
            file_preload = ROOT.TFile(preload_path, 'RECREATE')
            for stored in self.dataset.values():
                stored.Write()
            file_preload.Close()

        if 'source' not in self.cfg:
            self.cfg['source'] = {}
        source = self.cfg['source']

        source['{0}.tree'.format(self.name)] = self.ch
        source['{0}.argset'.format(self.name)] = self.argset
        if len(self.cfg['ifriend']) > 0:
            source['{0}.friend'.format(self.name)] = self.friend

        for dname, d in self.dataset.items():
            source[dname] = d
            self.logger.logINFO("{0} events in {1}.".format(
                d.sumEntries(), dname))

        super(DataReader, self)._addSource()
help="Truth tree name") args = parser.parse_args() print("Read trees from", args.inputfiles) tree_reco = TChain(args.reco_tree) for infile in args.inputfiles: tree_reco.Add(infile) tree_truth = TChain(args.truth_tree) for infile in args.inputfiles: tree_truth.Add(infile) # make them friends! tree_reco.AddFriend(tree_truth) branches_reco = {} branches_truth = {} cuts = "isMatched" if args.truth_tree == 'parton': branches_reco = { ('thad', 'pt'): 'klfitter_bestPerm_topHad_pt', ('thad', 'eta'): 'klfitter_bestPerm_topHad_eta', ('thad', 'y'): 'klfitter_bestPerm_topHad_y', ('thad', 'phi'): 'klfitter_bestPerm_topHad_phi', ('thad', 'm'): 'klfitter_bestPerm_topHad_m', ('thad', 'E'): 'klfitter_bestPerm_topHad_E', ('thad', 'pout'): 'klfitter_bestPerm_topHad_pout', ('tlep', 'pt'): 'klfitter_bestPerm_topLep_pt',
class Plotter(object):
    """Produce 2D summary histograms (mutual information, correlation,
    covariance) for variables of a signal and a background tree.

    The numerical work is delegated to ``MutualInformer``, ``Correlator``
    and ``Covariator``, which are defined outside this class.
    """

    # Running index that keeps the names of the created histograms unique.
    counter = 0

    def __init__(self, sigFilePath, bgFilePath=None, treeName="jets"):
        """Store the file paths and the tree name; no file is opened yet."""
        self.sigFilePath = sigFilePath
        self.bgFilePath = bgFilePath
        self.treeName = treeName
        self.friends = None
        self.customBins = False

    def AddFriend(self, f):
        """Register ``f``, the name of a friend tree for makeCorrelationPlot."""
        if self.friends is None:
            self.friends = [f]
        else:
            self.friends.append(f)

    def makeMIPlot(self, varList, doTruth=True):
        """Return a symmetric TH2D of pairwise mutual information.

        A chain of background followed by signal entries is built and a
        friend tree with a 'truth' branch (0 for background, 1 for signal)
        is attached to it.

        Args:
            varList: Sequence of variables; elements 0-3 of each entry are
                forwarded to ``MutualInformer``.
            doTruth: Use ``MITruth`` when True, ``MI`` otherwise.
        """
        self.chain = TChain(self.treeName)
        self.chain.AddFile(self.bgFilePath)
        self.chain.AddFile(self.sigFilePath)
        fSig = TFile(self.sigFilePath)
        fBg = TFile(self.bgFilePath)
        tSig = fSig.Get(self.treeName)
        tBg = fBg.Get(self.treeName)

        self.fBuffer = TFile('/tmp/buffer.root', 'RECREATE')
        self.truthtree = TTree('truthtree', 'truthtree')
        # The branch reads from this very buffer on every Fill(), so the
        # value has to be changed in place; rebinding the name to a new
        # array would leave the branch pointing at the old value.
        truth = arr('f', [0])
        self.truthtree.Branch('truth', truth, 'truth/F')
        # Friend entries are matched to chain entries by position, hence
        # the labels follow the chain order: background first, then signal.
        truth[0] = 0
        for iE in xrange(tBg.GetEntries()):
            self.truthtree.Fill()
        truth[0] = 1
        for iE in xrange(tSig.GetEntries()):
            self.truthtree.Fill()
        self.chain.AddFriend(self.truthtree)

        mi = MutualInformer(self.chain)
        nVars = len(varList)
        hplot = TH2D('mi' + str(Plotter.counter), 'mi' + str(Plotter.counter),
                     nVars, 0, nVars, nVars, 0, nVars)
        Plotter.counter += 1
        # Only the strict lower triangle is computed and then mirrored;
        # the diagonal bins are left untouched.
        for iV in xrange(nVars):
            for jV in xrange(iV):
                varA = varList[iV]
                varB = varList[jV]
                if doTruth:
                    val = mi.MITruth(varA[0], varA[1], varA[2], varA[3],
                                     varB[0], varB[1], varB[2], varB[3])
                else:
                    val = mi.MI(varA[0], varA[1], varA[2], varA[3],
                                varB[0], varB[1], varB[2], varB[3])
                print("%s %s %s" % (iV + 1, jV + 1, val))
                hplot.SetBinContent(jV + 1, iV + 1, val)
                hplot.SetBinContent(iV + 1, jV + 1, val)
        return hplot

    def makeCorrelationPlot(self, varList1, varList2, cut=""):
        """Return ``(TH2D, matrix)`` of correlations on the signal tree.

        Args:
            varList1: Variables on the x axis. With ``self.customBins`` set,
                every entry is indexed and element 0 is used as the label.
            varList2: Variables on the y axis, or None to correlate
                ``varList1`` with itself.
            cut: Selection forwarded to ``Correlator.pearsonSuperFast``.
        """
        fIn = TFile(self.sigFilePath)
        t = fIn.Get(self.treeName)
        if self.friends is not None:
            for f in self.friends:
                tFriend = fIn.Get(f)
                t.AddFriend(tFriend)
        gROOT.cd()
        c = Correlator(t)

        nVars1 = len(varList1)
        isSymmetric = False
        if varList2 is None:
            varList2 = varList1
            isSymmetric = True
        nVars2 = len(varList2)

        hplot = TH2D('cor' + str(Plotter.counter), 'cor' + str(Plotter.counter),
                     nVars1, 0, nVars1, nVars2, 0, nVars2)
        Plotter.counter += 1
        xaxis = hplot.GetXaxis()
        yaxis = hplot.GetYaxis()
        matrix = empty([nVars1, nVars2])

        if isSymmetric:
            # Lower triangle including the diagonal, mirrored.
            for iV in xrange(nVars1):
                if self.customBins:
                    xaxis.SetBinLabel(iV + 1, varList1[iV][0])
                    yaxis.SetBinLabel(iV + 1, varList1[iV][0])
                else:
                    xaxis.SetBinLabel(iV + 1, varList1[iV])
                    yaxis.SetBinLabel(iV + 1, varList1[iV])
                for jV in xrange(iV + 1):
                    varA = varList1[iV]
                    varB = varList1[jV]
                    if self.customBins:
                        val = c.pearsonSuperFast(varA[0], varB[0], cut,
                                                 varList1, varA[1], varB[1])
                    else:
                        val = c.pearsonSuperFast(varA, varB, cut, varList1)
                    hplot.SetBinContent(jV + 1, iV + 1, val)
                    hplot.SetBinContent(iV + 1, jV + 1, val)
                    matrix[iV, jV] = val
                    matrix[jV, iV] = val
        else:
            for iV in xrange(nVars1):
                if self.customBins:
                    xaxis.SetBinLabel(iV + 1, varList1[iV][0])
                else:
                    xaxis.SetBinLabel(iV + 1, varList1[iV])
                for jV in xrange(nVars2):
                    if self.customBins:
                        yaxis.SetBinLabel(jV + 1, varList2[jV][0])
                    else:
                        yaxis.SetBinLabel(jV + 1, varList2[jV])
                    varA = varList1[iV]
                    varB = varList2[jV]
                    if self.customBins:
                        # NOTE(review): unlike the symmetric branch, no
                        # variable list is passed here - confirm against
                        # the signature of Correlator.pearsonSuperFast.
                        val = c.pearsonSuperFast(varA[0], varB[0], cut,
                                                 varA[1], varB[1])
                    else:
                        val = c.pearsonSuperFast(varA, varB, cut, varList1)
                    print("%s %s %s" % (iV, jV, val))
                    hplot.SetBinContent(iV + 1, jV + 1, val)
                    matrix[iV, jV] = val
        return hplot, matrix

    def makeCovariancePlot(self, varList1, varList2, cut=""):
        """Return ``(TH2D, matrix)`` of covariances on the signal tree.

        Args:
            varList1: Variables on the x axis (also used as bin labels).
            varList2: Variables on the y axis, or None to use ``varList1``
                on both axes.
            cut: Selection forwarded to ``Covariator.covSuperFast``.
        """
        fIn = TFile(self.sigFilePath)
        t = fIn.Get(self.treeName)
        gROOT.cd()
        c = Covariator(t)

        nVars1 = len(varList1)
        isSymmetric = False
        if varList2 is None:
            varList2 = varList1
            isSymmetric = True
        nVars2 = len(varList2)

        hplot = TH2D('cov' + str(Plotter.counter), 'cov' + str(Plotter.counter),
                     nVars1, 0, nVars1, nVars2, 0, nVars2)
        Plotter.counter += 1
        xaxis = hplot.GetXaxis()
        yaxis = hplot.GetYaxis()
        matrix = empty([nVars1, nVars2])

        if isSymmetric:
            # Lower triangle including the diagonal, mirrored.
            for iV in xrange(nVars1):
                xaxis.SetBinLabel(iV + 1, varList1[iV])
                yaxis.SetBinLabel(iV + 1, varList1[iV])
                for jV in xrange(iV + 1):
                    varA = varList1[iV]
                    varB = varList1[jV]
                    val = c.covSuperFast(varA, varB, cut)
                    hplot.SetBinContent(jV + 1, iV + 1, val)
                    hplot.SetBinContent(iV + 1, jV + 1, val)
                    matrix[iV, jV] = val
                    matrix[jV, iV] = val
        else:
            for iV in xrange(nVars1):
                xaxis.SetBinLabel(iV + 1, varList1[iV])
                for jV in xrange(nVars2):
                    yaxis.SetBinLabel(jV + 1, varList2[jV])
                    varA = varList1[iV]
                    varB = varList2[jV]
                    val = c.covSuperFast(varA, varB, cut)
                    hplot.SetBinContent(iV + 1, jV + 1, val)
                    matrix[iV, jV] = val
        return hplot, matrix
CSVChain = TChain(CSVTree) CSVChain.Add(input_file) TopRecoTree = "likelihood/TopReco" TopRecoChain = TChain(TopRecoTree) TopRecoChain.Add(input_file) NeutrinoTree = "likelihood/Neutrino" NeutrinoChain = TChain(NeutrinoTree) NeutrinoChain.Add(input_file) LikelihoodTree = "TTbar_plus_X_analysis/EPlusJets/Ref selection/LikelihoodReco/Discriminator" LikelihoodChain = TChain(LikelihoodTree) LikelihoodChain.Add(input_file) CSVChain.AddFriend(TopRecoChain) CSVChain.AddFriend(NeutrinoChain) CSVChain.AddFriend(LikelihoodChain) CSVChain.SetBranchStatus("*", 1) ########## FILL HISTOGRAMS ########## for event in CSVChain: Jet_CSV = event.__getattr__("Jets") TypeOfJet = event.__getattr__("TypeofJet") #1 for light, 2 for b for JetIndex in range(0, len(TypeOfJet)): if (TypeOfJet[JetIndex] == 1): LightJetCSVHist.Fill(Jet_CSV[JetIndex]) if (TypeOfJet[JetIndex] == 2):
class DataReader(Path):
    """Create RooDataSet from a TChain"""

    def __init__(self, cfg):
        """Remember the argument set from ``cfg`` and clear all state."""
        super(DataReader, self).__init__(cfg)
        self.argset = cfg['argset']
        self.reset()
        return

    def reset(self):
        """Reset the base class and drop chain, friend and datasets."""
        super(DataReader, self).reset()
        self.ch = None
        self.friend = None
        self.dataset = {}

    def __str__(self):
        """Log the input files of the chain at debug level.

        Returns:
            An empty string: the listing goes to the logger, and
            ``__str__`` has to return a string for ``str()`` to work.
        """
        list_of_files = self.ch.GetListOfFiles()
        next_file = TIter(list_of_files)
        self.logger.logDEBUG("Input file list is given below.")
        self.logger.logDEBUG("Absolute path: ",
                             os.path.dirname(next_file.GetTitle()))
        ifile = next_file.Begin()
        for f in range(list_of_files.GetEntries()):
            self.logger.logDEBUG("\t{0}".format(
                os.path.basename(ifile.GetTitle())), Stamp=False)
            ifile = next_file.Next()
        self.logger.logDEBUG("End of the input file list.")
        return ""

    @classmethod
    def templateConfig(cls):
        """Return a fresh configuration dictionary with default values."""
        cfg = {
            'name': "DataReader",
            'ifile': [],
            'ifriend': [],
            'ifriendIndex': ["Run", "Event"],
            'argset': [],
            'dataset': [],
            'preloadFile': None,
        }
        return cfg

    def createDataSet(self, dname, dcut):
        """Return named dataset, create if not exist

        An existing dataset is kept unless the process was started with
        the ``force`` argument. New datasets are stored as deep copies in
        ``self.dataset``; when the argument set contains "Phimass", a
        binned copy is stored under '<dname>.hist' as well.

        Args:
            dname: Dataset name. Its third dot-separated field is used as
                the cut range name, so at least two dots are required.
            dcut: Selection applied on import.

        Returns:
            Always 1.
        """
        if dname in self.dataset and not self.process.cfg['args'].force:
            self.logger.logINFO("\033[0;34;47m Dataset: ", dname,
                                " Already Exists! \033[0m. Total Entries:",
                                self.dataset[dname].sumEntries())
            self.logger.logDEBUG(dcut, Stamp=False)
            return 1

        # Scratch file that is open while the dataset is built and written.
        tempfile_preload = ROOT.TFile(tempfile.gettempdir() + "/temp.root",
                                      'RECREATE')  #Pritam
        RooCut = ROOT.RooFit.Cut(dcut)
        Import = ROOT.RooFit.Import(self.ch)
        Range = ROOT.RooFit.CutRange(
            dname.split(".")[2])  # Not taking effect, need review
        if self.argset.find("Bmass"):
            self.argset.find(
                "Bmass").removeRange()  # Analysis specific line introduced

        if "dataReader" in dname or "sigMCGENReader" in dname:
            data = RooDataSet(dname, "", self.argset, Import, RooCut, Range)
        else:
            # The weight variable is looked up only where it is needed, so
            # unweighted readers work without a 'weight' entry in cfg.
            Weight = ROOT.RooFit.WeightVar(self.cfg['weight'])
            data = RooDataSet(dname, "Weighted dataset", self.argset, Import,
                              RooCut, Range, Weight)

        if self.argset.find("Phimass"):
            self.argset.find("Phimass").setBins(20)
            datahist = ROOT.RooDataHist(
                dname + ".hist", "",
                ROOT.RooArgSet(self.argset.find("Phimass")), data)
            self.dataset[dname + ".hist"] = deepcopy(datahist)

        data.Write()
        self.dataset[dname] = deepcopy(data)
        self.logger.logINFO("\033[0;34;47m Creating Dataset: ", dname,
                            ": \033[0m. Total Entries:", data.sumEntries())
        self.logger.logDEBUG(dcut, Stamp=False)
        tempfile_preload.Close()  #Pritam
        return 1

    def createDataSets(self, dataset):
        """Get named dataset

        For every ``(name, cut)`` pair, a dataset stored in the preload
        file (if that file exists) is copied into ``self.dataset`` before
        ``createDataSet`` is called for the pair.

        Returns:
            The dictionary of all datasets.
        """
        for name, cut in dataset:
            if self.cfg['preloadFile'] and os.path.exists(
                    self.cfg['preloadFile']):
                file_preload = ROOT.TFile(self.cfg['preloadFile'])
                data = file_preload.Get(name)
                datahist = file_preload.Get(name + '.hist')
                # Equality test against None kept on purpose (not `is`).
                if not data == None:
                    self.dataset[name] = deepcopy(data)
                    if self.argset.find("Phimass"):
                        self.dataset[name + '.hist'] = deepcopy(datahist)
                file_preload.Close()
            self.createDataSet(name, cut)
        return self.dataset

    def _runPath(self):
        """Build the chain (and the indexed friend chain), create the
        datasets and log the list of input files."""
        self.ch = TChain()
        print("Name: ", self.cfg['name'])
        for f in self.cfg['ifile']:
            self.ch.Add(f)
        if len(self.cfg['ifriend']) > 0:
            self.friend = TChain("tree")
            for f in self.cfg['ifriend']:
                self.friend.Add(f)
            self.friend.BuildIndex(*self.cfg['ifriendIndex'])
            self.ch.AddFriend(self.friend)
        self.createDataSets(self.cfg['dataset'])
        self.__str__()

    def _addSource(self):
        """Add dataset and arguments to source pool"""
        preload_path = self.cfg['preloadFile']
        # Nothing is written when no preload file is configured; the
        # template default is None, which os.path.exists() cannot handle.
        if preload_path and not os.path.exists(preload_path):
            file_out = ROOT.TFile.Open(preload_path, 'RECREATE')
            for dname, d in self.dataset.items():
                d.Write()
            file_out.Close()
        elif preload_path and os.path.exists(preload_path):
            file_out = ROOT.TFile(preload_path, 'UPDATE')
            for dname, d in self.dataset.items():
                if file_out.Get(dname) and self.process.cfg['args'].force:
                    file_out.Delete(dname + ';*')  # Delete old objects if exists
                    file_out.Delete(
                        'ProcessID*;*')  # Delete old Pids if exists
                    d.Write()
                elif not file_out.Get(dname):
                    print("Freshly booking an object")
                    d.Write()
            file_out.Close()

        if 'source' not in self.cfg:
            self.cfg['source'] = {}
        #self.cfg['source']['{0}.tree'.format(self.name)] = self.ch
        self.cfg['source']['{0}.argset'.format(self.name)] = self.argset
        if len(self.cfg['ifriend']) > 0:
            self.cfg['source']['{0}.friend'.format(self.name)] = self.friend
        for dname, d in self.dataset.items():
            self.cfg['source'][dname] = d
            self.logger.logINFO("{0} events in {1}.".format(
                d.sumEntries(), dname))
        super(DataReader, self)._addSource()