def test_unsupported_branch_in_branches():
    """Explicitly requesting the TLorentzVector branch must raise TypeError."""
    tree = TTree('test', 'test')
    lorentz = TLorentzVector()
    scalar = np.array([0], dtype=float)
    tree.Branch('vector', lorentz)
    tree.Branch('double', scalar, 'double/D')

    # Converting with the default branch selection is called with no
    # exception expected.
    rnp.tree2array(tree)

    requested = {'branches': ['vector']}
    assert_raises(TypeError, rnp.tree2array, tree, **requested)
def test_tree2array():
    """Convert 'tree' through a TChain and through a TFile, then check a missing file."""
    chained = TChain('tree')
    chained.Add(load('single1.root'))
    from_chain = rnp.tree2array(chained)
    check_single(from_chain)

    root_file = get_file('single1.root')
    from_file = rnp.tree2array(root_file.Get('tree'))
    check_single(from_file)

    # get_file is expected to raise ValueError for a file that does not exist.
    assert_raises(ValueError, get_file, 'file_does_not_exist.root')
def read_data(input_file_path, tree_name, event=1):
    """Return the record of a single event from a tree stored in a ROOT file.

    Parameters
    ----------
    input_file_path : path of the ROOT file to open (read-only).
    tree_name : name of the tree to fetch from the file.
    event : 1-based number of the event to return (default: the first one).

    Returns
    -------
    The ``event``-th row of the array produced by ``tree2array``.

    Raises
    ------
    ValueError
        If ``event`` is smaller than 1. Without this check, ``event=0`` would
        silently return the *last* event because of negative indexing.
    IndexError
        If ``event`` is larger than the number of entries in the tree.
    """
    if event < 1:
        raise ValueError(
            "event numbers start at 1, got {0!r}".format(event))

    input_file = ROOT.TFile(input_file_path, 'read')
    try:
        data = tree2array(input_file.Get(tree_name))
    finally:
        # Always release the file handle, even if the conversion fails.
        input_file.Close()

    # choose event to display
    return data[event - 1]
def test_single_branch():
    """A branch name passed as a plain string gives an array of dtype '<i4'."""
    expected_dtype = np.dtype('<i4')

    root_file = get_file('single1.root')
    from_tree = rnp.tree2array(root_file.Get('tree'), branches='n_int')
    from_path = rnp.root2array(load('single1.root'), branches='n_int')

    assert_equal(from_tree.dtype, expected_dtype)
    assert_equal(from_path.dtype, expected_dtype)
def to_array(self, *args, **kwargs):
    """
    Return this tree as a NumPy structured array.

    Every positional and keyword argument is handed to
    ``root_numpy.tree2array`` unchanged, after the tree itself.
    """
    # Imported lazily, as in the original, so root_numpy is only needed
    # when a conversion is actually requested.
    import root_numpy
    return root_numpy.tree2array(self, *args, **kwargs)
def do_cut(args, did, files, supercuts, weights):
    """Apply every cut generated from ``supercuts`` to one sample and dump the counts.

    Parameters
    ----------
    args : options object; reads ``tree_name``, ``eventWeightBranch``,
        ``numpy`` and ``output_directory``.
    did : sample identifier; used for the scale-factor lookup and as the
        name of the output JSON file.
    files : input files handed to ``get_ttree``.
    supercuts : list of dicts, each with a ``'selections'`` format string.
    weights : weights object handed to ``get_scaleFactor``.

    Returns
    -------
    (result, elapsed) : ``result`` is True on success and False if any
        exception was caught (it is logged, not re-raised); ``elapsed`` is
        the difference of two ``clock()`` readings.
    """
    start = clock()
    try:
        # load up the tree for the files
        tree = get_ttree(args.tree_name, files, args.eventWeightBranch)

        # if using numpy optimization, load the tree as a numpy array to
        # apply_cuts on
        if args.numpy:
            # A user might reference several branches in one selection
            # string, so replace every run of non-word characters by a space
            # (underscores are safe), split on spaces, drop the empty pieces
            # and collect the unique names over all supercuts.
            alphachars = re.compile(r'\W+')
            branchesSpecified = set()
            for supercut in supercuts:
                selection = supercut['selections'].format(*['-'] * 10)
                branchesSpecified.update(
                    filter(None, alphachars.sub(' ', selection).split(' ')))

            # get actual list of branches in the file
            availableBranches = set(
                i.GetName() for i in tree.GetListOfBranches()
                if not i.GetName() == args.eventWeightBranch)

            # remove anything that doesn't exist
            branchesToUse = [branch for branch in branchesSpecified
                             if branch in availableBranches]
            branchesSkipped = [branch for branch in branchesSpecified
                               if branch not in availableBranches]
            if branchesSkipped:
                logger.info("The following branches have been skipped...")
                for branch in branchesSkipped:
                    logger.info("\t{0:s}".format(branch))

            tree = rnp.tree2array(
                tree, branches=[args.eventWeightBranch] + branchesToUse)

        # get the scale factor
        sample_scaleFactor = get_scaleFactor(weights, did)

        # iterate over the cuts available
        cuts = {}
        for cut in get_cut(copy.deepcopy(supercuts)):
            cut_hash = get_cut_hash(cut)
            rawEvents, weightedEvents = apply_cuts(
                tree, cut, args.eventWeightBranch, args.numpy)
            scaledEvents = weightedEvents * sample_scaleFactor
            cuts[cut_hash] = {'raw': rawEvents,
                              'weighted': weightedEvents,
                              'scaled': scaledEvents}
        logger.info("Applied {0:d} cuts".format(len(cuts)))

        with open('{0:s}/{1:s}.json'.format(args.output_directory, did), 'w+') as f:
            f.write(json.dumps(cuts, sort_keys=True, indent=4))
        result = True
    except Exception:
        # Best-effort per-sample processing: log and report failure, but let
        # KeyboardInterrupt / SystemExit propagate so the job can be stopped.
        logger.exception("Caught an error - skipping {0:s}".format(did))
        result = False
    end = clock()
    return (result, end - start)
def i3root2hdf5(infile, force=False):
    """Copy every tree of a ROOT file into datasets of ``<infile>.h5``.

    Parameters
    ----------
    infile : path of the ROOT file to read.
    force : if True the HDF5 file is opened with mode ``'w'`` (truncate),
        otherwise with mode ``'a'`` (append).

    Keys named ``'AntMCTree'`` or ``'MasterTree'`` are skipped. A tree whose
    array makes ``create_dataset`` raise ``TypeError`` is skipped as well.
    Both files are closed even if a conversion raises.
    """
    h5file = infile + '.h5'
    bad_keys = ('AntMCTree', 'MasterTree')
    mode = 'w' if force else 'a'

    rf = root_open(infile, 'r')
    try:
        keys = [k.name for k in rf.keys()]
        with h5py.File(h5file, mode) as h5:
            for key in keys:
                if key in bad_keys:
                    continue
                arr = tree2array(rf[key])
                try:
                    h5.create_dataset(
                        key,
                        data=arr,
                        compression='gzip',
                        compression_opts=9,
                        shuffle=True,
                        fletcher32=True,
                    )
                except TypeError:
                    # Deliberate best effort: skip trees that
                    # create_dataset rejects with TypeError.
                    continue
            h5.flush()
    finally:
        rf.Close()
def tree_to_ndarray(trees, branches=None, dtype=np.float32, include_weight=False, weight_dtype="f4"):
    """
    Turn one tree, or a list/tuple of trees, into a single numpy.ndarray.

    Each tree goes through ``tree2array`` (restricted to ``branches``),
    ``recarray_to_ndarray`` (with ``dtype``) and ``_add_weight_column``
    (with ``include_weight`` and ``weight_dtype``). For a list or tuple the
    per-tree results are joined with ``np.concatenate``.
    """
    def convert(single_tree):
        values = recarray_to_ndarray(
            tree2array(single_tree, branches), dtype=dtype)
        return _add_weight_column(
            values, single_tree, include_weight, weight_dtype)

    if not isinstance(trees, (list, tuple)):
        return convert(trees)
    return np.concatenate([convert(tree) for tree in trees])
def main():
    """Histogram the 'mJ' branch of every tree in each ISRgamma input file.

    For every ``ISRgamma_*.root`` file in the input directory an output file
    with the prefix ``hist_ISRgamma`` is (re)created in the output
    directory. Each key of the input file is read as a tree, converted with
    ``tree2array`` and filled into a 32-bin TH1F over [100, 260] using the
    'weight' branch as weights. Both files are closed even if a category
    fails.
    """
    inputdir = '/eos/atlas/user/a/asogaard/Analysis/2016/BoostedJetISR/StatsInputs/2017-06-28/'
    outputdir = '/eos/atlas/user/a/asogaard/Analysis/2016/BoostedJetISR/StatsInputs/2017-07-10/'

    inputpaths = glob.glob(inputdir + '/ISRgamma_*.root')
    outputpaths = [p.replace(inputdir, outputdir).replace('ISRgamma', 'hist_ISRgamma')
                   for p in inputpaths]

    for inputpath, outputpath in zip(inputpaths, outputpaths):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Processing '%s'" % inputpath)
        infile = ROOT.TFile(inputpath, 'READ')
        outfile = ROOT.TFile(outputpath, 'RECREATE')
        try:
            categories = [key.GetName() for key in infile.GetListOfKeys()]
            for category in categories:
                print("-- '%s'" % category)
                array = tree2array(infile.Get(category))

                hist = ROOT.TH1F(category, "", 32, 100, 260)
                fill_hist(hist, array['mJ'], weights=array['weight'])

                # TF shape/norm ...

                outfile.cd()
                hist.Write()
            outfile.Write()
        finally:
            outfile.Close()
            infile.Close()
def test_chain():
    """Read one file through a TChain, then two files through root2array."""
    single_chain = ROOT.TChain('tree')
    single_chain.Add(load('single1.root'))
    check_single(rnp.tree2array(single_chain))

    both_files = load(['single1.root', 'single2.root'])
    combined = rnp.root2array(both_files)
    # The second argument of check_single is 200 for the two-file array.
    check_single(combined, 200)
def test_selection():
    """Check that the ``selection`` argument of tree2array filters entries."""
    chain = ROOT.TChain('tree')
    for filename in ('single1.root', 'single2.root'):
        chain.Add(load(filename))

    # Unfiltered: at least one entry has d_double <= 100.
    unfiltered = rnp.tree2array(chain)
    assert_equal((unfiltered['d_double'] <= 100).any(), True)

    # Filtered: no entry with d_double <= 100 may remain.
    filtered = rnp.tree2array(chain, selection="d_double > 100")
    assert_equal((filtered['d_double'] <= 100).any(), False)

    # selection with differing variables in branches and expression
    rnp.tree2array(
        chain,
        branches=['d_double'],
        selection="f_float < 100 && n_int%2 == 1")

    # selection with TMath
    rnp.tree2array(chain, selection="TMath::Erf(d_double) < 0.5")
def getTreeToArray(tree):
    """
    Convert a tree to a numpy array and return it.

    Inputs: TTree object
    Return: the array produced by root_numpy.tree2array(tree)
    """
    import root_numpy
    converted = root_numpy.tree2array(tree)
    return converted
def records(self, **kwargs):
    """
    Return this object's tree as a numpy record array.

    The file is obtained with ``get_file(self.ntuple_path, self.student)``
    and the tree is looked up under ``self.tree_name``. Any keyword
    arguments are forwarded to ``root_numpy.tree2array``.
    """
    from root_numpy import tree2array
    source_file = get_file(self.ntuple_path, self.student)
    source_tree = source_file[self.tree_name]
    log.info('Converting tree to record array, sorry if this is long ...')
    structured = tree2array(source_tree, **kwargs)
    return structured.view(np.recarray)
def test_weights():
    """Tree weights must show up in the weight column of the converted array."""
    # Weight set by hand on a single tree, exported under a custom name.
    root_file = ROOT.TFile(load('test.root'))
    tree = root_file.Get('tree')
    tree.SetWeight(5.)
    weighted = rnp.tree2array(
        tree, include_weight=True, weight_name='treeweight')
    assert_array_equal(weighted['treeweight'], np.ones(100) * 5)

    # Two files read together: the 'weight' column is expected to hold
    # 100 entries of 2 followed by 100 entries of 3.
    paths = load(['single1.root', 'single2.root'])
    combined = rnp.root2array(paths, include_weight=True)
    expected = np.concatenate((np.ones(100) * 2., np.ones(100) * 3.))
    assert_array_equal(combined['weight'], expected)
def test_array2tree():
    """Round-trip a structured array through array2tree / tree2array.

    Also checks that extending an existing tree doubles its entries and that
    passing a non-tree object as ``tree`` raises TypeError.
    """
    a = np.array([
        (12345, 2., 2.1, True),
        (3, 4., 4.2, False),],
        dtype=[
            ('x', np.int32),
            ('y', np.float32),
            ('z', np.float64),
            # np.bool_ instead of the np.bool alias, which was removed in
            # NumPy 1.24; np.bool_ exists in every NumPy version.
            ('w', np.bool_)])
    with temp() as tmp:
        tree = rnp.array2tree(a)
        a_conv = rnp.tree2array(tree)
        assert_array_equal(a, a_conv)
        # extend the tree
        tree2 = rnp.array2tree(a, tree=tree)
        assert_equal(tree2.GetEntries(), len(a) * 2)
        a_conv2 = rnp.tree2array(tree2)
        assert_array_equal(np.hstack([a, a]), a_conv2)
    assert_raises(TypeError, rnp.array2tree, a, tree=object)
def to_array(self, branches=None, include_weight=False, weight_name='weight', weight_dtype='f4'):
    """
    Return this tree as a NumPy structured array.

    ``branches`` is passed positionally to ``root_numpy.tree2array``; the
    three weight options are passed through by keyword.
    """
    from root_numpy import tree2array
    weight_options = dict(
        include_weight=include_weight,
        weight_name=weight_name,
        weight_dtype=weight_dtype,
    )
    return tree2array(self, branches, **weight_options)
def test_array2tree():
    """Round-trip a structured array through array2tree / tree2array.

    A scratch ROOT file is opened for the duration of the test. It is closed
    and removed even when one of the assertions fails.
    """
    a = np.array([
        (12345, 2., 2.1, True),
        (3, 4., 4.2, False),],
        dtype=[
            ('x', np.int32),
            ('y', np.float32),
            ('z', np.float64),
            # np.bool_ instead of the np.bool alias, which was removed in
            # NumPy 1.24; np.bool_ exists in every NumPy version.
            ('w', np.bool_)])
    tmp = ROOT.TFile.Open('test_array2tree_temp_file.root', 'recreate')
    try:
        tree = rnp.array2tree(a)
        a_conv = rnp.tree2array(tree)
        assert_array_equal(a, a_conv)
        # extend the tree
        tree2 = rnp.array2tree(a, tree=tree)
        assert_equal(tree2.GetEntries(), len(a) * 2)
        a_conv2 = rnp.tree2array(tree2)
        assert_array_equal(np.hstack([a, a]), a_conv2)
    finally:
        # Do not leave the scratch file behind when an assertion fails.
        tmp.Close()
        os.remove(tmp.GetName())
    assert_raises(TypeError, rnp.array2tree, a, tree=object)
def read_ntuple(path="./", cfg="", selection = 'loose'):
    """
    Build a TChain from a config file and return it as an array.

    The config file ``path + "/" + cfg`` is read as a space-separated table
    with the columns id, tree and file ('#' starts a comment). Every row
    adds ``file + '/' + tree`` to a chain named 'merged'. The chain is
    converted with ``tree2array`` using ``ecal_selections[selection]`` as
    the selection.
    """
    config_path = path + "/" + cfg
    config = pd.read_csv(config_path, sep=" ",
                         names=['id', 'tree', 'file'], comment="#")
    print(config)

    chain = r.TChain('merged')
    for _, entry in config.iterrows():
        print("%s \t:  %s" % (entry.id, entry.file))
        chain.Add(entry.file + '/' + entry.tree)

    # transform this chain to an array meant to be used later by matplotlib
    return tree2array(chain, selection=ecal_selections[selection])
def test_single():
    """Read single1.root three ways and run check_single on each result."""
    path = load('single1.root')

    # default tree
    check_single(rnp.root2array(path))

    # specify tree name
    check_single(rnp.root2array(path, treename='tree'))

    # tree2array
    root_file = get_file('single1.root')
    check_single(rnp.tree2array(root_file.Get('tree')))
def KStest( PmissBins , ana_sim , ana_data , var , cut=ROOT.TCut() , debug=2 , Nbins=20):
    """Run a two-sample KS test of simulation vs. data in bins of Pmiss3Mag.

    Parameters
    ----------
    PmissBins : sequence of bins; element [0] is the lower and element [1]
        the upper Pmiss3Mag edge of each bin.
    ana_sim, ana_data : objects whose ``GetTree()`` gives the trees to compare.
    var : branch passed to ``tree2array`` and compared between the samples.
    cut : unused; kept for interface compatibility.
    debug : if larger than 1, both distributions are drawn per bin.
    Nbins : number of points of the ``np.linspace(-1, 2, Nbins)`` bin edges.

    Returns
    -------
    (KS_distances, Pval_KS) : two lists with one entry per bin.

    The figure is always saved to the hard-coded PDF path below.
    [http://docs.scipy.org/doc/scipy-0.15.1/reference/generated/scipy.stats.ks_2samp.html]
    """
    KS_distances , Pval_KS = [] , []
    figure = plt.figure(figsize=[60,20])

    # Integer number of subplot rows. Floor division keeps the Python 2
    # result on Python 3, and max() avoids a 0-row grid for a single bin.
    n_rows = max(1, len(PmissBins) // 2)

    for i, pmiss_bin in enumerate(PmissBins):
        pMiss_min , pMiss_max = pmiss_bin[0] , pmiss_bin[1]
        selection = '%f < Pmiss3Mag && Pmiss3Mag < %f'%(pMiss_min , pMiss_max)
        reduced_data = tree2array(ana_data.GetTree(), branches=var, selection=selection)
        reduced_sim = tree2array(ana_sim.GetTree(), branches=var, selection=selection)
        D , Pvalue = ks_2samp( reduced_sim , reduced_data )

        if debug > 1:
            ax = figure.add_subplot(n_rows, 3, i+1)
            for array, col in zip([reduced_sim , reduced_data], ['black','red']):
                g = sns.distplot(array, bins=np.linspace(-1, 2 , Nbins), ax=ax, color=col, axlabel=var)
                g.axes.set_title(r'%.2f < p$_{miss}$ < %.2f GeV/c'%(pMiss_min , pMiss_max), fontsize=34, color="b")

        print_important( "KS test of data vs. simulation for %s in p(miss) bin %d is D = %f, Pvalue = %f"%(var , i , D , Pvalue) )
        KS_distances.append(D)
        Pval_KS.append(Pvalue)

    figure.savefig("/Users/erezcohen/Desktop/cmHistos_%s.pdf"%var)
    return KS_distances , Pval_KS
def numpyfy(run):
    """Convert every TTree of the ROOT files of one run into .npz files.

    For each prefix in ``files_to_convert`` the file
    ``<root_dir>/run<run>/<prefix><run>.root`` is opened. Every key whose
    class name is "TTree" is converted with ``tree2array`` in chunks of at
    most ``max_output_length`` entries, and each chunk is written with
    ``numpy.savez_compressed`` to ``<numpy_dir>/run<run>``. Empty trees are
    skipped. A message is printed once per file when it holds no TTree key.
    The ROOT file is closed even if a conversion raises.
    """
    outdir = "%s/run%d" % (numpy_dir, run)

    for f in files_to_convert:
        fname = "%s/run%d/%s%d.root" % (root_dir, run, f, run)
        print(" Processing %s ..." % (fname))
        rfile = ROOT.TFile(fname)
        found_tree = False
        try:
            for key in rfile.GetListOfKeys():
                if key.GetClassName() != "TTree":
                    continue
                found_tree = True

                name = key.GetName()
                print(" Converting Tree %s " % name)
                tree = rfile.Get(name)

                # root_numpy is given an explicit branch list here, built
                # from the tree by build_branches, instead of converting
                # composite objects on its own.
                branch_list = build_branches(tree)

                size = tree.GetEntries()
                if size == 0:
                    continue
                nchunks = int(math.ceil(size / float(max_output_length)))

                if not os.path.exists(outdir):
                    os.makedirs(outdir)

                for chunk in range(nchunks):
                    arrays = {
                        tree.GetName(): tree2array(
                            tree, branch_list,
                            start=chunk * max_output_length,
                            stop=(chunk + 1) * max_output_length),
                    }
                    if nchunks > 1:
                        outfile = "%s/%s%d_%d.npz" % (outdir, tree.GetName(), run, chunk)
                    else:
                        outfile = "%s/%s%d.npz" % (outdir, tree.GetName(), run)
                    print("Creating %s " % outfile)
                    numpy.savez_compressed(outfile, **arrays)
        finally:
            rfile.Close()

        if not found_tree:
            print(" No trees found... skipping")
def test_duplicate_branch_name():
    """A tree with two branches named 'double' converts with exactly one warning."""
    from array import array
    tree = ROOT.TTree('tree', 'tree')
    buf = array('d', [0.])
    # Register the same branch name twice on purpose.
    for _ in range(2):
        tree.Branch('double', buf, 'double/D')
    tree.Fill()

    # check that a warning was emitted
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        converted = rnp.tree2array(tree)
        assert_equal(len(caught), 1)
        last = caught[-1]
        assert_true(issubclass(last.category, RuntimeWarning))
        assert_true("ignoring duplicate branch named" in str(last.message))

    # Only one 'double' field is expected in the resulting dtype.
    assert_equal(converted.dtype, [('double', '<f8')])
def evaluate(config, tree, names, transform=None):
    """Evaluate the MVAs listed in ``names`` on ``tree`` and attach the scores.

    For each name the setup is loaded with ``load(config, <second
    '_'-separated token of name>)`` and the setup's variables are read from
    ``tree.raw()``. If ``transform`` is given, it is applied to the variable
    list first. For names starting with "sklearn", a pickled ``(bdt, label)``
    pair is read from ``config["mvadir"]`` and its ``predict_proba`` output
    (column 1) is stored under ``name``. A TMVA reader booked from the
    matching ``.xml`` file is then evaluated and stored under the name with
    "sklearn" replaced by "tmvalike".

    If ``mapping.root`` can be opened, a 'tmvalike_likelihood' column is
    added by looking up each (tmvalike_tt, tmvalike_ttZ) pair in the
    histogram ``hTargetBinning``.

    The collected columns are handed to ``tree.mva`` as a tree built with
    ``array2tree``. Nothing is returned.
    """
    # output holds one score sequence per column; dtype holds the matching
    # (column name, 'float64') pairs in the same order.
    output = []
    dtype = []
    for name in names:
        setup = load(config, name.split("_")[1])
        data = rec2array(tree2array(tree.raw(), list(transform(setup["variables"])) if transform else setup["variables"]))
        # NOTE(review): confirm that the whole TMVA-reader section below is
        # meant to run only for names starting with "sklearn".
        if name.startswith("sklearn"):
            fn = os.path.join(config["mvadir"], name + ".pkl")
            # NOTE(review): pickle.load can execute arbitrary code; the .pkl
            # files under mvadir must come from a trusted source.
            with open(fn, 'rb') as fd:
                bdt, label = pickle.load(fd)
            # predict_proba is only called when there is data; otherwise the
            # column stays an empty list.
            scores = []
            if len(data) > 0:
                scores = bdt.predict_proba(data)[:, 1]
            output += [scores]
            dtype += [(name, 'float64')]
            fn = os.path.join(config["mvadir"], name + ".xml")
            reader = r.TMVA.Reader("Silent")
            for var in setup['variables']:
                reader.AddVariable(var, array('f', [0.]))
            reader.BookMVA("BDT", fn)
            scores = evaluate_reader(reader, "BDT", data)
            output += [scores]
            dtype += [(name.replace("sklearn", "tmvalike"), 'float64')]
    # mapping.root is looked up in mvadir, falling back to indir, then outdir.
    f = r.TFile(os.path.join(config.get("mvadir", config.get("indir", config["outdir"])), "mapping.root"), "READ")
    if f.IsOpen():
        likelihood = f.Get("hTargetBinning")
        def lh(values):
            # Bin content of the histogram at the given coordinates.
            return likelihood.GetBinContent(likelihood.FindBin(*values))
        # Map each column name to its position in output.
        indices = dict((v, n) for n, (v, _) in enumerate(dtype))
        tt = output[indices['tmvalike_tt']]
        ttZ = output[indices['tmvalike_ttZ']]
        if len(tt) == 0:
            output += [[]]
        else:
            output += [np.apply_along_axis(lh, 1, np.array([tt, ttZ]).T)]
        dtype += [('tmvalike_likelihood', 'float64')]
        f.Close()
    # NOTE(review): zip() returns a list on Python 2 only; on Python 3 this
    # would need list(zip(...)) to build a structured array.
    data = np.array(zip(*output), dtype)
    tree.mva(array2tree(data))
def _tree_to_array(schema, to_npy = False):
    """Load the tree described by ``schema['sample']`` into an array.

    Parameters
    ----------
    schema : mapping with a 'sample' entry holding 'file' and 'tree', and
        optionally 'selection' and 'step'.
    to_npy : if exactly True, the array is also saved as a ``.npy`` file
        next to the input file. The file name is the SHA-1 hex digest of
        the input file's base name, the tree name, the sorted
        '&&'-separated selection terms (whitespace and parentheses
        removed) and the step.

    Returns
    -------
    The array returned by ``rn.tree2array``.
    """
    sample = schema['sample']

    print('Loading File...')
    f = root_open(sample['file'])
    T = f[sample['tree']]

    # Missing keys mean "no selection" / "no step".
    this_sel = sample.get('selection')
    this_step = sample.get('step')

    print('Pulling Tree...')
    arr = rn.tree2array(T, selection = this_sel, step = this_step)

    if to_npy is True:
        print('Writing to *.npy file...')
        # A missing selection contributes an empty term instead of crashing
        # on None.split().
        stripped = "".join((this_sel or '').split())
        varlist = sorted(stripped.replace('(', '').replace(')', '').split('&&'))
        hash_name = os.path.basename(sample['file']) + sample['tree'] + ''.join(varlist) + str(this_step)
        # hashlib needs bytes; Python 2 str already is bytes and is left as is.
        if not isinstance(hash_name, bytes):
            hash_name = hash_name.encode('utf-8')
        m = hashlib.sha1()
        m.update(hash_name)
        # os.path.join keeps a bare file name relative instead of writing to
        # the filesystem root.
        np.save(os.path.join(os.path.dirname(sample['file']), m.hexdigest() + '.npy'), arr)

    print('Done.')
    return arr
import ROOT
import pandas as pd
import numpy as np
from root_numpy import tree2array

# Open the input ROOT file and fetch the tree named 'cbmsim' from it.
f = ROOT.TFile('evetest_CC4GeVmb_110_n50k.root')
tree = f.Get('cbmsim')
# Convert only the 'BmnGemStripHit' branch of the tree to a NumPy array.
tracks_params = tree2array(tree, branches='BmnGemStripHit')
print "Accessed the trees"

# get input variable names from branches
# NOTE(review): `vars` shadows the Python builtin of the same name; it is
# kept because code outside this section may refer to it.
vars = img.getBoostCandBranchNames(treeHH4W)
treeVars = vars
print "Variables for jet image creation: ", vars

# create selection criteria (passed to tree2array as the selection string)
#sel = ""
sel = "jetAK8_pt > 500 && jetAK8_mass > 50"
#sel = "tau32 < 9999. && et > 500. && et < 2500. && bDisc1 > -0.05 && SDmass < 400"

# make arrays from the trees: read the branches in treeVars for the entries
# passing sel, then post-process the result with tools.appendTreeArray
#start, stop, step = 0, 200000, 1
arrayHH4W = tree2array(treeHH4W, treeVars, sel)#, None, start, stop, step )
arrayHH4W = tools.appendTreeArray(arrayHH4W)
print "Number of Jets that will be imaged: ", len(arrayHH4W)

# build the candidate four-vector arrays from the converted tree
imgArrayHH4W = img.makeBoostCandFourVector(arrayHH4W)
print "Made candidate 4 vector arrays from the datasets"

#==================================================================================
# Store BEST Variables ////////////////////////////////////////////////////////////
#==================================================================================

# get BEST variable names from branches
bestVars = tools.getBestBranchNames(treeHH4W)
print "Boosted Event Shape Variables: ", bestVars
print 'getting trees...', 'Making training arrays...', 'Making testing arrays...' seltest = "Tprime2_DeepAK8_Mass >= 0" #======process===== #open file #get tree #Convert the ntuple branches to numpy arrays for i in range(1, 7): fileTTToSemiLepT = TFile.Open( eosdir + "TTJets_SingleLeptFromT_TuneCUETP8M1_13TeV-madgraphMLM-pythia8_" + str(i) + "_hadd.root", "READ") treeTTToSemiLepT = fileTTToSemiLepT.Get("ljmet") if i == 1: arrayTTToSemiLepT = tree2array(treeTTToSemiLepT, treeVars, sel) testTTToSemiLepT = tree2array(treeTTToSemiLepT, treeVars, seltest) else: arrayTTToSemiLepT = np.concatenate( [arrayTTToSemiLepT, tree2array(treeTTToSemiLepT, treeVars, sel)]) testTTToSemiLepT = np.concatenate([ testTTToSemiLepT, tree2array(treeTTToSemiLepT, treeVars, seltest) ]) for i in range(1, 3): fileTTToSemiLepTb = TFile.Open( eosdir + "TTJets_SingleLeptFromTbar_TuneCUETP8M1_13TeV-madgraphMLM-pythia8_" + str(i) + "_hadd.root", "READ")
] print branchnames, len(branchnames) jetbranch = ['jet_pt', 'jet_eta', 'jet_mass', 'jet_phi', 'jet_btag'] mu_branch = ['mu_pt', 'mu_eta', 'mu_mt', 'mu_phi', 'mu_q'] el_branch = ['el_pt', 'el_eta', 'el_mt', 'el_phi', 'el_q'] flat_branch = [ 'm_l1j1', 'H_T', 'm_l1j2', 'm_l1l2', 'Nleps', 'H_Tratio', 'Nbtags', 'Nlooseb', 'Ntightb', 'H_Tb', 'Njets', 'MET', 'm_j1j2' ] truthbranch = ['class'] data_dict = {} Y = rootnp.tree2array(file_check, branches=truthbranch) Z_Y = rootnp.rec2array(Y) flat = rootnp.tree2array(file_check, branches=flat_branch) Z_flat = rootnp.rec2array(flat) #Z_Y = np.zeros(Y.shape[0]) #for a in range(0,Y.shape): # Z_Y[a] = Z_Y[a].tolist() X_mu = rootnp.tree2array(file_check, branches=mu_branch) X_mu = rootnp.rec2array(X_mu) X_el = rootnp.tree2array(file_check, branches=el_branch) X_el = rootnp.rec2array(X_el) X_jets = rootnp.tree2array(file_check, branches=jetbranch)
def _parseLatencyPair(rangeString):
    """Parse a comma separated pair "a,b" and return (min, max) as floats."""
    listLatValues = [float(val) for val in rangeString.split(",")]
    if len(listLatValues) != 2:
        raise IndexError(
            "You must specify exactly two values for determining latency signal range; values given: {0} do not meet this criterion"
            .format(listLatValues))
    return min(listLatValues), max(listLatValues)


def anaUltraLatency(infilename, debug=False, latSigMaskRange=None, latSigRange=None, outputDir=None, outfilename="latencyAna.root", performFit=False):
    """
    Analyzes data taken by ultraLatency.py

    infilename - Name of input TFile containing the latTree TTree
    debug - If True prints additional debugging statements
    latSigMaskRange - Comma separated pair of values defining the region to be masked when trying to fit the noise, e.g. lat #notepsilon [40,44] is noise (lat < 40 || lat > 44)")
    latSigRange - Comma separated pair of values defining expected signal range, e.g. lat #epsilon [41,43] is signal")
    outputDir - Directory for the output TFile and the summary images; if
                None the path returned by getElogPath() is used
    outfilename - Name of output TFile containing analysis results
    performFit - Fit the latency distributions

    Raises IOError if the output or input file cannot be opened or is a
    zombie, and IndexError if a range string does not hold exactly two values.
    """

    # Determine output filepath
    if outputDir is None:
        from gempython.gemplotting.utils.anautilities import getElogPath
        outputDir = getElogPath()

    # Redirect sys.stdout and sys.stderr if necessary
    from gempython.gemplotting.utils.multiprocUtils import redirectStdOutAndErr
    redirectStdOutAndErr("anaUltraLatency", outputDir)

    # Create the output File and TTree
    import ROOT as r
    outF = r.TFile(outputDir + "/" + outfilename, "RECREATE")
    if not outF.IsOpen():
        outF.Close()
        raise IOError(
            "Unable to open output file {1} check to make sure you have write permissions under {0}"
            .format(outputDir, outfilename))
    if outF.IsZombie():
        outF.Close()
        raise IOError(
            "Output file {1} is a Zombie, check to make sure you have write permissions under {0}"
            .format(outputDir, outfilename))
    myT = r.TTree('latFitTree', 'Tree Holding FitData')

    # Attempt to open input TFile
    inFile = r.TFile(infilename, "read")
    if not inFile.IsOpen():
        outF.Close()
        inFile.Close()
        raise IOError(
            "Unable to open input file {0} check to make sure you have read permissions"
            .format(infilename))
    if inFile.IsZombie():
        outF.Close()
        inFile.Close()
        raise IOError(
            "Input file {0} is a Zombie, check to make sure you have write permissions and file has expected size"
            .format(infilename))

    from gempython.tools.hw_constants import vfatsPerGemVariant

    # Get ChipID's
    import numpy as np
    import root_numpy as rp

    ##### FIXME
    from gempython.gemplotting.mapping.chamberInfo import gemTypeMapping
    if 'gemType' not in inFile.latTree.GetListOfBranches():
        gemType = "ge11"
    else:
        gemType = gemTypeMapping[rp.tree2array(tree=inFile.latTree, branches=['gemType'])[0][0]]
    print(gemType)
    ##### END
    nVFATS = vfatsPerGemVariant[gemType]

    listOfBranches = inFile.latTree.GetListOfBranches()
    if 'vfatID' in listOfBranches:
        array_chipID = np.unique(
            rp.tree2array(inFile.latTree, branches=['vfatID', 'vfatN']))
        dict_chipID = {}
        for entry in array_chipID:
            dict_chipID[entry['vfatN']] = entry['vfatID']
    else:
        dict_chipID = {vfat: 0 for vfat in range(nVFATS)}

    if debug:
        print("VFAT Position to ChipID Mapping")
        # .items() works on Python 2 and 3 (.iteritems() is Python 2 only)
        for vfat, vfatID in dict_chipID.items():
            print("{0}\t{1}".format(vfat, vfatID))

    # Set default histogram behavior
    r.TH1.SetDefaultSumw2(False)
    r.gROOT.SetBatch(True)
    r.gStyle.SetOptStat(1111111)

    #Initializing Histograms
    print('Initializing Histograms')
    from gempython.utils.gemlogger import printYellow
    from gempython.utils.nesteddict import nesteddict as ndict
    dict_hVFATHitsVsLat = ndict()
    for vfat in range(0, nVFATS):
        try:
            chipID = dict_chipID[vfat]
        except KeyError:
            chipID = 0
            if debug:
                printYellow(
                    "No CHIP_ID for VFAT{0}, If you don't expect data from this VFAT there's no problem"
                    .format(vfat))

        dict_hVFATHitsVsLat[vfat] = r.TH1F(
            "vfat{0}HitsVsLat".format(vfat),
            "VFAT {0}: chipID {1}".format(vfat, chipID),
            1024, -0.5, 1023.5)

    #Filling Histograms
    print('Filling Histograms')
    latMin = 1000
    latMax = -1
    nTrig = -1
    for event in inFile.latTree:
        dict_hVFATHitsVsLat[int(event.vfatN)].Fill(event.latency, event.Nhits)
        # The two bounds are tracked independently. With the former `elif`
        # an event that lowered latMin could never raise latMax, so a scan
        # with a single latency value left latMax at -1.
        if event.latency < latMin and event.Nhits > 0:
            latMin = event.latency
        if event.latency > latMax:
            latMax = event.latency

        if nTrig < 0:
            nTrig = event.Nev

    from math import sqrt
    for vfat in range(0, nVFATS):
        hist = dict_hVFATHitsVsLat[vfat]
        for binX in range(1, hist.GetNbinsX() + 1):
            hist.SetBinError(binX, sqrt(hist.GetBinContent(binX)))

    hHitsVsLat_AllVFATs = dict_hVFATHitsVsLat[0].Clone("hHitsVsLat_AllVFATs")
    hHitsVsLat_AllVFATs.SetTitle("Sum over all VFATs")
    for vfat in range(1, nVFATS):
        hHitsVsLat_AllVFATs.Add(dict_hVFATHitsVsLat[vfat])

    # Set Latency Fitting Bounds - Signal
    latFitMin_Sig = latMin
    latFitMax_Sig = latMax
    if latSigRange is not None:
        latFitMin_Sig, latFitMax_Sig = _parseLatencyPair(latSigRange)

    # Set Latency Fitting Bounds - Noise
    latFitMin_Noise = latFitMin_Sig - 1
    latFitMax_Noise = latFitMax_Sig + 1
    if latSigMaskRange is not None:
        latFitMin_Noise, latFitMax_Noise = _parseLatencyPair(latSigMaskRange)

    # Make output TFile and TTree
    from array import array
    dirVFATPlots = outF.mkdir("VFAT_Plots")
    if 'detName' in listOfBranches:
        detName = r.vector('string')()
        detName.push_back(
            rp.tree2array(inFile.latTree, branches=['detName'])[0][0][0])
        myT.Branch('detName', detName)
    vfatN = array('i', [0])
    myT.Branch('vfatN', vfatN, 'vfatN/I')
    vfatID = array('L', [0])
    myT.Branch('vfatID', vfatID, 'vfatID/i')  #Hex Chip ID of VFAT
    hitCountMaxLat = array('f', [0])
    myT.Branch('hitCountMaxLat', hitCountMaxLat, 'hitCountMaxLat/F')
    hitCountMaxLatErr = array('f', [0])
    myT.Branch('hitCountMaxLatErr', hitCountMaxLatErr, 'hitCountMaxLatErr/F')
    maxLatBin = array('f', [0])
    myT.Branch('maxLatBin', maxLatBin, 'maxLatBin/F')
    hitCountBkg = array('f', [0])
    hitCountBkgErr = array('f', [0])
    hitCountSig = array('f', [0])
    hitCountSigErr = array('f', [0])
    SigOverBkg = array('f', [0])
    SigOverBkgErr = array('f', [0])
    if performFit:
        myT.Branch('hitCountBkg', hitCountBkg, 'hitCountBkg/F')
        myT.Branch('hitCountBkgErr', hitCountBkgErr, 'hitCountBkgErr/F')
        myT.Branch('hitCountSig', hitCountSig, 'hitCountSig/F')
        myT.Branch('hitCountSigErr', hitCountSigErr, 'hitCountSigErr/F')
        myT.Branch('SigOverBkg', SigOverBkg, 'SigOverBkg/F')
        myT.Branch('SigOverBkgErr', SigOverBkgErr, 'SigOverBkgErr/F')

    # Make output plots
    dict_grNHitsVFAT = ndict()
    dict_fitNHitsVFAT_Sig = ndict()
    dict_fitNHitsVFAT_Noise = ndict()
    grNMaxLatBinByVFAT = r.TGraphErrors(len(dict_hVFATHitsVsLat))
    grMaxLatBinByVFAT = r.TGraphErrors(len(dict_hVFATHitsVsLat))
    grVFATSigOverBkg = r.TGraphErrors(len(dict_hVFATHitsVsLat))
    grVFATNSignalNoBkg = r.TGraphErrors(len(dict_hVFATHitsVsLat))
    r.gStyle.SetOptStat(0)
    if debug and performFit:
        print("VFAT\tSignalHits\tSignal/Noise")
    for vfat in dict_hVFATHitsVsLat:
        #if we don't have any data for this VFAT, we just need to initialize the TGraphAsymmErrors since it is drawn later
        if vfat not in dict_chipID:
            dict_grNHitsVFAT[vfat] = r.TGraphAsymmErrors()
            continue

        hist = dict_hVFATHitsVsLat[vfat]

        # Store VFAT info
        vfatN[0] = vfat
        vfatID[0] = dict_chipID[vfat]

        # Store Max Info
        hitCountMaxLat[0] = hist.GetBinContent(hist.GetMaximumBin())
        hitCountMaxLatErr[0] = sqrt(hitCountMaxLat[0])
        grNMaxLatBinByVFAT.SetPoint(vfat, vfat, hitCountMaxLat[0])
        grNMaxLatBinByVFAT.SetPointError(vfat, 0, hitCountMaxLatErr[0])
        maxLatBin[0] = hist.GetBinCenter(hist.GetMaximumBin())
        grMaxLatBinByVFAT.SetPoint(vfat, vfat, maxLatBin[0])
        grMaxLatBinByVFAT.SetPointError(vfat, 0, 0.5)  #could be improved upon

        # Initialize
        fitSig = r.TF1("func_N_vs_Lat_VFAT{0}_Sig".format(vfat), "[0]",
                       latFitMin_Sig, latFitMax_Sig)
        fitNoise = r.TF1("func_N_vs_Lat_VFAT{0}_Noise".format(vfat), "[0]",
                         latMin, latMax)
        graph = r.TGraphAsymmErrors(hist)
        graph.SetName("g_N_vs_Lat_VFAT{0}".format(vfat))
        dict_fitNHitsVFAT_Sig[vfat] = fitSig
        dict_fitNHitsVFAT_Noise[vfat] = fitNoise
        dict_grNHitsVFAT[vfat] = graph

        # Fitting
        if performFit:
            # Fit Signal
            fitSig.SetParameter(0, hitCountMaxLat[0])
            fitSig.SetLineColor(r.kGreen + 1)
            graph.Fit(fitSig, "QR")

            # Remove Signal Region
            latVal = r.Double()
            hitVal = r.Double()
            gTempDist = graph.Clone("g_N_vs_Lat_VFAT{0}_NoSig".format(vfat))
            # Walk from the last point down to and including point 0. The
            # former stop value of 0 never examined the first point.
            # Iterating backwards keeps the remaining indices valid while
            # points are removed.
            for idx in range(graph.GetN() - 1, -1, -1):
                gTempDist.GetPoint(idx, latVal, hitVal)
                if latFitMin_Noise < latVal and latVal < latFitMax_Noise:
                    gTempDist.RemovePoint(idx)

            # Fit Noise
            fitNoise.SetParameter(0, 0.)
            fitNoise.SetLineColor(r.kRed + 1)
            gTempDist.Fit(fitNoise, "QR")

            # Calc Signal & Signal/Noise
            hitCountBkg[0] = fitNoise.GetParameter(0)
            hitCountBkgErr[0] = fitNoise.GetParError(0)
            hitCountSig[0] = fitSig.GetParameter(0) - hitCountBkg[0]
            hitCountSigErr[0] = sqrt(
                (fitSig.GetParError(0))**2 + hitCountBkgErr[0]**2)

            SigOverBkg[0] = hitCountSig[0] / hitCountBkg[0]
            SigOverBkgErr[0] = sqrt((hitCountSigErr[0] / hitCountBkg[0])**2 +
                                    (hitCountBkgErr[0]**2 *
                                     (hitCountSig[0] / hitCountBkg[0]**2)**2))

            # Add to Plot
            grVFATSigOverBkg.SetPoint(vfat, vfat, SigOverBkg[0])
            grVFATSigOverBkg.SetPointError(vfat, 0, SigOverBkgErr[0])
            grVFATNSignalNoBkg.SetPoint(vfat, vfat, hitCountSig[0])
            grVFATNSignalNoBkg.SetPointError(vfat, 0, hitCountSigErr[0])

            # Print if requested
            if debug:
                print("{0}\t{1}\t{2}".format(vfat, hitCountSig[0], SigOverBkg[0]))

        # Format
        r.gStyle.SetOptStat(0)
        graph.SetMarkerStyle(21)
        graph.SetMarkerSize(0.7)
        graph.SetLineWidth(2)
        graph.GetXaxis().SetRangeUser(latMin, latMax)
        graph.GetXaxis().SetTitle("Lat")
        graph.GetYaxis().SetRangeUser(0, nTrig)
        graph.GetYaxis().SetTitle("N")

        # Write
        dirVFAT = dirVFATPlots.mkdir("VFAT{0}".format(vfat))
        dirVFAT.cd()
        graph.Write()
        hist.Write()
        if performFit:
            fitSig.Write()
            fitNoise.Write()
        myT.Fill()

    # Store - Summary
    from gempython.gemplotting.utils.anautilities import getSummaryCanvas, addPlotToCanvas
    canv_Summary = getSummaryCanvas(dict_grNHitsVFAT, name='canv_Summary',
                                    drawOpt='APE1', gemType=gemType)
    if performFit:
        canv_Summary = addPlotToCanvas(canv_Summary, dict_fitNHitsVFAT_Noise, gemType)
    canv_Summary.SaveAs(outputDir + '/Summary.png')

    # Store - Sig Over Bkg
    if performFit:
        canv_SigOverBkg = r.TCanvas("canv_SigOverBkg", "canv_SigOverBkg", 600, 600)
        canv_SigOverBkg.cd()
        canv_SigOverBkg.cd().SetLogy()
        canv_SigOverBkg.cd().SetGridy()
        grVFATSigOverBkg.SetTitle("")
        grVFATSigOverBkg.SetMarkerStyle(21)
        grVFATSigOverBkg.SetMarkerSize(0.7)
        grVFATSigOverBkg.SetLineWidth(2)
        grVFATSigOverBkg.GetXaxis().SetTitle("VFAT Pos")
        grVFATSigOverBkg.GetYaxis().SetTitle("Sig / Bkg)")
        grVFATSigOverBkg.GetYaxis().SetTitleOffset(1.25)
        grVFATSigOverBkg.GetYaxis().SetRangeUser(1e-1, 1e2)
        grVFATSigOverBkg.GetXaxis().SetRangeUser(-0.5, nVFATS + 0.5)
        grVFATSigOverBkg.Draw("APE1")
        canv_SigOverBkg.SaveAs(outputDir + '/SignalOverBkg.png')

    # Store - Signal
    if performFit:
        canv_Signal = r.TCanvas("canv_Signal", "canv_Signal", 600, 600)
        canv_Signal.cd()
        grVFATNSignalNoBkg.SetTitle("")
        grVFATNSignalNoBkg.SetMarkerStyle(21)
        grVFATNSignalNoBkg.SetMarkerSize(0.7)
        grVFATNSignalNoBkg.SetLineWidth(2)
        grVFATNSignalNoBkg.GetXaxis().SetTitle("VFAT Pos")
        grVFATNSignalNoBkg.GetYaxis().SetTitle("Signal Hits")
        grVFATNSignalNoBkg.GetYaxis().SetTitleOffset(1.5)
        grVFATNSignalNoBkg.GetYaxis().SetRangeUser(0, nTrig)
        grVFATNSignalNoBkg.GetXaxis().SetRangeUser(-0.5, nVFATS + 0.5)
        grVFATNSignalNoBkg.Draw("APE1")
        canv_Signal.SaveAs(outputDir + '/SignalNoBkg.png')

    # Store - Sum over all VFATs
    canv_LatSum = r.TCanvas("canv_LatSumOverAllVFATs", "canv_LatSumOverAllVFATs", 600, 600)
    canv_LatSum.cd()
    hHitsVsLat_AllVFATs.SetXTitle("Latency")
    hHitsVsLat_AllVFATs.SetYTitle("N")
    hHitsVsLat_AllVFATs.GetXaxis().SetRangeUser(latMin, latMax)
    hHitsVsLat_AllVFATs.Draw("hist")
    canv_LatSum.SaveAs(outputDir + '/LatSumOverAllVFATs.png')

    # Store - Max Hits By Lat Per VFAT
    canv_MaxHitsPerLatByVFAT = r.TCanvas("canv_MaxHitsPerLatByVFAT", "canv_MaxHitsPerLatByVFAT", 1200, 600)
    canv_MaxHitsPerLatByVFAT.Divide(2, 1)
    canv_MaxHitsPerLatByVFAT.cd(1)
    grNMaxLatBinByVFAT.SetTitle("")
    grNMaxLatBinByVFAT.SetMarkerStyle(21)
    grNMaxLatBinByVFAT.SetMarkerSize(0.7)
    grNMaxLatBinByVFAT.SetLineWidth(2)
    grNMaxLatBinByVFAT.GetXaxis().SetRangeUser(-0.5, nVFATS + 0.5)
    grNMaxLatBinByVFAT.GetXaxis().SetTitle("VFAT Pos")
    grNMaxLatBinByVFAT.GetYaxis().SetRangeUser(0, nTrig)
    grNMaxLatBinByVFAT.GetYaxis().SetTitle("Hit Count of Max Lat Bin")
    grNMaxLatBinByVFAT.GetYaxis().SetTitleOffset(1.7)
    grNMaxLatBinByVFAT.Draw("APE1")
    canv_MaxHitsPerLatByVFAT.cd(2)
    grMaxLatBinByVFAT.SetTitle("")
    grMaxLatBinByVFAT.SetMarkerStyle(21)
    grMaxLatBinByVFAT.SetMarkerSize(0.7)
    grMaxLatBinByVFAT.SetLineWidth(2)
    grMaxLatBinByVFAT.GetXaxis().SetTitle("VFAT Pos")
    grMaxLatBinByVFAT.GetYaxis().SetTitle("Max Lat Bin")
    grMaxLatBinByVFAT.GetYaxis().SetTitleOffset(1.2)
    grMaxLatBinByVFAT.GetXaxis().SetRangeUser(-0.5, nVFATS + 0.5)
    grMaxLatBinByVFAT.Draw("APE1")
    canv_MaxHitsPerLatByVFAT.SaveAs(outputDir + '/MaxHitsPerLatByVFAT.png')

    # Store - TObjects
    outF.cd()
    hHitsVsLat_AllVFATs.Write()
    grNMaxLatBinByVFAT.SetName("grNMaxLatBinByVFAT")
    grNMaxLatBinByVFAT.Write()
    grMaxLatBinByVFAT.SetName("grMaxLatBinByVFAT")
    grMaxLatBinByVFAT.Write()
    if performFit:
        grVFATSigOverBkg.SetName("grVFATSigOverBkg")
        grVFATSigOverBkg.Write()
        grVFATNSignalNoBkg.SetName("grVFATNSignalNoBkg")
        grVFATNSignalNoBkg.Write()
    myT.Write()
    outF.Close()
    # The input file is no longer needed once everything has been written.
    inFile.Close()
def load_data(inputPathNTuples, treeDirName, variables): print "In data_manager::load_data()::\n inputPathNTuples: ", inputPathNTuples, "\n treeDirName: ", treeDirName print " variables: ", variables my_cols_list = variables + ['proces', 'key', 'target', "totalWeight"] data = pandas.DataFrame( columns=my_cols_list ) ## right now an empty dataframe with columns = my_cols_list print "data: ", data target = None for process in keys: print 'process %s ' % (process) if 'WZ' in process: sampleName = "WZ" target = 0 if 'signal' in process: sampleName = "signal_ggf_spin0_400_hh_wwww" target = 1 inputNTuples = glob.glob("%s/%s*_forBDTtraining.root" % (inputPathNTuples, process)) inputTree = "%s/%s/evtTree" % (treeDirName, sampleName) print "inputTree", inputTree, ", len(inputNTuples):", len( inputNTuples), " inputNTuples: ", inputNTuples for intuple in range(0, len(inputNTuples)): try: tfile = ROOT.TFile(inputNTuples[intuple]) except: print "%s FAIL load root file" % inputNTuples[intuple] continue try: tree = tfile.Get(inputTree) except: print(inputTree, "FAIL read inputTree", tfile) continue if tree is not None: print "sampleName: ", sampleName, ", process: ", process, ", inputNTuples[intuple]: ", inputNTuples[ intuple], ", nEvents: ", tree.GetEntries() try: chunk_arr = tree2array(tree) except: print(inputTree, "FAIL tree2array ", tfile) tfile.Close() continue else: chunk_df = pandas.DataFrame(chunk_arr, columns=variables) tfile.Close() chunk_df['proces'] = sampleName chunk_df['key'] = process chunk_df['target'] = target chunk_df["totalWeight"] = chunk_df["evtWeight"] #print "chunk_df: ",chunk_df data = data.append(chunk_df, ignore_index=True) else: print("file " + list[ii] + "was empty") nS = len(data.ix[(data.target.values == 1) & (data.key.values == process)]) nB = len(data.ix[(data.target.values == 0) & (data.key.values == process)]) print "%s signal size %g, bk size %g, evtWeight %g, totalWeight %g" % ( process, nS, nB, data.ix[(data.key.values == 
process)]["evtWeight"].sum(), data.ix[(data.key.values == process)]["totalWeight"].sum()) nNW = len(data.ix[(data["totalWeight"].values < 0) & (data.key.values == process)]) print process, " no. of events with -ve weights", nNW #print 'data to list = ', (data.columns.values.tolist()) n = len(data) nS = len(data.ix[data.target.values == 1]) nB = len(data.ix[data.target.values == 0]) print treeDirName, " size of sig, bkg: ", nS, nB return data
def test_tree2array_wrong_type():
    """tree2array must refuse an argument that is not a ROOT tree."""
    # A bare call would let the TypeError escape and fail the test; state the
    # expectation explicitly, the same way the other tests in this file do.
    assert_raises(TypeError, rnp.tree2array, list())
def load_single(tree, start_, stop_, branches_):
    """Return the first field of every record read from ``tree``.

    ``start_``, ``stop_`` and ``branches_`` are forwarded to ``tree2array``
    as ``start``, ``stop`` and ``branches``; element 0 of each resulting
    record is collected into a new NumPy array.
    """
    records = tree2array(tree,
                         start=start_,
                         stop=stop_,
                         branches=branches_)
    first_fields = []
    for record in records:
        first_fields.append(record[0])
    return np.array(first_fields)
from sklearn import metrics
import matplotlib
# Select the non-interactive backend before pyplot is imported so the script
# can run without a display.
matplotlib.use('Agg')
import matplotlib.pyplot as plt

files = ["BDT.root"]
#files = ["BDT_GlobalOnly.root", "BDT_CandsOnly.root", "BDT_AllFeatures.root", "BDT_GlobalNoIP.root", "BDT_GlobalNoKin.root", "BDT_Global_NoIPRoundedKinematics.root", "BDT_AllFeatures_RoundedKinematics.root"]
branches = ['classID', 'BDT']

# For every input file: read the test tree, compute the ROC curve, store it
# as an .npz archive under ROCs/ and draw it into plot<name>.pdf.
for file in files:
    f = ROOT.TFile(file)
    tree = f.Get("TestTree")
    data = root_numpy.tree2array(tree, branches=branches)
    # roc_curve returns (fpr, tpr, thresholds); the first two names are
    # swapped on purpose, see the original remark below.
    tpr, fpr, thresh = metrics.roc_curve(
        data['classID'], data['BDT']
    )  # labels are interpreted backwards (that's why tpr and fpr are reversed)
    numpy.savez('ROCs/' + file.replace(".root", ""), tpr=tpr, fpr=fpr)
    plt.figure()
    plt.plot(tpr, fpr, color='aqua', label='BDT')
    plt.xscale('log')
    plt.xlim([0.001, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.legend(loc='lower right')
    plt.savefig('plot' + file.replace(".root", "") + '.pdf')
    # pyplot keeps every figure alive until it is closed, and the ROOT file
    # stays open until it is closed: release both once per iteration.
    plt.close()
    f.Close()
treeHH4B = fileHH4B.Get("run/jetTree")
print "Accessed the trees"

# get input variable names from branches
# NOTE(review): `vars` shadows the Python builtin of the same name.
vars = img.getBoostCandBranchNames(treeJJ)
treeVars = vars
print "Variables for jet image creation: ", vars

# create selection criteria
#sel = ""
sel = "jetAK8_pt > 500 && jetAK8_mass > 50"
#sel = "tau32 < 9999. && et > 500. && et < 2500. && bDisc1 > -0.05 && SDmass < 400"

# make arrays from the trees
# The same three steps run for each of the JJ, HH4W and HH4B samples:
# tree2array with the branch list and the selection string, then
# tools.appendTreeArray, then img.makeBoostCandFourVector.
arrayJJ = tree2array(treeJJ, treeVars, sel)
arrayJJ = tools.appendTreeArray(arrayJJ)
imgArrayJJ = img.makeBoostCandFourVector(arrayJJ)

arrayHH4W = tree2array(treeHH4W, treeVars, sel)
arrayHH4W = tools.appendTreeArray(arrayHH4W)
imgArrayHH4W = img.makeBoostCandFourVector(arrayHH4W)

arrayHH4B = tree2array(treeHH4B, treeVars, sel)
arrayHH4B = tools.appendTreeArray(arrayHH4B)
imgArrayHH4B = img.makeBoostCandFourVector(arrayHH4B)
print "Made candidate 4 vector arrays from the datasets"

#==================================================================================
# Make Jet Images /////////////////////////////////////////////////////////////////
from ROOT import * import root_numpy import matplotlib matplotlib.use("Agg") import matplotlib.pyplot as plt print 1 import numpy import sys file = TFile('/gpfs/ddn/cms/user/lgiannini/DeepNtupleRegression/Train_wSV.root' ) #/uscms_data/d3/lgiannin/tree_Reg.root') tree = file.Get("tree") tree2 = root_numpy.tree2array( tree, ["nPVs", "jetpt", "jeteta", "genjetpt", "genjetpt_wNu"], selection="") print tree2.shape print tree2[0].shape for i in range(len(tree2[0])): print i, tree2[0][i] print tree2 tree2 = root_numpy.rec2array(tree2) print tree2.shape t2 = root_numpy.tree2array( tree,
def tvars(rootfile, first, last): stringa = [ "seed_pt", "seed_eta", "seed_phi", "seed_mass", "seed_dz", "seed_dxy", "seed_3D_ip", "seed_3D_sip", "seed_2D_ip", "seed_2D_sip", "seed_3D_signedIp", "seed_3D_signedSip", "seed_2D_signedIp", "seed_2D_signedSip", "seed_chi2reduced", "seed_nPixelHits", "seed_nHits", "seed_jetAxisDistance", "seed_jetAxisDlength", #more truth vars "seed_MC_pt", "seed_MC_eta", "seed_MC_phi", "seed_MC_mass", "seed_MC_dz", "seed_MC_dxy", "seed_MC_MomPdgId", "seed_MC_MomFlavour", "seed_MC_BChain", "seed_MC_DChain", #no B in this case "seed_MC_vx", "seed_MC_vy", "seed_MC_vz", "seed_MC_pvd" ] stringa2 = [ "nearTracks_pt", "nearTracks_eta", "nearTracks_phi", "nearTracks_dz", "nearTracks_dxy", "nearTracks_mass", "nearTracks_3D_ip", "nearTracks_3D_sip", "nearTracks_2D_ip", "nearTracks_2D_sip", "nearTracks_PCAdist", "nearTracks_PCAdsig", "nearTracks_PCAonSeed_x", "nearTracks_PCAonSeed_y", "nearTracks_PCAonSeed_z", "nearTracks_PCAonSeed_xerr", "nearTracks_PCAonSeed_yerr", "nearTracks_PCAonSeed_zerr", "nearTracks_PCAonTrack_x", "nearTracks_PCAonTrack_y", "nearTracks_PCAonTrack_z", "nearTracks_PCAonTrack_xerr", "nearTracks_PCAonTrack_yerr", "nearTracks_PCAonTrack_zerr", "nearTracks_dotprodTrack", "nearTracks_dotprodSeed", "nearTracks_dotprodTrackSeed2D", "nearTracks_dotprodTrackSeed3D", "nearTracks_dotprodTrackSeedVectors2D", "nearTracks_dotprodTrackSeedVectors3D", "nearTracks_PCAonSeed_pvd", "nearTracks_PCAonTrack_pvd", "nearTracks_PCAjetAxis_dist", "nearTracks_PCAjetMomenta_dotprod", "nearTracks_PCAjetDirs_DEta", "nearTracks_PCAjetDirs_DPhi", #more MC vars "nearTracks_MC_pt", "nearTracks_MC_eta", "nearTracks_MC_phi", "nearTracks_MC_dz", "nearTracks_MC_dxy", "nearTracks_MC_MomPdgId", "nearTracks_MC_MomFlavour", "nearTracks_MC_BChain", "nearTracks_MC_DChain", #no B in this case "nearTracks_MC_Track_vx", "nearTracks_MC_Track_vy", "nearTracks_MC_Track_vz", "nearTracks_MC_fromSeedVtx", "nearTracks_MC_fromSeedChain", "nearTracks_MC_pvd", 
#"nearTracks_MC_fromSeedVtx*(nearTracks_MC_pvd>0)" ] f = TFile(rootfile) # tree=f.Get("analyzer1/tree") tree = root_numpy.tree2array(f.Get('analyzer1/tree'), branches=stringa2, selection="(jet_pt>30)&&(abs(jet_eta)<2.4)", start=first, stop=last) print "loaded" tree2 = root_numpy.rec2array(tree) print "s" print tree2.shape print round(time.time() - starttime, 2), "reshape" tree3 = tree2.reshape((200, 51, len(tree))) tree3 = tree3.reshape((10, 51 * 20, len(tree))) print tree3.shape tree3 = tree3.swapaxes(0, 2) t2 = root_numpy.tree2array(f.Get('analyzer1/tree'), branches=stringa, selection="(jet_pt>30)&&(abs(jet_eta)<2.4)", start=first, stop=last) t2 = root_numpy.rec2array(t2) print t2.shape t2 = t2.reshape((10, len(stringa), len(tree))) t2 = t2.swapaxes(0, 2) tree5 = numpy.concatenate((t2, tree3), axis=1) print tree5.shape numpy.save( "tvars_" + str(first) + "_" + str(last) + "_" + rootfile.split(".")[0] + ".npy", tree5) print time.time() - starttime f.Close() os.system("mv " + "tvars_" + str(first) + "_" + str(last) + "_" + rootfile.split(".")[0] + ".npy" + " /gpfs/ddn/users/lgiannini/NN/DataRECO")
def convert_tree_to_np(sources, destination, npy_files=None):
    """
    Converts the root files in sources to numpy arrays

    Directories are walked recursively (unless their path contains
    'failed') and mirrored below ``destination``.  Every path containing
    ".root" is read as the chain 'MyAnalysis/MyTree' and saved with
    ``np.save``.  Sources that raise during conversion are reported and
    appended to the list pickled in 'failed.pkl' (current working directory).

    Params
        sources : list
            root file paths/names or path to directory of root files
        destination : str
            path to directory where the npy files will be saved
        npy_files : list, optional
            list of already converted files (for recursive functionality);
            a fresh list is created when omitted

    Returns
        list
            paths to the converted files
    """
    # A literal [] default would be shared between independent calls.
    if npy_files is None:
        npy_files = []
    for i, source in enumerate(sources):
        if os.path.isdir(source) and ('failed' not in source):
            # source is a directory -> recurse on all files in directory
            new_sources = [source + '/' + e for e in os.listdir(source)]
            new_destination = destination + '/' + source.split('/')[-1]
            print('new_sources ', len(new_sources), new_sources[-9:])
            print('new_destination ', new_destination)
            logging.info('new_sources ' + str(len(new_sources)) + ' ' +
                         str(new_sources[-9:]))
            logging.info('new_destination ' + new_destination)
            # Re-running a conversion must not die on an existing folder.
            if not os.path.isdir(new_destination):
                os.mkdir(new_destination)
            convert_tree_to_np(new_sources, new_destination, npy_files)
        elif ".root" in source:
            try:
                # print(i, source)
                logging.info(str(i) + ' ' + source)
                print(str(i) + ' ', end="")
                sys.stdout.flush()
                tChain = rt.TChain('MyAnalysis/MyTree')
                tChain.Add(source)
                array = root_numpy.tree2array(tChain)
                # print 'Total number of entries: ',tChain.GetEntries()
                # [:-5] strips the trailing ".root" from the file name
                pkl_file_name = destination + '/' + source.split('/')[-1][:-5]
                np.save(pkl_file_name, array)
                npy_files.append(pkl_file_name + '.npy')
            except Exception as e:
                print("")
                print(e)
                print(source, " ** FAILED ** ")
                logging.error(source + " ** FAILED ** ")
                logging.error(e)
                # Load the failures recorded so far (if any), add this one
                # and write the list back, so every failure is kept.
                failed = []
                if os.path.exists('failed.pkl'):
                    with open('failed.pkl', 'rb') as f:
                        failed = pickle.load(f)
                failed.append(source)
                with open('failed.pkl', 'wb') as f:
                    pickle.dump(failed, f)
    return npy_files
import pandas as pd
from ROOT import *
from root_numpy import root2array, tree2array
from root_numpy import testdata
from IPython.display import display
import numpy as np

# Directory that holds sig.root and bkg.root
path = '../data/'

# --read signal dataset
# The 'ntuple' tree is converted to a structured array and wrapped in a
# DataFrame; the file handle stays open for the rest of the script.
sig_file = TFile.Open(path + 'sig.root')
sig_tree = sig_file.Get('ntuple')
sig_arr = tree2array(sig_tree)
sig_df = pd.DataFrame(sig_arr)

# --read background dataset
bkg_file = TFile.Open(path + 'bkg.root')
bkg_tree = bkg_file.Get('ntuple')
bkg_arr = tree2array(bkg_tree)
bkg_df = pd.DataFrame(bkg_arr)

# Number of rows (events) read for each sample
print('sig: ', sig_df.shape[0])
print('bkg: ', bkg_df.shape[0])


# --Normalize
def MinMaxScaler(data):
    """Rescale ``data`` along axis 0 to ``(data - min) / (max - min)``.

    The minimum and maximum are taken with ``np.min(data, 0)`` and
    ``np.max(data, 0)``.

    NOTE(review): where the maximum equals the minimum the denominator is
    zero and the division is not guarded.
    """
    numerator = data - np.min(data, 0)
    denominator = np.max(data, 0) - np.min(data, 0)
    # force float so the division below is never an integer division
    denominator = denominator.astype('float')
    return numerator / denominator
# Input/output locations and the sample to read
path = '/beegfs/desy/user/hezhiyua/backed/dustData/crab_folder_v2/'
pathOut = '/beegfs/desy/user/hezhiyua/backed/dustData/crab_folder_v2/test/'
Fname = 'VBFH_HToSSTobbbb_MH-125_MS-40_ctauS-500_TuneCUETP8M1_13TeV-powheg-pythia8_PRIVATE-MC.root'
entries = 200

fin = TFile(path + Fname)
tin = fin.Get('ntuple/tree')
tm1 = tm()

# Passed as `stop` to every tree2array call below
s_cut = 1000#None#100

# One structured array per PFCandidates branch, each from its own read of
# the tree
arr_energy = rnp.tree2array(tin, ['PFCandidates.energy'], stop=s_cut)
arr_phi = rnp.tree2array(tin, ['PFCandidates.phi'], stop=s_cut)
arr_eta = rnp.tree2array(tin, ['PFCandidates.eta'], stop=s_cut)
arr_jetindex = rnp.tree2array(tin, ['PFCandidates.jetIndex'], stop=s_cut)

#e_npar = np.array(arr_energy)
e_df = pd.DataFrame(arr_energy)
phi_df = pd.DataFrame(arr_phi)
eta_df = pd.DataFrame(arr_eta)
#print e_npar
#print arr_energy[3]
# Debug printout: element 3 of the energy entry stored in row 3
print e_df.loc[3,'PFCandidates.energy'][3]

df = pd.DataFrame()
df_o = pd.DataFrame()
df['e'] = e_df
eosdir = "root://cmseos.fnal.gov//store/user/jmanagan/MVAtraining_2017_Jan2021/" ## Choosing valid events with appropriate characteristics and cutting the rest seltrain = "isValidTTDecayMode_DeepAK8 == 0 && Tprime2_DeepAK8_Mass < 0 && NJetsAK8_JetSubCalc > 2" seltest = "isValidTTDecayMode_DeepAK8 == 0 && Tprime2_DeepAK8_Mass >= 0" treeVars = vars ## Getting values from trees for each parent particle and either keeping them in an array or adding them together fileTTToSemiLepT = TFile.Open( eosdir + "TTJets_SingleLeptFromT_TuneCP5_13TeV-madgraphMLM-pythia8_hadd.root", "READ") treeTTToSemiLepT = fileTTToSemiLepT.Get("ljmet") trainTTToSemiLepT = tree2array(treeTTToSemiLepT, treeVars, seltrain) testTTToSemiLepT = tree2array(treeTTToSemiLepT, treeVars, seltest) ## Selection with Single Lept from TBar fileTTToSemiLepTb = TFile.Open( eosdir + "TTJets_SingleLeptFromTbar_TuneCP5_13TeV-madgraphMLM-pythia8_hadd.root", "READ") treeTTToSemiLepTb = fileTTToSemiLepTb.Get("ljmet") trainTTToSemiLepTb = tree2array(treeTTToSemiLepTb, treeVars, seltrain) testTTToSemiLepTb = tree2array(treeTTToSemiLepTb, treeVars, seltest) ## Selection with signals fileTprime = TFile.Open( eosdir + "TprimeTprime_M-1000_TuneCP5_13TeV-madgraph-pythia8_hadd.root", "READ")
@author: rupeshdotel """ import numpy as np import LT.box as B import ROOT as R from root_numpy import tree2array import matplotlib.pyplot as plt #%% rfile = R.TFile( "/Users/rupeshdotel/analysis/work/pi0pippimeta/data/qfactor_data/qfactortree/qfactortree_for_may_10_gluexI.root" ) intree = rfile.Get('qfactortree') d = tree2array(intree) #%% mm2m = d['mm2m'] mpi013 = d['mpi013'] mpi014 = d['mpi014'] mpi023 = d['mpi023'] mpi024 = d['mpi024'] metap = d['metap'] metappi0 = d['metappi0'] cost_etap = d['cos_t'] phi_etap = d['phi_gj'] mpippimpi0 = d['mpippimpi0'] mpi0p = d['mpi0p']
## Open ROOT files print 'Opening files...' eosdir = "root://cmseos.fnal.gov//store/user/cholz/Step2MVAtraining_0432020/" #sel = "Bprime2_DeepAK8_Mass < 0" sel = "isValidBBDecayMode_DeepAK8 == 0 && Bprime2_DeepAK8_Mass < 0 && NJetsAK8_JetSubCalc > 2" treeVars = vars print 'getting trees...','Making training arrays...','Making testing arrays...' seltest = "isValidBBDecayMode_DeepAK8 == 0 && Bprime2_DeepAK8_Mass >= 0" for i in range(1,7): fileTTToSemiLepT = TFile.Open(eosdir + "TTJets_SingleLeptFromT_TuneCUETP8M1_13TeV-madgraphMLM-pythia8_"+ str(i)+"_hadd.root", "READ") treeTTToSemiLepT = fileTTToSemiLepT.Get("ljmet") if i == 1: arrayTTToSemiLepT = tree2array(treeTTToSemiLepT, treeVars, sel) testTTToSemiLepT = tree2array(treeTTToSemiLepT, treeVars, seltest) else: arrayTTToSemiLepT = np.concatenate([arrayTTToSemiLepT,tree2array(treeTTToSemiLepT, treeVars, sel)]) testTTToSemiLepT = np.concatenate([testTTToSemiLepT,tree2array(treeTTToSemiLepT, treeVars, seltest)]) for i in range(1,3): fileTTToSemiLepTb = TFile.Open(eosdir + "TTJets_SingleLeptFromTbar_TuneCUETP8M1_13TeV-madgraphMLM-pythia8_"+ str(i)+"_hadd.root", "READ") treeTTToSemiLepTb = fileTTToSemiLepTb.Get("ljmet") if i == 1: arrayTTToSemiLepTb = tree2array(treeTTToSemiLepTb, treeVars, sel) testTTToSemiLepTb = tree2array(treeTTToSemiLepTb, treeVars, seltest) else: arrayTTToSemiLepTb = np.concatenate([arrayTTToSemiLepTb,tree2array(treeTTToSemiLepTb, treeVars, sel)]) testTTToSemiLepTb = np.concatenate([testTTToSemiLepTb,tree2array(treeTTToSemiLepTb, treeVars, seltest)])
path_tree = '/home/ucl/cp3/fbury/storage/NNAndELLipseOutputTrees/model_'+str(args.model)+'/' ################################################################################ # Input Trees # ################################################################################ for name in glob.glob(path_tree+'*.root'): filename = name.replace(path_tree,'') num = [int(s) for s in re.findall('\d+',filename )] if num[0]!=mH_select or num[1]!=mA_select: continue break f = ROOT.TFile.Open(name) t = f.Get("tree") sig = tree2array(t,branches=['NN_out','Ell_out','weight'],selection='id==0') DYToLL_0J = tree2array(t,branches=['NN_out','Ell_out','weight'],selection='id==1') DYToLL_1J = tree2array(t,branches=['NN_out','Ell_out','weight'],selection='id==2') DYToLL_2J = tree2array(t,branches=['NN_out','Ell_out','weight'],selection='id==3') TT_Other = tree2array(t,branches=['NN_out','Ell_out','weight'],selection='id==4') TTTo2L2Nu = tree2array(t,branches=['NN_out','Ell_out','weight'],selection='id==5') for cn in cut_NN: print ('NN cut : ',cn) N_sig = np.sum(sig[sig[:]['NN_out']>cn]['weight']) N_DYToLL_0J = np.sum(DYToLL_0J[DYToLL_0J[:]['NN_out']>cn]['weight']) N_DYToLL_1J = np.sum(DYToLL_1J[DYToLL_1J[:]['NN_out']>cn]['weight']) N_DYToLL_2J = np.sum(DYToLL_2J[DYToLL_2J[:]['NN_out']>cn]['weight']) N_TT_Other = np.sum(TT_Other[TT_Other[:]['NN_out']>cn]['weight']) N_TTTo2L2Nu = np.sum(TTTo2L2Nu[TTTo2L2Nu[:]['NN_out']>cn]['weight'])
procP3 = glob.glob(inputPath + "/" + folderName + "_fastsim_p3/" + folderName + "_fastsim_p3_forBDTtraining_OS_central_*.root") list = procP1 + procP2 + procP3 else: procP1 = glob.glob(inputPath + "/" + folderName + "_fastsim/" + folderName + "_fastsim_forBDTtraining_OS_central_*.root") list = procP1 print("Date: ", time.asctime(time.localtime(time.time()))) for ii in range(0, len(list)): # #print (list[ii],inputTree) tfile = ROOT.TFile(list[ii]) tree = tfile.Get(inputTree) if tree is not None: chunk_arr = tree2array(tree) #, start=start, stop = stop) chunk_df = pandas.DataFrame(chunk_arr) # chunk_df['key'] = folderName chunk_df['target'] = target #chunk_df['file']=list[ii].split("_")[10] if channel == "2lss_1tau": data[ "totalWeight"] = data.evtWeight * data.tau_frWeight * data.lep1_frWeight * data.lep2_frWeight if channel == "1l_2tau": data["totalWeight"] = data.evtWeight data = data.append(chunk_df, ignore_index=True) else: print("file " + list[ii] + "was empty") tfile.Close() print(data.columns.values.tolist()) n = len(data) nS = len(data.ix[data.target.values == 0])
data_chf1 = root2array(directory, what_tree, branch_names_chfonly1) data_chfp1 = root2array(directory, what_tree, branch_names_chfp1) data_tot1 = root2array(directory, what_tree, branch_names_tot1) data_noreg1 = root2array(directory, what_tree, branch_names_noreg1) data_xgb2 = root2array(directory, what_tree, branch_names_xgb2) data_chf2 = root2array(directory, what_tree, branch_names_chfonly2) data_chfp2 = root2array(directory, what_tree, branch_names_chfp2) data_tot2 = root2array(directory, what_tree, branch_names_tot2) data_noreg2 = root2array(directory, what_tree, branch_names_noreg2) data_cw = root2array(directory_cw, what_tree_cw, branch_names_cw) ''' data_xgb1 = tree2array(what_tree, branch_names_xgb1) data_chf1 = tree2array(what_tree, branch_names_chfonly1) data_chfp1 = tree2array(what_tree, branch_names_chfp1) data_tot1 = tree2array(what_tree, branch_names_tot1) data_cw = tree2array(what_tree_cw, branch_names_cw1) data_noreg1 = tree2array(what_tree, branch_names_noreg1) data_xgb2 = tree2array(what_tree, branch_names_xgb2) data_chf2 = tree2array(what_tree, branch_names_chfonly2) data_chfp2 = tree2array(what_tree, branch_names_chfp2) data_tot2 = tree2array(what_tree, branch_names_tot2) data_cw = tree2array(what_tree_cw, branch_names_cw2) data_noreg2 = tree2array(what_tree, branch_names_noreg2) #================LOAD WEIGHT FILES========================
def main():
    """Overlay unit-normalised signal and background bucket distributions.

    Builds one tree (or chain over all jet categories when ``njet`` is
    "all") for the "sig" and the "bkg" entry of ``process``, reads a fixed
    set of branches with ``rnp.tree2array`` and, for each plotted quantity,
    draws the signal (red) and background (black) histograms on one canvas
    that is printed to ``Overlay<name>_<njet>jetregion.eps``.
    """
    ROOT.gROOT.SetBatch(1)
    ROOT.gStyle.SetOptStat(0)
    njet = "all"
    # Maps the test-directory suffix to the role ("sig"/"bkg") of the sample.
    #process = {"sig1lnotTwB1wt1000tminfixed_5K": "sig", "bkg1lnotTwB1wt1000tminfixed_5K": "bkg"}
    #process = {"sig1lnotTwB1wt100_50K": "sig", "bkg1lnotTwB1wt100_50K": "bkg"}
    process = {
        "sig1lnotTwB1wt1000_50K": "sig",
        "bkg1lnotTwB1wt1000_50K": "bkg"
    }
    #process = "sig1lnotTwB1wt1000_5K"
    #process = "bkg1lnotTwB1wt1000_5K"
    #process = "sig1lnotTwB1wt1000_1K"
    #process = "bkg1lnotTwB1wt1000_1K"
    #process = "sig1lnotTwB1wt100_1K"
    #process = "bkg1lnotTwB1wt100_1K"
    #process = "sig"
    #process = "bkg"
    #process = "sigOld"
    #process = "bkgOld"
    #njet = "ge10"
    #njet = "9"
    #njet = "8"
    #njet = "7"
    #njet = "6"
    #njet = "5"
    # tlist ends up with the keys "sigtree" and "bkgtree".
    tlist = {}
    for p in process.keys():
        if (njet == "all"):
            # Chain the same tree over every jet-multiplicity category.
            t = ROOT.TChain("nominal_Loose")
            jet_cat = ["ge10", "9", "8", "7", "6", "5"]
            for jetc in jet_cat:
                # NOTE(review): fj is opened but not used afterwards; only
                # the path handed to t.Add() feeds the chain.
                fj = ROOT.TFile(
                    "/afs/cern.ch/work/s/sosen/ChongbinTop/common-framework/run-offline/test_%s/ljets%sj/ttbar_powpyt8.root"
                    % (p, jetc), 'READ')
                t.Add(
                    "/afs/cern.ch/work/s/sosen/ChongbinTop/common-framework/run-offline/test_%s/ljets%sj/ttbar_powpyt8.root"
                    % (p, jetc))
        else:
            f = ROOT.TFile(
                "/afs/cern.ch/work/s/sosen/ChongbinTop/common-framework/run-offline/test_%s/ljets%sj/ttbar_powpyt8.root"
                % (p, njet), 'READ')
            f.ls()
            t = f.Get("nominal_Loose")
            t.ls()
        tlist[process[p] + "tree"] = t

    # ---- signal branches (prefix S) ----
    #bucket type count
    Stwcount = rnp.tree2array(tlist["sigtree"], branches="twcount")
    Stmincount = rnp.tree2array(tlist["sigtree"], branches="tmincount")
    St0count = rnp.tree2array(tlist["sigtree"], branches="t0count")
    ##
    SmW0 = rnp.tree2array(tlist["sigtree"], branches="mW0")
    SmW1 = rnp.tree2array(tlist["sigtree"], branches="mW1")
    SmBucketPrim0 = rnp.tree2array(tlist["sigtree"], branches="mBucketPrim0")
    SmBucketPrim1 = rnp.tree2array(tlist["sigtree"], branches="mBucketPrim1")
    # axis=None flattens both inputs before joining them
    SmBucketPrim = np.concatenate((SmBucketPrim0, SmBucketPrim1), axis=None)
    Stwmass0 = rnp.tree2array(tlist["sigtree"], branches="twmass0")
    StwPt0 = rnp.tree2array(tlist["sigtree"], branches="twPt0")
    SNaddjets = rnp.tree2array(tlist["sigtree"], branches="Naddjets")

    # ---- background branches (prefix B), same list as for the signal ----
    #bucket type count
    Btwcount = rnp.tree2array(tlist["bkgtree"], branches="twcount")
    Btmincount = rnp.tree2array(tlist["bkgtree"], branches="tmincount")
    Bt0count = rnp.tree2array(tlist["bkgtree"], branches="t0count")
    ##
    BmW0 = rnp.tree2array(tlist["bkgtree"], branches="mW0")
    BmW1 = rnp.tree2array(tlist["bkgtree"], branches="mW1")
    BmBucketPrim0 = rnp.tree2array(tlist["bkgtree"], branches="mBucketPrim0")
    BmBucketPrim1 = rnp.tree2array(tlist["bkgtree"], branches="mBucketPrim1")
    BmBucketPrim = np.concatenate((BmBucketPrim0, BmBucketPrim1), axis=None)
    Btwmass0 = rnp.tree2array(tlist["bkgtree"], branches="twmass0")
    BtwPt0 = rnp.tree2array(tlist["bkgtree"], branches="twPt0")
    BNaddjets = rnp.tree2array(tlist["bkgtree"], branches="Naddjets")

    # Every plot below follows the same recipe: new canvas and legend, fill
    # and scale the signal and background histograms by 1/Integral(), set the
    # maximum to 1.1 x the larger peak, draw both, print to EPS.
    # NOTE(review): all canvases are created with the same ROOT name 'c'.
    # NOTE(review): fill_hist is called with axis-title strings, so it is
    # presumably a project helper rather than root_numpy.fill_hist - confirm.

    # ---- number of additional jets ----
    c0 = ROOT.TCanvas('c', 'c', 800, 600)
    leg0 = ROOT.TLegend(0.65, 0.75, 0.88, 0.88)
    leg0.SetFillColor(0)
    leg0.SetLineColor(0)
    #bucket type count
    ShNaddjets = ROOT.TH1F("signal hNaddjets", "", 21, -0.5, 20.5)
    fill_hist(ShNaddjets, SNaddjets, "additional jets per event", "a.u.")
    ShNaddjets.Scale(1. / (ShNaddjets.Integral()))
    ShNaddjets.SetLineColor(ROOT.kRed)
    BhNaddjets = ROOT.TH1F("bkg hNaddjets", "", 21, -0.5, 20.5)
    fill_hist(BhNaddjets, BNaddjets, "additional jets per event", "a.u.")
    BhNaddjets.Scale(1. / (BhNaddjets.Integral()))
    BhNaddjets.SetLineColor(ROOT.kBlack)
    ShNaddjets.SetMaximum(
        max(ShNaddjets.GetMaximum(), BhNaddjets.GetMaximum()) * 1.1)
    ShNaddjets.Draw("hist")
    leg0.AddEntry(ShNaddjets, 'signal', "L")
    BhNaddjets.Draw("hist same")
    leg0.AddEntry(BhNaddjets, 'ttbar+jets (bkg)', "L")
    leg0.Draw()
    c0.Print("OverlayhNaddjets_%sjetregion.eps" % njet)

    # ---- number of tw buckets ----
    c1 = ROOT.TCanvas('c', 'c', 800, 600)
    leg1 = ROOT.TLegend(0.65, 0.75, 0.88, 0.88)
    leg1.SetFillColor(0)
    leg1.SetLineColor(0)
    #bucket type count
    Shtwcount = ROOT.TH1F("signal htwcount", "", 4, -0.5, 3.5)
    fill_hist(Shtwcount, Stwcount, "tw buckets per event", "a.u.")
    Shtwcount.Scale(1. / (Shtwcount.Integral()))
    Shtwcount.SetLineColor(ROOT.kRed)
    Bhtwcount = ROOT.TH1F("bkg htwcount", "", 4, -0.5, 3.5)
    fill_hist(Bhtwcount, Btwcount, "tw buckets per event", "a.u.")
    Bhtwcount.Scale(1. / (Bhtwcount.Integral()))
    Bhtwcount.SetLineColor(ROOT.kBlack)
    Shtwcount.SetMaximum(
        max(Shtwcount.GetMaximum(), Bhtwcount.GetMaximum()) * 1.1)
    Shtwcount.Draw("hist")
    leg1.AddEntry(Shtwcount, 'signal', "L")
    Bhtwcount.Draw("hist same")
    leg1.AddEntry(Bhtwcount, 'ttbar+jets (bkg)', "L")
    leg1.Draw()
    c1.Print("Overlayhtwcount_%sjetregion.eps" % njet)

    # ---- W-candidate mass in B1 ----
    c2 = ROOT.TCanvas('c', 'c', 800, 600)
    leg2 = ROOT.TLegend(0.65, 0.75, 0.88, 0.88)
    leg2.SetFillColor(0)
    leg2.SetLineColor(0)
    #bucket type count
    ShmW0 = ROOT.TH1F("signal hmW0",
                      "Mass of the (possible) W candidate in B1", 150, 0.0001,
                      300)
    fill_hist(ShmW0, SmW0, "Mass (GeV)", "")
    ShmW0.Scale(1. / (ShmW0.Integral()))
    ShmW0.SetLineColor(ROOT.kRed)
    BhmW0 = ROOT.TH1F("bkg hmW0", "Mass of the (possible) W candidate in B1",
                      150, 0.0001, 300)
    fill_hist(BhmW0, BmW0, "Mass (GeV)", "")
    BhmW0.Scale(1. / (BhmW0.Integral()))
    BhmW0.SetLineColor(ROOT.kBlack)
    ShmW0.SetMaximum(max(ShmW0.GetMaximum(), BhmW0.GetMaximum()) * 1.1)
    ShmW0.Draw("hist")
    leg2.AddEntry(ShmW0, 'signal', "L")
    BhmW0.Draw("hist same")
    leg2.AddEntry(BhmW0, 'ttbar+jets (bkg)', "L")
    leg2.Draw()
    c2.Print("OverlayhmW0_%sjetregion.eps" % njet)

    # ---- mass of B1 ----
    c3 = ROOT.TCanvas('c', 'c', 800, 600)
    leg3 = ROOT.TLegend(0.65, 0.75, 0.88, 0.88)
    leg3.SetFillColor(0)
    leg3.SetLineColor(0)
    #bucket type count
    ShmBucketPrim0 = ROOT.TH1F("signal hmBucketPrim0", "Mass of B1", 150, 0,
                               300)
    fill_hist(ShmBucketPrim0, SmBucketPrim0, "Mass (GeV)", "")
    ShmBucketPrim0.Scale(1. / (ShmBucketPrim0.Integral()))
    ShmBucketPrim0.SetLineColor(ROOT.kRed)
    BhmBucketPrim0 = ROOT.TH1F("bkg hmBucketPrim0", "Mass of B1", 150, 0, 300)
    fill_hist(BhmBucketPrim0, BmBucketPrim0, "Mass (GeV)", "")
    BhmBucketPrim0.Scale(1. / (BhmBucketPrim0.Integral()))
    BhmBucketPrim0.SetLineColor(ROOT.kBlack)
    ShmBucketPrim0.SetMaximum(
        max(ShmBucketPrim0.GetMaximum(), BhmBucketPrim0.GetMaximum()) * 1.1)
    ShmBucketPrim0.Draw("hist")
    leg3.AddEntry(ShmBucketPrim0, 'signal', "L")
    BhmBucketPrim0.Draw("hist same")
    leg3.AddEntry(BhmBucketPrim0, 'ttbar+jets (bkg)', "L")
    leg3.Draw()
    c3.Print("OverlayhmBucketPrim0_%sjetregion.eps" % njet)

    # ---- mass of B2 ----
    c4 = ROOT.TCanvas('c', 'c', 800, 600)
    leg4 = ROOT.TLegend(0.65, 0.75, 0.88, 0.88)
    leg4.SetFillColor(0)
    leg4.SetLineColor(0)
    #bucket type count
    ShmBucketPrim1 = ROOT.TH1F("signal hmBucketPrim1", "Mass of B2", 150, 0,
                               300)
    fill_hist(ShmBucketPrim1, SmBucketPrim1, "Mass (GeV)", "")
    ShmBucketPrim1.Scale(1. / (ShmBucketPrim1.Integral()))
    ShmBucketPrim1.SetLineColor(ROOT.kRed)
    BhmBucketPrim1 = ROOT.TH1F("bkg hmBucketPrim1", "Mass of B2", 150, 0, 300)
    fill_hist(BhmBucketPrim1, BmBucketPrim1, "Mass (GeV)", "")
    BhmBucketPrim1.Scale(1. / (BhmBucketPrim1.Integral()))
    BhmBucketPrim1.SetLineColor(ROOT.kBlack)
    ShmBucketPrim1.SetMaximum(
        max(ShmBucketPrim1.GetMaximum(), BhmBucketPrim1.GetMaximum()) * 1.1)
    ShmBucketPrim1.Draw("hist")
    leg4.AddEntry(ShmBucketPrim1, 'signal', "L")
    BhmBucketPrim1.Draw("hist same")
    leg4.AddEntry(BhmBucketPrim1, 'ttbar+jets (bkg)', "L")
    leg4.Draw()
    c4.Print("OverlayhmBucketPrim1_%sjetregion.eps" % njet)
def do_cut(did, files, supercuts, weights, tree_name, output_directory,
           eventWeightBranch, doNumpy, pids):
    """Apply every cut built from ``supercuts`` to one sample and dump the counts.

    The tree for ``files`` is loaded with ``get_ttree`` (and, when
    ``doNumpy`` is set, converted to a NumPy array restricted to the branches
    that the selections and the event weight actually mention).  For each cut
    the raw, weighted and scaled event counts are stored under the cut's hash
    and the whole mapping is written to ``<output_directory>/<did>.json``.

    ``pids`` is either None or an array in which this process registers its
    pid; the slot index is used to position the progress bar.

    Returns a tuple ``(succeeded, elapsed)`` where ``succeeded`` is False if
    anything raised while processing the sample.
    """
    # -1 means "no pid table": the progress bar is then disabled.
    position = -1
    if pids is not None:
        my_pid = os.getpid()
        # claim the first free (zero) slot if this process has none yet
        if my_pid not in pids:
            pids[np.argmax(pids == 0)] = my_pid
        # this gives us the position of this particular process in our list of processes
        position = np.where(pids == my_pid)[0][0]

    start = clock()
    try:
        # load up the tree for the files
        tree = get_ttree(tree_name, files, eventWeightBranch)

        # if using numpy optimization, load the tree as a numpy array to apply_cuts on
        if doNumpy:
            # A selection string may mention several branches: collect the
            # branch names of every supercut, flattened and de-duplicated.
            per_supercut = (selection_to_branches(supercut['selections'], tree)
                            for supercut in supercuts)
            branchesSpecified = list(
                set(itertools.chain.from_iterable(per_supercut)))
            eventWeightBranchesSpecified = list(
                set(selection_to_branches(eventWeightBranch, tree)))

            # get actual list of branches in the file
            availableBranches = tree_get_branches(
                tree, eventWeightBranchesSpecified)

            # remove anything that doesn't exist
            branchesToUse = []
            for branch in branchesSpecified:
                if branch in availableBranches:
                    branchesToUse.append(branch)
            branchesSkipped = list(set(branchesSpecified) - set(branchesToUse))
            if branchesSkipped:
                logger.info("The following branches have been skipped...")
                for branch in branchesSkipped:
                    logger.info("\t{0:s}".format(branch))

            tree = rnp.tree2array(
                tree, branches=eventWeightBranchesSpecified + branchesToUse)

        # get the scale factor
        sample_scaleFactor = get_scaleFactor(weights, did)

        # build the containing canvas for all histograms drawn in `apply_selection`
        canvas_name = 'test{0:s}'.format(did)
        canvas = ROOT.TCanvas(canvas_name, canvas_name, 200, 10, 100, 100)

        # iterate over the cuts available
        cuts = {}
        progress = tqdm(get_cut(copy.deepcopy(supercuts)),
                        desc='Working on DID {0:s}'.format(did),
                        total=get_n_cuts(supercuts),
                        disable=(position == -1),
                        position=position + 1,
                        leave=True,
                        mininterval=5,
                        maxinterval=10,
                        unit='cuts',
                        dynamic_ncols=True)
        for cut in progress:
            cut_hash = get_cut_hash(cut)
            rawEvents, weightedEvents = apply_cuts(tree,
                                                   cut,
                                                   eventWeightBranch,
                                                   doNumpy,
                                                   canvas=canvas)
            cuts[cut_hash] = {
                'raw': rawEvents,
                'weighted': weightedEvents,
                'scaled': weightedEvents * sample_scaleFactor
            }
        logger.info("Applied {0:d} cuts".format(len(cuts)))

        out_path = '{0:s}/{1:s}.json'.format(output_directory, did)
        with open(out_path, 'w+') as f:
            f.write(json.dumps(cuts, sort_keys=True, indent=4))
        result = True
        del canvas
    except:
        logger.exception("Caught an error - skipping {0:s}".format(did))
        result = False
    end = clock()
    return (result, end - start)
def main():
    """Copy the mass branches of the input samples into fullttbar.root.

    For every input file the tree ``analysis/allEvents/massData`` is read
    branch by branch with ``tree2array``; the event weight is rescaled by
    ``xSec / counts`` (first bin of ``countHisto``) into ``weight`` and by
    ``xSec / count2`` (hard-coded count) into ``weight2``.  Each entry is
    then written to a new tree ``fullttbar`` made of double branches.
    """
    treeFolder = "analysis/allEvents/"
    treeName = "massData"

    # names of the branches read from the input tree
    wrMassBranch = "WRMass"
    SRmassBranch = "superResolvedNNMass"
    RmassBranch = "resolvedNNMass"
    correctMassBranch = "correctNMass"
    incorrectMassBranch = "incorrectNMass"
    leadMassBranch = "leadNMass"
    subleadMassBranch = "subNMass"
    weightBranch = "weight"

    #LOADING THE TTREE
    fileNames = ["TTTo2L2Nu.root"]
    crossSections = [88.29]
    counts2 = [79140880]

    # make new root file with new tree
    file = ROOT.TFile("fullttbar.root", 'recreate')
    tree = ROOT.TTree("fullttbar", "fullttbar")

    # One 1-element float array per output branch: the array acts as the
    # pointer handed to Branch(), and is overwritten before each Fill().
    outputNames = ("WRMass", "resolvedNNMass", "superResolvedNNMass",
                   "correctNMass", "incorrectNMass", "leadNMass", "subNMass",
                   "weight", "weight2")
    buffers = {}
    for outputName in outputNames:
        buffers[outputName] = np.zeros(1, dtype=float)
        tree.Branch(outputName, buffers[outputName], outputName + "/D")

    for fileName, xSec, count2 in zip(fileNames, crossSections, counts2):
        rootfile = ROOT.TFile.Open(fileName, "read")
        massTree = rootfile.Get(treeFolder + treeName)
        countHisto = rootfile.Get(treeFolder + "countHisto")
        counts = countHisto.GetBinContent(1)
        print(counts)

        # input columns keyed by the output branch they feed
        columns = {
            "WRMass": tree2array(massTree, branches=wrMassBranch),
            "superResolvedNNMass": tree2array(massTree, branches=SRmassBranch),
            "resolvedNNMass": tree2array(massTree, branches=RmassBranch),
            "correctNMass": tree2array(massTree, branches=correctMassBranch),
            "incorrectNMass": tree2array(massTree,
                                         branches=incorrectMassBranch),
            "leadNMass": tree2array(massTree, branches=leadMassBranch),
            "subNMass": tree2array(massTree, branches=subleadMassBranch),
        }
        rawWeights = tree2array(massTree, branches=weightBranch)
        columns["weight2"] = rawWeights * xSec / count2
        columns["weight"] = rawWeights * xSec / counts

        nEntries = columns["WRMass"].shape[0]
        print(columns["WRMass"].shape)
        print(nEntries)
        for i in range(nEntries):
            for outputName in outputNames:
                buffers[outputName][0] = columns[outputName][i]
            tree.Fill()

    # write the tree into the output file and close the file
    file.Write()
    file.Close()
def test_tree2rec():
    """Convert a one-file TChain and check it against the reference data."""
    single_chain = TChain('tree')
    single_chain.Add(load('single1.root'))
    converted = rnp.tree2array(single_chain)
    check_single(converted)
# Loop over every ROOT file in INPUT_FOLDER and read the selected events.
for name in glob.glob(INPUT_FOLDER + '*.root'):
    filename = name.replace(INPUT_FOLDER, '')
    print('Opening file : ', filename)

    # Signal files are recognised by their name prefix; everything else is
    # treated as background.
    Sig = filename.startswith('HToZATo2L2B')
    if Sig:
        print('\t-> Signal')
    else:
        print('\t-> Background')

    f = ROOT.TFile.Open(name)
    t = f.Get("tree")

    # The same selection is applied to every branch so the arrays stay
    # aligned event by event.
    selection = 'met_pt<80 && ll_M>70 && ll_M<110'
    jj_M = np.asarray(tree2array(t, branches='jj_M', selection=selection))
    lljj_M = np.asarray(
        tree2array(t, branches='lljj_M', selection=selection))
    MEM_TT = np.asarray(
        tree2array(t, branches='weight_TT', selection=selection))
    MEM_DY = np.asarray(
        tree2array(t, branches='weight_DY', selection=selection))
    total_weight = np.asarray(
        tree2array(t, branches='total_weight', selection=selection))
    N = jj_M.shape[0]

    if Sig:
        # Extract mA, mH generated from the file name: they are taken to be
        # the third and fourth integers appearing in it.  The pattern is a
        # raw string so that "\d" is not an invalid string escape.
        num = [int(s) for s in re.findall(r'\d+', filename)]
        print('\tmH = ', num[2], ', mA = ', num[3])
        mH = np.ones(N) * num[2]
        mA = np.ones(N) * num[3]
def test_empty_tree():
    """Run tree2array on a tree that has a branch but was never filled."""
    from array import array

    fill_buffer = array('d', [0.])
    empty_tree = TTree('tree', 'tree')
    empty_tree.Branch('double', fill_buffer, 'double/D')
    rnp.tree2array(empty_tree)
from __future__ import print_function from root_numpy import root2array, tree2array import ROOT #Get Data rfile = ROOT.TFile("TreeFile.root") intree = rfile.Get('Tree') intree.Print() # and convert the TTree into an array array = tree2array(intree, branches=["Gen", "Reco", "data"]) array.dtype.names = ('reco', "gen", "data") #Plot Data import matplotlib.pyplot as plt from matplotlib.colors import LogNorm import numpy as np import pandas as pd df = pd.DataFrame(array) NBins = 10 xmin = 0 xmax = 1000 content_reco, bin, patches = plt.hist(df['reco'], bins=NBins) plt.ylabel('# events') plt.xlabel("reco") # plt.show() plt.savefig("plots/reco.pdf") content_gen, bin, patches = plt.hist(df['gen'], bins=NBins) print(content_gen)
def efficiency(year): import numpy as np from root_numpy import tree2array, fill_hist from aliases import AK8veto, electronVeto, muonVeto genPoints = [ 1800, 2000, 2500, 3000, 3500, 4000, 4500, 5000, 5500, 6000, 7000, 8000 ] eff = {} vetoes = {"AK8": AK8veto, "electron": electronVeto, "muon": muonVeto} VETO = "AK8" ##could change the veto to investigate here if SEPARATE: eff_add = {} #channels = ['none', 'qq', 'bq', 'bb', 'mumu'] channels = ['qq', 'bq', 'bb', 'mumu'] for channel in channels: treeSign = {} ngenSign = {} nevtSign = {} eff[channel] = TGraphErrors() if SEPARATE: nevtSign_add = {} eff_add[channel] = TGraphErrors() for i, m in enumerate(genPoints): signName = "ZpBB_M" + str(m) ngenSign[m] = 0. nevtSign[m] = 0. if SEPARATE: nevtSign_add[m] = 0. for j, ss in enumerate(sample[signName]['files']): if year == "run2" or year in ss: sfile = TFile(NTUPLEDIR + ss + ".root", "READ") ngenSign[m] += sfile.Get("Events").GetBinContent(1) treeSign[m] = sfile.Get("tree") if BTAGGING == 'semimedium': #if SEPARATE: # temp_array = tree2array(treeSign[m], branches='BTagAK4Weight_deepJet', selection=aliasSM[channel].replace(vetoes[VETO], "")) #else: temp_array = tree2array( treeSign[m], branches='BTagAK4Weight_deepJet', selection=aliasSM[channel]) temp_hist = TH1F('pass', 'pass', 1, 0, 1) fill_hist(temp_hist, np.zeros(len(temp_array)), weights=temp_array) nevtSign[m] += temp_hist.GetBinContent(1) temp_array = None temp_hist.Reset() if SEPARATE: temp_array = tree2array( treeSign[m], branches='BTagAK4Weight_deepJet', selection=aliasSM[channel].replace( vetoes[VETO], "")) temp_hist = TH1F('pass', 'pass', 1, 0, 1) fill_hist(temp_hist, np.zeros(len(temp_array)), weights=temp_array) nevtSign[m] += temp_hist.GetBinContent(1) temp_array = None temp_hist.Reset() else: #if SEPARATE: # temp_array = tree2array(treeSign[m], branches='BTagAK4Weight_deepJet', selection=alias[channel].format(WP=working_points[BTAGGING]).replace(vetoes[VETO], "")) #else: temp_array = tree2array( treeSign[m], 
branches='BTagAK4Weight_deepJet', selection=alias[channel].format( WP=working_points[BTAGGING])) temp_hist = TH1F('pass', 'pass', 1, 0, 1) fill_hist(temp_hist, np.zeros(len(temp_array)), weights=temp_array) nevtSign[m] += temp_hist.GetBinContent(1) temp_array = None temp_hist.Reset() if SEPARATE: temp_array = tree2array( treeSign[m], branches='BTagAK4Weight_deepJet', selection=alias[channel].format( WP=working_points[BTAGGING]).replace( vetoes[VETO], "")) temp_hist = TH1F('pass', 'pass', 1, 0, 1) fill_hist(temp_hist, np.zeros(len(temp_array)), weights=temp_array) nevtSign_add[m] += temp_hist.GetBinContent(1) temp_array = None temp_hist.Reset() sfile.Close() print channel, ss, ":", nevtSign[m], "/", ngenSign[ m], "=", nevtSign[m] / ngenSign[m] if nevtSign[m] == 0 or ngenSign[m] < 0: continue n = eff[channel].GetN() eff[channel].SetPoint(n, m, nevtSign[m] / ngenSign[m]) eff[channel].SetPointError(n, 0, math.sqrt(nevtSign[m]) / ngenSign[m]) if SEPARATE: eff_add[channel].SetPoint(n, m, nevtSign_add[m] / ngenSign[m]) eff_add[channel].SetPointError( n, 0, math.sqrt(nevtSign_add[m]) / ngenSign[m]) eff[channel].SetMarkerColor(color[channel]) eff[channel].SetMarkerStyle(20) eff[channel].SetLineColor(color[channel]) eff[channel].SetLineWidth(2) if SEPARATE: eff_add[channel].SetMarkerColor(color[channel] + color_shift[channel]) eff_add[channel].SetMarkerStyle(21) eff_add[channel].SetLineColor(color[channel] + color_shift[channel]) eff_add[channel].SetLineWidth(2) eff_add[channel].SetLineStyle(7) if channel == 'qq' or channel == 'none': eff[channel].SetLineStyle(3) n = max([eff[x].GetN() for x in channels]) maxEff = 0. # Total efficiency eff["sum"] = TGraphErrors(n) eff["sum"].SetMarkerStyle(24) eff["sum"].SetMarkerColor(1) eff["sum"].SetLineWidth(2) if SEPARATE: eff_add["sum"] = TGraphErrors(n) eff_add["sum"].SetMarkerStyle(25) eff_add["sum"].SetMarkerColor(1) eff_add["sum"].SetLineWidth(2) eff_add["sum"].SetLineStyle(7) for i in range(n): tot, mass = 0., 0. 
if SEPARATE: tot_add = 0. for channel in channels: if channel == 'qq' or channel == 'none': continue #not sure if I should include 2mu category in sum if eff[channel].GetN() > i: tot += eff[channel].GetY()[i] if SEPARATE: tot_add += eff_add[channel].GetY()[i] mass = eff[channel].GetX()[i] if tot > maxEff: maxEff = tot eff["sum"].SetPoint(i, mass, tot) if SEPARATE: eff_add["sum"].SetPoint(i, mass, tot_add) if SEPARATE: leg = TLegend(0.15, 0.50, 0.95, 0.8) else: leg = TLegend(0.15, 0.60, 0.95, 0.8) leg.SetBorderSize(0) leg.SetFillStyle(0) #1001 leg.SetFillColor(0) leg.SetNColumns(len(channels) / 4) for i, channel in enumerate(channels): if eff[channel].GetN() > 0: leg.AddEntry(eff[channel], getChannel(channel), "pl") if SEPARATE: leg.AddEntry(eff_add[channel], getChannel(channel) + " no " + VETO + "-veto", "pl") if SEPARATE: leg.SetY1(leg.GetY2() - len([x for x in channels if eff[x].GetN() > 0]) * 0.045) else: leg.SetY1(leg.GetY2() - len([x for x in channels if eff[x].GetN() > 0]) / 2. * 0.045) if SEPARATE: legS = TLegend(0.5, 0.8 - 0.045, 0.9, 0.85) else: legS = TLegend(0.5, 0.85 - 0.045, 0.9, 0.85) legS.SetBorderSize(0) legS.SetFillStyle(0) #1001 legS.SetFillColor(0) legS.AddEntry(eff['sum'], "Total b tag efficiency (1 b tag + 2 b tag + 2 #mu)", "pl") if SEPARATE: legS.AddEntry(eff_add['sum'], "Total b tag efficiency, no " + VETO + "-veto", "pl") c1 = TCanvas("c1", "Signal Efficiency", 1200, 800) c1.cd(1) eff['sum'].Draw("APL") if SEPARATE: eff_add['sum'].Draw("SAME, PL") for i, channel in enumerate(channels): eff[channel].Draw("SAME, PL") if SEPARATE: eff_add[channel].Draw("SAME, PL") leg.Draw() legS.Draw() setHistStyle(eff["sum"], 1.1) eff["sum"].SetTitle(";m_{Z'} (GeV);Acceptance #times efficiency") eff["sum"].SetMinimum(0.) eff["sum"].SetMaximum(max(1., maxEff * 1.5)) #0.65 if SEPARATE: eff_add["sum"].SetTitle(";m_{Z'} (GeV);Acceptance #times efficiency") eff_add["sum"].SetMinimum(0.) eff_add["sum"].SetMaximum(1.) 
eff["sum"].GetXaxis().SetTitleSize(0.045) eff["sum"].GetYaxis().SetTitleSize(0.045) eff["sum"].GetYaxis().SetTitleOffset(1.1) eff["sum"].GetXaxis().SetTitleOffset(1.05) eff["sum"].GetXaxis().SetRangeUser(1500, 8000) c1.SetTopMargin(0.05) #drawCMS(-1, "Simulation Preliminary", year=year) #Preliminary #drawCMS(-1, "Work in Progress", year=year, suppressCMS=True) drawCMS(-1, "", year=year, suppressCMS=True) drawAnalysis("") if SEPARATE: c1.Print("plots/Efficiency/" + year + "_" + BTAGGING + "_no" + VETO + "veto.pdf") c1.Print("plots/Efficiency/" + year + "_" + BTAGGING + "_no" + VETO + "veto.png") else: c1.Print("plots/Efficiency/" + year + "_" + BTAGGING + ".pdf") c1.Print("plots/Efficiency/" + year + "_" + BTAGGING + ".png") # print print "category", for m in range(0, eff["sum"].GetN()): print " & %d" % int(eff["sum"].GetX()[m]), print "\\\\", "\n\\hline" for i, channel in enumerate(channels + ["sum"]): if channel == 'sum': print "\\hline" print getChannel(channel).replace("high ", "H").replace( "low ", "L").replace("purity", "P").replace("b-tag", ""), for m in range(0, eff[channel].GetN()): print "& %.1f" % (100. * eff[channel].GetY()[m]), print "\\\\"
from matplotlib.colors import LogNorm
from random import randint

# Fix the NumPy seed so that runs are reproducible.
seed = 10
np.random.seed(seed)

## Input
maxtracks_read = 100  # max number of tracks to read
maxtracks_train = 25  # max number of tracks to use in the training

filename = 'ntuHevjin.root'
file = TFile(filename, 'R')
tree = file.Get('PDsecondTree')

evtcuts = '((evtNumber % 10) < 8)'  # leave out 20% of events for testing

# Read the selected events as a structured array, then flatten it into a
# plain 2-D array with one column per requested branch.
vInput = root_numpy.rec2array(
    root_numpy.tree2array(
        tree,
        branches=['trkPt', 'trkEta', 'trkPhi', 'trkDxy', 'trkDz',
                  'trkIsInJet', 'trkIsHighPurity', 'trkCharge'],
        selection=evtcuts))

nspec = 2  # number of features not used in the training
nfeat = len(vInput[0]) - nspec

# The first five columns are addressed from the front, the last three from
# the back.
vPt, vEta, vPhi, vDxy, vDz = (vInput[:, col] for col in range(5))
vtrkIsInJet, vtrkIsHighPurity, vQ = (vInput[:, col] for col in (-3, -2, -1))

## Shape formatting and zero padding
def test_branch_DNE():
    """Requesting a branch that does not exist must raise ``ValueError``."""
    chain = TChain('tree')
    chain.Add(load('single1.root'))
    # Assert the failure explicitly, as the sibling tests do; a bare call
    # would make this test error out instead of pass.
    assert_raises(ValueError, rnp.tree2array, chain,
                  branches=['my_net_worth'])
def Tree2Pandas(input_file,
                variables,
                weight=None,
                cut=None,
                xsec=None,
                event_weight_sum=None,
                luminosity=None,
                paramFun=None,
                tree_name='tree',
                start=None,
                stop=None,
                additional_columns=None):
    """Convert a ROOT TTree to a pandas DataFrame.

    Args:
        input_file: path of the ROOT file to read.
        variables: branch names to import; names starting with "$" are
            dropped before reading.
        weight: optional name of the branch holding the event weight.
        cut: optional selection string forwarded to ``tree2array``.
        xsec, event_weight_sum, luminosity: when ``weight``, ``xsec`` and
            ``event_weight_sum`` are all given, the event weight is scaled
            by ``xsec * luminosity / event_weight_sum`` (``luminosity``
            defaults to 1).
        paramFun: optional callable applied to the file's base name; its
            result (0 when it returns None) fills the ``param`` column.
        tree_name: name of the tree inside the file.
        start, stop: optional entry range forwarded to ``tree2array``.
        additional_columns: optional ``{column name: constant value}``
            mapping added to the DataFrame.

    Returns:
        The DataFrame, with the extra columns ``cross_section``,
        ``luminosity``, ``event_weight_sum`` and ``event_weight``; or
        ``None`` if the file or the tree does not exist.

    Raises:
        RuntimeError: if ``variables`` contains the same name twice.
        ValueError: re-raised from ``tree2array`` after logging the file.
    """
    # None instead of a shared mutable default dictionary.
    if additional_columns is None:
        additional_columns = {}

    # Work on a fresh list so the caller's list is never modified.
    variables = [var for var in variables if not var.startswith("$")]

    # Check for repetitions in variables -> makes root_numpy crash #
    repeated_var = [
        item for item, count in collections.Counter(variables).items()
        if count > 1
    ]
    if repeated_var:
        logging.critical('There are repeated variables')
        for var in repeated_var:
            logging.critical('... %s' % var)
        raise RuntimeError("Repeated arguments for importing data")

    # Get root tree, check if exists first #
    if not os.path.exists(input_file):
        logging.warning("File %s does not exist" % input_file)
        print("File %s does not exist" % input_file)
        return None

    file_handle = TFile.Open(input_file)
    try:
        if not file_handle.GetListOfKeys().Contains(tree_name):
            logging.debug("Could not find tree %s in %s" %
                          (tree_name, input_file))
            return None
        tree = file_handle.Get(tree_name)
        N = tree.GetEntries()
        logging.debug('\tNumber of events : %d' % N)

        # Read the tree and convert it to a numpy structured array.  The
        # weight branch is only added when it was not requested already,
        # otherwise it would be the duplicate rejected above.
        if weight is not None and weight not in variables:
            variables.append(weight)
        try:
            data = tree2array(tree,
                              branches=variables,
                              selection=cut,
                              start=start,
                              stop=stop)
        except ValueError:
            logging.error("Issue with file {}".format(input_file))
            raise
    finally:
        # Release the file on every path, including the missing-tree return
        # and read errors.
        file_handle.Close()

    # Convert to pandas dataframe #
    df = pd.DataFrame(data)
    n_rows = df.shape[0]

    # Reweighting #
    relative_weight = 1
    if weight is not None and xsec is not None and event_weight_sum is not None:
        if luminosity is None:
            luminosity = 1
        relative_weight = xsec * luminosity / event_weight_sum
        logging.debug('\t\tReweighting requested')
        logging.debug('\t\t\tCross section : %0.5f' % xsec)
        logging.debug('\t\t\tEvent weight sum : %0.2f' % event_weight_sum)
        logging.debug('\t\t\tLuminosity : %0.2f' % luminosity)
        logging.debug('\t\tRelative weight %0.3e' % relative_weight)
        df['cross_section'] = np.ones(n_rows) * xsec
        df['luminosity'] = np.ones(n_rows) * luminosity
        df['event_weight_sum'] = np.ones(n_rows) * event_weight_sum
    else:
        df['cross_section'] = np.ones(n_rows)
        df['luminosity'] = np.ones(n_rows)
        df['event_weight_sum'] = np.ones(n_rows)
        # Without normalisation inputs, scale by the number of rows.
        if n_rows != 0:
            relative_weight /= n_rows

    if weight is not None:
        df['event_weight'] = df[weight] * relative_weight
    else:
        df['event_weight'] = np.ones(n_rows)

    if paramFun is not None:
        assert callable(paramFun)
        param = paramFun(os.path.basename(input_file))
        if param is None:
            param = 0
        df['param'] = np.ones(n_rows) * param

    # Register additional columns #
    for key, val in additional_columns.items():
        df[key] = pd.Series([val] * n_rows)

    # Slice printout #
    if start is not None or stop is not None:
        ni = start if start is not None else 0
        nf = stop if stop is not None else N
        logging.debug(
            "Reading from {} to {} in input tree (over {} entries)".format(
                ni, nf, N))

    return df
def test_PyROOT():
    """Convert a tree fetched from a file opened with a plain ``TFile``."""
    root_file = TFile(load('single1.root'))
    rnp.tree2array(root_file.Get('tree'))
weight__background = [] array__signal = [] array__background = [] name__signal = Options['SignalTree'] #wt_DR_nominal wt_DS name__background = Options['BackgroundTree'] #tt_nominal tt_radHi #Need to add lines to make sure I can use may files print name__signal #for filename in input: for name in name__signal: if file.Get(name) != None: print 'name', name tree__signal.append(file.Get(name)) event__signal.append( tree2array(tree__signal[-1], branches=variableList, selection='1')) weight__signal.append( tree2array(tree__signal[-1], branches=[Options['EventWeight']], selection='1')) # weight__signal.append(tree2array(tree__signal[-1], branches="EventWeight", selection='1')) array__signal.append([ list(elem) for elem in zip(event__signal[-1], weight__signal[-1]) ]) for name in name__background: if file.Get(name) != None: tree__background.append(file.Get(name)) event__background.append( tree2array(tree__background[-1], branches=variableList, selection='1'))
def reweight( sample, puType = 0 ):
    """Compute per-event pile-up weights for an MC sample and save them.

    For every data-taking epoch the normalised data distribution is divided
    by the MC distribution, bin by bin, and the ratio is looked up for each
    MC event.  The results are written to the file ``sample.puTree`` as one
    tree per epoch, named ``weights_<epoch>``, with two branches:
    ``PUweight`` (the ratio) and ``totWeight`` (the same ratio carrying the
    sign of the event's ``weight`` branch).

    Parameters:
        sample: object providing ``path`` (list; only the first file is
            read), ``tnpTree`` (tree name, or None to look for
            ``<dir>/fitter_tree``) and ``puTree`` (output file name).
        puType: which variable is reweighted --
            0: ``truePU``, MC distribution ``puMC[puMCscenario]``, data
               files from ``puDataEpoch``;
            1: ``event_nPV``, MC distribution histogrammed from the tree,
               data files from ``nVtxDataEpoch``;
            2: ``rho`` (truncated to int), MC distribution histogrammed from
               the tree, data files from ``rhoDataEpoch``.

    Exits the program with status 1 if ``sample.path`` is None.
    """
    if sample.path is None:
        print '[puReweighter]: Need to know the MC tree (option --mcTree or sample.path)'
        sys.exit(1)

    ### create a tree with only weights that will be used as friend tree for reweighting different lumi periods
    print 'Opening mc file: ', sample.path[0]
    fmc = rt.TFile(sample.path[0],'read')
    tmc = None
    if sample.tnpTree is None:
        # No tree name given: take "<dir>/fitter_tree" from the file's keys,
        # ignoring "sampleInfo".  If several keys qualify, the last one wins.
        dirs = fmc.GetListOfKeys()
        for d in dirs:
            if (d.GetName() == "sampleInfo"): continue
            tmc = fmc.Get("%s/fitter_tree" % d.GetName())
    else:
        tmc = fmc.Get(sample.tnpTree)

    #### can reweight vs nVtx but better to reweight v truePU
    # For puType 1 and 2 the MC distribution is built from the tree itself:
    # a 75-bin histogram, normalised to unit integral, copied into a list.
    puMCnVtx = []
    puMCrho = []
    if puType == 1 :
        hmc = rt.TH1F('hMC_nPV' ,'MC nPV' , 75,-0.5,74.5)
        tmc.Draw('event_nPV>>hMC_nPV','','goff')
        hmc.Scale(1/hmc.Integral())
        for ib in range(1,hmc.GetNbinsX()+1):
            puMCnVtx.append( hmc.GetBinContent(ib) )
        print 'len nvtxMC = ',len(puMCnVtx)
    elif puType == 2 :
        hmc = rt.TH1F('hMC_rho' ,'MC #rho' , 75,-0.5,74.5)
        tmc.Draw('rho>>hMC_rho','','goff')
        hmc.Scale(1/hmc.Integral())
        for ib in range(1,hmc.GetNbinsX()+1):
            puMCrho.append( hmc.GetBinContent(ib) )
        print 'len rhoMC = ',len(puMCrho)

    puDataDist = {}
    puDataArray= {}
    weights = {}
    # The set of epochs depends on which data distributions are used.
    epochKeys = puDataEpoch.keys()
    if puType == 1 : epochKeys = nVtxDataEpoch.keys()
    if puType == 2 : epochKeys = rhoDataEpoch.keys()

    for pu in epochKeys:
        fpu = None
        if puType == 1 : fpu = rt.TFile(nVtxDataEpoch[pu],'read')
        elif puType == 2 : fpu = rt.TFile(rhoDataEpoch[pu],'read')
        else : fpu = rt.TFile(puDataEpoch[pu],'read')
        # Clone and detach the data histogram so it survives fpu.Close().
        puDataDist[pu] = fpu.Get('pileup').Clone('puHist_%s' % pu)
        puDataDist[pu].Scale(1./puDataDist[pu].Integral())
        puDataDist[pu].SetDirectory(0)
        puDataArray[pu] = []
        # Sample the data histogram at integer values.
        # NOTE(review): the list length always follows puMC[puMCscenario],
        # also for puType 1 and 2, where the lookup below is bounded by the
        # 75-bin MC list instead -- confirm the two lengths are compatible.
        for ipu in range(len(puMC[puMCscenario])):
            ibin_pu = puDataDist[pu].GetXaxis().FindBin(ipu+0.00001)
            puDataArray[pu].append(puDataDist[pu].GetBinContent(ibin_pu))
        print 'puData[%s] length = %d' % (pu,len(puDataArray[pu]))
        fpu.Close()
        weights[pu] = []

    mcEvts = tree2array( tmc, branches = ['weight','truePU','event_nPV','rho'] )

    # Select the MC distribution matching puType.
    pumc = puMC[puMCscenario]
    if puType == 1: pumc = puMCnVtx
    elif puType == 2: pumc = puMCrho
    else : pumc = puMC[puMCscenario]

    puMax = len(pumc)
    print '-> nEvtsTot ', len(mcEvts)
    for ievt in xrange(len(mcEvts)):
        if ievt%1000000 == 0 : print 'iEvt:',ievt
        evt = mcEvts[ievt]
        for pu in epochKeys:
            # -1 marks "no value found"; the weight then stays at 1.
            pum = -1
            pud = -1
            if puType == 1 and evt['event_nPV'] < puMax:
                pud = puDataArray[pu][evt['event_nPV']]
                pum = pumc[evt['event_nPV']]
            if puType == 2 and int(evt['rho']) < puMax:
                pud = puDataArray[pu][int(evt['rho'])]
                pum = pumc[int(evt['rho'])]
            elif puType == 0:
                # No upper-bound check on truePU in this branch.
                pud = puDataArray[pu][evt['truePU']]
                pum = pumc[evt['truePU']]
            puw = 1
            if pum > 0:
                puw = pud/pum

            # totWeight keeps the sign of the event weight.
            if evt['weight'] > 0 : totw = +puw
            else : totw = -puw
            weights[pu].append( ( puw,totw) )

    # Write one tree of weights per epoch into the output file.
    newFile = rt.TFile( sample.puTree, 'recreate')
    for pu in epochKeys:
        treeWeight = rt.TTree('weights_%s'%pu,'tree with weights')
        wpuarray = np.array(weights[pu],dtype=[('PUweight',float),('totWeight',float)])
        array2tree( wpuarray, tree = treeWeight )
        treeWeight.Write()
    newFile.Close()
    fmc.Close()
from rootpy.io import root_open from root_numpy import tree2array import numpy as np import math as m import pprint from shapely.geometry import Polygon import waferGeometry f = root_open("test_triggergeom.root") cells_tree = f.Get("hgcaltriggergeomtester/TreeCells") TC_tree = f.Get("hgcaltriggergeomtester/TreeTriggerCells") cells_wafer_info = tree2array(cells_tree, branches=[ 'id', 'wafertype', 'wafer', 'layer', 'subdet', 'zside', 'x', 'y', 'cell', 'waferrow', 'wafercolumn' ]) cells_tc_info = tree2array(TC_tree, branches=[ 'triggercell', 'c_id', 'c_cell', 'wafer', 'layer', 'subdet', 'zside' ]) f.close() ######################################################### ## FUNCTIONS ## def ExtractMappingCoordinates(koordx, koordy, d, x0, y0): distx = koordx - x0