def readFromRootFile(self,filename,TupleMeanStd, weighter):
    """Read one ROOT file and fill ``self.w``, ``self.x`` and ``self.y``.

    Only the first branch group (``self.branches[0]``) is converted, through
    ``MeanNormZeroPad``. Weights come from ``weighter.getJetWeights`` when
    ``self.weight`` is set, otherwise from the not-remove indices when
    ``self.remove`` is set, otherwise they are all ones. With ``self.remove``
    set, only rows whose not-remove index is positive are kept.
    ``self.nsamples`` ends up as the number of rows left.

    Args:
        filename: ROOT file to read; the tree is ``deepntuplizer/tree``.
        TupleMeanStd: forwarded unchanged to ``MeanNormZeroPad``.
        weighter: provides ``createNotRemoveIndices`` and ``getJetWeights``.
    """
    from preprocessing import MeanNormZeroPad
    import numpy
    from stopwatch import stopwatch
    import c_meanNormZeroPad

    c_meanNormZeroPad.zeroPad()

    timer = stopwatch()
    total_timer = stopwatch()  # created as in the original; never read here

    import ROOT

    # wait for the file before opening it (timeout argument: 120)
    fileTimeOut(filename, 120)
    root_file = ROOT.TFile(filename)
    jet_tree = root_file.Get("deepntuplizer/tree")
    self.nsamples = jet_tree.GetEntries()
    print('took ', timer.getAndReset(), ' seconds for getting tree entries')

    # global (per-jet) branches only
    global_features = MeanNormZeroPad(
        filename,
        TupleMeanStd,
        [self.branches[0]],
        [self.branchcutoffs[0]],
        self.nsamples,
    )
    print('took ', timer.getAndReset(), ' seconds for mean norm and zero padding (C module)')

    jets = self.readTreeFromRootToTuple(filename)

    if self.remove:
        keep_index = weighter.createNotRemoveIndices(jets)
        # entries flagged 'isUndefined' are subtracted from the keep index in place
        keep_index -= jets['isUndefined']
        print('took ', timer.getAndReset(), ' to create remove indices')

    if self.weight:
        weights = weighter.getJetWeights(jets)
    elif self.remove:
        weights = keep_index
    else:
        print('neither remove nor weight')
        weights = numpy.ones(self.nsamples)

    truth = self.reduceTruth(jets[self.truthclasses])

    if self.remove:
        print('remove')
        selected = keep_index > 0
        weights = weights[selected]
        global_features = global_features[selected]
        truth = truth[selected]

    rows_left = global_features.shape[0]
    print('reduced content to ', int(float(rows_left)/float(self.nsamples)*100),'%')
    self.nsamples = rows_left

    self.w = [weights]
    self.x = [global_features]
    self.y = [truth]
def getFlavourClassificationData(self,filename,TupleMeanStd, weighter):
    """Build weights, inputs and truth for flavour classification from one file.

    All branch groups in ``self.branches`` are converted in a single
    ``MeanNormZeroPad`` call. ``self.remove`` takes precedence over
    ``self.weight`` here: with ``self.remove`` set, the not-remove indices
    double as weights and rows with a non-positive index are dropped.
    ``self.nsamples`` is updated to the number of rows returned.

    Args:
        filename: ROOT file to read; the tree name is ``self.treename``.
        TupleMeanStd: forwarded unchanged to ``MeanNormZeroPad``.
        weighter: provides ``createNotRemoveIndices`` and ``getJetWeights``.

    Returns:
        ``(weights, x_all, alltruth, notremoves)``. ``notremoves`` is an empty
        array unless ``self.remove`` is set.
    """
    from stopwatch import stopwatch

    timer = stopwatch()        # created as in the original; never read here
    total_timer = stopwatch()  # created as in the original; never read here

    import ROOT

    # wait for the file before opening it (timeout argument: 120)
    fileTimeOut(filename, 120)
    root_file = ROOT.TFile(filename)
    jet_tree = root_file.Get(self.treename)
    self.nsamples = jet_tree.GetEntries()

    jets = self.readTreeFromRootToTuple(filename)
    features = MeanNormZeroPad(
        filename,
        TupleMeanStd,
        self.branches,
        self.branchcutoffs,
        self.nsamples,
    )

    keep_index = numpy.array([])
    if self.remove:
        keep_index = weighter.createNotRemoveIndices(jets)
        weights = keep_index
    elif self.weight:
        weights = weighter.getJetWeights(jets)
    else:
        print('neither remove nor weight')
        weights = numpy.empty(self.nsamples)
        weights.fill(1.)

    truth = self.reduceTruth(jets[self.truthclasses])

    if self.remove:
        selected = keep_index > 0
        weights = weights[selected]
        features = features[selected]
        truth = truth[selected]

    self.nsamples = features.shape[0]

    return weights, features, truth, keep_index
def readFromRootFile(self,filename,TupleMeanStd, weighter):
    """Read one ROOT file and fill ``self.x`` and ``self.y`` for regression.

    Three branch groups are converted: group 0 with ``MeanNormZeroPad`` and
    groups 1 and 2 with ``MeanNormZeroPadParticles``. ``None`` is passed to
    these helpers in place of ``TupleMeanStd``. The second target is the
    per-jet ratio ``gen_pt_WithNu / jet_corr_pt``.

    ``self.w`` is not assigned here; the method ends by calling
    ``self._normalize_input_(weighter, npy_array)``.
    NOTE(review): presumably that call sets the weights and applies any
    removal - confirm against its definition.

    Args:
        filename: ROOT file to read; the tree is ``deepntuplizer/tree``.
        TupleMeanStd: accepted for interface compatibility; not used here.
        weighter: forwarded to ``self._normalize_input_``.
    """
    from preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles
    import numpy
    from stopwatch import stopwatch

    sw = stopwatch()

    import ROOT

    # wait for the file before opening it (timeout argument: 120)
    fileTimeOut(filename, 120)
    rfile = ROOT.TFile(filename)
    tree = rfile.Get("deepntuplizer/tree")
    self.nsamples = tree.GetEntries()

    print('took ', sw.getAndReset(), ' seconds for getting tree entries')

    # split for convolutional network
    x_global = MeanNormZeroPad(
        filename, None,
        [self.branches[0]],
        [self.branchcutoffs[0]], self.nsamples
    )
    x_cpf = MeanNormZeroPadParticles(
        filename, None,
        self.branches[1],
        self.branchcutoffs[1], self.nsamples
    )
    x_sv = MeanNormZeroPadParticles(
        filename, None,
        self.branches[2],
        self.branchcutoffs[2], self.nsamples
    )

    print('took ', sw.getAndReset(), ' seconds for mean norm and zero padding (C module)')

    npy_array = self.readTreeFromRootToTuple(filename)

    reg_truth = npy_array['gen_pt_WithNu'].view(numpy.ndarray)
    reco_pt = npy_array['jet_corr_pt'].view(numpy.ndarray)

    # One vectorised division instead of a Python loop over every jet; the
    # result is still a float64 array of length self.nsamples.
    correctionfactor = numpy.zeros(self.nsamples)
    correctionfactor[:] = reg_truth[:self.nsamples] / reco_pt[:self.nsamples]

    truthtuple = npy_array[self.truthclasses]
    alltruth = self.reduceTruth(truthtuple)

    self.x = [x_global, x_cpf, x_sv, reco_pt]
    self.y = [alltruth, correctionfactor]
    self._normalize_input_(weighter, npy_array)
def readFromRootFile(self,filename,TupleMeanStd, weighter):
    """Read one ROOT file and fill ``self.w``, ``self.x`` and ``self.y``.

    Branch group 0 is converted with ``MeanNormZeroPad``; groups 1 and 2
    with ``MeanNormZeroPadParticles``. Both particle arrays are then
    reordered along their second axis by ascending value of feature 0.
    The regression truth and reco values (``self.regtruth``,
    ``self.regreco``) are read from the tuple. With ``self.remove`` set,
    only rows whose not-remove index is positive are kept.

    Fix relative to the previous version: the statement
    ``hf = np_slice.any(axis=1)`` referenced an undefined name and raised
    ``NameError`` whenever ``self.remove`` was set. Its result was never
    used, so it has been removed.

    Args:
        filename: ROOT file to read; the tree is ``deepntuplizer/tree``.
        TupleMeanStd: forwarded unchanged to the padding helpers.
        weighter: provides ``createNotRemoveIndices`` and ``getJetWeights``.
    """
    from preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles
    import numpy
    from stopwatch import stopwatch
    import c_meanNormZeroPad

    c_meanNormZeroPad.zeroPad()

    sw = stopwatch()

    import ROOT

    def _sort_particles(particles):
        # Reorder axis 1 of a 3-d array by ascending feature 0 (the original
        # comment calls feature 0 the pt ratio). The argsort result is
        # repeated over the feature axis so that whole particles move together.
        shape = particles.shape
        order = particles[:, :, 0].argsort()
        static_idxs = numpy.indices(shape)
        order = order.reshape((shape[0], shape[1], 1))
        order = numpy.repeat(order, shape[2], axis=2)
        return particles[static_idxs[0], order, static_idxs[2]]

    # wait for the file before opening it (timeout argument: 120)
    fileTimeOut(filename, 120)
    rfile = ROOT.TFile(filename)
    tree = rfile.Get("deepntuplizer/tree")
    self.nsamples = tree.GetEntries()

    print('took ', sw.getAndReset(), ' seconds for getting tree entries')

    # split for convolutional network
    x_global = MeanNormZeroPad(filename, TupleMeanStd,
                               [self.branches[0]],
                               [self.branchcutoffs[0]], self.nsamples)

    x_cpf = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                     self.branches[1],
                                     self.branchcutoffs[1], self.nsamples)

    x_npf = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                     self.branches[2],
                                     self.branchcutoffs[2], self.nsamples)

    print('took ', sw.getAndReset(), ' seconds for mean norm and zero padding (C module)')

    nparray = self.readTreeFromRootToTuple(filename)
    if self.remove:
        notremoves = weighter.createNotRemoveIndices(nparray)
        undef = nparray['isUndefined']
        # entries flagged 'isUndefined' are subtracted from the keep index in place
        notremoves -= undef
        print('took ', sw.getAndReset(), ' to create remove indices')

    if self.weight:
        weights = weighter.getJetWeights(nparray)
    elif self.remove:
        weights = notremoves
    else:
        print('neither remove nor weight')
        weights = numpy.ones(self.nsamples)

    pttruth = nparray[self.regtruth]
    ptreco = nparray[self.regreco]

    truthtuple = nparray[self.truthclasses]
    alltruth = self.reduceTruth(truthtuple)

    # sort the particle vectors (according to feature 0 at the moment)
    x_cpf = _sort_particles(x_cpf)
    x_npf = _sort_particles(x_npf)

    if self.remove:
        print('remove')
        keep = notremoves > 0
        weights = weights[keep]
        x_global = x_global[keep]
        x_cpf = x_cpf[keep]
        x_npf = x_npf[keep]
        alltruth = alltruth[keep]
        pttruth = pttruth[keep]
        ptreco = ptreco[keep]

    newnsamp = x_global.shape[0]
    print('reduced content to ', int(float(newnsamp)/float(self.nsamples)*100),'%')
    self.nsamples = newnsamp

    self.w = [weights]
    self.x = [x_global, x_cpf, x_npf, ptreco]
    self.y = [alltruth, pttruth]
def readFromRootFile(self,filename,TupleMeanStd, weighter):
    """Read one ROOT file and fill ``self.w``, ``self.x`` and ``self.y``.

    All branches are converted with ``MeanNormZeroPad``; the truth is the
    ``gen_mttbar`` branch. With ``self.remove`` set, only rows whose
    not-remove index is positive are kept. Weights are all ones.

    Fixes relative to the previous version:

    * the weight array was allocated with the pre-removal sample count, so
      after a removal it no longer matched ``x`` and ``y`` in length; it is
      now sized from the rows that remain;
    * the "kept" message used the not-yet-updated ``self.nsamples`` and
      truncated to ``int`` before multiplying by 100, so it always reported
      100%; it now reports the real percentage.

    Args:
        filename: ROOT file to read; the tree name is ``self.treename``.
        TupleMeanStd: forwarded unchanged to ``MeanNormZeroPad``.
        weighter: provides ``createNotRemoveIndices``.
    """
    from preprocessing import MeanNormZeroPad, MeanNormZeroPadParticles
    import numpy

    import ROOT

    # wait for the file before opening it (timeout argument: 120)
    fileTimeOut(filename, 120)
    rfile = ROOT.TFile(filename)
    tree = rfile.Get(self.treename)
    self.nsamples = tree.GetEntries()

    Tuple = self.readTreeFromRootToTuple(filename)

    ###########################################################################
    # This is where you define how to read in the branches and what to do
    # with them.
    ###########################################################################
    # MeanNormZeroPad means that all branches are just put into a serial
    # list, such as: jet1_pt, jet1_eta, jet2_pt, jet2_eta, ...
    # If there are not sufficient jets, the rest of the list is filled with
    # zeros (zero padding).
    # In addition, the variables are transformed such that they are centred
    # around zero and the width of the distribution is about 1.
    # This is only a technical trick that makes it easier for the DNN to
    # converge.
    reco_global = MeanNormZeroPad(filename, TupleMeanStd,
                                  self.branches,
                                  self.branchcutoffs, self.nsamples)

    # Another choice for the preprocessing that will be important for you is
    # MeanNormZeroPadParticles. It does the same rescaling as
    # MeanNormZeroPad, but organises the array as a 2D array per event, such
    # that e.g. each jet has its own list. This can be important when e.g.
    # using more evolved neural networks than just dense layers. We will come
    # to this later; an example is already given here (but commented):
    #
    # reco_jetslist = MeanNormZeroPadParticles(filename, TupleMeanStd,
    #                     self.branches[3],       # the jet branches
    #                     self.branchcutoffs[3],  # the jet branch cut-offs
    #                     self.nsamples)

    # Here we read the branch that contains the truth information.
    truth = Tuple['gen_mttbar']

    oldlength = self.nsamples

    if self.remove:
        notremoves = weighter.createNotRemoveIndices(Tuple)
        # This has to be done for each array produced before - don't forget!
        # It selects only the entries from the array that should not be
        # removed (where the notremoves array has an entry above 0).
        keep = notremoves > 0
        reco_global = reco_global[keep]
        truth = truth[keep]
        if oldlength > 0:
            kept_percent = int(float(truth.shape[0]) / float(oldlength) * 100)
            print("kept "+str(kept_percent)+"%")

    # Update the sample count first so that the weights match x and y.
    self.nsamples = truth.shape[0]

    # We don't use weights for now, so we fill the weight array with 1.
    weights = numpy.empty(self.nsamples)
    weights.fill(1.)

    # Any array that should be used by the DNN needs to be added here:
    # w: these are the weights (you don't have to change this)
    # x: this is the reconstructed information to fill
    # y: the true information
    self.w = [weights]
    self.x = [reco_global]
    self.y = [truth]
def readFromRootFile(self,filename,TupleMeanStd, weighter):
    """Read one ROOT file and fill ``self.w``, ``self.x`` and ``self.y``.

    Branch group 0 is converted with ``MeanNormZeroPad``; groups 1 and 2
    with ``MeanNormZeroPadParticles``. Weights come from
    ``weighter.getJetWeights`` when ``self.weight`` is set, otherwise from
    the not-remove indices when ``self.remove`` is set, otherwise they are
    all ones. With ``self.remove`` set, only rows whose not-remove index is
    positive are kept.

    Args:
        filename: ROOT file to read; the tree is ``deepntuplizer/tree``.
        TupleMeanStd: forwarded unchanged to the padding helpers.
        weighter: provides ``createNotRemoveIndices`` and ``getJetWeights``.
    """
    from preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles
    import numpy
    from stopwatch import stopwatch

    timer = stopwatch()
    total_timer = stopwatch()  # created as in the original; never read here

    import ROOT

    # wait for the file before opening it (timeout argument: 120)
    fileTimeOut(filename, 120)
    root_file = ROOT.TFile(filename)
    jet_tree = root_file.Get("deepntuplizer/tree")
    self.nsamples = jet_tree.GetEntries()
    print('took ', timer.getAndReset(), ' seconds for getting tree entries')

    # one flat array for group 0, one per-particle array for groups 1 and 2
    global_features = MeanNormZeroPad(
        filename,
        TupleMeanStd,
        [self.branches[0]],
        [self.branchcutoffs[0]],
        self.nsamples,
    )
    features_a = MeanNormZeroPadParticles(
        filename,
        TupleMeanStd,
        self.branches[1],
        self.branchcutoffs[1],
        self.nsamples,
    )
    features_b = MeanNormZeroPadParticles(
        filename,
        TupleMeanStd,
        self.branches[2],
        self.branchcutoffs[2],
        self.nsamples,
    )
    print('took ', timer.getAndReset(), ' seconds for mean norm and zero padding (C module)')

    jets = self.readTreeFromRootToTuple(filename)

    if self.remove:
        keep_index = weighter.createNotRemoveIndices(jets)
        print('took ', timer.getAndReset(), ' to create remove indices')

    if self.weight:
        weights = weighter.getJetWeights(jets)
    elif self.remove:
        weights = keep_index
    else:
        print('neither remove nor weight')
        weights = numpy.empty(self.nsamples)
        weights.fill(1.)

    truth = self.reduceTruth(jets[self.truthclasses])

    if self.remove:
        print('remove')
        selected = keep_index > 0
        weights = weights[selected]
        global_features = global_features[selected]
        features_a = features_a[selected]
        features_b = features_b[selected]
        truth = truth[selected]

    rows_left = global_features.shape[0]
    print('reduced content to ', int(float(rows_left)/float(self.nsamples)*100),'%')
    self.nsamples = rows_left

    print(global_features.shape,self.nsamples)

    self.w = [weights]
    self.x = [global_features, features_a, features_b]
    self.y = [truth]
def readFromRootFile(self, filename, TupleMeanStd, weighter):
    """Read one ROOT file and fill ``self.w``, ``self.x`` and ``self.y``.

    Produces three inputs: the global branches (``MeanNormZeroPad``), the
    particle-wise branches of group 1 (``MeanNormZeroPadParticles``) and a
    map built by ``createDensityMap`` from ``'Cpfcan_erel'``. With
    ``self.remove`` set, entries flagged by ``self.undefTruth`` are
    subtracted from the not-remove index and only rows with a positive
    index are kept.

    Fix relative to the previous version: ``createDensityMap`` was called
    without being part of the local ``from preprocessing import ...``; it is
    now imported alongside the other preprocessing helpers.

    Args:
        filename: ROOT file to read; the tree is ``deepntuplizer/tree``.
        TupleMeanStd: forwarded unchanged to the preprocessing helpers.
        weighter: provides ``createNotRemoveIndices`` and ``getJetWeights``.
    """
    # the first part is standard, no changes needed
    from preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles, createDensityMap
    import numpy
    import ROOT

    # wait for the file before opening it (timeout argument: 120)
    fileTimeOut(filename, 120)
    rfile = ROOT.TFile(filename)
    tree = rfile.Get("deepntuplizer/tree")
    self.nsamples = tree.GetEntries()

    # the definition of what to do with the branches

    # those are the global branches (jet pt etc)
    # they should be just glued to each other in one vector
    # and zero padded (and mean subtracted and normalised)
    x_global = MeanNormZeroPad(filename, TupleMeanStd,
                               [self.branches[0]],
                               [self.branchcutoffs[0]], self.nsamples)

    # the second part (the pf candidates) should be treated particle wise
    # an array with (njets, nparticles, nproperties) is created
    x_cpf = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                     self.branches[1],
                                     self.branchcutoffs[1], self.nsamples)

    # maybe also an image of the energy density of charged particles
    # should be added
    x_chmap = createDensityMap(
        filename, TupleMeanStd,
        'Cpfcan_erel',  # use the energy to create the image
        self.nsamples,
        # 7 bins in eta with a total width of 2*0.9
        ['Cpfcan_eta', 'jet_eta', 7, 0.9],
        # 7 bins in phi with a total width of 2*0.9
        ['Cpfcan_phi', 'jet_phi', 7, 0.9],
        'nCpfcand',
        # the last is an offset because the relative energy as
        # can be found in the ntuples is shifted by 1
        -1)

    # now, some jets are removed to avoid pt and eta biases
    Tuple = self.readTreeFromRootToTuple(filename)
    if self.remove:
        # jets are removed until the shapes in eta and pt are the same as
        # the truth class 'isQCD'
        notremoves = weighter.createNotRemoveIndices(Tuple)
        undef = Tuple[self.undefTruth]
        notremoves -= undef

    if self.weight:
        weights = weighter.getJetWeights(Tuple)
    elif self.remove:
        weights = notremoves
    else:
        print('neither remove nor weight')
        weights = numpy.empty(self.nsamples)
        weights.fill(1.)

    # create all collections:
    truthtuple = Tuple[self.truthclasses]
    alltruth = self.reduceTruth(truthtuple)

    # remove the entries to get same jet shapes
    if self.remove:
        print('remove')
        keep = notremoves > 0
        weights = weights[keep]
        x_global = x_global[keep]
        x_cpf = x_cpf[keep]
        x_chmap = x_chmap[keep]
        alltruth = alltruth[keep]

    newnsamp = x_global.shape[0]
    print('reduced content to ', int(float(newnsamp) / float(self.nsamples) * 100), '%')
    self.nsamples = newnsamp

    # fill everything
    self.w = [weights]
    self.x = [x_global, x_cpf, x_chmap]
    self.y = [alltruth]
def readFromRootFile(self, filename, TupleMeanStd, weighter):
    """Read one ROOT file and fill ``self.w``, ``self.x`` and ``self.y``.

    Inputs are the global branches (``MeanNormZeroPad``), a map formed by
    concatenating four arrays along axis 3 (charged density, charged count,
    neutral density, neutral count, in that order) and the reco regression
    value ``self.regreco``. Targets are the reduced truth and
    ``self.regtruth``. With ``self.remove`` set, only rows whose not-remove
    index is positive are kept.

    Args:
        filename: ROOT file to read; the tree is ``deepntuplizer/tree``.
        TupleMeanStd: forwarded unchanged to the preprocessing helpers.
        weighter: provides ``createNotRemoveIndices`` and ``getJetWeights``.
    """
    from preprocessing import MeanNormApply, MeanNormZeroPad, createDensityMap, createCountMap, MeanNormZeroPadParticles
    import numpy
    from stopwatch import stopwatch

    timer = stopwatch()
    total_timer = stopwatch()  # created as in the original; never read here

    import ROOT

    def _axis(branch, center):
        # a fresh [branch, center branch, 20, 0.5] list for every call, as in
        # the original where each call received its own list literal
        return [branch, center, 20, 0.5]

    # wait for the file before opening it (timeout argument: 120)
    fileTimeOut(filename, 120)
    root_file = ROOT.TFile(filename)
    jet_tree = root_file.Get("deepntuplizer/tree")
    self.nsamples = jet_tree.GetEntries()
    print('took ', timer.getAndReset(), ' seconds for getting tree entries')

    global_features = MeanNormZeroPad(
        filename,
        TupleMeanStd,
        [self.branches[0]],
        [self.branchcutoffs[0]],
        self.nsamples,
    )

    # charged candidates: density map, then count map
    charged_map = createDensityMap(
        filename, TupleMeanStd,
        'Cpfcan_ptrel',
        self.nsamples,
        _axis('Cpfcan_eta', 'jet_eta'),
        _axis('Cpfcan_phi', 'jet_phi'),
        'nCpfcand', -1,
        weightbranch='Cpfcan_puppiw',
    )
    charged_count = createCountMap(
        filename, TupleMeanStd,
        self.nsamples,
        _axis('Cpfcan_eta', 'jet_eta'),
        _axis('Cpfcan_phi', 'jet_phi'),
        'nCpfcand',
    )

    # neutral candidates: density map, then count map
    neutral_map = createDensityMap(
        filename, TupleMeanStd,
        'Npfcan_ptrel',
        self.nsamples,
        _axis('Npfcan_eta', 'jet_eta'),
        _axis('Npfcan_phi', 'jet_phi'),
        'nNpfcand', -1,
        weightbranch='Npfcan_puppiw',
    )
    neutral_count = createCountMap(
        filename, TupleMeanStd,
        self.nsamples,
        _axis('Npfcan_eta', 'jet_eta'),
        _axis('Npfcan_phi', 'jet_phi'),
        'nNpfcand',
    )
    print('took ', timer.getAndReset(), ' seconds for mean norm and zero padding (C module)')

    jets = self.readTreeFromRootToTuple(filename)

    if self.remove:
        keep_index = weighter.createNotRemoveIndices(jets)
        # entries flagged 'isUndefined' are subtracted from the keep index in place
        keep_index -= jets['isUndefined']
        print('took ', timer.getAndReset(), ' to create remove indices')

    if self.weight:
        weights = weighter.getJetWeights(jets)
    elif self.remove:
        weights = keep_index
    else:
        print('neither remove nor weight')
        weights = numpy.ones(self.nsamples)

    pt_truth = jets[self.regtruth]
    pt_reco = jets[self.regreco]
    truth = self.reduceTruth(jets[self.truthclasses])

    stacked_map = numpy.concatenate(
        (charged_map, charged_count, neutral_map, neutral_count), axis=3
    )

    if self.remove:
        print('remove')
        selected = keep_index > 0
        weights = weights[selected]
        global_features = global_features[selected]
        stacked_map = stacked_map[selected]
        truth = truth[selected]
        pt_truth = pt_truth[selected]
        pt_reco = pt_reco[selected]

    rows_left = global_features.shape[0]
    print('reduced content to ', int(float(rows_left) / float(self.nsamples) * 100), '%')
    self.nsamples = rows_left

    print(global_features.shape, self.nsamples)

    self.w = [weights]
    self.x = [global_features, stacked_map, pt_reco]
    self.y = [truth, pt_truth]
def readFromRootFile(self, filename, TupleMeanStd, weighter):
    """Read one ROOT file and fill ``self.w``, ``self.x`` and ``self.y``.

    Inputs are the global branches, three particle-wise arrays (branch
    groups 1-3), a map formed by concatenating the charged and neutral
    ``createDensity`` outputs and the charged and neutral count maps along
    axis 3, and ``jet_corr_pt``. Targets are the reduced truth and
    ``gen_pt_WithNu``. The same weights are stored twice in ``self.w``.
    With ``self.remove`` set, only rows whose not-remove index is positive
    are kept.

    Fix relative to the previous version: the neutral density map is built
    from ``Npfcan_*`` branches but was given ``counterbranch='nCpfcand'``
    (the charged counter). It now uses ``'nNpfcand'``, like the neutral
    count map in this same method.

    Args:
        filename: ROOT file to read; the tree is ``deepntuplizer/tree``.
        TupleMeanStd: forwarded unchanged to the preprocessing helpers that
            take it.
        weighter: provides ``createNotRemoveIndices`` and ``getJetWeights``.
    """
    from preprocessing import MeanNormApply, createCountMap, createDensity, MeanNormZeroPad, createDensityMap, MeanNormZeroPadParticles
    import numpy
    from stopwatch import stopwatch

    sw = stopwatch()

    import ROOT

    # wait for the file before opening it (timeout argument: 120)
    fileTimeOut(filename, 120)
    rfile = ROOT.TFile(filename)
    tree = rfile.Get("deepntuplizer/tree")
    self.nsamples = tree.GetEntries()

    print('took ', sw.getAndReset(), ' seconds for getting tree entries')

    # split for convolutional network
    x_global = MeanNormZeroPad(filename, TupleMeanStd,
                               [self.branches[0]],
                               [self.branchcutoffs[0]], self.nsamples)

    x_cpf = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                     self.branches[1],
                                     self.branchcutoffs[1], self.nsamples)

    x_npf = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                     self.branches[2],
                                     self.branchcutoffs[2], self.nsamples)

    x_sv = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                    self.branches[3],
                                    self.branchcutoffs[3], self.nsamples)

    # here the difference starts
    nbins = 8

    x_chmap = createDensity(
        filename,
        inbranches=['Cpfcan_ptrel', 'Cpfcan_etarel', 'Cpfcan_phirel'],
        modes=['sum', 'average', 'average'],
        nevents=self.nsamples,
        dimension1=['Cpfcan_eta', 'jet_eta', nbins, 0.45],
        dimension2=['Cpfcan_phi', 'jet_phi', nbins, 0.45],
        counterbranch='nCpfcand',
        offsets=[-1, -0.5, -0.5])

    x_neumap = createDensity(
        filename,
        inbranches=['Npfcan_ptrel', 'Npfcan_etarel', 'Npfcan_phirel'],
        modes=['sum', 'average', 'average'],
        nevents=self.nsamples,
        dimension1=['Npfcan_eta', 'jet_eta', nbins, 0.45],
        dimension2=['Npfcan_phi', 'jet_phi', nbins, 0.45],
        # neutral candidates are counted by 'nNpfcand', not 'nCpfcand'
        counterbranch='nNpfcand',
        offsets=[-1, -0.5, -0.5])

    x_chcount = createCountMap(filename, TupleMeanStd,
                               self.nsamples,
                               ['Cpfcan_eta', 'jet_eta', nbins, 0.45],
                               ['Cpfcan_phi', 'jet_phi', nbins, 0.45],
                               'nCpfcand')

    x_neucount = createCountMap(filename, TupleMeanStd,
                                self.nsamples,
                                ['Npfcan_eta', 'jet_eta', nbins, 0.45],
                                ['Npfcan_phi', 'jet_phi', nbins, 0.45],
                                'nNpfcand')

    print('took ', sw.getAndReset(), ' seconds for mean norm and zero padding (C module)')

    Tuple = self.readTreeFromRootToTuple(filename)

    if self.remove:
        notremoves = weighter.createNotRemoveIndices(Tuple)
        undef = Tuple['isUndefined']
        # entries flagged 'isUndefined' are subtracted from the keep index in place
        notremoves -= undef
        print('took ', sw.getAndReset(), ' to create remove indices')

    if self.weight:
        weights = weighter.getJetWeights(Tuple)
    elif self.remove:
        weights = notremoves
    else:
        print('neither remove nor weight')
        weights = numpy.empty(self.nsamples)
        weights.fill(1.)

    truthtuple = Tuple[self.truthclasses]
    alltruth = self.reduceTruth(truthtuple)

    regtruth = Tuple['gen_pt_WithNu']
    regreco = Tuple['jet_corr_pt']

    if self.remove:
        print('remove')
        keep = notremoves > 0
        weights = weights[keep]
        x_global = x_global[keep]
        x_cpf = x_cpf[keep]
        x_npf = x_npf[keep]
        x_sv = x_sv[keep]

        x_chmap = x_chmap[keep]
        x_neumap = x_neumap[keep]

        x_chcount = x_chcount[keep]
        x_neucount = x_neucount[keep]

        alltruth = alltruth[keep]

        regreco = regreco[keep]
        regtruth = regtruth[keep]

    newnsamp = x_global.shape[0]
    print('reduced content to ', int(float(newnsamp) / float(self.nsamples) * 100), '%')
    self.nsamples = newnsamp

    x_map = numpy.concatenate((x_chmap, x_neumap, x_chcount, x_neucount), axis=3)

    self.w = [weights, weights]
    self.x = [x_global, x_cpf, x_npf, x_sv, x_map, regreco]
    self.y = [alltruth, regtruth]
def readFromRootFile(self, filename, TupleMeanStd, weighter):
    """Read one ROOT file and fill ``self.w``, ``self.x`` and ``self.y``.

    Inputs are the global branches (group 0), three particle-wise arrays
    (groups 1-3) and a flat regression input (group 4). Targets are the
    reduced truth and the per-jet ratio ``gen_pt_WithNu / jet_corr_pt``.
    The same weights are stored twice in ``self.w``. With ``self.remove``
    set, only rows whose not-remove index is positive are kept.

    Change relative to the previous version: the ratio is computed with one
    vectorised division instead of a Python loop over every jet; the values
    and the float64 result array are the same.

    Args:
        filename: ROOT file to read; the tree is ``deepntuplizer/tree``.
        TupleMeanStd: forwarded unchanged to the padding helpers.
        weighter: provides ``createNotRemoveIndices`` and ``getJetWeights``.
    """
    from preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles
    import numpy
    from stopwatch import stopwatch

    sw = stopwatch()

    import ROOT

    # wait for the file before opening it (timeout argument: 120)
    fileTimeOut(filename, 120)
    rfile = ROOT.TFile(filename)
    tree = rfile.Get("deepntuplizer/tree")
    self.nsamples = tree.GetEntries()

    print('took ', sw.getAndReset(), ' seconds for getting tree entries')

    # split for convolutional network
    x_global = MeanNormZeroPad(filename, TupleMeanStd,
                               [self.branches[0]],
                               [self.branchcutoffs[0]], self.nsamples)

    x_cpf = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                     self.branches[1],
                                     self.branchcutoffs[1], self.nsamples)

    x_npf = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                     self.branches[2],
                                     self.branchcutoffs[2], self.nsamples)

    x_sv = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                    self.branches[3],
                                    self.branchcutoffs[3], self.nsamples)

    x_reg = MeanNormZeroPad(filename, TupleMeanStd,
                            [self.branches[4]],
                            [self.branchcutoffs[4]], self.nsamples)

    print('took ', sw.getAndReset(), ' seconds for mean norm and zero padding (C module)')

    Tuple = self.readTreeFromRootToTuple(filename)

    reg_truth = Tuple['gen_pt_WithNu'].view(numpy.ndarray)
    reco_pt = Tuple['jet_corr_pt'].view(numpy.ndarray)

    # element-wise truth/reco ratio, stored as float64 of length nsamples
    correctionfactor = numpy.zeros(self.nsamples)
    correctionfactor[:] = reg_truth[:self.nsamples] / reco_pt[:self.nsamples]

    if self.remove:
        notremoves = weighter.createNotRemoveIndices(Tuple)
        undef = Tuple['isUndefined']
        # entries flagged 'isUndefined' are subtracted from the keep index in place
        notremoves -= undef
        print('took ', sw.getAndReset(), ' to create remove indices')

    if self.weight:
        weights = weighter.getJetWeights(Tuple)
    elif self.remove:
        weights = notremoves
    else:
        print('neither remove nor weight')
        weights = numpy.empty(self.nsamples)
        weights.fill(1.)

    truthtuple = Tuple[self.truthclasses]
    alltruth = self.reduceTruth(truthtuple)

    if self.remove:
        print('remove')
        keep = notremoves > 0
        weights = weights[keep]
        x_global = x_global[keep]
        x_cpf = x_cpf[keep]
        x_npf = x_npf[keep]
        x_sv = x_sv[keep]
        alltruth = alltruth[keep]

        x_reg = x_reg[keep]
        correctionfactor = correctionfactor[keep]

    newnsamp = x_global.shape[0]
    print('reduced content to ', int(float(newnsamp) / float(self.nsamples) * 100), '%')
    self.nsamples = newnsamp

    print(x_global.shape, self.nsamples)

    self.w = [weights, weights]
    self.x = [x_global, x_cpf, x_npf, x_sv, x_reg]
    self.y = [alltruth, correctionfactor]
def readFromRootFile(self,filename,TupleMeanStd, weighter):
    """Read one ROOT file and fill ``self.w``, ``self.x`` and ``self.y``.

    The global branches go through ``MeanNormZeroPad``. Charged candidates,
    neutral candidates and secondary vertices each go through
    ``MeanNormZeroPadBinned``, which returns a pair; the second elements of
    the three pairs are concatenated along axis 3 into one "sum" input.
    Targets are the reduced truth and ``self.regtruth``. With
    ``self.remove`` set, only rows whose not-remove index is positive are
    kept.

    Args:
        filename: ROOT file to read; the tree is ``deepntuplizer/tree``.
        TupleMeanStd: forwarded unchanged to the preprocessing helpers.
        weighter: provides ``createNotRemoveIndices`` and ``getJetWeights``.
    """
    from preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles, MeanNormZeroPadBinned
    import numpy
    from stopwatch import stopwatch

    timer = stopwatch()
    total_timer = stopwatch()  # created as in the original; never read here

    import ROOT

    def _binned(counter_branch, eta_branch, phi_branch, group, sums_key):
        # Argument layout taken from the original comments:
        #   (dimension, center, nbins, half width) for the X and Y axes,
        #   (means/std, branches to use, per-bin number of particles kept),
        #   (scaling, variables to be summed).
        return MeanNormZeroPadBinned(
            filename, counter_branch, self.nsamples,
            (eta_branch, 'jet_eta', self.nbins, self.jet_radius),
            (phi_branch, 'jet_phi', self.nbins, self.jet_radius),
            (TupleMeanStd, self.branches[group], self.branchcutoffs[group]),
            (self.sums_scaling[sums_key], self.binned_sums[sums_key]),
        )

    # wait for the file before opening it (timeout argument: 120)
    fileTimeOut(filename, 120)
    root_file = ROOT.TFile(filename)
    jet_tree = root_file.Get("deepntuplizer/tree")
    self.nsamples = jet_tree.GetEntries()
    print('took ', timer.getAndReset(), ' seconds for getting tree entries')

    global_features = MeanNormZeroPad(
        filename,
        TupleMeanStd,
        [self.branches[0]],
        [self.branchcutoffs[0]],
        self.nsamples,
    )

    charged, charged_sums = _binned('nCpfcand', 'Cpfcan_eta', 'Cpfcan_phi', 1, 'charged')
    neutral, neutral_sums = _binned('nNpfcand', 'Npfcan_eta', 'Npfcan_phi', 2, 'neutral')
    vertices, vertex_sums = _binned('nsv', 'sv_eta', 'sv_phi', 3, 'svs')

    # merge the summed variables of the three collections
    all_sums = numpy.concatenate((charged_sums, neutral_sums, vertex_sums), axis=3)

    print('took ', timer.getAndReset(), ' seconds for mean norm and zero padding (C module)')

    jets = self.readTreeFromRootToTuple(filename)

    if self.remove:
        keep_index = weighter.createNotRemoveIndices(jets)
        # entries flagged 'isUndefined' are subtracted from the keep index in place
        keep_index -= jets['isUndefined']
        print('took ', timer.getAndReset(), ' to create remove indices')

    if self.weight:
        weights = weighter.getJetWeights(jets)
    elif self.remove:
        weights = keep_index
    else:
        print('neither remove nor weight')
        weights = numpy.empty(self.nsamples)
        weights.fill(1.)

    truth = self.reduceTruth(jets[self.truthclasses])
    pt_truth = jets[self.regtruth]

    if self.remove:
        print('remove')
        selected = keep_index > 0
        weights = weights[selected]
        global_features = global_features[selected]
        charged = charged[selected]
        neutral = neutral[selected]
        vertices = vertices[selected]
        all_sums = all_sums[selected]
        truth = truth[selected]
        pt_truth = pt_truth[selected]

    rows_left = global_features.shape[0]
    print('reduced content to ', int(float(rows_left)/float(self.nsamples)*100),'%')
    self.nsamples = rows_left

    self.w = [weights]
    self.x = [global_features, charged, neutral, vertices, all_sums]
    self.y = [truth, pt_truth]
def readFromRootFile(self,filename,TupleMeanStd, weighter):
    """Read one ROOT file and fill ``self.w``, ``self.x`` and ``self.y``.

    Inputs are the global branches (group 0), three particle-wise arrays
    (groups 1-3) and three 0/1 masks built from the ``nCpfcand``,
    ``nNpfcand`` and ``nsv`` counters. Each mask row has 1.0 in the first
    ``min(count, max_len)`` slots and 0.0 elsewhere, and that row is
    repeated a fixed number of times per jet:

    * ``nCpfcand``: 25 slots, 8 repeated rows;
    * ``nNpfcand``: 25 slots, 4 repeated rows;
    * ``nsv``: 4 slots, 8 repeated rows.

    With ``self.remove`` set, only rows whose not-remove index is positive
    are kept.

    Changes relative to the previous version: the masks are built with
    vectorised numpy operations instead of nested Python lists per jet, a
    dead recomputation inside the old loop is gone, and the local names now
    match the counter each mask is built from (the old ``maskListNpf`` held
    the ``nCpfcand`` mask and vice versa). The order and shapes of the
    arrays stored in ``self.x`` are unchanged.

    Args:
        filename: ROOT file to read; the tree is ``deepntuplizer/tree``.
        TupleMeanStd: forwarded unchanged to the padding helpers.
        weighter: provides ``createNotRemoveIndices`` and ``getJetWeights``.
    """
    from preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles
    import numpy
    from stopwatch import stopwatch

    sw = stopwatch()

    import ROOT

    def _padding_mask(counts, max_len, n_rows):
        # (n_jets, n_rows, max_len) float array; counts are truncated to int
        # and clipped at max_len, as the original per-jet loop did.
        clipped = numpy.minimum(numpy.asarray(counts).astype(int), max_len)
        row = (numpy.arange(max_len) < clipped[:, numpy.newaxis]).astype(float)
        return numpy.repeat(row[:, numpy.newaxis, :], n_rows, axis=1)

    # wait for the file before opening it (timeout argument: 120)
    fileTimeOut(filename, 120)
    rfile = ROOT.TFile(filename)
    tree = rfile.Get("deepntuplizer/tree")
    self.nsamples = tree.GetEntries()

    print('took ', sw.getAndReset(), ' seconds for getting tree entries')

    # split for convolutional network
    x_global = MeanNormZeroPad(filename, TupleMeanStd,
                               [self.branches[0]],
                               [self.branchcutoffs[0]], self.nsamples)

    x_cpf = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                     self.branches[1],
                                     self.branchcutoffs[1], self.nsamples)

    x_npf = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                     self.branches[2],
                                     self.branchcutoffs[2], self.nsamples)

    x_sv = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                    self.branches[3],
                                    self.branchcutoffs[3], self.nsamples)

    print('took ', sw.getAndReset(), ' seconds for mean norm and zero padding (C module)')

    Tuple = self.readTreeFromRootToTuple(filename)

    if self.remove:
        notremoves = weighter.createNotRemoveIndices(Tuple)
        undef = Tuple['isUndefined']
        # entries flagged 'isUndefined' are subtracted from the keep index in place
        notremoves -= undef
        print('took ', sw.getAndReset(), ' to create remove indices')

    if self.weight:
        weights = weighter.getJetWeights(Tuple)
    elif self.remove:
        weights = notremoves
    else:
        print('neither remove nor weight')
        weights = numpy.empty(self.nsamples)
        weights.fill(1.)

    truthtuple = Tuple[self.truthclasses]
    alltruth = self.reduceTruth(truthtuple)

    mask_cpf = _padding_mask(Tuple['nCpfcand'], 25, 8)
    mask_npf = _padding_mask(Tuple['nNpfcand'], 25, 4)
    mask_sv = _padding_mask(Tuple['nsv'], 4, 8)

    print ('zero shapes ', mask_cpf.shape, ' ' ,mask_npf.shape , ' ' ,mask_sv.shape )

    if self.remove:
        print('remove')
        keep = notremoves > 0
        weights = weights[keep]
        x_global = x_global[keep]
        x_cpf = x_cpf[keep]
        x_npf = x_npf[keep]
        x_sv = x_sv[keep]
        mask_cpf = mask_cpf[keep]
        mask_npf = mask_npf[keep]
        mask_sv = mask_sv[keep]
        alltruth = alltruth[keep]

    newnsamp = x_global.shape[0]
    print('reduced content to ', int(float(newnsamp)/float(self.nsamples)*100),'%')
    self.nsamples = newnsamp

    print(x_global.shape,self.nsamples)

    self.w = [weights]
    print (' types ', type (x_cpf) , type (mask_cpf), ' ' ,type(mask_npf) , ' ' , type(mask_sv) )
    # same order as before: charged mask, neutral mask, sv mask
    self.x = [x_global, x_cpf, x_npf, x_sv, mask_cpf, mask_npf, mask_sv]
    self.y = [alltruth]