def readFromRootFile(self, filename, TupleMeanStd, weighter):
    """Read one ROOT ntuple and fill ``self.x``, ``self.y`` and ``self.w``.

    Inputs are the global, charged-candidate, neutral-candidate and
    secondary-vertex arrays plus the reconstructed jet pt.  Targets are the
    reduced truth classes and the per-jet correction factor
    ``gen_pt_WithNu / jet_corr_pt``.  Both weight entries are the jet weight
    multiplied by the event weight read from ``self.eventweightbranch``.

    Args:
        filename: path of the ROOT file containing ``deepntuplizer/tree``.
        TupleMeanStd: accepted for interface compatibility; this reader
            passes ``None`` to the preprocessing helpers instead.
        weighter: object providing ``createNotRemoveIndices`` and
            ``getJetWeights``.
    """
    from preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles
    import numpy
    from stopwatch import stopwatch
    sw = stopwatch()
    swall = stopwatch()
    import ROOT

    fileTimeOut(filename, 120)  # wait for the file to become readable before opening it
    rfile = ROOT.TFile(filename)
    tree = rfile.Get("deepntuplizer/tree")
    self.nsamples = tree.GetEntries()
    print('took ', sw.getAndReset(), ' seconds for getting tree entries')

    # split for convolutional network; None is passed as the mean/std argument
    x_global = MeanNormZeroPad(filename, None,
                               [self.branches[0]],
                               [self.branchcutoffs[0]], self.nsamples)
    x_cpf = MeanNormZeroPadParticles(filename, None,
                                     self.branches[1],
                                     self.branchcutoffs[1], self.nsamples)
    x_npf = MeanNormZeroPadParticles(filename, None,
                                     self.branches[2],
                                     self.branchcutoffs[2], self.nsamples)
    x_sv = MeanNormZeroPadParticles(filename, None,
                                    self.branches[3],
                                    self.branchcutoffs[3], self.nsamples)
    print('took ', sw.getAndReset(), ' seconds for mean norm and zero padding (C module)')

    Tuple = self.readTreeFromRootToTuple(filename)

    reg_truth = Tuple['gen_pt_WithNu'].view(numpy.ndarray)
    reco_pt = Tuple['jet_corr_pt'].view(numpy.ndarray)
    # Regression target for all jets at once (replaces a per-jet Python loop);
    # the result is stored as float64 exactly as before.
    correctionfactor = numpy.asarray(reg_truth / reco_pt, dtype=float)

    if self.remove:
        notremoves = weighter.createNotRemoveIndices(Tuple)
        undef = Tuple['isUndefined']
        notremoves -= undef  # jets flagged as undefined are never kept
        print('took ', sw.getAndReset(), ' to create remove indices')

    if self.weight:
        weights = weighter.getJetWeights(Tuple)
    elif self.remove:
        weights = notremoves
    else:
        print('neither remove nor weight')
        weights = numpy.empty(self.nsamples)
        weights.fill(1.)

    truthtuple = Tuple[self.truthclasses]
    alltruth = self.reduceTruth(truthtuple)
    eventweights = Tuple[self.eventweightbranch].astype(float)

    if self.remove:
        print('remove')
        keep = notremoves > 0
        weights = weights[keep]
        # Fix: the event weights must be reduced with the same mask, otherwise
        # weights * eventweights below combines arrays of different length.
        eventweights = eventweights[keep]
        x_global = x_global[keep]
        x_cpf = x_cpf[keep]
        x_npf = x_npf[keep]
        x_sv = x_sv[keep]
        alltruth = alltruth[keep]
        reco_pt = reco_pt[keep]
        correctionfactor = correctionfactor[keep]

    newnsamp = x_global.shape[0]
    print('reduced content to ', int(float(newnsamp)/float(self.nsamples)*100), '%')
    self.nsamples = newnsamp

    print(x_global.shape, self.nsamples)

    # one weight array per output (classification, regression)
    self.w = [weights*eventweights, weights*eventweights]
    self.x = [x_global, x_cpf, x_npf, x_sv, reco_pt]
    self.y = [alltruth, correctionfactor]
def readFromRootFile(self, filename, TupleMeanStd, weighter):
    """Read one ROOT ntuple into binned candidate arrays.

    Fills ``self.x`` with the global features, the binned charged, neutral
    and secondary-vertex arrays and the concatenated per-bin sums; ``self.y``
    with the reduced truth classes and the ``self.regtruth`` branch; and
    ``self.w`` with the jet weights.

    Args:
        filename: path of the ROOT file containing ``deepntuplizer/tree``.
        TupleMeanStd: mean/std information forwarded to the preprocessing
            helpers.
        weighter: object providing ``createNotRemoveIndices`` and
            ``getJetWeights``.
    """
    from preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles, MeanNormZeroPadBinned
    import numpy
    from stopwatch import stopwatch
    timer = stopwatch()
    swall = stopwatch()
    import ROOT

    fileTimeOut(filename, 120)  # wait for the file to become readable
    root_file = ROOT.TFile(filename)
    ntuple_tree = root_file.Get("deepntuplizer/tree")
    self.nsamples = ntuple_tree.GetEntries()
    print('took ', timer.getAndReset(), ' seconds for getting tree entries')

    # global (per-jet) features
    x_global = MeanNormZeroPad(
        filename, TupleMeanStd,
        [self.branches[0]], [self.branchcutoffs[0]],
        self.nsamples,
    )

    def binned(counter_branch, eta_branch, phi_branch, branch_index, sum_key):
        # Each axis is (coordinate branch, centre branch, nbins, half width);
        # the last two tuples select the per-bin branches and summed variables.
        return MeanNormZeroPadBinned(
            filename, counter_branch, self.nsamples,
            (eta_branch, 'jet_eta', self.nbins, self.jet_radius),
            (phi_branch, 'jet_phi', self.nbins, self.jet_radius),
            (TupleMeanStd, self.branches[branch_index], self.branchcutoffs[branch_index]),
            (self.sums_scaling[sum_key], self.binned_sums[sum_key]),
        )

    x_cpf, sum_cpf = binned('nCpfcand', 'Cpfcan_eta', 'Cpfcan_phi', 1, 'charged')
    x_npf, sum_npf = binned('nNpfcand', 'Npfcan_eta', 'Npfcan_phi', 2, 'neutral')
    x_sv, sum_sv = binned('nsv', 'sv_eta', 'sv_phi', 3, 'svs')

    # merge the summed variables of all three collections along the last axis
    x_sum = numpy.concatenate((sum_cpf, sum_npf, sum_sv), axis=3)
    print('took ', timer.getAndReset(), ' seconds for mean norm and zero padding (C module)')

    Tuple = self.readTreeFromRootToTuple(filename)

    if self.remove:
        notremoves = weighter.createNotRemoveIndices(Tuple)
        notremoves -= Tuple['isUndefined']
        print('took ', timer.getAndReset(), ' to create remove indices')

    if self.weight:
        weights = weighter.getJetWeights(Tuple)
    elif self.remove:
        weights = notremoves
    else:
        print('neither remove nor weight')
        weights = numpy.ones(self.nsamples)

    alltruth = self.reduceTruth(Tuple[self.truthclasses])
    pt_truth = Tuple[self.regtruth]

    if self.remove:
        print('remove')
        keep = notremoves > 0
        weights, x_global, x_cpf, x_npf, x_sv, x_sum, alltruth, pt_truth = [
            arr[keep]
            for arr in (weights, x_global, x_cpf, x_npf, x_sv, x_sum, alltruth, pt_truth)
        ]

    n_kept = x_global.shape[0]
    kept_fraction = float(n_kept) / float(self.nsamples)
    print('reduced content to ', int(kept_fraction * 100), '%')
    self.nsamples = n_kept

    self.w = [weights]
    self.x = [x_global, x_cpf, x_npf, x_sv, x_sum]
    self.y = [alltruth, pt_truth]
def readFromRootFile(self, filename, TupleMeanStd, weighter):
    """Read one ROOT ntuple into a single flat feature array.

    Fills ``self.x`` with one array built from all of ``self.branches``,
    ``self.y`` with the reduced truth classes and ``self.w`` with the jet
    weights.

    Args:
        filename: path of the ROOT file containing ``deepntuplizer/tree``.
        TupleMeanStd: mean/std information forwarded to ``MeanNormZeroPad``.
        weighter: object providing ``createNotRemoveIndices`` and
            ``getJetWeights``.
    """
    from preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles
    import numpy
    from stopwatch import stopwatch
    timer = stopwatch()
    swall = stopwatch()
    import ROOT

    fileTimeOut(filename, 120)  # wait for the file to become readable
    root_file = ROOT.TFile(filename)
    ntuple_tree = root_file.Get("deepntuplizer/tree")
    self.nsamples = ntuple_tree.GetEntries()
    print('took ', timer.getAndReset(), ' seconds for getting tree entries')

    # every configured branch group goes into one array
    x_global = MeanNormZeroPad(
        filename, TupleMeanStd,
        self.branches, self.branchcutoffs,
        self.nsamples,
    )
    print('took ', timer.getAndReset(), ' seconds for mean norm and zero padding (C module)')

    Tuple = self.readTreeFromRootToTuple(filename)

    if self.remove:
        notremoves = weighter.createNotRemoveIndices(Tuple)
        notremoves -= Tuple['isUndefined']
        print('took ', timer.getAndReset(), ' to create remove indices')

    if self.weight:
        weights = weighter.getJetWeights(Tuple)
    elif self.remove:
        weights = notremoves
    else:
        print('neither remove nor weight')
        weights = numpy.ones(self.nsamples)

    alltruth = self.reduceTruth(Tuple[self.truthclasses])

    if self.remove:
        print('remove')
        keep = notremoves > 0
        weights, x_global, alltruth = [
            arr[keep] for arr in (weights, x_global, alltruth)
        ]

    n_kept = x_global.shape[0]
    kept_fraction = float(n_kept) / float(self.nsamples)
    print('reduced content to ', int(kept_fraction * 100), '%')
    self.nsamples = n_kept

    print(x_global.shape, self.nsamples)

    self.w = [weights]
    self.x = [x_global]
    self.y = [alltruth]
def readFromRootFile(self, filename, TupleMeanStd, weighter):
    """Read one ROOT ntuple into two particle-style arrays.

    ``self.x`` receives the array built from ``self.branches[1]``,
    ``self.z`` the zero-padded array built from ``self.branches[0]``,
    ``self.y`` the reduced truth classes and ``self.w`` the jet weights.
    Jets whose truth row sums to zero are always dropped; the weighter's
    removal mask is applied on top of that when ``self.remove`` is set.

    Args:
        filename: path of the ROOT file containing ``deepntuplizer/tree``.
        TupleMeanStd: mean/std information forwarded to the preprocessing
            helpers.
        weighter: object providing ``createNotRemoveIndices`` and
            ``getJetWeights``.
    """
    from preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles, ZeroPadParticles
    import numpy
    import ROOT

    fileTimeOut(filename, 120)  # wait for the file to become readable
    root_file = ROOT.TFile(filename)
    ntuple_tree = root_file.Get("deepntuplizer/tree")
    self.nsamples = ntuple_tree.GetEntries()

    x_glb = ZeroPadParticles(
        filename, TupleMeanStd,
        self.branches[0], self.branchcutoffs[0],
        self.nsamples,
    )
    x_db = MeanNormZeroPadParticles(
        filename, TupleMeanStd,
        self.branches[1], self.branchcutoffs[1],
        self.nsamples,
    )

    Tuple = self.readTreeFromRootToTuple(filename)

    # the removal mask is computed unconditionally in this reader
    notremoves = weighter.createNotRemoveIndices(Tuple)

    if self.weight:
        weights = weighter.getJetWeights(Tuple)
    elif self.remove:
        weights = notremoves
    else:
        print('neither remove nor weight')
        weights = numpy.ones(self.nsamples)

    # truth classes are reduced from the full tuple
    alltruth = self.reduceTruth(Tuple)

    # drop jets that belong to no truth class
    has_truth = numpy.sum(alltruth, axis=1) > 0
    weights, x_glb, x_db, alltruth, notremoves = [
        arr[has_truth] for arr in (weights, x_glb, x_db, alltruth, notremoves)
    ]

    # NOTE(review): this selection is computed but never used afterwards.
    undef = Tuple['fj_isNonCC'] * Tuple['sample_isQCD'] * Tuple['fj_isQCD'] + Tuple['fj_isCC'] * Tuple['fj_isH']

    # apply the weighter's removal mask
    if self.remove:
        print('remove')
        keep = notremoves > 0
        weights, x_glb, x_db, alltruth = [
            arr[keep] for arr in (weights, x_glb, x_db, alltruth)
        ]

    n_kept = x_glb.shape[0]
    kept_fraction = float(n_kept) / float(self.nsamples)
    print('reduced content to ', int(kept_fraction * 100), '%')
    self.nsamples = n_kept

    # store the results
    self.w = [weights]
    self.x = [x_db]
    self.z = [x_glb]
    self.y = [alltruth]
def readFromRootFile(self, filename, TupleMeanStd, weighter):
    """Read one ROOT ntuple into four zero-padded particle-style arrays.

    ``self.x`` receives the arrays built from ``self.branches[1..3]``,
    ``self.z`` the array built from ``self.branches[0]``, ``self.y`` the
    reduced truth classes and ``self.w`` the jet weights.  Jets whose truth
    row sums to zero are always dropped; the weighter's removal mask is
    applied on top of that when ``self.remove`` is set.

    Args:
        filename: path of the ROOT file containing ``deepntuplizer/tree``.
        TupleMeanStd: mean/std information forwarded to ``ZeroPadParticles``.
        weighter: object providing ``createNotRemoveIndices`` and
            ``getJetWeights``.
    """
    from preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles, ZeroPadParticles
    import numpy
    import ROOT

    fileTimeOut(filename, 120)  # wait for the file to become readable
    root_file = ROOT.TFile(filename)
    ntuple_tree = root_file.Get("deepntuplizer/tree")
    self.nsamples = ntuple_tree.GetEntries()

    def padded(group):
        # one array per configured branch group
        return ZeroPadParticles(
            filename, TupleMeanStd,
            self.branches[group], self.branchcutoffs[group],
            self.nsamples,
        )

    x_glb = padded(0)
    x_db = padded(1)
    x_cpf = padded(2)
    x_sv = padded(3)

    Tuple = self.readTreeFromRootToTuple(filename)

    # the removal mask is computed unconditionally in this reader
    notremoves = weighter.createNotRemoveIndices(Tuple)

    if self.weight:
        weights = weighter.getJetWeights(Tuple)
    elif self.remove:
        weights = notremoves
    else:
        print('neither remove nor weight')
        weights = numpy.ones(self.nsamples)

    # truth classes are reduced from the full tuple
    alltruth = self.reduceTruth(Tuple)

    # drop jets that belong to no truth class
    has_truth = numpy.sum(alltruth, axis=1) > 0
    weights, x_glb, x_db, x_sv, x_cpf, alltruth = [
        arr[has_truth] for arr in (weights, x_glb, x_db, x_sv, x_cpf, alltruth)
    ]

    if self.remove:
        # keep the removal mask aligned with the already reduced arrays
        notremoves = notremoves[has_truth]
        print('remove')
        keep = notremoves > 0
        weights, x_glb, x_db, x_sv, x_cpf, alltruth = [
            arr[keep] for arr in (weights, x_glb, x_db, x_sv, x_cpf, alltruth)
        ]

    n_kept = x_glb.shape[0]
    kept_fraction = float(n_kept) / float(self.nsamples)
    print('reduced content to ', int(kept_fraction * 100), '%')
    self.nsamples = n_kept

    # store the results
    self.w = [weights]
    self.x = [x_db, x_cpf, x_sv]
    self.z = [x_glb]
    self.y = [alltruth]
def readFromRootFile(self, filename, TupleMeanStd, weighter):
    """Read one ROOT ntuple and add 0/1 multiplicity masks to the inputs.

    ``self.x`` receives the global, charged, neutral and secondary-vertex
    arrays followed by three mask arrays derived from ``nCpfcand``,
    ``nNpfcand`` and ``nsv``.  Each mask row holds ones for the first
    ``n`` slots (capped at the row width) and zeros for the rest.
    ``self.y`` holds the reduced truth classes, ``self.w`` the jet weights.

    Args:
        filename: path of the ROOT file containing ``deepntuplizer/tree``.
        TupleMeanStd: mean/std information forwarded to the preprocessing
            helpers.
        weighter: object providing ``createNotRemoveIndices`` and
            ``getJetWeights``.
    """
    from preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles
    import numpy
    from stopwatch import stopwatch
    timer = stopwatch()
    swall = stopwatch()
    import ROOT

    fileTimeOut(filename, 120)  # wait for the file to become readable
    root_file = ROOT.TFile(filename)
    ntuple_tree = root_file.Get("deepntuplizer/tree")
    self.nsamples = ntuple_tree.GetEntries()
    print('took ', timer.getAndReset(), ' seconds for getting tree entries')

    # split for convolutional network
    x_global = MeanNormZeroPad(
        filename, TupleMeanStd,
        [self.branches[0]], [self.branchcutoffs[0]],
        self.nsamples,
    )

    def particles(group):
        return MeanNormZeroPadParticles(
            filename, TupleMeanStd,
            self.branches[group], self.branchcutoffs[group],
            self.nsamples,
        )

    x_cpf = particles(1)
    x_npf = particles(2)
    x_sv = particles(3)
    print('took ', timer.getAndReset(), ' seconds for mean norm and zero padding (C module)')

    Tuple = self.readTreeFromRootToTuple(filename)

    if self.remove:
        notremoves = weighter.createNotRemoveIndices(Tuple)
        notremoves -= Tuple['isUndefined']
        print('took ', timer.getAndReset(), ' to create remove indices')

    if self.weight:
        weights = weighter.getJetWeights(Tuple)
    elif self.remove:
        weights = notremoves
    else:
        print('neither remove nor weight')
        weights = numpy.ones(self.nsamples)

    alltruth = self.reduceTruth(Tuple[self.truthclasses])

    def mask_rows(count, width, copies):
        # `copies` identical rows: ones for the first min(count, width) slots
        filled = int(count)
        if filled > width:
            filled = width
        return [[1.] * filled + [0.] * (width - filled)] * copies

    counts = Tuple[['nCpfcand', 'nNpfcand', 'nsv']]
    # field 0 -> 8 rows of width 25, field 1 -> 4 rows of width 25,
    # field 2 -> 8 rows of width 4
    cpf_mask = numpy.asarray([mask_rows(row[0], 25, 8) for row in counts], dtype=float)
    npf_mask = numpy.asarray([mask_rows(row[1], 25, 4) for row in counts], dtype=float)
    sv_mask = numpy.asarray([mask_rows(row[2], 4, 8) for row in counts], dtype=float)

    print ('zero shapes ', cpf_mask.shape, ' ', npf_mask.shape, ' ', sv_mask.shape)

    if self.remove:
        print('remove')
        keep = notremoves > 0
        weights, x_global, x_cpf, x_npf, x_sv, cpf_mask, npf_mask, sv_mask, alltruth = [
            arr[keep]
            for arr in (weights, x_global, x_cpf, x_npf, x_sv,
                        cpf_mask, npf_mask, sv_mask, alltruth)
        ]

    n_kept = x_global.shape[0]
    kept_fraction = float(n_kept) / float(self.nsamples)
    print('reduced content to ', int(kept_fraction * 100), '%')
    self.nsamples = n_kept

    print(x_global.shape, self.nsamples)

    self.w = [weights]
    print (' types ', type(x_cpf), type(cpf_mask), ' ', type(npf_mask), ' ', type(sv_mask))
    self.x = [x_global, x_cpf, x_npf, x_sv, cpf_mask, npf_mask, sv_mask]
    self.y = [alltruth]
def readFromRootFile(self, filename, TupleMeanStd, weighter):
    """Read one ROOT ntuple and sort candidates within each jet.

    The charged and neutral candidate arrays are reordered along their
    second axis by ascending value of feature 0.  ``self.x`` receives the
    global features, both sorted candidate arrays and the ``self.regreco``
    branch; ``self.y`` the reduced truth classes and the ``self.regtruth``
    branch; ``self.w`` the jet weights.

    Args:
        filename: path of the ROOT file containing ``deepntuplizer/tree``.
        TupleMeanStd: mean/std information forwarded to the preprocessing
            helpers.
        weighter: object providing ``createNotRemoveIndices`` and
            ``getJetWeights``.
    """
    from preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles
    import numpy
    from stopwatch import stopwatch
    import c_meanNormZeroPad
    c_meanNormZeroPad.zeroPad()
    timer = stopwatch()
    swall = stopwatch()
    import ROOT

    fileTimeOut(filename, 120)  # wait for the file to become readable
    root_file = ROOT.TFile(filename)
    ntuple_tree = root_file.Get("deepntuplizer/tree")
    self.nsamples = ntuple_tree.GetEntries()
    print('took ', timer.getAndReset(), ' seconds for getting tree entries')

    # split for convolutional network
    x_global = MeanNormZeroPad(
        filename, TupleMeanStd,
        [self.branches[0]], [self.branchcutoffs[0]],
        self.nsamples,
    )
    x_cpf = MeanNormZeroPadParticles(
        filename, TupleMeanStd,
        self.branches[1], self.branchcutoffs[1],
        self.nsamples,
    )
    x_npf = MeanNormZeroPadParticles(
        filename, TupleMeanStd,
        self.branches[2], self.branchcutoffs[2],
        self.nsamples,
    )
    print('took ', timer.getAndReset(), ' seconds for mean norm and zero padding (C module)')

    nparray = self.readTreeFromRootToTuple(filename)

    if self.remove:
        notremoves = weighter.createNotRemoveIndices(nparray)
        notremoves -= nparray['isUndefined']
        print('took ', timer.getAndReset(), ' to create remove indices')

    if self.weight:
        weights = weighter.getJetWeights(nparray)
    elif self.remove:
        weights = notremoves
    else:
        print('neither remove nor weight')
        weights = numpy.ones(self.nsamples)

    pttruth = nparray[self.regtruth]
    ptreco = nparray[self.regreco]

    alltruth = self.reduceTruth(nparray[self.truthclasses])

    def sorted_by_first_feature(candidates):
        # Reorder axis 1 by ascending feature 0 (the pt ratio, according to
        # the original comment); all features of a candidate move together.
        order = candidates[:, :, 0].argsort()
        n_jets, _, n_features = candidates.shape
        jet_index = numpy.arange(n_jets)[:, None, None]
        feature_index = numpy.arange(n_features)[None, None, :]
        return candidates[jet_index, order[:, :, None], feature_index]

    x_cpf = sorted_by_first_feature(x_cpf)
    x_npf = sorted_by_first_feature(x_npf)

    if self.remove:
        print('remove')
        keep = notremoves > 0
        weights, x_global, x_cpf, x_npf, alltruth, pttruth, ptreco = [
            arr[keep]
            for arr in (weights, x_global, x_cpf, x_npf, alltruth, pttruth, ptreco)
        ]

    n_kept = x_global.shape[0]
    kept_fraction = float(n_kept) / float(self.nsamples)
    print('reduced content to ', int(kept_fraction * 100), '%')
    self.nsamples = n_kept

    self.w = [weights]
    self.x = [x_global, x_cpf, x_npf, ptreco]
    self.y = [alltruth, pttruth]
def readFromRootFile(self, filename, TupleMeanStd, weighter):
    """Read one ROOT ntuple into global features plus a stacked jet image.

    Four 20x20-bin maps (charged density, charged count, neutral density,
    neutral count) are concatenated along axis 3 into one array.
    ``self.x`` receives the global features, that array and the
    ``self.regreco`` branch; ``self.y`` the reduced truth classes and the
    ``self.regtruth`` branch; ``self.w`` the jet weights.

    Args:
        filename: path of the ROOT file containing ``deepntuplizer/tree``.
        TupleMeanStd: mean/std information forwarded to the preprocessing
            helpers.
        weighter: object providing ``createNotRemoveIndices`` and
            ``getJetWeights``.
    """
    from preprocessing import MeanNormApply, MeanNormZeroPad, createDensityMap, createCountMap, MeanNormZeroPadParticles
    import numpy
    from stopwatch import stopwatch
    timer = stopwatch()
    swall = stopwatch()
    import ROOT

    fileTimeOut(filename, 120)  # wait for the file to become readable
    root_file = ROOT.TFile(filename)
    ntuple_tree = root_file.Get("deepntuplizer/tree")
    self.nsamples = ntuple_tree.GetEntries()
    print('took ', timer.getAndReset(), ' seconds for getting tree entries')

    # split for convolutional network
    x_global = MeanNormZeroPad(
        filename, TupleMeanStd,
        [self.branches[0]], [self.branchcutoffs[0]],
        self.nsamples,
    )

    def density_map(value_branch, eta_branch, phi_branch, counter_branch):
        # fresh axis lists per call, exactly as in the inline version
        return createDensityMap(
            filename, TupleMeanStd, value_branch, self.nsamples,
            [eta_branch, 'jet_eta', 20, 0.5],
            [phi_branch, 'jet_phi', 20, 0.5],
            counter_branch, -1,
        )

    def count_map(eta_branch, phi_branch, counter_branch):
        return createCountMap(
            filename, TupleMeanStd, self.nsamples,
            [eta_branch, 'jet_eta', 20, 0.5],
            [phi_branch, 'jet_phi', 20, 0.5],
            counter_branch,
        )

    # image-like inputs
    x_chmap = density_map('Cpfcan_ptrel', 'Cpfcan_eta', 'Cpfcan_phi', 'nCpfcand')
    x_chcount = count_map('Cpfcan_eta', 'Cpfcan_phi', 'nCpfcand')
    x_neumap = density_map('Npfcan_ptrel', 'Npfcan_eta', 'Npfcan_phi', 'nNpfcand')
    x_neucount = count_map('Npfcan_eta', 'Npfcan_phi', 'nNpfcand')

    print('took ', timer.getAndReset(), ' seconds for mean norm and zero padding (C module)')

    Tuple = self.readTreeFromRootToTuple(filename)

    if self.remove:
        notremoves = weighter.createNotRemoveIndices(Tuple)
        notremoves -= Tuple['isUndefined']
        print('took ', timer.getAndReset(), ' to create remove indices')

    if self.weight:
        weights = weighter.getJetWeights(Tuple)
    elif self.remove:
        weights = notremoves
    else:
        print('neither remove nor weight')
        weights = numpy.ones(self.nsamples)

    pttruth = Tuple[self.regtruth]
    ptreco = Tuple[self.regreco]

    alltruth = self.reduceTruth(Tuple[self.truthclasses])

    # stack the four maps before any jets are removed
    x_map = numpy.concatenate((x_chmap, x_chcount, x_neumap, x_neucount), axis=3)

    if self.remove:
        print('remove')
        keep = notremoves > 0
        weights, x_global, x_map, alltruth, pttruth, ptreco = [
            arr[keep]
            for arr in (weights, x_global, x_map, alltruth, pttruth, ptreco)
        ]

    n_kept = x_global.shape[0]
    kept_fraction = float(n_kept) / float(self.nsamples)
    print('reduced content to ', int(kept_fraction * 100), '%')
    self.nsamples = n_kept
    print(x_global.shape, self.nsamples)

    self.w = [weights]
    self.x = [x_global, x_map, ptreco]
    self.y = [alltruth, pttruth]
def readFromRootFile(self, filename, TupleMeanStd, weighter):
    """Read one ROOT ntuple into particle arrays plus a stacked jet image.

    ``self.x`` receives the global, charged, neutral and secondary-vertex
    arrays, an image built from charged/neutral density and count maps
    (concatenated along axis 3) and ``jet_corr_pt``.  ``self.y`` holds the
    reduced truth classes and ``gen_pt_WithNu``; ``self.w`` holds the jet
    weights once per output.

    Args:
        filename: path of the ROOT file containing ``deepntuplizer/tree``.
        TupleMeanStd: mean/std information forwarded to the preprocessing
            helpers.
        weighter: object providing ``createNotRemoveIndices`` and
            ``getJetWeights``.
    """
    from preprocessing import MeanNormApply, createCountMap, createDensity, MeanNormZeroPad, createDensityMap, MeanNormZeroPadParticles
    import numpy
    from stopwatch import stopwatch
    sw = stopwatch()
    swall = stopwatch()
    import ROOT

    fileTimeOut(filename, 120)  # wait for the file to become readable before opening it
    rfile = ROOT.TFile(filename)
    tree = rfile.Get("deepntuplizer/tree")
    self.nsamples = tree.GetEntries()
    print('took ', sw.getAndReset(), ' seconds for getting tree entries')

    # split for convolutional network
    x_global = MeanNormZeroPad(filename, TupleMeanStd,
                               [self.branches[0]],
                               [self.branchcutoffs[0]], self.nsamples)
    x_cpf = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                     self.branches[1],
                                     self.branchcutoffs[1], self.nsamples)
    x_npf = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                     self.branches[2],
                                     self.branchcutoffs[2], self.nsamples)
    x_sv = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                    self.branches[3],
                                    self.branchcutoffs[3], self.nsamples)

    # image-like inputs: nbins x nbins maps around the jet axis
    nbins = 8

    x_chmap = createDensity(
        filename,
        inbranches=['Cpfcan_ptrel', 'Cpfcan_etarel', 'Cpfcan_phirel'],
        modes=['sum', 'average', 'average'],
        nevents=self.nsamples,
        dimension1=['Cpfcan_eta', 'jet_eta', nbins, 0.45],
        dimension2=['Cpfcan_phi', 'jet_phi', nbins, 0.45],
        counterbranch='nCpfcand',
        offsets=[-1, -0.5, -0.5])

    x_neumap = createDensity(
        filename,
        inbranches=['Npfcan_ptrel', 'Npfcan_etarel', 'Npfcan_phirel'],
        modes=['sum', 'average', 'average'],
        nevents=self.nsamples,
        dimension1=['Npfcan_eta', 'jet_eta', nbins, 0.45],
        dimension2=['Npfcan_phi', 'jet_phi', nbins, 0.45],
        # Fix: the neutral candidates are counted by nNpfcand (the original
        # used the charged counter nCpfcand here, unlike x_neucount below).
        counterbranch='nNpfcand',
        offsets=[-1, -0.5, -0.5])

    x_chcount = createCountMap(filename, TupleMeanStd,
                               self.nsamples,
                               ['Cpfcan_eta', 'jet_eta', nbins, 0.45],
                               ['Cpfcan_phi', 'jet_phi', nbins, 0.45],
                               'nCpfcand')

    x_neucount = createCountMap(filename, TupleMeanStd,
                                self.nsamples,
                                ['Npfcan_eta', 'jet_eta', nbins, 0.45],
                                ['Npfcan_phi', 'jet_phi', nbins, 0.45],
                                'nNpfcand')

    print('took ', sw.getAndReset(), ' seconds for mean norm and zero padding (C module)')

    Tuple = self.readTreeFromRootToTuple(filename)

    if self.remove:
        notremoves = weighter.createNotRemoveIndices(Tuple)
        undef = Tuple['isUndefined']
        notremoves -= undef  # jets flagged as undefined are never kept
        print('took ', sw.getAndReset(), ' to create remove indices')

    if self.weight:
        weights = weighter.getJetWeights(Tuple)
    elif self.remove:
        weights = notremoves
    else:
        print('neither remove nor weight')
        weights = numpy.empty(self.nsamples)
        weights.fill(1.)

    truthtuple = Tuple[self.truthclasses]
    alltruth = self.reduceTruth(truthtuple)

    regtruth = Tuple['gen_pt_WithNu']
    regreco = Tuple['jet_corr_pt']

    if self.remove:
        print('remove')
        keep = notremoves > 0
        weights = weights[keep]
        x_global = x_global[keep]
        x_cpf = x_cpf[keep]
        x_npf = x_npf[keep]
        x_sv = x_sv[keep]

        x_chmap = x_chmap[keep]
        x_neumap = x_neumap[keep]

        x_chcount = x_chcount[keep]
        x_neucount = x_neucount[keep]

        alltruth = alltruth[keep]

        regreco = regreco[keep]
        regtruth = regtruth[keep]

    newnsamp = x_global.shape[0]
    print('reduced content to ', int(float(newnsamp) / float(self.nsamples) * 100), '%')
    self.nsamples = newnsamp

    # stack the four maps along the last axis
    x_map = numpy.concatenate((x_chmap, x_neumap, x_chcount, x_neucount), axis=3)

    # the same weights are used for both outputs
    self.w = [weights, weights]
    self.x = [x_global, x_cpf, x_npf, x_sv, x_map, regreco]
    self.y = [alltruth, regtruth]
def readFromRootFile(self, filename, TupleMeanStd, weighter):
    """Read one ROOT ntuple into four particle-style arrays.

    Arrays are built from ``self.branches[0..3]``.  ``self.x`` receives the
    arrays for groups 1-3 (the group-0 array is used only to count the
    surviving jets), ``self.y`` the reduced truth classes and ``self.w``
    the jet weights.

    Args:
        filename: path of the ROOT file containing ``deepntuplizer/tree``.
        TupleMeanStd: mean/std information forwarded to
            ``MeanNormZeroPadParticles``.
        weighter: object providing ``createNotRemoveIndices`` and
            ``getJetWeights``.
    """
    from preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles
    import numpy
    import ROOT

    fileTimeOut(filename, 120)  # wait for the file to become readable
    root_file = ROOT.TFile(filename)
    ntuple_tree = root_file.Get("deepntuplizer/tree")
    self.nsamples = ntuple_tree.GetEntries()

    def particles(group):
        # every branch group is treated particle-wise
        return MeanNormZeroPadParticles(
            filename, TupleMeanStd,
            self.branches[group], self.branchcutoffs[group],
            self.nsamples,
        )

    x_glb = particles(0)
    x_pf = particles(1)
    x_cpf = particles(2)
    x_sv = particles(3)

    Tuple = self.readTreeFromRootToTuple(filename)

    if self.remove:
        # mask of jets to keep, as decided by the weighter
        notremoves = weighter.createNotRemoveIndices(Tuple)

    if self.weight:
        weights = weighter.getJetWeights(Tuple)
    elif self.remove:
        weights = notremoves
    else:
        print('neither remove nor weight')
        weights = numpy.ones(self.nsamples)

    # truth classes
    alltruth = self.reduceTruth(Tuple[self.truthclasses])

    # apply the removal mask to every array
    if self.remove:
        print('remove')
        keep = notremoves > 0
        weights, x_glb, x_pf, x_cpf, x_sv, alltruth = [
            arr[keep] for arr in (weights, x_glb, x_pf, x_cpf, x_sv, alltruth)
        ]

    n_kept = x_glb.shape[0]
    kept_fraction = float(n_kept) / float(self.nsamples)
    print('reduced content to ', int(kept_fraction * 100), '%')
    self.nsamples = n_kept

    # store the results
    self.w = [weights]
    self.x = [x_pf, x_cpf, x_sv]
    self.y = [alltruth]