def getFlavourClassificationData(self,filename,TupleMeanStd, weighter):
    """Read one ROOT file and return the flavour-classification arrays.

    The entry count of ``self.treename`` is read, the tree is converted with
    ``self.readTreeFromRootToTuple`` and the feature array is produced by
    ``MeanNormZeroPad``.  Weights come from ``weighter`` depending on
    ``self.remove`` / ``self.weight``; if neither is set every entry gets
    weight 1.

    Returns:
        ``(weights, x_all, alltruth, notremoves)``.  With ``self.remove`` set
        the first three are restricted to entries where ``notremoves > 0``;
        otherwise ``notremoves`` is an empty array.

    Side effect: ``self.nsamples`` ends up as the number of rows of ``x_all``.
    """
    from DeepJetCore.stopwatch import stopwatch
    # Timers are created but only referenced by disabled timing printouts.
    sw = stopwatch()
    swall = stopwatch()
    import ROOT

    # Wait for the file to become readable (timeout argument: 120).
    fileTimeOut(filename, 120)
    root_file = ROOT.TFile(filename)
    self.nsamples = root_file.Get(self.treename).GetEntries()

    Tuple = self.readTreeFromRootToTuple(filename)
    x_all = MeanNormZeroPad(filename, TupleMeanStd, self.branches,
                            self.branchcutoffs, self.nsamples)

    notremoves = numpy.array([])
    if self.remove:
        notremoves = weighter.createNotRemoveIndices(Tuple)
        # The keep-indices double as weights in remove mode.
        weights = notremoves
    elif self.weight:
        weights = weighter.getJetWeights(Tuple)
    else:
        print('neither remove nor weight')
        weights = numpy.ones(self.nsamples)

    alltruth = self.reduceTruth(Tuple[self.truthclasses])

    if self.remove:
        keep = notremoves > 0
        weights = weights[keep]
        x_all = x_all[keep]
        alltruth = alltruth[keep]

    self.nsamples = x_all.shape[0]
    return weights, x_all, alltruth, notremoves
def readFromRootFile(self,filename,TupleMeanStd, weighter):
    """Convert one ROOT file into regression training arrays.

    Fills ``self.x`` (features from ``MeanNormZeroPad``; split into the
    ``self.inputs0`` / ``self.inputs1`` column selections when
    ``self.inputs1`` is truthy), ``self.y`` (the ``self.regressiontarget``
    branch) and ``self.w`` (weights), and updates ``self.nsamples``.

    In remove mode the keep-indices from ``weighter`` are additionally
    reduced by the ``self.selection`` branch (if set) and all arrays are
    restricted to entries with ``notremoves > 0``.

    The progress messages are written with ``print(...)`` on a single
    pre-formatted string so that the method parses on Python 3 and prints
    the same text on Python 2.
    """
    import ROOT

    # Wait for the file to become readable (timeout argument: 120).
    fileTimeOut(filename, 120)
    rfile = ROOT.TFile(filename)
    tree = rfile.Get(self.treename)
    self.nsamples = tree.GetEntries()

    # structured array used for truth, weights and selection
    Tuple = self.readTreeFromRootToTuple(filename)

    # create weights and remove indices
    notremoves = numpy.array([])
    if self.remove:
        notremoves = weighter.createNotRemoveIndices(Tuple)
        if self.selection:
            print('Removing events selected with {}'.format(self.selection))
            notremoves -= Tuple[self.selection].view(numpy.ndarray)
        weights = notremoves
    elif self.weight:
        weights = weighter.getJetWeights(Tuple)
    else:
        weights = numpy.ones(self.nsamples)

    # Classification truth is still reduced here, although only the
    # regression target is stored in self.y below.
    truthtuple = Tuple[self.truthclasses]
    alltruth = self.reduceTruth(truthtuple)
    reg_truth = Tuple[self.regressiontarget].view(numpy.ndarray)

    # stuff all in one long vector
    x_all = MeanNormZeroPad(filename, TupleMeanStd, self.branches,
                            self.branchcutoffs, self.nsamples)

    if self.remove:
        keep = notremoves > 0
        weights = weights[keep]
        x_all = x_all[keep]
        alltruth = alltruth[keep]
        reg_truth = reg_truth[keep]
        print('{} -> {} after remove'.format(len(Tuple), len(x_all)))

    self.nsamples = x_all.shape[0]

    self.w = [weights]
    if self.inputs1:
        self.x = [x_all[:, self.inputs0], x_all[:, self.inputs1]]
    else:
        self.x = [x_all]
    self.y = [reg_truth]
def readFromRootFile(self, filename, TupleMeanStd, weighter):
    """Convert the ROOT ntuple in ``filename`` to the training format.

    Reads the tree named ``"tree"``, builds four un-normalised feature
    arrays (``None`` is passed where the other converters pass mean/std)
    and stores features in ``self.x`` and the reduced truth plus the
    ``self.regtruth`` branch in ``self.y``.  ``self.w`` is reset to an empty
    list and ``self._normalize_input_`` is then called with the weighter and
    the structured array; what that helper changes is not visible here
    (the final printout suggests it may reduce ``self.nsamples``).
    """
    from DeepJetCore.preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles
    import ROOT

    # Wait for the file to become readable (timeout argument: 120).
    fileTimeOut(filename, 120)
    root_file = ROOT.TFile(filename)
    self.nsamples = root_file.Get("tree").GetEntries()

    npy_array = self.readTreeFromRootToTuple(filename)
    alltruth = self.reduceTruth(npy_array[self.truthclasses])
    alltruept = npy_array[self.regtruth]

    # user code: branch groups 0 and 3 are flat, 1 and 2 are per-particle
    x_global = MeanNormZeroPad(filename, None,
                               [self.branches[0]],
                               [self.branchcutoffs[0]],
                               self.nsamples)
    x_cpf = MeanNormZeroPadParticles(filename, None,
                                     self.branches[1],
                                     self.branchcutoffs[1],
                                     self.nsamples)
    x_npf = MeanNormZeroPadParticles(filename, None,
                                     self.branches[2],
                                     self.branchcutoffs[2],
                                     self.nsamples)
    x_recopts = MeanNormZeroPad(filename, None,
                                [self.branches[3]],
                                [self.branchcutoffs[3]],
                                self.nsamples)

    n_before = self.nsamples

    self.x = [x_global, x_cpf, x_npf, x_recopts]  # feature arrays
    self.y = [alltruth, alltruept]  # truth targets
    self.w = []  # weight arrays, one per truth target
    self._normalize_input_(weighter, npy_array)

    print('reduced to ', self.nsamples, 'of', n_before)
def convertFromSourceFile(self,filename, weighterobjects, istraining):
    """Convert one ROOT file into feature and truth arrays.

    Reads ``deepntuplizer/tree``, builds one flat and three per-particle
    feature arrays without normalisation (``None`` is passed as the mean/std
    argument) and reduces the truth branches.

    Returns:
        ``([x_global, x_cpf, x_npf, x_sv], [alltruth], [])`` - features,
        truth and an empty weight list.  ``weighterobjects`` and
        ``istraining`` are not used by this implementation.
    """
    from DeepJetCore.preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles
    import numpy
    from DeepJetCore.stopwatch import stopwatch
    timer = stopwatch()
    swall = stopwatch()
    import ROOT

    # Wait for the file to become readable (timeout argument: 120).
    fileTimeOut(filename, 120)
    root_file = ROOT.TFile(filename)
    self.nsamples = root_file.Get("deepntuplizer/tree").GetEntries()
    print('took ', timer.getAndReset(), ' seconds for getting tree entries')

    def pad_particles(group):
        # Per-particle padding for branch group `group`, no normalisation.
        return MeanNormZeroPadParticles(filename, None,
                                        self.branches[group],
                                        self.branchcutoffs[group],
                                        self.nsamples)

    # split for convolutional network
    x_global = MeanNormZeroPad(filename, None,
                               [self.branches[0]],
                               [self.branchcutoffs[0]],
                               self.nsamples)
    x_cpf = pad_particles(1)
    x_npf = pad_particles(2)
    x_sv = pad_particles(3)
    print('took ', timer.getAndReset(), ' seconds for mean norm and zero padding (C module)')

    Tuple = self.readTreeFromRootToTuple(filename)
    alltruth = self.reduceTruth(Tuple[self.truthclasses])

    print(x_global.shape, self.nsamples)

    return [x_global, x_cpf, x_npf, x_sv], [alltruth], []
def readFromRootFile(self, filename, TupleMeanStd, weighter):
    """Convert one ROOT file into classification + pT-correction arrays.

    Features are built without normalisation (``None`` is passed as the
    mean/std argument).  The regression target is the per-entry ratio
    ``gen_pt_WithNu / jet_corr_pt``.  ``self.x`` holds the four feature
    arrays plus the reconstructed pT, ``self.y`` the reduced truth and the
    correction factor.  ``self._normalize_input_`` is called last; what it
    changes (e.g. ``self.w``) is not visible here.

    The correction factor is computed with a single vectorised division
    instead of a Python loop over all entries; the result is still a
    float64 array of length ``self.nsamples``.
    """
    from DeepJetCore.preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles
    import numpy
    from DeepJetCore.stopwatch import stopwatch
    sw = stopwatch()
    import ROOT

    # Wait for the file to become readable (timeout argument: 120).
    fileTimeOut(filename, 120)
    rfile = ROOT.TFile(filename)
    tree = rfile.Get("deepntuplizer/tree")
    self.nsamples = tree.GetEntries()
    print('took ', sw.getAndReset(), ' seconds for getting tree entries')

    # split for convolutional network
    x_global = MeanNormZeroPad(filename, None,
                               [self.branches[0]],
                               [self.branchcutoffs[0]],
                               self.nsamples)
    x_cpf = MeanNormZeroPadParticles(filename, None,
                                     self.branches[1],
                                     self.branchcutoffs[1],
                                     self.nsamples)
    x_etarel = MeanNormZeroPadParticles(filename, None,
                                        self.branches[2],
                                        self.branchcutoffs[2],
                                        self.nsamples)
    x_sv = MeanNormZeroPadParticles(filename, None,
                                    self.branches[3],
                                    self.branchcutoffs[3],
                                    self.nsamples)
    print('took ', sw.getAndReset(), ' seconds for mean norm and zero padding (C module)')

    npy_array = self.readTreeFromRootToTuple(filename)

    reg_truth = npy_array['gen_pt_WithNu'].view(numpy.ndarray)
    reco_pt = npy_array['jet_corr_pt'].view(numpy.ndarray)

    # Element-wise ratio for the first nsamples entries, written into a
    # pre-allocated float64 buffer so dtype and length match the old loop.
    correctionfactor = numpy.zeros(self.nsamples)
    correctionfactor[:] = (reg_truth[:self.nsamples]
                           / reco_pt[:self.nsamples])

    truthtuple = npy_array[self.truthclasses]
    alltruth = self.reduceTruth(truthtuple)

    self.x = [x_global, x_cpf, x_etarel, x_sv, reco_pt]
    self.y = [alltruth, correctionfactor]
    self._normalize_input_(weighter, npy_array)
def readFromRootFile(self, filename, TupleMeanStd, weighter):
    """Build rec-hit map inputs and density-layer targets from one file.

    ``self.x`` receives the output of ``createRecHitMap``, ``self.y`` the
    ``rechit_total_fraction`` density layers from ``createDensityLayers``
    and ``self.w`` an all-ones array shaped like the target.
    ``self.nsamples`` is finally set to the length of the global feature
    array, which is otherwise unused.  ``weighter`` is not used.
    """
    # the first part is standard, no changes needed
    from DeepJetCore.preprocessing import MeanNormApply, createDensityLayers, createDensityMap, MeanNormZeroPad, \
        MeanNormZeroPadParticles

    # Wait for the file to become readable (timeout argument: 120).
    fileTimeOut(filename, 120)
    root_file = ROOT.TFile(filename)
    self.nsamples = root_file.Get("deepntuplizer/tree").GetEntries()

    # Debug output of the first mean/std entry and its length.
    print(TupleMeanStd[0])
    print(len(TupleMeanStd[0]))

    x_globalbase = MeanNormZeroPad(filename, TupleMeanStd,
                                   [self.branches[0]],
                                   [self.branchcutoffs[0]],
                                   self.nsamples)

    # flatten everything out for now
    hit_map = createRecHitMap(filename, self.nsamples,
                              nbins=13, width=0.2,
                              maxlayers=55, maxhitsperpixel=6)

    fraction_layers = createDensityLayers(
        filename,
        TupleMeanStd,
        inbranches=['rechit_total_fraction'],
        modes=['sum'],
        layerbranch='rechit_layer',
        maxlayers=55,
        layeroffset=1,
        nevents=self.nsamples,
        dimension1=['rechit_eta', 'seed_eta', 13, 0.2],
        dimension2=['rechit_phi', 'seed_phi', 13, 0.2],
        counterbranch='nrechits',
        scales=[1])

    print("Hey", np.shape(fraction_layers), np.shape(hit_map))

    # The tree is still converted, although the result is not used below.
    Tuple = self.readTreeFromRootToTuple(filename)

    self.nsamples = len(x_globalbase)
    self.w = [np.ones_like(fraction_layers)]
    self.x = [hit_map]
    self.y = [fraction_layers]
def convertFromSourceFile(self, filename, weighterobjects, istraining):
    """Convert one ROOT file into ``SimpleArray`` features and truth.

    A single branch ``'x'`` (cut-off 1, no normalisation) is read as the
    feature array; the truth is reduced from the tuple read with branches
    ``class1``, ``class2`` and ``x``.

    Returns:
        ``([features], [truth], [])`` where both entries are
        ``SimpleArray`` objects named ``"features0"`` and ``"truth"``.
        ``weighterobjects`` and ``istraining`` are not used here.
    """
    from DeepJetCore.preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles
    import numpy
    from DeepJetCore.stopwatch import stopwatch
    timer = stopwatch()
    swall = stopwatch()
    import ROOT

    # Wait for the file to become readable (timeout argument: 120).
    fileTimeOut(filename, 120)
    root_file = ROOT.TFile(filename)
    self.nsamples = root_file.Get("deepntuplizer/tree").GetEntries()
    print('took ', timer.getAndReset(), ' seconds for getting tree entries')

    # split for convolutional network
    x_global = MeanNormZeroPad(filename, None, ['x'], [1], self.nsamples)
    print('took ', timer.getAndReset(), ' seconds for mean norm and zero padding (C module)')

    Tuple = self.readTreeFromRootToTuple(filename,
                                         branches=['class1', 'class2', 'x'])
    alltruth = self.reduceTruth(Tuple[self.truthclasses])

    self.nsamples = x_global.shape[0]
    print(x_global.shape, alltruth.shape, self.nsamples)

    truth = SimpleArray(alltruth, name="truth")
    feat = SimpleArray(x_global, name="features0")

    return [feat], [truth], []
def readFromRootFile(self, filename, TupleMeanStd, weighter):
    """Convert one ROOT file into a single global feature array and truth.

    The first branch group is padded without normalisation (``None`` is
    passed instead of ``TupleMeanStd``).  ``self.x`` and ``self.y`` receive
    the features and the reduced truth; ``self.w`` is left empty and
    ``self.nsamples`` is set to the number of feature rows.
    """
    from DeepJetCore.preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles
    import numpy
    from DeepJetCore.stopwatch import stopwatch
    timer = stopwatch()
    swall = stopwatch()
    import ROOT

    # Wait for the file to become readable (timeout argument: 120).
    fileTimeOut(filename, 120)
    root_file = ROOT.TFile(filename)
    self.nsamples = root_file.Get("deepntuplizer/tree").GetEntries()
    print('took ', timer.getAndReset(), ' seconds for getting tree entries')

    # split for convolutional network
    x_global = MeanNormZeroPad(filename, None,
                               [self.branches[0]],
                               [self.branchcutoffs[0]],
                               self.nsamples)
    print('took ', timer.getAndReset(), ' seconds for mean norm and zero padding (C module)')

    Tuple = self.readTreeFromRootToTuple(filename)
    alltruth = self.reduceTruth(Tuple[self.truthclasses])

    n_rows = x_global.shape[0]
    print('reduced content to ',
          int(float(n_rows) / float(self.nsamples) * 100), '%')
    self.nsamples = n_rows

    print(x_global.shape, self.nsamples)

    self.w = []
    self.x = [x_global]
    self.y = [alltruth]
def readFromRootFile(self, filename, TupleMeanStd, weighter):
    """Convert the ROOT ntuple in ``filename`` to the training format.

    ``self.nsamples`` is taken from the length of the converted tree,
    ``self.x`` holds the single array from ``MeanNormZeroPad`` and
    ``self.y`` the truth-class columns stacked and transposed.  No weights
    are produced (``self.w`` is empty) and ``weighter`` is not used.
    """
    # this function defines how to convert the root ntuple to the training format
    # options are not yet described here
    feature_array = self.readTreeFromRootToTuple(filename)

    # call this in the end
    self.nsamples = len(feature_array)

    x_all = MeanNormZeroPad(filename, TupleMeanStd, self.branches,
                            self.branchcutoffs, self.nsamples)

    truth_columns = numpy.vstack(feature_array[self.truthclasses])

    self.x = [x_all]  # list of feature numpy arrays
    self.y = [truth_columns.T]  # list of target numpy arrays (truth)
    self.w = []  # list of weight arrays. One for each truth target
def readFromRootFile(self, filename, TupleMeanStd, weighter):
    """Convert one ROOT file into four feature arrays, truth and weights.

    Features are normalised with ``TupleMeanStd``.  In remove mode the
    keep-indices from ``weighter`` are reduced by the ``isUndefined`` branch
    and every array is restricted to entries with ``notremoves > 0``.
    Weights come from ``weighter.getJetWeights`` when ``self.weight`` is
    set, else from the keep-indices in remove mode, else they are all 1.

    Sets ``self.w``, ``self.x``, ``self.y`` and ``self.nsamples``.
    """
    from DeepJetCore.preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles
    import numpy
    from DeepJetCore.stopwatch import stopwatch
    timer = stopwatch()
    swall = stopwatch()
    import ROOT

    # Wait for the file to become readable (timeout argument: 120).
    fileTimeOut(filename, 120)
    root_file = ROOT.TFile(filename)
    self.nsamples = root_file.Get("deepntuplizer/tree").GetEntries()
    print('took ', timer.getAndReset(), ' seconds for getting tree entries')

    def pad_particles(group):
        # Per-particle padding for branch group `group`.
        return MeanNormZeroPadParticles(filename, TupleMeanStd,
                                        self.branches[group],
                                        self.branchcutoffs[group],
                                        self.nsamples)

    # split for convolutional network
    x_global = MeanNormZeroPad(filename, TupleMeanStd,
                               [self.branches[0]],
                               [self.branchcutoffs[0]],
                               self.nsamples)
    x_cpf = pad_particles(1)
    x_npf = pad_particles(2)
    x_sv = pad_particles(3)
    print('took ', timer.getAndReset(), ' seconds for mean norm and zero padding (C module)')

    Tuple = self.readTreeFromRootToTuple(filename)

    if self.remove:
        notremoves = weighter.createNotRemoveIndices(Tuple)
        notremoves -= Tuple['isUndefined']
        print('took ', timer.getAndReset(), ' to create remove indices')

    if self.weight:
        weights = weighter.getJetWeights(Tuple)
    elif self.remove:
        weights = notremoves
    else:
        print('neither remove nor weight')
        weights = numpy.ones(self.nsamples)

    alltruth = self.reduceTruth(Tuple[self.truthclasses])

    if self.remove:
        print('remove')
        keep = notremoves > 0
        weights = weights[keep]
        x_global = x_global[keep]
        x_cpf = x_cpf[keep]
        x_npf = x_npf[keep]
        x_sv = x_sv[keep]
        alltruth = alltruth[keep]

    n_rows = x_global.shape[0]
    print('reduced content to ',
          int(float(n_rows) / float(self.nsamples) * 100), '%')
    self.nsamples = n_rows

    print(x_global.shape, self.nsamples)

    self.w = [weights]
    self.x = [x_global, x_cpf, x_npf, x_sv]
    self.y = [alltruth]
def readFromRootFile(self, filename, TupleMeanStd, weighter):
    """Convert one ROOT file into ID + energy-regression training arrays.

    Builds a global feature array and a rec-hit density map, reads the ID
    truth, the ``self.regtruth`` energy and ``totalrechit_energy`` (both
    divided by 100).  In remove mode every array is passed through the
    ``augmentation`` helpers (``duplicate8`` / ``augmentRotationalSymmetry8``
    / ``evaluate8``), a list of particle-type flags is subtracted from the
    keep-indices and only entries with ``notremoves > 0`` survive.

    Outside remove mode nothing is filtered and the weights stay all-zero.

    Sets ``self.w`` (the same weights twice), ``self.x``, ``self.y`` and
    ``self.nsamples``.
    """
    #the first part is standard, no changes needed
    from DeepJetCore.preprocessing import MeanNormApply, createDensityLayers, createDensityMap, MeanNormZeroPad, MeanNormZeroPadParticles
    import numpy
    import ROOT

    # Wait for the file to become readable (timeout argument: 120).
    fileTimeOut(filename, 120)
    root_file = ROOT.TFile(filename)
    self.nsamples = root_file.Get("deepntuplizer/tree").GetEntries()

    print("1")
    x_globalbase = MeanNormZeroPad(filename, TupleMeanStd,
                                   [self.branches[0]],
                                   [self.branchcutoffs[0]],
                                   self.nsamples)
    print("2")

    #flatten everything out for now
    x_chmapbase = createDensityLayers(
        filename,
        TupleMeanStd,
        inbranches=['rechit_energy', 'rechit_layer', 'rechit_time'],
        modes=['sum', 'single', 'average'],
        layerbranch='rechit_layer',
        maxlayers=55,
        layeroffset=1,
        nevents=self.nsamples,
        dimension1=['rechit_eta', 'seed_eta', 13, 0.2],
        dimension2=['rechit_phi', 'seed_phi', 13, 0.2],
        counterbranch='nrechits',
        scales=[1, 50, 1])

    #training data
    print("3")
    Tuple = self.readTreeFromRootToTuple(filename)

    idtruthtuple = self.reduceTruth(Tuple[self.truthclasses])
    #simple by-hand scaling to around 0 with a width of max about 1
    energytruth = numpy.array(Tuple[self.regtruth]) / 100.
    totalrecenergy = numpy.array(Tuple['totalrechit_energy']) / 100.

    weights = numpy.zeros(len(idtruthtuple))
    notremoves = numpy.ones(totalrecenergy.shape[0])

    if self.remove:
        from augmentation import augmentRotationalSymmetry8, duplicate8, evaluate8
        x_global = duplicate8(x_globalbase)
        x_chmap = augmentRotationalSymmetry8(x_chmapbase)
        notremoves = evaluate8(weighter.createNotRemoveIndices, Tuple)
        weights = duplicate8(weighter.getJetWeights(Tuple))
        totalrecenergy = duplicate8(totalrecenergy)
        energytruth = duplicate8(energytruth)
        idtruthtuple = duplicate8(idtruthtuple)
        # Entries carrying any of these flags are vetoed.
        for flag in ('isFake', 'isEta', 'isElectron', 'isMuon', 'isTau',
                     'isPionZero', 'isPionCharged', 'isProton',
                     'isKaonCharged', 'isOther'):
            notremoves -= duplicate8(Tuple[flag])
    else:
        notremoves -= Tuple['isFake']
        notremoves -= Tuple['isEta']
        x_global = x_globalbase
        x_chmap = x_chmapbase

    print("4")
    # no need for changes above

    # Reduce the map by one dimension: per the original author's note the
    # axes are x_chmap[shower][eta][phi][layer][colours]; only layer index
    # 15 is kept and the resulting length-1 axes are squeezed away.
    x_chmap = numpy.squeeze(x_chmap[:, :, :, 15:16, :])

    before = len(x_global)

    if self.remove:
        keep = notremoves > 0
        weights = weights[keep]
        x_global = x_global[keep]
        x_chmap = x_chmap[keep]
        idtruthtuple = idtruthtuple[keep]
        energytruth = energytruth[keep]
        totalrecenergy = totalrecenergy[keep]

    print("5")
    print('reduced to ' + str(len(x_global)) + ' of ' + str(before))
    self.nsamples = len(x_global)

    self.w = [weights, weights]
    self.x = [x_global, x_chmap, totalrecenergy]
    self.y = [idtruthtuple, energytruth]
def convertFromSourceFile(self, filename, weighterobjects, istraining):
    """Produce the numpy training arrays from one ROOT file.

    Builds one global and six particle-candidate feature arrays without
    normalisation, reads the truth branches with uproot, casts everything
    to C-ordered float32, optionally drops entries rejected by the weighter
    (training only) and replaces non-finite feature values by 0.

    Returns:
        ``(features, [truth], [])`` where ``features`` is the list
        ``[global, neutral, charged, photon, electron, muon, SV]``.

    Note: ``self.remove`` is switched off permanently on the instance when
    ``istraining`` is false.
    """
    from DeepJetCore.Weighter import Weighter
    from DeepJetCore.stopwatch import stopwatch
    timer = stopwatch()
    swall = stopwatch()

    if not istraining:
        self.remove = False

    print('reading ' + filename)

    import ROOT
    from root_numpy import tree2array, root2array
    # Wait for the file to become readable (timeout argument: 120).
    fileTimeOut(filename, 120)
    root_file = ROOT.TFile(filename)
    self.nsamples = root_file.Get("tree").GetEntries()

    from DeepJetCore.preprocessing import MeanNormZeroPad, MeanNormZeroPadParticles

    print('padding ' + filename)

    def pad_particles(branches, cutoff):
        # 2nd argument None: means no normalisation
        return MeanNormZeroPadParticles(filename, None, branches, cutoff,
                                        self.nsamples)

    features = [MeanNormZeroPad(filename, None,
                                [self.global_branches], [1],
                                self.nsamples)]
    features.append(pad_particles(self.pfCand_neutral_branches,
                                  self.npfCand_neutral))
    features.append(pad_particles(self.pfCand_charged_branches,
                                  self.npfCand_charged))
    features.append(pad_particles(self.pfCand_photon_branches,
                                  self.npfCand_photon))
    features.append(pad_particles(self.pfCand_electron_branches,
                                  self.npfCand_electron))
    features.append(pad_particles(self.pfCand_muon_branches,
                                  self.npfCand_muon))
    features.append(pad_particles(self.SV_branches, self.nSV))

    import uproot3 as uproot
    urfile = uproot.open(filename)["tree"]

    # One column per truth branch.
    truth = np.concatenate(
        [np.expand_dims(urfile.array(name), axis=1)
         for name in self.truth_branches],
        axis=1)

    # important, float32 and C-type!
    truth = truth.astype(dtype='float32', order='C')
    features = [feat.astype(dtype='float32', order='C') for feat in features]

    if self.remove:
        needed = [self.weightbranchX, self.weightbranchY]
        needed.extend(self.truth_branches)
        needed.extend(self.undefTruth)
        fileTimeOut(filename, 120)
        # returns a structured np array
        for_remove = root2array(filename,
                                treename="tree",
                                stop=None,
                                branches=needed)
        notremoves = weighterobjects['weigther'].createNotRemoveIndices(
            for_remove)
        print('took ', timer.getAndReset(), ' to create remove indices')

        keep = notremoves > 0
        features = [feat[keep] for feat in features]
        truth = truth[keep]

    n_rows = features[0].shape[0]
    print('Weighter reduced content to ',
          int(float(n_rows) / float(self.nsamples) * 100), '%')

    print('removing nans')
    features = [np.where(np.isfinite(feat), feat, 0) for feat in features]

    return features, [truth], []
def readFromRootFile(self, filename, TupleMeanStd, weighter):
    """Convert one ROOT file into ID + energy-regression training arrays.

    Builds a global feature array and a rec-hit map, reads the ID truth,
    the ``self.regtruth`` energy and ``totalrechit_energy`` (both divided
    by 100).  In remove mode every array is passed through the
    ``augmentation`` helpers (``duplicateImage`` / ``mirrorInPhi`` /
    ``evaluateTwice``), the ``isFake`` and ``isEta`` flags are subtracted
    from the keep-indices and only entries with ``notremoves > 0`` survive.

    Outside remove mode nothing is filtered and the weights stay all-zero.

    Sets ``self.w`` (the same weights twice), ``self.x``, ``self.y`` and
    ``self.nsamples``.
    """
    #the first part is standard, no changes needed
    from DeepJetCore.preprocessing import MeanNormZeroPad
    from converters import createRecHitMap
    import numpy
    import ROOT

    # Wait for the file to become readable (timeout argument: 120).
    fileTimeOut(filename, 120)
    root_file = ROOT.TFile(filename)
    self.nsamples = root_file.Get("deepntuplizer/tree").GetEntries()

    x_globalbase = MeanNormZeroPad(filename, TupleMeanStd,
                                   [self.branches[0]],
                                   [self.branchcutoffs[0]],
                                   self.nsamples)

    x_chmapbase = createRecHitMap(filename, self.nsamples,
                                  nbins=13, width=0.10,
                                  maxlayers=52, maxhitsperpixel=6)

    #training data
    Tuple = self.readTreeFromRootToTuple(filename)

    idtruthtuple = self.reduceTruth(Tuple[self.truthclasses])
    #simple by-hand scaling to around 0 with a width of max about 1
    energytruth = numpy.array(Tuple[self.regtruth]) / 100.
    totalrecenergy = numpy.array(Tuple['totalrechit_energy']) / 100.

    weights = numpy.zeros(len(idtruthtuple))
    notremoves = numpy.ones(totalrecenergy.shape[0])

    if self.remove:
        from augmentation import mirrorInPhi, duplicateImage, evaluateTwice
        x_global = duplicateImage(x_globalbase)
        x_chmap = mirrorInPhi(x_chmapbase)
        notremoves = evaluateTwice(weighter.createNotRemoveIndices, Tuple)
        weights = duplicateImage(weighter.getJetWeights(Tuple))
        totalrecenergy = duplicateImage(totalrecenergy)
        energytruth = duplicateImage(energytruth)
        idtruthtuple = duplicateImage(idtruthtuple)
        # Entries flagged as fake or eta are vetoed.
        for flag in ('isFake', 'isEta'):
            notremoves -= duplicateImage(Tuple[flag])
    else:
        notremoves -= Tuple['isFake']
        notremoves -= Tuple['isEta']
        x_global = x_globalbase
        x_chmap = x_chmapbase

    before = len(x_global)

    if self.remove:
        keep = notremoves > 0
        weights = weights[keep]
        x_global = x_global[keep]
        x_chmap = x_chmap[keep]
        idtruthtuple = idtruthtuple[keep]
        energytruth = energytruth[keep]
        totalrecenergy = totalrecenergy[keep]

    print('reduced to ' + str(len(x_global)) + ' of ' + str(before))
    self.nsamples = len(x_global)

    self.w = [weights, weights]
    self.x = [x_global, x_chmap, totalrecenergy]
    self.y = [idtruthtuple, energytruth]
def readFromRootFile(self, filename, TupleMeanStd, weighter):
    """Convert one ROOT file into four feature arrays, truth and weights.

    Steps, in order:

    1. Build normalised features and read the tree.
    2. Get keep-indices (remove mode, minus ``isUndefined``) and weights.
    3. In weight mode with a ``reducedtruthmap`` attribute, divide each
       weight by the number of classes merged into its reduced class.
    4. In remove mode, randomly drop entries so that every (reduced) class
       is thinned by ``lowest_count / class_count``.
    5. Apply the keep-indices (remove mode), drop non-positive weights
       (weight mode), then drop entries with no or with several truth
       classes set.

    Sets ``self.w``, ``self.x``, ``self.y`` and ``self.nsamples``.
    Step 4 draws from ``np.random.ranf`` and is therefore only reproducible
    if the NumPy global random state is seeded by the caller.
    """
    from DeepJetCore.preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles
    import ROOT

    # Wait for the file to become readable (timeout argument: 60).
    fileTimeOut(filename, 60)
    root_file = ROOT.TFile(filename)
    self.nsamples = root_file.Get(self.treename).GetEntries()

    def pad_particles(group):
        # Per-particle padding for branch group `group`.
        return MeanNormZeroPadParticles(filename, TupleMeanStd,
                                        self.branches[group],
                                        self.branchcutoffs[group],
                                        self.nsamples)

    x_global = MeanNormZeroPad(filename, TupleMeanStd,
                               [self.branches[0]],
                               [self.branchcutoffs[0]],
                               self.nsamples)
    x_cpf = pad_particles(1)
    x_npf = pad_particles(2)
    x_sv = pad_particles(3)

    Tuple = self.readTreeFromRootToTuple(filename)

    undef = Tuple['isUndefined']
    if self.remove:
        notremoves = weighter.createNotRemoveIndices(Tuple)
        notremoves -= undef

    if self.weight:
        weights = weighter.getJetWeights(Tuple)
    elif self.remove:
        weights = notremoves
    else:
        weights = np.ones(self.nsamples)

    truthtuple = Tuple[self.truthclasses]
    alltruth = self.reduceTruth(truthtuple)

    # scale down by number of classes in a reduced class
    if self.weight and hasattr(self, 'reducedtruthmap'):
        for i, row in enumerate(alltruth):
            for t, reduced_class in enumerate(self.reducedtruthclasses):
                if row[t] == 1:
                    weights[i] = weights[i] * 1. / len(
                        self.reducedtruthmap[reduced_class])

    def thin_classes(rows, class_names, counts):
        # Randomly veto entries so each class is kept with probability
        # lowest/count; one random number is drawn per set class flag.
        lowest = min(counts)
        for i, row in enumerate(rows):
            for t, _name in enumerate(class_names):
                if not row[t]:
                    continue
                keep = float(lowest) / counts[t]
                rand = np.random.ranf()
                if rand > keep:
                    notremoves[i] = 0

    # remove jets to have the same counts
    if self.remove:
        if hasattr(self, 'reducedtruthmap'):
            # Total count per reduced class = sum over its member classes.
            reduced_counts = [
                sum([
                    weighter.totalcounts[t]
                    for t, truth in enumerate(self.truthclasses)
                    if truth in self.reducedtruthmap[rt]
                ])
                for rt in self.reducedtruthclasses
            ]
            thin_classes(alltruth, self.reducedtruthclasses, reduced_counts)
        else:
            thin_classes(truthtuple, self.truthclasses, weighter.totalcounts)

    if self.remove:
        keep_mask = notremoves > 0
        weights = weights[keep_mask]
        x_global = x_global[keep_mask]
        x_cpf = x_cpf[keep_mask]
        x_npf = x_npf[keep_mask]
        x_sv = x_sv[keep_mask]
        alltruth = alltruth[keep_mask]

    if self.weight:
        positive = weights > 0
        x_global = x_global[positive]
        x_cpf = x_cpf[positive]
        x_npf = x_npf[positive]
        x_sv = x_sv[positive]
        alltruth = alltruth[positive]
        weights = weights[positive]

    # remove samples with no predicted class
    labelled = ~np.all(alltruth == 0, axis=1)
    alltruth = alltruth[labelled]
    x_global = x_global[labelled]
    x_cpf = x_cpf[labelled]
    x_npf = x_npf[labelled]
    x_sv = x_sv[labelled]
    weights = weights[labelled]

    # remove samples with multiple predicted classes
    unambiguous = ~(np.sum(alltruth, axis=1) > 1)
    alltruth = alltruth[unambiguous]
    x_global = x_global[unambiguous]
    x_cpf = x_cpf[unambiguous]
    x_npf = x_npf[unambiguous]
    x_sv = x_sv[unambiguous]
    weights = weights[unambiguous]

    n_rows = x_global.shape[0]
    logging.info('reduced content to {}%'.format(
        int(float(n_rows) / float(self.nsamples) * 100)))
    self.nsamples = n_rows

    # Flatten weights that arrive with more than one dimension.
    if weights.ndim > 1:
        weights = weights.reshape(weights.shape[0])

    self.w = [weights]
    self.x = [x_global, x_cpf, x_npf, x_sv]
    self.y = [alltruth]
def readFromRootFile(self, filename, TupleMeanStd, weighter):
    """Convert one ROOT file into ID + energy training arrays, with plots.

    Builds a global feature array and a rec-hit density map, optionally
    renders 4D plots / animations for entries whose ``seed_phi`` is below
    -9.1, filters by the weighter's keep-indices in remove mode and writes
    an energy control histogram to ``gifs/<filename with '/'->'_'>_eshape.pdf``.

    Sets ``self.w`` (the same weights twice), ``self.x`` and ``self.y``.
    The weights are initialised to zero and never changed in this method.

    The histogram call no longer passes ``normed=False``: that keyword was
    removed from matplotlib, and ``False`` was its default, so the plot is
    unchanged on versions that still accept it.
    """
    # the first part is standard, no changes needed
    from DeepJetCore.preprocessing import MeanNormApply, createDensityLayers, createDensityMap, MeanNormZeroPad, \
        MeanNormZeroPadParticles
    import numpy
    import ROOT

    # Wait for the file to become readable (timeout argument: 120).
    fileTimeOut(filename, 120)
    rfile = ROOT.TFile(filename)
    tree = rfile.Get("deepntuplizer/tree")
    self.nsamples = tree.GetEntries()

    x_global = MeanNormZeroPad(filename, TupleMeanStd,
                               [self.branches[0]],
                               [self.branchcutoffs[0]],
                               self.nsamples)

    # flatten everything out for now
    x_chmap = createDensityLayers(
        filename,
        TupleMeanStd,
        inbranches=['rechit_energy', 'rechit_layer', 'rechit_seeddr'],
        modes=['sum', 'single', 'average'],
        layerbranch='rechit_layer',
        maxlayers=55,
        layeroffset=1,
        nevents=self.nsamples,
        dimension1=['rechit_eta', 'seed_eta', 23, 0.35],
        dimension2=['rechit_phi', 'seed_phi', 23, 0.35],
        counterbranch='nrechits')

    Tuple = self.readTreeFromRootToTuple(filename)

    # Entries selected for the debug plots below.
    select = Tuple['seed_phi'] < -9.1

    from plotting import plot4d, rotanimate
    giffile = 'gifs/' + filename.replace('/', '_')

    for i, chosen in enumerate(select):
        if not chosen:
            continue

        prefix = giffile + "_" + str(i)
        ax, _ = plot4d(x_chmap[i][:, :, :, :1], prefix + "energy_.pdf",
                       'etabin', 'layer', 'phibin')
        rotanimate(ax, prefix + '_energy.gif', delay=5, prefix=giffile)
        print('energy')

        # NOTE: this is a view, so clamping negative values also modifies
        # x_chmap itself for the plotted entries.
        timeentries = x_chmap[i][:, :, :, 3:4]
        timeentries[timeentries < 0] = 0.00000000001
        ax, _ = plot4d(timeentries, prefix + "time_.pdf",
                       'etabin', 'layer', 'phibin')
        rotanimate(ax, prefix + '_time.gif', delay=5, prefix=giffile)
        print('time')

    idtruthtuple = self.reduceTruth(Tuple[self.truthclasses])
    energytruth = numpy.array(Tuple[self.regtruth])

    weights = numpy.zeros(len(idtruthtuple))

    notremoves = numpy.ones(x_global.shape[0])
    if self.remove:
        notremoves = weighter.createNotRemoveIndices(Tuple)
    # Entries flagged as fake or eta are vetoed.
    notremoves -= Tuple['isFake']
    notremoves -= Tuple['isEta']

    before = len(x_global)

    if self.remove:
        keep = notremoves > 0
        weights = weights[keep]
        x_global = x_global[keep]
        x_chmap = x_chmap[keep]
        idtruthtuple = idtruthtuple[keep]
        energytruth = energytruth[keep]

    print('reduced to ' + str(len(x_global)) + ' of ' + str(before))

    # make control plot for energy
    import matplotlib.pyplot as plt
    plt.hist(energytruth.flatten(), bins=30)
    plt.savefig(giffile + "_eshape.pdf")

    self.w = [weights, weights]
    self.x = [x_global, x_chmap]
    self.y = [idtruthtuple, energytruth]
def convertFromSourceFile(self, filename, weighterobjects, istraining):
    """Produce the numpy training arrays for one ROOT file.

    The features come from ``MeanNormZeroPad`` over the global, track,
    eta_rel and vtx branch groups; the truth is read with uproot and merged
    into four columns. When ``self.remove`` is set, entries rejected by
    ``weighterobjects['weigther']`` or flagged ``isUndefined`` are dropped.

    Args:
        filename: path of the ROOT file to convert.
        weighterobjects: mapping; ``'means'`` is handed to the
            preprocessing call and ``'weigther'`` builds the removal mask.
        istraining: when false, ``self.remove`` is switched off first.

    Returns:
        ``([x_global], [truth], [])`` - feature list, truth list and an
        empty weight list.
    """
    from DeepJetCore.Weighter import Weighter
    from DeepJetCore.stopwatch import stopwatch
    sw = stopwatch()
    swall = stopwatch()

    if not istraining:
        self.remove = False

    def merge_truth(flags):
        """Sum the per-flavour flag arrays into four truth columns."""
        leptonic_b = flags[b'isLeptonicB'] + flags[b'isLeptonicB_C']
        light_quarks = flags[b'isUD'] + flags[b'isS']
        columns = (
            flags[b'isB'] + leptonic_b,
            flags[b'isBB'] + flags[b'isGBB'],
            flags[b'isC'] + flags[b'isCC'] + flags[b'isGCC'],
            light_quarks + flags[b'isG'],
        )
        return np.vstack(columns).transpose()

    treepath = "deepntuplizer/tree"

    print('reading ' + filename)

    import ROOT
    from root_numpy import tree2array, root2array
    fileTimeOut(filename, 120)  # let eos recover before opening the file
    rfile = ROOT.TFile(filename)
    tree = rfile.Get(treepath)
    self.nsamples = tree.GetEntries()

    from DeepJetCore.preprocessing import MeanNormZeroPad, MeanNormZeroPadParticles
    branch_groups = [
        self.global_branches,
        self.track_branches,
        self.eta_rel_branches,
        self.vtx_branches,
    ]
    group_sizes = [1, self.n_track, self.n_eta_rel, self.n_vtx]
    x_global = MeanNormZeroPad(filename, weighterobjects['means'],
                               branch_groups, group_sizes, self.nsamples)

    import uproot3 as uproot
    events = uproot.open(filename)[treepath]
    truth = merge_truth(events.arrays(self.truth_branches))

    # important, float32 and C-type!
    truth = truth.astype(dtype='float32', order='C')
    x_global = x_global.astype(dtype='float32', order='C')

    if self.remove:
        wanted = [self.weightbranchX, self.weightbranchY]
        wanted.extend(self.truth_branches)
        wanted.extend(self.undefTruth)

        fileTimeOut(filename, 120)
        for_remove = root2array(filename,
                                treename=treepath,
                                stop=None,
                                branches=wanted)
        notremoves = weighterobjects['weigther'].createNotRemoveIndices(
            for_remove)
        notremoves -= for_remove['isUndefined']
        print('took ', sw.getAndReset(), ' to create remove indices')

        print('remove')
        keep = notremoves > 0
        x_global = x_global[keep]
        truth = truth[keep]

        newnsamp = x_global.shape[0]
        print('reduced content to ',
              int(float(newnsamp) / float(self.nsamples) * 100), '%')

    print('remove nans')
    # Non-finite values and magnitudes of 100000 or more are zeroed.
    usable = np.logical_and(np.isfinite(x_global),
                            (np.abs(x_global) < 100000.0))
    x_global = np.where(usable, x_global, 0)

    return [x_global], [truth], []
def convertFromSourceFile(self, filename, weighterobjects, istraining):
    """Convert one source file into feature, truth and weight lists.

    This is the only really mandatory function (unless writeFromSourceFile
    is defined). It defines the conversion rule from an input source file
    to the lists of training arrays: a list of input feature arrays, a list
    of truth arrays and an optional list of weight arrays (same number of
    entries as the truth). If no weights are needed the weight list can be
    left empty. Everything is finally converted to numpy arrays.

    In this example, different ways of reading files are deliberately
    mixed: ROOT for the entry count, uproot for the truth and the
    DeepJetCore helpers for the features.

    Args:
        filename: path of the ROOT file holding a tree named ``tree``.
        weighterobjects: not used by this example.
        istraining: not used by this example.

    Returns:
        ``([x_global, x_pfCand_neutral], [truth], [])``.
    """
    print('reading ' + filename)

    import ROOT
    fileTimeOut(filename, 120)  # let eos recover before opening the file
    rfile = ROOT.TFile(filename)
    tree = rfile.Get("tree")
    self.nsamples = tree.GetEntries()

    import uproot3 as uproot
    urfile = uproot.open(filename)["tree"]

    # One column per truth flag, in this order.
    truth_flags = (
        "lep_isPromptId_Training",
        "lep_isNonPromptId_Training",
        "lep_isFakeId_Training",
    )
    truth_columns = [
        np.expand_dims(urfile.array(flag), axis=1) for flag in truth_flags
    ]
    truth = np.concatenate(truth_columns, axis=1)
    # important, float32 and C-type!
    truth = truth.astype(dtype='float32', order='C')

    # NOTE(review): 'lep_pfRelIso03_all' is listed twice below - confirm
    # whether the second entry was meant to be a different branch.
    self.global_branches = [
        'lep_pt',
        'lep_eta',
        'lep_phi',
        'lep_mediumId',
        'lep_miniPFRelIso_all',
        'lep_pfRelIso03_all',
        'lep_sip3d',
        'lep_dxy',
        'lep_dz',
        'lep_charge',
        'lep_dxyErr',
        'lep_dzErr',
        'lep_ip3d',
        'lep_jetPtRelv2',
        'lep_jetRelIso',
        'lep_miniPFRelIso_chg',
        'lep_mvaLowPt',
        'lep_nStations',
        'lep_nTrackerLayers',
        'lep_pfRelIso03_all',
        'lep_pfRelIso03_chg',
        'lep_pfRelIso04_all',
        'lep_ptErr',
        'lep_segmentComp',
        'lep_tkRelIso',
        'lep_tunepRelPt',
    ]

    self.pfCand_neutral_branches = [
        'pfCand_neutral_eta',
        'pfCand_neutral_phi',
        'pfCand_neutral_pt',
        'pfCand_neutral_puppiWeight',
        'pfCand_neutral_puppiWeightNoLep',
        'pfCand_neutral_ptRel',
        'pfCand_neutral_deltaR',
    ]
    self.npfCand_neutral = 5

    # Reading the global branches one by one through urfile.array() and
    # concatenating them was noted to work as well; the DeepJetCore helpers
    # are used here instead.
    from DeepJetCore.preprocessing import MeanNormZeroPad, MeanNormZeroPadParticles

    global_features = MeanNormZeroPad(
        filename, None, [self.global_branches], [1], self.nsamples)
    neutral_features = MeanNormZeroPadParticles(
        filename, None, self.pfCand_neutral_branches,
        self.npfCand_neutral, self.nsamples)

    x_global = global_features.astype(dtype='float32', order='C')
    x_pfCand_neutral = neutral_features.astype(dtype='float32', order='C')

    # list of feature arrays, list of truth arrays, list of weight arrays
    return [x_global, x_pfCand_neutral], [truth], []
def readFromRootFile(self, filename, TupleMeanStd, weighter):
    """Convert one ROOT file into inputs with 8x8 density and count maps.

    Reads ``deepntuplizer/tree`` from *filename* and builds
    - the global and the three particle-list inputs from
      ``self.branches[0..3]`` / ``self.branchcutoffs[0..3]``,
    - density maps of the ``Cpfcan_*`` and ``Npfcan_*`` branches and the
      matching count maps, concatenated along axis 3 into one map input,
    - the class truth plus ``gen_pt_WithNu`` as regression truth, with
      ``jet_corr_pt`` passed along as an additional input.

    Args:
        filename: path of the ROOT file to convert.
        TupleMeanStd: normalisation information forwarded unchanged to the
            DeepJetCore preprocessing helpers.
        weighter: object providing ``createNotRemoveIndices`` and
            ``getJetWeights``.

    Side effects:
        Sets ``self.nsamples``, ``self.x``, ``self.y`` and ``self.w``.
    """
    from DeepJetCore.preprocessing import (MeanNormZeroPad,
                                           MeanNormZeroPadParticles,
                                           createCountMap, createDensity)
    import numpy
    from DeepJetCore.stopwatch import stopwatch
    sw = stopwatch()

    import ROOT
    fileTimeOut(filename, 120)  # let eos recover before opening the file
    rfile = ROOT.TFile(filename)
    tree = rfile.Get("deepntuplizer/tree")
    self.nsamples = tree.GetEntries()

    print('took ', sw.getAndReset(), ' seconds for getting tree entries')

    # split for convolutional network
    x_global = MeanNormZeroPad(filename, TupleMeanStd,
                               [self.branches[0]],
                               [self.branchcutoffs[0]], self.nsamples)
    x_cpf = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                     self.branches[1],
                                     self.branchcutoffs[1], self.nsamples)
    x_npf = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                     self.branches[2],
                                     self.branchcutoffs[2], self.nsamples)
    x_sv = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                    self.branches[3],
                                    self.branchcutoffs[3], self.nsamples)

    # here the difference starts
    nbins = 8

    x_chmap = createDensity(
        filename,
        inbranches=['Cpfcan_ptrel', 'Cpfcan_etarel', 'Cpfcan_phirel'],
        modes=['sum', 'average', 'average'],
        nevents=self.nsamples,
        dimension1=['Cpfcan_eta', 'jet_eta', nbins, 0.45],
        dimension2=['Cpfcan_phi', 'jet_phi', nbins, 0.45],
        counterbranch='nCpfcand',
        offsets=[-1, -0.5, -0.5])

    # The Npfcan_* map must be counted with nNpfcand, as its count map
    # below already is; it previously used nCpfcand, copied from the
    # Cpfcan_* call above.
    x_neumap = createDensity(
        filename,
        inbranches=['Npfcan_ptrel', 'Npfcan_etarel', 'Npfcan_phirel'],
        modes=['sum', 'average', 'average'],
        nevents=self.nsamples,
        dimension1=['Npfcan_eta', 'jet_eta', nbins, 0.45],
        dimension2=['Npfcan_phi', 'jet_phi', nbins, 0.45],
        counterbranch='nNpfcand',
        offsets=[-1, -0.5, -0.5])

    x_chcount = createCountMap(filename, TupleMeanStd,
                               self.nsamples,
                               ['Cpfcan_eta', 'jet_eta', nbins, 0.45],
                               ['Cpfcan_phi', 'jet_phi', nbins, 0.45],
                               'nCpfcand')

    x_neucount = createCountMap(filename, TupleMeanStd,
                                self.nsamples,
                                ['Npfcan_eta', 'jet_eta', nbins, 0.45],
                                ['Npfcan_phi', 'jet_phi', nbins, 0.45],
                                'nNpfcand')

    print('took ', sw.getAndReset(),
          ' seconds for mean norm and zero padding (C module)')

    Tuple = self.readTreeFromRootToTuple(filename)

    if self.remove:
        notremoves = weighter.createNotRemoveIndices(Tuple)
        notremoves -= Tuple['isUndefined']
        print('took ', sw.getAndReset(), ' to create remove indices')

    if self.weight:
        weights = weighter.getJetWeights(Tuple)
    elif self.remove:
        weights = notremoves
    else:
        print('neither remove nor weight')
        weights = numpy.ones(self.nsamples)

    alltruth = self.reduceTruth(Tuple[self.truthclasses])

    regtruth = Tuple['gen_pt_WithNu']
    regreco = Tuple['jet_corr_pt']

    if self.remove:
        print('remove')
        keep = notremoves > 0
        weights = weights[keep]
        x_global = x_global[keep]
        x_cpf = x_cpf[keep]
        x_npf = x_npf[keep]
        x_sv = x_sv[keep]
        x_chmap = x_chmap[keep]
        x_neumap = x_neumap[keep]
        x_chcount = x_chcount[keep]
        x_neucount = x_neucount[keep]
        alltruth = alltruth[keep]
        regreco = regreco[keep]
        regtruth = regtruth[keep]

    newnsamp = x_global.shape[0]
    print('reduced content to ',
          int(float(newnsamp) / float(self.nsamples) * 100), '%')
    self.nsamples = newnsamp

    x_map = numpy.concatenate((x_chmap, x_neumap, x_chcount, x_neucount),
                              axis=3)

    self.w = [weights, weights]
    self.x = [x_global, x_cpf, x_npf, x_sv, x_map, regreco]
    self.y = [alltruth, regtruth]
def convertFromSourceFile(self, filename, weighterobjects, istraining):
    """Produce the numpy training arrays for one ROOT file.

    Builds seven feature arrays (the global branches plus the neutral,
    charged, photon, electron, muon and SV collections) and a three-column
    truth array, all as C-ordered float32. When ``self.remove`` is set,
    entries rejected by ``weighterobjects['weigther']`` are dropped.
    Non-finite feature values are replaced by 0.

    Args:
        filename: path of the ROOT file holding a tree named ``tree``.
        weighterobjects: mapping; ``'weigther'`` builds the removal mask.
        istraining: when false, ``self.remove`` is switched off first.

    Returns:
        ``(features, [truth], [])`` where ``features`` lists the seven
        arrays in the order given above; the weight list is empty.
    """
    from DeepJetCore.Weighter import Weighter
    from DeepJetCore.stopwatch import stopwatch
    sw = stopwatch()
    swall = stopwatch()

    if not istraining:
        self.remove = False

    print('reading '+filename)

    import ROOT
    from root_numpy import tree2array, root2array
    fileTimeOut(filename, 120)  # let eos recover before opening the file
    rfile = ROOT.TFile(filename)
    tree = rfile.Get("tree")
    self.nsamples = tree.GetEntries()

    from DeepJetCore.preprocessing import MeanNormZeroPad, MeanNormZeroPadParticles

    # Global features first, then one array per collection, in the order
    # in which they are returned.
    features = [
        MeanNormZeroPad(filename, None,
                        [self.global_branches], [1], self.nsamples)
    ]
    collections = (
        (self.pfCand_neutral_branches, self.npfCand_neutral),
        (self.pfCand_charged_branches, self.npfCand_charged),
        (self.pfCand_photon_branches, self.npfCand_photon),
        (self.pfCand_electron_branches, self.npfCand_electron),
        (self.pfCand_muon_branches, self.npfCand_muon),
        (self.SV_branches, self.nSV),
    )
    for branches, max_entries in collections:
        features.append(
            MeanNormZeroPadParticles(filename, None, branches,
                                     max_entries, self.nsamples))

    import uproot3 as uproot
    urfile = uproot.open(filename)["tree"]
    truth_flags = (
        "lep_isPromptId_Training",
        "lep_isNonPromptId_Training",
        "lep_isFakeId_Training",
    )
    truth = np.concatenate(
        [np.expand_dims(urfile.array(flag), axis=1) for flag in truth_flags],
        axis=1)

    # important, float32 and C-type!
    truth = truth.astype(dtype='float32', order='C')
    features = [arr.astype(dtype='float32', order='C') for arr in features]

    if self.remove:
        wanted = [self.weightbranchX, self.weightbranchY]
        wanted.extend(self.truth_branches)
        wanted.extend(self.undefTruth)

        fileTimeOut(filename, 120)
        for_remove = root2array(filename,
                                treename="tree",
                                stop=None,
                                branches=wanted)
        notremoves = weighterobjects['weigther'].createNotRemoveIndices(for_remove)
        # subtracting for_remove['isUndefined'] is currently disabled
        print('took ', sw.getAndReset(), ' to create remove indices')

        print("notremoves", notremoves, "<- notremoves")
        keep = notremoves > 0
        features = [arr[keep] for arr in features]
        truth = truth[keep]

        newnsamp = features[0].shape[0]
        print('reduced content to ', int(float(newnsamp)/float(self.nsamples)*100),'%')

    print('remove nans')
    features = [np.where(np.isfinite(arr), arr, 0) for arr in features]

    return features, [truth], []
def readFromRootFile(self, filename, TupleMeanStd, weighter):
    """Convert one ROOT file into global features plus a 20x20 map input.

    Reads ``deepntuplizer/tree`` from *filename*, builds the global input
    from ``self.branches[0]``, a density map and a count map for each of
    the ``Cpfcan_*`` and ``Npfcan_*`` branch families (concatenated along
    axis 3), and the class / regression truth.

    Args:
        filename: path of the ROOT file to convert.
        TupleMeanStd: normalisation information forwarded unchanged to the
            DeepJetCore preprocessing helpers.
        weighter: object providing ``createNotRemoveIndices`` and
            ``getJetWeights``.

    Side effects:
        Sets ``self.nsamples``, ``self.x``, ``self.y`` and ``self.w``.
    """
    from DeepJetCore.preprocessing import MeanNormApply, MeanNormZeroPad, createDensityMap, createCountMap, MeanNormZeroPadParticles
    import numpy
    from DeepJetCore.stopwatch import stopwatch
    sw = stopwatch()
    swall = stopwatch()

    import ROOT
    fileTimeOut(filename, 120)  # let eos recover before opening the file
    rfile = ROOT.TFile(filename)
    tree = rfile.Get("deepntuplizer/tree")
    self.nsamples = tree.GetEntries()

    print('took ', sw.getAndReset(), ' seconds for getting tree entries')

    # split for convolutional network
    x_global = MeanNormZeroPad(filename, TupleMeanStd,
                               [self.branches[0]],
                               [self.branchcutoffs[0]], self.nsamples)

    # here the difference starts
    def density_and_count(ptrel, eta, phi, counter, puppiw):
        """Build the density map, then the count map, for one family."""
        density = createDensityMap(filename, TupleMeanStd,
                                   ptrel,
                                   self.nsamples,
                                   [eta, 'jet_eta', 20, 0.5],
                                   [phi, 'jet_phi', 20, 0.5],
                                   counter, -1,
                                   weightbranch=puppiw)
        count = createCountMap(filename, TupleMeanStd,
                               self.nsamples,
                               [eta, 'jet_eta', 20, 0.5],
                               [phi, 'jet_phi', 20, 0.5],
                               counter)
        return density, count

    x_chmap, x_chcount = density_and_count(
        'Cpfcan_ptrel', 'Cpfcan_eta', 'Cpfcan_phi', 'nCpfcand',
        'Cpfcan_puppiw')
    x_neumap, x_neucount = density_and_count(
        'Npfcan_ptrel', 'Npfcan_eta', 'Npfcan_phi', 'nNpfcand',
        'Npfcan_puppiw')

    print('took ', sw.getAndReset(),
          ' seconds for mean norm and zero padding (C module)')

    Tuple = self.readTreeFromRootToTuple(filename)

    if self.remove:
        notremoves = weighter.createNotRemoveIndices(Tuple)
        notremoves -= Tuple['isUndefined']
        print('took ', sw.getAndReset(), ' to create remove indices')

    if self.weight:
        weights = weighter.getJetWeights(Tuple)
    elif self.remove:
        weights = notremoves
    else:
        print('neither remove nor weight')
        weights = numpy.ones(self.nsamples)

    pttruth = Tuple[self.regtruth]
    ptreco = Tuple[self.regreco]

    alltruth = self.reduceTruth(Tuple[self.truthclasses])

    x_map = numpy.concatenate((x_chmap, x_chcount, x_neumap, x_neucount),
                              axis=3)

    if self.remove:
        print('remove')
        keep = notremoves > 0
        weights = weights[keep]
        x_global = x_global[keep]
        x_map = x_map[keep]
        alltruth = alltruth[keep]
        pttruth = pttruth[keep]
        ptreco = ptreco[keep]

    newnsamp = x_global.shape[0]
    print('reduced content to ',
          int(float(newnsamp) / float(self.nsamples) * 100), '%')
    self.nsamples = newnsamp

    print(x_global.shape, self.nsamples)

    # NOTE(review): one weight array is stored for two truth arrays -
    # confirm that downstream code expects this.
    self.w = [weights]
    self.x = [x_global, x_map, ptreco]
    self.y = [alltruth, pttruth]