def readFromRootFile(self, filename, TupleMeanStd, weighter):
    """Read one ROOT ntuple file and fill self.w / self.x / self.y / self.z.

    Builds per-jet, zero-padded particle arrays from the branch groups
    declared in ``self.branches`` / ``self.branchcutoffs``, then optionally
    re-weights and/or removes jets to flatten the pt/eta spectra.

    Parameters
    ----------
    filename : str
        Path of the ROOT file to read.
    TupleMeanStd : recarray
        Mean/std record used by the MeanNorm* preprocessing helpers.
    weighter : Weighter
        Provides jet weights and keep/drop indices for resampling.

    Cleanup vs. the previous revision: the unused ``truthtuple`` slice,
    the unused ``MeanNormApply``/``MeanNormZeroPad`` imports and dead
    commented-out code were removed; behavior is otherwise unchanged.
    """
    from DeepJetCore.preprocessing import MeanNormZeroPadParticles, ZeroPadParticles
    import numpy
    import ROOT

    fileTimeOut(filename, 120)  # give eos 2 minutes to recover
    rfile = ROOT.TFile(filename)
    tree = rfile.Get("deepntuplizer/tree")
    self.nsamples = tree.GetEntries()

    # Per-jet (njets, nparticles, nproperties) arrays, one per branch group.
    # x_glb is intentionally NOT mean-normalised (raw values are kept).
    x_glb = ZeroPadParticles(filename, TupleMeanStd,
                             self.branches[0], self.branchcutoffs[0],
                             self.nsamples)
    x_db = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                    self.branches[1], self.branchcutoffs[1],
                                    self.nsamples)
    # Raw (un-normalised) copy of the same branch group, exported via self.z.
    x_db_raw = ZeroPadParticles(filename, TupleMeanStd,
                                self.branches[1], self.branchcutoffs[1],
                                self.nsamples)
    x_cpf = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                     self.branches[2], self.branchcutoffs[2],
                                     self.nsamples)
    x_sv = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                    self.branches[3], self.branchcutoffs[3],
                                    self.nsamples)

    # now, some jets are removed to avoid pt and eta biases
    Tuple = self.readTreeFromRootToTuple(filename)

    if self.remove:
        # jets are removed until the shapes in eta and pt match the
        # reference truth class chosen by the weighter
        notremoves = weighter.createNotRemoveIndices(Tuple)

    if self.weight:
        weights = weighter.getJetWeights(Tuple)
    elif self.remove:
        weights = notremoves
    else:
        print('neither remove nor weight')
        weights = numpy.empty(self.nsamples)
        weights.fill(1.)

    alltruth = self.reduceTruth(Tuple)
    # undef > 0 when a jet belongs to at least one reduced truth class.
    undef = numpy.sum(alltruth, axis=1)

    if self.weight or self.remove:
        print('Training samples, remove undefined')
        weights = weights[undef > 0]
        x_glb = x_glb[undef > 0]
        x_db = x_db[undef > 0]
        x_db_raw = x_db_raw[undef > 0]
        x_sv = x_sv[undef > 0]
        x_cpf = x_cpf[undef > 0]
        alltruth = alltruth[undef > 0]

    if self.remove:
        print('Removing to match weighting')
        notremoves = notremoves[undef > 0]
        weights = weights[notremoves > 0]
        x_glb = x_glb[notremoves > 0]
        x_db = x_db[notremoves > 0]
        x_db_raw = x_db_raw[notremoves > 0]
        x_sv = x_sv[notremoves > 0]
        x_cpf = x_cpf[notremoves > 0]
        alltruth = alltruth[notremoves > 0]

    if self.weight:
        print('Adding weights, removing events with 0 weight')
        x_glb = x_glb[weights > 0]
        x_db = x_db[weights > 0]
        x_db_raw = x_db_raw[weights > 0]
        x_sv = x_sv[weights > 0]
        x_cpf = x_cpf[weights > 0]
        alltruth = alltruth[weights > 0]
        # Weights get adjusted last so they can be used as an index
        weights = weights[weights > 0]

    newnsamp = x_glb.shape[0]
    print('Keeping {}% of input events in the training dataCollection'.
          format(int(float(newnsamp) / float(self.nsamples) * 100)))
    self.nsamples = newnsamp

    # fill everything
    self.w = [weights]
    self.x = [x_db, x_cpf, x_sv]
    self.z = [x_glb, x_db_raw]
    self.y = [alltruth]
def readFromRootFile(self, filename, TupleMeanStd, weighter):
    """Read one ROOT ntuple file and fill self.w / self.x / self.y / self.z.

    Variant with two inputs: a raw zero-padded global block (self.z) and a
    mean-normalised "db" block (self.x). Jets may be removed to flatten
    pt/eta spectra, or per-jet weights attached instead.
    """
    #the first part is standard, no changes needed
    from DeepJetCore.preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles, ZeroPadParticles
    import numpy
    import ROOT

    fileTimeOut(filename, 120)  #give eos 2 minutes to recover
    rfile = ROOT.TFile(filename)
    tree = rfile.Get("deepntuplizer/tree")
    self.nsamples = tree.GetEntries()

    # (njets, nparticles, nproperties) arrays: raw global block, and a
    # mean-normalised copy of the second branch group.
    x_glb = ZeroPadParticles(filename, TupleMeanStd,
                             self.branches[0],
                             self.branchcutoffs[0], self.nsamples)
    x_db = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                    self.branches[1],
                                    self.branchcutoffs[1], self.nsamples)

    # now, some jets are removed to avoid pt and eta biases
    Tuple = self.readTreeFromRootToTuple(filename)
    #if self.remove:
    # jets are removed until the shapes in eta and pt are the same as
    # the truth class 'fj_isNonBB'
    # NOTE(review): the guard above is commented out, so the indices are
    # computed unconditionally here even when self.remove is False.
    notremoves = weighter.createNotRemoveIndices(Tuple)

    if self.weight:
        weights = weighter.getJetWeights(Tuple)
    elif self.remove:
        weights = notremoves
    else:
        print('neither remove nor weight')
        weights = numpy.empty(self.nsamples)
        weights.fill(1.)

    # create all collections:
    #truthtuple = Tuple[self.truthclasses]
    alltruth = self.reduceTruth(Tuple)
    # undef > 0 when a jet belongs to at least one reduced truth class.
    undef = numpy.sum(alltruth, axis=1)
    #weights=weights[undef > 0]
    #x_glb=x_glb[undef > 0]
    #x_db=x_db[undef > 0]
    #alltruth=alltruth[undef > 0]
    # NOTE(review): only `notremoves` is cut on undef > 0 while weights,
    # x_glb, x_db and alltruth keep full length; the notremoves > 0 masks
    # below only line up if every jet satisfies undef > 0 — TODO confirm.
    notremoves = notremoves[undef > 0]
    # NOTE(review): this reassigned `undef` is never read again (dead
    # store); presumably a leftover from an alternative selection.
    undef = Tuple['fj_isNonCC'] * Tuple['sample_isQCD'] * Tuple[
        'fj_isQCD'] + Tuple['fj_isCC'] * Tuple['fj_isH']

    # remove the entries to get same jet shapes
    if self.remove:
        print('remove')
        weights = weights[notremoves > 0]
        x_glb = x_glb[notremoves > 0]
        x_db = x_db[notremoves > 0]
        alltruth = alltruth[notremoves > 0]

    newnsamp = x_glb.shape[0]
    print('reduced content to ',
          int(float(newnsamp) / float(self.nsamples) * 100), '%')
    self.nsamples = newnsamp

    # fill everything
    self.w = [weights]
    self.x = [x_db]
    self.z = [x_glb]
    self.y = [alltruth]
def readFromRootFile(self, filename, TupleMeanStd, weighter):
    """Read one ROOT ntuple file and fill self.w / self.x / self.y / self.z.

    Variant that uses only zero-padding (no mean/std preprocessing) and
    appends the reduced truth classes as extra fields on the tuple so the
    weighter can resample on them.
    """
    #the first part is standard, no changes needed
    from DeepJetCore.preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles, ZeroPadParticles
    import numpy
    import ROOT

    fileTimeOut(filename, 60)  #give eos 1 minutes to recover
    rfile = ROOT.TFile(filename)
    tree = rfile.Get("deepntuplizer/tree")
    self.nsamples = tree.GetEntries()

    #the definition of what to do with the branches
    # those are the global branches (jet pt etc)
    # they should be just glued to each other in one vector
    # and zero padded (and mean subtracted and normalised)
    # x_global = MeanNormZeroPad(filename,TupleMeanStd,
    #                   [self.branches[0]],
    #                   [self.branchcutoffs[0]],self.nsamples)
    # the second part (the pf candidates) should be treated particle wise
    # an array with (njets, nparticles, nproperties) is created
    #
    # MeanNormZeroPad[Particles] does preprocessing, ZeroPad[Particles] does not and we normalzie it with batch_norm layer
    # MeanNorm* does not work when putting the model into cmssw
    x_glb = ZeroPadParticles(filename, TupleMeanStd,
                             self.branches[0],
                             self.branchcutoffs[0], self.nsamples)
    # NOTE(review): x_db and x_db_raw are built with identical arguments —
    # intentional per the comment above (normalisation happens in-model).
    x_db = ZeroPadParticles(filename, TupleMeanStd,
                            self.branches[1],
                            self.branchcutoffs[1], self.nsamples)
    x_db_raw = ZeroPadParticles(filename, TupleMeanStd,
                                self.branches[1],
                                self.branchcutoffs[1], self.nsamples)
    x_cpf = ZeroPadParticles(filename, TupleMeanStd,
                             self.branches[2],
                             self.branchcutoffs[2], self.nsamples)
    x_sv = ZeroPadParticles(filename, TupleMeanStd,
                            self.branches[3],
                            self.branchcutoffs[3], self.nsamples)

    # Load tuple
    Tuple = self.readTreeFromRootToTuple(filename)

    # Append classes constructed in reduceTruth fcn so the weighter can
    # see them as ordinary named fields.
    truth_array = Tuple[self.truthclasses]
    import numpy.lib.recfunctions as rfn
    reduced_truth = self.reduceTruth(truth_array).transpose()
    for i, label in enumerate(self.reducedtruthclasses):
        Tuple = rfn.append_fields(Tuple, label, reduced_truth[i])

    if self.remove:
        notremoves = weighter.createNotRemoveIndices(Tuple)

    if self.weight:
        weights = weighter.getJetWeights(Tuple)
    elif self.remove:
        weights = notremoves  #weighter.createNotRemoveIndices(Tuple)
    else:
        print('neither remove nor weight')
        weights = numpy.empty(self.nsamples)
        weights.fill(1.)

    # Second reduceTruth call on the same input; presumably deterministic,
    # so this equals reduced_truth before the transpose — TODO confirm.
    used_truth = self.reduceTruth(truth_array)
    # undef > 0 when a jet belongs to at least one reduced truth class.
    undef = numpy.sum(used_truth, axis=1)

    if self.remove:
        print('Removing to match weighting')
        # NOTE(review): `notremoves` is cut on undef > 0 while weights and
        # the x arrays keep full length; the notremoves > 0 masks below
        # only line up if every jet satisfies undef > 0 — TODO confirm.
        notremoves = notremoves[undef > 0]
        weights = weights[notremoves > 0]
        x_glb = x_glb[notremoves > 0]
        x_db = x_db[notremoves > 0]
        x_db_raw = x_db_raw[notremoves > 0]
        x_sv = x_sv[notremoves > 0]
        x_cpf = x_cpf[notremoves > 0]
        used_truth = used_truth[notremoves > 0]

    if self.weight:
        print('Adding weights, removing events with 0 weight')
        x_glb = x_glb[weights > 0]
        x_db = x_db[weights > 0]
        x_db_raw = x_db_raw[weights > 0]
        x_sv = x_sv[weights > 0]
        x_cpf = x_cpf[weights > 0]
        used_truth = used_truth[weights > 0]
        # Weights get adjusted last so they can be used as an index
        weights = weights[weights > 0]

    newnsamp = x_glb.shape[0]
    print('Keeping {}% of input events in the dataCollection'.format(
        int(float(newnsamp) / float(self.nsamples) * 100)))
    self.nsamples = newnsamp

    # fill everything
    self.w = [weights]
    self.x = [x_db, x_cpf, x_sv]
    self.z = [x_glb, x_db_raw]
    self.y = [used_truth]
def readFromRootFile(self, filename, TupleMeanStd, weighter):
    """Read one ROOT ntuple file and fill self.w / self.x / self.y / self.z.

    Variant reading a flat "tree" (no deepntuplizer directory) with a raw
    global block, a raw db block (x_dbr) and a mean-normalised db block.

    Fixes vs. the previous revision:
    - removed the unused local ``empty = numpy.empty(self.nsamples)``;
    - ``x_dbr`` is now filtered by the same undef/notremoves selections as
      ``x_glb``/``x_db`` — previously it stayed full length, so the arrays
      stored in ``self.z`` had inconsistent lengths.
    """
    from DeepJetCore.preprocessing import MeanNormZeroPadParticles, ZeroPadParticles
    import numpy
    import ROOT

    fileTimeOut(filename, 120)  # give eos 2 minutes to recover
    rfile = ROOT.TFile(filename)
    # NOTE: flat "tree", unlike the sibling readers' "deepntuplizer/tree".
    tree = rfile.Get("tree")
    self.nsamples = tree.GetEntries()

    # Per-jet (njets, nparticles, nproperties) arrays.
    x_glb = ZeroPadParticles(filename, TupleMeanStd,
                             self.branches[0], self.branchcutoffs[0],
                             self.nsamples)
    # Raw (un-normalised) copy of the second branch group, exported via self.z.
    x_dbr = ZeroPadParticles(filename, TupleMeanStd,
                             self.branches[1], self.branchcutoffs[1],
                             self.nsamples)
    x_db = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                    self.branches[1], self.branchcutoffs[1],
                                    self.nsamples)

    Tuple = self.readTreeFromRootToTuple(filename)
    notremoves = weighter.createNotRemoveIndices(Tuple)

    if self.weight:
        weights = weighter.getJetWeights(Tuple)
    elif self.remove:
        weights = notremoves
    else:
        print('neither remove nor weight')
        weights = numpy.empty(self.nsamples)
        weights.fill(1.)

    # create all collections:
    truthtuple = Tuple[self.truthclasses]
    alltruth = self.reduceTruth(truthtuple)

    # Drop jets that belong to no reduced truth class (undef == 0).
    undef = numpy.sum(alltruth, axis=1)
    weights = weights[undef > 0]
    x_glb = x_glb[undef > 0]
    x_db = x_db[undef > 0]
    x_dbr = x_dbr[undef > 0]  # keep self.z arrays the same length
    alltruth = alltruth[undef > 0]

    # remove the entries to get same jet shapes
    if self.remove:
        print('remove')
        notremoves = notremoves[undef > 0]
        weights = weights[notremoves > 0]
        x_glb = x_glb[notremoves > 0]
        x_db = x_db[notremoves > 0]
        x_dbr = x_dbr[notremoves > 0]  # keep self.z arrays the same length
        alltruth = alltruth[notremoves > 0]

    newnsamp = x_glb.shape[0]
    print('reduced content to ',
          int(float(newnsamp) / float(self.nsamples) * 100), '%')
    self.nsamples = newnsamp

    # fill everything
    self.w = [weights]
    self.x = [x_db]
    self.z = [x_glb, x_dbr]
    self.y = [alltruth]
def readFromRootFile(self, filename, TupleMeanStd, weighter):
    """Read one ROOT ntuple file and fill self.w / self.x / self.y / self.z.

    Zero-padding-only variant (no mean/std preprocessing) producing the
    glb/db/db_raw/cpf/sv arrays.

    Fixes vs. the previous revision:
    - removed ``x_pf``, an identical and never-used duplicate of ``x_cpf``
      (pure wasted work), and the unused ``truthtuple`` slice;
    - added the ``undef > 0`` pre-filter that the sibling implementation
      applies: previously only ``notremoves`` was cut on undef > 0, so the
      subsequent ``notremoves > 0`` mask no longer matched the full-length
      weights/x arrays (a no-op when every jet is defined, an indexing
      error otherwise).
    """
    from DeepJetCore.preprocessing import ZeroPadParticles
    import numpy
    import ROOT

    fileTimeOut(filename, 120)  # give eos 2 minutes to recover
    rfile = ROOT.TFile(filename)
    tree = rfile.Get("deepntuplizer/tree")
    self.nsamples = tree.GetEntries()

    # Per-jet (njets, nparticles, nproperties) arrays; all raw zero-padded.
    x_glb = ZeroPadParticles(filename, TupleMeanStd,
                             self.branches[0], self.branchcutoffs[0],
                             self.nsamples)
    x_db = ZeroPadParticles(filename, TupleMeanStd,
                            self.branches[1], self.branchcutoffs[1],
                            self.nsamples)
    # Second copy of the same branch group, exported separately via self.z.
    x_db_raw = ZeroPadParticles(filename, TupleMeanStd,
                                self.branches[1], self.branchcutoffs[1],
                                self.nsamples)
    x_cpf = ZeroPadParticles(filename, TupleMeanStd,
                             self.branches[2], self.branchcutoffs[2],
                             self.nsamples)
    x_sv = ZeroPadParticles(filename, TupleMeanStd,
                            self.branches[3], self.branchcutoffs[3],
                            self.nsamples)

    Tuple = self.readTreeFromRootToTuple(filename)

    if self.remove:
        # jets are removed until the shapes in eta and pt match the
        # reference truth class chosen by the weighter
        notremoves = weighter.createNotRemoveIndices(Tuple)

    if self.weight:
        weights = weighter.getJetWeights(Tuple)
    elif self.remove:
        weights = notremoves
    else:
        print('neither remove nor weight')
        weights = numpy.empty(self.nsamples)
        weights.fill(1.)

    alltruth = self.reduceTruth(Tuple)
    # undef > 0 when a jet belongs to at least one reduced truth class.
    undef = numpy.sum(alltruth, axis=1)

    if self.weight or self.remove:
        print('Training samples, remove undefined')
        weights = weights[undef > 0]
        x_glb = x_glb[undef > 0]
        x_db = x_db[undef > 0]
        x_db_raw = x_db_raw[undef > 0]
        x_sv = x_sv[undef > 0]
        x_cpf = x_cpf[undef > 0]
        alltruth = alltruth[undef > 0]

    if self.remove:
        print('Removing to match weighting')
        notremoves = notremoves[undef > 0]
        weights = weights[notremoves > 0]
        x_glb = x_glb[notremoves > 0]
        x_db = x_db[notremoves > 0]
        x_db_raw = x_db_raw[notremoves > 0]
        x_sv = x_sv[notremoves > 0]
        x_cpf = x_cpf[notremoves > 0]
        alltruth = alltruth[notremoves > 0]

    if self.weight:
        print('Adding weights, removing events with 0 weight')
        x_glb = x_glb[weights > 0]
        x_db = x_db[weights > 0]
        x_db_raw = x_db_raw[weights > 0]
        x_sv = x_sv[weights > 0]
        x_cpf = x_cpf[weights > 0]
        alltruth = alltruth[weights > 0]
        # Weights get adjusted last so they can be used as an index
        weights = weights[weights > 0]

    newnsamp = x_glb.shape[0]
    print('Keeping {}% of input events in the training dataCollection'.
          format(int(float(newnsamp) / float(self.nsamples) * 100)))
    self.nsamples = newnsamp

    # fill everything
    self.w = [weights]
    self.x = [x_db, x_cpf, x_sv]
    self.z = [x_glb, x_db_raw]
    self.y = [alltruth]