Example #1
    def readFromRootFile(self, filename, TupleMeanStd, weighter):

        #the first part is standard, no changes needed
        from DeepJetCore.preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles, ZeroPadParticles
        import numpy
        import ROOT

        fileTimeOut(filename, 120)  #give eos 2 minutes to recover
        rfile = ROOT.TFile(filename)
        tree = rfile.Get("deepntuplizer/tree")
        self.nsamples = tree.GetEntries()

        #the definition of what to do with the branches

        # those are the global branches (jet pt etc)
        # they should be just glued to each other in one vector
        # and zero padded (and mean subtracted and normalised)
        #x_global = MeanNormZeroPad(filename,TupleMeanStd,
        #                           [self.branches[0]],
        #                           [self.branchcutoffs[0]],self.nsamples)

        # the second part (the pf candidates) should be treated particle wise
        # an array with (njets, nparticles, nproperties) is created

        x_glb = ZeroPadParticles(filename, TupleMeanStd, self.branches[0],
                                 self.branchcutoffs[0], self.nsamples)

        x_db = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                        self.branches[1],
                                        self.branchcutoffs[1], self.nsamples)

        x_db_raw = ZeroPadParticles(filename, TupleMeanStd, self.branches[1],
                                    self.branchcutoffs[1], self.nsamples)

        x_cpf = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                         self.branches[2],
                                         self.branchcutoffs[2], self.nsamples)

        x_sv = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                        self.branches[3],
                                        self.branchcutoffs[3], self.nsamples)

        # now, some jets are removed to avoid pt and eta biases

        Tuple = self.readTreeFromRootToTuple(filename)
        if self.remove:
            # jets are removed until the shapes in eta and pt are the same as
            # the truth class 'fj_isNonBB'
            notremoves = weighter.createNotRemoveIndices(Tuple)
            #undef=Tuple[self.undefTruth]
        #notremoves-=undef

        if self.weight:
            weights = weighter.getJetWeights(Tuple)
        elif self.remove:
            weights = notremoves  #weighter.createNotRemoveIndices(Tuple)
        else:
            print('neither remove nor weight')
            weights = numpy.empty(self.nsamples)
            weights.fill(1.)

        truthtuple = Tuple[self.truthclasses]
        alltruth = self.reduceTruth(Tuple)
        undef = numpy.sum(alltruth, axis=1)

        if self.weight or self.remove:
            print('Training samples, remove undefined')
            weights = weights[undef > 0]
            x_glb = x_glb[undef > 0]
            x_db = x_db[undef > 0]
            x_db_raw = x_db_raw[undef > 0]
            x_sv = x_sv[undef > 0]
            x_cpf = x_cpf[undef > 0]
            alltruth = alltruth[undef > 0]

        if self.remove:
            print('Removing to match weighting')
            notremoves = notremoves[undef > 0]
            weights = weights[notremoves > 0]
            x_glb = x_glb[notremoves > 0]
            x_db = x_db[notremoves > 0]
            x_db_raw = x_db_raw[notremoves > 0]
            x_sv = x_sv[notremoves > 0]
            x_cpf = x_cpf[notremoves > 0]
            alltruth = alltruth[notremoves > 0]

        if self.weight:
            print('Adding weights, removing events with 0 weight')
            x_glb = x_glb[weights > 0]
            x_db = x_db[weights > 0]
            x_db_raw = x_db_raw[weights > 0]
            x_sv = x_sv[weights > 0]
            x_cpf = x_cpf[weights > 0]
            alltruth = alltruth[weights > 0]
            # Weights get adjusted last so they can be used as an index
            weights = weights[weights > 0]

        newnsamp = x_glb.shape[0]
        print('Keeping {}% of input events in the training dataCollection'.
              format(int(float(newnsamp) / float(self.nsamples) * 100)))
        self.nsamples = newnsamp

        #print("Subsample composition:")
        #for lab in ['fJ_isQCD', 'fj_isH', 'fj_isCC', 'fj_isBB']:
        #	print(numpy.sum((Tuple[lab].view(numpy.ndarray))), lab)
        #for lab, stat in zip(self.reducedtruthclasses, stats):
        #	print(lab, ': {}%'.format(stat))

        # fill everything
        self.w = [weights]
        self.x = [x_db, x_cpf, x_sv]
        self.z = [x_glb, x_db_raw]
        self.y = [alltruth]
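
Each selection step above applies the same boolean mask to the weights, every input array and the truth in lockstep, so the collections stay row-aligned. A minimal sketch of that pattern on toy arrays (the apply_mask helper is illustrative, not part of DeepJetCore):

    import numpy

    def apply_mask(mask, arrays):
        # apply one boolean selection to several row-aligned arrays
        return [a[mask] for a in arrays]

    # toy data: 4 jets, two of them with an all-zero (undefined) reduced-truth vector
    alltruth = numpy.array([[1, 0], [0, 0], [0, 1], [0, 0]])
    weights = numpy.array([1.0, 0.5, 2.0, 0.0])
    x_glb = numpy.arange(4).reshape(4, 1)

    undef = numpy.sum(alltruth, axis=1)
    weights, x_glb, alltruth = apply_mask(undef > 0, [weights, x_glb, alltruth])
    print(x_glb.shape)  # (2, 1): only jets with a defined reduced truth are kept
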
Example #2
    def readFromRootFile(self, filename, TupleMeanStd, weighter):

        #the first part is standard, no changes needed
        from DeepJetCore.preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles, ZeroPadParticles
        import numpy
        import ROOT

        fileTimeOut(filename, 120)  #give eos 2 minutes to recover
        rfile = ROOT.TFile(filename)
        tree = rfile.Get("deepntuplizer/tree")
        self.nsamples = tree.GetEntries()

        x_glb = ZeroPadParticles(filename, TupleMeanStd, self.branches[0],
                                 self.branchcutoffs[0], self.nsamples)

        x_db = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                        self.branches[1],
                                        self.branchcutoffs[1], self.nsamples)

        # now, some jets are removed to avoid pt and eta biases

        Tuple = self.readTreeFromRootToTuple(filename)
        #if self.remove:
        # jets are removed until the shapes in eta and pt are the same as
        # the truth class 'fj_isNonBB'
        notremoves = weighter.createNotRemoveIndices(Tuple)
        if self.weight:
            weights = weighter.getJetWeights(Tuple)
        elif self.remove:
            weights = notremoves
        else:
            print('neither remove nor weight')
            weights = numpy.empty(self.nsamples)
            weights.fill(1.)

        # create all collections:
        #truthtuple =  Tuple[self.truthclasses]
        alltruth = self.reduceTruth(Tuple)
        undef = numpy.sum(alltruth, axis=1)
        #weights=weights[undef > 0]
        #x_glb=x_glb[undef > 0]
        #x_db=x_db[undef > 0]
        #alltruth=alltruth[undef > 0]
        # note: filtering notremoves on undef > 0 here assumes every jet has a
        # defined reduced truth; otherwise its length no longer matches the
        # unfiltered x_glb/x_db arrays indexed with it below
        notremoves = notremoves[undef > 0]

        undef = Tuple['fj_isNonCC'] * Tuple['sample_isQCD'] * Tuple[
            'fj_isQCD'] + Tuple['fj_isCC'] * Tuple['fj_isH']

        # remove the entries to get same jet shapes
        if self.remove:
            print('remove')
            weights = weights[notremoves > 0]
            x_glb = x_glb[notremoves > 0]
            x_db = x_db[notremoves > 0]
            alltruth = alltruth[notremoves > 0]

        newnsamp = x_glb.shape[0]
        print('reduced content to ',
              int(float(newnsamp) / float(self.nsamples) * 100), '%')
        self.nsamples = newnsamp

        # fill everything
        self.w = [weights]
        self.x = [x_db]
        self.z = [x_glb]
        self.y = [alltruth]
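
The comments in these readers describe the particle-wise inputs as fixed-size (njets, nparticles, nproperties) arrays: each jet's candidate list is truncated at the branch cutoff and shorter jets are padded with zeros. A conceptual sketch of that padding on toy data (illustrative only, not the DeepJetCore implementation of ZeroPadParticles):

    import numpy

    def zero_pad_particles(jets, nparticles, nproperties):
        # conceptual sketch: pad or truncate per-jet particle lists to a fixed shape
        out = numpy.zeros((len(jets), nparticles, nproperties), dtype=numpy.float32)
        for i, particles in enumerate(jets):
            n = min(len(particles), nparticles)  # the branch cutoff
            if n:
                out[i, :n, :] = particles[:n]
        return out

    # two toy jets with 3 and 1 candidates, 2 properties each, cutoff of 4 particles
    jets = [numpy.array([[1., 2.], [3., 4.], [5., 6.]]),
            numpy.array([[7., 8.]])]
    print(zero_pad_particles(jets, 4, 2).shape)  # (2, 4, 2)
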
Example #3
    def readFromRootFile(self, filename, TupleMeanStd, weighter):

        #the first part is standard, no changes needed
        from DeepJetCore.preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles, ZeroPadParticles
        import numpy
        import ROOT

        fileTimeOut(filename, 60)  #give eos 1 minute to recover
        rfile = ROOT.TFile(filename)
        tree = rfile.Get("deepntuplizer/tree")
        self.nsamples = tree.GetEntries()

        #the definition of what to do with the branches

        # those are the global branches (jet pt etc)
        # they should be just glued to each other in one vector
        # and zero padded (and mean subtracted and normalised)
        # x_global = MeanNormZeroPad(filename,TupleMeanStd,
        #                           [self.branches[0]],
        #                           [self.branchcutoffs[0]],self.nsamples)
        # the second part (the pf candidates) should be treated particle wise
        # an array with (njets, nparticles, nproperties) is created
        #
        # MeanNormZeroPad[Particles] does the preprocessing; ZeroPad[Particles] does not, and we normalise those inputs with a batch_norm layer instead
        # MeanNorm* does not work when putting the model into cmssw

        x_glb = ZeroPadParticles(filename, TupleMeanStd, self.branches[0],
                                 self.branchcutoffs[0], self.nsamples)

        x_db = ZeroPadParticles(filename, TupleMeanStd, self.branches[1],
                                self.branchcutoffs[1], self.nsamples)

        x_db_raw = ZeroPadParticles(filename, TupleMeanStd, self.branches[1],
                                    self.branchcutoffs[1], self.nsamples)

        x_cpf = ZeroPadParticles(filename, TupleMeanStd, self.branches[2],
                                 self.branchcutoffs[2], self.nsamples)

        x_sv = ZeroPadParticles(filename, TupleMeanStd, self.branches[3],
                                self.branchcutoffs[3], self.nsamples)

        # Load tuple
        Tuple = self.readTreeFromRootToTuple(filename)
        # Append classes constructed in reduceTruth fcn
        truth_array = Tuple[self.truthclasses]
        import numpy.lib.recfunctions as rfn
        reduced_truth = self.reduceTruth(truth_array).transpose()
        for i, label in enumerate(self.reducedtruthclasses):
            Tuple = rfn.append_fields(Tuple, label, reduced_truth[i])

        if self.remove:
            notremoves = weighter.createNotRemoveIndices(Tuple)
        if self.weight:
            weights = weighter.getJetWeights(Tuple)
        elif self.remove:
            weights = notremoves  #weighter.createNotRemoveIndices(Tuple)
        else:
            print('neither remove nor weight')
            weights = numpy.empty(self.nsamples)
            weights.fill(1.)

        used_truth = self.reduceTruth(truth_array)
        undef = numpy.sum(used_truth, axis=1)

        if self.remove:
            print('Removing to match weighting')
            notremoves = notremoves[undef > 0]
            weights = weights[notremoves > 0]
            x_glb = x_glb[notremoves > 0]
            x_db = x_db[notremoves > 0]
            x_db_raw = x_db_raw[notremoves > 0]
            x_sv = x_sv[notremoves > 0]
            x_cpf = x_cpf[notremoves > 0]
            used_truth = used_truth[notremoves > 0]

        if self.weight:
            print('Adding weights, removing events with 0 weight')
            x_glb = x_glb[weights > 0]
            x_db = x_db[weights > 0]
            x_db_raw = x_db_raw[weights > 0]
            x_sv = x_sv[weights > 0]
            x_cpf = x_cpf[weights > 0]
            used_truth = used_truth[weights > 0]
            # Weights get adjusted last so they can be used as an index
            weights = weights[weights > 0]

        newnsamp = x_glb.shape[0]
        print('Keeping {}% of input events in the dataCollection'.format(
            int(float(newnsamp) / float(self.nsamples) * 100)))
        self.nsamples = newnsamp

        # fill everything
        self.w = [weights]
        self.x = [x_db, x_cpf, x_sv]
        self.z = [x_glb, x_db_raw]
        self.y = [used_truth]
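
Example #3 is the only variant that writes the reduced truth back into the structured ntuple with numpy.lib.recfunctions.append_fields, presumably so the weighter also sees the reduced classes. A small standalone sketch of that call (the field and class names here are hypothetical, not taken from the ntuple):

    import numpy
    import numpy.lib.recfunctions as rfn

    # toy structured array standing in for the ntuple; 'fj_pt' is only illustrative
    Tuple = numpy.array([(250.,), (480.,)], dtype=[('fj_pt', numpy.float32)])

    # one reduced-truth row per class, each as long as the tuple
    reduced_truth = numpy.array([[1, 0], [0, 1]]).transpose()
    for i, label in enumerate(['class_A', 'class_B']):  # hypothetical class names
        Tuple = rfn.append_fields(Tuple, label, reduced_truth[i])

    print(Tuple.dtype.names)  # ('fj_pt', 'class_A', 'class_B')
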
Example #4
    def readFromRootFile(self, filename, TupleMeanStd, weighter):

        #the first part is standard, no changes needed
        from DeepJetCore.preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles, ZeroPadParticles
        import numpy
        import ROOT

        fileTimeOut(filename, 120)  #give eos 2 minutes to recover
        rfile = ROOT.TFile(filename)
        tree = rfile.Get("tree")
        self.nsamples = tree.GetEntries()

        #the definition of what to do with the branches

        # those are the global branches (jet pt etc)
        # they should be just glued to each other in one vector
        # and zero padded (and mean subtracted and normalised)
        #x_global = MeanNormZeroPad(filename,TupleMeanStd,
        #                           [self.branches[0]],
        #                           [self.branchcutoffs[0]],self.nsamples)

        # the second part (the pf candidates) should be treated particle wise
        # an array with (njets, nparticles, nproperties) is created

        x_glb = ZeroPadParticles(filename, TupleMeanStd, self.branches[0],
                                 self.branchcutoffs[0], self.nsamples)

        x_dbr = ZeroPadParticles(filename, TupleMeanStd, self.branches[1],
                                 self.branchcutoffs[1], self.nsamples)

        x_db = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                        self.branches[1],
                                        self.branchcutoffs[1], self.nsamples)

        Tuple = self.readTreeFromRootToTuple(filename)
        notremoves = weighter.createNotRemoveIndices(Tuple)
        if self.weight:
            weights = weighter.getJetWeights(Tuple)
        elif self.remove:
            weights = notremoves
        else:
            print('neither remove nor weight')
            weights = numpy.empty(self.nsamples)
            weights.fill(1.)
        empty = numpy.empty(self.nsamples)

        # create all collections:
        truthtuple = Tuple[self.truthclasses]
        alltruth = self.reduceTruth(truthtuple)
        undef = numpy.sum(alltruth, axis=1)
        weights = weights[undef > 0]
        x_glb = x_glb[undef > 0]
        x_db = x_db[undef > 0]
        alltruth = alltruth[undef > 0]

        #        print("LENS", len(weights), len(notremoves))
        # remove the entries to get same jet shapes
        if self.remove:
            print('remove')
            notremoves = notremoves[undef > 0]
            weights = weights[notremoves > 0]
            x_glb = x_glb[notremoves > 0]
            x_db = x_db[notremoves > 0]
            alltruth = alltruth[notremoves > 0]

        #newnsamp=x_global.shape[0]
        newnsamp = x_glb.shape[0]
        print('reduced content to ',
              int(float(newnsamp) / float(self.nsamples) * 100), '%')
        self.nsamples = newnsamp

        # fill everything
        self.w = [weights]
        self.x = [x_db]
        self.z = [x_glb, x_dbr]
        self.y = [alltruth]
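
Example #4 keeps both a raw copy (x_dbr) and a mean-normalised copy (x_db) of the same branches; as the comment in Example #3 explains, raw inputs are normalised later by a batch_norm layer because the MeanNorm* preprocessing does not survive the export to cmssw. A conceptual sketch of per-property mean/std normalisation on a zero-padded array (whether padded slots are kept at zero exactly like this is an assumption, not taken from DeepJetCore):

    import numpy

    def mean_norm(x_padded, means, stds):
        # conceptual sketch: (x - mean) / std per property, leaving zero-padded slots at zero
        padded = numpy.all(x_padded == 0, axis=-1, keepdims=True)  # slots with no particle
        normed = (x_padded - means) / stds
        return numpy.where(padded, 0., normed)

    # one toy jet with one real particle and one padded slot, two properties each
    x_raw = numpy.array([[[10., 6.], [0., 0.]]])
    print(mean_norm(x_raw, numpy.array([8., 4.]), numpy.array([2., 2.])))
    # the real slot becomes [1., 1.], the padded slot stays [0., 0.]
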
Example #5
    def readFromRootFile(self, filename, TupleMeanStd, weighter):

        #the first part is standard, no changes needed
        from DeepJetCore.preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles, ZeroPadParticles
        import numpy
        import ROOT

        fileTimeOut(filename, 120)  #give eos 2 minutes to recover
        rfile = ROOT.TFile(filename)
        tree = rfile.Get("deepntuplizer/tree")
        self.nsamples = tree.GetEntries()

        x_glb = ZeroPadParticles(filename, TupleMeanStd, self.branches[0],
                                 self.branchcutoffs[0], self.nsamples)

        x_db = ZeroPadParticles(filename, TupleMeanStd, self.branches[1],
                                self.branchcutoffs[1], self.nsamples)

        x_db_raw = ZeroPadParticles(filename, TupleMeanStd, self.branches[1],
                                    self.branchcutoffs[1], self.nsamples)

        x_pf = ZeroPadParticles(filename, TupleMeanStd, self.branches[2],
                                self.branchcutoffs[2], self.nsamples)

        x_cpf = ZeroPadParticles(filename, TupleMeanStd, self.branches[2],
                                 self.branchcutoffs[2], self.nsamples)

        x_sv = ZeroPadParticles(filename, TupleMeanStd, self.branches[3],
                                self.branchcutoffs[3], self.nsamples)

        Tuple = self.readTreeFromRootToTuple(filename)
        if self.remove:
            # jets are removed until the shapes in eta and pt are the same as
            # the truth class 'fj_isNonBB'
            notremoves = weighter.createNotRemoveIndices(Tuple)
            #undef=Tuple[self.undefTruth]
        #notremoves-=undef

        if self.weight:
            weights = weighter.getJetWeights(Tuple)
        elif self.remove:
            weights = notremoves  #weighter.createNotRemoveIndices(Tuple)
        else:
            print('neither remove nor weight')
            weights = numpy.empty(self.nsamples)
            weights.fill(1.)

        truthtuple = Tuple[self.truthclasses]
        alltruth = self.reduceTruth(Tuple)
        undef = numpy.sum(alltruth, axis=1)

        if self.remove:
            print('Removing to match weighting')
            notremoves = notremoves[undef > 0]
            weights = weights[notremoves > 0]
            x_glb = x_glb[notremoves > 0]
            x_db = x_db[notremoves > 0]
            x_db_raw = x_db_raw[notremoves > 0]
            x_sv = x_sv[notremoves > 0]
            x_cpf = x_cpf[notremoves > 0]
            alltruth = alltruth[notremoves > 0]

        if self.weight:
            print('Adding weights, removing events with 0 weight')
            x_glb = x_glb[weights > 0]
            x_db = x_db[weights > 0]
            x_db_raw = x_db_raw[weights > 0]
            x_sv = x_sv[weights > 0]
            x_cpf = x_cpf[weights > 0]
            alltruth = alltruth[weights > 0]
            # Weights get adjusted last so they can be used as an index
            weights = weights[weights > 0]

        newnsamp = x_glb.shape[0]
        print('Keeping {}% of input events in the training dataCollection'.
              format(int(float(newnsamp) / float(self.nsamples) * 100)))
        self.nsamples = newnsamp

        # fill everything
        self.w = [weights]
        self.x = [x_db, x_cpf, x_sv]
        self.z = [x_glb, x_db_raw]
        self.y = [alltruth]
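
In every variant the same masks are applied to the weights, the inputs, the extra collections and the truth together, so the lists filled at the end are meant to stay aligned with self.nsamples rows. A small illustrative sanity check one could run on the filled object (not part of the original classes):

    def check_alignment(data):
        # illustrative check: every filled collection should have data.nsamples rows
        lengths = {a.shape[0] for group in (data.w, data.x, data.y, data.z) for a in group}
        assert lengths == {data.nsamples}, "w, x, y and z collections are misaligned"
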