Exemple #1
0
    def convertFromSourceFile(self, filename, weighterobjects, istraining):
        """Build the feature and truth arrays for one ROOT file.

        Stores the entry count of ``deepntuplizer/tree`` in ``self.nsamples``
        and creates one global array plus three per-particle arrays from
        ``self.branches[0..3]`` / ``self.branchcutoffs[0..3]``.
        ``weighterobjects`` and ``istraining`` are accepted but not used here.

        Returns:
            ``([x_global, x_cpf, x_npf, x_sv], [alltruth], [])``: the feature
            list, the truth list and an empty weight list.
        """
        from DeepJetCore.preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles
        import numpy
        from DeepJetCore.stopwatch import stopwatch

        step_timer = stopwatch()
        total_timer = stopwatch()  # created but never read in this method

        import ROOT

        # 120 is presumably a timeout in seconds -- confirm against fileTimeOut
        fileTimeOut(filename, 120)
        root_file = ROOT.TFile(filename)
        self.nsamples = root_file.Get("deepntuplizer/tree").GetEntries()

        print('took ', step_timer.getAndReset(), ' seconds for getting tree entries')

        # Global features: branch group 0, wrapped in single-element lists.
        x_global = MeanNormZeroPad(
            filename, None,
            [self.branches[0]], [self.branchcutoffs[0]],
            self.nsamples)

        # Per-particle collections: branch groups 1, 2 and 3, in that order.
        x_cpf, x_npf, x_sv = [
            MeanNormZeroPadParticles(
                filename, None,
                self.branches[group], self.branchcutoffs[group],
                self.nsamples)
            for group in (1, 2, 3)
        ]

        print('took ', step_timer.getAndReset(), ' seconds for mean norm and zero padding (C module)')

        records = self.readTreeFromRootToTuple(filename)
        alltruth = self.reduceTruth(records[self.truthclasses])

        print(x_global.shape, self.nsamples)

        return [x_global, x_cpf, x_npf, x_sv], [alltruth], []
Exemple #2
0
    def readFromRootFile(self, filename, TupleMeanStd, weighter):
        """Read one ROOT file and fill ``self.x`` / ``self.y``.

        Stores the entry count of ``deepntuplizer/tree`` in ``self.nsamples``,
        builds four padded feature arrays from ``self.branches[0..3]`` and
        computes a per-jet regression target ``gen_pt_WithNu / jet_corr_pt``.

        Args:
            filename: path of the ROOT file to read.
            TupleMeanStd: not used here; ``None`` is passed to the padding
                helpers instead.
            weighter: forwarded to ``self._normalize_input_``.

        Side effects:
            Sets ``self.nsamples``, ``self.x`` (four feature arrays plus the
            reconstructed pt) and ``self.y`` (class truth, correction factor),
            then calls ``self._normalize_input_``.
        """
        from DeepJetCore.preprocessing import MeanNormZeroPad, MeanNormZeroPadParticles
        import numpy
        from DeepJetCore.stopwatch import stopwatch

        sw = stopwatch()

        import ROOT

        # 120 is presumably a timeout in seconds -- confirm against fileTimeOut
        fileTimeOut(filename, 120)
        rfile = ROOT.TFile(filename)
        tree = rfile.Get("deepntuplizer/tree")
        self.nsamples = tree.GetEntries()

        print('took ', sw.getAndReset(), ' seconds for getting tree entries')

        # split for convolutional network

        x_global = MeanNormZeroPad(filename, None, [self.branches[0]],
                                   [self.branchcutoffs[0]], self.nsamples)

        x_cpf = MeanNormZeroPadParticles(filename, None, self.branches[1],
                                         self.branchcutoffs[1], self.nsamples)

        x_etarel = MeanNormZeroPadParticles(filename, None, self.branches[2],
                                            self.branchcutoffs[2],
                                            self.nsamples)

        x_sv = MeanNormZeroPadParticles(filename, None, self.branches[3],
                                        self.branchcutoffs[3], self.nsamples)

        print('took ', sw.getAndReset(),
              ' seconds for mean norm and zero padding (C module)')

        npy_array = self.readTreeFromRootToTuple(filename)

        reg_truth = npy_array['gen_pt_WithNu'].view(numpy.ndarray)
        reco_pt = npy_array['jet_corr_pt'].view(numpy.ndarray)

        # Vectorised ratio instead of a Python loop over every jet. The result
        # is still a float64 array of length self.nsamples, as before.
        correctionfactor = numpy.zeros(self.nsamples)
        correctionfactor[:] = (reg_truth[:self.nsamples] /
                               reco_pt[:self.nsamples])

        truthtuple = npy_array[self.truthclasses]
        alltruth = self.reduceTruth(truthtuple)

        self.x = [x_global, x_cpf, x_etarel, x_sv, reco_pt]
        self.y = [alltruth, correctionfactor]
        self._normalize_input_(weighter, npy_array)
    def readFromRootFile(self, filename, TupleMeanStd, weighter):
        """Convert one ROOT ntuple into the training arrays of this object.

        Stores the entry count of the ``tree`` TTree in ``self.nsamples``,
        then fills ``self.x`` (four padded feature arrays), ``self.y`` (class
        truth and the ``self.regtruth`` column) and ``self.w`` (empty), and
        finally calls ``self._normalize_input_``. ``TupleMeanStd`` is not
        used; ``None`` is passed to the padding helpers instead.
        """
        from DeepJetCore.preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles

        import ROOT

        # 120 is presumably a timeout in seconds -- confirm against fileTimeOut
        fileTimeOut(filename, 120)
        root_file = ROOT.TFile(filename)
        self.nsamples = root_file.Get("tree").GetEntries()

        records = self.readTreeFromRootToTuple(filename)

        alltruth = self.reduceTruth(records[self.truthclasses])
        alltruept = records[self.regtruth]

        # Branch groups 0 and 3 go through MeanNormZeroPad (wrapped in
        # single-element lists); groups 1 and 2 are per-particle collections.
        x_global = MeanNormZeroPad(
            filename, None,
            [self.branches[0]], [self.branchcutoffs[0]],
            self.nsamples)

        x_cpf, x_npf = [
            MeanNormZeroPadParticles(
                filename, None,
                self.branches[group], self.branchcutoffs[group],
                self.nsamples)
            for group in (1, 2)
        ]

        x_recopts = MeanNormZeroPad(
            filename, None,
            [self.branches[3]], [self.branchcutoffs[3]],
            self.nsamples)

        # Remember the count so the final print can compare it with
        # self.nsamples after _normalize_input_ has run.
        entries_before = self.nsamples

        self.x = [x_global, x_cpf, x_npf, x_recopts]  # feature arrays
        self.y = [alltruth, alltruept]  # truth / target arrays
        self.w = []  # weight arrays (none here)
        self._normalize_input_(weighter, records)

        print('reduced to ', self.nsamples, 'of', entries_before)
Exemple #4
0
    def convertFromSourceFile(self, filename, weighterobjects, istraining):
        """Produce the numpy training arrays for one ROOT file.

        Builds one global and six per-particle feature arrays, reads the three
        ``lep_is*Id_Training`` truth columns with uproot, optionally drops
        entries according to the weighter's not-remove indices, and replaces
        non-finite feature values by 0.

        Args:
            filename: path of the ROOT file containing the ``tree`` TTree.
            weighterobjects: mapping; its ``'weigther'`` entry is used when
                ``self.remove`` is set.
            istraining: when false, ``self.remove`` is forced to ``False``.

        Returns:
            ``(features, [truth], [])`` where ``features`` is the list
            ``[global, neutral, charged, photon, electron, muon, SV]``.
        """
        from DeepJetCore.Weighter import Weighter
        from DeepJetCore.stopwatch import stopwatch
        step_timer = stopwatch()
        total_timer = stopwatch()  # created but never read in this method
        if not istraining:
            self.remove = False

        print('reading '+filename)

        import ROOT
        from root_numpy import tree2array, root2array
        # 120 is presumably a timeout in seconds -- confirm against fileTimeOut
        fileTimeOut(filename, 120)
        root_file = ROOT.TFile(filename)
        self.nsamples = root_file.Get("tree").GetEntries()

        from DeepJetCore.preprocessing import MeanNormZeroPad,MeanNormZeroPadParticles

        x_global = MeanNormZeroPad(
            filename, None, [self.global_branches], [1], self.nsamples)

        x_pfCand_neutral = MeanNormZeroPadParticles(
            filename, None,
            self.pfCand_neutral_branches, self.npfCand_neutral, self.nsamples)

        x_pfCand_charged = MeanNormZeroPadParticles(
            filename, None,
            self.pfCand_charged_branches, self.npfCand_charged, self.nsamples)

        x_pfCand_photon = MeanNormZeroPadParticles(
            filename, None,
            self.pfCand_photon_branches, self.npfCand_photon, self.nsamples)

        x_pfCand_electron = MeanNormZeroPadParticles(
            filename, None,
            self.pfCand_electron_branches, self.npfCand_electron, self.nsamples)

        x_pfCand_muon = MeanNormZeroPadParticles(
            filename, None,
            self.pfCand_muon_branches, self.npfCand_muon, self.nsamples)

        x_pfCand_SV = MeanNormZeroPadParticles(
            filename, None, self.SV_branches, self.nSV, self.nsamples)

        import uproot3 as uproot
        truth_tree = uproot.open(filename)["tree"]
        truth_columns = (
            "lep_isPromptId_Training",
            "lep_isNonPromptId_Training",
            "lep_isFakeId_Training",
        )
        truth = np.concatenate(
            [np.expand_dims(truth_tree.array(column), axis=1)
             for column in truth_columns],
            axis=1)
        # important, float32 and C-type!
        truth = truth.astype(dtype='float32', order='C')

        # From here on the seven feature arrays are handled together; the list
        # order is the order of the returned feature list.
        features = [
            arr.astype(dtype='float32', order='C')
            for arr in (x_global, x_pfCand_neutral, x_pfCand_charged,
                        x_pfCand_photon, x_pfCand_electron, x_pfCand_muon,
                        x_pfCand_SV)
        ]

        if self.remove:
            wanted = [self.weightbranchX, self.weightbranchY]
            wanted.extend(self.truth_branches)
            wanted.extend(self.undefTruth)
            fileTimeOut(filename, 120)
            for_remove = root2array(
                filename, treename="tree", stop=None, branches=wanted)
            notremoves = weighterobjects['weigther'].createNotRemoveIndices(for_remove)
            print('took ', step_timer.getAndReset(), ' to create remove indices')

            print ("notremoves", notremoves, "<- notremoves")
            # One mask for all arrays: keep entries with a positive index.
            keep = notremoves > 0
            features = [arr[keep] for arr in features]
            truth = truth[keep]

        newnsamp = features[0].shape[0]
        print('reduced content to ', int(float(newnsamp)/float(self.nsamples)*100),'%')

        print('remove nans')
        # Every non-finite value (NaN or +/-inf) becomes 0.
        features = [np.where(np.isfinite(arr), arr, 0) for arr in features]

        return features, [truth], []
Exemple #5
0
    def readFromRootFile(self, filename, TupleMeanStd, weighter):
        """Read one ROOT file and fill ``self.x``, ``self.y``, ``self.z``, ``self.w``.

        Five padded arrays are built from ``self.branches[0..3]`` (group 1 is
        read twice, once through ``MeanNormZeroPadParticles`` and once through
        ``ZeroPadParticles``). Depending on ``self.remove`` / ``self.weight``
        entries are then dropped in three stages: entries whose reduced truth
        sums to 0, entries not selected by the weighter, and entries with a
        non-positive weight.

        Side effects:
            Sets ``self.nsamples`` (first to the tree size, finally to the
            number of kept entries), ``self.w``, ``self.x``, ``self.z`` and
            ``self.y``.
        """
        from DeepJetCore.preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles, ZeroPadParticles
        import numpy
        import ROOT

        # 120 is presumably a timeout in seconds -- confirm against fileTimeOut
        fileTimeOut(filename, 120)
        root_file = ROOT.TFile(filename)
        self.nsamples = root_file.Get("deepntuplizer/tree").GetEntries()

        def _padded(reader, group):
            # Run one padding helper on branch group `group`.
            return reader(filename, TupleMeanStd, self.branches[group],
                          self.branchcutoffs[group], self.nsamples)

        x_glb = _padded(ZeroPadParticles, 0)
        x_db = _padded(MeanNormZeroPadParticles, 1)
        x_db_raw = _padded(ZeroPadParticles, 1)
        x_cpf = _padded(MeanNormZeroPadParticles, 2)
        x_sv = _padded(MeanNormZeroPadParticles, 3)

        records = self.readTreeFromRootToTuple(filename)
        if self.remove:
            notremoves = weighter.createNotRemoveIndices(records)

        if self.weight:
            weights = weighter.getJetWeights(records)
        elif self.remove:
            weights = notremoves
        else:
            print('neither remove nor weight')
            weights = numpy.ones(self.nsamples)

        truthtuple = records[self.truthclasses]  # looked up but not used below
        alltruth = self.reduceTruth(records)
        undef = numpy.sum(alltruth, axis=1)

        # All arrays that must stay aligned with `weights`.
        aligned = [x_glb, x_db, x_db_raw, x_sv, x_cpf, alltruth]

        if self.weight or self.remove:
            print('Training samples, remove undefined')
            defined = undef > 0
            weights = weights[defined]
            aligned = [arr[defined] for arr in aligned]

        if self.remove:
            print('Removing to match weighting')
            # notremoves is first cut to the defined entries so that it lines
            # up with the arrays filtered in the previous stage.
            notremoves = notremoves[undef > 0]
            selected = notremoves > 0
            weights = weights[selected]
            aligned = [arr[selected] for arr in aligned]

        if self.weight:
            print('Adding weights, removing events with 0 weight')
            positive = weights > 0
            aligned = [arr[positive] for arr in aligned]
            # Weights are filtered last because they define the mask.
            weights = weights[positive]

        x_glb, x_db, x_db_raw, x_sv, x_cpf, alltruth = aligned

        newnsamp = x_glb.shape[0]
        kept_percent = int(float(newnsamp) / float(self.nsamples) * 100)
        print('Keeping {}% of input events in the training dataCollection'.
              format(kept_percent))
        self.nsamples = newnsamp

        # fill everything
        self.w = [weights]
        self.x = [x_db, x_cpf, x_sv]
        self.z = [x_glb, x_db_raw]
        self.y = [alltruth]
Exemple #6
0
    def readFromRootFile(self, filename, TupleMeanStd, weighter):
        """Read one ROOT file and fill ``self.x``, ``self.y``, ``self.z``, ``self.w``.

        Builds two padded arrays from ``self.branches[0]`` and
        ``self.branches[1]``, obtains weights / not-remove indices from
        ``weighter`` and, when ``self.remove`` is set, drops every entry that
        the weighter did not select or whose reduced truth sums to 0.

        Args:
            filename: path of the ROOT file containing ``deepntuplizer/tree``.
            TupleMeanStd: forwarded to the padding helpers.
            weighter: provides ``createNotRemoveIndices`` and ``getJetWeights``.

        Side effects:
            Sets ``self.nsamples`` (first to the tree size, finally to the
            number of kept entries), ``self.w``, ``self.x``, ``self.z`` and
            ``self.y``.
        """
        from DeepJetCore.preprocessing import MeanNormZeroPadParticles, ZeroPadParticles
        import numpy
        import ROOT

        # 120 is presumably a timeout in seconds -- confirm against fileTimeOut
        fileTimeOut(filename, 120)
        rfile = ROOT.TFile(filename)
        tree = rfile.Get("deepntuplizer/tree")
        self.nsamples = tree.GetEntries()

        x_glb = ZeroPadParticles(filename, TupleMeanStd, self.branches[0],
                                 self.branchcutoffs[0], self.nsamples)

        x_db = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                        self.branches[1],
                                        self.branchcutoffs[1], self.nsamples)

        Tuple = self.readTreeFromRootToTuple(filename)

        # The not-remove indices are requested unconditionally, as before.
        notremoves = weighter.createNotRemoveIndices(Tuple)
        if self.weight:
            weights = weighter.getJetWeights(Tuple)
        elif self.remove:
            weights = notremoves
        else:
            print('neither remove nor weight')
            weights = numpy.empty(self.nsamples)
            weights.fill(1.)

        alltruth = self.reduceTruth(Tuple)
        undef = numpy.sum(alltruth, axis=1)

        if self.remove:
            print('remove')
            # Previously notremoves alone was shortened by `undef > 0` while
            # the data arrays kept their full length, so the boolean mask no
            # longer matched them whenever an undefined entry was present.
            # A single full-length mask keeps everything aligned and gives the
            # same result as before when no entry is undefined.
            keep = (notremoves > 0) & (undef > 0)
            weights = weights[keep]
            x_glb = x_glb[keep]
            x_db = x_db[keep]
            alltruth = alltruth[keep]

        newnsamp = x_glb.shape[0]
        print('reduced content to ',
              int(float(newnsamp) / float(self.nsamples) * 100), '%')
        self.nsamples = newnsamp

        # fill everything
        self.w = [weights]
        self.x = [x_db]
        self.z = [x_glb]
        self.y = [alltruth]
Exemple #7
0
    def convertFromSourceFile(self, filename, weighterobjects, istraining):
        """Convert one input ROOT file into feature and truth arrays.

        This is the conversion rule from a source file to the training
        arrays. Different ways of reading the file are deliberately mixed:
        the truth columns are read with uproot, the features with the
        DeepJetCore padding helpers.

        Side effects:
            Sets ``self.nsamples``, ``self.global_branches``,
            ``self.pfCand_neutral_branches`` and ``self.npfCand_neutral``.

        Returns:
            ``([x_global, x_pfCand_neutral], [truth], [])``: a list of
            feature arrays, a list of truth arrays and an (empty) list of
            weight arrays. ``weighterobjects`` and ``istraining`` are not
            used here.
        """
        print('reading ' + filename)

        import ROOT
        # 120 is presumably a timeout in seconds -- confirm against fileTimeOut
        fileTimeOut(filename, 120)
        root_file = ROOT.TFile(filename)
        self.nsamples = root_file.Get("tree").GetEntries()

        import uproot3 as uproot

        truth_tree = uproot.open(filename)["tree"]
        truth_columns = (
            "lep_isPromptId_Training",
            "lep_isNonPromptId_Training",
            "lep_isFakeId_Training",
        )
        # One column per truth flag, stacked side by side.
        truth = np.concatenate(
            [np.expand_dims(truth_tree.array(column), axis=1)
             for column in truth_columns],
            axis=1)

        # important, float32 and C-type!
        truth = truth.astype(dtype='float32', order='C')

        # NOTE(review): 'lep_pfRelIso03_all' is listed twice below -- confirm
        # whether the duplicate is intentional before changing it, since it
        # affects the number of global features.
        self.global_branches = [
            'lep_pt',
            'lep_eta',
            'lep_phi',
            'lep_mediumId',
            'lep_miniPFRelIso_all',
            'lep_pfRelIso03_all',
            'lep_sip3d',
            'lep_dxy',
            'lep_dz',
            'lep_charge',
            'lep_dxyErr',
            'lep_dzErr',
            'lep_ip3d',
            'lep_jetPtRelv2',
            'lep_jetRelIso',
            'lep_miniPFRelIso_chg',
            'lep_mvaLowPt',
            'lep_nStations',
            'lep_nTrackerLayers',
            'lep_pfRelIso03_all',
            'lep_pfRelIso03_chg',
            'lep_pfRelIso04_all',
            'lep_ptErr',
            'lep_segmentComp',
            'lep_tkRelIso',
            'lep_tunepRelPt',
        ]

        self.pfCand_neutral_branches = [
            'pfCand_neutral_eta',
            'pfCand_neutral_phi',
            'pfCand_neutral_pt',
            'pfCand_neutral_puppiWeight',
            'pfCand_neutral_puppiWeightNoLep',
            'pfCand_neutral_ptRel',
            'pfCand_neutral_deltaR',
        ]
        self.npfCand_neutral = 5

        from DeepJetCore.preprocessing import MeanNormZeroPad, MeanNormZeroPadParticles

        global_features = MeanNormZeroPad(
            filename, None, [self.global_branches], [1], self.nsamples)
        neutral_features = MeanNormZeroPadParticles(
            filename, None,
            self.pfCand_neutral_branches, self.npfCand_neutral,
            self.nsamples)

        x_global = global_features.astype(dtype='float32', order='C')
        x_pfCand_neutral = neutral_features.astype(dtype='float32', order='C')

        # feature arrays, truth arrays, weight arrays
        return [x_global, x_pfCand_neutral], [truth], []
    def convertFromSourceFile(self, filename, weighterobjects, istraining):
        """Produce the numpy training arrays for one ROOT file.

        Builds one global and three per-particle feature arrays, merges the
        truth branches into six columns, optionally drops entries according
        to the weighter's not-remove indices (minus ``isUndefined``), and
        replaces non-finite feature values by 0.

        Args:
            filename: path of the ROOT file containing ``deepntuplizer/tree``.
            weighterobjects: mapping; its ``'weigther'`` entry is used when
                ``self.remove`` is set.
            istraining: when false, ``self.remove`` is forced to ``False``.

        Returns:
            ``([x_global, x_cpf, x_npf, x_vtx], [truth], [])``.
        """
        from DeepJetCore.Weighter import Weighter
        from DeepJetCore.stopwatch import stopwatch
        step_timer = stopwatch()
        total_timer = stopwatch()  # created but never read in this method
        if not istraining:
            self.remove = False

        def _merge_truth(uproot_arrays):
            # Combine the individual flavour flags into six columns:
            # b, bb+gbb, leptonic b, c+cc+gcc, uds, g.
            flag = uproot_arrays.__getitem__
            columns = (
                flag(b'isB'),
                flag(b'isBB') + flag(b'isGBB'),
                flag(b'isLeptonicB') + flag(b'isLeptonicB_C'),
                flag(b'isC') + flag(b'isCC') + flag(b'isGCC'),
                flag(b'isUD') + flag(b'isS'),
                flag(b'isG'),
            )
            return np.vstack(columns).transpose()

        print('reading ' + filename)

        import ROOT
        from root_numpy import tree2array, root2array
        # 120 is presumably a timeout in seconds -- confirm against fileTimeOut
        fileTimeOut(filename, 120)
        root_file = ROOT.TFile(filename)
        self.nsamples = root_file.Get("deepntuplizer/tree").GetEntries()

        from DeepJetCore.preprocessing import MeanNormZeroPad, MeanNormZeroPadParticles

        x_global = MeanNormZeroPad(
            filename, None, [self.global_branches], [1], self.nsamples)
        x_cpf = MeanNormZeroPadParticles(
            filename, None, self.cpf_branches, self.n_cpf, self.nsamples)
        x_npf = MeanNormZeroPadParticles(
            filename, None, self.npf_branches, self.n_npf, self.nsamples)
        x_vtx = MeanNormZeroPadParticles(
            filename, None, self.vtx_branches, self.n_vtx, self.nsamples)

        import uproot3 as uproot
        truth_tree = uproot.open(filename)["deepntuplizer/tree"]
        truth = _merge_truth(truth_tree.arrays(self.truth_branches))
        # important, float32 and C-type!
        truth = truth.astype(dtype='float32', order='C')

        # The four feature arrays are handled together from here on; the list
        # order is the order of the returned feature list.
        features = [
            arr.astype(dtype='float32', order='C')
            for arr in (x_global, x_cpf, x_npf, x_vtx)
        ]

        if self.remove:
            wanted = [self.weightbranchX, self.weightbranchY]
            wanted.extend(self.truth_branches)
            wanted.extend(self.undefTruth)
            fileTimeOut(filename, 120)
            for_remove = root2array(
                filename,
                treename="deepntuplizer/tree",
                stop=None,
                branches=wanted)
            notremoves = weighterobjects['weigther'].createNotRemoveIndices(
                for_remove)
            # Subtracting the isUndefined column in place pushes undefined
            # entries to <= 0 so the mask below drops them as well.
            notremoves -= for_remove['isUndefined']
            print('took ', step_timer.getAndReset(), ' to create remove indices')

            print('remove')
            keep = notremoves > 0
            features = [arr[keep] for arr in features]
            truth = truth[keep]

        newnsamp = features[0].shape[0]
        print('reduced content to ',
              int(float(newnsamp) / float(self.nsamples) * 100), '%')

        print('remove nans')
        # Every non-finite value (NaN or +/-inf) becomes 0.
        features = [np.where(np.isfinite(arr), arr, 0) for arr in features]

        return features, [truth], []
    def readFromRootFile(self, filename, TupleMeanStd, weighter):
        """Read one ROOT file and fill ``self.x``, ``self.y`` and ``self.w``.

        Builds one global and three per-particle arrays from
        ``self.branches[0..3]`` using ``TupleMeanStd``, derives the weights
        from ``weighter`` and, when ``self.remove`` is set, drops the entries
        whose not-remove index (minus ``isUndefined``) is not positive.

        Side effects:
            Sets ``self.nsamples`` (first to the tree size, finally to the
            number of kept entries), ``self.w``, ``self.x`` and ``self.y``.
        """
        from DeepJetCore.preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles
        import numpy
        from DeepJetCore.stopwatch import stopwatch

        step_timer = stopwatch()
        total_timer = stopwatch()  # created but never read in this method

        import ROOT

        # 120 is presumably a timeout in seconds -- confirm against fileTimeOut
        fileTimeOut(filename, 120)
        root_file = ROOT.TFile(filename)
        self.nsamples = root_file.Get("deepntuplizer/tree").GetEntries()

        print('took ', step_timer.getAndReset(), ' seconds for getting tree entries')

        # Global features: branch group 0, wrapped in single-element lists.
        x_global = MeanNormZeroPad(
            filename, TupleMeanStd,
            [self.branches[0]], [self.branchcutoffs[0]],
            self.nsamples)

        # Per-particle collections: branch groups 1, 2 and 3, in that order.
        x_cpf, x_npf, x_sv = [
            MeanNormZeroPadParticles(
                filename, TupleMeanStd,
                self.branches[group], self.branchcutoffs[group],
                self.nsamples)
            for group in (1, 2, 3)
        ]

        print('took ', step_timer.getAndReset(),
              ' seconds for mean norm and zero padding (C module)')

        records = self.readTreeFromRootToTuple(filename)

        if self.remove:
            notremoves = weighter.createNotRemoveIndices(records)
            # Subtracting the isUndefined column in place pushes undefined
            # entries to <= 0 so the mask below drops them as well.
            notremoves -= records['isUndefined']
            print('took ', step_timer.getAndReset(), ' to create remove indices')

        if self.weight:
            weights = weighter.getJetWeights(records)
        elif self.remove:
            weights = notremoves
        else:
            print('neither remove nor weight')
            weights = numpy.ones(self.nsamples)

        alltruth = self.reduceTruth(records[self.truthclasses])

        if self.remove:
            print('remove')
            keep = notremoves > 0
            weights = weights[keep]
            x_global, x_cpf, x_npf, x_sv, alltruth = [
                arr[keep] for arr in (x_global, x_cpf, x_npf, x_sv, alltruth)
            ]

        newnsamp = x_global.shape[0]
        print('reduced content to ',
              int(float(newnsamp) / float(self.nsamples) * 100), '%')
        self.nsamples = newnsamp

        print(x_global.shape, self.nsamples)

        self.w = [weights]
        self.x = [x_global, x_cpf, x_npf, x_sv]
        self.y = [alltruth]
    def convertFromSourceFile(self, filename, weighterobjects, istraining):
        """Build the numpy feature and truth arrays for one ROOT file.

        Args:
            filename: path of the ROOT file; the tree named "tree" is read.
            weighterobjects: mapping whose 'weigther' entry (key spelled the
                way it is looked up here) offers createNotRemoveIndices().
                It is only accessed while self.remove is set.
            istraining: if false, self.remove is switched off on the
                instance so that no entries are dropped.

        Returns:
            (features, truth, weights): a list of seven feature arrays
            (global, neutral, charged, photon, electron, muon, SV), a
            one-element list holding the truth array, and an empty list.
        """
        from DeepJetCore.stopwatch import stopwatch
        sw = stopwatch()
        if not istraining:
            self.remove = False

        print('reading ' + filename)
        import ROOT
        from root_numpy import root2array
        # wait until the file can be read (timeout argument 120)
        fileTimeOut(filename, 120)
        rfile = ROOT.TFile(filename)
        tree = rfile.Get("tree")
        self.nsamples = tree.GetEntries()

        from DeepJetCore.preprocessing import MeanNormZeroPad, MeanNormZeroPadParticles

        print('padding ' + filename)

        # 2nd argument None: means no normalisation
        x_global = MeanNormZeroPad(filename, None, [self.global_branches],
                                   [1], self.nsamples)

        # (branch list, cutoff) per particle collection, in output order
        particle_inputs = (
            (self.pfCand_neutral_branches, self.npfCand_neutral),
            (self.pfCand_charged_branches, self.npfCand_charged),
            (self.pfCand_photon_branches, self.npfCand_photon),
            (self.pfCand_electron_branches, self.npfCand_electron),
            (self.pfCand_muon_branches, self.npfCand_muon),
            (self.SV_branches, self.nSV),
        )
        features = [x_global]
        for branches, cutoff in particle_inputs:
            features.append(
                MeanNormZeroPadParticles(filename, None, branches, cutoff,
                                         self.nsamples))

        import uproot3 as uproot
        urfile = uproot.open(filename)["tree"]

        # one column per truth branch
        truth = np.concatenate(
            [np.expand_dims(urfile.array(name), axis=1)
             for name in self.truth_branches],
            axis=1)

        # important, float32 and C-type!
        truth = truth.astype(dtype='float32', order='C')
        features = [arr.astype(dtype='float32', order='C')
                    for arr in features]

        if self.remove:
            b = [self.weightbranchX, self.weightbranchY]
            b.extend(self.truth_branches)
            b.extend(self.undefTruth)
            fileTimeOut(filename, 120)
            for_remove = root2array(  # returns a structured np array
                filename,
                treename="tree",
                stop=None,
                branches=b)
            notremoves = weighterobjects['weigther'].createNotRemoveIndices(
                for_remove)
            print('took ', sw.getAndReset(), ' to create remove indices')

            # same selection for every array so they stay aligned
            keep = notremoves > 0
            features = [arr[keep] for arr in features]
            truth = truth[keep]

        newnsamp = features[0].shape[0]
        # an empty tree would otherwise raise ZeroDivisionError here
        if self.nsamples > 0:
            percent = int(float(newnsamp) / float(self.nsamples) * 100)
        else:
            percent = 0
        print('Weighter reduced content to ', percent, '%')

        print('removing nans')
        # every non-finite value (NaN as well as +-inf) is replaced by 0;
        # the truth array is left untouched
        features = [np.where(np.isfinite(arr), arr, 0) for arr in features]

        return features, [truth], []
Exemple #11
0
    def readFromRootFile(self, filename, TupleMeanStd, weighter):
        """Read one ROOT file and fill self.x, self.y, self.z and self.w.

        Entries whose reduced truth row sums to zero are dropped and, when
        self.remove is set, so are the entries rejected by the weighter.
        Every stored array, including both arrays placed in self.z, goes
        through the same selections so that they stay aligned entry by
        entry.

        Args:
            filename: path of the ROOT file; the tree named "tree" is read.
            TupleMeanStd: passed through unchanged to the padding helpers.
            weighter: provides createNotRemoveIndices() and getJetWeights().

        Side effects:
            Sets self.nsamples (number of entries kept), self.w, self.x,
            self.z and self.y.
        """
        from DeepJetCore.preprocessing import MeanNormZeroPadParticles, ZeroPadParticles
        import numpy
        import ROOT

        fileTimeOut(filename, 120)  #give eos 2 minutes to recover
        rfile = ROOT.TFile(filename)
        tree = rfile.Get("tree")
        self.nsamples = tree.GetEntries()

        # branches[0], zero padded only; ends up in self.z
        x_glb = ZeroPadParticles(filename, TupleMeanStd, self.branches[0],
                                 self.branchcutoffs[0], self.nsamples)

        # branches[1] is read twice: zero padded only (x_dbr, for self.z)
        # and through MeanNormZeroPadParticles (x_db, the network input)
        x_dbr = ZeroPadParticles(filename, TupleMeanStd, self.branches[1],
                                 self.branchcutoffs[1], self.nsamples)

        x_db = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                        self.branches[1],
                                        self.branchcutoffs[1], self.nsamples)

        Tuple = self.readTreeFromRootToTuple(filename)
        notremoves = weighter.createNotRemoveIndices(Tuple)
        if self.weight:
            weights = weighter.getJetWeights(Tuple)
        elif self.remove:
            weights = notremoves
        else:
            print('neither remove nor weight')
            weights = numpy.ones(self.nsamples)

        # create all collections:
        truthtuple = Tuple[self.truthclasses]
        alltruth = self.reduceTruth(truthtuple)

        # drop entries that belong to none of the reduced truth classes
        defined = numpy.sum(alltruth, axis=1) > 0
        weights = weights[defined]
        x_glb = x_glb[defined]
        x_dbr = x_dbr[defined]  # was skipped before, leaving self.z ragged
        x_db = x_db[defined]
        alltruth = alltruth[defined]

        # remove the entries to get same jet shapes
        if self.remove:
            print('remove')
            # notremoves still has one flag per original entry
            keep = notremoves[defined] > 0
            weights = weights[keep]
            x_glb = x_glb[keep]
            x_dbr = x_dbr[keep]
            x_db = x_db[keep]
            alltruth = alltruth[keep]

        newnsamp = x_glb.shape[0]
        # an empty tree would otherwise raise ZeroDivisionError here
        if self.nsamples > 0:
            percent = int(float(newnsamp) / float(self.nsamples) * 100)
        else:
            percent = 0
        print('reduced content to ', percent, '%')
        self.nsamples = newnsamp

        # fill everything
        self.w = [weights]
        self.x = [x_db]
        self.z = [x_glb, x_dbr]
        self.y = [alltruth]
Exemple #12
0
    def readFromRootFile(self, filename, TupleMeanStd, weighter):
        """Read one ROOT file and fill self.x, self.y and self.w.

        Besides the padded global / charged / neutral / SV inputs, binned
        maps of the charged and neutral candidates (density and count maps,
        nbins per dimension) are built and concatenated along axis 3 into
        one map input. Two targets are stored: the reduced truth classes and
        the 'gen_pt_WithNu' column, with 'jet_corr_pt' added as an input.

        Args:
            filename: path of the ROOT file; "deepntuplizer/tree" is read.
            TupleMeanStd: passed through to the padding and count-map
                helpers.
            weighter: provides createNotRemoveIndices() and getJetWeights().

        Side effects:
            Sets self.nsamples (number of entries kept), self.w, self.x and
            self.y.
        """
        from DeepJetCore.preprocessing import createCountMap, createDensity, MeanNormZeroPad, MeanNormZeroPadParticles
        import numpy
        from DeepJetCore.stopwatch import stopwatch

        sw = stopwatch()

        import ROOT

        # wait until the file can be read (timeout argument 120)
        fileTimeOut(filename, 120)
        rfile = ROOT.TFile(filename)
        tree = rfile.Get("deepntuplizer/tree")
        self.nsamples = tree.GetEntries()

        print('took ', sw.getAndReset(), ' seconds for getting tree entries')

        # split for convolutional network

        x_global = MeanNormZeroPad(filename, TupleMeanStd, [self.branches[0]],
                                   [self.branchcutoffs[0]], self.nsamples)

        x_cpf = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                         self.branches[1],
                                         self.branchcutoffs[1], self.nsamples)

        x_npf = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                         self.branches[2],
                                         self.branchcutoffs[2], self.nsamples)

        x_sv = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                        self.branches[3],
                                        self.branchcutoffs[3], self.nsamples)

        #here the difference starts
        nbins = 8

        def density(prefix, counter):
            # binned map of ptrel (summed) and etarel/phirel (averaged)
            return createDensity(
                filename,
                inbranches=[prefix + '_ptrel', prefix + '_etarel',
                            prefix + '_phirel'],
                modes=['sum', 'average', 'average'],
                nevents=self.nsamples,
                dimension1=[prefix + '_eta', 'jet_eta', nbins, 0.45],
                dimension2=[prefix + '_phi', 'jet_phi', nbins, 0.45],
                counterbranch=counter,
                offsets=[-1, -0.5, -0.5])

        def counts(prefix, counter):
            # binned candidate count map on the same grid
            return createCountMap(filename, TupleMeanStd, self.nsamples,
                                  [prefix + '_eta', 'jet_eta', nbins, 0.45],
                                  [prefix + '_phi', 'jet_phi', nbins, 0.45],
                                  counter)

        x_chmap = density('Cpfcan', 'nCpfcand')
        # Npfcan_* branches are counted by nNpfcand, the counter the neutral
        # count map below uses as well (the charged counter nCpfcand was
        # passed here before)
        x_neumap = density('Npfcan', 'nNpfcand')

        x_chcount = counts('Cpfcan', 'nCpfcand')
        x_neucount = counts('Npfcan', 'nNpfcand')

        print('took ', sw.getAndReset(),
              ' seconds for mean norm and zero padding (C module)')

        Tuple = self.readTreeFromRootToTuple(filename)

        if self.remove:
            notremoves = weighter.createNotRemoveIndices(Tuple)
            undef = Tuple['isUndefined']
            # entries flagged isUndefined must not survive the selection
            notremoves -= undef
            print('took ', sw.getAndReset(), ' to create remove indices')

        if self.weight:
            weights = weighter.getJetWeights(Tuple)
        elif self.remove:
            weights = notremoves
        else:
            print('neither remove nor weight')
            weights = numpy.ones(self.nsamples)

        truthtuple = Tuple[self.truthclasses]
        alltruth = self.reduceTruth(truthtuple)

        regtruth = Tuple['gen_pt_WithNu']
        regreco = Tuple['jet_corr_pt']

        if self.remove:
            print('remove')
            # same selection for every array so they stay aligned
            keep = notremoves > 0
            weights = weights[keep]
            x_global = x_global[keep]
            x_cpf = x_cpf[keep]
            x_npf = x_npf[keep]
            x_sv = x_sv[keep]

            x_chmap = x_chmap[keep]
            x_neumap = x_neumap[keep]

            x_chcount = x_chcount[keep]
            x_neucount = x_neucount[keep]

            alltruth = alltruth[keep]

            regreco = regreco[keep]
            regtruth = regtruth[keep]

        newnsamp = x_global.shape[0]
        # an empty tree would otherwise raise ZeroDivisionError here
        if self.nsamples > 0:
            percent = int(float(newnsamp) / float(self.nsamples) * 100)
        else:
            percent = 0
        print('reduced content to ', percent, '%')
        self.nsamples = newnsamp

        x_map = numpy.concatenate((x_chmap, x_neumap, x_chcount, x_neucount),
                                  axis=3)

        # one weight list entry per target in self.y
        self.w = [weights, weights]
        self.x = [x_global, x_cpf, x_npf, x_sv, x_map, regreco]
        self.y = [alltruth, regtruth]
Exemple #13
0
    def readFromRootFile(self, filename, TupleMeanStd, weighter):
        """Load one ROOT file and store inputs, truth and weights on self.

        The tree named by self.treename is read. Depending on self.weight
        and self.remove, per-entry weights are taken from the weighter
        and/or entries are thinned out at random so that classes end up
        with comparable counts. Entries with no truth class or with more
        than one truth class set are always dropped at the end.

        Args:
            filename: path of the ROOT file.
            TupleMeanStd: passed through to the padding helpers.
            weighter: provides createNotRemoveIndices(), getJetWeights()
                and totalcounts.

        Side effects:
            Sets self.nsamples (number of entries kept), self.w, self.x and
            self.y.
        """

        from DeepJetCore.preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles
        import ROOT

        def select(mask, *arrays):
            # apply one boolean mask to several aligned arrays
            return [arr[mask] for arr in arrays]

        # wait until the file can be read (timeout argument 60)
        fileTimeOut(filename, 60)
        root_file = ROOT.TFile(filename)
        source_tree = root_file.Get(self.treename)
        self.nsamples = source_tree.GetEntries()

        x_global = MeanNormZeroPad(filename, TupleMeanStd, [self.branches[0]],
                                   [self.branchcutoffs[0]], self.nsamples)

        # charged candidates, neutral candidates, secondary vertices
        x_cpf, x_npf, x_sv = [
            MeanNormZeroPadParticles(filename, TupleMeanStd,
                                     self.branches[k],
                                     self.branchcutoffs[k], self.nsamples)
            for k in (1, 2, 3)
        ]

        records = self.readTreeFromRootToTuple(filename)

        undefined = records['isUndefined']
        if self.remove:
            notremoves = weighter.createNotRemoveIndices(records)
            notremoves -= undefined

        if self.weight:
            weights = weighter.getJetWeights(records)
        elif self.remove:
            weights = notremoves
        else:
            weights = np.ones(self.nsamples)

        truthtuple = records[self.truthclasses]
        alltruth = self.reduceTruth(truthtuple)

        # scale down by number of classes in a reduced class
        if self.weight and hasattr(self, 'reducedtruthmap'):
            for idx, flags in enumerate(iter(alltruth)):
                for col, name in enumerate(self.reducedtruthclasses):
                    if flags[col] == 1:
                        n_merged = len(self.reducedtruthmap[name])
                        weights[idx] = weights[idx] * 1. / n_merged

        # remove jets to have the same counts
        if self.remove:
            if hasattr(self, 'reducedtruthmap'):
                # total count per reduced class = sum over its members
                total = []
                for rt in self.reducedtruthclasses:
                    total.append(
                        sum(weighter.totalcounts[t]
                            for t, truth in enumerate(self.truthclasses)
                            if truth in self.reducedtruthmap[rt]))
                rows, classes = alltruth, self.reducedtruthclasses
            else:
                total = weighter.totalcounts
                rows, classes = truthtuple, self.truthclasses

            lowest = min(total)
            for idx, flags in enumerate(iter(rows)):
                for col, _ in enumerate(classes):
                    if flags[col]:
                        # keep with probability lowest / count of the class
                        keep = float(lowest) / total[col]
                        if np.random.ranf() > keep:
                            notremoves[idx] = 0

        # (a jet_pt > 30 cut on all arrays is currently disabled here)

        if self.remove:
            kept = notremoves > 0
            weights, x_global, x_cpf, x_npf, x_sv, alltruth = select(
                kept, weights, x_global, x_cpf, x_npf, x_sv, alltruth)

        if self.weight:
            positive = weights > 0
            x_global, x_cpf, x_npf, x_sv, alltruth, weights = select(
                positive, x_global, x_cpf, x_npf, x_sv, alltruth, weights)

        # remove samples with no predicted class
        has_class = ~np.all(alltruth == 0, axis=1)
        alltruth, x_global, x_cpf, x_npf, x_sv, weights = select(
            has_class, alltruth, x_global, x_cpf, x_npf, x_sv, weights)

        # remove samples with multiple predicted classes
        not_multi = ~(np.sum(alltruth, axis=1) > 1)
        alltruth, x_global, x_cpf, x_npf, x_sv, weights = select(
            not_multi, alltruth, x_global, x_cpf, x_npf, x_sv, weights)

        newnsamp = x_global.shape[0]
        percent = int(float(newnsamp) / float(self.nsamples) * 100)
        logging.info('reduced content to {}%'.format(percent))
        self.nsamples = newnsamp

        # flatten weights to one dimension if they have more
        if weights.ndim > 1:
            weights = weights.reshape(weights.shape[0])

        self.w = [weights]
        self.x = [x_global, x_cpf, x_npf, x_sv]
        self.y = [alltruth]