Example #1
    def readFromRootFile(self,filename,TupleMeanStd, weighter):
        from preprocessing import MeanNormZeroPad
        import numpy
        from stopwatch import stopwatch
        import c_meanNormZeroPad
        c_meanNormZeroPad.zeroPad()
        
        sw=stopwatch()
        swall=stopwatch()
        
        import ROOT
        
        fileTimeOut(filename,120) #give eos two minutes to recover
        rfile = ROOT.TFile(filename)
        tree = rfile.Get("deepntuplizer/tree")
        self.nsamples=tree.GetEntries()
        
        print('took ', sw.getAndReset(), ' seconds for getting tree entries')
        
        
        # split for convolutional network
        
        x_global = MeanNormZeroPad(filename,TupleMeanStd,
                                   [self.branches[0]],
                                   [self.branchcutoffs[0]],self.nsamples)
        
        print('took ', sw.getAndReset(), ' seconds for mean norm and zero padding (C module)')
        
        nparray = self.readTreeFromRootToTuple(filename)        
        if self.remove:
            notremoves=weighter.createNotRemoveIndices(nparray)
            undef=nparray['isUndefined']
            notremoves-=undef
            print('took ', sw.getAndReset(), ' to create remove indices')
        
        if self.weight:
            weights=weighter.getJetWeights(nparray)
        elif self.remove:
            weights=notremoves
        else:
            print('neither remove nor weight')
            weights=numpy.ones(self.nsamples)
        
        truthtuple =  nparray[self.truthclasses]
        alltruth=self.reduceTruth(truthtuple)

        if self.remove:
            print('remove')
            weights=weights[notremoves > 0]
            x_global=x_global[notremoves > 0]
            alltruth=alltruth[notremoves > 0]
                        
        newnsamp=x_global.shape[0]
        print('reduced content to ', int(float(newnsamp)/float(self.nsamples)*100),'%')
        self.nsamples = newnsamp
        
        self.w=[weights]
        self.x=[x_global]
        self.y=[alltruth]
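The remove/weight bookkeeping above is the pattern shared by all of these snippets; a minimal self-contained numpy sketch of the filtering step (all values invented for illustration):

import numpy

# notremoves: 1 for jets to keep, 0 for jets to drop; subtracting flags
# such as isUndefined can push further entries to 0 or below
notremoves = numpy.array([1, 0, 1, 1]) - numpy.array([0, 0, 1, 0])

weights = numpy.ones(4)
x_global = numpy.arange(8).reshape(4, 2)

keep = notremoves > 0      # boolean mask over jets
weights = weights[keep]    # -> shape (2,)
x_global = x_global[keep]  # -> shape (2, 2), rows for jets 0 and 3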
Example #2
File: TrainData.py Project: sscruz/DeepJet
 def getFlavourClassificationData(self,filename,TupleMeanStd, weighter):
     from stopwatch import stopwatch
     from preprocessing import MeanNormZeroPad
     import numpy
     
     sw=stopwatch()
     swall=stopwatch()
     
     import ROOT
     
     fileTimeOut(filename,120) #give eos two minutes to recover
     rfile = ROOT.TFile(filename)
     tree = rfile.Get(self.treename)
     self.nsamples=tree.GetEntries()
     
     #print('took ', sw.getAndReset(), ' seconds for getting tree entries')
 
     
     Tuple = self.readTreeFromRootToTuple(filename)
     
     
     x_all = MeanNormZeroPad(filename,TupleMeanStd,self.branches,self.branchcutoffs,self.nsamples)
     
     #print('took ', sw.getAndReset(), ' seconds for mean norm and zero padding (C module)')
     
     notremoves=numpy.array([])
     weights=numpy.array([])
     if self.remove:
         notremoves=weighter.createNotRemoveIndices(Tuple)
         weights=notremoves
         #print('took ', sw.getAndReset(), ' to create remove indices')
     elif self.weight:
         #print('creating weights')
         weights= weighter.getJetWeights(Tuple)
     else:
         print('neither remove nor weight')
         weights=numpy.empty(self.nsamples)
         weights.fill(1.)
     
     
     
     truthtuple =  Tuple[self.truthclasses]
     #print(self.truthclasses)
     alltruth=self.reduceTruth(truthtuple)
     
     #print(alltruth.shape)
     if self.remove:
         #print('remove')
         weights=weights[notremoves > 0]
         x_all=x_all[notremoves > 0]
         alltruth=alltruth[notremoves > 0]
    
     newnsamp=x_all.shape[0]
     #print('reduced content to ', int(float(newnsamp)/float(self.nsamples)*100),'%')
     self.nsamples = newnsamp
     
     #print('took in total ', swall.getAndReset(),' seconds for conversion')
     
     return weights,x_all,alltruth, notremoves
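A minimal usage sketch for this helper, assuming it is called from inside a TrainData subclass with the same inputs as readFromRootFile (the surrounding variable names are assumptions):

# inside readFromRootFile(self, filename, TupleMeanStd, weighter):
weights, x_all, alltruth, notremoves = self.getFlavourClassificationData(
    filename, TupleMeanStd, weighter)
self.w = [weights]
self.x = [x_all]
self.y = [alltruth]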
Example #3
    def readFromRootFile(self,filename,TupleMeanStd, weighter):
        from preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles
        import numpy
        from stopwatch import stopwatch
        
        sw=stopwatch()
        swall=stopwatch()
        
        import ROOT
        
        fileTimeOut(filename,120) #give eos two minutes to recover
        rfile = ROOT.TFile(filename)
        tree = rfile.Get("deepntuplizer/tree")
        self.nsamples=tree.GetEntries()
        
        print('took ', sw.getAndReset(), ' seconds for getting tree entries')
        
        
        # split for convolutional network
        
        x_global = MeanNormZeroPad(
            filename,None,
            [self.branches[0]],
            [self.branchcutoffs[0]],self.nsamples
        )
        
        x_cpf = MeanNormZeroPadParticles(
            filename,None,
            self.branches[1],
            self.branchcutoffs[1],self.nsamples
        )
                
        x_sv = MeanNormZeroPadParticles(
            filename,None,
            self.branches[2],
            self.branchcutoffs[2],self.nsamples
        )
        
        print('took ', sw.getAndReset(), ' seconds for mean norm and zero padding (C module)')
        
        npy_array = self.readTreeFromRootToTuple(filename)
        
        reg_truth=npy_array['gen_pt_WithNu'].view(numpy.ndarray)
        reco_pt=npy_array['jet_corr_pt'].view(numpy.ndarray)
        
        correctionfactor=numpy.zeros(self.nsamples)
        for i in range(self.nsamples):
            correctionfactor[i]=reg_truth[i]/reco_pt[i]
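        # equivalently vectorised: correctionfactor = reg_truth / reco_pt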

        truthtuple =  npy_array[self.truthclasses]
        alltruth=self.reduceTruth(truthtuple)
        
        self.x=[x_global, x_cpf, x_sv, reco_pt]
        self.y=[alltruth,correctionfactor]
        self._normalize_input_(weighter, npy_array)
Example #4
    def readFromRootFile(self,filename,TupleMeanStd, weighter):
        from preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles
        import numpy
        from stopwatch import stopwatch
        import c_meanNormZeroPad
        c_meanNormZeroPad.zeroPad()
        
        sw=stopwatch()
        swall=stopwatch()
        
        import ROOT
        
        fileTimeOut(filename,120) #give eos two minutes to recover
        rfile = ROOT.TFile(filename)
        tree = rfile.Get("deepntuplizer/tree")
        self.nsamples=tree.GetEntries()
        
        print('took ', sw.getAndReset(), ' seconds for getting tree entries')
        
        
        # split for convolutional network
        
        x_global = MeanNormZeroPad(filename,TupleMeanStd,
                                   [self.branches[0]],
                                   [self.branchcutoffs[0]],self.nsamples)
        
        x_cpf = MeanNormZeroPadParticles(filename,TupleMeanStd,
                                   self.branches[1],
                                   self.branchcutoffs[1],self.nsamples)
        
        x_npf = MeanNormZeroPadParticles(filename,TupleMeanStd,
                                   self.branches[2],
                                   self.branchcutoffs[2],self.nsamples)
        
     
        
        print('took ', sw.getAndReset(), ' seconds for mean norm and zero padding (C module)')
        
        nparray = self.readTreeFromRootToTuple(filename)        
        if self.remove:
            notremoves=weighter.createNotRemoveIndices(nparray)
            undef=nparray['isUndefined']
            notremoves -= undef
            print('took ', sw.getAndReset(), ' to create remove indices')
        
        if self.weight:
            weights=weighter.getJetWeights(nparray)
        elif self.remove:
            weights=notremoves
        else:
            print('neither remove nor weight')
            weights=numpy.ones(self.nsamples)
        
        pttruth = nparray[self.regtruth]
        ptreco  = nparray[self.regreco]        
        truthtuple =  nparray[self.truthclasses]
        #print(self.truthclasses)
        alltruth=self.reduceTruth(truthtuple)

        #
        # sort vectors (according to pt at the moment)
        #
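        # per jet, argsort over feature 0 gives the new particle order
        # (ascending); numpy.indices plus repeat build matching index grids
        # so fancy indexing reorders axis 1 while keeping axes 0 (jets) and
        # 2 (features) aligned (see the standalone sketch after this example)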
        idxs = x_cpf[:,:,0].argsort() #0 is pt ratio
        xshape = x_cpf.shape
        static_idxs = numpy.indices(xshape)
        idxs = idxs.reshape((xshape[0], xshape[1], 1))
        idxs = numpy.repeat(idxs, xshape[2], axis=2)
        x_cpf = x_cpf[static_idxs[0], idxs, static_idxs[2]]

        idxs = x_npf[:,:,0].argsort() #0 is pt ratio
        xshape = x_npf.shape
        static_idxs = numpy.indices(xshape)
        idxs = idxs.reshape((xshape[0], xshape[1], 1))
        idxs = numpy.repeat(idxs, xshape[2], axis=2)
        x_npf = x_npf[static_idxs[0], idxs, static_idxs[2]]

        #print(alltruth.shape)
        if self.remove:
            print('remove')
            weights=weights[notremoves > 0]
            x_global=x_global[notremoves > 0]
            x_cpf = x_cpf[notremoves > 0]
            x_npf = x_npf[notremoves > 0]
            alltruth=alltruth[notremoves > 0]
            pttruth=pttruth[notremoves > 0]
            ptreco=ptreco[notremoves > 0]
                        
        newnsamp=x_global.shape[0]
        print('reduced content to ', int(float(newnsamp)/float(self.nsamples)*100),'%')
        self.nsamples = newnsamp
        
        self.w=[weights]
        self.x=[x_global,x_cpf,x_npf,ptreco]
        self.y=[alltruth,pttruth]
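The same gather trick as in the sorting block above, wrapped as a self-contained sketch (the sort_by_feature name is hypothetical):

import numpy

def sort_by_feature(x, feature=0):
    # reorder particles (axis 1) of a (njets, nparticles, nfeatures)
    # array by ascending value of one feature, independently per jet
    idxs = x[:, :, feature].argsort()            # (njets, nparticles)
    static = numpy.indices(x.shape)              # index grids for all axes
    idxs = numpy.repeat(idxs[:, :, None], x.shape[2], axis=2)
    return x[static[0], idxs, static[2]]

x = numpy.random.rand(2, 4, 3)
assert (sort_by_feature(x)[:, :, 0] == numpy.sort(x[:, :, 0], axis=1)).all()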
Example #5
 def readFromRootFile(self,filename,TupleMeanStd, weighter):
     
     from preprocessing import MeanNormZeroPad, MeanNormZeroPadParticles
     import numpy
     from stopwatch import stopwatch
     
     sw=stopwatch()
     swall=stopwatch()
     
     import ROOT
     
     fileTimeOut(filename,120) #give eos two minutes to recover
     rfile = ROOT.TFile(filename)
     tree = rfile.Get(self.treename)
     self.nsamples=tree.GetEntries()
     Tuple = self.readTreeFromRootToTuple(filename)
     
     ###########################################################################################
     ############ this is where you define how to read in the branches and what to do with them
     ###########################################################################################
     
     
     
     
     ############ MeanNormZeroPad means that all branches are just put into a serial list
     ############ such as: jet1_pt, jet1_eta, jet2_pt, jet2_eta, ...
     ############ if there are not sufficient jets, the rest of the list is filled
     ############ with zeros (zero padding)
     ############ In addition, the variables are transformed such that they are centred around
     ############ zero and the width of the distribution is about 1.
     ############ This is only a technical trick that makes it easier for the DNN to converge
     ############ (see the standalone numpy sketch after this example)
     reco_global = MeanNormZeroPad(filename,TupleMeanStd,
                                self.branches,
                                self.branchcutoffs,self.nsamples)
     
     ############ Another choice of preprocessing that will be important for you is
     ############ MeanNormZeroPadParticles. It does the same rescaling as MeanNormZeroPad,
     ############ but organises the array as a 2D array per event, such that e.g. each
     ############ jet has its own list. This can be important when using more
     ############ involved neural networks than just dense layers. We will come to this
     ############ later; however, I put an example here already (but commented out)
     #reco_jetslist = MeanNormZeroPadParticles(filename,TupleMeanStd,
     #                           self.branches[3],      # the jet branches (see function above)
     #                           self.branchcutoffs[3], # the jet branch cut-offs (maximum six) as defined above
     #                           self.nsamples)
     
     
     ############ Here we read the branch that contains the truth information
     truth = Tuple['gen_mttbar']
     
     
     
     oldlength=self.nsamples
     if self.remove:
         notremoves=weighter.createNotRemoveIndices(Tuple)
         # this has to be done for each array produced before
         # don't forget!
         # it selects only the entries from the arrays that should not be removed
         # (where the notremoves array has an entry above 0)
         reco_global=reco_global[notremoves > 0]
         truth=truth[notremoves > 0]
         
         
         print("kept "+str(int(float(truth.shape[0])/float(oldlength)*100))+"%" )
         
     # we don't use weights for now, so we fill the weight array with 1
     # (set nsamples first so the weight array matches the filtered arrays)
     self.nsamples=truth.shape[0]
     weights=numpy.empty(self.nsamples)
     weights.fill(1.)
     
     
     # any array that should be used by the DNN needs to be added here
     # w: these are the weights (you don't have to change this)
     # x: this is the reconstructed information to fill
     # y: the true information
     self.w=[weights]
     self.x=[reco_global]
     self.y=[truth]
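As a standalone illustration of the zero padding and rescaling described in the comments above (pad_and_normalise is a hypothetical helper, not the DeepJet C module, which applies the same idea branch by branch):

import numpy

def pad_and_normalise(values, cutoff, mean, std):
    # centre the available entries around zero with width ~1,
    # then zero-pad the list up to a fixed length
    out = numpy.zeros(cutoff)
    n = min(len(values), cutoff)
    out[:n] = (numpy.asarray(values[:n], dtype=float) - mean) / std
    return out

# e.g. three jet pts with a cutoff of five -> two zero-padded slots
print(pad_and_normalise([50., 30., 20.], 5, mean=33., std=12.))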
Example #6
    def readFromRootFile(self,filename,TupleMeanStd, weighter):
        from preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles
        import numpy
        from stopwatch import stopwatch
        
        sw=stopwatch()
        swall=stopwatch()
        
        import ROOT
        
        fileTimeOut(filename,120) #give eos two minutes to recover
        rfile = ROOT.TFile(filename)
        tree = rfile.Get("deepntuplizer/tree")
        self.nsamples=tree.GetEntries()
        
        print('took ', sw.getAndReset(), ' seconds for getting tree entries')
        
        
        # split for convolutional network
        
        x_global = MeanNormZeroPad(filename,TupleMeanStd,
                                   [self.branches[0]],
                                   [self.branchcutoffs[0]],self.nsamples)
        
        
        x_a = MeanNormZeroPadParticles(filename,TupleMeanStd,
                                   self.branches[1],
                                   self.branchcutoffs[1],self.nsamples)
        
        x_b = MeanNormZeroPadParticles(filename,TupleMeanStd,
                                   self.branches[2],
                                   self.branchcutoffs[2],self.nsamples)
        
        
        
        
        
        print('took ', sw.getAndReset(), ' seconds for mean norm and zero padding (C module)')
        
        Tuple = self.readTreeFromRootToTuple(filename)
        
        if self.remove:
            notremoves=weighter.createNotRemoveIndices(Tuple)
        
            print('took ', sw.getAndReset(), ' to create remove indices')
        
        if self.weight:
            weights=weighter.getJetWeights(Tuple)
        elif self.remove:
            weights=notremoves
        else:
            print('neither remove nor weight')
            weights=numpy.empty(self.nsamples)
            weights.fill(1.)
        
        
        truthtuple =  Tuple[self.truthclasses]
        #print(self.truthclasses)
        alltruth=self.reduceTruth(truthtuple)
        
        #print(alltruth.shape)
        if self.remove:
            print('remove')
            weights=weights[notremoves > 0]
            x_global=x_global[notremoves > 0]
            x_a=x_a[notremoves > 0]
            x_b=x_b[notremoves > 0]
            alltruth=alltruth[notremoves > 0]
       
        newnsamp=x_global.shape[0]
        print('reduced content to ', int(float(newnsamp)/float(self.nsamples)*100),'%')
        self.nsamples = newnsamp
        
        print(x_global.shape,self.nsamples)

        self.w=[weights]
        self.x=[x_global,x_a,x_b]
        self.y=[alltruth]
Example #7
    def readFromRootFile(self, filename, TupleMeanStd, weighter):

        #the first part is standard, no changes needed
        from preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles
        import numpy
        import ROOT

        fileTimeOut(filename, 120)  #give eos 2 minutes to recover
        rfile = ROOT.TFile(filename)
        tree = rfile.Get("deepntuplizer/tree")
        self.nsamples = tree.GetEntries()

        #the definition of what to do with the branches

        # those are the global branches (jet pt etc)
        # they should be just glued to each other in one vector
        # and zero padded (and mean subtracted and normalised)
        x_global = MeanNormZeroPad(filename, TupleMeanStd, [self.branches[0]],
                                   [self.branchcutoffs[0]], self.nsamples)

        # the second part (the pf candidates) should be treated particle wise
        # an array with (njets, nparticles, nproperties) is created
        x_cpf = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                         self.branches[1],
                                         self.branchcutoffs[1], self.nsamples)

        # maybe an image of the energy density of charged particles
        # should also be added (see the numpy sketch after this example)
        x_chmap = createDensityMap(
            filename,
            TupleMeanStd,
            'Cpfcan_erel',  #use the energy to create the image
            self.nsamples,
            # 7 bins in eta with a total width of 2*0.9
            ['Cpfcan_eta', 'jet_eta', 7, 0.9],
            # 7 bins in phi with a total width of 2*0.9
            ['Cpfcan_phi', 'jet_phi', 7, 0.9],
            'nCpfcand',
            # the last is an offset because the relative energy as
            # can be found in the ntuples is shifted by 1
            -1)

        # now, some jets are removed to avoid pt and eta biases

        Tuple = self.readTreeFromRootToTuple(filename)
        if self.remove:
            # jets are removed until the shapes in eta and pt are the same as
            # the truth class 'isQCD'
            notremoves = weighter.createNotRemoveIndices(Tuple)
            undef = Tuple[self.undefTruth]
            notremoves -= undef

        if self.weight:
            weights = weighter.getJetWeights(Tuple)
        elif self.remove:
            weights = notremoves
        else:
            print('neither remove nor weight')
            weights = numpy.empty(self.nsamples)
            weights.fill(1.)

        # create all collections:
        truthtuple = Tuple[self.truthclasses]
        alltruth = self.reduceTruth(truthtuple)

        # remove the entries to get same jet shapes
        if self.remove:
            print('remove')
            weights = weights[notremoves > 0]
            x_global = x_global[notremoves > 0]
            x_cpf = x_cpf[notremoves > 0]
            x_chmap = x_chmap[notremoves > 0]
            alltruth = alltruth[notremoves > 0]

        newnsamp = x_global.shape[0]
        print('reduced content to ',
              int(float(newnsamp) / float(self.nsamples) * 100), '%')
        self.nsamples = newnsamp

        # fill everything
        self.w = [weights]
        self.x = [x_global, x_cpf, x_chmap]
        self.y = [alltruth]
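For intuition, a minimal numpy sketch of the binned density-map idea (density_map and its signature are hypothetical, not the DeepJet C module; a real jet image would be built per jet from the ntuple branches):

import numpy

def density_map(weights, eta, phi, jet_eta, jet_phi, nbins=7, half_width=0.9):
    # histogram the particle weights (e.g. relative energy) in (eta, phi)
    # measured relative to the jet axis, over a window of +-half_width
    h, _, _ = numpy.histogram2d(
        numpy.asarray(eta) - jet_eta,
        numpy.asarray(phi) - jet_phi,
        bins=nbins,
        range=[[-half_width, half_width], [-half_width, half_width]],
        weights=weights)
    return h  # one (nbins, nbins) image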
Example #8
    def readFromRootFile(self, filename, TupleMeanStd, weighter):
        from preprocessing import MeanNormApply, MeanNormZeroPad, createDensityMap, createCountMap, MeanNormZeroPadParticles
        import numpy
        from stopwatch import stopwatch

        sw = stopwatch()
        swall = stopwatch()

        import ROOT

        fileTimeOut(filename, 120)  #give eos two minutes to recover
        rfile = ROOT.TFile(filename)
        tree = rfile.Get("deepntuplizer/tree")
        self.nsamples = tree.GetEntries()

        print('took ', sw.getAndReset(), ' seconds for getting tree entries')

        # split for convolutional network

        x_global = MeanNormZeroPad(filename, TupleMeanStd, [self.branches[0]],
                                   [self.branchcutoffs[0]], self.nsamples)

        #here the difference starts
        x_chmap = createDensityMap(filename,
                                   TupleMeanStd,
                                   'Cpfcan_ptrel',
                                   self.nsamples,
                                   ['Cpfcan_eta', 'jet_eta', 20, 0.5],
                                   ['Cpfcan_phi', 'jet_phi', 20, 0.5],
                                   'nCpfcand',
                                   -1,
                                   weightbranch='Cpfcan_puppiw')

        x_chcount = createCountMap(filename, TupleMeanStd, self.nsamples,
                                   ['Cpfcan_eta', 'jet_eta', 20, 0.5],
                                   ['Cpfcan_phi', 'jet_phi', 20, 0.5],
                                   'nCpfcand')

        x_neumap = createDensityMap(filename,
                                    TupleMeanStd,
                                    'Npfcan_ptrel',
                                    self.nsamples,
                                    ['Npfcan_eta', 'jet_eta', 20, 0.5],
                                    ['Npfcan_phi', 'jet_phi', 20, 0.5],
                                    'nNpfcand',
                                    -1,
                                    weightbranch='Npfcan_puppiw')

        x_neucount = createCountMap(filename, TupleMeanStd, self.nsamples,
                                    ['Npfcan_eta', 'jet_eta', 20, 0.5],
                                    ['Npfcan_phi', 'jet_phi', 20, 0.5],
                                    'nNpfcand')

        print('took ', sw.getAndReset(),
              ' seconds for mean norm and zero padding (C module)')

        Tuple = self.readTreeFromRootToTuple(filename)

        if self.remove:
            notremoves = weighter.createNotRemoveIndices(Tuple)
            undef = Tuple['isUndefined']
            notremoves -= undef
            print('took ', sw.getAndReset(), ' to create remove indices')

        if self.weight:
            weights = weighter.getJetWeights(Tuple)
        elif self.remove:
            weights = notremoves
        else:
            print('neither remove nor weight')
            weights = numpy.ones(self.nsamples)

        pttruth = Tuple[self.regtruth]
        ptreco = Tuple[self.regreco]

        truthtuple = Tuple[self.truthclasses]
        #print(self.truthclasses)
        alltruth = self.reduceTruth(truthtuple)

        x_map = numpy.concatenate((x_chmap, x_chcount, x_neumap, x_neucount),
                                  axis=3)
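        # stack the four maps along the last axis into one multi-channel
        # image: charged density, charged count, neutral density, neutral count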

        #print(alltruth.shape)
        if self.remove:
            print('remove')
            weights = weights[notremoves > 0]
            x_global = x_global[notremoves > 0]
            x_map = x_map[notremoves > 0]
            alltruth = alltruth[notremoves > 0]
            pttruth = pttruth[notremoves > 0]
            ptreco = ptreco[notremoves > 0]

        newnsamp = x_global.shape[0]
        print('reduced content to ',
              int(float(newnsamp) / float(self.nsamples) * 100), '%')
        self.nsamples = newnsamp
        print(x_global.shape, self.nsamples)

        self.w = [weights]
        self.x = [x_global, x_map, ptreco]
        self.y = [alltruth, pttruth]
Example #9
    def readFromRootFile(self, filename, TupleMeanStd, weighter):
        from preprocessing import MeanNormApply, createCountMap, createDensity, MeanNormZeroPad, createDensityMap, MeanNormZeroPadParticles
        import numpy
        from stopwatch import stopwatch

        sw = stopwatch()
        swall = stopwatch()

        import ROOT

        fileTimeOut(filename, 120)  #give eos two minutes to recover
        rfile = ROOT.TFile(filename)
        tree = rfile.Get("deepntuplizer/tree")
        self.nsamples = tree.GetEntries()

        print('took ', sw.getAndReset(), ' seconds for getting tree entries')

        # split for convolutional network

        x_global = MeanNormZeroPad(filename, TupleMeanStd, [self.branches[0]],
                                   [self.branchcutoffs[0]], self.nsamples)

        x_cpf = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                         self.branches[1],
                                         self.branchcutoffs[1], self.nsamples)

        x_npf = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                         self.branches[2],
                                         self.branchcutoffs[2], self.nsamples)

        x_sv = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                        self.branches[3],
                                        self.branchcutoffs[3], self.nsamples)

        #here the difference starts
        nbins = 8

        x_chmap = createDensity(
            filename,
            inbranches=['Cpfcan_ptrel', 'Cpfcan_etarel', 'Cpfcan_phirel'],
            modes=['sum', 'average', 'average'],
            nevents=self.nsamples,
            dimension1=['Cpfcan_eta', 'jet_eta', nbins, 0.45],
            dimension2=['Cpfcan_phi', 'jet_phi', nbins, 0.45],
            counterbranch='nCpfcand',
            offsets=[-1, -0.5, -0.5])

        x_neumap = createDensity(
            filename,
            inbranches=['Npfcan_ptrel', 'Npfcan_etarel', 'Npfcan_phirel'],
            modes=['sum', 'average', 'average'],
            nevents=self.nsamples,
            dimension1=['Npfcan_eta', 'jet_eta', nbins, 0.45],
            dimension2=['Npfcan_phi', 'jet_phi', nbins, 0.45],
            counterbranch='nNpfcand',  # neutral counter; 'nCpfcand' in the snippet looked like a copy-paste slip
            offsets=[-1, -0.5, -0.5])

        x_chcount = createCountMap(filename, TupleMeanStd, self.nsamples,
                                   ['Cpfcan_eta', 'jet_eta', nbins, 0.45],
                                   ['Cpfcan_phi', 'jet_phi', nbins, 0.45],
                                   'nCpfcand')

        x_neucount = createCountMap(filename, TupleMeanStd, self.nsamples,
                                    ['Npfcan_eta', 'jet_eta', nbins, 0.45],
                                    ['Npfcan_phi', 'jet_phi', nbins, 0.45],
                                    'nNpfcand')

        print('took ', sw.getAndReset(),
              ' seconds for mean norm and zero padding (C module)')

        Tuple = self.readTreeFromRootToTuple(filename)

        if self.remove:
            notremoves = weighter.createNotRemoveIndices(Tuple)
            undef = Tuple['isUndefined']
            notremoves -= undef
            print('took ', sw.getAndReset(), ' to create remove indices')

        if self.weight:
            weights = weighter.getJetWeights(Tuple)
        elif self.remove:
            weights = notremoves
        else:
            print('neither remove nor weight')
            weights = numpy.empty(self.nsamples)
            weights.fill(1.)

        truthtuple = Tuple[self.truthclasses]
        #print(self.truthclasses)
        alltruth = self.reduceTruth(truthtuple)

        regtruth = Tuple['gen_pt_WithNu']
        regreco = Tuple['jet_corr_pt']

        #print(alltruth.shape)
        if self.remove:
            print('remove')
            weights = weights[notremoves > 0]
            x_global = x_global[notremoves > 0]
            x_cpf = x_cpf[notremoves > 0]
            x_npf = x_npf[notremoves > 0]
            x_sv = x_sv[notremoves > 0]

            x_chmap = x_chmap[notremoves > 0]
            x_neumap = x_neumap[notremoves > 0]

            x_chcount = x_chcount[notremoves > 0]
            x_neucount = x_neucount[notremoves > 0]

            alltruth = alltruth[notremoves > 0]

            regreco = regreco[notremoves > 0]
            regtruth = regtruth[notremoves > 0]

        newnsamp = x_global.shape[0]
        print('reduced content to ',
              int(float(newnsamp) / float(self.nsamples) * 100), '%')
        self.nsamples = newnsamp

        x_map = numpy.concatenate((x_chmap, x_neumap, x_chcount, x_neucount),
                                  axis=3)

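        # one weight array per network output (class truth and pt regression truth)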
        self.w = [weights, weights]
        self.x = [x_global, x_cpf, x_npf, x_sv, x_map, regreco]
        self.y = [alltruth, regtruth]
Example #10
    def readFromRootFile(self, filename, TupleMeanStd, weighter):
        from preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles
        import numpy
        from stopwatch import stopwatch

        sw = stopwatch()
        swall = stopwatch()

        import ROOT

        fileTimeOut(filename, 120)  #give eos two minutes to recover
        rfile = ROOT.TFile(filename)
        tree = rfile.Get("deepntuplizer/tree")
        self.nsamples = tree.GetEntries()

        print('took ', sw.getAndReset(), ' seconds for getting tree entries')

        # split for convolutional network

        x_global = MeanNormZeroPad(filename, TupleMeanStd, [self.branches[0]],
                                   [self.branchcutoffs[0]], self.nsamples)

        x_cpf = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                         self.branches[1],
                                         self.branchcutoffs[1], self.nsamples)

        x_npf = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                         self.branches[2],
                                         self.branchcutoffs[2], self.nsamples)

        x_sv = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                        self.branches[3],
                                        self.branchcutoffs[3], self.nsamples)

        x_reg = MeanNormZeroPad(filename, TupleMeanStd, [self.branches[4]],
                                [self.branchcutoffs[4]], self.nsamples)

        print('took ', sw.getAndReset(),
              ' seconds for mean norm and zero padding (C module)')

        Tuple = self.readTreeFromRootToTuple(filename)

        reg_truth = Tuple['gen_pt_WithNu'].view(numpy.ndarray)
        reco_pt = Tuple['jet_corr_pt'].view(numpy.ndarray)

        correctionfactor = numpy.zeros(self.nsamples)
        for i in range(self.nsamples):
            correctionfactor[i] = reg_truth[i] / reco_pt[i]
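        # equivalently vectorised: correctionfactor = reg_truth / reco_pt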

        if self.remove:
            notremoves = weighter.createNotRemoveIndices(Tuple)
            undef = Tuple['isUndefined']
            notremoves -= undef
            print('took ', sw.getAndReset(), ' to create remove indices')

        if self.weight:
            weights = weighter.getJetWeights(Tuple)
        elif self.remove:
            weights = notremoves
        else:
            print('neither remove nor weight')
            weights = numpy.empty(self.nsamples)
            weights.fill(1.)

        truthtuple = Tuple[self.truthclasses]
        #print(self.truthclasses)
        alltruth = self.reduceTruth(truthtuple)

        #print(alltruth.shape)
        if self.remove:
            print('remove')
            weights = weights[notremoves > 0]
            x_global = x_global[notremoves > 0]
            x_cpf = x_cpf[notremoves > 0]
            x_npf = x_npf[notremoves > 0]
            x_sv = x_sv[notremoves > 0]
            alltruth = alltruth[notremoves > 0]

            x_reg = x_reg[notremoves > 0]
            correctionfactor = correctionfactor[notremoves > 0]

        newnsamp = x_global.shape[0]
        print('reduced content to ',
              int(float(newnsamp) / float(self.nsamples) * 100), '%')
        self.nsamples = newnsamp

        print(x_global.shape, self.nsamples)

        self.w = [weights, weights]
        self.x = [x_global, x_cpf, x_npf, x_sv, x_reg]
        self.y = [alltruth, correctionfactor]
Example #11
    def readFromRootFile(self,filename,TupleMeanStd, weighter):
        from preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles, MeanNormZeroPadBinned
        import numpy
        from stopwatch import stopwatch
        
        sw=stopwatch()
        swall=stopwatch()
        
        import ROOT
        
        fileTimeOut(filename,120) #give eos two minutes to recover
        rfile = ROOT.TFile(filename)
        tree = rfile.Get("deepntuplizer/tree")
        self.nsamples=tree.GetEntries()
        #self.nsamples = 10 #TESTING

        print('took ', sw.getAndReset(), ' seconds for getting tree entries')
        
        # split for convolutional network
        x_global = MeanNormZeroPad(filename,TupleMeanStd,
                                   [self.branches[0]],
                                   [self.branchcutoffs[0]],self.nsamples)
        
        # needed:
        # (branch for dimension #1, its centre, number of bins, half width)
        # (branch for dimension #2, its centre, number of bins, half width)
        # sums to stack -- max number of particles to stack/zero-pad
        x_cpf, sum_cpf = MeanNormZeroPadBinned(
            filename, 'nCpfcand', self.nsamples,
            ('Cpfcan_eta', 'jet_eta', self.nbins, self.jet_radius), #X axis
            ('Cpfcan_phi', 'jet_phi', self.nbins, self.jet_radius), #Y axis
            (TupleMeanStd, self.branches[1], self.branchcutoffs[1]), # means/stds, branches to use, per-bin max number of particles kept
            (self.sums_scaling['charged'], self.binned_sums['charged']), #variables to be summed (no zero padding yet)
        )

        x_npf, sum_npf = MeanNormZeroPadBinned(
            filename, 'nNpfcand', self.nsamples,
            ('Npfcan_eta', 'jet_eta', self.nbins, self.jet_radius), 
            ('Npfcan_phi', 'jet_phi', self.nbins, self.jet_radius), 
            (TupleMeanStd, self.branches[2], self.branchcutoffs[2]),
            (self.sums_scaling['neutral'], self.binned_sums['neutral']),
        )
        
        x_sv, sum_sv = MeanNormZeroPadBinned(
            filename, 'nsv', self.nsamples, 
            ('sv_eta', 'jet_eta', self.nbins, self.jet_radius), 
            ('sv_phi', 'jet_phi', self.nbins, self.jet_radius), 
            (TupleMeanStd, self.branches[3], self.branchcutoffs[3]),
            (self.sums_scaling['svs'], self.binned_sums['svs']),
        )

        #merging sum variables together
        x_sum = numpy.concatenate((sum_cpf, sum_npf, sum_sv), axis=3)

        print('took ', sw.getAndReset(), ' seconds for mean norm and zero padding (C module)')
        
        Tuple = self.readTreeFromRootToTuple(filename)

        if self.remove:
            notremoves=weighter.createNotRemoveIndices(Tuple)
            undef=Tuple['isUndefined']
            notremoves-=undef
            print('took ', sw.getAndReset(), ' to create remove indices')
        
        if self.weight:
            weights=weighter.getJetWeights(Tuple)
        elif self.remove:
            weights=notremoves
        else:
            print('neither remove nor weight')
            weights=numpy.empty(self.nsamples)
            weights.fill(1.)
        
        
        truthtuple =  Tuple[self.truthclasses]
        #print(self.truthclasses)
        alltruth=self.reduceTruth(truthtuple)
        pt_truth = Tuple[self.regtruth]
        
        #print(alltruth.shape)
        if self.remove:
            print('remove')
            weights = weights[notremoves > 0]
            x_global = x_global[notremoves > 0]
            x_cpf = x_cpf[notremoves > 0]
            x_npf = x_npf[notremoves > 0]
            x_sv  = x_sv[notremoves > 0]
            x_sum = x_sum[notremoves > 0]
            alltruth = alltruth[notremoves > 0]
            pt_truth = pt_truth[notremoves > 0]
       
        newnsamp=x_global.shape[0]
        print('reduced content to ', int(float(newnsamp)/float(self.nsamples)*100),'%')
        self.nsamples = newnsamp
        
        self.w = [weights]
        self.x = [x_global, x_cpf, x_npf, x_sv, x_sum]
        self.y = [alltruth, pt_truth]
Example #12
    def readFromRootFile(self,filename,TupleMeanStd, weighter):
        from preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles
        import numpy
        from stopwatch import stopwatch
        
        sw=stopwatch()
        swall=stopwatch()
        
        import ROOT
        
        fileTimeOut(filename,120) #give eos two minutes to recover
        rfile = ROOT.TFile(filename)
        tree = rfile.Get("deepntuplizer/tree")
        self.nsamples=tree.GetEntries()
        
        print('took ', sw.getAndReset(), ' seconds for getting tree entries')
        
        
        # split for convolutional network
        
        x_global = MeanNormZeroPad(filename,TupleMeanStd,
                                   [self.branches[0]],
                                   [self.branchcutoffs[0]],self.nsamples)
        
        x_cpf = MeanNormZeroPadParticles(filename,TupleMeanStd,
                                   self.branches[1],
                                   self.branchcutoffs[1],self.nsamples)
        
        x_npf = MeanNormZeroPadParticles(filename,TupleMeanStd,
                                   self.branches[2],
                                   self.branchcutoffs[2],self.nsamples)
        
        x_sv = MeanNormZeroPadParticles(filename,TupleMeanStd,
                                   self.branches[3],
                                   self.branchcutoffs[3],self.nsamples)
        
        
        
        print('took ', sw.getAndReset(), ' seconds for mean norm and zero padding (C module)')
        
        Tuple = self.readTreeFromRootToTuple(filename)
        
        if self.remove:
            notremoves=weighter.createNotRemoveIndices(Tuple)
            undef=Tuple['isUndefined']
            notremoves-=undef
            print('took ', sw.getAndReset(), ' to create remove indices')
        
        if self.weight:
            weights=weighter.getJetWeights(Tuple)
        elif self.remove:
            weights=notremoves
        else:
            print('neither remove nor weight')
            weights=numpy.empty(self.nsamples)
            weights.fill(1.)
        
        
        truthtuple =  Tuple[self.truthclasses]
        #print(self.truthclasses)
        alltruth=self.reduceTruth(truthtuple)

        mask = Tuple[['nCpfcand','nNpfcand','nsv']]
        maskListNpf = []
        maskListCpf = []
        maskListSv = []

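        # build per-jet 0/1 masks: 1 for filled particle slots, 0 for padding
        # (note: the Npf mask is built from mask[i][0], i.e. nCpfcand, and the
        # Cpf mask from mask[i][1], i.e. nNpfcand; a vectorised sketch follows
        # after this example)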
        for i in range(0,Tuple.shape[0]):
            nMax = int(mask[i][0])
            if(nMax>25): nMax=25
            list0 = [[1.]*nMax+[0.]*(25-nMax)]*8
            maskListNpf.append(list0)

            nMax = int(mask[i][1])
            if(nMax>25): nMax=25
            list1 = [[1.]*nMax+[0.]*(25-nMax)]*4
            maskListCpf.append(list1)
            nMax = int(mask[i][2])
            if(nMax>4): nMax=4
            list2 = [[1.]*nMax+[0.]*(4-nMax)]*8
            maskListSv.append(list2)
            
        
        maskListNpf = numpy.asarray(maskListNpf,dtype=float)
        maskListCpf = numpy.asarray(maskListCpf,dtype=float)
        maskListSv = numpy.asarray(maskListSv,dtype=float)
        print ('zero shapes ', maskListNpf.shape, ' ' ,maskListCpf.shape  , ' ' ,maskListSv.shape )
        #print(alltruth.shape)
        if self.remove:
            print('remove')
            weights=weights[notremoves > 0]
            x_global=x_global[notremoves > 0]
            x_cpf=x_cpf[notremoves > 0]
            x_npf=x_npf[notremoves > 0]
            x_sv=x_sv[notremoves > 0]
            maskListNpf = maskListNpf[notremoves > 0]
            maskListCpf = maskListCpf[notremoves > 0]
            maskListSv = maskListSv[notremoves > 0]
            alltruth=alltruth[notremoves > 0]
       
        newnsamp=x_global.shape[0]
        print('reduced content to ', int(float(newnsamp)/float(self.nsamples)*100),'%')
        self.nsamples = newnsamp
        
        print(x_global.shape,self.nsamples)

        self.w=[weights]
        print (' types ',  type (x_cpf) , type (maskListNpf), ' ' ,type(maskListCpf) , ' ' , type(maskListSv) )
        self.x=[x_global,x_cpf,x_npf,x_sv,maskListNpf,maskListCpf,maskListSv]
        self.y=[alltruth]
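The loop above builds the 0/1 masks one jet at a time; the same logic as a vectorised numpy sketch (sequence_mask is a hypothetical helper):

import numpy

def sequence_mask(counts, max_len, nfeatures):
    # counts: (njets,) number of filled particle slots per jet;
    # returns (njets, nfeatures, max_len) with 1. for filled slots
    # and 0. for the zero-padded tail
    counts = numpy.clip(numpy.asarray(counts, dtype=int), 0, max_len)
    base = (numpy.arange(max_len)[None, :] < counts[:, None]).astype(float)
    return numpy.repeat(base[:, None, :], nfeatures, axis=1)

# e.g. two jets with 3 and 1 candidates, 25 slots, 4 features each
print(sequence_mask(numpy.array([3, 1]), 25, 4).shape)  # (2, 4, 25)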