Example #1
0
    def readFromRootFile(self,filename,TupleMeanStd, weighter):
        """Read one ROOT file and fill ``self.x`` and ``self.y`` for a pt regression.

        The tree ``deepntuplizer/tree`` is opened to get the number of entries,
        three input arrays are built from ``self.branches[0..2]`` with the
        preprocessing helpers, and the regression target is the element-wise
        ratio ``gen_pt_WithNu / jet_corr_pt``.

        Args:
            filename: path of the ROOT file to read.
            TupleMeanStd: not used by this implementation; ``None`` is handed
                to the preprocessing helpers instead.
            weighter: forwarded to ``self._normalize_input_`` together with
                the tuple read from the file.
        """
        from preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles
        import numpy
        from stopwatch import stopwatch

        sw=stopwatch()

        import ROOT

        # wait for the file to be reachable before opening it (timeout argument: 120)
        fileTimeOut(filename,120)
        rfile = ROOT.TFile(filename)
        tree = rfile.Get("deepntuplizer/tree")
        self.nsamples=tree.GetEntries()

        print('took ', sw.getAndReset(), ' seconds for getting tree entries')

        # split for convolutional network: one global block plus two
        # per-particle blocks, all built without mean/std information (None)
        x_global = MeanNormZeroPad(
            filename,None,
            [self.branches[0]],
            [self.branchcutoffs[0]],self.nsamples
        )

        x_cpf = MeanNormZeroPadParticles(
            filename,None,
            self.branches[1],
            self.branchcutoffs[1],self.nsamples
        )

        x_sv = MeanNormZeroPadParticles(
            filename,None,
            self.branches[2],
            self.branchcutoffs[2],self.nsamples
        )

        print('took ', sw.getAndReset(), ' seconds for mean norm and zero padding (C module)')

        npy_array = self.readTreeFromRootToTuple(filename)

        reg_truth=npy_array['gen_pt_WithNu'].view(numpy.ndarray)
        reco_pt=npy_array['jet_corr_pt'].view(numpy.ndarray)

        # Vectorised ratio. Writing into the pre-allocated buffer keeps the
        # float64 dtype and the (nsamples,) shape the per-element loop produced.
        correctionfactor=numpy.zeros(self.nsamples)
        correctionfactor[:]=reg_truth/reco_pt

        truthtuple =  npy_array[self.truthclasses]
        alltruth=self.reduceTruth(truthtuple)

        self.x=[x_global, x_cpf, x_sv, reco_pt]
        self.y=[alltruth,correctionfactor]
        self._normalize_input_(weighter, npy_array)
Example #2
0
    def readFromRootFile(self,filename,TupleMeanStd, weighter):
        """Read one ROOT file and fill ``self.x``, ``self.y`` and ``self.w``.

        Three input arrays are built from ``self.branches[0..2]``, the two
        per-particle arrays are reordered along their second axis by the value
        of feature 0, and, if ``self.remove`` is set, only entries whose
        not-remove flag is > 0 are kept.

        Args:
            filename: path of the ROOT file; ``deepntuplizer/tree`` is read.
            TupleMeanStd: passed through to the preprocessing helpers.
            weighter: provides ``createNotRemoveIndices`` and ``getJetWeights``.
        """
        from preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles
        import numpy
        from stopwatch import stopwatch
        import c_meanNormZeroPad
        # NOTE(review): called without arguments and its result is ignored;
        # kept as-is because its purpose is not visible from this method.
        c_meanNormZeroPad.zeroPad()

        def _sort_by_first_feature(particles):
            # Reorder axis 1 of a 3-d array so that feature 0 is ascending
            # within every sample; all features of an entry move together.
            order = particles[:,:,0].argsort()
            shape = particles.shape
            static_idxs = numpy.indices(shape)
            order = order.reshape((shape[0], shape[1], 1))
            order = numpy.repeat(order, shape[2], axis=2)
            return particles[static_idxs[0], order, static_idxs[2]]

        sw=stopwatch()

        import ROOT

        # wait for the file to be reachable before opening it (timeout argument: 120)
        fileTimeOut(filename,120)
        rfile = ROOT.TFile(filename)
        tree = rfile.Get("deepntuplizer/tree")
        self.nsamples=tree.GetEntries()

        print('took ', sw.getAndReset(), ' seconds for getting tree entries')

        # split for convolutional network
        x_global = MeanNormZeroPad(filename,TupleMeanStd,
                                   [self.branches[0]],
                                   [self.branchcutoffs[0]],self.nsamples)

        x_cpf = MeanNormZeroPadParticles(filename,TupleMeanStd,
                                   self.branches[1],
                                   self.branchcutoffs[1],self.nsamples)

        x_npf = MeanNormZeroPadParticles(filename,TupleMeanStd,
                                   self.branches[2],
                                   self.branchcutoffs[2],self.nsamples)

        print('took ', sw.getAndReset(), ' seconds for mean norm and zero padding (C module)')

        nparray = self.readTreeFromRootToTuple(filename)
        if self.remove:
            notremoves=weighter.createNotRemoveIndices(nparray)
            # entries flagged as undefined are subtracted so that they fail
            # the "> 0" selection below
            undef=nparray['isUndefined']
            notremoves -= undef
            print('took ', sw.getAndReset(), ' to create remove indices')

        if self.weight:
            weights=weighter.getJetWeights(nparray)
        elif self.remove:
            weights=notremoves
        else:
            print('neither remove nor weight')
            weights=numpy.ones(self.nsamples)

        pttruth = nparray[self.regtruth]
        ptreco  = nparray[self.regreco]
        truthtuple =  nparray[self.truthclasses]
        alltruth=self.reduceTruth(truthtuple)

        # sort the particle entries by feature 0
        # (the original comment calls feature 0 the "pt ratio")
        x_cpf = _sort_by_first_feature(x_cpf)
        x_npf = _sort_by_first_feature(x_npf)

        if self.remove:
            print('remove')
            keep = notremoves > 0
            weights=weights[keep]
            x_global=x_global[keep]
            x_cpf = x_cpf[keep]
            x_npf = x_npf[keep]
            alltruth=alltruth[keep]
            pttruth=pttruth[keep]
            ptreco=ptreco[keep]

        newnsamp=x_global.shape[0]
        print('reduced content to ', int(float(newnsamp)/float(self.nsamples)*100),'%')
        self.nsamples = newnsamp

        self.w=[weights]
        self.x=[x_global,x_cpf,x_npf,ptreco]
        self.y=[alltruth,pttruth]
Example #3
0
    def readFromRootFile(self,filename,TupleMeanStd, weighter):
        """Load one ROOT file into ``self.x``, ``self.y`` and ``self.w``.

        Three input arrays are built from ``self.branches[0..2]`` with the
        preprocessing helpers and the truth comes from ``self.reduceTruth``.
        When ``self.remove`` is set, only entries whose flag returned by
        ``weighter.createNotRemoveIndices`` is > 0 survive.

        Args:
            filename: path of the ROOT file; ``deepntuplizer/tree`` is read.
            TupleMeanStd: passed through to the preprocessing helpers.
            weighter: provides ``createNotRemoveIndices`` and ``getJetWeights``.
        """
        from preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles
        import numpy
        from stopwatch import stopwatch

        sw=stopwatch()
        swall=stopwatch()

        import ROOT

        # wait for the file to be reachable before opening it (timeout argument: 120)
        fileTimeOut(filename,120)
        rfile = ROOT.TFile(filename)
        entry_tree = rfile.Get("deepntuplizer/tree")
        self.nsamples=entry_tree.GetEntries()

        print('took ', sw.getAndReset(), ' seconds for getting tree entries')

        # split for convolutional network: branch group 0 goes through
        # MeanNormZeroPad, groups 1 and 2 through the per-particle variant
        x_global = MeanNormZeroPad(
            filename,TupleMeanStd,
            [self.branches[0]],
            [self.branchcutoffs[0]],
            self.nsamples)

        x_a = MeanNormZeroPadParticles(
            filename,TupleMeanStd,
            self.branches[1],
            self.branchcutoffs[1],
            self.nsamples)

        x_b = MeanNormZeroPadParticles(
            filename,TupleMeanStd,
            self.branches[2],
            self.branchcutoffs[2],
            self.nsamples)

        print('took ', sw.getAndReset(), ' seconds for mean norm and zero padding (C module)')

        jet_tuple = self.readTreeFromRootToTuple(filename)

        if self.remove:
            notremoves=weighter.createNotRemoveIndices(jet_tuple)
            print('took ', sw.getAndReset(), ' to create remove indices')

        # weights: explicit jet weights win, otherwise the not-remove flags,
        # otherwise a constant weight of one per entry
        if self.weight:
            weights=weighter.getJetWeights(jet_tuple)
        elif self.remove:
            weights=notremoves
        else:
            print('neither remove nor weight')
            weights=numpy.ones(self.nsamples)

        alltruth=self.reduceTruth(jet_tuple[self.truthclasses])

        if self.remove:
            print('remove')
            keep = notremoves > 0
            weights, x_global, x_a, x_b, alltruth = [
                arr[keep] for arr in (weights, x_global, x_a, x_b, alltruth)
            ]

        newnsamp=x_global.shape[0]
        kept_fraction=float(newnsamp)/float(self.nsamples)
        print('reduced content to ', int(kept_fraction*100),'%')
        self.nsamples = newnsamp

        print(x_global.shape,self.nsamples)

        self.w=[weights]
        self.x=[x_global,x_a,x_b]
        self.y=[alltruth]
Example #4
0
    def readFromRootFile(self, filename, TupleMeanStd, weighter):
        """Load one ROOT file into ``self.x``, ``self.y`` and ``self.w``.

        Builds a global-feature array, a per-particle array for the charged
        candidates and a density map of ``Cpfcan_erel``; optionally drops
        entries according to ``weighter.createNotRemoveIndices``.

        Args:
            filename: path of the ROOT file; ``deepntuplizer/tree`` is read.
            TupleMeanStd: passed through to the preprocessing helpers.
            weighter: provides ``createNotRemoveIndices`` and ``getJetWeights``.
        """
        # standard preamble
        from preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles
        import numpy
        import ROOT

        # wait for the file to be reachable before opening it (timeout argument: 120)
        fileTimeOut(filename, 120)
        rfile = ROOT.TFile(filename)
        entry_tree = rfile.Get("deepntuplizer/tree")
        self.nsamples = entry_tree.GetEntries()

        # Global branches (jet pt etc.): glued together into one vector,
        # zero padded, mean subtracted and normalised.
        x_global = MeanNormZeroPad(
            filename, TupleMeanStd,
            [self.branches[0]],
            [self.branchcutoffs[0]],
            self.nsamples)

        # PF candidates are treated particle-wise, giving an array of
        # (njets, nparticles, nproperties).
        x_cpf = MeanNormZeroPadParticles(
            filename, TupleMeanStd,
            self.branches[1],
            self.branchcutoffs[1],
            self.nsamples)

        # Image of the charged-particle energy density: 7 bins in eta and
        # 7 bins in phi, each with a total width of 2*0.9.
        # NOTE(review): createDensityMap is not imported in this method;
        # presumably it is available at module level - confirm.
        eta_axis = ['Cpfcan_eta', 'jet_eta', 7, 0.9]
        phi_axis = ['Cpfcan_phi', 'jet_phi', 7, 0.9]
        x_chmap = createDensityMap(
            filename,
            TupleMeanStd,
            'Cpfcan_erel',  # the energy is what fills the image
            self.nsamples,
            eta_axis,
            phi_axis,
            'nCpfcand',
            # offset: the relative energy in the ntuples is shifted by 1
            -1)

        # Some jets are removed to avoid pt and eta biases.
        jet_tuple = self.readTreeFromRootToTuple(filename)
        if self.remove:
            # jets are removed until the eta and pt shapes match those of
            # the truth class 'isQCD'; undefined-truth jets are subtracted
            # so that they fail the "> 0" selection below
            notremoves = weighter.createNotRemoveIndices(jet_tuple)
            notremoves -= jet_tuple[self.undefTruth]

        if self.weight:
            weights = weighter.getJetWeights(jet_tuple)
        elif self.remove:
            weights = notremoves
        else:
            print('neither remove nor weight')
            weights = numpy.ones(self.nsamples)

        # truth collection
        alltruth = self.reduceTruth(jet_tuple[self.truthclasses])

        # apply the selection to every collection
        if self.remove:
            print('remove')
            keep = notremoves > 0
            weights, x_global, x_cpf, x_chmap, alltruth = [
                arr[keep]
                for arr in (weights, x_global, x_cpf, x_chmap, alltruth)
            ]

        newnsamp = x_global.shape[0]
        kept_fraction = float(newnsamp) / float(self.nsamples)
        print('reduced content to ',
              int(kept_fraction * 100), '%')
        self.nsamples = newnsamp

        # publish the results
        self.w = [weights]
        self.x = [x_global, x_cpf, x_chmap]
        self.y = [alltruth]
Example #5
0
    def readFromRootFile(self, filename, TupleMeanStd, weighter):
        """Load one ROOT file into ``self.x``, ``self.y`` and ``self.w``.

        Besides the global and three per-particle arrays built from
        ``self.branches[0..3]``, density and count maps are created for the
        charged (``Cpfcan_*``) and neutral (``Npfcan_*``) candidates and
        concatenated along axis 3 into a single map input. The second output
        is ``gen_pt_WithNu`` with ``jet_corr_pt`` as an additional input.

        Args:
            filename: path of the ROOT file; ``deepntuplizer/tree`` is read.
            TupleMeanStd: passed through to the preprocessing helpers.
            weighter: provides ``createNotRemoveIndices`` and ``getJetWeights``.
        """
        from preprocessing import MeanNormApply, createCountMap, createDensity, MeanNormZeroPad, createDensityMap, MeanNormZeroPadParticles
        import numpy
        from stopwatch import stopwatch

        sw = stopwatch()

        import ROOT

        # wait for the file to be reachable before opening it (timeout argument: 120)
        fileTimeOut(filename, 120)
        rfile = ROOT.TFile(filename)
        tree = rfile.Get("deepntuplizer/tree")
        self.nsamples = tree.GetEntries()

        print('took ', sw.getAndReset(), ' seconds for getting tree entries')

        # split for convolutional network

        x_global = MeanNormZeroPad(filename, TupleMeanStd, [self.branches[0]],
                                   [self.branchcutoffs[0]], self.nsamples)

        x_cpf = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                         self.branches[1],
                                         self.branchcutoffs[1], self.nsamples)

        x_npf = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                         self.branches[2],
                                         self.branchcutoffs[2], self.nsamples)

        x_sv = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                        self.branches[3],
                                        self.branchcutoffs[3], self.nsamples)

        # map inputs: the same binning is used for every map
        nbins = 8

        x_chmap = createDensity(
            filename,
            inbranches=['Cpfcan_ptrel', 'Cpfcan_etarel', 'Cpfcan_phirel'],
            modes=['sum', 'average', 'average'],
            nevents=self.nsamples,
            dimension1=['Cpfcan_eta', 'jet_eta', nbins, 0.45],
            dimension2=['Cpfcan_phi', 'jet_phi', nbins, 0.45],
            counterbranch='nCpfcand',
            offsets=[-1, -0.5, -0.5])

        # The neutral map reads Npfcan_* branches, so its counter is the
        # neutral-candidate count 'nNpfcand' (as for x_neucount below), not
        # the charged-candidate count.
        x_neumap = createDensity(
            filename,
            inbranches=['Npfcan_ptrel', 'Npfcan_etarel', 'Npfcan_phirel'],
            modes=['sum', 'average', 'average'],
            nevents=self.nsamples,
            dimension1=['Npfcan_eta', 'jet_eta', nbins, 0.45],
            dimension2=['Npfcan_phi', 'jet_phi', nbins, 0.45],
            counterbranch='nNpfcand',
            offsets=[-1, -0.5, -0.5])

        x_chcount = createCountMap(filename, TupleMeanStd, self.nsamples,
                                   ['Cpfcan_eta', 'jet_eta', nbins, 0.45],
                                   ['Cpfcan_phi', 'jet_phi', nbins, 0.45],
                                   'nCpfcand')

        x_neucount = createCountMap(filename, TupleMeanStd, self.nsamples,
                                    ['Npfcan_eta', 'jet_eta', nbins, 0.45],
                                    ['Npfcan_phi', 'jet_phi', nbins, 0.45],
                                    'nNpfcand')

        print('took ', sw.getAndReset(),
              ' seconds for mean norm and zero padding (C module)')

        Tuple = self.readTreeFromRootToTuple(filename)

        if self.remove:
            notremoves = weighter.createNotRemoveIndices(Tuple)
            # entries flagged as undefined are subtracted so that they fail
            # the "> 0" selection below
            undef = Tuple['isUndefined']
            notremoves -= undef
            print('took ', sw.getAndReset(), ' to create remove indices')

        if self.weight:
            weights = weighter.getJetWeights(Tuple)
        elif self.remove:
            weights = notremoves
        else:
            print('neither remove nor weight')
            weights = numpy.empty(self.nsamples)
            weights.fill(1.)

        truthtuple = Tuple[self.truthclasses]
        alltruth = self.reduceTruth(truthtuple)

        regtruth = Tuple['gen_pt_WithNu']
        regreco = Tuple['jet_corr_pt']

        if self.remove:
            print('remove')
            keep = notremoves > 0
            weights = weights[keep]
            x_global = x_global[keep]
            x_cpf = x_cpf[keep]
            x_npf = x_npf[keep]
            x_sv = x_sv[keep]

            x_chmap = x_chmap[keep]
            x_neumap = x_neumap[keep]

            x_chcount = x_chcount[keep]
            x_neucount = x_neucount[keep]

            alltruth = alltruth[keep]

            regreco = regreco[keep]
            regtruth = regtruth[keep]

        newnsamp = x_global.shape[0]
        print('reduced content to ',
              int(float(newnsamp) / float(self.nsamples) * 100), '%')
        self.nsamples = newnsamp

        # stack the four maps along axis 3
        x_map = numpy.concatenate((x_chmap, x_neumap, x_chcount, x_neucount),
                                  axis=3)

        # one weight array per output in self.y
        self.w = [weights, weights]
        self.x = [x_global, x_cpf, x_npf, x_sv, x_map, regreco]
        self.y = [alltruth, regtruth]
Example #6
0
    def readFromRootFile(self, filename, TupleMeanStd, weighter):
        """Load one ROOT file into ``self.x``, ``self.y`` and ``self.w``.

        Five input arrays are built from ``self.branches[0..4]``. The second
        output is the element-wise ratio ``gen_pt_WithNu / jet_corr_pt``.
        When ``self.remove`` is set, only entries whose not-remove flag is > 0
        are kept.

        Args:
            filename: path of the ROOT file; ``deepntuplizer/tree`` is read.
            TupleMeanStd: passed through to the preprocessing helpers.
            weighter: provides ``createNotRemoveIndices`` and ``getJetWeights``.
        """
        from preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles
        import numpy
        from stopwatch import stopwatch

        sw = stopwatch()

        import ROOT

        # wait for the file to be reachable before opening it (timeout argument: 120)
        fileTimeOut(filename, 120)
        rfile = ROOT.TFile(filename)
        tree = rfile.Get("deepntuplizer/tree")
        self.nsamples = tree.GetEntries()

        print('took ', sw.getAndReset(), ' seconds for getting tree entries')

        # split for convolutional network

        x_global = MeanNormZeroPad(filename, TupleMeanStd, [self.branches[0]],
                                   [self.branchcutoffs[0]], self.nsamples)

        x_cpf = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                         self.branches[1],
                                         self.branchcutoffs[1], self.nsamples)

        x_npf = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                         self.branches[2],
                                         self.branchcutoffs[2], self.nsamples)

        x_sv = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                        self.branches[3],
                                        self.branchcutoffs[3], self.nsamples)

        x_reg = MeanNormZeroPad(filename, TupleMeanStd, [self.branches[4]],
                                [self.branchcutoffs[4]], self.nsamples)

        print('took ', sw.getAndReset(),
              ' seconds for mean norm and zero padding (C module)')

        Tuple = self.readTreeFromRootToTuple(filename)

        reg_truth = Tuple['gen_pt_WithNu'].view(numpy.ndarray)
        reco_pt = Tuple['jet_corr_pt'].view(numpy.ndarray)

        # Vectorised ratio. Writing into the pre-allocated buffer keeps the
        # float64 dtype and the (nsamples,) shape the per-element loop produced.
        correctionfactor = numpy.zeros(self.nsamples)
        correctionfactor[:] = reg_truth / reco_pt

        if self.remove:
            notremoves = weighter.createNotRemoveIndices(Tuple)
            # entries flagged as undefined are subtracted so that they fail
            # the "> 0" selection below
            undef = Tuple['isUndefined']
            notremoves -= undef
            print('took ', sw.getAndReset(), ' to create remove indices')

        if self.weight:
            weights = weighter.getJetWeights(Tuple)
        elif self.remove:
            weights = notremoves
        else:
            print('neither remove nor weight')
            weights = numpy.empty(self.nsamples)
            weights.fill(1.)

        truthtuple = Tuple[self.truthclasses]
        alltruth = self.reduceTruth(truthtuple)

        if self.remove:
            print('remove')
            keep = notremoves > 0
            weights = weights[keep]
            x_global = x_global[keep]
            x_cpf = x_cpf[keep]
            x_npf = x_npf[keep]
            x_sv = x_sv[keep]
            alltruth = alltruth[keep]

            x_reg = x_reg[keep]
            correctionfactor = correctionfactor[keep]

        newnsamp = x_global.shape[0]
        print('reduced content to ',
              int(float(newnsamp) / float(self.nsamples) * 100), '%')
        self.nsamples = newnsamp

        print(x_global.shape, self.nsamples)

        # one weight array per output in self.y
        self.w = [weights, weights]
        self.x = [x_global, x_cpf, x_npf, x_sv, x_reg]
        self.y = [alltruth, correctionfactor]
Example #7
0
    def readFromRootFile(self,filename,TupleMeanStd, weighter):
        """Load one ROOT file into ``self.x``, ``self.z``, ``self.y`` and ``self.w``.

        Five arrays are built from ``self.branches[0..4]``; the first one goes
        to ``self.z``, the other four to ``self.x``. Entries whose reduced
        truth sums to zero are always dropped; the not-remove selection is
        applied on top when ``self.remove`` is set.

        Args:
            filename: path of the ROOT file; ``deepntuplizer/tree`` is read.
            TupleMeanStd: passed through to the preprocessing helpers.
            weighter: provides ``createNotRemoveIndices`` and ``getJetWeights``.
        """
        # standard preamble
        from preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles, ZeroPadParticles
        import numpy
        import ROOT

        # wait for the file to be reachable before opening it (timeout argument: 120)
        fileTimeOut(filename,120)
        rfile = ROOT.TFile(filename)
        entry_tree = rfile.Get("deepntuplizer/tree")
        self.nsamples=entry_tree.GetEntries()

        # Branch group 0 goes through ZeroPadParticles, the remaining groups
        # through MeanNormZeroPadParticles.
        x_glb = ZeroPadParticles(
            filename,TupleMeanStd,
            self.branches[0],
            self.branchcutoffs[0],
            self.nsamples)

        x_db = MeanNormZeroPadParticles(
            filename,TupleMeanStd,
            self.branches[1],
            self.branchcutoffs[1],
            self.nsamples)

        x_pf = MeanNormZeroPadParticles(
            filename,TupleMeanStd,
            self.branches[2],
            self.branchcutoffs[2],
            self.nsamples)

        x_cpf = MeanNormZeroPadParticles(
            filename,TupleMeanStd,
            self.branches[3],
            self.branchcutoffs[3],
            self.nsamples)

        x_sv = MeanNormZeroPadParticles(
            filename,TupleMeanStd,
            self.branches[4],
            self.branchcutoffs[4],
            self.nsamples)

        jet_tuple = self.readTreeFromRootToTuple(filename)

        # The not-remove flags are computed unconditionally: they are
        # filtered and printed below even when self.remove is off.
        notremoves=weighter.createNotRemoveIndices(jet_tuple)

        if self.weight:
            weights=weighter.getJetWeights(jet_tuple)
        elif self.remove:
            weights=notremoves
        else:
            print('neither remove nor weight')
            weights=numpy.ones(self.nsamples)

        # Truth is reduced from the full tuple; entries whose truth row sums
        # to zero are dropped from every collection.
        alltruth=self.reduceTruth(jet_tuple)
        has_truth = numpy.sum(alltruth,axis=1) > 0
        weights=weights[has_truth]
        x_glb=x_glb[has_truth]
        x_db=x_db[has_truth]
        x_sv=x_sv[has_truth]
        x_pf=x_pf[has_truth]
        x_cpf=x_cpf[has_truth]
        alltruth=alltruth[has_truth]
        notremoves=notremoves[has_truth]

        print(len(weights), len(notremoves))

        # optional second selection on the already reduced collections
        if self.remove:
            print('remove')
            keep = notremoves > 0
            weights=weights[keep]
            x_glb=x_glb[keep]
            x_db=x_db[keep]
            x_sv=x_sv[keep]
            x_pf=x_pf[keep]
            x_cpf=x_cpf[keep]
            alltruth=alltruth[keep]

        newnsamp=x_glb.shape[0]
        kept_fraction=float(newnsamp)/float(self.nsamples)
        print('reduced content to ', int(kept_fraction*100),'%')
        self.nsamples = newnsamp

        # publish the results
        self.w=[weights]
        self.x=[x_db,x_pf,x_cpf,x_sv]
        self.z=[x_glb]
        self.y=[alltruth]
Example #8
0
    def readFromRootFile(self,filename,TupleMeanStd, weighter):
        """Load one ROOT file into ``self.x``, ``self.z``, ``self.y`` and ``self.w``.

        Two arrays are built from ``self.branches[0..1]``; the first one goes
        to ``self.z``, the second to ``self.x``. Entries whose reduced truth
        sums to zero are always dropped; the not-remove selection is applied
        on top when ``self.remove`` is set.

        Args:
            filename: path of the ROOT file; ``deepntuplizer/tree`` is read.
            TupleMeanStd: passed through to the preprocessing helpers.
            weighter: provides ``createNotRemoveIndices`` and ``getJetWeights``.
        """
        #the first part is standard, no changes needed
        from preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles, ZeroPadParticles
        import numpy
        import ROOT

        # wait for the file to be reachable before opening it (timeout argument: 120)
        fileTimeOut(filename,120)
        rfile = ROOT.TFile(filename)
        tree = rfile.Get("deepntuplizer/tree")
        self.nsamples=tree.GetEntries()

        x_glb  = ZeroPadParticles(filename,TupleMeanStd,
                                          self.branches[0],
                                          self.branchcutoffs[0],self.nsamples)

        x_db  = MeanNormZeroPadParticles(filename,TupleMeanStd,
                                         self.branches[1],
                                         self.branchcutoffs[1],self.nsamples)

        Tuple = self.readTreeFromRootToTuple(filename)

        # The not-remove flags are computed unconditionally because they are
        # filtered by the truth mask below even when self.remove is off.
        notremoves=weighter.createNotRemoveIndices(Tuple)
        if self.weight:
            weights=weighter.getJetWeights(Tuple)
        elif self.remove:
            weights=notremoves
        else:
            print('neither remove nor weight')
            weights=numpy.empty(self.nsamples)
            weights.fill(1.)

        # Truth is reduced from the full tuple; entries whose truth row sums
        # to zero are dropped from every collection.
        alltruth=self.reduceTruth(Tuple)
        has_truth=numpy.sum(alltruth,axis=1) > 0
        weights=weights[has_truth]
        x_glb=x_glb[has_truth]
        x_db=x_db[has_truth]
        alltruth=alltruth[has_truth]
        notremoves=notremoves[has_truth]

        # NOTE(review): a second mask combining the tuple fields fj_isNonCC,
        # sample_isQCD, fj_isQCD, fj_isCC and fj_isH used to be computed here
        # but was never applied to anything; the dead computation is dropped.
        # If that selection was intended, it must be applied before the
        # filtering above - confirm with the author.

        # remove the entries to get same jet shapes
        if self.remove:
            print('remove')
            keep = notremoves > 0
            weights=weights[keep]
            x_glb=x_glb[keep]
            x_db=x_db[keep]
            alltruth=alltruth[keep]

        newnsamp=x_glb.shape[0]
        print('reduced content to ', int(float(newnsamp)/float(self.nsamples)*100),'%')
        self.nsamples = newnsamp

        # fill everything
        self.w=[weights]
        self.x=[x_db]
        self.z=[x_glb]
        self.y=[alltruth]
Example #9
0
    def readFromRootFile(self,filename,TupleMeanStd, weighter):
        """Load one ROOT file into ``self.x``, ``self.y`` and ``self.w``.

        In addition to the global and three per-particle arrays built from
        ``self.branches[0..3]``, three 0/1 mask arrays are appended to
        ``self.x``. Each mask row holds ones for the first ``count`` positions
        and zeros afterwards, with ``count`` taken from ``nCpfcand``,
        ``nNpfcand`` and ``nsv`` respectively and clipped to the row length.

        Args:
            filename: path of the ROOT file; ``deepntuplizer/tree`` is read.
            TupleMeanStd: passed through to the preprocessing helpers.
            weighter: provides ``createNotRemoveIndices`` and ``getJetWeights``.
        """
        from preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles
        import numpy
        from stopwatch import stopwatch

        def _count_mask(counts, row_len, n_rows):
            # Build a float array of shape (len(counts), n_rows, row_len): in
            # every row the first min(count, row_len) entries are 1, the rest 0.
            clipped = numpy.minimum(numpy.asarray(counts).astype(int), row_len)
            row = (numpy.arange(row_len)[numpy.newaxis,:] < clipped[:,numpy.newaxis]).astype(float)
            return numpy.repeat(row[:,numpy.newaxis,:], n_rows, axis=1)

        sw=stopwatch()

        import ROOT

        # wait for the file to be reachable before opening it (timeout argument: 120)
        fileTimeOut(filename,120)
        rfile = ROOT.TFile(filename)
        tree = rfile.Get("deepntuplizer/tree")
        self.nsamples=tree.GetEntries()

        print('took ', sw.getAndReset(), ' seconds for getting tree entries')

        # split for convolutional network

        x_global = MeanNormZeroPad(filename,TupleMeanStd,
                                   [self.branches[0]],
                                   [self.branchcutoffs[0]],self.nsamples)

        x_cpf = MeanNormZeroPadParticles(filename,TupleMeanStd,
                                   self.branches[1],
                                   self.branchcutoffs[1],self.nsamples)

        x_npf = MeanNormZeroPadParticles(filename,TupleMeanStd,
                                   self.branches[2],
                                   self.branchcutoffs[2],self.nsamples)

        x_sv = MeanNormZeroPadParticles(filename,TupleMeanStd,
                                   self.branches[3],
                                   self.branchcutoffs[3],self.nsamples)

        print('took ', sw.getAndReset(), ' seconds for mean norm and zero padding (C module)')

        Tuple = self.readTreeFromRootToTuple(filename)

        if self.remove:
            notremoves=weighter.createNotRemoveIndices(Tuple)
            # entries flagged as undefined are subtracted so that they fail
            # the "> 0" selection below
            undef=Tuple['isUndefined']
            notremoves-=undef
            print('took ', sw.getAndReset(), ' to create remove indices')

        if self.weight:
            weights=weighter.getJetWeights(Tuple)
        elif self.remove:
            weights=notremoves
        else:
            print('neither remove nor weight')
            weights=numpy.empty(self.nsamples)
            weights.fill(1.)

        truthtuple =  Tuple[self.truthclasses]
        alltruth=self.reduceTruth(truthtuple)

        # Masks, in the same order as x_cpf, x_npf, x_sv in self.x.
        # NOTE(review): the row lengths (25, 25, 4) and row counts (8, 4, 8)
        # are hard-coded; presumably they match the branch cutoffs and the
        # network layout - confirm.
        mask_cpf = _count_mask(Tuple['nCpfcand'], 25, 8)
        mask_npf = _count_mask(Tuple['nNpfcand'], 25, 4)
        mask_sv = _count_mask(Tuple['nsv'], 4, 8)
        print ('zero shapes ', mask_cpf.shape, ' ' ,mask_npf.shape  , ' ' ,mask_sv.shape )

        if self.remove:
            print('remove')
            keep = notremoves > 0
            weights=weights[keep]
            x_global=x_global[keep]
            x_cpf=x_cpf[keep]
            x_npf=x_npf[keep]
            x_sv=x_sv[keep]
            mask_cpf = mask_cpf[keep]
            mask_npf = mask_npf[keep]
            mask_sv = mask_sv[keep]
            alltruth=alltruth[keep]

        newnsamp=x_global.shape[0]
        print('reduced content to ', int(float(newnsamp)/float(self.nsamples)*100),'%')
        self.nsamples = newnsamp

        print(x_global.shape,self.nsamples)

        self.w=[weights]
        print (' types ',  type (x_cpf) , type (mask_cpf), ' ' ,type(mask_npf) , ' ' , type(mask_sv) )
        self.x=[x_global,x_cpf,x_npf,x_sv,mask_cpf,mask_npf,mask_sv]
        self.y=[alltruth]