Esempio n. 1
0
 def getFlavourClassificationData(self,filename,TupleMeanStd, weighter):
     """Read one ROOT file and build the flavour-classification arrays.

     Parameters:
         filename: path of the ROOT file to convert.
         TupleMeanStd: normalisation input forwarded unchanged to
             MeanNormZeroPad.
         weighter: object providing createNotRemoveIndices(Tuple) and
             getJetWeights(Tuple).

     Returns:
         (weights, x_all, alltruth, notremoves). notremoves is an empty
         array unless self.remove is set.

     Side effects:
         self.nsamples is first set to the number of tree entries and at
         the end to the number of rows left in x_all.
     """
     import ROOT

     fileTimeOut(filename,120) #give eos time to recover before opening
     rfile = ROOT.TFile(filename)
     tree = rfile.Get(self.treename)
     self.nsamples=tree.GetEntries()
     # The handle is only needed for the entry count; release it explicitly
     # instead of keeping it open during the whole conversion.
     rfile.Close()

     Tuple = self.readTreeFromRootToTuple(filename)

     x_all = MeanNormZeroPad(filename,TupleMeanStd,self.branches,self.branchcutoffs,self.nsamples)

     notremoves=numpy.array([])
     if self.remove:
         notremoves=weighter.createNotRemoveIndices(Tuple)
         # in remove mode the keep-indices double as the weights
         weights=notremoves
     elif self.weight:
         weights= weighter.getJetWeights(Tuple)
     else:
         print('neither remove nor weight')
         weights=numpy.ones(self.nsamples)

     alltruth=self.reduceTruth(Tuple[self.truthclasses])

     if self.remove:
         # build the keep-mask once and apply it to every per-sample array
         keep = notremoves > 0
         weights=weights[keep]
         x_all=x_all[keep]
         alltruth=alltruth[keep]

     self.nsamples = x_all.shape[0]

     return weights,x_all,alltruth, notremoves
Esempio n. 2
0
    def readFromRootFile(self,filename,TupleMeanStd, weighter):
        """Fill self.x, self.y and self.w for a regression target from one ROOT file.

        Parameters:
            filename: path of the ROOT file to convert.
            TupleMeanStd: normalisation input forwarded to MeanNormZeroPad.
            weighter: object providing createNotRemoveIndices(Tuple) and
                getJetWeights(Tuple).

        Side effects:
            Sets self.nsamples, self.w ([weights]), self.x (either [x_all] or
            the two column selections self.inputs0 / self.inputs1 of x_all)
            and self.y ([reg_truth]). Returns nothing.
        """
        import ROOT

        fileTimeOut(filename,120) #give eos time to recover before opening
        rfile = ROOT.TFile(filename)
        tree = rfile.Get(self.treename)
        self.nsamples=tree.GetEntries()
        # only the entry count is needed from this handle
        rfile.Close()

        Tuple = self.readTreeFromRootToTuple(filename)

        #create weights and remove indices
        if self.remove:
            notremoves=weighter.createNotRemoveIndices(Tuple)
            if self.selection:
                # single-argument print(): same output on Python 2 and 3
                print('Removing events selected with ' + str(self.selection))
                notremoves -= Tuple[self.selection].view(numpy.ndarray)
            weights=notremoves
        elif self.weight:
            weights= weighter.getJetWeights(Tuple)
        else:
            weights=numpy.ones(self.nsamples)

        # classification truth is still reduced here although only the
        # regression target ends up in self.y
        alltruth=self.reduceTruth(Tuple[self.truthclasses])

        reg_truth=Tuple[self.regressiontarget].view(numpy.ndarray)

        #stuff all in one long vector
        x_all=MeanNormZeroPad(filename,TupleMeanStd,self.branches,self.branchcutoffs,self.nsamples)

        if self.remove:
            keep = notremoves > 0
            weights=weights[keep]
            x_all=x_all[keep]
            alltruth=alltruth[keep]
            reg_truth=reg_truth[keep]
            print(str(len(Tuple)) + ' -> ' + str(len(x_all)) + ' after remove')

        self.nsamples = x_all.shape[0]

        self.w=[weights]
        if self.inputs1:
            self.x=[ x_all[:,self.inputs0], x_all[:,self.inputs1] ]
        else:
            self.x=[x_all]
        self.y=[reg_truth]
Esempio n. 3
0
    def readFromRootFile(self, filename, TupleMeanStd, weighter):
        """Convert one ROOT ntuple (tree "tree") into self.x / self.y / self.w.

        None is passed as the normalisation argument of every padding call,
        so TupleMeanStd is not used here. self._normalize_input_ (defined
        elsewhere) is called last with the weighter and the raw tuple; the
        closing print reports self.nsamples after that call next to the
        original entry count.
        """
        from DeepJetCore.preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles

        import ROOT

        fileTimeOut(filename, 120)  # give eos time to recover before opening
        root_file = ROOT.TFile(filename)
        self.nsamples = root_file.Get("tree").GetEntries()

        npy_array = self.readTreeFromRootToTuple(filename)

        # targets: reduced class truth plus the regression truth column(s)
        alltruth = self.reduceTruth(npy_array[self.truthclasses])
        alltruept = npy_array[self.regtruth]

        n_entries = self.nsamples

        def pad_flat(group):
            # branch group wrapped in a list, as in the flat-feature call
            return MeanNormZeroPad(filename, None, [self.branches[group]],
                                   [self.branchcutoffs[group]], n_entries)

        def pad_particles(group):
            return MeanNormZeroPadParticles(filename, None,
                                            self.branches[group],
                                            self.branchcutoffs[group],
                                            n_entries)

        x_global = pad_flat(0)
        x_cpf = pad_particles(1)
        x_npf = pad_particles(2)
        x_recopts = pad_flat(3)

        # list of feature numpy arrays
        self.x = [x_global, x_cpf, x_npf, x_recopts]
        # list of target numpy arrays (truth)
        self.y = [alltruth, alltruept]
        # list of weight arrays, left empty before _normalize_input_ runs
        self.w = []
        self._normalize_input_(weighter, npy_array)

        print('reduced to ', self.nsamples, 'of', n_entries)
Esempio n. 4
0
    def convertFromSourceFile(self, filename, weighterobjects, istraining):
        """Build the four feature arrays and the truth array from one ROOT file.

        weighterobjects and istraining are accepted but not used in this
        method. No normalisation is applied (None is passed to the padding
        calls).

        Returns:
            ([x_global, x_cpf, x_npf, x_sv], [alltruth], []) - features,
            truth and an empty weight list.
        """
        from DeepJetCore.preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles
        import numpy
        from DeepJetCore.stopwatch import stopwatch

        step_timer = stopwatch()
        total_timer = stopwatch()

        import ROOT

        fileTimeOut(filename,120) # give eos time to recover before opening
        root_file = ROOT.TFile(filename)
        self.nsamples = root_file.Get("deepntuplizer/tree").GetEntries()

        print('took ', step_timer.getAndReset(), ' seconds for getting tree entries')

        # split for convolutional network: one flat block, three particle blocks
        n_entries = self.nsamples
        x_global = MeanNormZeroPad(filename, None,
                                   [self.branches[0]],
                                   [self.branchcutoffs[0]], n_entries)
        x_cpf, x_npf, x_sv = [
            MeanNormZeroPadParticles(filename, None,
                                     self.branches[group],
                                     self.branchcutoffs[group], n_entries)
            for group in (1, 2, 3)
        ]

        print('took ', step_timer.getAndReset(), ' seconds for mean norm and zero padding (C module)')

        raw_tuple = self.readTreeFromRootToTuple(filename)
        alltruth = self.reduceTruth(raw_tuple[self.truthclasses])

        print(x_global.shape,self.nsamples)

        features = [x_global, x_cpf, x_npf, x_sv]
        return features, [alltruth], []
Esempio n. 5
0
    def readFromRootFile(self, filename, TupleMeanStd, weighter):
        """Fill self.x and self.y with jet features and a pt correction target.

        The regression target is the per-jet ratio of the 'gen_pt_WithNu'
        branch to the 'jet_corr_pt' branch. No normalisation is applied
        (None is passed to the padding calls), so TupleMeanStd is unused.
        self._normalize_input_ (defined elsewhere) is called last; self.w
        is not assigned in this method.
        """
        from DeepJetCore.preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles
        import numpy
        from DeepJetCore.stopwatch import stopwatch

        sw = stopwatch()

        import ROOT

        fileTimeOut(filename, 120)  # give eos time to recover before opening
        rfile = ROOT.TFile(filename)
        tree = rfile.Get("deepntuplizer/tree")
        self.nsamples = tree.GetEntries()
        # only the entry count is needed from this handle
        rfile.Close()

        print('took ', sw.getAndReset(), ' seconds for getting tree entries')

        # split for convolutional network

        x_global = MeanNormZeroPad(filename, None, [self.branches[0]],
                                   [self.branchcutoffs[0]], self.nsamples)

        x_cpf = MeanNormZeroPadParticles(filename, None, self.branches[1],
                                         self.branchcutoffs[1], self.nsamples)

        x_etarel = MeanNormZeroPadParticles(filename, None, self.branches[2],
                                            self.branchcutoffs[2],
                                            self.nsamples)

        x_sv = MeanNormZeroPadParticles(filename, None, self.branches[3],
                                        self.branchcutoffs[3], self.nsamples)

        print('took ', sw.getAndReset(),
              ' seconds for mean norm and zero padding (C module)')

        npy_array = self.readTreeFromRootToTuple(filename)

        reg_truth = npy_array['gen_pt_WithNu'].view(numpy.ndarray)
        reco_pt = npy_array['jet_corr_pt'].view(numpy.ndarray)

        # One vectorised division instead of a Python loop over every sample.
        # The ratio is computed in the branches' own dtype and then stored
        # into a float64 buffer, exactly as the element-wise version did.
        n = self.nsamples
        correctionfactor = numpy.zeros(n)
        correctionfactor[:] = reg_truth[:n] / reco_pt[:n]

        truthtuple = npy_array[self.truthclasses]
        alltruth = self.reduceTruth(truthtuple)

        self.x = [x_global, x_cpf, x_etarel, x_sv, reco_pt]
        self.y = [alltruth, correctionfactor]
        self._normalize_input_(weighter, npy_array)
Esempio n. 6
0
    def readFromRootFile(self, filename, TupleMeanStd, weighter):
        """Fill self.x with a rec-hit map and self.y with density layers.

        weighter is not used. The weights are all ones with the shape of
        the target array. ROOT, np and createRecHitMap are taken from the
        enclosing module scope.
        """
        # the first part is standard, no changes needed
        from DeepJetCore.preprocessing import MeanNormApply, createDensityLayers, createDensityMap, MeanNormZeroPad, \
            MeanNormZeroPadParticles

        fileTimeOut(filename, 120)  # give eos 2 minutes to recover
        root_file = ROOT.TFile(filename)
        self.nsamples = root_file.Get("deepntuplizer/tree").GetEntries()

        first_norm_entry = TupleMeanStd[0]
        print(first_norm_entry)
        print(len(first_norm_entry))

        # only used further down to derive the final sample count
        x_globalbase = MeanNormZeroPad(filename, TupleMeanStd,
                                       [self.branches[0]],
                                       [self.branchcutoffs[0]], self.nsamples)

        # flatten everything out for now
        rechit_map = createRecHitMap(filename,
                                     self.nsamples,
                                     nbins=13,
                                     width=0.2,
                                     maxlayers=55,
                                     maxhitsperpixel=6)

        density_options = dict(
            inbranches=['rechit_total_fraction'],
            modes=['sum'],
            layerbranch='rechit_layer',
            maxlayers=55,
            layeroffset=1,
            nevents=self.nsamples,
            dimension1=['rechit_eta', 'seed_eta', 13, 0.2],
            dimension2=['rechit_phi', 'seed_phi', 13, 0.2],
            counterbranch='nrechits',
            scales=[1],
        )
        density_target = createDensityLayers(filename, TupleMeanStd,
                                             **density_options)

        print("Hey", np.shape(density_target), np.shape(rechit_map))

        # the tuple itself is not used below; the call is kept as in the
        # original
        self.readTreeFromRootToTuple(filename)

        self.nsamples = len(x_globalbase)
        self.w = [np.ones_like(density_target)]
        self.x = [rechit_map]
        self.y = [density_target]
Esempio n. 7
0
    def convertFromSourceFile(self, filename, weighterobjects, istraining):
        """Convert the 'x' branch and the class truth of one ROOT file.

        weighterobjects and istraining are not used. SimpleArray comes from
        the enclosing module scope.

        Returns:
            ([features], [truth], []) where both entries are SimpleArray
            objects named "features0" and "truth"; the weight list is empty.
        """
        from DeepJetCore.preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles
        import numpy
        from DeepJetCore.stopwatch import stopwatch

        step_timer = stopwatch()
        total_timer = stopwatch()

        import ROOT

        fileTimeOut(filename, 120)  # give eos time to recover before opening
        root_file = ROOT.TFile(filename)
        self.nsamples = root_file.Get("deepntuplizer/tree").GetEntries()

        print('took ', step_timer.getAndReset(), ' seconds for getting tree entries')

        # single flat input: branch 'x' with cutoff 1, no normalisation
        x_global = MeanNormZeroPad(filename, None, ['x'], [1], self.nsamples)

        print('took ', step_timer.getAndReset(),
              ' seconds for mean norm and zero padding (C module)')

        wanted_branches = ['class1', 'class2', 'x']
        raw_tuple = self.readTreeFromRootToTuple(filename,
                                                 branches=wanted_branches)

        alltruth = self.reduceTruth(raw_tuple[self.truthclasses])

        # take the sample count from what the padding call actually returned
        self.nsamples = x_global.shape[0]

        print(x_global.shape, alltruth.shape, self.nsamples)

        truth_out = SimpleArray(alltruth, name="truth")
        feature_out = SimpleArray(x_global, name="features0")

        return [feature_out], [truth_out], []
Esempio n. 8
0
    def readFromRootFile(self, filename, TupleMeanStd, weighter):
        """Fill self.x with the global features and self.y with the class truth.

        No normalisation is applied (None is passed to MeanNormZeroPad), so
        TupleMeanStd is unused; weighter is unused as well and self.w is
        left empty.

        Side effects:
            Sets self.nsamples, self.w, self.x and self.y. Returns nothing.
        """
        from DeepJetCore.preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles
        import numpy
        from DeepJetCore.stopwatch import stopwatch

        sw = stopwatch()

        import ROOT

        fileTimeOut(filename, 120)  # give eos time to recover before opening
        rfile = ROOT.TFile(filename)
        tree = rfile.Get("deepntuplizer/tree")
        self.nsamples = tree.GetEntries()
        # only the entry count is needed from this handle
        rfile.Close()

        print('took ', sw.getAndReset(), ' seconds for getting tree entries')

        # split for convolutional network

        x_global = MeanNormZeroPad(filename, None, [self.branches[0]],
                                   [self.branchcutoffs[0]], self.nsamples)

        print('took ', sw.getAndReset(),
              ' seconds for mean norm and zero padding (C module)')

        Tuple = self.readTreeFromRootToTuple(filename)

        truthtuple = Tuple[self.truthclasses]
        alltruth = self.reduceTruth(truthtuple)

        newnsamp = x_global.shape[0]
        # An empty tree gives nsamples == 0; report 0 % instead of raising
        # ZeroDivisionError in a purely informational print.
        if self.nsamples:
            percent = int(float(newnsamp) / float(self.nsamples) * 100)
        else:
            percent = 0
        print('reduced content to ', percent, '%')
        self.nsamples = newnsamp

        print(x_global.shape, self.nsamples)

        self.w = []
        self.x = [x_global]
        self.y = [alltruth]
Esempio n. 9
0
    def readFromRootFile(self, filename, TupleMeanStd, weighter):
        """Convert one ROOT ntuple into self.x / self.y / self.w.

        The sample count is taken from the length of the tuple returned by
        readTreeFromRootToTuple. weighter is not used (the removal step is
        disabled), and self.w stays empty.
        """
        raw_tuple = self.readTreeFromRootToTuple(filename)

        # weighter-based removal is intentionally not applied here
        self.nsamples = len(raw_tuple)

        # list of feature numpy arrays
        self.x = [
            MeanNormZeroPad(filename, TupleMeanStd, self.branches,
                            self.branchcutoffs, self.nsamples)
        ]

        # list of target numpy arrays (truth): stack the truth columns and
        # transpose the stacked result
        truth_columns = raw_tuple[self.truthclasses]
        self.y = [numpy.vstack(truth_columns).transpose()]

        # list of weight arrays. One for each truth target
        self.w = []
Esempio n. 10
0
    def readFromRootFile(self, filename, TupleMeanStd, weighter):
        """Fill self.x / self.y / self.w with jet features, truth and weights.

        Parameters:
            filename: path of the ROOT file to convert.
            TupleMeanStd: normalisation input forwarded to the padding calls.
            weighter: object providing createNotRemoveIndices(Tuple) and
                getJetWeights(Tuple).

        Behaviour:
            With self.remove set, samples whose keep-index (after subtracting
            the 'isUndefined' branch) is not positive are dropped from all
            arrays. Weights come from the weighter if self.weight is set,
            otherwise from the keep-indices (remove mode) or are all ones.

        Side effects:
            Sets self.nsamples, self.w, self.x and self.y. Returns nothing.
        """
        from DeepJetCore.preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles
        import numpy
        from DeepJetCore.stopwatch import stopwatch

        sw = stopwatch()

        import ROOT

        fileTimeOut(filename, 120)  # give eos time to recover before opening
        rfile = ROOT.TFile(filename)
        tree = rfile.Get("deepntuplizer/tree")
        self.nsamples = tree.GetEntries()
        # only the entry count is needed from this handle
        rfile.Close()

        print('took ', sw.getAndReset(), ' seconds for getting tree entries')

        # split for convolutional network

        x_global = MeanNormZeroPad(filename, TupleMeanStd, [self.branches[0]],
                                   [self.branchcutoffs[0]], self.nsamples)

        x_cpf = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                         self.branches[1],
                                         self.branchcutoffs[1], self.nsamples)

        x_npf = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                         self.branches[2],
                                         self.branchcutoffs[2], self.nsamples)

        x_sv = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                        self.branches[3],
                                        self.branchcutoffs[3], self.nsamples)

        print('took ', sw.getAndReset(),
              ' seconds for mean norm and zero padding (C module)')

        Tuple = self.readTreeFromRootToTuple(filename)

        if self.remove:
            notremoves = weighter.createNotRemoveIndices(Tuple)
            undef = Tuple['isUndefined']
            notremoves -= undef
            print('took ', sw.getAndReset(), ' to create remove indices')

        if self.weight:
            weights = weighter.getJetWeights(Tuple)
        elif self.remove:
            weights = notremoves
        else:
            print('neither remove nor weight')
            weights = numpy.ones(self.nsamples)

        truthtuple = Tuple[self.truthclasses]
        alltruth = self.reduceTruth(truthtuple)

        if self.remove:
            print('remove')
            # build the keep-mask once and apply it to every per-sample array
            keep = notremoves > 0
            weights = weights[keep]
            x_global = x_global[keep]
            x_cpf = x_cpf[keep]
            x_npf = x_npf[keep]
            x_sv = x_sv[keep]
            alltruth = alltruth[keep]

        newnsamp = x_global.shape[0]
        # An empty tree gives nsamples == 0; report 0 % instead of raising
        # ZeroDivisionError in a purely informational print.
        if self.nsamples:
            percent = int(float(newnsamp) / float(self.nsamples) * 100)
        else:
            percent = 0
        print('reduced content to ', percent, '%')
        self.nsamples = newnsamp

        print(x_global.shape, self.nsamples)

        self.w = [weights]
        self.x = [x_global, x_cpf, x_npf, x_sv]
        self.y = [alltruth]
Esempio n. 11
0
    def readFromRootFile(self, filename, TupleMeanStd, weighter):
        """Fill self.x / self.y / self.w with a single-layer rec-hit image.

        Parameters:
            filename: path of the ROOT file to convert.
            TupleMeanStd: normalisation input forwarded to the padding and
                density-layer calls.
            weighter: object providing createNotRemoveIndices and
                getJetWeights; only used when self.remove is set.

        Behaviour:
            Both the regression truth and the 'totalrechit_energy' branch
            are divided by 100. With self.remove set, the inputs are passed
            through the duplicate8 / augmentRotationalSymmetry8 / evaluate8
            helpers of the project's `augmentation` module (presumably an
            8-fold augmentation - confirm there), ten veto branches are
            subtracted from the keep-indices, and samples with a
            non-positive keep-index are dropped. The layer slice 15:16 of
            the density map is reduced to one layer.

        Side effects:
            Sets self.nsamples, self.w ([weights, weights]), self.x and
            self.y. Returns nothing.
        """
        #the first part is standard, no changes needed
        from DeepJetCore.preprocessing import MeanNormApply, createDensityLayers, createDensityMap, MeanNormZeroPad, MeanNormZeroPadParticles
        import numpy
        import ROOT

        fileTimeOut(filename, 120)  #give eos 2 minutes to recover
        rfile = ROOT.TFile(filename)
        tree = rfile.Get("deepntuplizer/tree")
        self.nsamples = tree.GetEntries()
        # only the entry count is needed from this handle
        rfile.Close()

        print("1")

        x_globalbase = MeanNormZeroPad(filename, TupleMeanStd,
                                       [self.branches[0]],
                                       [self.branchcutoffs[0]], self.nsamples)

        print("2")

        #flatten everything out for now
        x_chmapbase = createDensityLayers(
            filename,
            TupleMeanStd,
            inbranches=['rechit_energy', 'rechit_layer', 'rechit_time'],
            modes=['sum', 'single', 'average'],
            layerbranch='rechit_layer',
            maxlayers=55,
            layeroffset=1,
            nevents=self.nsamples,
            dimension1=['rechit_eta', 'seed_eta', 13, 0.2],
            dimension2=['rechit_phi', 'seed_phi', 13, 0.2],
            counterbranch='nrechits',
            scales=[1, 50, 1])

        #training data
        print("3")
        Tuple = self.readTreeFromRootToTuple(filename)

        idtruthtuple = self.reduceTruth(Tuple[self.truthclasses])
        energytruth = numpy.array(Tuple[self.regtruth])
        #simple by-hand scaling to around 0 with a width of max about 1
        energytruth = energytruth / 100.

        totalrecenergy = numpy.array(Tuple['totalrechit_energy']) / 100.

        weights = numpy.zeros(len(idtruthtuple))

        notremoves = numpy.ones(totalrecenergy.shape[0])
        if self.remove:
            from augmentation import augmentRotationalSymmetry8, duplicate8, evaluate8

            x_global = duplicate8(x_globalbase)
            x_chmap = augmentRotationalSymmetry8(x_chmapbase)

            notremoves = evaluate8(weighter.createNotRemoveIndices, Tuple)

            weights = duplicate8(weighter.getJetWeights(Tuple))
            totalrecenergy = duplicate8(totalrecenergy)
            energytruth = duplicate8(energytruth)
            idtruthtuple = duplicate8(idtruthtuple)
            # subtract every veto branch from the keep-indices
            veto_branches = ('isFake', 'isEta', 'isElectron', 'isMuon',
                             'isTau', 'isPionZero', 'isPionCharged',
                             'isProton', 'isKaonCharged', 'isOther')
            for veto in veto_branches:
                notremoves -= duplicate8(Tuple[veto])
        else:
            # NOTE(review): in this branch notremoves is updated but never
            # applied below (filtering only happens when self.remove is set).
            notremoves -= Tuple['isFake']
            notremoves -= Tuple['isEta']
            x_global = x_globalbase
            x_chmap = x_chmapbase

        print("4")
        # reduce to two dimensions:
        # x_chmap[shower][eta][phi][layer][colours]
        #   -> x_chmap[shower][eta][phi][colours]
        # Index the layer axis directly instead of squeezing the 15:16 slice
        # without an axis: a bare squeeze would also drop the sample axis
        # when exactly one sample is present and break the masking below.
        x_chmap = x_chmap[:, :, :, 15, :]

        before = len(x_global)

        if self.remove:
            keep = notremoves > 0
            weights = weights[keep]
            x_global = x_global[keep]
            x_chmap = x_chmap[keep]
            idtruthtuple = idtruthtuple[keep]
            energytruth = energytruth[keep]
            totalrecenergy = totalrecenergy[keep]
        print("5")

        print('reduced to ' + str(len(x_global)) + ' of ' + str(before))
        self.nsamples = len(x_global)

        self.w = [weights, weights]
        self.x = [x_global, x_chmap, totalrecenergy]
        self.y = [idtruthtuple, energytruth]
    def convertFromSourceFile(self, filename, weighterobjects, istraining):
        """Produce the numpy training arrays from one ROOT file.

        Parameters:
            filename: path of the ROOT file (tree name "tree").
            weighterobjects: mapping; its 'weigther' entry must provide
                createNotRemoveIndices. Only read when self.remove is set.
            istraining: when false, self.remove is switched off on the
                instance before converting.

        Returns:
            (features, [truth], []) where features is the list
            [global, neutral, charged, photon, electron, muon, SV] of
            float32 C-ordered arrays with non-finite values replaced by 0,
            truth is a float32 array with one column per entry of
            self.truth_branches, and the weight list is empty.
        """
        from DeepJetCore.Weighter import Weighter
        from DeepJetCore.stopwatch import stopwatch
        sw = stopwatch()
        if not istraining:
            self.remove = False

        print('reading ' + filename)
        import ROOT
        from root_numpy import tree2array, root2array
        fileTimeOut(filename, 120)  # give eos time to recover before opening
        rfile = ROOT.TFile(filename)
        tree = rfile.Get("tree")
        self.nsamples = tree.GetEntries()
        # only the entry count is needed from this handle
        rfile.Close()

        from DeepJetCore.preprocessing import MeanNormZeroPad, MeanNormZeroPadParticles

        print('padding ' + filename)

        # 2nd argument None: means no normalisation
        features = [
            MeanNormZeroPad(filename, None, [self.global_branches], [1],
                            self.nsamples)
        ]
        particle_inputs = (
            (self.pfCand_neutral_branches, self.npfCand_neutral),
            (self.pfCand_charged_branches, self.npfCand_charged),
            (self.pfCand_photon_branches, self.npfCand_photon),
            (self.pfCand_electron_branches, self.npfCand_electron),
            (self.pfCand_muon_branches, self.npfCand_muon),
            (self.SV_branches, self.nSV),
        )
        for branches, n_cand in particle_inputs:
            features.append(
                MeanNormZeroPadParticles(filename, None, branches, n_cand,
                                         self.nsamples))

        import uproot3 as uproot
        urfile = uproot.open(filename)["tree"]

        # one column per truth branch
        truth = np.concatenate(
            [np.expand_dims(urfile.array(arr), axis=1)
             for arr in self.truth_branches],
            axis=1)

        # important, float32 and C-type!
        truth = truth.astype(dtype='float32', order='C')
        features = [arr.astype(dtype='float32', order='C')
                    for arr in features]

        if self.remove:
            b = [self.weightbranchX, self.weightbranchY]
            b.extend(self.truth_branches)
            b.extend(self.undefTruth)
            fileTimeOut(filename, 120)
            for_remove = root2array(  # returns a structured np array
                filename,
                treename="tree",
                stop=None,
                branches=b)
            notremoves = weighterobjects['weigther'].createNotRemoveIndices(
                for_remove)
            print('took ', sw.getAndReset(), ' to create remove indices')

            # build the keep-mask once and apply it to every array
            keep = notremoves > 0
            features = [arr[keep] for arr in features]
            truth = truth[keep]

        newnsamp = features[0].shape[0]
        # An empty tree gives nsamples == 0; report 0 % instead of raising
        # ZeroDivisionError in a purely informational print.
        if self.nsamples:
            percent = int(float(newnsamp) / float(self.nsamples) * 100)
        else:
            percent = 0
        print('Weighter reduced content to ', percent, '%')

        print('removing nans')
        features = [np.where(np.isfinite(arr), arr, 0) for arr in features]

        return features, [truth], []
    def readFromRootFile(self, filename, TupleMeanStd, weighter):
        """Fill self.x / self.y / self.w with a rec-hit map and energy truth.

        Both the regression truth and the 'totalrechit_energy' branch are
        divided by 100. With self.remove set, the inputs go through the
        duplicateImage / mirrorInPhi / evaluateTwice helpers of the
        project's `augmentation` module, the 'isFake' and 'isEta' branches
        are subtracted from the keep-indices, and samples with a
        non-positive keep-index are dropped. Without self.remove nothing is
        filtered and the weights stay all zero.
        """
        #the first part is standard, no changes needed
        from DeepJetCore.preprocessing import MeanNormZeroPad
        from converters import createRecHitMap
        import numpy
        import ROOT

        fileTimeOut(filename, 120)  #give eos 2 minutes to recover
        root_file = ROOT.TFile(filename)
        self.nsamples = root_file.Get("deepntuplizer/tree").GetEntries()

        x_globalbase = MeanNormZeroPad(filename, TupleMeanStd,
                                       [self.branches[0]],
                                       [self.branchcutoffs[0]], self.nsamples)

        x_chmapbase = createRecHitMap(filename,
                                      self.nsamples,
                                      nbins=13,
                                      width=0.10,
                                      maxlayers=52,
                                      maxhitsperpixel=6)

        #training data
        raw_tuple = self.readTreeFromRootToTuple(filename)

        idtruthtuple = self.reduceTruth(raw_tuple[self.truthclasses])
        #simple by-hand scaling to around 0 with a width of max about 1
        energytruth = numpy.array(raw_tuple[self.regtruth]) / 100.
        totalrecenergy = numpy.array(raw_tuple['totalrechit_energy']) / 100.

        weights = numpy.zeros(len(idtruthtuple))

        keep_index = numpy.zeros(totalrecenergy.shape[0])
        keep_index += 1
        if not self.remove:
            # keep-indices are updated here but not applied further down
            for veto in ('isFake', 'isEta'):
                keep_index -= raw_tuple[veto]
            x_global = x_globalbase
            x_chmap = x_chmapbase
        else:
            from augmentation import mirrorInPhi, duplicateImage, evaluateTwice

            x_global = duplicateImage(x_globalbase)
            x_chmap = mirrorInPhi(x_chmapbase)

            keep_index = evaluateTwice(weighter.createNotRemoveIndices,
                                       raw_tuple)

            weights = duplicateImage(weighter.getJetWeights(raw_tuple))
            totalrecenergy = duplicateImage(totalrecenergy)
            energytruth = duplicateImage(energytruth)
            idtruthtuple = duplicateImage(idtruthtuple)
            for veto in ('isFake', 'isEta'):
                keep_index -= duplicateImage(raw_tuple[veto])

        before = len(x_global)

        if self.remove:
            selected = keep_index > 0
            weights = weights[selected]
            x_global = x_global[selected]
            x_chmap = x_chmap[selected]
            idtruthtuple = idtruthtuple[selected]
            energytruth = energytruth[selected]
            totalrecenergy = totalrecenergy[selected]

        print('reduced to ' + str(len(x_global)) + ' of ' + str(before))
        self.nsamples = len(x_global)

        # the same weights are used for both truth targets
        self.w = [weights, weights]
        self.x = [x_global, x_chmap, totalrecenergy]
        self.y = [idtruthtuple, energytruth]
Esempio n. 14
0
    def readFromRootFile(self, filename, TupleMeanStd, weighter):
        """Fill ``self.x``, ``self.y`` and ``self.w`` from one ROOT file.

        Four feature arrays are read through the DeepJetCore preprocessing
        helpers (branch groups 0-3 of ``self.branches``).  Depending on
        ``self.weight`` and ``self.remove``, per-jet weights and/or a removal
        mask are obtained from ``weighter``.  With ``self.remove`` set, jets
        are additionally dropped at random so that a jet of class ``t`` is
        kept with probability ``min(total) / total[t]``.  At the end, jets
        whose reduced truth row has no label, or more than one, are
        discarded.

        Args:
            filename: ROOT file to read.
            TupleMeanStd: passed through unchanged to the preprocessing
                helpers.
            weighter: provides ``createNotRemoveIndices``, ``getJetWeights``
                and ``totalcounts``.

        Side effects:
            Sets ``self.nsamples`` (number of jets kept), ``self.w``
            (``[weights]``), ``self.x`` (``[x_global, x_cpf, x_npf, x_sv]``)
            and ``self.y`` (``[alltruth]``).
        """
        from DeepJetCore.preprocessing import MeanNormApply, MeanNormZeroPad, MeanNormZeroPadParticles
        import ROOT

        fileTimeOut(filename, 60)  # give eos one minute to recover
        root_file = ROOT.TFile(filename)
        self.nsamples = root_file.Get(self.treename).GetEntries()

        # branch group 0 goes through the flat (zero padded) reader
        x_global = MeanNormZeroPad(filename, TupleMeanStd,
                                   [self.branches[0]],
                                   [self.branchcutoffs[0]],
                                   self.nsamples)

        # branch groups 1, 2 and 3 go through the particle reader, in this order
        x_cpf, x_npf, x_sv = [
            MeanNormZeroPadParticles(filename, TupleMeanStd,
                                     self.branches[group],
                                     self.branchcutoffs[group],
                                     self.nsamples)
            for group in (1, 2, 3)
        ]

        records = self.readTreeFromRootToTuple(filename)

        undefined = records['isUndefined']
        if self.remove:
            notremoves = weighter.createNotRemoveIndices(records)
            notremoves -= undefined

        if self.weight:
            weights = weighter.getJetWeights(records)
        elif self.remove:
            # same array object as the mask, so the random removal below is
            # visible through ``weights`` as well
            weights = notremoves
        else:
            weights = np.empty(self.nsamples)
            weights.fill(1.)

        truthtuple = records[self.truthclasses]
        alltruth = self.reduceTruth(truthtuple)

        has_reduced_map = hasattr(self, 'reducedtruthmap')

        # scale down by number of classes in a reduced class
        if self.weight and has_reduced_map:
            for i, row in enumerate(alltruth):
                for t, reduced in enumerate(self.reducedtruthclasses):
                    if row[t] == 1:
                        n_merged = len(self.reducedtruthmap[reduced])
                        weights[i] = weights[i] * 1. / n_merged

        # remove jets to have the same counts
        if self.remove:
            if has_reduced_map:
                # per reduced class: summed counts of the classes merged into it
                class_names = self.reducedtruthclasses
                rows = alltruth
                total = []
                for reduced in class_names:
                    total.append(sum(
                        weighter.totalcounts[t]
                        for t, truth in enumerate(self.truthclasses)
                        if truth in self.reducedtruthmap[reduced]))
            else:
                class_names = self.truthclasses
                rows = truthtuple
                total = weighter.totalcounts

            lowest = min(total)
            for i, row in enumerate(rows):
                for t, _ in enumerate(class_names):
                    if not row[t]:
                        continue
                    keep = float(lowest) / total[t]
                    # one random draw per set flag, in row-major order
                    if np.random.ranf() > keep:
                        notremoves[i] = 0

        if self.remove:
            kept = notremoves > 0
            weights, x_global, x_cpf, x_npf, x_sv, alltruth = [
                arr[kept]
                for arr in (weights, x_global, x_cpf, x_npf, x_sv, alltruth)
            ]

        if self.weight:
            positive = weights > 0
            x_global, x_cpf, x_npf, x_sv, alltruth, weights = [
                arr[positive]
                for arr in (x_global, x_cpf, x_npf, x_sv, alltruth, weights)
            ]

        # remove samples with no predicted class
        labelled = ~np.all(alltruth == 0, axis=1)
        alltruth, x_global, x_cpf, x_npf, x_sv, weights = [
            arr[labelled]
            for arr in (alltruth, x_global, x_cpf, x_npf, x_sv, weights)
        ]

        # remove samples with multiple predicted classes
        unambiguous = ~(np.sum(alltruth, axis=1) > 1)
        alltruth, x_global, x_cpf, x_npf, x_sv, weights = [
            arr[unambiguous]
            for arr in (alltruth, x_global, x_cpf, x_npf, x_sv, weights)
        ]

        newnsamp = x_global.shape[0]
        percent = int(float(newnsamp) / float(self.nsamples) * 100)
        logging.info('reduced content to {}%'.format(percent))
        self.nsamples = newnsamp

        # flatten weights that came back with an extra axis
        if weights.ndim > 1:
            weights = weights.reshape(weights.shape[0])

        self.w = [weights]
        self.x = [x_global, x_cpf, x_npf, x_sv]
        self.y = [alltruth]
    def readFromRootFile(self, filename, TupleMeanStd, weighter):
        """Build global features, a layered rechit map and truth from one ROOT file.

        Reads the tree ``deepntuplizer/tree``, creates ``x_global`` from
        branch group 0 and ``x_chmap`` from the rechit branches, and derives
        an id truth (``self.reduceTruth``) and an energy truth
        (``self.regtruth``).  Entries flagged ``isFake`` or ``isEta`` are
        subtracted from the keep mask; the mask is only applied when
        ``self.remove`` is set.

        The method also writes control plots: per-event energy/time plots
        for events with ``seed_phi < -9.1`` and a histogram of the energy
        truth, all with the prefix ``'gifs/' + filename.replace('/', '_')``.

        Args:
            filename: ROOT file to read.
            TupleMeanStd: passed through unchanged to the preprocessing
                helpers.
            weighter: provides ``createNotRemoveIndices`` (used only when
                ``self.remove`` is set).

        Side effects:
            Sets ``self.nsamples`` (number of tree entries), ``self.w``
            (``[weights, weights]``), ``self.x`` (``[x_global, x_chmap]``)
            and ``self.y`` (``[idtruthtuple, energytruth]``).
        """
        # the first part is standard, no changes needed
        from DeepJetCore.preprocessing import createDensityLayers, MeanNormZeroPad
        import numpy
        import ROOT

        fileTimeOut(filename, 120)  # give eos 2 minutes to recover
        rfile = ROOT.TFile(filename)
        tree = rfile.Get("deepntuplizer/tree")
        self.nsamples = tree.GetEntries()

        x_global = MeanNormZeroPad(filename, TupleMeanStd,
                                   [self.branches[0]],
                                   [self.branchcutoffs[0]], self.nsamples)

        # flatten everything out for now
        x_chmap = createDensityLayers(filename,
                                      TupleMeanStd,
                                      inbranches=['rechit_energy', 'rechit_layer', 'rechit_seeddr'],
                                      modes=['sum', 'single', 'average'],
                                      layerbranch='rechit_layer',
                                      maxlayers=55,
                                      layeroffset=1,
                                      nevents=self.nsamples,
                                      dimension1=['rechit_eta', 'seed_eta', 23, 0.35],
                                      dimension2=['rechit_phi', 'seed_phi', 23, 0.35],
                                      counterbranch='nrechits')

        Tuple = self.readTreeFromRootToTuple(filename)
        phis = Tuple['seed_phi']
        select = phis < -9.1

        from plotting import plot4d, rotanimate
        giffile = 'gifs/' + filename.replace('/', '_')
        for i, selected in enumerate(select):
            if not selected:
                continue

            ax, _ = plot4d(x_chmap[i][:, :, :, :1], giffile + "_" + str(i) + "energy_.pdf", 'etabin', 'layer', 'phibin')
            rotanimate(ax, giffile + '_' + str(i) + '_energy.gif', delay=5, prefix=giffile)
            print('energy')
            # NOTE(review): if x_chmap is a numpy array this slice is a view,
            # so the clamp on the next line also writes into x_chmap (and
            # therefore into self.x) -- confirm that this is intended.
            timeentries = x_chmap[i][:, :, :, 3:4]
            timeentries[timeentries < 0] = 0.00000000001
            ax, _ = plot4d(timeentries, giffile + "_" + str(i) + "time_.pdf", 'etabin', 'layer', 'phibin')
            rotanimate(ax, giffile + '_' + str(i) + '_time.gif', delay=5, prefix=giffile)
            print('time')

        idtruthtuple = self.reduceTruth(Tuple[self.truthclasses])
        energytruth = numpy.array(Tuple[self.regtruth])

        # NOTE(review): the weights are created as zeros and never filled in
        # this method -- confirm that all-zero sample weights are intended.
        weights = numpy.zeros(len(idtruthtuple))

        # start from "keep everything"; values <= 0 mean "drop"
        notremoves = numpy.zeros(x_global.shape[0])
        notremoves += 1
        if self.remove:
            notremoves = weighter.createNotRemoveIndices(Tuple)

        notremoves -= Tuple['isFake']
        # notremoves-=Tuple['isTau']
        notremoves -= Tuple['isEta']
        # notremoves-=Tuple['isPionZero']

        before = len(x_global)

        if self.remove:
            keep = notremoves > 0
            weights = weights[keep]
            x_global = x_global[keep]
            x_chmap = x_chmap[keep]
            idtruthtuple = idtruthtuple[keep]
            energytruth = energytruth[keep]

        print('reduced to ' + str(len(x_global)) + ' of ' + str(before))

        # make control plot for energy
        import matplotlib.pyplot as plt
        # The ``normed`` keyword was removed from Matplotlib's hist (3.1);
        # an un-normalised histogram is the default, so no keyword is needed.
        plt.hist(energytruth.flatten(), bins=30)
        plt.savefig(giffile + "_eshape.pdf")

        self.w = [weights, weights]
        self.x = [x_global, x_chmap]
        self.y = [idtruthtuple, energytruth]
Esempio n. 16
0
    def convertFromSourceFile(self, filename, weighterobjects, istraining):
        """Produce the numpy training arrays from one ROOT file.

        The features come from ``MeanNormZeroPad`` over the global, track,
        eta-rel and vertex branch groups; the truth is read with uproot3 and
        reduced to four columns.  When ``self.remove`` is set, entries are
        dropped according to the weighter stored under
        ``weighterobjects['weigther']`` and the ``isUndefined`` flag.
        Non-finite feature values and values with magnitude >= 100000 are
        replaced by 0.

        Args:
            filename: ROOT file containing ``deepntuplizer/tree``.
            weighterobjects: mapping with the entries ``'means'`` and
                ``'weigther'`` (key spelled as used by the producer).
            istraining: when false, ``self.remove`` is switched off (this
                changes the attribute on the instance).

        Returns:
            ``([x_global], [truth], [])``; both arrays are float32.
        """
        from DeepJetCore.Weighter import Weighter
        from DeepJetCore.stopwatch import stopwatch
        sw = stopwatch()
        swall = stopwatch()
        if not istraining:
            self.remove = False

        def reduceTruth(uproot_arrays):
            # columns: b (incl. leptonic b), bb, c, light (uds + gluon)
            keys = (b'isB', b'isBB', b'isGBB', b'isLeptonicB',
                    b'isLeptonicB_C', b'isC', b'isCC', b'isGCC', b'isUD',
                    b'isS', b'isG')
            flag = {key: uproot_arrays[key] for key in keys}

            leptonic_b = flag[b'isLeptonicB'] + flag[b'isLeptonicB_C']
            light_quarks = flag[b'isUD'] + flag[b'isS']

            columns = (
                flag[b'isB'] + leptonic_b,
                flag[b'isBB'] + flag[b'isGBB'],
                flag[b'isC'] + flag[b'isCC'] + flag[b'isGCC'],
                light_quarks + flag[b'isG'],
            )
            return np.vstack(columns).transpose()

        print('reading ' + filename)

        import ROOT
        from root_numpy import tree2array, root2array
        fileTimeOut(filename, 120)  # give eos time to recover
        rfile = ROOT.TFile(filename)
        self.nsamples = rfile.Get("deepntuplizer/tree").GetEntries()

        # user code, example works with the example 2D images in root format generated by make_example_data
        from DeepJetCore.preprocessing import MeanNormZeroPad, MeanNormZeroPadParticles
        branch_groups = [
            self.global_branches,
            self.track_branches,
            self.eta_rel_branches,
            self.vtx_branches,
        ]
        group_sizes = [1, self.n_track, self.n_eta_rel, self.n_vtx]
        x_global = MeanNormZeroPad(filename, weighterobjects['means'],
                                   branch_groups, group_sizes, self.nsamples)

        import uproot3 as uproot
        urfile = uproot.open(filename)["deepntuplizer/tree"]
        truth = reduceTruth(urfile.arrays(self.truth_branches))

        # important, float32 and C-type!
        truth = truth.astype(dtype='float32', order='C')
        x_global = x_global.astype(dtype='float32', order='C')

        if self.remove:
            wanted = [self.weightbranchX, self.weightbranchY]
            wanted.extend(self.truth_branches)
            wanted.extend(self.undefTruth)
            fileTimeOut(filename, 120)
            for_remove = root2array(filename,
                                    treename="deepntuplizer/tree",
                                    stop=None,
                                    branches=wanted)
            notremoves = weighterobjects['weigther'].createNotRemoveIndices(
                for_remove)
            notremoves -= for_remove['isUndefined']
            print('took ', sw.getAndReset(), ' to create remove indices')

            print('remove')
            x_global = x_global[notremoves > 0]
            truth = truth[notremoves > 0]

        newnsamp = x_global.shape[0]
        print('reduced content to ',
              int(float(newnsamp) / float(self.nsamples) * 100), '%')

        print('remove nans')
        usable = np.logical_and(np.isfinite(x_global),
                                (np.abs(x_global) < 100000.0))
        x_global = np.where(usable, x_global, 0)
        return [x_global], [truth], []
Esempio n. 17
0
    def convertFromSourceFile(self, filename, weighterobjects, istraining):
        """Convert one source file into feature, truth and weight lists.

        This is the only really mandatory function (unless
        writeFromSourceFile is defined).  It defines the conversion rule from
        an input source file to the lists of training arrays:

        * the first list holds the input feature arrays,
        * the second list holds the truth arrays,
        * the third list is optional and can contain a weight array (it
          needs to have the same number of entries as the truth array); if
          no weights are needed it can be left completely empty.

        The conversion should finally produce numpy arrays.  In the future,
        tensorflow tensors will be supported as well.

        In this example, different ways of reading files are deliberately
        mixed: the entry count comes from ROOT, the truth from uproot3 and
        the features from the DeepJetCore preprocessing helpers.

        Args:
            filename: ROOT file containing the tree ``tree``.
            weighterobjects: not used by this implementation.
            istraining: not used by this implementation.

        Returns:
            ``([x_global, x_pfCand_neutral], [truth], [])``; all arrays are
            float32.

        Side effects:
            Sets ``self.nsamples``, ``self.global_branches``,
            ``self.pfCand_neutral_branches`` and ``self.npfCand_neutral``.
        """
        print('reading ' + filename)

        import ROOT
        fileTimeOut(filename, 120)  # give eos time to recover
        rfile = ROOT.TFile(filename)
        self.nsamples = rfile.Get("tree").GetEntries()

        import uproot3 as uproot

        urfile = uproot.open(filename)["tree"]
        truth_branches = (
            "lep_isPromptId_Training",
            "lep_isNonPromptId_Training",
            "lep_isFakeId_Training",
        )
        # one truth column per branch, in the order listed above
        truth = np.concatenate(
            [np.expand_dims(urfile.array(name), axis=1)
             for name in truth_branches],
            axis=1)

        # important, float32 and C-type!
        truth = truth.astype(dtype='float32', order='C')

        # NOTE(review): 'lep_pfRelIso03_all' is listed twice below, so that
        # branch is requested twice -- confirm whether this is intended.
        self.global_branches = [
            'lep_pt', 'lep_eta', 'lep_phi',
            'lep_mediumId',
            'lep_miniPFRelIso_all', 'lep_pfRelIso03_all',
            'lep_sip3d', 'lep_dxy', 'lep_dz',
            'lep_charge',
            'lep_dxyErr', 'lep_dzErr', 'lep_ip3d',
            'lep_jetPtRelv2', 'lep_jetRelIso',
            'lep_miniPFRelIso_chg',
            'lep_mvaLowPt',
            'lep_nStations', 'lep_nTrackerLayers',
            'lep_pfRelIso03_all', 'lep_pfRelIso03_chg', 'lep_pfRelIso04_all',
            'lep_ptErr',
            'lep_segmentComp',
            'lep_tkRelIso',
            'lep_tunepRelPt',
        ]

        self.pfCand_neutral_branches = [
            'pfCand_neutral_eta',
            'pfCand_neutral_phi',
            'pfCand_neutral_pt',
            'pfCand_neutral_puppiWeight',
            'pfCand_neutral_puppiWeightNoLep',
            'pfCand_neutral_ptRel',
            'pfCand_neutral_deltaR',
        ]
        self.npfCand_neutral = 5

        from DeepJetCore.preprocessing import MeanNormZeroPad, MeanNormZeroPadParticles

        # ``None`` is passed where the other converters pass their
        # mean/std record
        x_global = MeanNormZeroPad(
            filename, None, [self.global_branches], [1], self.nsamples)
        x_pfCand_neutral = MeanNormZeroPadParticles(
            filename, None,
            self.pfCand_neutral_branches, self.npfCand_neutral,
            self.nsamples)

        x_global = x_global.astype(dtype='float32', order='C')
        x_pfCand_neutral = x_pfCand_neutral.astype(dtype='float32', order='C')

        # returns a list of feature arrays, a list of truth arrays and a list of weight arrays
        features = [x_global, x_pfCand_neutral]
        return features, [truth], []
Esempio n. 18
0
    def readFromRootFile(self, filename, TupleMeanStd, weighter):
        """Fill ``self.x``, ``self.y`` and ``self.w`` from one ROOT file.

        In addition to the four standard feature arrays (branch groups 0-3
        of ``self.branches``) this converter builds 8x8 density and count
        maps for charged (``Cpfcan_*``) and neutral (``Npfcan_*``)
        candidates and concatenates them along axis 3 into ``x_map``.  The
        truth consists of the reduced class truth and the regression target
        ``gen_pt_WithNu``; ``jet_corr_pt`` is passed as an extra input.

        Args:
            filename: ROOT file containing ``deepntuplizer/tree``.
            TupleMeanStd: passed through unchanged to the preprocessing
                helpers.
            weighter: provides ``createNotRemoveIndices`` and
                ``getJetWeights``.

        Side effects:
            Sets ``self.nsamples`` (number of jets kept), ``self.w``
            (``[weights, weights]``), ``self.x``
            (``[x_global, x_cpf, x_npf, x_sv, x_map, regreco]``) and
            ``self.y`` (``[alltruth, regtruth]``).
        """
        from DeepJetCore.preprocessing import createCountMap, createDensity, MeanNormZeroPad, MeanNormZeroPadParticles
        import numpy
        from DeepJetCore.stopwatch import stopwatch

        sw = stopwatch()

        import ROOT

        fileTimeOut(filename, 120)  # give eos time to recover
        rfile = ROOT.TFile(filename)
        tree = rfile.Get("deepntuplizer/tree")
        self.nsamples = tree.GetEntries()

        print('took ', sw.getAndReset(), ' seconds for getting tree entries')

        # split for convolutional network

        x_global = MeanNormZeroPad(filename, TupleMeanStd, [self.branches[0]],
                                   [self.branchcutoffs[0]], self.nsamples)

        x_cpf = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                         self.branches[1],
                                         self.branchcutoffs[1], self.nsamples)

        x_npf = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                         self.branches[2],
                                         self.branchcutoffs[2], self.nsamples)

        x_sv = MeanNormZeroPadParticles(filename, TupleMeanStd,
                                        self.branches[3],
                                        self.branchcutoffs[3], self.nsamples)

        # here the difference starts
        nbins = 8

        x_chmap = createDensity(
            filename,
            inbranches=['Cpfcan_ptrel', 'Cpfcan_etarel', 'Cpfcan_phirel'],
            modes=['sum', 'average', 'average'],
            nevents=self.nsamples,
            dimension1=['Cpfcan_eta', 'jet_eta', nbins, 0.45],
            dimension2=['Cpfcan_phi', 'jet_phi', nbins, 0.45],
            counterbranch='nCpfcand',
            offsets=[-1, -0.5, -0.5])

        # The neutral map must be counted with the neutral-candidate counter
        # (the same branch the neutral count map below uses); using the
        # charged counter 'nCpfcand' here was a copy-paste error.
        x_neumap = createDensity(
            filename,
            inbranches=['Npfcan_ptrel', 'Npfcan_etarel', 'Npfcan_phirel'],
            modes=['sum', 'average', 'average'],
            nevents=self.nsamples,
            dimension1=['Npfcan_eta', 'jet_eta', nbins, 0.45],
            dimension2=['Npfcan_phi', 'jet_phi', nbins, 0.45],
            counterbranch='nNpfcand',
            offsets=[-1, -0.5, -0.5])

        x_chcount = createCountMap(filename, TupleMeanStd, self.nsamples,
                                   ['Cpfcan_eta', 'jet_eta', nbins, 0.45],
                                   ['Cpfcan_phi', 'jet_phi', nbins, 0.45],
                                   'nCpfcand')

        x_neucount = createCountMap(filename, TupleMeanStd, self.nsamples,
                                    ['Npfcan_eta', 'jet_eta', nbins, 0.45],
                                    ['Npfcan_phi', 'jet_phi', nbins, 0.45],
                                    'nNpfcand')

        print('took ', sw.getAndReset(),
              ' seconds for mean norm and zero padding (C module)')

        Tuple = self.readTreeFromRootToTuple(filename)

        if self.remove:
            notremoves = weighter.createNotRemoveIndices(Tuple)
            undef = Tuple['isUndefined']
            notremoves -= undef
            print('took ', sw.getAndReset(), ' to create remove indices')

        if self.weight:
            weights = weighter.getJetWeights(Tuple)
        elif self.remove:
            weights = notremoves
        else:
            print('neither remove nor weight')
            weights = numpy.empty(self.nsamples)
            weights.fill(1.)

        truthtuple = Tuple[self.truthclasses]
        alltruth = self.reduceTruth(truthtuple)

        regtruth = Tuple['gen_pt_WithNu']
        regreco = Tuple['jet_corr_pt']

        if self.remove:
            print('remove')
            # every per-jet array is filtered with the same mask so that
            # they stay aligned
            keep = notremoves > 0
            weights = weights[keep]
            x_global = x_global[keep]
            x_cpf = x_cpf[keep]
            x_npf = x_npf[keep]
            x_sv = x_sv[keep]

            x_chmap = x_chmap[keep]
            x_neumap = x_neumap[keep]

            x_chcount = x_chcount[keep]
            x_neucount = x_neucount[keep]

            alltruth = alltruth[keep]

            regreco = regreco[keep]
            regtruth = regtruth[keep]

        newnsamp = x_global.shape[0]
        print('reduced content to ',
              int(float(newnsamp) / float(self.nsamples) * 100), '%')
        self.nsamples = newnsamp

        # stack the four maps along axis 3
        x_map = numpy.concatenate((x_chmap, x_neumap, x_chcount, x_neucount),
                                  axis=3)

        self.w = [weights, weights]
        self.x = [x_global, x_cpf, x_npf, x_sv, x_map, regreco]
        self.y = [alltruth, regtruth]
Esempio n. 19
0
    def convertFromSourceFile(self, filename, weighterobjects, istraining):
        """Produce the numpy training arrays from one ROOT file.

        Seven feature arrays are read from the tree ``tree``: the global
        lepton branches followed by the neutral, charged, photon, electron,
        muon and SV candidate collections.  The truth has one column for
        each of the prompt / non-prompt / fake training flags.  When
        ``self.remove`` is set, entries are dropped according to the
        weighter stored under ``weighterobjects['weigther']``.  Non-finite
        feature values are replaced by 0.

        Args:
            filename: ROOT file containing the tree ``tree``.
            weighterobjects: mapping with the entry ``'weigther'`` (key
                spelled as used by the producer).
            istraining: when false, ``self.remove`` is switched off (this
                changes the attribute on the instance).

        Returns:
            ``(features, [truth], [])`` where ``features`` is the list
            ``[x_global, x_pfCand_neutral, x_pfCand_charged,
            x_pfCand_photon, x_pfCand_electron, x_pfCand_muon,
            x_pfCand_SV]``.
        """
        from DeepJetCore.Weighter import Weighter
        from DeepJetCore.stopwatch import stopwatch
        sw = stopwatch()
        swall = stopwatch()
        if not istraining:
            self.remove = False

        print('reading '+filename)

        import ROOT
        from root_numpy import tree2array, root2array
        fileTimeOut(filename, 120)  # give eos time to recover
        rfile = ROOT.TFile(filename)
        self.nsamples = rfile.Get("tree").GetEntries()

        # user code, example works with the example 2D images in root format generated by make_example_data
        from DeepJetCore.preprocessing import MeanNormZeroPad,MeanNormZeroPadParticles

        # index 0 is the global array; the candidate collections follow in
        # the order of ``particle_groups``
        features = [
            MeanNormZeroPad(filename, None,
                            [self.global_branches],
                            [1], self.nsamples)
        ]

        particle_groups = (
            (self.pfCand_neutral_branches, self.npfCand_neutral),
            (self.pfCand_charged_branches, self.npfCand_charged),
            (self.pfCand_photon_branches, self.npfCand_photon),
            (self.pfCand_electron_branches, self.npfCand_electron),
            (self.pfCand_muon_branches, self.npfCand_muon),
            (self.SV_branches, self.nSV),
        )
        for branches, max_entries in particle_groups:
            features.append(
                MeanNormZeroPadParticles(filename, None,
                                         branches,
                                         max_entries, self.nsamples))

        import uproot3 as uproot
        urfile = uproot.open(filename)["tree"]
        truth_branches = (
            "lep_isPromptId_Training",
            "lep_isNonPromptId_Training",
            "lep_isFakeId_Training",
        )
        truth = np.concatenate(
            [np.expand_dims(urfile.array(name), axis=1)
             for name in truth_branches],
            axis=1)

        # important, float32 and C-type!
        truth = truth.astype(dtype='float32', order='C')
        features = [arr.astype(dtype='float32', order='C') for arr in features]

        if self.remove:
            wanted = [self.weightbranchX, self.weightbranchY]
            wanted.extend(self.truth_branches)
            wanted.extend(self.undefTruth)
            fileTimeOut(filename, 120)
            for_remove = root2array(
                filename,
                treename="tree",
                stop=None,
                branches=wanted
            )
            notremoves = weighterobjects['weigther'].createNotRemoveIndices(for_remove)
            print('took ', sw.getAndReset(), ' to create remove indices')

            print("notremoves", notremoves, "<- notremoves")
            features = [arr[notremoves > 0] for arr in features]
            truth = truth[notremoves > 0]

        newnsamp = features[0].shape[0]
        print('reduced content to ', int(float(newnsamp)/float(self.nsamples)*100),'%')

        print('remove nans')
        features = [np.where(np.isfinite(arr), arr, 0) for arr in features]

        return features, [truth], []
Esempio n. 20
0
    def readFromRootFile(self, filename, TupleMeanStd, weighter):
        """Fill ``self.x``, ``self.y`` and ``self.w`` from one ROOT file.

        Builds the global feature array from branch group 0 and four 20x20
        maps (charged density, charged count, neutral density, neutral
        count) which are concatenated along axis 3 into ``x_map``.  The
        truth consists of the reduced class truth and the branch named by
        ``self.regtruth``; the branch named by ``self.regreco`` is passed as
        an extra input.

        Args:
            filename: ROOT file containing ``deepntuplizer/tree``.
            TupleMeanStd: passed through unchanged to the preprocessing
                helpers.
            weighter: provides ``createNotRemoveIndices`` and
                ``getJetWeights``.

        Side effects:
            Sets ``self.nsamples`` (number of jets kept), ``self.w``
            (``[weights]``), ``self.x`` (``[x_global, x_map, ptreco]``) and
            ``self.y`` (``[alltruth, pttruth]``).
        """
        from DeepJetCore.preprocessing import MeanNormApply, MeanNormZeroPad, createDensityMap, createCountMap, MeanNormZeroPadParticles
        import numpy
        from DeepJetCore.stopwatch import stopwatch

        sw = stopwatch()
        swall = stopwatch()

        import ROOT

        fileTimeOut(filename, 120)  # give eos time to recover
        root_file = ROOT.TFile(filename)
        self.nsamples = root_file.Get("deepntuplizer/tree").GetEntries()

        print('took ', sw.getAndReset(), ' seconds for getting tree entries')

        # split for convolutional network
        x_global = MeanNormZeroPad(
            filename, TupleMeanStd,
            [self.branches[0]], [self.branchcutoffs[0]],
            self.nsamples)

        # here the difference starts: charged candidates first ...
        x_chmap = createDensityMap(
            filename, TupleMeanStd,
            'Cpfcan_ptrel',
            self.nsamples,
            ['Cpfcan_eta', 'jet_eta', 20, 0.5],
            ['Cpfcan_phi', 'jet_phi', 20, 0.5],
            'nCpfcand',
            -1,
            weightbranch='Cpfcan_puppiw')

        x_chcount = createCountMap(
            filename, TupleMeanStd, self.nsamples,
            ['Cpfcan_eta', 'jet_eta', 20, 0.5],
            ['Cpfcan_phi', 'jet_phi', 20, 0.5],
            'nCpfcand')

        # ... then the same two maps for neutral candidates
        x_neumap = createDensityMap(
            filename, TupleMeanStd,
            'Npfcan_ptrel',
            self.nsamples,
            ['Npfcan_eta', 'jet_eta', 20, 0.5],
            ['Npfcan_phi', 'jet_phi', 20, 0.5],
            'nNpfcand',
            -1,
            weightbranch='Npfcan_puppiw')

        x_neucount = createCountMap(
            filename, TupleMeanStd, self.nsamples,
            ['Npfcan_eta', 'jet_eta', 20, 0.5],
            ['Npfcan_phi', 'jet_phi', 20, 0.5],
            'nNpfcand')

        print('took ', sw.getAndReset(),
              ' seconds for mean norm and zero padding (C module)')

        records = self.readTreeFromRootToTuple(filename)

        if self.remove:
            notremoves = weighter.createNotRemoveIndices(records)
            notremoves -= records['isUndefined']
            print('took ', sw.getAndReset(), ' to create remove indices')

        if self.weight:
            weights = weighter.getJetWeights(records)
        elif self.remove:
            weights = notremoves
        else:
            print('neither remove nor weight')
            weights = numpy.ones(self.nsamples)

        pttruth = records[self.regtruth]
        ptreco = records[self.regreco]

        alltruth = self.reduceTruth(records[self.truthclasses])

        # stack the four maps along axis 3
        x_map = numpy.concatenate((x_chmap, x_chcount, x_neumap, x_neucount),
                                  axis=3)

        if self.remove:
            print('remove')
            # every per-jet array is filtered with the same mask so that
            # they stay aligned
            kept = notremoves > 0
            weights, x_global, x_map, alltruth, pttruth, ptreco = [
                arr[kept]
                for arr in (weights, x_global, x_map, alltruth, pttruth, ptreco)
            ]

        newnsamp = x_global.shape[0]
        print('reduced content to ',
              int(float(newnsamp) / float(self.nsamples) * 100), '%')
        self.nsamples = newnsamp
        print(x_global.shape, self.nsamples)

        # NOTE(review): self.y has two entries but self.w only one -- confirm
        # that a single weight array is intended here.
        self.w = [weights]
        self.x = [x_global, x_map, ptreco]
        self.y = [alltruth, pttruth]