def __init__(self):
        """Configure a two-class ('class1' / 'class2') training data structure.

        Sets the input tree name, the truth classes, the reference class and
        the weighting branches with their bin edges, and registers the
        branches named here through ``registerBranches``.
        """
        import numpy
        TrainData.__init__(self)

        #setting DeepJet specific defaults
        # (the tree name used to be assigned twice with the same value;
        #  one assignment is enough)
        self.treename = "deepntuplizer/tree"
        self.truthclasses = ['class1', 'class2']
        self.referenceclass = 'class1'

        self.registerBranches(self.truthclasses)
        self.registerBranches(['x'])

        # the same branch and the same bin edges are used on both axes
        self.weightbranchX = 'x'
        self.weightbranchY = 'x'

        self.weight_binX = numpy.array([-1, 0.9, 2.0], dtype=float)

        self.weight_binY = numpy.array([-1, 0.9, 2.0], dtype=float)

        # NOTE(review): this function is defined *inside* __init__ and is
        # never called here, so it does not override any reduceTruth method
        # of the class. It looks like it was meant to be a method - confirm
        # against the original file before relying on it.
        def reduceTruth(self, tuple_in):
            """Stack the 'class1' and 'class2' columns of ``tuple_in``.

            Always sets ``self.reducedtruthclasses``. Returns the two
            columns combined with ``numpy.vstack`` and transposed, or
            ``None`` (implicitly) when ``tuple_in`` is ``None``.
            """

            # NOTE(review): six names are listed although only two columns
            # are returned below - verify that this is intended.
            self.reducedtruthclasses = [
                'isB', 'isBB', 'isLeptB', 'isC', 'isUDS', 'isG'
            ]
            if tuple_in is not None:
                class1 = tuple_in['class1'].view(numpy.ndarray)

                class2 = tuple_in['class2'].view(numpy.ndarray)

                return numpy.vstack((class1, class2)).transpose()
Exemple #2
0
    def __init__(self):
        """Set up defaults for a four-class particle-ID data structure.

        Configures the tree name, the truth classes, the weighting branches
        with their bin edges and the regression truth branch, registers the
        branches named here and finally calls ``self.reduceTruth(None)``.
        """
        import numpy
        TrainData.__init__(self)

        # name of the input tree
        self.treename = "tree"

        # a single empty branch name is configured as undefined truth
        self.undefTruth = ['']

        # truth classes for classification
        self.truthclasses = [
            'isGamma',
            'isElectron',
            'isPionCharged',
            'isNeutralPion',
        ]

        # branches for the two weighting axes
        self.weightbranchX = 'true_energy'
        self.weightbranchY = 'seed_eta'

        #is already flat
        self.referenceclass = 'flatten'
        self.weight_binX = numpy.array([0, 0.1, 40000], dtype=float)
        self.weight_binY = numpy.array([-40000, 40000], dtype=float)

        # input branches; the two weighting branches are part of this list
        self.registerBranches([
            'rechit_energy', 'rechit_eta', 'rechit_phi', 'rechit_layer',
            'nrechits', 'seed_eta', 'seed_phi', 'true_energy'
        ])

        # branch holding the regression truth
        self.regtruth = 'true_energy'

        # name given to the single regression target
        self.regressiontargetclasses = ['E']

        self.registerBranches(self.truthclasses)

        # reduceTruth is defined outside this snippet; presumably calling it
        # with None only initialises the reduced class names - TODO confirm
        self.reduceTruth(None)
    def __init__(self):
        """Initialise the base class and set the pile-up premixing defaults."""
        TrainData.__init__(self)

        # counts used for the premixing (assigned left to right)
        self.nPU, self.nfilespremix, self.eventsperround = 200, 20, 200
        # flag, switched off by default
        self.always_use_test_minbias = False
Exemple #4
0
    def __init__(self):
        """Set up a regression-only data structure with target 'sigsum'.

        No truth classes are configured and both ``weight`` and ``remove``
        are switched off. ``numpy`` is used without a local import, so it
        must be available at module level (not visible in this snippet).
        """
        TrainData.__init__(self)

        self.treename="tree" #input root tree name

        self.truthclasses=[]#['isA','isB','isC'] #truth classes for classification
        self.regressiontargetclasses=['sigsum']

        self.weightbranchX='isA' #needs to be specified if weighter is used
        self.weightbranchY='isB' #needs to be specified if weighter is used

        #there is no need to resample/reweight
        self.weight=False
        self.remove=False
        #does not do anything in this configuration
        self.referenceclass='flatten'
        self.weight_binX = numpy.array([0,40000],dtype=float)
        self.weight_binY = numpy.array([0,40000],dtype=float)


        self.registerBranches(self.regressiontargetclasses) #list of branches to be used

        # truthclasses is empty here, so this registers an empty list
        self.registerBranches(self.truthclasses)


        #call this at the end
        self.reduceTruth(None)
Exemple #5
0
    def __init__(self):
        """Set up a six-class cluster data structure read from tree 'clusters'.

        Configures truth classes, the weighting branches with their bin
        edges, registers the branches named here and sets the reduced truth
        class names directly. ``np`` is expected at module level (the import
        is not visible in this snippet).
        """
        TrainData.__init__(self)

        self.treename = "clusters"  #input root tree name

        self.truthclasses = [
            'electron', 'muon', 'photon', 'pi0', 'neutral', 'charged'
        ]  #truth classes for classification

        self.weightbranchX = 'cluster_pt'  #needs to be specified
        self.weightbranchY = 'cluster_eta'  #needs to be specified

        # reference class for the weighting is one of the truth classes
        self.referenceclass = 'electron'
        self.weight_binX = np.array(
            [0, 3, 5, 10, 20, 40, 70, 100, 150, 200, 300, 500, 40000],
            dtype=float)
        self.weight_binY = np.array(
            [1.3, 1.5, 1.7, 1.9, 2.1, 2.3, 2.5, 2.7, 3.0], dtype=float)

        self.registerBranches(['cluster_pt',
                               'cluster_eta'])  #list of branches to be used
        self.registerBranches(self.truthclasses)

        # NOTE(review): unlike the sibling constructors, reduceTruth(None) is
        # not called here; the reduced class names are assigned directly.
        # Four reduced names for six truth classes - confirm the mapping
        # against the reduceTruth implementation of this class.
        self.reducedtruthclasses = ['egamma', 'muon', 'pi0', 'hadron']
    def __init__(self):
        """Set up an energy-regression data structure read from tree 'events'.

        No truth classes are configured, ``weight`` and ``remove`` are off,
        only the regression truth branch is registered, and the three rebin
        factors are initialised to 1.
        """
        import numpy
        TrainData.__init__(self)

        # name of the input tree
        self.treename="events"

        # a single empty branch name is configured as undefined truth
        self.undefTruth=['']

        self.truthclasses=[]

        self.remove=False
        self.weight=False

        # branches for the two weighting axes
        self.weightbranchX='true_energy'
        self.weightbranchY='true_x'

        #is already flat
        self.referenceclass='flatten'
        self.weight_binX = numpy.array([0,0.1,40000],dtype=float)
        self.weight_binY = numpy.array([-40000,40000],dtype=float)




        # branch holding the regression truth
        self.regtruth='true_energy'

        # name given to the single regression target
        self.regressiontargetclasses=['E']

        # only the regression truth branch is registered here
        self.registerBranches([self.regtruth])

        self.reduceTruth(None)

        # rebin factors per axis; 1 presumably means "no rebinning" - TODO confirm
        self.rebinx = 1
        self.rebiny = 1
        self.rebinz = 1
Exemple #7
0
    def __init__(
            self,
            samplefile,
            function_to_apply=None,  #needs to be function(counter,[model_input], [predict_output], [truth])
            after_n_batches=50,
            on_epoch_end=False,
            use_event=0,
            decay_function=None):
        """Read a sample file and store the callback configuration.

        Args:
            samplefile: path handed to ``TrainData.readIn``.
            function_to_apply: callable with the signature given in the
                comment above.
            after_n_batches: batch interval; stored as ``after_n_batches``.
            on_epoch_end: stored as ``run_on_epoch_end``. If set together
                with a positive ``after_n_batches``, the batch interval is
                reset to 0 and a message is printed.
            use_event: if >= 0, the data is skimmed to this event.
            decay_function: stored as ``decay_function``.
        """
        super(PredictCallback, self).__init__()
        self.samplefile = samplefile
        self.function_to_apply = function_to_apply
        self.counter = 0
        self.call_counter = 0
        self.decay_function = decay_function

        self.after_n_batches = after_n_batches
        self.run_on_epoch_end = on_epoch_end

        # Only a positive interval conflicts with epoch-end mode: the
        # class's on_batch_end already returns early for values <= 0, so
        # warning about an interval of 0 would be spurious.
        if self.run_on_epoch_end and self.after_n_batches > 0:
            print(
                'PredictCallback: can only be used on epoch end OR after n batches, falling back to epoch end'
            )
            self.after_n_batches = 0

        self.td = TrainData()
        self.td.readIn(samplefile)
        if use_event >= 0:
            self.td.skim(event=use_event)
 def __init__(self):
     """Set up a three-class (isB / isC / isUDSG) jet data structure.

     Configures tree name, truth classes, reference class and the
     jet_pt / jet_eta weighting with its bin edges, registers the branches
     named here and finally calls ``self.reduceTruth(None)``.
     """
     import numpy
     TrainData.__init__(self)
     
     #setting DeepJet specific defaults
     self.treename="tree"
     self.undefTruth=[]
     self.referenceclass='isB'
     self.truthclasses=['isB','isC','isUDSG']
     
     
     #standard branches
     # undefTruth is empty here, so the first call registers an empty list
     self.registerBranches(self.undefTruth)
     self.registerBranches(self.truthclasses)
     self.registerBranches(['jet_pt','jet_eta'])
     
     # branches for the two weighting axes
     self.weightbranchX='jet_pt'
     self.weightbranchY='jet_eta'
     
     # bin edges for weightbranchX
     self.weight_binX = numpy.array([
             10,25,30,35,40,45,50,60,75,100,
             125,150,175,200,250,300,400,500,
             600,2000],dtype=float)
     
     # bin edges for weightbranchY; note the single wide bin from -0.5 to 0.5
     self.weight_binY = numpy.array(
         [-2.5,-2.,-1.5,-1.,-0.5,0.5,1,1.5,2.,2.5],
         dtype=float
         )
     
          
     self.reduceTruth(None)
 def __init__(self):
     """Set up a three-class lepton-ID (prompt / non-prompt / fake) structure.

     Configures tree name, truth classes, reference class and the
     lep_pt / lep_eta weighting with its bin edges, registers the branches
     named here and finally calls ``self.reduceTruth(None)``.
     """
     import numpy
     TrainData.__init__(self)
     
     #setting DeepJet specific defaults
     self.treename="tree"
     #self.undefTruth=['isUndefined']
     self.referenceclass='lep_isPromptId_Training'
     self.truthclasses=['lep_isPromptId_Training','lep_isNonPromptId_Training','lep_isFakeId_Training']
     
     
     #standard branches
     #self.registerBranches(self.undefTruth)
     self.registerBranches(self.truthclasses)
     self.registerBranches(['lep_pt','lep_eta'])
     
     # branches for the two weighting axes
     self.weightbranchX='lep_pt'
     self.weightbranchY='lep_eta'
     
     # bin edges for weightbranchX
     self.weight_binX = numpy.array([
             5,7.5,10,12.5,15,17.5,20,25,30,35,40,45,50,60,75,100,
             125,150,175,200,250,300,400,500,
             600,2000],dtype=float)
     
     # bin edges for weightbranchY; note the single wide bin from -0.5 to 0.5
     self.weight_binY = numpy.array(
         [-2.5,-2.,-1.5,-1.,-0.5,0.5,1,1.5,2.,2.5],
         dtype=float
         )
     
     
          
     self.reduceTruth(None)
    def __init__(self):
        '''
        This class is meant as a base class for the FatJet studies
        You will not need to edit it for trying out things

        Configures tree name, truth classes, reference class and the
        fj_pt / fj_sdmass weighting with its bin edges, and registers the
        branches named here. ``numpy`` is expected at module level (the
        import is not visible in this snippet).
        '''
        TrainData.__init__(self)

        #define truth:
        # (this line and the weight_binX assignment below were indented with
        #  a tab inside a space-indented body, which Python 3 rejects with
        #  TabError; they now use spaces)
        self.treename = "deepntuplizer/tree"
        self.undefTruth=['isUndefined']
        self.truthclasses=['fj_isNonCC', 'fj_isCC', 'fj_isNonBB']
        self.referenceclass='fj_isNonCC' ## used for pt reshaping
        self.registerBranches(['fj_pt','fj_sdmass'])

        self.weightbranchX='fj_pt'
        self.weightbranchY='fj_sdmass'

        #self.weight_binX = numpy.array([
        #        300,2500],dtype=float)
        self.weight_binX = numpy.array([
                10,25,30,35,40,45,50,60,75,100,
                125,150,175,200,250,300,400,500,
                600,700, 800, 900, 1000, 1200, 1400, 1600, 1800, 2000],dtype=float)

        self.weight_binY = numpy.array(
            [40,200],
            dtype=float
            )

        #this is only needed because the truth definitions are different from deepFlavour
        # NOTE(review): this reset happens after 'fj_pt' / 'fj_sdmass' were
        # registered above; if registerBranches fills allbranchestoberead,
        # those two are dropped again here - confirm this is intended.
        self.allbranchestoberead=[]
        self.registerBranches(self.undefTruth)
        self.registerBranches(self.truthclasses)
        self.registerBranches(['fj_isBB', 'fj_isNonBB'])
        print("Branches read:", self.allbranchestoberead)
Exemple #11
0
    def __init__(self):
        '''
        This class is meant as a base class for the FatJet studies
        You will not need to edit it for trying out things

        Configures a two-class ('sig' / 'bkg') structure read from tree
        'tree' with H_pt / H_mass weighting. ``numpy`` is expected at module
        level (the import is not visible in this snippet).
        '''
        TrainData.__init__(self)

        #define truth:
        self.treename = "tree"
        # a single empty branch name is configured as undefined truth
        self.undefTruth = ['']
        self.truthclasses = ['sig', 'bkg']
        self.referenceclass = 'sig'  ## used for pt reshaping
        # registers an empty list
        self.registerBranches([])

        # branches for the two weighting axes
        # NOTE(review): neither branch is registered in this constructor -
        # verify they are made available elsewhere.
        self.weightbranchX = 'H_pt'
        self.weightbranchY = 'H_mass'

        # one single bin per axis
        self.weight_binX = numpy.array([0, 3000], dtype=float)
        self.weight_binY = numpy.array([0, 3000], dtype=float)

        #this is only needed because the truth definitions are different from deepFlavour
        self.allbranchestoberead = []
        self.registerBranches(self.undefTruth)
        self.registerBranches(self.truthclasses)
        print("Branches read:", self.allbranchestoberead)
Exemple #12
0
    def __init__(self):
        """Set up a six-class cluster data structure with binned channels.

        Configures truth classes and a flat pt / eta weighting, adds the
        'pt' / 'eta' branches and eight channel branches via ``addBranches``,
        registers the truth classes and finally calls
        ``self.reduceTruth(None)``. ``numpy`` is expected at module level
        (the import is not visible in this snippet).
        """
        TrainData.__init__(self)

        self.treename = "clusters"  #input root tree name

        self.truthclasses = [
            'electron', 'muon', 'photon', 'pi0', 'neutral', 'charged'
        ]  #truth classes for classification

        self.weightbranchX = 'pt'  #needs to be specified
        self.weightbranchY = 'eta'  #needs to be specified

        self.referenceclass = 'flatten'
        # one single bin per axis
        self.weight_binX = numpy.array([0, 40000], dtype=float)
        self.weight_binY = numpy.array([-40000, 40000], dtype=float)

        self.addBranches(['pt', 'eta'])  #list of branches to be used

        # x / y / z / energy channel names for two sets (suffix _1 and _2)
        self.channels = [
            'bin_x_1', 'bin_y_1', 'bin_z_1', 'bin_energy_1', 'bin_x_2',
            'bin_y_2', 'bin_z_2', 'bin_energy_2'
        ]

        # the meaning of the second argument is defined by addBranches, which
        # is outside this snippet - presumably a per-branch length; TODO confirm
        self.addBranches(self.channels, 950)

        self.registerBranches(self.truthclasses)

        #call this at the end
        self.reduceTruth(None)
Exemple #13
0
class PredictCallback(Callback):
    """Callback that runs the model on a fixed sample and hands the result on.

    The sample is read once in the constructor. Depending on the
    configuration, ``function_to_apply`` is called with the prediction either
    at the end of every epoch or whenever more than ``after_n_batches``
    batches have been counted since the last call.
    """

    def __init__(
            self,
            samplefile,
            function_to_apply=None,  #needs to be function(counter,[model_input], [predict_output], [truth])
            after_n_batches=50,
            on_epoch_end=False,
            use_event=0,
            decay_function=None):
        """Read the sample file and store the callback configuration.

        Args:
            samplefile: path handed to ``TrainData.readIn``.
            function_to_apply: callable with the signature given in the
                comment above; it is called unconditionally in
                ``predict_and_call``.
            after_n_batches: batch interval; values <= 0 disable the
                per-batch trigger (see ``on_batch_end``).
            on_epoch_end: run at the end of each epoch. If set together
                with a positive ``after_n_batches``, the batch interval is
                reset to 0 and a message is printed.
            use_event: if >= 0, the data is skimmed to this event.
            decay_function: if given, applied to ``after_n_batches`` at the
                end of each epoch.
        """
        super(PredictCallback, self).__init__()
        self.samplefile = samplefile
        self.function_to_apply = function_to_apply
        self.counter = 0
        self.call_counter = 0
        self.decay_function = decay_function

        self.after_n_batches = after_n_batches
        self.run_on_epoch_end = on_epoch_end

        # Only a positive interval conflicts with epoch-end mode:
        # on_batch_end returns early for values <= 0, so warning about an
        # interval of 0 would be spurious.
        if self.run_on_epoch_end and self.after_n_batches > 0:
            print(
                'PredictCallback: can only be used on epoch end OR after n batches, falling back to epoch end'
            )
            self.after_n_batches = 0

        self.td = TrainData()
        self.td.readIn(samplefile)
        if use_event >= 0:
            self.td.skim(event=use_event)

    def on_train_begin(self, logs=None):
        """Do nothing at the start of training."""
        pass

    def reset(self):
        """Reset the number of calls passed to ``function_to_apply`` to 0."""
        self.call_counter = 0

    def predict_and_call(self, counter):
        """Predict on the stored sample and call ``function_to_apply``.

        The ``counter`` argument is not used; the function receives the
        internal ``call_counter``, which is incremented afterwards.
        """

        predicted = self.model.predict(self.td.x)
        # always hand over a list, also for a single output
        if not isinstance(predicted, list):
            predicted = [predicted]

        self.function_to_apply(self.call_counter, self.td.x, predicted,
                               self.td.y)
        self.call_counter += 1

    def on_epoch_end(self, epoch, logs=None):
        """Reset the batch counter, apply the decay and optionally predict."""
        self.counter = 0
        if self.decay_function is not None:
            self.after_n_batches = self.decay_function(self.after_n_batches)
        if not self.run_on_epoch_end:
            return
        self.predict_and_call(epoch)

    def on_batch_end(self, batch, logs=None):
        """Count batches and predict once the count exceeds the interval."""
        if self.after_n_batches <= 0:
            return
        self.counter += 1
        # strict '>' : the call happens on batch number after_n_batches + 1
        if self.counter > self.after_n_batches:
            self.counter = 0
            self.predict_and_call(batch)
Exemple #14
0
    def __init__(self):
        """Configure the DeepLepton data structure.

        Sets the description, the truth branches, the lep_pt / lep_eta
        weighting with its bin edges and, per input collection, the list of
        branch names together with a count (``npfCand_*`` / ``nSV``).
        ``np`` is expected at module level.
        """
        TrainData.__init__(self)

        def prefixed(prefix, suffixes):
            # fresh list of branch names sharing a common prefix
            return [prefix + suffix for suffix in suffixes]

        # branch-name suffixes shared by the neutral and photon candidates
        neutral_like = (
            'eta', 'phi', 'pt', 'puppiWeight', 'puppiWeightNoLep', 'ptRel',
            'deltaR',
        )
        # branch-name suffixes shared by charged, electron and muon candidates
        charged_like = (
            'd0', 'd0Err', 'dz', 'dzErr', 'eta', 'mass', 'phi', 'pt',
            'puppiWeight', 'puppiWeightNoLep', 'trkChi2', 'vtxChi2', 'charge',
            'lostInnerHits', 'pvAssocQuality', 'trkQuality', 'ptRel',
            'deltaR',
        )

        self.description = "DeepLepton training datastructure"
        self.truth_branches = [
            'lep_isPromptId_Training',
            'lep_isNonPromptId_Training',
            'lep_isFakeId_Training',
        ]
        self.undefTruth = []
        self.weightbranchX = 'lep_pt'
        self.weightbranchY = 'lep_eta'
        self.remove = True
        self.referenceclass = 'lep_isPromptId_Training' #'lep_isNonPromptId_Training'
        #setting DeepLepton specific defaults
        self.treename = "tree"
        #self.undefTruth=['isUndefined']
        #self.red_classes      = ['cat_P', 'cat_NP', 'cat_F']
        #self.reduce_truth     = ['lep_isPromptId_Training', 'lep_isNonPromptId_Training', 'lep_isFakeId_Training']
        #self.class_weights    = [1.00, 1.00, 1.00]

        #self.weight_binX = np.array([
        #        5,7.5,10,12.5,15,17.5,20,25,30,35,40,45,50,60,75,100,
        #        125,150,175,200,250,300,400,500,
        #        600,2000],dtype=float)
        # 30 geometrically spaced edges between 3.5 and 2000
        self.weight_binX = np.geomspace(3.5, 2000, 30)

        eta_edges = [-2.5, -2., -1.5, -1., -0.5, 0.5, 1, 1.5, 2., 2.5]
        self.weight_binY = np.array(eta_edges, dtype=float)

        self.global_branches = prefixed('lep_', (
            'pt', 'eta', 'phi',
            'mediumId',
            'miniPFRelIso_all',
            'sip3d', 'dxy', 'dz',
            'charge',
            'dxyErr', 'dzErr', 'ip3d',
            'jetPtRelv2', 'jetRelIso',
            'miniPFRelIso_chg', 'mvaLowPt', 'nStations', 'nTrackerLayers',
            'pfRelIso03_all', 'pfRelIso03_chg', 'pfRelIso04_all', 'ptErr',
            'segmentComp', 'tkRelIso', 'tunepRelPt',
        ))

        self.pfCand_neutral_branches = prefixed('pfCand_neutral_', neutral_like)
        self.npfCand_neutral = 10

        self.pfCand_charged_branches = prefixed('pfCand_charged_', charged_like)
        self.npfCand_charged = 80

        self.pfCand_photon_branches = prefixed('pfCand_photon_', neutral_like)
        self.npfCand_photon = 50

        self.pfCand_electron_branches = prefixed('pfCand_electron_', charged_like)
        self.npfCand_electron = 4

        self.pfCand_muon_branches = prefixed('pfCand_muon_', charged_like)
        self.npfCand_muon = 6

        self.SV_branches = prefixed('SV_', (
            'dlen', 'dlenSig', 'dxy', 'dxySig', 'pAngle', 'chi2', 'eta',
            'mass', 'ndof', 'phi', 'pt', 'x', 'y', 'z', 'ptRel', 'deltaR',
        ))
        self.nSV = 10
Exemple #15
0
    def __init__(self):
        """Set tree name, truth classes and reference class for two classes."""
        import numpy
        TrainData.__init__(self)

        #setting DeepJet specific defaults
        # (the tree name used to be assigned twice with the same value;
        #  one assignment is enough)
        self.treename="deepntuplizer/tree"
        self.truthclasses=['class1','class2']
        self.referenceclass='class1'
Exemple #16
0
    def test_slice(self):
        """Check TrainData.getSlice and TrainData.skim against SimpleArray.getSlice.

        Three arrays are sliced directly; the same arrays are then stored in
        a TrainData (two as features, one as truth) and both the slice and
        the skimmed TrainData must reproduce the directly sliced arrays.
        """
        print('TestTrainData: skim')
        dtypes = ('int32', 'float32', 'float32')
        first, second, target = (self.createSimpleArray(dt, 600)
                                 for dt in dtypes)

        # reference slices, taken before the arrays are handed to TrainData
        expected = [arr.getSlice(2, 3) for arr in (first, second, target)]

        def unpack(traindata):
            # rebuild SimpleArrays from the (data, row splits) pairs
            feats = traindata.transferFeatureListToNumpy(False)
            truths = traindata.transferTruthListToNumpy(False)
            return [
                SimpleArray(feats[0], feats[1]),
                SimpleArray(feats[2], feats[3]),
                SimpleArray(truths[0], truths[1]),
            ]

        td = TrainData()
        td._store([first, second], [target], [])
        sliced = td.getSlice(2, 3)

        for reference, got in zip(expected, unpack(sliced)):
            self.assertEqual(reference, got)

        #test skim
        td.skim(2)
        for reference, got in zip(expected, unpack(td)):
            self.assertEqual(reference, got)
    def __init__(self):
        """Configure the DeepCSV data structure.

        Sets the description, truth branches, the jet_pt / jet_eta weighting
        with its bin edges, the branch lists per input group with their
        counts, and the reduced truth names. ``np`` is expected at module
        level.
        """
        TrainData.__init__(self)

        # bin edges for the two weighting axes
        pt_edges = [
            10, 25, 30, 35, 40, 45, 50, 60, 75, 100,
            125, 150, 175, 200, 250, 300, 400, 500, 600, 2000,
        ]
        eta_edges = [-2.5, -2., -1.5, -1., -0.5, 0.5, 1, 1.5, 2., 2.5]

        self.description = "DeepCSV training datastructure"

        self.truth_branches = [
            'isB',
            'isBB',
            'isGBB',
            'isLeptonicB',
            'isLeptonicB_C',
            'isC',
            'isGCC',
            'isCC',
            'isUD',
            'isS',
            'isG',
        ]
        self.undefTruth = ['isUndefined']
        self.weightbranchX = 'jet_pt'
        self.weightbranchY = 'jet_eta'
        self.remove = True
        self.referenceclass = 'isB'
        self.weight_binX = np.array(pt_edges, dtype=float)
        self.weight_binY = np.array(eta_edges, dtype=float)

        # per-jet quantities
        self.global_branches = [
            'jet_pt',
            'jet_eta',
            'TagVarCSV_jetNSecondaryVertices',
            'TagVarCSV_trackSumJetEtRatio',
            'TagVarCSV_trackSumJetDeltaR',
            'TagVarCSV_vertexCategory',
            'TagVarCSV_trackSip2dValAboveCharm',
            'TagVarCSV_trackSip2dSigAboveCharm',
            'TagVarCSV_trackSip3dValAboveCharm',
            'TagVarCSV_trackSip3dSigAboveCharm',
            'TagVarCSV_jetNSelectedTracks',
            'TagVarCSV_jetNTracksEtaRel',
        ]

        # track branches and the associated count
        self.track_branches = [
            'TagVarCSVTrk_trackJetDistVal',
            'TagVarCSVTrk_trackPtRel',
            'TagVarCSVTrk_trackDeltaR',
            'TagVarCSVTrk_trackPtRatio',
            'TagVarCSVTrk_trackSip3dSig',
            'TagVarCSVTrk_trackSip2dSig',
            'TagVarCSVTrk_trackDecayLenVal',
        ]
        self.n_track = 6

        # eta-rel branch and the associated count
        self.eta_rel_branches = ['TagVarCSV_trackEtaRel']
        self.n_eta_rel = 4

        # vertex branches and the associated count
        self.vtx_branches = [
            'TagVarCSV_vertexMass',
            'TagVarCSV_vertexNTracks',
            'TagVarCSV_vertexEnergyRatio',
            'TagVarCSV_vertexJetDeltaR',
            'TagVarCSV_flightDistance2dVal',
            'TagVarCSV_flightDistance2dSig',
            'TagVarCSV_flightDistance3dVal',
            'TagVarCSV_flightDistance3dSig',
        ]
        self.n_vtx = 1

        self.reduced_truth = ['isB', 'isBB', 'isC', 'isUDSG']
Exemple #18
0
    def test_KerasDTypes(self):
        """Check the dtypes reported by TrainData.getKerasFeatureDTypes.

        Two feature arrays (int32, float32) are stored; the expected list
        alternates the data dtype with 'int64' for each of them.
        """
        # the message used to read 'split', copied from test_split
        print('TestTrainData: KerasDTypes')
        a = self.createSimpleArray('int32')
        b = self.createSimpleArray('float32', 600)
        c = self.createSimpleArray('int32')
        d = self.createSimpleArray('float32', 400)

        td = TrainData()
        # a, b are stored as features; c, d as truth
        td._store([a, b], [c, d], [])

        #data, rs, data, rs
        self.assertEqual(td.getKerasFeatureDTypes(),
                         ['int32', 'int64', 'float32', 'int64'])
Exemple #19
0
    def __init__(self):
        """Set up a ten-class jet structure read from 'deepntuplizerCA8/tree'.

        Configures truth classes, the reference class and the
        jet_pt / jet_eta weighting with its bin edges, registers the truth,
        undefined-truth and weighting branches, and sets ``weight`` to False
        and ``remove`` to True. ``np`` is expected at module level (the
        import is not visible in this snippet).
        """
        TrainData.__init__(self)

        #define truth:
        self.treename = "deepntuplizerCA8/tree"
        self.undefTruth = ['isUndefined']
        # commented-out names below are alternatives that are currently unused
        self.truthclasses = [
            'isB',
            'isBB',  #'isGBB',
            #'isLeptonicB','isLeptonicB_C',
            'isC',
            'isCC',  #'isGCC',
            'isUD',
            'isS',
            'isG',
            'isTauHTauH',
            'isTauHTauM',
            'isTauHTauE',
            #'isTauMTauM','isTauMTauE','isTauETauE',
            #'isTauH','isTauM','isTauE',
        ]

        self.registerBranches(self.truthclasses)
        self.registerBranches(self.undefTruth)

        self.referenceclass = 'isTauHTauH'  # 'flatten' or class name
        #self.referenceclass='flatten'
        #self.referenceclass='lowest'
        self.weightbranchX = 'jet_pt'
        self.weightbranchY = 'jet_eta'
        #self.weightbranchY='jet_mass'

        # the weighting branches are registered as well
        self.registerBranches([self.weightbranchX, self.weightbranchY])

        # bin edges for weightbranchX
        self.weight_binX = np.array([
            10, 25, 30, 35, 40, 45, 50, 60, 75, 100, 125, 150, 175, 200, 250,
            300, 400, 500, 600, 2000
        ],
                                    dtype=float)

        # bin edges for weightbranchY; note the single wide bin from -0.5 to 0.5
        self.weight_binY = np.array(
            [-2.5, -2., -1.5, -1., -0.5, 0.5, 1, 1.5, 2., 2.5], dtype=float)

        #self.weight_binY = np.array([
        #        10,30,40,50,75,100,
        #        125,150,175,200,250,300,400,500,
        #        600,800,1000,1500,2000],dtype=float)

        self.weight = False
        self.remove = True
Exemple #20
0
    def __init__(self):
        """Set the DeepJet truth defaults and call ``self.reduceTruth(None)``.

        No branches are registered here; ``self.branches`` starts out empty.
        """
        import numpy
        TrainData.__init__(self)

        #setting DeepJet specific defaults
        self.treename = "deepntuplizer/tree"
        self.undefTruth = ['isUndefined']
        self.referenceclass = 'isB'
        # note: 'isUndefined' is listed here as well as in undefTruth
        self.truthclasses = [
            'isB', 'isBB', 'isGBB', 'isLeptonicB', 'isLeptonicB_C', 'isC',
            'isCC', 'isGCC', 'isUD', 'isS', 'isG', 'isUndefined'
        ]

        self.branches = []
        self.reduceTruth(None)
Exemple #21
0
    def __init__(self):
        '''
        This class is meant as a base class for the FatJet studies
        You will not need to edit it for trying out things

        Configures a two-class (fj_isNonCC / fj_isCC) structure with
        fj_pt / fj_sdmass weighting and registers the branches named here.
        ``numpy`` is expected at module level (the import is not visible in
        this snippet).
        '''
        TrainData.__init__(self)

        #define truth:
        self.treename = "deepntuplizer/tree"
        self.undefTruth = ['isUndefined']
        #self.truthclasses=['fj_isNonCC', 'fj_isCC', 'fj_isNonBB', 'fj_isQCD', 'fj_isH']
        #self.truthclasses=['fj_isCC', 'fj_isBB', 'fj_isQCD' ]
        #self.truthclasses=["label_H_bb", "label_H_cc", "label_QCD_bb",  "label_QCD_cc", "label_QCD_others"]
        self.truthclasses = ['fj_isNonCC', 'fj_isCC']
        #self.truthclasses=["fj_isCC", "fj_isNonCC"]
        #self.referenceclass='label_H_cc' ## used for pt reshaping
        self.referenceclass = 'lowest'  ## used for pt reshaping
        #self.referenceclass='flatten' ## used for pt reshaping
        self.registerBranches(['fj_pt', 'fj_sdmass'])

        self.weightbranchX = 'fj_pt'
        self.weightbranchY = 'fj_sdmass'

        #self.weight_binX = numpy.array([
        #        300,1000,2500],dtype=float)
        # Edges 300..950 in steps of 50, then 1000..2400 in steps of 200.
        # The ranges are turned into lists before concatenation: adding two
        # range objects only works on Python 2 and raises TypeError on
        # Python 3.
        self.weight_binX = numpy.array(
            #[300,400,500,600,700, 800, 900, 1000,
            #1100, 1200, 1300, 1400, 1500, 1600, 1700, 1800, 1900, 2000]
            list(range(300, 1000, 50)) + list(range(1000, 2600, 200)),
            dtype=float)

        self.weight_binY = numpy.array(
            [40, 200],
            #range(40,200,40),
            dtype=float)
        self.weight = True
        self.remove = False

        #this is only needed because the truth definitions are different from deepFlavour
        # NOTE(review): this reset happens after 'fj_pt' / 'fj_sdmass' were
        # registered above; if registerBranches fills allbranchestoberead,
        # those two are dropped again here - confirm this is intended.
        self.allbranchestoberead = []
        self.registerBranches(self.undefTruth)
        self.registerBranches(self.truthclasses)
        self.registerBranches([
            'fj_isBB', 'fj_isNonBB', 'fj_isNonCC', 'fj_isCC', 'fj_isQCD',
            'fj_isH', "label_H_bb", "label_H_cc", "label_QCD_bb",
            "label_QCD_cc", "label_QCD_others"
        ])
        print("Branches read:", self.allbranchestoberead)
Exemple #22
0
    def __init__(self):
        '''
        This class is meant as a base class for the FatJet studies
        You will not need to edit it for trying out things

        Configures an eight-class FatJet structure with fj_pt / fj_sdmass
        weighting and registers the branches named here. ``numpy`` is
        expected at module level (the import is not visible in this snippet).
        '''
        TrainData.__init__(self)

        #define truth:
        self.treename = "deepntuplizer/tree"
        self.undefTruth = ['isUndefined']
        self.truthclasses = [
            "fj_isH", "fj_isCC", "fj_isBB", "fj_isNonCC", "fj_isNonBB",
            "fj_isZ", "fj_isQCD", "sample_isQCD"
        ]

        self.referenceclass = 'lowest'  ## used for pt reshaping options=['lowest', 'flatten', '<class_name>']
        self.weightbranchX = 'fj_pt'
        self.weightbranchY = 'fj_sdmass'

        # Edges 300..950 in steps of 50, then 1000..2400 in steps of 200.
        # The ranges are turned into lists before concatenation: adding two
        # range objects only works on Python 2 and raises TypeError on
        # Python 3.
        self.weight_binX = numpy.array(list(range(300, 1000, 50)) +
                                       list(range(1000, 2600, 200)),
                                       dtype=float)

        self.weight_binY = numpy.array([40, 200], dtype=float)

        self.weight = True
        self.remove = False
        self.removeUnderOverflow = True

        #this is only needed because the truth definitions are different from deepFlavour
        self.allbranchestoberead = []
        self.registerBranches(self.undefTruth)
        self.registerBranches(self.truthclasses)
        # the weighting branches are registered after the reset above
        self.registerBranches(['fj_pt', 'fj_sdmass'])
        self.registerBranches([
            "label_H_bb",
            "label_H_cc",
            "label_QCD_bb",
            "label_QCD_cc",
            "label_QCD_others",
            "label_Z_bb",
            "label_Z_cc",
        ])
        print("Branches read:", self.allbranchestoberead)
Exemple #23
0
    def __init__(self):
        """Set up a rechit data structure read from tree 'Delphes'.

        Stores the feature and truth branch names, the size constants and
        one regression target name per simcluster index, then calls
        ``self.other_useless_inits()`` (defined outside this snippet).
        """
        TrainData.__init__(self)

        self.treename = "Delphes"  #input root tree name

        # branch names for the features and for the truth
        self.feat_branch = "rechit_features"
        self.truth_branch = "rechit_simcluster_fractions"
        # this needs to be adapted!!
        self.max_rechits = 3500

        #this should be fine
        self.n_features = 10
        self.n_simcluster = 20

        # target names are the strings '0' ... str(n_simcluster - 1)
        self.regressiontargetclasses = [
            str(i) for i in range(self.n_simcluster)
        ]

        self.other_useless_inits()
Exemple #24
0
 def test_split(self):
     """Check that TrainData.split matches splitting each SimpleArray alone.

     Copies of the four arrays are split directly; the originals are stored
     in a TrainData (two as features, two as truth), the TrainData is split
     and the arrays rebuilt from the result must equal the direct splits.
     """
     print('TestTrainData: split')
     feat_a = self.createSimpleArray('int32')
     feat_b = self.createSimpleArray('float32',600)
     truth_a = self.createSimpleArray('int32')
     truth_b = self.createSimpleArray('float32',400)

     # copies are taken before the originals are handed to TrainData
     copies = [arr.copy() for arr in (feat_a, feat_b, truth_a, truth_b)]
     expected = [arr.split(2) for arr in copies]

     td = TrainData()
     td._store([feat_a, feat_b], [truth_a, truth_b], [])

     split_td = td.split(2)
     feats = split_td.transferFeatureListToNumpy(False)
     truths = split_td.transferTruthListToNumpy(False)
     # weights are transferred as well; the result is not used
     split_td.transferWeightListToNumpy(False)

     # entries come in (data, row splits) pairs: indices 0/1 and 2/3
     rebuilt = [SimpleArray(parts[k], parts[k + 1])
                for parts in (feats, truths)
                for k in (0, 2)]

     self.assertEqual(expected, rebuilt)
Exemple #25
0
    def __init__(self):
        """Set up a rechit data structure read from 'deepntuplizer/tree'.

        Configures the weighting branches, the reference class and the bin
        edges for the X axis, and registers the input branches. No bin edges
        for the Y axis are set in this constructor.
        """
        import numpy
        TrainData.__init__(self)

        self.treename = "deepntuplizer/tree"

        # branches for the two weighting axes
        self.weightbranchX = 'true_energy'
        self.weightbranchY = 'true_eta'

        self.referenceclass = 'flatten'
        # bin edges for weightbranchX
        self.weight_binX = numpy.array([
            0, 1, 2, 3, 4, 5, 7.5, 10, 20, 30, 40, 50, 60, 80, 100, 120, 140,
            160, 200, 240, 300, 400
        ],
                                       dtype=float)

        # input branches; both weighting branches are part of this list
        self.registerBranches([
            'rechit_energy', 'rechit_eta', 'rechit_phi', 'rechit_time',
            'rechit_layer', 'nrechits', 'seed_eta', 'seed_phi', 'true_energy',
            'true_eta', 'true_energyfraction'
        ])
Exemple #26
0
    def writeOutPrediction(self, predicted, features, truth, weights,
                           outfilename, inputfile):
        """Write the first 100 entries of prediction, features and truth.

        Only the first element of ``predicted``, ``features`` and ``truth``
        is used. The three arrays are stored as the feature list of a fresh
        ``TrainData`` which is written to ``outfilename``. ``weights`` and
        ``inputfile`` are not used.

        The unreachable ROOT export that followed the early ``return``, the
        unused ``unroll`` helper and a dangling, unterminated ``'''`` have
        been removed.
        """
        # predicted will be a list
        print('writeout')
        print('predicted', predicted[0].shape)
        print('features', features[0].shape)
        print('truth', truth[0].shape)

        # first 100 are enough for now
        parr = predicted[0][:100, ...]
        farr = features[0][:100, ...]
        tarr = truth[0][:100, ...]

        from DeepJetCore.TrainData import TrainData
        #use traindata as data storage
        td = TrainData()
        td._store([parr, farr, tarr], [], [])
        td.writeToFile(outfilename)
Exemple #27
0
    def writeOutPrediction(self, predicted, features, truth, weights,
                           outfilename, inputfile):
        """Write prediction, features and truth to ``outfilename``.

        Only the first element of each of the three inputs is used; they are
        stored together as the feature list of a fresh ``TrainData``.
        ``weights`` and ``inputfile`` are not used.
        """
        # each input is a list; report the shape of its first element
        print('writeout')
        for label, arrays in (('predicted', predicted),
                              ('features', features),
                              ('truth', truth)):
            print(label, arrays[0].shape)

        payload = [predicted[0], features[0], truth[0]]

        from DeepJetCore.TrainData import TrainData
        # TrainData serves as plain storage here
        storage = TrainData()
        storage._store(payload, [], [])
        storage.writeToFile(outfilename)
Exemple #28
0
def worker(i):
    """Convert sample ``i`` from the old TrainData format to the new one.

    Reads ``dir + dcold.samples[i]`` with the old class, stores its x / y / w
    in a new ``TrainData`` and writes that to ``dcnew.samples[i]``. ``dir``,
    ``dcold`` and ``dcnew`` are module-level names defined outside this
    snippet.

    Returns:
        True once the sample has been written.
    """

    td = TDOld()
    tdnew = TrainData()
    print("converting",dcold.samples[i])

    td.readIn(dir + dcold.samples[i])
    x = td.x
    y = td.y
    w = td.w

    # was `tdnew.tdnew._store(...)`: nothing here ever sets a `tdnew`
    # attribute on the object, so store directly on it
    tdnew._store(x,y,w)
    tdnew.writeToFile(dcnew.samples[i])

    # free the data of both objects before returning
    td.clear()
    tdnew.clear()
    del x,y,w
    return True
Exemple #29
0
    def test_TrainDataRead(self):
        """Check that a previously written TrainData file is still readable.

        The file must contain exactly one feature array, and that array must
        equal the SimpleArray built from the two reference .npy files.
        """
        print('TestCompatibility TrainData')
        stored = TrainData()
        stored.readFromFile('trainData_previous.djctd')

        self.assertEqual(stored.nFeatureArrays(), 1)

        # reference: data and row splits saved as numpy files
        ref_data = np.load("np_arr.npy")
        ref_rowsplits = np.load("np_rs.npy")
        expected = SimpleArray(ref_data, ref_rowsplits)

        # the transferred list holds the data first, then the row splits
        transferred = stored.transferFeatureListToNumpy(False)
        data = transferred[0]
        rowsplits = np.array(transferred[1], dtype='int64')
        actual = SimpleArray(data, rowsplits)

        self.assertEqual(actual, expected)
Exemple #30
0
def premixfile(i):
    """Create premixed file number ``i`` and write it to ``outputDir``.

    Calls ``pmf`` on ``allfiles`` with the module-level ``nEvents`` and
    ``nPU``, stores the returned array as the single feature array of a
    ``TrainData`` and writes it to ``<outputDir>/<i>_mix.djctd``.
    """

    # fixed settings for the premixing call
    eventsperround = 100
    neventstotal = nEvents
    nPUpremix = nPU
    nfilespremix = 5

    mixed = pmf(allfiles,
                neventstotal,
                nPUpremix,
                nfilespremix=nfilespremix,
                eventsperround=eventsperround)

    print('nevents', mixed.shape[0])
    out = TrainData()
    out._store([mixed], [], [])
    print('..writing ' + str(i))

    target = outputDir + '/' + str(i) + '_mix.djctd'
    out.writeToFile(target)
    del out