def __init__(self):
    """Set up the two-class defaults.

    Configures the tree name, the truth classes, the reference class, the
    registered branches and the bin edges of both weight axes (both axes use
    branch ``x``).
    """
    import numpy
    TrainData.__init__(self)

    # setting DeepJet specific defaults
    # (the original assigned the same treename twice; once is enough)
    self.treename = "deepntuplizer/tree"
    self.truthclasses = ['class1', 'class2']
    self.referenceclass = 'class1'

    self.registerBranches(self.truthclasses)
    self.registerBranches(['x'])

    self.weightbranchX = 'x'
    self.weightbranchY = 'x'

    self.weight_binX = numpy.array([-1, 0.9, 2.0], dtype=float)
    self.weight_binY = numpy.array([-1, 0.9, 2.0], dtype=float)

def reduceTruth(self, tuple_in):
    """Stack the ``class1`` and ``class2`` columns into one truth array.

    Always (re)sets ``self.reducedtruthclasses``. When ``tuple_in`` is None
    nothing else happens and None is returned; otherwise the two columns are
    returned side by side as an array with one row per entry and two columns.
    """
    # numpy is only imported locally in __init__, so it has to be brought
    # into scope here as well.
    import numpy

    # NOTE(review): six names are listed here but only two columns are
    # returned below - confirm which of the two is intended.
    self.reducedtruthclasses = [
        'isB', 'isBB', 'isLeptB', 'isC', 'isUDS', 'isG'
    ]
    if tuple_in is None:
        return None

    class1 = tuple_in['class1'].view(numpy.ndarray)
    class2 = tuple_in['class2'].view(numpy.ndarray)
    return numpy.vstack((class1, class2)).transpose()
def __init__(self):
    """Configure tree name, truth classes, weighting and branch defaults.

    Weighting uses ``true_energy`` / ``seed_eta`` with reference class
    ``'flatten'``; the regression truth is ``true_energy``. Ends by calling
    ``self.reduceTruth(None)``.
    """
    import numpy
    TrainData.__init__(self)

    self.treename = "tree"
    self.undefTruth = ['']
    self.truthclasses = [
        'isGamma', 'isElectron', 'isPionCharged', 'isNeutralPion',
    ]

    self.weightbranchX = 'true_energy'
    self.weightbranchY = 'seed_eta'
    # is already flat
    self.referenceclass = 'flatten'

    energy_edges = [0, 0.1, 40000]
    eta_edges = [-40000, 40000]
    self.weight_binX = numpy.array(energy_edges, dtype=float)
    self.weight_binY = numpy.array(eta_edges, dtype=float)

    input_branches = [
        'rechit_energy', 'rechit_eta', 'rechit_phi', 'rechit_layer',
        'nrechits', 'seed_eta', 'seed_phi', 'true_energy',
    ]
    self.registerBranches(input_branches)

    self.regtruth = 'true_energy'
    self.regressiontargetclasses = ['E']

    self.registerBranches(self.truthclasses)
    self.reduceTruth(None)
def __init__(self):
    """Initialise the base class, then set the numeric defaults ``nPU``,
    ``nfilespremix`` and ``eventsperround`` and the
    ``always_use_test_minbias`` flag."""
    TrainData.__init__(self)
    self.nPU, self.nfilespremix, self.eventsperround = 200, 20, 200
    self.always_use_test_minbias = False
def __init__(self):
    """Configure a regression-only setup with target ``sigsum``.

    No truth classes are defined and both ``weight`` and ``remove`` are
    switched off. Ends by calling ``self.reduceTruth(None)``.
    """
    TrainData.__init__(self)

    # input root tree name
    self.treename = "tree"
    # truth classes for classification (none here; e.g. ['isA','isB','isC'])
    self.truthclasses = []
    self.regressiontargetclasses = ['sigsum']

    # need to be specified if the weighter is used
    self.weightbranchX = 'isA'
    self.weightbranchY = 'isB'

    # there is no need to resample/reweight
    self.weight = False
    self.remove = False

    # does not do anything in this configuration
    self.referenceclass = 'flatten'
    full_range = [0, 40000]
    self.weight_binX = numpy.array(full_range, dtype=float)
    self.weight_binY = numpy.array(full_range, dtype=float)

    # list of branches to be used
    self.registerBranches(self.regressiontargetclasses)
    self.registerBranches(self.truthclasses)

    # call this at the end
    self.reduceTruth(None)
def __init__(self):
    """Configure the ``clusters`` tree with six truth classes.

    Weighting uses ``cluster_pt`` / ``cluster_eta`` with ``electron`` as the
    reference class; the reduced truth classes are set last.
    """
    TrainData.__init__(self)

    # input root tree name
    self.treename = "clusters"
    # truth classes for classification
    self.truthclasses = [
        'electron', 'muon', 'photon', 'pi0', 'neutral', 'charged',
    ]

    # both weight branches need to be specified
    self.weightbranchX = 'cluster_pt'
    self.weightbranchY = 'cluster_eta'
    self.referenceclass = 'electron'

    pt_edges = [0, 3, 5, 10, 20, 40, 70, 100, 150, 200, 300, 500, 40000]
    eta_edges = [1.3, 1.5, 1.7, 1.9, 2.1, 2.3, 2.5, 2.7, 3.0]
    self.weight_binX = np.array(pt_edges, dtype=float)
    self.weight_binY = np.array(eta_edges, dtype=float)

    # list of branches to be used
    self.registerBranches(['cluster_pt', 'cluster_eta'])
    self.registerBranches(self.truthclasses)

    # call this at the end
    self.reducedtruthclasses = ['egamma', 'muon', 'pi0', 'hadron']
def __init__(self):
    """Configure an energy-regression setup on the ``events`` tree.

    There are no truth classes, ``weight`` and ``remove`` are off, and the
    only registered branch is the regression truth ``true_energy``. The three
    rebin factors are set to 1 after ``self.reduceTruth(None)`` has run.
    """
    import numpy
    TrainData.__init__(self)

    self.treename = "events"
    self.undefTruth = ['']
    self.truthclasses = []

    self.remove = False
    self.weight = False

    self.weightbranchX = 'true_energy'
    self.weightbranchY = 'true_x'
    # is already flat
    self.referenceclass = 'flatten'

    energy_edges = [0, 0.1, 40000]
    x_edges = [-40000, 40000]
    self.weight_binX = numpy.array(energy_edges, dtype=float)
    self.weight_binY = numpy.array(x_edges, dtype=float)

    self.regtruth = 'true_energy'
    self.regressiontargetclasses = ['E']

    self.registerBranches([self.regtruth])
    self.reduceTruth(None)

    self.rebinx = 1
    self.rebiny = 1
    self.rebinz = 1
def __init__(
        self,
        samplefile,
        function_to_apply=None,  # needs to be function(counter,[model_input], [predict_output], [truth])
        after_n_batches=50,
        on_epoch_end=False,
        use_event=0,
        decay_function=None):
    """Load the sample file and store the trigger configuration.

    samplefile        -- file handed to ``TrainData.readIn``.
    function_to_apply -- callable taking (counter, model input, predictions,
                         truth).
    after_n_batches   -- batch interval stored for the batch trigger; forced
                         to 0 when ``on_epoch_end`` is set.
    on_epoch_end      -- if true, the epoch-end trigger is used instead.
    use_event         -- if >= 0, the loaded data is skimmed to this event.
    decay_function    -- optional callable stored as ``self.decay_function``.
    """
    super(PredictCallback, self).__init__()
    self.samplefile = samplefile
    self.function_to_apply = function_to_apply
    self.counter = 0
    self.call_counter = 0
    self.decay_function = decay_function

    self.after_n_batches = after_n_batches
    self.run_on_epoch_end = on_epoch_end

    # Only a strictly positive interval actually requests the batch trigger;
    # with 0 there is no conflict, so there is nothing to warn about.
    if self.run_on_epoch_end and self.after_n_batches > 0:
        print(
            'PredictCallback: can only be used on epoch end OR after n batches, falling back to epoch end'
        )
        self.after_n_batches = 0

    self.td = TrainData()
    self.td.readIn(samplefile)
    if use_event >= 0:
        self.td.skim(event=use_event)
def __init__(self):
    """Configure a three-class (isB / isC / isUDSG) setup weighted in
    ``jet_pt`` and ``jet_eta``, then call ``self.reduceTruth(None)``."""
    import numpy
    TrainData.__init__(self)

    # setting DeepJet specific defaults
    self.treename = "tree"
    self.undefTruth = []
    self.referenceclass = 'isB'
    self.truthclasses = ['isB', 'isC', 'isUDSG']

    # standard branches
    for branch_group in (self.undefTruth,
                         self.truthclasses,
                         ['jet_pt', 'jet_eta']):
        self.registerBranches(branch_group)

    self.weightbranchX = 'jet_pt'
    self.weightbranchY = 'jet_eta'

    pt_edges = [
        10, 25, 30, 35, 40, 45, 50, 60, 75, 100,
        125, 150, 175, 200, 250, 300, 400, 500,
        600, 2000,
    ]
    eta_edges = [-2.5, -2., -1.5, -1., -0.5, 0.5, 1, 1.5, 2., 2.5]
    self.weight_binX = numpy.array(pt_edges, dtype=float)
    self.weight_binY = numpy.array(eta_edges, dtype=float)

    self.reduceTruth(None)
def __init__(self):
    """Configure the three lepton-ID truth classes, weighted in ``lep_pt``
    and ``lep_eta``, then call ``self.reduceTruth(None)``."""
    import numpy
    TrainData.__init__(self)

    # setting DeepJet specific defaults
    self.treename = "tree"
    self.referenceclass = 'lep_isPromptId_Training'
    self.truthclasses = [
        'lep_isPromptId_Training',
        'lep_isNonPromptId_Training',
        'lep_isFakeId_Training',
    ]

    # standard branches (no undefTruth is set or registered here)
    self.registerBranches(self.truthclasses)
    self.registerBranches(['lep_pt', 'lep_eta'])

    self.weightbranchX = 'lep_pt'
    self.weightbranchY = 'lep_eta'

    pt_edges = [
        5, 7.5, 10, 12.5, 15, 17.5, 20, 25, 30, 35, 40, 45, 50, 60, 75, 100,
        125, 150, 175, 200, 250, 300, 400, 500,
        600, 2000,
    ]
    eta_edges = [-2.5, -2., -1.5, -1., -0.5, 0.5, 1, 1.5, 2., 2.5]
    self.weight_binX = numpy.array(pt_edges, dtype=float)
    self.weight_binY = numpy.array(eta_edges, dtype=float)

    self.reduceTruth(None)
def __init__(self):
    '''
    Base-class defaults for the FatJet studies; there should be no need
    to edit this for trying things out.

    Sets tree name, truth classes, weighting in fj_pt / fj_sdmass and the
    branches to read, then prints the resulting branch list.
    '''
    TrainData.__init__(self)

    # define truth:
    self.treename = "deepntuplizer/tree"
    self.undefTruth = ['isUndefined']
    self.truthclasses = ['fj_isNonCC', 'fj_isCC', 'fj_isNonBB']

    # used for pt reshaping
    self.referenceclass = 'fj_isNonCC'
    self.registerBranches(['fj_pt', 'fj_sdmass'])
    self.weightbranchX = 'fj_pt'
    self.weightbranchY = 'fj_sdmass'

    pt_edges = [
        10, 25, 30, 35, 40, 45, 50, 60, 75, 100,
        125, 150, 175, 200, 250, 300, 400, 500,
        600, 700, 800, 900, 1000, 1200, 1400, 1600, 1800, 2000,
    ]
    mass_edges = [40, 200]
    self.weight_binX = numpy.array(pt_edges, dtype=float)
    self.weight_binY = numpy.array(mass_edges, dtype=float)

    # this is only needed because the truth definitions are different from deepFlavour
    # NOTE(review): the list is reset after fj_pt / fj_sdmass were registered
    # above - if registerBranches fills this list, those two are dropped
    # here; confirm that is intended.
    self.allbranchestoberead = []
    for branch_group in (self.undefTruth,
                         self.truthclasses,
                         ['fj_isBB', 'fj_isNonBB']):
        self.registerBranches(branch_group)
    print("Branches read:", self.allbranchestoberead)
def __init__(self):
    '''
    Base-class defaults for a two-class (sig / bkg) setup; there should be
    no need to edit this for trying things out.

    Sets tree name, truth classes and weighting in H_pt / H_mass, registers
    the truth branches and prints the resulting branch list.
    '''
    TrainData.__init__(self)

    # define truth:
    self.treename = "tree"
    self.undefTruth = ['']
    self.truthclasses = ['sig', 'bkg']

    # used for pt reshaping
    self.referenceclass = 'sig'
    self.registerBranches([])
    self.weightbranchX = 'H_pt'
    self.weightbranchY = 'H_mass'

    single_bin = [0, 3000]
    self.weight_binX = numpy.array(single_bin, dtype=float)
    self.weight_binY = numpy.array(single_bin, dtype=float)

    # this is only needed because the truth definitions are different from deepFlavour
    self.allbranchestoberead = []
    for branch_group in (self.undefTruth, self.truthclasses):
        self.registerBranches(branch_group)
    print("Branches read:", self.allbranchestoberead)
def __init__(self):
    """Configure the ``clusters`` tree with six truth classes and eight
    binned channels.

    ``pt`` / ``eta`` are added first, then the channels are added with the
    extra argument 950. Ends by calling ``self.reduceTruth(None)``.
    """
    TrainData.__init__(self)

    # input root tree name
    self.treename = "clusters"
    # truth classes for classification
    self.truthclasses = [
        'electron', 'muon', 'photon', 'pi0', 'neutral', 'charged',
    ]

    # both weight branches need to be specified
    self.weightbranchX = 'pt'
    self.weightbranchY = 'eta'
    self.referenceclass = 'flatten'

    pt_edges = [0, 40000]
    eta_edges = [-40000, 40000]
    self.weight_binX = numpy.array(pt_edges, dtype=float)
    self.weight_binY = numpy.array(eta_edges, dtype=float)

    # list of branches to be used
    self.addBranches(['pt', 'eta'])

    self.channels = [
        'bin_x_1', 'bin_y_1', 'bin_z_1', 'bin_energy_1',
        'bin_x_2', 'bin_y_2', 'bin_z_2', 'bin_energy_2',
    ]
    self.addBranches(self.channels, 950)

    self.registerBranches(self.truthclasses)

    # call this at the end
    self.reduceTruth(None)
class PredictCallback(Callback):
    """Callback that predicts on a fixed sample and passes the result on.

    The sample is read once in the constructor. A prediction is triggered
    either every ``after_n_batches`` batches or at the end of each epoch,
    and its result is handed to ``function_to_apply`` together with the
    inputs and the truth.
    """

    def __init__(
            self,
            samplefile,
            function_to_apply=None,  # needs to be function(counter,[model_input], [predict_output], [truth])
            after_n_batches=50,
            on_epoch_end=False,
            use_event=0,
            decay_function=None):
        """Load the sample file and store the trigger configuration.

        samplefile        -- file handed to ``TrainData.readIn``.
        function_to_apply -- callable taking (counter, model input,
                             predictions, truth); it is called
                             unconditionally by ``predict_and_call``.
        after_n_batches   -- batch interval; values <= 0 disable the batch
                             trigger. Forced to 0 when ``on_epoch_end`` is
                             set.
        on_epoch_end      -- if true, predict at the end of every epoch.
        use_event         -- if >= 0, the loaded data is skimmed to this
                             event.
        decay_function    -- optional callable applied to the batch interval
                             at the end of every epoch.
        """
        super(PredictCallback, self).__init__()
        self.samplefile = samplefile
        self.function_to_apply = function_to_apply
        self.counter = 0
        self.call_counter = 0
        self.decay_function = decay_function

        self.after_n_batches = after_n_batches
        self.run_on_epoch_end = on_epoch_end

        # Only a strictly positive interval requests the batch trigger; with
        # 0 it is already disabled, so there is no conflict to warn about.
        if self.run_on_epoch_end and self.after_n_batches > 0:
            print(
                'PredictCallback: can only be used on epoch end OR after n batches, falling back to epoch end'
            )
            self.after_n_batches = 0

        self.td = TrainData()
        self.td.readIn(samplefile)
        if use_event >= 0:
            self.td.skim(event=use_event)

    def on_train_begin(self, logs=None):
        """Nothing to do when training starts."""
        pass

    def reset(self):
        """Restart the counter that is passed to ``function_to_apply``."""
        self.call_counter = 0

    def predict_and_call(self, counter):
        """Predict on the stored sample and call ``function_to_apply``.

        ``counter`` is not used; the function receives the internal
        ``call_counter``, which is incremented after every call.
        """
        predicted = self.model.predict(self.td.x)
        # the user function always receives a list of outputs
        if not isinstance(predicted, list):
            predicted = [predicted]

        self.function_to_apply(self.call_counter, self.td.x, predicted,
                               self.td.y)
        self.call_counter += 1

    def on_epoch_end(self, epoch, logs=None):
        """Reset the batch counter, apply the decay, and predict if the
        epoch-end trigger is enabled."""
        self.counter = 0
        if self.decay_function is not None:
            self.after_n_batches = self.decay_function(self.after_n_batches)
        if not self.run_on_epoch_end:
            return
        self.predict_and_call(epoch)

    def on_batch_end(self, batch, logs=None):
        """Count batches and predict once the count exceeds
        ``after_n_batches``; a non-positive interval disables this."""
        if self.after_n_batches <= 0:
            return
        self.counter += 1
        if self.counter > self.after_n_batches:
            self.counter = 0
            self.predict_and_call(batch)
def __init__(self):
    """Configure the DeepLepton defaults.

    Sets description, truth branches, weighting in ``lep_pt`` / ``lep_eta``,
    and the branch name lists plus per-collection counts for the global,
    neutral, charged, photon, electron, muon and SV inputs.
    """
    TrainData.__init__(self)

    self.description = "DeepLepton training datastructure"
    self.truth_branches = [
        'lep_isPromptId_Training',
        'lep_isNonPromptId_Training',
        'lep_isFakeId_Training',
    ]
    self.undefTruth = []
    self.weightbranchX = 'lep_pt'
    self.weightbranchY = 'lep_eta'
    self.remove = True
    self.referenceclass = 'lep_isPromptId_Training'

    # setting DeepLepton specific defaults
    self.treename = "tree"

    eta_edges = [-2.5, -2., -1.5, -1., -0.5, 0.5, 1, 1.5, 2., 2.5]
    self.weight_binX = np.geomspace(3.5, 2000, 30)
    self.weight_binY = np.array(eta_edges, dtype=float)

    self.global_branches = [
        'lep_pt', 'lep_eta', 'lep_phi',
        'lep_mediumId',
        'lep_miniPFRelIso_all',
        'lep_sip3d', 'lep_dxy', 'lep_dz',
        'lep_charge',
        'lep_dxyErr', 'lep_dzErr', 'lep_ip3d',
        'lep_jetPtRelv2', 'lep_jetRelIso',
        'lep_miniPFRelIso_chg', 'lep_mvaLowPt', 'lep_nStations',
        'lep_nTrackerLayers', 'lep_pfRelIso03_all', 'lep_pfRelIso03_chg',
        'lep_pfRelIso04_all', 'lep_ptErr',
        'lep_segmentComp', 'lep_tkRelIso', 'lep_tunepRelPt',
    ]

    # The neutral and photon lists share one set of name suffixes, and the
    # charged, electron and muon lists share another; only the prefix
    # differs, so each list is built from its prefix.
    neutral_suffixes = [
        'eta', 'phi', 'pt', 'puppiWeight', 'puppiWeightNoLep',
        'ptRel', 'deltaR',
    ]
    track_suffixes = [
        'd0', 'd0Err', 'dz', 'dzErr', 'eta', 'mass', 'phi', 'pt',
        'puppiWeight', 'puppiWeightNoLep', 'trkChi2', 'vtxChi2', 'charge',
        'lostInnerHits', 'pvAssocQuality', 'trkQuality', 'ptRel', 'deltaR',
    ]

    self.pfCand_neutral_branches = [
        'pfCand_neutral_' + suffix for suffix in neutral_suffixes
    ]
    self.npfCand_neutral = 10

    self.pfCand_charged_branches = [
        'pfCand_charged_' + suffix for suffix in track_suffixes
    ]
    self.npfCand_charged = 80

    self.pfCand_photon_branches = [
        'pfCand_photon_' + suffix for suffix in neutral_suffixes
    ]
    self.npfCand_photon = 50

    self.pfCand_electron_branches = [
        'pfCand_electron_' + suffix for suffix in track_suffixes
    ]
    self.npfCand_electron = 4

    self.pfCand_muon_branches = [
        'pfCand_muon_' + suffix for suffix in track_suffixes
    ]
    self.npfCand_muon = 6

    self.SV_branches = [
        'SV_dlen', 'SV_dlenSig', 'SV_dxy', 'SV_dxySig', 'SV_pAngle',
        'SV_chi2', 'SV_eta', 'SV_mass', 'SV_ndof', 'SV_phi', 'SV_pt',
        'SV_x', 'SV_y', 'SV_z', 'SV_ptRel', 'SV_deltaR',
    ]
    self.nSV = 10
def __init__(self):
    """Set the tree name, the two truth classes and the reference class."""
    import numpy
    TrainData.__init__(self)

    # setting DeepJet specific defaults
    # (the original assigned the same treename twice; once is enough)
    self.treename = "deepntuplizer/tree"
    self.truthclasses = ['class1', 'class2']
    self.referenceclass = 'class1'
def test_slice(self):
    """Slicing or skimming a TrainData must give the same result as slicing
    each stored array directly."""
    print('TestTrainData: skim')

    a = self.createSimpleArray('int32', 600)
    b = self.createSimpleArray('float32', 600)
    d = self.createSimpleArray('float32', 600)

    # reference: slice every array on its own
    expected = [arr.getSlice(2, 3) for arr in (a, b, d)]

    td = TrainData()
    td._store([a, b], [d], [])

    def stored_arrays(traindata):
        # rebuild the two feature arrays and the truth array from the flat
        # numpy lists (consecutive list entries are passed as one pair)
        feats = traindata.transferFeatureListToNumpy(False)
        truth = traindata.transferTruthListToNumpy(False)
        return [
            SimpleArray(feats[0], feats[1]),
            SimpleArray(feats[2], feats[3]),
            SimpleArray(truth[0], truth[1]),
        ]

    for want, got in zip(expected, stored_arrays(td.getSlice(2, 3))):
        self.assertEqual(want, got)

    # test skim
    td.skim(2)
    for want, got in zip(expected, stored_arrays(td)):
        self.assertEqual(want, got)
def __init__(self):
    """Configure the DeepCSV defaults.

    Sets description, truth branches, weighting in ``jet_pt`` / ``jet_eta``,
    the global / track / eta-rel / vertex branch lists with their counts, and
    the reduced truth classes.
    """
    TrainData.__init__(self)

    self.description = "DeepCSV training datastructure"

    self.truth_branches = [
        'isB', 'isBB', 'isGBB', 'isLeptonicB', 'isLeptonicB_C',
        'isC', 'isGCC', 'isCC', 'isUD', 'isS', 'isG',
    ]
    self.undefTruth = ['isUndefined']
    self.weightbranchX = 'jet_pt'
    self.weightbranchY = 'jet_eta'
    self.remove = True
    self.referenceclass = 'isB'

    pt_edges = [
        10, 25, 30, 35, 40, 45, 50, 60, 75, 100,
        125, 150, 175, 200, 250, 300, 400, 500, 600, 2000,
    ]
    eta_edges = [-2.5, -2., -1.5, -1., -0.5, 0.5, 1, 1.5, 2., 2.5]
    self.weight_binX = np.array(pt_edges, dtype=float)
    self.weight_binY = np.array(eta_edges, dtype=float)

    self.global_branches = [
        'jet_pt',
        'jet_eta',
        'TagVarCSV_jetNSecondaryVertices',
        'TagVarCSV_trackSumJetEtRatio',
        'TagVarCSV_trackSumJetDeltaR',
        'TagVarCSV_vertexCategory',
        'TagVarCSV_trackSip2dValAboveCharm',
        'TagVarCSV_trackSip2dSigAboveCharm',
        'TagVarCSV_trackSip3dValAboveCharm',
        'TagVarCSV_trackSip3dSigAboveCharm',
        'TagVarCSV_jetNSelectedTracks',
        'TagVarCSV_jetNTracksEtaRel',
    ]

    self.track_branches = [
        'TagVarCSVTrk_trackJetDistVal',
        'TagVarCSVTrk_trackPtRel',
        'TagVarCSVTrk_trackDeltaR',
        'TagVarCSVTrk_trackPtRatio',
        'TagVarCSVTrk_trackSip3dSig',
        'TagVarCSVTrk_trackSip2dSig',
        'TagVarCSVTrk_trackDecayLenVal',
    ]
    self.n_track = 6

    self.eta_rel_branches = ['TagVarCSV_trackEtaRel']
    self.n_eta_rel = 4

    self.vtx_branches = [
        'TagVarCSV_vertexMass',
        'TagVarCSV_vertexNTracks',
        'TagVarCSV_vertexEnergyRatio',
        'TagVarCSV_vertexJetDeltaR',
        'TagVarCSV_flightDistance2dVal',
        'TagVarCSV_flightDistance2dSig',
        'TagVarCSV_flightDistance3dVal',
        'TagVarCSV_flightDistance3dSig',
    ]
    self.n_vtx = 1

    self.reduced_truth = ['isB', 'isBB', 'isC', 'isUDSG']
def test_KerasDTypes(self):
    """The Keras feature dtypes must list each feature array's dtype
    followed by 'int64'."""
    # The label used to say 'split' (copied from test_split), which made the
    # test output misleading.
    print('TestTrainData: KerasDTypes')

    a = self.createSimpleArray('int32')
    b = self.createSimpleArray('float32', 600)
    c = self.createSimpleArray('int32')
    d = self.createSimpleArray('float32', 400)

    td = TrainData()
    td._store([a, b], [c, d], [])

    # data, rs, data, rs
    self.assertEqual(td.getKerasFeatureDTypes(),
                     ['int32', 'int64', 'float32', 'int64'])
def __init__(self):
    """Configure the ``deepntuplizerCA8/tree`` setup with ten truth classes.

    Weighting uses ``jet_pt`` / ``jet_eta`` with ``isTauHTauH`` as the
    reference class; ``weight`` is off and ``remove`` is on.
    """
    TrainData.__init__(self)

    # define truth:
    self.treename = "deepntuplizerCA8/tree"
    self.undefTruth = ['isUndefined']
    # currently not used: isGBB, isLeptonicB, isLeptonicB_C, isGCC,
    # isTauMTauM, isTauMTauE, isTauETauE, isTauH, isTauM, isTauE
    self.truthclasses = [
        'isB', 'isBB',
        'isC', 'isCC',
        'isUD', 'isS', 'isG',
        'isTauHTauH', 'isTauHTauM', 'isTauHTauE',
    ]

    self.registerBranches(self.truthclasses)
    self.registerBranches(self.undefTruth)

    # 'flatten' or class name ('lowest' was also tried)
    self.referenceclass = 'isTauHTauH'
    self.weightbranchX = 'jet_pt'
    # 'jet_mass' is the alternative that was tried for the Y axis
    self.weightbranchY = 'jet_eta'

    self.registerBranches([self.weightbranchX, self.weightbranchY])

    pt_edges = [
        10, 25, 30, 35, 40, 45, 50, 60, 75, 100,
        125, 150, 175, 200, 250, 300, 400, 500, 600, 2000,
    ]
    eta_edges = [-2.5, -2., -1.5, -1., -0.5, 0.5, 1, 1.5, 2., 2.5]
    self.weight_binX = np.array(pt_edges, dtype=float)
    self.weight_binY = np.array(eta_edges, dtype=float)

    self.weight = False
    self.remove = True
def __init__(self):
    """Set tree name, truth classes and reference class, start with an empty
    ``branches`` list, and call ``self.reduceTruth(None)``."""
    import numpy
    TrainData.__init__(self)

    # setting DeepJet specific defaults
    self.treename = "deepntuplizer/tree"
    self.undefTruth = ['isUndefined']
    self.referenceclass = 'isB'

    b_classes = ['isB', 'isBB', 'isGBB', 'isLeptonicB', 'isLeptonicB_C']
    c_classes = ['isC', 'isCC', 'isGCC']
    other_classes = ['isUD', 'isS', 'isG', 'isUndefined']
    self.truthclasses = b_classes + c_classes + other_classes

    self.branches = []
    self.reduceTruth(None)
def __init__(self):
    '''
    Base-class defaults for the FatJet studies; there should be no need
    to edit this for trying things out.

    Sets tree name, the two truth classes, weighting in fj_pt / fj_sdmass
    with reference class 'lowest', and the branches to read, then prints the
    resulting branch list.
    '''
    TrainData.__init__(self)

    # define truth:
    self.treename = "deepntuplizer/tree"
    self.undefTruth = ['isUndefined']
    self.truthclasses = ['fj_isNonCC', 'fj_isCC']

    # used for pt reshaping
    self.referenceclass = 'lowest'
    self.registerBranches(['fj_pt', 'fj_sdmass'])
    self.weightbranchX = 'fj_pt'
    self.weightbranchY = 'fj_sdmass'

    # range objects cannot be added on Python 3, so turn both into lists
    # before concatenating (this also works unchanged on Python 2).
    pt_edges = list(range(300, 1000, 50)) + list(range(1000, 2600, 200))
    self.weight_binX = numpy.array(pt_edges, dtype=float)
    self.weight_binY = numpy.array([40, 200], dtype=float)

    self.weight = True
    self.remove = False

    # this is only needed because the truth definitions are different from deepFlavour
    self.allbranchestoberead = []
    self.registerBranches(self.undefTruth)
    self.registerBranches(self.truthclasses)
    self.registerBranches([
        'fj_isBB', 'fj_isNonBB', 'fj_isNonCC', 'fj_isCC', 'fj_isQCD',
        'fj_isH', "label_H_bb", "label_H_cc", "label_QCD_bb",
        "label_QCD_cc", "label_QCD_others"
    ])
    print("Branches read:", self.allbranchestoberead)
def __init__(self):
    '''
    Base-class defaults for the FatJet studies; there should be no need
    to edit this for trying things out.

    Sets tree name, eight truth classes, weighting in fj_pt / fj_sdmass with
    reference class 'lowest', and the branches to read, then prints the
    resulting branch list.
    '''
    TrainData.__init__(self)

    # define truth:
    self.treename = "deepntuplizer/tree"
    self.undefTruth = ['isUndefined']
    self.truthclasses = [
        "fj_isH", "fj_isCC", "fj_isBB", "fj_isNonCC", "fj_isNonBB",
        "fj_isZ", "fj_isQCD", "sample_isQCD"
    ]

    # used for pt reshaping; options=['lowest', 'flatten', '<class_name>']
    self.referenceclass = 'lowest'
    self.weightbranchX = 'fj_pt'
    self.weightbranchY = 'fj_sdmass'

    # range objects cannot be added on Python 3, so turn both into lists
    # before concatenating (this also works unchanged on Python 2).
    pt_edges = list(range(300, 1000, 50)) + list(range(1000, 2600, 200))
    self.weight_binX = numpy.array(pt_edges, dtype=float)
    self.weight_binY = numpy.array([40, 200], dtype=float)

    self.weight = True
    self.remove = False
    self.removeUnderOverflow = True

    # this is only needed because the truth definitions are different from deepFlavour
    self.allbranchestoberead = []
    self.registerBranches(self.undefTruth)
    self.registerBranches(self.truthclasses)
    self.registerBranches(['fj_pt', 'fj_sdmass'])
    self.registerBranches([
        "label_H_bb",
        "label_H_cc",
        "label_QCD_bb",
        "label_QCD_cc",
        "label_QCD_others",
        "label_Z_bb",
        "label_Z_cc",
    ])
    print("Branches read:", self.allbranchestoberead)
def __init__(self):
    """Configure the ``Delphes`` tree defaults.

    Sets the feature and truth branch names, the size constants, and one
    regression target name per simcluster index ('0' .. '19'), then calls
    ``self.other_useless_inits()``.
    """
    TrainData.__init__(self)

    # input root tree name
    self.treename = "Delphes"

    self.feat_branch = "rechit_features"
    # this needs to be adapted!!
    self.truth_branch = "rechit_simcluster_fractions"

    # this should be fine
    self.max_rechits = 3500
    self.n_features = 10
    self.n_simcluster = 20

    self.regressiontargetclasses = list(map(str, range(self.n_simcluster)))

    self.other_useless_inits()
def test_split(self):
    """Splitting a TrainData must give the same result as splitting copies
    of the stored arrays one by one."""
    print('TestTrainData: split')

    a = self.createSimpleArray('int32')
    b = self.createSimpleArray('float32', 600)
    c = self.createSimpleArray('int32')
    d = self.createSimpleArray('float32', 400)

    # reference: split independent copies of every array
    copies = [a.copy(), b.copy(), c.copy(), d.copy()]
    expected = [arr.split(2) for arr in copies]

    td = TrainData()
    td._store([a, b], [c, d], [])
    split_off = td.split(2)

    feats = split_off.transferFeatureListToNumpy(False)
    truth = split_off.transferTruthListToNumpy(False)
    # the weight list is transferred as well; its content is not checked
    split_off.transferWeightListToNumpy(False)

    # consecutive entries of each flat list are passed as one pair
    rebuilt = [
        SimpleArray(flat[k], flat[k + 1])
        for flat in (feats, truth)
        for k in (0, 2)
    ]

    self.assertEqual(expected, rebuilt)
def __init__(self):
    """Set tree name, weighting in ``true_energy`` / ``true_eta`` with
    reference class ``'flatten'``, the X bin edges, and the branches to
    register."""
    import numpy
    TrainData.__init__(self)

    self.treename = "deepntuplizer/tree"

    self.weightbranchX = 'true_energy'
    self.weightbranchY = 'true_eta'
    self.referenceclass = 'flatten'

    energy_edges = [
        0, 1, 2, 3, 4, 5, 7.5, 10, 20, 30, 40, 50, 60, 80, 100,
        120, 140, 160, 200, 240, 300, 400,
    ]
    self.weight_binX = numpy.array(energy_edges, dtype=float)

    input_branches = [
        'rechit_energy', 'rechit_eta', 'rechit_phi', 'rechit_time',
        'rechit_layer', 'nrechits', 'seed_eta', 'seed_phi',
        'true_energy', 'true_eta', 'true_energyfraction',
    ]
    self.registerBranches(input_branches)
def writeOutPrediction(self, predicted, features, truth, weights,
                       outfilename, inputfile):
    """Write the first 100 entries of prediction, features and truth.

    Only element 0 of ``predicted``, ``features`` and ``truth`` is used; the
    three slices are stored together in a TrainData and written to
    ``outfilename``. ``weights`` and ``inputfile`` are accepted but not used.
    Returns None.

    The unreachable root_numpy export that used to follow the ``return``
    (including a dangling triple quote) and the unused ``unroll`` helper have
    been removed.
    """
    # predicted will be a list
    print('writeout')
    print('predicted', predicted[0].shape)
    print('features', features[0].shape)
    print('truth', truth[0].shape)

    # first 100 are enough for now
    parr = predicted[0][:100, ...]
    farr = features[0][:100, ...]
    tarr = truth[0][:100, ...]

    from DeepJetCore.TrainData import TrainData
    # use traindata as data storage
    td = TrainData()
    td._store([parr, farr, tarr], [], [])
    td.writeToFile(outfilename)
def writeOutPrediction(self, predicted, features, truth, weights,
                       outfilename, inputfile):
    """Store element 0 of prediction, features and truth in a TrainData and
    write it to ``outfilename``.

    ``weights`` and ``inputfile`` are accepted but not used.
    """
    # predicted will be a list
    print('writeout')
    print('predicted', predicted[0].shape)
    print('features', features[0].shape)
    print('truth', truth[0].shape)

    first_arrays = [group[0] for group in (predicted, features, truth)]

    from DeepJetCore.TrainData import TrainData
    # use traindata as data storage
    storage = TrainData()
    storage._store(first_arrays, [], [])
    storage.writeToFile(outfilename)
def worker(i):
    """Convert sample ``i`` from the old TrainData format to the new one.

    Reads ``dcold.samples[i]`` (prefixed with ``dir``) with the old reader,
    stores its x / y / w in a new TrainData and writes that to
    ``dcnew.samples[i]``. Both objects are cleared afterwards. Returns True.
    """
    td = TDOld()
    tdnew = TrainData()
    print("converting", dcold.samples[i])
    td.readIn(dir + dcold.samples[i])

    # The original called tdnew.tdnew._store(...); the store method is
    # called directly on the new TrainData here.
    tdnew._store(td.x, td.y, td.w)
    tdnew.writeToFile(dcnew.samples[i])

    td.clear()
    tdnew.clear()
    return True
def test_TrainDataRead(self):
    """A TrainData file written earlier must still be readable and must
    match the arrays saved alongside it."""
    print('TestCompatibility TrainData')

    td = TrainData()
    td.readFromFile('trainData_previous.djctd')
    self.assertEqual(td.nFeatureArrays(), 1)

    # reference built from the saved numpy files
    reference = SimpleArray(np.load("np_arr.npy"), np.load("np_rs.npy"))

    feats = td.transferFeatureListToNumpy(False)
    data, row_splits = feats[0], feats[1]
    loaded = SimpleArray(data, np.array(row_splits, dtype='int64'))

    self.assertEqual(loaded, reference)
def premixfile(i):
    """Create premixed file number ``i`` and write it to the output directory.

    Calls ``pmf`` on ``allfiles`` with the outer ``nEvents`` and ``nPU``,
    stores the resulting array as the only feature array of a TrainData, and
    writes it to ``<outputDir>/<i>_mix.djctd``.
    """
    # These used to be set as locals and then repeated as literals in the
    # call below; the call now uses the names so there is one place to edit.
    eventsperround = 100
    nfilespremix = 5

    filearr = pmf(allfiles,
                  nEvents,
                  nPU,
                  nfilespremix=nfilespremix,
                  eventsperround=eventsperround)

    print('nevents', filearr.shape[0])
    td = TrainData()
    td._store([filearr], [], [])
    print('..writing ' + str(i))
    td.writeToFile(outputDir + '/' + str(i) + '_mix.djctd')