def getFinalTruthDictSA(self):
    """Return the final truth dictionary with every entry as a ``SimpleArray``.

    ``self.getFinalTruthDictNumpy()`` is expected to return a dict whose
    values unpack into an ``(array, row_splits)`` pair. Each pair is wrapped
    as ``SimpleArray(array, row_splits, name=key)`` under the same key.
    """
    return {
        key: SimpleArray(values, row_splits, name=key)
        for key, (values, row_splits) in self.getFinalTruthDictNumpy().items()
    }
def convertFromSourceFile(self, filename, weighterobjects, istraining):
    """Create reproducible ragged test arrays keyed on ``filename``.

    NumPy's global random generator is seeded with the SHA-1 digest of
    ``filename`` reduced modulo 10**8, so the drawn sample count is the same
    for the same name. ``weighterobjects`` and ``istraining`` are not used.

    Returns:
        ``([features, features_int32], [truth], [])``. All three
        ``SimpleArray`` objects are built from the data and row splits
        returned by ``raggedtester.createData``; the truth array wraps the
        same data as the float feature array.
    """
    global raggedtester
    import hashlib
    from DeepJetCore import SimpleArray

    # Derive a deterministic seed from the file name.
    digest = hashlib.sha1(filename.encode('utf-8')).hexdigest()
    np.random.seed(int(digest, 16) % (10**8))

    # One-element array holding a sample count in [12, 100].
    nsamples = np.random.randint(12, 101, size=1)
    data, rs = raggedtester.createData(nsamples)

    features = SimpleArray(data, rs)
    truth = SimpleArray(data, rs)
    features_int = SimpleArray(np.array(data, dtype='int32'), rs)

    return [features, features_int], [truth], []
def convertFromSourceFile(self, filename, weighterobjects, istraining):
    """Convert one ROOT ntuple into feature and truth ``SimpleArray`` lists.

    The entry count of ``deepntuplizer/tree`` is read first and the ROOT file
    is closed again immediately; the actual data are read through
    ``MeanNormZeroPad`` and ``self.readTreeFromRootToTuple``, which both take
    the file name. ``self.nsamples`` is set to the entry count and then
    overwritten with the number of rows of the feature array.
    ``weighterobjects`` and ``istraining`` are not used.

    Returns:
        ``([features0], [truth], [])``.
    """
    from DeepJetCore.preprocessing import MeanNormZeroPad
    from DeepJetCore.stopwatch import stopwatch
    import ROOT

    sw = stopwatch()

    # Give eos time to recover (timeout argument: 120).
    fileTimeOut(filename, 120)

    rfile = ROOT.TFile(filename)
    try:
        tree = rfile.Get("deepntuplizer/tree")
        self.nsamples = tree.GetEntries()
    finally:
        # The tree is only needed for its entry count; do not leak the
        # file handle for the rest of the conversion.
        rfile.Close()
    print('took ', sw.getAndReset(), ' seconds for getting tree entries')

    # split for convolutional network
    x_global = MeanNormZeroPad(filename, None, ['x'], [1], self.nsamples)
    print('took ', sw.getAndReset(),
          ' seconds for mean norm and zero padding (C module)')

    Tuple = self.readTreeFromRootToTuple(
        filename, branches=['class1', 'class2', 'x'])
    truthtuple = Tuple[self.truthclasses]
    alltruth = self.reduceTruth(truthtuple)

    # The feature array defines the final sample count.
    self.nsamples = x_global.shape[0]
    print(x_global.shape, alltruth.shape, self.nsamples)

    truth = SimpleArray(alltruth, name="truth")
    feat = SimpleArray(x_global, name="features0")
    return [feat], [truth], []
def convertFromSourceFile(self, filename, weighterobjects, istraining, treename="SLCIOConverted"):
    """Convert one SLCIO-derived tree into hit features and truth arrays.

    Hit energy and position branches are concatenated along the last axis
    into the feature array; the row splits come from the energy branch.
    Only the truth index and truth energy carry real values, every other
    truth output is a zero array shaped like the truth energy.
    ``weighterobjects`` and ``istraining`` are not used.

    Returns:
        ``([features, t_idx, t_energy, t_pos, t_time, t_pid, t_spectator,
        t_fully_contained], [t_rest], [])``.
    """
    fileTimeOut(filename, 10)  # 10 seconds for eos to recover
    tree = uproot.open(filename)[treename]
    nevents = tree.numentries  # read but not used further
    selection = None

    def read_flat(branch):
        # Read a branch without asking for row splits.
        return self.branchToFlatArray(tree[branch], False, selection)

    hit_energy, rs = self.branchToFlatArray(tree["energy"], True, selection)
    hit_x = read_flat("positionX")
    hit_y = read_flat("positionY")
    hit_z = read_flat("positionZ")

    truth_idx = read_flat("maxE_particle_index")
    truth_energy = read_flat("maxE_particle_energy")

    # not used right now
    truth_px = read_flat("maxE_particle_pX")
    truth_py = read_flat("maxE_particle_pY")
    truth_pz = read_flat("maxE_particle_pZ")

    features = np.concatenate([hit_energy, hit_x, hit_y, hit_z], axis=-1)

    farr = SimpleArray(features, rs, name="features")
    t_idxarr = SimpleArray(truth_idx, rs, name="t_idx")
    t_energyarr = SimpleArray(truth_energy, rs, name="t_energy")

    # just for compatibility: all remaining truth outputs are zeros
    placeholder = np.zeros_like(truth_energy)
    t_posarr = SimpleArray(placeholder, rs, name="t_pos")
    t_time = SimpleArray(placeholder, rs, name="t_time")
    # this would need some massaging so we can't use the PID directly
    t_pid = SimpleArray(placeholder, rs, name="t_pid")
    t_spectator = SimpleArray(placeholder, rs, name="t_spectator")
    t_fully_contained = SimpleArray(placeholder, rs, name="t_fully_contained")
    t_rest = SimpleArray(placeholder, rs, name="t_rest")

    # breaks with old plotting but needs to be done at some point
    feature_list = [
        farr, t_idxarr, t_energyarr, t_posarr, t_time, t_pid, t_spectator,
        t_fully_contained
    ]
    return feature_list, [t_rest], []
def createFromCsvs(self, filename_truth, filename_hits, filename_cells, filename_particles, outfilename,
                   basedir='/Users/shahrukhqasim/Downloads/kaggle_trackml/train_100_events/'):
    """Build feature/truth arrays from one set of TrackML CSVs and write them.

    Particles with transverse momentum ``sqrt(px**2 + py**2) <= 1.5`` are
    dropped, together with the truth rows and hits that belong to them.
    Particle ids are relabelled to consecutive integers following the sorted
    order of the remaining ids.

    Args:
        filename_truth, filename_hits, filename_particles: CSV file names,
            joined onto ``basedir`` with ``os.path.join``.
        filename_cells: accepted for interface compatibility; the cells file
            is not needed for any output and is not read.
        outfilename: passed to ``self.writeToFile``.
        basedir: directory the CSV names are resolved against. Defaults to
            the location that used to be hard-coded here.

    The hit and truth tables are filtered independently and stacked row by
    row -- NOTE(review): this assumes both CSVs list hits in the same order;
    confirm for the input files used.
    """
    df_hits = pd.read_csv(os.path.join(basedir, filename_hits), sep=',')
    df_truth = pd.read_csv(os.path.join(basedir, filename_truth), sep=',')
    df_particles = pd.read_csv(os.path.join(basedir, filename_particles), sep=',')

    # Keep only particles above the transverse-momentum threshold and the
    # truth rows / hits associated with them.
    pt = np.sqrt(df_particles['px']**2 + df_particles['py']**2)
    df_particles = df_particles[(pt > 1.5)]
    df_truth = df_truth[np.isin(df_truth['particle_id'], df_particles['particle_id'])]
    df_hits = df_hits[np.isin(df_hits['hit_id'], df_truth['hit_id'])]

    # Map each particle id to its rank among the sorted unique ids; this is
    # what the inverse index of np.unique provides in a single pass.
    _, particle_idx = np.unique(df_truth['particle_id'].to_numpy(), return_inverse=True)
    particle_idx = particle_idx.astype(np.int32)

    farr = np.stack(
        (df_hits['x'], df_hits['y'], df_hits['z'], df_hits['volume_id'],
         df_hits['layer_id'], df_hits['module_id']),
        axis=-1)
    farr = farr.astype(np.float32)

    tarr = np.stack(
        (particle_idx, df_truth['hit_id'], df_truth['tx'], df_truth['ty'],
         df_truth['tz'], df_truth['weight']),
        axis=-1)
    tarr = tarr.astype(np.float32)

    # A single segment covering all hits of this event.
    rs = np.array([0, len(farr)], np.int64)
    print(rs, len(farr), len(tarr))

    farr_ = SimpleArray(name="feats")
    farr_.createFromNumpy(farr, rs)

    # The truth is stored twice: once in the feature list, once as truth.
    tarr_ = SimpleArray(name="truths")
    tarr_.createFromNumpy(tarr, rs)
    tarr_2 = SimpleArray(name="truths")
    tarr_2.createFromNumpy(tarr, rs)

    print(farr.shape, tarr.shape)

    self._store([farr_, tarr_], [tarr_2], [])
    self.writeToFile(outfilename)
def createFromCsvsIntoStandard(self, filename_truth, filename_hits, filename_cells, filename_particles, outfilename):
    """Convert one TrackML event (four CSVs) into the standard array layout.

    Steps, in order:

    1. Sum the cell ``value`` column over consecutive rows with the same
       ``hit_id`` and attach the sums to the hits as ``energy``.
    2. Drop particles with ``sqrt(px**2 + py**2) <= 1.5`` and the truth rows
       and hits belonging to them.
    3. Derive hit geometry (R, theta, eta) and per-hit truth quantities.
       The stored "truth energy" is the particle's transverse momentum; the
       deposited energy is the sum of the hit energies of the particle.
    4. Store the arrays via ``self._store`` and write ``outfilename``.

    Raises:
        ValueError: if the hit ids reconstructed from the cells file do not
            match the ``hit_id`` column of the hits file element by element.

    NOTE(review): hits and truth rows are filtered independently and then
    combined by position, which assumes both CSVs list hits in the same
    order -- confirm for the input files used.
    """
    df_hits = pd.read_csv(filename_hits, sep=',')
    df_truth = pd.read_csv(filename_truth, sep=',')
    df_particles = pd.read_csv(filename_particles, sep=',')
    df_cells = pd.read_csv(filename_cells, sep=',')

    cells_hit_ids = df_cells['hit_id'].to_numpy(copy=True)
    cells_hit_weights = df_cells['value'].to_numpy(copy=True)
    hit_ids = df_hits['hit_id']

    # Sum the cell values of each run of identical, consecutive hit ids.
    last_hit_id = -1
    hit_ids_c = []
    rechitEnergy = []
    for hit_id, weight in zip(cells_hit_ids, cells_hit_weights):
        if hit_id == last_hit_id:
            rechitEnergy[-1] += weight
        else:
            hit_ids_c.append(hit_id)
            rechitEnergy.append(weight)
        last_hit_id = hit_id

    # Element-wise comparison: a sum of differences could cancel to zero for
    # mismatched ids, and an assert would vanish under ``python -O``.
    if not np.array_equal(hit_ids.to_numpy(), np.asarray(hit_ids_c)):
        raise ValueError(
            "hit ids in the cells file do not match the hits file")
    df_hits['energy'] = rechitEnergy

    # Transverse-momentum cut on the particles, propagated to truth and hits.
    ptx = np.sqrt(df_particles['px']**2 + df_particles['py']**2)
    df_particles = df_particles[(ptx > 1.5)]
    df_truth = df_truth[np.isin(df_truth['particle_id'], df_particles['particle_id'])]
    df_hits = df_hits[np.isin(df_hits['hit_id'], df_truth['hit_id'])]

    rechHitEnergy = df_hits['energy'].to_numpy(dtype=np.float32)
    recHitX = df_hits['x'].to_numpy(dtype=np.float32)
    recHitY = df_hits['y'].to_numpy(dtype=np.float32)
    recHitZ = df_hits['z'].to_numpy(dtype=np.float32)
    recHitR = np.sqrt(recHitX**2. + recHitY**2. + recHitZ**2.)
    recHitTheta = np.arccos(recHitZ / recHitR)
    recHitEta = -np.log(np.tan(recHitTheta / 2))
    zeroFeature = recHitEta * 0  # float placeholder, one entry per hit

    particle_id = df_truth['particle_id'].to_numpy()
    particle_id2 = particle_id.copy()

    recHitTruthEnergy = zeroFeature.copy()
    recHitTruthDepEnergy = zeroFeature.copy()

    unique_pids = df_particles['particle_id']
    px = df_particles['px'].to_numpy()
    py = df_particles['py'].to_numpy()
    for i, u in enumerate(unique_pids):
        # ``particle_id`` itself is never modified, so the mask is stable.
        belongs = particle_id == u
        particle_id2[belongs] = i
        # transverse momentum serves as the truth "energy"
        recHitTruthEnergy[belongs] = np.sqrt(px[i]**2 + py[i]**2)
        recHitTruthDepEnergy[belongs] = np.sum(rechHitEnergy[belongs])

    recHitSimClusIdx = particle_id2.astype(np.int32)

    recHitTruthX = df_truth['tx'].to_numpy().astype(np.float32)
    recHitTruthY = df_truth['ty'].to_numpy().astype(np.float32)
    recHitTruthZ = df_truth['tz'].to_numpy().astype(np.float32)
    recHitTruthR = np.sqrt(recHitTruthX**2. + recHitTruthY**2. +
                           recHitTruthZ**2.).astype(np.float32)
    recHitTruthTheta = np.arccos(recHitTruthZ / recHitTruthR).astype(
        np.float32)
    recHitTruthEta = -np.log(np.tan(recHitTruthTheta / 2)).astype(
        np.float32)
    # arctan2 keeps the quadrant and is well defined for x == 0, unlike
    # arctan(y / x).
    recHitTruthPhi = np.arctan2(recHitTruthY, recHitTruthX).astype(
        np.float32)
    recHitTruthTime = zeroFeature
    recHitTruthDepEnergy = recHitTruthDepEnergy.astype(np.float32)
    recHitTruthPID = zeroFeature

    truth = np.stack(
        [
            np.array(recHitSimClusIdx, dtype='float32'),  # 0
            recHitTruthEnergy,
            recHitTruthX,
            recHitTruthY,
            recHitTruthZ,  # 4
            zeroFeature,  # truthHitAssignedDirX,
            zeroFeature,  # 6
            zeroFeature,
            recHitTruthEta,
            recHitTruthPhi,
            recHitTruthTime,  # 10
            zeroFeature,
            zeroFeature,
            recHitTruthDepEnergy,  # 13
            zeroFeature,  # 14
            zeroFeature,  # 15
            recHitTruthPID,  # 16 - 16+n_classes #won't be used anymore
            zeroFeature,
            zeroFeature,
        ],
        axis=1)
    truth = truth.astype(np.float32)

    features = np.stack(
        [
            rechHitEnergy,
            recHitEta,
            zeroFeature,  # indicator if it is track or not
            recHitTheta,
            recHitR,
            recHitX,
            recHitY,
            recHitZ,
            zeroFeature,
            zeroFeature,
        ],
        axis=1)
    features = features.astype(np.float32)

    # A single segment covering all hits of this event.
    rs = np.array([0, len(features)], np.int64)

    farr = SimpleArray(name="recHitFeatures")
    farr.createFromNumpy(features, rs)

    t_idxarr = SimpleArray(name="recHitTruthClusterIdx")
    t_idxarr.createFromNumpy(recHitSimClusIdx[..., np.newaxis], rs)

    t_energyarr = SimpleArray(name="recHitTruthEnergy")
    t_energyarr.createFromNumpy(recHitTruthEnergy[..., np.newaxis], rs)

    t_posarr = SimpleArray(name="recHitTruthPosition")
    t_posarr.createFromNumpy(
        np.concatenate(
            [recHitTruthX[..., np.newaxis], recHitTruthY[..., np.newaxis]],
            axis=-1), rs)

    t_time = SimpleArray(name="recHitTruthTime")
    t_time.createFromNumpy(recHitTruthTime[..., np.newaxis], rs)

    t_pid = SimpleArray(name="recHitTruthID")
    t_pid.createFromNumpy(recHitTruthPID[..., np.newaxis], rs)

    # why do we have inconsistent namings, where is it needed? wrt. to truth array
    t_spectator = SimpleArray(name="recHitSpectatorFlag")
    t_spectator.createFromNumpy(zeroFeature[..., np.newaxis], rs)

    t_fully_contained = SimpleArray(name="recHitFullyContainedFlag")
    t_fully_contained.createFromNumpy(
        (zeroFeature[..., np.newaxis] + 1).astype(np.int32), rs)

    # remaining truth is mostly for consistency in the plotting tools
    # NOTE(review): t_rest is built but not passed to _store below (the
    # truth list is empty) -- confirm this is intended for the new format.
    t_rest = SimpleArray(name="recHitTruth")
    t_rest.createFromNumpy(truth, rs)

    x, y, z = [
        farr, t_idxarr, t_energyarr, t_posarr, t_time, t_pid, t_spectator,
        t_fully_contained
    ], [], []
    self._store(x, y, z)
    self.writeToFile(outfilename)
    print("Storing in new format")
def base_convertFromSourceFile(self, filename, weighterobjects, istraining, treename="Events", removeTracks=True):
    """Convert one ``Events`` tree into rec-hit features and per-hit truth.

    Hits are split with ``self.setSplitIdx(z < 0)`` / ``self.splitJaggedArray``
    and the offsets of the split z array are used as row splits for every
    output. Per-hit truth is looked up from the merged sim clusters through
    ``BestMergedSimClusterIdx``; hits whose cluster is not flagged
    ``MergedSimCluster_isTrainable`` get index -1 (noise). For noise hits the
    truth position, energy and time are set to the hit's own values.

    The stored truth energy is the muon-corrected boundary energy; for
    clusters where it does not exceed ``low_energy_shower_cutoff`` the
    deposited energy is stored instead.

    ``weighterobjects``, ``istraining`` and ``removeTracks`` are not used.

    Returns:
        ``([recHitFeatures, recHitTruthClusterIdx, recHitTruthEnergy,
        recHitTruthPosition, recHitTruthTime, recHitTruthID,
        recHitSpectatorFlag, recHitFullyContainedFlag], [recHitTruth], [])``.
    """
    fileTimeOut(filename, 10)  # 10 seconds for eos to recover
    tree = uproot.open(filename)[treename]

    hits = "RecHitHGC"
    front_face_z = 323  # this needs to be more precise

    recHitZUnsplit = self.hitObservable(tree, hits, "z", split=False, flatten=False)
    self.setSplitIdx(recHitZUnsplit < 0)
    recHitZ = self.splitJaggedArray(recHitZUnsplit)
    offsets = recHitZ.offsets

    recHitX = self.hitObservable(tree, hits, "x", split=True, flatten=False)
    recHitY = self.hitObservable(tree, hits, "y", split=True, flatten=False)
    recHitSimClusIdx = self.hitObservable(tree, hits, "BestMergedSimClusterIdx",
                                          split=True, flatten=False)

    # Define spectators: one DataFrame per (split) event.
    recHit_df_events = [
        pd.DataFrame({
            "recHitX": recHitX[i],
            "recHitY": recHitY[i],
            "recHitZ": recHitZ[i],
            "recHitSimClusIdx": recHitSimClusIdx[i]
        }) for i in range(recHitX.shape[0])
    ]

    for df_event in recHit_df_events:
        unique_shower_idx = np.unique(df_event['recHitSimClusIdx'])
        # Float from the start: the distances written below are floats, and
        # writing them into an integer column forces a dtype change.
        df_event['spectator_distance'] = 0.
        # adding number of rec hits that are associated to this truth cluster
        df_event['recHitSimClus_nHits'] = df_event.groupby(
            'recHitSimClusIdx').recHitX.transform(len)
        for idx in unique_shower_idx:
            df_shower = df_event[df_event['recHitSimClusIdx'] == idx]
            x_to_fit = df_shower[['recHitX', 'recHitY', 'recHitZ']].to_numpy()
            spectators_shower_dist = find_pcas(x_to_fit, PCA_n=2, min_hits=10)
            if spectators_shower_dist is not None:
                spectators_idx = df_shower.index.tolist()
                df_event.loc[spectators_idx,
                             'spectator_distance'] = spectators_shower_dist
            del df_shower
    del df_event

    # Expand back
    recHitX = np.expand_dims(recHitX.content, axis=1)
    recHitY = np.expand_dims(recHitY.content, axis=1)
    recHitZ = np.expand_dims(recHitZ.content, axis=1)
    recHitSpectatorFlag = np.concatenate(
        np.array([df['spectator_distance'].to_numpy() for df in recHit_df_events],
                 dtype=object)).reshape(-1, 1)
    # number of rec hits
    recHitSimClusterNumHits = np.concatenate(
        np.array([df['recHitSimClus_nHits'].to_numpy() for df in recHit_df_events],
                 dtype=object)).reshape(-1, 1)
    del recHit_df_events

    recHitEnergy = self.hitObservable(tree, hits, "energy")
    recHitDetaId = self.hitObservable(tree, hits, "detId")  # not used in the outputs
    recHitTime = self.hitObservable(tree, hits, "time")
    recHitR = np.sqrt(recHitX * recHitX + recHitY * recHitY + recHitZ * recHitZ)
    recHitTheta = np.arccos(recHitZ / recHitR)
    recHitEta = -np.log(np.tan(recHitTheta / 2))

    # Don't split this until the end, so it can be used to index the truth arrays
    recHitSimClusIdx = self.hitObservable(tree, hits, "BestMergedSimClusterIdx",
                                          split=False, flatten=False)

    simClusterDepEnergy = tree["MergedSimCluster_recEnergy"].array()
    simClusterEnergy = tree["MergedSimCluster_boundaryEnergy"].array()
    simClusterEnergyNoMu = tree["MergedSimCluster_boundaryEnergyNoMu"].array()
    # number of sim hits (not used in the outputs)
    simClusterNumHits = tree["MergedSimCluster_nHits"].array()

    # Remove muon energy, add back muon deposited energy
    unmergedId = tree["SimCluster_pdgId"].array()
    unmergedDepE = tree["SimCluster_recEnergy"].array()
    unmergedMatchIdx = tree["MergedSimCluster_SimCluster_MatchIdx"].array()
    unmergedMatches = tree["MergedSimCluster_SimClusterNumMatch"].array()
    # Same object as unmergedDepE: the non-muon entries are zeroed in place.
    unmergedDepEMuOnly = unmergedDepE
    unmergedDepEMuOnly[np.abs(unmergedId) != 13] = 0
    # Add another layer of nesting, then sum over all unmerged associated to merged
    unmergedDepEMuOnly = ak.JaggedArray.fromcounts(
        unmergedMatches.counts,
        ak.JaggedArray.fromcounts(
            unmergedMatches.content,
            unmergedDepEMuOnly[unmergedMatchIdx].flatten()))
    depEMuOnly = unmergedDepEMuOnly.sum()
    # why wasn't it possible to just do instead of all of the above:
    # simClusterEnergy[simClusterPdgId == 13] = simClusterDepEnergy ?
    simClusterEnergyMuCorr = simClusterEnergyNoMu + depEMuOnly

    simClusterX = tree["MergedSimCluster_impactPoint_x"].array()
    simClusterY = tree["MergedSimCluster_impactPoint_y"].array()
    simClusterZ = tree["MergedSimCluster_impactPoint_z"].array()
    simClusterTime = tree["MergedSimCluster_impactPoint_t"].array()
    simClusterEta = tree["MergedSimCluster_impactPoint_eta"].array()
    simClusterPhi = tree["MergedSimCluster_impactPoint_phi"].array()
    simClusterPdgId = tree["MergedSimCluster_pdgId"].array()

    # Mark simclusters outside of volume or with very few hits as noise
    # Maybe not a good idea if the merged SC pdgId is screwed up
    # Probably removing neutrons is a good idea though
    #noNeutrons = simClusterPdgId[recHitSimClusIdx] == 2112

    # filter non-boundary positions. Hopefully working?
    goodSimClus = tree["MergedSimCluster_isTrainable"].array()
    # Don't split by index here to keep same dimensions as SimClusIdx
    markNoise = self.truthObjects(~goodSimClus, recHitSimClusIdx, False,
                                  split=False, flatten=False).astype(np.bool_)

    nbefore = (recHitSimClusIdx < 0).sum().sum()
    recHitSimClusIdx[markNoise] = -1
    nafter = (recHitSimClusIdx < 0).sum().sum()

    print("Number of noise hits before", nbefore, "after", nafter)
    print('removed another factor of', nafter / nbefore, ' bad simclusters')

    recHitTruthPID = self.truthObjects(simClusterPdgId, recHitSimClusIdx, 0.)
    recHitTruthDepEnergy = self.truthObjects(simClusterDepEnergy, recHitSimClusIdx, 0)
    recHitTruthEnergy = self.truthObjects(simClusterEnergy, recHitSimClusIdx, 0)
    recHitTruthEnergyCorrMu = self.truthObjects(simClusterEnergyMuCorr, recHitSimClusIdx, 0)

    low_energy_shower_cutoff = 3
    # Uncorrected currently not used
    recHitTruthEnergy = np.where(
        recHitTruthEnergy > low_energy_shower_cutoff,
        recHitTruthEnergy, recHitTruthDepEnergy)
    # The cutoff has to land in the corrected energy, which is the one that
    # is stored below; assigning it to recHitTruthEnergy would only overwrite
    # the line above and leave the stored values without the cutoff.
    corrected_dtype = recHitTruthEnergyCorrMu.dtype
    recHitTruthEnergyCorrMu = np.where(
        recHitTruthEnergyCorrMu > low_energy_shower_cutoff,
        recHitTruthEnergyCorrMu,
        recHitTruthDepEnergy).astype(corrected_dtype, copy=False)

    # very bad names because these quantities are associated to Merged Clusters and not hits
    recHitTruthX = self.truthObjects(simClusterX, recHitSimClusIdx, 0)
    recHitTruthY = self.truthObjects(simClusterY, recHitSimClusIdx, 0)
    recHitTruthZ = self.truthObjects(simClusterZ, recHitSimClusIdx, 0)
    recHitTruthTime = self.truthObjects(simClusterTime, recHitSimClusIdx, 0)

    # R and theta are computed but not part of any output.
    recHitTruthR = np.sqrt(recHitTruthX * recHitTruthX +
                           recHitTruthY * recHitTruthY +
                           recHitTruthZ * recHitTruthZ)
    recHitTruthTheta = np.arccos(
        np.divide(recHitTruthZ, recHitTruthR,
                  out=np.zeros_like(recHitTruthZ),
                  where=recHitTruthR != 0))

    recHitTruthPhi = self.truthObjects(simClusterPhi, recHitSimClusIdx, 0)
    recHitTruthEta = self.truthObjects(simClusterEta, recHitSimClusIdx, 0)

    #recHitAverageEnergy = self.truthObjects(simClusterDepEnergy/simClusterNumHits, recHitSimClusIdx, 0)
    # this is not technically very good because simClusterNumHits is number of sim clusters, not reco
    recHitAverageEnergy = recHitTruthDepEnergy / recHitSimClusterNumHits  # not used in the outputs

    # Placeholder
    zeroFeature = np.zeros(shape=(len(recHitEnergy), 1), dtype='float32')

    features = np.concatenate(
        [
            recHitEnergy,
            recHitEta,
            zeroFeature,  # indicator if it is track or not
            recHitTheta,
            recHitR,
            recHitX,
            recHitY,
            recHitZ,
            recHitTime,
        ],
        axis=1)

    farr = SimpleArray(name="recHitFeatures")
    farr.createFromNumpy(features, offsets)
    del features

    recHitSimClusIdx = np.expand_dims(
        self.splitJaggedArray(recHitSimClusIdx).content.astype(np.int32),
        axis=1)

    # The index array is final from here on, so the masks can be shared.
    is_noise = recHitSimClusIdx < 0
    is_matched = recHitSimClusIdx >= 0

    print('noise',
          (100 * np.count_nonzero(is_noise)) // recHitSimClusIdx.shape[0],
          '% of hits')
    print('truth eta min max',
          np.min(np.abs(recHitTruthEta[is_matched])),
          np.max(np.abs(recHitTruthEta[is_matched])))
    print('non-boundary truth positions',
          np.count_nonzero(
              np.abs(np.abs(recHitTruthZ[is_matched]) - 320) > 5) /
          recHitTruthZ[is_matched].shape[0])

    # now all numpy
    # Why do we want noise (-1) sim hits to be equal rec?
    recHitTruthX[is_noise] = recHitX[is_noise]
    recHitTruthY[is_noise] = recHitY[is_noise]
    recHitTruthZ[is_noise] = recHitZ[is_noise]
    recHitTruthEnergyCorrMu[is_noise] = recHitEnergy[is_noise]
    recHitTruthTime[is_noise] = recHitTime[is_noise]

    # NOTE(review): the comparison uses the signed z, so every truth position
    # with negative z is flagged as contained -- confirm whether abs(z) is meant.
    fully_contained = np.where(recHitTruthZ < front_face_z, 1., 0.).astype('float32')

    truth = np.concatenate(
        [
            np.array(recHitSimClusIdx, dtype='float32'),  # 0
            recHitTruthEnergyCorrMu,
            recHitTruthX,
            recHitTruthY,
            recHitTruthZ,  # 4
            zeroFeature,  # truthHitAssignedDirX,
            zeroFeature,  # 6
            zeroFeature,
            recHitTruthEta,
            recHitTruthPhi,
            recHitTruthTime,  # 10
            zeroFeature,
            zeroFeature,
            recHitTruthDepEnergy,  # 13
            zeroFeature,  # 14
            zeroFeature,  # 15
            recHitTruthPID,  # 16 - 16+n_classes #won't be used anymore
            np.array(recHitSpectatorFlag, dtype='float32'),
            fully_contained,
        ],
        axis=1)

    t_idxarr = SimpleArray(recHitSimClusIdx, offsets, name="recHitTruthClusterIdx")

    t_energyarr = SimpleArray(name="recHitTruthEnergy")
    t_energyarr.createFromNumpy(recHitTruthEnergyCorrMu, offsets)

    t_posarr = SimpleArray(name="recHitTruthPosition")
    t_posarr.createFromNumpy(
        np.concatenate([recHitTruthX, recHitTruthY], axis=-1), offsets)

    t_time = SimpleArray(name="recHitTruthTime")
    t_time.createFromNumpy(recHitTruthTime, offsets)

    t_pid = SimpleArray(name="recHitTruthID")
    t_pid.createFromNumpy(recHitTruthPID, offsets)

    # why do we have inconsistent namings, where is it needed? wrt. to truth array
    t_spectator = SimpleArray(name="recHitSpectatorFlag")
    t_spectator.createFromNumpy(recHitSpectatorFlag.astype('float32'), offsets)

    t_fully_contained = SimpleArray(name="recHitFullyContainedFlag")
    t_fully_contained.createFromNumpy(fully_contained, offsets)

    # remaining truth is mostly for consistency in the plotting tools
    t_rest = SimpleArray(name="recHitTruth")
    t_rest.createFromNumpy(truth, offsets)

    return [
        farr, t_idxarr, t_energyarr, t_posarr, t_time, t_pid, t_spectator,
        t_fully_contained
    ], [t_rest], []
def base_convertFromSourceFile(self, filename, weighterobjects, istraining, treename="WindowNTupler/tree", removeTracks=True):
    """Convert one window ntuple into rec-hit features and per-hit truth.

    Only hits with positive ``recHitEnergy`` are kept; with ``removeTracks``
    the hits are additionally required to have ``recHitID > -0.5``. The row
    splits are taken from the energy branch and shared by all outputs. The
    stored truth energy is the assigned *deposited* energy ("for now").
    ``weighterobjects`` and ``istraining`` are not used.

    Returns:
        ``([features, truth_idx, truth_energy, truth_position, truth_time,
        truth_pid], [truth_rest], [])``; the ``SimpleArray`` objects are
        created without names.
    """
    fileTimeOut(filename, 10)  # 10 seconds for eos to recover
    tree = uproot.open(filename)[treename]
    nevents = tree.numentries
    print("n entries: ", nevents)

    selection = (tree["recHitEnergy"]).array() > 0
    if removeTracks:
        selection = np.logical_and(selection,
                                   (tree["recHitID"]).array() > -0.5)

    def read_selected(branch, **options):
        # Flat array of the selected hits of one branch, without row splits.
        return self.branchToFlatArray(tree[branch], False, selection, **options)

    def to_simple_array(values):
        # Unnamed SimpleArray sharing the common row splits.
        wrapped = SimpleArray()
        wrapped.createFromNumpy(values, rs)
        return wrapped

    recHitEnergy, rs = self.branchToFlatArray(tree["recHitEnergy"], True, selection)
    recHitEta = read_selected("recHitEta")
    #recHitRelPhi = read_selected("recHitRelPhi")
    recHitTheta = read_selected("recHitTheta")
    recHitR = read_selected("recHitR")
    recHitX = read_selected("recHitX")
    recHitY = read_selected("recHitY")
    recHitZ = read_selected("recHitZ")
    recHitDetID = read_selected("recHitDetID")  # read but not stored
    recHitTime = read_selected("recHitTime")
    recHitID = read_selected("recHitID")
    recHitPad = read_selected("recHitPad")  # read but not stored

    ## weird shape for this truthHitFractions = self.branchToFlatArray(tree["truthHitFractions"], False)
    truthHitAssignementIdx = read_selected("truthHitAssignementIdx")  # 0
    truthHitAssignedEnergies = read_selected("truthHitAssignedEnergies")  # 1
    truthHitAssignedX = read_selected("truthHitAssignedX")  # 2
    truthHitAssignedY = read_selected("truthHitAssignedY")  # 3
    truthHitAssignedZ = read_selected("truthHitAssignedZ")  # 3
    truthHitAssignedDirX = read_selected("truthHitAssignedDirX")  # 4
    truthHitAssignedDirY = read_selected("truthHitAssignedDirY")  # 4
    truthHitAssignedDirZ = read_selected("truthHitAssignedDirZ")  # 4
    truthHitAssignedT = read_selected("truthHitAssignedT")
    truthHitAssignedEta = read_selected("truthHitAssignedEta")  # 2
    truthHitAssignedPhi = read_selected("truthHitAssignedPhi")  # 3
    truthHitAssignedDirEta = read_selected("truthHitAssignedDirEta")  # 4
    truthHitAssignedDepEnergies = read_selected("truthHitAssignedDepEnergies")  # 4
    truthHitAssignedDirR = read_selected("truthHitAssignedDirR")  # 4

    ## weird shape for this truthHitAssignedPIDs = self.branchToFlatArray(tree["truthHitAssignedPIDs"], False)
    pid_scores = read_selected("truthHitAssignedPIDs", is3d=True)
    # no one-hot encoding: keep only the index of the largest entry
    pid_index = np.expand_dims(np.argmax(pid_scores, axis=-1), axis=1)
    truthHitAssignedPIDs = np.array(pid_index, dtype='float32')

    ticlHitAssignementIdx = read_selected("ticlHitAssignementIdx")  # 4
    ticlHitAssignedEnergies = read_selected("ticlHitAssignedEnergies")  # 4

    # for now!
    truthHitAssignedEnergies = truthHitAssignedDepEnergies

    # for now rechitsSum
    # object weighted energy (1.0 for highest energy hit per object)

    feature_columns = [
        recHitEnergy,
        recHitEta,
        recHitID,  # indicator if it is track or not
        recHitTheta,
        recHitR,
        recHitX,
        recHitY,
        recHitZ,
        recHitTime,
    ]
    farr = to_simple_array(np.concatenate(feature_columns, axis=-1))

    truth_columns = [
        truthHitAssignementIdx,  # 0
        truthHitAssignedEnergies,
        truthHitAssignedX,
        truthHitAssignedY,
        truthHitAssignedZ,  # 4
        truthHitAssignedDirX,
        truthHitAssignedDirY,  # 6
        truthHitAssignedDirZ,
        truthHitAssignedEta,
        truthHitAssignedPhi,
        truthHitAssignedT,  # 10
        truthHitAssignedDirEta,
        truthHitAssignedDirR,
        truthHitAssignedDepEnergies,  # 13
        ticlHitAssignementIdx,  # 14
        ticlHitAssignedEnergies,  # 15
        truthHitAssignedPIDs,  # 16 - 16+n_classes #won't be used anymore
    ]
    truth = np.concatenate(truth_columns, axis=-1)

    t_idxarr = to_simple_array(truthHitAssignementIdx)
    t_energyarr = to_simple_array(truthHitAssignedEnergies)
    t_posarr = to_simple_array(
        np.concatenate([truthHitAssignedX, truthHitAssignedY], axis=-1))
    t_time = to_simple_array(truthHitAssignedT)
    t_pid = to_simple_array(truthHitAssignedPIDs)

    # remaining truth is mostly for consistency in the plotting tools
    t_rest = to_simple_array(truth)

    return [farr, t_idxarr, t_energyarr, t_posarr, t_time, t_pid], [t_rest], []
def base_convertFromSourceFile(self, filename, weighterobjects, istraining, treename="Events", removeTracks=True):
    """Convert one ``Events`` tree into rec-hit features and per-hit truth.

    Hits are split with ``self.setSplitIdx(z < 0)`` / ``self.splitJaggedArray``
    and the offsets of the split z array are used as row splits for every
    output. Per-hit truth is looked up from the merged sim clusters through
    ``BestMergedSimClusterIdx``; hits whose cluster is not flagged
    ``MergedSimCluster_isTrainable`` get index -1 (noise). For noise hits the
    truth position, energy and time are set to the hit's own values.

    The stored truth energy is the muon-corrected boundary energy, both in
    the dedicated ``recHitTruthEnergy`` array and in column 1 of
    ``recHitTruth``.

    ``weighterobjects``, ``istraining`` and ``removeTracks`` are not used.

    Returns:
        ``([recHitFeatures, recHitTruthClusterIdx, recHitTruthEnergy,
        recHitTruthPosition, recHitTruthTime, recHitTruthID],
        [recHitTruth], [])``.
    """
    fileTimeOut(filename, 10)  # 10 seconds for eos to recover
    tree = uproot.open(filename)[treename]

    hits = "RecHitHGC"

    recHitZUnsplit = self.hitObservable(tree, hits, "z", split=False, flatten=False)
    self.setSplitIdx(recHitZUnsplit < 0)
    recHitZ = self.splitJaggedArray(recHitZUnsplit)
    offsets = recHitZ.offsets
    recHitZ = np.expand_dims(recHitZ.content, axis=1)

    recHitX = self.hitObservable(tree, hits, "x")
    recHitY = self.hitObservable(tree, hits, "y")
    recHitEnergy = self.hitObservable(tree, hits, "energy")
    recHitDetaId = self.hitObservable(tree, hits, "detId")  # not used in the outputs
    recHitTime = self.hitObservable(tree, hits, "time")
    recHitR = np.sqrt(recHitX * recHitX + recHitY * recHitY + recHitZ * recHitZ)
    recHitTheta = np.arccos(recHitZ / recHitR)
    recHitEta = -np.log(np.tan(recHitTheta / 2))

    # Don't split this until the end, so it can be used to index the truth arrays
    recHitSimClusIdx = self.hitObservable(tree, hits, "BestMergedSimClusterIdx",
                                          split=False, flatten=False)

    simClusterDepEnergy = tree["MergedSimCluster_recEnergy"].array()
    simClusterEnergy = tree["MergedSimCluster_boundaryEnergy"].array()
    simClusterEnergyNoMu = tree["MergedSimCluster_boundaryEnergyNoMu"].array()

    # Remove muon energy, add back muon deposited energy
    unmergedId = tree["SimCluster_pdgId"].array()
    unmergedDepE = tree["SimCluster_recEnergy"].array()
    unmergedMatchIdx = tree["MergedSimCluster_SimCluster_MatchIdx"].array()
    unmergedMatches = tree["MergedSimCluster_SimClusterNumMatch"].array()
    # Same object as unmergedDepE: the non-muon entries are zeroed in place.
    unmergedDepEMuOnly = unmergedDepE
    unmergedDepEMuOnly[np.abs(unmergedId) != 13] = 0
    # Add another layer of nesting, then sum over all unmerged associated to merged
    unmergedDepEMuOnly = ak.JaggedArray.fromcounts(
        unmergedMatches.counts,
        ak.JaggedArray.fromcounts(
            unmergedMatches.content,
            unmergedDepEMuOnly[unmergedMatchIdx].flatten()))
    depEMuOnly = unmergedDepEMuOnly.sum()
    simClusterEnergyMuCorr = simClusterEnergyNoMu + depEMuOnly

    simClusterX = tree["MergedSimCluster_impactPoint_x"].array()
    simClusterY = tree["MergedSimCluster_impactPoint_y"].array()
    simClusterZ = tree["MergedSimCluster_impactPoint_z"].array()
    simClusterTime = tree["MergedSimCluster_impactPoint_t"].array()
    simClusterEta = tree["MergedSimCluster_impactPoint_eta"].array()
    simClusterPhi = tree["MergedSimCluster_impactPoint_phi"].array()
    simClusterPdgId = tree["MergedSimCluster_pdgId"].array()

    # Mark simclusters outside of volume or with very few hits as noise
    # Maybe not a good idea if the merged SC pdgId is screwed up
    # Probably removing neutrons is a good idea though
    #noNeutrons = simClusterPdgId[recHitSimClusIdx] == 2112

    # filter non-boundary positions. Hopefully working?
    goodSimClus = tree["MergedSimCluster_isTrainable"].array()
    # Don't split by index here to keep same dimensions as SimClusIdx
    markNoise = self.truthObjects(~goodSimClus, recHitSimClusIdx, False,
                                  split=False, flatten=False).astype(np.bool_)

    nbefore = (recHitSimClusIdx < 0).sum().sum()
    recHitSimClusIdx[markNoise] = -1
    nafter = (recHitSimClusIdx < 0).sum().sum()

    print("Number of noise hits before", nbefore, "after", nafter)
    print('removed another factor of', nafter / nbefore, ' bad simclusters')

    recHitTruthPID = self.truthObjects(simClusterPdgId, recHitSimClusIdx, 0.)
    # Uncorrected boundary energy: looked up but not part of any output.
    recHitTruthEnergy = self.truthObjects(simClusterEnergy, recHitSimClusIdx, 0)
    recHitTruthDepEnergy = self.truthObjects(simClusterDepEnergy, recHitSimClusIdx, 0)
    recHitTruthEnergyCorrMu = self.truthObjects(simClusterEnergyMuCorr, recHitSimClusIdx, 0)
    recHitTruthX = self.truthObjects(simClusterX, recHitSimClusIdx, 0)
    recHitTruthY = self.truthObjects(simClusterY, recHitSimClusIdx, 0)
    recHitTruthZ = self.truthObjects(simClusterZ, recHitSimClusIdx, 0)
    recHitTruthTime = self.truthObjects(simClusterTime, recHitSimClusIdx, 0)

    # R and theta are computed but not part of any output.
    recHitTruthR = np.sqrt(recHitTruthX * recHitTruthX +
                           recHitTruthY * recHitTruthY +
                           recHitTruthZ * recHitTruthZ)
    recHitTruthTheta = np.arccos(
        np.divide(recHitTruthZ, recHitTruthR,
                  out=np.zeros_like(recHitTruthZ),
                  where=recHitTruthR != 0))

    recHitTruthPhi = self.truthObjects(simClusterPhi, recHitSimClusIdx, 0)
    recHitTruthEta = self.truthObjects(simClusterEta, recHitSimClusIdx, 0)

    # Placeholder
    zeroFeature = np.zeros(shape=(len(recHitEnergy), 1), dtype='float32')

    features = np.concatenate(
        [
            recHitEnergy,
            recHitEta,
            zeroFeature,  # indicator if it is track or not
            recHitTheta,
            recHitR,
            recHitX,
            recHitY,
            recHitZ,
            recHitTime,
        ],
        axis=1)

    farr = SimpleArray(name="recHitFeatures")
    farr.createFromNumpy(features, offsets)
    del features

    recHitSimClusIdx = np.expand_dims(
        self.splitJaggedArray(recHitSimClusIdx).content.astype(np.int32),
        axis=1)

    # The index array is final from here on, so the masks can be shared.
    is_noise = recHitSimClusIdx < 0
    is_matched = recHitSimClusIdx >= 0

    print('noise',
          (100 * np.count_nonzero(is_noise)) // recHitSimClusIdx.shape[0],
          '% of hits')
    print('truth eta min max',
          np.min(np.abs(recHitTruthEta[is_matched])),
          np.max(np.abs(recHitTruthEta[is_matched])))
    print('non-boundary truth positions',
          np.count_nonzero(
              np.abs(np.abs(recHitTruthZ[is_matched]) - 320) > 5) /
          recHitTruthZ[is_matched].shape[0])

    # now all numpy
    # Noise hits take the hit's own position, energy and time as "truth".
    recHitTruthX[is_noise] = recHitX[is_noise]
    recHitTruthY[is_noise] = recHitY[is_noise]
    recHitTruthZ[is_noise] = recHitZ[is_noise]
    recHitTruthEnergyCorrMu[is_noise] = recHitEnergy[is_noise]
    recHitTruthTime[is_noise] = recHitTime[is_noise]

    truth = np.concatenate(
        [
            np.array(recHitSimClusIdx, dtype='float32'),  # 0
            recHitTruthEnergyCorrMu,
            recHitTruthX,
            recHitTruthY,
            recHitTruthZ,  # 4
            zeroFeature,  # truthHitAssignedDirX,
            zeroFeature,  # 6
            zeroFeature,
            recHitTruthEta,
            recHitTruthPhi,
            recHitTruthTime,  # 10
            zeroFeature,
            zeroFeature,
            recHitTruthDepEnergy,  # 13
            zeroFeature,  # 14
            zeroFeature,  # 15
            recHitTruthPID,  # 16 - 16+n_classes #won't be used anymore
        ],
        axis=1)

    t_idxarr = SimpleArray(recHitSimClusIdx, offsets, name="recHitTruthClusterIdx")

    # Store the same muon-corrected, noise-adjusted energy that goes into
    # column 1 of the combined truth array, so both outputs agree.
    t_energyarr = SimpleArray(name="recHitTruthEnergy")
    t_energyarr.createFromNumpy(recHitTruthEnergyCorrMu, offsets)

    t_posarr = SimpleArray(name="recHitTruthPosition")
    t_posarr.createFromNumpy(
        np.concatenate([recHitTruthX, recHitTruthY], axis=-1), offsets)

    t_time = SimpleArray(name="recHitTruthTime")
    t_time.createFromNumpy(recHitTruthTime, offsets)

    t_pid = SimpleArray(name="recHitTruthID")
    t_pid.createFromNumpy(recHitTruthPID, offsets)

    # remaining truth is mostly for consistency in the plotting tools
    t_rest = SimpleArray(name="recHitTruth")
    t_rest.createFromNumpy(truth, offsets)

    return [farr, t_idxarr, t_energyarr, t_posarr, t_time, t_pid], [t_rest], []
def getFinalFeaturesSA(self):
    """Return the final features wrapped in a SimpleArray.

    Takes the ``(array, rs)`` pair produced by
    ``self.getFinalFeaturesNumpy()`` and builds a ``SimpleArray`` named
    ``"recHitFeatures"`` from it.
    """
    features, rs = self.getFinalFeaturesNumpy()
    # Feature names are not attached yet; the setFeatureNames call is
    # deliberately left out for now.
    return SimpleArray(features, rs, name="recHitFeatures")
def base_convertFromSourceFile(self,
                               filename,
                               weighterobjects,
                               istraining,
                               onlytruth,
                               treename="WindowNTupler/tree"):
    """Read one ntuple file and pack its hits into feature and truth arrays.

    Opens ``treename`` in ``filename`` with uproot, keeps only hits with
    positive ``recHitEnergy`` (and, when ``onlytruth`` is set, a
    ``truthHitAssignementIdx`` above -0.1), flattens the selected branches
    with ``self.branchToFlatArray`` and concatenates them along the last
    axis.

    Args:
        filename: input file; passed to ``fileTimeOut`` and ``uproot.open``.
        weighterobjects: not used by this method.
        istraining: not used by this method.
        onlytruth: if true, additionally drop hits whose
            ``truthHitAssignementIdx`` is not above -0.1.
        treename: key of the tree inside the file.

    Returns:
        ``([features], [truth], [])``; both entries are ``SimpleArray``
        objects filled through ``createFromNumpy`` with the same ``rs``.
    """
    fileTimeOut(filename, 10)  # 10 seconds for eos to recover
    tree = uproot.open(filename)[treename]
    print("n entries: ", tree.numentries)

    # remove zero energy hits from removing of bad simclusters
    selection = tree["recHitEnergy"].array() > 0
    if onlytruth:
        selection = np.logical_and(
            tree["truthHitAssignementIdx"].array() > -0.1, selection)

    def flat(branch, **kwargs):
        # Flatten one branch under the common hit selection.
        return self.branchToFlatArray(tree[branch], False, selection,
                                      **kwargs)

    # Passing True makes branchToFlatArray also return rs, which is reused
    # for every array built below.
    recHitEnergy, rs = self.branchToFlatArray(tree["recHitEnergy"], True,
                                              selection)
    recHitEta = flat("recHitEta")
    recHitR = flat("recHitR")
    recHitX = flat("recHitX")
    recHitY = flat("recHitY")
    recHitZ = flat("recHitZ")
    recHitTime = flat("recHitTime")

    truthHitAssignementIdx = flat("truthHitAssignementIdx")
    truthHitAssignedEnergies = flat("truthHitAssignedEnergies")
    truthHitAssignedX = flat("truthHitAssignedX")
    truthHitAssignedY = flat("truthHitAssignedY")
    truthHitAssignedZ = flat("truthHitAssignedZ")
    truthHitAssignedDirX = flat("truthHitAssignedDirX")
    truthHitAssignedDirY = flat("truthHitAssignedDirY")
    truthHitAssignedDirZ = flat("truthHitAssignedDirZ")
    truthHitAssignedEta = flat("truthHitAssignedEta")
    truthHitAssignedPhi = flat("truthHitAssignedPhi")
    truthHitAssignedDirEta = flat("truthHitAssignedDirEta")
    truthHitAssignedDirPhi = flat("truthHitAssignedDirPhi")
    truthHitAssignedDirR = flat("truthHitAssignedDirR")
    # This branch has a different ("weird") shape and needs the 3d path.
    truthHitAssignedPIDs = flat("truthHitAssignedPIDs", is3d=True)
    truthHitAssignedT = flat("truthHitAssignedT")
    ticlHitAssignementIdx = flat("ticlHitAssignementIdx")
    ticlHitAssignedEnergies = flat("ticlHitAssignedEnergies")

    # For calculating spectators: hits below 0.05% of rechitsSum are flagged.
    rechitsSum = findRechitsSum(truthHitAssignementIdx, recHitEnergy, rs)
    spectator = np.where(recHitEnergy < 0.0005 * rechitsSum,
                         np.ones_like(recHitEnergy),
                         np.zeros_like(recHitEnergy))

    # For now the rechit sum is used as the shower energy; the raw
    # truthHitAssignedEnergies are still stored separately in the truth.
    truthShowerEnergies = rechitsSum

    # NOTE(review): truth index and rechit sum are part of the *feature*
    # columns here (slots that hold ID/theta elsewhere) - confirm intended.
    features = np.concatenate(
        [
            recHitEnergy,
            recHitEta,
            truthHitAssignementIdx,  # no phi anymore!
            truthShowerEnergies,
            recHitR,
            recHitX,
            recHitY,
            recHitZ,
            recHitTime,
        ],
        axis=-1)

    farr = SimpleArray()
    farr.createFromNumpy(features, rs)
    print("features", features.shape)
    del features

    # Column indices below assume every input except the PIDs contributes
    # exactly one column - TODO confirm against branchToFlatArray.
    truth = np.concatenate(
        [
            truthHitAssignementIdx,  # 0
            truthShowerEnergies,  # 1
            truthHitAssignedX,  # 2
            truthHitAssignedY,  # 3
            truthHitAssignedZ,  # 4
            truthHitAssignedDirX,  # 5
            truthHitAssignedDirY,  # 6
            truthHitAssignedDirZ,  # 7
            truthHitAssignedEta,  # 8
            truthHitAssignedPhi,  # 9
            truthHitAssignedT,  # 10
            truthHitAssignedDirEta,  # 11
            truthHitAssignedDirPhi,  # 12
            truthHitAssignedDirR,  # 13
            spectator,  # 14
            truthHitAssignedEnergies,  # 15
            rechitsSum,  # 16
            ticlHitAssignementIdx,  # 17
            ticlHitAssignedEnergies,  # 18
            truthHitAssignedPIDs,  # 19 - 19+n_classes
        ],
        axis=-1)

    tarr = SimpleArray()
    tarr.createFromNumpy(truth, rs)
    print("truth", truth.shape)

    return [farr], [tarr], []
def base_convertFromSourceFile(self,
                               filename,
                               weighterobjects,
                               istraining,
                               onlytruth,
                               treename="WindowNTupler/tree",
                               removeTracks=True):
    """Read one ntuple file and pack its hits into feature and truth arrays.

    Opens ``treename`` in ``filename`` with uproot, keeps only hits with
    positive ``recHitEnergy`` (optionally also requiring a truth assignment
    and/or a non-track ``recHitID``), flattens the selected branches with
    ``self.branchToFlatArray`` and concatenates them along the last axis.

    Args:
        filename: input file; passed to ``fileTimeOut`` and ``uproot.open``.
        weighterobjects: not used by this method.
        istraining: not used by this method.
        onlytruth: if true, additionally drop hits whose
            ``truthHitAssignementIdx`` is not above -0.1.
        treename: key of the tree inside the file.
        removeTracks: if true, additionally require ``recHitID > -0.5``.

    Returns:
        ``([features], [truth], [])``; both entries are ``SimpleArray``
        objects filled through ``createFromNumpy`` with the same ``rs``.
    """
    fileTimeOut(filename, 10)  # 10 seconds for eos to recover
    tree = uproot.open(filename)[treename]
    print("n entries: ", tree.numentries)

    # remove zero energy hits from removing of bad simclusters
    selection = tree["recHitEnergy"].array() > 0
    if onlytruth:
        selection = np.logical_and(
            tree["truthHitAssignementIdx"].array() > -0.1, selection)
    if removeTracks:
        selection = np.logical_and(selection,
                                   tree["recHitID"].array() > -0.5)

    def flat(branch, **kwargs):
        # Flatten one branch under the common hit selection.
        return self.branchToFlatArray(tree[branch], False, selection,
                                      **kwargs)

    # Per the ntupler layout note kept with this code, tracks share the
    # rechit branches: energy holds the track momentum, detid the charge,
    # time the chi2, the position-derived branches hold the track position,
    # and recHitID is -1 for tracks and 0 for rechits.

    # Passing True makes branchToFlatArray also return rs, which is reused
    # for every array built below.
    recHitEnergy, rs = self.branchToFlatArray(tree["recHitEnergy"], True,
                                              selection)
    recHitEta = flat("recHitEta")
    recHitTheta = flat("recHitTheta")
    recHitR = flat("recHitR")
    recHitX = flat("recHitX")
    recHitY = flat("recHitY")
    recHitZ = flat("recHitZ")
    recHitTime = flat("recHitTime")
    recHitID = flat("recHitID")

    truthHitAssignementIdx = flat("truthHitAssignementIdx")
    truthHitAssignedEnergies = flat("truthHitAssignedEnergies")
    truthHitAssignedX = flat("truthHitAssignedX")
    truthHitAssignedY = flat("truthHitAssignedY")
    truthHitAssignedZ = flat("truthHitAssignedZ")
    truthHitAssignedDirX = flat("truthHitAssignedDirX")
    truthHitAssignedDirY = flat("truthHitAssignedDirY")
    truthHitAssignedDirZ = flat("truthHitAssignedDirZ")
    truthHitAssignedEta = flat("truthHitAssignedEta")
    truthHitAssignedPhi = flat("truthHitAssignedPhi")
    truthHitAssignedDirEta = flat("truthHitAssignedDirEta")
    truthHitAssignedDepEnergies = flat("truthHitAssignedDepEnergies")
    truthHitAssignedDirR = flat("truthHitAssignedDirR")
    # This branch has a different ("weird") shape and needs the 3d path.
    truthHitAssignedPIDs = flat("truthHitAssignedPIDs", is3d=True)
    truthHitAssignedT = flat("truthHitAssignedT")
    ticlHitAssignementIdx = flat("ticlHitAssignementIdx")
    ticlHitAssignedEnergies = flat("ticlHitAssignedEnergies")

    ############ special track treatment for now
    # Tracks are the only spectators, and their 'energy' is set to zero.
    isTrack = recHitID < 0
    spectator = np.where(isTrack, np.ones_like(isTrack),
                         np.zeros_like(isTrack))
    recHitEnergy[isTrack] = 0.  # don't use track momenta just use as seeds
    ##############

    # For now the deposited energy is used as the shower energy; the raw
    # truthHitAssignedEnergies are still stored separately in the truth.
    truthShowerEnergies = truthHitAssignedDepEnergies

    features = np.concatenate(
        [
            recHitEnergy,
            recHitEta,
            recHitID,  # indicator if it is track or not
            recHitTheta,
            recHitR,
            recHitX,
            recHitY,
            recHitZ,
            recHitTime,
        ],
        axis=-1)

    farr = SimpleArray()
    farr.createFromNumpy(features, rs)
    print("features", features.shape)
    del features

    # Column indices below assume every input except the PIDs contributes
    # exactly one column - TODO confirm against branchToFlatArray.
    truth = np.concatenate(
        [
            truthHitAssignementIdx,  # 0
            truthShowerEnergies,  # 1
            truthHitAssignedX,  # 2
            truthHitAssignedY,  # 3
            truthHitAssignedZ,  # 4
            truthHitAssignedDirX,  # 5
            truthHitAssignedDirY,  # 6
            truthHitAssignedDirZ,  # 7
            truthHitAssignedEta,  # 8
            truthHitAssignedPhi,  # 9
            truthHitAssignedT,  # 10
            truthHitAssignedDirEta,  # 11
            np.zeros_like(truthHitAssignedDirEta),  # 12 (placeholder)
            truthHitAssignedDirR,  # 13
            spectator,  # 14
            truthHitAssignedEnergies,  # 15
            truthHitAssignedDepEnergies,  # 16
            ticlHitAssignementIdx,  # 17
            ticlHitAssignedEnergies,  # 18
            truthHitAssignedPIDs,  # 19 - 19+n_classes
        ],
        axis=-1)

    tarr = SimpleArray()
    tarr.createFromNumpy(truth, rs)
    print("truth", truth.shape)

    return [farr], [tarr], []