Ejemplo n.º 1
0
    def createFromCsvsIntoStandard(self, filename_truth, filename_hits,
                                   filename_cells, filename_particles,
                                   outfilename):
        """Convert one event's CSV tables into the standard array layout.

        Reads the hits, truth, particles and cells tables, sums the cell
        values of every hit into a per-hit energy, keeps only particles
        whose transverse momentum ``sqrt(px**2 + py**2)`` exceeds 1.5 (and
        the truth rows / hits that belong to them), builds the feature and
        truth arrays, hands them to ``self._store`` and writes the result
        to ``outfilename``.

        Args:
            filename_truth: CSV with columns ``hit_id``, ``particle_id``,
                ``tx``, ``ty``, ``tz``.
            filename_hits: CSV with columns ``hit_id``, ``x``, ``y``, ``z``.
            filename_cells: CSV with columns ``hit_id`` and ``value``.
            filename_particles: CSV with columns ``particle_id``, ``px``,
                ``py``, ``pz``.
            outfilename: Path passed to ``self.writeToFile``.

        Raises:
            ValueError: If the hit ids recovered from the cells table do not
                match the hits table row for row.
        """
        df_hits = pd.read_csv(filename_hits, sep=',')
        df_truth = pd.read_csv(filename_truth, sep=',')
        df_particles = pd.read_csv(filename_particles, sep=',')
        df_cells = pd.read_csv(filename_cells, sep=',')

        # Sum the cell values of each hit. Consecutive rows with the same
        # hit_id are merged, so the cells of one hit must be contiguous.
        hit_ids_c = []
        rechitEnergy = []
        for cell_hit_id, weight in zip(df_cells['hit_id'].to_numpy(),
                                       df_cells['value'].to_numpy()):
            if hit_ids_c and cell_hit_id == hit_ids_c[-1]:
                rechitEnergy[-1] += weight
            else:
                hit_ids_c.append(cell_hit_id)
                rechitEnergy.append(weight)

        # Exact row-by-row comparison: a sum of differences can cancel out,
        # and an assert would be stripped when running with -O.
        if not np.array_equal(df_hits['hit_id'].to_numpy(),
                              np.asarray(hit_ids_c)):
            raise ValueError(
                "hit ids in the cells table do not match the hits table")

        df_hits['energy'] = rechitEnergy

        # Keep particles above the transverse momentum cut, then the truth
        # rows of those particles, then the hits of those truth rows.
        pt = np.sqrt(df_particles['px']**2 + df_particles['py']**2)
        df_particles = df_particles[pt > 1.5]
        df_truth = df_truth[np.isin(df_truth['particle_id'],
                                    df_particles['particle_id'])]
        df_hits = df_hits[np.isin(df_hits['hit_id'], df_truth['hit_id'])]

        rechHitEnergy = df_hits['energy'].to_numpy(dtype=np.float32)
        recHitX = df_hits['x'].to_numpy(dtype=np.float32)
        recHitY = df_hits['y'].to_numpy(dtype=np.float32)
        recHitZ = df_hits['z'].to_numpy(dtype=np.float32)
        recHitR = np.sqrt(recHitX**2. + recHitY**2. + recHitZ**2.)
        recHitTheta = np.arccos(recHitZ / recHitR)
        recHitEta = -np.log(np.tan(recHitTheta / 2))
        # Explicit zeros: multiplying eta by 0 would give NaN wherever eta
        # is infinite.
        zeroFeature = np.zeros_like(recHitEta)

        # Per-hit truth quantities. The masks below index the hit arrays
        # with truth rows, so truth and hits are assumed to be row-aligned.
        particle_id = df_truth['particle_id'].to_numpy()
        cluster_idx = particle_id.copy()
        recHitTruthEnergy = zeroFeature.copy()
        recHitTruthDepEnergy = zeroFeature.copy()
        px = df_particles['px'].to_numpy()
        py = df_particles['py'].to_numpy()
        for i, pid in enumerate(df_particles['particle_id']):
            mask = particle_id == pid
            # Replace the particle id by its row position in the filtered
            # particle table.
            cluster_idx[mask] = i
            # The stored "energy" is the transverse momentum of the particle.
            recHitTruthEnergy[mask] = np.sqrt(px[i]**2 + py[i]**2)
            recHitTruthDepEnergy[mask] = np.sum(rechHitEnergy[mask])

        recHitSimClusIdx = cluster_idx.astype(np.int32)

        recHitTruthX = df_truth['tx'].to_numpy().astype(np.float32)
        recHitTruthY = df_truth['ty'].to_numpy().astype(np.float32)
        recHitTruthZ = df_truth['tz'].to_numpy().astype(np.float32)
        recHitTruthR = np.sqrt(recHitTruthX**2. + recHitTruthY**2. +
                               recHitTruthZ**2.).astype(np.float32)
        recHitTruthTheta = np.arccos(recHitTruthZ / recHitTruthR).astype(
            np.float32)
        recHitTruthEta = -np.log(np.tan(recHitTruthTheta / 2)).astype(
            np.float32)
        # arctan2 keeps the quadrant (full -pi..pi range) and is defined
        # for x == 0, unlike arctan(y / x).
        recHitTruthPhi = np.arctan2(recHitTruthY,
                                    recHitTruthX).astype(np.float32)
        recHitTruthTime = zeroFeature
        recHitTruthDepEnergy = recHitTruthDepEnergy.astype(np.float32)
        recHitTruthPID = zeroFeature

        truth = np.stack(
            [
                np.array(recHitSimClusIdx, dtype='float32'),  # 0
                recHitTruthEnergy,
                recHitTruthX,
                recHitTruthY,
                recHitTruthZ,  # 4
                zeroFeature,  # truthHitAssignedDirX,
                zeroFeature,  # 6
                zeroFeature,
                recHitTruthEta,
                recHitTruthPhi,
                recHitTruthTime,  # 10
                zeroFeature,
                zeroFeature,
                recHitTruthDepEnergy,  # 13
                zeroFeature,  # 14
                zeroFeature,  # 15
                recHitTruthPID,  # 16 - 16+n_classes #won't be used anymore
                zeroFeature,
                zeroFeature
            ],
            axis=1)
        truth = truth.astype(np.float32)

        features = np.stack(
            [
                rechHitEnergy,
                recHitEta,
                zeroFeature,  # indicator if it is track or not
                recHitTheta,
                recHitR,
                recHitX,
                recHitY,
                recHitZ,
                zeroFeature,
                zeroFeature,
            ],
            axis=1)
        features = features.astype(np.float32)

        # Single row split: the whole file is treated as one event.
        rs = np.array([0, len(features)], np.int64)

        farr = SimpleArray(name="recHitFeatures")
        farr.createFromNumpy(features, rs)

        t_idxarr = SimpleArray(name="recHitTruthClusterIdx")
        t_idxarr.createFromNumpy(recHitSimClusIdx[..., np.newaxis], rs)

        t_energyarr = SimpleArray(name="recHitTruthEnergy")
        t_energyarr.createFromNumpy(recHitTruthEnergy[..., np.newaxis], rs)

        t_posarr = SimpleArray(name="recHitTruthPosition")
        t_posarr.createFromNumpy(
            np.concatenate(
                [recHitTruthX[..., np.newaxis], recHitTruthY[..., np.newaxis]],
                axis=-1), rs)

        t_time = SimpleArray(name="recHitTruthTime")
        t_time.createFromNumpy(recHitTruthTime[..., np.newaxis], rs)

        t_pid = SimpleArray(name="recHitTruthID")
        t_pid.createFromNumpy(recHitTruthPID[..., np.newaxis], rs)

        t_spectator = SimpleArray(
            name="recHitSpectatorFlag"
        )  # why do we have inconsistent namings, where is it needed? wrt. to truth array
        t_spectator.createFromNumpy(zeroFeature[..., np.newaxis], rs)

        # Every hit is flagged as fully contained (constant 1).
        t_fully_contained = SimpleArray(name="recHitFullyContainedFlag")
        t_fully_contained.createFromNumpy(
            (zeroFeature[..., np.newaxis] + 1).astype(np.int32), rs)

        # remaining truth is mostly for consistency in the plotting tools
        # NOTE(review): t_rest is built but not passed to _store below;
        # confirm whether it belongs in the second list.
        t_rest = SimpleArray(name="recHitTruth")
        t_rest.createFromNumpy(truth, rs)

        x, y, z = [
            farr, t_idxarr, t_energyarr, t_posarr, t_time, t_pid, t_spectator,
            t_fully_contained
        ], [], []
        self._store(x, y, z)
        self.writeToFile(outfilename)
        print("Storing in new format")
Ejemplo n.º 2
0
    def createFromCsvs(
            self,
            filename_truth,
            filename_hits,
            filename_cells,
            filename_particles,
            outfilename,
            base_dir='/Users/shahrukhqasim/Downloads/kaggle_trackml/train_100_events/'
    ):
        """Convert one event's CSV tables into feature and truth arrays.

        Keeps only particles with ``sqrt(px**2 + py**2) > 1.5`` together
        with their truth rows and hits, relabels the surviving particle ids
        as consecutive integers, stores the arrays via ``self._store`` and
        writes them to ``outfilename``.

        Args:
            filename_truth: Truth CSV (``hit_id``, ``particle_id``, ``tx``,
                ``ty``, ``tz``, ``weight``), relative to ``base_dir``.
            filename_hits: Hits CSV (``hit_id``, ``x``, ``y``, ``z``,
                ``volume_id``, ``layer_id``, ``module_id``), relative to
                ``base_dir``.
            filename_cells: Accepted for interface compatibility; the cells
                table is not used by this conversion and is not read.
            filename_particles: Particles CSV (``particle_id``, ``px``,
                ``py``), relative to ``base_dir``.
            outfilename: Path passed to ``self.writeToFile``.
            base_dir: Directory the CSV file names are joined onto. Defaults
                to the previously hard-coded location; an absolute file
                name overrides it (``os.path.join`` semantics).
        """
        df_hits = pd.read_csv(os.path.join(base_dir, filename_hits), sep=',')
        df_truth = pd.read_csv(os.path.join(base_dir, filename_truth),
                               sep=',')
        df_particles = pd.read_csv(os.path.join(base_dir, filename_particles),
                                   sep=',')

        # Transverse momentum cut on the particles, then propagate the
        # selection to the truth rows and to the hits.
        pt = np.sqrt(df_particles['px']**2 + df_particles['py']**2)
        df_particles = df_particles[pt > 1.5]
        df_truth = df_truth[np.isin(df_truth['particle_id'],
                                    df_particles['particle_id'])]
        df_hits = df_hits[np.isin(df_hits['hit_id'], df_truth['hit_id'])]

        # Relabel particle ids as 0..n-1 in sorted-id order. The inverse
        # index from np.unique is exactly the position of each id in the
        # sorted unique list.
        _, cluster_idx = np.unique(df_truth['particle_id'].to_numpy(),
                                   return_inverse=True)
        particle_id2 = cluster_idx.astype(np.int32)

        farr = np.stack(
            (df_hits['x'], df_hits['y'], df_hits['z'], df_hits['volume_id'],
             df_hits['layer_id'], df_hits['module_id']),
            axis=-1)
        farr = farr.astype(np.float32)

        tarr = np.stack((particle_id2, df_truth['hit_id'], df_truth['tx'],
                         df_truth['ty'], df_truth['tz'], df_truth['weight']),
                        axis=-1)
        tarr = tarr.astype(np.float32)

        # Single row split: the whole file is treated as one event.
        rs = np.array([0, len(farr)], np.int64)

        print(rs, len(farr), len(tarr))

        farr_ = SimpleArray(name="feats")
        farr_.createFromNumpy(farr, rs)

        # The truth array is stored twice: once alongside the features and
        # once in the second list, each as its own SimpleArray.
        tarr_ = SimpleArray(name="truths")
        tarr_.createFromNumpy(tarr, rs)

        tarr_2 = SimpleArray(name="truths")
        tarr_2.createFromNumpy(tarr, rs)

        self._store([farr_, tarr_], [tarr_2], [])

        self.writeToFile(outfilename)
Ejemplo n.º 3
0
    def base_convertFromSourceFile(self,
                                   filename,
                                   weighterobjects,
                                   istraining,
                                   treename="Events",
                                   removeTracks=True):
        """Convert one ROOT file into feature and truth SimpleArrays.

        Opens ``filename`` with uproot, reads the ``RecHitHGC`` hit
        observables and the ``MergedSimCluster_*`` / ``SimCluster_*`` truth
        branches from ``treename``, derives per-hit truth quantities and a
        per-hit spectator distance, and packs everything into SimpleArray
        objects.

        Args:
            filename: Input file; passed to ``fileTimeOut`` and
                ``uproot.open``.
            weighterobjects: Not referenced in this method.
            istraining: Not referenced in this method.
            treename: Key of the tree inside the file.
            removeTracks: Not referenced in this method.

        Returns:
            A tuple of three lists: the feature array followed by the
            individual truth arrays, a list holding the combined
            ``recHitTruth`` array, and an empty list.
        """

        fileTimeOut(filename, 10)  #10 seconds for eos to recover
        tree = uproot.open(filename)[treename]

        hits = "RecHitHGC"
        front_face_z = 323  #this needs to be more precise

        recHitZUnsplit = self.hitObservable(tree,
                                            hits,
                                            "z",
                                            split=False,
                                            flatten=False)
        # The split mask (hits with negative z) has to be set before any
        # hitObservable / splitJaggedArray call that splits.
        # NOTE(review): presumably this separates the two detector sides - confirm.
        self.setSplitIdx(recHitZUnsplit < 0)
        recHitZ = self.splitJaggedArray(recHitZUnsplit)
        # Offsets of the split z array are reused for every output array.
        offsets = recHitZ.offsets

        recHitX = self.hitObservable(tree,
                                     hits,
                                     "x",
                                     split=True,
                                     flatten=False)
        recHitY = self.hitObservable(tree,
                                     hits,
                                     "y",
                                     split=True,
                                     flatten=False)
        recHitSimClusIdx = self.hitObservable(tree,
                                              hits,
                                              "BestMergedSimClusterIdx",
                                              split=True,
                                              flatten=False)

        #Define spectators
        # One DataFrame per (split) event holding hit positions and the
        # index of the associated sim cluster.
        recHit_df_events = [
            pd.DataFrame({
                "recHitX": recHitX[i],
                "recHitY": recHitY[i],
                "recHitZ": recHitZ[i],
                "recHitSimClusIdx": recHitSimClusIdx[i]
            }) for i in range(recHitX.shape[0])
        ]
        for ievent in range(len(recHit_df_events)):
            df_event = recHit_df_events[ievent]
            unique_shower_idx = np.unique(df_event['recHitSimClusIdx'])
            df_event['spectator_distance'] = 0  # default for hits without a PCA result
            df_event['recHitSimClus_nHits'] = df_event.groupby(
                'recHitSimClusIdx'
            ).recHitX.transform(
                len
            )  #adding number of rec hits that are associated to this truth cluster
            for idx in unique_shower_idx:
                df_shower = df_event[df_event['recHitSimClusIdx'] == idx]
                x_to_fit = df_shower[['recHitX', 'recHitY',
                                      'recHitZ']].to_numpy()
                # find_pcas returns None when it produces no result for
                # this shower; the default of 0 is kept in that case.
                spectators_shower_dist = find_pcas(x_to_fit,
                                                   PCA_n=2,
                                                   min_hits=10)
                if (spectators_shower_dist is not None):
                    spectators_idx = (df_shower.index.tolist())
                    df_event.loc[spectators_idx,
                                 'spectator_distance'] = spectators_shower_dist
                del df_shower
            del df_event

        #Expand back
        # Flatten the jagged arrays to (n_hits, 1) columns.
        recHitX = np.expand_dims(recHitX.content, axis=1)
        recHitY = np.expand_dims(recHitY.content, axis=1)
        recHitZ = np.expand_dims(recHitZ.content, axis=1)
        recHitSpectatorFlag = np.concatenate(
            np.array([
                recHit_df_events[i]['spectator_distance'].to_numpy()
                for i in range(len(recHit_df_events))
            ],
                     dtype=object)).reshape(-1, 1)
        recHitSimClusterNumHits = np.concatenate(
            np.array([
                recHit_df_events[i]['recHitSimClus_nHits'].to_numpy()
                for i in range(len(recHit_df_events))
            ],
                     dtype=object)).reshape(-1, 1)  #number of rec hits
        del recHit_df_events

        recHitEnergy = self.hitObservable(tree, hits, "energy")
        # NOTE(review): recHitDetaId is read but not used below.
        recHitDetaId = self.hitObservable(tree, hits, "detId")
        recHitTime = self.hitObservable(tree, hits, "time")
        recHitR = np.sqrt(recHitX * recHitX + recHitY * recHitY +
                          recHitZ * recHitZ)
        recHitTheta = np.arccos(recHitZ / recHitR)
        recHitEta = -np.log(np.tan(recHitTheta / 2))

        # Don't split this until the end, so it can be used to index the truth arrays
        recHitSimClusIdx = self.hitObservable(tree,
                                              hits,
                                              "BestMergedSimClusterIdx",
                                              split=False,
                                              flatten=False)

        simClusterDepEnergy = tree["MergedSimCluster_recEnergy"].array()
        simClusterEnergy = tree["MergedSimCluster_boundaryEnergy"].array()
        simClusterEnergyNoMu = tree[
            "MergedSimCluster_boundaryEnergyNoMu"].array()
        # NOTE(review): simClusterNumHits is only referenced in a
        # commented-out line further down.
        simClusterNumHits = tree["MergedSimCluster_nHits"].array(
        )  #number of sim hits

        # Remove muon energy, add back muon deposited energy
        unmergedId = tree["SimCluster_pdgId"].array()
        unmergedDepE = tree["SimCluster_recEnergy"].array()
        unmergedMatchIdx = tree["MergedSimCluster_SimCluster_MatchIdx"].array()
        unmergedMatches = tree["MergedSimCluster_SimClusterNumMatch"].array()
        # NOTE(review): this binds a second name to the same array object,
        # so the masked assignment below also modifies unmergedDepE (which
        # is not read again afterwards).
        unmergedDepEMuOnly = unmergedDepE
        unmergedDepEMuOnly[np.abs(unmergedId) != 13] = 0
        # Add another layer of nesting, then sum over all unmerged associated to merged
        unmergedDepEMuOnly = ak.JaggedArray.fromcounts(
            unmergedMatches.counts,
            ak.JaggedArray.fromcounts(
                unmergedMatches.content,
                unmergedDepEMuOnly[unmergedMatchIdx].flatten()))
        depEMuOnly = unmergedDepEMuOnly.sum()
        #why wasn't it possible to just do instead of all of the above : simClusterEnergy[simClusterPdgId == 13] = simClusterDepEnergy ?

        simClusterEnergyMuCorr = simClusterEnergyNoMu + depEMuOnly

        simClusterX = tree["MergedSimCluster_impactPoint_x"].array()
        simClusterY = tree["MergedSimCluster_impactPoint_y"].array()
        simClusterZ = tree["MergedSimCluster_impactPoint_z"].array()
        simClusterTime = tree["MergedSimCluster_impactPoint_t"].array()
        simClusterEta = tree["MergedSimCluster_impactPoint_eta"].array()
        simClusterPhi = tree["MergedSimCluster_impactPoint_phi"].array()
        simClusterPdgId = tree["MergedSimCluster_pdgId"].array()

        # Mark simclusters outside of volume or with very few hits as noise
        # Maybe not a good idea if the merged SC pdgId is screwed up
        # Probably removing neutrons is a good idea though
        #noNeutrons = simClusterPdgId[recHitSimClusIdx] == 2112

        #filter non-boundary positions. Hopefully working?
        goodSimClus = tree["MergedSimCluster_isTrainable"].array()
        # Don't split by index here to keep same dimensions as SimClusIdx
        markNoise = self.truthObjects(~goodSimClus,
                                      recHitSimClusIdx,
                                      False,
                                      split=False,
                                      flatten=False).astype(np.bool_)

        # Hits attached to a non-trainable sim cluster are relabelled as
        # noise (index -1); counts before and after are only reported.
        nbefore = (recHitSimClusIdx < 0).sum().sum()
        recHitSimClusIdx[markNoise] = -1
        nafter = (recHitSimClusIdx < 0).sum().sum()

        print("Number of noise hits before", nbefore, "after", nafter)
        # NOTE(review): this ratio divides by nbefore without a zero guard.
        print('removed another factor of', nafter / nbefore,
              ' bad simclusters')

        recHitTruthPID = self.truthObjects(simClusterPdgId, recHitSimClusIdx,
                                           0.)
        recHitTruthDepEnergy = self.truthObjects(simClusterDepEnergy,
                                                 recHitSimClusIdx, 0)
        recHitTruthEnergy = self.truthObjects(simClusterEnergy,
                                              recHitSimClusIdx, 0)
        recHitTruthEnergyCorrMu = self.truthObjects(simClusterEnergyMuCorr,
                                                    recHitSimClusIdx, 0)

        low_energy_shower_cutoff = 3
        # Uncorrected currently not used
        # NOTE(review): the first np.where result is overwritten by the
        # second, and recHitTruthEnergy is not read again in this method;
        # the truth arrays below use recHitTruthEnergyCorrMu directly, so
        # the cutoff has no effect on the output.
        recHitTruthEnergy = np.where(
            recHitTruthEnergy > low_energy_shower_cutoff, recHitTruthEnergy,
            recHitTruthDepEnergy)
        recHitTruthEnergy = np.where(
            recHitTruthEnergyCorrMu > low_energy_shower_cutoff,
            recHitTruthEnergyCorrMu, recHitTruthDepEnergy)

        #very bad names because these quantities are associated to Merged Clusters and not hits
        recHitTruthX = self.truthObjects(simClusterX, recHitSimClusIdx, 0)
        recHitTruthY = self.truthObjects(simClusterY, recHitSimClusIdx, 0)
        recHitTruthZ = self.truthObjects(simClusterZ, recHitSimClusIdx, 0)
        recHitTruthTime = self.truthObjects(simClusterTime, recHitSimClusIdx,
                                            0)
        recHitTruthR = np.sqrt(recHitTruthX * recHitTruthX +
                               recHitTruthY * recHitTruthY +
                               recHitTruthZ * recHitTruthZ)
        # Guarded division: entries with R == 0 yield arccos(0).
        # NOTE(review): recHitTruthTheta is not used below.
        recHitTruthTheta = np.arccos(
            np.divide(recHitTruthZ,
                      recHitTruthR,
                      out=np.zeros_like(recHitTruthZ),
                      where=recHitTruthR != 0))
        recHitTruthPhi = self.truthObjects(simClusterPhi, recHitSimClusIdx, 0)
        recHitTruthEta = self.truthObjects(simClusterEta, recHitSimClusIdx, 0)
        #recHitAverageEnergy =  self.truthObjects(simClusterDepEnergy/simClusterNumHits, recHitSimClusIdx, 0) #this is not technically very good because simClusterNumHits is number of sim clusters, not reco
        # NOTE(review): recHitAverageEnergy is computed but not used below.
        recHitAverageEnergy = recHitTruthDepEnergy / recHitSimClusterNumHits

        #print(recHitTruthPhi)
        #print(np.max(recHitTruthPhi))
        #print(np.min(recHitTruthPhi))

        # Placeholder
        zeroFeature = np.zeros(shape=(len(recHitEnergy), 1), dtype='float32')

        features = np.concatenate(
            [
                recHitEnergy,
                recHitEta,
                zeroFeature,  #indicator if it is track or not
                recHitTheta,
                recHitR,
                recHitX,
                recHitY,
                recHitZ,
                recHitTime,
            ],
            axis=1)

        farr = SimpleArray(name="recHitFeatures")
        farr.createFromNumpy(features, offsets)
        del features

        # From here on the cluster index is a flat (n_hits, 1) int32 column.
        recHitSimClusIdx = np.expand_dims(
            self.splitJaggedArray(recHitSimClusIdx).content.astype(np.int32),
            axis=1)

        print('noise', (100 * np.count_nonzero(recHitSimClusIdx < 0)) //
              recHitSimClusIdx.shape[0], '% of hits')
        print('truth eta min max',
              np.min(np.abs(recHitTruthEta[recHitSimClusIdx >= 0])),
              np.max(np.abs(recHitTruthEta[recHitSimClusIdx >= 0])))
        print(
            'non-boundary truth positions',
            np.count_nonzero(
                np.abs(np.abs(recHitTruthZ[recHitSimClusIdx >= 0]) - 320) > 5)
            / recHitTruthZ[recHitSimClusIdx >= 0].shape[0])

        #now all numpy
        #Why do we want noise (-1) sim hits to be equal rec?
        # For noise hits the truth position, energy and time are set to the
        # hit's own reconstructed values.
        recHitTruthX[recHitSimClusIdx < 0] = recHitX[recHitSimClusIdx < 0]
        recHitTruthY[recHitSimClusIdx < 0] = recHitY[recHitSimClusIdx < 0]
        recHitTruthZ[recHitSimClusIdx < 0] = recHitZ[recHitSimClusIdx < 0]
        recHitTruthEnergyCorrMu[recHitSimClusIdx < 0] = recHitEnergy[
            recHitSimClusIdx < 0]
        recHitTruthTime[recHitSimClusIdx < 0] = recHitTime[
            recHitSimClusIdx < 0]

        #import matplotlib.pyplot as plt
        #plt.hist(np.abs(recHitTruthEnergyCorrMu[recHitSimClusIdx>=0]/recHitTruthDepEnergy[recHitSimClusIdx>=0]))
        #plt.yscale('log')
        #plt.savefig("scat.pdf")

        truth = np.concatenate(
            [
                np.array(recHitSimClusIdx, dtype='float32'),  # 0
                recHitTruthEnergyCorrMu,
                recHitTruthX,
                recHitTruthY,
                recHitTruthZ,  #4
                zeroFeature,  #truthHitAssignedDirX,
                zeroFeature,  #6
                zeroFeature,
                recHitTruthEta,
                recHitTruthPhi,
                recHitTruthTime,  #10
                zeroFeature,
                zeroFeature,
                recHitTruthDepEnergy,  #13
                zeroFeature,  #14
                zeroFeature,  #15
                recHitTruthPID,  #16 - 16+n_classes #won't be used anymore
                np.array(recHitSpectatorFlag, dtype='float32'),
                np.where(recHitTruthZ < front_face_z, 1., 0.).astype('float32')
            ],
            axis=1)

        t_idxarr = SimpleArray(recHitSimClusIdx,
                               offsets,
                               name="recHitTruthClusterIdx")

        t_energyarr = SimpleArray(name="recHitTruthEnergy")
        t_energyarr.createFromNumpy(recHitTruthEnergyCorrMu, offsets)

        t_posarr = SimpleArray(name="recHitTruthPosition")
        t_posarr.createFromNumpy(
            np.concatenate([recHitTruthX, recHitTruthY], axis=-1), offsets)

        t_time = SimpleArray(name="recHitTruthTime")
        t_time.createFromNumpy(recHitTruthTime, offsets)

        t_pid = SimpleArray(name="recHitTruthID")
        t_pid.createFromNumpy(recHitTruthPID, offsets)

        t_spectator = SimpleArray(
            name="recHitSpectatorFlag"
        )  #why do we have inconsistent namings, where is it needed? wrt. to truth array
        t_spectator.createFromNumpy(recHitSpectatorFlag.astype('float32'),
                                    offsets)

        # Flag is 1 where the truth z lies below front_face_z, else 0.
        t_fully_contained = SimpleArray(name="recHitFullyContainedFlag")
        t_fully_contained.createFromNumpy(
            np.where(recHitTruthZ < front_face_z, 1., 0.).astype('float32'),
            offsets)

        #remaining truth is mostly for consistency in the plotting tools
        t_rest = SimpleArray(name="recHitTruth")
        t_rest.createFromNumpy(truth, offsets)

        return [
            farr, t_idxarr, t_energyarr, t_posarr, t_time, t_pid, t_spectator,
            t_fully_contained
        ], [t_rest], []
Ejemplo n.º 4
0
    def base_convertFromSourceFile(self,
                                   filename,
                                   weighterobjects,
                                   istraining,
                                   treename="WindowNTupler/tree",
                                   removeTracks=True):
        """Convert one ROOT file into feature and truth SimpleArrays.

        Selects hits with positive ``recHitEnergy`` (and, when
        ``removeTracks`` is set, ``recHitID > -0.5``), flattens the hit and
        truth branches with ``self.branchToFlatArray`` and packs them into
        SimpleArray objects that share the row splits of ``recHitEnergy``.

        Args:
            filename: Input file; passed to ``fileTimeOut`` and
                ``uproot.open``.
            weighterobjects: Not referenced in this method.
            istraining: Not referenced in this method.
            treename: Key of the tree inside the file.
            removeTracks: If true, hits with ``recHitID <= -0.5`` are
                dropped as well.

        Returns:
            A tuple of three lists: the feature array followed by the
            individual truth arrays, a list holding the combined truth
            array, and an empty list.
        """
        fileTimeOut(filename, 10)  # 10 seconds for eos to recover

        tree = uproot.open(filename)[treename]
        print("n entries: ", tree.numentries)

        keep = (tree["recHitEnergy"]).array() > 0
        if removeTracks:
            keep = np.logical_and(keep, (tree["recHitID"]).array() > -0.5)

        def read(branch, **kwargs):
            # Flatten one branch with the common hit selection applied.
            return self.branchToFlatArray(tree[branch], False, keep, **kwargs)

        def pack(array):
            # Wrap a flat array in a SimpleArray using the shared row splits.
            wrapped = SimpleArray()
            wrapped.createFromNumpy(array, rs)
            return wrapped

        # The energy branch is read with the flag that also returns the
        # row splits used by every array below.
        energy, rs = self.branchToFlatArray(tree["recHitEnergy"], True, keep)

        # Branches are read in a fixed order; recHitDetID and recHitPad are
        # read but do not enter any output array.
        hit = {
            name: read(name)
            for name in ("recHitEta", "recHitTheta", "recHitR", "recHitX",
                         "recHitY", "recHitZ", "recHitDetID", "recHitTime",
                         "recHitID", "recHitPad")
        }

        assigned = {
            name: read(name)
            for name in ("truthHitAssignementIdx", "truthHitAssignedEnergies",
                         "truthHitAssignedX", "truthHitAssignedY",
                         "truthHitAssignedZ", "truthHitAssignedDirX",
                         "truthHitAssignedDirY", "truthHitAssignedDirZ",
                         "truthHitAssignedT", "truthHitAssignedEta",
                         "truthHitAssignedPhi", "truthHitAssignedDirEta",
                         "truthHitAssignedDepEnergies",
                         "truthHitAssignedDirR")
        }

        # Collapse the per-class PID scores to the index of the largest
        # entry (no one-hot encoding), kept as a float32 column.
        pid_scores = read("truthHitAssignedPIDs", is3d=True)
        pid_column = np.array(np.expand_dims(np.argmax(pid_scores, axis=-1),
                                             axis=1),
                              dtype='float32')

        ticl_idx = read("ticlHitAssignementIdx")
        ticl_energy = read("ticlHitAssignedEnergies")

        # for now the assigned energy is replaced by the deposited energy
        # (rechitsSum)
        assigned["truthHitAssignedEnergies"] = assigned[
            "truthHitAssignedDepEnergies"]

        feature_columns = [energy] + [
            hit[name]
            for name in (
                "recHitEta",
                "recHitID",  # indicator if it is track or not
                "recHitTheta",
                "recHitR",
                "recHitX",
                "recHitY",
                "recHitZ",
                "recHitTime",
            )
        ]
        features = np.concatenate(feature_columns, axis=-1)

        farr = pack(features)
        del features, feature_columns

        truth_columns = [
            assigned[name]
            for name in (
                "truthHitAssignementIdx",  # 0
                "truthHitAssignedEnergies",
                "truthHitAssignedX",
                "truthHitAssignedY",
                "truthHitAssignedZ",  # 4
                "truthHitAssignedDirX",
                "truthHitAssignedDirY",  # 6
                "truthHitAssignedDirZ",
                "truthHitAssignedEta",
                "truthHitAssignedPhi",
                "truthHitAssignedT",  # 10
                "truthHitAssignedDirEta",
                "truthHitAssignedDirR",
                "truthHitAssignedDepEnergies",  # 13
            )
        ]
        truth_columns += [
            ticl_idx,  # 14
            ticl_energy,  # 15
            pid_column,  # 16 - 16+n_classes #won't be used anymore
        ]
        truth = np.concatenate(truth_columns, axis=-1)

        t_idxarr = pack(assigned["truthHitAssignementIdx"])
        t_energyarr = pack(assigned["truthHitAssignedEnergies"])
        t_posarr = pack(
            np.concatenate(
                [assigned["truthHitAssignedX"], assigned["truthHitAssignedY"]],
                axis=-1))
        t_time = pack(assigned["truthHitAssignedT"])
        t_pid = pack(pid_column)

        # remaining truth is mostly for consistency in the plotting tools
        t_rest = pack(truth)

        inputs = [farr, t_idxarr, t_energyarr, t_posarr, t_time, t_pid]
        return inputs, [t_rest], []
Ejemplo n.º 5
0
    def base_convertFromSourceFile(self,
                                   filename,
                                   weighterobjects,
                                   istraining,
                                   treename="Events",
                                   removeTracks=True):
        """Convert one ROOT file into per-hit feature and truth arrays.

        Opens ``filename`` with uproot and reads, from the tree ``treename``,
        the ``RecHitHGC`` hit observables together with the
        ``MergedSimCluster_*`` and ``SimCluster_*`` branches. The result is
        one feature array and several truth arrays, all created with the same
        ``offsets``.

        Processing, in order:

        * The mask ``z < 0`` is registered through ``setSplitIdx`` before any
          split array is read (presumably to separate the two detector sides
          of each event -- confirm against ``setSplitIdx``).
        * A muon-corrected sim-cluster energy is built as
          ``boundaryEnergyNoMu`` plus the summed ``recEnergy`` of the matched
          unmerged sim clusters whose ``|pdgId|`` is 13.
        * Hits matched to a sim cluster that is not
          ``MergedSimCluster_isTrainable`` get cluster index -1 (noise).
        * For noise hits (index < 0) the truth x/y/z, the muon-corrected
          truth energy and the truth time are replaced by the hit's own
          x/y/z, energy and time.

        Args:
            filename: path of the ROOT file to convert.
            weighterobjects: not used by this method.
            istraining: not used by this method.
            treename: name of the tree inside the file.
            removeTracks: not used by this method.

        Returns:
            A tuple of three lists:
            ``[features, truth cluster index, truth energy, truth (x, y),
            truth time, truth pdgId]``, ``[full truth array]`` and ``[]``.
            Every entry of the first two lists is a ``SimpleArray``.
        """
        fileTimeOut(filename, 10)  #10 seconds for eos to recover
        tree = uproot.open(filename)[treename]

        hits = "RecHitHGC"
        recHitZUnsplit = self.hitObservable(tree,
                                            hits,
                                            "z",
                                            split=False,
                                            flatten=False)
        self.setSplitIdx(recHitZUnsplit < 0)

        recHitZ = self.splitJaggedArray(recHitZUnsplit)
        offsets = recHitZ.offsets
        recHitZ = np.expand_dims(recHitZ.content, axis=1)

        # NOTE(review): hitObservable() with default arguments is assumed to
        # return a (nHits, 1) array laid out like recHitZ above -- confirm.
        recHitX = self.hitObservable(tree, hits, "x")
        recHitY = self.hitObservable(tree, hits, "y")
        recHitEnergy = self.hitObservable(tree, hits, "energy")
        # NOTE(review): detId is read but never used below; the read is kept
        # because hitObservable() is not visible from this method.
        recHitDetaId = self.hitObservable(tree, hits, "detId")
        recHitTime = self.hitObservable(tree, hits, "time")
        recHitR = np.sqrt(recHitX * recHitX + recHitY * recHitY +
                          recHitZ * recHitZ)
        recHitTheta = np.arccos(recHitZ / recHitR)
        recHitEta = -np.log(np.tan(recHitTheta / 2))

        # Don't split this until the end, so it can be used to index the truth arrays
        recHitSimClusIdx = self.hitObservable(tree,
                                              hits,
                                              "BestMergedSimClusterIdx",
                                              split=False,
                                              flatten=False)

        simClusterDepEnergy = tree["MergedSimCluster_recEnergy"].array()
        simClusterEnergy = tree["MergedSimCluster_boundaryEnergy"].array()
        simClusterEnergyNoMu = tree[
            "MergedSimCluster_boundaryEnergyNoMu"].array()

        # Remove muon energy, add back muon deposited energy
        unmergedId = tree["SimCluster_pdgId"].array()
        unmergedDepE = tree["SimCluster_recEnergy"].array()
        unmergedMatchIdx = tree["MergedSimCluster_SimCluster_MatchIdx"].array()
        unmergedMatches = tree["MergedSimCluster_SimClusterNumMatch"].array()
        # NOTE: this is an alias, not a copy, so the masking on the next line
        # also modifies unmergedDepE; unmergedDepE is not used afterwards.
        unmergedDepEMuOnly = unmergedDepE
        unmergedDepEMuOnly[np.abs(unmergedId) != 13] = 0
        # Add another layer of nesting, then sum over all unmerged associated to merged
        unmergedDepEMuOnly = ak.JaggedArray.fromcounts(
            unmergedMatches.counts,
            ak.JaggedArray.fromcounts(
                unmergedMatches.content,
                unmergedDepEMuOnly[unmergedMatchIdx].flatten()))
        depEMuOnly = unmergedDepEMuOnly.sum()

        simClusterEnergyMuCorr = simClusterEnergyNoMu + depEMuOnly

        simClusterX = tree["MergedSimCluster_impactPoint_x"].array()
        simClusterY = tree["MergedSimCluster_impactPoint_y"].array()
        simClusterZ = tree["MergedSimCluster_impactPoint_z"].array()
        simClusterTime = tree["MergedSimCluster_impactPoint_t"].array()
        simClusterEta = tree["MergedSimCluster_impactPoint_eta"].array()
        simClusterPhi = tree["MergedSimCluster_impactPoint_phi"].array()
        simClusterPdgId = tree["MergedSimCluster_pdgId"].array()

        # Mark simclusters outside of volume or with very few hits as noise
        # Maybe not a good idea if the merged SC pdgId is screwed up
        # Probably removing neutrons is a good idea though

        #filter non-boundary positions. Hopefully working?
        goodSimClus = tree["MergedSimCluster_isTrainable"].array()
        # Don't split by index here to keep same dimensions as SimClusIdx
        markNoise = self.truthObjects(~goodSimClus,
                                      recHitSimClusIdx,
                                      False,
                                      split=False,
                                      flatten=False).astype(np.bool_)

        nbefore = (recHitSimClusIdx < 0).sum().sum()
        recHitSimClusIdx[markNoise] = -1
        nafter = (recHitSimClusIdx < 0).sum().sum()

        print("Number of noise hits before", nbefore, "after", nafter)
        print('removed another factor of', nafter / nbefore,
              ' bad simclusters')

        # NOTE(review): truthObjects() is assumed to look up the sim-cluster
        # value for every hit through recHitSimClusIdx and to use its third
        # argument as the value for unmatched hits -- confirm.
        recHitTruthPID = self.truthObjects(simClusterPdgId, recHitSimClusIdx,
                                           0.)
        recHitTruthEnergy = self.truthObjects(simClusterEnergy,
                                              recHitSimClusIdx, 0)
        recHitTruthDepEnergy = self.truthObjects(simClusterDepEnergy,
                                                 recHitSimClusIdx, 0)
        recHitTruthEnergyCorrMu = self.truthObjects(simClusterEnergyMuCorr,
                                                    recHitSimClusIdx, 0)
        recHitTruthX = self.truthObjects(simClusterX, recHitSimClusIdx, 0)
        recHitTruthY = self.truthObjects(simClusterY, recHitSimClusIdx, 0)
        recHitTruthZ = self.truthObjects(simClusterZ, recHitSimClusIdx, 0)
        recHitTruthTime = self.truthObjects(simClusterTime, recHitSimClusIdx,
                                            0)
        recHitTruthPhi = self.truthObjects(simClusterPhi, recHitSimClusIdx, 0)
        recHitTruthEta = self.truthObjects(simClusterEta, recHitSimClusIdx, 0)

        # Placeholder
        zeroFeature = np.zeros(shape=(len(recHitEnergy), 1), dtype='float32')

        features = np.concatenate(
            [
                recHitEnergy,
                recHitEta,
                zeroFeature,  #indicator if it is track or not
                recHitTheta,
                recHitR,
                recHitX,
                recHitY,
                recHitZ,
                recHitTime,
            ],
            axis=1)

        farr = SimpleArray(name="recHitFeatures")
        farr.createFromNumpy(features, offsets)
        del features

        recHitSimClusIdx = np.expand_dims(
            self.splitJaggedArray(recHitSimClusIdx).content.astype(np.int32),
            axis=1)

        # From here on the index array is a plain int32 numpy array and is no
        # longer modified, so both masks can be computed once.
        isNoise = recHitSimClusIdx < 0
        hasTruth = ~isNoise
        nHits = recHitSimClusIdx.shape[0]

        # Diagnostics only. Each print is guarded so that a file without hits,
        # or without any truth-matched hit, does not abort the conversion with
        # a ZeroDivisionError or an empty-array reduction error.
        if nHits > 0:
            print('noise', (100 * np.count_nonzero(isNoise)) // nHits,
                  '% of hits')
        if np.count_nonzero(hasTruth) > 0:
            truthAbsEta = np.abs(recHitTruthEta[hasTruth])
            print('truth eta min max', np.min(truthAbsEta),
                  np.max(truthAbsEta))
            truthZ = recHitTruthZ[hasTruth]
            print(
                'non-boundary truth positions',
                np.count_nonzero(np.abs(np.abs(truthZ) - 320) > 5) /
                truthZ.shape[0])
        else:
            print('no truth-matched hits, skipping truth diagnostics')

        #now all numpy
        # Noise hits have no sim cluster: use the hit's own values as truth.
        recHitTruthX[isNoise] = recHitX[isNoise]
        recHitTruthY[isNoise] = recHitY[isNoise]
        recHitTruthZ[isNoise] = recHitZ[isNoise]
        recHitTruthEnergyCorrMu[isNoise] = recHitEnergy[isNoise]
        recHitTruthTime[isNoise] = recHitTime[isNoise]

        # Column numbers in the trailing comments assume that every entry
        # contributes exactly one column.
        truth = np.concatenate(
            [
                np.array(recHitSimClusIdx, dtype='float32'),  # 0
                recHitTruthEnergyCorrMu,
                recHitTruthX,
                recHitTruthY,
                recHitTruthZ,  #4
                zeroFeature,  #truthHitAssignedDirX,
                zeroFeature,  #6
                zeroFeature,
                recHitTruthEta,
                recHitTruthPhi,
                recHitTruthTime,  #10
                zeroFeature,
                zeroFeature,
                recHitTruthDepEnergy,  #13
                zeroFeature,  #14
                zeroFeature,  #15
                recHitTruthPID  #16 - 16+n_classes #won't be used anymore
            ],
            axis=1)

        t_idxarr = SimpleArray(recHitSimClusIdx,
                               offsets,
                               name="recHitTruthClusterIdx")

        # NOTE(review): this array holds the uncorrected boundary energy (and
        # 0 for noise hits), whereas column 1 of ``truth`` holds the
        # muon-corrected energy with noise hits set to the hit energy --
        # confirm that the difference is intended.
        t_energyarr = SimpleArray(name="recHitTruthEnergy")
        t_energyarr.createFromNumpy(recHitTruthEnergy, offsets)

        t_posarr = SimpleArray(name="recHitTruthPosition")
        t_posarr.createFromNumpy(
            np.concatenate([recHitTruthX, recHitTruthY], axis=-1), offsets)

        t_time = SimpleArray(name="recHitTruthTime")
        t_time.createFromNumpy(recHitTruthTime, offsets)

        t_pid = SimpleArray(name="recHitTruthID")
        t_pid.createFromNumpy(recHitTruthPID, offsets)

        #remaining truth is mostly for consistency in the plotting tools
        t_rest = SimpleArray(name="recHitTruth")
        t_rest.createFromNumpy(truth, offsets)

        return [farr, t_idxarr, t_energyarr, t_posarr, t_time,
                t_pid], [t_rest], []
Ejemplo n.º 6
0
    def base_convertFromSourceFile(self,
                                   filename,
                                   weighterobjects,
                                   istraining,
                                   onlytruth,
                                   treename="WindowNTupler/tree"):
        """Convert one ROOT file into one feature and one truth array.

        Opens ``filename`` with uproot, keeps only hits with
        ``recHitEnergy > 0`` (and, if ``onlytruth`` is set, with
        ``truthHitAssignementIdx > -0.1``), and reads every per-hit branch
        through ``branchToFlatArray`` with that selection.

        The shower energy stored in the output is ``rechitsSum`` as returned
        by ``findRechitsSum``; the energy read from
        ``truthHitAssignedEnergies`` is kept separately in truth column 15.
        A hit is flagged as spectator (1) when its energy is below 0.0005
        times ``rechitsSum``, otherwise 0.

        Args:
            filename: path of the ROOT file to convert.
            weighterobjects: not used by this method.
            istraining: not used by this method.
            onlytruth: if true, additionally require
                ``truthHitAssignementIdx > -0.1``.
            treename: name of the tree inside the file.

        Returns:
            ``([features], [truth], [])`` where both arrays are
            ``SimpleArray`` objects created with the row splits returned by
            ``branchToFlatArray`` for ``recHitEnergy``.
        """
        fileTimeOut(filename, 10)  #10 seconds for eos to recover

        tree = uproot.open(filename)[treename]
        nevents = tree.numentries

        print("n entries: ", nevents)

        #remove zero energy hits from removing of bad simclusters
        if onlytruth:
            selection = np.logical_and(
                tree["truthHitAssignementIdx"].array() > -0.1,
                tree["recHitEnergy"].array() > 0)
        else:
            selection = tree["recHitEnergy"].array() > 0

        def flat(branchname):
            # Read one branch with the common selection, without row splits.
            return self.branchToFlatArray(tree[branchname], False, selection)

        # Only this first read asks for the row splits (second argument True).
        recHitEnergy, rs = self.branchToFlatArray(tree["recHitEnergy"], True,
                                                  selection)
        recHitEta = flat("recHitEta")
        # NOTE(review): recHitTheta, recHitDetID, recHitID and recHitPad are
        # read but never used below; the reads are kept because
        # branchToFlatArray() is not visible from this method.
        recHitTheta = flat("recHitTheta")
        recHitR = flat("recHitR")
        recHitX = flat("recHitX")
        recHitY = flat("recHitY")
        recHitZ = flat("recHitZ")
        recHitDetID = flat("recHitDetID")
        recHitTime = flat("recHitTime")
        recHitID = flat("recHitID")
        recHitPad = flat("recHitPad")

        truthHitAssignementIdx = flat("truthHitAssignementIdx")
        truthHitAssignedEnergies = flat("truthHitAssignedEnergies")
        truthHitAssignedX = flat("truthHitAssignedX")
        truthHitAssignedY = flat("truthHitAssignedY")
        truthHitAssignedZ = flat("truthHitAssignedZ")
        truthHitAssignedDirX = flat("truthHitAssignedDirX")
        truthHitAssignedDirY = flat("truthHitAssignedDirY")
        truthHitAssignedDirZ = flat("truthHitAssignedDirZ")
        truthHitAssignedEta = flat("truthHitAssignedEta")
        truthHitAssignedPhi = flat("truthHitAssignedPhi")
        truthHitAssignedDirEta = flat("truthHitAssignedDirEta")
        truthHitAssignedDirPhi = flat("truthHitAssignedDirPhi")
        truthHitAssignedDirR = flat("truthHitAssignedDirR")

        # The PID branch has one more dimension than the others and is read
        # with is3d=True. Class names listed by the original author:
        #type_ambiguous,
        #type_electron,
        #type_photon,
        #type_mip,
        #type_charged_hadron,
        #type_neutral_hadron,
        truthHitAssignedPIDs = self.branchToFlatArray(
            tree["truthHitAssignedPIDs"], False, selection, is3d=True)

        truthHitAssignedT = flat("truthHitAssignedT")

        ticlHitAssignementIdx = flat("ticlHitAssignementIdx")
        ticlHitAssignedEnergies = flat("ticlHitAssignedEnergies")

        # For calculating spectators
        # NOTE(review): findRechitsSum() presumably returns, per hit, the
        # summed rechit energy of the shower the hit is assigned to -- confirm.
        rechitsSum = findRechitsSum(truthHitAssignementIdx, recHitEnergy, rs)
        spectator = np.where(recHitEnergy < 0.0005 * rechitsSum,
                             np.ones_like(recHitEnergy),
                             np.zeros_like(recHitEnergy))

        # For now the summed rechit energy is used as the shower energy; the
        # energy from the tree stays available as truth column 15.
        truthShowerEnergies = rechitsSum

        # NOTE(review): columns 2 and 3 of the feature array carry truth
        # information (assignment index and shower energy) -- confirm that
        # this is intended for the models consuming these features.
        features = np.concatenate(
            [
                recHitEnergy,
                recHitEta,
                truthHitAssignementIdx,  #no phi anymore!
                truthShowerEnergies,
                recHitR,
                recHitX,
                recHitY,
                recHitZ,
                recHitTime
            ],
            axis=-1)

        farr = SimpleArray()
        farr.createFromNumpy(features, rs)
        print("features", features.shape)

        del features

        # Column numbers in the trailing comments assume that every entry
        # before the PIDs contributes exactly one column.
        truth = np.concatenate(
            [
                truthHitAssignementIdx,  # 0
                truthShowerEnergies,
                truthHitAssignedX,
                truthHitAssignedY,
                truthHitAssignedZ,  #4
                truthHitAssignedDirX,
                truthHitAssignedDirY,  #6
                truthHitAssignedDirZ,
                truthHitAssignedEta,
                truthHitAssignedPhi,
                truthHitAssignedT,  #10
                truthHitAssignedDirEta,
                truthHitAssignedDirPhi,  #12
                truthHitAssignedDirR,
                spectator,  #14
                truthHitAssignedEnergies,  #15
                rechitsSum,  #16
                ticlHitAssignementIdx,  #17
                ticlHitAssignedEnergies,  #18
                truthHitAssignedPIDs  #19 - 19+n_classes
            ],
            axis=-1)

        tarr = SimpleArray()
        tarr.createFromNumpy(truth, rs)

        print("truth", truth.shape)

        return [farr], [tarr], []
Ejemplo n.º 7
0
    def base_convertFromSourceFile(self,
                                   filename,
                                   weighterobjects,
                                   istraining,
                                   onlytruth,
                                   treename="WindowNTupler/tree",
                                   removeTracks=True):
        """Convert one ROOT file into one feature and one truth array.

        Opens ``filename`` with uproot, keeps only hits with
        ``recHitEnergy > 0`` (plus ``truthHitAssignementIdx > -0.1`` if
        ``onlytruth`` is set, plus ``recHitID > -0.5`` if ``removeTracks`` is
        set), and reads every per-hit branch through ``branchToFlatArray``
        with that selection.

        Entries with ``recHitID < 0`` are treated as tracks: they are flagged
        as spectators and their energy is set to 0 in the feature array. The
        shower energy stored in truth column 1 is
        ``truthHitAssignedDepEnergies``.

        Args:
            filename: path of the ROOT file to convert.
            weighterobjects: not used by this method.
            istraining: not used by this method.
            onlytruth: if true, additionally require
                ``truthHitAssignementIdx > -0.1``.
            treename: name of the tree inside the file.
            removeTracks: if true, drop entries with ``recHitID <= -0.5``
                before anything else is read.

        Returns:
            ``([features], [truth], [])`` where both arrays are
            ``SimpleArray`` objects created with the row splits returned by
            ``branchToFlatArray`` for ``recHitEnergy``.
        """
        fileTimeOut(filename, 10)  #10 seconds for eos to recover

        tree = uproot.open(filename)[treename]
        nevents = tree.numentries

        print("n entries: ", nevents)

        #remove zero energy hits from removing of bad simclusters
        if onlytruth:
            selection = np.logical_and(
                tree["truthHitAssignementIdx"].array() > -0.1,
                tree["recHitEnergy"].array() > 0)
        else:
            selection = tree["recHitEnergy"].array() > 0

        if removeTracks:
            selection = np.logical_and(selection,
                                       tree["recHitID"].array() > -0.5)

        def flat(branchname):
            # Read one branch with the common selection, without row splits.
            return self.branchToFlatArray(tree[branchname], False, selection)

        # Only this first read asks for the row splits (second argument True).
        recHitEnergy, rs = self.branchToFlatArray(tree["recHitEnergy"], True,
                                                  selection)
        recHitEta = flat("recHitEta")
        recHitTheta = flat("recHitTheta")
        recHitR = flat("recHitR")
        recHitX = flat("recHitX")
        recHitY = flat("recHitY")
        recHitZ = flat("recHitZ")
        # NOTE(review): recHitDetID and recHitPad are read but never used
        # below; the reads are kept because branchToFlatArray() is not
        # visible from this method.
        recHitDetID = flat("recHitDetID")
        recHitTime = flat("recHitTime")
        recHitID = flat("recHitID")
        recHitPad = flat("recHitPad")

        truthHitAssignementIdx = flat("truthHitAssignementIdx")
        truthHitAssignedEnergies = flat("truthHitAssignedEnergies")
        truthHitAssignedX = flat("truthHitAssignedX")
        truthHitAssignedY = flat("truthHitAssignedY")
        truthHitAssignedZ = flat("truthHitAssignedZ")
        truthHitAssignedDirX = flat("truthHitAssignedDirX")
        truthHitAssignedDirY = flat("truthHitAssignedDirY")
        truthHitAssignedDirZ = flat("truthHitAssignedDirZ")
        truthHitAssignedEta = flat("truthHitAssignedEta")
        truthHitAssignedPhi = flat("truthHitAssignedPhi")
        truthHitAssignedDirEta = flat("truthHitAssignedDirEta")
        truthHitAssignedDepEnergies = flat("truthHitAssignedDepEnergies")
        truthHitAssignedDirR = flat("truthHitAssignedDirR")

        # The PID branch has one more dimension than the others and is read
        # with is3d=True. Class names listed by the original author:
        #type_ambiguous,
        #type_electron,
        #type_photon,
        #type_mip,
        #type_charged_hadron,
        #type_neutral_hadron,
        truthHitAssignedPIDs = self.branchToFlatArray(
            tree["truthHitAssignedPIDs"], False, selection, is3d=True)

        # Layout of the producer-side record as noted by the original author
        # (left column: track entry, right column: rechit entry):
        #
        #     *(data++) = t->obj->p();                  *(data++) = recHit->hit->energy();
        ####  *(data++) = t->pos.eta();                 *(data++) = recHit->pos.eta();
        ####  *(data++) = t->pos.phi();                 *(data++) = recHit->pos.phi();
        ####  *(data++) = t->pos.theta();               *(data++) = recHit->pos.theta();
        ####  *(data++) = t->pos.mag();                 *(data++) = recHit->pos.mag();
        ####  *(data++) = t->pos.x();                   *(data++) = recHit->pos.x();
        ####  *(data++) = t->pos.y();                   *(data++) = recHit->pos.y();
        ####  *(data++) = t->pos.z();                   *(data++) = recHit->pos.z();
        ####  *(data++) = t->obj->charge();             *(data++) = (float)recHit->hit->detid();
        ####  *(data++) = t->obj->chi2();               *(data++) = recHit->hit->time();
        ####  *(data++) = -1.; //track ID bit           *(data++) = 0.; //rechit ID bit
        ####  *(data++) = 0.; //pad                     *(data++) = 0.; //pad
        #

        truthHitAssignedT = flat("truthHitAssignedT")

        ticlHitAssignementIdx = flat("ticlHitAssignementIdx")
        ticlHitAssignedEnergies = flat("ticlHitAssignedEnergies")

        ############ special track treatment for now
        #make tracks the only spectators, and set their 'energy' to zero
        # With removeTracks=True, entries with recHitID <= -0.5 were already
        # dropped by the selection above.
        isTrack = recHitID < 0
        spectator = np.where(isTrack, np.ones_like(isTrack),
                             np.zeros_like(isTrack))
        recHitEnergy[isTrack] = 0.  #don't use track momenta just use as seeds
        ##############

        # For now the deposited energy is used as the shower energy; the
        # energy from truthHitAssignedEnergies stays available as truth
        # column 15.
        truthShowerEnergies = truthHitAssignedDepEnergies

        features = np.concatenate(
            [
                recHitEnergy,
                recHitEta,
                recHitID,  #indicator if it is track or not
                recHitTheta,
                recHitR,
                recHitX,
                recHitY,
                recHitZ,
                recHitTime
            ],
            axis=-1)

        farr = SimpleArray()
        farr.createFromNumpy(features, rs)
        print("features", features.shape)

        del features

        # Column numbers in the trailing comments assume that every entry
        # before the PIDs contributes exactly one column.
        truth = np.concatenate(
            [
                truthHitAssignementIdx,  # 0
                truthShowerEnergies,
                truthHitAssignedX,
                truthHitAssignedY,
                truthHitAssignedZ,  #4
                truthHitAssignedDirX,
                truthHitAssignedDirY,  #6
                truthHitAssignedDirZ,
                truthHitAssignedEta,
                truthHitAssignedPhi,
                truthHitAssignedT,  #10
                truthHitAssignedDirEta,
                np.zeros_like(truthHitAssignedDirEta),  #12
                truthHitAssignedDirR,
                spectator,  #14
                truthHitAssignedEnergies,  #15
                truthHitAssignedDepEnergies,  #16
                ticlHitAssignementIdx,  #17
                ticlHitAssignedEnergies,  #18
                truthHitAssignedPIDs  #19 - 19+n_classes
            ],
            axis=-1)

        tarr = SimpleArray()
        tarr.createFromNumpy(truth, rs)

        print("truth", truth.shape)

        return [farr], [tarr], []