Python TrainData_NanoML Examples, datastructures.TrainData_NanoML Python Examples

Example #1

0

Show file

    def _make_plot(self, counter, feat, predicted, truth):
        """Write a 3D scatter of one predicted coordinate set to an HTML file.

        The truth and feature dictionaries are rebuilt from ``feat`` and
        merged, the coordinates are taken from
        ``predicted[self.use_prediction_idx]`` and the figure is written to
        ``<outputfile><keep_counter>_coords_<use_prediction_idx>.html``.
        Coordinate sets that are not 3-dimensional are reported and skipped.
        ``counter`` and ``truth`` are not used here.

        Any exception is printed and then re-raised.
        """
        try:
            train_data = TrainData_NanoML()  # provides the dict builders

            # truth entries first, then features (same merge order as before)
            columns = {}
            columns.update(train_data.createTruthDict(feat))
            columns.update(
                train_data.createFeatureDict(feat, addxycomb=False))
            columns['recHitLogEnergy'] = np.log(columns['recHitEnergy'] + 1)

            coords = predicted[self.use_prediction_idx]
            if coords.shape[-1] != 3:
                # 2D and >3D TBI
                print(
                    "plotGravNetCoordsDuringTraining only supports 3D coordinates"
                )
                return  # not supported

            # keep every coordinate as its own single-column slice
            for axis, label in enumerate(('coord A', 'coord B', 'coord C')):
                columns[label] = coords[:, axis:axis + 1]

            df = pd.DataFrame(
                np.concatenate(list(columns.values()), axis=1),
                columns=list(columns))
            shuffle_truth_colors(df)

            fig = px.scatter_3d(
                df,
                x="coord A",
                y="coord B",
                z="coord C",
                color="truthHitAssignementIdx",
                size="recHitLogEnergy",
                symbol="recHitID",
                template='plotly_dark',
                color_continuous_scale=px.colors.sequential.Rainbow)
            fig.update_traces(marker=dict(line=dict(width=0)))

            ccfile = (self.outputfile + str(self.keep_counter) + "_coords_" +
                      str(self.use_prediction_idx) + ".html")
            fig.write_html(ccfile)

            if self.publish is not None:
                publish(ccfile, self.publish)

        except Exception as err:
            print(err)
            raise err

Example #2

0

Show file

def create_outputs(x,
                   feat,
                   energy=None,
                   n_ccoords=3,
                   n_classes=4,
                   td=None,
                   add_features=True,
                   fix_distance_scale=False,
                   scale_energy=False,
                   energy_factor=True,
                   energy_proxy=None,
                   name_prefix="output_module"):
    '''
    Attach the output heads to the tensor ``x``.

    Arguments:
      x: tensor every head is built on.
      feat: raw feature input, turned into a dict with
        ``td.createFeatureDict``.
      energy: if not None, the energy output is multiplied by this tensor.
      n_ccoords: number of units of the cluster-coordinate head.
      n_classes: number of units of the softmax class head.
      td: object providing ``createFeatureDict``; a fresh
        ``TrainData_NanoML`` is created when None.
      add_features: if True, ``feat['recHitXY']`` is added to the
        position output.
      fix_distance_scale: if True, the distance output is all ones;
        otherwise it is 2 * sigmoid of a dense layer.
      scale_energy: if True, the energy output is multiplied by 10.
      energy_factor: if True, the energy head uses a relu activation.
        Exactly one of ``scale_energy`` / ``energy_factor`` must be set.
      energy_proxy: optional tensor concatenated with ``x`` as input of
        the energy head; ``x`` alone is used when None.
      name_prefix: prefix of the names of the named layers.

    returns pred_beta, pred_ccoords, pred_dist, pred_energy, pred_pos,
    pred_time, pred_id
    '''
    assert scale_energy != energy_factor, \
        "exactly one of scale_energy and energy_factor must be True"

    # created per call: a default built in the signature would be evaluated
    # once at definition time and shared by all calls
    if td is None:
        td = TrainData_NanoML()

    feat = td.createFeatureDict(feat)

    pred_beta = Dense(1, activation='sigmoid', name=name_prefix + '_beta')(x)
    pred_ccoords = Dense(
        n_ccoords,
        #this initialisation is much better than standard glorot
        kernel_initializer=EyeInitializer(stddev=0.001),
        use_bias=False,
        name=name_prefix + '_clustercoords')(x)  #bias has no effect

    if energy_proxy is None:
        energy_proxy = x
    else:
        energy_proxy = Concatenate()([energy_proxy, x])
    energy_act = 'relu' if energy_factor else None
    pred_energy = Dense(
        1,
        name=name_prefix + '_energy',
        bias_initializer='ones',  #no effect if full scale, useful if corr factor
        activation=energy_act)(energy_proxy)
    if scale_energy:
        pred_energy = ScalarMultiply(10.)(pred_energy)
    if energy is not None:
        pred_energy = Multiply()([pred_energy, energy])

    pred_pos = Dense(2, use_bias=False, name=name_prefix + '_pos')(x)
    pred_time = ScalarMultiply(10.)(Dense(1)(x))

    if add_features:
        pred_pos = Add()([feat['recHitXY'], pred_pos])
    pred_id = Dense(n_classes,
                    activation="softmax",
                    name=name_prefix + '_class')(x)

    pred_dist = OnesLike()(pred_time)
    if not fix_distance_scale:
        #this needs to be bound, otherwise it is fully anti-correlated
        #with the coordinates scale
        pred_dist = ScalarMultiply(2.)(Dense(1,
                                             activation='sigmoid',
                                             name=name_prefix + '_dist')(x))
    return pred_beta, pred_ccoords, pred_dist, pred_energy, pred_pos, pred_time, pred_id

Example #3

0

Show file

def invokeGen(infile):
    """Set up a batch-size-1 generator for ``infile``.

    The file type is chosen by extension:

    * ``.djcdc``: opened as a ``DataCollection``; its data class and
      generator are used.
    * ``.djctd``: read into a ``TrainData_NanoML`` that is buffered in a
      ``TrainDataGenerator``.
    * ``.root``: converted with ``readFromSourceFile``, written next to the
      input as ``<infile>.djctd``, read back and buffered as above.

    Returns:
        Tuple ``(gen.feedTrainData, number of batches, td)``.

    Raises:
        ValueError: if ``infile`` has none of the supported extensions.
    """
    if infile.endswith('.djcdc'):
        dc = DataCollection(infile)
        td = dc.dataclass()
        tdclass = dc.dataclass
        dc.setBatchSize(1)
        gen = dc.invokeGenerator()
    elif infile.endswith('.djctd'):
        td = TrainData_NanoML()
        tdclass = TrainData_NanoML
        td.readFromFile(infile)
        gen = TrainDataGenerator()
        gen.setBatchSize(1)
        gen.setBuffer(td)
    elif infile.endswith('.root'):
        print('reading from root file')
        td = TrainData_NanoML()
        tdclass = TrainData_NanoML
        td.readFromSourceFile(infile, {}, True)
        converted = infile + '.djctd'
        td.writeToFile(converted)
        td.readFromFile(converted)
        gen = TrainDataGenerator()
        gen.setBatchSize(1)
        gen.setBuffer(td)
    else:
        # without this branch `gen` would be unbound below
        raise ValueError(
            "unsupported input file '" + str(infile) +
            "': expected a .djcdc, .djctd or .root file")

    gen.setSkipTooLargeBatches(False)
    nevents = gen.getNBatches()
    gen.cast_to = tdclass
    return gen.feedTrainData, nevents, td

Example #4

0

Show file

File: june_format_example_nf_pca_double_coords.py Project: kdlong/HGCalML

from plotting_callbacks import plotEventDuringTraining, plotGravNetCoordsDuringTraining, plotClusteringDuringTraining
from DeepJetCore.DJCLayers import StopGradient,ScalarMultiply, SelectFeatures, ReduceSumEntirely

from clr_callback import CyclicLR
from lossLayers import LLFullObjectCondensation, LLClusterCoordinates

from model_blocks import create_outputs

from Layers import LocalClusterReshapeFromNeighbours2,ManualCoordTransform,RaggedGlobalExchange,LocalDistanceScaling,CheckNaN,NeighbourApproxPCA,LocalClusterReshapeFromNeighbours,GraphClusterReshape, SortAndSelectNeighbours, LLLocalClusterCoordinates,DistanceWeightedMessagePassing,CollectNeighbourAverageAndMax,CreateGlobalIndices, LocalClustering, SelectFromIndices, MultiBackGather, KNN, MessagePassing, RobustModel
from Layers import GooeyBatchNorm #make a new line
from datastructures import TrainData_OC
import sql_credentials
from datetime import datetime


td=TrainData_NanoML()
'''

'''


# NOTE(review): only the signature and the first two statements of this
# function are visible in this excerpt; the rest of the body is not shown.
def gravnet_model(Inputs,
                  viscosity=0.2,
                  print_viscosity=False,
                  fluidity_decay=1e-3,
                  max_viscosity=0.9 # to start with
                  ):

    feature_dropout=-1.
    # NOTE(review): the trailing comma makes this the one-element tuple
    # (True,), not the bool True -- confirm whether that is intended.
    addBackGatherInfo=True,

Example #5

0

Show file

    def _make_plot(self, counter, feat, predicted,
                   truth):  #all these are lists and also include row splits
        """Write truth and back-gather 3D scatter plots as HTML files.

        Feature and truth dictionaries are rebuilt from ``feat`` and merged
        with ``predicted[self.use_backgather_idx]``. Hits whose recHitX,
        recHitY and recHitZ all equal ``cluster_space.noise_coord`` are
        dropped. Two files are written:
        ``<outputfile><keep_counter>_truth.html`` and
        ``<outputfile><keep_counter>_backgather.html``; only the latter is
        published when ``self.publish`` is set. ``counter`` and ``truth``
        are not used here.

        Any exception is printed and then re-raised.
        """
        try:
            td = TrainData_NanoML()  #contains all dicts
            #row splits not needed
            feats = td.createFeatureDict(feat, addxycomb=False)
            backgather = predicted[self.use_backgather_idx]
            truths = td.createTruthDict(feat)

            data = {}
            data.update(feats)
            data.update(truths)

            # every entry must be 2D for the column-wise concatenate below
            if len(backgather.shape) < 2:
                backgather = np.expand_dims(backgather, axis=1)

            data['recHitLogEnergy'] = np.log(data['recHitEnergy'] + 1)
            data['hitBackGatherIdx'] = backgather

            from globals import cluster_space as cs
            removednoise = np.logical_and(data["recHitX"] == cs.noise_coord,
                                          data["recHitY"] == cs.noise_coord)
            removednoise = np.logical_and(data["recHitZ"] == cs.noise_coord,
                                          removednoise)
            # Element-wise inversion: Python's `not` on an array raises for
            # more than one element. The mask is reduced to one boolean per
            # row so that the selected entries stay 2D.
            # assumes the recHit coordinate entries have a single column
            keep = np.logical_not(removednoise[:, 0])
            data = {k: v[keep] for k, v in data.items()}

            df = pd.DataFrame(np.concatenate([data[k] for k in data], axis=1),
                              columns=[k for k in data])

            shuffle_truth_colors(df)

            fig = px.scatter_3d(
                df,
                x="recHitX",
                y="recHitZ",
                z="recHitY",
                color="truthHitAssignementIdx",
                size="recHitLogEnergy",
                symbol="recHitID",
                #template='plotly_dark',
                color_continuous_scale=px.colors.sequential.Rainbow)
            fig.update_traces(marker=dict(line=dict(width=0)))
            fig.write_html(self.outputfile + str(self.keep_counter) +
                           "_truth.html")

            bgfile = self.outputfile + str(
                self.keep_counter) + "_backgather.html"
            #now the cluster indices

            shuffle_truth_colors(df, "hitBackGatherIdx")

            fig = px.scatter_3d(
                df,
                x="recHitX",
                y="recHitZ",
                z="recHitY",
                color="hitBackGatherIdx",
                size="recHitLogEnergy",
                #template='plotly_dark',
                color_continuous_scale=px.colors.sequential.Rainbow)
            fig.update_traces(marker=dict(line=dict(width=0)))
            fig.write_html(bgfile)

            if self.publish is not None:
                publish(bgfile, self.publish)

        except Exception as e:
            print(e)
            raise

Example #6

0

Show file

    def _make_plot(self, counter, feat, predicted, truth):
        """Histogram the per-event distance outputs, overall and per id.

        The truth dict built from ``feat`` is extended with the predicted
        beta, cluster coordinates and distance, plus an event id derived
        from the row splits in ``feat[-1]``. ``self.run_per_event`` is run
        on each event's sub-dict and its three outputs are concatenated.
        For every unique value of the third output, and once more for the
        value 0 (meaning: no selection), the first two outputs are
        histogrammed into ``<outputfile><value>_cluster.pdf``.
        ``counter`` and ``truth`` are not used here.
        """
        train_data = TrainData_NanoML()

        cdata = train_data.createTruthDict(feat)
        for target, source in (('predBeta', 'pred_beta'),
                               ('predCCoords', 'pred_ccoords'),
                               ('predD', 'pred_dist')):
            cdata[target] = predicted[source]

        # The last feature entry has to be the row splits. Row splits from
        # the prediction dict cannot be used: they are adapted per batch.
        row_splits = feat[-1]

        # one block of identical event ids per row-split interval
        n_events = len(row_splits) - 1
        cdata['eid'] = np.concatenate([
            np.full((row_splits[ev + 1, 0] - row_splits[ev, 0], ),
                    ev,
                    dtype='int64') for ev in range(n_events)
        ],
                                      axis=0)

        same_parts, other_parts, pid_parts = [], [], []
        for ev in range(n_events):
            same, other, pid = self.run_per_event(
                self.subdict(cdata, ev == cdata['eid']))
            same_parts.append(same)
            other_parts.append(other)
            pid_parts.append(pid)

        vdtom = np.concatenate(same_parts, axis=0)
        did = np.concatenate(other_parts, axis=0)
        pids = np.concatenate(pid_parts, axis=0)[:, 0]

        # the appended 0 selects everything in the loop below
        upids = np.unique(pids).tolist() + [0]

        import matplotlib
        matplotlib.use('Agg')
        import matplotlib.pyplot as plt
        print(upids)

        for p in upids:
            if p:
                selection = pids == p
                svdtom, sdid = vdtom[selection], did[selection]
            else:
                svdtom, sdid = vdtom, did

            if len(svdtom) == 0:
                continue

            fig = plt.figure()
            for values, colour, label in ((svdtom, 'tab:blue', 'same'),
                                          (sdid, 'tab:orange', 'other')):
                plt.hist(values,
                         bins=51,
                         color=colour,
                         alpha=0.5,
                         label=label)
            plt.yscale('log')
            plt.xlabel('normalised distance')
            plt.ylabel('A.U.')
            plt.legend()

            ccfile = self.outputfile + str(p) + '_cluster.pdf'
            plt.savefig(ccfile)

            # release the figure before the next iteration
            plt.cla()
            plt.clf()
            plt.close(fig)

            if self.publish is not None:
                publish(ccfile, self.publish)

Example #7

0

Show file

    def _make_plot(self, counter, feat, predicted, truth):
        """Write three 3D scatter plots for one prediction as HTML files.

        ``predicted`` is read as a dict with the keys pred_beta,
        pred_energy_corr_factor, pred_pos, pred_time, pred_dist and
        pred_ccoords. Features, truth and predictions are merged into one
        table. If the cluster coordinates are not 3-dimensional the work is
        handed to ``self.projection_plot`` instead. Otherwise rows whose
        three cluster coordinates all equal ``cluster_space.noise_coord``
        are removed and these files are written (and published when
        ``self.publish`` is set), each prefixed by
        ``<outputfile><keep_counter>``:

        * ``_ccoords.html``: cluster space, marker size from hit energy
        * ``_ccoords_betasize.html``: cluster space, marker size from beta
        * ``_truth.html``: hit positions

        ``counter`` and ``truth`` are not used here. Any exception is
        printed and then re-raised.
        """
        try:
            td = TrainData_NanoML()  # contains all dicts
            # row splits not needed
            feats = td.createFeatureDict(feat, addxycomb=False)
            truths = td.createTruthDict(feat)

            predBeta = predicted['pred_beta']

            print('>>>> plotting cluster coordinates... average beta',
                  np.mean(predBeta), ' lowest beta ', np.min(predBeta),
                  'highest beta', np.max(predBeta))

            predEnergy = predicted['pred_energy_corr_factor']
            pred_pos = predicted['pred_pos']
            predT = predicted['pred_time']
            predD = predicted['pred_dist']
            predCCoords = predicted['pred_ccoords']

            # offset so that hits with small beta do not disappear
            beta_offset = predBeta + 0.05
            beta_offset_sq = beta_offset**2

            data = dict(feats)
            data.update(truths)
            data.update({
                'recHitLogEnergy':
                np.log(data['recHitEnergy'] + 1),
                'predBeta':
                predBeta,
                'predBeta+0.05':
                beta_offset,
                'predEnergy':
                predEnergy,
                'predX':
                pred_pos[:, 0:1],
                'predY':
                pred_pos[:, 1:2],
                'predT':
                predT,
                'predD':
                predD,
                '(predBeta+0.05)**2':
                beta_offset_sq,
                '(thresh(predBeta)+0.05))**2':
                np.where(predBeta > self.beta_threshold, beta_offset_sq, 0.),
            })

            if predCCoords.shape[-1] != 3:
                self.projection_plot(data, predCCoords)
                return

            for axis, key in enumerate(
                ('predCCoordsX', 'predCCoordsY', 'predCCoordsZ')):
                data[key] = predCCoords[:, axis:axis + 1]

            from globals import cluster_space as cs
            is_noise = np.logical_and(
                data["predCCoordsZ"] == cs.noise_coord,
                np.logical_and(data["predCCoordsX"] == cs.noise_coord,
                               data["predCCoordsY"] == cs.noise_coord))
            # one boolean per row; True for rows that are kept
            keep = np.logical_not(is_noise[:, 0])
            data = {key: values[keep] for key, values in data.items()}

            df = pd.DataFrame(np.concatenate(list(data.values()), axis=1),
                              columns=list(data))
            shuffle_truth_colors(df)

            def write_and_publish(figure, suffix):
                # shared tail of all three plots: style, write, publish
                figure.update_traces(marker=dict(line=dict(width=0)))
                path = self.outputfile + str(self.keep_counter) + suffix
                figure.write_html(path)
                if self.publish is not None:
                    publish(path, self.publish)

            hover_data = [
                'predBeta', 'predD', 'predEnergy', 'truthHitAssignedEnergies',
                'predT', 'truthHitAssignedT', 'predX', 'truthHitAssignedX',
                'predY', 'truthHitAssignedY', 'truthHitAssignementIdx'
            ]

            # cluster space, marker size from the hit energy
            write_and_publish(
                px.scatter_3d(
                    df,
                    x="predCCoordsX",
                    y="predCCoordsY",
                    z="predCCoordsZ",
                    color="truthHitAssignementIdx",
                    size="recHitLogEnergy",
                    symbol="recHitID",
                    hover_data=hover_data,
                    template='plotly_dark',
                    color_continuous_scale=px.colors.sequential.Rainbow),
                "_ccoords.html")

            # cluster space, marker size from beta
            write_and_publish(
                px.scatter_3d(
                    df,
                    x="predCCoordsX",
                    y="predCCoordsY",
                    z="predCCoordsZ",
                    color="truthHitAssignementIdx",
                    size="(predBeta+0.05)**2",
                    hover_data=hover_data,
                    symbol="recHitID",
                    template='plotly_dark',
                    color_continuous_scale=px.colors.sequential.Rainbow),
                "_ccoords_betasize.html")

            # hit positions
            write_and_publish(
                px.scatter_3d(
                    df,
                    x="recHitX",
                    y="recHitZ",
                    z="recHitY",
                    color="truthHitAssignementIdx",
                    size="recHitLogEnergy",
                    symbol="recHitID",
                    hover_data=[
                        'predBeta', 'predEnergy', 'predX', 'predY',
                        'truthHitAssignementIdx', 'truthHitAssignedEnergies',
                        'truthHitAssignedX', 'truthHitAssignedY'
                    ],
                    template='plotly_dark',
                    color_continuous_scale=px.colors.sequential.Rainbow),
                "_truth.html")

        except Exception as err:
            print(err)
            raise err

Example #8

0

Show file

# Command-line setup: read the input file and make sure the output
# directory exists.
parser = ArgumentParser('')
parser.add_argument('--inputFile')
parser.add_argument('--outputDir')
#parser.add_argument('--outName', default='recHits_3D.html',type=str)
args = parser.parse_args()

infile = args.inputFile
outdir = args.outputDir + "/"
events_max = 1
# Create the directory (and missing parents) without going through a shell:
# the path comes from the command line and must not be parsed as shell input.
os.makedirs(outdir, exist_ok=True)
#outfile = args.outName
#if not outfile[-5:] == ".html":
#    outfile+=".html"

td = TrainData_NanoML()
td.readFromFile(infile)

#for event_num in range(1,17,2): #looking at just one half of event
for event_num in range(1, 2, 2):  #looking at jsut one half of event
    df = (td.createPandasDataFrame(event_num)
          )  #[:1000] just looking at some 1000 hits
    front_face_z = 323
    noise_filter = (df['truthHitAssignementIdx'] > -1)
    #hgcal_front_face_filter = (abs(df['truthHitAssignedZ']) < front_face_z) # < - on front, > not on front
    hgcal_front_face_filter = (df['truthHitFullyContainedFlag'] > 0)
    selected_pids = [22, 11, 211, 2211, 13, 2112]
    pid_filter = np.isin(abs(df['truthHitAssignedPIDs']), selected_pids)
    filt = noise_filter & hgcal_front_face_filter  #if including the filter np.logical_not(pid_filter)
    df = df[filt]
    spectator_filter = (df['truthHitSpectatorFlag'] > 7)