def _make_plot(self, counter, feat, predicted, truth):
    '''
    Write an interactive 3D scatter plot of one predicted coordinate set.

    The coordinates are taken from ``predicted[self.use_prediction_idx]``.
    Only three-dimensional coordinates are handled; for any other
    dimensionality a message is printed and nothing is written.

    The plot is saved as
    ``<outputfile><keep_counter>_coords_<use_prediction_idx>.html`` and is
    passed to ``publish`` when ``self.publish`` is set.
    ``counter`` and ``truth`` are accepted but not used here.
    Any exception is printed and then re-raised.
    '''
    try:
        train_data = TrainData_NanoML()  # provides the dict builders
        truth_columns = train_data.createTruthDict(feat)
        feature_columns = train_data.createFeatureDict(feat,
                                                       addxycomb=False)

        columns = {}
        columns.update(truth_columns)
        columns.update(feature_columns)
        columns['recHitLogEnergy'] = np.log(columns['recHitEnergy'] + 1)

        coords = predicted[self.use_prediction_idx]
        if coords.shape[-1] != 3:
            # 2D and >3D still to be implemented
            print(
                "plotGravNetCoordsDuringTraining only supports 3D coordinates"
            )
            return

        # keep every coordinate as its own single-column slice
        for axis, label in enumerate(('coord A', 'coord B', 'coord C')):
            columns[label] = coords[:, axis:axis + 1]

        frame = pd.DataFrame(
            np.concatenate(list(columns.values()), axis=1),
            columns=list(columns))
        shuffle_truth_colors(frame)

        figure = px.scatter_3d(
            frame,
            x="coord A",
            y="coord B",
            z="coord C",
            color="truthHitAssignementIdx",
            size="recHitLogEnergy",
            symbol="recHitID",
            template='plotly_dark',
            color_continuous_scale=px.colors.sequential.Rainbow)
        figure.update_traces(marker=dict(line=dict(width=0)))

        html_path = (self.outputfile + str(self.keep_counter) + "_coords_" +
                     str(self.use_prediction_idx) + ".html")
        figure.write_html(html_path)

        if self.publish is not None:
            publish(html_path, self.publish)

    except Exception as e:
        print(e)
        raise e
def create_outputs(x,
                   feat,
                   energy=None,
                   n_ccoords=3,
                   n_classes=4,
                   td=None,
                   add_features=True,
                   fix_distance_scale=False,
                   scale_energy=False,
                   energy_factor=True,
                   energy_proxy=None,
                   name_prefix="output_module"):
    '''
    Build the output heads on top of the feature tensor ``x``.

    Parameters:
      x:                  input to all output heads
      feat:               raw feature input, turned into a feature dict
                          through ``td.createFeatureDict``
      energy:             if given, the energy output is multiplied by it
      n_ccoords:          number of cluster-coordinate dimensions
      n_classes:          number of classes of the softmax ID head
      td:                 object providing ``createFeatureDict``; a fresh
                          ``TrainData_NanoML`` is created when left at None
      add_features:       add ``feat['recHitXY']`` to the position output
      fix_distance_scale: if True the distance output is all ones,
                          otherwise a sigmoid head scaled by 2
      scale_energy:       multiply the energy output by 10
      energy_factor:      use a 'relu' activation on the energy head
      energy_proxy:       optional tensor concatenated with ``x`` as input
                          to the energy head
      name_prefix:        prefix of the layer names

    Exactly one of ``scale_energy`` and ``energy_factor`` must be set.

    returns pred_beta, pred_ccoords, pred_dist, pred_energy, pred_pos,
            pred_time, pred_id
    '''
    assert scale_energy != energy_factor

    # Instantiate lazily: a default of TrainData_NanoML() in the signature
    # would be evaluated once at definition time and shared by all calls.
    if td is None:
        td = TrainData_NanoML()

    feat = td.createFeatureDict(feat)

    pred_beta = Dense(1, activation='sigmoid',
                      name=name_prefix + '_beta')(x)

    pred_ccoords = Dense(
        n_ccoords,
        # this initialisation is much better than standard glorot
        kernel_initializer=EyeInitializer(stddev=0.001),
        use_bias=False,  # bias has no effect
        name=name_prefix + '_clustercoords')(x)

    if energy_proxy is None:
        energy_proxy = x
    else:
        energy_proxy = Concatenate()([energy_proxy, x])

    energy_act = 'relu' if energy_factor else None
    pred_energy = Dense(
        1,
        name=name_prefix + '_energy',
        # no effect if full scale, useful if corr factor
        bias_initializer='ones',
        activation=energy_act)(energy_proxy)

    if scale_energy:
        pred_energy = ScalarMultiply(10.)(pred_energy)

    if energy is not None:
        pred_energy = Multiply()([pred_energy, energy])

    pred_pos = Dense(2, use_bias=False, name=name_prefix + '_pos')(x)
    pred_time = ScalarMultiply(10.)(Dense(1)(x))

    if add_features:
        pred_pos = Add()([feat['recHitXY'], pred_pos])

    pred_id = Dense(n_classes,
                    activation="softmax",
                    name=name_prefix + '_class')(x)

    pred_dist = OnesLike()(pred_time)
    if not fix_distance_scale:
        # this needs to be bound, otherwise it is fully anti-correlated
        # with the coordinates scale
        pred_dist = ScalarMultiply(2.)(Dense(1,
                                             activation='sigmoid',
                                             name=name_prefix + '_dist')(x))

    return (pred_beta, pred_ccoords, pred_dist, pred_energy, pred_pos,
            pred_time, pred_id)
def invokeGen(infile):
    '''
    Set up a batch-size-1 generator for ``infile``.

    Supported inputs, selected by file extension:
      * ``.djcdc``: a DataCollection; its own generator and data class
        are used
      * ``.djctd``: a TrainData_NanoML file, read into a TrainDataGenerator
      * ``.root``:  a source file; it is converted, written to
        ``<infile>.djctd`` and read back before being fed to a
        TrainDataGenerator

    Returns:
      (gen.feedTrainData, number of batches, TrainData instance)

    Raises:
      ValueError: if the extension is none of the above. Previously this
                  case failed later with an UnboundLocalError on ``gen``.
    '''
    if infile.endswith('.djcdc'):
        dc = DataCollection(infile)
        td = dc.dataclass()
        tdclass = dc.dataclass
        dc.setBatchSize(1)
        gen = dc.invokeGenerator()
    elif infile.endswith('.djctd'):
        td = TrainData_NanoML()
        tdclass = TrainData_NanoML
        td.readFromFile(infile)
        gen = TrainDataGenerator()
        gen.setBatchSize(1)
        gen.setBuffer(td)
    elif infile.endswith('.root'):
        print('reading from root file')
        td = TrainData_NanoML()
        tdclass = TrainData_NanoML
        td.readFromSourceFile(infile, {}, True)
        # round trip through the converted file before buffering
        td.writeToFile(infile + '.djctd')
        td.readFromFile(infile + '.djctd')
        gen = TrainDataGenerator()
        gen.setBatchSize(1)
        gen.setBuffer(td)
    else:
        raise ValueError(
            "invokeGen: unsupported input file '" + str(infile) +
            "' (expected .djcdc, .djctd or .root)")

    gen.setSkipTooLargeBatches(False)
    nevents = gen.getNBatches()
    gen.cast_to = tdclass
    return gen.feedTrainData, nevents, td
from plotting_callbacks import plotEventDuringTraining, plotGravNetCoordsDuringTraining, plotClusteringDuringTraining from DeepJetCore.DJCLayers import StopGradient,ScalarMultiply, SelectFeatures, ReduceSumEntirely from clr_callback import CyclicLR from lossLayers import LLFullObjectCondensation, LLClusterCoordinates from model_blocks import create_outputs from Layers import LocalClusterReshapeFromNeighbours2,ManualCoordTransform,RaggedGlobalExchange,LocalDistanceScaling,CheckNaN,NeighbourApproxPCA,LocalClusterReshapeFromNeighbours,GraphClusterReshape, SortAndSelectNeighbours, LLLocalClusterCoordinates,DistanceWeightedMessagePassing,CollectNeighbourAverageAndMax,CreateGlobalIndices, LocalClustering, SelectFromIndices, MultiBackGather, KNN, MessagePassing, RobustModel from Layers import GooeyBatchNorm #make a new line from datastructures import TrainData_OC import sql_credentials from datetime import datetime td=TrainData_NanoML() ''' ''' def gravnet_model(Inputs, viscosity=0.2, print_viscosity=False, fluidity_decay=1e-3, max_viscosity=0.9 # to start with ): feature_dropout=-1. addBackGatherInfo=True,
def _make_plot(self, counter, feat, predicted, truth):
    '''
    Write 3D scatter plots of the hits, coloured by truth assignment and
    by back-gather index.

    All inputs are lists and also include row splits. The back-gather
    indices are taken from ``predicted[self.use_backgather_idx]``.
    Hits whose x, y and z all equal ``cluster_space.noise_coord`` are
    treated as removed noise and dropped before plotting.

    Two files are written: ``<outputfile><keep_counter>_truth.html`` and
    ``<outputfile><keep_counter>_backgather.html``; only the latter is
    passed to ``publish`` when ``self.publish`` is set.
    ``counter`` and ``truth`` are accepted but not used here.
    Any exception is printed and then re-raised.
    '''
    try:
        td = TrainData_NanoML()  # contains all dicts
        # row splits not needed
        feats = td.createFeatureDict(feat, addxycomb=False)
        backgather = predicted[self.use_backgather_idx]
        truths = td.createTruthDict(feat)

        data = {}
        data.update(feats)
        data.update(truths)

        if len(backgather.shape) < 2:
            backgather = np.expand_dims(backgather, axis=1)

        data['recHitLogEnergy'] = np.log(data['recHitEnergy'] + 1)
        data['hitBackGatherIdx'] = backgather

        from globals import cluster_space as cs
        removednoise = np.logical_and(data["recHitX"] == cs.noise_coord,
                                      data["recHitY"] == cs.noise_coord)
        removednoise = np.logical_and(data["recHitZ"] == cs.noise_coord,
                                      removednoise)

        # Per-row keep mask. Python's `not` cannot be applied to an array
        # (it raises for more than one element), so invert element-wise.
        # Assumes every entry of `data` is an (N, 1) column, as required by
        # the axis=1 concatenation below -- TODO confirm.
        keep = np.logical_not(removednoise[:, 0])
        for k in data:
            data[k] = data[k][keep]

        df = pd.DataFrame(np.concatenate([data[k] for k in data], axis=1),
                          columns=[k for k in data])

        shuffle_truth_colors(df)

        fig = px.scatter_3d(
            df,
            x="recHitX",
            y="recHitZ",
            z="recHitY",
            color="truthHitAssignementIdx",
            size="recHitLogEnergy",
            symbol="recHitID",
            color_continuous_scale=px.colors.sequential.Rainbow)
        fig.update_traces(marker=dict(line=dict(width=0)))
        fig.write_html(self.outputfile + str(self.keep_counter) +
                       "_truth.html")

        bgfile = self.outputfile + str(
            self.keep_counter) + "_backgather.html"

        # now the cluster indices
        shuffle_truth_colors(df, "hitBackGatherIdx")

        fig = px.scatter_3d(
            df,
            x="recHitX",
            y="recHitZ",
            z="recHitY",
            color="hitBackGatherIdx",
            size="recHitLogEnergy",
            color_continuous_scale=px.colors.sequential.Rainbow)
        fig.update_traces(marker=dict(line=dict(width=0)))
        fig.write_html(bgfile)

        if self.publish is not None:
            publish(bgfile, self.publish)

    except Exception as e:
        print(e)
        raise e
def _make_plot(self, counter, feat, predicted, truth):
    '''
    Histogram the per-event distance outputs of ``self.run_per_event``.

    The truth dict is extended with the predicted beta, cluster
    coordinates and distance, and with an event id derived from the row
    splits (last entry of ``feat``). ``self.run_per_event`` is called
    once per event; its first two outputs are histogrammed as 'same' and
    'other', its third is used to select entries per unique value.
    An additional entry 0 stands for "no selection".

    One ``<outputfile><p>_cluster.pdf`` is written per selection and
    passed to ``publish`` when ``self.publish`` is set.
    ``counter`` and ``truth`` are accepted but not used here.
    '''
    train_data = TrainData_NanoML()

    cdata = train_data.createTruthDict(feat)
    cdata['predBeta'] = predicted['pred_beta']
    cdata['predCCoords'] = predicted['pred_ccoords']
    cdata['predD'] = predicted['pred_dist']

    # The last feature has to be the row splits. The row splits stored in
    # the prediction cannot be used: they are adapted per batch.
    row_splits = feat[-1]

    # one constant block of event ids per row-split interval
    id_blocks = [
        np.full((row_splits[i + 1, 0] - row_splits[i, 0], ),
                i,
                dtype='int64') for i in range(len(row_splits) - 1)
    ]
    n_events = len(id_blocks)
    cdata['eid'] = np.concatenate(id_blocks, axis=0)

    per_event = [
        self.run_per_event(self.subdict(cdata, i == cdata['eid']))
        for i in range(n_events)
    ]
    same_parts, other_parts, pid_parts = zip(*per_event)

    same_dist = np.concatenate(same_parts, axis=0)
    other_dist = np.concatenate(other_parts, axis=0)
    pids = np.concatenate(pid_parts, axis=0)[:, 0]

    selections = np.unique(pids).tolist()
    selections.append(0)  # 0: plot everything

    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt

    print(selections)
    for p in selections:
        if p:
            chosen = pids == p
            same_sel = same_dist[chosen]
            other_sel = other_dist[chosen]
        else:
            same_sel = same_dist
            other_sel = other_dist

        if len(same_sel) == 0:
            continue

        fig = plt.figure()
        plt.hist(same_sel, bins=51, color='tab:blue', alpha=0.5,
                 label='same')
        plt.hist(other_sel, bins=51, color='tab:orange', alpha=0.5,
                 label='other')
        plt.yscale('log')
        plt.xlabel('normalised distance')
        plt.ylabel('A.U.')
        plt.legend()

        ccfile = self.outputfile + str(p) + '_cluster.pdf'
        plt.savefig(ccfile)
        plt.cla()
        plt.clf()
        plt.close(fig)

        if self.publish is not None:
            publish(ccfile, self.publish)
def _make_plot(self, counter, feat, predicted, truth):
    '''
    Write interactive 3D scatter plots of the predicted cluster space.

    ``predicted`` is read as a dict with the keys 'pred_beta',
    'pred_energy_corr_factor', 'pred_pos', 'pred_time', 'pred_dist' and
    'pred_ccoords'.

    If the cluster coordinates are not three-dimensional the collected
    data is handed to ``self.projection_plot`` instead. Otherwise hits
    whose three cluster coordinates all equal
    ``cluster_space.noise_coord`` are dropped and three files are written
    (each passed to ``publish`` when ``self.publish`` is set):
      * ``_ccoords.html``:          cluster space, marker size from energy
      * ``_ccoords_betasize.html``: cluster space, marker size from beta
      * ``_truth.html``:            hit positions, marker size from energy

    ``counter`` and ``truth`` are accepted but not used here.
    Any exception is printed and then re-raised.
    '''
    try:
        train_data = TrainData_NanoML()  # contains all dicts
        # row splits not needed
        feature_columns = train_data.createFeatureDict(feat,
                                                       addxycomb=False)
        truth_columns = train_data.createTruthDict(feat)

        beta = predicted['pred_beta']
        print('>>>> plotting cluster coordinates... average beta',
              np.mean(beta), ' lowest beta ', np.min(beta),
              'highest beta', np.max(beta))

        # for later
        energy = predicted['pred_energy_corr_factor']
        position = predicted['pred_pos']
        pos_x = position[:, 0:1]
        pos_y = position[:, 1:2]
        time = predicted['pred_time']
        dist = predicted['pred_dist']

        data = {}
        data.update(feature_columns)
        data.update(truth_columns)

        ccoords = predicted['pred_ccoords']

        data['recHitLogEnergy'] = np.log(data['recHitEnergy'] + 1)
        data['predBeta'] = beta
        # offset so that the low-beta hits don't disappear
        data['predBeta+0.05'] = beta + 0.05
        data['predEnergy'] = energy
        data['predX'] = pos_x
        data['predY'] = pos_y
        data['predT'] = time
        data['predD'] = dist
        data['(predBeta+0.05)**2'] = data['predBeta+0.05']**2
        data['(thresh(predBeta)+0.05))**2'] = np.where(
            beta > self.beta_threshold, data['(predBeta+0.05)**2'], 0.)

        if ccoords.shape[-1] != 3:
            self.projection_plot(data, ccoords)
            return

        for axis, key in enumerate(
            ('predCCoordsX', 'predCCoordsY', 'predCCoordsZ')):
            data[key] = ccoords[:, axis:axis + 1]

        from globals import cluster_space as cs
        is_noise = np.logical_and(
            np.logical_and(data["predCCoordsX"] == cs.noise_coord,
                           data["predCCoordsY"] == cs.noise_coord),
            data["predCCoordsZ"] == cs.noise_coord)
        keep = np.logical_not(is_noise[:, 0])

        # remove removed noise
        for key in data:
            data[key] = data[key][keep]

        df = pd.DataFrame(np.concatenate(list(data.values()), axis=1),
                          columns=list(data))

        shuffle_truth_colors(df)

        def _export(figure, suffix):
            # shared tail of every plot: style, write, optionally publish
            figure.update_traces(marker=dict(line=dict(width=0)))
            target = self.outputfile + str(self.keep_counter) + suffix
            figure.write_html(target)
            if self.publish is not None:
                publish(target, self.publish)

        common = dict(color="truthHitAssignementIdx",
                      symbol="recHitID",
                      template='plotly_dark',
                      color_continuous_scale=px.colors.sequential.Rainbow)
        cluster_axes = dict(x="predCCoordsX",
                            y="predCCoordsY",
                            z="predCCoordsZ")

        # now the cluster indices
        hover_data = [
            'predBeta', 'predD', 'predEnergy', 'truthHitAssignedEnergies',
            'predT', 'truthHitAssignedT', 'predX', 'truthHitAssignedX',
            'predY', 'truthHitAssignedY', 'truthHitAssignementIdx'
        ]

        _export(
            px.scatter_3d(df,
                          size="recHitLogEnergy",
                          hover_data=hover_data,
                          **cluster_axes,
                          **common), "_ccoords.html")

        _export(
            px.scatter_3d(df,
                          size="(predBeta+0.05)**2",
                          hover_data=hover_data,
                          **cluster_axes,
                          **common), "_ccoords_betasize.html")

        # thresholded
        _export(
            px.scatter_3d(df,
                          x="recHitX",
                          y="recHitZ",
                          z="recHitY",
                          size="recHitLogEnergy",
                          hover_data=[
                              'predBeta', 'predEnergy', 'predX', 'predY',
                              'truthHitAssignementIdx',
                              'truthHitAssignedEnergies',
                              'truthHitAssignedX', 'truthHitAssignedY'
                          ],
                          **common), "_truth.html")

    except Exception as e:
        print(e)
        raise e
# Command-line script: reads a TrainData_NanoML file and selects hits of
# one event for plotting.
parser = ArgumentParser('')
# Both options are used unconditionally below, so a missing one is reported
# by argparse instead of failing later.
parser.add_argument('--inputFile', required=True)
parser.add_argument('--outputDir', required=True)
# parser.add_argument('--outName', default='recHits_3D.html', type=str)
args = parser.parse_args()

infile = args.inputFile
outdir = args.outputDir + "/"
events_max = 1
# Create the directory without going through a shell, so that paths with
# spaces or shell metacharacters are handled correctly.
os.makedirs(outdir, exist_ok=True)
# outfile = args.outName
# if not outfile[-5:] == ".html":
#     outfile += ".html"

td = TrainData_NanoML()
td.readFromFile(infile)

# for event_num in range(1, 17, 2):  # looking at just one half of event
for event_num in range(1, 2, 2):  # looking at just one half of event
    df = td.createPandasDataFrame(event_num)  # [:1000] for only 1000 hits

    front_face_z = 323
    # a negative assignment index is filtered out as noise
    noise_filter = (df['truthHitAssignementIdx'] > -1)
    # alternative: (abs(df['truthHitAssignedZ']) < front_face_z)
    # < - on front, > not on front
    hgcal_front_face_filter = (df['truthHitFullyContainedFlag'] > 0)
    # NOTE(review): 2211 may be a typo for 2212 -- confirm
    selected_pids = [22, 11, 211, 2211, 13, 2112]
    pid_filter = np.isin(abs(df['truthHitAssignedPIDs']), selected_pids)
    # if including the filter: & np.logical_not(pid_filter)
    filt = noise_filter & hgcal_front_face_filter
    df = df[filt]
    spectator_filter = (df['truthHitSpectatorFlag'] > 7)