def test_scoring(weights, multiprocessing=False):
    print('Weights: ')
    print('Barrel\t' + str(weights[0]))
    print('Caps \t' + str(weights[1]))
    print('-' * 50)
    # weights = np.insert(weights, 0, 1)
    event_prefix = "event00000%d"
    if multiprocessing:
        jobs = []
        pool = mp.Pool(processes=4)
    else:
        test_dataset_submissions = []
        scores = []
    for event in [1000 + i for i in range(1)]:
        event_id = event_prefix % event
        hits, cells, particles, truth = load_event(
            os.path.join(path_to_train, event_id))
        if multiprocessing:
            jobs.append(pool.apply_async(
                add_submission, args=(event, hits, weights, True, truth, True)))
        else:
            results = add_submission(event, hits, weights, True, truth, False)
            test_dataset_submissions.append(results[0])
            scores.append(results[1])
    if multiprocessing:
        test_dataset_submissions = [job.get()[0] for job in jobs]
        scores = [job.get()[1] for job in jobs]
    print('Avg of events: ')
    avg_score = sum(scores) / float(len(scores))
    print(avg_score)
    return avg_score
def test_events():
    '''Process a few events to demonstrate the method before using it on the
    entire testing dataset.'''
    events = range(1000, 1012)
    path_to_train = '../../data/raw/train_100_events/'
    parts_to_load = ['hits', 'truth']
    model = Clusterer(350, 2, 40, 40)
    for event_id in events:
        with timer('Processing %s' % event_id):
            hits, truth = load_event(path_to_train + 'event00000' + str(event_id),
                                     parts=parts_to_load)
            # Add the needed information.
            r = np.sqrt(hits['x'].values**2 + hits['y'].values**2)
            d = np.sqrt(hits['x'].values**2 + hits['y'].values**2 + hits['z'].values**2)
            hits['phi'] = np.arctan2(hits['y'].values, hits['x'].values)
            hits['sphi'] = np.sin(hits['phi'].values)
            hits['cphi'] = np.cos(hits['phi'].values)
            hits['zd'] = hits['z'].values / d
            hits['zr'] = hits['z'].values / r
            scaler = MinMaxScaler()
            X = scaler.fit_transform(hits[['zr', 'zd', 'sphi', 'cphi']])
            labels = model.predict(X)
            submission = create_one_event_submission(event_id, hits, labels)
            score = score_event_fast(truth, submission)
            print('score = %.4f' % score)
def test(model, test, shift=10000):
    """Create predictions from the trained model over the test data, then
    append the predictions to the test data to create the submission.

    model -- trained model
    test  -- test data
    shift -- coordinate shift needed to eliminate negative numbers
    """
    df_test = []
    for e in test:
        hits, cells = load_event(e, parts=['hits', 'cells'])
        hits['event_id'] = int(e[-9:])
        cells = cells.groupby(by=['hit_id'])[['ch0', 'ch1', 'value']].agg(['mean']).reset_index()
        cells.columns = ['hit_id', 'ch0', 'ch1', 'value']
        # Pulls in all needed data from the different files on an event-by-event basis.
        hits = pd.merge(hits, cells, how='left', on='hit_id')
        print(e, "Test")
        # The +10000 shifts the event space into a non-negative space; the model won't take negatives.
        hits['x'] += shift
        hits['y'] += shift
        hits['z'] += shift
        # Predicts the test set.
        hits['particle_id'] = model.predict(hits[[
            'hit_id', 'x', 'y', 'z', 'volume_id', 'layer_id', 'module_id',
            'event_id', 'ch0', 'ch1', 'value'
        ]].values, verbose=1)
        # Append the predictions onto the test data frame.
        df_test.append(hits[['event_id', 'hit_id', 'particle_id']].copy())
    return df_test
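# Hedged usage sketch (not part of the original code): one way to turn the
# per-event frames returned by test() into a single TrackML-style submission.
# The 'track_id' rename and the output path are illustrative assumptions.
import pandas as pd

def write_submission(df_test, out_path='submission.csv'):
    # Stack the per-event prediction frames and write them as one CSV.
    submission = pd.concat(df_test, axis=0, ignore_index=True)
    submission = submission.rename(columns={'particle_id': 'track_id'})
    submission.to_csv(out_path, index=False)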
def process_single_event(event_number):
    start = time.time()
    file_name = 'event00000' + str(event_number)
    event_id = file_name
    hits_orig, cells, particles, truth = load_event('data/train_sample/' + event_id)
    merge_by_hit_ids = hits_orig.merge(truth, how='inner', on='hit_id')
    merge_by_particle_ids = merge_by_hit_ids.merge(particles, how='inner', on='particle_id')
    partid_dict = {}
    hitloc_dict = {}
    for row in merge_by_particle_ids.itertuples():
        particleID = row.particle_id
        volID = row.volume_id
        layerID = row.layer_id
        modID = row.module_id
        hitID = row.hit_id
        key_name = event_id + '-' + str(particleID)
        hitloc_dict = {
            'hit_id': hitID,
            'volume_id': volID,
            'layer_id': layerID,
            'module_id': modID
        }
        if key_name in partid_dict:
            partid_dict[key_name].append(hitloc_dict)
        else:
            partid_dict[key_name] = [hitloc_dict]
    with open('mappings/' + event_id + '.json', 'w') as outfile:
        json.dump(partid_dict, outfile)
    end = time.time()
    return partid_dict
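# Hedged sketch (assumption, not original code): reading back one of the
# per-event particle-to-hits mappings written by process_single_event() above.
# The helper name load_event_mapping is illustrative; the path mirrors the one
# used in that function.
import json

def load_event_mapping(event_number):
    with open('mappings/event00000' + str(event_number) + '.json') as infile:
        return json.load(infile)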
def read(self, evtid):
    prefix = os.path.join(os.path.expandvars(self._evt_dir),
                          'event{:09d}'.format(evtid))
    all_data = load_event(prefix, parts=['hits', 'particles', 'truth', 'cells'])
    if all_data is None:
        return False
    hits, particles, truth, cells = all_data
    hits = hits.assign(evtid=evtid)
    if self._blacklist_dir:
        prefix_bl = os.path.join(os.path.expandvars(self._blacklist_dir),
                                 'event{:09d}-blacklist_'.format(evtid))
        hits_exclude = pd.read_csv(prefix_bl + 'hits.csv')
        particles_exclude = pd.read_csv(prefix_bl + 'particles.csv')
        hits = hits[~hits['hit_id'].isin(hits_exclude['hit_id'])]
        particles = particles[~particles['particle_id'].isin(particles_exclude['particle_id'])]
    # Add pT to particles.
    px = particles.px
    py = particles.py
    pt = np.sqrt(px**2 + py**2)
    particles = particles.assign(pt=pt)
    self._evtid = evtid
    self._hits = hits
    self._particles = particles
    self._truth = truth
    self._cells = cells
    self.merge_truth_info_to_hits()
    return True
def get_training_sample(path_to_data, event_names):
    events = []
    track_id = 0
    for name in event_names:
        print(name)
        # Read an event.
        hits, cells, particles, truth = load_event(
            os.path.join(path_to_data, name))
        # Generate a new vector of particle ids.
        particle_ids = truth.particle_id.values
        particle2track = {}
        for pid in np.unique(particle_ids):
            particle2track[pid] = track_id
            track_id += 1
        hits['particle_id'] = [particle2track[pid] for pid in particle_ids]
        # Collect hits.
        events.append(hits)
    # Put all hits into one sample with unique track ids.
    data = pd.concat(events, axis=0)
    return data
def init_event(self, event_file, first_init=False):
    if self.verbose > 0:
        print('\n\nLoading event:', event_file)
    self.hits, self.cells, self.particles, self.truth = load_event(event_file)
    hits_xyz = self.hits.values[:, 1:4]
    hits_r = np.sqrt(np.power(hits_xyz[:, 0], 2) + np.power(hits_xyz[:, 1], 2))
    hits_phi_x = np.sign(hits_xyz[:, 1]) * np.arccos(hits_xyz[:, 0] / hits_r)
    hits_phi_y = np.sign(-hits_xyz[:, 0]) * np.arccos(hits_xyz[:, 1] / hits_r)
    hits_theta = np.arctan2(hits_xyz[:, 2], hits_r)
    hits_layers = self.hits.values[:, 4:6]
    hits_layers_onehot = np.array([
        self.onehot_layer[tuple(hits_layer)] for hits_layer in hits_layers
    ])
    hits_xyzrphiphitheta = np.concatenate(
        (hits_xyz,
         np.reshape(hits_r, (-1, 1)),
         np.reshape(hits_phi_x, (-1, 1)),
         np.reshape(hits_phi_y, (-1, 1)),
         np.reshape(hits_theta, (-1, 1))),
        axis=1)
    if self.std_scale:
        if first_init:
            self.coord_rescaler = preprocessing.StandardScaler().fit(hits_xyzrphiphitheta)
        hits_xyzrphiphitheta = self.coord_rescaler.transform(hits_xyzrphiphitheta)
    self.hits_input = np.append(hits_xyzrphiphitheta, hits_layers_onehot, axis=1)
    hits_module_array = self.hits.values[:, 4:]
    self.hits_module = []
    for module in hits_module_array:
        self.hits_module.append(tuple(module))
    # Collect all ids except for id 0.
    self.track_unique_ids = np.unique(np.append([0], self.truth.values[:, 1]))
    self.track_unique_ids = self.track_unique_ids[1:]
    rand.shuffle(self.track_unique_ids)
    self.ntracks = len(self.track_unique_ids)
    self.track_hit_dict = pickle.load(open(event_file + "-trackdict.p", "rb"))
    if self.verbose > 0:
        print('Finished loading event:', event_file)
def read_meas(self, event):
    # Read meas files.
    hits, cells = load_event(event, parts=['hits', 'cells'])
    cells = cells.groupby(by=['hit_id'])['value'].agg(['count', 'sum']).reset_index()
    cells.columns = ['hit_id', 'hit_ncell', 'hit_edep']
    hits = pd.merge(hits, cells, how='left', on='hit_id')
    return hits
def main():
    # Load the data of 90 train events.
    data_l = []
    for i in range(10, 100):
        event = '../input/train_1/event0000010%d' % i
        print('event:', event)
        hits, cells, particles, truth = load_event(event)
        data = hits
        data = data.merge(truth, how='left', on='hit_id')
        data = data.merge(particles, how='left', on='particle_id')
        # Keep hits from tracks originating from the vertex.
        data['rv'] = np.sqrt(data.vx**2 + data.vy**2)
        data = data[(data.rv <= 1) & (data.vz <= 50) & (data.vz >= -50)].copy()
        data = data[data.weight > 0]
        data['event_id'] = i
        data['pt'] = np.sqrt(data.px**2 + data.py**2)
        # Use a simple relationship to compute alpha0 from pt, see documentation or the EDA notebook.
        data['alpha0'] = np.exp(-8.115 - np.log(data.pt))
        data_l.append(data)
    data = pd.concat(data_l, axis=0)

    # Compute track-level statistics.
    df = data.groupby(['event_id', 'particle_id'])[['alpha0', 'vz']].first()
    df = df.dropna()
    np.save('../data/scan_center.npy', df.values)

    # Compute tracklet frequencies.
    # Tracklets are sub-tracks of length 4.
    # Assign a unique layer to each hit.
    data['layer'] = 100 * data.volume_id + data.layer_id
    # For each track, compute a string containing the sequence of layers traversed by the track.
    data = data.sort_values(by=['particle_id', 'z']).reset_index(drop=True)
    df = data.groupby(['event_id', 'particle_id']).layer.apply(
        lambda s: ' '.join([str(i) for i in s]))
    df = df.to_frame('layers')
    # Count each tracklet's occurrences.
    cnt = Counter()
    for x in tqdm(df.itertuples(name=None, index=False)):
        layers = x[0].split()
        for i in range(len(layers) - 3):
            s = ' '.join(layers[i:i + 4])
            cnt[s] += 1
    # Save the result.
    with open('../data/layers_4_center_fix.pkl', 'wb') as file:
        pkl.dump(cnt, file)
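# Hedged sketch (not part of the original code): reading back the tracklet
# counter saved by main() above. The path mirrors the one used in main();
# the top-10 summary is an illustrative choice.
import pickle as pkl

def load_tracklet_counts(path='../data/layers_4_center_fix.pkl'):
    with open(path, 'rb') as file:
        cnt = pkl.load(file)
    # Return the ten most frequent layer-sequence tracklets.
    return cnt.most_common(10)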
def get_fake_tracks(event, n_sample=5000):
    hits, = load_event(event, parts=['hits'])
    fake_tracks = []
    for idx in range(n_sample):
        rand_hits = random.randint(0, 20)
        data = hits.sample(rand_hits)[['x', 'y', 'z']].values
        data = np.pad(data, ((0, 20), (0, 0)), 'constant',
                      constant_values=(4, 99999))[:20, :]
        fake_tracks.append(data)
    return np.array(fake_tracks)
def visualize_prediction_v_truth():
    fig, ax = plt.subplots(figsize=(15, 15))
    ax = Axes3D(fig)
    event = 1000
    event_prefix = "event00000%d"
    weights = [0.1, 0.01, 0.1, 1, 1]
    event_id = event_prefix % event
    hits, cells, particles, truth = load_event(
        os.path.join(path_to_train, event_id))
    one_submission, score = add_submission(event, hits, weights, True, truth)
    plot_tracks(ax, one_submission.track_id.unique(), hits)
    show_3Dplot(ax)
def read_event(self, path, eta_cut=3.2):
    hits, cells, particles, truth = load_event(path)
    hits_features = get_features(hits)
    # Apply the eta cuts on hits.
    hits_features = hits_features[(hits_features['eta'] > eta_cut) |
                                  (hits_features['eta'] < -1 * eta_cut)]
    hits_with_truth = hits_features.merge(filter_truth(truth), on='hit_id')
    particles = pd.Series(np.unique(hits_with_truth['particle_id']))
    return hits_with_truth, particles
def open(self, use_true_coords=False):
    """Load event data from disk."""
    parts = ['hits']
    if self._with_cells:
        parts.append('cells')
    if self._with_truth:
        parts.extend(['particles', 'truth'])
    data = load_event(self._prefix, parts=parts)
    hits = data[0]
    if self._with_cells:
        self.cells_df = data[1]
    else:
        self.cells_df = None
    self.hits_df = hits
    self._validate()
    min_hit_id = hits['hit_id'].min()
    max_hit_id = hits['hit_id'].max()
    self.max_hit_id = max_hit_id
    # Assume that hit_ids are exactly the integers 1,...,#hits.
    # This way we can speed up the coordinate lookup quite a bit.
    assert min_hit_id == 1 and max_hit_id == len(hits)
    if self._with_truth:
        particles, truth = data[-2:]
        self._particles_columns_orig = particles.columns
        self._truth_columns_orig = truth.columns
        self.particles_df = particles
        self.truth_df = truth
    else:
        assert not use_true_coords
        self.particles_df = None
        self.truth_df = None
    self.hits_coords_by_id = []
    for i, col in enumerate(('x', 'y', 'z')):
        # IMPORTANT: We convert coordinates to float64. Otherwise we run
        # into numerical precision problems in our helix arithmetic.
        # Note: index 0 must map to np.nan.
        coord = np.full(1 + max_hit_id, np.nan, dtype=np.float64)
        if use_true_coords:
            true_col = 't' + col
            coord[self.truth_df['hit_id'].values] = self.truth_df[true_col].values
        else:
            coord[hits['hit_id'].values] = hits[col].values
        self.hits_coords_by_id.append(coord)
    self._hits_module_id_by_id = np.full(1 + max_hit_id, -1, dtype=np.int16)
    assert np.all(hits['module_id'] <= np.iinfo(self._hits_module_id_by_id.dtype).max)
    self._hits_module_id_by_id[hits['hit_id'].values] = hits['module_id'].values
    self._isopen = True
def main():
    print('\n')
    print('-' * 50)
    print('Starting job...')
    print('-' * 50)
    t0 = time.time()
    event_id = '000001000'
    event_path = os.path.join(path_to_train, 'event' + event_id)
    hits, cells, particles, truth = load_event(event_path)
    truth = truth.merge(hits, on=['hit_id'], how='left')
    df = truth.copy()
    df = df.assign(r=np.sqrt(df.x**2 + df.y**2))
    df = df.assign(d=np.sqrt(df.x**2 + df.y**2 + df.z**2))
    df = df.assign(a=np.arctan2(df.y, df.x))
    df = df.assign(cosa=np.cos(df.a))
    df = df.assign(sina=np.sin(df.a))
    df = df.assign(phi=np.arctan2(df.z, df.r))
    fig, ax = plt.subplots(figsize=(12, 12))
    ax = Axes3D(fig)
    # ax.scatter(xs=seed.a,
    #            ys=seed.r,
    #            zs=seed.z,
    #            c='gray',
    #            s=1)
    d_delta = 200
    overlap = 0.3
    its = math.ceil(1050 / d_delta)
    for i in range(its):
        start = (i * d_delta) - (d_delta * overlap)
        if start < 0:
            start = 0
        end = (i * d_delta) + d_delta
        print(start, end)
        seed_tracks(event_id, df, start, end, ax)
    ax.set_xlabel('a')
    ax.set_ylabel('r')
    ax.set_zlabel('z (mm)')
    plt.show()
    print('-' * 50)
    print('Success!')
    t1 = time.time()
    print('Total time', (t1 - t0) / 60)
    print('-' * 50)
    print('\n' * 2)
def DBScan():
    data_dir = 'path'
    idd = ['***']
    sum = 0
    sum_score = 0
    for i, eve_id in enumerate(idd):
        hits, cells, particles, truth = load_event(data_dir + '/event' + eve_id)
        labels = do_dbscan_predict(hits)
        submission = create_one_event_submission(0, hits['hit_id'].values, labels)
        score = score_event(truth, submission)
        print('[%2d] score : %0.8f' % (i, score))
        sum_score += score
        sum += 1
def read_mc(self, event):
    # Read MC files.
    mchits, mctracks = load_event(event, parts=['truth', 'particles'])
    print(len(mctracks))
    # Fill dummy data for non-particle hits.
    mctracks = pd.concat([
        mctracks,
        pd.DataFrame([[0, 0, 0, 0, 0, 0, 0, 0, 0]], columns=mctracks.columns)
    ])
    mchits = pd.merge(mchits, mctracks, how='left', on='particle_id')
    return mchits
def analyse_event(event_number, volume_id, size_of_hit):
    hits, cells, particles, truth = load_event(
        '/Users/pjfox/Dropbox/NN/TrackML/train_100_events/event00000' + str(event_number))
    selectedhits, directions = goodhit_and_directions(hits, truth, volume_id)
    hitinfo = np.empty([len(selectedhits), size_of_hit])
    for ii, hit_id in enumerate(selectedhits[:, 0]):
        hitinfo[ii] = makelistofhits(cells, hits, hit_id, size_of_hit)
    np.save("hits_info_Vol_" + str(volume_id) + "_event_" + str(event_number) + ".npy",
            hitinfo)
    np.save("direction_info_Vol_" + str(volume_id) + "_event_" + str(event_number) + ".npy",
            directions)
    return None
def training_data_with_eta_cut(train_dir='input/train_1',
                               event_prefix="event000001000",
                               eta_cut=3.2):
    hits, cells, particles, truth = load_event(os.path.join(train_dir, event_prefix))
    hits_features = get_features(hits)
    # high_eta_hits = hits_features[(hits_features['eta'] > eta_cut) |
    #                               (hits_features['eta'] < -1 * eta_cut)]
    high_eta_hits = hits_features[(hits_features['eta'] > eta_cut)]
    uID_for_higheta = make_uID(high_eta_hits)
    high_eta_hits_uID = pd.merge(high_eta_hits, uID_for_higheta,
                                 on=['volume_id', 'layer_id', 'module_id'])
    train_data_higheta = high_eta_hits_uID.merge(
        filter_truth(truth), on='hit_id')[['uID', 'particle_id']]
    return train_data_higheta, uID_for_higheta
def get_real_tracks(event):
    hits, cells, particles, truth = load_event(event)
    hits_truth = pd.merge(hits, truth, on=['hit_id'])
    pIDs = np.unique(hits_truth['particle_id'])
    track_list = []
    for pID in pIDs:
        if pID == 0:
            continue
        this_track = hits_truth[hits_truth['particle_id'] == pID][['x', 'y', 'z']].values
        this_track = np.pad(this_track, ((0, 20), (0, 0)), 'constant',
                            constant_values=(4, 99999))[:20, :]
        track_list.append(this_track)
    return np.array(track_list)
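# Hedged sketch (assumption, not original code): combining get_real_tracks()
# and get_fake_tracks() above into a labeled dataset for a track/non-track
# classifier. The 50/50 mix, the shuffling, and the helper name are
# illustrative choices only.
import numpy as np

def build_track_dataset(event, seed=0):
    real = get_real_tracks(event)
    fake = get_fake_tracks(event, n_sample=len(real))
    X = np.concatenate([real, fake], axis=0)
    y = np.concatenate([np.ones(len(real)), np.zeros(len(fake))])
    # Shuffle so real and fake tracks are interleaved.
    rng = np.random.RandomState(seed)
    idx = rng.permutation(len(X))
    return X[idx], y[idx]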
def dicover_highest_amount_of_hits():
    maxV = 0
    with open('/tmp/Events_Train_1') as f:
        for line in f:
            event_prefix = line.strip()
            print(event_prefix)
            hits, cells, particles, truth = load_event(
                os.path.join('/data/trackMLDB/train_1', event_prefix))
            for index, row in particles.iterrows():
                truth_0 = truth[truth.particle_id == row['particle_id']]
                AUX = truth_0['hit_id'].count()
                if AUX > maxV:
                    maxV = AUX
    print(maxV)
def process_particles(event):
    hits, cells, particles, truth = load_event(event)
    # Select interesting columns and calculate the distance from the origin 't'.
    hits = hits[['hit_id', 'x', 'y', 'z']]
    hits = hits.assign(t=lambda hit: np.sqrt(hit.x**2 + hit.y**2 + hit.z**2))
    # Merge datasets to associate particle_id, hit_id, layer_id, and module_id,
    # and sort by distance from the origin.
    data = pd.merge(truth[['particle_id', 'hit_id']], hits, on='hit_id',
                    how='inner').sort_values(by=['particle_id', 't'])
    # Add new columns to each row containing the previous and next hit.
    data = data.assign(x0=data['x'].shift(1),
                       x1=data['x'],
                       x2=data['x'].shift(-1),
                       y0=data['y'].shift(1),
                       y1=data['y'],
                       y2=data['y'].shift(-1))

    # A mask that selects all but the first and last rows.
    def mask(x):
        result = np.ones_like(x)
        result[0] = 0
        result[x.shape[0] - 1] = 0
        return result

    # Create a mask that selects all but the first and last element from each group.
    mask = data.groupby(['particle_id'])['particle_id'].transform(mask).astype(bool)
    # Apply the mask to remove the first and last hits from each particle and
    # calculate helix parameters for the remaining hits.
    data = data.loc[mask].assign(
        hx=lambda n: fit_circle((n.x0, n.y0), (n.x1, n.y1), (n.x2, n.y2))[0],
        hy=lambda n: fit_circle((n.x0, n.y0), (n.x1, n.y1), (n.x2, n.y2))[1],
        hr=lambda n: fit_circle((n.x0, n.y0), (n.x1, n.y1), (n.x2, n.y2))[2])
    # Iterate through each group and yield it.
    for name, group in data.groupby('particle_id'):
        yield (name, group[['x', 'y', 'z', 'hx', 'hy', 'hr']])
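# Hedged usage sketch (not part of the original code): consume the generator
# above and collect the mean fitted helix radius per particle. The helper name
# summarize_helix_radii is an illustrative assumption.
def summarize_helix_radii(event):
    mean_radii = {}
    for particle_id, track in process_particles(event):
        mean_radii[particle_id] = track['hr'].mean()
    return mean_radii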
def process_event(prefix, pt_min, n_phi_sectors, select_phi_sector,
                  phi_slope_max, phi_slope_mid_max, phi_slope_outer_max,
                  z0_max, no_missing_hits, n_tracks):
    # Load the data.
    evtid = int(prefix[-9:])
    logging.info('Event %i, loading data' % evtid)
    hits, particles, truth = dataset.load_event(
        prefix, parts=['hits', 'particles', 'truth'])

    # Apply hit selection.
    logging.info('Event %i, selecting hits' % evtid)
    hits = select_hits(hits, truth, particles, pt_min=pt_min,
                       no_missing_hits=no_missing_hits).assign(evtid=evtid)
    hits_sectors = split_phi_sectors(hits, n_phi_sectors=n_phi_sectors,
                                     select_phi_sector=select_phi_sector)

    # Graph features and scale.
    feature_names = ['r', 'phi', 'z']
    feature_scale = np.array([1000., np.pi / n_phi_sectors, 1000.])

    # Define adjacent layers.
    n_det_layers = 10
    l = np.arange(n_det_layers)
    layer_pairs = np.stack([l[:-1], l[1:]], axis=1)

    # Construct the graphs.
    logging.info('Event %i, constructing graphs' % evtid)
    graphs = [
        construct_graph(sector_hits,
                        layer_pairs=layer_pairs,
                        phi_slope_max=phi_slope_max,
                        phi_slope_mid_max=phi_slope_mid_max,
                        phi_slope_outer_max=phi_slope_outer_max,
                        z0_max=z0_max,
                        feature_names=feature_names,
                        feature_scale=feature_scale,
                        max_tracks=n_tracks,
                        no_missing_hits=no_missing_hits)
        for sector_hits in hits_sectors
    ]
    return graphs
def train(model, train, shift=10000, polar=0):
    """Takes in the training data, cleans it, and then uses it to train the model.

    params
    model -- created, but untrained model
    train -- training data set
    shift -- coordinate shift needed to eliminate negative numbers
    polar -- used to turn polar coordinates on and off
    """
    for e in train:
        # Separate out the hits, cells, and truth from the data.
        hits, cells, truth = load_event(e, parts=['hits', 'cells', 'truth'])
        hits['event_id'] = int(e[-9:])
        # Group the cells by hit id, taking the mean channel indices and value.
        cells = cells.groupby(by=['hit_id'])[['ch0', 'ch1', 'value']].agg(['mean']).reset_index()
        cells.columns = ['hit_id', 'ch0', 'ch1', 'value']
        hits = pd.merge(hits, cells, how='left', on='hit_id')
        hits = pd.merge(hits, truth, how='left', on='hit_id')
        print(e, "Train")
        # Apply the shift to eliminate negative numbers.
        hits['x'] += shift
        hits['y'] += shift
        hits['z'] += shift
        # If polar coordinates are turned on, calculate the radius and theta and use them to train the model.
        if polar == 1:
            hits['radius'] = np.sqrt(hits['y'] * hits['y'] + hits['x'] * hits['x'])
            hits['theta'] = np.arctan(hits['y'] / hits['x'])
            hits['particle_id'] = model.fit(hits[[
                'hit_id', 'radius', 'theta', 'z', 'volume_id', 'layer_id',
                'module_id', 'event_id', 'ch0', 'ch1', 'value'
            ]].values, hits['particle_id'], batch_size=10000, epochs=1)
        else:
            # Train the model on the non-polar x, y, and z values.
            hits['particle_id'] = model.fit(hits[[
                'hit_id', 'x', 'y', 'z', 'volume_id', 'layer_id', 'module_id',
                'event_id', 'ch0', 'ch1', 'value'
            ]].values, hits['particle_id'], batch_size=10000, epochs=1)
    return model
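# Hedged driver sketch (assumption, not original code) tying train() and
# test() together. The glob pattern, the Keras-style fit/predict model, and
# the last-10-events hold-out split are illustrative choices only.
import glob
import numpy as np

def run_pipeline(model):
    # Collect event prefixes of the form 'input/train_1/event00000XXXX'.
    events = np.unique([p.split('-')[0]
                        for p in sorted(glob.glob('input/train_1/**'))])
    train_events, held_out_events = events[:-10], events[-10:]
    model = train(model, train_events)
    return test(model, held_out_events)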
def work_sub(param):
    # Merges tracks for event i and saves the result into a file.
    (i, ) = param
    th_b = 0.16
    th_c = 0.45
    event = get_event(i)
    print('event:', event)
    hits, cells = load_event('../input/test/' + event, parts=['hits', 'cells'])
    data = pd.read_csv('../submissions/final/' + event)
    data = data.merge(hits, how='left', on='hit_id')
    inner = pd.read_csv('../submissions/final_inner/' + event)
    data = merge_track(data, inner, th_b, th_c, 1)
    data['event_id'] = i
    data[['event_id', 'hit_id', 'track_id']].to_csv(
        '../submissions/merge_final/' + event + '.csv', index=False)
    return i
def test_scoring():
    event_prefix = "event00000%d"
    jobs = []
    weights = [0.1, 0.01, 0.1, 1, 1]
    pool = mp.Pool(processes=1)
    for event in [1000 + i for i in range(1)]:
        event_id = event_prefix % event
        hits, cells, particles, truth = load_event(
            os.path.join(path_to_train, event_id))
        jobs.append(
            pool.apply_async(add_submission,
                             args=(event, hits, weights, True, truth)))
    test_dataset_submissions = [job.get()[0] for job in jobs]
    scores = [job.get()[1] for job in jobs]
    print('Avg of events: ')
    print(sum(scores) / float(len(scores)))
def read(data_dir, evtid, info=False):
    prefix = os.path.join(os.path.expandvars(data_dir),
                          'event{:09d}'.format(evtid))
    all_data = load_event(prefix, parts=['hits', 'particles', 'truth', 'cells'])
    if all_data is None:
        return None
    hits, particles, truth, cells = all_data
    hits = hits.assign(evtid=evtid)
    px = particles.px
    py = particles.py
    pt = np.sqrt(px**2 + py**2)
    particles = particles.assign(pt=pt)
    if info:
        print("# of hits: ", hits.shape[0])
        print("# of particles: ", particles.shape[0])
    return hits, particles, truth, cells
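# Hedged usage sketch for read() above, assuming the standard TrackML
# directory layout; the data directory path and the helper name peek_event
# are illustrative placeholders.
def peek_event(data_dir='../input/train_1', evtid=1000):
    data = read(data_dir, evtid, info=True)
    if data is None:
        return None
    hits, particles, truth, cells = data
    # Return simple size information about the event.
    return hits.shape[0], particles.shape[0]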
def run_dbscan():
    data_dir = '../input/train_1'
    event_ids = ['000001000']
    sum = 0
    sum_score = 0
    for i, event_id in enumerate(event_ids):
        hits, cells, particles, truth = load_event(data_dir + '/event' + event_id)
        labels = do_dbscan_predict(hits)
        submission = create_one_event_submission(0, hits['hit_id'].values, labels)
        score = score_event(truth, submission)
        print('[%2d] score : %0.8f' % (i, score))
        sum_score += score
        sum += 1
    print('--------------------------------------')
    print(sum_score / sum)
def __init__(self, event_file, detector_file, detector=None,
             use_null_module=True, import_track_hits=True):
    # Null module is an empty module to indicate no detector hit.
    self.hits, self.cells, self.particles, self.truth = load_event(event_file)
    if detector is None:
        self.detector = Detector(detector_file)
    else:
        self.detector = detector
    self.use_null_module = use_null_module
    self.track_hit_dict = pickle.load(open(event_file + "-trackdict.p", "rb"))
    self.init_detector_geometry()
def load_training():
    if os.path.exists(data_name):
        with open(data_name, 'rb') as fp:
            training_df = pickle.load(fp)
    else:
        det = pd.read_csv('input/detectors.csv')
        hits_pd = make_uID(det)
        training_df = []
        train = np.unique([p.split('-')[0]
                           for p in sorted(glob.glob('input/train_1/**'))])
        for ds_name in train:
            hits, truth = load_event(ds_name, parts=['hits', 'truth'])
            hits_with_uID = pd.merge(hits, hits_pd,
                                     on=['volume_id', 'layer_id', 'module_id'])
            filtered_truth = truth[(truth['weight'] > 5e-7) &
                                   (truth['particle_id'] != 0)]
            training = hits_with_uID.merge(filtered_truth,
                                           on='hit_id')[['uID', 'particle_id']]
            unique_truth = pd.Series(np.unique(training['particle_id']))
            training_df.append((training, unique_truth))
        with open(data_name, 'wb') as fp:
            pickle.dump(training_df, fp)
    return training_df
def get_score_of_one_event(self, ievt):
    event_str = "event00000{0}".format(1000 + ievt)
    logging.debug("processing {} event:".format(event_str))
    # Get the score of the track candidates.
    # Load the event info from the TrackML data.
    event_input_name = os.path.join(self.trackdata_dir, event_str)
    hits, truth = load_event(event_input_name, parts=['hits', 'truth'])
    all_gnn_tracks = []
    all_true_tracks = []
    for i in range(self.n_sections):
        res_tracks = self.get_tracks_of_one_sector(event_str, i, hits, truth)
        all_gnn_tracks += res_tracks[0]
        all_true_tracks += res_tracks[1]
    event_gnn = score_tracks(all_gnn_tracks, hits, truth)
    event_truth = score_tracks(all_true_tracks, hits, truth)
    logging.debug("SCORE of {} event: {:.4f} {:.4f} {:.4f}, {:.4f}\n".format(
        ievt, event_gnn[0], event_truth[0], event_gnn[0] / event_truth[0],
        event_truth[1]))
    return [event_gnn[0], event_truth[0]]