def run():
    """Score the step-wise submissions of one train_1 event.

    Reads ``step{N}.submission.csv`` for N = 0..4 and
    ``step{N}_good.submission.csv`` for N = 0..3 from the working directory.
    For every step the full submission of that step is concatenated with the
    "good" submissions of all earlier steps and scored with ``score_event``.
    After the last step the combined submission is additionally passed through
    ``extension.extend`` five times, with a score printed after each pass.
    """
    last_step = 4

    full_frames = []
    for step in range(last_step + 1):
        full_frames.append(pd.read_csv("step{0}.submission.csv".format(step)))

    good_frames = []
    for step in range(last_step):
        good_frames.append(
            pd.read_csv("step{0}_good.submission.csv".format(step)))

    train_dir = os.path.join(path_to_trackml, "train_1")
    events = load_dataset(train_dir, parts=["hits", "truth"],
                          skip=0, nevents=1)
    for event_id, hits, truth in events:
        for step, full_frame in enumerate(full_frames):
            # Full result of this step plus the "good" parts of earlier steps.
            combined = pd.concat([full_frame] + good_frames[:step])
            step_score = score_event(truth, combined)
            print("step = {0}, score = {1}".format(step, step_score))

            if step != last_step:
                continue

            # Only the final combination is extended.
            for round_no in range(1, 6):
                combined = extension.extend(combined, hits)
                step_score = score_event(truth, combined)
                print("with extension = {0}, score = {1}".format(
                    round_no, step_score))
def run():
    """Extend a merged submission four times, then remove outliers.

    Loads ``01_merge.submission.csv``, and for the single train_1 event
    applies ``extension.extend`` four times followed by one
    ``RemoveOutliersByQuadric.run`` pass, printing the ``score_event`` value
    after each of the five steps.
    """
    print(datetime.datetime.now(), sys.argv[0], " begin")

    submission = pd.read_csv("01_merge.submission.csv")
    model = extension.RemoveOutliersByQuadric()
    train_dir = os.path.join(path_to_trackml, "train_1")

    # One report format per extension pass (steps 1-4).
    extension_reports = (
        "step1, score: %0.5f",
        "step2, score: %0.5f",
        "step3, score: %0.5f",
        "step4, score: %0.5f",
    )

    for event_id, hits, truth in load_dataset(train_dir,
                                              parts=["hits", "truth"],
                                              skip=0, nevents=1):
        for report in extension_reports:
            submission = extension.extend(submission, hits)
            score = score_event(truth, submission)
            print(report % (score))

        # Step 5: outlier removal on the extended submission.
        submission = model.run(submission, hits)
        score = score_event(truth, submission)
        print("step5, score: %0.5f" % (score))

    print(datetime.datetime.now(), sys.argv[0], " end")
def run():
    """Fit a model per event and write one submission file per event.

    Command-line options come from ``get_args()``.  With ``args.test`` set the
    "test" directory is read (125 events, hits only); otherwise "train_1"
    (1 event, hits and truth).  Only events whose running index ``i``
    satisfies ``i mod args.num == args.idx`` are processed.  For a processed
    event an optional input submission is loaded from ``args.in_path``, the
    model is fitted on the hits, and ``model.submission`` is written under
    ``args.out_path``.  When truth is available the score is printed.

    Raises:
        RuntimeError: if an item yielded by ``load_dataset`` has neither two
            nor three elements.
    """
    print(datetime.datetime.now(), sys.argv[0], " begin")
    args = get_args()

    if not os.path.exists(args.out_path):
        os.makedirs(args.out_path)

    if args.test:
        print("calculation begin for test")
        path_to_input = os.path.join(path_to_trackml, "test")
        nevents, parts = 125, ["hits"]
    else:
        print("calculation begin for train_1")
        path_to_input = os.path.join(path_to_trackml, "train_1")
        nevents, parts = 1, ["hits", "truth"]

    dataset = load_dataset(path_to_input, parts=parts, nevents=nevents)
    for i, data in tqdm(enumerate(dataset), total=nevents):
        n_fields = len(data)
        if n_fields not in (2, 3):
            print(data)
            raise RuntimeError("not match")

        has_truth = n_fields == 3
        if has_truth:
            event_id, hits, truth = data
        else:
            event_id, hits = data

        # Events are shared out between workers by index modulo args.num.
        if np.mod(i, args.num) != args.idx:
            print("pass calc for event_id=", event_id)
            continue
        print("go calc for event_id=", event_id)

        print("len(hits): ", len(hits))
        hits["event_id"] = event_id
        model = get_model(args)

        path_to_sub = get_path_to_submission(args.in_path, event_id)
        if path_to_sub is not None:
            print("submission read from {0}".format(path_to_sub))
            model.set_submission(pd.read_csv(path_to_sub))
        else:
            print("submission set None")

        model.fit(hits)
        submission = model.submission

        path_to_sub = get_path_to_submission(args.out_path, event_id)
        out_dir = os.path.dirname(path_to_sub)
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
        submission.to_csv(path_to_sub, index=None)

        if has_truth:
            score = score_event(truth, model.submission)
            print("score: ", score)

    print(datetime.datetime.now(), sys.argv[0], " end")
def score_tracks(all_tracks, hits, truth):
    """Score track candidates and total the truth weight they cover.

    Args:
        all_tracks: sequence of tracks, each an iterable of hit ids.  The
            position of a track in the sequence becomes its ``track_id``.
        hits: DataFrame with a ``hit_id`` column.
        truth: DataFrame with ``hit_id``, ``particle_id`` and ``weight``
            columns.

    Returns:
        A two-element list ``[score, tot_truth_weight]``: ``score`` is
        ``score_event(truth, df_sub)``; ``tot_truth_weight`` is the summed
        weight of truth hits that appear in a track, excluding particles for
        which at most 50% of their truth hits were matched.
    """
    total_tracks = len(all_tracks)

    results = [(hit_id, itrk)
               for itrk, track in enumerate(all_tracks)
               for hit_id in track]
    new_df = pd.DataFrame(results, columns=['hit_id', 'track_id'])
    # A hit listed in several tracks keeps only its first assignment.
    new_df = new_df.drop_duplicates(subset='hit_id')

    # Hits not used by any track get the id total_tracks + 1.
    df_sub = hits[['hit_id']]
    df_sub = df_sub.merge(new_df, on='hit_id', how='outer').fillna(total_tracks+1)

    matched = truth.merge(new_df, on='hit_id', how='inner')
    tot_truth_weight = np.sum(matched['weight'])

    # Remove the weight of particles for which no more than half of the
    # truth hits were matched.  One groupby pass replaces re-filtering both
    # frames once per particle id.
    matched_weight = matched.groupby('particle_id')['weight']
    n_matched = matched_weight.size()
    n_truth = truth.groupby('particle_id').size().reindex(n_matched.index)
    under_matched = n_matched <= n_truth * 0.5
    tot_truth_weight -= matched_weight.sum()[under_matched].sum()

    return [score_event(truth, df_sub), tot_truth_weight]
def multiple_tracks_merge_by_layer(lab_list, hits, classifier, truth=None, return_result=True):
    '''
    Merge N track labelings by taking, via choose_longest_track_by_layer,
    the one with a higher number of hits belonging to different
    volume-layers.

    lab_list - list of predicted track ids for "hits"; the first entry seeds
               the merge and the others are folded in one by one
    hits - "hits" frame; its x, y, z columns are fed to the classifier
    classifier - volume-layers classifier (must provide predict())
    truth - "truth" frame used to calculate the score; required when
            return_result is True
    return_result - if True return the score of the merged labeling,
                    otherwise return the merged labeling itself

    Raises ValueError when a score is requested without a truth frame.
    '''
    # Fail before the expensive prediction/merge instead of inside scoring.
    if return_result and truth is None:
        raise ValueError("truth is required when return_result is True")

    s_combo = lab_list[0]
    vlm_predicted = classifier.predict(hits[['x', 'y', 'z']])

    for lb in lab_list[1:]:
        # Number of hits with different volume-layer in each labeling.
        N1 = number_hits_different_module(s_combo, vlm_predicted)
        N2 = number_hits_different_module(lb, vlm_predicted)
        # Merge lb into the running combination.
        s_combo = choose_longest_track_by_layer(N2, N1, lb, s_combo, threshold_value=17)

    if return_result:
        return score_event(truth, create_one_event_submission(0, hits, s_combo))
    return s_combo
def find_clusters(min_points, max_radius, shift, phi_wraparound=False, plot_intermediate=False):
    """Run one DBSCAN pass over the still-unassigned hits.

    Operates on the module globals ``clustering``, ``remaining_hits`` and
    ``n_clusters_found`` (all rebound or updated here) and reads the globals
    ``hits`` and ``truth``.

    Args:
        min_points: passed to DBSCAN as ``min_samples``.
        max_radius: passed to DBSCAN as ``eps``.
        shift: factor in ``phiCR = phi - shift * R``.
        phi_wraparound: accepted but not used in this function body.
        plot_intermediate: if true, plot every cluster found so far.
    """
    global clustering, remaining_hits, n_clusters_found

    remaining_hits['phiCR'] = remaining_hits['phi'] - shift*remaining_hits['R']
    features = remaining_hits[['eta', 'phiCR']]
    fitted = DBSCAN(eps=max_radius, min_samples=min_points,
                    metric='euclidean').fit(features)

    # Offset fresh labels so they do not collide with earlier passes;
    # -1 (no cluster) is left untouched.
    offset_labels = []
    for raw_label in fitted.labels_:
        if raw_label != -1:
            offset_labels.append(raw_label + n_clusters_found)
        else:
            offset_labels.append(-1)

    highest = max(offset_labels)
    if highest > -1:
        n_clusters_found = highest + 1

    remaining_hits['track_id'] = offset_labels
    clustering.update(remaining_hits['track_id'])
    # Only hits that are still unclustered go into the next pass.
    remaining_hits = remaining_hits[remaining_hits.track_id == -1]

    # plot currently found clusters
    if plot_intermediate:
        hits['phiCR'] = hits['phi'] - shift*hits['R']
        fig = plt.figure(figsize=(20, 7))
        ax = fig.add_subplot(111)
        for cluster in np.unique(clustering['track_id']):
            if cluster == -1:
                continue
            member_ids = clustering[clustering['track_id'] == cluster]['hit_id']
            members = hits[hits['hit_id'].isin(member_ids)][['eta', 'phiCR']]
            ax.plot(members.phiCR, members.eta, '.-', ms=10)
        plt.show()

    # print score
    score = score_event(truth, clustering)
    print('track-ml custom metric score:', round(score, 4),
          '- %d hits remaining to match' % len(remaining_hits),
          '- %d clusters found' % n_clusters_found)
def display_score(event_id, hits, labels, truth, message):
    """Print the score of one event, or a notice when no truth is given.

    Args:
        event_id: integer event identifier (formatted with ``%d``).
        hits: hits passed to ``create_one_event_submission``.
        labels: track labels passed to ``create_one_event_submission``.
        truth: truth table for ``score_event``; ``None`` skips scoring.
        message: prefix prepended to the printed line.
    """
    if truth is None:
        print(message + '%d: no score available' % (event_id))
        return

    one_submission = create_one_event_submission(event_id, hits, labels)
    score = score_event(truth, one_submission)
    print(message + "%d: %.8f" % (event_id, score))
def get_score(self, hits, truth):
    """Build tracks from ``hits`` and return their ``score_event`` value.

    ``self.find_track(hits, tracks)`` is called until ``hits.index`` is
    empty, then the collected tracks are converted with
    ``self.make_submission`` and scored against ``truth``.
    """
    found_tracks = []
    # NOTE(review): termination relies on find_track shrinking `hits` in
    # place — confirm against its implementation.
    while True:
        if len(hits.index) == 0:
            break
        self.find_track(hits, found_tracks)

    return score_event(truth, self.make_submission(found_tracks))
def foo(i):
    """Cluster, merge and save labels for shift index ``i``.

    Reads the globals ``hits``, ``truth``, ``c`` and
    ``min_samples_in_cluster``.  The hits are preprocessed with
    ``0.055 * i``, clustered with ``Clusterer.Hough_clustering``, and the
    resulting label rows are merged ten times in random row order.  The ten
    merged labelings are then merged once more and stored with ``np.save``
    under ``predicts/53/<i>``.  Every intermediate and the final score are
    printed.

    Args:
        i: index that selects the preprocessing shift and names the output
            file.

    Returns:
        None.
    """

    def merge_rows(rows):
        # Fold every row of `rows` into one labeling via merge(..., 0).
        merged = range(rows.shape[1])
        for row in rows:
            merged = merge(merged, row, 0)
        return merged

    model = Clusterer()
    model.initialize(hits)
    hits_with_dz = preprocess_hits(hits, 0.055 * i)
    result = model.Hough_clustering(hits_with_dz, coef=c, epsilon=0.0048,
                                    min_samples=min_samples_in_cluster,
                                    n_loop=300, verbose=True)
    hit_ids = hits['hit_id'].values

    second = []
    # The loop variables must not reuse the name `i`: the parameter is still
    # needed below to name the output file.
    for _ in range(10):
        np.random.shuffle(result)
        labels = merge_rows(result)
        submission = create_one_event_submission(0, hit_ids, labels)
        print(score_event(truth, submission))
        second.append(labels)

    labels = merge_rows(np.array(second))
    submission = create_one_event_submission(0, hit_ids, labels)
    print(score_event(truth, submission))

    np.save('predicts/53/{}'.format(i), labels)
    return None
def Fun4BO(w1, w2, w3, niter):
    """Objective function: score of ``model`` for the given settings.

    Writes ``w1`` to DBSCAN weight slots 0 and 1, ``w2`` to slot 2 and ``w3``
    to slot 3, sets ``model.niter`` to ``int(niter)``, predicts labels for
    ``hits`` and returns the ``score_event`` value.  ``model``, ``hits``,
    ``truth`` and ``event_id`` come from the enclosing scope.
    """
    for slot, weight in enumerate((w1, w1, w2, w3)):
        model.dbscan_weight[slot] = weight
    model.niter = int(niter)

    predicted = model.predict(hits)
    candidate = create_one_event_submission(event_id, hits, predicted)
    return score_event(truth, candidate)
def GA_eval(weights):
    """Return ``1 - score`` for the parameters in ``weights``.

    ``weights[0]`` is used as the ``eps`` of a new ``Clusterer`` and
    ``weights[1]`` as ``rz_scale`` for its ``predict`` call.  ``hits`` and
    ``truth`` come from the enclosing scope.  The score is also printed.
    """
    eps, z_scale = weights[0], weights[1]

    clusterer = Clusterer(eps=eps)
    predicted = clusterer.predict(hits, rz_scale=z_scale)
    score = score_event(truth,
                        create_one_event_submission(0, hits, predicted))

    print('score: %f' % score)
    # Returned as a cost: a higher score gives a lower value.
    return 1-score
def run():
    """Run ``merge.LengthMerge`` on one train_1 event, save and score it.

    The resulting submission is written to ``01_merge.submission.csv`` and
    its ``score_event`` value is printed.
    """
    merger = merge.LengthMerge()
    train_dir = os.path.join(path_to_trackml, "train_1")
    events = load_dataset(train_dir, parts=["hits", "truth"],
                          skip=0, nevents=1)

    for event_id, hits, truth in events:
        merged = merger.run(event_id, hits)
        merged.to_csv("01_merge.submission.csv", index=None)
        print("score: %0.5f" % (score_event(truth, merged)))
def run(filename):
    """Tune the DBSCAN weights of ``UnrollingHelices`` by Bayesian optimization.

    For the single train_1 event an objective is defined that writes the
    candidate weights into ``model.dbscan_weight``, sets
    ``model.iter_size_helix`` and returns the ``score_event`` value.  The
    optimizer history is written to ``filename`` as CSV: one column per
    parameter, a ``value`` column and a ``label`` column.  The first row is
    the best point (label ``"max"``), followed by the points in
    ``opt.res["all"]``.

    Args:
        filename: path of the CSV file to write.
    """
    model = models.UnrollingHelices(
        use_outlier=False,
        dbscan_features=["sina1", "cosa1", "z1", "x1", "x2",
                         "x_y", "x_rt", "y_rt"],
        dbscan_weight=[1.0, 1.0, 0.75, 0.5, 0.5, 0.2, 0.2, 0.2])
    path_to_input = os.path.join(path_to_trackml, "train_1")

    for event_id, hits, truth in load_dataset(path_to_input,
                                              parts=["hits", "truth"],
                                              skip=0, nevents=1):

        def Fun4BO(w_a1, w_z1, w_x1, w_x2, w_x_y, w_xy_rt, niter):
            # sina1/cosa1 share one weight, as do x_rt/y_rt.
            model.dbscan_weight[0] = w_a1
            model.dbscan_weight[1] = w_a1
            model.dbscan_weight[2] = w_z1
            model.dbscan_weight[3] = w_x1
            model.dbscan_weight[4] = w_x2
            model.dbscan_weight[5] = w_x_y
            model.dbscan_weight[6] = w_xy_rt
            model.dbscan_weight[7] = w_xy_rt
            model.iter_size_helix = int(niter)
            labels = model.predict(hits)
            one_submission = create_one_event_submission(event_id, hits,
                                                         labels)
            return score_event(truth, one_submission)

        print("Bayesian Optimization")
        # The bound keys must match the objective's parameter names, because
        # the optimizer passes each sampled point as keyword arguments.
        opt = BayesianOptimization(Fun4BO,
                                   {"w_a1": (0.9, 1.2),
                                    "w_z1": (0.3, 0.8),
                                    "w_x1": (0.1, 0.6),
                                    "w_x2": (0.1, 0.6),
                                    "w_x_y": (0.1, 0.6),
                                    "w_xy_rt": (0.1, 0.6),
                                    "niter": (140, 190)},
                                   verbose=True)
        opt.maximize(init_points=3, n_iter=20, acq="ucb", kappa=2.576)

        best = opt.res["max"]
        history = opt.res["all"]
        # [string]
        labels = best["max_params"].keys()
        # [dict(string, [float])]
        params = history["params"]
        len_params = len(params)

        # First entry of every column is the best point, then the history.
        data_dic = {
            label: [best["max_params"][label]] + [p[label] for p in params]
            for label in labels
        }
        data_dic["value"] = [best["max_val"]] + history["values"]
        data_dic["label"] = ["max"] + [str(x) for x in range(len_params)]

        df = pd.DataFrame(data_dic)
        # `label` is not a to_csv keyword; drop the row index instead.
        df.to_csv(filename, index=None)
def compute_trackml_score(self, final_tracks: List[TXplet], submission=None) -> float:
    """
    Score a set of tracks against ``self.truth``.

    :param final_tracks: a list of xplets representing tracks; only used
        when no submission is given
    :param submission: (optional) a ready TrackML submission; when omitted it
        is built with :py:meth:~`create_submission` from ``final_tracks``
    :return: the value of ``score_event(self.truth, submission)``
    """
    scored = (self.create_submission(final_tracks)
              if submission is None else submission)
    return score_event(self.truth, scored)
def score_graph_use_kcomponents(hits, G):
    """Turn the k=1 components of ``G`` into track candidates and score them.

    Each node set under key ``1`` of ``nx.k_components(G)`` becomes one
    track; the ``hit_id`` attribute of every node in it is assigned the
    position of that set as ``track_id``.

    Args:
        hits: frame passed as the first argument to ``score_event``.
            NOTE(review): confirm it carries the columns the scorer expects.
        G: graph whose nodes have a ``hit_id`` attribute.

    Returns:
        Tuple ``(trk_df, score)``: the hit_id/track_id frame and its score.
    """
    candidates = nx.k_components(G)[1]
    n_candidates = len(candidates)

    rows = [(G.nodes[node]['hit_id'], track_no)
            for track_no, nodes in enumerate(candidates)
            for node in nodes]
    trk_df = pd.DataFrame(rows, columns=['hit_id', 'track_id'])

    score = score_event(hits, trk_df)
    print("{} track candidates with score: {:.4f}".format(n_candidates, score))
    return trk_df, score
def run():
    """Run ``merge.QuadricMerge`` on one train_1 event, save and score it.

    Candidates are read from ``../09_UH_len/candidates`` (the directory
    handed to the merger).  The result is written to
    ``01_merge.submission.csv`` and its score is printed.
    """
    merger = merge.QuadricMerge(
        candidates_output_dir="../09_UH_len/candidates")
    train_dir = os.path.join(path_to_trackml, "train_1")
    events = load_dataset(train_dir, parts=["hits", "truth"],
                          skip=0, nevents=1)

    for event_id, hits, truth in events:
        merged = merger.run(event_id, hits)
        merged.to_csv("01_merge.submission.csv", index=None)
        print("score: %0.5f" % (score_event(truth, merged)))
def Fun4BO(w_a1, w_z1, w_z2, w_xy_rt, niter):
    """Objective function: score of ``model`` for the given settings.

    ``w_a1`` fills weight slots 0-1, ``w_z1`` slot 2, ``w_z2`` slot 3 and
    ``w_xy_rt`` slots 4-5; ``niter`` is truncated to int.  ``model``,
    ``hits``, ``truth`` and ``event_id`` come from the enclosing scope.
    """
    slot_weights = (w_a1, w_a1, w_z1, w_z2, w_xy_rt, w_xy_rt)
    for slot, weight in enumerate(slot_weights):
        model.dbscan_weight[slot] = weight
    model.niter = int(niter)

    predicted = model.predict(hits)
    candidate = create_one_event_submission(event_id, hits, predicted)
    return score_event(truth, candidate)
def Fun4BO(w_a1, w_z1, w_z2, w_xy_rt):
    """Objective function: mean score of ``model`` over several events.

    ``w_a1`` fills weight slots 0-1, ``w_z1`` slot 2, ``w_z2`` slot 3 and
    ``w_xy_rt`` slots 4-5.  The model is then evaluated on every pair from
    ``hits_list`` / ``truth_list`` and the arithmetic mean of the scores is
    returned.  ``event_id`` is taken from the enclosing scope and is the
    same for every event in the loop.
    """
    slot_weights = (w_a1, w_a1, w_z1, w_z2, w_xy_rt, w_xy_rt)
    for slot, weight in enumerate(slot_weights):
        model.dbscan_weight[slot] = weight

    scores = [
        score_event(
            event_truth,
            create_one_event_submission(event_id, event_hits,
                                        model.predict(event_hits)))
        for event_hits, event_truth in zip(hits_list, truth_list)
    ]
    return np.sum(scores)/len(scores)
def Fun4BO(w_a1, w_z1, w_z2, w_xy, w_xy_rt, c_r1, c_r2):
    """Objective function: score of ``model`` for the given settings.

    Weight slots: 0-1 ``w_a1``, 2 ``w_z1``, 3 ``w_z2``, 4 ``w_xy``,
    5-6 ``w_xy_rt``.  ``c_r1`` / ``c_r2`` are stored as ``model.coef_rt1`` /
    ``model.coef_rt2``.  ``model``, ``hits``, ``truth`` and ``event_id`` come
    from the enclosing scope.
    """
    slot_weights = {
        0: w_a1,
        1: w_a1,
        2: w_z1,
        3: w_z2,
        4: w_xy,
        5: w_xy_rt,
        6: w_xy_rt,
    }
    for slot, weight in slot_weights.items():
        model.dbscan_weight[slot] = weight
    model.coef_rt1 = c_r1
    model.coef_rt2 = c_r2

    predicted = model.predict(hits)
    candidate = create_one_event_submission(event_id, hits, predicted)
    return score_event(truth, candidate)
def run():
    """Extend the submission in ``02.csv`` once, save it and print the score.

    The extended submission of the single train_1 event is written to
    ``09.csv``.
    """
    train_dir = os.path.join(path_to_trackml, "train_1")
    nevents = 1
    old_submission = pd.read_csv("02.csv")

    sys.stderr.write("load data\n")
    events = load_dataset(train_dir, parts=["hits", "truth"],
                          skip=0, nevents=nevents)
    for event_id, hits, truth in events:
        extended = extension.extend(old_submission, hits)
        extended.to_csv("09.csv", index=None)
        score = score_event(truth, extended)
        print("")
        print("score: %0.5f" % (score))
def calc_steps(niter, eps0s, th_lens, num_exts, output_dir):
    """Run ``calc_one`` for a sequence of settings and score the result.

    Step ``k`` (1-based) uses ``niter[k-1]``, ``eps0s[k-1]``,
    ``th_lens[k-1]`` and ``num_exts[k-1]``.  After each step the "good"
    submissions of all steps so far are concatenated with the latest
    remaining submission and scored; the "good" part of the step is also
    scored against the truth rows of its own hits.  Both scores are printed.

    Args:
        niter: a list with one value per step, or a single value that is
            reused for every step.
        eps0s: ``eps0`` value per step; its length defines the step count.
        th_lens: per-step value forwarded to ``calc_one``.
        num_exts: per-step value forwarded to ``calc_one``.
        output_dir: not used by this function; the output path handed to
            ``calc_one`` is derived from ``sys.argv[0]``.

    Returns:
        The total score after the last step, or ``None`` when no step ran
        (empty ``eps0s`` or no event loaded).
    """
    if not isinstance(niter, list):
        niter = [niter] * len(eps0s)
    print("eps list: ", eps0s)
    print("th_len list: ", th_lens)
    print("num ext list: ", num_exts)

    path_to_input = os.path.join(path_to_trackml, "train_1")
    path_to_out = "out_" + sys.argv[0].split(".")[0]

    # Defined up front so an empty run returns None instead of raising.
    total_score = None
    for event_id, hits, truth in load_dataset(path_to_input,
                                              parts=["hits", "truth"],
                                              skip=0, nevents=1):
        print("len(hits): ", len(hits))
        sub_out = None
        subs_good = []
        for i, eps0 in enumerate(eps0s):
            step = i + 1
            # The remaining submission of the previous step feeds this one.
            sub_good, sub_out = calc_one(
                step, sub_out, event_id, hits, path_to_out,
                UnrollingHelices(niter=niter[i], eps0=eps0),
                LengthMerge(),
                th_lens[i],
                num_exts[i])
            subs_good.append(sub_good)

            submission = pd.concat(subs_good + [sub_out])
            total_score = score_event(truth, submission)
            print("step {0}, total_score:{1}".format(step, total_score))

            # Score the good part only against the truth of its own hits.
            truth_good = sub_good.merge(truth, on="hit_id")[truth.columns]
            score_good = score_event(truth_good, sub_good)
            print("step {0}, good_score: {1}".format(step, score_good))
    return total_score
def run():
    """Apply ``extension.extend`` five times to ``01_merge.submission.csv``.

    The score of the single train_1 event is printed after every pass,
    labelled step1..step5.
    """
    print(datetime.datetime.now(), sys.argv[0], " begin")

    submission = pd.read_csv("01_merge.submission.csv")
    train_dir = os.path.join(path_to_trackml, "train_1")
    events = load_dataset(train_dir, parts=["hits", "truth"],
                          skip=0, nevents=1)

    for event_id, hits, truth in events:
        for step in range(1, 6):
            submission = extension.extend(submission, hits)
            score = score_event(truth, submission)
            print("step%d, score: %0.5f" % (step, score))

    print(datetime.datetime.now(), sys.argv[0], " end")
def Fun4BO(w_a1, w_z1, w_z2, w_xy_rt, c_rt1, c_rt2, eps0, step_eps):
    """Objective function: score of ``model`` for the given settings.

    Weight slots: 0-1 ``w_a1``, 2 ``w_z1``, 3 ``w_z2``, 4-5 ``w_xy_rt``.
    ``c_rt1``, ``c_rt2``, ``eps0`` and ``step_eps`` are stored on the model
    as ``coef_rt1``, ``coef_rt2``, ``eps0`` and ``step_eps``.  ``model``,
    ``hits``, ``truth`` and ``event_id`` come from the enclosing scope.
    """
    slot_weights = (w_a1, w_a1, w_z1, w_z2, w_xy_rt, w_xy_rt)
    for slot, weight in enumerate(slot_weights):
        model.dbscan_weight[slot] = weight

    model.coef_rt1 = c_rt1
    model.coef_rt2 = c_rt2
    model.eps0 = eps0
    model.step_eps = step_eps

    predicted = model.predict(hits)
    candidate = create_one_event_submission(event_id, hits, predicted)
    return score_event(truth, candidate)
def run():
    """Extend the submission in ``09.csv`` five times, logging each score.

    After every ``extension.extend`` pass the submission is written to
    ``10_<i>.csv`` (i = 0..4) and a line ``i=<i> score=<score>`` is appended
    to ``10.log``.  The log file is closed when the function exits, also on
    error.
    """
    with open("10.log", "w") as f:
        f.write("extention many times\n")

        path_to_input = os.path.join(path_to_trackml, "train_1")
        nevents = 1
        submission = pd.read_csv("09.csv")

        sys.stderr.write("load data\n")
        for event_id, hits, truth in load_dataset(path_to_input,
                                                  parts=["hits", "truth"],
                                                  skip=0, nevents=nevents):
            for i in range(5):
                # Each pass extends the result of the previous one.
                submission = extension.extend(submission, hits)
                submission.to_csv("10_{0}.csv".format(i), index=None)
                score = score_event(truth, submission)
                f.write("i={0} score={1}\n".format(i, score))
def run_candidate():
    """Predict tracks for one train_1 event with ``UnrollingHelicesShiftingZ``.

    The truth table is left-joined with the hits on ``hit_id`` and the joined
    frame is both the model input and the reference for ``score_event``.  The
    submission is written to ``02.csv``.  Printed are the score multiplied by
    the summed ``weight`` column and, in parentheses, the score itself.
    """
    print("script begin", datetime.datetime.now())
    model = models.UnrollingHelicesShiftingZ(
        djs=[-20, -10, 0, 10, 20],
        dbscan_features=["sina1", "cosa1", "z1", "z2", "x_rt", "y_rt"],
        dbscan_weight=[0.9, 0.9, 0.35, 0.22, 0.01, 0.01],
        coef_rt1=1.33,
        coef_rt2=0.0,
        niter=150)
    nevents = 1
    path_to_input = os.path.join(path_to_trackml, "train_1")

    # Load every event first, then scan them.
    events = []
    sys.stderr.write("load data\n")
    for event_id, hits, truth in load_dataset(path_to_input,
                                              parts=["hits", "truth"],
                                              skip=0, nevents=nevents):
        print("size(hits) : ", len(hits))
        events.append((event_id, hits, truth))

    sys.stderr.write("scan\n")
    for event_id, hits, truth in events:
        truth = truth.merge(hits, on=['hit_id'], how='left')
        dfh = truth.copy()
        label = model.predict(dfh)
        submission = pd.DataFrame(
            columns=['event_id', 'hit_id', 'track_id'],
            data=np.column_stack(([int(event_id)] * len(dfh),
                                  dfh.hit_id.values,
                                  label))).astype(int)
        submission.to_csv("02.csv", index=None)
        score = score_event(dfh, submission)
        max_score = dfh.weight.sum()
        print("score: %0.5f (%0.5f)" % (score * max_score, score))
    print("script end", datetime.datetime.now())
def run_candidate():
    """Score ``models.ZAScale`` on a geometric slice of one train_1 event.

    The truth table is left-joined with the hits on ``hit_id``; only rows
    with ``z > 500`` and ``50 < rt < 100`` (``rt = sqrt(x^2 + y^2)``) are
    kept, predicted and scored.  Printed are the score multiplied by the
    summed ``weight`` of the slice and, in parentheses, the score itself.
    The output directory ``out_<script name>`` is created if missing.
    """
    print("script begin", datetime.datetime.now())
    EPS = 1e-12
    # EPS is added to the stop values of the aranges below.
    model = models.ZAScale(djs=np.arange(-20, 20 + EPS, 10),
                           dis=np.arange(-0.003, 0.003 + EPS, 0.00025),
                           min_ncand=1)
    nevents = 1
    path_to_input = os.path.join(path_to_trackml, "train_1")
    path_to_out = "out_{0}".format(sys.argv[0].split(".")[0])
    os.makedirs(path_to_out, exist_ok=True)

    loaded = []
    sys.stderr.write("load data\n")
    for event_id, hits, truth in load_dataset(path_to_input,
                                              parts=["hits", "truth"],
                                              skip=0, nevents=nevents):
        print("size(hits) : ", len(hits))
        loaded.append((event_id, hits, truth))

    sys.stderr.write("scan\n")
    for event_id, hits, truth in loaded:
        joined = truth.merge(hits, on=['hit_id'], how='left')
        dfh = joined.copy()
        dfh["rt"] = np.sqrt(dfh['x'].values**2 + dfh['y'].values**2)

        # Restrict to the studied region: forward z, then a radial band.
        dfh = dfh.loc[dfh.z > 500]
        in_band = (dfh.rt > 50) & (dfh.rt < 100)
        dfh = dfh.loc[in_band]

        label = model.predict(dfh)
        event_column = [int(event_id)] * len(dfh)
        table = np.column_stack((event_column, dfh.hit_id.values, label))
        submission = pd.DataFrame(
            columns=['event_id', 'hit_id', 'track_id'],
            data=table).astype(int)

        score = score_event(dfh, submission)
        max_score = dfh.weight.sum()
        print("score: %0.5f (%0.5f)" % (score * max_score, score))
    print("script end", datetime.datetime.now())
def run_candidate():
    """Predict tracks for one train_1 event with tuned DBSCAN weights.

    Uses ``models.UnrollingHelicesShiftingZ`` with eight features and fixed
    weights.  The truth table is left-joined with the hits on ``hit_id`` and
    the joined frame is both the model input and the reference for
    ``score_event``.  The submission is written to ``05.csv``.  Printed are
    the score multiplied by the summed ``weight`` column and, in
    parentheses, the score itself.
    """
    print("script begin", datetime.datetime.now())
    model = models.UnrollingHelicesShiftingZ(
        dbscan_features=["sina1", "cosa1", "z1", "z2",
                         "x_y", "x_r", "y_r", "rt_r"],
        dbscan_weight=[2.7474448671796874,
                       2.7474448671796874,
                       1.3649721713529086,
                       0.7034918842926337,
                       0.0005549122352940002,
                       0.023096034747190672,
                       0.04619756315527515,
                       0.2437077420144654],
        djs=[-20, -10, 0, 10, 20],
        niter=150,
        eps0=0.00975)
    nevents = 1
    path_to_input = os.path.join(path_to_trackml, "train_1")

    # Load every event first, then scan them.
    events = []
    sys.stderr.write("load data\n")
    for event_id, hits, truth in load_dataset(path_to_input,
                                              parts=["hits", "truth"],
                                              skip=0, nevents=nevents):
        print("size(hits) : ", len(hits))
        events.append((event_id, hits, truth))

    sys.stderr.write("scan\n")
    for event_id, hits, truth in events:
        truth = truth.merge(hits, on=['hit_id'], how='left')
        dfh = truth.copy()
        label = model.predict(dfh)
        submission = pd.DataFrame(
            columns=['event_id', 'hit_id', 'track_id'],
            data=np.column_stack(([int(event_id)] * len(dfh),
                                  dfh.hit_id.values,
                                  label))).astype(int)
        submission.to_csv("05.csv", index=None)
        score = score_event(dfh, submission)
        max_score = dfh.weight.sum()
        print("score: %0.5f (%0.5f)" % (score*max_score, score))
    print("script end", datetime.datetime.now())
def Fun4BO(w_a1, w_z1, w_z2):
    """Objective function: mean score of ``model`` over the loaded events.

    ``w_a1`` fills weight slots 0-1, ``w_z1`` slot 2 and ``w_z2`` slot 3.
    The model is evaluated on every triple from ``event_id_list``,
    ``hits_list`` and ``truth_list`` (enclosing scope) and the arithmetic
    mean of the scores is returned.
    """
    for slot, weight in enumerate((w_a1, w_a1, w_z1, w_z2)):
        model.dbscan_weight[slot] = weight

    sys.stderr.write("scan\n")
    scores = []
    for event_id, hits, truth in zip(event_id_list, hits_list, truth_list):
        label = model.predict(hits)
        event_column = [int(event_id)] * len(hits)
        table = np.column_stack((event_column, hits.hit_id.values, label))
        submission = pd.DataFrame(
            columns=['event_id', 'hit_id', 'track_id'],
            data=table).astype(int)
        scores.append(score_event(truth, submission))

    return np.sum(scores) / len(scores)
def run_dbscan():
    """Score ``do_dbscan_predict`` on the listed events and print the mean.

    Events are loaded from ``../input/train_1``.  For each event the
    per-event score is printed; after a separator line the average over all
    processed events is printed.
    """
    data_dir = '../input/train_1'
    event_ids = ['000001000']

    n_events = 0
    score_total = 0
    for position, event_id in enumerate(event_ids):
        # Only hits and truth are needed; cells and particles are ignored.
        hits, _cells, _particles, truth = load_event(
            data_dir + '/event' + event_id)
        predicted = do_dbscan_predict(hits)
        submission = create_one_event_submission(
            0, hits['hit_id'].values, predicted)
        score = score_event(truth, submission)
        print('[%2d] score : %0.8f' % (position, score))
        score_total += score
        n_events += 1

    print('--------------------------------------')
    print(score_total / n_events)
def run_candidate():
    """Score ``models.ZAScaleNFilter`` on one train_1 event.

    The truth table is left-joined with the hits on ``hit_id`` and the joined
    frame is both the model input and the reference for ``score_event``.
    Printed are the score multiplied by the summed ``weight`` column and, in
    parentheses, the score itself.  Nothing is written to disk.
    """
    print("script begin", datetime.datetime.now())
    EPS = 1e-12
    # sigma of z is 5.5 mm
    model = models.ZAScaleNFilter(djs=np.linspace(-2.25, 2.25 + EPS, 10),
                                  dis=np.linspace(-0.003, 0.003 + EPS, 25))
    # model = models.ZAScaleNFilter(djs=[-20, 0.0, 20],
    #                               dis=[0.0])
    nevents = 1
    path_to_input = os.path.join(path_to_trackml, "train_1")

    # Load every event first, then scan them.
    events = []
    sys.stderr.write("load data\n")
    for event_id, hits, truth in load_dataset(path_to_input,
                                              parts=["hits", "truth"],
                                              skip=0, nevents=nevents):
        print("size(hits) : ", len(hits))
        events.append((event_id, hits, truth))

    sys.stderr.write("scan\n")
    for event_id, hits, truth in events:
        truth = truth.merge(hits, on=['hit_id'], how='left')
        dfh = truth.copy()
        label = model.predict(dfh)
        submission = pd.DataFrame(
            columns=['event_id', 'hit_id', 'track_id'],
            data=np.column_stack(([int(event_id)] * len(dfh),
                                  dfh.hit_id.values,
                                  label))).astype(int)
        score = score_event(dfh, submission)
        max_score = dfh.weight.sum()
        print("score: %0.5f (%0.5f)" % (score * max_score, score))
    print("script end", datetime.datetime.now())