def run(filename):
    w_a1 = 0.90291
    w_xy_rt = 0.010809
    w_z1 = 0.357996
    w_z2 = 0.229602
    c_rt1 = 1.330075
    c_rt2 = 1.92522

    model = models.UnrollingHelicesRt2(
        dbscan_features=["sina1", "cosa1", "z1", "z2", "x_rt", "y_rt"],
        dbscan_weight=[w_a1, w_a1, w_z1, w_z2, w_xy_rt, w_xy_rt])
    model.coef_rt1 = c_rt1
    model.coef_rt2 = c_rt2
    model.niter = 150

    path_to_input = os.path.join(path_to_trackml, "test")
    dataset_submission = []
    for event_id, hits in load_dataset(path_to_input, parts=["hits"]):
        labels = model.predict(hits)
        one_submission = create_one_event_submission(event_id, hits, labels)
        dataset_submission.append(one_submission)
    submission = pd.concat(dataset_submission)
    submission.to_csv(filename, index=None)
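# run() relies on a create_one_event_submission() helper that is not shown in
# this section. A minimal sketch of such a helper, assuming the standard
# TrackML submission columns (event_id, hit_id, track_id):
import numpy as np
import pandas as pd


def create_one_event_submission(event_id, hits, labels):
    # Pair every hit of one event with its predicted track label.
    sub_data = np.column_stack(([event_id] * len(hits), hits.hit_id.values, labels))
    return pd.DataFrame(data=sub_data,
                        columns=["event_id", "hit_id", "track_id"]).astype(int)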
def Fun4BO(w1, w2, w3, niter):
    model.dbscan_weight[0] = w1
    model.dbscan_weight[1] = w1
    model.dbscan_weight[2] = w2
    model.dbscan_weight[3] = w3
    model.niter = int(niter)
    labels = model.predict(hits)
    one_submission = create_one_event_submission(event_id, hits, labels)
    score = score_event(truth, one_submission)
    return score
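# Fun4BO closes over model, hits, truth, and event_id from the enclosing
# scope. A sketch of how it could be wired into bayes_opt (pre-1.0 API, as
# used elsewhere in this code); the bounds below are illustrative, not the
# tuned ones. The same wiring applies to the other Fun4BO variants, with the
# pbounds keys renamed to match each signature.
from bayes_opt import BayesianOptimization

opt = BayesianOptimization(Fun4BO,
                           {"w1": (0.9, 1.2),
                            "w2": (0.3, 0.8),
                            "w3": (0.1, 0.6),
                            "niter": (140, 190)},
                           verbose=True)
opt.maximize(init_points=3, n_iter=20, acq="ucb", kappa=2.576)
print(opt.res["max"]["max_val"], opt.res["max"]["max_params"])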
def run(filename):
    model = models.UnrollingHelices(
        use_outlier=False,
        dbscan_features=["sina1", "cosa1", "z1", "x1", "x2", "x_y", "x_rt", "y_rt"],
        dbscan_weight=[1.0, 1.0, 0.75, 0.5, 0.5, 0.2, 0.2, 0.2])
    path_to_input = os.path.join(path_to_trackml, "train_1")
    for event_id, hits, truth in load_dataset(path_to_input,
                                              parts=["hits", "truth"],
                                              skip=0, nevents=1):

        def Fun4BO(w_a1, w_z1, w_x1, w_x2, w_x_y, w_xy_rt, niter):
            model.dbscan_weight[0] = w_a1
            model.dbscan_weight[1] = w_a1
            model.dbscan_weight[2] = w_z1
            model.dbscan_weight[3] = w_x1
            model.dbscan_weight[4] = w_x2
            model.dbscan_weight[5] = w_x_y
            model.dbscan_weight[6] = w_xy_rt
            model.dbscan_weight[7] = w_xy_rt
            model.iter_size_helix = int(niter)
            labels = model.predict(hits)
            one_submission = create_one_event_submission(event_id, hits, labels)
            score = score_event(truth, one_submission)
            return score

        print("Bayesian Optimization")
        # pbounds keys must match Fun4BO's parameter names
        opt = BayesianOptimization(Fun4BO,
                                   {"w_a1": (0.9, 1.2),
                                    "w_z1": (0.3, 0.8),
                                    "w_x1": (0.1, 0.6),
                                    "w_x2": (0.1, 0.6),
                                    "w_x_y": (0.1, 0.6),
                                    "w_xy_rt": (0.1, 0.6),
                                    "niter": (140, 190)},
                                   verbose=True)
        opt.maximize(init_points=3, n_iter=20, acq="ucb", kappa=2.576)

        # [string]
        labels = opt.res["max"]["max_params"].keys()
        # [dict(string, [float])]
        params = opt.res["all"]["params"]
        len_params = len(params)

        data_dic = {}
        for label in labels:
            val = [opt.res["max"]["max_params"][label]]
            for i in range(len_params):
                val.append(params[i][label])
            data_dic[label] = val
        data_dic["value"] = [opt.res["max"]["max_val"]] + opt.res["all"]["values"]
        data_dic["label"] = ["max"] + [str(x) for x in range(len_params)]
        df = pd.DataFrame(data_dic)
        df.to_csv(filename, index=None)
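# The result extraction above follows the pre-1.0 bayes_opt API, where
# opt.res is a dict with "max" and "all" entries. Under bayes_opt >= 1.0 the
# same information is exposed as opt.max plus a list opt.res; a rough
# equivalent would be:
best_value = opt.max["target"]
best_params = opt.max["params"]
all_values = [r["target"] for r in opt.res]
all_params = [r["params"] for r in opt.res]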
def Fun4BO(w_a1, w_z1, w_z2, w_xy_rt, niter):
    model.dbscan_weight[0] = w_a1
    model.dbscan_weight[1] = w_a1
    model.dbscan_weight[2] = w_z1
    model.dbscan_weight[3] = w_z2
    model.dbscan_weight[4] = w_xy_rt
    model.dbscan_weight[5] = w_xy_rt
    model.niter = int(niter)
    labels = model.predict(hits)
    one_submission = create_one_event_submission(event_id, hits, labels)
    score = score_event(truth, one_submission)
    return score
def run(model, test_or_train, path_to_out, nevents=None):
    if test_or_train not in ["test", "train_1"]:
        sys.stderr.write("Error. test_or_train must be \"test\" or \"train_1\"\n")
        sys.exit()
    if test_or_train == "test" and nevents is not None:
        sys.stderr.write("Error. nevents must be None when test_or_train is \"test\"\n")
        sys.exit()

    path_to_input = os.path.join(path_to_trackml, test_or_train)
    os.makedirs(path_to_out, exist_ok=True)

    print("calculation begin : {0}".format(datetime.datetime.today()))
    dataset_submission = []
    if test_or_train == "test":
        for event_id, hits in load_dataset(path_to_input, parts=["hits"]):
            sys.stderr.write("processing event_id : {0}\n".format(event_id))
            labels = model.predict(hits)
            one_submission = create_one_event_submission(event_id, hits, labels)
            dataset_submission.append(one_submission)
    else:
        dataset_score = []
        for event_id, hits, truth in load_dataset(path_to_input,
                                                  parts=["hits", "truth"],
                                                  skip=0, nevents=nevents):
            sys.stderr.write("processing event_id : {0}\n".format(event_id))
            labels = model.predict(hits)
            one_submission = create_one_event_submission(event_id, hits, labels)
            dataset_submission.append(one_submission)
            score = score_event(truth, one_submission)
            dataset_score.append(score)
            print("Score for event %d:%.8f" % (event_id, score))
        print("Mean Score : %.8f" % (np.sum(dataset_score) / len(dataset_score)))

    submission = pd.concat(dataset_submission)
    submission.to_csv(os.path.join(path_to_out, "submission.csv"), index=None)
    print("calculation end : {0}".format(datetime.datetime.today()))
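# Hypothetical invocation of run(); the model construction mirrors the tuned
# UnrollingHelicesRt2 setup above, and "out" / nevents=5 are placeholder values.
model = models.UnrollingHelicesRt2(
    dbscan_features=["sina1", "cosa1", "z1", "z2", "x_rt", "y_rt"],
    dbscan_weight=[0.90291, 0.90291, 0.357996, 0.229602, 0.010809, 0.010809])
run(model, "train_1", "out", nevents=5)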
def Fun4BO(w_a1, w_z1, w_z2, w_xy_rt):
    model.dbscan_weight[0] = w_a1
    model.dbscan_weight[1] = w_a1
    model.dbscan_weight[2] = w_z1
    model.dbscan_weight[3] = w_z2
    model.dbscan_weight[4] = w_xy_rt
    model.dbscan_weight[5] = w_xy_rt
    score_list = []
    for (hits, truth) in zip(hits_list, truth_list):
        labels = model.predict(hits)
        one_submission = create_one_event_submission(event_id, hits, labels)
        score = score_event(truth, one_submission)
        score_list.append(score)
    return np.sum(score_list) / len(score_list)
def Fun4BO(w_a1, w_z1, w_z2, w_xy, w_xy_rt, c_r1, c_r2):
    model.dbscan_weight[0] = w_a1
    model.dbscan_weight[1] = w_a1
    model.dbscan_weight[2] = w_z1
    model.dbscan_weight[3] = w_z2
    model.dbscan_weight[4] = w_xy
    model.dbscan_weight[5] = w_xy_rt
    model.dbscan_weight[6] = w_xy_rt
    model.coef_rt1 = c_r1
    model.coef_rt2 = c_r2
    labels = model.predict(hits)
    one_submission = create_one_event_submission(event_id, hits, labels)
    score = score_event(truth, one_submission)
    return score
def Fun4BO(w_a1, w_z1, w_z2, w_xy_rt, c_rt1, c_rt2, eps0, step_eps):
    model.dbscan_weight[0] = w_a1
    model.dbscan_weight[1] = w_a1
    model.dbscan_weight[2] = w_z1
    model.dbscan_weight[3] = w_z2
    model.dbscan_weight[4] = w_xy_rt
    model.dbscan_weight[5] = w_xy_rt
    model.coef_rt1 = c_rt1
    model.coef_rt2 = c_rt2
    model.eps0 = eps0
    model.step_eps = step_eps
    labels = model.predict(hits)
    one_submission = create_one_event_submission(event_id, hits, labels)
    score = score_event(truth, one_submission)
    return score
def fit(self, dfh, y=None):
    # derived coordinates: spherical radius r, cylindrical radius rt, azimuth a0
    dfh["s1"] = dfh.hit_id
    dfh["N1"] = 1
    dfh['r'] = np.sqrt(dfh['x'].values**2 + dfh['y'].values**2 + dfh['z'].values**2)
    dfh['rt'] = np.sqrt(dfh['x'].values**2 + dfh['y'].values**2)
    dfh['a0'] = np.arctan2(dfh['y'].values, dfh['x'].values)

    mm = 1
    for ii in tqdm(range(self.niter), total=self.niter):
        # unroll helices: rotate the azimuth proportionally to rt,
        # alternating the rotation direction between iterations
        mm = mm * (-1)
        dfh['z1'] = (dfh['z'].values + dj) / dfh['rt'].values  # dj: z-shift, assumed defined in the enclosing scope
        dfh['z2'] = (dfh['z'].values + dj) / dfh['r'].values
        dfh["a1"] = dfh.a0 + mm * (self.coef_rt * dfh.rt.values) / 1000.0 * (ii / 2) / 180.0 * np.pi
        dfh["sina1"] = np.sin(dfh["a1"].values)
        dfh["cosa1"] = np.cos(dfh["a1"].values)

        # scaling
        ss = StandardScaler()
        dfs = ss.fit_transform(dfh[self.features].values)
        dfs[:, :] = dfs[:, :] * self.weight[np.newaxis, :]

        # clustering
        res = DBSCAN(eps=self.eps0, min_samples=1, metric='euclidean', n_jobs=4).fit(dfs).labels_
        dfh["s2"] = res
        dfh['N2'] = dfh.groupby('s2')['s2'].transform('count')
        # adopt the new cluster only where it groups more hits than before (but fewer than 20)
        maxs1 = np.max(dfh.s1)
        dfh.s1 = np.where((dfh.N2 > dfh.N1) & (dfh.N2 < 20), dfh.s2 + maxs1, dfh.s1)
        dfh['s1'] = dfh['s1'].astype('int64')
        dfh['N1'] = dfh.groupby('s1')['s1'].transform('count')

    labels = dfh["s1"]
    submission = create_one_event_submission(0, dfh, labels)
    for i in range(self.nextend):
        submission = extend(submission, dfh)
    self.submission = submission
    return self
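# fit() relies on several attributes of its owning class. A hypothetical
# skeleton of the surrounding clusterer (attribute names follow what fit()
# accesses; the default values are placeholders, not tuned settings):
import numpy as np


class UnrollingHelicesClusterer(object):
    def __init__(self, features, weight, niter=150, coef_rt=1.0,
                 eps0=0.0035, nextend=0):
        self.features = features          # column names fed to DBSCAN
        self.weight = np.asarray(weight)  # per-feature weights applied after scaling
        self.niter = niter                # number of helix-unrolling iterations
        self.coef_rt = coef_rt            # unrolling coefficient
        self.eps0 = eps0                  # DBSCAN eps
        self.nextend = nextend            # number of extend() passes on the submission
        self.submission = None

    # fit(self, dfh, y=None) as defined above would be attached here, along
    # with a predict() that returns the track_id column of self.submission.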
if __name__ == "__main__":
    sys.path.append(path_to_trackmllib)
    from trackml.dataset import load_dataset
    from trackml.score import score_event

    os.makedirs(path_to_out, exist_ok=True)
    f_log = open(path_to_log, "w")
    f_log.write("calculation begin\n")
    f_log.write(str(datetime.datetime.today()) + "\n")

    dataset_submission = []
    dataset_score = []
    for event_id, hits, cells, particles, truth in load_dataset(
            path_to_input, skip=0, nevents=nevents):
        labels = model.predict(hits)
        one_submission = create_one_event_submission(event_id, hits, labels)
        dataset_submission.append(one_submission)
        score = score_event(truth, one_submission)
        dataset_score.append(score)
        f_log.write("Score for event %d:%.8f\n" % (event_id, score))

    submission = pd.concat(dataset_submission)
    submission.to_csv(os.path.join(path_to_out, "submission.csv"), index=None)
    f_log.write("Mean Score : %.8f\n" % (np.sum(dataset_score) / len(dataset_score)))
    f_log.write("calculation end\n")
    f_log.write(str(datetime.datetime.today()))
    f_log.close()
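# The __main__ block assumes several module-level names defined elsewhere in
# the script (paths, nevents, model). A hypothetical configuration header with
# placeholder paths and values:
import os
import sys
import datetime

import numpy as np
import pandas as pd

import models

path_to_trackmllib = "/path/to/trackml-library"  # placeholder
path_to_trackml = "/path/to/trackml-data"        # placeholder
path_to_input = os.path.join(path_to_trackml, "train_1")
path_to_out = "out"
path_to_log = os.path.join(path_to_out, "log.txt")
nevents = 10

model = models.UnrollingHelices()  # any model exposing predict(hits)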