def run_candidate():
    """Cluster hits with UnrollingHelicesShiftingZ, save the labels, print the score.

    Events are requested from ``<path_to_trackml>/train_1`` via
    ``load_dataset`` (``skip=0``, ``nevents=1``).  For each loaded event the
    truth table is left-joined with the hits on ``hit_id``, the model predicts
    one label per row, and the resulting ``event_id, hit_id, track_id`` table
    is written to ``02.csv`` (overwritten on every event).

    The printed score line shows ``score * sum(weight)`` followed by the raw
    value returned by ``score_event``.

    NOTE(review): the visible source defines ``run_candidate`` more than once;
    at import time the last definition is the one bound to the name.
    """
    print("script begin", datetime.datetime.now())

    model = models.UnrollingHelicesShiftingZ(
        djs=[-20, -10, 0, 10, 20],
        dbscan_features=["sina1", "cosa1", "z1", "z2", "x_rt", "y_rt"],
        dbscan_weight=[0.9, 0.9, 0.35, 0.22, 0.01, 0.01],
        coef_rt1=1.33,
        coef_rt2=0.0,
        niter=150)

    nevents = 1
    path_to_input = os.path.join(path_to_trackml, "train_1")

    # Load everything first so the "load data" / "scan" phases stay separate.
    events = []
    sys.stderr.write("load data\n")
    for event_id, hits, truth in load_dataset(path_to_input,
                                              parts=["hits", "truth"],
                                              skip=0,
                                              nevents=nevents):
        print("size(hits) : ", len(hits))
        events.append((event_id, hits, truth))

    sys.stderr.write("scan\n")
    for event_id, hits, truth in events:
        # merge() already returns a new frame, so no extra copy is needed
        # before handing it to the model.
        dfh = truth.merge(hits, on=['hit_id'], how='left')
        label = model.predict(dfh)

        submission = pd.DataFrame(
            columns=['event_id', 'hit_id', 'track_id'],
            data=np.column_stack(
                ([int(event_id)] * len(dfh), dfh.hit_id.values, label)),
        ).astype(int)
        submission.to_csv("02.csv", index=False)

        score = score_event(dfh, submission)
        max_score = dfh.weight.sum()
        print("score: %0.5f (%0.5f)" % (score * max_score, score))

    print("script end", datetime.datetime.now())
def run_candidate():
    """Cluster hits with an eight-feature weight set, save labels, print the score.

    Events are requested from ``<path_to_trackml>/train_1`` via
    ``load_dataset`` (``skip=0``, ``nevents=1``).  For each loaded event the
    truth table is left-joined with the hits on ``hit_id``, the model predicts
    one label per row, and the resulting ``event_id, hit_id, track_id`` table
    is written to ``05.csv`` (overwritten on every event).

    The printed score line shows ``score * sum(weight)`` followed by the raw
    value returned by ``score_event``.

    NOTE(review): the visible source contains an earlier definition of
    ``run_candidate``; this later one replaces it when the module is loaded.
    """
    print("script begin", datetime.datetime.now())

    model = models.UnrollingHelicesShiftingZ(
        dbscan_features=["sina1", "cosa1", "z1", "z2",
                         "x_y", "x_r", "y_r", "rt_r"],
        dbscan_weight=[2.7474448671796874,
                       2.7474448671796874,
                       1.3649721713529086,
                       0.7034918842926337,
                       0.0005549122352940002,
                       0.023096034747190672,
                       0.04619756315527515,
                       0.2437077420144654],
        djs=[-20, -10, 0, 10, 20],
        niter=150,
        eps0=0.00975)

    nevents = 1
    path_to_input = os.path.join(path_to_trackml, "train_1")

    # Load everything first so the "load data" / "scan" phases stay separate.
    events = []
    sys.stderr.write("load data\n")
    for event_id, hits, truth in load_dataset(path_to_input,
                                              parts=["hits", "truth"],
                                              skip=0,
                                              nevents=nevents):
        print("size(hits) : ", len(hits))
        events.append((event_id, hits, truth))

    sys.stderr.write("scan\n")
    for event_id, hits, truth in events:
        # merge() already returns a new frame, so no extra copy is needed
        # before handing it to the model.
        dfh = truth.merge(hits, on=['hit_id'], how='left')
        label = model.predict(dfh)

        submission = pd.DataFrame(
            columns=['event_id', 'hit_id', 'track_id'],
            data=np.column_stack(
                ([int(event_id)] * len(dfh), dfh.hit_id.values, label)),
        ).astype(int)
        submission.to_csv("05.csv", index=False)

        score = score_event(dfh, submission)
        max_score = dfh.weight.sum()
        print("score: %0.5f (%0.5f)" % (score * max_score, score))

    print("script end", datetime.datetime.now())
def run(filename):
    """Tune three DBSCAN feature weights by Bayesian optimisation and dump the trace.

    Events are requested from ``<path_to_trackml>/train_1`` via
    ``load_dataset`` (``skip=0``, ``nevents=1``) and kept in memory.  The
    optimiser repeatedly calls an objective that overwrites the first four
    entries of ``model.dbscan_weight`` and returns the mean ``score_event``
    value over the loaded events.

    Args:
        filename: Destination passed to ``DataFrame.to_csv``.  The table has
            one column per tuned parameter plus ``value`` and ``label``; the
            first row (label ``"max"``) is the best point, followed by every
            evaluated point labelled ``"0"``, ``"1"``, ...

    NOTE(review): relies on ``opt.res`` exposing ``"max"`` / ``"all"`` dicts
    and on ``maximize`` accepting ``kappa`` -- confirm against the installed
    bayes_opt version.
    """
    print("script begin", datetime.datetime.now())

    EPS = 1e-12  # nudges the upper bound so np.arange includes 20
    model = models.UnrollingHelicesShiftingZ(
        djs=np.arange(-20, 20 + EPS, 10),
        dbscan_features=["sina1", "cosa1", "z1", "z2"],
        dbscan_weight=[1.0, 1.0, 0.75, 0.2],
        niter=150)

    nevents = 1
    path_to_input = os.path.join(path_to_trackml, "train_1")

    sys.stderr.write("load data\n")
    # Each item unpacks as (event_id, hits, truth).
    events = list(load_dataset(path_to_input,
                               parts=["hits", "truth"],
                               skip=0,
                               nevents=nevents))

    def Fun4BO(w_a1, w_z1, w_z2):
        """Objective: mean event score for the given weights."""
        # The two angle features share a single weight.
        model.dbscan_weight[0] = w_a1
        model.dbscan_weight[1] = w_a1
        model.dbscan_weight[2] = w_z1
        model.dbscan_weight[3] = w_z2

        sys.stderr.write("scan\n")
        score_list = []
        for event_id, hits, truth in events:
            label = model.predict(hits)
            submission = pd.DataFrame(
                columns=['event_id', 'hit_id', 'track_id'],
                data=np.column_stack(
                    ([int(event_id)] * len(hits), hits.hit_id.values, label)),
            ).astype(int)
            score_list.append(score_event(truth, submission))
        return np.sum(score_list) / len(score_list)

    opt = BayesianOptimization(
        Fun4BO,
        {
            "w_a1": (0.9, 1.2),
            "w_z1": (0.3, 0.7),
            "w_z2": (0.1, 0.4),
        },
        verbose=True)
    opt.maximize(init_points=3, n_iter=30, kappa=2.576)

    best = opt.res["max"]
    # Names of the tuned parameters.
    labels = best["max_params"].keys()
    # One mapping (parameter name -> value) per evaluated point.
    params = opt.res["all"]["params"]
    len_params = len(params)

    # Row 0 holds the best point; the remaining rows follow evaluation order.
    data_dic = {}
    for label in labels:
        data_dic[label] = [best["max_params"][label]] + [p[label] for p in params]
    data_dic["value"] = [best["max_val"]] + opt.res["all"]["values"]
    data_dic["label"] = ["max"] + [str(x) for x in range(len_params)]

    df = pd.DataFrame(data_dic)
    df.to_csv(filename, index=False)