def run():
    """Extend the merged submission step by step and print each score.

    Reads ``01_merge.submission.csv`` and, for every event yielded by
    ``load_dataset`` on ``train_1`` (requested with ``skip=0, nevents=1``),
    applies ``extension.extend`` four times and then one pass of the
    ``extension.RemoveOutliersByQuadric`` model. The event score is printed
    after every step; begin/end timestamps bracket the whole run.
    """
    print(datetime.datetime.now(), sys.argv[0], " begin")

    submission = pd.read_csv("01_merge.submission.csv")
    model = extension.RemoveOutliersByQuadric()
    path_to_input = os.path.join(path_to_trackml, "train_1")

    # One report template per extension pass (steps 1 to 4).
    extend_reports = (
        "step1, score: %0.5f",
        "step2, score: %0.5f",
        "step3, score: %0.5f",
        "step4, score: %0.5f",
    )

    events = load_dataset(path_to_input, parts=["hits", "truth"],
                          skip=0, nevents=1)
    for event_id, hits, truth in events:
        for report in extend_reports:
            submission = extension.extend(submission, hits)
            print(report % score_event(truth, submission))

        # Step 5 runs the model instead of another extension pass.
        submission = model.run(submission, hits)
        print("step5, score: %0.5f" % score_event(truth, submission))

    print(datetime.datetime.now(), sys.argv[0], " end")
def run():
    """Score the cumulative per-step submissions, then extend the last one.

    Loads ``step{k}.submission.csv`` for k in 0..4 and
    ``step{k}_good.submission.csv`` for k in 0..3. For each step ``mi`` the
    full submission of that step is concatenated with the "good" submissions
    of all earlier steps and scored. On the final step the concatenated
    submission is additionally passed through ``extension.extend`` five
    times, printing the score after each pass.
    """
    max_istep = 4
    step_count = max_istep + 1

    full_submission_list = [
        pd.read_csv("step{0}.submission.csv".format(step))
        for step in range(step_count)
    ]
    good_submission_list = [
        pd.read_csv("step{0}_good.submission.csv".format(step))
        for step in range(max_istep)
    ]

    path_to_input = os.path.join(path_to_trackml, "train_1")
    events = load_dataset(path_to_input, parts=["hits", "truth"],
                          skip=0, nevents=1)

    for event_id, hits, truth in events:
        for mi in range(step_count):
            # Full result of this step plus the good tracks of earlier steps.
            pieces = [full_submission_list[mi]] + good_submission_list[:mi]
            submission = pd.concat(pieces)
            print("step = {0}, score = {1}".format(
                mi, score_event(truth, submission)))

            if mi != max_istep:
                continue

            # Only the final combined submission gets the extension passes.
            for n_pass in range(1, 6):
                submission = extension.extend(submission, hits)
                print("with extension = {0}, score = {1}".format(
                    n_pass, score_event(truth, submission)))
def run():
    """Extend the step-07 test submission and save it as step 08.

    Writes a one-line header to ``08.log``, reads
    ``07_test_UHBO_submission.csv``, and for every event yielded by
    ``load_dataset`` on the ``test`` directory applies ``extension.extend``
    to the old submission and writes the result to
    ``08_ext07_submission.csv``.

    The log file is managed by a ``with`` block so the handle is released
    even when reading, extending or writing raises.
    """
    with open("08.log", "w") as log:
        log.write("extention of 07 results.\n")

        path_to_input = os.path.join(path_to_trackml, "test")
        old_submission = pd.read_csv("07_test_UHBO_submission.csv")
        sys.stderr.write("load data\n")

        for event_id, hits in load_dataset(path_to_input, parts=["hits"]):
            # NOTE(review): every event starts again from old_submission and
            # overwrites the same output file, so only the last event's
            # result survives on disk -- confirm this is intended.
            submission = extension.extend(old_submission, hits)
            submission.to_csv("08_ext07_submission.csv", index=None)
def run():
    """Extend ``02.csv`` once, save it as ``09.csv`` and print the score.

    Events come from ``load_dataset`` on ``train_1`` (requested with
    ``skip=0, nevents=1``). For each one, the submission read from
    ``02.csv`` is passed through ``extension.extend``, written to
    ``09.csv``, and scored against the event truth.
    """
    nevents = 1
    path_to_input = os.path.join(path_to_trackml, "train_1")
    baseline = pd.read_csv("02.csv")
    sys.stderr.write("load data\n")

    events = load_dataset(path_to_input, parts=["hits", "truth"],
                          skip=0, nevents=nevents)
    for event_id, hits, truth in events:
        extended = extension.extend(baseline, hits)
        extended.to_csv("09.csv", index=None)

        event_score = score_event(truth, extended)
        print("")
        print("score: %0.5f" % event_score)
def run():
    """Apply ``extension.extend`` five times and print the score each time.

    Starts from ``01_merge.submission.csv`` and uses the events yielded by
    ``load_dataset`` on ``train_1`` (requested with ``skip=0, nevents=1``).
    Begin/end timestamps bracket the run.
    """
    print(datetime.datetime.now(), sys.argv[0], " begin")

    submission = pd.read_csv("01_merge.submission.csv")
    path_to_input = os.path.join(path_to_trackml, "train_1")
    events = load_dataset(path_to_input, parts=["hits", "truth"],
                          skip=0, nevents=1)

    for event_id, hits, truth in events:
        # Steps are reported 1-based.
        for step in range(1, 6):
            submission = extension.extend(submission, hits)
            step_score = score_event(truth, submission)
            print("step%d, score: %0.5f" % (step, step_score))

    print(datetime.datetime.now(), sys.argv[0], " end")
def run():
    """Extend ``09.csv`` repeatedly, saving and logging every pass.

    Writes a header to ``10.log``, reads ``09.csv`` and, for each event
    yielded by ``load_dataset`` on ``train_1`` (requested with
    ``skip=0, nevents=1``), applies ``extension.extend`` five times. After
    pass ``i`` the submission is written to ``10_{i}.csv`` and a line
    ``i=<i> score=<score>`` is appended to the log.

    The log is opened in a ``with`` block: the original never closed the
    handle, so buffered score lines were only flushed at interpreter
    teardown and could be lost if the run was interrupted.
    """
    with open("10.log", "w") as log:
        log.write("extention many times\n")

        path_to_input = os.path.join(path_to_trackml, "train_1")
        nevents = 1
        submission = pd.read_csv("09.csv")
        sys.stderr.write("load data\n")

        events = load_dataset(path_to_input, parts=["hits", "truth"],
                              skip=0, nevents=nevents)
        for event_id, hits, truth in events:
            for i in range(5):
                submission = extension.extend(submission, hits)
                submission.to_csv("10_{0}.csv".format(i), index=None)
                score = score_event(truth, submission)
                log.write("i={0} score={1}\n".format(i, score))
def calc_one(istep, submission0, event_id, hits, output_dir, mk_cand, merger,
             th_len, num_extend):
    """Run one make-candidates / merge / extend / split iteration.

    Args:
        istep: Step index; used in log messages and output file names.
        submission0: Previous submission, or ``None``. When given, only the
            hits whose ``hit_id`` appears in it are processed.
        event_id: Event identifier forwarded to ``mk_cand.run`` and
            ``merger.run``.
        hits: Hits table of the event.
        output_dir: Directory that receives the candidates folder and the
            three CSV files written here.
        mk_cand: Candidate maker; its ``output_dir`` attribute is set here.
        merger: Merger; its ``candidates_output_dir`` attribute is set here.
        th_len: Rows whose ``score_track.score_by_length`` value is below
            this threshold are classed as outliers.
        num_extend: Number of ``extend`` passes applied after merging.

    Returns:
        Tuple ``(submission_good, submission_outlier)``.
    """
    step_candidates = os.path.join(output_dir, "candidates{0}".format(istep))
    mk_cand.output_dir = step_candidates
    merger.candidates_output_dir = step_candidates

    # Restrict the hits to those present in the previous submission, keeping
    # only the columns of the hits table.
    if submission0 is not None:
        hits0 = submission0.merge(hits, on="hit_id")[hits.columns]
    else:
        hits0 = hits

    # -- make candidate --
    print(datetime.datetime.now(), "step{0} make_candidates".format(istep))
    mk_cand.run(event_id, hits0)

    # -- merge --
    print(datetime.datetime.now(), "step{0} merge".format(istep))
    submission = merger.run(event_id, hits0)

    # -- extend --
    print(datetime.datetime.now(),
          "step{0} extend {1} times".format(istep, num_extend))
    for _ in range(num_extend):
        submission = extend(submission, hits0)

    # -- separate outlier --
    print(datetime.datetime.now(), "step{0} compute outlier".format(istep))
    # Scoring uses the full hits table, not the restricted hits0.
    is_outlier = score_track.score_by_length(submission, hits) < th_len
    submission_outlier = submission[is_outlier]
    submission_good = submission[~is_outlier]
    print("# of outlier: ", len(submission_outlier))
    print("# of good: ", len(submission_good))

    # -- save --
    outputs = (
        (submission_outlier, "step{0}_outlier.submission.csv"),
        (submission_good, "step{0}_good.submission.csv"),
        (submission, "step{0}.submission.csv"),
    )
    for frame, name_template in outputs:
        frame.to_csv(os.path.join(output_dir, name_template.format(istep)),
                     index=None)

    return (submission_good, submission_outlier)
def fit(dfh, y=None):
    """Cluster hits with repeated DBSCAN passes and build a submission.

    Adds working columns to ``dfh`` in place (``s1``, ``N1``, ``r``, ``rt``,
    ``a0``, ``z1``, ``z2``, ``a1``, ``sina1``, ``cosa1``, ``s2``, ``N2``).
    Each of the ``self.niter`` iterations recomputes the angle features,
    standard-scales and weights ``self.features``, clusters them with DBSCAN,
    and moves a hit to its new cluster when that cluster is larger than the
    hit's current one but has fewer than 20 members. The final labels are
    turned into a submission, extended ``self.nextend`` times and stored on
    ``self.submission``.

    NOTE(review): ``self`` and ``dj`` are read below but neither is a
    parameter nor assigned in this function -- presumably ``self`` is a
    dropped method parameter and ``dj`` comes from an enclosing or module
    scope; confirm against the full file.

    Args:
        dfh: Hits DataFrame; the code reads ``hit_id``, ``x``, ``y``, ``z``.
        y: Unused.

    Returns:
        ``self``.
    """
    xs = dfh['x'].values
    ys = dfh['y'].values
    zs = dfh['z'].values

    # Every hit starts as its own singleton cluster.
    dfh["s1"] = dfh.hit_id
    dfh["N1"] = 1
    dfh['r'] = np.sqrt(xs**2 + ys**2 + zs**2)
    dfh['rt'] = np.sqrt(xs**2 + ys**2)
    dfh['a0'] = np.arctan2(ys, xs)

    sign = 1
    for step in tqdm(range(self.niter), total=self.niter):
        # unroll helices: the sign flips on every iteration
        sign = -sign
        z_shifted = dfh['z'].values + dj
        dfh['z1'] = z_shifted / dfh['rt'].values
        dfh['z2'] = z_shifted / dfh['r'].values
        dfh["a1"] = dfh.a0 + sign * (
            self.coef_rt * dfh.rt.values) / 1000.0 * (step / 2) / 180.0 * np.pi
        angle = dfh["a1"].values
        dfh["sina1"] = np.sin(angle)
        dfh["cosa1"] = np.cos(angle)

        # scaling
        scaler = StandardScaler()
        scaled = scaler.fit_transform(dfh[self.features].values)
        scaled[:, :] = scaled * self.weight[np.newaxis, :]

        # clustering
        clusterer = DBSCAN(eps=self.eps0, min_samples=1, metric='euclidean',
                           n_jobs=4)
        dfh["s2"] = clusterer.fit(scaled).labels_
        dfh['N2'] = dfh.groupby('s2')['s2'].transform('count')

        # Offset the new labels past the current maximum so they cannot
        # collide with existing ones.
        label_offset = np.max(dfh.s1)
        accept = (dfh.N2 > dfh.N1) & (dfh.N2 < 20)
        dfh.s1 = np.where(accept, dfh.s2 + label_offset, dfh.s1)
        dfh['s1'] = dfh['s1'].astype('int64')
        dfh['N1'] = dfh.groupby('s1')['s1'].transform('count')

    submission = create_one_event_submission(0, dfh, dfh["s1"])
    for _ in range(self.nextend):
        submission = extend(submission, dfh)

    self.submission = submission
    return self