Example #1
0
def run():
    """Extend the merged submission repeatedly, run the outlier model, print scores.

    Reads ``01_merge.submission.csv`` and, for each event yielded by
    ``load_dataset(..., skip=0, nevents=1)`` on ``train_1``, applies
    ``extension.extend`` four times and then one
    ``extension.RemoveOutliersByQuadric`` pass.  The event score is printed
    after every step as ``stepN, score: ...`` (N = 1..5).
    """
    num_extend = 4  # number of extension passes before the outlier model

    print(datetime.datetime.now(), sys.argv[0], " begin")
    submission = pd.read_csv("01_merge.submission.csv")
    model = extension.RemoveOutliersByQuadric()

    path_to_input = os.path.join(path_to_trackml, "train_1")
    for event_id, hits, truth in load_dataset(path_to_input,
                                              parts=["hits", "truth"],
                                              skip=0,
                                              nevents=1):
        # Steps 1..num_extend: each pass feeds on the previous pass's output.
        for step in range(1, num_extend + 1):
            submission = extension.extend(submission, hits)
            score = score_event(truth, submission)
            print("step%d, score: %0.5f" % (step, score))

        # Last step: run the outlier model on the extended submission.
        submission = model.run(submission, hits)
        score = score_event(truth, submission)
        print("step%d, score: %0.5f" % (num_extend + 1, score))

    print(datetime.datetime.now(), sys.argv[0], " end")
Example #2
0
def run():
    """Score the cumulative per-step submissions, then extend the last one.

    Loads ``step{i}.submission.csv`` for i in 0..4 and
    ``step{i}_good.submission.csv`` for i in 0..3.  For every step ``mi`` the
    full submission of that step is concatenated with the "good" submissions
    of all earlier steps and scored.  For the final step only, the combined
    submission is additionally extended five times, printing the score after
    each extension.
    """
    max_istep = 4

    # All full submissions are read first, then all "good" ones.
    full_submission_list = []
    for istep in range(max_istep + 1):
        full_submission_list.append(
            pd.read_csv("step{0}.submission.csv".format(istep)))

    good_submission_list = []
    for istep in range(max_istep):
        good_submission_list.append(
            pd.read_csv("step{0}_good.submission.csv".format(istep)))

    path_to_input = os.path.join(path_to_trackml, "train_1")
    events = load_dataset(path_to_input,
                          parts=["hits", "truth"],
                          skip=0,
                          nevents=1)

    for event_id, hits, truth in events:
        for mi, full_submission in enumerate(full_submission_list):
            # Step mi's full result plus the good tracks of steps 0..mi-1.
            pieces = [full_submission] + good_submission_list[:mi]
            submission = pd.concat(pieces)
            score0 = score_event(truth, submission)
            print("step = {0}, score = {1}".format(mi, score0))

            if mi != max_istep:
                continue

            # Only the final combined submission is extended.
            for n_ext in range(1, 6):
                submission = extension.extend(submission, hits)
                score0 = score_event(truth, submission)
                print("with extension = {0}, score = {1}".format(
                    n_ext, score0))
Example #3
0
def run():
    """Extend the ``07`` test submission and write the result to CSV.

    Writes a one-line header to ``08.log``, reads
    ``07_test_UHBO_submission.csv``, and for every event yielded by
    ``load_dataset`` on the ``test`` directory runs ``extension.extend`` and
    saves the result to ``08_ext07_submission.csv``.
    """
    # The context manager guarantees the log is flushed and closed even when
    # loading or extending raises part-way through.
    with open("08.log", "w") as f:
        f.write("extention of 07 results.\n")
        path_to_input = os.path.join(path_to_trackml, "test")
        old_submission = pd.read_csv("07_test_UHBO_submission.csv")
        sys.stderr.write("load data\n")
        for event_id, hits in load_dataset(path_to_input, parts=["hits"]):
            submission = extension.extend(old_submission, hits)
            # NOTE(review): every event writes to the same path, so only the
            # last event's result survives on disk -- confirm this is intended.
            submission.to_csv("08_ext07_submission.csv", index=None)
Example #4
0
def run():
    """Extend the ``02.csv`` submission once, save it as ``09.csv``, print the score.

    Runs on the events yielded by ``load_dataset`` for ``train_1`` with
    ``skip=0`` and ``nevents=1``.
    """
    event_count = 1
    input_dir = os.path.join(path_to_trackml, "train_1")
    old_submission = pd.read_csv("02.csv")
    sys.stderr.write("load data\n")

    events = load_dataset(input_dir,
                          parts=["hits", "truth"],
                          skip=0,
                          nevents=event_count)
    for event_id, hits, truth in events:
        extended = extension.extend(old_submission, hits)
        extended.to_csv("09.csv", index=None)
        event_score = score_event(truth, extended)
        # Blank line first, then the score on its own line.
        print("")
        print("score: %0.5f" % (event_score))
Example #5
0
def run():
    """Apply ``extension.extend`` five times and print the score after each pass.

    Starts from ``01_merge.submission.csv`` and uses the events yielded by
    ``load_dataset`` for ``train_1`` with ``skip=0`` and ``nevents=1``.
    Timestamps are printed at the beginning and the end of the run.
    """
    print(datetime.datetime.now(), sys.argv[0], " begin")

    current = pd.read_csv("01_merge.submission.csv")
    input_dir = os.path.join(path_to_trackml, "train_1")
    events = load_dataset(input_dir,
                          parts=["hits", "truth"],
                          skip=0,
                          nevents=1)

    for event_id, hits, truth in events:
        # Steps are numbered from 1 in the printed output.
        for step in range(1, 6):
            current = extension.extend(current, hits)
            step_score = score_event(truth, current)
            print("step%d, score: %0.5f" % (step, step_score))

    print(datetime.datetime.now(), sys.argv[0], " end")
Example #6
0
def run():
    """Extend the ``09.csv`` submission five times, logging each score.

    For every event yielded by ``load_dataset`` on ``train_1`` (``skip=0``,
    ``nevents=1``) the submission is extended five times in sequence.  After
    pass ``i`` (0-based) the result is saved to ``10_{i}.csv`` and a line
    ``i=<i> score=<score>`` is appended to ``10.log``.
    """
    # The original never closed the log file; the context manager makes sure
    # the buffered score lines reach disk, including when an error occurs.
    with open("10.log", "w") as f:
        f.write("extention many times\n")
        path_to_input = os.path.join(path_to_trackml, "train_1")
        nevents = 1
        submission = pd.read_csv("09.csv")
        sys.stderr.write("load data\n")
        for event_id, hits, truth in load_dataset(path_to_input,
                                                  parts=["hits", "truth"],
                                                  skip=0,
                                                  nevents=nevents):
            for i in range(5):
                # Each pass starts from the previous pass's output.
                submission = extension.extend(submission, hits)
                submission.to_csv("10_{0}.csv".format(i), index=None)
                score = score_event(truth, submission)
                f.write("i={0} score={1}\n".format(i, score))
Example #7
0
def calc_one(istep, submission0, event_id, hits, output_dir, mk_cand, merger, th_len, num_extend):
    """Run one make-candidates / merge / extend pass and split the result.

    Args:
        istep: Step index; used in log messages and output file names.
        submission0: Earlier submission whose ``hit_id`` values select the
            hits to process, or ``None`` to process all of ``hits``.
        event_id: Event identifier passed on to ``mk_cand`` and ``merger``.
        hits: Hits table; must contain a ``hit_id`` column when
            ``submission0`` is given.
        output_dir: Directory receiving the candidates directory and CSVs.
        mk_cand: Object with an ``output_dir`` attribute and ``run`` method.
        merger: Object with a ``candidates_output_dir`` attribute and a
            ``run`` method returning the submission.
        th_len: Rows whose ``score_by_length`` value is below this are
            treated as outliers.
        num_extend: Number of ``extend`` passes applied after merging.

    Returns:
        Tuple ``(submission_good, submission_outlier)``.
    """
    def in_output_dir(template):
        # Path inside output_dir whose name embeds the step index.
        return os.path.join(output_dir, template.format(istep))

    cand_dir = in_output_dir("candidates{0}")
    mk_cand.output_dir = cand_dir
    merger.candidates_output_dir = cand_dir

    # Keep only the hits referenced by the earlier submission, if there is one.
    if submission0 is not None:
        hits0 = submission0.merge(hits, on="hit_id")[hits.columns]
    else:
        hits0 = hits

    # -- make candidate --
    print(datetime.datetime.now(), "step{0} make_candidates".format(istep))
    mk_cand.run(event_id, hits0)

    # -- merge --
    print(datetime.datetime.now(), "step{0} merge".format(istep))
    submission = merger.run(event_id, hits0)

    # -- extend --
    print(datetime.datetime.now(),
          "step{0} extend {1} times".format(istep, num_extend))
    for _ in range(num_extend):
        submission = extend(submission, hits0)

    # -- separate outlier --
    print(datetime.datetime.now(), "step{0} compute outlier".format(istep))
    # Scoring uses the full ``hits`` table, not the filtered ``hits0``.
    track_scores = score_track.score_by_length(submission, hits)
    is_outlier = track_scores < th_len
    submission_outlier = submission[is_outlier]
    submission_good = submission[~is_outlier]
    print("# of outlier: ", len(submission_outlier))
    print("# of good: ", len(submission_good))

    # -- save --
    outputs = (
        (submission_outlier, "step{0}_outlier.submission.csv"),
        (submission_good, "step{0}_good.submission.csv"),
        (submission, "step{0}.submission.csv"),
    )
    for frame, template in outputs:
        frame.to_csv(in_output_dir(template), index=None)

    return (submission_good, submission_outlier)
Example #8
0
    def fit(dfh, y=None):
        """Cluster hits iteratively with DBSCAN and store the resulting submission.

        Adds working columns to ``dfh`` in place (``s1``, ``N1``, ``r``, ``rt``,
        ``a0``, ``z1``, ``z2``, ``a1``, ``sina1``, ``cosa1``, ``s2``, ``N2``),
        runs ``self.niter`` rounds of scaling + DBSCAN, then builds a
        submission from the final ``s1`` labels, extends it ``self.nextend``
        times and stores it in ``self.submission``.

        ``y`` is not used.

        NOTE(review): the body reads ``self`` and ``dj`` but neither is a
        parameter nor defined in this block -- presumably ``self`` is missing
        from the signature and ``dj`` comes from an enclosing/module scope;
        confirm against the surrounding code.

        Returns:
            ``self``.
        """
        # Start with every hit in its own cluster: label = hit_id, size = 1.
        dfh["s1"] = dfh.hit_id
        dfh["N1"] = 1
        # r: 3D distance from the origin; rt: distance in the x-y plane;
        # a0: azimuthal angle in the x-y plane.
        dfh['r'] = np.sqrt(dfh['x'].values**2 + dfh['y'].values**2 +
                           dfh['z'].values**2)
        dfh['rt'] = np.sqrt(dfh['x'].values**2 + dfh['y'].values**2)
        dfh['a0'] = np.arctan2(dfh['y'].values, dfh['x'].values)

        # Sign of the angular shift; flipped at the top of every iteration,
        # so the first iteration uses -1.
        mm = 1
        for ii in tqdm(range(self.niter), total=self.niter):
            # unroll helices
            mm = mm * (-1)
            # z shifted by ``dj`` and normalised by rt / r.
            dfh['z1'] = (dfh['z'].values + dj) / dfh['rt'].values
            dfh['z2'] = (dfh['z'].values + dj) / dfh['r'].values
            # Rotate a0 by an amount proportional to rt; the magnitude grows
            # with the iteration index (ii / 2) and the sign alternates via mm.
            dfh["a1"] = dfh.a0 + mm * (self.coef_rt * dfh.rt.values
                                       ) / 1000.0 * (ii / 2) / 180.0 * np.pi

            dfh["sina1"] = np.sin(dfh["a1"].values)
            dfh["cosa1"] = np.cos(dfh["a1"].values)

            # scaling: standardise the selected feature columns, then apply
            # the per-feature weights.
            ss = StandardScaler()
            dfs = ss.fit_transform(dfh[self.features].values)
            dfs[:, :] = dfs[:, :] * self.weight[np.newaxis, :]

            # clustering
            res = DBSCAN(eps=self.eps0,
                         min_samples=1,
                         metric='euclidean',
                         n_jobs=4).fit(dfs).labels_
            dfh["s2"] = res
            # N2: size of the cluster each hit was assigned to in this round.
            dfh['N2'] = dfh.groupby('s2')['s2'].transform('count')
            # Offset new labels by the current maximum so they cannot collide
            # with labels already present in s1.
            maxs1 = np.max(dfh.s1)
            # Adopt the new cluster only when it is larger than the hit's
            # current one and has fewer than 20 members.
            dfh.s1 = np.where((dfh.N2 > dfh.N1) & (dfh.N2 < 20),
                              dfh.s2 + maxs1, dfh.s1)
            dfh['s1'] = dfh['s1'].astype('int64')
            # Recompute cluster sizes for the updated labels.
            dfh['N1'] = dfh.groupby('s1')['s1'].transform('count')

        labels = dfh["s1"]
        # First argument 0 is passed as-is to create_one_event_submission
        # (presumably the event id -- TODO confirm).
        submission = create_one_event_submission(0, dfh, labels)
        for i in range(self.nextend):
            submission = extend(submission, dfh)

        self.submission = submission
        return self