Esempio n. 1
0
def run_candidate():
    """Run one fixed model configuration on the training data and print its score.

    Builds a ``models.UnrollingHelicesShiftingZ`` instance with hard-coded
    DBSCAN features/weights, loads ``nevents`` events from the ``train_1``
    directory under ``path_to_trackml`` (a name defined outside this
    function), predicts a label for every hit, writes the resulting
    ``event_id, hit_id, track_id`` table to ``02.csv`` and prints the value
    returned by ``score_event`` both scaled by the summed hit weights and raw.

    Takes no arguments and returns ``None``; all results go to stdout,
    stderr and the CSV file.
    """
    print("script begin", datetime.datetime.now())
    model = models.UnrollingHelicesShiftingZ(
        djs=[-20, -10, 0, 10, 20],
        dbscan_features=["sina1", "cosa1", "z1", "z2", "x_rt", "y_rt"],
        dbscan_weight=[0.9, 0.9, 0.35, 0.22, 0.01, 0.01],
        coef_rt1=1.33,
        coef_rt2=0.0,
        niter=150)

    nevents = 1
    path_to_input = os.path.join(path_to_trackml, "train_1")

    # Phase 1: load every requested event up front, before any prediction.
    sys.stderr.write("load data\n")
    events = []
    for event_id, hits, truth in load_dataset(path_to_input,
                                              parts=["hits", "truth"],
                                              skip=0,
                                              nevents=nevents):
        print("size(hits) : ", len(hits))
        events.append((event_id, hits, truth))

    # Phase 2: predict and score each loaded event.
    sys.stderr.write("scan\n")
    for event_id, hits, truth in events:
        # merge() already returns a new frame, so no extra copy is needed
        # before handing it to the model.
        dfh = truth.merge(hits, on=['hit_id'], how='left')
        label = model.predict(dfh)

        submission = pd.DataFrame(
            columns=['event_id', 'hit_id', 'track_id'],
            data=np.column_stack((np.full(len(dfh), int(event_id)),
                                  dfh.hit_id.values,
                                  label))).astype(int)
        # NOTE: the output name is fixed, so with nevents > 1 each event
        # overwrites the file written for the previous one.
        submission.to_csv("02.csv", index=False)
        score = score_event(dfh, submission)
        max_score = dfh.weight.sum()
        print("score: %0.5f  (%0.5f)" % (score * max_score, score))

    print("script end", datetime.datetime.now())
Esempio n. 2
0
def run_candidate():
    """Run one fixed model configuration on the training data and print its score.

    Builds a ``models.UnrollingHelicesShiftingZ`` instance with eight
    hard-coded DBSCAN features/weights and ``eps0 = 0.00975``, loads
    ``nevents`` events from the ``train_1`` directory under
    ``path_to_trackml`` (a name defined outside this function), predicts a
    label for every hit, writes the resulting ``event_id, hit_id, track_id``
    table to ``05.csv`` and prints the value returned by ``score_event``
    both scaled by the summed hit weights and raw.

    Takes no arguments and returns ``None``; all results go to stdout,
    stderr and the CSV file.
    """
    print("script begin", datetime.datetime.now())
    model = models.UnrollingHelicesShiftingZ(
        dbscan_features=["sina1", "cosa1", "z1", "z2",
                         "x_y", "x_r", "y_r", "rt_r"],
        # One weight per entry of dbscan_features, in the same order.
        dbscan_weight=[2.7474448671796874, 2.7474448671796874,
                       1.3649721713529086, 0.7034918842926337,
                       0.0005549122352940002, 0.023096034747190672,
                       0.04619756315527515, 0.2437077420144654],
        djs=[-20, -10, 0, 10, 20],
        niter=150,
        eps0=0.00975)

    nevents = 1
    path_to_input = os.path.join(path_to_trackml, "train_1")

    # Phase 1: load every requested event up front, before any prediction.
    sys.stderr.write("load data\n")
    events = []
    for event_id, hits, truth in load_dataset(path_to_input,
                                              parts=["hits", "truth"],
                                              skip=0,
                                              nevents=nevents):
        print("size(hits) : ", len(hits))
        events.append((event_id, hits, truth))

    # Phase 2: predict and score each loaded event.
    sys.stderr.write("scan\n")
    for event_id, hits, truth in events:
        # merge() already returns a new frame, so no extra copy is needed
        # before handing it to the model.
        dfh = truth.merge(hits, on=['hit_id'], how='left')
        label = model.predict(dfh)

        submission = pd.DataFrame(
            columns=['event_id', 'hit_id', 'track_id'],
            data=np.column_stack((np.full(len(dfh), int(event_id)),
                                  dfh.hit_id.values,
                                  label))).astype(int)
        # NOTE: the output name is fixed, so with nevents > 1 each event
        # overwrites the file written for the previous one.
        submission.to_csv("05.csv", index=False)
        score = score_event(dfh, submission)
        max_score = dfh.weight.sum()
        print("score: %0.5f  (%0.5f)" % (score * max_score, score))

    print("script end", datetime.datetime.now())
Esempio n. 3
0
def run(filename):
    """Search for good DBSCAN feature weights and save the search history.

    Builds a ``models.UnrollingHelicesShiftingZ`` instance, loads ``nevents``
    events from the ``train_1`` directory under ``path_to_trackml`` (a name
    defined outside this function), and lets ``BayesianOptimization``
    maximize the mean ``score_event`` value over those events as a function
    of three weights (``w_a1``, ``w_z1``, ``w_z2``).

    The optimizer's results are written to ``filename`` as CSV: one column
    per tuned parameter, a ``value`` column with the objective, and a
    ``label`` column. The first row (label ``"max"``) is the best point; the
    remaining rows are the entries of ``opt.res["all"]`` labelled
    ``"0"``, ``"1"``, ...

    Args:
        filename: Path of the CSV file to write the optimization table to.

    Returns:
        None.
    """
    print("script begin", datetime.datetime.now())
    EPS = 1e-12
    model = models.UnrollingHelicesShiftingZ(
        # EPS nudges the stop value past 20 so np.arange includes it.
        djs=np.arange(-20, 20 + EPS, 10),
        dbscan_features=["sina1", "cosa1", "z1", "z2"],
        dbscan_weight=[1.0, 1.0, 0.75, 0.2],
        niter=150)

    nevents = 1
    path_to_input = os.path.join(path_to_trackml, "train_1")

    # Load the events once; the objective below re-uses them on every call.
    sys.stderr.write("load data\n")
    events = list(load_dataset(path_to_input,
                               parts=["hits", "truth"],
                               skip=0,
                               nevents=nevents))

    def Fun4BO(w_a1, w_z1, w_z2):
        """Objective: mean event score for the given feature weights.

        The parameter names must match the keys of the bounds dict passed
        to ``BayesianOptimization`` below.
        """
        # The same weight is used for the first two features
        # ("sina1" and "cosa1"); the model is updated in place.
        for idx, weight in enumerate((w_a1, w_a1, w_z1, w_z2)):
            model.dbscan_weight[idx] = weight

        sys.stderr.write("scan\n")
        score_list = []
        for event_id, hits, truth in events:
            label = model.predict(hits)
            submission = pd.DataFrame(
                columns=['event_id', 'hit_id', 'track_id'],
                data=np.column_stack((np.full(len(hits), int(event_id)),
                                      hits.hit_id.values,
                                      label))).astype(int)
            score_list.append(score_event(truth, submission))
        return np.sum(score_list) / len(score_list)

    opt = BayesianOptimization(
        Fun4BO,
        {
            "w_a1": (0.9, 1.2),
            "w_z1": (0.3, 0.7),
            "w_z2": (0.1, 0.4),
        },
        verbose=True)
    opt.maximize(
        init_points=3,
        n_iter=30,
        kappa=2.576)

    # NOTE(review): the opt.res["max"] / opt.res["all"] dict layout used here
    # appears to be the legacy bayes_opt result format - confirm against the
    # installed version.
    best = opt.res["max"]
    history = opt.res["all"]
    params = history["params"]

    # One column per tuned parameter: best value first, then the history.
    # Building each column in a single expression guarantees the column
    # exists even when the history is empty.
    data_dic = {
        name: [best_value] + [point[name] for point in params]
        for name, best_value in best["max_params"].items()
    }
    data_dic["value"] = [best["max_val"]] + list(history["values"])
    data_dic["label"] = ["max"] + [str(i) for i in range(len(params))]

    pd.DataFrame(data_dic).to_csv(filename, index=False)