Example #1
0
    def doit(self):
        """Run the checks over the ``subset_exp_40`` .. ``subset_exp_69`` files.

        Builds the train/test frames with ``make_files``, prints
        ``train.describe()``, then calls ``print_corr``, ``print_score`` and
        ``do_preds`` in that order, calling ``timer.time`` after the load and
        after each of the two checks.
        """
        timer = pocket_timer.GoldenTimer(pocket_logger.get_my_logger())

        # Each entry is (file_name, col_name); both parts are identical here.
        names = ["subset_exp_{}".format(num) for num in range(40, 70)]
        files = list(zip(names, names))

        train, test = self.make_files(files)
        timer.time("load csv in ")
        print(train.describe())

        self.print_corr(train, test, files)
        timer.time("corr check")

        self.print_score(train, files)
        timer.time("score check")

        self.do_preds(train, files)
Example #2
0
    def doit(self):
        """Run the checks for the single ``bin_team`` file.

        Builds the train/test frames with ``make_files``, prints
        ``train.describe()``, then calls ``print_corr``, ``print_score`` and
        ``do_preds`` in that order, calling ``timer.time`` after the load and
        after each of the two checks.
        """
        timer = pocket_timer.GoldenTimer(pocket_logger.get_my_logger())

        # Each entry is (file_name, col_name).
        files = [("bin_team", "bin")]

        train, test = self.make_files(files)
        timer.time("load csv in ")
        print(train.describe())

        self.print_corr(train, test, files)
        timer.time("corr check")

        self.print_score(train, files)
        timer.time("score check")

        self.do_preds(train, test, files)
Example #3
0
    def doit(self):
        """Run the checks and the CV prediction over five model files.

        Builds the train/test frames with ``make_files``, prints
        ``train.describe()``, then calls ``print_corr``, ``print_score`` and
        ``do_cv_pred`` in that order, calling ``timer.time`` after the load
        and after each of the two checks.
        """
        timer = pocket_timer.GoldenTimer(pocket_logger.get_my_logger())

        # Each entry is (file_name, col_name).
        files = [
            ("team_v63", "lgb"),
            ("bin_team", "bin"),
            ("no_out_team", "no_out"),
            ("rnd_feat_bridge", "rnd_feat"),
            ("small_team", "small"),
        ]

        train, test = self.make_files(files)
        timer.time("load csv in ")
        print(train.describe())

        self.print_corr(train, test, files)
        timer.time("corr check")

        self.print_score(train, files)
        timer.time("score check")

        self.do_cv_pred(train, test, files)
Example #4
0
    def doit(self):
        """Run the checks and the CV prediction with extra team files attached.

        Builds the train/test frames from ``files`` with ``make_files``, then
        passes both frames through ``add_team_file`` once per team file.
        Afterwards prints ``train.describe()`` and calls ``print_corr``,
        ``print_score`` and ``do_cv_pred`` in that order. Those three calls
        receive only ``files``, not the team-file list.
        """
        timer = pocket_timer.GoldenTimer(pocket_logger.get_my_logger())

        # Each entry is (file_name, col_name).
        files = [
            ("team_v63", "lgb"),
            ("bin_team", "bin"),
            ("no_out_team", "no_out"),
            # ("rnd_feat_bridge", "rnd_feat"),
            ("small_team", "small"),
        ]

        # Team files use the file name as the column name as well.
        # Commented-out names are currently disabled.
        team_names = [
            'select_v44_ridge',
            'tune_stack_57_v1',
            'select_v51_ridge',
            # 'tune_stack_57_2_v1',
            'tune_stack_cgb_v1',
            # 'elo_rnd_feat_bridge',
            'outlier_lgb_v3_kh_time_feature2_pocket',
            # 'delete_outlier_kh_pocket_stack_correct_ridge',
            # 'outlier_lgb_pocket_logistic',
            'delete_outlier_kh_pocket_stack_correct2_ridge',
        ]
        team_files = list(zip(team_names, team_names))

        train, test = self.make_files(files)
        for file_name, col_name in team_files:
            train, test = self.add_team_file(file_name, col_name, train, test)
        timer.time("load csv in ")
        print(train.describe())

        self.print_corr(train, test, files)
        timer.time("corr check")

        self.print_score(train, files)
        timer.time("score check")

        self.do_cv_pred(train, test, files)
Example #5
0
    def doit(self):
        """Run the checks and the CV prediction over selected ``subset_exp_*`` files.

        Only the indices listed in ``sig_idx`` are used. Builds the train/test
        frames with ``make_files``, prints ``train.describe()``, then calls
        ``print_corr``, ``print_score`` and ``do_cv_pred`` in that order,
        calling ``timer.time`` after the load and after each of the two checks.
        """
        timer = pocket_timer.GoldenTimer(pocket_logger.get_my_logger())

        # Indices of the subset experiments to include.
        # (Disabled alternative: every index in range(100).)
        sig_idx = [
            0, 9, 12, 13, 18, 19, 22, 24, 28, 29, 33, 35, 36, 37, 43, 45, 52,
            54, 55, 56, 58, 59, 61, 65, 67, 69, 71, 72, 74, 75, 76, 77, 78, 81,
            83, 84, 88, 90, 91, 97, 98,
        ]

        # Each entry is (file_name, col_name); both parts are identical here.
        names = ["subset_exp_{}".format(num) for num in sig_idx]
        files = list(zip(names, names))

        train, test = self.make_files(files)
        timer.time("load csv in ")
        print(train.describe())

        self.print_corr(train, test, files)
        timer.time("corr check")

        self.print_score(train, files)
        timer.time("score check")

        self.do_cv_pred(train, test, files)
Example #6
0
import os, sys
# Resolve the directory two levels above this file and append it to the module
# search path (presumably so the ``elo`` package imports below resolve —
# confirm against the repository layout).
ROOT = os.path.abspath(
    os.path.join(os.path.dirname(os.path.abspath(__file__)), '../../'))
sys.path.append(ROOT)

import pandas as pd
import numpy as np
from elo.common import pocket_timer, pocket_logger, pocket_file_io, path_const
from elo.common import pocket_lgb, evaluator
from elo.loader import input_loader
from sklearn import model_selection

# Module-level setup: everything below runs when this module is imported/run.
logger = pocket_logger.get_my_logger()
timer = pocket_timer.GoldenTimer(logger)
csv_io = pocket_file_io.GoldenCsv()

# Load the whole train/test input once, up front, and record a timer mark.
loader = input_loader.GoldenLoader()
_train, _test = loader.load_whole_input()
timer.time("load csv")
# Columns that try_some() selects from both frames: the loader's ``small_col``.
pred_col = loader.small_col


def try_some(train, test):
    """Select the ``pred_col`` features and start a submission frame.

    Prints the shapes of the selected train features, the train target and
    the selected test features, then creates a DataFrame holding the test
    "card_id" column.

    Args:
        train: frame indexable by ``pred_col`` that has a "target" column.
        test: frame indexable by ``pred_col`` that has a "card_id" column.

    NOTE(review): the visible body builds ``submission`` but neither returns
    nor writes it — the function may continue beyond what is shown; confirm.
    """
    # Restrict both frames to the module-level feature columns.
    train_x, test_x = train[pred_col], test[pred_col]
    train_y = train["target"]
    print(train_x.shape)
    print(train_y.shape)
    print(test_x.shape)

    # Start the submission frame from the test ids.
    submission = pd.DataFrame()
    submission["card_id"] = test["card_id"]
Example #7
0
    def load_small_input():
        """Read the input CSVs, join the transaction tables and pick columns.

        Reads the train/test files and four transaction tables, left-joins
        every transaction table onto train and onto test by "card_id", runs
        ``JitFe.do_fe`` on both frames, and selects the ``use_col`` columns.
        Shapes are printed and ``timer.time`` is called after the load and
        after the preparation.

        Returns:
            A 5-tuple ``(train_ids, test_ids, train_x, train_y, test_x)``
            where ``train_ids`` is ``train[["card_id", "target"]]``,
            ``test_ids`` is ``test[["card_id"]]``, ``train_x``/``test_x`` are
            the ``use_col`` columns and ``train_y`` is ``train["target"]``.
        """
        timer = pocket_timer.GoldenTimer(pocket_logger.get_my_logger())
        reader = pocket_file_io.GoldenCsv()

        train = reader.read_file(path_const.TRAIN1)
        test = reader.read_file(path_const.TEST1)
        # Tables joined onto both frames below, in this order.
        trans_tables = [
            reader.read_file(path_const.RE_NEW_TRANS1),
            reader.read_file(path_const.RE_OLD_TRANS1),
            reader.read_file(path_const.NEW_TRANS6),
            reader.read_file(path_const.OLD_TRANS6),
        ]
        print(train.shape)
        print(test.shape)
        timer.time("load csv in ")

        # Left-join each table by card_id: first onto train, then onto test.
        for table in trans_tables:
            train = pd.merge(train, table, on="card_id", how="left")
        for table in trans_tables:
            test = pd.merge(test, table, on="card_id", how="left")

        feature_maker = jit_fe.JitFe()
        train = feature_maker.do_fe(train)
        test = feature_maker.do_fe(test)

        train_y = train["target"]
        # 3.660 - 3.658
        use_col = [
            "new_trans_elapsed_days_max",
            "new_trans_elapsed_days_min",
            "new_trans_elapsed_days_mean",  # 0.001
            "old_trans_elapsed_days_max",
            "old_trans_elapsed_days_min",
            "old_trans_elapsed_days_mean",  # 0.025 mean001
            "new_last_day",  # 0.005
            "old_installments_sum",
            "old_installments_mean",  # 0.005
            "old_month_nunique",
            "old_woy_nunique",  # 0.010
            "old_merchant_id_nunique",  # 0.002
            "new_month_lag_mean",
            "old_month_lag_mean",
            "elapsed_days",  # 0.010
            "new_purchase_amount_max",
            "new_purchase_amount_count",
            "new_purchase_amount_mean",  # 0.020
            "old_purchase_amount_max",
            "old_purchase_amount_count",
            "old_purchase_amount_mean",  # 0.002
            "old_category_1_mean",
            "new_category_1_mean",  # 0.006
            "old_authorized_flag_sum",  # "old_authorized_flag_mean", bad?
            "old_no_city_purchase_amount_min",  # 0.003
            "old_no_city_purchase_amount_max",
            "old_no_city_purchase_amount_mean",  # 0.002
            "rec1_purchase_amount_count",  # 0.005
            "old_month_lag_max",  # 0.002
            "new_time_diff_mean",
            "new_trans_elapsed_days_std",  # 0.002
            "old_month_diff_mean",
            "old_pa2_month_diff_min",  # 0.004
        ]
        train_x = train[use_col]
        test_x = test[use_col]

        print(train_x.shape)
        print(train_y.shape)
        print(test_x.shape)
        timer.time("prepare train in ")

        train_ids = train[["card_id", "target"]]
        test_ids = test[["card_id"]]
        return train_ids, test_ids, train_x, train_y, test_x
Example #8
0
    def __init__(self):
        """Create the logger/timer and define the column-name lists.

        Attributes set:
            logger: result of ``pocket_logger.get_my_logger()``.
            timer: ``pocket_timer.GoldenTimer`` built on ``logger``.
            small_col: list of column names.
            medium_col: ``small_col`` followed by ``"pred_diff"``.
            drop_col: list of column names (presumably columns to exclude —
                confirm against the code that reads it).
            team_small_col: list of column names.

        The trailing numbers in the inline comments are kept from the
        original notes; their exact meaning is not documented here.
        """
        self.logger = pocket_logger.get_my_logger()
        self.timer = pocket_timer.GoldenTimer(self.logger)

        small = [
            # "new_trans_elapsed_days_max", "new_trans_elapsed_days_min", "new_trans_elapsed_days_mean",  # 0.001
            "old_trans_elapsed_days_max",
            "old_trans_elapsed_days_min",
            "old_trans_elapsed_days_mean",  # 0.025 mean001
            # "new_last_day",  # 0.005
            "new_to_last_day",
            "old_installments_sum",
            "old_installments_mean",  # 0.005
            "old_month_nunique",
            "old_woy_nunique",  # 0.010
            "old_merchant_id_nunique",  # 0.002
            "new_month_lag_mean",
            "old_month_lag_mean",
            "elapsed_days",  # 0.010
            "new_purchase_amount_max",
            "new_purchase_amount_count",
            "new_purchase_amount_mean",  # 0.020
            "old_purchase_amount_max",
            "old_purchase_amount_count",
            "old_purchase_amount_mean",  # 0.002
            "old_category_1_mean",
            "new_category_1_mean",  # 0.006
            "old_authorized_flag_sum",  # "old_authorized_flag_mean", bad?
            "old_no_city_purchase_amount_min",  # 0.003
            "old_no_city_purchase_amount_max",
            "old_no_city_purchase_amount_mean",  # 0.002
            "rec1_purchase_amount_count",  # 0.005
            "old_month_lag_max",  # 0.002
            "new_time_diff_mean",
            "new_trans_elapsed_days_std",  # 0.002
            "old_month_diff_mean",
            "old_pa2_month_diff_min",  # 0.004
            "old_mer_cnt_whole_mean",  # 0.001
        ]
        self.small_col = small
        # A new list, so extending medium_col later does not touch small_col.
        self.medium_col = small + ["pred_diff"]

        drop = [
            "card_id",
            "target",  # "feature_1", "feature_2", "feature_3",
            "old_weekend_mean",
            "new_weekend_mean",
            "new_authorized_flag_mean",
            "old_null_state",
            "new_null_state",
            "new_null_install",  # "old_null_install",
            "old_cat3_pur_mean",
            "new_cat3_pur_mean",
            "old_cat2_pur_mean",
            "new_cat2_pur_mean",
            "new_category_4_mean",  # "new_merchant_group_id_nunique", "old_merchant_group_id_nunique"
            "new_mon_nunique_mean",
            "new_woy_nunique_mean",
            # "new_month_lag_ptp", "new_month_lag_min",
            "new_purchase_amount_skew",  # "new_purchase_amount_std",
            "old_purchase_amount_skew",  # "old_purchase_amount_std",
            # "new_category_2_nunique", "old_category_2_nunique",
            # "old_null_merchant", "new_null_merchant",
            # "old_ym_target_encode_mean", "new_ym_target_encode_mean",
            # "old_hour_target_encode_mean", "new_hour_target_encode_mean",
            # "old_subsector_id_target_encode_mean",
            # "new_merchant_id_target_encode_mean", "old_merchant_id_target_encode_mean",
            "pred_new",
            "old_same_buy_count",
            "old_purchase_amount_nunique",
            "new_purchase_amount_nunique",
            "old_installments_nunique",
            "new_installments_nunique",  # "pred_new_pur_max",
            "new_trans_elapsed_days_max",
            "new_trans_elapsed_days_min",
            "new_trans_elapsed_days_mean",  # +0.001
        ]
        self.drop_col = drop

        team_small = [
            "first_mer_old_woy_nunique",
            "kh_hist_kh__purchase_date_seconds_diff_std",
            "merchant_id_most",
            "new_category_3_mean",
            "old_time_diff_std",
            "old_hour_0_count",
            "old_time_diff_min",
            "authorized_flag_y_ratio",
            "hist_merchant_id_nunique",
            "kh_ratio_kh__purchase_days_diff_min",
            "new_subsector_id_nunique",
        ]
        self.team_small_col = team_small
Example #9
0
 def __init__(self, epochs, batch_size):
     """Store ``epochs`` and ``batch_size`` and obtain a logger.

     Args:
         epochs: value kept as ``self.epochs``.
         batch_size: value kept as ``self.batch_size``.
     """
     self.epochs, self.batch_size = epochs, batch_size
     self.logger = pocket_logger.get_my_logger()