Esempio n. 1
0
    def load_whole_input(self):
        """Build the train and test feature tables.

        Reads the TRAIN1/TEST1 files, merges each listed transaction feature
        file through ``self.load_file_and_merge``, left-joins the
        NEW_DAY_PRED_OOF / NEW_DAY_PRED_SUB files on ``card_id``, prints both
        shapes, and finally passes both tables through ``JitFe.do_fe``.

        Returns:
            tuple: ``(train, test)`` as returned by ``JitFe.do_fe``.
        """
        timer = pocket_timer.GoldenTimer(pocket_logger.get_my_logger())
        reader = pocket_file_io.GoldenCsv()

        train = reader.read_file(path_const.TRAIN1)
        test = reader.read_file(path_const.TEST1)

        # NEW_TRANS11 and OLD_TRANS11 are intentionally not part of this list.
        feature_paths = (
            path_const.RE_NEW_TRANS1,
            path_const.RE_OLD_TRANS1,
            path_const.OLD_TRANS3,
            path_const.NEW_TRANS6,
            path_const.OLD_TRANS6,
            path_const.OLD_TRANS9,
        )
        for feature_path in feature_paths:
            train, test = self.load_file_and_merge(
                train, test, feature_path, reader)

        # Both prediction files are read before either join is performed.
        oof_pred = reader.read_file(path_const.NEW_DAY_PRED_OOF)
        sub_pred = reader.read_file(path_const.NEW_DAY_PRED_SUB)
        join_opts = {"on": "card_id", "how": "left"}
        train = pd.merge(train, oof_pred, **join_opts)
        test = pd.merge(test, sub_pred, **join_opts)
        # The optional ``self.load_lda`` merge step is disabled here.

        for frame in (train, test):
            print(frame.shape)
        timer.time("load csv in ")

        feature_engineer = jit_fe.JitFe()
        return feature_engineer.do_fe(train), feature_engineer.do_fe(test)
Esempio n. 2
0
    def load_ts():
        """Read the NEW_NUM, NEW_CAT and NEW_KEY files and mark a timer checkpoint.

        NOTE(review): the three loaded objects are neither used nor returned in
        the visible code, so this excerpt is presumably truncated - confirm.
        NOTE(review): there is no ``self`` parameter although the definition is
        indented like a method; presumably a ``@staticmethod`` - confirm.
        """
        logger = pocket_logger.get_my_logger()
        timer = pocket_timer.GoldenTimer(logger)
        csv_io = pocket_file_io.GoldenCsv()

        num = csv_io.read_file(path_const.NEW_NUM)
        cat = csv_io.read_file(path_const.NEW_CAT)
        key = csv_io.read_file(path_const.NEW_KEY)
        # Checkpoint labelled "load ts" after all three reads have finished.
        timer.time("load ts")
Esempio n. 3
0
    def load_whole_input(self, use_pred=True):
        """Assemble the train and test feature tables keyed by ``card_id``.

        Starts from the ``card_id`` column of the ORG_TRAIN / ORG_TEST files,
        merges paired train/test files via ``self.load_train_test_and_merge``,
        then merges the shared feature files via ``self.load_file_and_merge``,
        prints both shapes, and runs both tables through ``JitFe.do_fe``.

        Args:
            use_pred: When falsy, only the (TRAIN1, TEST1) pair is merged and
                the two prediction file pairs are skipped.

        Returns:
            tuple: ``(train, test)`` as returned by ``JitFe.do_fe``.
        """
        reader = pocket_file_io.GoldenCsv()

        train = reader.read_file(path_const.ORG_TRAIN)[["card_id"]]
        test = reader.read_file(path_const.ORG_TEST)[["card_id"]]

        # The first pair is always merged; the remaining pairs are prediction
        # files that are dropped when ``use_pred`` is falsy.
        paired_paths = [
            (path_const.TRAIN1, path_const.TEST1),
            (path_const.NEW_DAY_PRED_OOF, path_const.NEW_DAY_PRED_SUB),
            (path_const.NEW_PUR_MAX_PRED_OOF, path_const.NEW_PUR_MAX_PRED_SUB),
        ]
        if not use_pred:
            paired_paths = paired_paths[:1]

        # Disabled entries: NEW_TRANS13, OLD_TRANS13, FEAT_FROM_TS_NEW,
        # FEAT_FROM_TS_OLD, FEAT_FROM_TS_NEW2, FEAT_FROM_TS_OLD2.
        shared_paths = (
            path_const.RE_NEW_TRANS1,
            path_const.RE_OLD_TRANS1,
            path_const.OLD_TRANS3,
            path_const.NEW_TRANS6,
            path_const.OLD_TRANS6,
            path_const.OLD_TRANS9,
            path_const.NEW_TRANS11,
            path_const.OLD_TRANS11,
        )

        for train_path, test_path in paired_paths:
            train, test = self.load_train_test_and_merge(
                train, test, train_path, test_path, reader)
        for shared_path in shared_paths:
            train, test = self.load_file_and_merge(
                train, test, shared_path, reader)

        for frame in (train, test):
            print(frame.shape)

        feature_engineer = jit_fe.JitFe()
        return feature_engineer.do_fe(train), feature_engineer.do_fe(test)
Esempio n. 4
0
import os, sys
# Absolute path of the directory two levels above this file, appended to
# sys.path (presumably so the ``elo`` package imports below resolve - confirm).
ROOT = os.path.abspath(
    os.path.join(os.path.dirname(os.path.abspath(__file__)), '../../'))
sys.path.append(ROOT)

import pandas as pd
import numpy as np
from elo.common import pocket_timer, pocket_logger, pocket_file_io, path_const
from elo.common import pocket_lgb, evaluator
from elo.loader import input_loader
from sklearn import model_selection

# Module-level logger, timer and CSV helper shared by the script.
logger = pocket_logger.get_my_logger()
timer = pocket_timer.GoldenTimer(logger)
csv_io = pocket_file_io.GoldenCsv()

# The input is loaded at import time; the timer checkpoint follows the load.
loader = input_loader.GoldenLoader()
_train, _test = loader.load_whole_input()
timer.time("load csv")
# Column selection that try_some uses to slice both train and test.
pred_col = loader.small_col


def try_some(train, test):
    """Slice model inputs out of *train*/*test* and start a submission frame.

    Args:
        train: Table indexable by the module-level ``pred_col`` selection and
            containing a ``target`` column.
        test: Table indexable by ``pred_col`` and containing a ``card_id``
            column.

    NOTE(review): no return statement is visible and ``submission`` is not
    used further, so the body presumably continues beyond this excerpt -
    confirm.
    """
    train_x, test_x = train[pred_col], test[pred_col]
    train_y = train["target"]
    print(train_x.shape)
    print(train_y.shape)
    print(test_x.shape)

    # The submission frame starts with only the test ids.
    submission = pd.DataFrame()
    submission["card_id"] = test["card_id"]
Esempio n. 5
0
 def __init__(self):
     """Create the instance and attach a ``GoldenCsv`` helper as ``self.csv_io``."""
     self.csv_io = pocket_file_io.GoldenCsv()
Esempio n. 6
0
    def load_small_input():
        """Load train/test, join four transaction feature files, pick features.

        Reads the TRAIN1/TEST1 files and four transaction feature files,
        left-joins each feature file onto train and test by ``card_id``, runs
        ``JitFe.do_fe`` on both tables, and selects a fixed list of feature
        columns.

        NOTE(review): there is no ``self`` parameter although the definition is
        indented like a method; presumably a ``@staticmethod`` - confirm.

        Returns:
            tuple: ``(train_id_target, test_id, train_x, train_y, test_x)``
            where ``train_id_target`` holds the ``card_id`` and ``target``
            columns of train, ``test_id`` holds the ``card_id`` column of test,
            ``train_x`` / ``test_x`` hold the selected feature columns, and
            ``train_y`` is the ``target`` column of train.
        """
        timer = pocket_timer.GoldenTimer(pocket_logger.get_my_logger())
        reader = pocket_file_io.GoldenCsv()

        train = reader.read_file(path_const.TRAIN1)
        test = reader.read_file(path_const.TEST1)
        # Every feature file is read once and joined onto both tables below.
        feature_tables = [
            reader.read_file(path_const.RE_NEW_TRANS1),
            reader.read_file(path_const.RE_OLD_TRANS1),
            reader.read_file(path_const.NEW_TRANS6),
            reader.read_file(path_const.OLD_TRANS6),
        ]
        for frame in (train, test):
            print(frame.shape)
        timer.time("load csv in ")

        # Train receives all four joins first, then test, in the same order.
        for table in feature_tables:
            train = pd.merge(train, table, on="card_id", how="left")
        for table in feature_tables:
            test = pd.merge(test, table, on="card_id", how="left")

        feature_engineer = jit_fe.JitFe()
        train = feature_engineer.do_fe(train)
        test = feature_engineer.do_fe(test)

        train_y = train["target"]
        # 3.660 - 3.658
        selected_columns = [
            "new_trans_elapsed_days_max",
            "new_trans_elapsed_days_min",
            "new_trans_elapsed_days_mean",  # 0.001
            "old_trans_elapsed_days_max",
            "old_trans_elapsed_days_min",
            "old_trans_elapsed_days_mean",  # 0.025 mean001
            "new_last_day",  # 0.005
            "old_installments_sum",
            "old_installments_mean",  # 0.005
            "old_month_nunique",
            "old_woy_nunique",  # 0.010
            "old_merchant_id_nunique",  # 0.002
            "new_month_lag_mean",
            "old_month_lag_mean",
            "elapsed_days",  # 0.010
            "new_purchase_amount_max",
            "new_purchase_amount_count",
            "new_purchase_amount_mean",  # 0.020
            "old_purchase_amount_max",
            "old_purchase_amount_count",
            "old_purchase_amount_mean",  # 0.002
            "old_category_1_mean",
            "new_category_1_mean",  # 0.006
            "old_authorized_flag_sum",  # "old_authorized_flag_mean", bad?
            "old_no_city_purchase_amount_min",  # 0.003
            "old_no_city_purchase_amount_max",
            "old_no_city_purchase_amount_mean",  # 0.002
            "rec1_purchase_amount_count",  # 0.005
            "old_month_lag_max",  # 0.002
            "new_time_diff_mean",
            "new_trans_elapsed_days_std",  # 0.002
            "old_month_diff_mean",
            "old_pa2_month_diff_min",  # 0.004
        ]
        train_x = train[selected_columns]
        test_x = test[selected_columns]

        for part in (train_x, train_y, test_x):
            print(part.shape)
        timer.time("prepare train in ")

        train_id_target = train[["card_id", "target"]]
        test_id = test[["card_id"]]
        return train_id_target, test_id, train_x, train_y, test_x
 def load_ts_input_new2():
     """Read the TS_NEW_TRAIN2 and TS_NEW_TEST2 files.

     Returns:
         tuple: ``(train, test)`` in that order, as returned by ``read_file``.
     """
     reader = pocket_file_io.GoldenCsv()
     return (
         reader.read_file(path_const.TS_NEW_TRAIN2),
         reader.read_file(path_const.TS_NEW_TEST2),
     )
 def load_ts_input_old():
     """Read the TS_OLD_TRAIN and TS_OLD_TEST files.

     Returns:
         tuple: ``(train, test)`` in that order, as returned by ``read_file``.
     """
     reader = pocket_file_io.GoldenCsv()
     return (
         reader.read_file(path_const.TS_OLD_TRAIN),
         reader.read_file(path_const.TS_OLD_TEST),
     )
 def load_org_input():
     """Read the ORG_TRAIN and ORG_TEST files.

     Returns:
         tuple: ``(train, test)`` in that order, as returned by ``read_file``.
     """
     reader = pocket_file_io.GoldenCsv()
     return (
         reader.read_file(path_const.ORG_TRAIN),
         reader.read_file(path_const.ORG_TEST),
     )