from utils import logger_func
try:
    if not logger:
        logger = logger_func()
except NameError:
    logger = logger_func()

#========================================================================
"""
argv[1]: comment
argv[2]: feature_key
"""
# Basic Args
seed = 1208
set_type = 'all'
fold_n = 4
key, raw_target, ignore_list = MS_utils.get_basic_var()
ignore_list = [key, raw_target]
comment = sys.argv[1]

# Base
vi_col = 'f000_AvSigVersion'
base_path = '../input/base_exclude*'
base_path = '../input/base_Av*'
# base_path = '../input/base_group*'
base = utils.read_df_pkl(base_path)

#========================================================================
# Make Validation
cv = KFold(n_splits=fold_n, shuffle=False, random_state=seed)
if is_debug:
    base_train = base[base[raw_target].isnull()].head(10000).sort_values(
try:
    if not logger:
        logger = logger_func()
except NameError:
    logger = logger_func()

#========================================================================
"""
argv[1]: comment
argv[2]: feature_key
argv[3]: group
"""
# Columns
base_path = '../input/base_exclude*'
base_path = '../input/base_Av*'
key, target, ignore_list = MS_utils.get_basic_var()
ignore_list = [key, target, 'country_group', 'down_flg']
base = utils.read_df_pkl(base_path)[[key, target, 'country_group']]

# Basic Args
seed = 1208
set_type = 'all'
comment = sys.argv[1]

if sys.argv[2].count('f'):
    train, test = MS_utils.get_feature_set(feat_key=sys.argv[2], base_path=base_path)
else:
    train, test = MS_utils.get_dataset(base=base)
print(train.shape, test.shape)
HOME = os.path.expanduser('~')
sys.path.append(f"{HOME}/kaggle/data_analysis/library/")
sys.path.append(f"../py/")
import MS_utils
import utils, ml_utils, kaggle_utils
from utils import logger_func
try:
    if not logger:
        logger = logger_func()
except NameError:
    logger = logger_func()
import time
from sklearn.metrics import roc_auc_score, mean_squared_error

# Columns
key, target, ignore_list = MS_utils.get_basic_var()

if is_multi:
    import tensorflow as tf
    from keras.utils.training_utils import multi_gpu_model  # add
    gpu_count = 4  # add

#========================================================================
# Keras
from os.path import dirname
#sys.path.append(dirname(dirname(__file__)))
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
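#========================================================================
# A minimal usage sketch, not part of the original scripts, showing how the
# multi_gpu_model import above is typically applied when is_multi is set.
# `build_model` is a hypothetical model-builder passed in for illustration;
# the optimizer and loss are placeholder assumptions.
from keras.utils.training_utils import multi_gpu_model

def wrap_for_multi_gpu(build_model, gpu_count=4):
    model = build_model()
    # Replicate the model across gpu_count GPUs and merge gradients on the CPU.
    parallel_model = multi_gpu_model(model, gpus=gpu_count)
    parallel_model.compile(optimizer='adam', loss='binary_crossentropy')
    return parallel_model
#========================================================================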
HOME = os.path.expanduser('~')
sys.path.append(f"{HOME}/kaggle/data_analysis/library/")
sys.path.append(f"../py/")
import MS_utils
import utils, ml_utils, kaggle_utils
from utils import logger_func
try:
    if not logger:
        logger = logger_func()
except NameError:
    logger = logger_func()
import time
from sklearn.metrics import roc_auc_score, mean_squared_error

# Columns
key, target, ignore_list = MS_utils.get_basic_var()

#========================================================================
# Keras
# Corporación Favorita Grocery Sales Forecasting
sys.path.append(f'{HOME}/kaggle/data_analysis/model')
from nn_keras import MS_NN
from keras import callbacks
from keras import optimizers
from keras import backend as K
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
#========================================================================

start_time = "{0:%Y%m%d_%H%M%S}".format(datetime.datetime.now())
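#========================================================================
# A minimal sketch, not from the original script, of how the callbacks
# imported above are commonly wired together before model.fit(); the
# checkpoint path, patience values and monitored metric are illustrative
# assumptions, not project settings.
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

def make_callbacks(ckpt_path='../output/best_weights.h5'):
    return [
        EarlyStopping(monitor='val_loss', patience=5, verbose=1),
        ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, verbose=1),
        ModelCheckpoint(ckpt_path, monitor='val_loss', save_best_only=True, verbose=1),
    ]

# Usage sketch:
# model.fit(x_trn, y_trn, validation_data=(x_val, y_val), callbacks=make_callbacks())
#========================================================================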
from utils import logger_func
try:
    if not logger:
        logger = logger_func()
except NameError:
    logger = logger_func()

#========================================================================
"""
argv[1]: comment
argv[2]: feature_key
"""
# Basic Args
seed = 1208
set_type = 'all'
fold_n = 5
key, target, ignore_list = MS_utils.get_basic_var()
ignore_list = [key, target, 'country_group', 'down_flg']
comment = sys.argv[1]

# Base
base_path = '../input/base_exclude*'
base_path = '../input/base_Av*'
base_path = '../input/base_group*'
base = utils.read_df_pkl(base_path)[[key, target, 'country_group']]

#========================================================================
# Make Validation
# vi_col = 'f000_AvSigVersion'
# ignore_list.append(vi_col)
# cv = KFold(n_splits=fold_n, shuffle=False, random_state=seed)
HOME = os.path.expanduser('~')
sys.path.append(f"{HOME}/kaggle/data_analysis/library/")
sys.path.append(f"../py/")
import MS_utils
import utils, ml_utils, kaggle_utils
from utils import logger_func
try:
    if not logger:
        logger = logger_func()
except NameError:
    logger = logger_func()
import time
from sklearn.metrics import roc_auc_score, mean_squared_error

# Columns
key, target, ignore_list = MS_utils.get_basic_var()

if is_multi:
    import tensorflow as tf
    from keras.utils.training_utils import multi_gpu_model  # add
    gpu_count = 4  # add

try:
    comment = sys.argv[1]
except IndexError:
    comment = "-"
start_time = time.time()

#========================================================================
# Keras
from os.path import dirname
try:
    if not logger:
        logger = logger_func()
except NameError:
    logger = logger_func()

#========================================================================
"""
argv[1]: comment
argv[2]: feature_key
"""
# Basic Args
seed = 605
# seed = 328
set_type = 'all'
fold_n = 5
key, target, ignore_list = MS_utils.get_basic_var()
ignore_list = [key, target, 'country_group', 'down_flg']
comment = sys.argv[1]

# Base
base_path = '../input/base_exclude*'
base_path = '../input/base_Av*'
base_path = '../input/base_group*'
base = utils.read_df_pkl(base_path)[[key, target, 'country_group']]
base_train = base[~base[target].isnull()]

from scipy.sparse import vstack, csr_matrix, save_npz, load_npz, hstack
train = load_npz('../input/sp_train_8114.npz')
x_test = load_npz('../input/sp_test_8114.npz')
# train = load_npz('../input/sp_train_1032.npz')
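#========================================================================
# A minimal sketch, not from the original script, of one way the sparse
# train matrix loaded above could be split into folds. It assumes the sparse
# rows are ordered like base_train; shuffle=True and the seed usage here are
# illustrative choices, not the project's validation scheme.
from sklearn.model_selection import KFold

train = train.tocsr()                     # ensure integer-array row slicing works
y = base_train[target].values
cv = KFold(n_splits=fold_n, shuffle=True, random_state=seed)
for n_fold, (trn_idx, val_idx) in enumerate(cv.split(train)):
    x_trn, y_trn = train[trn_idx], y[trn_idx]
    x_val, y_val = train[val_idx], y[val_idx]
    # fit one model per fold here and collect out-of-fold predictions
#========================================================================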
HOME = os.path.expanduser('~')
sys.path.append(f"{HOME}/kaggle/data_analysis/library/")
sys.path.append(f"../py/")
import MS_utils
import utils, ml_utils, kaggle_utils
from utils import logger_func
try:
    if not logger:
        logger = logger_func()
except NameError:
    logger = logger_func()
import time
from sklearn.metrics import roc_auc_score, mean_squared_error

# Columns
key, target, ignore_list = MS_utils.get_basic_var()

if is_multi:
    import tensorflow as tf
    from keras.utils.training_utils import multi_gpu_model  # add
    gpu_count = 4  # add

#========================================================================
# Keras
from os.path import dirname
#sys.path.append(dirname(dirname(__file__)))
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Dense, Input, CuDNNLSTM, Embedding, Dropout, Activation, CuDNNGRU, Conv1D
from keras.layers import Bidirectional, GlobalMaxPool1D, GlobalMaxPooling1D, GlobalAveragePooling1D
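#========================================================================
# A minimal sketch, not from the original script, of a model that the layer
# imports above would support; vocabulary size, sequence length, unit counts
# and dropout rate are placeholder assumptions for illustration only.
from keras.models import Model

def build_rnn_sketch(max_len=100, vocab_size=20000, emb_dim=64):
    inp = Input(shape=(max_len,))
    x = Embedding(vocab_size, emb_dim)(inp)
    x = Bidirectional(CuDNNGRU(64, return_sequences=True))(x)  # requires a GPU build of TF
    x = GlobalMaxPooling1D()(x)
    x = Dropout(0.2)(x)
    out = Dense(1, activation='sigmoid')(x)
    model = Model(inputs=inp, outputs=out)
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model
#========================================================================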
import time
import numpy as np
import pandas as pd
import glob
import sys
import os
HOME = os.path.expanduser('~')
sys.path.append(f"{HOME}/kaggle/data_analysis/library/")
import kaggle_utils
import ml_utils
import utils
import MS_utils

# Columns
key, target, ignore_list = MS_utils.get_basic_var()

# Train Test Load
# train, test = MS_utils.get_dataset(feat_path='../features/2_second_valid/*.gz')
train, test = MS_utils.get_dataset()

"""
Ignore the numeric features.
"""
num_list = [
    'f000_Census_TotalPhysicalRAM',
    'f000_Census_InternalPrimaryDiagonalDisplaySizeInInches',
    'f000_Census_InternalBatteryNumberOfCharges',
    'f000_Census_InternalBatteryType',
    'f000_Census_InternalPrimaryDisplayResolutionVertical',
    'f000_Census_PrimaryDiskTotalCapacity',
    'f000_Census_InternalPrimaryDisplayResolutionHorizontal',
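#========================================================================
# A minimal sketch, not from the original script, of how a completed num_list
# could be excluded from the feature set (the fragment above ends before the
# list closes). errors='ignore' hedges against columns missing from a given
# feature build; the function name is hypothetical.
def drop_numeric_features(train, test, num_list):
    train = train.drop(columns=num_list, errors='ignore')
    test = test.drop(columns=num_list, errors='ignore')
    return train, test
#========================================================================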