Esempio n. 1
0
from utils import logger_func
# Create the module-level logger exactly once: reuse `logger` when it is
# already bound and truthy, otherwise build a fresh one via logger_func().
try:
    logger = logger if logger else logger_func()
except NameError:
    logger = logger_func()
#========================================================================
"""
argv[1]: comment
argv[2]: feature_key
"""
# Basic Args
seed = 1208
set_type = 'all'
fold_n = 4
key, raw_target, ignore_list = MS_utils.get_basic_var()
ignore_list = [key, raw_target]
comment = sys.argv[1]

# Base
vi_col = 'f000_AvSigVersion'
base_path = '../input/base_exclude*'
base_path = '../input/base_Av*'
#  base_path = '../input/base_group*'
base = utils.read_df_pkl(base_path)

#========================================================================
# Make Validation
# `random_state` is dropped: with shuffle=False, KFold ignores it, and
# scikit-learn >= 0.24 raises ValueError when it is supplied anyway.
# The split is unchanged: an ordered, contiguous k-fold partition.
cv = KFold(n_splits=fold_n, shuffle=False)
if is_debug:
    base_train = base[base[raw_target].isnull()].head(10000).sort_values(
Esempio n. 2
0
# Create the module-level logger exactly once: reuse `logger` when it is
# already bound and truthy, otherwise build a fresh one via logger_func().
try:
    logger = logger if logger else logger_func()
except NameError:
    logger = logger_func()
#========================================================================
"""
Command-line arguments:
argv[1]: comment      -- free-text tag for this run
argv[2]: feature_key  -- key selecting a pre-built feature set
argv[3]: group        -- grouping key (not read in this chunk)
"""

# Columns
# Experiment toggle: the last assignment wins; the earlier path is kept
# for quick switching between base datasets.
base_path = '../input/base_exclude*'
base_path = '../input/base_Av*'
key, target, ignore_list = MS_utils.get_basic_var()
# NOTE(review): the ignore_list from get_basic_var() is discarded and rebuilt.
ignore_list = [key, target, 'country_group', 'down_flg']
base = utils.read_df_pkl(base_path)[[key, target, 'country_group']]

# Basic Args
seed = 1208       # global RNG seed for this run
set_type = 'all'  # row-subset selector; semantics defined in MS_utils -- TODO confirm

comment = sys.argv[1]  # required run comment (argv[1])

# If argv[2] contains the letter 'f' it is treated as a feature key and a
# pre-built feature set is loaded; otherwise the plain base dataset is used.
if sys.argv[2].count('f'):
    train, test = MS_utils.get_feature_set(feat_key=sys.argv[2],
                                           base_path=base_path)
else:
    train, test = MS_utils.get_dataset(base=base)
print(train.shape, test.shape)
Esempio n. 3
0
HOME = os.path.expanduser('~')
sys.path.append(f"{HOME}/kaggle/data_analysis/library/")
sys.path.append(f"../py/")
import MS_utils
import utils, ml_utils, kaggle_utils
from utils import logger_func
# Create the module-level logger exactly once: reuse `logger` when it is
# already bound and truthy, otherwise build a fresh one via logger_func().
try:
    logger = logger if logger else logger_func()
except NameError:
    logger = logger_func()
import time
from sklearn.metrics import roc_auc_score, mean_squared_error

# Columns: project-wide id key, target column, and columns to exclude.
key, target, ignore_list = MS_utils.get_basic_var()


# Multi-GPU setup. NOTE(review): `is_multi` is defined outside this chunk --
# presumably a CLI/config flag; confirm where it is set.
if is_multi:
    import tensorflow as tf
    from keras.utils.training_utils import multi_gpu_model  # Keras multi-GPU wrapper
    gpu_count = 4  # number of GPUs handed to multi_gpu_model


#========================================================================
# Keras
from os.path import dirname
#sys.path.append(dirname(dirname(__file__)))

from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
Esempio n. 4
0
HOME = os.path.expanduser('~')
sys.path.append(f"{HOME}/kaggle/data_analysis/library/")
sys.path.append(f"../py/")
import MS_utils
import utils, ml_utils, kaggle_utils
from utils import logger_func
# Create the module-level logger exactly once: reuse `logger` when it is
# already bound and truthy, otherwise build a fresh one via logger_func().
try:
    logger = logger if logger else logger_func()
except NameError:
    logger = logger_func()
import time
from sklearn.metrics import roc_auc_score, mean_squared_error

# Columns: project-wide id key, target column, and columns to exclude.
key, target, ignore_list = MS_utils.get_basic_var()

from sklearn.metrics import mean_squared_error, roc_auc_score
#========================================================================
# Keras
# Corporación Favorita Grocery Sales Forecasting
sys.path.append(f'{HOME}/kaggle/data_analysis/model')
from nn_keras import MS_NN
from keras import callbacks
from keras import optimizers
from keras import backend as K
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
#========================================================================

start_time = "{0:%Y%m%d_%H%M%S}".format(datetime.datetime.now())
Esempio n. 5
0
from utils import logger_func
# Create the module-level logger exactly once: reuse `logger` when it is
# already bound and truthy, otherwise build a fresh one via logger_func().
try:
    logger = logger if logger else logger_func()
except NameError:
    logger = logger_func()
#========================================================================
"""
Command-line arguments:
argv[1]: comment      -- free-text tag for this run
argv[2]: feature_key  -- key selecting a pre-built feature set
"""
# Basic Args
seed = 1208       # global RNG seed for this run
set_type = 'all'  # row-subset selector; semantics defined in MS_utils -- TODO confirm
fold_n = 5        # number of cross-validation folds
key, target, ignore_list = MS_utils.get_basic_var()
# NOTE(review): the ignore_list from get_basic_var() is discarded and rebuilt.
ignore_list = [key, target, 'country_group', 'down_flg']
comment = sys.argv[1]  # required run comment (argv[1])

# Base
# Experiment toggle: the last assignment wins; the earlier paths are kept
# for quick switching between base datasets.
base_path = '../input/base_exclude*'
base_path = '../input/base_Av*'
base_path = '../input/base_group*'
base = utils.read_df_pkl(base_path)[[key, target, 'country_group']]

#========================================================================
# Make Validation
#  vi_col = 'f000_AvSigVersion'
#  ignore_list.append(vi_col)

#  cv = KFold(n_splits=fold_n, shuffle=False, random_state=seed)
Esempio n. 6
0
HOME = os.path.expanduser('~')
sys.path.append(f"{HOME}/kaggle/data_analysis/library/")
sys.path.append(f"../py/")
import MS_utils
import utils, ml_utils, kaggle_utils
from utils import logger_func
# Create the module-level logger exactly once: reuse `logger` when it is
# already bound and truthy, otherwise build a fresh one via logger_func().
try:
    logger = logger if logger else logger_func()
except NameError:
    logger = logger_func()
import time
from sklearn.metrics import roc_auc_score, mean_squared_error

# Columns: project-wide id key, target column, and columns to exclude.
key, target, ignore_list = MS_utils.get_basic_var()

# Multi-GPU setup. NOTE(review): `is_multi` is defined outside this chunk --
# presumably a CLI/config flag; confirm where it is set.
if is_multi:
    import tensorflow as tf
    from keras.utils.training_utils import multi_gpu_model  # Keras multi-GPU wrapper
    gpu_count = 4  # number of GPUs handed to multi_gpu_model

# Optional free-text run comment: argv[1] when supplied, "-" otherwise.
comment = sys.argv[1] if len(sys.argv) > 1 else "-"
start_time = time.time()  # wall-clock start for elapsed-time reporting

#========================================================================
# Keras
from os.path import dirname
Esempio n. 7
0
# Create the module-level logger exactly once: reuse `logger` when it is
# already bound and truthy, otherwise build a fresh one via logger_func().
try:
    logger = logger if logger else logger_func()
except NameError:
    logger = logger_func()
#========================================================================
"""
Command-line arguments:
argv[1]: comment      -- free-text tag for this run
argv[2]: feature_key  -- key selecting a pre-built feature set
"""
# Basic Args
seed = 605        # global RNG seed; an alternative seed is kept commented below
#  seed = 328
set_type = 'all'  # row-subset selector; semantics defined in MS_utils -- TODO confirm
fold_n = 5        # number of cross-validation folds
key, target, ignore_list = MS_utils.get_basic_var()
# NOTE(review): the ignore_list from get_basic_var() is discarded and rebuilt.
ignore_list = [key, target, 'country_group', 'down_flg']
comment = sys.argv[1]  # required run comment (argv[1])

# Base
# Experiment toggle: the last assignment wins; the earlier paths are kept
# for quick switching between base datasets.
base_path = '../input/base_exclude*'
base_path = '../input/base_Av*'
base_path = '../input/base_group*'
base = utils.read_df_pkl(base_path)[[key, target, 'country_group']]
base_train = base[~base[target].isnull()]  # rows with a known target -> train split

from scipy.sparse import vstack, csr_matrix, save_npz, load_npz, hstack

# Pre-built sparse feature matrices (8114-feature variant); the 1032-feature
# variant is kept commented out for quick switching.
train = load_npz('../input/sp_train_8114.npz')
x_test = load_npz('../input/sp_test_8114.npz')
#  train = load_npz('../input/sp_train_1032.npz')
Esempio n. 8
0
HOME = os.path.expanduser('~')
sys.path.append(f"{HOME}/kaggle/data_analysis/library/")
sys.path.append(f"../py/")
import MS_utils
import utils, ml_utils, kaggle_utils
from utils import logger_func
# Create the module-level logger exactly once: reuse `logger` when it is
# already bound and truthy, otherwise build a fresh one via logger_func().
try:
    logger = logger if logger else logger_func()
except NameError:
    logger = logger_func()
import time
from sklearn.metrics import roc_auc_score, mean_squared_error

# Columns: project-wide id key, target column, and columns to exclude.
key, target, ignore_list = MS_utils.get_basic_var()

# Multi-GPU setup. NOTE(review): `is_multi` is defined outside this chunk --
# presumably a CLI/config flag; confirm where it is set.
if is_multi:
    import tensorflow as tf
    from keras.utils.training_utils import multi_gpu_model  # Keras multi-GPU wrapper
    gpu_count = 4  # number of GPUs handed to multi_gpu_model

#========================================================================
# Keras
from os.path import dirname
#sys.path.append(dirname(dirname(__file__)))

from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Dense, Input, CuDNNLSTM, Embedding, Dropout, Activation, CuDNNGRU, Conv1D
from keras.layers import Bidirectional, GlobalMaxPool1D, GlobalMaxPooling1D, GlobalAveragePooling1D
Esempio n. 9
0
import time
import numpy as np
import pandas as pd
import glob
import sys
import os

HOME = os.path.expanduser('~')
sys.path.append(f"{HOME}/kaggle/data_analysis/library/")
import kaggle_utils
import ml_utils
import utils
import MS_utils

# Columns: project-wide id key, target column, and columns to exclude.
key, target, ignore_list = MS_utils.get_basic_var()

# Train Test Load -- default feature path; the alternative second-validation
# feature set is kept commented out for quick switching.
# train, test = MS_utils.get_dataset(feat_path='../features/2_second_valid/*.gz')
train, test = MS_utils.get_dataset()
"""
Numetricは無視する
"""
num_list = [
    'f000_Census_TotalPhysicalRAM'
    'f000_Census_InternalPrimaryDiagonalDisplaySizeInInches'
    'f000_Census_InternalBatteryNumberOfCharges'
    'f000_Census_InternalBatteryType'
    'f000_Census_InternalPrimaryDisplayResolutionVertical'
    'f000_Census_PrimaryDiskTotalCapacity'
    'f000_Census_InternalPrimaryDisplayResolutionHorizontal'