Example #1
import shutil
from pathlib import Path

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import label_binarize

from Model.preprocess import preprocess

# Delete old model data and event summaries
old_dirs = ['./model_data', 'C:/Users/Think/AnacondaProjects/tmp/otto']
for old_dir in old_dirs:
    if Path(old_dir).is_dir():
        shutil.rmtree(old_dir)

model_path = './model_data/model_ann.ckpt'

inputs, labels = preprocess()

# Split the full training set into train and validation subsets (smaller sets make iteration faster)
cv_size = 0.3
X_train, X_cv, y_train, y_cv = train_test_split(inputs,
                                                labels,
                                                test_size=cv_size)
# One-hot encode the integer labels (9 product classes)
num_classes = 9
y_train_enc = label_binarize(y_train, classes=range(num_classes))
y_cv_enc = label_binarize(y_cv, classes=range(num_classes))

# Training hyperparameters
learning_rate = 0.0001
batch_size = 100
epochs = 200
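
For reference, label_binarize maps each integer label to a one-hot row; a minimal sketch of the resulting shapes, with toy labels standing in for the real output of preprocess():

from sklearn.preprocessing import label_binarize

# Toy labels in place of y_train (the real labels come from preprocess())
toy_labels = [0, 2, 8]
encoded = label_binarize(toy_labels, classes=range(9))
print(encoded.shape)  # (3, 9): one row per sample, one column per class
print(encoded[0])     # [1 0 0 0 0 0 0 0 0]: class 0 sets only the first column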
Example #2
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
from Model.preprocess import preprocess
import time

X_train, y_train = preprocess()

# Initialize the XGBoost classifier with baseline hyperparameters
model = XGBClassifier(learning_rate=0.1,
                      n_estimators=100,
                      max_depth=9,
                      min_child_weight=1,
                      gamma=0,
                      subsample=0.8,
                      colsample_bytree=0.8,
                      objective='multi:softprob',
                      random_state=27)

# set grid search parameters
gamma = [i / 10.0 for i in range(0, 5)]  # candidates 0.0-0.4; optimal: 0.2
num_fits = len(gamma) * 5  # 5 gamma values x 5 CV folds = 25 fits
param_grid = dict(gamma=gamma)

kfold = StratifiedKFold(n_splits=5, shuffle=True)
grid_search = GridSearchCV(model,
                           param_grid,
                           scoring="neg_log_loss",
                           cv=kfold,
                           verbose=num_fits)  # high verbosity level so every fit is logged
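
The listing stops before the search is actually run; a minimal sketch of the usual next step, fitting the grid and reporting the best gamma (assuming X_train and y_train are the arrays returned by preprocess above):

# Run all 25 fits and summarize the cross-validated scores
grid_result = grid_search.fit(X_train, y_train)
print("Best (negative) log loss: %f using %s"
      % (grid_result.best_score_, grid_result.best_params_))
for mean_score, params in zip(grid_result.cv_results_['mean_test_score'],
                              grid_result.cv_results_['params']):
    print("%f with %r" % (mean_score, params))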