Example #1
import xgboost as xgb

from otto_utils import load_train_data


def load_xgb_train_data(train_csv, train_buf):
    # load the preprocessed training data (features, labels, label encoder, scaler)
    X, y, encoder, scaler = load_train_data(train_csv)

    # cache the data in xgboost's binary DMatrix buffer format for fast reloading
    dtrain = xgb.DMatrix(X, label=y)
    dtrain.save_binary(train_buf)

    return X, y, encoder, scaler, dtrain
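A brief usage sketch follows (the params dict here is illustrative and not taken from the original script): the cached buffer can be reloaded into a DMatrix directly, skipping the CSV parsing and preprocessing on later runs.

import xgboost as xgb

# first run: parse the CSV, preprocess, and cache the DMatrix to disk
X, y, encoder, scaler, dtrain = load_xgb_train_data('../data/train.csv',
                                                    'data/train.buffer')

# later runs: reload the cached binary buffer directly
dtrain = xgb.DMatrix('data/train.buffer')

# illustrative multiclass settings; the real parameters live elsewhere in the repo
params = {'objective': 'multi:softprob', 'num_class': 9, 'eval_metric': 'mlogloss'}
bst = xgb.train(params, dtrain, num_boost_round=100)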
Example #2
import os
import sys

from sklearn import cross_validation

from lasagne.layers import InputLayer
from lasagne.layers import DropoutLayer
from lasagne.nonlinearities import softmax
from lasagne.updates import nesterov_momentum
from nolearn.lasagne import NeuralNet

pardir = os.path.realpath('..')
if pardir not in sys.path:
    sys.path.append(pardir)


from otto_utils import (load_train_data, load_test_data,
                        mkdir_p, calc_ll_from_proba)


X, y, encoder, scaler = load_train_data('../data/train.csv')
X_test, ids = load_test_data('../data/test.csv', scaler)

num_classes = len(encoder.classes_)
num_features = X.shape[1]
n = X.shape[0]

n_folds = 5

# create n_folds cross-validation splits
kf = cross_validation.KFold(n, n_folds=n_folds,
                            shuffle=True,
                            random_state=1234)


# Train Neural Net
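The network definition itself is not part of this excerpt. Below is a minimal sketch of the training step using the nolearn/lasagne pieces imported above, assuming one hidden DenseLayer; the layer size, dropout rate, learning rate, momentum, and epoch count are illustrative and not taken from the original script.

from lasagne.layers import DenseLayer

net = NeuralNet(
    layers=[('input', InputLayer),
            ('dense0', DenseLayer),
            ('dropout0', DropoutLayer),
            ('output', DenseLayer)],
    input_shape=(None, num_features),
    dense0_num_units=512,
    dropout0_p=0.5,
    output_num_units=num_classes,
    output_nonlinearity=softmax,
    update=nesterov_momentum,
    update_learning_rate=0.01,
    update_momentum=0.9,
    max_epochs=20,
    verbose=1,
)

# nolearn/lasagne expects float32 features and int32 class labels
net.fit(X.astype('float32'), y.astype('int32'))
proba = net.predict_proba(X_test.astype('float32'))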
Example #3
import json

import xgboost as xgb
from sklearn import cross_validation

import otto_utils as ou

# simdir (the output directory for this run) is assumed to be defined earlier in the script
ou.mkdir_p(simdir)


num_rounds = 2000
params = """{"eval_metric": "mlogloss", "early_stopping_rounds": 10, "colsample_bytree": "0.5", "num_class": 9, "silent": 1, "nthread": 16, "min_child_weight": "4", "subsample": "0.8", "eta": "0.0125","objective": "multi:softprob", "max_depth": "14", "gamma": "0.025"}"""

params = json.loads(params)


# files
train_csv = '../data/train.csv'
train_buf = 'data/train.buffer'

# first clean the train data and save
print('loading data...')
X, y, encoder, scaler = ou.load_train_data(train_csv)
n_folds = 5

n, p = X.shape

# create n_folds cross-validation splits
kf = cross_validation.KFold(n, n_folds=n_folds,
                            shuffle=True,
                            random_state=1234)

ll = []
i = 0
for train_index, test_index in kf:
    # ncv (the index of the single fold to run in this invocation) is assumed to be
    # defined earlier in the script, e.g. parsed from a command-line argument
    if i != ncv:
        i += 1
        continue
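The excerpt cuts off inside the fold loop. Below is a minimal sketch of how the selected fold could then be trained and scored; the watchlist layout is an assumption, and sklearn's log_loss (which would require from sklearn.metrics import log_loss near the top of the script) stands in for whatever metric helper the original uses.

    # build DMatrices for the selected fold
    dtrain = xgb.DMatrix(X[train_index], label=y[train_index])
    dvalid = xgb.DMatrix(X[test_index], label=y[test_index])

    # train with early stopping monitored on the held-out fold
    watchlist = [(dtrain, 'train'), (dvalid, 'eval')]
    bst = xgb.train(params, dtrain, num_rounds, evals=watchlist,
                    early_stopping_rounds=params['early_stopping_rounds'])

    # score the held-out fold with multiclass log loss
    proba = bst.predict(dvalid)
    ll.append(log_loss(y[test_index], proba))
    print('fold %d log loss: %.5f' % (i, ll[-1]))
    break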