# Example #1
# 0
# param_list = param_list[:11]

# labels: map class names to integers, then expand to a dense one-hot matrix
lbl_enc = preprocessing.LabelEncoder()
oneHotEnc = preprocessing.OneHotEncoder()
int_labels = lbl_enc.fit_transform(labels)
labels = oneHotEnc.fit_transform(int_labels.reshape((-1, 1))).toarray()


train_name = "feat_sel_mult_train3"
# Load each model's previously written train-set predictions (one CSV per
# (loss, ntree, param) entry) into a list of arrays for stacking.
# Fix: the original kept an `i` counter that was incremented but never read.
preds_train = [
    np.asarray(
        pd.read_csv(
            otto_utils.get_train_preds_from_param(param, train_name, ntree),
            index_col=0,
        )
    )
    for _loss, ntree, param in param_list
]

# Hyper-parameters for a subsequent parameter-search / blending step.
# NOTE(review): presumably lr/decay/max_epoch/tol configure a gradient-based
# combiner trained on preds_train — the consuming code is not in this chunk;
# confirm against the rest of the file.
lr = 0.1
decay = 0.05
max_epoch = 500
tol = 0.00007
search_param = True
if search_param:
    best_loss = None
    # split dataset: shuffle row indices (permutation over the number of
    # training rows) to carve out train/validation prediction subsets.
    # NOTE(review): this branch appears truncated — c_preds_train,
    # c_preds_test and idx are created but nothing visible here consumes them.
    c_preds_train = []
    c_preds_test = []
    num = len(preds_train[0])
    idx = np.random.permutation(num)
# Train one xgboost model per parameter set, save each model, write its
# train/test predictions to CSV, and average the test predictions into a
# single ensemble submission.
# Fix: avg_preds was read (`if avg_preds is None`) without ever being
# initialized, raising NameError on the first iteration.
avg_preds = None  # running sum of per-model test predictions
for loss, num_round, param in param_list:
    # merge the shared base parameters with this model's overrides
    full_param = other.copy()
    full_param.update(param)
    # (removed dead assignment: `plst = full_param.items()` was never used)

    # train on the training set
    # NOTE(review): stock xgb.train returns a single Booster; this 3-tuple
    # unpacking assumes a project-patched xgboost — confirm.
    bst, loss, ntree = xgb.train(full_param, dtrain, num_round, watchlist)

    # dump bst model
    bst_dump_model = otto_utils.get_model_name_from_param(param, train_name, ntree)
    bst.save_model(bst_dump_model)

    # output train predictions
    preds = bst.predict(dtrain)
    preds = pd.DataFrame(preds, index=train_ids, columns=sample.columns[1:])
    preds.to_csv(otto_utils.get_train_preds_from_param(param, train_name, ntree), index_label='id')

    # output test predictions
    preds = bst.predict(dtest)
    preds = pd.DataFrame(preds, index=sample.id.values, columns=sample.columns[1:])
    preds.to_csv(otto_utils.get_test_preds_from_param(param, train_name, ntree), index_label='id')

    # accumulate test predictions for the ensemble average
    if avg_preds is None:
        avg_preds = preds
    else:
        avg_preds += preds

# average over all trained models
avg_preds /= len(param_list)

# create submission file
avg_preds = pd.DataFrame(avg_preds, index=sample.id.values, columns=sample.columns[1:])