# Rows before num_train form the training matrix; every remaining row of
# X_all is only predicted on.
X_train, X_test = X_all[:num_train], X_all[num_train:]

# 5-fold stacking with a single XGBoost-based model.
# NOTE(review): the settings are handed unchanged to the project wrapper
# MyXgbClassifier2; how it consumes keys such as 'n_trees' is not visible
# from here - confirm against the wrapper.
xgb_params = {
    'n_trees': 500,
    'eta': 0.005,
    'max_depth': 4,
    'subsample': 0.95,
    'objective': 'reg:linear',
    'eval_metric': 'rmse',
    'silent': 1,
}
clf = MyXgbClassifier2(xgb_params)
stacking = Stacking(5, [clf])
pred_oof, pred_test = stacking.fit_predict(X_train, y_train, X_test)

# Column 0 of pred_test is split at num_out: rows from num_out onwards are
# paired with test_ID, the leading num_out rows with out_ID.
# NOTE(review): presumably column 0 belongs to the single model above - confirm.

# Save test
pred_test_rows = pred_test[num_out:, 0]
submission = pd.DataFrame({'ID': test_ID, 'y': pred_test_rows})
test_csv_path = join(
    OUTPUT_PATH, 'stacking/Submission-XgbBaseline120-Test.csv')
submission.to_csv(test_csv_path, index=False)

# Save out
pred_out_rows = pred_test[:num_out, 0]
out_pred = pd.DataFrame({'ID': out_ID, 'y': pred_out_rows})
out_csv_path = join(
    OUTPUT_PATH, 'stacking/Submission-XgbBaseline120-Out.csv')
out_pred.to_csv(out_csv_path, index=False)
# Save oof
oof_pred = pd.DataFrame({'ID': train_ID, 'y': pred_oof[:, 0]})
oof_pred.to_csv(join(
Ejemplo n.º 2
0
# oof cv: print the R^2 of every column of oof_df against its `y` column
# (the loop also visits `y` itself).
for col in oof_df.columns:
    # A single pre-formatted argument prints identically whether `print` is
    # the Python 2 statement or the Python 3 function.
    print('{} {}'.format(col, r2_score(oof_df.y, oof_df[col])))

# data transform: keep only the requested models that have a column in
# oof_df. A list comprehension (instead of filter) guarantees a real list on
# Python 3 as well; model_list is indexed with and iterated over below.
model_list = [model_name for model_name in model_list
              if model_name in oof_df.columns]
y_train = oof_df.y
# Feature columns follow the order of model_list in both matrices.
X_train = oof_df[model_list].values
X_test = test_df[model_list].values
train_ID = oof_df.index.values
test_ID = test_df.index.values

# 5cv: one BayesianRidge meta-model, scored with r2_score.
clf = BayesianRidge()
stacking = Stacking(5, [clf], metric=r2_score)
pred_oof, pred_test = stacking.fit_predict(X_train, y_train, X_test)

# r^2 0.56200717888  (value noted by the original author)
# NOTE(review): each row of pred_oof.T is presumably the out-of-fold
# prediction of one base model - confirm against Stacking.
for pred_oof_single in pred_oof.T:
    print(r2_score(y_train, pred_oof_single))
# NOTE(review): metric_result presumably holds one score per fold - confirm.
metric_result = stacking.metric_result
print('{} {}'.format(np.mean(metric_result), np.std(metric_result)))
# Pair each input model with its coefficient in the first stored estimator;
# the pairing relies on X_train's columns being in model_list order.
estimator0 = stacking.estimators[0]
for model_name, coef in zip(model_list, estimator0.coef_):
    print('{} {}'.format(model_name, coef))

# Save test
submission = pd.DataFrame({'ID': test_ID, 'y': pred_test[:, 0]})
submission.to_csv(join(OUTPUT_PATH,
                       'stacking/Submission-{}-Test.csv'.format(title)),
# oof cv: print the R^2 of every column of oof_df against its `y` column
# (the loop also visits `y` itself).
for col in oof_df.columns:
    # A single pre-formatted argument prints identically whether `print` is
    # the Python 2 statement or the Python 3 function.
    print('{} {}'.format(col, r2_score(oof_df.y, oof_df[col])))

# data transform: keep only the requested models that have a column in
# oof_df. A list comprehension (instead of filter) guarantees a real list on
# Python 3 as well; model_list is indexed with and iterated over below.
model_list = [model_name for model_name in model_list
              if model_name in oof_df.columns]
y_train = oof_df.y
# Feature columns follow the order of model_list in both matrices.
X_train = oof_df[model_list].values
X_test = test_df[model_list].values
train_ID = oof_df.index.values
test_ID = test_df.index.values

# 5cv: one BayesianRidge meta-model, scored with r2_score, with an explicit
# random_state passed to Stacking.
clf = BayesianRidge()
stacking = Stacking(5, [clf], metric=r2_score, random_state=67373)
pred_oof, pred_test = stacking.fit_predict(X_train, y_train, X_test)

# r^2 0.56200717888  (value noted by the original author)
# NOTE(review): each row of pred_oof.T is presumably the out-of-fold
# prediction of one base model - confirm against Stacking.
for pred_oof_single in pred_oof.T:
    print(r2_score(y_train, pred_oof_single))
# NOTE(review): metric_result presumably holds one score per fold - confirm.
metric_result = stacking.metric_result
print('{} {}'.format(np.mean(metric_result), np.std(metric_result)))
# Pair each input model with its coefficient in the first stored estimator;
# the pairing relies on X_train's columns being in model_list order.
estimator0 = stacking.estimators[0]
for model_name, coef in zip(model_list, estimator0.coef_):
    print('{} {}'.format(model_name, coef))

# Save test
submission = pd.DataFrame({'ID': test_ID, 'y': pred_test[:, 0]})
submission.to_csv(join(OUTPUT_PATH,
                       'stacking/Submission-{}-Test.csv'.format(title)),