Exemple #1
0
def cross_validation_models(models_set, X_train_set, y_train_set):
    """Cross-validate every (name, model) pair and tabulate the scores.

    Each model is scored with pp.score_model on the training data; the
    square root of those scores is averaged (an RMSE-style summary).
    Returns a DataFrame with columns "Model" and "Score", sorted so the
    lowest (best) score comes first.
    """
    results = {"Model": [], "Score": []}
    for model_name, estimator in models_set:
        results["Model"].append(model_name)
        mean_rmse = np.sqrt(
            pp.score_model(estimator, X_train_set, y_train_set)).mean()
        results["Score"].append(mean_rmse)
    table = pd.DataFrame(results)
    return table.sort_values(by=['Score'], ascending=True)
Exemple #2
0
def get_validation_scores(models, X_train, y_train, X_test=None, y_test=None):
    """Cross-validate each (name, model) pair; optionally score a test set.

    For every model, records the mean of the square-rooted
    pp.score_model cross-validation scores. When a non-empty test set is
    supplied, each model is also fit on the training data and scored on
    the test set via get_test_scores.

    Returns a DataFrame sorted ascending by "Test" (when a test set was
    given) or by "Cross Validation" otherwise.
    """
    # Fix: the original signature used mutable default arguments
    # (X_test=[], y_test=[]). None sentinels are backward compatible:
    # an explicitly passed empty list still takes the no-test branch.
    has_test = X_test is not None and len(X_test) != 0
    scores_val = []
    scores_test = []
    names = []
    for name, model in models:
        names.append(name)
        scores_val.append(np.sqrt(pp.score_model(model, X_train, y_train)).mean())
        if has_test:
            model.fit(X_train, y_train)
            scores_test.append(get_test_scores(model, X_test, y_test))
    if has_test:
        tab = pd.DataFrame({"Model": names, "Cross Validation": scores_val,
                            "Test": scores_test})
        tab = tab.sort_values(by=['Test'], ascending=True)
    else:
        tab = pd.DataFrame({"Model": names, "Cross Validation": scores_val})
        tab = tab.sort_values(by=['Cross Validation'], ascending=True)
    return tab
Exemple #3
0
def get_validation_scores(models,
                          X_train,
                          y_train,
                          folds,
                          X_test=None,
                          y_test=None):
    """Cross-validate each (name, model) pair with `folds` CV folds.

    Records, per model, the mean, standard deviation, and full array of
    the square-rooted pp.score_model cross-validation scores. When a
    non-empty test set is supplied, each model is also fit on the
    training data and scored on the test set via get_test_scores.

    Returns a DataFrame sorted ascending by "Test" (when a test set was
    given) or by "Cross Validation (Mean)" otherwise.
    """
    # Fix: the original signature used mutable default arguments
    # (X_test=[], y_test=[]). None sentinels are backward compatible:
    # an explicitly passed empty list still takes the no-test branch.
    has_test = X_test is not None and len(X_test) != 0
    scores_val_mean = []
    scores_val_std = []
    scores_val = []
    scores_test = []
    names = []
    for name, model in models:
        names.append(name)
        val_scores = np.sqrt(
            pp.score_model(model, X_train, y_train, n_folds=folds))
        scores_val.append(val_scores)
        scores_val_mean.append(val_scores.mean())
        scores_val_std.append(val_scores.std())
        if has_test:
            model.fit(X_train, y_train)
            scores_test.append(get_test_scores(model, X_test, y_test))
    if has_test:
        tab = pd.DataFrame({
            "Model": names,
            "Cross Validation (Mean)": scores_val_mean,
            "Cross Validation (Std)": scores_val_std,
            "Cross Validation (Scores)": scores_val,
            "Test": scores_test
        })
        tab.sort_values(by=['Test'], ascending=True, inplace=True)
    else:
        tab = pd.DataFrame({
            "Model": names,
            "Cross Validation (Mean)": scores_val_mean,
            "Cross Validation (Std)": scores_val_std,
            "Cross Validation (Scores)": scores_val
        })
        tab.sort_values(by=['Cross Validation (Mean)'],
                        ascending=True,
                        inplace=True)
    return tab
Exemple #4
0
# Register all candidate estimators as (name, model) pairs.
models.append(("byr", model_byr))
models.append(("rforest", model_rforest))
models.append(("xgb", model_xgb))
models.append(("GBoost", model_GBoost))
models.append(("lgb", model_lgb))
models.append(("lasso_lars", model_lasso_lars))
models.append(("sgd", model_sgd))
models.append(("lsvr", model_lsvr))
models.append(("extra", model_extra))

# Cross-validate every candidate on the reduced feature set and print a
# leaderboard sorted by mean RMSE-style score (lowest first).
scores = []
names = []
for name, model in models:
    names.append(name)
    scores.append(
        np.sqrt(pp.score_model(model, train_X_reduced, train_y)).mean())
tab = pd.DataFrame({"Model": names, "Score": scores})
tab = tab.sort_values(by=['Score'], ascending=True)
print(tab)

# BUG FIX: `models` holds (name, model) tuples, so the original
# `for model in models:` bound each tuple and `tuple.fit(...)` would
# raise AttributeError. Unpack the pair and fit the estimator itself.
for _, model in models:
    # NOTE(review): fits on train_X while scoring above uses
    # train_X_reduced — confirm this asymmetry is intentional.
    model.fit(train_X, train_y)

# Simple averaging ensemble of four base models, scored the same way.
averaged_models = em.AveragingModels(
    models=[model_svr, model_ENet, model_KRR, model_xgb])

score_avg = np.sqrt(pp.score_model(averaged_models, train_X_reduced, train_y))
print(" Averaged base models score: {:.6f} ({:.6f})\n".format(
    score_avg.mean(), score_avg.std()))

averaged_models.fit(train_X_reduced, train_y)
Exemple #5
0
                 file_name='submission.csv'):
    model.fit(X_train_set, y_train_set)
    predicted = np.expm1(model.predict(test_X_to_submit))
    print(predicted)
    submission = pd.DataFrame({'Id': ids, 'SalePrice': predicted})
    submission.to_csv(file_name, index=False)


# Fit an averaging ensemble of three base models on the reduced training
# data and write its test-set predictions to a submission CSV (via the
# train_submit helper defined above).
train_submit(em.AveragingModels(models=[model_svr, model_byr, model_xgb]),
             train_X_reduced, train_y, test_X_reduced, 'submission_avg_1.csv')

# Best one so far: model_svr, model_KRR, model_xgb
# Try this combination: model_svr, model_KRR, averaged_models_tree
averaged_models = em.AveragingModels(models=[model_svr, model_KRR, model_xgb])

# Cross-validated score of the simple average ensemble: square root of
# pp.score_model's per-fold scores, reported as mean (std).
score_avg = np.sqrt(pp.score_model(averaged_models, train_X_reduced, train_y))
print(" Averaged base models score: {:.6f} ({:.6f})\n".format(
    score_avg.mean(), score_avg.std()))

# Stacked ensemble: KRR and XGBoost as base learners, SVR as the
# meta-model that combines their out-of-fold predictions.
stacked_averaged_models = em.StackingAveragedModels(
    base_models=[model_KRR, model_xgb], meta_model=model_svr)

score_stacked_averaged = np.sqrt(
    pp.score_model(stacked_averaged_models, train_X_reduced, train_y))
print(" Stacked Averaged base models score: {:.6f} ({:.6f})\n".format(
    score_stacked_averaged.mean(), score_stacked_averaged.std()))

# Fit the stacked ensemble and predict on the test set. np.expm1 on the
# predictions suggests the target was log1p-transformed upstream —
# TODO confirm against the preprocessing code.
stacked_averaged_models.fit(train_X_reduced, train_y)
predicted_prices_stacked_averaged = np.expm1(
    stacked_averaged_models.predict(test_X_reduced))
print(predicted_prices_stacked_averaged)