예제 #1
0
# Add the "extra" model to the list of (name, estimator) pairs.
models.append(("extra", model_extra))

# Score every (name, estimator) pair on the reduced training set.
names = []
scores = []
for name, model in models:
    names.append(name)
    # np.sqrt is applied to whatever pp.score_model returns and the result is
    # averaged.  Presumably score_model yields per-fold squared errors, which
    # would make this a mean RMSE -- confirm against pp.score_model.
    fold_scores = pp.score_model(model, train_X_reduced, train_y)
    scores.append(np.sqrt(fold_scores).mean())

# One row per model, lowest score first.
tab = pd.DataFrame({"Model": names, "Score": scores}).sort_values(
    by=['Score'], ascending=True)
print(tab)

# `models` holds (name, estimator) tuples, so each pair must be unpacked
# before fitting; calling .fit on the tuple itself raises AttributeError.
# NOTE(review): this fits on train_X while the surrounding scoring code uses
# train_X_reduced -- confirm which feature matrix is intended.
for _, model in models:
    model.fit(train_X, train_y)

# Ensemble built from four of the base models.
averaged_models = em.AveragingModels(
    models=[model_svr, model_ENet, model_KRR, model_xgb],
)

# Report mean and standard deviation of the square-rooted ensemble scores.
score_avg = np.sqrt(pp.score_model(averaged_models, train_X_reduced, train_y))
score_report = " Averaged base models score: {:.6f} ({:.6f})\n"
print(score_report.format(score_avg.mean(), score_avg.std()))

# Fit the ensemble on the reduced training set and predict the test set.
averaged_models.fit(train_X_reduced, train_y)
raw_predictions = averaged_models.predict(test_X_reduced)
# expm1 computes exp(x) - 1; presumably this undoes a log1p transform applied
# to the target earlier in the pipeline -- confirm.
predicted_prices_averaged = np.expm1(raw_predictions)
print(predicted_prices_averaged)

# Write the predictions next to the test ids, without the DataFrame index.
submission_columns = {
    'Id': test.Id,
    'SalePrice': predicted_prices_averaged,
}
my_submission = pd.DataFrame(submission_columns)
my_submission.to_csv('submission_avg.csv', index=False)

stacked_averaged_models = em.StackingAveragedModels(
예제 #2
0
                              random_seed = s
                              )
    tree_models.append(("lgb_" + str(s), model_lgb))
    #tree_models.append(("rf", model_rforest))
    #tree_models.append(("xgb", model_xgb))
    models.append(model_lgb)

# Cross-validate every entry of tree_models: 10 folds, fixed seed, no
# training scores requested.
tree_results = pp.get_cross_validate(
    tree_models,
    train_set_X,
    train_set_y,
    folds=10,
    seed=2018,
    train_score=False,
    jobs=-1,
)
print(tree_results)

# Fit model_lgb (as currently bound) on the training split and score its
# predictions for the test split.
# NOTE(review): score_val is not used anywhere in the visible code.
model_lgb.fit(train_set_X, train_set_y)
predicted = model_lgb.predict(test_set_X)
score_val = cvl.score_sq(test_set_y, predicted)

# Average all models collected in `models`.
averaged_models = em.AveragingModels(models=models)

# The validation helper takes (name, model) pairs.
ensemble_models = [("averaged", averaged_models)]

cross_val_table_avg = pp.get_validation_scores(
    ensemble_models, train_X_reduced, train_y, 5)
print(cross_val_table_avg)

# Produce the submission file from the averaged ensemble.
pp.make_submission(
    averaged_models,
    train_X_reduced,
    train_y,
    test_X_reduced,
    ids,
    filename='submission.csv',
)


lgbm_params =  {
    'task': 'train',
    'boosting_type': 'gbdt',
    'objective': 'regression',
예제 #3
0
# Register the linear models as (label, estimator) pairs.  Each estimator
# gets its own label: model_byr was previously registered as "enet" as well,
# which made the two entries indistinguishable by name.
linear_models.append(("enet", model_ENet))
linear_models.append(("byr", model_byr))

# Tree models on the X_train / y_train data, with X_test / y_test passed as
# the two additional arguments.
cross_val_table = get_validation_scores(
    tree_models, X_train, y_train, 5, X_test, y_test)
print(cross_val_table)

# Tree models again, this time on the reduced training set.
# NOTE: this rebinds cross_val_table, replacing the table printed above.
cross_val_table = get_validation_scores(
    tree_models, train_X_reduced, train_y, 5)
print(cross_val_table)

# Linear models on the reduced training set.
cross_val_table_linear = get_validation_scores(
    linear_models, train_X_reduced, train_y, 5)
print(cross_val_table_linear)

# Plain average of four base models.
averaged_models = em.AveragingModels(
    models=[model_lgb, model_byr, model_svr, model_ridge],
)

# Stacked ensemble: two base models combined by model_KRR as meta model.
stacked_averaged_models = em.StackingAveragedModels(
    base_models=[model_svr, model_lgb],
    meta_model=model_KRR,
)

# Weighted blends: 0.7 for the ensemble above, 0.2 / 0.1 for the boosters.
averaged_plus = em.AveragingModels(
    models=[averaged_models, model_GBoost, model_xgb],
    weights=[0.7, 0.2, 0.1],
)
averaged_plus_plus = em.AveragingModels(
    models=[stacked_averaged_models, model_GBoost, model_xgb],
    weights=[0.7, 0.2, 0.1],
)

# (name, model) pairs for the validation helper.
ensemble_models = [
    ("averaged", averaged_models),
    ("stacked", stacked_averaged_models),
    ("averaged_plus", averaged_plus),
    ("averaged_plus_plus", averaged_plus_plus),
]

cross_val_table_ensemble = get_validation_scores(ensemble_models, X_train,
예제 #4
0
        #early_stopping_rounds=100
        verbose_eval=10,
        random_seed=s)
    tree_models.append(("lgb_" + str(s), model_lgb))
    #tree_models.append(("rf", model_rforest))
    #tree_models.append(("xgb", model_xgb))

# Validation scores for every entry of tree_models.
cross_val_table = pp.get_validation_scores(
    tree_models, train_set_X, train_set_y, 5)
print(cross_val_table)

# Fit model_lgb (as currently bound) on the training split and score its
# predictions for the test split.
# NOTE(review): score_val is not used anywhere in the visible code.
model_lgb.fit(train_set_X, train_set_y)
predicted = model_lgb.predict(test_set_X)
score_val = cvl.score_sq(test_set_y, predicted)

# Average of the LightGBM and random-forest models.
averaged_models = em.AveragingModels(
    models=[model_lgb, model_rforest],
)

# The validation helper takes (name, model) pairs.
ensemble_models = [("averaged", averaged_models)]

cross_val_table_avg = pp.get_validation_scores(
    ensemble_models, train_X_reduced, train_y, 5)
print(cross_val_table_avg)

# NOTE(review): the submission is built from model_lgb alone, not from the
# averaged ensemble validated just above -- confirm this is intended.
pp.make_submission(
    model_lgb,
    train_X_reduced,
    train_y,
    test_X_reduced,
    ids,
    filename='submission.csv',
)
예제 #5
0
# Register the candidate models as (name, estimator) pairs in one call;
# the order matches the order in which they are listed.
models.extend([
    ("lasso", model_lasso),
    ("ridge", model_ridge),
    ("svr", model_svr),
    ("ENet", model_ENet),
    ("KRR", model_KRR),
    ("byr", model_byr),
    ("rforest", model_rforest),
    ("xgb", model_xgb),
    ("GBoost", model_GBoost),
    ("lgb", model_lgb),
    ("lasso_lars", model_lasso_lars),
    ("lsvr", model_lsvr),
    # Disabled entries:
    # ("sgd", model_sgd),
    # ("extra", model_extra),
    ("average",
     em.AveragingModels(models=[model_ridge, model_byr, model_xgb])),
])


######## cross validation #####################################################
def cross_validation_models(models_set, X_train_set, y_train_set):
    """Score each model and return the results as a sorted table.

    Args:
        models_set: Iterable of ``(name, model)`` pairs.
        X_train_set: Training features, passed through to
            ``pp.score_model``.
        y_train_set: Training targets, passed through to
            ``pp.score_model``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``"Model"`` and ``"Score"``,
        sorted by ``"Score"`` in ascending order.  Each score is the mean of
        the square root of what ``pp.score_model`` returns for that model
        (presumably per-fold squared errors -- confirm against
        ``pp.score_model``).
    """
    results = [
        (label,
         np.sqrt(pp.score_model(estimator, X_train_set, y_train_set)).mean())
        for label, estimator in models_set
    ]
    table = pd.DataFrame({
        "Model": [label for label, _ in results],
        "Score": [score for _, score in results],
    })
    return table.sort_values(by=['Score'], ascending=True)

예제 #6
0
                                       train_y.ravel(),
                                       folds=10,
                                       seed=seed,
                                       train_score=False,
                                       jobs=2)
print(linear_results)

# Cross-validate the tree models on the reduced training set: 10 folds,
# shared seed, no training scores requested.  The target is flattened with
# ravel() before being handed over.
tree_results = pp.get_cross_validate(
    tree_models,
    train_X_reduced,
    train_y.ravel(),
    folds=10,
    seed=seed,
    train_score=False,
)
print(tree_results)

# Plain average of three base models.
averaged_models = em.AveragingModels(
    models=[model_lgb, model_KRR, model_svr],
)

# Stacked ensemble: three base models combined by model_ridge as meta model.
stacked_averaged_models = em.StackingAveragedModels(
    base_models=[model_KRR, model_lsvr, model_lgb],
    meta_model=model_ridge,
)

# Weighted blends: 0.7 for the ensemble above, 0.2 / 0.1 for the boosters.
averaged_plus = em.AveragingModels(
    models=[averaged_models, model_GBoost, model_xgb],
    weights=[0.7, 0.2, 0.1],
)
averaged_plus_plus = em.AveragingModels(
    models=[stacked_averaged_models, model_GBoost, model_xgb],
    weights=[0.7, 0.2, 0.1],
)

# Average of two averages, each over its own group of three models.
avg_full_first = em.AveragingModels(
    models=[model_KRR, model_ridge, model_lsvr])
avg_full_second = em.AveragingModels(
    models=[model_lgb, model_GBoost, model_xgb])
avg_full = em.AveragingModels(models=[avg_full_first, avg_full_second])

# (name, model) pairs for the validation helper.
ensemble_models = [("averaged", averaged_models)]