models.append(("extra", model_extra)) scores = [] names = [] for name, model in models: names.append(name) scores.append( np.sqrt(pp.score_model(model, train_X_reduced, train_y)).mean()) tab = pd.DataFrame({"Model": names, "Score": scores}) tab = tab.sort_values(by=['Score'], ascending=True) print(tab) for model in models: model.fit(train_X, train_y) averaged_models = em.AveragingModels( models=[model_svr, model_ENet, model_KRR, model_xgb]) score_avg = np.sqrt(pp.score_model(averaged_models, train_X_reduced, train_y)) print(" Averaged base models score: {:.6f} ({:.6f})\n".format( score_avg.mean(), score_avg.std())) averaged_models.fit(train_X_reduced, train_y) predicted_prices_averaged = np.expm1(averaged_models.predict(test_X_reduced)) print(predicted_prices_averaged) my_submission = pd.DataFrame({ 'Id': test.Id, 'SalePrice': predicted_prices_averaged }) my_submission.to_csv('submission_avg.csv', index=False) stacked_averaged_models = em.StackingAveragedModels(
# NOTE(review): this chunk opens mid-expression — `random_seed = s )` is the
# tail of a model constructor call whose opening parenthesis lies outside
# this view; tokens left untouched.
random_seed = s )
tree_models.append(("lgb_" + str(s), model_lgb))
#tree_models.append(("rf", model_rforest))
#tree_models.append(("xgb", model_xgb))
models.append(model_lgb)

# 10-fold cross-validation over the tree-based models (test scores only,
# all cores).
tree_results = pp.get_cross_validate(tree_models, train_set_X, train_set_y,
                                     folds = 10, seed = 2018,
                                     train_score = False, jobs = -1)
print(tree_results)

# Fit the current LightGBM model on the training split and score the
# hold-out split (cvl.score_sq — presumably a squared-error metric; confirm
# against its definition).
model_lgb.fit(train_set_X, train_set_y)
predicted = model_lgb.predict(test_set_X)
score_val = cvl.score_sq(test_set_y, predicted)

# Unweighted average over every model collected so far, validated with
# 5 folds, then written out as the submission file.
averaged_models = em.AveragingModels(models = models)
ensemble_models = []
ensemble_models.append(("averaged", averaged_models))
cross_val_table_avg = pp.get_validation_scores(ensemble_models,
                                               train_X_reduced, train_y, 5)
print(cross_val_table_avg)
pp.make_submission(averaged_models, train_X_reduced, train_y, test_X_reduced,
                   ids, filename = 'submission.csv')

# LightGBM parameter grid; the dict literal continues past the edge of this
# view.
lgbm_params = {
    'task': 'train',
    'boosting_type': 'gbdt',
    'objective': 'regression',
linear_models.append(("enet", model_ENet)) linear_models.append(("enet", model_byr)) cross_val_table = get_validation_scores(tree_models, X_train, y_train, 5, X_test, y_test) print(cross_val_table) cross_val_table = get_validation_scores(tree_models, train_X_reduced, train_y, 5) print(cross_val_table) cross_val_table_linear = get_validation_scores(linear_models, train_X_reduced, train_y, 5) print(cross_val_table_linear) averaged_models = em.AveragingModels( models=[model_lgb, model_byr, model_svr, model_ridge]) stacked_averaged_models = em.StackingAveragedModels( base_models=[model_svr, model_lgb], meta_model=model_KRR) averaged_plus = em.AveragingModels( models=[averaged_models, model_GBoost, model_xgb], weights=[0.7, 0.2, 0.1]) averaged_plus_plus = em.AveragingModels( models=[stacked_averaged_models, model_GBoost, model_xgb], weights=[0.7, 0.2, 0.1]) ensemble_models = [] ensemble_models.append(("averaged", averaged_models)) ensemble_models.append(("stacked", stacked_averaged_models)) ensemble_models.append(("averaged_plus", averaged_plus)) ensemble_models.append(("averaged_plus_plus", averaged_plus_plus)) cross_val_table_ensemble = get_validation_scores(ensemble_models, X_train,
#early_stopping_rounds=100 verbose_eval=10, random_seed=s) tree_models.append(("lgb_" + str(s), model_lgb)) #tree_models.append(("rf", model_rforest)) #tree_models.append(("xgb", model_xgb)) cross_val_table = pp.get_validation_scores(tree_models, train_set_X, train_set_y, 5) print(cross_val_table) model_lgb.fit(train_set_X, train_set_y) predicted = model_lgb.predict(test_set_X) score_val = cvl.score_sq(test_set_y, predicted) averaged_models = em.AveragingModels(models=[model_lgb, model_rforest]) ensemble_models = [] ensemble_models.append(("averaged", averaged_models)) cross_val_table_avg = pp.get_validation_scores(ensemble_models, train_X_reduced, train_y, 5) print(cross_val_table_avg) pp.make_submission(model_lgb, train_X_reduced, train_y, test_X_reduced, ids, filename='submission.csv')
# Register every candidate estimator under a short display name.
models.extend([
    ("lasso", model_lasso),
    ("ridge", model_ridge),
    ("svr", model_svr),
    ("ENet", model_ENet),
    ("KRR", model_KRR),
    ("byr", model_byr),
    ("rforest", model_rforest),
    ("xgb", model_xgb),
    ("GBoost", model_GBoost),
    ("lgb", model_lgb),
    ("lasso_lars", model_lasso_lars),
    ("lsvr", model_lsvr),
    #("sgd", model_sgd),
    #("extra", model_extra),
    ("average", em.AveragingModels(models=[model_ridge, model_byr,
                                           model_xgb])),
])

######## cross validation #####################################################


def cross_validation_models(models_set, X_train_set, y_train_set):
    """Cross-validate each named model and tabulate its mean RMSE.

    models_set is an iterable of (name, estimator) pairs; each estimator is
    scored with pp.score_model on the given training data. Returns a
    DataFrame with "Model"/"Score" columns sorted so the best (lowest)
    score comes first.
    """
    table = {"Model": [], "Score": []}
    for label, estimator in models_set:
        rmse = np.sqrt(pp.score_model(estimator, X_train_set,
                                      y_train_set)).mean()
        table["Model"].append(label)
        table["Score"].append(rmse)
    return pd.DataFrame(table).sort_values(by=['Score'], ascending=True)
# NOTE(review): this chunk opens mid-call — these arguments close a
# pp.get_cross_validate(...) invocation (for the linear models, judging by
# the `linear_results` it fills) whose opening lies outside this view;
# tokens left untouched.
train_y.ravel(), folds=10, seed=seed, train_score=False, jobs=2)
print(linear_results)

# Matching 10-fold cross-validation for the tree-based models.
tree_results = pp.get_cross_validate(tree_models, train_X_reduced,
                                     train_y.ravel(), folds=10, seed=seed,
                                     train_score=False)
print(tree_results)

# Ensemble candidates: a plain average, a stacked ensemble with a ridge
# meta-model, and weighted blends layering GBoost/XGB on top of each
# (weights 0.7/0.2/0.1).
averaged_models = em.AveragingModels(models=[model_lgb, model_KRR, model_svr])
stacked_averaged_models = em.StackingAveragedModels(
    base_models=[model_KRR, model_lsvr, model_lgb], meta_model=model_ridge)
averaged_plus = em.AveragingModels(
    models=[averaged_models, model_GBoost, model_xgb],
    weights=[0.7, 0.2, 0.1])
averaged_plus_plus = em.AveragingModels(
    models=[stacked_averaged_models, model_GBoost, model_xgb],
    weights=[0.7, 0.2, 0.1])
# Two-level average: one sub-average of KRR/ridge/linear-SVR, one of the
# boosted-tree trio, then the two averaged together.
avg_full = em.AveragingModels(models=[
    em.AveragingModels(models=[model_KRR, model_ridge, model_lsvr]),
    em.AveragingModels(models=[model_lgb, model_GBoost, model_xgb])
])

ensemble_models = []
ensemble_models.append(("averaged", averaged_models))