def cross_validation_models(models_set, X_train_set, y_train_set):
    """Build a score table for a collection of named models.

    For every ``(name, model)`` pair, ``pp.score_model`` is called with the
    model and the training data; the square root of its result is taken and
    averaged with ``.mean()``.

    Args:
        models_set: Iterable of ``(name, model)`` pairs.
        X_train_set: Training features, forwarded to ``pp.score_model``.
        y_train_set: Training target, forwarded to ``pp.score_model``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``"Model"`` and ``"Score"``,
        sorted by ``"Score"`` in ascending order.
    """
    # Single pass over the pairs so a one-shot iterable is consumed only once.
    ranked = [
        (
            label,
            np.sqrt(
                pp.score_model(estimator, X_train_set, y_train_set)
            ).mean(),
        )
        for label, estimator in models_set
    ]
    table = pd.DataFrame({
        "Model": [label for label, _ in ranked],
        "Score": [value for _, value in ranked],
    })
    return table.sort_values(by=['Score'], ascending=True)
def get_validation_scores(models, X_train, y_train, X_test=None, y_test=None):
    """Tabulate cross-validation (and optionally test) scores per model.

    For every ``(name, model)`` pair the square root of
    ``pp.score_model(model, X_train, y_train)`` is averaged with ``.mean()``.
    When a non-empty ``X_test`` is supplied, each model is additionally
    fitted on the training data and ``get_test_scores(model, X_test, y_test)``
    is recorded as its test score.

    NOTE(review): this file contains a later definition that is also named
    ``get_validation_scores`` (taking an extra ``folds`` parameter). If both
    are executed in file order, the later one replaces this one.

    Args:
        models: Iterable of ``(name, model)`` pairs.
        X_train: Training features.
        y_train: Training target.
        X_test: Optional test features. ``None`` or an empty container means
            "no test set".
        y_test: Optional test target, only used together with ``X_test``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``"Model"`` and
        ``"Cross Validation"``, plus ``"Test"`` when a test set was given.
        Rows are sorted ascending by ``"Test"`` if present, otherwise by
        ``"Cross Validation"``.
    """
    # Defaults are None rather than [] to avoid shared mutable defaults;
    # the emptiness check is loop-invariant, so evaluate it once.
    has_test = X_test is not None and len(X_test) != 0

    names = []
    scores_val = []
    scores_test = []
    for name, model in models:
        names.append(name)
        scores_val.append(
            np.sqrt(pp.score_model(model, X_train, y_train)).mean())
        if has_test:
            # The test score requires a model fitted on the training data.
            model.fit(X_train, y_train)
            scores_test.append(get_test_scores(model, X_test, y_test))

    columns = {"Model": names, "Cross Validation": scores_val}
    sort_key = "Cross Validation"
    if has_test:
        columns["Test"] = scores_test
        sort_key = "Test"

    tab = pd.DataFrame(columns)
    return tab.sort_values(by=[sort_key], ascending=True)
def get_validation_scores(models, X_train, y_train, folds,
                          X_test=None, y_test=None):
    """Tabulate per-fold cross-validation (and optionally test) scores.

    For every ``(name, model)`` pair the square root of
    ``pp.score_model(model, X_train, y_train, n_folds=folds)`` is stored,
    together with its ``.mean()`` and ``.std()``. When a non-empty ``X_test``
    is supplied, each model is additionally fitted on the training data and
    ``get_test_scores(model, X_test, y_test)`` is recorded as its test score.

    NOTE(review): an earlier definition with the same name (without the
    ``folds`` parameter) exists in this file; if both are executed in file
    order, this definition replaces it.

    Args:
        models: Iterable of ``(name, model)`` pairs.
        X_train: Training features.
        y_train: Training target.
        folds: Forwarded to ``pp.score_model`` as ``n_folds``.
        X_test: Optional test features. ``None`` or an empty container means
            "no test set".
        y_test: Optional test target, only used together with ``X_test``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``"Model"``,
        ``"Cross Validation (Mean)"``, ``"Cross Validation (Std)"`` and
        ``"Cross Validation (Scores)"``, plus ``"Test"`` when a test set was
        given. Rows are sorted ascending by ``"Test"`` if present, otherwise
        by ``"Cross Validation (Mean)"``.
    """
    # Defaults are None rather than [] to avoid shared mutable defaults;
    # the emptiness check is loop-invariant, so evaluate it once.
    has_test = X_test is not None and len(X_test) != 0

    names = []
    scores_val = []
    scores_val_mean = []
    scores_val_std = []
    scores_test = []
    for name, model in models:
        names.append(name)
        val_scores = np.sqrt(
            pp.score_model(model, X_train, y_train, n_folds=folds))
        scores_val.append(val_scores)
        scores_val_mean.append(val_scores.mean())
        scores_val_std.append(val_scores.std())
        if has_test:
            # The test score requires a model fitted on the training data.
            model.fit(X_train, y_train)
            scores_test.append(get_test_scores(model, X_test, y_test))

    columns = {
        "Model": names,
        "Cross Validation (Mean)": scores_val_mean,
        "Cross Validation (Std)": scores_val_std,
        "Cross Validation (Scores)": scores_val,
    }
    sort_key = "Cross Validation (Mean)"
    if has_test:
        columns["Test"] = scores_test
        sort_key = "Test"

    tab = pd.DataFrame(columns)
    tab.sort_values(by=[sort_key], ascending=True, inplace=True)
    return tab
# Register the remaining candidate estimators as (name, estimator) pairs.
models.extend([
    ("byr", model_byr),
    ("rforest", model_rforest),
    ("xgb", model_xgb),
    ("GBoost", model_GBoost),
    ("lgb", model_lgb),
    ("lasso_lars", model_lasso_lars),
    ("lsvr", model_lsvr),
    ("sgd", model_sgd),
    ("extra", model_extra),
])

# Score every registered model on the reduced training set: the square root
# of pp.score_model's result, averaged with .mean().
scores = []
names = []
for name, model in models:
    names.append(name)
    scores.append(
        np.sqrt(pp.score_model(model, train_X_reduced, train_y)).mean())

tab = pd.DataFrame({"Model": names, "Score": scores})
tab = tab.sort_values(by=['Score'], ascending=True)
print(tab)

# `models` holds (name, estimator) tuples, so the tuple must be unpacked
# before calling fit(); iterating with a single loop variable would call
# .fit on the tuple itself and raise AttributeError.
# NOTE(review): this fits on train_X while scoring above and the averaged
# model below use train_X_reduced -- confirm which feature set is intended.
for _name, model in models:
    model.fit(train_X, train_y)

# Evaluate and fit an ensemble built from four of the base models.
averaged_models = em.AveragingModels(
    models=[model_svr, model_ENet, model_KRR, model_xgb])

score_avg = np.sqrt(pp.score_model(averaged_models, train_X_reduced, train_y))
print(" Averaged base models score: {:.6f} ({:.6f})\n".format(
    score_avg.mean(), score_avg.std()))

averaged_models.fit(train_X_reduced, train_y)
file_name='submission.csv'): model.fit(X_train_set, y_train_set) predicted = np.expm1(model.predict(test_X_to_submit)) print(predicted) submission = pd.DataFrame({'Id': ids, 'SalePrice': predicted}) submission.to_csv(file_name, index=False) train_submit(em.AveragingModels(models=[model_svr, model_byr, model_xgb]), train_X_reduced, train_y, test_X_reduced, 'submission_avg_1.csv') # Best one so far: model_svr, model_KRR, model_xgb # Try this combination: model_svr, model_KRR, averaged_models_tree averaged_models = em.AveragingModels(models=[model_svr, model_KRR, model_xgb]) score_avg = np.sqrt(pp.score_model(averaged_models, train_X_reduced, train_y)) print(" Averaged base models score: {:.6f} ({:.6f})\n".format( score_avg.mean(), score_avg.std())) stacked_averaged_models = em.StackingAveragedModels( base_models=[model_KRR, model_xgb], meta_model=model_svr) score_stacked_averaged = np.sqrt( pp.score_model(stacked_averaged_models, train_X_reduced, train_y)) print(" Stacked Averaged base models score: {:.6f} ({:.6f})\n".format( score_stacked_averaged.mean(), score_stacked_averaged.std())) stacked_averaged_models.fit(train_X_reduced, train_y) predicted_prices_stacked_averaged = np.expm1( stacked_averaged_models.predict(test_X_reduced)) print(predicted_prices_stacked_averaged)