def create_stats(x_train, x_test, y_train, y_test):
    """Collect lasso and ridge stats with and without feature selection.

    Both models are first fitted on the full feature set, then on copies of
    the train/test frames restricted to the columns returned by
    ``feature_selection(x_train, y_train, k=50)``.  Every fit uses
    ``alpha=0.1``.  Only the third item returned by ``lasso`` / ``ridge``
    (the stats object) is kept.

    Returns:
        The result of ``pd.concat`` over the four stats objects, ordered as
        lasso, lasso (selected features), ridge, ridge (selected features).
    """
    # Baseline fits on every available column.
    _, _, full_lasso = lasso(x_train, x_test, y_train, y_test, alpha=0.1)
    _, _, full_ridge = ridge(x_train, x_test, y_train, y_test, alpha=0.1)

    # Restrict both splits to the selected columns; copies are taken so the
    # reduced frames are independent of the inputs.
    chosen = feature_selection(x_train, y_train, k=50)
    reduced_train = x_train[chosen].copy()
    reduced_test = x_test[chosen].copy()

    _, _, reduced_lasso = lasso(
        reduced_train, reduced_test, y_train, y_test, alpha=0.1)
    _, _, reduced_ridge = ridge(
        reduced_train, reduced_test, y_train, y_test, alpha=0.1)

    return pd.concat([full_lasso, reduced_lasso, full_ridge, reduced_ridge])
def create_stats(x_train, x_test, y_train, y_test):
    """Gather lasso and ridge stats on full and feature-selected data.

    ``feature_selection(x_train, y_train, 50)`` is evaluated first and used
    to build column-restricted versions of both splits.  Each model is then
    fitted with its default settings, first on the full splits and then on
    the restricted ones.  Only the third item returned by each fit (the
    stats object) is retained.

    Returns:
        The result of ``pd.concat`` over the four stats objects, ordered as
        lasso, lasso (selected features), ridge, ridge (selected features).
    """
    chosen = feature_selection(x_train, y_train, 50)
    splits = (
        (x_train, x_test),
        (x_train[chosen], x_test[chosen]),
    )

    collected = []
    # Outer loop over models, inner loop over data variants, so the output
    # order is: lasso/full, lasso/selected, ridge/full, ridge/selected.
    for fit in (lasso, ridge):
        for train_part, test_part in splits:
            _, _, stats = fit(train_part, test_part, y_train, y_test)
            collected.append(stats)

    return pd.concat(collected)
def create_stats(x_train, x_test, y_train, y_test):
    """Build a combined stats table for lasso and ridge.

    Feature selection (``feature_selection(x_train, y_train, 50)``) runs
    first.  Each model is fitted with default settings on the full splits and
    on the splits restricted, via ``.loc``, to the selected columns.  Only
    the stats object (third returned item) of each fit is used.

    Returns:
        The result of ``pd.concat`` over the four stats objects, ordered as
        lasso, lasso (selected features), ridge, ridge (selected features).
    """
    ft = feature_selection(x_train, y_train, 50)

    def _selected(frame):
        # A fresh column subset is produced on every call.
        return frame.loc[:, ft]

    _, _, lasso_full = lasso(x_train, x_test, y_train, y_test)
    _, _, lasso_sel = lasso(
        _selected(x_train), _selected(x_test), y_train, y_test)
    _, _, ridge_full = ridge(x_train, x_test, y_train, y_test)
    _, _, ridge_sel = ridge(
        _selected(x_train), _selected(x_test), y_train, y_test)

    return pd.concat([lasso_full, lasso_sel, ridge_full, ridge_sel])
def create_stats(x_train, x_test, y_train, y_test, enc="labelencoder"):
    """Score linear, lasso and ridge models before and after feature selection.

    The three models are fitted on the full splits, then again on the splits
    restricted to the columns returned by
    ``feature_selection(x_train, y_train, k=50)``.  ``linear_model`` receives
    ``0.01`` as its fifth positional argument; ``lasso`` and ``ridge`` use
    ``alpha=0.1``.  ``enc`` is accepted but not used here.

    Returns:
        The concatenated stats, indexed by a fixed set of six labels, with
        missing ``rmse`` / ``mse`` values filled with 0, ``rmse`` added into
        ``mse``, and the ``rmse`` column dropped.
    """
    # Baseline: every model on all columns.
    _, _, lm_full = linear_model(x_train, x_test, y_train, y_test, 0.01)
    _, _, la_full = lasso(x_train, x_test, y_train, y_test, alpha=0.1)
    _, _, ri_full = ridge(x_train, x_test, y_train, y_test, alpha=0.1)

    # Keep only the selected columns for the second round of fits.
    selected = feature_selection(x_train, y_train, k=50)
    train_sel = x_train[selected]
    test_sel = x_test[selected]

    _, _, lm_sel = linear_model(train_sel, test_sel, y_train, y_test, 0.01)
    _, _, la_sel = lasso(train_sel, test_sel, y_train, y_test, alpha=0.1)
    _, _, ri_sel = ridge(train_sel, test_sel, y_train, y_test, alpha=0.1)

    complete_stats = pd.concat(
        [lm_full, lm_sel, la_full, la_sel, ri_full, ri_sel])
    complete_stats.index = [
        'lm_score',
        'lm_features_score',
        'la_score',
        'la_features_score',
        'ri_score',
        'ri_features_score',
    ]

    # Some models leave rmse or mse as NaN; zero-fill both so they can be
    # summed into the single 'mse' column that is kept.
    error_cols = ['rmse', 'mse']
    complete_stats[error_cols] = complete_stats[error_cols].fillna(0)
    complete_stats.mse += complete_stats.rmse
    return complete_stats.drop(['rmse'], axis=1)
def create_stats(x_train, x_test, y_train, y_test):
    """Collect lasso and ridge stats with and without feature selection.

    Each model is fitted with ``alpha=0.1`` on the full splits and on the
    splits restricted to the columns returned by
    ``feature_selection(x_train, y_train, k=50)``.  The selection is computed
    once and shared by both models, so they are compared on the same column
    subset and the selection work is not repeated.

    Only the third item returned by ``lasso`` / ``ridge`` (the stats object)
    is used.

    Returns:
        The result of ``pd.concat(..., axis=1)`` over the four stats objects,
        i.e. they are joined side by side, in the order lasso, lasso
        (selected features), ridge, ridge (selected features).
    """
    lasso_stats = lasso(x_train, x_test, y_train, y_test, alpha=0.1)[2]

    # One selection, reused by both reduced fits.
    selected = feature_selection(x_train, y_train, k=50)

    lasso_stats_ft = lasso(
        x_train[selected], x_test[selected], y_train, y_test, alpha=0.1)[2]
    ridge_stats = ridge(x_train, x_test, y_train, y_test, alpha=0.1)[2]
    ridge_stats_ft = ridge(
        x_train[selected], x_test[selected], y_train, y_test, alpha=0.1)[2]

    # NOTE(review): axis=1 places the stats next to each other rather than
    # stacking them as rows -- confirm this is the layout callers expect.
    complete_stats = pd.concat(
        [lasso_stats, lasso_stats_ft, ridge_stats, ridge_stats_ft], axis=1)
    return complete_stats
def create_stats(X_train, X_test, y_train, y_test, enc="labelencoder"):
    """Score linear, lasso and ridge models before and after feature selection.

    The three models are fitted on the full splits, then on the splits
    restricted to the columns returned by
    ``percentile_k_features(X_train, y_train, k=50)``.  ``linear_model``
    receives ``0.01`` as its fifth positional argument; ``lasso`` and
    ``ridge`` run with their defaults.  ``enc`` is accepted but not used here.

    Returns:
        The concatenated stats, indexed by a fixed set of six labels, where
        ``rmse`` holds the sum of the zero-filled ``mse`` and ``rmse`` values
        and the ``mse`` column has been dropped.
    """
    # Round one: all columns.
    _, _, lm_full = linear_model(X_train, X_test, y_train, y_test, 0.01)
    _, _, lasso_full = lasso(X_train, X_test, y_train, y_test)
    _, _, ridge_full = ridge(X_train, X_test, y_train, y_test)

    # Round two: selected columns only.
    keep = percentile_k_features(X_train, y_train, k=50)
    X_train_sel = X_train[keep]
    X_test_sel = X_test[keep]

    _, _, lm_sel = linear_model(X_train_sel, X_test_sel, y_train, y_test, 0.01)
    _, _, lasso_sel = lasso(X_train_sel, X_test_sel, y_train, y_test)
    _, _, ridge_sel = ridge(X_train_sel, X_test_sel, y_train, y_test)

    # Pair each row label with its stats so the two stay in step.
    labelled = [
        ('linear_model_scores', lm_full),
        ('linear_model_scores_features', lm_sel),
        ('lasso_scores', lasso_full),
        ('lasso_scores_features', lasso_sel),
        ('ridge_scores', ridge_full),
        ('ridge_scores_features', ridge_sel),
    ]
    complete_stats = pd.concat([stats for _, stats in labelled])
    complete_stats.index = [label for label, _ in labelled]

    # Missing error values count as 0; fold mse into rmse and keep only rmse.
    complete_stats.rmse = (
        complete_stats.mse.fillna(0) + complete_stats.rmse.fillna(0))
    return complete_stats.drop(['mse'], axis=1)
def create_stats(x_train, x_test, y_train, y_test):
    """Collect lasso and ridge stats with and without feature selection.

    Both models are fitted with ``alpha=0.1`` on the full splits, then on the
    splits restricted to the columns returned by
    ``feature_selection(x_train, y_train, 50)``.  Only the third item
    returned by each fit (the stats object) is kept.

    Returns:
        The result of ``pd.concat`` over the four stats objects, ordered as
        lasso, lasso (selected features), ridge, ridge (selected features).
    """
    # Baselines on every column.
    _, _, lasso_base = lasso(x_train, x_test, y_train, y_test, alpha=0.1)
    _, _, ridge_base = ridge(x_train, x_test, y_train, y_test, alpha=0.1)

    # Same models on the selected columns only.
    kept_columns = feature_selection(x_train, y_train, 50)
    train_kept = x_train[kept_columns]
    test_kept = x_test[kept_columns]

    _, _, lasso_kept = lasso(
        train_kept, test_kept, y_train, y_test, alpha=0.1)
    _, _, ridge_kept = ridge(
        train_kept, test_kept, y_train, y_test, alpha=0.1)

    return pd.concat([lasso_base, lasso_kept, ridge_base, ridge_kept])
def create_stats(x_train, x_test, y_train, y_test):
    """Collect lasso and ridge stats with and without feature selection.

    Both models are fitted on the full splits, then on copies of the splits
    restricted to the columns returned by
    ``feature_selection(x_train, y_train, k=50)``.  ``0.1`` is passed as the
    fifth positional argument of every ``lasso`` / ``ridge`` call.  Only the
    third item returned by each fit (the stats object) is kept.

    A second feature-selection pass over the already-reduced training frame,
    whose result was never read, has been removed.

    Returns:
        The result of ``pd.concat`` over the four stats objects, ordered as
        lasso, lasso (selected features), ridge, ridge (selected features).
    """
    _, _, lasso_stat = lasso(x_train, x_test, y_train, y_test, 0.1)
    _, _, ridge_stat = ridge(x_train, x_test, y_train, y_test, 0.1)

    features = feature_selection(x_train, y_train, k=50)
    # Copies keep the reduced frames independent of the caller's data.
    x_train_ft = x_train[features].copy()
    x_test_ft = x_test[features].copy()

    _, _, lasso_stat_ft = lasso(x_train_ft, x_test_ft, y_train, y_test, 0.1)
    _, _, ridge_stat_ft = ridge(x_train_ft, x_test_ft, y_train, y_test, 0.1)

    complete_stats = pd.concat(
        [lasso_stat, lasso_stat_ft, ridge_stat, ridge_stat_ft])
    return complete_stats
def create_stats(X_train, X_test, y_train, y_test, enc="labelencoder"):
    """Score linear, lasso and ridge models before and after feature selection.

    The three models are fitted on the full splits, then on the splits
    restricted to the columns returned by
    ``feature_selection(X_train, y_train, k=50)``.  ``linear_model`` receives
    an empty string as its fifth positional argument; ``lasso`` and ``ridge``
    run with their defaults.  ``enc`` is accepted but not used here.

    Returns:
        The stats concatenated with ``ignore_index=True`` in the order
        linear, lasso, ridge (full), then linear, lasso, ridge (selected
        features), with the ``rmse`` column removed.
    """
    def _score(fit, train, test, *extra):
        # Each fit returns three items; only the last (the score) is needed.
        _, _, score = fit(train, test, y_train, y_test, *extra)
        return score

    full_scores = [
        _score(linear_model, X_train, X_test, ''),
        _score(lasso, X_train, X_test),
        _score(ridge, X_train, X_test),
    ]

    best_features = feature_selection(X_train, y_train, k=50)

    # The column subset is re-taken for every fit.
    selected_scores = [
        _score(linear_model,
               X_train[best_features], X_test[best_features], ''),
        _score(lasso, X_train[best_features], X_test[best_features]),
        _score(ridge, X_train[best_features], X_test[best_features]),
    ]

    complete_stats = pd.concat(
        full_scores + selected_scores, ignore_index=True)
    del complete_stats['rmse']
    return complete_stats