Exemple #1
0
def create_stats(x_train, x_test, y_train, y_test):
    """Gather lasso and ridge statistics with and without feature selection.

    ``lasso`` and ``ridge`` are each run with ``alpha=0.1`` on the full data,
    then again on copies of the data restricted to the columns returned by
    ``feature_selection(x_train, y_train, k=50)``.

    Returns:
        The result of ``pd.concat`` over the four stats objects, ordered as
        lasso, lasso (selected features), ridge, ridge (selected features).
    """
    alpha = 0.1

    # Only the third item of each helper's result (the stats) is needed.
    _, _, stats_lasso_all = lasso(
        x_train, x_test, y_train, y_test, alpha=alpha)
    _, _, stats_ridge_all = ridge(
        x_train, x_test, y_train, y_test, alpha=alpha)

    selected = feature_selection(x_train, y_train, k=50)
    train_subset = x_train[selected].copy()
    test_subset = x_test[selected].copy()

    _, _, stats_lasso_sel = lasso(
        train_subset, test_subset, y_train, y_test, alpha=alpha)
    _, _, stats_ridge_sel = ridge(
        train_subset, test_subset, y_train, y_test, alpha=alpha)

    return pd.concat([
        stats_lasso_all,
        stats_lasso_sel,
        stats_ridge_all,
        stats_ridge_sel,
    ])
Exemple #2
0
def create_stats(x_train, x_test, y_train, y_test):
    """Concatenate lasso/ridge stats for the full and the selected features.

    The columns to keep come from ``feature_selection(x_train, y_train, 50)``.
    Each model is run first on the full data and then on the reduced data.

    Returns:
        ``pd.concat`` of the stats in the order lasso, lasso (selected),
        ridge, ridge (selected).
    """
    chosen = feature_selection(x_train, y_train, 50)
    full_data = (x_train, x_test)
    reduced_data = (x_train[chosen], x_test[chosen])

    frames = []
    for model in (lasso, ridge):
        for train_x, test_x in (full_data, reduced_data):
            # Each helper returns a 3-tuple; the stats are the last item.
            _, _, stats = model(train_x, test_x, y_train, y_test)
            frames.append(stats)

    return pd.concat(frames)
def create_stats(x_train, x_test, y_train, y_test):
    """Build a combined stats table for lasso and ridge.

    Each model is evaluated on all columns and on the columns returned by
    ``feature_selection(x_train, y_train, 50)``.

    Returns:
        ``pd.concat`` of the stats in the order lasso, lasso (selected),
        ridge, ridge (selected).
    """
    picked = feature_selection(x_train, y_train, 50)

    _, _, lasso_full = lasso(x_train, x_test, y_train, y_test)
    # A fresh column slice is taken for every reduced-feature fit.
    lasso_train = x_train.loc[:, picked]
    lasso_test = x_test.loc[:, picked]
    _, _, lasso_picked = lasso(lasso_train, lasso_test, y_train, y_test)

    _, _, ridge_full = ridge(x_train, x_test, y_train, y_test)
    ridge_train = x_train.loc[:, picked]
    ridge_test = x_test.loc[:, picked]
    _, _, ridge_picked = ridge(ridge_train, ridge_test, y_train, y_test)

    return pd.concat([lasso_full, lasso_picked, ridge_full, ridge_picked])
Exemple #4
0
def create_stats(x_train, x_test, y_train, y_test, enc="labelencoder"):
    """Summarise linear, lasso and ridge runs before and after selection.

    The three helpers are run on the full data, then on the columns returned
    by ``feature_selection(x_train, y_train, k=50)``. ``enc`` is accepted but
    not used in this function.

    Returns:
        The concatenated stats, re-indexed with one label per run, where the
        ``rmse`` column has been folded into ``mse`` (NaN treated as 0) and
        then dropped.
    """
    # Baseline runs on every column.
    _, _, linear_all = linear_model(x_train, x_test, y_train, y_test, 0.01)
    _, _, lasso_all = lasso(x_train, x_test, y_train, y_test, alpha=0.1)
    _, _, ridge_all = ridge(x_train, x_test, y_train, y_test, alpha=0.1)

    # Restrict both splits to the selected columns.
    keep = feature_selection(x_train, y_train, k=50)
    train_sel = x_train[keep]
    test_sel = x_test[keep]

    # Same three runs on the reduced data.
    _, _, linear_sel = linear_model(train_sel, test_sel, y_train, y_test,
                                    0.01)
    _, _, lasso_sel = lasso(train_sel, test_sel, y_train, y_test, alpha=0.1)
    _, _, ridge_sel = ridge(train_sel, test_sel, y_train, y_test, alpha=0.1)

    labelled = {
        'lm_score': linear_all,
        'lm_features_score': linear_sel,
        'la_score': lasso_all,
        'la_features_score': lasso_sel,
        'ri_score': ridge_all,
        'ri_features_score': ridge_sel,
    }
    complete_stats = pd.concat(list(labelled.values()))
    complete_stats.index = list(labelled)

    # Some runs leave rmse or mse as NaN; treat those as zero before merging.
    complete_stats[['rmse', 'mse']] = complete_stats[['rmse', 'mse']].fillna(0)
    complete_stats['mse'] = complete_stats['mse'] + complete_stats['rmse']

    return complete_stats.drop(columns=['rmse'])
Exemple #5
0
def create_stats(x_train, x_test, y_train, y_test):
    """Collect lasso and ridge stats, with and without feature selection.

    ``lasso`` and ``ridge`` are each run with ``alpha=0.1`` on the full data
    and on the columns returned by
    ``feature_selection(x_train, y_train, k=50)``. Only element ``[2]`` of
    each helper's result (the stats) is kept.

    Returns:
        The four stats objects joined side by side with
        ``pd.concat(..., axis=1)``, in the order lasso, lasso (selected),
        ridge, ridge (selected).
    """
    # The selection depends only on (x_train, y_train, k), so compute it once
    # and share it between both models instead of repeating the same call.
    # NOTE(review): assumes feature_selection is deterministic - confirm.
    selected = feature_selection(x_train, y_train, k=50)

    lasso_stats = lasso(x_train, x_test, y_train, y_test, alpha=0.1)[2]
    lasso_stats_ft = lasso(x_train[selected],
                           x_test[selected],
                           y_train,
                           y_test,
                           alpha=0.1)[2]
    ridge_stats = ridge(x_train, x_test, y_train, y_test, alpha=0.1)[2]
    ridge_stats_ft = ridge(x_train[selected],
                           x_test[selected],
                           y_train,
                           y_test,
                           alpha=0.1)[2]

    complete_stats = pd.concat(
        [lasso_stats, lasso_stats_ft, ridge_stats, ridge_stats_ft], axis=1)
    return complete_stats
Exemple #6
0
def create_stats(X_train, X_test, y_train, y_test, enc="labelencoder"):
    """Score linear, lasso and ridge on all and on selected features.

    The three helpers are run on the full data, then on the columns returned
    by ``percentile_k_features(X_train, y_train, k=50)``. ``enc`` is accepted
    but not used in this function.

    Returns:
        The concatenated stats, indexed by one label per run, where ``rmse``
        holds ``mse + rmse`` (NaN treated as 0) and ``mse`` has been dropped.
    """
    # Runs on every column.
    _, _, linear_full = linear_model(X_train, X_test, y_train, y_test, 0.01)
    _, _, lasso_full = lasso(X_train, X_test, y_train, y_test)
    _, _, ridge_full = ridge(X_train, X_test, y_train, y_test)

    chosen = percentile_k_features(X_train, y_train, k=50)
    train_chosen = X_train[chosen]
    test_chosen = X_test[chosen]

    # Same runs on the reduced column set.
    _, _, linear_chosen = linear_model(train_chosen, test_chosen, y_train,
                                       y_test, 0.01)
    _, _, lasso_chosen = lasso(train_chosen, test_chosen, y_train, y_test)
    _, _, ridge_chosen = ridge(train_chosen, test_chosen, y_train, y_test)

    row_labels = [
        'linear_model_scores',
        'linear_model_scores_features',
        'lasso_scores',
        'lasso_scores_features',
        'ridge_scores',
        'ridge_scores_features',
    ]
    complete_stats = pd.concat([
        linear_full,
        linear_chosen,
        lasso_full,
        lasso_chosen,
        ridge_full,
        ridge_chosen,
    ])
    complete_stats.index = row_labels

    # Missing values count as zero; the two error columns are merged into
    # ``rmse`` and ``mse`` is discarded.
    merged_error = complete_stats.mse.fillna(0) + complete_stats.rmse.fillna(0)
    complete_stats['rmse'] = merged_error
    return complete_stats.drop(columns=['mse'])
def create_stats(x_train, x_test, y_train, y_test):
    """Collect lasso and ridge stats on all and on selected features.

    Both models use ``alpha=0.1``. The reduced column set comes from
    ``feature_selection(x_train, y_train, 50)``. No plain linear baseline is
    fitted and no residual plots are produced here.

    Returns:
        ``pd.concat`` of the stats in the order lasso, lasso (selected),
        ridge, ridge (selected).
    """
    alpha = 0.1

    # Baselines on every column; only the stats item is kept.
    _, _, lasso_base = lasso(x_train, x_test, y_train, y_test, alpha=alpha)
    _, _, ridge_base = ridge(x_train, x_test, y_train, y_test, alpha=alpha)

    kept_columns = feature_selection(x_train, y_train, 50)
    train_kept = x_train[kept_columns]
    test_kept = x_test[kept_columns]

    # Same models on the reduced column set.
    _, _, lasso_kept = lasso(train_kept, test_kept, y_train, y_test,
                             alpha=alpha)
    _, _, ridge_kept = ridge(train_kept, test_kept, y_train, y_test,
                             alpha=alpha)

    return pd.concat([lasso_base, lasso_kept, ridge_base, ridge_kept])
Exemple #8
0
def create_stats(x_train, x_test, y_train, y_test):
    """Collect lasso and ridge stats on all and on selected features.

    ``lasso`` and ``ridge`` are each called with ``0.1`` as their fifth
    positional argument, first on the full data and then on copies of the
    data restricted to the columns returned by
    ``feature_selection(x_train, y_train, k=50)``.

    Returns:
        ``pd.concat`` of the stats in the order lasso, lasso (selected),
        ridge, ridge (selected).
    """
    # Only the stats (third item) of each helper's result is used.
    _, _, lasso_stat = lasso(x_train, x_test, y_train, y_test, 0.1)
    _, _, ridge_stat = ridge(x_train, x_test, y_train, y_test, 0.1)

    features = feature_selection(x_train, y_train, k=50)
    x_train_ft = x_train[features].copy()
    x_test_ft = x_test[features].copy()

    _, _, lasso_stat_ft = lasso(x_train_ft, x_test_ft, y_train, y_test, 0.1)
    _, _, ridge_stat_ft = ridge(x_train_ft, x_test_ft, y_train, y_test, 0.1)

    # The earlier version re-ran feature_selection on the reduced data here
    # and discarded the result; that dead computation has been removed.
    return pd.concat([lasso_stat, lasso_stat_ft, ridge_stat, ridge_stat_ft])
def create_stats(X_train, X_test, y_train, y_test, enc="labelencoder"):
    """Score linear, lasso and ridge on all and on the best features.

    The three helpers are run on the full data, then on the columns returned
    by ``feature_selection(X_train, y_train, k=50)``. ``enc`` is accepted but
    not used in this function.

    Returns:
        The stats concatenated with a fresh integer index, all-feature runs
        first and best-feature runs second, with the ``rmse`` column removed.
    """
    # Runs on every column; linear_model gets '' as its fifth argument.
    _, _, linear_all = linear_model(X_train, X_test, y_train, y_test, '')
    _, _, lasso_all = lasso(X_train, X_test, y_train, y_test)
    _, _, ridge_all = ridge(X_train, X_test, y_train, y_test)

    top_columns = feature_selection(X_train, y_train, k=50)

    # Each reduced-feature run gets its own freshly sliced inputs.
    _, _, linear_top = linear_model(
        X_train[top_columns], X_test[top_columns], y_train, y_test, '')
    _, _, lasso_top = lasso(
        X_train[top_columns], X_test[top_columns], y_train, y_test)
    _, _, ridge_top = ridge(
        X_train[top_columns], X_test[top_columns], y_train, y_test)

    ordered_scores = [
        linear_all, lasso_all, ridge_all,
        linear_top, lasso_top, ridge_top,
    ]
    complete_stats = pd.concat(ordered_scores, ignore_index=True)
    return complete_stats.drop(columns=['rmse'])