Example #1
0
def create_stats(x_train, x_test, y_train, y_test, enc="labelencoder"):
    """Stack the score tables of linear, lasso and ridge fits into one frame.

    Each model helper is run twice: once on every column of the inputs and
    once on only the columns returned by
    ``feature_selection(x_train, y_train, k=50)``. The third value returned
    by each helper is concatenated with ``pd.concat`` and the rows are
    relabelled per model.

    ``enc`` is accepted but never read inside this function.

    Returns:
        The concatenated frame, with NaN in ``rmse``/``mse`` replaced by 0,
        ``rmse`` added into ``mse`` and the ``rmse`` column removed.
    """

    def fit_all(train, test):
        # Call order is fixed: linear, then lasso, then ridge.
        # NOTE(review): meaning of the positional 0.01 is defined by
        # linear_model, which is not visible here.
        _, _, linear_stats = linear_model(train, test, y_train, y_test, 0.01)
        _, _, lasso_stats = lasso(train, test, y_train, y_test, alpha=0.1)
        _, _, ridge_stats = ridge(train, test, y_train, y_test, alpha=0.1)
        return linear_stats, lasso_stats, ridge_stats

    # First pass: every column.
    lm_all, la_all, ri_all = fit_all(x_train, x_test)

    # Second pass: only the columns picked by feature_selection.
    chosen = feature_selection(x_train, y_train, k=50)
    lm_sel, la_sel, ri_sel = fit_all(x_train[chosen], x_test[chosen])

    # Interleave full / selected results per model and label the rows.
    combined = pd.concat([lm_all, lm_sel, la_all, la_sel, ri_all, ri_sel])
    combined.index = [
        'lm_score',
        'lm_features_score',
        'la_score',
        'la_features_score',
        'ri_score',
        'ri_features_score',
    ]

    # Some rows carry NaN in one of the two error columns; zero them so the
    # columns can be summed into a single 'mse' column.
    error_cols = ['rmse', 'mse']
    combined[error_cols] = combined[error_cols].fillna(0)
    combined['mse'] = combined['mse'] + combined['rmse']

    return combined.drop(columns=['rmse'])
Example #2
0
def create_stats(X_train, X_test, y_train, y_test, enc="labelencoder"):
    """Build one score table covering linear, lasso and ridge fits.

    The three model helpers are called on the full inputs, then again on
    the columns returned by ``percentile_k_features(X_train, y_train, k=50)``.
    Only the third value each helper returns is kept; the six results are
    concatenated and given one row label each.

    ``enc`` is accepted but never read inside this function.

    Returns:
        The concatenated frame in which ``rmse`` holds the sum of the
        NaN-filled ``mse`` and ``rmse`` columns, with ``mse`` removed.
    """

    def score_models(train, test):
        # Call order is fixed: linear, then lasso, then ridge.
        # NOTE(review): meaning of the positional 0.01 is defined by
        # linear_model, which is not visible here.
        _, _, linear_part = linear_model(train, test, y_train, y_test, 0.01)
        _, _, lasso_part = lasso(train, test, y_train, y_test)
        _, _, ridge_part = ridge(train, test, y_train, y_test)
        return linear_part, lasso_part, ridge_part

    # Pass 1: all columns.
    linear_full, lasso_full, ridge_full = score_models(X_train, X_test)

    # Pass 2: selected columns only.
    keep = percentile_k_features(X_train, y_train, k=50)
    linear_kept, lasso_kept, ridge_kept = score_models(X_train[keep],
                                                       X_test[keep])

    complete_stats = pd.concat([
        linear_full,
        linear_kept,
        lasso_full,
        lasso_kept,
        ridge_full,
        ridge_kept,
    ])
    complete_stats.index = [
        'linear_model_scores',
        'linear_model_scores_features',
        'lasso_scores',
        'lasso_scores_features',
        'ridge_scores',
        'ridge_scores_features',
    ]

    # Treat missing error values as 0, then fold 'mse' into 'rmse'.
    mse_filled = complete_stats.mse.fillna(0)
    rmse_filled = complete_stats.rmse.fillna(0)
    complete_stats['rmse'] = mse_filled + rmse_filled

    return complete_stats.drop(columns=['mse'])
def create_stats(X_train, X_test, y_train, y_test, enc="labelencoder"):
    """Concatenate score tables from linear, lasso and ridge fits.

    Each model helper runs on the full inputs and then on the columns
    returned by ``feature_selection(X_train, y_train, k=50)``. Only the
    third value each helper returns is used.

    ``enc`` is accepted but never read inside this function.

    Returns:
        The six results stacked with a fresh 0..n-1 index (full-input
        results first, selected-column results after), without the
        ``rmse`` column.
    """
    # Pass 1: every column.
    # NOTE(review): linear_model receives '' as its fifth argument here;
    # what that value means is defined by the helper, not visible here.
    _, _, linear_all = linear_model(X_train, X_test, y_train, y_test, '')
    _, _, lasso_all = lasso(X_train, X_test, y_train, y_test)
    _, _, ridge_all = ridge(X_train, X_test, y_train, y_test)

    # Pass 2: selected columns. The inputs are re-indexed for every call so
    # each helper gets its own freshly selected subset.
    best_features = feature_selection(X_train, y_train, k=50)
    _, _, linear_best = linear_model(
        X_train[best_features], X_test[best_features], y_train, y_test, '')
    _, _, lasso_best = lasso(
        X_train[best_features], X_test[best_features], y_train, y_test)
    _, _, ridge_best = ridge(
        X_train[best_features], X_test[best_features], y_train, y_test)

    pieces = [
        linear_all,
        lasso_all,
        ridge_all,
        linear_best,
        lasso_best,
        ridge_best,
    ]
    stacked = pd.concat(pieces, ignore_index=True)
    return stacked.drop(columns=['rmse'])