Ejemplo n.º 1
0
def spline_classification_plot(ax, X, y, X_eval, y_eval, gam_ref):
    """Fit a monotone logistic GAM, draw it on ``ax`` and score it on eval data.

    A ``LogisticGAM`` with one spline term on feature 0 (5 splines,
    ``monotonic_inc`` constraint) is tuned with ``gridsearch(X, y)``. The
    fitted curve and its 95% confidence band are plotted over 100 evenly
    spaced points in [0, 1], and a text box with the metrics is added.

    Args:
        ax: matplotlib-style axes to draw on (``plot``, ``text``,
            ``set_xlim`` and ``set_ylim`` are called on it).
        X: training inputs passed to ``gridsearch``.
        y: training labels passed to ``gridsearch``.
        X_eval: inputs on which the metrics are computed.
        y_eval: labels matching ``X_eval``.
        gam_ref: reference model handed to ``MseEval`` together with the
            fitted GAM.

    Returns:
        Tuple ``(ece, mce, brier, acc, mse, ax, confi)`` where ``confi`` is
        ``gam.confidence_intervals(X_eval, width=0.95)``.
    """
    # documentation of LogisticGAM: https://pygam.readthedocs.io/en/latest/api/logisticgam.html
    gam = LogisticGAM(s(0, constraints='monotonic_inc',
                        n_splines=5)).gridsearch(X, y)

    # Draw the fitted curve (green) and its 95% interval (red, dashed).
    XX = np.linspace(0, 1, 100)
    ax.plot(XX, gam.predict_proba(XX), c='g')
    ax.plot(XX, gam.confidence_intervals(XX, width=0.95), c='r', ls='--')

    # compute ece and acc after calibration
    y_ = gam.predict_proba(X_eval)
    # Two-column matrix [1 - p, p]; built once and shared by the metric helpers.
    probs = np.array([1 - y_, y_]).T
    ece = EceEval(probs, y_eval, num_bins=100)
    mce = MceEval(probs, y_eval, num_bins=100)
    brier = BrierEval(probs, y_eval)
    mse = MseEval(gam, gam_ref, num_bins=100)
    acc = gam.accuracy(X_eval, y_eval)

    ax.text(0.05,
            0.75,
            'ECE=%.4f\nMCE=%.4f\nBrier=%.4f\nACC=%.4f\nMSE=%.4f' %
            (ece, mce, brier, acc, mse),
            size=6,
            ha='left',
            va='center',
            bbox={
                'facecolor': 'green',
                'alpha': 0.5,
                'pad': 4
            })
    ax.set_xlim(0.0, 1.0)
    ax.set_ylim(0.0, 1.0)

    confi = gam.confidence_intervals(X_eval, width=0.95)
    # summary() prints the report itself; the old `print gam.summary()` was a
    # Python 2 print statement, which is a SyntaxError on Python 3.
    gam.summary()
    return ece, mce, brier, acc, mse, ax, confi
Ejemplo n.º 2
0
class AdaptiveLogisticGAM(BaseEstimator, RegressorMixin):
    """Estimator wrapper that fits a ``LogisticGAM`` through its grid search.

    Args:
        param_grid: keyword arguments forwarded to ``LogisticGAM.gridsearch``;
            ``GAM_GRID_BASE`` is used when ``None``.
        gam_params: keyword arguments forwarded to the ``LogisticGAM``
            constructor; an empty dict is used when ``None``.
    """

    def __init__(self, param_grid=None, gam_params=None):
        # Keep the constructor argument as an attribute of the same name:
        # BaseEstimator.get_params() looks every __init__ parameter up by
        # name and fails if one is missing.
        self.gam_params = gam_params

        # create GAM
        self.model = LogisticGAM(**({} if gam_params is None else gam_params))

        # set grid search parameters
        if param_grid is None:
            param_grid = GAM_GRID_BASE
        self.param_grid = param_grid

    def fit(self, X, y):
        """Tune and fit the wrapped GAM on ``(X, y)`` and return ``self``."""
        if isinstance(X, pd.DataFrame):
            X = X.values

        # fit using grid-search
        self.model.gridsearch(X, y, progress=False, **self.param_grid)
        # Returning self follows the estimator convention and allows
        # chaining such as ``est.fit(X, y).predict(X)``.
        return self

    def predict(self, X):
        """Return ``self.model.predict`` for ``X`` (DataFrames are unwrapped)."""
        if isinstance(X, pd.DataFrame):
            X = X.values
        return self.model.predict(X)

    def predict_proba(self, X):
        """Return ``self.model.predict_proba`` for ``X`` (DataFrames are unwrapped)."""
        if isinstance(X, pd.DataFrame):
            X = X.values
        return self.model.predict_proba(X)
Ejemplo n.º 3
0
def spline_calibration(X, y):
    """Calibrate with a monotone logistic GAM and return its probabilities on X.

    A ``LogisticGAM`` with a single ``monotonic_inc`` spline term on
    feature 0 is tuned with ``gridsearch(X, y)``; the result of
    ``predict_proba(X)`` on the same inputs is returned.
    """
    # documentation of LogisticGAM: https://pygam.readthedocs.io/en/latest/api/logisticgam.html
    monotone_term = s(0, constraints='monotonic_inc')
    calibrator = LogisticGAM(monotone_term).gridsearch(X, y)
    return calibrator.predict_proba(X)
Ejemplo n.º 4
0
def calibrate_propensities(propensities, treatment):
    """Calibrate propensity scores post hoc against the observed treatments.

    A ``LogisticGAM`` with one spline term on feature 0 is fitted with the
    propensities as input and the treatment indicator as target.

    Args:
        propensities: propensity scores
        treatment: treatment indicator

    Returns:
        The fitted model's ``predict_proba`` output for ``propensities``.
    """
    calibrator = LogisticGAM(s(0))
    fitted = calibrator.fit(propensities, treatment)
    return fitted.predict_proba(propensities)
Ejemplo n.º 5
0
def calibrate(ps, treatment):
    """Return propensity scores recalibrated by a logistic GAM.

    Ref: https://pygam.readthedocs.io/en/latest/api/logisticgam.html

    Args:
        ps (numpy.array): a propensity score vector
        treatment (numpy.array): a binary treatment vector (0: control, 1: treated)

    Returns:
        (numpy.array): a calibrated propensity score vector
    """
    # One spline term on the only feature (index 0), fitted on the scores.
    spline_on_score = s(0)
    model = LogisticGAM(spline_on_score).fit(ps, treatment)
    calibrated = model.predict_proba(ps)
    return calibrated
Ejemplo n.º 6
0
def spline_classification(X, y, X_eval, y_eval, gam_ref):
    """Fit a monotone logistic GAM on (X, y) and score it on the eval data.

    The model is a ``LogisticGAM`` with one spline term on feature 0
    (5 splines, ``monotonic_inc`` constraint) tuned with ``gridsearch``.

    Returns:
        Tuple ``(ece, mce, brier, acc, mse, confi)`` where ``confi`` is the
        model's 0.95-width confidence intervals on ``X_eval``.
    """
    # documentation of LogisticGAM: https://pygam.readthedocs.io/en/latest/api/logisticgam.html
    term = s(0, constraints='monotonic_inc', n_splines=5)
    model = LogisticGAM(term).gridsearch(X, y)

    # Scores after calibration: stack [1 - p, p] as two columns for the
    # metric helpers.
    p_pos = model.predict_proba(X_eval)
    two_class = np.array([1 - p_pos, p_pos]).T

    ece = EceEval(two_class, y_eval, num_bins=100)
    mce = MceEval(two_class, y_eval, num_bins=100)
    brier = BrierEval(two_class, y_eval)
    mse = MseEval(model, gam_ref, num_bins=100)
    acc = model.accuracy(X_eval, y_eval)

    # Confidence on the datapoints of X_eval.
    confi = model.confidence_intervals(X_eval, width=0.95)
    return ece, mce, brier, acc, mse, confi
Ejemplo n.º 7
0
# Bare expression: has no effect when run as a script (presumably left over
# from a notebook cell that displayed the value).
rfc_predictions_2020
# Persist the earlier model's 2020 predictions to CSV in the working directory.
rfc_predictions_2020.to_csv("rfc_predictions.csv")

##### ----- ##### ----- ##### ----- ##### -----# #### ----- ##### ----- ##### ----- ##### ----- #####
# Model 1.3 - Generalized Additive Models

from pygam import LogisticGAM


# Fit a GAM model with the default parameters.
# X_train / y_train are defined outside this view.
gam_model =  LogisticGAM()
gam_model.fit(X_train, y_train)


# Predicted probabilities for the held-out test features.
gam_pred_prob = gam_model.predict_proba(X_test)


# top_15_predictions is a project helper defined outside this view; it is
# given the full test data and the probabilities and returns two values.
gam_preds, complete_gam_dat = top_15_predictions(entire_test_data, gam_pred_prob )


# Evaluation report built from the second value returned above (helper not shown).
gam_performance = all_nba_test_report(complete_gam_dat)


# Return value is discarded; presumably prints or displays the missed players
# -- TODO confirm against the helper.
players_missed(complete_gam_dat)



# Score the 2020 feature set: probabilities and the model's class predictions.
gam_predict_probs_2020 = gam_model.predict_proba(features_2020)
gam_predict_binary_2020 = gam_model.predict(features_2020)
Ejemplo n.º 8
0
# Load the dataset; load_breast_cancer is imported outside this view.
ds = load_breast_cancer()

X, y = ds.data, ds.target

# Select the first 6 features only.
X = X[:, 0:6]

# Names of the same six columns.
selected_features = ds.feature_names[0:6]

#-----------------------------------------------------
# Fit a model with the default parameters
gam = LogisticGAM().fit(X, y)
gam.summary()

# The two bare expressions below discard their results when run as a script;
# the trailing numbers are the values recorded by the original author.
roc_auc_score(y, gam.predict_proba(X))  #0.994173140954495
gam.accuracy(X, y)  #0.9560632688927944

#-----------------------------------------------------
# Explore and interpret individual features

# Interactive mode on, and a wide figure so the per-feature panels fit in a row.
plt.ion()
plt.rcParams['figure.figsize'] = (28, 8)

# One subplot per feature column, laid out in a single row.
fig, axs = plt.subplots(1, X.shape[1])

for i, ax in enumerate(axs):
    XX = gam.generate_X_grid(term=i, meshgrid=True)
    pdep, confi = gam.partial_dependence(term=i,
                                         X=XX,
                                         meshgrid=True,
Ejemplo n.º 9
0
# Mean 10-fold cross-validated accuracy of `gam` on the scaled training data.
acc_loggamc = cross_val_score(gam, X_train_scaled, Y_train, cv=10, scoring='accuracy').mean()
print('acc_loggam_cross-validation, train_scaled',acc_loggamc)


# make predictions for testing set
# NOTE(review): this uses `logreg`, not `gam`, so the test accuracy printed
# below is for a different model than the cross-validation above -- confirm
# this is intended.
Y_scaler_pred_class = logreg.predict(X_test_scaled)
# calculate testing accuracy
from sklearn import metrics

print('\n ------------------------------------------------------------------')
print("\n calculate testing accuracy (M1. X_train_scaled):", metrics.accuracy_score(Y_test, Y_scaler_pred_class))
print('\n ------------------------------------------------------------------')
#  ROC curves and AUC
# https://www.medcalc.org/manual/roc-curves.php
# predict probability of survival
# NOTE(review): `[:, 1]` requires a 2-D result; if `gam` is a pyGAM
# LogisticGAM, its predict_proba output may be 1-D and this would raise --
# verify against the actual type of `gam`.
Y_scaler_pred_prob_GAM = gam.predict_proba(X_test_scaled)[:, 1]

import matplotlib.pyplot as plt

plt.rcParams['figure.figsize'] = (8, 6)
plt.rcParams['font.size'] = 14
# plot ROC curve. Receiver Operating Characteristic (ROC) curve the true positive rate (Sensitivity)
fpr, tpr, thresholds = metrics.roc_curve(Y_test, Y_scaler_pred_prob_GAM)

# NOTE(review): the next lines repeat the rcParams setup and recompute
# fpr/tpr/thresholds from `Y_scaler_pred_prob` (defined outside this view),
# overwriting the GAM curve computed just above -- looks like a copy-paste
# leftover; confirm which scores should be plotted.
plt.rcParams['figure.figsize'] = (8, 6)
plt.rcParams['font.size'] = 14
# plot ROC curve. Receiver Operating Characteristic (ROC) curve the true positive rate (Sensitivity)
fpr, tpr, thresholds = metrics.roc_curve(Y_test, Y_scaler_pred_prob)
# New figure whose subplot area is stretched to the full figure height.
fig = plt.figure()
fig.subplots_adjust(bottom=0)
fig.subplots_adjust(top=1)