def spline_classification_plot(ax, X, y, X_eval, y_eval, gam_ref):
    """Fit a monotone logistic GAM, plot its curve on ``ax`` and score it.

    A ``LogisticGAM`` with a single monotonically increasing spline term
    (``n_splines=5``) is grid-searched on ``(X, y)``. The fitted probability
    curve and its 95% confidence band are drawn over 100 evenly spaced points
    in [0, 1], and the metrics computed on ``(X_eval, y_eval)`` are written
    into a text box on the same axes.

    pyGAM reference:
    https://pygam.readthedocs.io/en/latest/api/logisticgam.html

    Args:
        ax: axes object to draw on (``plot``, ``text``, ``set_xlim`` and
            ``set_ylim`` are called on it).
        X: training inputs; the spline is built on feature index 0.
        y: training labels.
        X_eval: inputs used to compute the evaluation metrics.
        y_eval: labels matching ``X_eval``.
        gam_ref: reference model handed to ``MseEval`` together with the
            fitted GAM.

    Returns:
        Tuple ``(ece, mce, brier, acc, mse, ax, confi)`` where ``confi`` is
        the 95% confidence interval of the fitted GAM at ``X_eval``.
    """
    # Single spline term on feature 0, constrained to be monotonically increasing.
    gam = LogisticGAM(s(0, constraints='monotonic_inc', n_splines=5)).gridsearch(X, y)

    # Fitted curve (green) and 95% confidence band (red, dashed) on a fixed grid.
    XX = np.linspace(0, 1, 100)
    ax.plot(XX, gam.predict_proba(XX), c='g')
    ax.plot(XX, gam.confidence_intervals(XX, width=0.95), c='r', ls='--')

    # Metrics after calibration. The evaluators receive a two-column array
    # laid out as [1 - p, p] per sample.
    y_ = gam.predict_proba(X_eval)
    ece = EceEval(np.array([1 - y_, y_]).T, y_eval, num_bins=100)
    mce = MceEval(np.array([1 - y_, y_]).T, y_eval, num_bins=100)
    brier = BrierEval(np.array([1 - y_, y_]).T, y_eval)
    mse = MseEval(gam, gam_ref, num_bins=100)
    acc = gam.accuracy(X_eval, y_eval)

    ax.text(0.05,
            0.75,
            'ECE=%.4f\nMCE=%.4f\nBrier=%.4f\nACC=%.4f\nMSE=%.4f' %
            (ece, mce, brier, acc, mse),
            size=6,
            ha='left',
            va='center',
            bbox={
                'facecolor': 'green',
                'alpha': 0.5,
                'pad': 4
            })
    ax.set_xlim(0.0, 1.0)
    ax.set_ylim(0.0, 1.0)

    confi = gam.confidence_intervals(X_eval, width=0.95)

    # summary() prints the report itself and returns None, so it is called
    # directly: the former `print gam.summary()` was a Python 2 print
    # statement (a SyntaxError on Python 3) and also printed a stray "None".
    gam.summary()
    return ece, mce, brier, acc, mse, ax, confi
class AdaptiveLogisticGAM(BaseEstimator, RegressorMixin):
    """Estimator wrapper that fits a ``LogisticGAM`` through its grid search.

    Args:
        param_grid: keyword arguments forwarded to
            ``LogisticGAM.gridsearch``; defaults to ``GAM_GRID_BASE``.
        gam_params: keyword arguments forwarded to the ``LogisticGAM``
            constructor; defaults to an empty dict.

    Attributes:
        model: the wrapped ``LogisticGAM`` instance.
        param_grid: grid-search keyword arguments in use.
        gam_params: constructor keyword arguments in use.
    """

    # NOTE(review): this wraps a logistic (classification) model but mixes in
    # RegressorMixin, so the inherited score() is the regressor one.
    # Left unchanged to keep behavior; confirm this is intended.

    def __init__(self, param_grid=None, gam_params=None):
        # create GAM
        if gam_params is None:
            gam_params = {}
        # Every __init__ parameter must also exist as an attribute of the
        # same name: BaseEstimator.get_params() (and therefore clone())
        # reads them back with getattr().
        self.gam_params = gam_params
        self.model = LogisticGAM(**gam_params)

        # set grid search parameters
        if param_grid is None:
            param_grid = GAM_GRID_BASE
        self.param_grid = param_grid

    @staticmethod
    def _as_array(X):
        """Return the underlying array for a DataFrame, anything else as is."""
        if isinstance(X, pd.DataFrame):
            return X.values
        return X

    def fit(self, X, y):
        """Fit the wrapped GAM with a grid search over ``param_grid``.

        Returns:
            self, so the call can be chained as estimator APIs expect.
        """
        # fit using grid-search
        self.model.gridsearch(self._as_array(X), y, progress=False,
                              **self.param_grid)
        return self

    def predict(self, X):
        """Return the wrapped model's ``predict`` output for ``X``."""
        return self.model.predict(self._as_array(X))

    def predict_proba(self, X):
        """Return the wrapped model's ``predict_proba`` output for ``X``."""
        return self.model.predict_proba(self._as_array(X))
def spline_calibration(X, y):
    """Calibrate scores with a monotonically increasing logistic GAM.

    The model has a single spline term on feature 0 with the
    ``'monotonic_inc'`` constraint and is fitted via ``gridsearch`` on
    ``(X, y)``.

    pyGAM reference:
    https://pygam.readthedocs.io/en/latest/api/logisticgam.html

    Args:
        X: inputs the calibrator is fitted on and then evaluated at.
        y: labels matching ``X``.

    Returns:
        The fitted model's ``predict_proba`` output for ``X``.
    """
    monotone_term = s(0, constraints='monotonic_inc')
    calibrator = LogisticGAM(monotone_term).gridsearch(X, y)
    return calibrator.predict_proba(X)
def calibrate_propensities(propensities, treatment):
    """Calibrate propensity scores post hoc against the observed treatments.

    A ``LogisticGAM`` with one spline term on feature 0 is fitted with the
    propensities as input and the treatment indicator as target; the fitted
    model is then evaluated on the same propensities.

    Args:
        propensities: propensity scores
        treatment: treatment indicator

    Returns:
        p: calibrated version of the propensities given
    """
    calibrator = LogisticGAM(s(0))
    fitted = calibrator.fit(propensities, treatment)
    p = fitted.predict_proba(propensities)
    return p
def calibrate(ps, treatment):
    """Return propensity scores recalibrated by a logistic GAM.

    The GAM uses a single spline term on feature 0, is fitted on
    ``(ps, treatment)`` and evaluated on ``ps`` itself.

    Ref: https://pygam.readthedocs.io/en/latest/api/logisticgam.html

    Args:
        ps (numpy.array): a propensity score vector
        treatment (numpy.array): a binary treatment vector
            (0: control, 1: treated)

    Returns:
        (numpy.array): a calibrated propensity score vector
    """
    spline_on_score = s(0)
    model = LogisticGAM(spline_on_score).fit(ps, treatment)
    calibrated = model.predict_proba(ps)
    return calibrated
def spline_classification(X, y, X_eval, y_eval, gam_ref):
    """Fit a monotone logistic GAM on ``(X, y)`` and score it on the eval set.

    The model is a ``LogisticGAM`` with one monotonically increasing spline
    term (``n_splines=5``) on feature 0, fitted through ``gridsearch``.

    pyGAM reference:
    https://pygam.readthedocs.io/en/latest/api/logisticgam.html

    Args:
        X: training inputs.
        y: training labels.
        X_eval: inputs used to compute the metrics.
        y_eval: labels matching ``X_eval``.
        gam_ref: reference model handed to ``MseEval`` together with the
            fitted GAM.

    Returns:
        Tuple ``(ece, mce, brier, acc, mse, confi)`` where ``confi`` is the
        95% confidence interval of the fitted GAM at ``X_eval``.
    """

    def two_column(p):
        # Evaluators take one row per sample laid out as [1 - p, p];
        # a fresh array is built for every call, as before.
        return np.array([1 - p, p]).T

    spline_term = s(0, constraints='monotonic_inc', n_splines=5)
    gam = LogisticGAM(spline_term).gridsearch(X, y)

    # Metrics after calibration.
    p_eval = gam.predict_proba(X_eval)
    ece = EceEval(two_column(p_eval), y_eval, num_bins=100)
    mce = MceEval(two_column(p_eval), y_eval, num_bins=100)
    brier = BrierEval(two_column(p_eval), y_eval)
    mse = MseEval(gam, gam_ref, num_bins=100)
    acc = gam.accuracy(X_eval, y_eval)

    # Confidence of the fitted model at the evaluation points.
    confi = gam.confidence_intervals(X_eval, width=0.95)

    return ece, mce, brier, acc, mse, confi
rfc_predictions_2020
rfc_predictions_2020.to_csv("rfc_predictions.csv")

# ----------------------------------------------------------------------
# Model 1.3 - Generalized Additive Models
# ----------------------------------------------------------------------
from pygam import LogisticGAM

# Fit a GAM with the default parameters; fit() hands back the fitted model.
gam_model = LogisticGAM().fit(X_train, y_train)

# Score the test set, then build the top-15 selection and its report.
gam_pred_prob = gam_model.predict_proba(X_test)
gam_preds, complete_gam_dat = top_15_predictions(
    entire_test_data,
    gam_pred_prob,
)
gam_performance = all_nba_test_report(complete_gam_dat)
players_missed(complete_gam_dat)

# Probabilities and class predictions for the 2020 feature set.
gam_predict_probs_2020 = gam_model.predict_proba(features_2020)
gam_predict_binary_2020 = gam_model.predict(features_2020)
ds = load_breast_cancer() X, y = ds.data, ds.target #select first 6 features only X = X[:, 0:6] selected_features = ds.feature_names[0:6] #----------------------------------------------------- #Fit a model with the default parameters gam = LogisticGAM().fit(X, y) gam.summary() roc_auc_score(y, gam.predict_proba(X)) #0.994173140954495 gam.accuracy(X, y) #0.9560632688927944 #----------------------------------------------------- # Explore and interpret individual features plt.ion() plt.rcParams['figure.figsize'] = (28, 8) fig, axs = plt.subplots(1, X.shape[1]) for i, ax in enumerate(axs): XX = gam.generate_X_grid(term=i, meshgrid=True) pdep, confi = gam.partial_dependence(term=i, X=XX, meshgrid=True,
# 10-fold cross-validated accuracy of the GAM on the scaled training set.
acc_loggamc = cross_val_score(gam, X_train_scaled, Y_train, cv=10, scoring='accuracy').mean()
print('acc_loggam_cross-validation, train_scaled',acc_loggamc)

# make predictions for testing set
# NOTE(review): these class predictions come from `logreg`, not `gam`, so the
# accuracy printed below is the logistic-regression one. Left unchanged;
# confirm whether `gam.predict` was intended in this section.
Y_scaler_pred_class = logreg.predict(X_test_scaled)

# calculate testing accuracy
from sklearn import metrics
print('\n ------------------------------------------------------------------')
print("\n calculate testing accuracy (M1. X_train_scaled):", metrics.accuracy_score(Y_test, Y_scaler_pred_class))
print('\n ------------------------------------------------------------------')

# ROC curves and AUC
# https://www.medcalc.org/manual/roc-curves.php

# predict probability of survival
Y_scaler_pred_prob_GAM = gam.predict_proba(X_test_scaled)
# pyGAM's predict_proba returns a 1-D vector of positive-class probabilities,
# while scikit-learn style estimators return an (n_samples, 2) matrix.
# Only slice out column 1 in the 2-D case; the unconditional `[:, 1]` raised
# IndexError on a 1-D result.
if getattr(Y_scaler_pred_prob_GAM, 'ndim', 1) == 2:
    Y_scaler_pred_prob_GAM = Y_scaler_pred_prob_GAM[:, 1]

import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = (8, 6)
plt.rcParams['font.size'] = 14

# ROC curve of the GAM: true positive rate (sensitivity) against false
# positive rate. A pasted duplicate of this step used to recompute
# fpr/tpr/thresholds from `Y_scaler_pred_prob` (another model's scores) and
# overwrite the GAM result; it has been removed.
fpr, tpr, thresholds = metrics.roc_curve(Y_test, Y_scaler_pred_prob_GAM)

fig = plt.figure()
fig.subplots_adjust(bottom=0)
fig.subplots_adjust(top=1)