def spline_classification_plot(ax, X, y, X_eval, y_eval, gam_ref):
    """Fit a monotone spline LogisticGAM on (X, y), plot it and score it.

    A ``LogisticGAM`` with a single monotonically increasing spline term
    (5 splines) is grid-searched on ``(X, y)``. Its predicted probability
    curve and 95% confidence band are drawn on ``ax`` over a fixed grid on
    [0, 1], and evaluation metrics computed on ``(X_eval, y_eval)`` are
    written into a text box on the same axes.

    Args:
        ax: Matplotlib-style axes to draw on (needs ``plot``, ``text``,
            ``set_xlim`` and ``set_ylim``).
        X: Training feature(s) for the GAM; only feature 0 is modelled.
            Presumably a score in [0, 1], given the plotting grid and axis
            limits -- TODO confirm against caller.
        y: Binary training labels.
        X_eval: Features the metrics are evaluated on.
        y_eval: Labels the metrics are evaluated on.
        gam_ref: Reference model handed to ``MseEval`` together with the
            freshly fitted GAM.

    Returns:
        Tuple ``(ece, mce, brier, acc, mse, ax, confi)`` where ``confi`` is
        the 95% confidence interval of the fitted GAM on ``X_eval``.
    """
    # LogisticGAM API reference:
    # https://pygam.readthedocs.io/en/latest/api/logisticgam.html
    gam = LogisticGAM(
        s(0, constraints='monotonic_inc', n_splines=5)
    ).gridsearch(X, y)

    # Fitted curve (green) and its 95% confidence band (red, dashed).
    grid = np.linspace(0, 1, 100)
    ax.plot(grid, gam.predict_proba(grid), c='g')
    ax.plot(grid, gam.confidence_intervals(grid, width=0.95), c='r', ls='--')

    # The Eval helpers take a two-column [1 - p, p] matrix; build it once
    # instead of once per metric.
    pos_proba = gam.predict_proba(X_eval)
    proba = np.array([1 - pos_proba, pos_proba]).T

    # Metric helpers are defined elsewhere; the names shown in the label
    # below are the only description of them available here.
    ece = EceEval(proba, y_eval, num_bins=100)
    mce = MceEval(proba, y_eval, num_bins=100)
    brier = BrierEval(proba, y_eval)
    mse = MseEval(gam, gam_ref, num_bins=100)
    acc = gam.accuracy(X_eval, y_eval)

    ax.text(
        0.05,
        0.75,
        'ECE=%.4f\nMCE=%.4f\nBrier=%.4f\nACC=%.4f\nMSE=%.4f'
        % (ece, mce, brier, acc, mse),
        size=6,
        ha='left',
        va='center',
        bbox={'facecolor': 'green', 'alpha': 0.5, 'pad': 4},
    )
    ax.set_xlim(0.0, 1.0)
    ax.set_ylim(0.0, 1.0)

    confi = gam.confidence_intervals(X_eval, width=0.95)

    # summary() prints the report itself; the former Python 2 statement
    # `print gam.summary()` is a SyntaxError on Python 3 and only added a
    # stray "None" line on Python 2.
    gam.summary()

    return ece, mce, brier, acc, mse, ax, confi
#----------------------------------------------------- #load the breast cancer data set ds = load_breast_cancer() X, y = ds.data, ds.target #select first 6 features only X = X[:, 0:6] selected_features = ds.feature_names[0:6] #----------------------------------------------------- #Fit a model with the default parameters gam = LogisticGAM().fit(X, y) gam.summary() roc_auc_score(y, gam.predict_proba(X)) #0.994173140954495 gam.accuracy(X, y) #0.9560632688927944 #----------------------------------------------------- # Explore and interpret individual features plt.ion() plt.rcParams['figure.figsize'] = (28, 8) fig, axs = plt.subplots(1, X.shape[1]) for i, ax in enumerate(axs): XX = gam.generate_X_grid(term=i, meshgrid=True) pdep, confi = gam.partial_dependence(term=i,
# NOTE(review): the assignment of auc_log2 is outside this fragment; given
# the *_log3 names used for the same data set below, confirm that this
# should not be auc_log3.
print("\n calculate cross-validated AUC (M2. X_train_scaled_poly):", auc_log2)

# 10-fold cross-validated accuracy of the logistic regression on the
# polynomial feature set.
acc_log3 = cross_val_score(
    logreg, X_train_scaled_poly, Y_train, cv=10, scoring='accuracy'
).mean()
# Report the value just computed (the stale acc_log2 was printed before).
print("\n calculate cross-validated accurancy (M2. X_train_scaled_poly):", acc_log3)

# Out-of-fold predictions for the same model / feature set.
# NOTE(review): sklearn.cross_validation was removed in scikit-learn 0.20;
# sklearn.model_selection.cross_val_predict is the replacement if the
# project upgrades.
acc_logs3 = cross_validation.cross_val_predict(
    logreg, X_train_scaled_poly, Y_train, cv=10
)
# Score the predictions just computed (acc_logs2 was scored before, so the
# accuracy and the classification report described different predictions).
print(metrics.accuracy_score(Y_train, acc_logs3))
print(metrics.classification_report(Y_train, acc_logs3))
print(logreg.coef_)
print('\n ------------------------------------------------------------------')

# call predict_proba() to get the list of probabilities that the classifier
# assigned to each instance for each class:

###############################################################################
# GAM
import pandas as pd
from pygam import LogisticGAM

# Fit a model with the default parameters
gam = LogisticGAM().fit(X_train_scaled, Y_train)
gam.summary()
print('gam.accuracy(X_train_scaled, Y_train):', gam.accuracy(X_train_scaled, Y_train))
print('gam.accuracy(X_test_scaled, Y_test):', gam.accuracy(X_test_scaled, Y_test))

acc_loggamc = cross_val_score(
    gam, X_train_scaled, Y_train, cv=10, scoring='accuracy'
).mean()
print('acc_loggam_cross-validation, train_scaled', acc_loggamc)

# make predictions for testing set
# NOTE(review): this uses logreg, not the GAM fitted above; the "M1" label
# below suggests that is intended -- confirm.
Y_scaler_pred_class = logreg.predict(X_test_scaled)

# calculate testing accuracy
from sklearn import metrics
print('\n ------------------------------------------------------------------')
print("\n calculate testing accuracy (M1. X_train_scaled):", metrics.accuracy_score(Y_test, Y_scaler_pred_class))
print('\n ------------------------------------------------------------------')

# ROC curves and AUC