# %% [markdown]
# For a tutorial see: [Tutorial](https://nbviewer.jupyter.org/github/interpretml/interpret/blob/master/examples/python/notebooks/Interpretable%20Classification%20Methods.ipynb)
#
# **Q7**. Report (global) feature importances for EBM as a table or figure
# (a table sketch follows the MLP section below). What are the three most
# important features in EBM? Are they the same as in the linear model?
#
# $w_1 X + w_2 Y + w_3 (XY) = Z$

# %%
from interpret.glassbox import ExplainableBoostingClassifier
from interpret import show

train_features, train_labels, dev_features, dev_labels, test_features, test_labels = \
    prepare_load_classification_data()

# Train the EBM
ebm = ExplainableBoostingClassifier(n_jobs=-1)
ebm.fit(train_features, train_labels)

# %%
# Global explanation: per-feature importances and shape functions
ebm_global = ebm.explain_global(name='EBM')
show(ebm_global)

# %%
# Local explanations for the first five dev examples
ebm_local = ebm.explain_local(dev_features[:5], dev_labels[:5], name='EBM')
show(ebm_local)

# %%
# Performance on the dev set
from interpret.perf import ROC

ebm_perf = ROC(ebm.predict_proba).explain_perf(dev_features, dev_labels, name='EBM')
show(ebm_perf)

# %% [markdown]
# ### Training and Explaining Neural Networks
# Train two Neural Networks (a sketch follows below):
# 1. One-layer MLP (ReLU activation function + 50 hidden neurons)
# 2. Two-layer MLP (ReLU activation function + (20, 20) hidden neurons)
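# %% [markdown]
# A minimal sketch of the two networks, assuming scikit-learn's `MLPClassifier`
# and the splits returned by `prepare_load_classification_data` above; the
# original notebook may use a different framework or training settings.

# %%
from sklearn.neural_network import MLPClassifier

# 1. One-layer MLP: a single hidden layer of 50 ReLU units
mlp_1 = MLPClassifier(hidden_layer_sizes=(50,), activation='relu', max_iter=500)
mlp_1.fit(train_features, train_labels)

# 2. Two-layer MLP: two hidden layers of 20 ReLU units each
mlp_2 = MLPClassifier(hidden_layer_sizes=(20, 20), activation='relu', max_iter=500)
mlp_2.fit(train_features, train_labels)

# Compare dev-set ROC curves with the same interpret.perf API used for the EBM
mlp_1_perf = ROC(mlp_1.predict_proba).explain_perf(dev_features, dev_labels, name='MLP-1')
mlp_2_perf = ROC(mlp_2.predict_proba).explain_perf(dev_features, dev_labels, name='MLP-2')
show(mlp_1_perf)
show(mlp_2_perf)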
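# %% [markdown]
# Returning to **Q7**: a sketch for reporting the global importances as a
# table. This assumes `ebm_global.data()` exposes the overall importances
# under `'names'`/`'scores'` keys, which may differ across `interpret` versions.

# %%
import pandas as pd

overall = ebm_global.data()  # overall importance data (assumption: 'names'/'scores' keys)
importances = (pd.DataFrame({'feature': overall['names'],
                             'importance': overall['scores']})
                 .sort_values('importance', ascending=False))
print(importances.head(3))  # the three most important features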
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from interpret.glassbox import ExplainableBoostingClassifier, LogisticRegression
from interpret import show

# Titanic: keep a small set of features
test_data = test_data[['Pclass', 'Sex', 'Age', 'SibSp', 'Parch']]
X_train, X_validate, y_train, y_validate = train_test_split(
    train_data.drop('Survived', axis=1), train_data['Survived'], test_size=.25)

ebm = ExplainableBoostingClassifier()
lrm = LogisticRegression()  # interpret's glassbox wrapper, so it supports explain_*()

# EBM handles the string-valued 'Sex' column directly
ebm.fit(X_train, y_train)

# The logistic model needs numeric input: label-encode 'Sex' on a copy
# (plain assignment X_train_lr = X_train would alias and mutate X_train too)
le = LabelEncoder()
X_train_lr = X_train.copy()
X_train_lr['Sex'] = le.fit_transform(X_train['Sex'])
lrm.fit(X_train_lr, y_train)  # assumes 'Age' has no missing values; impute or drop NaNs first

ebm_global = ebm.explain_global()
show(ebm_global)
ebm_local = ebm.explain_local(X_validate, y_validate)
show(ebm_local)

lrm_global = lrm.explain_global()
show(lrm_global)
X_validate_lr = X_validate.copy()
X_validate_lr['Sex'] = le.transform(X_validate['Sex'])  # reuse the fitted encoder
lrm_local = lrm.explain_local(X_validate_lr, y_validate)
show(lrm_local)

## Age binning
ages = pd.DataFrame({'ages': [10, 20, 24, 25, 29, 41, 45, 55, 56]})
ages['ages2'] = pd.cut(ages.ages, bins=[0, 20, 40, 60], include_lowest=True)
ages
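# The binning demo above suggests feeding a discretized Age to the logistic
# model. A minimal sketch under that assumption; the bin edges, the 'AgeBin'
# name, and lrm_binned are illustrative, not from the original, and Age is
# again assumed to be NaN-free:
age_bins = [0, 20, 40, 60, 80]
X_train_binned = X_train_lr.copy()
X_validate_binned = X_validate_lr.copy()
for frame in (X_train_binned, X_validate_binned):
    frame['AgeBin'] = pd.cut(frame['Age'], bins=age_bins, labels=False, include_lowest=True)
    frame.drop('Age', axis=1, inplace=True)  # keep only the binned version
lrm_binned = LogisticRegression()
lrm_binned.fit(X_train_binned, y_train)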
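# To compare the EBM and the logistic model on the validation split, the same
# interpret.perf ROC API used earlier applies (a sketch; the names are ours):
from interpret.perf import ROC

ebm_perf = ROC(ebm.predict_proba).explain_perf(X_validate, y_validate, name='EBM')
lrm_perf = ROC(lrm.predict_proba).explain_perf(X_validate_lr, y_validate, name='LogReg')
show(ebm_perf)
show(lrm_perf)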
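# The next snippet plots an EBM decision surface using make_meshgrid and
# plot_contours, which it does not define. Minimal sketches follow, modelled
# on the scikit-learn decision-surface examples (assumptions: the step size h,
# and numeric class labels so contourf can colour the predictions):
import numpy as np

def make_meshgrid(x, y, h=.02):
    """Grid of points covering the data range, with a unit margin, at step h."""
    xx, yy = np.meshgrid(np.arange(x.min() - 1, x.max() + 1, h),
                         np.arange(y.min() - 1, y.max() + 1, h))
    return xx, yy

def plot_contours(ax, clf, xx, yy, **params):
    """Predict over the grid and draw filled contours on ax."""
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    return ax.contourf(xx, yy, Z, **params)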
import pandas as pd
import matplotlib.pyplot as plt

# df_A and df_B are assumed to be defined earlier in the notebook.
# Define training df (first 500 elements of each category)
training_columns = ['x', 'y']
training_df = pd.concat([df_A.iloc[:500], df_B.iloc[:500]], ignore_index=True, sort=True)
# Define test df (second 500 elements of each category)
test_df = pd.concat([df_A.iloc[500:], df_B.iloc[500:]], ignore_index=True, sort=True)

ebm_clf = ExplainableBoostingClassifier()
ebm_clf.fit(training_df[training_columns], training_df['category'])
probabilities = ebm_clf.predict_proba(test_df[training_columns])

ebm_global = ebm_clf.explain_global()
show(ebm_global)

# Store the per-class probabilities as columns
for prob in range(2):
    test_df['prob_{0}'.format(prob)] = probabilities[:, prob]

# Decision-boundary contour plot over the (x, y) plane
figcontur = plt.figure(figsize=(18, 7.5))
contourax = figcontur.add_subplot(111)
xx, yy = make_meshgrid(test_df['x'], test_df['y'])
plot_contours(contourax, ebm_clf, xx, yy, cmap='RdYlBu', alpha=0.8)
contourax.scatter(test_df.x, test_df.y, c=test_df['category'], cmap='RdYlBu',
                  s=20, edgecolors='k')
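# The prob_0/prob_1 columns above are computed but never displayed; one way to
# use them (our addition, not from the original): colour the test points by
# the predicted probability of class 1.
figprob = plt.figure(figsize=(9, 7.5))
probax = figprob.add_subplot(111)
sc = probax.scatter(test_df.x, test_df.y, c=test_df['prob_1'], cmap='RdYlBu',
                    s=20, edgecolors='k')
figprob.colorbar(sc, ax=probax, label='P(class 1)')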