Example #1
# For a tutorial, see: [Tutorial](https://nbviewer.jupyter.org/github/interpretml/interpret/blob/master/examples/python/notebooks/Interpretable%20Classification%20Methods.ipynb)
#
# **Q7**. Report (global) feature importances for EBM as a table or figure. What are the most important three features in EBM? Are they the same as in the linear model?
#
# $w_1 X + w_2 Y + w_3 (XY) = Z$
# %%
from interpret.glassbox import ExplainableBoostingClassifier
from interpret import show

train_features, train_labels, dev_features, dev_labels, test_features, test_labels = \
    prepare_load_classification_data()
ebm = ExplainableBoostingClassifier(n_jobs=-1)
ebm.fit(train_features, train_labels)
# EBM
# %% Global Explanation
ebm_global = ebm.explain_global(name='EBM')
show(ebm_global)
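# %% [markdown]
# For Q7, the overall importances can also be reported as a table. A minimal
# sketch, assuming `ebm_global.data()` returns the overall dict with 'names'
# and 'scores' keys (the behavior of current interpret releases; check yours
# if it differs):
# %%
import pandas as pd

overall = ebm_global.data()
importance_table = pd.DataFrame({'feature': overall['names'],
                                 'importance': overall['scores']})
# The three most important features, largest first:
print(importance_table.sort_values('importance', ascending=False).head(3))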
# %% Local Explanation
ebm_local = ebm.explain_local(dev_features[:5], dev_labels[:5], name='EBM')
show(ebm_local)
# %% Performance
from interpret.perf import ROC
ebm_perf = ROC(ebm.predict_proba).explain_perf(dev_features,
                                               dev_labels,
                                               name='EBM')
show(ebm_perf)
# %% [markdown]
# ### Training and Explaining Neural Networks
# Train two neural networks (a scikit-learn sketch follows below):
# 1. One-hidden-layer MLP (ReLU activation, 50 hidden units)
# 2. Two-hidden-layer MLP (ReLU activation, (20, 20) hidden units)
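# %% [markdown]
# A minimal sketch of the two architectures with scikit-learn's
# `MLPClassifier` (an assumption; the assignment may expect another
# framework), reusing the training split loaded above:
# %%
from sklearn.neural_network import MLPClassifier

mlp_one = MLPClassifier(hidden_layer_sizes=(50,), activation='relu',
                        max_iter=500, random_state=0)
mlp_two = MLPClassifier(hidden_layer_sizes=(20, 20), activation='relu',
                        max_iter=500, random_state=0)
mlp_one.fit(train_features, train_labels)
mlp_two.fit(train_features, train_labels)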
Example #2
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from interpret import show
from interpret.glassbox import ExplainableBoostingClassifier, LogisticRegression

# Keep only the modeling features (interpret's glassbox LogisticRegression
# is assumed here, since plain scikit-learn models have no explain_* methods).
test_data = test_data[['Pclass', 'Sex', 'Age', 'SibSp', 'Parch']]

X_train, X_validate, y_train, y_validate = train_test_split(
    train_data.drop('Survived', axis=1), train_data['Survived'], test_size=.25)

ebm = ExplainableBoostingClassifier()
lrm = LogisticRegression()

ebm.fit(X_train, y_train)

le = LabelEncoder()
# Work on a copy so the raw frame the EBM saw is not mutated in place.
X_train_lr = X_train.copy()
X_train_lr['Sex'] = le.fit_transform(X_train_lr['Sex'])
lrm.fit(X_train_lr, y_train)

ebm_global = ebm.explain_global()
show(ebm_global)
ebm_local = ebm.explain_local(X_validate, y_validate)
show(ebm_local)

lrm_global = lrm.explain_global()
show(lrm_global)
X_validate_lr = X_validate.copy()
# Reuse the training-time encoding: transform, not fit_transform.
X_validate_lr['Sex'] = le.transform(X_validate_lr['Sex'])
lrm_local = lrm.explain_local(X_validate_lr, y_validate)
show(lrm_local)
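# A hedged extra: compare validation performance of the two models with
# interpret's ROC, mirroring the pattern from Example #1.
from interpret.perf import ROC

ebm_perf = ROC(ebm.predict_proba).explain_perf(X_validate, y_validate, name='EBM')
lrm_perf = ROC(lrm.predict_proba).explain_perf(X_validate_lr, y_validate, name='LR')
show(ebm_perf)
show(lrm_perf)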

## Age binning
ages = pd.DataFrame({'ages': [10, 20, 24, 25, 29, 41, 45, 55, 56]})
ages['ages2'] = pd.cut(ages.ages, bins=[0, 20, 40, 60], include_lowest=True)
ages
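# pd.cut also accepts labels, which often reads better in reports; same bin
# edges as above.
ages['age_group'] = pd.cut(ages.ages, bins=[0, 20, 40, 60],
                           labels=['0-20', '21-40', '41-60'],
                           include_lowest=True)
ages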
Example #3
import matplotlib.pyplot as plt
import pandas as pd
from interpret import show
from interpret.glassbox import ExplainableBoostingClassifier

# Define the training df (first 500 elements of each category).
training_columns = ['x', 'y']
training_df = pd.concat([df_A.iloc[:500], df_B.iloc[:500]],
                        ignore_index=True,
                        sort=True)

# Define the test df (second 500 elements of each category).
test_df = pd.concat([df_A.iloc[500:], df_B.iloc[500:]],
                    ignore_index=True,
                    sort=True)

ebm_clf = ExplainableBoostingClassifier()
ebm_clf.fit(training_df[training_columns], training_df['category'])

probabilities = ebm_clf.predict_proba(test_df[training_columns])
ebm_global = ebm_clf.explain_global()
show(ebm_global)

# Attach per-class probabilities as columns prob_0 and prob_1.
for class_idx in range(2):
    test_df['prob_{0}'.format(class_idx)] = probabilities[:, class_idx]
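# make_meshgrid and plot_contours are assumed helpers in the style of the
# scikit-learn SVM plotting example; a minimal sketch, in case they are not
# already defined elsewhere:
import numpy as np

def make_meshgrid(x, y, h=0.02):
    """Build a dense grid spanning the data range, with 1-unit padding."""
    xx, yy = np.meshgrid(np.arange(x.min() - 1, x.max() + 1, h),
                         np.arange(y.min() - 1, y.max() + 1, h))
    return xx, yy

def plot_contours(ax, clf, xx, yy, **params):
    """Shade the classifier's predicted class over the grid."""
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    return ax.contourf(xx, yy, Z.reshape(xx.shape), **params)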

figcontur = plt.figure(figsize=(18, 7.5))
contourax = figcontur.add_subplot(111)
xx, yy = make_meshgrid(test_df['x'], test_df['y'])
plot_contours(contourax, ebm_clf, xx, yy, cmap='RdYlBu', alpha=0.8)
contourax.scatter(test_df.x,
                  test_df.y,
                  c=test_df['category'],
                  cmap='RdYlBu',
                  s=20,
                  edgecolors='k')