Example 1
    def ga2m(self):
        # Explore the Data
        marginal = Marginal().explain_data(self.x, self.y, name="Raw Data")

        # Train the Explainable Boosting Machine(EBM)
        lr = LinearRegression()
        lr.fit(self.x, self.y)

        rt = RegressionTree()
        rt.fit(self.x, self.y)

        ebm = ExplainableBoostingRegressor()  # For a classifier, use ebm = ExplainableBoostingClassifier()
        ebm.fit(self.x, self.y)

        # How Does the EBM Model Perform?
        ebm_perf = RegressionPerf(ebm.predict).explain_perf(self.x,
                                                            self.y,
                                                            name="EBM")
        lr_perf = RegressionPerf(lr.predict).explain_perf(
            self.x, self.y, name="Linear Regression")
        rt_perf = RegressionPerf(rt.predict).explain_perf(
            self.x, self.y, name="Regression Tree")

        # Global Interpretability - What the Model says for All Data
        ebm_global = ebm.explain_global(name="EBM")
        lr_global = lr.explain_global(name="LinearRegression")
        rt_global = rt.explain_global(name="Regression Tree")

        # Put everything in a dashboard
        show([
            marginal, lr_global, lr_perf, rt_global, rt_perf, ebm_perf,
            ebm_global
        ])
Example 2
    def blackbox_show_performance(self,
                                  method,
                                  predictions="default",
                                  show=True):
        """
        Plots an interpretable display of your model based on a performance metric.

        The metric can be 'ROC' or 'PR' (precision-recall) for classification problems,
        or 'regperf' for regression problems.
        
        Parameters
        ----------
        method : str
            Performance metric: 'roc' or 'pr' for classification, 'regperf' for regression

        predictions : str, optional
            Prediction type, can either be 'default' (.predict) or 'probability' if the model can predict probabilities, by default 'default'

        show : bool, optional
            False to not display the plot, by default True
        
        Returns
        -------
        Interpret
            Explanation object that can be rendered with interpret.show
        """

        if predictions == "probability":
            predict_fn = self.model.predict_proba
        else:
            predict_fn = self.model.predict

        if (self.problem in INTERPRET_EXPLAINERS["problem"]
                and method.lower() in INTERPRET_EXPLAINERS["problem"][self.problem]):
            blackbox_perf = INTERPRET_EXPLAINERS["problem"][self.problem][
                method.lower()](predict_fn).explain_perf(
                    self.x_test, self.y_test, name=method.upper())
        else:
            raise ValueError(
                "Supported blackbox explainers are only {} for classification problems and {} for regression problems.".format(
                    ", ".join(INTERPRET_EXPLAINERS["problem"]["classification"].keys()),
                    ", ".join(INTERPRET_EXPLAINERS["problem"]["regression"].keys()),
                ))

        if show:
            interpret.show(blackbox_perf)

        self.trained_blackbox_explainers[method.lower()] = blackbox_perf

        return blackbox_perf
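A hypothetical usage sketch for the method above; the wrapper instance `analysis` is an assumption, not part of the original snippet:

    # ROC curve for a classification problem, using predicted probabilities
    roc_perf = analysis.blackbox_show_performance(method="roc", predictions="probability")
    # For a regression problem the supported method would be 'regperf':
    # reg_perf = analysis.blackbox_show_performance(method="regperf", show=False)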
Example 3
    def test_interpret_dashboard(self, mimic_explainer):
        # Validate our explanation works with the interpret dashboard
        x_train, x_test, y_train, y_test, feature_names, target_names = create_cancer_data(
        )
        # Fit an SVM model
        model = create_sklearn_svm_classifier(x_train, y_train)
        explainer = mimic_explainer(model,
                                    x_train,
                                    LGBMExplainableModel,
                                    features=feature_names,
                                    classes=target_names)
        explanation = explainer.explain_global(x_test)
        show(explanation)
Example 4
    def blackbox_global_explanation(self,
                                    method="morris",
                                    predictions="default",
                                    show=True,
                                    **kwargs):
        """
        Provides an interpretable summary of your model's behaviour based on an explainer.

        The explainer can be 'morris' (Morris sensitivity) or 'dependence' (partial dependence).
        
        Parameters
        ----------
        method : str, optional
            Explainer type, can either be 'morris' or 'dependence', by default 'morris'

        predictions : str, optional
            Prediction type, can either be 'default' (.predict) or 'probability' if the model can predict probabilities, by default 'default'

        show : bool, optional
            False to not display the plot, by default True
        
        Returns
        -------
        Interpret
            Explanation object that can be rendered with interpret.show
        """

        if predictions == "probability":
            predict_fn = self.model.predict_proba
        else:
            predict_fn = self.model.predict

        if method.lower() in INTERPRET_EXPLAINERS["global"]:
            sensitivity = INTERPRET_EXPLAINERS["global"][method.lower()](
                predict_fn=predict_fn, data=self.x_train, **kwargs)

        else:
            raise ValueError(
                'Supported blackbox global explainers are only "morris" and "dependence" (partial dependence).'
            )

        sensitivity_global = sensitivity.explain_global(name=method.upper())

        self.trained_blackbox_explainers[method.lower()] = sensitivity_global

        if show:
            interpret.show(sensitivity_global)

        return sensitivity_global
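Hypothetical calls against the method above (the `analysis` instance is an assumption, not part of the original snippet):

    # Morris sensitivity analysis, rendered inline
    morris_global = analysis.blackbox_global_explanation(method="morris")
    # Partial dependence, returned without rendering the dashboard
    pdp_global = analysis.blackbox_global_explanation(method="dependence", show=False)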
Example 5
    def create_dashboard(self):  # pragma: no cover
        """
        Displays an interpretable dashboard of already created interpretable plots.
        
        If an explanation hasn't been created yet, it is generated with default parameters for the dashboard.
        """

        dashboard_plots = []

        for explainer_type in INTERPRET_EXPLAINERS:

            if explainer_type == "problem":
                temp_explainer_type = INTERPRET_EXPLAINERS[explainer_type][self.problem]
            else:
                temp_explainer_type = INTERPRET_EXPLAINERS[explainer_type]

            for explainer in temp_explainer_type:
                if explainer in self.trained_blackbox_explainers:
                    dashboard_plots.append(self.trained_blackbox_explainers[explainer])
                else:
                    if explainer_type == "problem":
                        dashboard_plots.append(
                            self.blackbox_show_performance(explainer, show=False)
                        )
                    elif explainer_type == "local":
                        dashboard_plots.append(
                            self.blackbox_local_explanation(
                                method=explainer, show=False
                            )
                        )
                    else:
                        dashboard_plots.append(
                            self.blackbox_global_explanation(
                                method=explainer, show=False
                            )
                        )

        interpret.show(dashboard_plots)
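A minimal usage sketch (the `analysis` instance is an assumption): explanations created earlier are reused from trained_blackbox_explainers, and anything missing is built with default parameters.

    analysis.blackbox_show_performance("roc", show=False)  # cached in trained_blackbox_explainers
    analysis.create_dashboard()  # reuses the cached ROC plot and builds the remaining explanations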
Example 6
X_train, X_validate, y_train, y_validate = train_test_split(
    train_data.drop('Survived', axis=1), train_data['Survived'], test_size=.25)

ebm = ExplainableBoostingClassifier()
lrm = LogisticRegression()

ebm.fit(X_train, y_train)

le = LabelEncoder()
# Encode 'Sex' numerically for logistic regression on a copy, so X_train itself is not mutated
X_train_lr = X_train.copy()
X_train_lr['Sex'] = le.fit_transform(X_train_lr['Sex'])
lrm.fit(X_train_lr, y_train)

ebm_global = ebm.explain_global()
show(ebm_global)
ebm_local = ebm.explain_local(X_validate, y_validate)
show(ebm_local)

lrm_global = lrm.explain_global()
show(lrm_global)
X_validate_lr = X_validate.copy()
X_validate_lr['Sex'] = le.transform(X_validate_lr['Sex'])  # reuse the encoder fitted on the training data
lrm_local = lrm.explain_local(X_validate_lr, y_validate)
show(lrm_local)

## Age binning
ages = pd.DataFrame({'ages': [10, 20, 24, 25, 29, 41, 45, 55, 56]})
ages['ages2'] = pd.cut(ages.ages, bins=[0, 20, 40, 60], include_lowest=True)
ages
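A small follow-up sketch, assuming only pandas' pd.cut and value_counts, that applies the same binning with explicit labels and counts the rows per bin:

    ages['age_group'] = pd.cut(ages.ages, bins=[0, 20, 40, 60],
                               labels=['0-20', '21-40', '41-60'],
                               include_lowest=True)
    print(ages['age_group'].value_counts(sort=False))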
Example 7
#
# **Q7**. Report (global) feature importances for EBM as a table or figure. What are the most important three features in EBM? Are they the same as in the linear model?
#
# w_1X + w_2Y + w_3(XY) = Z
# %%
from interpret.glassbox import ExplainableBoostingClassifier
from interpret import show

train_features, train_labels, dev_features, dev_labels, test_features, test_labels = prepare_load_classification_data(
)
ebm = ExplainableBoostingClassifier(n_jobs=-1)
ebm.fit(train_features, train_labels)
# EBM
#%% # Global Explanation
ebm_global = ebm.explain_global(name='EBM')
show(ebm_global)
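# %% [markdown]
# A minimal sketch for Q7, assuming ebm_global.data() returns a dict with 'names' and
# 'scores' for the overall importance summary (true for recent interpret releases):
# %%
import pandas as pd
overall = ebm_global.data()  # overall term importances (assumed keys: 'names', 'scores')
importance_table = pd.DataFrame({
    'term': overall['names'],
    'importance': overall['scores'],
}).sort_values('importance', ascending=False)
print(importance_table.head(3))  # the three most important terms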
#%% # Local Explanation
ebm_local = ebm.explain_local(dev_features[:5], dev_labels[:5], name='EBM')
show(ebm_local)
#%% # Performance
from interpret.perf import ROC
ebm_perf = ROC(ebm.predict_proba).explain_perf(dev_features,
                                               dev_labels,
                                               name='EBM')
show(ebm_perf)
# %% [markdown]
# ### Training and Explaining Neural Networks
# Train two Neural Networks:
# 1. One-layer MLP (ReLU activation function + 50 hidden neurons)
# 2. Two-layer MLP (ReLU activation function + (20, 20) hidden neurons)
#
Example 8
model = model.fit(X=X_train, y=y_train)
model.predict(X_train).mean()
model.coef_
X_train.columns
model.intercept_
model.get_params()

# %% Explainable Boosting Machine (EBM)
from interpret.glassbox import ExplainableBoostingClassifier, LogisticRegression
from interpret import show

ebm = ExplainableBoostingClassifier()
ebm.fit(X=X_train, y=y_train)

ebm_global = ebm.explain_global(name='EBM')
show(ebm_global)

# %%
log_model = LogisticRegression()
log_model.fit(X=X_train, y=y_train)
log_global = log_model.explain_global(name='LogReg')
show(log_global)

show([ebm_global, log_global], share_tables=True)

# %%
from interpret.data import ClassHistogram

hist = ClassHistogram().explain_data(X_train, y_train, name='Train Data')
show(hist)
Example 9
from interpret.blackbox import LimeTabular
from interpret import show
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score, accuracy_score

# %% Load and preprocess data
# DataLoader is a project-specific helper defined elsewhere in this repository
data_loader = DataLoader()
data_loader.load_dataset()
data_loader.preprocess_data()
# Split the data for evaluation
X_train, X_test, y_train, y_test = data_loader.get_data_split()
# Oversample the train data
X_train, y_train = data_loader.oversample(X_train, y_train)
print(X_train.shape)
print(X_test.shape)

# %% Fit blackbox model
rf = RandomForestClassifier()
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)
print(f"F1 Score {f1_score(y_test, y_pred, average='macro')}")
print(f"Accuracy {accuracy_score(y_test, y_pred)}")

# %% Apply lime
# Initialize LIME for tabular data
lime = LimeTabular(predict_fn=rf.predict_proba, data=X_train, random_state=1)
# Get local explanations
lime_local = lime.explain_local(X_test[-20:], y_test[-20:], name='LIME')

show(lime_local)

# %%
Example 10
    def blackbox_local_explanation(
        self,
        num_samples=0.5,
        sample_no=None,
        method="lime",
        predictions="default",
        show=True,
        **kwargs,
    ):
        """
        Plots an interpretable display that explains individual predictions of your model.

        Supported explainers are either 'lime' or 'shap'.
        
        Parameters
        ----------
        num_samples : int, float, or 'all', optional
            Number of samples to explain; a value between 0 and 1 is treated as a fraction of the test set, and 'all' includes every sample, by default 0.5

        sample_no : int, optional
            Sample number to isolate and analyze, if provided it overrides num_samples, by default None

        method : str, optional
            Explainer type, can either be 'lime' or 'shap', by default 'lime'

        predictions : str, optional
            Prediction type, can either be 'default' (.predict) or 'probability' if the model can predict probabilities, by default 'default'

        show : bool, optional
            False to not display the plot, by default True
        
        Returns
        -------
        Interpret
            Explanation object that can be rendered with interpret.show
        """

        if predictions == "probability":
            predict_fn = self.model.predict_proba
        else:
            predict_fn = self.model.predict

        # Determine method
        if method.lower() in INTERPRET_EXPLAINERS["local"]:
            if method.lower() == "lime":
                data = self.x_train
            elif method.lower() == "shap":
                data = np.median(self.x_train, axis=0).reshape(1, -1)
            else:
                raise ValueError

            explainer = INTERPRET_EXPLAINERS["local"][method.lower()](
                predict_fn=predict_fn, data=data, **kwargs
            )

        else:
            raise ValueError(
                'Supported blackbox local explainers are only "lime" and "shap".'
            )

        if sample_no is not None:
            if not isinstance(sample_no, int) or sample_no < 1:
                raise ValueError("Sample number must be a positive integer (1 or greater).")

            samples = slice(sample_no - 1, sample_no)
        else:
            if num_samples == "all":
                samples = slice(0, len(self.x_test))
            elif num_samples <= 0:
                raise ValueError(
                    "Number of samples must be greater than 0. If it is less than 1, it will be treated as a percentage."
                )
            elif num_samples > 0 and num_samples < 1:
                samples = slice(0, int(num_samples * len(self.x_test)))
            else:
                samples = slice(0, num_samples)

        explainer_local = explainer.explain_local(
            self.x_test[samples], self.y_test[samples], name=method.upper()
        )

        self.trained_blackbox_explainers[method.lower()] = explainer_local

        if show:
            interpret.show(explainer_local)

        return explainer_local
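Hypothetical calls for the method above (the `analysis` instance and its test split are assumptions, not part of the original snippet):

    # Explain the first 10% of the test set with LIME, using predicted probabilities
    lime_local = analysis.blackbox_local_explanation(num_samples=0.1, method="lime", predictions="probability")
    # Explain a single observation (sample 5) with SHAP, without rendering the dashboard
    shap_local = analysis.blackbox_local_explanation(sample_no=5, method="shap", show=False)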
Example 11
train_cols = df.columns[0:-1]
label = df.columns[-1]
X = df[train_cols]
y = df[label]
#X,y = datasets.load_boston(return_X_y=True)
seed = 1
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.20,
                                                    random_state=seed)
from interpret import show
from interpret.data import ClassHistogram

hist = ClassHistogram().explain_data(X_train, y_train, name='Train Data')
show(hist)
print(type(hist))
from interpret.glassbox import ExplainableBoostingRegressor, LogisticRegression, ClassificationTree, DecisionListClassifier

ebm = ExplainableBoostingRegressor(random_state=seed)
ebm.fit(X_train, y_train)  # Works on DataFrames and NumPy arrays
ebm_global = ebm.explain_global(name='EBM')
for i in range(7):
    ebm_global.visualize(i).write_html('Concrete_Strength/CS_' +
                                       df.columns[i] + '.html')

preds = ebm.predict(X_test)
#for i in range(len(preds)):
#print(preds[i],y_test[i])
print(preds)
print(y_test)
Example 12
X_train, y_train = data_loader.oversample(X_train, y_train)
print("After oversampling:", X_train.shape)

# %% Fit logistic regression model
lr = LogisticRegression(random_state=2021, feature_names=X_train.columns, penalty='l1', solver='liblinear')
lr.fit(X_train, y_train)
print("Training finished.")

# %% Evaluate logistic regression model
y_pred = lr.predict(X_test)
print(f"F1 Score {f1_score(y_test, y_pred, average='macro')}")
print(f"Accuracy {accuracy_score(y_test, y_pred)}")

# %% Explain local prediction
lr_local = lr.explain_local(X_test[:100], y_test[:100], name='Logistic Regression')
show(lr_local)

# %% Explain global logistic regression model
lr_global = lr.explain_global(name='Logistic Regression')
show(lr_global)

# %% Fit decision tree model
tree = ClassificationTree()
tree.fit(X_train, y_train)
print("Training finished.")
y_pred = tree.predict(X_test)
print(f"F1 Score {f1_score(y_test, y_pred, average='macro')}")
print(f"Accuracy {accuracy_score(y_test, y_pred)}")

# %% Explain local prediction
tree_local = tree.explain_local(X_test[:100], y_test[:100], name='Tree')
Example 13
training_columns = ['x', 'y']
training_df = pd.concat([df_A.iloc[:500], df_B.iloc[:500]],
                        ignore_index=True,
                        sort=True)

# Define the test df (second 500 elements of each category)
test_df = pd.concat([df_A.iloc[500:], df_B.iloc[500:]],
                    ignore_index=True,
                    sort=True)

ebm_clf = ExplainableBoostingClassifier()
ebm_clf.fit(training_df[training_columns], training_df['category'])

probabilities = ebm_clf.predict_proba(test_df[training_columns])
ebm_global = ebm_clf.explain_global()
show(ebm_global)

for prob in range(2):
    test_df['prob_{0}'.format(prob)] = probabilities[:, prob]

figcontur = plt.figure(figsize=(18, 7.5))
contourax = figcontur.add_subplot(111)
xx, yy = make_meshgrid(test_df['x'], test_df['y'])
plot_contours(contourax, ebm_clf, xx, yy, cmap='RdYlBu', alpha=0.8)
contourax.scatter(test_df.x,
                  test_df.y,
                  c=test_df['category'],
                  cmap='RdYlBu',
                  s=20,
                  edgecolors='k')
contourax.set_xlim(xx.min(), xx.max())