Example #1
    def dependence(shap_values,
                   X_vald,
                   model_file_path,
                   learner_name,
                   file_postfix=""):
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            fig = plt.figure(figsize=(14, 7))
            plots_cnt = np.min([9, X_vald.shape[1]])
            cols_cnt = 3
            rows_cnt = 3
            if plots_cnt < 4:
                rows_cnt = 1
            elif plots_cnt < 7:
                rows_cnt = 2
            for i in range(plots_cnt):
                ax = fig.add_subplot(rows_cnt, cols_cnt, i + 1)
                shap.dependence_plot(
                    f"rank({i})",
                    shap_values,
                    X_vald,
                    show=False,
                    title=f"Importance #{i+1}",
                    ax=ax,
                )

            fig.tight_layout(pad=2.0)
            fig.savefig(
                os.path.join(
                    model_file_path,
                    f"{learner_name}_shap_dependence{file_postfix}.png"))
            plt.close("all")
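A minimal usage sketch for the helper above (a hedged illustration, not part of the original source): it assumes the function is exposed at module level and that its own imports (os, warnings, numpy as np, matplotlib.pyplot as plt, shap) are in scope; the data and model below are hypothetical.

import numpy as np
import pandas as pd
import shap
from sklearn.ensemble import RandomForestRegressor

X = pd.DataFrame(np.random.randn(200, 5), columns=list("abcde"))
y = X["a"] * 2 + np.random.randn(200)
model = RandomForestRegressor(n_estimators=20).fit(X, y)
shap_values = shap.TreeExplainer(model).shap_values(X)

dependence(shap_values, X, ".", "rf")  # writes ./rf_shap_dependence.png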
Example #2
def test_lightgbm_binary():
    try:
        import lightgbm
    except ImportError:
        print("Skipping test_lightgbm_binary!")
        return
    import shap
    from sklearn.model_selection import train_test_split

    # train lightgbm model
    X_train, X_test, Y_train, Y_test = train_test_split(*shap.datasets.adult(),
                                                        test_size=0.2,
                                                        random_state=0)
    model = lightgbm.sklearn.LGBMClassifier()
    model.fit(X_train, Y_train)

    # explain the model's predictions using SHAP values
    shap_values = shap.TreeExplainer(model).shap_values(X_test)

    # validate structure of shap values, must be a list of ndarray for both classes
    assert isinstance(shap_values, list)
    assert len(shap_values) == 2

    # ensure plot works for first class
    shap.dependence_plot(0, shap_values[0], X_test, show=False)
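Note that, depending on the installed shap version, TreeExplainer may return a single 2-D array for binary models instead of the two-element list asserted above; a defensive sketch reusing the names from this test:

if isinstance(shap_values, list):
    sv_pos = shap_values[1]  # contributions toward the positive class
else:
    sv_pos = shap_values
shap.dependence_plot(0, sv_pos, X_test, show=False)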
Example #3
def plot_shap(model, test, instance=None, feature=None, dataset=False):
    """
    Displays shap plots to explain a black box model.

    :param model: the model considered. The shap plots are calculated only after the model has been fit.
    :param test: test dataset.
    :param instance: instance of the test dataset to explain. default_value=None
    :param feature: feature of the test dataset to explain. default_value=None
    :param dataset: if True the entire dataset is taken into account. default_value=False
    :return:
    """
    # Build a TreeExplainer for the given model. Not all models are supported.
    explainer = TreeExplainer(model)
    # Compute SHAP values
    shap_values = explainer.shap_values(test)
    initjs()
    # If not None explain single prediction
    if instance is not None:
        force_plot(explainer.expected_value,
                   shap_values[instance, :],
                   test.iloc[instance, :],
                   matplotlib=True)
    # If not None explain single feature
    if feature is not None:
        fig, ax = plt.subplots(figsize=(13, 10))
        dependence_plot(feature, shap_values, test, ax=ax)
    # If True explain the entire dataset
    if dataset:
        summary_plot(shap_values, test, plot_size=(8, 8))
        summary_plot(shap_values, test, plot_type="bar", plot_size=(8, 8))
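A minimal usage sketch for plot_shap above (hypothetical data and model; it assumes the bare shap names it calls, namely TreeExplainer, initjs, force_plot, dependence_plot, and summary_plot, plus matplotlib.pyplot as plt, are imported at module level):

import numpy as np
import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor

X = pd.DataFrame(np.random.randn(200, 4), columns=["f0", "f1", "f2", "f3"])
y = X["f0"] - X["f1"] + 0.1 * np.random.randn(200)
model = GradientBoostingRegressor().fit(X, y)

plot_shap(model, X, instance=0)      # explain one prediction
plot_shap(model, X, feature="f0")    # dependence plot for one feature
plot_shap(model, X, dataset=True)    # global summary plots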
Example #4
def test_front_page_sklearn():
    import sklearn.ensemble
    import shap

    # load JS visualization code to notebook
    shap.initjs()

    # train model
    X, y = shap.datasets.boston()
    models = [
        sklearn.ensemble.RandomForestRegressor(n_estimators=100),
        sklearn.ensemble.ExtraTreesRegressor(n_estimators=100),
    ]
    for model in models:
        model.fit(X, y)

        # explain the model's predictions using SHAP values
        explainer = shap.TreeExplainer(model)
        shap_values = explainer.shap_values(X)

        # visualize the first prediction's explanation
        shap.force_plot(explainer.expected_value, shap_values[0, :], X.iloc[0, :])

        # visualize the training set predictions
        shap.force_plot(explainer.expected_value, shap_values, X)

        # create a SHAP dependence plot to show the effect of a single feature across the whole dataset
        shap.dependence_plot(5, shap_values, X, show=False)
        shap.dependence_plot("RM", shap_values, X, show=False)

        # summarize the effects of all the features
        shap.summary_plot(shap_values, X, show=False)
Example #5
def test_front_page_xgboost():
    import xgboost
    import shap

    # load JS visualization code to notebook
    shap.initjs()

    # train XGBoost model
    X,y = shap.datasets.boston()
    bst = xgboost.train({"learning_rate": 0.01}, xgboost.DMatrix(X, label=y), 100)

    # explain the model's predictions using SHAP values (use pred_contrib in LightGBM)
    shap_values = bst.predict(xgboost.DMatrix(X), pred_contribs=True)

    # visualize the first prediction's explanation
    shap.force_plot(shap_values[0,:], X.iloc[0,:])

    # visualize the training set predictions
    shap.force_plot(shap_values, X)

    # create a SHAP dependence plot to show the effect of a single feature across the whole dataset
    shap.dependence_plot(5, shap_values, X, show=False)
    shap.dependence_plot("RM", shap_values, X, show=False)

    # summarize the effects of all the features
    shap.summary_plot(shap_values, X, show=False)
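The comment above hints at LightGBM's equivalent route; a self-contained sketch on synthetic data (a hedged illustration, not part of the original test): predict(..., pred_contrib=True) returns the SHAP matrix with the expected value appended as the last column.

import lightgbm
import numpy as np

X = np.random.randn(500, 8)
y = X[:, 0] + 0.1 * np.random.randn(500)
bst = lightgbm.train({"learning_rate": 0.01, "verbose": -1},
                     lightgbm.Dataset(X, label=y), num_boost_round=100)
contribs = bst.predict(X, pred_contrib=True)             # shape (n, n_features + 1)
shap_vals, expected = contribs[:, :-1], contribs[0, -1]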
Example #6
    def shap_dependence_plot(self, ind, interaction_index, interaction=False):
        try:
            if not interaction:
                shap.dependence_plot(ind=ind, interaction_index=interaction_index,
                                     shap_values=self.shap_v,
                                     features=self.x_train,
                                     display_features=self.x_train, show=False)
                fig_id = str(time.time()).split('.')[0]
                path = save_fig('dependence_plot_{}_{}'.format(ind, fig_id))
                return path
            else:
                explainer, shap_inter_values, expected_value = self.calc_shap_inter_values()
                shap_inter_values = np.array(shap_inter_values)
                shap.dependence_plot((ind, interaction_index),
                                     shap_inter_values,
                                     features=self.x_train,
                                     display_features=self.x_train, show=False)
                fig_id = str(time.time()).split('.')[0]
                path = save_fig('inter_dependence_{}_{}'.format(ind, fig_id))
                return path

        except Exception as err:
            print('Error: model is not supported by SHAP dependence plot')
            err_logging(err)
            raise
Example #7
def test_front_page_xgboost():
    try:
        import xgboost
    except ImportError:
        print("Skipping test_front_page_xgboost!")
        return
    import shap

    # load JS visualization code to notebook
    shap.initjs()

    # train XGBoost model
    X, y = shap.datasets.boston()
    model = xgboost.train({"learning_rate": 0.01}, xgboost.DMatrix(X, label=y), 100)

    # explain the model's predictions using SHAP values
    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(X)

    # visualize the first prediction's explanation
    shap.force_plot(explainer.expected_value, shap_values[0, :], X.iloc[0, :])

    # visualize the training set predictions
    shap.force_plot(explainer.expected_value, shap_values, X)

    # create a SHAP dependence plot to show the effect of a single feature across the whole dataset
    shap.dependence_plot(5, shap_values, X, show=False)
    shap.dependence_plot("RM", shap_values, X, show=False)

    # summarize the effects of all the features
    shap.summary_plot(shap_values, X, show=False)
Example #8
def test_front_page_xgboost():
    xgboost = pytest.importorskip("xgboost")

    # load JS visualization code to notebook
    shap.initjs()

    # train XGBoost model
    X, y = shap.datasets.california(n_points=500)
    model = xgboost.train({"learning_rate": 0.01}, xgboost.DMatrix(X, label=y),
                          100)

    # explain the model's predictions using SHAP values
    explainer = shap.GPUTreeExplainer(model)
    shap_values = explainer.shap_values(X)

    # visualize the first prediction's explanation
    shap.force_plot(explainer.expected_value, shap_values[0, :], X.iloc[0, :])

    # visualize the training set predictions
    shap.force_plot(explainer.expected_value, shap_values, X)

    # create a SHAP dependence plot to show the effect of a single feature across the whole dataset
    shap.dependence_plot(5, shap_values, X, show=False)
    shap.dependence_plot("Longitude", shap_values, X, show=False)

    # summarize the effects of all the features
    shap.summary_plot(shap_values, X, show=False)
Example #9
def test_random_dependence():
    fig, ax = plt.subplots(1, 1)
    shap.dependence_plot(0,
                         np.random.randn(20, 5),
                         np.random.randn(20, 5),
                         show=False,
                         ax=ax)
Example #10
def test_random_dependence():
    """ Make sure a dependence plot does not crash.
    """
    shap.dependence_plot(0,
                         np.random.randn(20, 5),
                         np.random.randn(20, 5),
                         show=False)
Example #11
def shapear(test):
    """
    Explain features
    """
    # Open model
    del test[target]
    with open(target + '_model.pkl', 'rb') as f:
        model = pickle.load(f)
    shap.initjs()
    shap_values = shap.TreeExplainer(model).shap_values(test)
    global_shap_vals = np.abs(shap_values).mean(0)
    global_shap_std = np.abs(shap_values).std(0)
    df = pd.DataFrame()
    df['features'] = test.columns
    df['shap'] = global_shap_vals
    df['shap_std'] = global_shap_std
    df = df.sort_values(by='shap', ascending=False)
    df.index = range(len(df))
    df.to_csv('shaps.csv')

    # Dependence plots, ordered by global importance
    pdf_shap = PdfPages(target + '_shap.pdf')
    top_inds = np.argsort(-np.sum(np.abs(shap_values), 0))
    for i in top_inds:
        plt.figure()
        shap.dependence_plot(i,
                             shap_values,
                             test,
                             show=False,
                             interaction_index=None,
                             alpha=0.2)
        pdf_shap.savefig()
        plt.close()
    pdf_shap.close()
    return
Example #12
    def plot_shap_dependence(self,
                             treatment_group,
                             feature_idx,
                             shap_dict=None,
                             interaction_idx='auto',
                             **kwargs):
        """
        Plots dependency of shapley values for a specified feature, colored by an interaction feature.
        Skips the calculation part if shap_dict is given.

        This plots the value of the feature on the x-axis and the SHAP value of the same feature
        on the y-axis. This shows how the model depends on the given feature, and is like a
        richer extension of the classical partial dependence plots. Vertical dispersion of the
        data points represents interaction effects.

        Args:
            treatment_group (str or int): name of treatment group to create dependency plot on
            feature_idx (str or int): feature index / name to create dependency plot on
            shap_dict (optional, dict): a dict of shapley value matrices. If None, shap_dict will be computed.
            interaction_idx (optional, str or int): feature index / name used in coloring scheme as interaction feature.
                If "auto" then shap.common.approximate_interactions is used to pick what seems to be the
                strongest interaction (note that to find the true strongest interaction you need to compute
                the SHAP interaction values).
        """
        if shap_dict is None:
            shap_dict = self.get_shap_values()

        shap_values = shap_dict[treatment_group]

        shap.dependence_plot(feature_idx,
                             shap_values,
                             self.X,
                             interaction_index=interaction_idx,
                             feature_names=self.features,
                             **kwargs)
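The docstring above mentions shap's approximate-interaction heuristic; a small sketch of calling it directly (random stand-in data; the helper lives in shap.utils in recent releases and shap.common in older ones):

import numpy as np
import pandas as pd
import shap
from shap.utils import approximate_interactions

X = pd.DataFrame(np.random.randn(100, 4), columns=["f0", "f1", "f2", "f3"])
sv = np.random.randn(100, 4)               # stand-in SHAP value matrix
inds = approximate_interactions(0, sv, X)  # candidate partners, strongest first
shap.dependence_plot(0, sv, X, interaction_index=int(inds[0]), show=False)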
Example #13
def test_random_dependence_no_interaction():
    """ Make sure a dependence plot does not crash when we are not showing interations.
    """
    shap.dependence_plot(0,
                         np.random.randn(20, 5),
                         np.random.randn(20, 5),
                         show=False,
                         interaction_index=None)
Example #14
def SHAP_DepenContrib(X, feature_names, feature, shap_values, interact='auto'):
    import shap

    print('Creating SHAP dependence contribution plot')

    # SHAP dependence contribution plot
    shap.dependence_plot(feature,
                         shap_values[1],
                         features=X,
                         feature_names=feature_names,
                         show=False,
                         interaction_index=interact)
Example #15
def test_random_dependence_no_interaction():
    fig, ax = plt.subplots(1, 1)
    shap.dependence_plot(0,
                         np.random.randn(20, 5),
                         np.random.randn(20, 5),
                         show=False,
                         interaction_index=None,
                         ax=ax)
Example #16
    def generate_shap_dependency_plot(self):
        shap_values = self.shapexplainer.shap_values(self.training_data)
        plt.clf()
        for ftr in range(len(self.feature_names)):
            shap.dependence_plot(ftr, shap_values, self.training_data,
                                 feature_names=self.feature_names, show=False)
            plt.savefig('shap_dependency_plot_' + self.feature_names[ftr] + '.png')
Example #17
    def show_SHAP_PDP_interaction(self, features=()):
        for f in features:
            try:
                shap.dependence_plot(tuple(f),
                                     self.explainer.shap_interaction_values(self.X_train),
                                     self.X_train)
            except Exception:
                print("Linear estimators don't have interaction values.")
                return
Example #18
def test_xgboost_mixed_types():
    xgboost = pytest.importorskip('xgboost')

    X, y = shap.datasets.california(n_points=500)
    X["HouseAge"] = X["HouseAge"].astype(np.int64)
    X['IsOld'] = (X['HouseAge'] > 30)
    bst = xgboost.train({"learning_rate": 0.01, "silent": 1}, xgboost.DMatrix(X, label=y), 1000)
    shap_values = shap.TreeExplainer(bst).shap_values(X)
    shap.dependence_plot(0, shap_values, X, show=False)
Example #19
def plot_print_feature_shap(model_path, data_feats, type):
    '''
    Print feature importance with SHAP.
    :param model_path:
    :param data_feats:
    :param type:
    :return:
    '''

    if not (os.path.exists(model_path) and os.path.exists(data_feats)):
        print("file does not exist! {}, {}".format(model_path, data_feats))
        sys.exit(0)
    gbm = lgb.Booster(model_file=model_path)
    gbm.params["objective"] = "regression"
    # feature column names
    feats_col_name = []
    for feat_index in range(46):
        feats_col_name.append('feat' + str(feat_index) + 'name')
    X_train, _ = ds.load_svmlight_file(data_feats)
    #features
    feature_mat = X_train.todense()
    df_feature = pd.DataFrame(feature_mat)
    # add column headers
    df_feature.columns = feats_col_name
    explainer = shap.TreeExplainer(gbm)
    shap_values = explainer.shap_values(df_feature[feats_col_name])

    # Overall feature analysis: a scatter plot and a bar plot
    if type == 1:
        # Use the mean absolute SHAP value of a feature as its importance
        # (computed differently from feature_importance)
        shap.summary_plot(shap_values,
                          df_feature[feats_col_name],
                          plot_type="bar")
        # overall summary of the features
        shap.summary_plot(shap_values, df_feature[feats_col_name])
    # Partial-dependence-style plot; unlike a classical partial dependence plot, the y-axis shows SHAP values rather than the target variable y
    if type == 2:
        shap.dependence_plot('feat3name',
                             shap_values,
                             df_feature[feats_col_name],
                             interaction_index=None,
                             show=True)
    # Effect of a feature on the target under interaction with a second feature
    if type == 3:
        shap.dependence_plot('feat3name',
                             shap_values,
                             df_feature[feats_col_name],
                             interaction_index='feat5name',
                             show=True)
    # Analyze interactions among multiple features
    if type == 4:
        shap_interaction_values = explainer.shap_interaction_values(
            df_feature[feats_col_name])
        shap.summary_plot(shap_interaction_values,
                          df_feature[feats_col_name],
                          max_display=4,
                          show=True)
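Hypothetical invocations of the four modes above (placeholder paths; the feats file must supply the 46 columns the function hard-codes):

plot_print_feature_shap("lgb.model", "train.svm", type=1)  # global summary plots
plot_print_feature_shap("lgb.model", "train.svm", type=2)  # single-feature dependence
plot_print_feature_shap("lgb.model", "train.svm", type=3)  # colored by a second feature
plot_print_feature_shap("lgb.model", "train.svm", type=4)  # interaction summary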
Example #20
def test_xgboost_mixed_types():
    xgboost = pytest.importorskip('xgboost')

    X, y = shap.datasets.boston()
    X["LSTAT"] = X["LSTAT"].astype(np.int64)
    X["B"] = X["B"].astype(np.bool)
    bst = xgboost.train({"learning_rate": 0.01, "silent": 1}, xgboost.DMatrix(X, label=y), 1000)
    shap_values = shap.TreeExplainer(bst).shap_values(X)
    shap.dependence_plot(0, shap_values, X, show=False)
Example #21
def plot_shap_values(
    shap_values: dict,
    raw_data: pd.core.frame.DataFrame,
    processed_data: Union[None, pd.core.frame.DataFrame] = None,
    no_summary_col: Union[None, str] = None,
    alpha: float = 0.5,
    path: str = "",
) -> None:
    """Make plots of SHAP values.

    SHAP values quantify feature contributions to predictions.

    Args:
        shap_values: A dictionary of numpy arrays, each of which contains SHAP
            values for the outcome given by its key.
        raw_data: Feature values prior to processing into model input.
        processed_data: Feature values used as model input.
        no_summary_col: The name of a column to never use for summary plots.
        alpha: The opacity of plotted points, from 2e-8 (nearly transparent) to
            1 (opaque).
        path: The path preceding the Output folder in which the plots will be
            saved.
    """
    shap.initjs()
    if processed_data is None:
        processed_data = raw_data
    for col in processed_data.select_dtypes("category"):
        processed_data[col] = processed_data[col].cat.codes
    for key, arr in shap_values.items():
        shap.summary_plot(arr,
                          plot_type="bar",
                          feature_names=raw_data.columns,
                          show=False)
        save_plot(f"Importance_{key}", path=path)
        shap.summary_plot(arr, raw_data, alpha=alpha, show=False)
        save_plot(f"Summary_{key}", path=path)
        if raw_data.columns[np.argmax(
                np.abs(arr).mean(axis=0))] == no_summary_col:
            shap.dependence_plot(
                f"rank(1)",
                arr,
                processed_data,
                display_features=raw_data,
                alpha=alpha,
                show=False,
            )
        else:
            shap.dependence_plot(
                f"rank(0)",
                arr,
                processed_data,
                display_features=raw_data,
                alpha=alpha,
                show=False,
            )
        save_plot(f"Dependence_{key}", path=path)
Example #22
def plot_shap(X, model, label):
    shap_values = shap.TreeExplainer(model).shap_values(X.values)
    pyplot.figure()  # figsize=(10, 15)
    title = f'SHAP summary {label}'
    pyplot.title(title)
    shap.summary_plot(shap_values, X)
    pyplot.show()
    title = f'SHAP dependence for {label}'
    for col in X.columns.difference(['PATIENT_AGE_YEARS', 'PATIENT_GNDR']):
        shap.dependence_plot(col, shap_values, X, interaction_index=None)
Example #23
def makeDependence(X_train, shap_values):

    for col in X_train.columns:

        for i in range(len(shap_values)):
            f = plt.figure()
            shap.dependence_plot(col, shap_values[i], X_train)
            # include the class index so per-class plots don't overwrite each other
            f.savefig(col + "_class" + str(i) + "_dependence.png", bbox_inches='tight', dpi=600)
Example #24
    def shap_dep_plot(self, top_features, outcome):
        shap_values = self.explainer.shap_values(self.x)
        shap.dependence_plot(top_features[0],
                             shap_values[outcome],
                             self.x,
                             interaction_index=top_features[1],
                             show=False)
        plt.tight_layout()
        plt.savefig(self.out + "/shap_dependence.jpg", dpi=400)
        plt.close()
Example #25
def test_dependence_one_string_feature_auto_interaction():
    X = _create_sample_dataset(string_features={"Sex"})

    shap.dependence_plot(
        "Sex",
        np.random.randn(*X.values.shape),
        X,
        interaction_index='auto',
        show=False
    )
Example #26
def test_dependence_one_string_feature():
    """ Test the dependence plot with a string feature.
    """
    X = _create_sample_dataset(string_features={"Sex"})

    shap.dependence_plot("Sex",
                         np.random.randn(*X.values.shape),
                         X,
                         interaction_index="Age",
                         show=False)
Example #27
    def plot_dependency(self,
                        feature: str,
                        interaction_index: str = "auto",
                        save: Path = None):
        shap.dependence_plot(feature,
                             self.stable_shap_values,
                             self.partitions.X,
                             feature_names=self.feature_names,
                             interaction_index=interaction_index)
        return self.first.make_figure(save)
Example #28
def test_dependence_two_string_features():
    """ Test the dependence plot with two string features.
    """
    X = _create_sample_dataset(string_features={"Sex", "Blood group"})

    shap.dependence_plot("Sex",
                         np.random.randn(*X.values.shape),
                         X,
                         interaction_index="Blood group",
                         show=False)
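The string-feature tests here call _create_sample_dataset, which is not shown in these snippets; a minimal stand-in consistent with how the tests use it (hypothetical, not the original helper):

import numpy as np
import pandas as pd

def _create_sample_dataset(n=20, string_features=()):
    rng = np.random.default_rng(0)
    X = pd.DataFrame({
        "Age": rng.integers(18, 80, n).astype(float),
        "Sex": rng.choice(["male", "female"], n),
        "Blood group": rng.choice(["A", "B", "AB", "0"], n),
    })
    # keep the requested columns as strings, encode the rest numerically
    for col in X.columns:
        if X[col].dtype == object and col not in string_features:
            X[col] = pd.factorize(X[col])[0]
    return X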
Example #29
    def shap_dependence_viz(self, shap_df, features_df, model_dict, i, name):
        shap.dependence_plot(i,
                             shap_df.loc[:, model_dict['features_list']].values,
                             features_df.loc[:, model_dict['features_list']],
                             show=(not self.plots_dict['save']['plots']))
        if self.plots_dict['save']['plots'] is True:
            dependence_path = '{}/dependence_plots'.format(self.plots_dir)
            if not os.path.exists(dependence_path):
                os.mkdir(dependence_path)
            plt.savefig(f'{dependence_path}/dependence_plot_{name}_{i}.png')
            plt.clf()
Example #30
def test_lightgbm_multiclass():
    lightgbm = pytest.importorskip("lightgbm")
    # train lightgbm model
    X, Y = shap.datasets.iris()
    model = lightgbm.sklearn.LGBMClassifier()
    model.fit(X, Y)

    # explain the model's predictions using SHAP values
    shap_values = shap.TreeExplainer(model).shap_values(X)

    # ensure plot works for first class
    shap.dependence_plot(0, shap_values[0], X, show=False)
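Where the installed shap returns one SHAP matrix per class, as the indexing above assumes, a loop like this sketch renders a dependence plot for every class:

for cls, sv in enumerate(shap_values):
    shap.dependence_plot(0, sv, X, show=False, title=f"class {cls}")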