Ejemplo n.º 1
0
 def test_shap_contributions_0(self):
     """
     Smoke test for shap_backend: shap_contributions must run
     without error for every model in self.modellist.
     """
     for estimator in self.modellist:
         # Log which model family is being exercised.
         print(type(estimator))
         estimator.fit(self.x_df, self.y_df)
         shap_contributions(estimator, self.x_df)
Ejemplo n.º 2
0
    def test_shap_contributions_1(self):
        """
        test shap_backend with explainer pre-compute

        Builds the explainer type matching each model family and checks
        that shap_contributions accepts a pre-computed explainer.
        """
        for model in self.modellist:
            print(type(model))
            model.fit(self.x_df, self.y_df)
            # Hoist the repeated str(type(model)) lookup.
            model_type = str(type(model))
            # Tree-based libraries (including CatBoost) all use TreeExplainer.
            if model_type in simple_tree_model or model_type in catboost_model:
                explainer = shap.TreeExplainer(model)
            elif model_type in linear_model:
                explainer = shap.LinearExplainer(model, self.x_df)
            elif model_type in svm_model:
                explainer = shap.KernelExplainer(model.predict, self.x_df)
            else:
                # Fail fast: the original code left `explainer` unbound (first
                # iteration) or stale (later iterations) for unmatched models.
                raise ValueError(f"Unsupported model type: {model_type}")

            shap_contributions(model, self.x_df, explainer)
Ejemplo n.º 3
0
    def compute_contributions(self, contributions=None, use_groups=None):
        """
        The compute_contributions method computes the contributions associated
        to the data ypred specified.
        Need a data ypred specified in an add_input to display detail_contributions.

        Parameters
        ----------
        contributions : object (optional)
            Local contributions, or list of local contributions.
            If None, they are computed with shap via the model's explainer.
        use_groups : bool (optional)
            Whether or not to compute groups of features contributions.
            Defaults to True whenever feature groups are defined; pass
            False to opt out explicitly.

        Returns
        -------
        pandas.DataFrame
            Data with contributions associated to the ypred specified.
        pandas.DataFrame
            ypred data with right probabilities associated.

        Raises
        ------
        ValueError
            If add_input was never called, or was called without x.
        """
        # Groups are on by default when feature groups exist; only an
        # explicit use_groups=False disables them.
        use_groups = use_groups is not False and self.features_groups is not None

        if not hasattr(self, "data"):
            raise ValueError("add_input method must be called at least once.")
        if self.data["x"] is None:
            raise ValueError("""
                x must be specified in an add_input method to apply detail_contributions.
                """)
        # Predictions are required downstream; compute them lazily.
        if self.data["ypred_init"] is None:
            self.predict()

        if contributions is None:
            contributions, explainer = shap_contributions(
                self.model, self.data["x_preprocessed"], self.explainer)
        # Normalize the raw contributions (single vs multi-class) and pick
        # the matching internal state before validation.
        adapt_contrib = self.adapt_contributions(contributions)
        self.state = self.choose_state(adapt_contrib)
        contributions = self.validate_contributions(adapt_contrib)
        contributions = self.apply_preprocessing_for_contributions(
            contributions, self.preprocessing)
        self.check_contributions(contributions)
        # Probabilities only make sense for classifiers.
        proba_values = self.predict_proba(
        ) if self._case == "classification" else None
        y_pred, match_contrib = keep_right_contributions(
            self.data["ypred_init"], contributions, self._case, self._classes,
            self.label_dict, proba_values)
        if use_groups:
            match_contrib = group_contributions(
                match_contrib, features_groups=self.features_groups)

        return y_pred, match_contrib
Ejemplo n.º 4
0
    def compile(self, x, model, explainer=None, contributions=None, y_pred=None,
                preprocessing=None, postprocessing=None, title_story: str = None):
        """
        The compile method is the first step to understand model and prediction. It performs the sorting
        of contributions, the reverse preprocessing steps and performs all the calculations necessary for
        a quick display of plots and efficient display of summary of explanation.
        Most of the parameters are optional but all help to display results that can be understood

        This step can last a few moments with large datasets.

        Parameters
        ----------
        x : pandas.DataFrame
            Prediction set.
            IMPORTANT: this should be the raw prediction set, whose values are seen by the end user.
            x is a preprocessed dataset: Shapash can apply the model to it
        model : model object
            model used for consistency checks. The model object can also be used by some methods to
            compute predict and predict_proba values
        explainer : explainer object
            explainer must be a shap object. Mutually exclusive with `contributions`.
        contributions : pandas.DataFrame, np.ndarray or list
            single or multiple contributions (multi-class) to handle.
            if pandas.DataFrame, the index and columns should be shared with the prediction set.
            if np.ndarray, index and columns will be generated according to the x dataset
        y_pred : pandas.Series or pandas.DataFrame, optional (default: None)
            Prediction values (1 column only).
            The index must be identical to the index of x_pred.
            This is an interesting parameter for more explicit outputs. Shapash lets users define their own predict,
            as they may wish to set their own threshold (classification)
        preprocessing : category_encoders, ColumnTransformer, list, dict, optional (default: None)
            --> Different types of preprocessing are available:

            - A single category_encoders (OrdinalEncoder/OnehotEncoder/BaseNEncoder/BinaryEncoder/TargetEncoder)
            - A single ColumnTransformer with scikit-learn encoding or category_encoders transformers
            - A list with multiple category_encoders with optional (dict, list of dict)
            - A list with a single ColumnTransformer with optional (dict, list of dict)
            - A dict
            - A list of dict
        postprocessing : dict, optional (default: None)
            Dictionary of postprocessing modifications to apply to the x_pred dataframe.
            Dictionary with feature names as keys (or number, or well labels referencing to feature names),
            which modifies dataset features by features.

            --> Different types of postprocessing are available, but the syntax is this one:
            One key per feature, 5 different types of modifications:

            >>> {
            'feature1' : { 'type' : 'prefix', 'rule' : 'age: ' },
            'feature2' : { 'type' : 'suffix', 'rule' : '$/week ' },
            'feature3' : { 'type' : 'transcoding', 'rule': { 'code1' : 'single', 'code2' : 'married'}},
            'feature4' : { 'type' : 'regex' , 'rule': { 'in' : 'AND', 'out' : ' & ' }},
            'feature5' : { 'type' : 'case' , 'rule': 'lower' }
            }

            Only one transformation per feature is possible.
        title_story: str (default: None)
            The default title is empty. You can specify a custom title
            which can be used in the webapp, or other methods

        Example
        --------
        >>> xpl.compile(x=xtest_df,model=my_model)

        """
        self.x_init = x
        # Recover the human-readable dataset by undoing the preprocessing.
        self.x_pred = inverse_transform(self.x_init, preprocessing)
        self.preprocessing = preprocessing
        self.model = model
        self._case, self._classes = self.check_model()
        self.check_label_dict()
        if self.label_dict:
            self.inv_label_dict = {v: k for k, v in self.label_dict.items()}
        # `explainer` and `contributions` are alternative sources of the same
        # information — providing both is ambiguous.
        if explainer is not None and contributions is not None:
            raise ValueError("You have to specify just one of these arguments: explainer, contributions")
        if contributions is None:
            contributions, explainer = shap_contributions(model, self.x_init, self.check_explainer(explainer))
        adapt_contrib = self.adapt_contributions(contributions)
        # The internal state depends on single vs multi-class contributions.
        self.state = self.choose_state(adapt_contrib)
        self.contributions = self.apply_preprocessing(self.validate_contributions(adapt_contrib), preprocessing)
        self.check_contributions()
        self.explainer = explainer
        self.y_pred = self.check_y_pred(y_pred)
        # Bidirectional column index <-> name lookup tables.
        self.columns_dict = {i: col for i, col in enumerate(self.x_pred.columns)}
        self.inv_columns_dict = {v: k for k, v in self.columns_dict.items()}
        self.check_features_dict()
        self.inv_features_dict = {v: k for k, v in self.features_dict.items()}
        postprocessing = self.modify_postprocessing(postprocessing)
        self.check_postprocessing(postprocessing)
        self.postprocessing_modifications = self.check_postprocessing_modif_strings(postprocessing)
        self.postprocessing = postprocessing
        if self.postprocessing_modifications:
            # Keep an untouched copy for plotting before postprocessing
            # mutates string values in x_pred.
            self.x_contrib_plot = copy.deepcopy(self.x_pred)
        self.x_pred = self.apply_postprocessing(postprocessing)
        # Pre-rank and store contributions for fast plot rendering.
        self.data = self.state.assign_contributions(
            self.state.rank_contributions(
                self.contributions,
                self.x_pred
            )
        )
        self.features_imp = None
        self.features_desc = self.check_features_desc()
        if title_story is not None:
            self.title_story = title_story