Beispiel #1
0
    def compute_contributions(self, contributions=None, use_groups=None):
        """
        Compute the contributions associated with the stored prediction.

        Inputs must have been registered beforehand through add_input. When no
        prediction is stored yet (``self.data["ypred_init"]`` is None),
        ``self.predict()`` is called first.

        Parameters
        ----------
        contributions : object (optional)
            Local contributions, or list of local contributions. When None,
            they are computed with ``shap_contributions`` from the model, the
            preprocessed inputs and the stored explainer.
        use_groups : bool (optional)
            Whether or not to compute groups of features contributions.
            Grouping is applied unless this is explicitly False, and only when
            ``self.features_groups`` is not None.

        Returns
        -------
        pandas.DataFrame
            ypred data with right probabilities associated.
        pandas.DataFrame
            Data with contributions associated to the ypred specified
            (grouped by features groups when grouping is active).

        Raises
        ------
        ValueError
            If the instance has no ``data`` attribute (add_input was never
            called) or if ``self.data["x"]`` is None.
        """
        # Grouping requires both that the caller did not explicitly opt out
        # and that feature groups have been declared on the instance.
        use_groups = use_groups is not False and self.features_groups is not None

        if not hasattr(self, "data"):
            raise ValueError("add_input method must be called at least once.")
        if self.data["x"] is None:
            raise ValueError("""
                x must be specified in an add_input method to apply detail_contributions.
                """)
        if self.data["ypred_init"] is None:
            self.predict()

        if contributions is None:
            # shap_contributions returns a pair; only the contributions are
            # needed here, the second element is deliberately discarded.
            contributions, _ = shap_contributions(
                self.model, self.data["x_preprocessed"], self.explainer)

        adapt_contrib = self.adapt_contributions(contributions)
        # The state is chosen from the adapted contributions and stored on the
        # instance before validation.
        self.state = self.choose_state(adapt_contrib)
        contributions = self.validate_contributions(adapt_contrib)
        contributions = self.apply_preprocessing_for_contributions(
            contributions, self.preprocessing)
        self.check_contributions(contributions)

        # Probabilities are only requested in the classification case.
        proba_values = (
            self.predict_proba() if self._case == "classification" else None
        )
        y_pred, match_contrib = keep_right_contributions(
            self.data["ypred_init"], contributions, self._case, self._classes,
            self.label_dict, proba_values)

        if use_groups:
            match_contrib = group_contributions(
                match_contrib, features_groups=self.features_groups)

        return y_pred, match_contrib
Beispiel #2
0
    def to_pandas(
            self,
            features_to_hide=None,
            threshold=None,
            positive=None,
            max_contrib=None,
            proba=False
    ):
        """
        Export the summary of local explainability as a pandas DataFrame.

        This method proposes a set of parameters to summarize the explainability of each point.
        If the user does not specify any of them and a previous filter call left ``mask_params``
        on the object, those parameters are reused (and printed); otherwise the filter method
        is applied with the parameters given here.

        In the classification case, to_pandas summarizes the explainability which corresponds
        to the predicted values specified by the user (with the compile or add method).
        The proba parameter displays the corresponding predict proba value for each point.
        In the classification case, there are 2 ways to use this method:
        - Provide a real prediction set to explain
        - Focus on a constant target value and look at the proba and explainability
          corresponding to each point
          (in that case, specify a constant pd.Series with the add or compile method)

        Examples are presented in the tutorial local_plot (please check tutorial part of this doc)

        The computed summary is also stored in ``self.data['summary']``.

        Parameters
        ----------
        features_to_hide : list, optional (default: None)
            List of strings, containing features to hide.
        threshold : float, optional (default: None)
            Absolute threshold below which any contribution is hidden.
        positive: bool, optional (default: None)
            If True, hide negative values. Hide positive values otherwise. If None, hide nothing.
        max_contrib : int, optional (default: None)
            Number of contributions to show in the pandas df
        proba : bool, optional (default: False)
            adding proba in output df

        Returns
        -------
        pandas.DataFrame
            - selected explanation of each row for classification case

        Raises
        ------
        ValueError
            If ``self.y_pred`` is None.

        Examples
        --------
        >>> summary_df = xpl.to_pandas(max_contrib=2,proba=True)
        >>> summary_df
            pred	proba	    feature_1	value_1	    contribution_1	feature_2	value_2	    contribution_2
        0	0	    0.756416	Sex	        1.0	        0.322308	    Pclass	    3.0	        0.155069
        1	3	    0.628911	Sex	        2.0	        0.585475	    Pclass	    1.0	        0.370504
        2	0	    0.543308	Sex	        2.0	        -0.486667	    Pclass	    3.0	        0.255072
        """

        # Classification: y_pred is needed
        if self.y_pred is None:
            raise ValueError(
                "You have to specify y_pred argument. Please use add() or compile() method"
            )

        # Reuse the last filter parameters only when the caller passed none
        # and a previous filter call left mask_params on the object.
        no_filter_args = all(
            var is None
            for var in [features_to_hide, threshold, positive, max_contrib]
        )
        if no_filter_args and hasattr(self, 'mask_params'):
            print('to_pandas params: ' + str(self.mask_params))
        else:
            self.filter(features_to_hide=features_to_hide,
                        threshold=threshold,
                        positive=positive,
                        max_contrib=max_contrib)

        # Summarize information
        self.data['summary'] = self.state.summarize(
            self.data['contrib_sorted'],
            self.data['var_dict'],
            self.data['x_sorted'],
            self.mask,
            self.columns_dict,
            self.features_dict
        )

        # Matching with y_pred: probabilities are read from self.proba_values
        # right after predict_proba() is called, and only when requested.
        if proba:
            self.predict_proba()
            proba_values = self.proba_values
        else:
            proba_values = None

        y_pred, summary = keep_right_contributions(self.y_pred, self.data['summary'],
                                                   self._case, self._classes,
                                                   self.label_dict, proba_values)

        return pd.concat([y_pred, summary], axis=1)