def compute_contributions(self, contributions=None, use_groups=None):
    """
    Compute the contributions associated with the ypred data specified.

    Input data must have been supplied beforehand through the add_input
    method. As side effects, this method calls ``self.predict()`` when
    ``self.data["ypred_init"]`` is None and assigns ``self.state``.

    Parameters
    ----------
    contributions : object (optional)
        Local contributions, or list of local contributions. When None,
        they are computed by ``shap_contributions`` from ``self.model``,
        ``self.data["x_preprocessed"]`` and ``self.explainer``.
    use_groups : bool (optional)
        Whether or not to compute groups of features contributions.
        Grouping is applied unless ``use_groups`` is False, and only when
        ``self.features_groups`` is not None.

    Returns
    -------
    pandas.DataFrame
        ypred data with right probabilities associated.
    pandas.DataFrame
        Data with contributions associated to the ypred specified
        (grouped by features groups when grouping is active).

    Raises
    ------
    ValueError
        If the instance has no ``data`` attribute (add_input was never
        called) or if ``self.data["x"]`` is None.
    """
    # None means "group if possible": only an explicit False disables it,
    # and grouping is impossible when no features groups are defined.
    use_groups = use_groups is not False and self.features_groups is not None

    if not hasattr(self, "data"):
        raise ValueError("add_input method must be called at least once.")
    if self.data["x"] is None:
        raise ValueError(""" x must be specified in an add_input method to apply detail_contributions. """)
    if self.data["ypred_init"] is None:
        self.predict()

    if contributions is None:
        # shap_contributions also returns an explainer; only the
        # contributions are used here.
        contributions, _ = shap_contributions(
            self.model,
            self.data["x_preprocessed"],
            self.explainer,
        )

    adapt_contrib = self.adapt_contributions(contributions)
    self.state = self.choose_state(adapt_contrib)
    contributions = self.validate_contributions(adapt_contrib)
    contributions = self.apply_preprocessing_for_contributions(
        contributions,
        self.preprocessing,
    )
    self.check_contributions(contributions)

    # Probabilities are only requested in the classification case.
    proba_values = self.predict_proba() if self._case == "classification" else None
    y_pred, match_contrib = keep_right_contributions(
        self.data["ypred_init"],
        contributions,
        self._case,
        self._classes,
        self.label_dict,
        proba_values,
    )
    if use_groups:
        match_contrib = group_contributions(
            match_contrib,
            features_groups=self.features_groups,
        )

    return y_pred, match_contrib
def to_pandas(
        self,
        features_to_hide=None,
        threshold=None,
        positive=None,
        max_contrib=None,
        proba=False
):
    """
    Export the summary of local explainability as a pandas DataFrame.

    This method proposes a set of parameters to summarize the
    explainability of each point. If the user does not specify any of
    them and a previous filter has been recorded (``mask_params``
    attribute), the existing mask is reused and its parameters are
    printed. Otherwise the filter method is called with the parameters
    given here.

    In classification case, to_pandas summarizes the explainability which
    corresponds to the predicted values specified by the user (with
    compile or add method). The proba parameter displays the
    corresponding predict proba value for each point.

    In classification case, there are 2 ways to use this to_pandas method:
    - Provide a real prediction set to explain.
    - Focus on a constant target value and look at the proba and
      explainability corresponding to each point (in that case, specify a
      constant pd.Series with add or compile method).

    Examples are presented in the tutorial local_plot (please check the
    tutorial part of this doc).

    As a side effect, ``self.data['summary']`` is overwritten with the
    computed summary.

    Parameters
    ----------
    features_to_hide : list, optional (default: None)
        List of strings, containing features to hide.
    threshold : float, optional (default: None)
        Absolute threshold below which any contribution is hidden.
    positive : bool, optional (default: None)
        If True, hide negative values. Hide positive values otherwise.
        If None, hide nothing.
    max_contrib : int, optional (default: None)
        Number of contributions to show in the pandas df. The value is
        forwarded unchanged to the filter method.
    proba : bool, optional (default: False)
        Adding proba in output df.

    Returns
    -------
    pandas.DataFrame
        Selected explanation of each row, with the prediction columns
        first.

    Raises
    ------
    ValueError
        If ``self.y_pred`` is None.

    Examples
    --------
    >>> summary_df = xpl.to_pandas(max_contrib=2,proba=True)
    >>> summary_df
        pred    proba       feature_1   value_1 contribution_1  feature_2   value_2 contribution_2
    0   0       0.756416    Sex         1.0     0.322308        Pclass      3.0     0.155069
    1   3       0.628911    Sex         2.0     0.585475        Pclass      1.0     0.370504
    2   0       0.543308    Sex         2.0     -0.486667       Pclass      3.0     0.255072
    """
    # Classification: y_pred is needed
    if self.y_pred is None:
        raise ValueError(
            "You have to specify y_pred argument. Please use add() or compile() method"
        )

    # Apply filter method if necessary: with no explicit parameter and a
    # previously recorded filter, the existing mask is reused as is.
    no_param_given = all(
        var is None for var in [features_to_hide, threshold, positive, max_contrib]
    )
    if no_param_given and hasattr(self, 'mask_params'):
        print('to_pandas params: ' + str(self.mask_params))
    else:
        self.filter(features_to_hide=features_to_hide,
                    threshold=threshold,
                    positive=positive,
                    max_contrib=max_contrib)

    # Summarize information
    self.data['summary'] = self.state.summarize(
        self.data['contrib_sorted'],
        self.data['var_dict'],
        self.data['x_sorted'],
        self.mask,
        self.columns_dict,
        self.features_dict
    )

    # Matching with y_pred
    if proba:
        # predict_proba is called for its effect on self.proba_values.
        self.predict_proba()
        proba_values = self.proba_values
    else:
        proba_values = None

    y_pred, summary = keep_right_contributions(
        self.y_pred,
        self.data['summary'],
        self._case,
        self._classes,
        self.label_dict,
        proba_values
    )

    return pd.concat([y_pred, summary], axis=1)