Exemple #1
0
    def summarize(self):
        """
        Build and return the summary of local explainability.

        The contributions held in ``self.data`` are ranked, filtered by
        ``self.filter()`` and then formatted into a summary table that is
        joined column-wise with the stored predictions.

        The filtering can be tuned beforehand with the modify_mask method.
        When modify_mask has not been used, the mask_params given at the
        initialisation of the SmartPredictor apply.

        In the classification case, the result gathers:
            - the predicted values, given by the user or computed
              automatically (through the add_input method)
            - the probabilities from predict_proba matching those predictions
            - the contributions, ranked and filtered as configured with
              modify_mask

        Side effects: ``self.summary`` and ``self.data['summary']`` are
        overwritten on every call.

        Returns
        -------
        pandas.DataFrame
            Selected explanation of each row (classification case).

        Raises
        ------
        ValueError
            If the instance has no ``data`` attribute, i.e. add_input() has
            not been called yet.

        Examples
        --------
        >>> summary_df = predictor.summarize()
        >>> summary_df
           pred     proba feature_1  value_1  contribution_1 feature_2  value_2  contribution_2
        0     0  0.756416       Sex      1.0        0.322308    Pclass      3.0        0.155069
        1     3  0.628911       Sex      2.0        0.585475    Pclass      1.0        0.370504
        2     0  0.543308       Sex      2.0       -0.486667    Pclass      3.0        0.255072

        >>> predictor.modify_mask(max_contrib=1)
        >>> summary_df = predictor.summarize()
        >>> summary_df
           pred     proba feature_1  value_1  contribution_1
        0     0  0.756416       Sex      1.0        0.322308
        1     3  0.628911       Sex      2.0        0.585475
        2     0  0.543308       Sex      2.0       -0.486667
        """
        # Guard clause: nothing can be summarized before add_input() has
        # populated self.data.
        if not hasattr(self, "data"):
            raise ValueError("You have to specify dataset x and y_pred arguments. Please use add_input() method.")

        # Rank the contributions, then give each ranked output an explicit name.
        ranked = rank_contributions(
            self.data["contributions"],
            self.data["x_postprocessed"],
        )
        self.summary = assign_contributions(ranked)

        # Filtering relies on the mask_params attributes of the instance.
        self.filter()

        # Format the filtered, ranked contributions into the summary table.
        ranking = self.summary
        self.data['summary'] = summarize(
            ranking['contrib_sorted'],
            ranking['var_dict'],
            ranking['x_sorted'],
            self.mask,
            self.columns_dict,
            self.features_dict,
        )

        # Put the predictions side by side with their explanation.
        pieces = [self.data["ypred"], self.data['summary']]
        return pd.concat(pieces, axis=1)
Exemple #2
0
    def assign_contributions(self, ranked):
        """
        Rearrange the ranked contributions into a dict with explicit names.

        This method only delegates to the module-level
        ``assign_contributions`` function and returns its result unchanged.

        Parameters
        ----------
        ranked : list
            The output of rank_contributions.

        Returns
        -------
        dict
            The same data, stored under explicit names.

        Raises
        ------
        ValueError
            The output of rank_contributions is expected to always have a
            length of three.
        """
        named_results = assign_contributions(ranked)
        return named_results
Exemple #3
0
    def summarize(self, use_groups=None):
        """
        Build and return the summary of local explainability.

        The contributions (per feature, or per group of features) are ranked,
        filtered by ``self.filter()`` and formatted into a summary table that
        is joined column-wise with the stored predictions.

        The filtering can be tuned beforehand with the modify_mask method.
        When modify_mask has not been used, the mask_params given at the
        initialisation of the SmartPredictor apply.

        In the classification case, the result gathers:
            - the predicted values, given by the user or computed
              automatically (through the add_input method)
            - the probabilities from predict_proba matching those predictions
            - the contributions, ranked and filtered as configured with
              modify_mask

        Side effects: ``self.summary`` is overwritten, and the ``'summary'``
        entry of the data dict in use (``self.data_groups`` or ``self.data``)
        is replaced on every call.

        Parameters
        ----------
        use_groups : bool (optional)
            Whether or not to compute groups of features contributions.
            Groups are used unless this is explicitly False, and only when
            ``self.features_groups`` is not None.

        Returns
        -------
        pandas.DataFrame
            Selected explanation of each row (classification case).

        Raises
        ------
        ValueError
            If the instance has no ``data`` attribute, i.e. add_input() has
            not been called yet.

        Examples
        --------
        >>> summary_df = predictor.summarize()
        >>> summary_df
           pred     proba feature_1  value_1  contribution_1 feature_2  value_2  contribution_2
        0     0  0.756416       Sex      1.0        0.322308    Pclass      3.0        0.155069
        1     3  0.628911       Sex      2.0        0.585475    Pclass      1.0        0.370504
        2     0  0.543308       Sex      2.0       -0.486667    Pclass      3.0        0.255072

        >>> predictor.modify_mask(max_contrib=1)
        >>> summary_df = predictor.summarize()
        >>> summary_df
           pred     proba feature_1  value_1  contribution_1
        0     0  0.756416       Sex      1.0        0.322308
        1     3  0.628911       Sex      2.0        0.585475
        2     0  0.543308       Sex      2.0       -0.486667
        """
        # Anything but an explicit False enables groups, provided that groups
        # of features have been defined on the instance.
        use_groups = use_groups is not False and self.features_groups is not None

        # Nothing can be summarized before add_input() has populated self.data.
        if not hasattr(self, "data"):
            raise ValueError(
                "You have to specify dataset x and y_pred arguments. Please use add_input() method."
            )

        data = self.data_groups if use_groups else self.data

        # When a drop option is set, restrict the features to the columns
        # listed in its "columns_dict_op" mapping that are actually present
        # (plus the group names when working on groups).
        drop_option = self._drop_option
        x_selected = data["x_postprocessed"]
        if drop_option is not None:
            available = x_selected.columns
            kept = [
                name for name in drop_option["columns_dict_op"].values()
                if name in available
            ]
            if use_groups:
                kept.extend(self.features_groups.keys())
            x_selected = x_selected[kept]

        # Position -> column name, and the feature labels limited to the
        # columns that are kept.
        columns_dict = dict(enumerate(x_selected.columns))
        features_dict = {
            name: label
            for name, label in self.features_dict.items()
            if name in x_selected.columns
        }

        # Rank the contributions, then give each ranked output an explicit name.
        ranked = rank_contributions(data["contributions"], x_selected)
        self.summary = assign_contributions(ranked)

        # Filtering relies on the mask_params attributes of the instance.
        self.filter()

        # Format the filtered, ranked contributions into the summary table.
        ranking = self.summary
        data['summary'] = summarize(
            ranking['contrib_sorted'],
            ranking['var_dict'],
            ranking['x_sorted'],
            self.mask,
            columns_dict,
            features_dict,
        )

        # Put the predictions side by side with their explanation.
        pieces = [data["ypred"], data['summary']]
        return pd.concat(pieces, axis=1)