コード例 #1
0
ファイル: smart_predictor.py プロジェクト: kokizzu/shapash
    def to_smartexplainer(self):
        """
        Build a compiled SmartExplainer from the state held by this object.

        The explainer is created with deep copies of ``features_dict`` and
        ``label_dict``, then compiled with deep copies of
        ``self.data["x_preprocessed"]`` and ``self.data["ypred_init"]`` together
        with the model, explainer, preprocessing, postprocessing and
        features groups stored on this object.

        Returns
        -------
        SmartExplainer
            The compiled explainer.

        Raises
        ------
        ValueError
            If ``self.data`` does not exist (add_input was never called), if
            ``self.data["x"]`` is None, or if one preprocessing step has a type
            listed in ``columntransformer``.
        """
        if not hasattr(self, "data"):
            raise ValueError("add_input method must be called at least once.")

        if self.data["x"] is None:
            raise ValueError("""
                x must be specified in an add_input method to apply to_smartexplainer method.
                """)

        # Stop at the first preprocessing step recognised as a ColumnTransformer.
        uses_column_transformer = any(
            str(type(step)) in columntransformer
            for step in preprocessing_tolist(self.preprocessing)
        )
        if uses_column_transformer:
            raise ValueError(
                "SmartPredictor can't switch to SmartExplainer for ColumnTransformer preprocessing."
            )

        # Deep copies keep the new explainer from sharing mutable state with self.
        smart_explainer = shapash.explainer.smart_explainer.SmartExplainer(
            features_dict=copy.deepcopy(self.features_dict),
            label_dict=copy.deepcopy(self.label_dict),
        )
        compile_kwargs = {
            "x": copy.deepcopy(self.data["x_preprocessed"]),
            "model": self.model,
            "explainer": self.explainer,
            "y_pred": copy.deepcopy(self.data["ypred_init"]),
            "preprocessing": self.preprocessing,
            "postprocessing": self.postprocessing,
            "features_groups": self.features_groups,
        }
        smart_explainer.compile(**compile_kwargs)
        return smart_explainer
コード例 #2
0
ファイル: contributions.py プロジェクト: kokizzu/shapash
def inverse_transform_contributions(contributions,
                                    preprocessing=None,
                                    agg_columns='sum'):
    """
    Reverse the contributions according to a preprocessing.

    Accepted preprocessing forms:
        - a single category_encoders
        - a single ColumnTransformer
        - list with multiple category_encoders with optional (dict, list of dict)
        - list with a single ColumnTransformer with optional (dict, list of dict)
        - dict
        - list of dict

    Parameters
    ----------
    contributions : pandas.DataFrame
        Contributions values.
    preprocessing : category_encoders, ColumnTransformer, list, dict, optional (default: None)
        The processing applied to the original data.
    agg_columns : str (default: 'sum')
        Type of aggregation performed. For Shap we want to sum the contributions of one hot
        encoded variables. For ACV we want to take any value, as ACV computes contributions of
        a coalition of variables (like one hot encoded variables) differently from Shap and then
        gives the same value to each variable of the coalition. As a result we just need to take
        the value of one of these variables to get the contribution value of the group.

    Returns
    -------
    pandas.DataFrame
        The aggregated contributions. When ``preprocessing`` is None the input
        object itself is returned; otherwise a copy is transformed.

    Raises
    ------
    Exception
        If ``contributions`` is not a pandas DataFrame.
    """
    if not isinstance(contributions, pd.DataFrame):
        raise Exception('Shap values must be a pandas dataframe.')

    # Nothing to invert: hand back the caller's object untouched.
    if preprocessing is None:
        return contributions

    # Normalise the preprocessing to a list, then check which inverse is supported.
    steps = preprocessing_tolist(preprocessing)
    with_column_transformer, _with_category_encoders = check_transformers(steps)

    inverted = contributions.copy()
    # The same inverse routine is used for every step of the chain.
    undo_step = calc_inv_contrib_ct if with_column_transformer else calc_inv_contrib_ce
    for step in steps:
        inverted = undo_step(inverted, step, agg_columns)
    return inverted
コード例 #3
0
def check_preprocessing(preprocessing=None):
    """
    Verify that every transformation of the preprocessing is supported.

    Parameters
    ----------
    preprocessing: category_encoders, ColumnTransformer, list, dict, optional (default: None)
        The processing applied to the original data

    Returns
    -------
    tuple or None
        The ``(use_ct, use_ce)`` pair produced by ``check_transformers``, or
        None when no preprocessing is given.
    """
    if preprocessing is None:
        return None

    # check_transformers works on the list form of the preprocessing.
    steps = preprocessing_tolist(preprocessing)
    has_ct, has_ce = check_transformers(steps)
    return has_ct, has_ce
コード例 #4
0
ファイル: smart_predictor.py プロジェクト: kokizzu/shapash
    def __init__(
        self,
        features_dict,
        model,
        columns_dict,
        explainer,
        features_types,
        label_dict=None,
        preprocessing=None,
        postprocessing=None,
        features_groups=None,
        mask_params=None,
    ):
        """
        Store the predictor configuration and run the consistency checks.

        Parameters
        ----------
        features_dict : dict
            Must be a dict (or None); stored as ``self.features_dict``.
        model : object
            Stored as ``self.model`` and inspected by ``self.check_model()``.
        columns_dict : dict
            Must be a dict (or None); stored as ``self.columns_dict``.
        explainer : object
            Passed through ``self.check_explainer`` before being stored.
        features_types : dict
            Must be a dict (or None); stored as ``self.features_types``.
        label_dict : dict, optional (default: None)
            Must be a dict when given; stored as ``self.label_dict``.
        preprocessing : optional (default: None)
            Stored as ``self.preprocessing`` and checked by ``self.check_preprocessing()``.
        postprocessing : dict, optional (default: None)
            Must be a dict when given; stored as ``self.postprocessing``.
        features_groups : optional (default: None)
            Stored as ``self.features_groups``.
        mask_params : dict, optional (default: None)
            Mask settings. When omitted (or None), a new dict with the keys
            "features_to_hide", "threshold", "positive" and "max_contrib", all
            set to None, is created for this instance.

        Raises
        ------
        ValueError
            If features_dict, features_types, label_dict, columns_dict or
            postprocessing is neither None nor a dict.
        """
        # Build the default per call: a dict literal in the signature would be
        # one shared object stored on every instance created without mask_params.
        if mask_params is None:
            mask_params = {
                "features_to_hide": None,
                "threshold": None,
                "positive": None,
                "max_contrib": None,
            }

        params_dict = [
            features_dict, features_types, label_dict, columns_dict,
            postprocessing
        ]

        for params in params_dict:
            if params is not None and not isinstance(params, dict):
                raise ValueError("""
                    {0} must be a dict.
                    """.format(str(params)))

        # The check_* methods read the attributes assigned just before them,
        # so the assignment order below matters.
        self.model = model
        self._case, self._classes = self.check_model()
        self.explainer = self.check_explainer(explainer)
        self.preprocessing = preprocessing
        self.check_preprocessing()
        self.features_dict = features_dict
        self.features_types = features_types
        self.label_dict = label_dict
        self.check_label_dict()
        self.columns_dict = columns_dict
        self.mask_params = mask_params
        self.check_mask_params()
        self.postprocessing = postprocessing
        self.features_groups = features_groups
        list_preprocessing = preprocessing_tolist(self.preprocessing)
        check_consistency_model_features(
            self.features_dict, self.model, self.columns_dict,
            self.features_types, self.mask_params, self.preprocessing,
            self.postprocessing, list_preprocessing, self.features_groups)
        check_consistency_model_label(self.columns_dict, self.label_dict)
        self._drop_option = check_preprocessing_options(
            columns_dict, features_dict, preprocessing, list_preprocessing)
コード例 #5
0
ファイル: contributions.py プロジェクト: yumeone/shapash
def inverse_transform_contributions(contributions, preprocessing=None):
    """
    Reverse the contributions according to a preprocessing.

    Accepted preprocessing forms:
        - a single category_encoders
        - a single ColumnTransformer
        - list with multiple category_encoders with optional (dict, list of dict)
        - list with a single ColumnTransformer with optional (dict, list of dict)
        - dict
        - list of dict

    Parameters
    ----------
    contributions : pandas.DataFrame
        Contributions values.
    preprocessing : category_encoders, ColumnTransformer, list, dict, optional (default: None)
        The processing applied to the original data.

    Returns
    -------
    pandas.DataFrame
        The aggregated contributions. When ``preprocessing`` is None the input
        object itself is returned; otherwise a copy is transformed.

    Raises
    ------
    Exception
        If ``contributions`` is not a pandas DataFrame.
    """
    if not isinstance(contributions, pd.DataFrame):
        raise Exception('Shap values must be a pandas dataframe.')

    # Without preprocessing there is nothing to invert.
    if preprocessing is None:
        return contributions

    # Work on the list form and check which inverse is supported.
    encoders = preprocessing_tolist(preprocessing)
    ct_in_use, _ce_in_use = check_transformers(encoders)

    restored = contributions.copy()
    # Every encoder in the chain goes through the same inverse routine.
    invert = calc_inv_contrib_ct if ct_in_use else calc_inv_contrib_ce
    for encoder in encoders:
        restored = invert(restored, encoder)
    return restored
コード例 #6
0
ファイル: check.py プロジェクト: vhshemanth/shapash
def check_preprocessing_options(preprocessing=None):
    """
    Ensure that a ColumnTransformer preprocessing does not use the "drop" option.

    Parameters
    ----------
    preprocessing: category_encoders, ColumnTransformer, list or dict (optional)
        The processing applied to the original data.

    Raises
    ------
    ValueError
        If an entry of ``transformers_`` of a ColumnTransformer step contains "drop".
    """
    if preprocessing is None:
        return

    for step in preprocessing_tolist(preprocessing):
        # Only steps recognised as ColumnTransformer carry a transformers_ attribute to inspect.
        if str(type(step)) not in columntransformer:
            continue
        if any("drop" in entry for entry in step.transformers_):
            raise ValueError("ColumnTransformer remainder 'drop' isn't supported by the SmartPredictor.")