def to_smartexplainer(self):
    """
    Build a SmartExplainer from the data registered through add_input,
    reusing the attributes held by this SmartPredictor.

    The explainer is created with deep copies of ``features_dict`` and
    ``label_dict`` and compiled on deep copies of ``data["x_preprocessed"]``
    and ``data["ypred_init"]``; model, explainer, preprocessing,
    postprocessing and features_groups are passed through as-is.

    Returns
    -------
    shapash.explainer.smart_explainer.SmartExplainer
        The compiled explainer.

    Raises
    ------
    ValueError
        If no ``data`` attribute exists yet (add_input was never called),
        if ``data["x"]`` is None, or if one of the preprocessing steps has a
        type listed in ``columntransformer``.
    """
    # Guard clauses: nothing can be built before add_input provided x.
    if not hasattr(self, "data"):
        raise ValueError("add_input method must be called at least once.")
    if self.data["x"] is None:
        raise ValueError(""" x must be specified in an add_input method to apply to_smartexplainer method. """)

    # ColumnTransformer steps are rejected up front.
    steps = preprocessing_tolist(self.preprocessing)
    if any(str(type(step)) in columntransformer for step in steps):
        raise ValueError(
            "SmartPredictor can't switch to SmartExplainer for ColumnTransformer preprocessing."
        )

    # Deep copies keep the new explainer independent from this predictor.
    features_copy = copy.deepcopy(self.features_dict)
    labels_copy = copy.deepcopy(self.label_dict)
    smart_explainer = shapash.explainer.smart_explainer.SmartExplainer(
        features_dict=features_copy,
        label_dict=labels_copy,
    )

    x_copy = copy.deepcopy(self.data["x_preprocessed"])
    y_pred_copy = copy.deepcopy(self.data["ypred_init"])
    smart_explainer.compile(
        x=x_copy,
        model=self.model,
        explainer=self.explainer,
        y_pred=y_pred_copy,
        preprocessing=self.preprocessing,
        postprocessing=self.postprocessing,
        features_groups=self.features_groups,
    )
    return smart_explainer
def inverse_transform_contributions(contributions, preprocessing=None, agg_columns='sum'):
    """
    Map contributions back through a preprocessing.

    Preprocessing could be :
        - a single category_encoders
        - a single ColumnTransformer
        - list with multiple category_encoders with optional (dict, list of dict)
        - list with a single ColumnTransformer with optional (dict, list of dict)
        - dict
        - list of dict

    Parameters
    ----------
    contributions : pandas.DataFrame
        Contributions values.
    preprocessing : category_encoders, ColumnTransformer, list, dict, optional (default: None)
        The processing applied to the original data.
    agg_columns : str (default: 'sum')
        Type of aggregation performed; forwarded unchanged to the inverse
        helpers. For Shap we want to sum the contributions of one hot encoded
        variables. For ACV we want to take any value, as ACV computes
        contributions of a coalition of variables (like one hot encoded
        variables) differently from Shap and gives the same value to each
        variable of the coalition, so the value of one of these variables is
        the contribution of the group.

    Returns
    -------
    pandas.DataFrame
        The aggregated contributions. When ``preprocessing`` is None the input
        object itself is returned (no copy).

    Raises
    ------
    Exception
        If ``contributions`` is not a pandas DataFrame.
    """
    if not isinstance(contributions, pd.DataFrame):
        raise Exception('Shap values must be a pandas dataframe.')

    # Nothing to invert: hand the caller's frame straight back.
    if preprocessing is None:
        return contributions

    # Normalise the preprocessing into a list and check it is supported.
    encoders = preprocessing_tolist(preprocessing)
    use_ct, _use_ce = check_transformers(encoders)

    # Work on a copy so the caller's frame is not modified by this function.
    restored = contributions.copy()
    for encoder in encoders:
        if use_ct:
            restored = calc_inv_contrib_ct(restored, encoder, agg_columns)
        else:
            restored = calc_inv_contrib_ce(restored, encoder, agg_columns)
    return restored
def check_preprocessing(preprocessing=None):
    """
    Check that every transformation of the preprocessing is supported.

    Parameters
    ----------
    preprocessing: category_encoders, ColumnTransformer, list, dict, optional (default: None)
        The processing applied to the original data.

    Returns
    -------
    tuple or None
        The ``(use_ct, use_ce)`` pair produced by ``check_transformers`` on
        the listed preprocessing, or None when ``preprocessing`` is None.
    """
    if preprocessing is None:
        # No preprocessing to inspect; callers get None, not a tuple.
        return None

    steps = preprocessing_tolist(preprocessing)
    use_ct, use_ce = check_transformers(steps)
    return (use_ct, use_ce)
def __init__(
    self,
    features_dict,
    model,
    columns_dict,
    explainer,
    features_types,
    label_dict=None,
    preprocessing=None,
    postprocessing=None,
    features_groups=None,
    mask_params=None,
):
    """
    Store and validate every attribute of the predictor.

    Parameters
    ----------
    features_dict : dict
        Must be a dict (or None); stored as ``self.features_dict``.
    model : object
        Stored as ``self.model`` and inspected by ``self.check_model()``.
    columns_dict : dict
        Must be a dict (or None); stored as ``self.columns_dict``.
    explainer : object
        Passed through ``self.check_explainer`` before being stored.
    features_types : dict
        Must be a dict (or None); stored as ``self.features_types``.
    label_dict : dict, optional (default: None)
        Must be a dict when given; checked by ``self.check_label_dict()``.
    preprocessing : optional (default: None)
        Stored as ``self.preprocessing`` and checked by
        ``self.check_preprocessing()``.
    postprocessing : dict, optional (default: None)
        Must be a dict when given.
    features_groups : optional (default: None)
        Stored as ``self.features_groups``.
    mask_params : dict, optional (default: None)
        Checked by ``self.check_mask_params()``. When None, a new dict with
        the keys ``features_to_hide``, ``threshold``, ``positive`` and
        ``max_contrib`` all set to None is created for this instance.

    Raises
    ------
    ValueError
        If features_dict, features_types, label_dict, columns_dict or
        postprocessing is neither None nor a dict. The check methods and
        consistency helpers called below may raise as well.
    """
    # Dict-typed parameters are validated before anything is stored.
    for params in (features_dict, features_types, label_dict, columns_dict, postprocessing):
        if params is not None and not isinstance(params, dict):
            raise ValueError(""" {0} must be a dict. """.format(str(params)))

    # A dict literal in the signature is created once and shared by every
    # instance that relies on the default; because it is stored directly on
    # self.mask_params, any in-place change would leak across instances.
    # Build a fresh one per instance instead.
    if mask_params is None:
        mask_params = {
            "features_to_hide": None,
            "threshold": None,
            "positive": None,
            "max_contrib": None,
        }

    self.model = model
    self._case, self._classes = self.check_model()
    self.explainer = self.check_explainer(explainer)
    self.preprocessing = preprocessing
    self.check_preprocessing()
    self.features_dict = features_dict
    self.features_types = features_types
    self.label_dict = label_dict
    self.check_label_dict()
    self.columns_dict = columns_dict
    self.mask_params = mask_params
    self.check_mask_params()
    self.postprocessing = postprocessing
    self.features_groups = features_groups

    # Cross-checks between the model, the features and the labels.
    list_preprocessing = preprocessing_tolist(self.preprocessing)
    check_consistency_model_features(
        self.features_dict,
        self.model,
        self.columns_dict,
        self.features_types,
        self.mask_params,
        self.preprocessing,
        self.postprocessing,
        list_preprocessing,
        self.features_groups,
    )
    check_consistency_model_label(self.columns_dict, self.label_dict)
    self._drop_option = check_preprocessing_options(
        columns_dict, features_dict, preprocessing, list_preprocessing
    )
def inverse_transform_contributions(contributions, preprocessing=None):
    """
    Map contributions back through a preprocessing.

    Preprocessing could be :
        - a single category_encoders
        - a single ColumnTransformer
        - list with multiple category_encoders with optional (dict, list of dict)
        - list with a single ColumnTransformer with optional (dict, list of dict)
        - dict
        - list of dict

    Parameters
    ----------
    contributions : pandas.DataFrame
        Contributions values.
    preprocessing : category_encoders, ColumnTransformer, list, dict, optional (default: None)
        The processing applied to the original data.

    Returns
    -------
    pandas.DataFrame
        The aggregated contributions. When ``preprocessing`` is None the input
        object itself is returned (no copy).

    Raises
    ------
    Exception
        If ``contributions`` is not a pandas DataFrame.
    """
    if not isinstance(contributions, pd.DataFrame):
        raise Exception('Shap values must be a pandas dataframe.')

    # Nothing to invert: hand the caller's frame straight back.
    if preprocessing is None:
        return contributions

    # Normalise the preprocessing into a list and check it is supported.
    encoders = preprocessing_tolist(preprocessing)
    use_ct, _use_ce = check_transformers(encoders)

    # Work on a copy so the caller's frame is not modified by this function.
    restored = contributions.copy()
    for encoder in encoders:
        if use_ct:
            restored = calc_inv_contrib_ct(restored, encoder)
        else:
            restored = calc_inv_contrib_ce(restored, encoder)
    return restored
def check_preprocessing_options(preprocessing=None):
    """
    Check that no ColumnTransformer of the preprocessing uses the "drop" option.

    Parameters
    ----------
    preprocessing: category_encoders, ColumnTransformer, list or dict (optional)
        The processing applied to the original data. None skips the check.

    Raises
    ------
    ValueError
        If a step whose type is listed in ``columntransformer`` has an entry
        in ``transformers_`` whose transformer is the string "drop".
    """
    if preprocessing is None:
        return

    for enc in preprocessing_tolist(preprocessing):
        if str(type(enc)) not in columntransformer:
            continue
        for entry in enc.transformers_:
            # scikit-learn documents transformers_ entries as
            # (name, fitted_transformer, columns). Only the transformer slot
            # says whether columns are dropped; testing `"drop" in entry`
            # also matched a step *named* "drop" or a single column selector
            # "drop", and raised an unrelated ValueError when the column
            # selector was a NumPy array (ambiguous truth value).
            transformer = entry[1]
            if isinstance(transformer, str) and transformer == "drop":
                raise ValueError("ColumnTransformer remainder 'drop' isn't supported by the SmartPredictor.")