def plot_shap_values(self, X=None, tau=None, model_tau_feature=None, features=None, shap_dict=None, **kwargs):
    """
    Plots distribution of shapley values.

    If shapley values have been pre-computed, pass it through the shap_dict parameter.
    If shap_dict is not provided, this builds a new model (using X to predict estimated/actual tau)
    and then calculates shapley values.

    Args:
        X (np.matrix or np.array or pd.Dataframe): a feature matrix. Required if shap_dict is None.
        tau (np.array): a treatment effect vector (estimated/actual)
        model_tau_feature (sklearn/lightgbm/xgboost model object): an unfitted model object
        features (optional, np.array): list/array of feature names. If None, an enumerated list will be used.
        shap_dict (optional, dict): a dict of shapley value matrices. If None, shap_dict will be computed.
    """
    # Skip the Explainer's input checks when pre-computed shapley values are supplied.
    override_checks = shap_dict is not None
    explainer = Explainer(method='shapley', control_name=self.control_name,
                          X=X, tau=tau, model_tau=model_tau_feature,
                          features=features, override_checks=override_checks,
                          classes=self._classes)
    explainer.plot_shap_values(shap_dict=shap_dict)
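# A minimal usage sketch for plot_shap_values (illustrative, not part of the class:
# it assumes `learner` is a fitted instance exposing this method, and that `X`,
# `treatment`, and `y` come from an already-prepared dataset):
#
#     cate = learner.fit_predict(X=X, treatment=treatment, y=y)
#     # let the method build a feature model and compute shapley values itself
#     learner.plot_shap_values(X=X, tau=cate)
#     # or pre-compute shapley values once and reuse them across plots
#     shap_dict = learner.get_shap_values(X=X, tau=cate)
#     learner.plot_shap_values(X=X, tau=cate, shap_dict=shap_dict)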
def plot_importance(self, X=None, tau=None, model_tau_feature=None, features=None, method='auto', normalize=True):
    """
    Builds a model (using X to predict estimated/actual tau), and then plots feature importances
    based on a specified method.

    Currently supported methods are:
        - auto (calculates importance based on estimator's default implementation of feature importance;
          estimator must be tree-based)
          Note: if none provided, it uses lightgbm's LGBMRegressor as estimator, and "gain" as
          importance type
        - permutation (calculates importance based on mean decrease in accuracy when a feature column is
          permuted; estimator can be any form)

    Hint: for permutation, downsample data for better performance especially if X.shape[1] is large

    Args:
        X (np.matrix or np.array or pd.Dataframe): a feature matrix
        tau (np.array): a treatment effect vector (estimated/actual)
        model_tau_feature (sklearn/lightgbm/xgboost model object): an unfitted model object
        features (optional, np.array): list/array of feature names. If None, an enumerated list will be used.
        method (str): auto, permutation
        normalize (bool): normalize by sum of importances if method=auto (defaults to True)
    """
    # The fitted R-learner tau models are passed directly, so no new model_tau is built here.
    explainer = Explainer(method=method, control_name=self.control_name,
                          X=X, tau=tau, model_tau=None, r_learners=self.models_tau,
                          features=features, classes=self._classes, normalize=normalize)
    explainer.plot_importance()
def plot_importance(self, X=None, tau=None, model_tau_feature=None, features=None, method='auto',
                    normalize=True, test_size=0.3, random_state=None):
    """
    Builds a model (using X to predict estimated/actual tau), and then plots feature importances
    based on a specified method.

    Currently supported methods are:
        - auto (calculates importance based on estimator's default implementation of feature importance;
          estimator must be tree-based)
          Note: if none provided, it uses lightgbm's LGBMRegressor as estimator, and "gain" as
          importance type
        - permutation (calculates importance based on mean decrease in accuracy when a feature column is
          permuted; estimator can be any form)

    Hint: for permutation, downsample data for better performance especially if X.shape[1] is large

    Args:
        X (np.matrix or np.array or pd.Dataframe): a feature matrix
        tau (np.array): a treatment effect vector (estimated/actual)
        model_tau_feature (sklearn/lightgbm/xgboost model object): an unfitted model object
        features (optional, np.array): list/array of feature names. If None, an enumerated list will be used.
        method (str): auto, permutation
        normalize (bool): normalize by sum of importances if method=auto (defaults to True)
        test_size (float/int): if float, represents the proportion of the dataset to include in the test split.
            If int, represents the absolute number of test samples (used for estimating permutation importance)
        random_state (int/RandomState instance/None): random state used in permutation importance estimation
    """
    explainer = Explainer(method=method, control_name=self.control_name,
                          X=X, tau=tau, model_tau=model_tau_feature,
                          features=features, classes=self._classes, normalize=normalize,
                          test_size=test_size, random_state=random_state)
    explainer.plot_importance()
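# A minimal usage sketch covering both plot_importance variants above (illustrative,
# not part of the class: it assumes `learner` is a fitted instance and `cate` holds
# its estimated treatment effects):
#
#     # default: tree-based importance from lightgbm's LGBMRegressor ("gain")
#     learner.plot_importance(X=X, tau=cate, method='auto')
#     # permutation importance; downsample first if X.shape[1] is large
#     learner.plot_importance(X=X[:5000], tau=cate[:5000], method='permutation',
#                             test_size=0.3, random_state=42)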
def plot_shap_dependence(self, treatment_group, feature_idx, X, tau, model_tau_feature=None,
                         features=None, shap_dict=None, interaction_idx="auto", **kwargs):
    """
    Plots dependency of shapley values for a specified feature, colored by an interaction feature.

    If shapley values have been pre-computed, pass it through the shap_dict parameter.
    If shap_dict is not provided, this builds a new model (using X to predict estimated/actual tau)
    and then calculates shapley values.

    This plots the value of the feature on the x-axis and the SHAP value of the same feature
    on the y-axis. This shows how the model depends on the given feature, and is like a richer
    extension of the classical partial dependence plots. Vertical dispersion of the data points
    represents interaction effects.

    Args:
        treatment_group (str or int): name of treatment group to create dependency plot on
        feature_idx (str or int): feature index / name to create dependency plot on
        X (np.matrix or np.array or pd.Dataframe): a feature matrix
        tau (np.array): a treatment effect vector (estimated/actual)
        model_tau_feature (sklearn/lightgbm/xgboost model object): an unfitted model object
        features (optional, np.array): list/array of feature names. If None, an enumerated list will be used.
        shap_dict (optional, dict): a dict of shapley value matrices. If None, shap_dict will be computed.
        interaction_idx (optional, str or int): feature index / name used in coloring scheme as interaction
            feature. If "auto" then shap.common.approximate_interactions is used to pick what seems to be the
            strongest interaction (note that to find the true strongest interaction you need to compute the
            SHAP interaction values).
    """
    # Skip the Explainer's input checks when pre-computed shapley values are supplied.
    override_checks = shap_dict is not None
    explainer = Explainer(method='shapley', control_name=self.control_name,
                          X=X, tau=tau, model_tau=model_tau_feature,
                          features=features, override_checks=override_checks,
                          classes=self._classes)
    explainer.plot_shap_dependence(treatment_group=treatment_group,
                                   feature_idx=feature_idx,
                                   shap_dict=shap_dict,
                                   interaction_idx=interaction_idx,
                                   **kwargs)
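# A minimal usage sketch for plot_shap_dependence (illustrative, not part of the
# class: it assumes `learner` is a fitted instance, `cate` holds its estimated
# treatment effects, and 'treatment_a' names one of the fitted treatment groups):
#
#     learner.plot_shap_dependence(treatment_group='treatment_a', feature_idx=0,
#                                  X=X, tau=cate, interaction_idx='auto')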
def get_shap_values(self, X=None, model_tau_feature=None, tau=None, features=None):
    """
    Builds a model (using X to predict estimated/actual tau), and then calculates shapley values.

    Args:
        X (np.matrix or np.array or pd.Dataframe): a feature matrix
        tau (np.array): a treatment effect vector (estimated/actual)
        model_tau_feature (sklearn/lightgbm/xgboost model object): an unfitted model object
        features (optional, np.array): list/array of feature names. If None, an enumerated list will be used.

    Returns:
        (dict): a dict of shapley value matrices
    """
    explainer = Explainer(method='shapley', control_name=self.control_name,
                          X=X, tau=tau, model_tau=model_tau_feature,
                          features=features, classes=self._classes)
    return explainer.get_shap_values()
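# A minimal usage sketch for get_shap_values (illustrative, not part of the class:
# it assumes `learner` is a fitted instance and `cate` holds its estimated treatment
# effects); the returned dict can be passed back to plot_shap_values /
# plot_shap_dependence via shap_dict to avoid recomputation:
#
#     shap_dict = learner.get_shap_values(X=X, tau=cate)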