def __init__(self, model, data, feature_names, mode, algorithm='tree'):
    self.model = model
    # np.float was removed in NumPy 1.20; the builtin float is equivalent here
    self.data = np.vstack(np.array(data)).astype(float)
    self.feature_names = feature_names
    self.algorithm = algorithm
    self.mode = mode
    if len(self.data) > 10:
        self.nsamples = 10
        if len(self.data) > 2000:
            self.data = shap.sample(self.data, 2000)
    else:
        self.nsamples = len(self.data)
    if algorithm == 'tree':
        self.explainer = shap.TreeExplainer(
            model, data=shap.sample(self.data, 50),
            feature_perturbation='interventional')
        self.shap_values = self.explainer.shap_values(
            self.data, check_additivity=False)
    else:
        if self.mode == 'classification':
            self.explainer = shap.KernelExplainer(
                self.model.predict_proba,
                data=shap.sample(self.data, self.nsamples), link="logit")
        elif self.mode == 'regression':
            self.explainer = shap.KernelExplainer(
                self.model.predict,
                data=shap.sample(self.data, self.nsamples))
        self.shap_values = self.explainer.shap_values(
            self.data, check_additivity=False, nsamples=self.nsamples)
def explain_model(model, train_data, test_data, samples):
    """Compute and display SHAP model explanations."""
    model_name = type(model).__name__
    random.seed(13)
    samples_to_explain = samples
    if model_name not in ["RandomForestClassifier", "XGBClassifier"]:
        explainer = shap.KernelExplainer(model.predict_proba, train_data[:50],
                                         link="identity")
        explained_data = train_data[:50]
        shap_values = explainer.shap_values(explained_data, nsamples=200,
                                            l1_reg="num_features(100)")
    else:
        explainer = shap.TreeExplainer(
            model, data=shap.sample(train_data, samples_to_explain),
            feature_perturbation='interventional')
        explained_data = shap.sample(train_data, samples_to_explain)
        shap_values = explainer.shap_values(explained_data,
                                            check_additivity=False)
    # summary_plot needs the same rows the SHAP values were computed on, and it
    # returns None; return the current pyplot figure instead (plt assumed imported)
    shap.summary_plot(shap_values, explained_data, max_display=5, show=False)
    return plt.gcf()
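# A minimal usage sketch for explain_model above, not part of the original:
# synthetic data only; shap, matplotlib and random are assumed imported at
# module level, as the function itself requires.
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier

X_arr, y_arr = make_classification(n_samples=200, n_features=5, random_state=0)
X_df = pd.DataFrame(X_arr, columns=[f"f{i}" for i in range(5)])
clf = RandomForestClassifier(n_estimators=50, random_state=0).fit(X_df, y_arr)
# RandomForestClassifier takes the TreeExplainer branch of explain_model
fig = explain_model(clf, X_df.iloc[:150], X_df.iloc[150:], samples=50)
fig.savefig("shap_summary_example.png")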
def data_for_shap(self, input_data):
    if is_classification(self.model):
        explainer, pred, pred_fcn = self.shap_explainer()
        if type(explainer) == shap.explainers._tree.Tree:
            global_shap_values = explainer.shap_values(input_data)
            data_with_shap = self.append_shap_values_to_df(
                input_sv=global_shap_values[0],
                in_data=input_data.copy(), scope="local")
            prediction = pred(input_data)
            probabilities = pred_fcn(input_data)
            data_with_shap['Model Decision'] = prediction[0]
            for i in range(len(np.unique(self.actual_data))):
                data_with_shap['Probability: {}'.format(
                    np.unique(self.actual_data)[i])] = probabilities[:, i][0]
            return data_with_shap
        else:
            # sample once so the SHAP values, predictions and probabilities
            # all refer to the same rows
            sampled = shap.sample(input_data, 100)
            global_shap_values = explainer.shap_values(sampled)
            data_with_shap = self.append_shap_values_to_df(
                input_sv=global_shap_values[0],
                in_data=sampled.copy(), scope='local')
            prediction = pred(sampled)
            probabilities = pred_fcn(sampled)
            data_with_shap['Model Decision'] = prediction[0]
            for i in range(len(np.unique(self.actual_data))):
                data_with_shap['Probability: {}'.format(
                    np.unique(self.actual_data)[i])] = probabilities[:, i][0]
            return data_with_shap
    else:
        explainer, pred = self.shap_explainer()
        if type(explainer) == shap.explainers._tree.Tree:
            global_shap_values = explainer.shap_values(input_data)
            data_with_shap = self.append_shap_values_to_df(
                input_sv=global_shap_values,
                in_data=input_data.copy(), scope="local")
            data_with_shap['Model Decision'] = pred(input_data)
            return data_with_shap
        else:
            sampled = shap.sample(input_data, 100)
            global_shap_values = explainer.shap_values(sampled)
            data_with_shap = self.append_shap_values_to_df(
                input_sv=global_shap_values,
                in_data=sampled.copy(), scope="local")
            data_with_shap['Model Decision'] = pred(sampled)
            return data_with_shap
def shap_compute(X, model, model_name):
    if model_name == 'xgboost':
        explainer = shap.TreeExplainer(model)
        shap_values = explainer.shap_values(X)
    else:
        # subsample before building the kernel explainer: a large background
        # set makes KernelExplainer extremely slow
        X = shap.sample(X, 100)
        explainer = shap.KernelExplainer(model.predict, X)
        shap_values = explainer.shap_values(X)
    shap_mean = np.mean(np.abs(shap_values), axis=0)
    shap_mean_df = pd.DataFrame({
        'columns_name': X.columns,
        'shap_mean_values': shap_mean
    }).sort_values(by='shap_mean_values').reset_index(drop=True)
    index = np.arange(len(X.columns))
    return index, shap_mean_df
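# Hedged usage sketch for shap_compute on the KernelExplainer branch; synthetic
# data only, and any model_name other than 'xgboost' takes this path.
import numpy as np
import pandas as pd
from sklearn.linear_model import Ridge

X_df = pd.DataFrame(np.random.rand(150, 4), columns=list("wxyz"))
y_arr = X_df["w"] * 3 - X_df["x"]
ridge = Ridge().fit(X_df, y_arr)
index, shap_mean_df = shap_compute(X_df, ridge, model_name="ridge")
print(shap_mean_df)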
def shap_select(model, X_train, X_test, feature_names, task='classification',
                agnostic=False):
    """
    Return the feature ordering of a multidimensional dataset based on feature
    importance. The importance is calculated from SHAP values, which take a
    fitted model into account.

    :param model: a fitted model
    :param X_train: training data
    :param X_test: test data
    :param feature_names: feature names
    :return: feature names ordered by importance computed from SHAP values
    """
    if not agnostic:
        explainer = shap.TreeExplainer(model)
    else:
        if len(X_train) < 500:
            background = X_train
        else:
            background = shap.sample(X_train, int(len(X_train) * 0.05))
        explainer = shap.KernelExplainer(model.predict_proba, background)
    shap_values = explainer.shap_values(X_test)
    ordering = _shap_ordering(feature_names, shap_values)
    return ordering
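# Hedged example of shap_select on the TreeExplainer path. Note that
# _shap_ordering is a helper assumed to be defined elsewhere in the same
# module, so this sketch only runs in that context; data is synthetic.
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier

X_arr, y_arr = make_classification(n_samples=300, n_features=8, random_state=1)
names = [f"feat_{i}" for i in range(8)]
X_df = pd.DataFrame(X_arr, columns=names)
model = GradientBoostingClassifier().fit(X_df.iloc[:200], y_arr[:200])
ordering = shap_select(model, X_df.iloc[:200], X_df.iloc[200:], names)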
def create_blurred_baseline(self, X, sigma, iterations=1000):
    shuffled_gaussian_df = pd.DataFrame().reindex_like(X).fillna(0)
    features_to_shuffle = list(X.columns)
    df_to_shuffle = X.copy(deep=True)
    permutations = []
    for i in range(iterations):
        gaussian_filter_df = pd.DataFrame(
            gaussian_filter(df_to_shuffle, sigma=sigma),
            columns=features_to_shuffle)
        for feature in features_to_shuffle:
            shuffled_gaussian_df[feature] += gaussian_filter_df[feature]
        permutations.append(features_to_shuffle[:])
        # keep reshuffling until we hit a column order we have not used yet
        # (requires iterations <= n_features! to terminate)
        random.shuffle(features_to_shuffle)
        while features_to_shuffle in permutations:
            random.shuffle(features_to_shuffle)
        df_to_shuffle = df_to_shuffle[features_to_shuffle]
    shuffled_gaussian_df = shuffled_gaussian_df.div(iterations)
    return shap.sample(np.asarray(shuffled_gaussian_df), self.shap_sample_size)
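# Hedged sketch for create_blurred_baseline, not part of the original: since it
# is written as a method, a stand-in object supplies the shap_sample_size
# attribute it reads from self; iterations is kept tiny, and gaussian_filter is
# assumed to be scipy.ndimage.gaussian_filter, as in the function above.
import random
import types
import numpy as np
import pandas as pd
from scipy.ndimage import gaussian_filter

owner = types.SimpleNamespace(shap_sample_size=10)
X_df = pd.DataFrame(np.random.rand(50, 3), columns=list("abc"))
baseline = create_blurred_baseline(owner, X_df, sigma=1.0, iterations=5)
print(baseline.shape)  # (10, 3)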
def shap_collective(self):
    shap.initjs()
    z = shap.sample(self.X_test, nsamples=100)
    explainer = shap.KernelExplainer(self.cat.predict, z)
    k_shap_values = explainer.shap_values(self.X_test)
    return shap.force_plot(explainer.expected_value, k_shap_values, self.X_test)
def avatar_q_model(
    self,
    X_train,
    X_test,
    l1_reg="num_features(10)",
    check_additivity=False,
    n_samples=20,
    silent=True,
):
    assert shap is not None, "SHAP not found, so cannot do anything here."

    # Extract function to explain
    m = self.q_model
    f = self._extract_function_to_explain(self.q_model)

    # Data
    assert (
        X_train.shape[1] == X_test.shape[1]
    ), "Inconsistent attribute count. Your carelessness is disappointing."

    if X_train.shape[1] != len(m.desc_ids):
        attribute_filter = m.desc_ids
        X_train = X_train[:, attribute_filter]
        X_test = X_test[:, attribute_filter]

    explainer = shap.KernelExplainer(f, shap.sample(X_train, n_samples))
    raw_shaps = explainer.shap_values(
        X_test, l1_reg=l1_reg, check_additivity=check_additivity, silent=silent
    )

    # Process SHAP values
    abs_shaps = self._raw_to_abs_shaps(raw_shaps)
    nrm_shaps = self._abs_to_nrm_shaps(abs_shaps)

    return nrm_shaps
def shap_multiprocessing(patient, model, X_train, y_train, X_test, y_test):
    print(patient)
    # train the model for this patient
    model.fit(X_train, y_train)
    # create explainer on a 500-row background sample to keep KernelExplainer tractable
    shap_explainer = shap.KernelExplainer(model.predict_proba,
                                          shap.sample(X_train, 500))
    shap_values = shap_explainer.shap_values(X_test)
    with open('../resources/results_ordered/SHAP_no_correlated_features/'
              'SHAP_SVM_{}.pkl'.format(patient), 'wb') as f:
        pickle.dump(shap_values, f)
    return 1
def create_gaussian_baseline(self, X, sigma):
    gaussian_baseline = np.random.randn(*X.shape) * sigma + X
    # make sure the min and max values are not higher than those of X
    return shap.sample(
        np.clip(gaussian_baseline, a_min=X.min().min(), a_max=X.max().max()),
        self.shap_sample_size)
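# Hedged usage sketch for create_gaussian_baseline, not part of the original:
# a stand-in object supplies the shap_sample_size attribute read from self,
# and the data is purely illustrative.
import types
import numpy as np

owner = types.SimpleNamespace(shap_sample_size=20)
X_arr = np.random.rand(100, 4)
baseline = create_gaussian_baseline(owner, X_arr, sigma=0.1)
# the clipping guarantees the baseline stays within the range of X
assert baseline.min() >= X_arr.min() and baseline.max() <= X_arr.max()
print(baseline.shape)  # (20, 4)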
def bootstrap(self, X):
    """
    Function that performs the bootstrapping. Randomly changes the background
    dataset and records the new attributions.

    Parameters:
        X : instances to explain

    Returns:
        array of mean and std feature attributions for the samples in X
    """
    self.values = np.empty(
        (self.num_straps, len(self.labels), len(self.features)))
    self.mean_std_arr = np.empty((2, len(self.labels), len(self.features)))
    for i in range(self.num_straps):
        background_i = shap.sample(self.data, self.back_size,
                                   random_state=np.random.randint(100))
        if self.explainer_type == 'kernel':
            exp_i = shap.KernelExplainer(self.model, background_i)
            shapper = exp_i.shap_values(X)
        elif self.explainer_type == 'sample':
            # shap's sampling-based explainer is SamplingExplainer
            # (the original referenced a non-existent shap.SampleExplainer)
            exp_i = shap.SamplingExplainer(self.model, background_i)
            shapper = exp_i.shap_values(X)
        elif self.explainer_type == 'lime':
            exp_i = MyLime(self.model, background_i, mode="regression")
            shapper = exp_i.attributions(X)
        self.values[i, 0, :] = shapper[0]
        self.values[i, 1, :] = shapper[1]
    for i in range(len(self.labels)):
        for j in range(len(self.features)):
            self.mean_std_arr[0, i, j] = np.mean(self.values[:, i, j])
            self.mean_std_arr[1, i, j] = np.std(self.values[:, i, j])
    return self.mean_std_arr
def get_shap_kernel(estimator: object, X_train):
    """Compute the SHAP value importance for a non-tree-based model.

    Args:
        estimator: a non-tree-based sklearn estimator
        X_train ((pd.DataFrame, np.ndarray)): X training data

    Returns:
        the explainer, the SHAP values and the sampled rows they were
        computed on
    """
    warnings.filterwarnings("ignore")
    # because the kernel explainer for non-tree based models is extremely slow,
    # we use kmeans to extract a summary of X_train and speed up the calculation
    if X_train.shape[1] > 3:
        x_train_summary = shap.kmeans(X_train, 3)
    else:
        x_train_summary = shap.kmeans(X_train, X_train.shape[1])
    explainer = shap.KernelExplainer(estimator.predict, x_train_summary)
    size = len(X_train)
    if size < 50:
        pass  # explain every row
    elif size * 0.2 > 50:
        size = 50
    else:
        size = int(size * 0.2)
    sample_values = shap.sample(X_train, size)
    # note: the keyword is l1_reg (the original had a typo, lr_reg)
    shap_values = explainer.shap_values(sample_values, l1_reg='num_features(10)')
    return explainer, shap_values, sample_values
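# Hedged usage sketch for get_shap_kernel with a kernel-based model; synthetic
# data, and warnings/shap are assumed imported as the function requires.
import numpy as np
import pandas as pd
import shap
from sklearn.svm import SVR

X_df = pd.DataFrame(np.random.rand(120, 5), columns=[f"x{i}" for i in range(5)])
y_arr = X_df.sum(axis=1).values
svr = SVR().fit(X_df, y_arr)
explainer, shap_values, sample_values = get_shap_kernel(svr, X_df)
shap.summary_plot(shap_values, sample_values, show=False)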
def shap_summary(self):
    z = shap.sample(self.X_test, nsamples=100)
    explainer = shap.KernelExplainer(self.cat.predict, z)
    k_shap_values = explainer.shap_values(self.X_test)
    print("Shap Summary Plot")
    plt.figure()
    shap.summary_plot(k_shap_values, self.X_test, show=False)
    plt.savefig('shap_summary.png')
def shapley_tree(model_predict, obs, dataset, column_names, plot_draw=False):
    explainer = shap.KernelExplainer(model_predict, shap.sample(dataset, 100))
    shap_values = explainer.shap_values(obs)
    if plot_draw:
        shap.waterfall_plot(explainer.expected_value, shap_values,
                            feature_names=column_names)
    return shap_values, explainer.expected_value
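# Hedged example, not part of the original: despite its name, shapley_tree
# wraps KernelExplainer, so any predict function works; here a linear model on
# synthetic data explains a single row.
import numpy as np
from sklearn.linear_model import LinearRegression

X_arr = np.random.rand(200, 3)
y_arr = X_arr @ np.array([1.0, 2.0, 3.0])
lin = LinearRegression().fit(X_arr, y_arr)
sv, base = shapley_tree(lin.predict, X_arr[:1], X_arr, ["a", "b", "c"])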
def test_HistGradientBoostingClassifier_proba():
    # train a tree-based model
    X, y = shap.datasets.adult()
    model = sklearn.ensemble.HistGradientBoostingClassifier(
        max_iter=10, max_depth=6).fit(X, y)
    explainer = shap.TreeExplainer(model, shap.sample(X, 10),
                                   model_output="predict_proba")
    shap_values = explainer.shap_values(X)
    assert np.max(np.abs(shap_values[0].sum(1) + explainer.expected_value[0] -
                         model.predict_proba(X)[:, 0])) < 1e-4
def kernel_explainer_with_ct(self):
    try:
        # classification case: KernelExplainer runs the function on the
        # background data at construction time, so a missing predict_proba
        # raises AttributeError here
        pred_fcn = lambda x: self.model.predict_proba(self.ct.transform(x))
        explainer = shap.KernelExplainer(pred_fcn,
                                         shap.sample(self.input_data, 100),
                                         link='logit',
                                         feature_names=self.input_data.columns,
                                         seed=0)
        pred = lambda x: self.model.predict(self.ct.transform(x))
        return explainer, pred, pred_fcn
    except AttributeError:
        # regression case
        pred_fcn = lambda x: self.model.predict(self.ct.transform(x))
        explainer = shap.KernelExplainer(pred_fcn,
                                         shap.sample(self.input_data, 100),
                                         link='identity',
                                         feature_names=self.input_data.columns,
                                         seed=0)
        return explainer, pred_fcn
def __init__(self, dt_model, X, num_samples=50):
    self.dt_model = dt_model
    self.num_samples = num_samples
    self.feature_names = X.columns.to_list()
    if num_samples is not None:
        samples = shap.sample(X, num_samples)
    else:
        samples = X
    self.explainer = shap.KernelExplainer(self.model_fn, samples)
def kernel_explainer(self):
    try:
        # classification case: accessing predict_proba raises AttributeError
        # for models that do not have it
        explainer = shap.KernelExplainer(self.model.predict_proba,
                                         shap.sample(self.input_data, 100),
                                         link='logit',
                                         feature_names=self.input_data.columns,
                                         seed=0)
        predictions = self.model.predict
        prediction_probabilities = self.model.predict_proba
        return explainer, predictions, prediction_probabilities
    except AttributeError:
        # regression case
        explainer = shap.KernelExplainer(self.model.predict,
                                         shap.sample(self.input_data, 100),
                                         link='identity',
                                         feature_names=self.input_data.columns,
                                         seed=0)
        predictions = self.model.predict
        return explainer, predictions
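# Hedged sketch for kernel_explainer, not part of the original: a stand-in
# object provides the model and input_data attributes; a fitted classifier
# exercises the predict_proba/logit branch, while a model without
# predict_proba would fall through to the regression branch.
import types
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression

X_df = pd.DataFrame(np.random.rand(200, 4), columns=list("abcd"))
y_arr = (X_df["a"] > 0.5).astype(int)
owner = types.SimpleNamespace(model=LogisticRegression().fit(X_df, y_arr),
                              input_data=X_df)
explainer, predictions, prediction_probabilities = kernel_explainer(owner)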
def test_HistGradientBoostingClassifier_multidim():
    # train a tree-based model
    X, y = shap.datasets.adult()
    X = X[:100]
    y = y[:100]
    y = np.random.randint(0, 3, len(y))
    model = sklearn.ensemble.HistGradientBoostingClassifier(
        max_iter=10, max_depth=6).fit(X, y)
    explainer = shap.TreeExplainer(model, shap.sample(X, 10), model_output="raw")
    shap_values = explainer.shap_values(X)
    assert np.max(np.abs(shap_values[0].sum(1) + explainer.expected_value[0] -
                         model.decision_function(X)[:, 0])) < 1e-4
def setUp(self):
    X_train, y_train, X_test, y_test = titanic_fare()
    self.test_len = len(X_test)
    model = LinearRegression().fit(X_train, y_train)
    self.explainer = RegressionExplainer(
        model, X_test.iloc[:20], y_test.iloc[:20], shap='kernel',
        X_background=shap.sample(X_train, 5))
def get_feature_importances(models, test_data, complication, train_columns):
    """Calculate the SHAP values of the top models for each of the
    investigated complications."""
    tree_explain = [LGBMClassifier]
    features = []
    avg_shap_values = []
    test_data_i = test_data[complication]
    importance_dfs = {}
    counter = 0
    for x in models[complication]:
        # Get feature importances
        X_importance = test_data_i[train_columns]
        if type(x) in tree_explain:
            explainer = shap.TreeExplainer(x)
            shap_values = explainer.shap_values(X_importance,
                                                check_additivity=False)
        else:
            model_results = x.predict_proba
            X_importance = X_importance.fillna(X_importance.median())
            X_importance_ = shap.sample(X_importance, 50)
            explainer = shap.KernelExplainer(model_results, X_importance_)
            shap_values = explainer.shap_values(X_importance_,
                                                check_additivity=False)[0]
        values = np.abs(shap_values).mean(0)
        importance_df = pd.DataFrame()
        importance_df['column_name'] = train_columns
        importance_df["importance"] = values
        importance_df = importance_df.sort_values('column_name')
        importance_dfs[counter] = importance_df
        counter += 1
    importance_df = pd.DataFrame([X_importance.columns.tolist()]).T
    importance_df.columns = ['column_name']
    for x in range(len(importance_dfs)):
        importance_df[f"importance_{x}"] = importance_dfs[x]["importance"]
    # average across all importance_* columns (the original hardcoded range(6)
    # but then sliced up to a non-existent importance_6 column)
    col = importance_df.loc[:, "importance_0":
                            f"importance_{len(importance_dfs) - 1}"]
    importance_df["avg"] = col.mean(axis=1)
    features.append(importance_df.sort_values(
        by="avg", ascending=False).column_name[:4].values)
    avg_shap_values.append(importance_df.sort_values(
        by="avg", ascending=False).avg[:4].values)
    display_top_features(features, complication)
    return features, avg_shap_values
def plot_prediction_desicion(model, X_test, pred, row_idx):
    # The decision plot shows the model's multiple outputs for a single
    # observation; the dashed line is the prediction of our classifier.
    explainer = shap.TreeExplainer(model, data=shap.sample(X_test, 100),
                                   feature_perturbation="interventional")
    shap_values = explainer.shap_values(X_test)
    # base values should come from the explainer, one per output class
    shap.multioutput_decision_plot(list(explainer.expected_value), shap_values,
                                   row_index=row_idx,
                                   feature_names=list(X_test.columns),
                                   highlight=int(pred[row_idx]),
                                   legend_labels=["0-18", "19-70", "70+"],
                                   legend_location='lower right')
    plt.show()
def shap_importances(model, X_train, X_test, n_shap, normalize=True, sort=True):
    start = timer()
    # only use n_shap from X_test
    X_test = X_test.sample(n=min(n_shap, len(X_test)), replace=False)
    if isinstance(model, (RandomForestRegressor, GradientBoostingRegressor,
                          xgb.XGBRegressor)):
        # We get this warning for big X_train so choose smaller:
        # "Passing 20000 background samples may lead to slow runtimes. Consider
        #  using shap.sample(data, 100) to create a smaller background data set."
        explainer = shap.TreeExplainer(model, data=shap.sample(X_train, 100),
                                       feature_perturbation='interventional')
        shap_values = explainer.shap_values(X_test, check_additivity=False)
    elif isinstance(model, (Lasso, LinearRegression)):
        explainer = shap.LinearExplainer(model, shap.sample(X_train, 100),
                                         feature_perturbation='interventional')
        shap_values = explainer.shap_values(X_test)
    else:
        # gotta use really small sample; verrry slow
        explainer = shap.KernelExplainer(model.predict, shap.sample(X_train, 100))
        shap_values = explainer.shap_values(X_test, nsamples='auto')
    shapimp = np.mean(np.abs(shap_values), axis=0)
    stop = timer()
    print(f"SHAP time for {len(X_test)} test records using "
          f"{model.__class__.__name__} = {(stop - start):.1f}s")
    total_imp = np.sum(shapimp)
    normalized_shap = shapimp
    if normalize:
        normalized_shap = shapimp / total_imp
    shapI = pd.DataFrame(data={'Feature': X_test.columns,
                               'Importance': normalized_shap})
    shapI = shapI.set_index('Feature')
    if sort:
        shapI = shapI.sort_values('Importance', ascending=False)
    return shapI
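# Hedged usage sketch for shap_importances on the TreeExplainer branch;
# synthetic data, with timer, xgb and the sklearn models assumed imported at
# module level, as the function itself requires.
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor

X_df = pd.DataFrame(np.random.rand(300, 4), columns=list("abcd"))
y_arr = X_df["a"] * 2 + X_df["b"]
rf = RandomForestRegressor(n_estimators=30).fit(X_df.iloc[:200], y_arr[:200])
imp = shap_importances(rf, X_df.iloc[:200], X_df.iloc[200:], n_shap=50)
print(imp.head())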
def plot_feature_importance_for_class(model, X_train):
    explainer = shap.TreeExplainer(model, data=shap.sample(X_train, 100),
                                   feature_perturbation="interventional")
    shap_values = explainer.shap_values(X_train)
    # For a multi-output model summary_plot takes the full list of shap_values
    # with plot_type="bar"; pass shap_values[i] instead to explain a single
    # output class.
    shap.summary_plot(shap_values, X_train, plot_type="bar",
                      class_names=model.classes_, color=pl.get_cmap("tab10"))
    # Feature values in pink increase the prediction, values in blue decrease
    # it; the size of the bar shows the magnitude of the feature's effect.
    plt.show()
def RF():
    rf = RandomForestRegressor(n_estimators=30, oob_score=True, n_jobs=-1)
    rf.fit(X, y)
    print("OOB", rf.oob_score_)
    explainer = shap.TreeExplainer(rf, data=shap.sample(X, 300),
                                   feature_perturbation='interventional')
    shap_values = explainer.shap_values(X[:shap_test_size],
                                        check_additivity=False)
    print("shap_j averages:", np.mean(shap_values, axis=0))
    shapimp = np.mean(np.abs(shap_values), axis=0)
    s = np.sum(shapimp)
    print("\nRF SHAP importances", list(shapimp), shapimp * xrange,
          list(shapimp / s))
    return shap_values
def setUp(self):
    X_train, y_train, X_test, y_test = titanic_survive()
    train_names, test_names = titanic_names()
    model = LogisticRegression()
    model.fit(X_train, y_train)
    self.explainer = ClassifierExplainer(
        model, X_test.iloc[:20], y_test.iloc[:20],
        shap='kernel', model_output='probability',
        X_background=shap.sample(X_train, 5),
        cats=[{'Gender': ['Sex_female', 'Sex_male', 'Sex_nan']},
              'Deck', 'Embarked'],
        labels=['Not survived', 'Survived'])
def _summarise_background(self,
                          background_data: Union[shap.common.Data, pd.DataFrame,
                                                 np.ndarray, sparse.spmatrix],
                          n_background_samples: int) -> \
        Union[shap.common.Data, pd.DataFrame, np.ndarray, sparse.spmatrix]:
    """
    Summarises the background data to n_background_samples in order to reduce
    the computational cost. If the background data is a `shap.common.Data`
    object, no summarisation is performed.

    Returns
    -------
    If the user has specified grouping, then the input object is subsampled
    and an object of the same type is returned. Otherwise, the result of a
    k-means algorithm is wrapped in a `shap.common.DenseData` object and
    returned. The samples are weighted according to the frequency of the
    occurrence of the clusters in the original data.
    """
    if isinstance(background_data, shap.common.Data):
        msg = "Received option to summarise the data but the background_data " \
              "object was an instance of shap.common.Data. No summarisation " \
              "will take place!"
        logger.warning(msg)
        return background_data

    if background_data.ndim == 1:
        msg = "Received option to summarise the data but the background_data " \
              "object only had one record with {} features. No summarisation " \
              "will take place!"
        logger.warning(msg.format(len(background_data)))
        return background_data

    self.summarise_background = True

    # if the input is sparse, we assume there are categorical variables and
    # use random sampling, not kmeans
    if self.use_groups or self.categorical_names or isinstance(
            background_data, sparse.spmatrix):
        return shap.sample(background_data, nsamples=n_background_samples)
    else:
        logger.info(
            "When summarising with kmeans, the samples are weighted in "
            "proportion to their cluster occurrence frequency. Please specify "
            "a different weighting of the samples by passing a weights array "
            "of length n_background_samples to the constructor!")
        return shap.kmeans(background_data, n_background_samples)
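# The two summarisation strategies used above, shown standalone (a hedged
# illustration, not part of the original): shap.sample draws original rows at
# random, while shap.kmeans returns a DenseData summary whose points are
# weighted by cluster occurrence frequency.
import numpy as np
import shap

background = np.random.rand(1000, 6)
subsampled = shap.sample(background, nsamples=100)  # 100 original rows
summarised = shap.kmeans(background, 100)           # 100 weighted summary points
print(subsampled.shape, len(summarised.weights))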
def shap_plots(model, train_features, test_features, test_labels):
    print("Computing shapley values..")
    # compute SHAP values
    if isinstance(model, (MLP, MLPRegressor, MLPClassifier, ElasticNet,
                          LogisticRegression)):
        train_sample = shap.sample(train_features, 10)
        explainer = shap.Explainer(model.predict, train_sample)
    elif isinstance(model, (RandomForestRegressor, RandomForestClassifier)):
        explainer = shap.TreeExplainer(model, train_features)
    else:
        explainer = shap.Explainer(model, train_features)
    shap_values = explainer(test_features)

    # global bar plot (shap.plots.bar(shap_values[0]) would give a local one)
    shap.plots.bar(shap_values, max_display=10)

    # beeswarm plot
    shap.plots.beeswarm(shap_values)

    # decision plot (uses the legacy expected_value/shap_values API, which is
    # available on TreeExplainer)
    expected_value = explainer.expected_value
    select = range(20)
    features_sample = test_features.iloc[select]
    shap.decision_plot(expected_value, explainer.shap_values(features_sample),
                       features_sample)

    # heatmap
    shap.plots.heatmap(shap_values, max_display=10)

    # scatter
    shap.plots.scatter(shap_values[:, "hs_child_age_None"], color=shap_values,
                       alpha=0.8)

    # feature clustering (redundant feature detection); by default this trains
    # (X.shape[1] choose 2) 2-feature XGBoost models
    clustering = shap.utils.hclust(test_features, test_labels)
    shap.plots.bar(shap_values, clustering=clustering, clustering_cutoff=0.5)
def compute_shap_values(self) -> None:
    """Compute SHAP values with the explainer that matches the model:
    `shap.TreeExplainer` by default, falling back to `KernelExplainer`
    otherwise. Also provides compatibility with sklearn pipelines.
    The results are stored in `self.shap_values`.
    """
    with warnings.catch_warnings():
        # Some `shap` warnings are not useful for this implementation
        warnings.simplefilter("ignore")
        try:
            explainer = shap.TreeExplainer(
                model=self.model,
                feature_perturbation='tree_path_dependent')
            shap_values_arguments = dict(X=self.X_test_to_shap)
        except Exception:
            def model_predict(data_array):
                data_frame = pd.DataFrame(data_array,
                                          columns=self.column_names)
                return self.model.predict_proba(data_frame)[:, 1]

            explainer = shap.KernelExplainer(
                model=model_predict,
                data=shap.sample(self.X_train_to_shap, 100),
                link='logit')
            shap_values_arguments = dict(X=self.X_test_to_shap, l1_reg='aic')
        self.shap_values = explainer.shap_values(**shap_values_arguments)
parameters = {
    'time': time,
    'target': target,
    'drop_opt': drop_opt,
}
X, y, info = _load_train_data(return_info=True, **parameters)
estimators = load_models(time, target, drop_opt, model_names)

# subsample time indices to reduce autocorrelations
X_subset, y_subset = get_independent_samples(X, y, info)

explainer = InterpretToolkit(estimators=estimators,
                             estimator_names=model_names,
                             X=X_subset.copy(),
                             y=y_subset.copy())

background_dataset = shap.sample(X, 100)
results = explainer.local_contributions(method='shap',
                                        background_dataset=background_dataset,
                                        performance_based=True,
                                        n_samples=n_samples)
results = explainer.save(fname=save_fname, data=results)

duration = datetime.datetime.now() - start_time
seconds = duration.total_seconds()
hours = seconds // 3600
minutes = (seconds % 3600) // 60
seconds = seconds % 60