def visualize(shap_values, feature_names=None, data=None, out_names=None):
    """ Visualize the given SHAP values with an additive force layout. """

    # pass non-array explanation objects straight through to iml
    if type(shap_values) != np.ndarray:
        return iml.visualize(shap_values)

    # a single explanation arrives as a 1-D vector; lift it to a 1-row matrix
    if len(shap_values.shape) == 1:
        shap_values = np.reshape(shap_values, (1, len(shap_values)))

    if out_names is None:
        out_names = ["output value"]

    if shap_values.shape[0] == 1:
        if feature_names is None:
            feature_names = ["" for i in range(shap_values.shape[1] - 1)]
        if data is None:
            data = ["" for i in range(len(feature_names))]
        if type(data) == np.ndarray:
            data = data.flatten()

        # the last column of shap_values carries the base (expected) value
        instance = Instance(np.zeros((1, len(feature_names))), data)
        e = AdditiveExplanation(
            shap_values[0, -1],
            np.sum(shap_values[0, :]),
            shap_values[0, :-1],
            None,
            instance,
            IdentityLink(),
            Model(None, out_names),
            DenseData(np.zeros((1, len(feature_names))), list(feature_names))
        )
        return e

    else:
        exps = []
        for i in range(shap_values.shape[0]):
            if feature_names is None:
                feature_names = ["" for i in range(shap_values.shape[1] - 1)]
            if data is None:
                display_data = ["" for i in range(len(feature_names))]
            else:
                display_data = data[i, :]

            instance = Instance(np.ones((1, len(feature_names))), display_data)
            e = AdditiveExplanation(
                shap_values[i, -1],
                np.sum(shap_values[i, :]),
                shap_values[i, :-1],
                None,
                instance,
                IdentityLink(),
                Model(None, out_names),
                DenseData(np.ones((1, len(feature_names))), list(feature_names))
            )
            exps.append(e)
        return exps
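# A hedged sketch of calling this early visualize variant directly with a raw
# SHAP matrix. The values are fabricated for illustration: each row holds the
# per-feature SHAP values followed by the base (expected) value in the last
# column, which is the layout the function above slices apart.
import numpy as np

shap_values = np.array([0.2, -0.1, 0.5])  # two feature effects + base value
e = visualize(shap_values, feature_names=["f1", "f2"], data=np.array([1.0, 2.0]))
# the 1-D input is reshaped to a single row, so one AdditiveExplanation is returned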
def __init__(self, model, data, link=IdentityLink(), **kwargs):
    # convert incoming inputs to standardized iml objects
    self.link = convert_to_link(link)
    self.model = convert_to_model(model)
    self.data = convert_to_data(data)
    match_model_to_data(self.model, self.data)

    # enforce our current input type limitations
    assert isinstance(self.data, DenseData), "Shap explainer only supports the DenseData input currently."
    assert not self.data.transposed, "Shap explainer does not support transposed DenseData currently."

    # init our parameters
    self.N = self.data.data.shape[0]
    self.P = self.data.data.shape[1]
    self.weights = kwargs.get("weights", np.ones(self.N))  # TODO: Use these weights!
    self.weights /= sum(self.weights)
    assert len(self.weights) == self.N, \
        "Provided 'weights' must match the number of representative data points {0}!".format(self.N)
    self.linkfv = np.vectorize(self.link.f)
    self.nsamplesAdded = 0
    self.nsamplesRun = 0
def __init__(self, model, data, link=IdentityLink(), **kwargs):
    # convert incoming inputs to standardized iml objects
    self.link = convert_to_link(link)
    self.model = convert_to_model(model)
    self.keep_index = kwargs.get("keep_index", False)
    self.data = convert_to_data(data, keep_index=self.keep_index)
    match_model_to_data(self.model, self.data)

    # enforce our current input type limitations
    assert isinstance(self.data, DenseData), "Shap explainer only supports the DenseData input currently."
    assert not self.data.transposed, "Shap explainer does not support transposed DenseData currently."

    # warn users about large background data sets
    if len(self.data.weights) > 100:
        log.warning("Using " + str(len(self.data.weights)) + " background data samples could cause " +
                    "slower run times. Consider using shap.kmeans(data, K) to summarize the background " +
                    "as K weighted samples.")

    # init our parameters
    self.N = self.data.data.shape[0]
    self.P = self.data.data.shape[1]
    self.linkfv = np.vectorize(self.link.f)
    self.nsamplesAdded = 0
    self.nsamplesRun = 0
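# A minimal sketch of the background-summarization workflow the warning above
# points to. It assumes the public shap API (shap.kmeans, shap.KernelExplainer)
# and an arbitrary scikit-learn model; the data and variable names are
# illustrative only, not part of this module.
import numpy as np
import shap
from sklearn.linear_model import LogisticRegression

X = np.random.randn(1000, 5)          # 1000 background rows would trigger the warning
y = (X[:, 0] > 0).astype(int)
model = LogisticRegression().fit(X, y)

background = shap.kmeans(X, 10)       # summarize the background as 10 weighted samples
explainer = shap.KernelExplainer(model.predict_proba, background)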
def visualize(shap_values, features=None, feature_names=None, out_names=None, data=None, link=IdentityLink()):
    warnings.warn("the visualize() function has been renamed to 'force_plot' for consistency")

    # backwards compatibility
    if data is not None:
        warnings.warn("the 'data' parameter has been renamed to 'features' for consistency")
        if features is None:
            features = data

    return force_plot(shap_values, features, feature_names, out_names, link)
def __init__(self, model, data, link=IdentityLink(), **kwargs):
    # convert incoming inputs to standardized iml objects
    self.link = convert_to_link(link)
    self.model = convert_to_model(model)
    self.keep_index = kwargs.get("keep_index", False)
    self.keep_index_ordered = kwargs.get("keep_index_ordered", False)
    self.data = convert_to_data(data, keep_index=self.keep_index)
    match_model_to_data(self.model, self.data)

    # enforce our current input type limitations
    assert isinstance(self.data, DenseData), "Shap explainer only supports the DenseData input currently."
    assert not self.data.transposed, "Shap explainer does not support transposed DenseData currently."

    # warn users about large background data sets
    if len(self.data.weights) > 100:
        log.warning("Using " + str(len(self.data.weights)) + " background data samples could cause " +
                    "slower run times. Consider using shap.kmeans(data, K) to summarize the background " +
                    "as K weighted samples.")

    # init our parameters
    self.N = self.data.data.shape[0]
    self.P = self.data.data.shape[1]
    self.linkfv = np.vectorize(self.link.f)
    self.nsamplesAdded = 0
    self.nsamplesRun = 0

    # find E_x[f(x)]
    if self.keep_index:
        model_null = self.model.f(self.data.convert_to_df())
    else:
        model_null = self.model.f(self.data.data)
    if isinstance(model_null, (pd.DataFrame, pd.Series)):
        model_null = np.squeeze(model_null.values)
    self.fnull = np.sum((model_null.T * self.data.weights).T, 0)
    self.expected_value = self.fnull

    # see if we have a vector output
    self.vector_out = True
    if len(self.fnull.shape) == 0:
        self.vector_out = False
        self.fnull = np.array([self.fnull])
        self.D = 1
    else:
        self.D = self.fnull.shape[0]
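# A small numpy sketch of the E_x[f(x)] computation above: fnull is the
# background-weighted average of the model output, and the transpose trick
# makes the same expression work for scalar and vector outputs. The arrays
# here are made-up illustrations, not values from the library.
import numpy as np

weights = np.array([0.5, 0.3, 0.2])          # normalized background weights
outputs = np.array([[0.1, 0.9],              # model output per background row
                    [0.4, 0.6],              # (vector output with D = 2)
                    [0.7, 0.3]])

fnull = np.sum((outputs.T * weights).T, 0)   # weighted column-wise average
# fnull == weights @ outputs == [0.31, 0.69]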
def explain_instances(model, data, feature_names, out_names):
    if out_names is None:
        out_names = ["model output"]
    if feature_names is None:
        feature_names = [str(i + 1) for i in range(data.shape[1])]

    if type(model) == xgboost.core.Booster:
        exps = []
        # pred_contribs=True returns one column per feature plus a final bias column
        contribs = model.predict(xgboost.DMatrix(data), pred_contribs=True)
        for i in range(data.shape[0]):
            instance = Instance(data[i:i + 1, :], data[i, :])
            e = AdditiveExplanation(
                contribs[i, -1],
                np.sum(contribs[i, :]),
                contribs[i, :-1],
                None,
                instance,
                IdentityLink(),
                Model(None, out_names),
                DenseData(np.zeros((1, data.shape[1])), list(feature_names))
            )
            exps.append(e)
        return exps
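# A hedged usage sketch for explain_instances, assuming an xgboost Booster
# trained on a small random matrix. The dataset and training parameters are
# illustrative; the only behavior relied on is pred_contribs, which yields a
# (n_rows, n_features + 1) matrix whose last column is the bias term.
import numpy as np
import xgboost

X = np.random.randn(20, 4)
y = np.random.randn(20)
booster = xgboost.train({"max_depth": 2}, xgboost.DMatrix(X, label=y), num_boost_round=10)

exps = explain_instances(booster, X, feature_names=None, out_names=None)
print(len(exps))  # one AdditiveExplanation per row of X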
def __init__(self, model, data, link=IdentityLink(), **kwargs):
    # convert incoming inputs to standardized iml objects
    self.link = convert_to_link(link)
    self.model = convert_to_model(model)
    self.data = convert_to_data(data)
    match_model_to_data(self.model, self.data)

    # enforce our current input type limitations
    assert isinstance(self.data, DenseData), "Shap explainer only supports the DenseData input currently."
    assert not self.data.transposed, "Shap explainer does not support transposed DenseData currently."

    # init our parameters
    self.N = self.data.data.shape[0]
    self.P = self.data.data.shape[1]
    self.linkfv = np.vectorize(self.link.f)
    self.nsamplesAdded = 0
    self.nsamplesRun = 0
def explain_instance(model, data, feature_names, out_names):
    if out_names is None:
        out_names = ["model output"]
    if feature_names is None:
        feature_names = [str(i + 1) for i in range(data.shape[1])]

    # both boosters return per-feature contributions plus a final bias column
    if type(model) == xgboost.core.Booster:
        contribs = model.predict(xgboost.DMatrix(data), pred_contribs=True)
    elif type(model) == lightgbm.basic.Booster:
        contribs = model.predict(data, pred_contrib=True)
    else:
        return None

    instance = Instance(data[0:1, :], data[0, :])
    e = AdditiveExplanation(
        contribs[0, -1],
        np.sum(contribs[0, :]),
        contribs[0, :-1],
        None,
        instance,
        IdentityLink(),
        Model(None, out_names),
        DenseData(np.zeros((1, data.shape[1])), list(feature_names))
    )
    return e
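# A hedged sketch of the lightgbm path through explain_instance. The training
# setup is illustrative only; pred_contrib=True in lightgbm likewise appends a
# bias column after the per-feature contributions, which is what the function
# above slices off as the base value.
import numpy as np
import lightgbm

X = np.random.randn(50, 3)
y = np.random.randn(50)
booster = lightgbm.train({"objective": "regression"},
                         lightgbm.Dataset(X, label=y), num_boost_round=10)

e = explain_instance(booster, X, feature_names=None, out_names=None)
# the explanation's base value plus its per-feature effects reconstructs the row-0 prediction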
def visualize(shap_values, features=None, feature_names=None, out_names=None, data=None):
    """ Visualize the given SHAP values with an additive force layout. """

    # backwards compatibility
    if data is not None:
        warnings.warn("the 'data' parameter has been renamed to 'features' for consistency")
        if features is None:
            features = data

    # pass non-array explanation objects straight through to iml
    if type(shap_values) != np.ndarray:
        return iml.visualize(shap_values)

    # convert from a DataFrame or other types
    if str(type(features)) == "<class 'pandas.core.frame.DataFrame'>":
        if feature_names is None:
            feature_names = list(features.columns)
        features = features.values
    elif str(type(features)) == "<class 'pandas.core.series.Series'>":
        if feature_names is None:
            feature_names = list(features.index)
        features = features.values
    elif isinstance(features, list):
        if feature_names is None:
            feature_names = features
        features = None
    elif features is not None and len(features.shape) == 1 and feature_names is None:
        feature_names = features
        features = None

    # a single explanation arrives as a 1-D vector; lift it to a 1-row matrix
    if len(shap_values.shape) == 1:
        shap_values = np.reshape(shap_values, (1, len(shap_values)))

    if out_names is None:
        out_names = ["output value"]

    if shap_values.shape[0] == 1:
        if feature_names is None:
            feature_names = ["" for i in range(shap_values.shape[1] - 1)]
        if features is None:
            features = ["" for i in range(len(feature_names))]
        if type(features) == np.ndarray:
            features = features.flatten()

        # the last column of shap_values carries the base (expected) value
        instance = Instance(np.zeros((1, len(feature_names))), features)
        e = AdditiveExplanation(
            shap_values[0, -1],
            np.sum(shap_values[0, :]),
            shap_values[0, :-1],
            None,
            instance,
            IdentityLink(),
            Model(None, out_names),
            DenseData(np.zeros((1, len(feature_names))), list(feature_names))
        )
        return e

    else:
        exps = []
        for i in range(shap_values.shape[0]):
            if feature_names is None:
                feature_names = ["" for i in range(shap_values.shape[1] - 1)]
            if features is None:
                display_features = ["" for i in range(len(feature_names))]
            else:
                display_features = features[i, :]

            instance = Instance(np.ones((1, len(feature_names))), display_features)
            e = AdditiveExplanation(
                shap_values[i, -1],
                np.sum(shap_values[i, :]),
                shap_values[i, :-1],
                None,
                instance,
                IdentityLink(),
                Model(None, out_names),
                DenseData(np.ones((1, len(feature_names))), list(feature_names))
            )
            exps.append(e)
        return exps
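# A hedged end-to-end sketch of calling this visualize variant with a pandas
# DataFrame. The SHAP matrix is fabricated for illustration: one column per
# feature plus a trailing base-value column, matching the layout the function
# expects; feature names are picked up from the DataFrame columns.
import numpy as np
import pandas as pd

features = pd.DataFrame(np.random.randn(3, 2), columns=["age", "income"])
shap_values = np.hstack([np.random.randn(3, 2), np.full((3, 1), 0.5)])  # last column = base value

exps = visualize(shap_values, features)  # feature names inferred from the columns
print(len(exps))                         # one explanation per row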