def visualize(shap_values, feature_names=None, data=None, out_names=None):
    """ Visualize the given SHAP values with an additive force layout. """

    # Anything that is not a raw numpy array is assumed to already be an
    # explanation object that iml knows how to render directly.
    if not type(shap_values) == np.ndarray:
        return iml.visualize(shap_values)

    # Promote a single 1-D row of values to a (1, M) matrix.
    if shap_values.ndim == 1:
        shap_values = shap_values.reshape(1, len(shap_values))

    out_names = ["output value"] if out_names is None else out_names

    n_rows = shap_values.shape[0]
    if n_rows == 1:
        # Single explanation: fill in blank names/values where missing.
        # NOTE: the last column of shap_values is treated as the base value.
        if feature_names is None:
            feature_names = [""] * (shap_values.shape[1] - 1)
        if data is None:
            data = [""] * len(feature_names)
        if type(data) == np.ndarray:
            data = data.flatten()

        inst = Instance(np.zeros((1, len(feature_names))), data)
        return AdditiveExplanation(
            shap_values[0, -1],
            np.sum(shap_values[0, :]),
            shap_values[0, :-1],
            None,
            inst,
            IdentityLink(),
            Model(None, out_names),
            DenseData(np.zeros((1, len(feature_names))), list(feature_names)),
        )

    # Multiple explanations: build one AdditiveExplanation per row.
    explanations = []
    for row in range(n_rows):
        if feature_names is None:
            feature_names = [""] * (shap_values.shape[1] - 1)
        shown = ([""] * len(feature_names)) if data is None else data[row, :]

        inst = Instance(np.ones((1, len(feature_names))), shown)
        explanations.append(AdditiveExplanation(
            shap_values[row, -1],
            np.sum(shap_values[row, :]),
            shap_values[row, :-1],
            None,
            inst,
            IdentityLink(),
            Model(None, out_names),
            DenseData(np.ones((1, len(feature_names))), list(feature_names)),
        ))
    return explanations
Example #2
0
File: shap.py  Project: ofergold/shap
    def __init__(self, model, data, link=IdentityLink(), **kwargs):
        """Initialize the explainer from a model and background data.

        Parameters
        ----------
        model : object
            Model to be explained; standardized via ``convert_to_model``.
        data : object
            Background (representative) data set; standardized via
            ``convert_to_data``. Must become a ``DenseData`` instance.
        link : Link, optional
            Link function between model output and explanation space
            (identity link by default).
        **kwargs :
            ``weights`` — per-sample weights over the background data
            (defaults to uniform weights).
        """

        # convert incoming inputs to standardized iml objects
        self.link = convert_to_link(link)
        self.model = convert_to_model(model)
        self.data = convert_to_data(data)
        match_model_to_data(self.model, self.data)

        # enforce our current input type limitations
        assert isinstance(
            self.data, DenseData
        ), "Shap explainer only supports the DenseData input currently."
        assert not self.data.transposed, "Shap explainer does not support transposed DenseData currently."

        # init our parameters
        self.N = self.data.data.shape[0]
        self.P = self.data.data.shape[1]

        # Validate the weights BEFORE normalizing so a bad input fails fast,
        # and normalize into a fresh float array so we never mutate a
        # caller-owned array in place (the old `/=` did, and also failed on
        # integer arrays under true division).
        weights = np.asarray(kwargs.get("weights", np.ones(self.N)),
                             dtype=np.float64)  # TODO: Use these weights!
        assert len(
            weights
        ) == self.N, "Provided 'weights' must match the number of representative data points {0}!".format(
            self.N)
        self.weights = weights / np.sum(weights)

        self.linkfv = np.vectorize(self.link.f)
        self.nsamplesAdded = 0
        self.nsamplesRun = 0
Example #3
0
    def __init__(self, model, data, link=IdentityLink(), **kwargs):
        """Standardize the model, background data and link into iml objects
        and initialize the explainer's bookkeeping state."""
        self.link = convert_to_link(link)
        self.model = convert_to_model(model)
        self.keep_index = kwargs.get("keep_index", False)
        self.data = convert_to_data(data, keep_index=self.keep_index)
        match_model_to_data(self.model, self.data)

        # Only dense, non-transposed background data is supported for now.
        assert isinstance(self.data, DenseData), \
            "Shap explainer only supports the DenseData input currently."
        assert not self.data.transposed, \
            "Shap explainer does not support transposed DenseData currently."

        # A large background set makes every explanation slower, so nudge the
        # user toward summarizing it first.
        n_background = len(self.data.weights)
        if n_background > 100:
            log.warning(
                "Using " + str(n_background) + " background data samples could cause "
                "slower run times. Consider using shap.kmeans(data, K) to summarize the background "
                "as K weighted samples.")

        # Cached dimensions, vectorized link function, sampling counters.
        self.N = self.data.data.shape[0]
        self.P = self.data.data.shape[1]
        self.linkfv = np.vectorize(self.link.f)
        self.nsamplesAdded = 0
        self.nsamplesRun = 0
Example #4
0
def visualize(shap_values, features=None, feature_names=None, out_names=None, data=None,
              link=IdentityLink()):
    """Deprecated alias for ``force_plot``; kept for backwards compatibility."""
    warnings.warn("the visualize() function has been renamed to 'force_plot' for consistency")

    # 'data' was the old name for 'features'; honor it only when 'features'
    # was not explicitly supplied.
    if data is not None:
        warnings.warn("the 'data' parameter has been renamed to 'features' for consistency")
        features = data if features is None else features

    return force_plot(shap_values, features, feature_names, out_names, link)
Example #5
0
    def __init__(self, model, data, link=IdentityLink(), **kwargs):
        """Initialize the explainer and precompute the expected model output.

        Parameters
        ----------
        model : object
            Model to be explained; standardized via ``convert_to_model``.
        data : object
            Background (representative) data set; standardized via
            ``convert_to_data``. Must become a ``DenseData`` instance.
        link : Link, optional
            Link function between model output and explanation space
            (identity link by default).
        **kwargs :
            ``keep_index`` (bool, default False) — pass the pandas index
            through to the model by calling it on a DataFrame;
            ``keep_index_ordered`` (bool, default False) — stored but not
            used in this constructor (presumably consumed elsewhere — TODO
            confirm).
        """

        # convert incoming inputs to standardized iml objects
        self.link = convert_to_link(link)
        self.model = convert_to_model(model)
        self.keep_index = kwargs.get("keep_index", False)
        self.keep_index_ordered = kwargs.get("keep_index_ordered", False)
        self.data = convert_to_data(data, keep_index=self.keep_index)
        match_model_to_data(self.model, self.data)

        # enforce our current input type limitations
        assert isinstance(
            self.data, DenseData
        ), "Shap explainer only supports the DenseData input currently."
        assert not self.data.transposed, "Shap explainer does not support transposed DenseData currently."

        # warn users about large background data sets
        if len(self.data.weights) > 100:
            log.warning(
                "Using " + str(len(self.data.weights)) +
                " background data samples could cause " +
                "slower run times. Consider using shap.kmeans(data, K) to summarize the background "
                + "as K weighted samples.")

        # init our parameters
        self.N = self.data.data.shape[0]  # number of background samples
        self.P = self.data.data.shape[1]  # number of features
        self.linkfv = np.vectorize(self.link.f)  # elementwise link function
        self.nsamplesAdded = 0
        self.nsamplesRun = 0

        # find E_x[f(x)]: evaluate the model over the background set and take
        # the weight-averaged output(s).
        if self.keep_index:
            model_null = self.model.f(self.data.convert_to_df())
        else:
            model_null = self.model.f(self.data.data)
        if isinstance(model_null, (pd.DataFrame, pd.Series)):
            # drop pandas wrapper (and any singleton dimensions)
            model_null = np.squeeze(model_null.values)
        self.fnull = np.sum((model_null.T * self.data.weights).T, 0)
        self.expected_value = self.fnull

        # see if we have a vector output; a 0-d fnull means a single scalar
        # output, which we still store as a length-1 array for uniformity
        self.vector_out = True
        if len(self.fnull.shape) == 0:
            self.vector_out = False
            self.fnull = np.array([self.fnull])
            self.D = 1
        else:
            self.D = self.fnull.shape[0]
Example #6
0
def explain_instances(model, data, feature_names, out_names):
    """Build an AdditiveExplanation for every row of *data*.

    Parameters
    ----------
    model : object
        Supported model; currently only ``xgboost.core.Booster``.
    data : numpy.ndarray
        2-D matrix of instances to explain (rows = instances).
    feature_names : list or None
        Feature labels; defaults to "1".."P" when None.
    out_names : list or None
        Output label(s); defaults to ["model output"].

    Returns
    -------
    list of AdditiveExplanation, or None for unsupported model types.
    """
    if out_names is None:
        out_names = ["model output"]
    if feature_names is None:
        # BUG FIX: (i + 1) + "" raised TypeError (int + str); str(i + 1)
        # produces the intended "1".."P" labels.
        feature_names = [str(i + 1) for i in range(data.shape[1])]

    if type(model) == xgboost.core.Booster:
        exps = []
        # pred_contribs=True yields per-feature contributions with the bias
        # (expected value) in the final column.
        contribs = model.predict(xgboost.DMatrix(data), pred_contribs=True)
        for i in range(data.shape[0]):
            instance = Instance(data[i:i + 1, :], data[i, :])
            e = AdditiveExplanation(
                contribs[i, -1], np.sum(contribs[i, :]), contribs[i, :-1],
                None, instance, IdentityLink(), Model(None, out_names),
                DenseData(np.zeros((1, data.shape[1])), list(feature_names)))
            exps.append(e)
        return exps
    # Unsupported model types fall through to None (unchanged behavior).
    return None
Example #7
0
    def __init__(self, model, data, link=IdentityLink(), **kwargs):
        """Wrap the model, background data and link into standardized iml
        objects and initialize the explainer's counters."""
        self.link = convert_to_link(link)
        self.model = convert_to_model(model)
        self.data = convert_to_data(data)
        match_model_to_data(self.model, self.data)

        # Only dense, non-transposed background data is supported for now.
        assert isinstance(self.data, DenseData), "Shap explainer only supports the DenseData input currently."
        assert not self.data.transposed, "Shap explainer does not support transposed DenseData currently."

        # Cached dimensions, vectorized link function, sampling counters.
        background = self.data.data
        self.N = background.shape[0]
        self.P = background.shape[1]
        self.linkfv = np.vectorize(self.link.f)
        self.nsamplesAdded = 0
        self.nsamplesRun = 0
Example #8
0
def explain_instance(model, data, feature_names, out_names):
    """Build an AdditiveExplanation for the first row of *data*.

    Parameters
    ----------
    model : object
        Supported models: ``xgboost.core.Booster`` or
        ``lightgbm.basic.Booster``.
    data : numpy.ndarray
        2-D matrix; only row 0 is explained.
    feature_names : list or None
        Feature labels; defaults to "1".."P" when None.
    out_names : list or None
        Output label(s); defaults to ["model output"].

    Returns
    -------
    AdditiveExplanation, or None for unsupported model types.
    """
    if out_names is None:
        out_names = ["model output"]
    if feature_names is None:
        # BUG FIX: (i + 1) + "" raised TypeError (int + str); str(i + 1)
        # produces the intended "1".."P" labels.
        feature_names = [str(i + 1) for i in range(data.shape[1])]

    if type(model) == xgboost.core.Booster:
        contribs = model.predict(xgboost.DMatrix(data), pred_contribs=True)
    elif type(model) == lightgbm.basic.Booster:
        contribs = model.predict(data, pred_contrib=True)
    else:
        return None

    # The last column of the contributions is the bias (expected value).
    instance = Instance(data[0:1, :], data[0, :])
    e = AdditiveExplanation(
        contribs[0, -1], np.sum(contribs[0, :]), contribs[0, :-1], None,
        instance, IdentityLink(), Model(None, out_names),
        DenseData(np.zeros((1, data.shape[1])), list(feature_names)))
    return e
Example #9
0
File: plots.py  Project: Keita1/shap
def visualize(shap_values,
              features=None,
              feature_names=None,
              out_names=None,
              data=None):
    """ Visualize the given SHAP values with an additive force layout.

    Parameters
    ----------
    shap_values : numpy.ndarray or explanation object
        Matrix of SHAP values (last column is treated as the base value);
        anything that is not an ndarray is passed straight to iml.visualize.
    features : DataFrame / Series / ndarray / list, optional
        The feature values of the explained instance(s).
    feature_names : list, optional
        Feature labels; inferred from pandas inputs when possible.
    out_names : list, optional
        Output label(s); defaults to ["output value"].
    data : optional
        Deprecated alias for ``features``.
    """

    # backwards compatability: 'data' was renamed to 'features'
    if data is not None:
        warnings.warn(
            "the 'data' parameter has been renamed to 'features' for consistency"
        )
        if features is None:
            features = data

    if type(shap_values) != np.ndarray:
        return iml.visualize(shap_values)

    # convert from a DataFrame or other types (the pandas types are compared
    # by name so pandas does not need to be importable here)
    if str(type(features)) == "<class 'pandas.core.frame.DataFrame'>":
        if feature_names is None:
            feature_names = list(features.columns)
        # BUG FIX: as_matrix() was removed in pandas 1.0; .values is the
        # long-supported equivalent.
        features = features.values
    elif str(type(features)) == "<class 'pandas.core.series.Series'>":
        if feature_names is None:
            feature_names = list(features.index)
        features = features.values
    # BUG FIX: str(type([])) is "<class 'list'>", never "list", so the old
    # comparison could not match and list inputs crashed on .shape below.
    elif isinstance(features, list):
        if feature_names is None:
            feature_names = features
        features = None
    # BUG FIX: guard against features being None before touching .shape
    elif features is not None and len(features.shape) == 1 and feature_names is None:
        feature_names = features
        features = None

    if len(shap_values.shape) == 1:
        shap_values = np.reshape(shap_values, (1, len(shap_values)))

    if out_names is None:
        out_names = ["output value"]

    if shap_values.shape[0] == 1:
        # single explanation
        if feature_names is None:
            feature_names = ["" for i in range(shap_values.shape[1] - 1)]
        if features is None:
            features = ["" for i in range(len(feature_names))]
        if type(features) == np.ndarray:
            features = features.flatten()

        instance = Instance(np.zeros((1, len(feature_names))), features)
        e = AdditiveExplanation(
            shap_values[0, -1], np.sum(shap_values[0, :]), shap_values[0, :-1],
            None, instance, IdentityLink(), Model(None, out_names),
            DenseData(np.zeros((1, len(feature_names))), list(feature_names)))
        return e

    else:
        # one explanation object per row
        exps = []
        for i in range(shap_values.shape[0]):
            if feature_names is None:
                feature_names = ["" for i in range(shap_values.shape[1] - 1)]
            if features is None:
                display_features = ["" for i in range(len(feature_names))]
            else:
                display_features = features[i, :]

            instance = Instance(np.ones((1, len(feature_names))),
                                display_features)
            e = AdditiveExplanation(
                shap_values[i, -1], np.sum(shap_values[i, :]),
                shap_values[i, :-1], None, instance, IdentityLink(),
                Model(None, out_names),
                DenseData(np.ones((1, len(feature_names))),
                          list(feature_names)))
            exps.append(e)
        return exps