Example #1
0
 def test_pickle(self):
     """Pickling a model keeps its domain but drops ``original_data``."""
     source = Model(Domain([]))
     source.original_data = [1, 2, 3]

     serialized = pickle.dumps(source)
     restored = pickle.loads(serialized)

     self.assertEqual(source.domain, restored.domain)
     # The pickled object itself must be left untouched ...
     self.assertEqual(source.original_data, [1, 2, 3])
     # ... while the round-tripped copy carries no data.
     self.assertEqual(restored.original_data, None)
Example #2
0
 def test_pickle(self):
     """Check that a pickle round trip does not carry ``original_data``."""
     before = Model(Domain([]))
     before.original_data = [1, 2, 3]

     payload = pickle.dumps(before)
     after = pickle.loads(payload)

     # The domain has to survive the round trip.
     self.assertEqual(before.domain, after.domain)
     # Serializing must not clear the data on the source model.
     self.assertEqual(before.original_data, [1, 2, 3])
     # The unpickled model comes back without the data.
     self.assertEqual(after.original_data, None)
Example #3
0
 def amended_data(self, model: Model) -> Table:
     """
     Return the data, extended with a "Mahalanobis" meta column when the
     covariance outlier method is selected.

     For any other outlier method ``self.data`` is returned as is. Otherwise
     the values of ``model.mahalanobis(self.data.X)`` are appended as an
     additional meta column to a table transformed into the extended domain.
     """
     if self.outlier_method != self.Covariance:
         return self.data

     table = self.data
     # Turn the distances into a single column so they can be stacked
     # next to the existing metas.
     distances = model.mahalanobis(table.X).reshape(len(table), 1)

     domain = table.domain
     extended_metas = [*domain.metas, ContinuousVariable(name="Mahalanobis")]
     extended_domain = Domain(
         domain.attributes, domain.class_vars, extended_metas
     )

     result = table.transform(extended_domain)
     result.metas = np.hstack((table.metas, distances))
     return result
Example #4
0
def _check_model(model: Model, data: Table) -> bool:
    """
    Validate that the model fits the data's class type and report whether
    transforming the data to the model's domain changes ``X``.

    Raises
    ------
    ValueError
        If a ``RegModel`` is given data with a discrete class, or a
        ``ClsModel`` is given data with a continuous class.

    Returns
    -------
    True when ``data.X`` and the ``X`` of the data transformed to the model
    domain differ in shape or in any element, False otherwise.
    """
    domain = data.domain
    if domain.has_discrete_class and isinstance(model, RegModel):
        raise ValueError(
            f"{model} can not be used for data with discrete class."
        )
    if domain.has_continuous_class and isinstance(model, ClsModel):
        raise ValueError(
            f"{model} can not be used for data with continuous class."
        )

    transformed_X = model.data_to_model_domain(data).X
    original_X = data.X

    if original_X.shape != transformed_X.shape:
        return True

    difference = original_X != transformed_X
    if sp.issparse(original_X) and sp.issparse(transformed_X):
        # a sparse comparison result stores only the mismatching entries
        return difference.nnz != 0
    return difference.any()
 def __call__(self, data, *_):
     """
     Return a model over an empty domain when ``data`` is None; raise
     ``ValueError("boom")`` for any other data. Extra positional
     arguments are ignored.
     """
     if data is None:
         return Model(Domain([]))
     raise ValueError("boom")
def compute_shap_values(
    model: Model,
    data: Table,
    reference_data: Table,
    progress_callback: Callable = None,
) -> Tuple[List[np.ndarray], Table, np.ndarray, np.ndarray]:
    """
    Explain a model with SHAP values and return them together with the
    data table transformed to the model's domain.

    The tree-specific explanation is tried first; when it yields no SHAP
    values (``None``), the explanation for other models is used instead.

    Parameters
    ----------
    model
        The model to explain.
    data
        The data whose predictions are explained.
    reference_data
        Background data used for perturbation.
    progress_callback
        Callback used to report progress. When None, ``dummy_callback``
        is used.

    Returns
    -------
    shap_values
        Shapley values for each data item computed by the SHAP library: a
        list with one array per class, ordered as the values in the
        class_var. Each array has shape (num cases x num attributes) and
        holds the contribution of each attribute to the final prediction.
    data_transformed
        The table the explanation was made on: ``data`` transformed to the
        model's domain.
    sample_mask
        SHAP values are computed only for a sample of the data. This
        boolean mask tells which rows of data_transformed are explained.
    base_value
        The base value (average prediction on the dataset) for each class,
        always returned as an array.
    """
    report = dummy_callback if progress_callback is None else progress_callback

    # a fixed seed keeps sampling and SHAP computation repeatable for the
    # same data
    with temp_seed(0):
        report(0, "Computing explanation ...")

        data_transformed = model.data_to_model_domain(data)
        background = model.data_to_model_domain(reference_data)
        explain_args = (model, data_transformed, background, report)

        shap_values, sample_mask, base_value = _explain_trees(*explain_args)
        if shap_values is None:
            shap_values, sample_mask, base_value = _explain_other_models(
                *explain_args
            )

        # regression gives a single base value - wrap it in an array
        base_value = (
            base_value
            if isinstance(base_value, np.ndarray)
            else np.array([base_value])
        )

        report(1)
    return shap_values, data_transformed, sample_mask, base_value