def test_pickle(self):
    """Check that pickling a model drops its ``original_data``.

    The model that was pickled must keep its data; only the copy that
    comes back from the round trip is expected to have it reset to None.
    """
    # Arrange: a model over an empty domain with some attached data.
    source_model = Model(Domain([]))
    source_model.original_data = [1, 2, 3]

    # Act: round-trip through pickle.
    payload = pickle.dumps(source_model)
    restored_model = pickle.loads(payload)

    # Assert: domain survives, the source is untouched, the copy has no data.
    self.assertEqual(source_model.domain, restored_model.domain)
    self.assertEqual(source_model.original_data, [1, 2, 3])
    self.assertEqual(restored_model.original_data, None)
def amended_data(self, model: Model) -> Table:
    """Return the data, extended with Mahalanobis distances when applicable.

    Parameters
    ----------
    model
        Fitted model; its ``mahalanobis`` method is called on ``self.data.X``
        when the covariance method is selected.

    Returns
    -------
    Table
        ``self.data`` itself when ``self.outlier_method`` is not
        ``self.Covariance``. Otherwise a table transformed into a domain
        with one extra meta variable named "Mahalanobis", whose metas are
        the original metas with the distance column appended.
    """
    if self.outlier_method != self.Covariance:
        return self.data

    source = self.data
    # Distances become a single column so they can be stacked beside metas.
    distances = model.mahalanobis(source.X).reshape(len(source), 1)

    source_domain = source.domain
    extended_domain = Domain(
        source_domain.attributes,
        source_domain.class_vars,
        [*source_domain.metas, ContinuousVariable(name="Mahalanobis")],
    )

    extended = source.transform(extended_domain)
    extended.metas = np.hstack((source.metas, distances))
    return extended
def _check_model(model: Model, data: Table) -> bool:
    """Validate model/data compatibility and report whether X is changed.

    Parameters
    ----------
    model
        Model whose ``data_to_model_domain`` is applied to ``data``.
    data
        Table to check against the model.

    Returns
    -------
    bool
        True when ``data.X`` and the X of the data converted to the model
        domain differ (in shape or in any cell), False otherwise.

    Raises
    ------
    ValueError
        If a regression model is paired with a discrete class, or a
        classification model with a continuous class.
    """
    domain = data.domain
    if domain.has_discrete_class and isinstance(model, RegModel):
        raise ValueError(
            f"{model} can not be used for data with discrete class."
        )
    if domain.has_continuous_class and isinstance(model, ClsModel):
        raise ValueError(
            f"{model} can not be used for data with continuous class."
        )

    original_X = data.X
    model_X = model.data_to_model_domain(data).X

    # Differently shaped matrices cannot be compared element-wise.
    if original_X.shape != model_X.shape:
        return True

    mismatch = original_X != model_X
    if sp.issparse(original_X) and sp.issparse(model_X):
        # Sparse comparison yields a sparse matrix; stored entries = diffs.
        return mismatch.nnz != 0
    # NOTE(review): NaN compares unequal to itself, so dense data with
    # missing values is reported as differing - confirm this is intended.
    return mismatch.any()
def __call__(self, data, *_):
    """Return a model over an empty domain, failing for any real data.

    Extra positional arguments are accepted and ignored.

    Raises
    ------
    ValueError
        With message "boom" whenever ``data`` is not None.
    """
    if data is None:
        return Model(Domain([]))
    raise ValueError("boom")
def compute_shap_values(
    model: Model,
    data: Table,
    reference_data: Table,
    progress_callback: Callable = None,
) -> Tuple[List[np.ndarray], Table, np.ndarray, np.ndarray]:
    """
    Explain a model with SHAP values and return the transformed data table.

    The tree-specific explainer is tried first; when it yields no SHAP
    values (None), the generic explainer is used instead.

    Parameters
    ----------
    model
        Model which is explained.
    data
        Data to be explained.
    reference_data
        Background data for perturbation purposes.
    progress_callback
        The callback for reporting the progress. When None, a no-op
        callback (``dummy_callback``) is used.

    Returns
    -------
    shap_values
        Shapley values for each data item computed by the SHAP library.
        The result is a list of SHAP values for each class - the class
        order is taken from values in the class_var. Each array in the
        list has shape (num cases x num attributes) - explanation for the
        contribution of each attribute to the final prediction.
    data_transformed
        The table on which explanation was made: ``data`` converted to the
        model domain (table preprocessed by the model's preprocessors).
    sample_mask
        SHAP values are computed just for a data sample. It is a boolean
        mask that tells which rows in data_transformed are explained.
    base_value
        The base value (average prediction on dataset) for each class;
        always returned as a numpy array.
    """
    # A fixed seed keeps sampling and SHAP computation reproducible for
    # the same input data.
    with temp_seed(0):
        report = dummy_callback if progress_callback is None else progress_callback
        report(0, "Computing explanation ...")

        explained = model.data_to_model_domain(data)
        background = model.data_to_model_domain(reference_data)
        explainer_args = (model, explained, background, report)

        shap_values, sample_mask, base_value = _explain_trees(*explainer_args)
        if shap_values is None:
            # Tree explainer did not produce a result; use the generic one.
            shap_values, sample_mask, base_value = _explain_other_models(
                *explainer_args
            )

        # For regression the base value is a scalar: wrap it in an array.
        if not isinstance(base_value, np.ndarray):
            base_value = np.array([base_value])

        report(1)
    return shap_values, explained, sample_mask, base_value