def test_min_max_values(self, data, shape_X_y, min_max_values):
    """Drawn ``X`` and ``y`` must stay inside the requested value bounds.

    ``min_max_values`` is unpacked into a lower and an upper bound, both of
    which are forwarded to ``numpy_X_y_matrices`` as ``min_value`` and
    ``max_value``.
    """
    lower_bound, upper_bound = min_max_values
    strategy = numpy_X_y_matrices(
        shape_X_y, min_value=lower_bound, max_value=upper_bound
    )
    X, y = data.draw(strategy)

    # Lower bound first for both matrices, then the upper bound.
    for matrix in (X, y):
        assert matrix.min() >= lower_bound
    for matrix in (X, y):
        assert matrix.max() <= upper_bound
class TestShap:
    """Tests that exercise the ``shap_explainer`` object."""

    @given(regressor=models(), X_y=numpy_X_y_matrices(min_value=-100, max_value=100))
    def test_predict(self, shap_explainer, regressor, X_y):
        """Fit on the drawn data, predict on its first two rows, check output."""
        features, target = X_y
        regressor.fit(features, target)
        shap_explainer.fit(regressor, features)

        first_rows = features[:2, :]
        shap_explainer.predict(first_rows)
        self._check_shap_values(shap_explainer, first_rows)

    def _check_shap_values(self, shap_explainer: Explainer, test_matrix: np.ndarray):
        """Assert ``shap_values_`` is an ndarray shaped like ``test_matrix``."""
        assert isinstance(shap_explainer.shap_values_, np.ndarray)
        computed_shape = shap_explainer.shap_values_.shape
        assert computed_shape == test_matrix.shape

    @given(X_y=numpy_X_y_matrices(min_value=-100, max_value=100))
    def test_fit_no_feature_names(self, shap_explainer, unrecognized_regressor, X_y):
        """Fitting the explainer on ``unrecognized_regressor`` raises ValueError."""
        features, target = X_y
        unrecognized_regressor.fit(features, target)
        with pytest.raises(ValueError):
            shap_explainer.fit(unrecognized_regressor, features)
class TestLime:
    """Tests that exercise the ``lime_explainer`` object."""

    @given(regressor=models(), X_y=numpy_X_y_matrices(min_value=-100, max_value=100))
    def test_predict(self, lime_explainer, regressor, X_y):
        """Fit on the drawn data, predict on its first two rows, check output."""
        features, target = X_y
        regressor.fit(features, target)
        lime_explainer.fit(regressor, features)

        first_rows = features[:2, :]
        lime_explainer.predict(first_rows)
        self._check_explanations(lime_explainer)

    def _check_explanations(self, lime_explainer: _LimeExplainer):
        """Assert ``_explanations_`` is a list holding only ``Explanation`` items."""
        assert isinstance(lime_explainer._explanations_, list)
        are_explanations = (
            isinstance(item, Explanation) for item in lime_explainer._explanations_
        )
        assert all(are_explanations)
def test_input_as_tuples(self, data, shape_X_y):
    """Matrices drawn from an explicit shape pair must have exactly those shapes.

    ``shape_X_y[0]`` is the expected shape of ``X`` and ``shape_X_y[1]`` the
    expected shape of ``y``; ``y`` must additionally be one-dimensional.
    """
    strategy = numpy_X_y_matrices(shape_X_y)
    X, y = data.draw(strategy)

    expected_X_shape = shape_X_y[0]
    assert X.shape == expected_X_shape

    expected_y_shape = shape_X_y[1]
    assert y.shape == expected_y_shape
    assert len(y.shape) == 1
def test_no_infinity(self, data, shape_X_y):
    """With ``allow_infinity=False`` neither drawn matrix contains an infinity.

    NaN values are explicitly allowed in the draw (``allow_nan=True``); only
    infinities are checked.
    """
    strategy = numpy_X_y_matrices(shape_X_y, allow_nan=True, allow_infinity=False)
    X, y = data.draw(strategy)

    for matrix in (X, y):
        assert not np.isinf(matrix).any()
def test_error_shape_0_different(self, data):
    """Drawing with shapes whose first dimensions differ raises ValueError."""
    # First dimensions disagree: 10 for the first shape, 4 for the second.
    mismatched_shapes = [[10, 5], [4, 1]]
    with pytest.raises(ValueError):
        data.draw(numpy_X_y_matrices(mismatched_shapes))
def test_error_shape_0_smaller_shape_1(self, data):
    """Drawing a first shape with dimension 0 below dimension 1 raises ValueError."""
    # The first shape is [10, 20]: its dimension 0 is smaller than dimension 1.
    wide_shapes = [[10, 20], [10, 1]]
    with pytest.raises(ValueError):
        data.draw(numpy_X_y_matrices(wide_shapes))
def test_input_as_strategy(self, data):
    """``numpy_X_y_matrices`` accepts the result of ``shape_X_y_matrices()``.

    Nothing is asserted on the drawn value; the test passes if the draw
    completes without raising.
    """
    shape_strategy = shape_X_y_matrices()
    matrices_strategy = numpy_X_y_matrices(shape_strategy)
    data.draw(matrices_strategy)
class TestMultiFeatureMultiOutputRegressor:
    """Tests for ``MultiFeatureMultiOutputRegressor``."""

    def _fit_with_drawn_dict(self, data, estimator, X, y):
        """Draw a target-to-feature dict sized for ``X``/``y`` and fit with it.

        Returns the fitted regressor together with the drawn dict.
        """
        drawn_dict = data.draw(
            numeric_target_to_feature_dicts(n_targets=y.shape[1], n_features=X.shape[1])
        )
        fitted = MultiFeatureMultiOutputRegressor(estimator)
        fitted.fit(X, y, target_to_features_dict=drawn_dict)
        return fitted, drawn_dict

    def test_constructor(self, estimator):
        """A freshly built regressor has ``n_jobs == 1``."""
        regressor = MultiFeatureMultiOutputRegressor(estimator)
        assert regressor.n_jobs == 1

    @given(data=data(), X_y=numpy_X_y_matrices(min_value=-10000, max_value=10000))
    def test_fit_bad_y(self, data, estimator, X_y):
        """Fitting with a flattened first column of ``y`` raises ValueError."""
        X, y = X_y
        # Keep only the first target column and flatten it.
        flat_y = y[:, 0].flatten()
        drawn_dict = data.draw(
            numeric_target_to_feature_dicts(n_targets=1, n_features=X.shape[1])
        )
        regressor = MultiFeatureMultiOutputRegressor(estimator)
        with pytest.raises(ValueError):
            regressor.fit(X, flat_y, target_to_features_dict=drawn_dict)

    @given(X_y=numpy_X_y_matrices(min_value=-10000, max_value=10000))
    def test_fit_as_multi_output_regressor_if_target_to_feature_none(
        self, estimator, X_y
    ):
        """Without a dict, predictions match a plain ``MultiOutputRegressor``."""
        X, y = X_y

        under_test = MultiFeatureMultiOutputRegressor(estimator)
        under_test.fit(X, y)

        reference = MultiOutputRegressor(estimator)
        reference.fit(X, y)

        actual_predictions = under_test.predict(X)
        reference_predictions = reference.predict(X)
        assert_almost_equal(actual_predictions, reference_predictions)

    @given(X=numpy_X_matrix(min_value=-10000, max_value=10000))
    def test_error_predict_with_no_fit(self, estimator, X):
        """Predicting before fitting raises NotFittedError."""
        unfitted = MultiFeatureMultiOutputRegressor(estimator)
        with pytest.raises(NotFittedError):
            unfitted.predict(X)

    @given(data=data(), X_y=numpy_X_y_matrices(min_value=-10000, max_value=10000))
    def test_fit_target_to_feature_dict_working(self, data, X_y, estimator):
        """Fitting with a drawn target-to-feature dict completes without error."""
        X, y = X_y
        self._fit_with_drawn_dict(data, estimator, X, y)

    @given(
        data=data(),
        X_y=numpy_X_y_matrices(min_value=-10000, max_value=10000),
    )
    def test_fit_target_to_feature_dict_consistent(self, data, X_y, estimator):
        """Each fitted sub-estimator has one coefficient per selected feature."""
        X, y = X_y
        regressor, drawn_dict = self._fit_with_drawn_dict(data, estimator, X, y)

        for target_index, fitted_estimator in enumerate(regressor.estimators_):
            selected_features = drawn_dict[target_index]
            assert len(fitted_estimator.coef_) == len(selected_features)

    @given(
        data=data(),
        X_y=numpy_X_y_matrices(min_value=-10000, max_value=10000),
    )
    def test_predict_target_to_feature_dict(self, data, X_y, estimator):
        """Predict runs on a 100-row matrix with as many columns as ``X``."""
        X, y = X_y
        regressor, _ = self._fit_with_drawn_dict(data, estimator, X, y)

        predict_shape = [100, X.shape[1]]
        X_predict = data.draw(numpy_X_matrix(predict_shape))
        regressor.predict(X_predict)

    @given(
        data=data(),
        X_y=numpy_X_y_matrices(min_value=-10000, max_value=10000),
    )
    def test_error_predict_target_to_feature_dict_wrong_X_shape(
        self, data, X_y, estimator
    ):
        """Predicting on a fixed 100x30 matrix raises ValueError."""
        X, y = X_y
        regressor, _ = self._fit_with_drawn_dict(data, estimator, X, y)

        X_predict = data.draw(numpy_X_matrix([100, 30]))
        with pytest.raises(ValueError):
            regressor.predict(X_predict)
class TestExplainableRegressor:
    """Tests for ``ExplainableRegressor`` with the "lime" and "shap" explainers."""

    @pytest.mark.parametrize("explainer_type", ["lime", "shap"])
    @given(estimator=regressors())
    def test_constructor(self, estimator, explainer_type):
        """The explainer type string selects the matching explainer class."""
        regressor = ExplainableRegressor(estimator, explainer_type)
        expected_classes = {"lime": _LimeExplainer, "shap": _ShapExplainer}
        expected_class = expected_classes.get(explainer_type)
        # Nothing is asserted for a type outside the table.
        if expected_class is not None:
            assert isinstance(regressor.explainer, expected_class)

    @given(estimator=regressors())
    def test_constructor_bad_explainer(self, estimator):
        """An unknown explainer type string raises ValueError."""
        with pytest.raises(ValueError):
            ExplainableRegressor(estimator, "bad")

    @pytest.mark.parametrize("explainer_type", ["lime", "shap"])
    @given(bad_estimator=bad_regressors())
    def test_constructor_bad_regressor(self, bad_estimator, explainer_type):
        """An estimator drawn from ``bad_regressors()`` raises TypeError."""
        with pytest.raises(TypeError):
            ExplainableRegressor(bad_estimator, explainer_type)

    @pytest.mark.parametrize("explainer_type", ["lime", "shap"])
    @given(estimator=regressors(), X=numpy_X_matrices())
    def test_error_predict_not_fitted(self, estimator, explainer_type, X):
        """Predicting before fitting raises NotFittedError."""
        unfitted = ExplainableRegressor(estimator, explainer_type)
        with pytest.raises(NotFittedError):
            unfitted.predict(X)

    def _get_fit_attributes(self, estimator: BaseEstimator) -> List[str]:
        """Return instance attribute names ending in ``_`` but not starting with ``__``."""
        attribute_names = []
        for name in vars(estimator):
            if name.startswith("__"):
                continue
            if name.endswith("_"):
                attribute_names.append(name)
        return attribute_names

    @pytest.mark.parametrize("explainer_type", ["lime", "shap"])
    @given(estimator=regressors(), X_y=numpy_X_y_matrices(min_value=-100, max_value=100))
    def test_fit_values(self, estimator, explainer_type, X_y):
        """Wrapped and cloned estimators expose the same fit-attribute names."""
        X, y = X_y

        regressor = ExplainableRegressor(estimator, explainer_type)
        regressor.fit(X, y)

        reference_estimator = clone(estimator)
        reference_estimator.fit(X, y)

        # Only the attribute names are compared, not their values.
        wrapped_names = self._get_fit_attributes(regressor.estimator)
        reference_names = self._get_fit_attributes(reference_estimator)
        np.testing.assert_array_equal(wrapped_names, reference_names)

    @settings(deadline=pd.Timedelta(milliseconds=5000), max_examples=7)
    @pytest.mark.parametrize("explainer_type", ["lime", "shap"])
    @given(estimator=regressors(), X_y=numpy_X_y_matrices(min_value=-100, max_value=100))
    def test_predict_values(self, estimator, explainer_type, X_y):
        """Predictions have the clone's shape and one explanation per row."""
        X, y = X_y
        X_test = X[:1, :]

        regressor = ExplainableRegressor(estimator, explainer_type)
        fitted_regressor = regressor.fit(X, y)
        regressor_predictions = fitted_regressor.predict(X_test)

        reference_estimator = clone(estimator)
        fitted_reference = reference_estimator.fit(X, y)
        reference_predictions = fitted_reference.predict(X_test)

        assert regressor_predictions.shape == reference_predictions.shape
        n_predictions = regressor_predictions.shape[0]
        assert n_predictions == len(regressor.explanations_)
class TestAllExplainers:
    """Tests run against both ``lime_explainer`` and ``shap_explainer``."""

    @pytest.mark.parametrize("explainer", lazy_fixtures([lime_explainer, shap_explainer]))
    def test_constructor(self, explainer):
        """Only requires that the ``explainer`` parameter can be supplied."""
        pass

    def _check_all_parameters_fitted(self, explainer):
        """Assert the three attributes expected after fitting are present."""
        for attribute in ("model_", "explainer_", "feature_names_"):
            assert hasattr(explainer, attribute)

    @pytest.mark.parametrize("explainer", lazy_fixtures([lime_explainer, shap_explainer]))
    @given(regressor=models(), X_y=numpy_X_y_matrices(min_value=-100, max_value=100))
    def test_fit_no_feature_names(self, explainer, regressor, X_y):
        """Without explicit names, ``feature_names_`` are the column indices as strings."""
        features, target = X_y
        regressor.fit(features, target)
        explainer.fit(regressor, features)

        check_is_fitted(explainer)
        self._check_all_parameters_fitted(explainer)

        n_columns = features.shape[1]
        default_names = [f"{i}" for i in range(n_columns)]
        np.testing.assert_array_equal(explainer.feature_names_, default_names)

    @pytest.mark.parametrize("explainer", lazy_fixtures([lime_explainer, shap_explainer]))
    @given(
        data=data(),
        regressor=models(),
        X_y=numpy_X_y_matrices(min_value=-100, max_value=100),
    )
    def test_fit_feature_names(self, data, explainer, regressor, X_y):
        """Fitting with one drawn text name per column leaves the explainer fitted."""
        features, target = X_y
        n_columns = features.shape[1]
        names_strategy = lists(elements=text(), min_size=n_columns, max_size=n_columns)
        feature_names = data.draw(names_strategy)

        regressor.fit(features, target)
        explainer.fit(regressor, features, feature_names)

        check_is_fitted(explainer)
        self._check_all_parameters_fitted(explainer)

    @pytest.mark.parametrize("explainer", lazy_fixtures([lime_explainer, shap_explainer]))
    @given(regressor=models(), X=numpy_X_matrices())
    def test_error_fit_regressor_not_fitted(self, explainer, regressor, X):
        """Fitting the explainer on a regressor that was never fitted raises NotFittedError."""
        with pytest.raises(NotFittedError):
            explainer.fit(regressor, X)

    def _check_predict_output(
        self, explainer: Explainer, predictions: np.ndarray, test_matrix: np.ndarray
    ):
        """Validate the predictions and the explanations stored on the explainer.

        Checks one prediction and one explanation per row of ``test_matrix``,
        string keys in every explanation, and one entry per column in each.
        """
        n_rows, n_columns = test_matrix.shape[0], test_matrix.shape[1]

        assert predictions.shape[0] == n_rows
        assert isinstance(explainer.explanations_, list)
        assert len(explainer.explanations_) == predictions.shape[0]

        keys_are_strings = (
            isinstance(key, str)
            for explanation in explainer.explanations_
            for key in explanation.keys()
        )
        assert all(keys_are_strings)

        sizes_match = [
            len(explanation) == n_columns for explanation in explainer.explanations_
        ]
        assert all(sizes_match)

    @settings(deadline=pd.Timedelta(milliseconds=5000), max_examples=7)
    @pytest.mark.parametrize("explainer", lazy_fixtures([lime_explainer, shap_explainer]))
    @given(regressor=models(), X_y=numpy_X_y_matrices(min_value=-100, max_value=100))
    def test_predict(self, explainer, regressor, X_y):
        """Fit, predict on the first two rows, then validate the output."""
        features, target = X_y
        regressor.fit(features, target)
        explainer.fit(regressor, features)

        first_rows = features[:2, :]
        predictions = explainer.predict(first_rows)
        self._check_predict_output(explainer, predictions, first_rows)

    @pytest.mark.parametrize("explainer", lazy_fixtures([lime_explainer, shap_explainer]))
    @given(X=numpy_X_matrices(min_value=-100, max_value=100))
    def test_error_predict_not_fit(self, explainer, X):
        """Predicting with an explainer that was never fitted raises NotFittedError."""
        first_rows = X[:2, :]
        with pytest.raises(NotFittedError):
            explainer.predict(first_rows)