def test_check_no_attributes_set_in_init():
    """Exercise ``check_no_attributes_set_in_init`` on two faulty estimators.

    The first estimator assigns an attribute in ``__init__`` that is not a
    constructor parameter; the second accepts a parameter but never stores it.
    """

    class NonConformantEstimatorPrivateSet(BaseEstimator):
        def __init__(self):
            self.you_should_not_set_this_ = None

    class NonConformantEstimatorNoParamSet(BaseEstimator):
        def __init__(self, you_should_set_this_=None):
            pass

    extra_attribute_msg = (
        "Estimator estimator_name should not set any"
        " attribute apart from parameters during init."
        r" Found attributes \['you_should_not_set_this_'\]."
    )
    missing_param_msg = (
        "Estimator estimator_name should store all parameters as an attribute"
        " during init"
    )

    # (expected exception, expected message pattern, estimator class)
    cases = (
        (AssertionError, extra_attribute_msg, NonConformantEstimatorPrivateSet),
        (AttributeError, missing_param_msg, NonConformantEstimatorNoParamSet),
    )
    for exc_type, pattern, estimator_cls in cases:
        with raises(exc_type, match=pattern):
            check_no_attributes_set_in_init("estimator_name", estimator_cls())
def test_deprecated_Estimator_check_estimator():
    """Check the ``Estimator=`` keyword path and the no-argument path.

    With ``FutureWarning`` escalated to an error, calling ``check_estimator``
    with ``Estimator=`` may raise the deprecation message (``may_pass=True``).
    Calling it with no argument at all must raise ``ValueError``.
    """
    deprecation_pattern = "'Estimator' was deprecated in favor of"
    with warnings.catch_warnings():
        # Turn the warning into an exception so `raises` can observe it.
        warnings.simplefilter("error", FutureWarning)
        with raises(FutureWarning, match=deprecation_pattern, may_pass=True):
            check_estimator(Estimator=NuSVC())

    missing_arg_pattern = "Either estimator or Estimator should be passed"
    with raises(ValueError, match=missing_arg_pattern, may_pass=False):
        check_estimator()
def test_check_classifiers_multilabel_output_format_decision_function():
    """Feed malformed ``decision_function`` outputs to the multilabel check.

    A mock classifier returns a canned response; each scenario below expects
    an ``AssertionError`` whose message matches the given pattern.
    """
    n_samples, test_size, n_outputs = 100, 25, 5
    _, y = make_multilabel_classification(
        n_samples=n_samples,
        n_features=2,
        n_classes=n_outputs,
        n_labels=3,
        length=50,
        allow_unlabeled=True,
        random_state=0,
    )
    y_test = y[-test_size:]

    class MultiLabelClassifierDecisionFunction(_BaseMultiLabelClassifierMock):
        def decision_function(self, X):
            return self.response_output

    def run_check(estimator):
        # Same call shape for every scenario: class name, then the instance.
        check_classifiers_multilabel_output_format_decision_function(
            estimator.__class__.__name__,
            estimator,
        )

    # 1. inconsistent array type
    mock_clf = MultiLabelClassifierDecisionFunction(response_output=y_test.tolist())
    expected = (
        r"MultiLabelClassifierDecisionFunction.decision_function is expected "
        r"to output a NumPy array. Got <class 'list'> instead."
    )
    with raises(AssertionError, match=expected):
        run_check(mock_clf)

    # 2. inconsistent shape
    mock_clf = MultiLabelClassifierDecisionFunction(response_output=y_test[:, :-1])
    expected = (
        r"MultiLabelClassifierDecisionFunction.decision_function is expected "
        r"to provide a NumPy array of shape \(n_samples, n_outputs\). Got "
        r"\(25, 4\) instead of \(25, 5\)"
    )
    with raises(AssertionError, match=expected):
        run_check(mock_clf)

    # 3. inconsistent dtype
    mock_clf = MultiLabelClassifierDecisionFunction(response_output=y_test)
    expected = (
        r"MultiLabelClassifierDecisionFunction.decision_function is expected "
        r"to output a floating dtype."
    )
    with raises(AssertionError, match=expected):
        run_check(mock_clf)
def test_check_dataframe_column_names_consistency():
    """Check ``check_dataframe_column_names_consistency`` on good and bad inputs.

    Covers an estimator expected to fail with a ``feature_names_in_`` message,
    two estimators expected to pass, and a ``LogisticRegression`` instance whose
    ``__doc__`` has been replaced so the attribute is no longer documented.
    """
    # NOTE(review): another function with this exact name is defined further
    # down in this file; if both live in the same module the later definition
    # replaces this one and this body never runs -- confirm and deduplicate.
    missing_attr_pattern = "Estimator does not have a feature_names_in_"
    with raises(ValueError, match=missing_attr_pattern):
        check_dataframe_column_names_consistency("estimator_name", BaseBadClassifier())
    check_dataframe_column_names_consistency("estimator_name", PartialFitChecksName())

    lr = LogisticRegression()
    check_dataframe_column_names_consistency(lr.__class__.__name__, lr)

    # Overwrite the docstring on the instance so the attribute is undocumented.
    lr.__doc__ = "Docstring that does not document the estimator's attributes"
    undocumented_pattern = (
        "Estimator LogisticRegression does not document its feature_names_in_ attribute"
    )
    with raises(ValueError, match=undocumented_pattern):
        check_dataframe_column_names_consistency(lr.__class__.__name__, lr)
def test_not_an_array_array_function():
    """Check how ``_NotAnArray`` reacts to NumPy functions called on it.

    ``np.sum`` is expected to raise ``TypeError``; ``np.may_share_memory`` is
    expected to return a truthy value.
    """
    # NOTE(review): a second function with this exact name is defined further
    # down in this file; if both live in the same module the later definition
    # replaces this one -- confirm and deduplicate.
    wrapped = _NotAnArray(np.ones(10))
    expected = "Don't want to call array_function sum!"
    with raises(TypeError, match=expected):
        np.sum(wrapped)
    # always returns True
    assert np.may_share_memory(wrapped, None)
def test_check_class_weight_balanced_linear_classifier():
    """Ill-computed balanced class weights must make the check fail.

    ``BadBalancedWeightsClassifier`` is passed as a class (not an instance),
    and an ``AssertionError`` with the message below is expected.
    """
    expected = (
        "Classifier estimator_name is not computing class_weight=balanced properly"
    )
    with raises(AssertionError, match=expected):
        check_class_weight_balanced_linear_classifier(
            "estimator_name", BadBalancedWeightsClassifier
        )
def test_check_dataframe_column_names_consistency():
    """Check ``check_dataframe_column_names_consistency`` on two estimators.

    ``BaseBadClassifier`` is expected to fail with a ``feature_names_in_``
    message; ``PartialFitChecksName`` is expected to pass.
    """
    # NOTE(review): a function with this exact name is also defined earlier in
    # this file; if both live in the same module this definition shadows the
    # earlier one -- confirm and deduplicate.
    expected = "Estimator does not have a feature_names_in_"
    with raises(ValueError, match=expected):
        check_dataframe_column_names_consistency("estimator_name", BaseBadClassifier())

    check_dataframe_column_names_consistency("estimator_name", PartialFitChecksName())
def test_check_outlier_corruption():
    """Run ``check_outlier_corruption`` on one failing and one passing array."""
    # should raise AssertionError
    failing_decision = np.array([0.0, 1.0, 1.5, 2.0])
    with raises(AssertionError):
        check_outlier_corruption(1, 2, failing_decision)

    # should pass
    passing_decision = np.array([0.0, 1.0, 1.0, 2.0])
    check_outlier_corruption(1, 2, passing_decision)
def test_not_an_array_array_function():
    """Check how ``_NotAnArray`` reacts to NumPy functions called on it.

    Skipped on NumPy older than 1.17. ``np.sum`` is expected to raise
    ``TypeError``; ``np.may_share_memory`` is expected to return a truthy value.
    """
    # NOTE(review): a function with this exact name is also defined earlier in
    # this file; if both live in the same module this definition shadows the
    # earlier one -- confirm and deduplicate.
    minimum_numpy = parse_version("1.17")
    if np_version < minimum_numpy:
        raise SkipTest("array_function protocol not supported in numpy <1.17")

    wrapped = _NotAnArray(np.ones(10))
    expected = "Don't want to call array_function sum!"
    with raises(TypeError, match=expected):
        np.sum(wrapped)
    # always returns True
    assert np.may_share_memory(wrapped, None)
def test_check_estimators_unfitted():
    """Run ``check_estimators_unfitted`` on a failing and a passing classifier."""
    # An estimator that does not raise ValueError/AttributeError when predict
    # is called before fit must make the check fail with "Did not raise".
    expected = "Did not raise"
    with raises(AssertionError, match=expected):
        check_estimators_unfitted("estimator", NoSparseClassifier())

    # check that CorrectNotFittedError inherit from either ValueError
    # or AttributeError
    check_estimators_unfitted("estimator", CorrectNotFittedErrorClassifier())
def test_check_classifiers_multilabel_output_format_predict():
    """Feed malformed ``predict`` outputs to the multilabel format check.

    A mock classifier returns a canned response; each scenario below expects
    an ``AssertionError`` whose message matches the given pattern.
    """
    n_samples, test_size, n_outputs = 100, 25, 5
    _, y = make_multilabel_classification(
        n_samples=n_samples,
        n_features=2,
        n_classes=n_outputs,
        n_labels=3,
        length=50,
        allow_unlabeled=True,
        random_state=0,
    )
    y_test = y[-test_size:]

    class MultiLabelClassifierPredict(_BaseMultiLabelClassifierMock):
        def predict(self, X):
            return self.response_output

    def run_check(estimator):
        # Same call shape for every scenario: class name, then the instance.
        check_classifiers_multilabel_output_format_predict(
            estimator.__class__.__name__, estimator
        )

    # 1. inconsistent array type
    mock_clf = MultiLabelClassifierPredict(response_output=y_test.tolist())
    expected = (
        r"MultiLabelClassifierPredict.predict is expected to output a "
        r"NumPy array. Got <class 'list'> instead."
    )
    with raises(AssertionError, match=expected):
        run_check(mock_clf)

    # 2. inconsistent shape
    mock_clf = MultiLabelClassifierPredict(response_output=y_test[:, :-1])
    expected = (
        r"MultiLabelClassifierPredict.predict outputs a NumPy array of "
        r"shape \(25, 4\) instead of \(25, 5\)."
    )
    with raises(AssertionError, match=expected):
        run_check(mock_clf)

    # 3. inconsistent dtype
    mock_clf = MultiLabelClassifierPredict(response_output=y_test.astype(np.float64))
    expected = (
        r"MultiLabelClassifierPredict.predict does not output the same "
        r"dtype than the targets."
    )
    with raises(AssertionError, match=expected):
        run_check(mock_clf)
def test_check_estimator_get_tags_default_keys():
    """Run ``check_estimator_get_tags_default_keys`` on two estimators.

    ``EstimatorMissingDefaultTags`` is expected to fail with a message naming
    the missing ``allow_nan`` tag; ``MinimalTransformer`` is expected to pass.
    """
    incomplete = EstimatorMissingDefaultTags()
    expected = (
        r"EstimatorMissingDefaultTags._get_tags\(\) is missing entries"
        r" for the following default tags: {'allow_nan'}"
    )
    with raises(AssertionError, match=expected):
        check_estimator_get_tags_default_keys(
            incomplete.__class__.__name__, incomplete
        )

    # noop check when _get_tags is not available
    minimal = MinimalTransformer()
    check_estimator_get_tags_default_keys(minimal.__class__.__name__, minimal)
def test_check_fit_check_is_fitted():
    """Run ``check_fit_check_is_fitted`` on three flavours of a mock estimator.

    The mock's ``behavior`` parameter selects how (or whether) fitted state is
    reported: via a trailing-underscore attribute, via
    ``__sklearn_is_fitted__``, or by always claiming to be fitted.
    """

    class Estimator(BaseEstimator):
        def __init__(self, behavior="attribute"):
            self.behavior = behavior

        def fit(self, X, y, **kwargs):
            mode = self.behavior
            if mode == "attribute":
                self.is_fitted_ = True
            elif mode == "method":
                self._is_fitted = True
            return self

        # Only exposed for the "method" and "always-true" behaviours.
        @available_if(lambda self: self.behavior in {"method", "always-true"})
        def __sklearn_is_fitted__(self):
            return self.behavior == "always-true" or hasattr(self, "_is_fitted")

    # An estimator that reports itself fitted before `fit` must be rejected.
    with raises(Exception, match="passes check_is_fitted before being fit"):
        check_fit_check_is_fitted("estimator", Estimator(behavior="always-true"))

    # The two remaining behaviours are expected to pass.
    for passing_behavior in ("method", "attribute"):
        check_fit_check_is_fitted("estimator", Estimator(behavior=passing_behavior))
def test_check_regressor_data_not_an_array():
    """``EstimatorInconsistentForPandas`` must fail the regressor not-an-array check."""
    expected = "Not equal to tolerance"
    with raises(AssertionError, match=expected):
        check_regressor_data_not_an_array(
            "estimator_name", EstimatorInconsistentForPandas()
        )
def test_check_estimator_transformer_no_mixin():
    """Transformer checks must run even without ``TransformerMixin``.

    ``check_estimator`` on ``BadTransformerWithoutMixin`` is expected to raise
    an ``AttributeError`` whose message mentions ``fit_transform``.
    """
    # Pass the pattern by keyword, like every other `raises(...)` call in this
    # file, so the call does not depend on the positional order of `raises`.
    with raises(AttributeError, match=".*fit_transform.*"):
        check_estimator(BadTransformerWithoutMixin())
def test_check_estimator():
    """Check that ``check_estimator`` fails on "bad" estimators.

    This is not a complete test of all checks, which are very extensive. Each
    section runs ``check_estimator`` on a purpose-built estimator and asserts
    either the expected exception and message, or that the call passes.
    """
    # check that we have a set_params and can clone
    msg = "Passing a class was deprecated"
    with raises(TypeError, match=msg):
        check_estimator(object)

    # check that the "default_constructible" test checks for mutable parameters
    msg = (
        "Parameter 'p' of estimator 'HasMutableParameters' is of type "
        "object which is not allowed"
    )
    check_estimator(HasImmutableParameters())  # should pass
    with raises(AssertionError, match=msg):
        check_estimator(HasMutableParameters())

    # check that values returned by get_params match set_params
    msg = "get_params result does not match what was passed to set_params"
    with raises(AssertionError, match=msg):
        check_estimator(ModifiesValueInsteadOfRaisingError())

    with warnings.catch_warnings(record=True) as records:
        check_estimator(RaisesErrorInSetParams())
    assert UserWarning in [rec.category for rec in records]

    # same get_params/set_params message as above
    with raises(AssertionError, match=msg):
        check_estimator(ModifiesAnotherValue())

    # check that we have a fit method
    msg = "object has no attribute 'fit'"
    with raises(AttributeError, match=msg):
        check_estimator(BaseEstimator())

    # check that fit does input validation
    msg = "Did not raise"
    with raises(AssertionError, match=msg):
        check_estimator(BaseBadClassifier())

    # check that sample_weights in fit accepts pandas.Series type
    # Only the import is guarded: an ImportError raised from inside
    # check_estimator must surface instead of silently skipping the assertion.
    try:
        from pandas import Series  # noqa
    except ImportError:
        pass
    else:
        msg = (
            "Estimator NoSampleWeightPandasSeriesType raises error if "
            "'sample_weight' parameter is of type pandas.Series"
        )
        with raises(ValueError, match=msg):
            check_estimator(NoSampleWeightPandasSeriesType())

    # check that predict does input validation (doesn't accept dicts in input)
    msg = "Estimator NoCheckinPredict doesn't check for NaN and inf in predict"
    with raises(AssertionError, match=msg):
        check_estimator(NoCheckinPredict())

    # check that estimator state does not change
    # at transform/predict/predict_proba time
    msg = "Estimator changes __dict__ during predict"
    with raises(AssertionError, match=msg):
        check_estimator(ChangesDict())

    # check that `fit` only changes attributes that
    # are private (start with an _ or end with a _).
    msg = (
        "Estimator ChangesWrongAttribute should not change or mutate "
        "the parameter wrong_attribute from 0 to 1 during fit."
    )
    with raises(AssertionError, match=msg):
        check_estimator(ChangesWrongAttribute())
    check_estimator(ChangesUnderscoreAttribute())

    # check that `fit` doesn't add any public attribute
    msg = (
        r"Estimator adds public attribute\(s\) during the fit method."
        " Estimators are only allowed to add private attributes"
        " either started with _ or ended"
        " with _ but wrong_attribute added"
    )
    with raises(AssertionError, match=msg):
        check_estimator(SetsWrongAttribute())

    # check for sample order invariance
    name = NotInvariantSampleOrder.__name__
    method = "predict"
    # The missing space between "dataset" and "with" is kept on purpose: the
    # pattern is matched against the message emitted by the check itself.
    msg = (
        "{method} of {name} is not invariant when applied to a dataset"
        "with different sample order."
    ).format(method=method, name=name)
    with raises(AssertionError, match=msg):
        check_estimator(NotInvariantSampleOrder())

    # check for invariant method
    name = NotInvariantPredict.__name__
    method = "predict"
    msg = (
        "{method} of {name} is not invariant when applied to a subset."
    ).format(method=method, name=name)
    with raises(AssertionError, match=msg):
        check_estimator(NotInvariantPredict())

    # check for sparse matrix input handling
    name = NoSparseClassifier.__name__
    msg = "Estimator %s doesn't seem to fail gracefully on sparse data" % name
    with raises(AssertionError, match=msg):
        check_estimator(NoSparseClassifier())

    # Large indices test on bad estimator
    msg = (
        "Estimator LargeSparseNotSupportedClassifier doesn't seem to "
        r"support \S{3}_64 matrix, and is not failing gracefully.*"
    )
    with raises(AssertionError, match=msg):
        check_estimator(LargeSparseNotSupportedClassifier())

    # does error on binary_only untagged estimator
    msg = "Only 2 classes are supported"
    with raises(ValueError, match=msg):
        check_estimator(UntaggedBinaryClassifier())

    # non-regression test for estimators transforming to sparse data
    check_estimator(SparseTransformer())

    # doesn't error on actual estimator
    check_estimator(LogisticRegression())
    check_estimator(LogisticRegression(C=0.01))
    check_estimator(MultiTaskElasticNet())

    # doesn't error on binary_only tagged estimator
    check_estimator(TaggedBinaryClassifier())

    # Check regressor with requires_positive_y estimator tag
    msg = "negative y values not supported!"
    with raises(ValueError, match=msg):
        check_estimator(RequiresPositiveYRegressor())

    # Does not raise error on classifier with poor_score tag
    check_estimator(PoorScoreLogisticRegression())
def test_raises():
    """Tests for the ``raises`` context manager.

    Covers matching and non-matching exception types and messages, the
    ``err_msg`` override, the ``may_pass`` flag, and tuples of exception types.
    """
    # Proper type, no match
    with raises(TypeError):
        raise TypeError()

    # Proper type, proper match
    with raises(TypeError, match="how are you") as ctx:
        raise TypeError("hello how are you")
    assert ctx.raised_and_matched

    # Proper type, proper match with multiple patterns
    with raises(TypeError, match=["not this one", "how are you"]) as ctx:
        raise TypeError("hello how are you")
    assert ctx.raised_and_matched

    # bad type, no match
    with pytest.raises(ValueError, match="this will be raised"):
        with raises(TypeError) as ctx:
            raise ValueError("this will be raised")
    assert not ctx.raised_and_matched

    # Bad type, no match, with a err_msg
    with pytest.raises(AssertionError, match="the failure message"):
        with raises(TypeError, err_msg="the failure message") as ctx:
            raise ValueError()
    assert not ctx.raised_and_matched

    # bad type, with match (is ignored anyway)
    with pytest.raises(ValueError, match="this will be raised"):
        with raises(TypeError, match="this is ignored") as ctx:
            raise ValueError("this will be raised")
    assert not ctx.raised_and_matched

    # proper type but bad match
    bad_match_pattern = "should contain one of the following patterns"
    with pytest.raises(AssertionError, match=bad_match_pattern):
        with raises(TypeError, match="hello") as ctx:
            raise TypeError("Bad message")
    assert not ctx.raised_and_matched

    # proper type but bad match, with err_msg
    with pytest.raises(AssertionError, match="the failure message"):
        with raises(TypeError, match="hello", err_msg="the failure message") as ctx:
            raise TypeError("Bad message")
    assert not ctx.raised_and_matched

    # no raise with default may_pass=False
    with pytest.raises(AssertionError, match="Did not raise"):
        with raises(TypeError) as ctx:
            pass
    assert not ctx.raised_and_matched

    # no raise with may_pass=True
    with raises(TypeError, match="hello", may_pass=True) as ctx:
        pass  # still OK
    assert not ctx.raised_and_matched

    # Multiple exception types:
    accepted_types = (TypeError, ValueError)
    with raises(accepted_types):
        raise TypeError()
    with raises(accepted_types):
        raise ValueError()
    with pytest.raises(AssertionError):
        with raises(accepted_types):
            pass
def test_check_classifiers_multilabel_output_format_predict_proba():
    """Feed malformed ``predict_proba`` outputs to the multilabel format check.

    A mock classifier returns a canned response. The scenarios cover an
    unknown return type, list outputs (length, shape, dtype, row sums) and
    array outputs (shape, dtype, value range); each expects the exception and
    message pattern given next to it.
    """
    n_samples, test_size, n_outputs = 100, 25, 5
    _, y = make_multilabel_classification(
        n_samples=n_samples,
        n_features=2,
        n_classes=n_outputs,
        n_labels=3,
        length=50,
        allow_unlabeled=True,
        random_state=0,
    )
    y_test = y[-test_size:]

    class MultiLabelClassifierPredictProba(_BaseMultiLabelClassifierMock):
        def predict_proba(self, X):
            return self.response_output

    def run_check(estimator):
        # Same call shape for every scenario: class name, then the instance.
        check_classifiers_multilabel_output_format_predict_proba(
            estimator.__class__.__name__,
            estimator,
        )

    # 1. unknown output type
    mock_clf = MultiLabelClassifierPredictProba(response_output=sp.csr_matrix(y_test))
    expected = (
        "Unknown returned type .*csr_matrix.* by "
        r"MultiLabelClassifierPredictProba.predict_proba. A list or a Numpy "
        r"array is expected."
    )
    with raises(ValueError, match=expected):
        run_check(mock_clf)

    # 2. for list output
    # 2.1. inconsistent length
    mock_clf = MultiLabelClassifierPredictProba(response_output=y_test.tolist())
    expected = (
        "When MultiLabelClassifierPredictProba.predict_proba returns a list, "
        "the list should be of length n_outputs and contain NumPy arrays. Got "
        f"length of {test_size} instead of {n_outputs}."
    )
    with raises(AssertionError, match=expected):
        run_check(mock_clf)

    # 2.2. array of inconsistent shape
    canned = [np.ones_like(y_test) for _ in range(n_outputs)]
    mock_clf = MultiLabelClassifierPredictProba(response_output=canned)
    expected = (
        r"When MultiLabelClassifierPredictProba.predict_proba returns a list, "
        r"this list should contain NumPy arrays of shape \(n_samples, 2\). Got "
        r"NumPy arrays of shape \(25, 5\) instead of \(25, 2\)."
    )
    with raises(AssertionError, match=expected):
        run_check(mock_clf)

    # 2.3. array of inconsistent dtype
    canned = [
        np.ones(shape=(y_test.shape[0], 2), dtype=np.int64)
        for _ in range(n_outputs)
    ]
    mock_clf = MultiLabelClassifierPredictProba(response_output=canned)
    expected = (
        "When MultiLabelClassifierPredictProba.predict_proba returns a list, "
        "it should contain NumPy arrays with floating dtype."
    )
    with raises(AssertionError, match=expected):
        run_check(mock_clf)

    # 2.4. array does not contain probability (each row should sum to 1)
    canned = [
        np.ones(shape=(y_test.shape[0], 2), dtype=np.float64)
        for _ in range(n_outputs)
    ]
    mock_clf = MultiLabelClassifierPredictProba(response_output=canned)
    expected = (
        r"When MultiLabelClassifierPredictProba.predict_proba returns a list, "
        r"each NumPy array should contain probabilities for each class and "
        r"thus each row should sum to 1"
    )
    with raises(AssertionError, match=expected):
        run_check(mock_clf)

    # 3 for array output
    # 3.1. array of inconsistent shape
    mock_clf = MultiLabelClassifierPredictProba(response_output=y_test[:, :-1])
    expected = (
        r"When MultiLabelClassifierPredictProba.predict_proba returns a NumPy "
        r"array, the expected shape is \(n_samples, n_outputs\). Got \(25, 4\)"
        r" instead of \(25, 5\)."
    )
    with raises(AssertionError, match=expected):
        run_check(mock_clf)

    # 3.2. array of inconsistent dtype
    canned = np.zeros_like(y_test, dtype=np.int64)
    mock_clf = MultiLabelClassifierPredictProba(response_output=canned)
    expected = (
        r"When MultiLabelClassifierPredictProba.predict_proba returns a NumPy "
        r"array, the expected data type is floating."
    )
    with raises(AssertionError, match=expected):
        run_check(mock_clf)

    # 4. array does not contain probabilities
    mock_clf = MultiLabelClassifierPredictProba(response_output=y_test * 2.0)
    expected = (
        r"When MultiLabelClassifierPredictProba.predict_proba returns a NumPy "
        r"array, this array is expected to provide probabilities of the "
        r"positive class and should therefore contain values between 0 and 1."
    )
    with raises(AssertionError, match=expected):
        run_check(mock_clf)
def test_check_classifier_data_not_an_array():
    """``EstimatorInconsistentForPandas`` must fail the classifier not-an-array check."""
    expected = 'Not equal to tolerance'
    with raises(AssertionError, match=expected):
        check_classifier_data_not_an_array(
            'estimator_name', EstimatorInconsistentForPandas()
        )