def test_check_estimator_clones(): # check that check_estimator doesn't modify the estimator it receives from mrex.datasets import load_iris iris = load_iris() for Estimator in [ GaussianMixture, LinearRegression, RandomForestClassifier, NMF, SGDClassifier, MiniBatchKMeans ]: with ignore_warnings(category=(FutureWarning, DeprecationWarning)): # when 'est = SGDClassifier()' est = Estimator() set_checking_parameters(est) set_random_state(est) # without fitting old_hash = joblib.hash(est) check_estimator(est) assert old_hash == joblib.hash(est) with ignore_warnings(category=(FutureWarning, DeprecationWarning)): # when 'est = SGDClassifier()' est = Estimator() set_checking_parameters(est) set_random_state(est) # with fitting est.fit(iris.data + 10, iris.target) old_hash = joblib.hash(est) check_estimator(est) assert old_hash == joblib.hash(est)
def test_check_estimator_pairwise(): # check that check_estimator() works on estimator with _pairwise # kernel or metric # test precomputed kernel est = SVC(kernel='precomputed') check_estimator(est) # test precomputed metric est = KNeighborsRegressor(metric='precomputed') check_estimator(est)
def test_check_estimator_generate_only(): estimator_cls_gen_checks = check_estimator(LogisticRegression, generate_only=True) all_instance_gen_checks = check_estimator(LogisticRegression(), generate_only=True) assert isgenerator(estimator_cls_gen_checks) assert isgenerator(all_instance_gen_checks) estimator_cls_checks = list(estimator_cls_gen_checks) all_instance_checks = list(all_instance_gen_checks) # all classes checks include check_parameters_default_constructible assert len(estimator_cls_checks) == len(all_instance_checks) + 1 # TODO: meta-estimators like GridSearchCV has required parameters # that do not have default values. This is expected to change in the future with pytest.raises(SkipTest): for estimator, check in check_estimator(GridSearchCV, generate_only=True): check(estimator)
def test_check_estimators_voting_estimator(estimator): # FIXME: to be removed when meta-estimators can be specified themselves # their testing parameters (for required parameters). check_estimator(estimator) check_no_attributes_set_in_init(estimator.__class__.__name__, estimator)
def test_novelty_true_common_tests(): # the common tests are run for the default LOF (novelty=False). # here we run these common tests for LOF when novelty=True check_estimator(neighbors.LocalOutlierFactor(novelty=True))
def test_check_estimator(): # tests that the estimator actually fails on "bad" estimators. # not a complete test of all checks, which are very extensive. # check that we have a set_params and can clone msg = "it does not implement a 'get_params' methods" assert_raises_regex(TypeError, msg, check_estimator, object) assert_raises_regex(TypeError, msg, check_estimator, object()) # check that values returned by get_params match set_params msg = "get_params result does not match what was passed to set_params" assert_raises_regex(AssertionError, msg, check_estimator, ModifiesValueInsteadOfRaisingError()) assert_warns(UserWarning, check_estimator, RaisesErrorInSetParams()) assert_raises_regex(AssertionError, msg, check_estimator, ModifiesAnotherValue()) # check that we have a fit method msg = "object has no attribute 'fit'" assert_raises_regex(AttributeError, msg, check_estimator, BaseEstimator) assert_raises_regex(AttributeError, msg, check_estimator, BaseEstimator()) # check that fit does input validation msg = "ValueError not raised" assert_raises_regex(AssertionError, msg, check_estimator, BaseBadClassifier) assert_raises_regex(AssertionError, msg, check_estimator, BaseBadClassifier()) # check that sample_weights in fit accepts pandas.Series type try: from pandas import Series # noqa msg = ("Estimator NoSampleWeightPandasSeriesType raises error if " "'sample_weight' parameter is of type pandas.Series") assert_raises_regex(ValueError, msg, check_estimator, NoSampleWeightPandasSeriesType) except ImportError: pass # check that predict does input validation (doesn't accept dicts in input) msg = "Estimator doesn't check for NaN and inf in predict" assert_raises_regex(AssertionError, msg, check_estimator, NoCheckinPredict) assert_raises_regex(AssertionError, msg, check_estimator, NoCheckinPredict()) # check that estimator state does not change # at transform/predict/predict_proba time msg = 'Estimator changes __dict__ during predict' assert_raises_regex(AssertionError, msg, check_estimator, ChangesDict) # check that `fit` only changes attribures that # are private (start with an _ or end with a _). msg = ('Estimator ChangesWrongAttribute should not change or mutate ' 'the parameter wrong_attribute from 0 to 1 during fit.') assert_raises_regex(AssertionError, msg, check_estimator, ChangesWrongAttribute) check_estimator(ChangesUnderscoreAttribute) # check that `fit` doesn't add any public attribute msg = (r'Estimator adds public attribute\(s\) during the fit method.' ' Estimators are only allowed to add private attributes' ' either started with _ or ended' ' with _ but wrong_attribute added') assert_raises_regex(AssertionError, msg, check_estimator, SetsWrongAttribute) # check for invariant method name = NotInvariantPredict.__name__ method = 'predict' msg = ("{method} of {name} is not invariant when applied " "to a subset.").format(method=method, name=name) assert_raises_regex(AssertionError, msg, check_estimator, NotInvariantPredict) # check for sparse matrix input handling name = NoSparseClassifier.__name__ msg = "Estimator %s doesn't seem to fail gracefully on sparse data" % name # the check for sparse input handling prints to the stdout, # instead of raising an error, so as not to remove the original traceback. # that means we need to jump through some hoops to catch it. old_stdout = sys.stdout string_buffer = StringIO() sys.stdout = string_buffer try: check_estimator(NoSparseClassifier) except: pass finally: sys.stdout = old_stdout assert msg in string_buffer.getvalue() # Large indices test on bad estimator msg = ('Estimator LargeSparseNotSupportedClassifier doesn\'t seem to ' r'support \S{3}_64 matrix, and is not failing gracefully.*') assert_raises_regex(AssertionError, msg, check_estimator, LargeSparseNotSupportedClassifier) # does error on binary_only untagged estimator msg = 'Only 2 classes are supported' assert_raises_regex(ValueError, msg, check_estimator, UntaggedBinaryClassifier) # non-regression test for estimators transforming to sparse data check_estimator(SparseTransformer()) # doesn't error on actual estimator check_estimator(LogisticRegression) check_estimator(LogisticRegression(C=0.01)) check_estimator(MultiTaskElasticNet) check_estimator(MultiTaskElasticNet()) # doesn't error on binary_only tagged estimator check_estimator(TaggedBinaryClassifier) # Check regressor with requires_positive_y estimator tag check_estimator(RequiresPositiveYRegressor)