def test_check_class_weight_balanced_linear_classifier():
    # A classifier whose balanced class weights are ill-computed must make
    # the check fail with an AssertionError carrying the message below.
    expected_msg = (
        "Classifier estimator_name is not computing"
        " class_weight=balanced properly."
    )
    assert_raises_regex(
        AssertionError,
        expected_msg,
        check_class_weight_balanced_linear_classifier,
        'estimator_name',
        BadBalancedWeightsClassifier,
    )
def test_ridgecv_store_cv_values():
    # With store_cv_values=True and cv=None, cv_values_ must have one entry
    # per alpha for both 1-D and 2-D targets; combining store_cv_values=True
    # with cv=3 must raise a ValueError at fit time.
    rng = np.random.RandomState(42)

    n_samples, n_features = 8, 5
    x = rng.randn(n_samples, n_features)
    alphas = [1e-1, 1e0, 1e1]
    n_alphas = len(alphas)

    ridge = RidgeCV(alphas=alphas, cv=None, store_cv_values=True)

    # with len(y.shape) == 1
    y = rng.randn(n_samples)
    ridge.fit(x, y)
    assert ridge.cv_values_.shape == (n_samples, n_alphas)

    # with len(y.shape) == 2
    n_targets = 3
    y = rng.randn(n_samples, n_targets)
    ridge.fit(x, y)
    assert ridge.cv_values_.shape == (n_samples, n_targets, n_alphas)

    ridge = RidgeCV(cv=3, store_cv_values=True)
    assert_raises_regex(
        ValueError, 'cv!=None and store_cv_values', ridge.fit, x, y
    )
def test_pca_validation(setup):
    # Ensures that solver-specific extreme inputs for the n_components
    # parameter raise errors.

    # Lower bound quoted in the expected error message, per solver.
    lower_limit = {"randomized": 1, "full": 0, "auto": 0}
    smallest_d = 2  # The smallest dimension of X below

    for solver in solver_list:
        X = mt.array([[0, 1, 0], [1, 0, 0]])
        # The expected message names "full" when "auto" was requested.
        solver_reported = "full" if solver == "auto" else solver

        # We conduct the same test on X.T so that it is invariant to axis.
        for data in (X, X.T):
            for n_components in (-1, 3):
                bounds_pattern = (
                    f"n_components={n_components}L? must be between "
                    rf"{lower_limit[solver]}L? and min\(n_samples, n_features\)="
                    f"{smallest_d}L? with svd_solver='{solver_reported}'"
                )
                estimator = PCA(n_components, svd_solver=solver)
                assert_raises_regex(
                    ValueError, bounds_pattern, estimator.fit, data
                )

            # A float n_components >= 1 must be rejected as a non-int.
            # NOTE(review): nesting of this check (inside the per-data loop)
            # was reconstructed from a flattened source -- confirm.
            n_components = 1.0
            type_ncom = type(n_components)
            type_message = (
                f"n_components={n_components} must be of type int "
                f"when greater than or equal to 1, was of type={type_ncom}"
            )
            assert_raise_message(
                ValueError,
                type_message,
                PCA(n_components, svd_solver=solver).fit,
                data,
            )
def test_pca_validation(self):
    # Ensures that solver-specific extreme inputs for the n_components
    # parameter raise errors.

    # Lower bound quoted in the expected error message, per solver.
    lower_limit = {'randomized': 1, 'full': 0, 'auto': 0}
    smallest_d = 2  # The smallest dimension of X below

    bounds_template = (
        "n_components={}L? must be between "
        r"{}L? and min\(n_samples, n_features\)="
        "{}L? with svd_solver=\'{}\'"
    )
    type_template = (
        "n_components={} must be of type int "
        "when greater than or equal to 1, was of type={}"
    )

    for solver in self.solver_list:
        X = mt.array([[0, 1, 0], [1, 0, 0]])
        # The expected message names 'full' when 'auto' was requested.
        solver_reported = 'full' if solver == 'auto' else solver

        # We conduct the same test on X.T so that it is invariant to axis.
        for data in (X, X.T):
            for n_components in (-1, 3):
                bounds_pattern = bounds_template.format(
                    n_components, lower_limit[solver], smallest_d,
                    solver_reported)
                estimator = PCA(n_components, svd_solver=solver)
                assert_raises_regex(
                    ValueError, bounds_pattern, estimator.fit, data)

            # A float n_components >= 1 must be rejected as a non-int.
            # NOTE(review): nesting of this check (inside the per-data loop)
            # was reconstructed from a flattened source -- confirm.
            n_components = 1.0
            type_ncom = type(n_components)
            assert_raise_message(
                ValueError,
                type_template.format(n_components, type_ncom),
                PCA(n_components, svd_solver=solver).fit,
                data)
def test_not_an_array_array_function():
    # np.sum on a _NotAnArray must raise TypeError with the message below,
    # while np.may_share_memory must still return a truthy value.
    # Skipped on numpy older than 1.17.
    if np_version < parse_version('1.17'):
        raise SkipTest("array_function protocol not supported in numpy <1.17")

    wrapped = _NotAnArray(np.ones(10))
    expected_msg = "Don't want to call array_function sum!"
    assert_raises_regex(TypeError, expected_msg, np.sum, wrapped)

    # always returns True
    assert np.may_share_memory(wrapped, None)
def test_bad_pyfunc_metric():
    # Building a BallTree with a metric callable that returns a string
    # instead of a number must raise a TypeError.
    def returns_a_string(x, y):
        return "1"

    points = np.ones((5, 2))
    expected_msg = "Custom distance function must accept two vectors"
    assert_raises_regex(
        TypeError, expected_msg, BallTree, points, metric=returns_a_string
    )
def test_fit_predict_on_pipeline_without_fit_predict():
    # A pipeline must not expose fit_predict when its final step does not
    # define it: looking the attribute up has to raise AttributeError.
    steps = [('scaler', StandardScaler()), ('pca', PCA(svd_solver='full'))]
    pipe = Pipeline(steps)

    expected_msg = "'PCA' object has no attribute 'fit_predict'"
    assert_raises_regex(
        AttributeError, expected_msg, getattr, pipe, 'fit_predict'
    )
def check_classifiers_cont_target(name, estimator_orig):
    # Check that a classifier raises a ValueError mentioning an unknown label
    # type when fitted on continuous (regression-style) targets. Estimators
    # tagged "no_validation" are exempt from the check.
    X, _ = _create_small_ts_dataset()
    y = np.random.random(len(X))
    classifier = clone(estimator_orig)

    if classifier._get_tags()["no_validation"]:
        return

    assert_raises_regex(
        ValueError, 'Unknown label type: ', classifier.fit, X, y
    )
def test_check_estimators_unfitted():
    # The check must fail with an AssertionError for NoSparseClassifier,
    # reporting that predict did not raise NotFittedError.
    expected_msg = "NotFittedError not raised by predict"
    assert_raises_regex(
        AssertionError,
        expected_msg,
        check_estimators_unfitted,
        "estimator",
        NoSparseClassifier(),
    )

    # check that CorrectNotFittedError inherit from either ValueError
    # or AttributeError: this call must complete without raising.
    check_estimators_unfitted("estimator", CorrectNotFittedErrorClassifier())
def test_precompute_invalid_argument():
    # Invalid `precompute` values must be rejected with a ValueError listing
    # the accepted options.
    X, y, _, _ = build_dataset()

    cv_pattern = (".*should be.*True.*False.*auto.*"
                  "array-like.*Got 'invalid'")
    cv_estimators = (ElasticNetCV(precompute="invalid"),
                     LassoCV(precompute="invalid"))
    for clf in cv_estimators:
        assert_raises_regex(ValueError, cv_pattern, clf.fit, X, y)

    # Precompute = 'auto' is not supported for ElasticNet
    auto_pattern = (".*should be.*True.*False.*array-like.*"
                    "Got 'auto'")
    assert_raises_regex(
        ValueError, auto_pattern, ElasticNet(precompute='auto').fit, X, y
    )
def test_check_classification_targets():
    # Targets of an unsupported type must be rejected with a ValueError;
    # every other example in EXAMPLES must be accepted silently.
    unsupported_types = ("unknown", "continuous", 'continuous-multioutput')
    msg = 'Unknown label type: '

    # Iterate key/value pairs directly instead of re-indexing EXAMPLES.
    for y_type, examples in EXAMPLES.items():
        should_fail = y_type in unsupported_types
        for example in examples:
            if should_fail:
                assert_raises_regex(ValueError, msg,
                                    check_classification_targets, example)
            else:
                check_classification_targets(example)
def test_check_non_negative(retype):
    # check_non_negative must accept containers without negative entries and
    # raise a ValueError as soon as one entry is negative. `retype` converts
    # the raw data into the container type under test.
    A = np.array([[1, 1, 0, 0],
                  [1, 1, 0, 0],
                  [0, 0, 0, 0],
                  [0, 0, 0, 0]])

    # Inputs without negative values pass silently.
    for raw in (A, [[0, 0], [0, 0]]):
        check_non_negative(retype(raw), "")

    # A single negative entry must be reported.
    A[0, 0] = -1
    negative_input = retype(A)
    assert_raises_regex(
        ValueError, "Negative ", check_non_negative, negative_input, ""
    )
def test_gen_even_slices():
    # check that gen_even_slices contains all samples
    some_range = range(10)
    # `chunk` (not `slice`) so the builtin is not shadowed; from_iterable
    # avoids building an intermediate list just to unpack it.
    joined_range = list(chain.from_iterable(
        some_range[chunk] for chunk in gen_even_slices(10, 3)))
    assert_array_equal(some_range, joined_range)

    # check that passing negative n_chunks raises an error; the generator is
    # lazy, so the error only surfaces when the first item is requested
    slices = gen_even_slices(10, -1)
    assert_raises_regex(ValueError, "gen_even_slices got n_packs=-1, must be"
                        " >=1", next, slices)
def test_check_estimator_required_parameters_skip():
    # Passing an estimator class that declares required constructor
    # parameters to check_estimator must raise SkipTest with a message
    # naming those parameters.
    class MyEstimator(BaseEstimator):
        _required_parameters = ["special_parameter"]

        def __init__(self, special_parameter):
            self.special_parameter = special_parameter

    expected_pattern = (
        r"Can't instantiate estimator MyEstimator "
        r"which requires parameters "
        r"\['special_parameter'\]"
    )
    assert_raises_regex(
        SkipTest, expected_pattern, check_estimator, MyEstimator
    )
def test_check_fit2d_1feature():
    # check_fit2d_1feature must fail with an AssertionError when the
    # estimator's error for single-feature input is uninformative.
    class MyEst(SVC):
        # raises a bad error message when only 1 feature is passed
        def fit(self, X, y):
            if X.shape[1] == 1:
                raise ValueError("non informative error message")

    expected_msg = "The error message should contain one of the following"
    assert_raises_regex(
        AssertionError,
        expected_msg,
        check_fit2d_1feature,
        'estimator_name',
        MyEst(),
    )
def test_pipeline_with_cache_attribute():
    # A pipeline fits with DummyMemory as `memory`, but fitting with
    # WrongDummyMemory must raise a ValueError describing the accepted types.
    def make_steps():
        return [('transf', Transf()), ('clf', Mult())]

    X = np.array([[1, 2]])

    pipe = Pipeline(make_steps(), memory=DummyMemory())
    pipe.fit(X, y=None)

    dummy = WrongDummyMemory()
    pipe = Pipeline(make_steps(), memory=dummy)
    expected_msg = (
        "'memory' should be None, a string or"
        " have the same interface as joblib.Memory."
        " Got memory='{}' instead.".format(dummy)
    )
    assert_raises_regex(ValueError, expected_msg, pipe.fit, X)
def test_ridgecv_negative_alphas():
    # RidgeCV must reject negative alphas at fit time, whether they are given
    # as integers or as floats.
    X = np.array([[-1.0, -1.0], [-1.0, 0], [-.8, -1.0],
                  [1.0, 1.0], [1.0, 0.0]])
    y = [1, 1, 1, -1, -1]

    negative_alpha_sets = (
        (-1, -10, -100),       # Negative integers
        (-0.1, -1.0, -10.0),   # Negative floats
    )
    for alphas in negative_alpha_sets:
        ridge = RidgeCV(alphas=alphas)
        assert_raises_regex(
            ValueError, "alphas must be positive", ridge.fit, X, y
        )
def test_pipeline_wrong_memory():
    # Test that an error is raised when memory is not a string or a Memory
    # instance: here `memory` is the integer 1.
    X, y = iris.data, iris.target

    steps = [('transf', DummyTransf()), ('svc', SVC())]
    cached_pipe = Pipeline(steps, memory=1)

    expected_msg = (
        "'memory' should be None, a string or"
        " have the same interface as joblib.Memory."
        " Got memory='1' instead."
    )
    assert_raises_regex(ValueError, expected_msg, cached_pipe.fit, X, y)
def test_multi_target_sample_weights_api():
    # Passing sample weights to MultiOutputRegressor must raise a ValueError
    # when wrapping OrthogonalMatchingPursuit, and must work when wrapping
    # GradientBoostingRegressor.
    X = [[1, 2, 3], [4, 5, 6]]
    y = [[3.141, 2.718], [2.718, 3.141]]
    weights = [0.8, 0.6]

    unsupported = MultiOutputRegressor(OrthogonalMatchingPursuit())
    assert_raises_regex(
        ValueError, "does not support sample weights",
        unsupported.fit, X, y, weights,
    )

    # no exception should be raised if the base estimator supports weights
    supported = MultiOutputRegressor(GradientBoostingRegressor(random_state=0))
    supported.fit(X, y, weights)
def test_check_estimator_required_parameters_skip():
    # TODO: remove whole test in 0.24 since passing classes to
    # check_estimator() isn't supported anymore
    #
    # Passing an estimator class that declares required constructor
    # parameters to check_estimator must raise SkipTest with a message
    # naming those parameters.
    class MyEstimator(BaseEstimator):
        _required_parameters = ["special_parameter"]

        def __init__(self, special_parameter):
            self.special_parameter = special_parameter

    skip_pattern = (
        r"Can't instantiate estimator MyEstimator "
        r"which requires parameters "
        r"\['special_parameter'\]"
    )
    assert_raises_regex(SkipTest, skip_pattern, check_estimator, MyEstimator)
def test_novelty_errors():
    # LocalOutlierFactor must hide the methods that are incompatible with its
    # `novelty` setting: accessing them has to raise AttributeError.
    X = iris.data

    # check errors for novelty=False (the default constructor is used here)
    lof = neighbors.LocalOutlierFactor()
    lof.fit(X)
    # accessing predict, decision_function and score_samples raises
    # AttributeError
    for method in ('predict', 'decision_function', 'score_samples'):
        expected_msg = '{} is not available when novelty=False'.format(method)
        assert_raises_regex(AttributeError, expected_msg, getattr, lof, method)

    # check errors for novelty=True
    novelty_lof = neighbors.LocalOutlierFactor(novelty=True)
    expected_msg = 'fit_predict is not available when novelty=True'
    assert_raises_regex(
        AttributeError, expected_msg, getattr, novelty_lof, 'fit_predict'
    )
def test_ridge_regression_check_arguments_validity(return_intercept,
                                                   sample_weight, arr_type,
                                                   solver):
    """check if all combinations of arguments give valid estimations"""

    # test excludes 'svd' solver because it raises exception for sparse inputs

    rng = check_random_state(42)
    X = rng.rand(1000, 3)
    true_coefs = [1, 2, 0.1]
    y = np.dot(X, true_coefs)
    # Shift the target only when the intercept is to be recovered.
    true_intercept = 10000. if return_intercept else 0.
    y += true_intercept
    X_testing = arr_type(X)

    alpha, atol, tol = 1e-3, 1e-4, 1e-6
    # Keyword arguments shared by the error path and the regular call.
    call_kwargs = dict(
        alpha=alpha,
        solver=solver,
        sample_weight=sample_weight,
        return_intercept=return_intercept,
        tol=tol,
    )

    # Requesting the intercept with a solver other than 'sag'/'auto' must be
    # rejected; nothing else is checked for that combination.
    if return_intercept and solver not in ('sag', 'auto'):
        assert_raises_regex(ValueError,
                            "In Ridge, only 'sag' solver",
                            ridge_regression, X_testing, y,
                            **call_kwargs)
        return

    out = ridge_regression(X_testing, y, **call_kwargs)

    if not return_intercept:
        assert_allclose(out, true_coefs, rtol=0, atol=atol)
        return

    coef, intercept = out
    assert_allclose(coef, true_coefs, rtol=0, atol=atol)
    assert_allclose(intercept, true_intercept, rtol=0, atol=atol)
def test_check_estimator_get_tags_default_keys():
    # The check must fail with an AssertionError naming the missing tag for
    # EstimatorMissingDefaultTags, and must pass for MinimalTransformer.
    incomplete = EstimatorMissingDefaultTags()
    err_msg = (
        r"EstimatorMissingDefaultTags._get_tags\(\) is missing entries"
        r" for the following default tags: {'allow_nan'}"
    )
    assert_raises_regex(
        AssertionError,
        err_msg,
        check_estimator_get_tags_default_keys,
        incomplete.__class__.__name__,
        incomplete,
    )

    # noop check when _get_tags is not available
    minimal = MinimalTransformer()
    check_estimator_get_tags_default_keys(minimal.__class__.__name__, minimal)
def test_type_of_target():
    # type_of_target must return the EXAMPLES group name for every example in
    # that group, and must raise a ValueError for non array-like inputs and
    # for legacy sequence-of-sequences multilabel data.
    for group, group_examples in EXAMPLES.items():
        for example in group_examples:
            detected = type_of_target(example)
            assert detected == group, (
                'type_of_target(%r) should be %r, got %r'
                % (example, group, detected))

    not_array_like = r'Expected array-like \(array or non-string sequence\).*'
    for example in NON_ARRAY_LIKE_EXAMPLES:
        assert_raises_regex(ValueError, not_array_like,
                            type_of_target, example)

    legacy_multilabel = (
        'You appear to be using a legacy multi-label data '
        'representation. Sequence of sequences are no longer supported;'
        ' use a binary array or sparse matrix instead.'
    )
    for example in MULTILABEL_SEQUENCES:
        assert_raises_regex(ValueError, legacy_multilabel,
                            type_of_target, example)
def test_feature_union():
    # basic sanity check for feature union
    # Center into a NEW array: the previous `X = iris.data; X -= ...` form
    # modified the shared module-level dataset in place, which made the
    # data seen by other tests depend on whether this test had run.
    X = iris.data - iris.data.mean(axis=0)
    y = iris.target
    svd = TruncatedSVD(n_components=2, random_state=0)
    select = SelectKBest(k=1)
    fs = FeatureUnion([("svd", svd), ("select", select)])
    fs.fit(X, y)
    X_transformed = fs.transform(X)
    # 2 SVD components + 1 selected feature
    assert X_transformed.shape == (X.shape[0], 3)

    # check if it does the expected thing
    assert_array_almost_equal(X_transformed[:, :-1], svd.fit_transform(X))
    assert_array_equal(X_transformed[:, -1],
                       select.fit_transform(X, y).ravel())

    # test if it also works for sparse input
    # The same svd instance (created with random_state=0) is reused here.
    fs = FeatureUnion([("svd", svd), ("select", select)])
    X_sp = sparse.csr_matrix(X)
    X_sp_transformed = fs.fit_transform(X_sp, y)
    assert_array_almost_equal(X_transformed, X_sp_transformed.toarray())

    # Test clone: the cloned union must hold distinct transformer objects
    fs2 = assert_no_warnings(clone, fs)
    assert fs.transformer_list[0][1] is not fs2.transformer_list[0][1]

    # test setting parameters
    fs.set_params(select__k=2)
    assert fs.fit_transform(X, y).shape == (X.shape[0], 4)

    # test it works with transformers missing fit_transform
    fs = FeatureUnion([("mock", Transf()), ("svd", svd), ("select", select)])
    X_transformed = fs.fit_transform(X, y)
    assert X_transformed.shape == (X.shape[0], 8)

    # test error if some elements do not support transform
    assert_raises_regex(TypeError,
                        'All estimators should implement fit and '
                        'transform.*\\bNoTrans\\b',
                        FeatureUnion,
                        [("transform", Transf()), ("no_transform", NoTrans())])

    # test that init accepts tuples
    fs = FeatureUnion((("svd", svd), ("select", select)))
    fs.fit(X, y)
def test_check_memory():
    # check_memory must accept a path string, None, and DummyMemory, and must
    # reject the integer 1 and WrongDummyMemory with a ValueError.
    from_path = check_memory("cache_directory")
    assert from_path.cachedir == os.path.join('cache_directory', 'joblib')

    from_none = check_memory(None)
    assert from_none.cachedir is None

    # An accepted memory object is returned unchanged.
    dummy = DummyMemory()
    assert check_memory(dummy) is dummy

    int_msg = (
        "'memory' should be None, a string or"
        " have the same interface as joblib.Memory."
        " Got memory='1' instead."
    )
    assert_raises_regex(ValueError, int_msg, check_memory, 1)

    dummy = WrongDummyMemory()
    wrong_msg = (
        "'memory' should be None, a string or"
        " have the same interface as joblib.Memory."
        " Got memory='{}' instead.".format(dummy)
    )
    assert_raises_regex(ValueError, wrong_msg, check_memory, dummy)
def test_check_no_attributes_set_in_init():
    # The check must flag an estimator that sets a non-parameter attribute in
    # __init__ (AssertionError) and one that fails to store a constructor
    # parameter (AttributeError).
    class NonConformantEstimatorPrivateSet(BaseEstimator):
        def __init__(self):
            self.you_should_not_set_this_ = None

    class NonConformantEstimatorNoParamSet(BaseEstimator):
        def __init__(self, you_should_set_this_=None):
            pass

    extra_attribute_msg = (
        "Estimator estimator_name should not set any"
        " attribute apart from parameters during init."
        r" Found attributes \['you_should_not_set_this_'\]."
    )
    assert_raises_regex(
        AssertionError,
        extra_attribute_msg,
        check_no_attributes_set_in_init,
        'estimator_name',
        NonConformantEstimatorPrivateSet(),
    )

    missing_param_msg = (
        "Estimator estimator_name should store all "
        "parameters as an attribute during init."
    )
    assert_raises_regex(
        AttributeError,
        missing_param_msg,
        check_no_attributes_set_in_init,
        'estimator_name',
        NonConformantEstimatorNoParamSet(),
    )
def test_check_consistent_length():
    # Inputs with the same number of samples pass silently.
    check_consistent_length([1], [2], [3], [4], [5])
    check_consistent_length([[1, 2], [[1, 2]]], [1, 2], ['a', 'b'])
    check_consistent_length([1], (2, ), np.array([3]), sp.csr_matrix((1, 2)))

    # Mismatched lengths are a ValueError.
    assert_raises_regex(ValueError, 'inconsistent numbers of samples',
                        check_consistent_length, [1, 2], [1])

    # Objects that are not sequences or arrays are a TypeError.
    for pattern, bad_input in ((r"got <\w+ 'int'>", 1),
                               (r"got <\w+ 'object'>", object())):
        assert_raises_regex(TypeError, pattern,
                            check_consistent_length, [1, 2], bad_input)

    # A 0-d array is rejected as well.
    assert_raises(TypeError, check_consistent_length, [1, 2], np.array(1))

    # Despite ensembles having __len__ they must raise TypeError
    assert_raises_regex(TypeError, 'Expected sequence or array-like',
                        check_consistent_length, [1, 2],
                        RandomForestRegressor())
def check_target_type(name, estimator):
    # fit_resample must raise a ValueError for a continuous target ...
    X = np.random.random((20, 2))
    continuous_y = np.linspace(0, 1, 20)
    continuous_msg = "Unknown label type: 'continuous'"
    assert_raises_regex(
        ValueError, continuous_msg, estimator.fit_resample, X, continuous_y
    )

    # ... and likewise for a 2-D (multilabel / multioutput) target
    rng = np.random.RandomState(42)
    multilabel_y = rng.randint(2, size=(20, 3))
    multilabel_msg = "Multilabel and multioutput targets are not supported."
    assert_raises_regex(
        ValueError, multilabel_msg, estimator.fit_resample, X, multilabel_y
    )
def test_check_array_complex_data_error():
    # check_array must raise a ValueError for complex-valued input, whatever
    # container the data arrives in.
    def assert_rejected(data):
        assert_raises_regex(ValueError, "Complex data not supported",
                            check_array, data)

    first_row = [1 + 2j, 3 + 4j, 5 + 7j]
    second_row = [2 + 3j, 4 + 5j, 6 + 7j]

    # 2-D ndarray
    assert_rejected(np.array([first_row, second_row]))

    # list of lists
    assert_rejected([list(first_row), list(second_row)])

    # tuple of tuples
    assert_rejected((tuple(first_row), tuple(second_row)))

    # list of np arrays
    assert_rejected([np.array(first_row), np.array(second_row)])

    # tuple of np arrays
    assert_rejected((np.array(first_row), np.array(second_row)))

    # dataframe
    assert_rejected(MockDataFrame(np.array([first_row, second_row])))

    # sparse matrix
    assert_rejected(sp.coo_matrix([[0, 1 + 2j], [0, 0]]))