def test_raises_value_error_if_sample_weights_greater_than_1d(): # Sample weights must be either scalar or 1D n_sampless = [2, 3] n_featuress = [3, 2] rng = np.random.RandomState(42) for n_samples, n_features in zip(n_sampless, n_featuress): X = rng.randn(n_samples, n_features) y = rng.randn(n_samples) sample_weights_OK = rng.randn(n_samples)**2 + 1 sample_weights_OK_1 = 1. sample_weights_OK_2 = 2. sample_weights_not_OK = sample_weights_OK[:, np.newaxis] sample_weights_not_OK_2 = sample_weights_OK[np.newaxis, :] ridge = Ridge(alpha=1) # make sure the "OK" sample weights actually work ridge.fit(X, y, sample_weights_OK) ridge.fit(X, y, sample_weights_OK_1) ridge.fit(X, y, sample_weights_OK_2) def fit_ridge_not_ok(): ridge.fit(X, y, sample_weights_not_OK) def fit_ridge_not_ok_2(): ridge.fit(X, y, sample_weights_not_OK_2) assert_raise_message(ValueError, "Sample weights must be 1D array or scalar", fit_ridge_not_ok) assert_raise_message(ValueError, "Sample weights must be 1D array or scalar", fit_ridge_not_ok_2)
def test_gaussian_mixture_predict_predict_proba(): rng = np.random.RandomState(0) rand_data = RandomData(rng) for covar_type in COVARIANCE_TYPE: X = rand_data.X[covar_type] Y = rand_data.Y g = GaussianMixture(n_components=rand_data.n_components, random_state=rng, weights_init=rand_data.weights, means_init=rand_data.means, precisions_init=rand_data.precisions[covar_type], covariance_type=covar_type) # Check a warning message arrive if we don't do fit assert_raise_message( NotFittedError, "This GaussianMixture instance is not fitted " "yet. Call 'fit' with appropriate arguments " "before using this method.", g.predict, X) g.fit(X) Y_pred = g.predict(X) Y_pred_proba = g.predict_proba(X).argmax(axis=1) assert_array_equal(Y_pred, Y_pred_proba) assert adjusted_rand_score(Y, Y_pred) > .95
def test_warm_start(): X = X_iris y = y_iris y_2classes = np.array([0] * 75 + [1] * 75) y_3classes = np.array([0] * 40 + [1] * 40 + [2] * 70) y_3classes_alt = np.array([0] * 50 + [1] * 50 + [3] * 50) y_4classes = np.array([0] * 37 + [1] * 37 + [2] * 38 + [3] * 38) y_5classes = np.array([0] * 30 + [1] * 30 + [2] * 30 + [3] * 30 + [4] * 30) # No error raised clf = MLPClassifier(hidden_layer_sizes=2, solver='lbfgs', warm_start=True).fit(X, y) clf.fit(X, y) clf.fit(X, y_3classes) for y_i in (y_2classes, y_3classes_alt, y_4classes, y_5classes): clf = MLPClassifier(hidden_layer_sizes=2, solver='lbfgs', warm_start=True).fit(X, y) message = ('warm_start can only be used where `y` has the same ' 'classes as in the previous call to fit.' ' Previously got [0 1 2], `y` has %s' % np.unique(y_i)) assert_raise_message(ValueError, message, clf.fit, X, y_i)
def test_column_transformer_special_strings(): # one 'drop' -> ignore X_array = np.array([[0., 1., 2.], [2., 4., 6.]]).T ct = ColumnTransformer([('trans1', Trans(), [0]), ('trans2', 'drop', [1])]) exp = np.array([[0.], [1.], [2.]]) assert_array_equal(ct.fit_transform(X_array), exp) assert_array_equal(ct.fit(X_array).transform(X_array), exp) assert len(ct.transformers_) == 2 assert ct.transformers_[-1][0] != 'remainder' # all 'drop' -> return shape 0 array ct = ColumnTransformer([('trans1', 'drop', [0]), ('trans2', 'drop', [1])]) assert_array_equal(ct.fit(X_array).transform(X_array).shape, (3, 0)) assert_array_equal(ct.fit_transform(X_array).shape, (3, 0)) assert len(ct.transformers_) == 2 assert ct.transformers_[-1][0] != 'remainder' # 'passthrough' X_array = np.array([[0., 1., 2.], [2., 4., 6.]]).T ct = ColumnTransformer([('trans1', Trans(), [0]), ('trans2', 'passthrough', [1])]) exp = X_array assert_array_equal(ct.fit_transform(X_array), exp) assert_array_equal(ct.fit(X_array).transform(X_array), exp) assert len(ct.transformers_) == 2 assert ct.transformers_[-1][0] != 'remainder' # None itself / other string is not valid for val in [None, 'other']: ct = ColumnTransformer([('trans1', Trans(), [0]), ('trans2', None, [1])]) assert_raise_message(TypeError, "All estimators should implement", ct.fit_transform, X_array) assert_raise_message(TypeError, "All estimators should implement", ct.fit, X_array)
def test_column_transformer_error_msg_1D(): X_array = np.array([[0., 1., 2.], [2., 4., 6.]]).T col_trans = ColumnTransformer([('trans', StandardScaler(), 0)]) assert_raise_message(ValueError, "1D data passed to a transformer", col_trans.fit, X_array) assert_raise_message(ValueError, "1D data passed to a transformer", col_trans.fit_transform, X_array) col_trans = ColumnTransformer([('trans', TransRaise(), 0)]) for func in [col_trans.fit, col_trans.fit_transform]: assert_raise_message(ValueError, "specific message", func, X_array)
def test_error_messages_on_wrong_input(): for score_func in score_funcs: expected = ('labels_true and labels_pred must have same size,' ' got 2 and 3') assert_raise_message(ValueError, expected, score_func, [0, 1], [1, 1, 1]) expected = "labels_true must be 1D: shape is (2" assert_raise_message(ValueError, expected, score_func, [[0, 1], [1, 0]], [1, 1, 1]) expected = "labels_pred must be 1D: shape is (2" assert_raise_message(ValueError, expected, score_func, [0, 1, 0], [[1, 1], [0, 0]])
def test_params_validation(): # Test that invalid parameters raise value error X = np.arange(12).reshape(4, 3) y = [1, 1, 2, 2] NCA = NeighborhoodComponentsAnalysis rng = np.random.RandomState(42) # TypeError assert_raises(TypeError, NCA(max_iter='21').fit, X, y) assert_raises(TypeError, NCA(verbose='true').fit, X, y) assert_raises(TypeError, NCA(tol='1').fit, X, y) assert_raises(TypeError, NCA(n_components='invalid').fit, X, y) assert_raises(TypeError, NCA(warm_start=1).fit, X, y) # ValueError assert_raise_message(ValueError, "`init` must be 'auto', 'pca', 'lda', 'identity', " "'random' or a numpy array of shape " "(n_components, n_features).", NCA(init=1).fit, X, y) assert_raise_message(ValueError, '`max_iter`= -1, must be >= 1.', NCA(max_iter=-1).fit, X, y) init = rng.rand(5, 3) assert_raise_message(ValueError, 'The output dimensionality ({}) of the given linear ' 'transformation `init` cannot be greater than its ' 'input dimensionality ({}).' .format(init.shape[0], init.shape[1]), NCA(init=init).fit, X, y) n_components = 10 assert_raise_message(ValueError, 'The preferred dimensionality of the ' 'projected space `n_components` ({}) cannot ' 'be greater than the given data ' 'dimensionality ({})!' .format(n_components, X.shape[1]), NCA(n_components=n_components).fit, X, y)
def test_column_transformer_get_feature_names(): X_array = np.array([[0., 1., 2.], [2., 4., 6.]]).T ct = ColumnTransformer([('trans', Trans(), [0, 1])]) # raise correct error when not fitted with pytest.raises(NotFittedError): ct.get_feature_names() # raise correct error when no feature names are available ct.fit(X_array) assert_raise_message( AttributeError, "Transformer trans (type Trans) does not provide " "get_feature_names", ct.get_feature_names) # working example X = np.array([[{ 'a': 1, 'b': 2 }, { 'a': 3, 'b': 4 }], [{ 'c': 5 }, { 'c': 6 }]], dtype=object).T ct = ColumnTransformer([('col' + str(i), DictVectorizer(), i) for i in range(2)]) ct.fit(X) assert ct.get_feature_names() == ['col0__a', 'col0__b', 'col1__c'] # passthrough transformers not supported ct = ColumnTransformer([('trans', 'passthrough', [0, 1])]) ct.fit(X) assert_raise_message(NotImplementedError, 'get_feature_names is not yet supported', ct.get_feature_names) ct = ColumnTransformer([('trans', DictVectorizer(), 0)], remainder='passthrough') ct.fit(X) assert_raise_message(NotImplementedError, 'get_feature_names is not yet supported', ct.get_feature_names) # drop transformer ct = ColumnTransformer([('col0', DictVectorizer(), 0), ('col1', 'drop', 1)]) ct.fit(X) assert ct.get_feature_names() == ['col0__a', 'col0__b']
def test_fetch_openml_raises_illegal_argument(): assert_raise_message(ValueError, "Dataset data_id=", fetch_openml, data_id=-1, name="name") assert_raise_message(ValueError, "Dataset data_id=", fetch_openml, data_id=-1, name=None, version="version") assert_raise_message(ValueError, "Dataset data_id=", fetch_openml, data_id=-1, name="name", version="version") assert_raise_message( ValueError, "Neither name nor data_id are provided. " "Please provide name or data_id.", fetch_openml)
def test_assert_allclose_dense_sparse(): x = np.arange(9).reshape(3, 3) msg = "Not equal to tolerance " y = sparse.csc_matrix(x) for X in [x, y]: # basic compare assert_raise_message(AssertionError, msg, assert_allclose_dense_sparse, X, X * 2) assert_allclose_dense_sparse(X, X) assert_raise_message(ValueError, "Can only compare two sparse", assert_allclose_dense_sparse, x, y) A = sparse.diags(np.ones(5), offsets=0).tocsr() B = sparse.csr_matrix(np.ones((1, 5))) assert_raise_message(AssertionError, "Arrays are not equal", assert_allclose_dense_sparse, B, A)
def test_check_weights(): rng = np.random.RandomState(0) rand_data = RandomData(rng) n_components = rand_data.n_components X = rand_data.X['full'] g = GaussianMixture(n_components=n_components) # Check bad shape weights_bad_shape = rng.rand(n_components, 1) g.weights_init = weights_bad_shape assert_raise_message( ValueError, "The parameter 'weights' should have the shape of " "(%d,), but got %s" % (n_components, str(weights_bad_shape.shape)), g.fit, X) # Check bad range weights_bad_range = rng.rand(n_components) + 1 g.weights_init = weights_bad_range assert_raise_message( ValueError, "The parameter 'weights' should be in the range " "[0, 1], but got max value %.5f, min value %.5f" % (np.min(weights_bad_range), np.max(weights_bad_range)), g.fit, X) # Check bad normalization weights_bad_norm = rng.rand(n_components) weights_bad_norm = weights_bad_norm / (weights_bad_norm.sum() + 1) g.weights_init = weights_bad_norm assert_raise_message( ValueError, "The parameter 'weights' should be normalized, " "but got sum(weights) = %.5f" % np.sum(weights_bad_norm), g.fit, X) # Check good weights matrix weights = rand_data.weights g = GaussianMixture(weights_init=weights, n_components=n_components) g.fit(X) assert_array_equal(weights, g.weights_init)
def test_l1_min_c_l2_loss(): # loss='l2' should raise ValueError assert_raise_message(ValueError, "loss type not in", l1_min_c, dense_X, Y1, "l2")
def test_bayesian_mixture_precisions_prior_initialisation(): rng = np.random.RandomState(0) n_samples, n_features = 10, 2 X = rng.rand(n_samples, n_features) # Check raise message for a bad value of degrees_of_freedom_prior bad_degrees_of_freedom_prior_ = n_features - 1. bgmm = BayesianGaussianMixture( degrees_of_freedom_prior=bad_degrees_of_freedom_prior_, random_state=rng) assert_raise_message( ValueError, "The parameter 'degrees_of_freedom_prior' should be " "greater than %d, but got %.3f." % (n_features - 1, bad_degrees_of_freedom_prior_), bgmm.fit, X) # Check correct init for a given value of degrees_of_freedom_prior degrees_of_freedom_prior = rng.rand() + n_features - 1. bgmm = BayesianGaussianMixture( degrees_of_freedom_prior=degrees_of_freedom_prior, random_state=rng).fit(X) assert_almost_equal(degrees_of_freedom_prior, bgmm.degrees_of_freedom_prior_) # Check correct init for the default value of degrees_of_freedom_prior degrees_of_freedom_prior_default = n_features bgmm = BayesianGaussianMixture( degrees_of_freedom_prior=degrees_of_freedom_prior_default, random_state=rng).fit(X) assert_almost_equal(degrees_of_freedom_prior_default, bgmm.degrees_of_freedom_prior_) # Check correct init for a given value of covariance_prior covariance_prior = { 'full': np.cov(X.T, bias=1) + 10, 'tied': np.cov(X.T, bias=1) + 5, 'diag': np.diag(np.atleast_2d(np.cov(X.T, bias=1))) + 3, 'spherical': rng.rand() } bgmm = BayesianGaussianMixture(random_state=rng) for cov_type in ['full', 'tied', 'diag', 'spherical']: bgmm.covariance_type = cov_type bgmm.covariance_prior = covariance_prior[cov_type] bgmm.fit(X) assert_almost_equal(covariance_prior[cov_type], bgmm.covariance_prior_) # Check raise message for a bad spherical value of covariance_prior bad_covariance_prior_ = -1. bgmm = BayesianGaussianMixture(covariance_type='spherical', covariance_prior=bad_covariance_prior_, random_state=rng) assert_raise_message( ValueError, "The parameter 'spherical covariance_prior' " "should be greater than 0., but got %.3f." % bad_covariance_prior_, bgmm.fit, X) # Check correct init for the default value of covariance_prior covariance_prior_default = { 'full': np.atleast_2d(np.cov(X.T)), 'tied': np.atleast_2d(np.cov(X.T)), 'diag': np.var(X, axis=0, ddof=1), 'spherical': np.var(X, axis=0, ddof=1).mean() } bgmm = BayesianGaussianMixture(random_state=0) for cov_type in ['full', 'tied', 'diag', 'spherical']: bgmm.covariance_type = cov_type bgmm.fit(X) assert_almost_equal(covariance_prior_default[cov_type], bgmm.covariance_prior_)
def assert_raises_on_only_one_label(func): """Assert message when there is only one label""" rng = np.random.RandomState(seed=0) assert_raise_message(ValueError, "Number of labels is", func, rng.rand(10, 2), np.zeros(10))
def assert_raises_on_all_points_same_cluster(func): """Assert message when all point are in different clusters""" rng = np.random.RandomState(seed=0) assert_raise_message(ValueError, "Number of labels is", func, rng.rand(10, 2), np.arange(10))
def test_fast_mcd_on_invalid_input(): X = np.arange(100) assert_raise_message(ValueError, 'Expected 2D array, got 1D array instead', fast_mcd, X)
def test_max_iter_error(): km = KMeans(max_iter=-1) assert_raise_message(ValueError, 'Number of iterations should be', km.fit, X)
def test_check_classification_targets(): # Test that check_classification_target return correct type. #5782 y = np.array([0.0, 1.1, 2.0, 3.0]) msg = type_of_target(y) assert_raise_message(ValueError, msg, check_classification_targets, y)
def test_check_docstring_parameters(): try: import numpydoc # noqa except ImportError: raise SkipTest("numpydoc is required to test the docstrings") incorrect = check_docstring_parameters(f_ok) assert incorrect == [] incorrect = check_docstring_parameters(f_ok, ignore=['b']) assert incorrect == [] incorrect = check_docstring_parameters(f_missing, ignore=['b']) assert incorrect == [] assert_raise_message(RuntimeError, 'Unknown section Results', check_docstring_parameters, f_bad_sections) assert_raise_message(RuntimeError, 'Unknown section Parameter', check_docstring_parameters, Klass.f_bad_sections) incorrect = check_docstring_parameters(f_check_param_definition) assert (incorrect == [ "mrex.utils.tests.test_testing.f_check_param_definition There " "was no space between the param name and colon ('a: int')", "mrex.utils.tests.test_testing.f_check_param_definition There " "was no space between the param name and colon ('b:')", "mrex.utils.tests.test_testing.f_check_param_definition " "Parameter 'c :' has an empty type spec. Remove the colon", "mrex.utils.tests.test_testing.f_check_param_definition There " "was no space between the param name and colon ('d:int')", ]) messages = [ [ "In function: mrex.utils.tests.test_testing.f_bad_order", "There's a parameter name mismatch in function docstring w.r.t." " function signature, at index 0 diff: 'b' != 'a'", "Full diff:", "- ['b', 'a']", "+ ['a', 'b']" ], [ "In function: " + "mrex.utils.tests.test_testing.f_too_many_param_docstring", "Parameters in function docstring have more items w.r.t. function" " signature, first extra item: c", "Full diff:", "- ['a', 'b']", "+ ['a', 'b', 'c']", "? +++++" ], [ "In function: mrex.utils.tests.test_testing.f_missing", "Parameters in function docstring have less items w.r.t. function" " signature, first missing item: b", "Full diff:", "- ['a', 'b']", "+ ['a']" ], [ "In function: mrex.utils.tests.test_testing.Klass.f_missing", "Parameters in function docstring have less items w.r.t. function" " signature, first missing item: X", "Full diff:", "- ['X', 'y']", "+ []" ], [ "In function: " + "mrex.utils.tests.test_testing.MockMetaEstimator.predict", "There's a parameter name mismatch in function docstring w.r.t." " function signature, at index 0 diff: 'X' != 'y'", "Full diff:", "- ['X']", "? ^", "+ ['y']", "? ^" ], [ "In function: " + "mrex.utils.tests.test_testing.MockMetaEstimator." + "predict_proba", "Parameters in function docstring have less items w.r.t. function" " signature, first missing item: X", "Full diff:", "- ['X']", "+ []" ], [ "In function: " + "mrex.utils.tests.test_testing.MockMetaEstimator.score", "Parameters in function docstring have less items w.r.t. function" " signature, first missing item: X", "Full diff:", "- ['X']", "+ []" ], [ "In function: " + "mrex.utils.tests.test_testing.MockMetaEstimator.fit", "Parameters in function docstring have less items w.r.t. function" " signature, first missing item: X", "Full diff:", "- ['X', 'y']", "+ []" ], ] mock_meta = MockMetaEstimator(delegate=MockEst()) for msg, f in zip(messages, [ f_bad_order, f_too_many_param_docstring, f_missing, Klass.f_missing, mock_meta.predict, mock_meta.predict_proba, mock_meta.score, mock_meta.fit ]): incorrect = check_docstring_parameters(f) assert msg == incorrect, ('\n"%s"\n not in \n"%s"' % (msg, incorrect))
def test_meanshift_all_orphans(): # init away from the data, crash with a sensible warning ms = MeanShift(bandwidth=0.1, seeds=[[-9, -9], [-10, -10]]) msg = "No point was within bandwidth=0.1" assert_raise_message(ValueError, msg, ms.fit, X,)
def test_mcd_class_on_invalid_input(): X = np.arange(100) mcd = MinCovDet() assert_raise_message(ValueError, 'Expected 2D array, got 1D array instead', mcd.fit, X)
def test_gaussian_mixture_attributes(): # test bad parameters rng = np.random.RandomState(0) X = rng.rand(10, 2) n_components_bad = 0 gmm = GaussianMixture(n_components=n_components_bad) assert_raise_message( ValueError, "Invalid value for 'n_components': %d " "Estimation requires at least one component" % n_components_bad, gmm.fit, X) # covariance_type should be in [spherical, diag, tied, full] covariance_type_bad = 'bad_covariance_type' gmm = GaussianMixture(covariance_type=covariance_type_bad) assert_raise_message( ValueError, "Invalid value for 'covariance_type': %s " "'covariance_type' should be in " "['spherical', 'tied', 'diag', 'full']" % covariance_type_bad, gmm.fit, X) tol_bad = -1 gmm = GaussianMixture(tol=tol_bad) assert_raise_message( ValueError, "Invalid value for 'tol': %.5f " "Tolerance used by the EM must be non-negative" % tol_bad, gmm.fit, X) reg_covar_bad = -1 gmm = GaussianMixture(reg_covar=reg_covar_bad) assert_raise_message( ValueError, "Invalid value for 'reg_covar': %.5f " "regularization on covariance must be " "non-negative" % reg_covar_bad, gmm.fit, X) max_iter_bad = 0 gmm = GaussianMixture(max_iter=max_iter_bad) assert_raise_message( ValueError, "Invalid value for 'max_iter': %d " "Estimation requires at least one iteration" % max_iter_bad, gmm.fit, X) n_init_bad = 0 gmm = GaussianMixture(n_init=n_init_bad) assert_raise_message( ValueError, "Invalid value for 'n_init': %d " "Estimation requires at least one run" % n_init_bad, gmm.fit, X) init_params_bad = 'bad_method' gmm = GaussianMixture(init_params=init_params_bad) assert_raise_message( ValueError, "Unimplemented initialization method '%s'" % init_params_bad, gmm.fit, X) # test good parameters n_components, tol, n_init, max_iter, reg_covar = 2, 1e-4, 3, 30, 1e-1 covariance_type, init_params = 'full', 'random' gmm = GaussianMixture(n_components=n_components, tol=tol, n_init=n_init, max_iter=max_iter, reg_covar=reg_covar, covariance_type=covariance_type, init_params=init_params).fit(X) assert gmm.n_components == n_components assert gmm.covariance_type == covariance_type assert gmm.tol == tol assert gmm.reg_covar == reg_covar assert gmm.max_iter == max_iter assert gmm.n_init == n_init assert gmm.init_params == init_params
def test_set_pipeline_step_passthrough(passthrough): X = np.array([[1]]) y = np.array([1]) mult2 = Mult(mult=2) mult3 = Mult(mult=3) mult5 = Mult(mult=5) def make(): return Pipeline([('m2', mult2), ('m3', mult3), ('last', mult5)]) pipeline = make() exp = 2 * 3 * 5 assert_array_equal([[exp]], pipeline.fit_transform(X, y)) assert_array_equal([exp], pipeline.fit(X).predict(X)) assert_array_equal(X, pipeline.inverse_transform([[exp]])) pipeline.set_params(m3=passthrough) exp = 2 * 5 assert_array_equal([[exp]], pipeline.fit_transform(X, y)) assert_array_equal([exp], pipeline.fit(X).predict(X)) assert_array_equal(X, pipeline.inverse_transform([[exp]])) assert (pipeline.get_params(deep=True) == {'steps': pipeline.steps, 'm2': mult2, 'm3': passthrough, 'last': mult5, 'memory': None, 'm2__mult': 2, 'last__mult': 5, 'verbose': False }) pipeline.set_params(m2=passthrough) exp = 5 assert_array_equal([[exp]], pipeline.fit_transform(X, y)) assert_array_equal([exp], pipeline.fit(X).predict(X)) assert_array_equal(X, pipeline.inverse_transform([[exp]])) # for other methods, ensure no AttributeErrors on None: other_methods = ['predict_proba', 'predict_log_proba', 'decision_function', 'transform', 'score'] for method in other_methods: getattr(pipeline, method)(X) pipeline.set_params(m2=mult2) exp = 2 * 5 assert_array_equal([[exp]], pipeline.fit_transform(X, y)) assert_array_equal([exp], pipeline.fit(X).predict(X)) assert_array_equal(X, pipeline.inverse_transform([[exp]])) pipeline = make() pipeline.set_params(last=passthrough) # mult2 and mult3 are active exp = 6 assert_array_equal([[exp]], pipeline.fit(X, y).transform(X)) assert_array_equal([[exp]], pipeline.fit_transform(X, y)) assert_array_equal(X, pipeline.inverse_transform([[exp]])) assert_raise_message(AttributeError, "'str' object has no attribute 'predict'", getattr, pipeline, 'predict') # Check 'passthrough' step at construction time exp = 2 * 5 pipeline = Pipeline( [('m2', mult2), ('m3', passthrough), ('last', mult5)]) assert_array_equal([[exp]], pipeline.fit_transform(X, y)) assert_array_equal([exp], pipeline.fit(X).predict(X)) assert_array_equal(X, pipeline.inverse_transform([[exp]]))
def test_check_array(): # accept_sparse == False # raise error on sparse inputs X = [[1, 2], [3, 4]] X_csr = sp.csr_matrix(X) assert_raises(TypeError, check_array, X_csr) # ensure_2d=False X_array = check_array([0, 1, 2], ensure_2d=False) assert X_array.ndim == 1 # ensure_2d=True with 1d array assert_raise_message(ValueError, 'Expected 2D array, got 1D array instead', check_array, [0, 1, 2], ensure_2d=True) # ensure_2d=True with scalar array assert_raise_message(ValueError, 'Expected 2D array, got scalar array instead', check_array, 10, ensure_2d=True) # don't allow ndim > 3 X_ndim = np.arange(8).reshape(2, 2, 2) assert_raises(ValueError, check_array, X_ndim) check_array(X_ndim, allow_nd=True) # doesn't raise # dtype and order enforcement. X_C = np.arange(4).reshape(2, 2).copy("C") X_F = X_C.copy("F") X_int = X_C.astype(np.int) X_float = X_C.astype(np.float) Xs = [X_C, X_F, X_int, X_float] dtypes = [np.int32, np.int, np.float, np.float32, None, np.bool, object] orders = ['C', 'F', None] copys = [True, False] for X, dtype, order, copy in product(Xs, dtypes, orders, copys): X_checked = check_array(X, dtype=dtype, order=order, copy=copy) if dtype is not None: assert X_checked.dtype == dtype else: assert X_checked.dtype == X.dtype if order == 'C': assert X_checked.flags['C_CONTIGUOUS'] assert not X_checked.flags['F_CONTIGUOUS'] elif order == 'F': assert X_checked.flags['F_CONTIGUOUS'] assert not X_checked.flags['C_CONTIGUOUS'] if copy: assert X is not X_checked else: # doesn't copy if it was already good if (X.dtype == X_checked.dtype and X_checked.flags['C_CONTIGUOUS'] == X.flags['C_CONTIGUOUS'] and X_checked.flags['F_CONTIGUOUS'] == X.flags['F_CONTIGUOUS']): assert X is X_checked # allowed sparse != None X_csc = sp.csc_matrix(X_C) X_coo = X_csc.tocoo() X_dok = X_csc.todok() X_int = X_csc.astype(np.int) X_float = X_csc.astype(np.float) Xs = [X_csc, X_coo, X_dok, X_int, X_float] accept_sparses = [['csr', 'coo'], ['coo', 'dok']] for X, dtype, accept_sparse, copy in product(Xs, dtypes, accept_sparses, copys): with warnings.catch_warnings(record=True) as w: X_checked = check_array(X, dtype=dtype, accept_sparse=accept_sparse, copy=copy) if (dtype is object or sp.isspmatrix_dok(X)) and len(w): message = str(w[0].message) messages = [ "object dtype is not supported by sparse matrices", "Can't check dok sparse matrix for nan or inf." ] assert message in messages else: assert len(w) == 0 if dtype is not None: assert X_checked.dtype == dtype else: assert X_checked.dtype == X.dtype if X.format in accept_sparse: # no change if allowed assert X.format == X_checked.format else: # got converted assert X_checked.format == accept_sparse[0] if copy: assert X is not X_checked else: # doesn't copy if it was already good if X.dtype == X_checked.dtype and X.format == X_checked.format: assert X is X_checked # other input formats # convert lists to arrays X_dense = check_array([[1, 2], [3, 4]]) assert isinstance(X_dense, np.ndarray) # raise on too deep lists assert_raises(ValueError, check_array, X_ndim.tolist()) check_array(X_ndim.tolist(), allow_nd=True) # doesn't raise # convert weird stuff to arrays X_no_array = NotAnArray(X_dense) result = check_array(X_no_array) assert isinstance(result, np.ndarray) # deprecation warning if string-like array with dtype="numeric" expected_warn_regex = r"converted to decimal numbers if dtype='numeric'" X_str = [['11', '12'], ['13', 'xx']] for X in [X_str, np.array(X_str, dtype='U'), np.array(X_str, dtype='S')]: with pytest.warns(FutureWarning, match=expected_warn_regex): check_array(X, dtype="numeric") # deprecation warning if byte-like array with dtype="numeric" X_bytes = [[b'a', b'b'], [b'c', b'd']] for X in [X_bytes, np.array(X_bytes, dtype='V1')]: with pytest.warns(FutureWarning, match=expected_warn_regex): check_array(X, dtype="numeric")
def test_check_X_y_informative_error(): X = np.ones((2, 2)) y = None assert_raise_message(ValueError, "y cannot be None", check_X_y, X, y)
def test_check_array_min_samples_and_features_messages(): # empty list is considered 2D by default: msg = "0 feature(s) (shape=(1, 0)) while a minimum of 1 is required." assert_raise_message(ValueError, msg, check_array, [[]]) # If considered a 1D collection when ensure_2d=False, then the minimum # number of samples will break: msg = "0 sample(s) (shape=(0,)) while a minimum of 1 is required." assert_raise_message(ValueError, msg, check_array, [], ensure_2d=False) # Invalid edge case when checking the default minimum sample of a scalar msg = "Singleton array array(42) cannot be considered a valid collection." assert_raise_message(TypeError, msg, check_array, 42, ensure_2d=False) # Simulate a model that would need at least 2 samples to be well defined X = np.ones((1, 10)) y = np.ones(1) msg = "1 sample(s) (shape=(1, 10)) while a minimum of 2 is required." assert_raise_message(ValueError, msg, check_X_y, X, y, ensure_min_samples=2) # The same message is raised if the data has 2 dimensions even if this is # not mandatory assert_raise_message(ValueError, msg, check_X_y, X, y, ensure_min_samples=2, ensure_2d=False) # Simulate a model that would require at least 3 features (e.g. SelectKBest # with k=3) X = np.ones((10, 2)) y = np.ones(2) msg = "2 feature(s) (shape=(10, 2)) while a minimum of 3 is required." assert_raise_message(ValueError, msg, check_X_y, X, y, ensure_min_features=3) # Only the feature check is enabled whenever the number of dimensions is 2 # even if allow_nd is enabled: assert_raise_message(ValueError, msg, check_X_y, X, y, ensure_min_features=3, allow_nd=True) # Simulate a case where a pipeline stage as trimmed all the features of a # 2D dataset. X = np.empty(0).reshape(10, 0) y = np.ones(10) msg = "0 feature(s) (shape=(10, 0)) while a minimum of 1 is required." assert_raise_message(ValueError, msg, check_X_y, X, y) # nd-data is not checked for any minimum number of features by default: X = np.ones((10, 0, 28, 28)) y = np.ones(10) X_checked, y_checked = check_X_y(X, y, allow_nd=True) assert_array_equal(X, X_checked) assert_array_equal(y, y_checked)
def test_estimator_init(): eclf = VotingClassifier(estimators=[]) msg = ('Invalid `estimators` attribute, `estimators` should be' ' a list of (string, estimator) tuples') assert_raise_message(AttributeError, msg, eclf.fit, X, y) clf = LogisticRegression(random_state=1) eclf = VotingClassifier(estimators=[('lr', clf)], voting='error') msg = ('Voting must be \'soft\' or \'hard\'; got (voting=\'error\')') assert_raise_message(ValueError, msg, eclf.fit, X, y) eclf = VotingClassifier(estimators=[('lr', clf)], weights=[1, 2]) msg = ('Number of `estimators` and weights must be equal' '; got 2 weights, 1 estimators') assert_raise_message(ValueError, msg, eclf.fit, X, y) eclf = VotingClassifier(estimators=[('lr', clf), ('lr', clf)], weights=[1, 2]) msg = "Names provided are not unique: ['lr', 'lr']" assert_raise_message(ValueError, msg, eclf.fit, X, y) eclf = VotingClassifier(estimators=[('lr__', clf)]) msg = "Estimator names must not contain __: got ['lr__']" assert_raise_message(ValueError, msg, eclf.fit, X, y) eclf = VotingClassifier(estimators=[('estimators', clf)]) msg = "Estimator names conflict with constructor arguments: ['estimators']" assert_raise_message(ValueError, msg, eclf.fit, X, y)
def test_estimate_bandwidth_with_sparse_matrix(): # Test estimate_bandwidth with sparse matrix X = sparse.lil_matrix((1000, 1000)) msg = "A sparse matrix was passed, but dense data is required." assert_raise_message(TypeError, msg, estimate_bandwidth, X, 200)
def test_zero_cosine_linkage_tree(): # Check that zero vectors in X produce an error when # 'cosine' affinity is used X = np.array([[0, 1], [0, 0]]) msg = 'Cosine affinity cannot be used when X contains zero vectors' assert_raise_message(ValueError, msg, linkage_tree, X, affinity='cosine')
def test_rational_quadratic_kernel(): kernel = RationalQuadratic(length_scale=[1., 1.]) assert_raise_message( AttributeError, "RationalQuadratic kernel only supports isotropic " "version, please use a single " "scalar for length_scale", kernel, X)